Merge pull request #488 from treeform/guzba
avx2 line coverage blends, neon line blends
This commit is contained in:
commit
3fbd1da002
7 changed files with 520 additions and 93 deletions
|
@ -129,16 +129,25 @@ proc unfilter(
|
||||||
uncompressedStartIdx = uncompressedIdx(1, y)
|
uncompressedStartIdx = uncompressedIdx(1, y)
|
||||||
unfilteredStartIx = unfiteredIdx(0, y)
|
unfilteredStartIx = unfiteredIdx(0, y)
|
||||||
var x: int
|
var x: int
|
||||||
when allowSimd and defined(amd64):
|
when allowSimd and (defined(amd64) or defined(arm64)):
|
||||||
if y - 1 >= 0:
|
if y - 1 >= 0:
|
||||||
for _ in 0 ..< rowBytes div 16:
|
for _ in 0 ..< rowBytes div 16:
|
||||||
let
|
when defined(amd64):
|
||||||
bytes = mm_loadu_si128(uncompressed[uncompressedStartIdx + x].addr)
|
let
|
||||||
up = mm_loadu_si128(result[unfilteredStartIx + x - rowBytes].addr)
|
bytes = mm_loadu_si128(uncompressed[uncompressedStartIdx + x].addr)
|
||||||
mm_storeu_si128(
|
up = mm_loadu_si128(result[unfilteredStartIx + x - rowBytes].addr)
|
||||||
result[unfilteredStartIx + x].addr,
|
mm_storeu_si128(
|
||||||
mm_add_epi8(bytes, up)
|
result[unfilteredStartIx + x].addr,
|
||||||
)
|
mm_add_epi8(bytes, up)
|
||||||
|
)
|
||||||
|
else: # arm64
|
||||||
|
let
|
||||||
|
bytes = vld1q_u8(uncompressed[uncompressedStartIdx + x].addr)
|
||||||
|
up = vld1q_u8(result[unfilteredStartIx + x - rowBytes].addr)
|
||||||
|
vst1q_u8(
|
||||||
|
result[unfilteredStartIx + x].addr,
|
||||||
|
vaddq_u8(bytes, up)
|
||||||
|
)
|
||||||
x += 16
|
x += 16
|
||||||
for x in x ..< rowBytes:
|
for x in x ..< rowBytes:
|
||||||
var value = uncompressed[uncompressedStartIdx + x]
|
var value = uncompressed[uncompressedStartIdx + x]
|
||||||
|
|
|
@ -338,8 +338,9 @@ proc blur*(
|
||||||
var values: array[4, uint32]
|
var values: array[4, uint32]
|
||||||
for xx in x - radius ..< min(x + radius, 0):
|
for xx in x - radius ..< min(x + radius, 0):
|
||||||
values += outOfBounds * kernel[xx - x + radius]
|
values += outOfBounds * kernel[xx - x + radius]
|
||||||
|
var idx = image.dataIndex(0, y)
|
||||||
for xx in max(x - radius, 0) .. min(x + radius, image.width - 1):
|
for xx in max(x - radius, 0) .. min(x + radius, image.width - 1):
|
||||||
values += image.unsafe[xx, y] * kernel[xx - x + radius]
|
values += image.data[idx + xx] * kernel[xx - x + radius]
|
||||||
for xx in max(x - radius, image.width) .. x + radius:
|
for xx in max(x - radius, image.width) .. x + radius:
|
||||||
values += outOfBounds * kernel[xx - x + radius]
|
values += outOfBounds * kernel[xx - x + radius]
|
||||||
blurX.unsafe[y, x] = rgbx(values)
|
blurX.unsafe[y, x] = rgbx(values)
|
||||||
|
@ -350,8 +351,9 @@ proc blur*(
|
||||||
var values: array[4, uint32]
|
var values: array[4, uint32]
|
||||||
for yy in y - radius ..< min(y + radius, 0):
|
for yy in y - radius ..< min(y + radius, 0):
|
||||||
values += outOfBounds * kernel[yy - y + radius]
|
values += outOfBounds * kernel[yy - y + radius]
|
||||||
|
var idx = blurX.dataIndex(0, x)
|
||||||
for yy in max(y - radius, 0) .. min(y + radius, image.height - 1):
|
for yy in max(y - radius, 0) .. min(y + radius, image.height - 1):
|
||||||
values += blurX.unsafe[yy, x] * kernel[yy - y + radius]
|
values += blurX.data[idx + yy] * kernel[yy - y + radius]
|
||||||
for yy in max(y - radius, image.height) .. y + radius:
|
for yy in max(y - radius, image.height) .. y + radius:
|
||||||
values += outOfBounds * kernel[yy - y + radius]
|
values += outOfBounds * kernel[yy - y + radius]
|
||||||
image.unsafe[x, y] = rgbx(values)
|
image.unsafe[x, y] = rgbx(values)
|
||||||
|
@ -447,7 +449,9 @@ proc blendLineOverwrite(
|
||||||
) {.inline.} =
|
) {.inline.} =
|
||||||
copyMem(a[0].addr, b[0].addr, len * 4)
|
copyMem(a[0].addr, b[0].addr, len * 4)
|
||||||
|
|
||||||
proc blendLineNormal(a, b: ptr UncheckedArray[ColorRGBX], len: int) {.hasSimd.} =
|
proc blendLineNormal(
|
||||||
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
|
) {.hasSimd.} =
|
||||||
for i in 0 ..< len:
|
for i in 0 ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
|
||||||
if at.y == to.y: # Horizontal gradient
|
if at.y == to.y: # Horizontal gradient
|
||||||
var x: int
|
var x: int
|
||||||
while x < image.width:
|
while x < image.width:
|
||||||
when defined(amd64) and allowSimd:
|
when allowSimd and (defined(amd64) or defined(arm64)):
|
||||||
if x + 4 <= image.width:
|
if x + 4 <= image.width:
|
||||||
var colors: array[4, ColorRGBX]
|
var colors: array[4, ColorRGBX]
|
||||||
for i in 0 ..< 4:
|
for i in 0 ..< 4:
|
||||||
|
@ -128,10 +128,14 @@ proc fillGradientLinear(image: Image, paint: Paint) =
|
||||||
t = toLineSpace(at, to, xy)
|
t = toLineSpace(at, to, xy)
|
||||||
rgbx = paint.gradientColor(t)
|
rgbx = paint.gradientColor(t)
|
||||||
colors[i] = rgbx
|
colors[i] = rgbx
|
||||||
|
when defined(amd64):
|
||||||
let colorVec = cast[M128i](colors)
|
let colorVec = mm_loadu_si128(colors[0].addr)
|
||||||
for y in 0 ..< image.height:
|
for y in 0 ..< image.height:
|
||||||
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)
|
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)
|
||||||
|
else: # arm64
|
||||||
|
let colorVec = vld1q_u32(colors[0].addr)
|
||||||
|
for y in 0 ..< image.height:
|
||||||
|
vst1q_u32(image.data[image.dataIndex(x, y)].addr, colorVec)
|
||||||
x += 4
|
x += 4
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -150,11 +154,17 @@ proc fillGradientLinear(image: Image, paint: Paint) =
|
||||||
t = toLineSpace(at, to, xy)
|
t = toLineSpace(at, to, xy)
|
||||||
rgbx = paint.gradientColor(t)
|
rgbx = paint.gradientColor(t)
|
||||||
var x: int
|
var x: int
|
||||||
when defined(amd64) and allowSimd:
|
when allowSimd:
|
||||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
when defined(amd64):
|
||||||
for _ in 0 ..< image.width div 4:
|
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||||
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)
|
for _ in 0 ..< image.width div 4:
|
||||||
x += 4
|
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)
|
||||||
|
x += 4
|
||||||
|
elif defined(arm64):
|
||||||
|
let colorVec = vmovq_n_u32(cast[uint32](rgbx))
|
||||||
|
for _ in 0 ..< image.width div 4:
|
||||||
|
vst1q_u32(image.data[image.dataIndex(x, y)].addr, colorVec)
|
||||||
|
x += 4
|
||||||
for x in x ..< image.width:
|
for x in x ..< image.width:
|
||||||
image.unsafe[x, y] = rgbx
|
image.unsafe[x, y] = rgbx
|
||||||
|
|
||||||
|
@ -227,7 +237,6 @@ proc fillGradientAngular(image: Image, paint: Paint) =
|
||||||
|
|
||||||
proc fillGradient*(image: Image, paint: Paint) {.raises: [PixieError].} =
|
proc fillGradient*(image: Image, paint: Paint) {.raises: [PixieError].} =
|
||||||
## Fills with the Paint gradient.
|
## Fills with the Paint gradient.
|
||||||
|
|
||||||
case paint.kind:
|
case paint.kind:
|
||||||
of LinearGradientPaint:
|
of LinearGradientPaint:
|
||||||
image.fillGradientLinear(paint)
|
image.fillGradientLinear(paint)
|
||||||
|
|
|
@ -1410,13 +1410,21 @@ proc computeCoverage(
|
||||||
let fillLen = at.integer - fillStart
|
let fillLen = at.integer - fillStart
|
||||||
if fillLen > 0:
|
if fillLen > 0:
|
||||||
var i = fillStart
|
var i = fillStart
|
||||||
when defined(amd64) and allowSimd:
|
when allowSimd:
|
||||||
let sampleCoverageVec = mm_set1_epi8(sampleCoverage)
|
when defined(amd64):
|
||||||
for _ in 0 ..< fillLen div 16:
|
let sampleCoverageVec = mm_set1_epi8(sampleCoverage)
|
||||||
var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
|
for _ in 0 ..< fillLen div 16:
|
||||||
coverageVec = mm_add_epi8(coverageVec, sampleCoverageVec)
|
var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
|
||||||
mm_storeu_si128(coverages[i - startX].addr, coverageVec)
|
coverageVec = mm_add_epi8(coverageVec, sampleCoverageVec)
|
||||||
i += 16
|
mm_storeu_si128(coverages[i - startX].addr, coverageVec)
|
||||||
|
i += 16
|
||||||
|
elif defined(arm64):
|
||||||
|
let sampleCoverageVec = vmovq_n_u8(sampleCoverage)
|
||||||
|
for _ in 0 ..< fillLen div 16:
|
||||||
|
var coverageVec = vld1q_u8(coverages[i - startX].addr)
|
||||||
|
coverageVec = vaddq_u8(coverageVec, sampleCoverageVec)
|
||||||
|
vst1q_u8(coverages[i - startX].addr, coverageVec)
|
||||||
|
i += 16
|
||||||
for j in i ..< fillStart + fillLen:
|
for j in i ..< fillStart + fillLen:
|
||||||
coverages[j - startX] += sampleCoverage
|
coverages[j - startX] += sampleCoverage
|
||||||
|
|
||||||
|
|
|
@ -415,6 +415,76 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
||||||
# Set src as this result for if we do another power
|
# Set src as this result for if we do another power
|
||||||
src = result
|
src = result
|
||||||
|
|
||||||
|
template applyCoverage*(rgbxVec: M256i, coverage: M128i): M256i =
|
||||||
|
## Unpack the first 8 coverage bytes.
|
||||||
|
let
|
||||||
|
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)
|
||||||
|
unpacked1 = mm_shuffle_epi8(mm_srli_si128(coverage, 4), coverageShuffle)
|
||||||
|
unpacked =
|
||||||
|
mm256_insertf128_si256(mm256_castsi128_si256(unpacked0), unpacked1, 1)
|
||||||
|
|
||||||
|
var
|
||||||
|
rgbxEven = mm256_slli_epi16(rgbxVec, 8)
|
||||||
|
rgbxOdd = mm256_and_si256(rgbxVec, oddMask)
|
||||||
|
rgbxEven = mm256_mulhi_epu16(rgbxEven, unpacked)
|
||||||
|
rgbxOdd = mm256_mulhi_epu16(rgbxOdd, unpacked)
|
||||||
|
rgbxEven = mm256_srli_epi16(mm256_mulhi_epu16(rgbxEven, div255), 7)
|
||||||
|
rgbxOdd = mm256_srli_epi16(mm256_mulhi_epu16(rgbxOdd, div255), 7)
|
||||||
|
|
||||||
|
mm256_or_si256(rgbxEven, mm256_slli_epi16(rgbxOdd, 8))
|
||||||
|
|
||||||
|
proc blendLineCoverageOverwriteAvx2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 31) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage != 0:
|
||||||
|
line[i] = rgbx * coverage
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
rgbxVec = mm256_set1_epi32(cast[uint32](rgbx))
|
||||||
|
vecZero = mm256_setzero_si256()
|
||||||
|
vec255 = mm256_set1_epi8(255)
|
||||||
|
oddMask = mm256_set1_epi16(0xff00)
|
||||||
|
div255 = mm256_set1_epi16(0x8081)
|
||||||
|
coverageShuffle = mm_set_epi8(
|
||||||
|
3, -1, 3, -1, 2, -1, 2, -1, 1, -1, 1, -1, 0, -1, 0, -1
|
||||||
|
)
|
||||||
|
while i < len - 32:
|
||||||
|
let
|
||||||
|
coverage = mm256_loadu_si256(coverages[i].addr)
|
||||||
|
eqZero = mm256_cmpeq_epi8(coverage, vecZero)
|
||||||
|
eq255 = mm256_cmpeq_epi8(coverage, vec255)
|
||||||
|
if mm256_movemask_epi8(eqZero) == cast[int32](0xffffffff):
|
||||||
|
i += 32
|
||||||
|
elif mm256_movemask_epi8(eq255) == cast[int32](0xffffffff):
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm256_store_si256(line[i].addr, rgbxVec)
|
||||||
|
i += 8
|
||||||
|
else:
|
||||||
|
let
|
||||||
|
coverageLo = mm256_castsi256_si128(coverage)
|
||||||
|
coverageHi = mm256_extractf128_si256(coverage, 1)
|
||||||
|
coverages = [
|
||||||
|
coverageLo,
|
||||||
|
mm_srli_si128(coverageLo, 8),
|
||||||
|
coverageHi,
|
||||||
|
mm_srli_si128(coverageHi, 8),
|
||||||
|
]
|
||||||
|
for j in 0 ..< 4:
|
||||||
|
mm256_store_si256(line[i].addr, rgbxVec.applyCoverage(coverages[j]))
|
||||||
|
i += 8
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage != 0:
|
||||||
|
line[i] = rgbx * coverage
|
||||||
|
|
||||||
proc blendLineNormalAvx2*(
|
proc blendLineNormalAvx2*(
|
||||||
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
|
@ -473,6 +543,71 @@ proc blendLineNormalAvx2*(
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageNormalAvx2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 31) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
rgbxVec = mm256_set1_epi32(cast[uint32](rgbx))
|
||||||
|
vecZero = mm256_setzero_si256()
|
||||||
|
vec255 = mm256_set1_epi8(255)
|
||||||
|
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||||
|
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
||||||
|
vecAlpha255 = mm256_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
||||||
|
coverageShuffle = mm_set_epi8(
|
||||||
|
3, -1, 3, -1, 2, -1, 2, -1, 1, -1, 1, -1, 0, -1, 0, -1
|
||||||
|
)
|
||||||
|
shuffleControl = mm256_set_epi8(
|
||||||
|
15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1,
|
||||||
|
15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1
|
||||||
|
)
|
||||||
|
while i < len - 32:
|
||||||
|
let
|
||||||
|
coverage = mm256_loadu_si256(coverages[i].addr)
|
||||||
|
eqZero = mm256_cmpeq_epi8(coverage, vecZero)
|
||||||
|
eq255 = mm256_cmpeq_epi8(coverage, vec255)
|
||||||
|
if mm256_movemask_epi8(eqZero) == cast[int32](0xffffffff):
|
||||||
|
i += 32
|
||||||
|
elif mm256_movemask_epi8(eq255) == cast[int32](0xffffffff) and rgbx.a == 255:
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm256_store_si256(line[i].addr, rgbxVec)
|
||||||
|
i += 8
|
||||||
|
else:
|
||||||
|
let
|
||||||
|
coverageLo = mm256_castsi256_si128(coverage)
|
||||||
|
coverageHi = mm256_extractf128_si256(coverage, 1)
|
||||||
|
coverages = [
|
||||||
|
coverageLo,
|
||||||
|
mm_srli_si128(coverageLo, 8),
|
||||||
|
coverageHi,
|
||||||
|
mm_srli_si128(coverageHi, 8),
|
||||||
|
]
|
||||||
|
for j in 0 ..< 4:
|
||||||
|
let
|
||||||
|
backdrop = mm256_loadu_si256(line[i].addr)
|
||||||
|
source = rgbxVec.applyCoverage(coverages[j])
|
||||||
|
mm256_store_si256(line[i].addr, blendNormalSimd(backdrop, source))
|
||||||
|
i += 8
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
|
||||||
proc blendLineMaskAvx2*(
|
proc blendLineMaskAvx2*(
|
||||||
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
|
@ -529,5 +664,73 @@ proc blendLineMaskAvx2*(
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageMaskAvx2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 31) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
elif coverage == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
rgbxVec = mm256_set1_epi32(cast[uint32](rgbx))
|
||||||
|
vecZero = mm256_setzero_si256()
|
||||||
|
vec255 = mm256_set1_epi8(255)
|
||||||
|
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||||
|
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
||||||
|
coverageShuffle = mm_set_epi8(
|
||||||
|
3, -1, 3, -1, 2, -1, 2, -1, 1, -1, 1, -1, 0, -1, 0, -1
|
||||||
|
)
|
||||||
|
shuffleControl = mm256_set_epi8(
|
||||||
|
15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1,
|
||||||
|
15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1
|
||||||
|
)
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = mm256_loadu_si256(coverages[i].addr)
|
||||||
|
eqZero = mm256_cmpeq_epi8(coverage, vecZero)
|
||||||
|
eq255 = mm256_cmpeq_epi8(coverage, vec255)
|
||||||
|
if mm256_movemask_epi8(eqZero) == cast[int32](0xffffffff):
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm256_store_si256(line[i].addr, vecZero)
|
||||||
|
i += 8
|
||||||
|
elif mm256_movemask_epi8(eq255) == cast[int32](0xffffffff) and rgbx.a == 255:
|
||||||
|
i += 32
|
||||||
|
else:
|
||||||
|
let
|
||||||
|
coverageLo = mm256_castsi256_si128(coverage)
|
||||||
|
coverageHi = mm256_extractf128_si256(coverage, 1)
|
||||||
|
coverages = [
|
||||||
|
coverageLo,
|
||||||
|
mm_srli_si128(coverageLo, 8),
|
||||||
|
coverageHi,
|
||||||
|
mm_srli_si128(coverageHi, 8),
|
||||||
|
]
|
||||||
|
for j in 0 ..< 4:
|
||||||
|
let
|
||||||
|
backdrop = mm256_loadu_si256(line[i].addr)
|
||||||
|
source = rgbxVec.applyCoverage(coverages[j])
|
||||||
|
mm256_store_si256(line[i].addr, blendMaskSimd(backdrop, source))
|
||||||
|
i += 8
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
elif coverage == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -3,6 +3,30 @@ import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
|
template multiplyDiv255*(c, a: uint8x8): uint8x8 =
|
||||||
|
let ca = vmull_u8(c, a)
|
||||||
|
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
||||||
|
|
||||||
|
template multiplyDiv255*(c, a: uint8x16): uint8x16 =
|
||||||
|
vcombine_u8(
|
||||||
|
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
|
||||||
|
multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
|
||||||
|
)
|
||||||
|
|
||||||
|
template blendNormalSimd*(backdrop, source: uint8x16x4): uint8x16x4 =
|
||||||
|
let multiplier = vsubq_u8(vec255, source.val[3])
|
||||||
|
|
||||||
|
var blended: uint8x16x4
|
||||||
|
blended.val[0] = multiplyDiv255(backdrop.val[0], multiplier)
|
||||||
|
blended.val[1] = multiplyDiv255(backdrop.val[1], multiplier)
|
||||||
|
blended.val[2] = multiplyDiv255(backdrop.val[2], multiplier)
|
||||||
|
blended.val[3] = multiplyDiv255(backdrop.val[3], multiplier)
|
||||||
|
blended.val[0] = vaddq_u8(blended.val[0], source.val[0])
|
||||||
|
blended.val[1] = vaddq_u8(blended.val[1], source.val[1])
|
||||||
|
blended.val[2] = vaddq_u8(blended.val[2], source.val[2])
|
||||||
|
blended.val[3] = vaddq_u8(blended.val[3], source.val[3])
|
||||||
|
blended
|
||||||
|
|
||||||
proc fillUnsafeNeon*(
|
proc fillUnsafeNeon*(
|
||||||
data: var seq[ColorRGBX],
|
data: var seq[ColorRGBX],
|
||||||
color: SomeColor,
|
color: SomeColor,
|
||||||
|
@ -146,22 +170,12 @@ proc toPremultipliedAlphaNeon*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
||||||
inc i
|
inc i
|
||||||
p += 4
|
p += 4
|
||||||
|
|
||||||
template multiply(c, a: uint8x8): uint8x8 =
|
|
||||||
let ca = vmull_u8(c, a)
|
|
||||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
|
||||||
|
|
||||||
template multiply(c, a: uint8x16): uint8x16 =
|
|
||||||
vcombine_u8(
|
|
||||||
multiply(vget_low_u8(c), vget_low_u8(a)),
|
|
||||||
multiply(vget_high_u8(c), vget_high_u8(a))
|
|
||||||
)
|
|
||||||
|
|
||||||
let iterations = (data.len - i) div 16
|
let iterations = (data.len - i) div 16
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
var channels = vld4q_u8(cast[pointer](p))
|
var channels = vld4q_u8(cast[pointer](p))
|
||||||
channels.val[0] = multiply(channels.val[0], channels.val[3])
|
channels.val[0] = multiplyDiv255(channels.val[0], channels.val[3])
|
||||||
channels.val[1] = multiply(channels.val[1], channels.val[3])
|
channels.val[1] = multiplyDiv255(channels.val[1], channels.val[3])
|
||||||
channels.val[2] = multiply(channels.val[2], channels.val[3])
|
channels.val[2] = multiplyDiv255(channels.val[2], channels.val[3])
|
||||||
vst4q_u8(cast[pointer](p), channels)
|
vst4q_u8(cast[pointer](p), channels)
|
||||||
p += 64
|
p += 64
|
||||||
i += 16 * iterations
|
i += 16 * iterations
|
||||||
|
@ -225,19 +239,15 @@ proc applyOpacityNeon*(image: Image, opacity: float32) {.simd.} =
|
||||||
i: int
|
i: int
|
||||||
p = cast[uint](image.data[0].addr)
|
p = cast[uint](image.data[0].addr)
|
||||||
|
|
||||||
template multiply(c, a: uint8x8): uint8x8 =
|
|
||||||
let ca = vmull_u8(c, a)
|
|
||||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
|
||||||
|
|
||||||
let
|
let
|
||||||
opacityVec = vmov_n_u8(opacity)
|
opacityVec = vmov_n_u8(opacity)
|
||||||
iterations = image.data.len div 8
|
iterations = image.data.len div 8
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
var channels = vld4_u8(cast[pointer](p))
|
var channels = vld4_u8(cast[pointer](p))
|
||||||
channels.val[0] = multiply(channels.val[0], opacityVec)
|
channels.val[0] = multiplyDiv255(channels.val[0], opacityVec)
|
||||||
channels.val[1] = multiply(channels.val[1], opacityVec)
|
channels.val[1] = multiplyDiv255(channels.val[1], opacityVec)
|
||||||
channels.val[2] = multiply(channels.val[2], opacityVec)
|
channels.val[2] = multiplyDiv255(channels.val[2], opacityVec)
|
||||||
channels.val[3] = multiply(channels.val[3], opacityVec)
|
channels.val[3] = multiplyDiv255(channels.val[3], opacityVec)
|
||||||
vst4_u8(cast[pointer](p), channels)
|
vst4_u8(cast[pointer](p), channels)
|
||||||
p += 32
|
p += 32
|
||||||
i += 8 * iterations
|
i += 8 * iterations
|
||||||
|
@ -414,11 +424,86 @@ proc magnifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
|
proc blendLineCoverageOverwriteNeon*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage != 0:
|
||||||
|
line[i] = rgbx * coverage
|
||||||
|
inc i
|
||||||
|
|
||||||
|
var vecRgbx: uint8x16x4
|
||||||
|
vecRgbx.val[0] = vmovq_n_u8(rgbx.r)
|
||||||
|
vecRgbx.val[1] = vmovq_n_u8(rgbx.g)
|
||||||
|
vecRgbx.val[2] = vmovq_n_u8(rgbx.b)
|
||||||
|
vecRgbx.val[3] = vmovq_n_u8(rgbx.a)
|
||||||
|
|
||||||
|
let
|
||||||
|
vecZero = vmovq_n_u8(0)
|
||||||
|
vec255 = vmovq_n_u8(255)
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = vld1q_u8(coverages[i].addr)
|
||||||
|
eqZero = vceqq_u8(coverage, vecZero)
|
||||||
|
eq255 = vceqq_u8(coverage, vec255)
|
||||||
|
maskZero = vget_lane_u64(cast[uint64x1](
|
||||||
|
vand_u8(vget_low_u8(eqZero), vget_high_u8(eqZero)
|
||||||
|
)), 0)
|
||||||
|
mask255 = vget_lane_u64(cast[uint64x1](
|
||||||
|
vand_u8(vget_low_u8(eq255), vget_high_u8(eq255)
|
||||||
|
)), 0)
|
||||||
|
if maskZero == uint64.high:
|
||||||
|
discard
|
||||||
|
elif mask255 == uint64.high:
|
||||||
|
vst4q_u8(line[i].addr, vecRgbx)
|
||||||
|
else:
|
||||||
|
var source: uint8x16x4
|
||||||
|
source.val[0] = multiplyDiv255(vecRgbx.val[0], coverage)
|
||||||
|
source.val[1] = multiplyDiv255(vecRgbx.val[1], coverage)
|
||||||
|
source.val[2] = multiplyDiv255(vecRgbx.val[2], coverage)
|
||||||
|
source.val[3] = multiplyDiv255(vecRgbx.val[3], coverage)
|
||||||
|
vst4q_u8(line[i].addr, source)
|
||||||
|
|
||||||
|
i += 16
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage != 0:
|
||||||
|
line[i] = rgbx * coverage
|
||||||
|
|
||||||
|
proc blendLineNormalNeon*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
line[i] = blendNormal(line[i], rgbx)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
var vecRgbx: uint8x16x4
|
||||||
|
vecRgbx.val[0] = vmovq_n_u8(rgbx.r)
|
||||||
|
vecRgbx.val[1] = vmovq_n_u8(rgbx.g)
|
||||||
|
vecRgbx.val[2] = vmovq_n_u8(rgbx.b)
|
||||||
|
vecRgbx.val[3] = vmovq_n_u8(rgbx.a)
|
||||||
|
|
||||||
|
let vec255 = vmovq_n_u8(255)
|
||||||
|
while i < len - 16:
|
||||||
|
let backdrop = vld4q_u8(line[i].addr)
|
||||||
|
vst4q_u8(line[i].addr, blendNormalSimd(backdrop, vecRgbx))
|
||||||
|
i += 16
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
line[i] = blendNormal(line[i], rgbx)
|
||||||
|
|
||||||
proc blendLineNormalNeon*(
|
proc blendLineNormalNeon*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while (cast[uint](a[i].addr) and 15) != 0:
|
while i < len and (cast[uint](a[i].addr) and 15) != 0:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -433,41 +518,99 @@ proc blendLineNormalNeon*(
|
||||||
if mask == uint64.high:
|
if mask == uint64.high:
|
||||||
vst4q_u8(a[i].addr, source)
|
vst4q_u8(a[i].addr, source)
|
||||||
else:
|
else:
|
||||||
template multiply(c, a: uint8x8): uint8x8 =
|
let backdrop = vld4q_u8(a[i].addr)
|
||||||
let ca = vmull_u8(c, a)
|
vst4q_u8(a[i].addr, blendNormalSimd(backdrop, source))
|
||||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
|
||||||
|
|
||||||
template multiply(c, a: uint8x16): uint8x16 =
|
|
||||||
vcombine_u8(
|
|
||||||
multiply(vget_low_u8(c), vget_low_u8(a)),
|
|
||||||
multiply(vget_high_u8(c), vget_high_u8(a))
|
|
||||||
)
|
|
||||||
|
|
||||||
let
|
|
||||||
backdrop = vld4q_u8(a[i].addr)
|
|
||||||
multiplier = vsubq_u8(vec255, source.val[3])
|
|
||||||
|
|
||||||
var blended: uint8x16x4
|
|
||||||
blended.val[0] = multiply(backdrop.val[0], multiplier)
|
|
||||||
blended.val[1] = multiply(backdrop.val[1], multiplier)
|
|
||||||
blended.val[2] = multiply(backdrop.val[2], multiplier)
|
|
||||||
blended.val[3] = multiply(backdrop.val[3], multiplier)
|
|
||||||
blended.val[0] = vaddq_u8(blended.val[0], source.val[0])
|
|
||||||
blended.val[1] = vaddq_u8(blended.val[1], source.val[1])
|
|
||||||
blended.val[2] = vaddq_u8(blended.val[2], source.val[2])
|
|
||||||
blended.val[3] = vaddq_u8(blended.val[3], source.val[3])
|
|
||||||
vst4q_u8(a[i].addr, blended)
|
|
||||||
|
|
||||||
i += 16
|
i += 16
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageNormalNeon*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
var vecRgbx: uint8x16x4
|
||||||
|
vecRgbx.val[0] = vmovq_n_u8(rgbx.r)
|
||||||
|
vecRgbx.val[1] = vmovq_n_u8(rgbx.g)
|
||||||
|
vecRgbx.val[2] = vmovq_n_u8(rgbx.b)
|
||||||
|
vecRgbx.val[3] = vmovq_n_u8(rgbx.a)
|
||||||
|
|
||||||
|
let
|
||||||
|
vecZero = vmovq_n_u8(0)
|
||||||
|
vec255 = vmovq_n_u8(255)
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = vld1q_u8(coverages[i].addr)
|
||||||
|
eqZero = vceqq_u8(coverage, vecZero)
|
||||||
|
eq255 = vceqq_u8(coverage, vec255)
|
||||||
|
maskZero = vget_lane_u64(cast[uint64x1](
|
||||||
|
vand_u8(vget_low_u8(eqZero), vget_high_u8(eqZero)
|
||||||
|
)), 0)
|
||||||
|
mask255 = vget_lane_u64(cast[uint64x1](
|
||||||
|
vand_u8(vget_low_u8(eq255), vget_high_u8(eq255)
|
||||||
|
)), 0)
|
||||||
|
if maskZero == uint64.high:
|
||||||
|
discard
|
||||||
|
elif mask255 == uint64.high and rgbx.a == 255:
|
||||||
|
vst4q_u8(line[i].addr, vecRgbx)
|
||||||
|
else:
|
||||||
|
var source: uint8x16x4
|
||||||
|
source.val[0] = multiplyDiv255(vecRgbx.val[0], coverage)
|
||||||
|
source.val[1] = multiplyDiv255(vecRgbx.val[1], coverage)
|
||||||
|
source.val[2] = multiplyDiv255(vecRgbx.val[2], coverage)
|
||||||
|
source.val[3] = multiplyDiv255(vecRgbx.val[3], coverage)
|
||||||
|
|
||||||
|
let backdrop = vld4q_u8(line[i].addr)
|
||||||
|
vst4q_u8(line[i].addr, blendNormalSimd(backdrop, source))
|
||||||
|
|
||||||
|
i += 16
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
|
||||||
|
proc blendLineMaskNeon*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
line[i] = blendMask(line[i], rgbx)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let alpha = vmovq_n_u8(rgbx.a)
|
||||||
|
while i < len - 16:
|
||||||
|
let backdrop = vld4q_u8(line[i].addr)
|
||||||
|
var blended: uint8x16x4
|
||||||
|
blended.val[0] = multiplyDiv255(backdrop.val[0], alpha)
|
||||||
|
blended.val[1] = multiplyDiv255(backdrop.val[1], alpha)
|
||||||
|
blended.val[2] = multiplyDiv255(backdrop.val[2], alpha)
|
||||||
|
blended.val[3] = multiplyDiv255(backdrop.val[3], alpha)
|
||||||
|
vst4q_u8(line[i].addr, blended)
|
||||||
|
i += 16
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
line[i] = blendMask(line[i], rgbx)
|
||||||
|
|
||||||
proc blendLineMaskNeon*(
|
proc blendLineMaskNeon*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while (cast[uint](a[i].addr) and 15) != 0:
|
while i < len and (cast[uint](a[i].addr) and 15) != 0:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -482,22 +625,12 @@ proc blendLineMaskNeon*(
|
||||||
if mask == uint64.high:
|
if mask == uint64.high:
|
||||||
discard
|
discard
|
||||||
else:
|
else:
|
||||||
template multiply(c, a: uint8x8): uint8x8 =
|
|
||||||
let ca = vmull_u8(c, a)
|
|
||||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
|
||||||
|
|
||||||
template multiply(c, a: uint8x16): uint8x16 =
|
|
||||||
vcombine_u8(
|
|
||||||
multiply(vget_low_u8(c), vget_low_u8(a)),
|
|
||||||
multiply(vget_high_u8(c), vget_high_u8(a))
|
|
||||||
)
|
|
||||||
|
|
||||||
let backdrop = vld4q_u8(a[i].addr)
|
let backdrop = vld4q_u8(a[i].addr)
|
||||||
var blended: uint8x16x4
|
var blended: uint8x16x4
|
||||||
blended.val[0] = multiply(backdrop.val[0], source.val[3])
|
blended.val[0] = multiplyDiv255(backdrop.val[0], source.val[3])
|
||||||
blended.val[1] = multiply(backdrop.val[1], source.val[3])
|
blended.val[1] = multiplyDiv255(backdrop.val[1], source.val[3])
|
||||||
blended.val[2] = multiply(backdrop.val[2], source.val[3])
|
blended.val[2] = multiplyDiv255(backdrop.val[2], source.val[3])
|
||||||
blended.val[3] = multiply(backdrop.val[3], source.val[3])
|
blended.val[3] = multiplyDiv255(backdrop.val[3], source.val[3])
|
||||||
vst4q_u8(a[i].addr, blended)
|
vst4q_u8(a[i].addr, blended)
|
||||||
|
|
||||||
i += 16
|
i += 16
|
||||||
|
@ -505,5 +638,66 @@ proc blendLineMaskNeon*(
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageMaskNeon*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
elif coverage == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
alpha = vmovq_n_u8(rgbx.a)
|
||||||
|
vecZero = vmovq_n_u8(0)
|
||||||
|
vec255 = vmovq_n_u8(255)
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = vld1q_u8(coverages[i].addr)
|
||||||
|
eqZero = vceqq_u8(coverage, vecZero)
|
||||||
|
eq255 = vceqq_u8(coverage, vec255)
|
||||||
|
maskZero = vget_lane_u64(cast[uint64x1](
|
||||||
|
vand_u8(vget_low_u8(eqZero), vget_high_u8(eqZero)
|
||||||
|
)), 0)
|
||||||
|
mask255 = vget_lane_u64(cast[uint64x1](
|
||||||
|
vand_u8(vget_low_u8(eq255), vget_high_u8(eq255)
|
||||||
|
)), 0)
|
||||||
|
if maskZero == uint64.high:
|
||||||
|
vst1q_u8(line[i].addr, vecZero)
|
||||||
|
vst1q_u8(line[i + 4].addr, vecZero)
|
||||||
|
vst1q_u8(line[i + 8].addr, vecZero)
|
||||||
|
vst1q_u8(line[i + 12].addr, vecZero)
|
||||||
|
elif mask255 == uint64.high and rgbx.a == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
let
|
||||||
|
backdrop = vld4q_u8(line[i].addr)
|
||||||
|
alpha = multiplyDiv255(alpha, coverage)
|
||||||
|
var blended: uint8x16x4
|
||||||
|
blended.val[0] = multiplyDiv255(backdrop.val[0], alpha)
|
||||||
|
blended.val[1] = multiplyDiv255(backdrop.val[1], alpha)
|
||||||
|
blended.val[2] = multiplyDiv255(backdrop.val[2], alpha)
|
||||||
|
blended.val[3] = multiplyDiv255(backdrop.val[3], alpha)
|
||||||
|
vst4q_u8(line[i].addr, blended)
|
||||||
|
|
||||||
|
i += 16
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
elif coverage == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -528,7 +528,7 @@ proc blendLineCoverageOverwriteSse2*(
|
||||||
coverages: ptr UncheckedArray[uint8],
|
coverages: ptr UncheckedArray[uint8],
|
||||||
rgbx: ColorRGBX,
|
rgbx: ColorRGBX,
|
||||||
len: int
|
len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
let coverage = coverages[i]
|
let coverage = coverages[i]
|
||||||
|
@ -691,7 +691,7 @@ proc blendLineMaskSse2*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while i < len and (cast[uint](a[i].addr) and 15) != 0:
|
while i < len and (cast[uint](a[i].addr) and 15) != 0:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -721,7 +721,7 @@ proc blendLineCoverageMaskSse2*(
|
||||||
len: int
|
len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
let coverage = coverages[i]
|
let coverage = coverages[i]
|
||||||
if coverage == 0:
|
if coverage == 0:
|
||||||
line[i] = rgbx(0, 0, 0, 0)
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
|
Loading…
Reference in a new issue