fillCoverage refactor

This commit is contained in:
Ryan Oldenburg 2022-06-17 00:04:31 -05:00
parent 460d97c520
commit 1f953f975a

View file

@ -1337,117 +1337,158 @@ proc fillCoverage(
blendMode: BlendMode blendMode: BlendMode
) = ) =
var x = startX var x = startX
when defined(amd64) and allowSimd:
if blendMode.hasSimdBlender(): when allowSimd:
# When supported, SIMD blend as much as possible when defined(amd64):
proc source(colorVec, coverageVec: M128i): M128i {.inline.} =
let let
blenderSimd = blendMode.blenderSimd()
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081)) div255 = mm_set1_epi16(cast[int16](0x8081))
vec255 = mm_set1_epi32(cast[int32](uint32.high))
vecZero = mm_setzero_si128()
colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< coverages.len div 16:
let
index = image.dataIndex(x, y)
coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff:
# If the coverages are not all zero
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
# If the coverages are all 255
if blendMode == OverwriteBlend:
for i in 0 ..< 4:
mm_storeu_si128(image.data[index + i * 4].addr, colorVec)
elif blendMode == NormalBlend:
if rgbx.a == 255:
for i in 0 ..< 4:
mm_storeu_si128(image.data[index + i * 4].addr, colorVec)
else:
for i in 0 ..< 4:
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
mm_storeu_si128(
image.data[index + i * 4].addr,
blendNormalSimd(backdrop, colorVec)
)
else:
for i in 0 ..< 4:
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
mm_storeu_si128(
image.data[index + i * 4].addr,
blenderSimd(backdrop, colorVec)
)
else:
# Coverages are not all 255
template useCoverage(blendProc: untyped) =
var coverageVec = coverageVec
for i in 0 ..< 4:
var unpacked = unpackAlphaValues(coverageVec) var unpacked = unpackAlphaValues(coverageVec)
# Shift the coverages from `a` to `g` and `a` for multiplying
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
var var
source = colorVec sourceEven = mm_slli_epi16(colorVec, 8)
sourceEven = mm_slli_epi16(source, 8) sourceOdd = mm_and_si128(colorVec, oddMask)
sourceOdd = mm_and_si128(source, oddMask)
sourceEven = mm_mulhi_epu16(sourceEven, unpacked) sourceEven = mm_mulhi_epu16(sourceEven, unpacked)
sourceOdd = mm_mulhi_epu16(sourceOdd, unpacked) sourceOdd = mm_mulhi_epu16(sourceOdd, unpacked)
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
result = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) iterator simd(coverages: seq[uint8], x: var int, startX: int): (M128i, bool, bool) =
for _ in 0 ..< coverages.len div 16:
if blendMode == OverwriteBlend: let
mm_storeu_si128(image.data[index + i * 4].addr, source) coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
else: eqZero = mm_cmpeq_epi8(coverageVec, mm_setzero_si128())
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr) eq255 = mm_cmpeq_epi8(coverageVec, mm_set1_epi8(cast[int8](255)))
mm_storeu_si128( allZeroes = mm_movemask_epi8(eqZero) == 0xffff
image.data[index + i * 4].addr, all255 = mm_movemask_epi8(eq255) == 0xffff
blendProc(backdrop, source) yield (coverageVec, allZeroes, all255)
)
coverageVec = mm_srli_si128(coverageVec, 4)
if blendMode == NormalBlend:
useCoverage(blendNormalSimd)
else:
useCoverage(blenderSimd)
elif blendMode == MaskBlend:
for i in 0 ..< 4:
mm_storeu_si128(image.data[index + i * 4].addr, vecZero)
x += 16 x += 16
let colorVec = mm_set1_epi32(cast[int32](rgbx))
proc source(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
if coverage > 0:
if coverage == 255:
result = rgbx
else:
result = rgbx(
((rgbx.r.uint32 * coverage) div 255).uint8,
((rgbx.g.uint32 * coverage) div 255).uint8,
((rgbx.b.uint32 * coverage) div 255).uint8,
((rgbx.a.uint32 * coverage) div 255).uint8
)
case blendMode:
of OverwriteBlend:
when allowSimd:
when defined(amd64):
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
if not allZeroes:
if all255:
for i in 0 ..< 4:
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, colorVec)
else:
var coverageVec = coverageVec
for i in 0 ..< 4:
let source = source(colorVec, coverageVec)
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, source)
coverageVec = mm_srli_si128(coverageVec, 4)
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0:
image.unsafe[x, y] = source(rgbx, coverage)
of NormalBlend:
when allowSimd:
when defined(amd64):
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
if not allZeroes:
if all255 and rgbx.a == 255:
for i in 0 ..< 4:
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, colorVec)
else:
var coverageVec = coverageVec
for i in 0 ..< 4:
let
backdrop = mm_loadu_si128(image.unsafe[x + i * 4, y].addr)
source = source(colorVec, coverageVec)
mm_storeu_si128(
image.unsafe[x + i * 4, y].addr,
blendNormalSimd(backdrop, source)
)
coverageVec = mm_srli_si128(coverageVec, 4)
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage == 255 and rgbx.a == 255:
image.unsafe[x, y] = rgbx
else:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendNormal(backdrop, source(rgbx, coverage))
of MaskBlend:
when allowSimd:
when defined(amd64):
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
if not allZeroes:
if all255:
discard
else:
var coverageVec = coverageVec
for i in 0 ..< 4:
let
backdrop = mm_loadu_si128(image.unsafe[x + i * 4, y].addr)
source = source(colorVec, coverageVec)
mm_storeu_si128(
image.unsafe[x + i * 4, y].addr,
blendMaskSimd(backdrop, source)
)
coverageVec = mm_srli_si128(coverageVec, 4)
else:
for i in 0 ..< 4:
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, mm_setzero_si128())
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage == 0:
image.unsafe[x, y] = rgbx(0, 0, 0, 0)
elif coverage == 255:
discard
else:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendMask(backdrop, source(rgbx, coverage))
image.clearUnsafe(0, y, startX, y)
image.clearUnsafe(startX + coverages.len, y, image.width, y)
of SubtractMaskBlend:
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage == 255 and rgbx.a == 255:
image.unsafe[x, y] = rgbx(0, 0, 0, 0)
elif coverage != 0:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendSubtractMask(backdrop, source(rgbx, coverage))
of ExcludeMaskBlend:
for x in x ..< startX + coverages.len:
let
coverage = coverages[x - startX]
backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendExcludeMask(backdrop, source(rgbx, coverage))
else:
let blender = blendMode.blender() let blender = blendMode.blender()
for x in x ..< startX + coverages.len: for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX] let coverage = coverages[x - startX]
if coverage != 0 or blendMode == ExcludeMaskBlend: if coverage != 0:
if blendMode == NormalBlend and coverage == 255 and rgbx.a == 255:
# Skip blending
image.unsafe[x, y] = rgbx
continue
var source = rgbx
if coverage != 255:
source.r = ((source.r.uint32 * coverage) div 255).uint8
source.g = ((source.g.uint32 * coverage) div 255).uint8
source.b = ((source.b.uint32 * coverage) div 255).uint8
source.a = ((source.a.uint32 * coverage) div 255).uint8
if blendMode == OverwriteBlend:
image.unsafe[x, y] = source
else:
let backdrop = image.unsafe[x, y] let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blender(backdrop, source) image.unsafe[x, y] = blender(backdrop, source(rgbx, coverage))
elif blendMode == MaskBlend:
image.unsafe[x, y] = rgbx(0, 0, 0, 0)
if blendMode == MaskBlend:
image.clearUnsafe(0, y, startX, y)
image.clearUnsafe(startX + coverages.len, y, image.width, y)
proc fillCoverage( proc fillCoverage(
mask: Mask, mask: Mask,
@ -1456,45 +1497,83 @@ proc fillCoverage(
blendMode: BlendMode blendMode: BlendMode
) = ) =
var x = startX var x = startX
when defined(amd64) and allowSimd:
if blendMode.hasSimdMaskBlender(): template simdBlob(blendProc: untyped) =
let when allowSimd:
maskerSimd = blendMode.maskBlenderSimd() when defined(amd64):
vecZero = mm_setzero_si128()
for _ in 0 ..< coverages.len div 16: for _ in 0 ..< coverages.len div 16:
let let
index = mask.dataIndex(x, y) coveragesVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr) eqZero = mm_cmpeq_epi8(coveragesVec, mm_setzero_si128())
if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: allZeroes = mm_movemask_epi8(eqZero) == 0xffff
# If the coverages are not all zero if not allZeroes:
if blendMode == OverwriteBlend: let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
mm_storeu_si128(mask.data[index].addr, coverageVec)
else:
let backdrop = mm_loadu_si128(mask.data[index].addr)
mm_storeu_si128( mm_storeu_si128(
mask.data[index].addr, mask.unsafe[x, y].addr,
maskerSimd(backdrop, coverageVec) blendProc(backdrop, coveragesVec)
) )
elif blendMode == MaskBlend:
mm_storeu_si128(mask.data[index].addr, vecZero)
x += 16 x += 16
let maskBlender = blendMode.maskBlender() template blendBlob(blendProc: untyped) =
for x in x ..< startX + coverages.len: for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX] let coverage = coverages[x - startX]
if coverage != 0 or blendMode == ExcludeMaskBlend: if coverage != 0:
if blendMode == OverwriteBlend:
mask.unsafe[x, y] = coverage
else:
let backdrop = mask.unsafe[x, y] let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlender(backdrop, coverage) mask.unsafe[x, y] = blendProc(backdrop, coverage)
elif blendMode == MaskBlend:
case blendMode:
of OverwriteBlend:
copyMem(
mask.unsafe[startX, y].addr,
coverages[0].unsafeAddr,
coverages.len
)
of NormalBlend:
simdBlob(maskBlendNormalSimd)
blendBlob(maskBlendNormal)
of MaskBlend:
when allowSimd:
when defined(amd64):
for _ in 0 ..< coverages.len div 16:
let
coveragesVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
eqZero = mm_cmpeq_epi8(coveragesVec, mm_setzero_si128())
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
if not allZeroes:
let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
mm_storeu_si128(
mask.unsafe[x, y].addr,
maskBlendMaskSimd(backdrop, coveragesVec)
)
else:
mm_storeu_si128(mask.unsafe[x, y].addr, mm_setzero_si128())
x += 16
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlendMask(backdrop, coverage)
else:
mask.unsafe[x, y] = 0 mask.unsafe[x, y] = 0
if blendMode == MaskBlend:
mask.clearUnsafe(0, y, startX, y) mask.clearUnsafe(0, y, startX, y)
mask.clearUnsafe(startX + coverages.len, y, mask.width, y) mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
of SubtractMaskBlend:
simdBlob(maskBlendSubtractSimd)
blendBlob(maskBlendSubtract)
of ExcludeMaskBlend:
simdBlob(maskBlendExcludeSimd)
blendBlob(maskBlendExclude)
else:
let maskBlender = blendMode.maskBlender()
blendBlob(maskBlender)
proc fillHits( proc fillHits(
image: Image, image: Image,
rgbx: ColorRGBX, rgbx: ColorRGBX,