diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 9f0e652..4a5e49b 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -11,8 +11,8 @@ when defined(amd64) and allowSimd: type Blender* = proc(backdrop, source: ColorRGBX): ColorRGBX {.gcsafe, raises: [].} ## Function signature returned by blender. - Masker* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].} - ## Function signature returned by masker. + MaskBlender* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].} + ## Function signature returned by maskBlender. when defined(release): {.push checks: off.} @@ -484,29 +484,29 @@ proc maskBlendExclude*(backdrop, source: uint8): uint8 {.inline.} = ## Exclude blend masks max(backdrop, source) - min(backdrop, source) -proc maskBlendNormalMasker(backdrop, source: uint8): uint8 = +proc maskBlendNormalMaskBlender(backdrop, source: uint8): uint8 = maskBlendNormal(backdrop, source) -proc maskBlendMaskMasker(backdrop, source: uint8): uint8 = +proc maskBlendMaskMaskBlender(backdrop, source: uint8): uint8 = maskBlendMask(backdrop, source) -proc maskBlendSubtractMasker(backdrop, source: uint8): uint8 = +proc maskBlendSubtractMaskBlender(backdrop, source: uint8): uint8 = maskBlendSubtract(backdrop, source) -proc maskBlendExcludeMasker(backdrop, source: uint8): uint8 = +proc maskBlendExcludeMaskBlender(backdrop, source: uint8): uint8 = maskBlendExclude(backdrop, source) -proc maskBlendOverwriteMasker(backdrop, source: uint8): uint8 = +proc maskBlendOverwriteMaskBlender(backdrop, source: uint8): uint8 = source -proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} = +proc maskBlender*(blendMode: BlendMode): MaskBlender {.raises: [PixieError].} = ## Returns a blend masking function for a given blend masking mode. case blendMode: - of NormalBlend: maskBlendNormalMasker - of MaskBlend: maskBlendMaskMasker - of OverwriteBlend: maskBlendOverwriteMasker - of SubtractMaskBlend: maskBlendSubtractMasker - of ExcludeMaskBlend: maskBlendExcludeMasker + of NormalBlend: maskBlendNormalMaskBlender + of MaskBlend: maskBlendMaskMaskBlender + of OverwriteBlend: maskBlendOverwriteMaskBlender + of SubtractMaskBlend: maskBlendSubtractMaskBlender + of ExcludeMaskBlend: maskBlendExcludeMaskBlender else: raise newException(PixieError, "No masker for " & $blendMode) @@ -647,24 +647,63 @@ when defined(amd64) and allowSimd: mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) - proc maskBlendNormalSimdMasker(backdrop, source: M128i): M128i = + proc maskBlendSubtractSimd*(backdrop, source: M128i): M128i {.inline.} = + let + oddMask = mm_set1_epi16(cast[int16](0xff00)) + vec255 = mm_set1_epi8(255) + div255 = mm_set1_epi16(cast[int16](0x8081)) + + let sourceMinus255 = mm_sub_epi8(vec255, source) + + var + multiplierEven = mm_slli_epi16(sourceMinus255, 8) + multiplierOdd = mm_and_si128(sourceMinus255, oddMask) + backdropEven = mm_slli_epi16(backdrop, 8) + backdropOdd = mm_and_si128(backdrop, oddMask) + + backdropEven = mm_mulhi_epu16(backdropEven, multiplierEven) + backdropOdd = mm_mulhi_epu16(backdropOdd, multiplierOdd) + + backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) + backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) + + mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) + + proc maskBlendExcludeSimd*(backdrop, source: M128i): M128i {.inline.} = + mm_sub_epi8(mm_max_epu8(backdrop, source), mm_min_epu8(backdrop, source)) + + proc maskBlendNormalSimdMaskBlender(backdrop, source: M128i): M128i = maskBlendNormalSimd(backdrop, source) - proc maskBlendMaskSimdMasker(backdrop, source: M128i): M128i = + proc maskBlendMaskSimdMaskBlender(backdrop, source: M128i): M128i = maskBlendMaskSimd(backdrop, source) - proc maskerSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} = + proc maskBlendExcludeSimdMaskBlender(backdrop, source: M128i): M128i = + maskBlendExcludeSimd(backdrop, source) + + proc maskBlendSubtractSimdMaskBlender(backdrop, source: M128i): M128i = + maskBlendSubtractSimd(backdrop, source) + + proc maskBlenderSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} = ## Returns a blend masking function with SIMD support. case blendMode: - of NormalBlend: maskBlendNormalSimdMasker - of MaskBlend: maskBlendMaskSimdMasker + of NormalBlend: maskBlendNormalSimdMaskBlender + of MaskBlend: maskBlendMaskSimdMaskBlender of OverwriteBlend: overwriteSimdBlender + of SubtractMaskBlend: maskBlendSubtractSimdMaskBlender + of ExcludeMaskBlend: maskBlendExcludeSimdMaskBlender else: raise newException(PixieError, "No SIMD masker for " & $blendMode) - proc hasSimdMasker*(blendMode: BlendMode): bool {.inline, raises: [].} = + proc hasSimdMaskBlender*(blendMode: BlendMode): bool {.inline, raises: [].} = ## Is there a blend masking function with SIMD support? - blendMode in {NormalBlend, MaskBlend, OverwriteBlend} + blendMode in { + NormalBlend, + MaskBlend, + OverwriteBlend, + SubtractMaskBlend, + ExcludeMaskBlend + } when defined(release): {.pop.} diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 1f0e26d..ac82d87 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -714,7 +714,7 @@ proc drawUber( when type(a) is Image: let blender = blendMode.blender() else: # a is a Mask - let masker = blendMode.masker() + let maskBlender = blendMode.maskBlender() if blendMode == MaskBlend: if yMin > 0: @@ -777,7 +777,7 @@ proc drawUber( let sample = b.getRgbaSmooth(srcPos.x, srcPos.y).a else: # b is a Mask let sample = b.getValueSmooth(srcPos.x, srcPos.y) - a.unsafe[x, y] = masker(backdrop, sample) + a.unsafe[x, y] = maskBlender(backdrop, sample) srcPos += dx @@ -972,8 +972,8 @@ proc drawUber( x += 16 sx += 16 else: # is a Mask - if blendMode.hasSimdMasker(): - let maskerSimd = blendMode.maskerSimd() + if blendMode.hasSimdMaskBlender(): + let maskerSimd = blendMode.maskBlenderSimd() for _ in 0 ..< (xStop - xStart) div 16: let backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) when type(b) is Image: @@ -1089,7 +1089,7 @@ proc drawUber( let sample = b.unsafe[samplePos.x, samplePos.y].a else: # b is a Mask let sample = b.unsafe[samplePos.x, samplePos.y] - a.unsafe[x, y] = masker(backdrop, sample) + a.unsafe[x, y] = maskBlender(backdrop, sample) srcPos += dx if blendMode == MaskBlend: diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index a75582b..d1f2637 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1223,6 +1223,20 @@ iterator walk( if prevAt != width.float32.fixed32 and count != 0: echo "Leak detected: ", count, " @ (", prevAt, ", ", y, ")" +iterator walkInteger( + hits: seq[(int32, int16)], + numHits: int, + windingRule: WindingRule, + y, width: int +): (int, int) = + for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): + let + fillStart = prevAt.integer + fillLen = at.integer - fillStart + if fillLen <= 0: + continue + yield (fillStart, fillLen) + proc computeCoverage( coverages: ptr UncheckedArray[uint8], hits: var seq[(Fixed32, int16)], @@ -1443,9 +1457,9 @@ proc fillCoverage( ) = var x = startX when defined(amd64) and allowSimd: - if blendMode.hasSimdMasker(): + if blendMode.hasSimdMaskBlender(): let - maskerSimd = blendMode.maskerSimd() + maskerSimd = blendMode.maskBlenderSimd() vecZero = mm_setzero_si128() for _ in 0 ..< coverages.len div 16: let @@ -1465,7 +1479,7 @@ proc fillCoverage( mm_storeu_si128(mask.data[index].addr, vecZero) x += 16 - let masker = blendMode.masker() + let maskBlender = blendMode.maskBlender() for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0 or blendMode == ExcludeMaskBlend: @@ -1473,7 +1487,7 @@ proc fillCoverage( mask.unsafe[x, y] = coverage else: let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = masker(backdrop, coverage) + mask.unsafe[x, y] = maskBlender(backdrop, coverage) elif blendMode == MaskBlend: mask.unsafe[x, y] = 0 @@ -1481,22 +1495,6 @@ proc fillCoverage( mask.clearUnsafe(0, y, startX, y) mask.clearUnsafe(startX + coverages.len, y, mask.width, y) -template walkHits( - hits: seq[(int32, int16)], - numHits: int, - windingRule: WindingRule, - y, width: int, - inner: untyped -) = - for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): - let - fillStart {.inject.} = prevAt.integer - fillLen {.inject.} = at.integer - fillStart - if fillLen <= 0: - continue - - inner - proc fillHits( image: Image, rgbx: ColorRGBX, @@ -1506,38 +1504,36 @@ proc fillHits( windingRule: WindingRule, blendMode: BlendMode ) = - template simdBlob(image: Image, x: var int, blendProc: untyped) = + template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) = when allowSimd: when defined(amd64): let colorVec = mm_set1_epi32(cast[int32](rgbx)) - for _ in 0 ..< fillLen div 4: - let - index = image.dataIndex(x, y) - backdrop = mm_loadu_si128(image.data[index].addr) - mm_storeu_si128(image.data[index].addr, blendProc(backdrop, colorVec)) + for _ in 0 ..< len div 4: + let backdrop = mm_loadu_si128(image.unsafe[x, y].addr) + mm_storeu_si128(image.unsafe[x, y].addr, blendProc(backdrop, colorVec)) x += 4 case blendMode: of OverwriteBlend: - walkHits hits, numHits, windingRule, y, image.width: - fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen) + for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): + fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len) of NormalBlend: - walkHits hits, numHits, windingRule, y, image.width: + for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): if rgbx.a == 255: - fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen) + fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len) else: - var x = fillStart - simdBlob(image, x, blendNormalSimd) - for x in x ..< fillStart + fillLen: + var x = start + simdBlob(image, x, len, blendNormalSimd) + for x in x ..< start + len: let backdrop = image.unsafe[x, y] image.unsafe[x, y] = blendNormal(backdrop, rgbx) of MaskBlend: var filledTo = startX - walkHits hits, numHits, windingRule, y, image.width: + for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): block: # Clear any gap between this fill and the previous fill - let gapBetween = fillStart - filledTo + let gapBetween = start - filledTo if gapBetween > 0: fillUnsafe( image.data, @@ -1545,12 +1541,12 @@ proc fillHits( image.dataIndex(filledTo, y), gapBetween ) - filledTo = fillStart + fillLen + filledTo = start + len block: # Handle this fill if rgbx.a != 255: - var x = fillStart - simdBlob(image, x, blendMaskSimd) - for x in x ..< fillStart + fillLen: + var x = start + simdBlob(image, x, len, blendMaskSimd) + for x in x ..< start + len: let backdrop = image.unsafe[x, y] image.unsafe[x, y] = blendMask(backdrop, rgbx) @@ -1559,8 +1555,8 @@ proc fillHits( else: let blender = blendMode.blender() - walkHits hits, numHits, windingRule, y, image.width: - for x in fillStart ..< fillStart + fillLen: + for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): + for x in start ..< start + len: let backdrop = image.unsafe[x, y] image.unsafe[x, y] = blender(backdrop, rgbx) @@ -1572,31 +1568,44 @@ proc fillHits( windingRule: WindingRule, blendMode: BlendMode ) = + template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) = + when allowSimd: + when defined(amd64): + let vec255 = mm_set1_epi8(255) + for _ in 0 ..< len div 16: + let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr) + mm_storeu_si128(mask.unsafe[x, y].addr, blendProc(backdrop, vec255)) + x += 16 + case blendMode: of NormalBlend, OverwriteBlend: - walkHits hits, numHits, windingRule, y, mask.width: - fillUnsafe(mask.data, 255, mask.dataIndex(fillStart, y), fillLen) + for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): + fillUnsafe(mask.data, 255, mask.dataIndex(start, y), len) of MaskBlend: var filledTo = startX - walkHits hits, numHits, windingRule,y, mask.width: - let gapBetween = fillStart - filledTo + for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): + let gapBetween = start - filledTo if gapBetween > 0: fillUnsafe(mask.data, 0, mask.dataIndex(filledTo, y), gapBetween) - filledTo = fillStart + fillLen + filledTo = start + len mask.clearUnsafe(0, y, startX, y) mask.clearUnsafe(filledTo, y, mask.width, y) of SubtractMaskBlend: - walkHits hits, numHits, windingRule, y, mask.width: - for x in fillStart ..< fillStart + fillLen: + for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): + var x = start + simdBlob(mask, x, len, maskBlendSubtractSimd) + for x in x ..< start + len: let backdrop = mask.unsafe[x, y] mask.unsafe[x, y] = maskBlendSubtract(backdrop, 255) of ExcludeMaskBlend: - walkHits hits, numHits, windingRule, y, mask.width: - for x in fillStart ..< fillStart + fillLen: + for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): + var x = start + simdBlob(mask, x, len, maskBlendExcludeSimd) + for x in x ..< start + len: let backdrop = mask.unsafe[x, y] mask.unsafe[x, y] = maskBlendExclude(backdrop, 255)