From dd7bf9f210a53d1982d6c388fbcff55686ad826f Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg
Date: Sun, 31 Jul 2022 13:31:13 -0500
Subject: [PATCH] blendLine rgbx

---
 src/pixie/images.nim    |  5 ---
 src/pixie/internal.nim  |  7 ++++
 src/pixie/paths.nim     | 42 ++++++++-----------
 src/pixie/simd/sse2.nim | 90 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 31 deletions(-)

diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 68d5f67..475e8f0 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -436,11 +436,6 @@ proc drawCorrect(
         blended = blender(backdrop, sample)
       a.unsafe[x, y] = blended
 
-template getUncheckedArray(
-  image: Image, x, y: int
-): ptr UncheckedArray[ColorRGBX] =
-  cast[ptr UncheckedArray[ColorRGBX]](image.data[image.dataIndex(x, y)].addr)
-
 proc blitLine(
   a, b: ptr UncheckedArray[ColorRGBX], len: int, blender: Blender
 ) {.inline.} =
diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim
index 0120333..a4e9938 100644
--- a/src/pixie/internal.nim
+++ b/src/pixie/internal.nim
@@ -47,6 +47,13 @@ proc intersectsInside*(a, b: Segment, at: var Vec2): bool {.inline.} =
     at = a.at + (t * s1)
     return true
 
+template getUncheckedArray*(
+  image: Image, x, y: int
+): ptr UncheckedArray[ColorRGBX] =
+  ## Pointer to the pixel at (x, y), viewed as an unchecked array so a run
+  ## of pixels starting there can be indexed directly.
+  cast[ptr UncheckedArray[ColorRGBX]](image.data[image.dataIndex(x, y)].addr)
+
 proc fillUnsafe*(
   data: var seq[ColorRGBX], color: SomeColor, start, len: int
 ) {.hasSimd, raises: [].} =
diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index 7ffabd2..2cf7b09 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -1597,6 +1597,20 @@ proc fillCoverage(
         image.data[dataIndex] = blender(backdrop, source(rgbx, coverage))
       inc dataIndex
 
+proc blendLineNormal(
+  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
+) {.hasSimd.} =
+  ## Blends `rgbx` onto the first `len` pixels of `line` with blendNormal.
+  for i in 0 ..< len:
+    line[i] = blendNormal(line[i], rgbx)
+
+proc blendLineMask(
+  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
+) {.hasSimd.} =
+  ## Blends `rgbx` onto the first `len` pixels of `line` with blendMask.
+  for i in 0 ..< len:
+    line[i] = blendMask(line[i], rgbx)
+
 proc fillHits(
   image: Image,
   rgbx: ColorRGBX,
@@ -1607,19 +1621,6 @@ proc fillHits(
   blendMode: BlendMode,
   maskClears = true
 ) =
-  template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) =
-    when allowSimd:
-      when defined(amd64):
-        var p = cast[uint](image.data[image.dataIndex(x, y)].addr)
-        let
-          iterations = len div 4
-          colorVec = mm_set1_epi32(cast[int32](rgbx))
-        for _ in 0 ..< iterations:
-          let backdrop = mm_loadu_si128(cast[pointer](p))
-          mm_storeu_si128(cast[pointer](p), blendProc(backdrop, colorVec))
-          p += 16
-        x += iterations * 4
-
   case blendMode:
   of OverwriteBlend:
     for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
@@ -1630,13 +1631,7 @@ proc fillHits(
       if rgbx.a == 255:
         fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
       else:
-        var x = start
-        simdBlob(image, x, len, blendNormalSimd)
-        var dataIndex = image.dataIndex(x, y)
-        for _ in x ..< start + len:
-          let backdrop = image.data[dataIndex]
-          image.data[dataIndex] = blendNormal(backdrop, rgbx)
-          inc dataIndex
+        blendLineNormal(image.getUncheckedArray(start, y), rgbx, len)
 
   of MaskBlend:
     {.linearScanEnd.}
@@ -1653,12 +1648,7 @@ proc fillHits(
           )
       block: # Handle this fill
         if rgbx.a != 255:
-          var x = start
-          simdBlob(image, x, len, blendMaskSimd)
-          var dataIndex = image.dataIndex(x, y)
-          for _ in x ..< start + len:
-            let backdrop = image.data[dataIndex]
-            image.data[dataIndex] = blendMask(backdrop, rgbx)
+          blendLineMask(image.getUncheckedArray(start, y), rgbx, len)
       filledTo = start + len
 
     if maskClears:
diff --git a/src/pixie/simd/sse2.nim b/src/pixie/simd/sse2.nim
index cc77910..1eead85 100644
--- a/src/pixie/simd/sse2.nim
+++ b/src/pixie/simd/sse2.nim
@@ -527,6 +527,53 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
         result.width * 4
       )
 
+proc blendLineNormalSse2*(
+  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
+) {.simd.} =
+  ## Blends `rgbx` onto the first `len` pixels of `line` with blendNormal,
+  ## four pixels per step once `line[i]` sits on a 16-byte boundary.
+  var i: int
+  # Scalar prologue up to the first aligned pixel. The `i < len` guard stops
+  # a short or misaligned run from being written past its end.
+  while i < len and (cast[uint](line[i].addr) and 15) != 0:
+    line[i] = blendNormal(line[i], rgbx)
+    inc i
+
+  let
+    source = mm_set1_epi32(cast[uint32](rgbx))
+    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
+    oddMask = mm_set1_epi16(cast[int16](0xff00))
+    div255 = mm_set1_epi16(cast[int16](0x8081))
+    vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
+    # Loop-invariant: the top byte of each pixel (the source alpha) copied
+    # into the high byte of both 16-bit halves, then 255 minus that value.
+    alphaOnly = mm_and_si128(source, alphaMask)
+    sourceAlpha = mm_or_si128(alphaOnly, mm_srli_epi32(alphaOnly, 16))
+    multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
+  # `<=` so that a final full group of four also takes the vector path.
+  while i <= len - 4:
+    let backdrop = mm_load_si128(line[i].addr)
+    var
+      backdropEven = mm_slli_epi16(backdrop, 8)
+      backdropOdd = mm_and_si128(backdrop, oddMask)
+
+    backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
+    backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
+    backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
+    backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
+
+    let added = mm_add_epi8(
+      source,
+      mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
+    )
+
+    mm_store_si128(line[i].addr, added)
+    i += 4
+
+  # Scalar tail for whatever is left (fewer than four pixels).
+  for i in i ..< len:
+    line[i] = blendNormal(line[i], rgbx)
+
 proc blitLineNormalSse2*(
   a, b: ptr UncheckedArray[ColorRGBX], len: int
 ) {.simd.} =
@@ -576,6 +623,49 @@ proc blitLineNormalSse2*(
   for i in i ..< len:
     a[i] = blendNormal(a[i], b[i])
 
+proc blendLineMaskSse2*(
+  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
+) {.simd.} =
+  ## Blends `rgbx` onto the first `len` pixels of `line` with blendMask,
+  ## four pixels per step once `line[i]` sits on a 16-byte boundary.
+  var i: int
+  # Scalar prologue up to the first aligned pixel. The `i < len` guard stops
+  # a short or misaligned run from being written past its end.
+  while i < len and (cast[uint](line[i].addr) and 15) != 0:
+    line[i] = blendMask(line[i], rgbx)
+    inc i
+
+  let
+    source = mm_set1_epi32(cast[uint32](rgbx))
+    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
+    oddMask = mm_set1_epi16(cast[int16](0xff00))
+    div255 = mm_set1_epi16(cast[int16](0x8081))
+    # Loop-invariant: the top byte of each pixel (the source alpha) copied
+    # into the high byte of both 16-bit halves of the pixel.
+    alphaOnly = mm_and_si128(source, alphaMask)
+    sourceAlpha = mm_or_si128(alphaOnly, mm_srli_epi32(alphaOnly, 16))
+  # `<=` so that a final full group of four also takes the vector path.
+  while i <= len - 4:
+    let backdrop = mm_load_si128(line[i].addr)
+    var
+      backdropEven = mm_slli_epi16(backdrop, 8)
+      backdropOdd = mm_and_si128(backdrop, oddMask)
+
+    backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
+    backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
+    backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
+    backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
+
+    mm_store_si128(
+      line[i].addr,
+      mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
+    )
+    i += 4
+
+  # Scalar tail for whatever is left (fewer than four pixels).
+  for i in i ..< len:
+    line[i] = blendMask(line[i], rgbx)
+
 proc blitLineMaskSse2*(
   a, b: ptr UncheckedArray[ColorRGBX], len: int
 ) {.simd.} =