From 04c7bd87d864a4e6aa278af500af74495511e03e Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 15:05:34 -0600 Subject: [PATCH 1/5] shadow, spread benchmarks --- tests/benchmark_images_draw.nim | 17 ++++++ tests/benchmark_images_shadows.nim | 91 ------------------------------ tests/benchmark_masks.nim | 5 ++ 3 files changed, 22 insertions(+), 91 deletions(-) delete mode 100644 tests/benchmark_images_shadows.nim diff --git a/tests/benchmark_images_draw.nim b/tests/benchmark_images_draw.nim index 8b087e5..53ff723 100644 --- a/tests/benchmark_images_draw.nim +++ b/tests/benchmark_images_draw.nim @@ -43,3 +43,20 @@ block: timeIt "draw big-on-bigger Smooth bmNormal": a.draw(b, translate(vec2(25.2, 25.2)), bmNormal) keep(b) + +block: + let + a = newImage(100, 100) + b = newImage(50, 50) + + timeIt "shadow": + b.fill(rgba(0, 0, 0, 255)) + a.draw(b, vec2(25, 25)) + + let shadow = a.shadow( + offset = vec2(0, 0), + spread = 10, + blur = 10, + color = rgba(0, 0, 0, 255) + ) + keep(shadow) diff --git a/tests/benchmark_images_shadows.nim b/tests/benchmark_images_shadows.nim deleted file mode 100644 index 593b6c2..0000000 --- a/tests/benchmark_images_shadows.nim +++ /dev/null @@ -1,91 +0,0 @@ -import pixie, chroma, vmath, benchy - -block: - - var a = newImage(100, 100) - var b = newImage(50, 50) - - timeIt "spread": - a.fill(rgba(0, 0, 0, 0)) - b.fill(rgba(0, 0, 0, 255)) - a.draw(b, vec2(25, 25)) - - a.spread(spread = 10) - - b = newImage(50, 50) - b.fill(rgba(255, 255, 255, 255)) - a.draw(b, vec2(25, 25)) - - # a.writeFile("tests/images/spread1.png") - -block: - var a = newImage(100, 100) - var b = newImage(50, 50) - - timeIt "blur": - a.fill(rgba(0, 0, 0, 0)) - b.fill(rgba(255, 255, 255, 255)) - a.draw(b, vec2(25, 25)) - - a.blur(radius = 10) - - b = newImage(50, 50) - b.fill(rgba(255, 255, 255, 255)) - a.draw(b, vec2(25, 25)) - - # a.writeFile("tests/images/blur1.png") - -block: - var shadow: Image - var a = newImage(100, 100) - var b = newImage(50, 50) - - timeIt "shadow": - a.fill(rgba(0, 0, 0, 0)) - b.fill(rgba(0, 0, 0, 255)) - a.draw(b, vec2(25, 25)) - - shadow = a.shadow( - offset = vec2(0, 0), - spread = 10, - blur = 10, - color = rgba(0, 0, 0, 255) - ) - - b = newImage(50, 50) - b.fill(rgba(255, 255, 255, 255)) - shadow.draw(b, vec2(25, 25)) - keep(shadow) - - # shadow.writeFile("tests/images/shadow1.png") - - -# import print -# timeIt "Shadow Stops": -# var tmp = 0 -# var shadow: Image -# for i in 0 ..< 1: -# var a = newImage(10, 200) -# var b = newImage(50, 50) -# b.fill(rgba(0, 0, 0, 255)) -# a.draw(b, vec2(-25, -25)) - -# for spread in 0 .. 0: -# let spread = spread.float -# for blur in 0 .. 10: -# let blur = blur.float -# print spread, blur - -# shadow = a.shadow( -# offset = vec2(0, 0), spread = spread, blur = blur, color = rgba(0, 0, 0, 255)) - -# for y in 25 ..< (25 + spread + blur).int: -# echo y - 25, ":", shadow[5, y].a - -# b = newImage(50, 50) -# b.fill(rgba(255, 255, 255, 255)) -# shadow.draw(b, vec2(-25, -25)) - -# tmp += shadow.width * shadow.height -# shadow.writeFile("tests/images/shadowStops.png") -# echo tmp diff --git a/tests/benchmark_masks.nim b/tests/benchmark_masks.nim index c7f22a2..2c04b3d 100644 --- a/tests/benchmark_masks.nim +++ b/tests/benchmark_masks.nim @@ -30,3 +30,8 @@ reset() timeIt "ceil": mask.ceil() + +reset() + +timeIt "spread": + mask.spread(10) From 70ab79b607d33c8046c33eed7402281868ee760e Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 15:29:11 -0600 Subject: [PATCH 2/5] draw uber takes Image | Mask + works w/o simd so far --- src/pixie/images.nim | 211 ++++++++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 93 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index b86fce2..1a27c50 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -533,9 +533,7 @@ proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA = lerp(bottomMix, topMix, diffY) -proc drawCorrect( - a: Image | Mask, b: Image | Mask, mat = mat3(), blendMode = bmNormal -) = +proc drawCorrect(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) = ## Draws one image onto another using matrix with color blending. when type(a) is Image: @@ -587,36 +585,49 @@ proc drawCorrect( let sample = b.getValueSmooth(xFloat, yFloat) a.setValueUnsafe(x, y, masker(backdrop, sample)) -proc draw*(image: Image, mask: Mask, mat: Mat3, blendMode = bmMask) = - image.drawCorrect(mask, mat, blendMode) +proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) = + let + corners = [ + mat * vec2(0, 0), + mat * vec2(b.width.float32, 0), + mat * vec2(b.width.float32, b.height.float32), + mat * vec2(0, b.height.float32) + ] + perimeter = [ + segment(corners[0], corners[1]), + segment(corners[1], corners[2]), + segment(corners[2], corners[3]), + segment(corners[3], corners[0]) + ] -proc draw*( - image: Image, mask: Mask, pos = vec2(0, 0), blendMode = bmMask -) {.inline.} = - image.drawCorrect(mask, translate(pos), blendMode) + var + matInv = mat.inverse() + # Compute movement vectors + p = matInv * vec2(0 + h, 0 + h) + dx = matInv * vec2(1 + h, 0 + h) - p + dy = matInv * vec2(0 + h, 1 + h) - p + minFilterBy2 = max(dx.length, dy.length) + b = b -proc draw*(a, b: Mask, mat: Mat3, blendMode = bmMask) = - a.drawCorrect(b, mat, blendMode) + while minFilterBy2 > 2.0: + b = b.minifyBy2() + p /= 2 + dx /= 2 + dy /= 2 + minFilterBy2 /= 2 + matInv = matInv * scale(vec2(0.5, 0.5)) -proc draw*(a, b: Mask, pos = vec2(0, 0), blendMode = bmMask) {.inline.} = - a.draw(b, translate(pos), blendMode) + let smooth = not( + dx.length == 1.0 and + dy.length == 1.0 and + mat[2, 0].fractional == 0.0 and + mat[2, 1].fractional == 0.0 + ) -proc draw*(mask: Mask, image: Image, mat: Mat3, blendMode = bmMask) = - mask.drawCorrect(image, mat, blendMode) - -proc draw*( - mask: Mask, image: Image, pos = vec2(0, 0), blendMode = bmMask -) {.inline.} = - mask.draw(image, translate(pos), blendMode) - -proc drawUber( - a, b: Image, - p, dx, dy: Vec2, - perimeter: array[0..3, Segment], - blendMode: BlendMode, - smooth: bool -) = - let blender = blendMode.blender() + when type(a) is Image: + let blender = blendMode.blender() + else: # a is a Mask + let masker = blendMode.masker() # Determine where we should start and stop drawing in the y dimension var yMin, yMax: int @@ -662,89 +673,103 @@ proc drawUber( srcPos = p + dx * x.float32 + dy * y.float32 xFloat = srcPos.x - h yFloat = srcPos.y - h - backdrop = a.getRgbaUnsafe(x, y) - source = b.getRgbaSmooth(xFloat, yFloat) - a.setRgbaUnsafe(x, y, blender(backdrop, source)) + when type(a) is Image: + let backdrop = a.getRgbaUnsafe(x, y) + when type(b) is Image: + let + sample = b.getRgbaSmooth(xFloat, yFloat) + blended = blender(backdrop, sample) + else: # b is a Mask + let + sample = b.getValueSmooth(xFloat, yFloat) + blended = blender(backdrop, rgba(0, 0, 0, sample)) + a.setRgbaUnsafe(x, y, blended) + else: # a is a Mask + let backdrop = a.getValueUnsafe(x, y) + when type(b) is Image: + let sample = b.getRgbaSmooth(xFloat, yFloat).a + else: # b is a Mask + let sample = b.getValueSmooth(xFloat, yFloat) + a.setValueUnsafe(x, y, masker(backdrop, sample)) else: var x = xMin - when defined(amd64) and not defined(pixieNoSimd): - if blendMode.hasSimdBlender(): - if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1: - # Check we are not rotated before using SIMD blends - let blenderSimd = blendMode.blenderSimd() - for _ in countup(x, xMax - 4, 4): - let - srcPos = p + dx * x.float32 + dy * y.float32 - sx = srcPos.x.int - sy = srcPos.y.int - backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) - source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) - mm_storeu_si128( - a.data[a.dataIndex(x, y)].addr, - blenderSimd(backdrop, source) - ) - x += 4 + # when defined(amd64) and not defined(pixieNoSimd): + # if blendMode.hasSimdBlender(): + # if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1: + # # Check we are not rotated before using SIMD blends + # let blenderSimd = blendMode.blenderSimd() + # for _ in countup(x, xMax - 4, 4): + # let + # srcPos = p + dx * x.float32 + dy * y.float32 + # sx = srcPos.x.int + # sy = srcPos.y.int + # backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) + # source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) + # mm_storeu_si128( + # a.data[a.dataIndex(x, y)].addr, + # blenderSimd(backdrop, source) + # ) + # x += 4 for _ in x ..< xMax: let srcPos = p + dx * x.float32 + dy * y.float32 xFloat = srcPos.x - h yFloat = srcPos.y - h - backdrop = a.getRgbaUnsafe(x, y) - source = b.getRgbaUnsafe(xFloat.int, yFloat.int) - a.setRgbaUnsafe(x, y, blender(backdrop, source)) + + when type(a) is Image: + let backdrop = a.getRgbaUnsafe(x, y) + when type(b) is Image: + let + sample = b.getRgbaUnsafe(xFloat.int, yFloat.int) + blended = blender(backdrop, sample) + else: # b is a Mask + let + sample = b.getValueUnsafe(xFloat.int, yFloat.int) + blended = blender(backdrop, rgba(0, 0, 0, sample)) + a.setRgbaUnsafe(x, y, blended) + else: # a is a Mask + let backdrop = a.getValueUnsafe(x, y) + when type(b) is Image: + let sample = b.getRgbaUnsafe(xFloat.int, yFloat.int).a + else: # b is a Mask + let sample = b.getValueUnsafe(xFloat.int, yFloat.int) + a.setValueUnsafe(x, y, masker(backdrop, sample)) inc x if blendMode == bmIntersectMask: if a.width - xMax > 0: zeroMem(a.data[a.dataIndex(xMax, y)].addr, 4 * (a.width - xMax)) -proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal) = +proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal) {.inline.} = ## Draws one image onto another using matrix with color blending. - - let - corners = [ - mat * vec2(0, 0), - mat * vec2(b.width.float32, 0), - mat * vec2(b.width.float32, b.height.float32), - mat * vec2(0, b.height.float32) - ] - perimeter = [ - segment(corners[0], corners[1]), - segment(corners[1], corners[2]), - segment(corners[2], corners[3]), - segment(corners[3], corners[0]) - ] - - var - matInv = mat.inverse() - # Compute movement vectors - p = matInv * vec2(0 + h, 0 + h) - dx = matInv * vec2(1 + h, 0 + h) - p - dy = matInv * vec2(0 + h, 1 + h) - p - minFilterBy2 = max(dx.length, dy.length) - b = b - - while minFilterBy2 > 2.0: - b = b.minifyBy2() - p /= 2 - dx /= 2 - dy /= 2 - minFilterBy2 /= 2 - matInv = matInv * scale(vec2(0.5, 0.5)) - - let smooth = not( - dx.length == 1.0 and - dy.length == 1.0 and - mat[2, 0].fractional == 0.0 and - mat[2, 1].fractional == 0.0 - ) - - a.drawUber(b, p, dx, dy, perimeter, blendMode, smooth) + a.drawUber(b, mat, blendMode) proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.} = a.draw(b, translate(pos), blendMode) +proc draw*(image: Image, mask: Mask, mat: Mat3, blendMode = bmMask) {.inline.} = + image.drawUber(mask, mat, blendMode) + +proc draw*( + image: Image, mask: Mask, pos = vec2(0, 0), blendMode = bmMask +) {.inline.} = + image.drawUber(mask, translate(pos), blendMode) + +proc draw*(a, b: Mask, mat: Mat3, blendMode = bmMask) {.inline.} = + a.drawUber(b, mat, blendMode) + +proc draw*(a, b: Mask, pos = vec2(0, 0), blendMode = bmMask) {.inline.} = + a.draw(b, translate(pos), blendMode) + +proc draw*(mask: Mask, image: Image, mat: Mat3, blendMode = bmMask) {.inline.} = + mask.drawUber(image, mat, blendMode) + +proc draw*( + mask: Mask, image: Image, pos = vec2(0, 0), blendMode = bmMask +) {.inline.} = + mask.draw(image, translate(pos), blendMode) + proc resize*(srcImage: Image, width, height: int): Image = if width == srcImage.width and height == srcImage.height: result = srcImage.copy() From 243f9a3ba18162600fddb9b2273c686c12f09a46 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 22:12:12 -0600 Subject: [PATCH 3/5] better names --- src/pixie/paths.nim | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 1c41baa..e8690e0 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -965,8 +965,8 @@ proc fillShapes( when defined(amd64) and not defined(pixieNoSimd): # When supported, SIMD blend as much as possible let - coverageMask1 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits - coverageMask2 = mm_set1_epi32(cast[int32](0x000000ff)) # Only `r` + first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits + redMask = mm_set1_epi32(cast[int32](0x000000ff)) # Only `r` oddMask = mm_set1_epi16(cast[int16](0xff00)) div255 = mm_set1_epi16(cast[int16](0x8081)) v255 = mm_set1_epi32(255) @@ -974,7 +974,7 @@ proc fillShapes( for _ in countup(x, coverages.len - 16, 16): var coverage = mm_loadu_si128(coverages[x].addr) - coverage = mm_and_si128(coverage, coverageMask1) + coverage = mm_and_si128(coverage, first32) let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128()) if mm_movemask_epi8(eqZero) != 0xffff: @@ -985,10 +985,10 @@ proc fillShapes( coverage = mm_shuffle_epi32(coverage, MM_SHUFFLE(1, 1, 0, 0)) var - a = mm_and_si128(coverage, coverageMask1) - b = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 4)) - c = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 8)) - d = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 12)) + a = mm_and_si128(coverage, first32) + b = mm_and_si128(coverage, mm_slli_si128(first32, 4)) + c = mm_and_si128(coverage, mm_slli_si128(first32, 8)) + d = mm_and_si128(coverage, mm_slli_si128(first32, 12)) # Shift the coverages to `r` a = mm_srli_si128(a, 2) @@ -997,7 +997,7 @@ proc fillShapes( coverage = mm_and_si128( mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d)), - coverageMask2 + redMask ) if mm_movemask_epi8(mm_cmpeq_epi32(coverage, v255)) != 0xffff: From debceb255a3f99717f65e5d4b41bf1580a499990 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 22:12:30 -0600 Subject: [PATCH 4/5] image + (image|mask) works --- src/pixie/images.nim | 69 +++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 1a27c50..f83d1b3 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -693,23 +693,58 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) = a.setValueUnsafe(x, y, masker(backdrop, sample)) else: var x = xMin - # when defined(amd64) and not defined(pixieNoSimd): - # if blendMode.hasSimdBlender(): - # if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1: - # # Check we are not rotated before using SIMD blends - # let blenderSimd = blendMode.blenderSimd() - # for _ in countup(x, xMax - 4, 4): - # let - # srcPos = p + dx * x.float32 + dy * y.float32 - # sx = srcPos.x.int - # sy = srcPos.y.int - # backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) - # source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) - # mm_storeu_si128( - # a.data[a.dataIndex(x, y)].addr, - # blenderSimd(backdrop, source) - # ) - # x += 4 + when defined(amd64) and not defined(pixieNoSimd): + if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1: + # Check we are not rotated before using SIMD blends + when type(a) is Image: + if blendMode.hasSimdBlender(): + let + blenderSimd = blendMode.blenderSimd() + first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits + alphaMask = mm_set1_epi32(cast[int32](0xff000000)) # Only `a` + for _ in countup(x, xMax - 4, 4): + let + srcPos = p + dx * x.float32 + dy * y.float32 + sx = srcPos.x.int + sy = srcPos.y.int + backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) + when type(b) is Image: + let source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) + else: # b is a Mask + # Need to move 4 mask values into the alpha slots + var source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) + source = mm_slli_si128(source, 2) + source = mm_shuffle_epi32(source, MM_SHUFFLE(1, 1, 0, 0)) + + var + i = mm_and_si128(source, first32) + j = mm_and_si128(source, mm_slli_si128(first32, 4)) + k = mm_and_si128(source, mm_slli_si128(first32, 8)) + l = mm_and_si128(source, mm_slli_si128(first32, 12)) + + # Shift the values to `a` + i = mm_slli_si128(i, 1) + k = mm_slli_si128(k, 3) + l = mm_slli_si128(l, 2) + + source = mm_and_si128( + mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l)), + alphaMask + ) + + mm_storeu_si128( + a.data[a.dataIndex(x, y)].addr, + blenderSimd(backdrop, source) + ) + x += 4 + + else: # is a Mask + if blendMode.hasSimdMasker(): + let maskerSimd = blendMode.maskerSimd() + when type(b) is Image: + discard + else: # b is a Mask + discard for _ in x ..< xMax: let From 64e5016df6a3bd3fb64b352c2e267d3a6eea2722 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 22:36:04 -0600 Subject: [PATCH 5/5] draw mask.image simd --- src/pixie/blends.nim | 11 +++++++++ src/pixie/images.nim | 56 ++++++++++++++++++++++++++++---------------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 5ddeacd..ff84f34 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -505,6 +505,17 @@ when defined(amd64) and not defined(pixieNoSimd): BlenderSimd* = proc(blackdrop, source: M128i): M128i MaskerSimd* = proc(blackdrop, source: M128i): M128i + proc packAlphaValues*(v: M128i): M128i {.inline.} = + ## Shuffle the alpha values for these 4 colors to the first 4 bytes + result = mm_srli_epi32(v, 24) + let + i = mm_srli_si128(result, 3) + j = mm_srli_si128(result, 6) + k = mm_srli_si128(result, 9) + first32 = cast[M128i]([uint32.high, 0, 0, 0]) + result = mm_or_si128(mm_or_si128(result, i), mm_or_si128(j, k)) + result = mm_and_si128(result, first32) + proc blendNormalSimd*(backdrop, source: M128i): M128i = let alphaMask = mm_set1_epi32(cast[int32](0xff000000)) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index f83d1b3..886bfcd 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -476,8 +476,6 @@ proc newMask*(image: Image): Mask = var i: int when defined(amd64) and not defined(pixieNoSimd): - let mask32 = cast[M128i]([uint32.high, 0, 0, 0]) - for _ in countup(0, image.data.len - 16, 16): var a = mm_loadu_si128(image.data[i + 0].addr) @@ -485,20 +483,10 @@ proc newMask*(image: Image): Mask = c = mm_loadu_si128(image.data[i + 8].addr) d = mm_loadu_si128(image.data[i + 12].addr) - template pack(v: var M128i) = - # Shuffle the alpha values for these 4 colors to the first 4 bytes - v = mm_srli_epi32(v, 24) - let - i = mm_srli_si128(v, 3) - j = mm_srli_si128(v, 6) - k = mm_srli_si128(v, 9) - v = mm_or_si128(mm_or_si128(v, i), mm_or_si128(j, k)) - v = mm_and_si128(v, mask32) - - pack(a) - pack(b) - pack(c) - pack(d) + a = packAlphaValues(a) + b = packAlphaValues(b) + c = packAlphaValues(c) + d = packAlphaValues(d) b = mm_slli_si128(b, 4) c = mm_slli_si128(c, 8) @@ -741,10 +729,38 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) = else: # is a Mask if blendMode.hasSimdMasker(): let maskerSimd = blendMode.maskerSimd() - when type(b) is Image: - discard - else: # b is a Mask - discard + for _ in countup(x, xMax - 16, 16): + let + srcPos = p + dx * x.float32 + dy * y.float32 + sx = srcPos.x.int + sy = srcPos.y.int + backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) + when type(b) is Image: + # Need to read 16 colors and pack their alpha values + var + i = mm_loadu_si128(b.data[b.dataIndex(sx + 0, sy)].addr) + j = mm_loadu_si128(b.data[b.dataIndex(sx + 4, sy)].addr) + k = mm_loadu_si128(b.data[b.dataIndex(sx + 8, sy)].addr) + l = mm_loadu_si128(b.data[b.dataIndex(sx + 12, sy)].addr) + + i = packAlphaValues(i) + j = packAlphaValues(j) + k = packAlphaValues(k) + l = packAlphaValues(l) + + j = mm_slli_si128(j, 4) + k = mm_slli_si128(k, 8) + l = mm_slli_si128(l, 12) + + let source = mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l)) + else: # b is a Mask + let source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) + + mm_storeu_si128( + a.data[a.dataIndex(x, y)].addr, + maskerSimd(backdrop, source) + ) + x += 16 for _ in x ..< xMax: let