diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 4095346..5ddeacd 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -534,16 +534,41 @@ when defined(amd64) and not defined(pixieNoSimd): mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) ) + proc blendMaskSimd*(backdrop, source: M128i): M128i = + let + alphaMask = mm_set1_epi32(cast[int32](0xff000000)) + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + + var + sourceAlpha = mm_and_si128(source, alphaMask) + backdropEven = mm_slli_epi16(backdrop, 8) + backdropOdd = mm_and_si128(backdrop, oddMask) + + sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16)) + + backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha) + backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha) + + backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) + backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) + + mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) + proc blendOverwriteSimd*(backdrop, source: M128i): M128i = source proc blenderSimd*(blendMode: BlendMode): BlenderSimd = case blendMode: of bmNormal: blendNormalSimd + of bmMask: blendMaskSimd of bmOverwrite: blendOverwriteSimd else: raise newException(PixieError, "No SIMD blender for " & $blendMode) + proc hasSimdBlender*(blendMode: BlendMode): bool = + blendMode in {bmNormal, bmMask, bmOverwrite} + proc maskNormalSimd*(backdrop, source: M128i): M128i = ## Blending masks let @@ -583,12 +608,46 @@ when defined(amd64) and not defined(pixieNoSimd): mm_or_si128(blendedEven, blendedOdd) + proc maskMaskSimd*(backdrop, source: M128i): M128i = + let + oddMask = mm_set1_epi16(cast[int16](0xff00)) + v255high = mm_set1_epi16(cast[int16](255.uint16 shl 8)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + + var + sourceEven = mm_slli_epi16(mm_andnot_si128(oddMask, source), 8) + sourceOdd = mm_and_si128(source, oddMask) + + let + evenK = mm_sub_epi16(v255high, sourceEven) + oddK = mm_sub_epi16(v255high, sourceOdd) + + var + backdropEven = mm_slli_epi16(mm_andnot_si128(oddMask, backdrop), 8) + backdropOdd = mm_and_si128(backdrop, oddMask) + + # backdrop * k + backdropEven = mm_mulhi_epu16(backdropEven, evenK) + backdropOdd = mm_mulhi_epu16(backdropOdd, oddK) + + # div 255 + backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) + backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) + + backdropOdd = mm_slli_epi16(backdropOdd, 8) + + mm_or_si128(backdropEven, backdropOdd) + proc maskerSimd*(blendMode: BlendMode): MaskerSimd = case blendMode: of bmNormal: maskNormalSimd + of bmMask: maskMaskSimd of bmOverwrite: blendOverwriteSimd else: raise newException(PixieError, "No SIMD masker for " & $blendMode) + proc hasSimdMasker*(blendMode: BlendMode): bool = + blendMode in {bmNormal, bmMask, bmOverwrite} + when defined(release): {.pop.} diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 54b8eb0..b86fce2 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -369,6 +369,107 @@ proc invert*(target: Image | Mask) = for j in i ..< target.data.len: target.data[j] = (255 - target.data[j]).uint8 +proc blur*(target: Image | Mask, radius: float32) = + ## Applies Gaussian blur to the image given a radius. + let radius = round(radius).int + if radius == 0: + return + + proc gaussianLookup(radius: int): seq[uint32] = + ## Compute lookup table for 1d Gaussian kernel. + ## Values are [0, 255] * 1024. + result.setLen(radius * 2 + 1) + + var + floats = newSeq[float32](result.len) + total = 0.0 + for xb in -radius .. radius: + let + s = radius.float32 / 2.2 # 2.2 matches Figma. + x = xb.float32 + a = 1 / sqrt(2 * PI * s^2) * exp(-1 * x^2 / (2 * s^2)) + floats[xb + radius] = a + total += a + for xb in -radius .. radius: + floats[xb + radius] = floats[xb + radius] / total + + for i, f in floats: + result[i] = round(f * 255 * 1024).uint32 + + let lookup = gaussianLookup(radius) + + when type(target) is Image: + + template `*`(sample: ColorRGBA, a: uint32): array[4, uint32] = + [ + sample.r * a, + sample.g * a, + sample.b * a, + sample.a * a + ] + + template `+=`(values: var array[4, uint32], sample: array[4, uint32]) = + values[0] += sample[0] + values[1] += sample[1] + values[2] += sample[2] + values[3] += sample[3] + + template rgba(values: array[4, uint32]): ColorRGBA = + rgba( + (values[0] div 1024 div 255).uint8, + (values[1] div 1024 div 255).uint8, + (values[2] div 1024 div 255).uint8, + (values[3] div 1024 div 255).uint8 + ) + + # Blur in the X direction. + var blurX = newImage(target.width, target.height) + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var values: array[4, uint32] + for xb in -radius .. radius: + let + sample = target[x + xb, y] + a = lookup[xb + radius].uint32 + values += sample * a + blurX.setRgbaUnsafe(x, y, values.rgba()) + + # Blur in the Y direction. + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var values: array[4, uint32] + for yb in -radius .. radius: + let + sample = blurX[x, y + yb] + a = lookup[yb + radius].uint32 + values += sample * a + target.setRgbaUnsafe(x, y, values.rgba()) + + else: # target is a Mask + + # Blur in the X direction. + var blurX = newMask(target.width, target.height) + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var value: uint32 + for xb in -radius .. radius: + let + sample = target[x + xb, y] + a = lookup[xb + radius].uint32 + value += sample * a + blurX.setValueUnsafe(x, y, (value div 1024 div 255).uint8) + + # Blur in the Y direction and modify image. + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var value: uint32 + for yb in -radius .. radius: + let + sample = blurX[x, y + yb] + a = lookup[yb + radius].uint32 + value += sample * a + target.setValueUnsafe(x, y, (value div 1024 div 255).uint8) + proc newMask*(image: Image): Mask = ## Returns a new mask using the alpha values of the parameter image. result = newMask(image.width, image.height) @@ -508,110 +609,6 @@ proc draw*( ) {.inline.} = mask.draw(image, translate(pos), blendMode) -proc blur*(target: Image | Mask, radius: float32) = - ## Applies Gaussian blur to the image given a radius. - let radius = round(radius).int - if radius == 0: - return - - proc gaussianLookup(radius: int): seq[uint32] = - ## Compute lookup table for 1d Gaussian kernel. - ## Values are [0, 255] * 1024. - result.setLen(radius * 2 + 1) - - var - floats = newSeq[float32](result.len) - total = 0.0 - for xb in -radius .. radius: - let - s = radius.float32 / 2.2 # 2.2 matches Figma. - x = xb.float32 - a = 1 / sqrt(2 * PI * s^2) * exp(-1 * x^2 / (2 * s^2)) - floats[xb + radius] = a - total += a - for xb in -radius .. radius: - floats[xb + radius] = floats[xb + radius] / total - - for i, f in floats: - result[i] = round(f * 255 * 1024).uint32 - - let lookup = gaussianLookup(radius) - - when type(target) is Image: - - template `*`(sample: ColorRGBA, a: uint32): array[4, uint32] = - [ - sample.r * a, - sample.g * a, - sample.b * a, - sample.a * a - ] - - template `+=`(values: var array[4, uint32], sample: array[4, uint32]) = - values[0] += sample[0] - values[1] += sample[1] - values[2] += sample[2] - values[3] += sample[3] - - template rgba(values: array[4, uint32]): ColorRGBA = - rgba( - (values[0] div 1024 div 255).uint8, - (values[1] div 1024 div 255).uint8, - (values[2] div 1024 div 255).uint8, - (values[3] div 1024 div 255).uint8 - ) - - # Blur in the X direction. - var blurX = newImage(target.width, target.height) - for y in 0 ..< target.height: - for x in 0 ..< target.width: - var values: array[4, uint32] - for xb in -radius .. radius: - let - sample = target[x + xb, y] - a = lookup[xb + radius].uint32 - values += sample * a - blurX.setRgbaUnsafe(x, y, values.rgba()) - - # Blur in the Y direction. - for y in 0 ..< target.height: - for x in 0 ..< target.width: - var values: array[4, uint32] - for yb in -radius .. radius: - let - sample = blurX[x, y + yb] - a = lookup[yb + radius].uint32 - values += sample * a - target.setRgbaUnsafe(x, y, values.rgba()) - - else: # target is a Mask - - # Blur in the X direction. - var blurX = newMask(target.width, target.height) - for y in 0 ..< target.height: - for x in 0 ..< target.width: - var value: uint32 - for xb in -radius .. radius: - let - sample = target[x + xb, y] - a = lookup[xb + radius].uint32 - value += sample * a - blurX.setValueUnsafe(x, y, (value div 1024 div 255).uint8) - - # Blur in the Y direction and modify image. - for y in 0 ..< target.height: - for x in 0 ..< target.width: - var value: uint32 - for yb in -radius .. radius: - let - sample = blurX[x, y + yb] - a = lookup[yb + radius].uint32 - value += sample * a - target.setValueUnsafe(x, y, (value div 1024 div 255).uint8) - -when defined(release): - {.pop.} - proc drawUber( a, b: Image, p, dx, dy: Vec2, @@ -630,8 +627,8 @@ proc drawUber( yMin = a.height yMax = 0 for segment in perimeter: - yMin = min(yMin, segment.at.y.floor.int) - yMax = max(yMax, segment.at.y.ceil.int) + yMin = min(yMin, segment.at.y.floor.int) + yMax = max(yMax, segment.at.y.ceil.int) yMin = yMin.clamp(0, a.height) yMax = yMax.clamp(0, a.height) @@ -659,18 +656,44 @@ proc drawUber( if xMin > 0: zeroMem(a.data[a.dataIndex(0, y)].addr, 4 * xMin) - for x in xMin ..< xMax: - let - srcPos = p + dx * float32(x) + dy * float32(y) - xFloat = srcPos.x - h - yFloat = srcPos.y - h - rgba = a.getRgbaUnsafe(x, y) - rgba2 = - if smooth: - b.getRgbaSmooth(xFloat, yFloat) - else: - b.getRgbaUnsafe(xFloat.int, yFloat.int) - a.setRgbaUnsafe(x, y, blender(rgba, rgba2)) + if smooth: + for x in xMin ..< xMax: + let + srcPos = p + dx * x.float32 + dy * y.float32 + xFloat = srcPos.x - h + yFloat = srcPos.y - h + backdrop = a.getRgbaUnsafe(x, y) + source = b.getRgbaSmooth(xFloat, yFloat) + a.setRgbaUnsafe(x, y, blender(backdrop, source)) + else: + var x = xMin + when defined(amd64) and not defined(pixieNoSimd): + if blendMode.hasSimdBlender(): + if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1: + # Check we are not rotated before using SIMD blends + let blenderSimd = blendMode.blenderSimd() + for _ in countup(x, xMax - 4, 4): + let + srcPos = p + dx * x.float32 + dy * y.float32 + sx = srcPos.x.int + sy = srcPos.y.int + backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr) + source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) + mm_storeu_si128( + a.data[a.dataIndex(x, y)].addr, + blenderSimd(backdrop, source) + ) + x += 4 + + for _ in x ..< xMax: + let + srcPos = p + dx * x.float32 + dy * y.float32 + xFloat = srcPos.x - h + yFloat = srcPos.y - h + backdrop = a.getRgbaUnsafe(x, y) + source = b.getRgbaUnsafe(xFloat.int, yFloat.int) + a.setRgbaUnsafe(x, y, blender(backdrop, source)) + inc x if blendMode == bmIntersectMask: if a.width - xMax > 0: @@ -761,3 +784,6 @@ proc shadow*( result = newImage(mask.width, mask.height) result.fill(color) result.draw(mask, blendMode = bmMask) + +when defined(release): + {.pop.} diff --git a/tests/images/rotate0.png b/tests/images/rotate0.png new file mode 100644 index 0000000..7982907 Binary files /dev/null and b/tests/images/rotate0.png differ diff --git a/tests/images/rotate180.png b/tests/images/rotate180.png new file mode 100644 index 0000000..bfda507 Binary files /dev/null and b/tests/images/rotate180.png differ diff --git a/tests/images/rotate270.png b/tests/images/rotate270.png new file mode 100644 index 0000000..8cd7328 Binary files /dev/null and b/tests/images/rotate270.png differ diff --git a/tests/images/rotate360.png b/tests/images/rotate360.png new file mode 100644 index 0000000..8f9fa85 Binary files /dev/null and b/tests/images/rotate360.png differ diff --git a/tests/images/rotate90.png b/tests/images/rotate90.png new file mode 100644 index 0000000..4e40c7b Binary files /dev/null and b/tests/images/rotate90.png differ diff --git a/tests/test_images_draw.nim b/tests/test_images_draw.nim new file mode 100644 index 0000000..5e3f46f --- /dev/null +++ b/tests/test_images_draw.nim @@ -0,0 +1,51 @@ +import chroma, pixie, vmath + +block: + let + a = newImage(1000, 1000) + b = newImage(500, 500) + a.fill(rgba(255, 0, 0, 255)) + b.fill(rgba(0, 255, 0, 255)) + + a.draw(b, translate(vec2(250, 250))) + a.writeFile("tests/images/rotate0.png") + +block: + let + a = newImage(1000, 1000) + b = newImage(500, 500) + a.fill(rgba(255, 0, 0, 255)) + b.fill(rgba(0, 255, 0, 255)) + + a.draw(b, translate(vec2(250, 250)) * rotationMat3(90 * PI / 180)) + a.writeFile("tests/images/rotate90.png") + +block: + let + a = newImage(1000, 1000) + b = newImage(500, 500) + a.fill(rgba(255, 0, 0, 255)) + b.fill(rgba(0, 255, 0, 255)) + + a.draw(b, translate(vec2(250, 250)) * rotationMat3(180 * PI / 180)) + a.writeFile("tests/images/rotate180.png") + +block: + let + a = newImage(1000, 1000) + b = newImage(500, 500) + a.fill(rgba(255, 0, 0, 255)) + b.fill(rgba(0, 255, 0, 255)) + + a.draw(b, translate(vec2(250, 250)) * rotationMat3(270 * PI / 180)) + a.writeFile("tests/images/rotate270.png") + +block: + let + a = newImage(1000, 1000) + b = newImage(500, 500) + a.fill(rgba(255, 0, 0, 255)) + b.fill(rgba(0, 255, 0, 255)) + + a.draw(b, translate(vec2(250, 250)) * rotationMat3(360 * PI / 180)) + a.writeFile("tests/images/rotate360.png")