From 6fd7df9b234d37f3b59c0316d06d6b39201d7709 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 8 Feb 2021 21:24:20 -0600 Subject: [PATCH 1/2] shortcut --- src/pixie/masks.nim | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 805d621..ed0b013 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -116,6 +116,10 @@ proc applyOpacity*(mask: Mask, opacity: float32) = ## Multiplies the values of the mask by opacity. let opacity = round(255 * opacity).uint16 + if opacity == 0: + mask.fill(0) + return + var i: int when defined(amd64) and not defined(pixieNoSimd): let From 29f424fe4c21a0b58e764a9866a55b9d2f23a6ce Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 8 Feb 2021 21:42:39 -0600 Subject: [PATCH 2/2] applyOpacity simd for image too, 10x faster as well --- src/pixie/images.nim | 71 ++++++++++++++++++++++++++++++++++++++----- src/pixie/masks.nim | 44 --------------------------- tests/test_images.nim | 7 +++++ 3 files changed, 71 insertions(+), 51 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 44a3850..a5c2e6f 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -263,14 +263,71 @@ proc toStraightAlpha*(image: Image) = c.g = ((c.g.uint32 * multiplier) div 255).uint8 c.b = ((c.b.uint32 * multiplier) div 255).uint8 -proc applyOpacity*(image: Image, opacity: float32) = +proc applyOpacity*(target: Image | Mask, opacity: float32) = ## Multiplies alpha of the image by opacity. - let opacity = round(255 * opacity).uint32 - for rgba in image.data.mitems: - rgba.r = ((rgba.r * opacity) div 255).uint8 - rgba.g = ((rgba.g * opacity) div 255).uint8 - rgba.b = ((rgba.b * opacity) div 255).uint8 - rgba.a = ((rgba.a * opacity) div 255).uint8 + let opacity = round(255 * opacity).uint16 + + if opacity == 0: + when type(target) is Image: + target.fill(rgba(0, 0, 0, 0)) + else: + target.fill(0) + return + + var i: int + when defined(amd64) and not defined(pixieNoSimd): + when type(target) is Image: + let byteLen = target.data.len * 4 + else: + let byteLen = target.data.len + + let + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8) + + for _ in countup(0, byteLen - 16, 16): + when type(target) is Image: + let index = i div 4 + else: + let index = i + + var values = mm_loadu_si128(target.data[index].addr) + + let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128()) + if mm_movemask_epi8(eqZero) != 0xffff: + var + valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8) + valuesOdd = mm_and_si128(values, oddMask) + + # values * opacity + valuesEven = mm_mulhi_epu16(valuesEven, vOpacity) + valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity) + + # div 255 + valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) + valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7) + + valuesOdd = mm_slli_epi16(valuesOdd, 8) + + mm_storeu_si128( + target.data[index].addr, + mm_or_si128(valuesEven, valuesOdd) + ) + + i += 16 + + when type(target) is Image: + for j in i div 4 ..< target.data.len: + var rgba = target.data[j] + rgba.r = ((rgba.r * opacity) div 255).uint8 + rgba.g = ((rgba.g * opacity) div 255).uint8 + rgba.b = ((rgba.b * opacity) div 255).uint8 + rgba.a = ((rgba.a * opacity) div 255).uint8 + target.data[j] = rgba + else: + for j in i ..< target.data.len: + target.data[j] = ((target.data[j] * opacity) div 255).uint8 proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA = let diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index ed0b013..2722f64 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -112,49 +112,5 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 = lerp(bottomMix, topMix, diffY) -proc applyOpacity*(mask: Mask, opacity: float32) = - ## Multiplies the values of the mask by opacity. - let opacity = round(255 * opacity).uint16 - - if opacity == 0: - mask.fill(0) - return - - var i: int - when defined(amd64) and not defined(pixieNoSimd): - let - oddMask = mm_set1_epi16(cast[int16](0xff00)) - div255 = mm_set1_epi16(cast[int16](0x8081)) - vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8) - - for _ in countup(i, mask.data.len - 16, 16): - var values = mm_loadu_si128(mask.data[i].addr) - - let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128()) - if mm_movemask_epi8(eqZero) != 0xffff: - var - valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8) - valuesOdd = mm_and_si128(values, oddMask) - - # values * opacity - valuesEven = mm_mulhi_epu16(valuesEven, vOpacity) - valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity) - - # div 255 - valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) - valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7) - - valuesOdd = mm_slli_epi16(valuesOdd, 8) - - mm_storeu_si128( - mask.data[i].addr, - mm_or_si128(valuesEven, valuesOdd) - ) - - i += 16 - - for j in i ..< mask.data.len: - mask.data[j] = ((mask.data[j] * opacity) div 255).uint8 - when defined(release): {.pop.} diff --git a/tests/test_images.nim b/tests/test_images.nim index 6bf3ff2..cccff3b 100644 --- a/tests/test_images.nim +++ b/tests/test_images.nim @@ -28,6 +28,13 @@ block: image.toStraightAlpha() doAssert image[9, 9] == rgba(254, 0, 0, 128) +block: + let image = newImage(100, 100) + image.fill(rgba(200, 200, 200, 200)) + image.applyOpacity(0.5) + doAssert image[0, 0] == rgba(100, 100, 100, 100) + doAssert image[88, 88] == rgba(100, 100, 100, 100) + block: let a = newImage(101, 101)