From 788b5a0821a63dd46123957e69469b8d9273b9e1 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 8 Feb 2021 20:06:07 -0600 Subject: [PATCH 1/2] consistent versions across all combos --- src/pixie/images.nim | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index fef0bba..44a3850 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -365,12 +365,20 @@ proc draw*( ) {.inline.} = image.drawCorrect(mask, translate(pos), blendMode) -proc draw*(a, b: Mask, mat = mat3(), blendMode = bmMask) = +proc draw*(a, b: Mask, mat: Mat3, blendMode = bmMask) = a.drawCorrect(b, mat, blendMode) -proc draw*(mask: Mask, image: Image, mat = mat3(), blendMode = bmMask) = +proc draw*(a, b: Mask, pos = vec2(0, 0), blendMode = bmMask) {.inline.} = + a.draw(b, translate(pos), blendMode) + +proc draw*(mask: Mask, image: Image, mat: Mat3, blendMode = bmMask) = mask.drawCorrect(image, mat, blendMode) +proc draw*( + mask: Mask, image: Image, pos = vec2(0, 0), blendMode = bmMask +) {.inline.} = + mask.draw(image, translate(pos), blendMode) + when defined(release): {.pop.} From 6c5fa78ed968fd2463b388f6c211e0d01f2f45ac Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 8 Feb 2021 21:12:28 -0600 Subject: [PATCH 2/2] mask applyOpacity 10x faster --- src/pixie/masks.nim | 43 ++++++++++++++++++++++++++++++++++++++++--- tests/test_masks.nim | 7 +++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 93cf1af..805d621 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -1,5 +1,8 @@ import blends, common, vmath, system/memory +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2 + type Mask* = ref object ## Mask object that holds mask opacity data. @@ -111,9 +114,43 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 = proc applyOpacity*(mask: Mask, opacity: float32) = ## Multiplies the values of the mask by opacity. 
- let opacity = round(255 * opacity).uint32 - for value in mask.data.mitems: - value = ((value * opacity) div 255).uint8 + let opacity = round(255 * opacity).uint16 + + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8) + + for _ in countup(i, mask.data.len - 16, 16): + var values = mm_loadu_si128(mask.data[i].addr) + + let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128()) + if mm_movemask_epi8(eqZero) != 0xffff: + var + valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8) + valuesOdd = mm_and_si128(values, oddMask) + + # values * opacity + valuesEven = mm_mulhi_epu16(valuesEven, vOpacity) + valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity) + + # div 255 + valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) + valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7) + + valuesOdd = mm_slli_epi16(valuesOdd, 8) + + mm_storeu_si128( + mask.data[i].addr, + mm_or_si128(valuesEven, valuesOdd) + ) + + i += 16 + + for j in i ..< mask.data.len: + mask.data[j] = ((mask.data[j] * opacity) div 255).uint8 when defined(release): {.pop.} diff --git a/tests/test_masks.nim b/tests/test_masks.nim index e487a5b..02b5aaa 100644 --- a/tests/test_masks.nim +++ b/tests/test_masks.nim @@ -1,5 +1,12 @@ import chroma, pixie, pixie/fileformats/png +block: + let mask = newMask(100, 100) + mask.fill(200) + mask.applyOpacity(0.5) + doAssert mask[0, 0] == 100 + doAssert mask[88, 88] == 100 + block: let mask = newMask(100, 100)