Merge pull request #95 from guzba/master

simd mask applyOpacity, 10x faster
This commit is contained in:
treeform 2021-02-08 19:16:38 -08:00 committed by GitHub
commit c765a3323c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 5 deletions

View file

@@ -365,12 +365,20 @@ proc draw*(
) {.inline.} =
image.drawCorrect(mask, translate(pos), blendMode)
# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# a superseded signature can appear directly above its replacement. Confirm
# against the repository before treating any single line as current code.
# Presumably the superseded signature (mat defaulted to mat3()) — TODO confirm:
proc draw*(a, b: Mask, mat = mat3(), blendMode = bmMask) =
# Mask-with-mask overload taking an explicit matrix; forwards a, b, mat and
# blendMode unchanged to drawCorrect.
proc draw*(a, b: Mask, mat: Mat3, blendMode = bmMask) =
a.drawCorrect(b, mat, blendMode)
# Presumably the superseded mask/image signature (mat defaulted to mat3()) —
# TODO confirm:
proc draw*(mask: Mask, image: Image, mat = mat3(), blendMode = bmMask) =
# Mask-with-mask overload taking a position: converts pos with translate(pos)
# and forwards to the matrix overload. pos defaults to vec2(0, 0).
proc draw*(a, b: Mask, pos = vec2(0, 0), blendMode = bmMask) {.inline.} =
a.draw(b, translate(pos), blendMode)
# Mask-with-image overload taking an explicit matrix; forwards mask, image,
# mat and blendMode unchanged to drawCorrect.
proc draw*(mask: Mask, image: Image, mat: Mat3, blendMode = bmMask) =
mask.drawCorrect(image, mat, blendMode)
# Mask-with-image overload taking a position: converts pos with translate(pos)
# and forwards to the matrix overload. pos defaults to vec2(0, 0).
proc draw*(
mask: Mask, image: Image, pos = vec2(0, 0), blendMode = bmMask
) {.inline.} =
mask.draw(image, translate(pos), blendMode)
# In release builds, pop the most recently pushed pragma set.
# NOTE(review): the matching {.push.} is not visible in this excerpt; presumably
# it sits under the same `when defined(release)` guard earlier in the file.
when defined(release):
{.pop.}

View file

@@ -1,5 +1,8 @@
import blends, common, vmath, system/memory
when defined(amd64) and not defined(pixieNoSimd):
import nimsimd/sse2
type
Mask* = ref object
## Mask object that holds mask opacity data.
@@ -111,9 +114,43 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 =
proc applyOpacity*(mask: Mask, opacity: float32) =
## Multiplies the values of the mask by opacity.
##
## Every byte of mask.data is replaced in place by
## (value * round(255 * opacity)) div 255. On amd64, unless pixieNoSimd is
## defined, 16 bytes are processed per iteration with SSE2; any remaining
## bytes (or all bytes when the SIMD path is compiled out) use the scalar loop
## at the end.
# NOTE(review): this listing looks like a diff rendered without +/- markers.
# The next three lines appear to be the earlier scalar-only implementation
# (opacity is declared again right after them) — confirm against the repository.
let opacity = round(255 * opacity).uint32
for value in mask.data.mitems:
value = ((value * opacity) div 255).uint8
# Integer opacity factor; assumes the float opacity is in 0..1 so the factor
# fits in 8 bits — TODO confirm (no range check is performed here).
let opacity = round(255 * opacity).uint16
# Index of the next unprocessed byte; zero-initialised, shared by the SIMD
# loop and the scalar tail loop.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
# 0xff00 in every 16-bit lane: selects the high byte of each lane.
oddMask = mm_set1_epi16(cast[int16](0xff00))
# Multiplier used below as mulhi(x, 0x8081) shr 7 to compute x div 255.
div255 = mm_set1_epi16(cast[int16](0x8081))
# Opacity factor moved into the high byte of every 16-bit lane.
vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
# The loop variable is discarded; i is advanced manually at the bottom of
# the body so it is still valid for the tail loop afterwards.
for _ in countup(i, mask.data.len - 16, 16):
var values = mm_loadu_si128(mask.data[i].addr)
# Skip the arithmetic and the store when all 16 bytes are zero
# (movemask is 0xffff only if every lane compared equal to zero).
let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128())
if mm_movemask_epi8(eqZero) != 0xffff:
var
# Low byte of each lane, shifted up into the high byte.
valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8)
# High byte of each lane, left in place.
valuesOdd = mm_and_si128(values, oddMask)
# values * opacity
# Both operands sit in the high byte, so the high 16 bits of the product
# returned by mulhi are exactly value * opacity.
valuesEven = mm_mulhi_epu16(valuesEven, vOpacity)
valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity)
# div 255
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
# Move the odd-byte results back to the high byte before recombining.
valuesOdd = mm_slli_epi16(valuesOdd, 8)
mm_storeu_si128(
mask.data[i].addr,
mm_or_si128(valuesEven, valuesOdd)
)
i += 16
# Scalar path for whatever the SIMD loop did not cover.
for j in i ..< mask.data.len:
mask.data[j] = ((mask.data[j] * opacity) div 255).uint8
# In release builds, pop the most recently pushed pragma set.
# NOTE(review): the matching {.push.} is not visible in this excerpt; presumably
# it sits under the same `when defined(release)` guard earlier in the file.
when defined(release):
{.pop.}

View file

@@ -1,5 +1,12 @@
import chroma, pixie, pixie/fileformats/png
block:
# Checks Mask.applyOpacity on a 100x100 mask filled with 200.
let mask = newMask(100, 100)
mask.fill(200)
mask.applyOpacity(0.5)
# Expected value: round(255 * 0.5) = 128 and (200 * 128) div 255 = 100.
# Two positions are sampled: the origin and one away from it.
doAssert mask[0, 0] == 100
doAssert mask[88, 88] == 100
block:
let
mask = newMask(100, 100)