Merge pull request #95 from guzba/master
simd mask applyOpacity, 10x faster
This commit is contained in:
commit
c765a3323c
3 changed files with 57 additions and 5 deletions
|
@ -365,12 +365,20 @@ proc draw*(
|
|||
) {.inline.} =
|
||||
image.drawCorrect(mask, translate(pos), blendMode)
|
||||
|
||||
# NOTE(review): this diff rendering lost its +/- markers. The signature below
# with `mat = mat3()` looks like the removed line and the one after it (with a
# mandatory `mat: Mat3`) its replacement -- confirm against the repository.
proc draw*(a, b: Mask, mat = mat3(), blendMode = bmMask) =
|
||||
# Mask-onto-mask draw taking an explicit transform matrix.
proc draw*(a, b: Mask, mat: Mat3, blendMode = bmMask) =
|
||||
# Forwards the transform and blend mode unchanged to drawCorrect.
a.drawCorrect(b, mat, blendMode)
|
||||
|
||||
# NOTE(review): presumably the removed mask/image signature (default matrix);
# its replacement with a mandatory `mat: Mat3` appears further down.
proc draw*(mask: Mask, image: Image, mat = mat3(), blendMode = bmMask) =
|
||||
# Mask-onto-mask draw taking a position instead of a matrix; `pos` defaults to
# the origin.
proc draw*(a, b: Mask, pos = vec2(0, 0), blendMode = bmMask) {.inline.} =
|
||||
# Turns the position into a translation matrix and calls the Mat3 overload.
a.draw(b, translate(pos), blendMode)
|
||||
|
||||
# Image-onto-mask draw taking an explicit transform matrix.
proc draw*(mask: Mask, image: Image, mat: Mat3, blendMode = bmMask) =
|
||||
# Forwards the transform and blend mode unchanged to drawCorrect.
mask.drawCorrect(image, mat, blendMode)
|
||||
|
||||
# Image-onto-mask draw taking a position instead of a matrix; `pos` defaults to
# the origin.
proc draw*(
|
||||
mask: Mask, image: Image, pos = vec2(0, 0), blendMode = bmMask
|
||||
) {.inline.} =
|
||||
# Turns the position into a translation matrix and calls the Mat3 overload.
mask.draw(image, translate(pos), blendMode)
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
import blends, common, vmath, system/memory
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
import nimsimd/sse2
|
||||
|
||||
type
|
||||
Mask* = ref object
|
||||
## Mask object that holds mask opacity data.
|
||||
|
@ -111,9 +114,43 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 =
|
|||
|
||||
proc applyOpacity*(mask: Mask, opacity: float32) =
|
||||
## Multiplies the values of the mask by opacity.
## The float opacity is first scaled by 255 and rounded to an integer; every
## mask value is then replaced in place by `value * opacity div 255`.
## On amd64 (unless `pixieNoSimd` is defined) 16 values are processed per
## iteration with SSE2; any remaining values use the scalar loop at the end.
|
||||
# NOTE(review): this diff rendering lost its +/- markers. The next three
# statements (uint32 opacity + mitems loop) look like the removed scalar
# implementation; the uint16 declaration after them appears to be the
# replacement -- confirm against the repository.
let opacity = round(255 * opacity).uint32
|
||||
for value in mask.data.mitems:
|
||||
value = ((value * opacity) div 255).uint8
|
||||
# Integer opacity. NOTE(review): the SIMD path shifts this left by 8 inside a
# 16-bit lane, so it presumably assumes opacity is within 0..1 (integer value
# 0..255) -- TODO confirm callers never pass more.
let opacity = round(255 * opacity).uint16
|
||||
|
||||
# Index of the next unprocessed element; shared by the SIMD and scalar loops.
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
let
|
||||
# Selects the high (odd-indexed) byte of every 16-bit lane.
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
# Multiplier for the "div 255" step below: taking the high 16 bits of
# x * 0x8081 and shifting right by 7 yields x div 255 for 16-bit x.
|
||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||
# Opacity placed in the high byte of every 16-bit lane.
|
||||
vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
|
||||
|
||||
# One iteration per full group of 16 mask values.
for _ in countup(i, mask.data.len - 16, 16):
|
||||
# Unaligned load of 16 consecutive mask values.
var values = mm_loadu_si128(mask.data[i].addr)
|
||||
|
||||
# A movemask of 0xffff means every lane compared equal to zero, i.e. all 16
# values are 0 and the multiply/store can be skipped.
let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128())
|
||||
if mm_movemask_epi8(eqZero) != 0xffff:
|
||||
var
|
||||
# Even-indexed (low) bytes, moved up into the high byte of each lane.
|
||||
valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8)
|
||||
# Odd-indexed (high) bytes, left in place.
|
||||
valuesOdd = mm_and_si128(values, oddMask)
|
||||
|
||||
# Both operands carry their byte in the high half of the lane, so the high
# 16 bits of the product are exactly value * opacity.
# values * opacity
|
||||
valuesEven = mm_mulhi_epu16(valuesEven, vOpacity)
|
||||
valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity)
|
||||
|
||||
# Multiply-high by 0x8081 followed by a right shift of 7 (see div255).
# div 255
|
||||
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
||||
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
||||
|
||||
# Move the odd results back into the high byte so they can be merged with
# the even results sitting in the low byte.
valuesOdd = mm_slli_epi16(valuesOdd, 8)
|
||||
|
||||
# Write the 16 recombined values back over the originals.
mm_storeu_si128(
|
||||
mask.data[i].addr,
|
||||
mm_or_si128(valuesEven, valuesOdd)
|
||||
)
|
||||
|
||||
# NOTE(review): presumably runs every iteration, including skipped all-zero
# groups (indentation is lost in this rendering) -- confirm.
i += 16
|
||||
|
||||
# Scalar pass over whatever the SIMD loop did not cover (everything when the
# SIMD branch is not compiled in, since i is then still 0).
for j in i ..< mask.data.len:
|
||||
mask.data[j] = ((mask.data[j] * opacity) div 255).uint8
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
import chroma, pixie, pixie/fileformats/png
|
||||
|
||||
# applyOpacity check: a 100x100 mask filled with 200 and scaled by 0.5 must
# read back 100 at both sampled positions.
block:
|
||||
let mask = newMask(100, 100)
|
||||
mask.fill(200)
|
||||
mask.applyOpacity(0.5)
|
||||
# Sample the first element and one further into the buffer.
doAssert mask[0, 0] == 100
|
||||
doAssert mask[88, 88] == 100
|
||||
|
||||
block:
|
||||
let
|
||||
mask = newMask(100, 100)
|
||||
|
|
Loading…
Reference in a new issue