ceilMaskSimd

This commit is contained in:
Ryan Oldenburg 2022-06-30 10:29:16 -05:00
parent 1c6fa86ac0
commit f5825daf10
2 changed files with 13 additions and 11 deletions

View file

@@ -308,10 +308,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
proc ceil*(mask: Mask) {.raises: [].} = proc ceil*(mask: Mask) {.raises: [].} =
## A value of 0 stays 0. Anything else turns into 255. ## A value of 0 stays 0. Anything else turns into 255.
when allowSimd and compiles(invertImageSimd): when allowSimd and compiles(invertImageSimd):
ceilMaskSimd( ceilMaskSimd(mask.data)
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len
)
return return
for i in 0 ..< mask.data.len: for i in 0 ..< mask.data.len:

View file

@@ -317,19 +317,24 @@ when defined(amd64):
for i in i ..< data.len: for i in i ..< data.len:
data[i] = 255 - data[i] data[i] = 255 - data[i]
proc ceilMaskSimd*(data: ptr UncheckedArray[uint8], len: int) = proc ceilMaskSimd*(data: var seq[uint8]) =
var i: int var
i: int
p = cast[uint](data[0].addr)
let let
zeroVec = mm_setzero_si128() zeroVec = mm_setzero_si128()
vec255 = mm_set1_epi8(255) vec255 = mm_set1_epi8(255)
for _ in 0 ..< len div 16: iterations = data.len div 16
var values = mm_loadu_si128(data[i].addr) for _ in 0 ..< iterations:
var values = mm_loadu_si128(cast[pointer](p))
values = mm_cmpeq_epi8(values, zeroVec) values = mm_cmpeq_epi8(values, zeroVec)
values = mm_andnot_si128(values, vec255) values = mm_andnot_si128(values, vec255)
mm_storeu_si128(data[i].addr, values) mm_storeu_si128(cast[pointer](p), values)
i += 16 p += 16
i += 16 * iterations
for i in i ..< len: for i in i ..< data.len:
if data[i] != 0: if data[i] != 0:
data[i] = 255 data[i] = 255