ceilMaskSimd
This commit is contained in:
parent
1c6fa86ac0
commit
f5825daf10
|
@ -308,10 +308,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
|
|||
proc ceil*(mask: Mask) {.raises: [].} =
|
||||
## A value of 0 stays 0. Anything else turns into 255.
|
||||
when allowSimd and compiles(invertImageSimd):
|
||||
ceilMaskSimd(
|
||||
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
|
||||
mask.data.len
|
||||
)
|
||||
ceilMaskSimd(mask.data)
|
||||
return
|
||||
|
||||
for i in 0 ..< mask.data.len:
|
||||
|
|
|
@ -317,19 +317,24 @@ when defined(amd64):
|
|||
for i in i ..< data.len:
|
||||
data[i] = 255 - data[i]
|
||||
|
||||
proc ceilMaskSimd*(data: var seq[uint8]) =
  ## Ceils every byte of `data` in place: a value of 0 stays 0, anything
  ## else turns into 255.
  ##
  ## Whole 16-byte chunks are processed with 128-bit SIMD intrinsics; the
  ## remaining `data.len mod 16` bytes are handled by a scalar loop.
  if data.len == 0:
    # Taking `data[0].addr` below would index out of bounds on an empty seq.
    return

  let
    zeroVec = mm_setzero_si128()
    vec255 = mm_set1_epi8(255)
    iterations = data.len div 16
    tailStart = 16 * iterations # first index not covered by the SIMD loop

  # Walk the buffer through a raw address so each step is a plain 16-byte
  # unaligned load/store instead of a bounds-checked seq access.
  var p = cast[uint](data[0].addr)
  for _ in 0 ..< iterations:
    var values = mm_loadu_si128(cast[pointer](p))
    # Bytes equal to 0 become 0xFF, every other byte becomes 0x00 ...
    values = mm_cmpeq_epi8(values, zeroVec)
    # ... then `(not values) and 255` inverts that: 0 -> 0, non-zero -> 255.
    values = mm_andnot_si128(values, vec255)
    mm_storeu_si128(cast[pointer](p), values)
    p += 16

  # Scalar tail for the bytes that did not fill a whole 16-byte chunk.
  for i in tailStart ..< data.len:
    if data[i] != 0:
      data[i] = 255
|
||||
|
||||
|
|
Loading…
Reference in a new issue