move mask ceil simd out

This commit is contained in:
Ryan Oldenburg 2022-06-29 00:40:20 -05:00
parent 8a07e3cf9b
commit b706f93ee5
2 changed files with 25 additions and 14 deletions

View file

@ -286,21 +286,16 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
proc ceil*(mask: Mask) {.raises: [].} =
  ## A value of 0 stays 0. Anything else turns into 255.
  ## The mask's data is modified in place.
  when allowSimd and compiles(ceilMaskSimd):
    # Guard on the routine that is actually called. Checking for a different
    # SIMD proc only works while both happen to be defined under the same
    # platform condition.
    ceilMaskSimd(
      cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
      mask.data.len
    )
  else:
    # Scalar fallback when the SIMD implementation is not available.
    for i in 0 ..< mask.data.len:
      if mask.data[i] != 0:
        mask.data[i] = 255
proc blur*(mask: Mask, radius: float32, outOfBounds: uint8 = 0) {.raises: [PixieError].} =
## Applies Gaussian blur to the image given a radius.

View file

@ -267,3 +267,19 @@ when defined(amd64):
for j in i ..< len:
data[j] = 255 - data[j]
proc ceilMaskSimd*(data: ptr UncheckedArray[uint8], len: int) =
  ## In-place ceil over the `len` bytes starting at `data`: a 0 byte stays 0,
  ## every other byte becomes 255. Whole 16-byte chunks go through SSE2, the
  ## remaining bytes are handled one at a time.
  let
    zeros = mm_setzero_si128()
    allOnes = mm_set1_epi8(255)
    simdEnd = (len div 16) * 16 # Bytes covered by whole 16-byte chunks.

  var offset = 0
  while offset < simdEnd:
    let
      chunk = mm_loadu_si128(data[offset].addr)
      # 0xFF in each lane whose byte equals 0, 0x00 elsewhere.
      isZero = mm_cmpeq_epi8(chunk, zeros)
    # (not isZero) and 255: zero bytes stay 0, all others become 255.
    mm_storeu_si128(data[offset].addr, mm_andnot_si128(isZero, allOnes))
    offset += 16

  # Leftover bytes that do not fill a 16-byte chunk.
  for j in simdEnd ..< len:
    if data[j] != 0:
      data[j] = 255