From b706f93ee55922dc8bc176c5ee0be2d2d7ddd84b Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg
Date: Wed, 29 Jun 2022 00:40:20 -0500
Subject: [PATCH] move mask ceil simd out

---
 src/pixie/masks.nim | 23 +++++++++--------------
 src/pixie/simd.nim  | 16 ++++++++++++++++
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 32d21b1..7b29e09 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -286,21 +286,16 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
 
 proc ceil*(mask: Mask) {.raises: [].} =
   ## A value of 0 stays 0. Anything else turns into 255.
-  var i: int
-  when defined(amd64) and allowSimd:
-    let
-      zeroVec = mm_setzero_si128()
-      vec255 = mm_set1_epi8(255)
-    for _ in 0 ..< mask.data.len div 16:
-      var values = mm_loadu_si128(mask.data[i].addr)
-      values = mm_cmpeq_epi8(values, zeroVec)
-      values = mm_andnot_si128(values, vec255)
-      mm_storeu_si128(mask.data[i].addr, values)
-      i += 16
+  when allowSimd and compiles(ceilMaskSimd): # guard the proc actually called
+    if mask.data.len > 0: # `data[0]` is out of bounds on an empty mask
+      ceilMaskSimd(
+        cast[ptr UncheckedArray[uint8]](mask.data[0].addr), mask.data.len
+      )
+    return
 
-  for j in i ..< mask.data.len:
-    if mask.data[j] != 0:
-      mask.data[j] = 255
+  for i in 0 ..< mask.data.len: # scalar path when the SIMD proc is unavailable
+    if mask.data[i] != 0:
+      mask.data[i] = 255
 
 proc blur*(mask: Mask, radius: float32, outOfBounds: uint8 = 0) {.raises: [PixieError].} =
   ## Applies Gaussian blur to the image given a radius.
diff --git a/src/pixie/simd.nim b/src/pixie/simd.nim
index 9c100ec..05036cf 100644
--- a/src/pixie/simd.nim
+++ b/src/pixie/simd.nim
@@ -267,3 +267,19 @@ when defined(amd64):
 
     for j in i ..< len:
       data[j] = 255 - data[j]
+
+  proc ceilMaskSimd*(data: ptr UncheckedArray[uint8], len: int) =
+    ## Sets every non-zero byte of `data[0 ..< len]` to 255 in place; 0 stays 0.
+    var i: int # index of the next unprocessed byte
+    let zeroVec = mm_setzero_si128()
+    let vec255 = mm_set1_epi8(255)
+    for _ in 0 ..< len div 16: # 16 bytes per 128-bit load/store
+      var values = mm_loadu_si128(data[i].addr)
+      values = mm_cmpeq_epi8(values, zeroVec) # 0xFF where the byte was 0
+      values = mm_andnot_si128(values, vec255) # (not values) and 255
+      mm_storeu_si128(data[i].addr, values)
+      i += 16
+
+    for j in i ..< len: # scalar tail: the last `len mod 16` bytes
+      if data[j] != 0:
+        data[j] = 255