diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 7644bba..32d21b1 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -213,17 +213,19 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
 
 proc invert*(mask: Mask) {.raises: [].} =
   ## Inverts all of the values - creates a negative of the mask.
-  var i: int
-  when defined(amd64) and allowSimd:
-    let vec255 = mm_set1_epi8(255)
-    for _ in 0 ..< mask.data.len div 16:
-      var values = mm_loadu_si128(mask.data[i].addr)
-      values = mm_sub_epi8(vec255, values)
-      mm_storeu_si128(mask.data[i].addr, values)
-      i += 16
+  when allowSimd and compiles(invertMaskSimd):
+    # Taken only where simd.nim defines invertMaskSimd (it is declared under
+    # `when defined(amd64)`); elsewhere the scalar loop below runs instead.
+    # Empty data is skipped because data[0] is indexed to get the pointer.
+    if mask.data.len > 0:
+      invertMaskSimd(
+        cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
+        mask.data.len
+      )
+    return
 
-  for j in i ..< mask.data.len:
-    mask.data[j] = 255 - mask.data[j]
+  for i in 0 ..< mask.data.len:
+    mask.data[i] = 255 - mask.data[i]
 
 proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
   ## Grows the mask by spread.
diff --git a/src/pixie/simd.nim b/src/pixie/simd.nim
index 13453d1..9c100ec 100644
--- a/src/pixie/simd.nim
+++ b/src/pixie/simd.nim
@@ -255,3 +255,18 @@ when defined(amd64):
         data[i] = rgbx
 
     toPremultipliedAlphaSimd(cast[ptr UncheckedArray[uint32]](data), len)
+
+  proc invertMaskSimd*(data: ptr UncheckedArray[uint8], len: int) =
+    ## Replaces each of the `len` bytes at `data` with 255 minus its value.
+    var i: int
+    let vec255 = mm_set1_epi8(255)
+    # Handle 16 bytes per iteration using unaligned 128-bit loads and stores.
+    for _ in 0 ..< len div 16:
+      var values = mm_loadu_si128(data[i].addr)
+      values = mm_sub_epi8(vec255, values)
+      mm_storeu_si128(data[i].addr, values)
+      i += 16
+
+    # Scalar pass over the bytes left after the last full 16-byte chunk.
+    for j in i ..< len:
+      data[j] = 255 - data[j]