invertImageSimd invertMaskSimd
This commit is contained in:
parent
9644894903
commit
af5045ccb8
4 changed files with 71 additions and 41 deletions
|
@ -386,10 +386,7 @@ proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
|
||||||
proc invert*(image: Image) {.raises: [].} =
|
proc invert*(image: Image) {.raises: [].} =
|
||||||
## Inverts all of the colors and alpha.
|
## Inverts all of the colors and alpha.
|
||||||
when allowSimd and compiles(invertImageSimd):
|
when allowSimd and compiles(invertImageSimd):
|
||||||
invertImageSimd(
|
invertImageSimd(image.data)
|
||||||
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
|
|
||||||
image.data.len
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for i in 0 ..< image.data.len:
|
for i in 0 ..< image.data.len:
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
import common, internal, vmath
|
import common, internal, vmath
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
when allowSimd:
|
||||||
|
import simd
|
||||||
|
|
||||||
|
when defined(amd64):
|
||||||
import nimsimd/sse2
|
import nimsimd/sse2
|
||||||
|
|
||||||
type
|
type
|
||||||
|
@ -234,11 +237,8 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
||||||
|
|
||||||
proc invert*(mask: Mask) {.raises: [].} =
|
proc invert*(mask: Mask) {.raises: [].} =
|
||||||
## Inverts all of the values - creates a negative of the mask.
|
## Inverts all of the values - creates a negative of the mask.
|
||||||
when allowSimd and compiles(invertImageSimd):
|
when allowSimd and compiles(invertMaskSimd):
|
||||||
invertMaskSimd(
|
invertMaskSimd(mask.data)
|
||||||
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
|
|
||||||
mask.data.len
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for i in 0 ..< mask.data.len:
|
for i in 0 ..< mask.data.len:
|
||||||
|
|
|
@ -23,8 +23,8 @@ proc fillUnsafeAvx*(
|
||||||
p += 4
|
p += 4
|
||||||
|
|
||||||
let
|
let
|
||||||
iterations = (start + len - i) div 8
|
|
||||||
colorVec = mm256_set1_epi32(cast[int32](rgbx))
|
colorVec = mm256_set1_epi32(cast[int32](rgbx))
|
||||||
|
iterations = (start + len - i) div 8
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
mm256_store_si256(cast[pointer](p), colorVec)
|
mm256_store_si256(cast[pointer](p), colorVec)
|
||||||
p += 32
|
p += 32
|
||||||
|
|
|
@ -243,22 +243,38 @@ when defined(amd64):
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
dst[i] = src[i].a
|
dst[i] = src[i].a
|
||||||
|
|
||||||
proc invertImageSimd*(data: ptr UncheckedArray[ColorRGBX], len: int) =
|
proc invertImageSimd*(data: var seq[ColorRGBX]) =
|
||||||
var i: int
|
var
|
||||||
let vec255 = mm_set1_epi8(cast[int8](255))
|
i: int
|
||||||
for _ in 0 ..< len div 16:
|
p = cast[uint](data[0].addr)
|
||||||
let
|
# Align to 16 bytes
|
||||||
a = mm_loadu_si128(data[i + 0].addr)
|
while i < data.len and (p and 15) != 0:
|
||||||
b = mm_loadu_si128(data[i + 4].addr)
|
var rgbx = data[i]
|
||||||
c = mm_loadu_si128(data[i + 8].addr)
|
rgbx.r = 255 - rgbx.r
|
||||||
d = mm_loadu_si128(data[i + 12].addr)
|
rgbx.g = 255 - rgbx.g
|
||||||
mm_storeu_si128(data[i + 0].addr, mm_sub_epi8(vec255, a))
|
rgbx.b = 255 - rgbx.b
|
||||||
mm_storeu_si128(data[i + 4].addr, mm_sub_epi8(vec255, b))
|
rgbx.a = 255 - rgbx.a
|
||||||
mm_storeu_si128(data[i + 8].addr, mm_sub_epi8(vec255, c))
|
data[i] = rgbx
|
||||||
mm_storeu_si128(data[i + 12].addr, mm_sub_epi8(vec255, d))
|
inc i
|
||||||
i += 16
|
p += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
let
|
||||||
|
vec255 = mm_set1_epi8(255)
|
||||||
|
iterations = data.len div 16
|
||||||
|
for _ in 0 ..< iterations:
|
||||||
|
let
|
||||||
|
a = mm_load_si128(cast[pointer](p))
|
||||||
|
b = mm_load_si128(cast[pointer](p + 16))
|
||||||
|
c = mm_load_si128(cast[pointer](p + 32))
|
||||||
|
d = mm_load_si128(cast[pointer](p + 48))
|
||||||
|
mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
|
||||||
|
mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
|
||||||
|
mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
|
||||||
|
mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
|
||||||
|
p += 64
|
||||||
|
i += 16 * iterations
|
||||||
|
|
||||||
|
for i in i ..< data.len:
|
||||||
var rgbx = data[i]
|
var rgbx = data[i]
|
||||||
rgbx.r = 255 - rgbx.r
|
rgbx.r = 255 - rgbx.r
|
||||||
rgbx.g = 255 - rgbx.g
|
rgbx.g = 255 - rgbx.g
|
||||||
|
@ -266,19 +282,36 @@ when defined(amd64):
|
||||||
rgbx.a = 255 - rgbx.a
|
rgbx.a = 255 - rgbx.a
|
||||||
data[i] = rgbx
|
data[i] = rgbx
|
||||||
|
|
||||||
toPremultipliedAlphaSimd(cast[ptr UncheckedArray[uint32]](data), len)
|
toPremultipliedAlphaSimd(cast[ptr UncheckedArray[uint32]](data[0].addr), data.len)
|
||||||
|
|
||||||
proc invertMaskSimd*(data: ptr UncheckedArray[uint8], len: int) =
|
proc invertMaskSimd*(data: var seq[uint8]) =
|
||||||
var i: int
|
var
|
||||||
let vec255 = mm_set1_epi8(255)
|
i: int
|
||||||
for _ in 0 ..< len div 16:
|
p = cast[uint](data[0].addr)
|
||||||
var values = mm_loadu_si128(data[i].addr)
|
# Align to 16 bytes
|
||||||
values = mm_sub_epi8(vec255, values)
|
while i < data.len and (p and 15) != 0:
|
||||||
mm_storeu_si128(data[i].addr, values)
|
data[i] = 255 - data[i]
|
||||||
i += 16
|
inc i
|
||||||
|
inc p
|
||||||
|
|
||||||
for j in i ..< len:
|
let
|
||||||
data[j] = 255 - data[j]
|
vec255 = mm_set1_epi8(255)
|
||||||
|
iterations = data.len div 64
|
||||||
|
for _ in 0 ..< iterations:
|
||||||
|
let
|
||||||
|
a = mm_load_si128(cast[pointer](p))
|
||||||
|
b = mm_load_si128(cast[pointer](p + 16))
|
||||||
|
c = mm_load_si128(cast[pointer](p + 32))
|
||||||
|
d = mm_load_si128(cast[pointer](p + 48))
|
||||||
|
mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
|
||||||
|
mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
|
||||||
|
mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
|
||||||
|
mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
|
||||||
|
p += 64
|
||||||
|
i += 64 * iterations
|
||||||
|
|
||||||
|
for i in i ..< data.len:
|
||||||
|
data[i] = 255 - data[i]
|
||||||
|
|
||||||
proc ceilMaskSimd*(data: ptr UncheckedArray[uint8], len: int) =
|
proc ceilMaskSimd*(data: ptr UncheckedArray[uint8], len: int) =
|
||||||
var i: int
|
var i: int
|
||||||
|
@ -303,10 +336,10 @@ when defined(amd64):
|
||||||
) =
|
) =
|
||||||
var i: int
|
var i: int
|
||||||
let
|
let
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(0xff00)
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(0x8081)
|
||||||
zeroVec = mm_setzero_si128()
|
zeroVec = mm_setzero_si128()
|
||||||
opacityVec = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
|
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
|
||||||
for _ in 0 ..< len div 16:
|
for _ in 0 ..< len div 16:
|
||||||
let values = mm_loadu_si128(data[i].addr)
|
let values = mm_loadu_si128(data[i].addr)
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
|
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
|
||||||
|
|
Loading…
Reference in a new issue