isOneColorSimd isTransparentSimd

This commit is contained in:
Ryan Oldenburg 2022-06-30 10:48:30 -05:00
parent 3a41ff8e64
commit c4eadf31ed
3 changed files with 55 additions and 45 deletions

View file

@ -102,10 +102,7 @@ proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =
proc isOneColor*(image: Image): bool {.raises: [].} = proc isOneColor*(image: Image): bool {.raises: [].} =
## Checks if the entire image is the same color. ## Checks if the entire image is the same color.
when allowSimd and compiles(isOneColorSimd): when allowSimd and compiles(isOneColorSimd):
return isOneColorSimd( return isOneColorSimd(image.data)
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
result = true result = true
@ -117,10 +114,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
proc isTransparent*(image: Image): bool {.raises: [].} = proc isTransparent*(image: Image): bool {.raises: [].} =
## Checks if this image is fully transparent or not. ## Checks if this image is fully transparent or not.
when allowSimd and compiles(isTransparentSimd): when allowSimd and compiles(isTransparentSimd):
return isTransparentSimd( return isTransparentSimd(image.data)
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
result = true result = true

View file

@ -6,20 +6,21 @@ when defined(gcc) or defined(clang):
when defined(release): when defined(release):
{.push checks: off.} {.push checks: off.}
proc isOneColorAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool = proc isOneColorAvx2*(data: var seq[ColorRGBX]): bool =
result = true result = true
let color = data[0] let color = data[0]
var i: int var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes # Align to 32 bytes
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
if data[i] != color: if data[i] != color:
return false return false
inc i inc i
let let
colorVec = mm256_set1_epi32(cast[int32](color)) colorVec = mm256_set1_epi32(cast[int32](color))
iterations = (len - i) div 16 iterations = (data.len - i) div 16
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
values0 = mm256_load_si256(data[i].addr) values0 = mm256_load_si256(data[i].addr)
@ -31,22 +32,23 @@ proc isOneColorAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false return false
i += 16 i += 16
for i in i ..< len: for i in i ..< data.len:
if data[i] != color: if data[i] != color:
return false return false
proc isTransparentAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool = proc isTransparentAvx2*(data: var seq[ColorRGBX]): bool =
result = true result = true
var i: int var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes # Align to 32 bytes
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
if data[i].a != 0: if data[i].a != 0:
return false return false
inc i inc i
let let
vecZero = mm256_setzero_si256() vecZero = mm256_setzero_si256()
iterations = (len - i) div 16 iterations = (data.len - i) div 16
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
values0 = mm256_load_si256(data[i].addr) values0 = mm256_load_si256(data[i].addr)
@ -57,7 +59,7 @@ proc isTransparentAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false return false
i += 16 i += 16
for i in i ..< len: for i in i ..< data.len:
if data[i].a != 0: if data[i].a != 0:
return false return false

View file

@ -62,29 +62,33 @@ when defined(amd64):
for i in i ..< start + len: for i in i ..< start + len:
data[i] = rgbx data[i] = rgbx
proc isOneColorSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool = proc isOneColorSimd*(data: var seq[ColorRGBX]): bool =
if cpuHasAvx2: if cpuHasAvx2:
return isOneColorAvx2(data, len) return isOneColorAvx2(data)
result = true result = true
let color = data[0] let color = data[0]
var i: int var
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes i: int
p = cast[uint](data[0].addr)
# Align to 16 bytes
while i < data.len and (p and 15) != 0:
if data[i] != color: if data[i] != color:
return false return false
inc i inc i
p += 4
let let
colorVec = mm_set1_epi32(cast[int32](color)) colorVec = mm_set1_epi32(cast[int32](color))
iterations = (len - i) div 16 iterations = (data.len - i) div 16
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
values0 = mm_load_si128(data[i].addr) values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(data[i + 4].addr) values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(data[i + 8].addr) values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(data[i + 12].addr) values3 = mm_load_si128(cast[pointer](p + 48))
eq0 = mm_cmpeq_epi8(values0, colorVec) eq0 = mm_cmpeq_epi8(values0, colorVec)
eq1 = mm_cmpeq_epi8(values1, colorVec) eq1 = mm_cmpeq_epi8(values1, colorVec)
eq2 = mm_cmpeq_epi8(values2, colorVec) eq2 = mm_cmpeq_epi8(values2, colorVec)
@ -92,41 +96,47 @@ when defined(amd64):
eq0123 = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3)) eq0123 = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3))
if mm_movemask_epi8(eq0123) != 0xffff: if mm_movemask_epi8(eq0123) != 0xffff:
return false return false
i += 16 p += 64
i += 16 * iterations
for i in i ..< len: for i in i ..< data.len:
if data[i] != color: if data[i] != color:
return false return false
proc isTransparentSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool = proc isTransparentSimd*(data: var seq[ColorRGBX]): bool =
if cpuHasAvx2: if cpuHasAvx2:
return isTransparentAvx2(data, len) return isTransparentAvx2(data)
var i: int var
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes i: int
p = cast[uint](data[0].addr)
# Align to 16 bytes
while i < data.len and (p and 15) != 0:
if data[i].a != 0: if data[i].a != 0:
return false return false
inc i inc i
p += 4
result = true result = true
let let
vecZero = mm_setzero_si128() vecZero = mm_setzero_si128()
iterations = (len - i) div 16 iterations = (data.len - i) div 16
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
values0 = mm_load_si128(data[i].addr) values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(data[i + 4].addr) values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(data[i + 8].addr) values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(data[i + 12].addr) values3 = mm_load_si128(cast[pointer](p + 48))
values01 = mm_or_si128(values0, values1) values01 = mm_or_si128(values0, values1)
values23 = mm_or_si128(values2, values3) values23 = mm_or_si128(values2, values3)
values0123 = mm_or_si128(values01, values23) values0123 = mm_or_si128(values01, values23)
if mm_movemask_epi8(mm_cmpeq_epi8(values0123, vecZero)) != 0xffff: if mm_movemask_epi8(mm_cmpeq_epi8(values0123, vecZero)) != 0xffff:
return false return false
i += 16 p += 64
i += 16 * iterations
for i in i ..< len: for i in i ..< data.len:
if data[i].a != 0: if data[i].a != 0:
return false return false
@ -136,29 +146,33 @@ when defined(amd64):
result = true result = true
var i = start var
i = start
p = cast[uint](data[0].addr)
# Align to 16 bytes # Align to 16 bytes
while i < (start + len) and (cast[uint](data[i].addr) and 15) != 0: while i < (start + len) and (p and 15) != 0:
if data[i].a != 255: if data[i].a != 255:
return false return false
inc i inc i
p += 4
let let
vec255 = mm_set1_epi8(255) vec255 = mm_set1_epi8(255)
iterations = (start + len - i) div 16 iterations = (start + len - i) div 16
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
values0 = mm_load_si128(data[i].addr) values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(data[i + 4].addr) values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(data[i + 8].addr) values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(data[i + 12].addr) values3 = mm_load_si128(cast[pointer](p + 48))
values01 = mm_and_si128(values0, values1) values01 = mm_and_si128(values0, values1)
values23 = mm_and_si128(values2, values3) values23 = mm_and_si128(values2, values3)
values0123 = mm_and_si128(values01, values23) values0123 = mm_and_si128(values01, values23)
eq = mm_cmpeq_epi8(values0123, vec255) eq = mm_cmpeq_epi8(values0123, vec255)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888: if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
return false return false
i += 16 p += 64
i += 16 * iterations
for i in i ..< start + len: for i in i ..< start + len:
if data[i].a != 255: if data[i].a != 255: