isOneColorSimd isTransparentSimd

This commit is contained in:
Ryan Oldenburg 2022-06-30 10:48:30 -05:00
parent 3a41ff8e64
commit c4eadf31ed
3 changed files with 55 additions and 45 deletions

View file

@ -102,10 +102,7 @@ proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =
proc isOneColor*(image: Image): bool {.raises: [].} =
## Checks if the entire image is the same color.
when allowSimd and compiles(isOneColorSimd):
return isOneColorSimd(
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
return isOneColorSimd(image.data)
result = true
@ -117,10 +114,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
proc isTransparent*(image: Image): bool {.raises: [].} =
## Checks if this image is fully transparent or not.
when allowSimd and compiles(isTransparentSimd):
return isTransparentSimd(
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
return isTransparentSimd(image.data)
result = true

View file

@ -6,20 +6,21 @@ when defined(gcc) or defined(clang):
when defined(release):
{.push checks: off.}
proc isOneColorAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isOneColorAvx2*(data: var seq[ColorRGBX]): bool =
result = true
let color = data[0]
var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
# Align to 32 bytes
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
if data[i] != color:
return false
inc i
let
colorVec = mm256_set1_epi32(cast[int32](color))
iterations = (len - i) div 16
iterations = (data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm256_load_si256(data[i].addr)
@ -31,22 +32,23 @@ proc isOneColorAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false
i += 16
for i in i ..< len:
for i in i ..< data.len:
if data[i] != color:
return false
proc isTransparentAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isTransparentAvx2*(data: var seq[ColorRGBX]): bool =
result = true
var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
# Align to 32 bytes
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
if data[i].a != 0:
return false
inc i
let
vecZero = mm256_setzero_si256()
iterations = (len - i) div 16
iterations = (data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm256_load_si256(data[i].addr)
@ -57,7 +59,7 @@ proc isTransparentAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false
i += 16
for i in i ..< len:
for i in i ..< data.len:
if data[i].a != 0:
return false

View file

@ -62,29 +62,33 @@ when defined(amd64):
for i in i ..< start + len:
data[i] = rgbx
proc isOneColorSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isOneColorSimd*(data: var seq[ColorRGBX]): bool =
if cpuHasAvx2:
return isOneColorAvx2(data, len)
return isOneColorAvx2(data)
result = true
let color = data[0]
var i: int
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes
var
i: int
p = cast[uint](data[0].addr)
# Align to 16 bytes
while i < data.len and (p and 15) != 0:
if data[i] != color:
return false
inc i
p += 4
let
colorVec = mm_set1_epi32(cast[int32](color))
iterations = (len - i) div 16
iterations = (data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm_load_si128(data[i].addr)
values1 = mm_load_si128(data[i + 4].addr)
values2 = mm_load_si128(data[i + 8].addr)
values3 = mm_load_si128(data[i + 12].addr)
values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(cast[pointer](p + 48))
eq0 = mm_cmpeq_epi8(values0, colorVec)
eq1 = mm_cmpeq_epi8(values1, colorVec)
eq2 = mm_cmpeq_epi8(values2, colorVec)
@ -92,41 +96,47 @@ when defined(amd64):
eq0123 = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3))
if mm_movemask_epi8(eq0123) != 0xffff:
return false
i += 16
p += 64
i += 16 * iterations
for i in i ..< len:
for i in i ..< data.len:
if data[i] != color:
return false
proc isTransparentSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isTransparentSimd*(data: var seq[ColorRGBX]): bool =
if cpuHasAvx2:
return isTransparentAvx2(data, len)
return isTransparentAvx2(data)
var i: int
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes
var
i: int
p = cast[uint](data[0].addr)
# Align to 16 bytes
while i < data.len and (p and 15) != 0:
if data[i].a != 0:
return false
inc i
p += 4
result = true
let
vecZero = mm_setzero_si128()
iterations = (len - i) div 16
iterations = (data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm_load_si128(data[i].addr)
values1 = mm_load_si128(data[i + 4].addr)
values2 = mm_load_si128(data[i + 8].addr)
values3 = mm_load_si128(data[i + 12].addr)
values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(cast[pointer](p + 48))
values01 = mm_or_si128(values0, values1)
values23 = mm_or_si128(values2, values3)
values0123 = mm_or_si128(values01, values23)
if mm_movemask_epi8(mm_cmpeq_epi8(values0123, vecZero)) != 0xffff:
return false
i += 16
p += 64
i += 16 * iterations
for i in i ..< len:
for i in i ..< data.len:
if data[i].a != 0:
return false
@ -136,29 +146,33 @@ when defined(amd64):
result = true
var i = start
var
i = start
p = cast[uint](data[0].addr)
# Align to 16 bytes
while i < (start + len) and (cast[uint](data[i].addr) and 15) != 0:
while i < (start + len) and (p and 15) != 0:
if data[i].a != 255:
return false
inc i
p += 4
let
vec255 = mm_set1_epi8(255)
iterations = (start + len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm_load_si128(data[i].addr)
values1 = mm_load_si128(data[i + 4].addr)
values2 = mm_load_si128(data[i + 8].addr)
values3 = mm_load_si128(data[i + 12].addr)
values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(cast[pointer](p + 48))
values01 = mm_and_si128(values0, values1)
values23 = mm_and_si128(values2, values3)
values0123 = mm_and_si128(values01, values23)
eq = mm_cmpeq_epi8(values0123, vec255)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
return false
i += 16
p += 64
i += 16 * iterations
for i in i ..< start + len:
if data[i].a != 255: