fillUnsafeSimd

This commit is contained in:
Ryan Oldenburg 2022-06-30 09:49:42 -05:00
parent aaead7ed84
commit 9644894903
3 changed files with 40 additions and 33 deletions

View file

@ -80,11 +80,7 @@ proc fillUnsafe*(
## Fills the image data with the color starting at index start and
## continuing for len indices.
when allowSimd and compiles(fillUnsafeSimd):
fillUnsafeSimd(
cast[ptr UncheckedArray[ColorRGBX]](data[start].addr),
len,
color
)
fillUnsafeSimd(data, start, len, color)
return
let rgbx = color.asRgbx()

View file

@ -7,25 +7,30 @@ when defined(release):
{.push checks: off.}
proc fillUnsafeAvx*(
data: ptr UncheckedArray[ColorRGBX],
len: int,
data: var seq[ColorRGBX],
start, len: int,
color: SomeColor
) =
let rgbx = color.asRgbx()
var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
var
i = start
p = cast[uint](data[i].addr)
# Align to 32 bytes
while i < (start + len) and (p and 31) != 0:
data[i] = rgbx
inc i
p += 4
let
iterations = (len - i) div 8
iterations = (start + len - i) div 8
colorVec = mm256_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< iterations:
mm256_store_si256(data[i].addr, colorVec)
i += 8
# Fill whatever is left the slow way
for i in i ..< len:
mm256_store_si256(cast[pointer](p), colorVec)
p += 32
i += 8 * iterations
for i in i ..< start + len:
data[i] = rgbx
when defined(release):

View file

@ -31,30 +31,36 @@ when defined(amd64):
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
proc fillUnsafeSimd*(
data: ptr UncheckedArray[ColorRGBX],
len: int,
data: var seq[ColorRGBX],
start, len: int,
color: SomeColor
) =
if cpuHasAvx and len >= 64:
fillUnsafeAvx(data, len, color)
else:
let rgbx = color.asRgbx()
if cpuHasAvx:
fillUnsafeAvx(data, start, len, color)
return
var i: int
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes
data[i] = rgbx
inc i
let rgbx = color.asRgbx()
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
iterations = (len - i) div 8
for _ in 0 ..< iterations:
mm_store_si128(data[i].addr, colorVec)
mm_store_si128(data[i + 4].addr, colorVec)
i += 8
var
i = start
p = cast[uint](data[i].addr)
# Align to 16 bytes
while i < (start + len) and (p and 15) != 0:
data[i] = rgbx
inc i
p += 4
for i in i ..< len:
data[i] = rgbx
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
iterations = (start + len - i) div 8
for _ in 0 ..< iterations:
mm_store_si128(cast[pointer](p), colorVec)
mm_store_si128(cast[pointer](p + 16), colorVec)
p += 32
i += iterations * 8
for i in i ..< start + len:
data[i] = rgbx
proc isOneColorSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
if cpuHasAvx2: