fillUnsafeSimd
This commit is contained in:
parent
aaead7ed84
commit
9644894903
|
@ -80,11 +80,7 @@ proc fillUnsafe*(
|
|||
## Fills the image data with the color starting at index start and
|
||||
## continuing for len indices.
|
||||
when allowSimd and compiles(fillUnsafeSimd):
|
||||
fillUnsafeSimd(
|
||||
cast[ptr UncheckedArray[ColorRGBX]](data[start].addr),
|
||||
len,
|
||||
color
|
||||
)
|
||||
fillUnsafeSimd(data, start, len, color)
|
||||
return
|
||||
|
||||
let rgbx = color.asRgbx()
|
||||
|
|
|
@ -7,25 +7,30 @@ when defined(release):
|
|||
{.push checks: off.}
|
||||
|
||||
proc fillUnsafeAvx*(
|
||||
data: ptr UncheckedArray[ColorRGBX],
|
||||
len: int,
|
||||
data: var seq[ColorRGBX],
|
||||
start, len: int,
|
||||
color: SomeColor
|
||||
) =
|
||||
let rgbx = color.asRgbx()
|
||||
|
||||
var i: int
|
||||
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
|
||||
var
|
||||
i = start
|
||||
p = cast[uint](data[i].addr)
|
||||
# Align to 32 bytes
|
||||
while i < (start + len) and (p and 31) != 0:
|
||||
data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
|
||||
let
|
||||
iterations = (len - i) div 8
|
||||
iterations = (start + len - i) div 8
|
||||
colorVec = mm256_set1_epi32(cast[int32](rgbx))
|
||||
for _ in 0 ..< iterations:
|
||||
mm256_store_si256(data[i].addr, colorVec)
|
||||
i += 8
|
||||
# Fill whatever is left the slow way
|
||||
for i in i ..< len:
|
||||
mm256_store_si256(cast[pointer](p), colorVec)
|
||||
p += 32
|
||||
i += 8 * iterations
|
||||
|
||||
for i in i ..< start + len:
|
||||
data[i] = rgbx
|
||||
|
||||
when defined(release):
|
||||
|
|
|
@ -31,30 +31,36 @@ when defined(amd64):
|
|||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
||||
|
||||
proc fillUnsafeSimd*(
|
||||
data: ptr UncheckedArray[ColorRGBX],
|
||||
len: int,
|
||||
data: var seq[ColorRGBX],
|
||||
start, len: int,
|
||||
color: SomeColor
|
||||
) =
|
||||
if cpuHasAvx and len >= 64:
|
||||
fillUnsafeAvx(data, len, color)
|
||||
else:
|
||||
let rgbx = color.asRgbx()
|
||||
if cpuHasAvx:
|
||||
fillUnsafeAvx(data, start, len, color)
|
||||
return
|
||||
|
||||
var i: int
|
||||
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes
|
||||
data[i] = rgbx
|
||||
inc i
|
||||
let rgbx = color.asRgbx()
|
||||
|
||||
let
|
||||
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
iterations = (len - i) div 8
|
||||
for _ in 0 ..< iterations:
|
||||
mm_store_si128(data[i].addr, colorVec)
|
||||
mm_store_si128(data[i + 4].addr, colorVec)
|
||||
i += 8
|
||||
var
|
||||
i = start
|
||||
p = cast[uint](data[i].addr)
|
||||
# Align to 16 bytes
|
||||
while i < (start + len) and (p and 15) != 0:
|
||||
data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
|
||||
for i in i ..< len:
|
||||
data[i] = rgbx
|
||||
let
|
||||
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
iterations = (start + len - i) div 8
|
||||
for _ in 0 ..< iterations:
|
||||
mm_store_si128(cast[pointer](p), colorVec)
|
||||
mm_store_si128(cast[pointer](p + 16), colorVec)
|
||||
p += 32
|
||||
i += iterations * 8
|
||||
|
||||
for i in i ..< start + len:
|
||||
data[i] = rgbx
|
||||
|
||||
proc isOneColorSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
|
||||
if cpuHasAvx2:
|
||||
|
|
Loading…
Reference in a new issue