fillUnsafeSimd
This commit is contained in:
parent
aaead7ed84
commit
9644894903
3 changed files with 40 additions and 33 deletions
|
@ -80,11 +80,7 @@ proc fillUnsafe*(
|
||||||
## Fills the image data with the color starting at index start and
|
## Fills the image data with the color starting at index start and
|
||||||
## continuing for len indices.
|
## continuing for len indices.
|
||||||
when allowSimd and compiles(fillUnsafeSimd):
|
when allowSimd and compiles(fillUnsafeSimd):
|
||||||
fillUnsafeSimd(
|
fillUnsafeSimd(data, start, len, color)
|
||||||
cast[ptr UncheckedArray[ColorRGBX]](data[start].addr),
|
|
||||||
len,
|
|
||||||
color
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
let rgbx = color.asRgbx()
|
let rgbx = color.asRgbx()
|
||||||
|
|
|
@ -7,25 +7,30 @@ when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
proc fillUnsafeAvx*(
|
proc fillUnsafeAvx*(
|
||||||
data: ptr UncheckedArray[ColorRGBX],
|
data: var seq[ColorRGBX],
|
||||||
len: int,
|
start, len: int,
|
||||||
color: SomeColor
|
color: SomeColor
|
||||||
) =
|
) =
|
||||||
let rgbx = color.asRgbx()
|
let rgbx = color.asRgbx()
|
||||||
|
|
||||||
var i: int
|
var
|
||||||
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
|
i = start
|
||||||
|
p = cast[uint](data[i].addr)
|
||||||
|
# Align to 32 bytes
|
||||||
|
while i < (start + len) and (p and 31) != 0:
|
||||||
data[i] = rgbx
|
data[i] = rgbx
|
||||||
inc i
|
inc i
|
||||||
|
p += 4
|
||||||
|
|
||||||
let
|
let
|
||||||
iterations = (len - i) div 8
|
iterations = (start + len - i) div 8
|
||||||
colorVec = mm256_set1_epi32(cast[int32](rgbx))
|
colorVec = mm256_set1_epi32(cast[int32](rgbx))
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
mm256_store_si256(data[i].addr, colorVec)
|
mm256_store_si256(cast[pointer](p), colorVec)
|
||||||
i += 8
|
p += 32
|
||||||
# Fill whatever is left the slow way
|
i += 8 * iterations
|
||||||
for i in i ..< len:
|
|
||||||
|
for i in i ..< start + len:
|
||||||
data[i] = rgbx
|
data[i] = rgbx
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
|
|
|
@ -31,29 +31,35 @@ when defined(amd64):
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
||||||
|
|
||||||
proc fillUnsafeSimd*(
|
proc fillUnsafeSimd*(
|
||||||
data: ptr UncheckedArray[ColorRGBX],
|
data: var seq[ColorRGBX],
|
||||||
len: int,
|
start, len: int,
|
||||||
color: SomeColor
|
color: SomeColor
|
||||||
) =
|
) =
|
||||||
if cpuHasAvx and len >= 64:
|
if cpuHasAvx:
|
||||||
fillUnsafeAvx(data, len, color)
|
fillUnsafeAvx(data, start, len, color)
|
||||||
else:
|
return
|
||||||
|
|
||||||
let rgbx = color.asRgbx()
|
let rgbx = color.asRgbx()
|
||||||
|
|
||||||
var i: int
|
var
|
||||||
while i < len and (cast[uint](data[i].addr) and 15) != 0: # Align to 16 bytes
|
i = start
|
||||||
|
p = cast[uint](data[i].addr)
|
||||||
|
# Align to 16 bytes
|
||||||
|
while i < (start + len) and (p and 15) != 0:
|
||||||
data[i] = rgbx
|
data[i] = rgbx
|
||||||
inc i
|
inc i
|
||||||
|
p += 4
|
||||||
|
|
||||||
let
|
let
|
||||||
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||||
iterations = (len - i) div 8
|
iterations = (start + len - i) div 8
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
mm_store_si128(data[i].addr, colorVec)
|
mm_store_si128(cast[pointer](p), colorVec)
|
||||||
mm_store_si128(data[i + 4].addr, colorVec)
|
mm_store_si128(cast[pointer](p + 16), colorVec)
|
||||||
i += 8
|
p += 32
|
||||||
|
i += iterations * 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< start + len:
|
||||||
data[i] = rgbx
|
data[i] = rgbx
|
||||||
|
|
||||||
proc isOneColorSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
|
proc isOneColorSimd*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
|
||||||
|
|
Loading…
Reference in a new issue