This commit is contained in:
Ryan Oldenburg 2022-06-22 14:57:04 -05:00
parent 3bdc6c3266
commit 1cfaea9357
2 changed files with 49 additions and 5 deletions

View file

@ -3,7 +3,8 @@ import chroma, common, system/memory, vmath
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)
when defined(amd64) and allowSimd:
import nimsimd/sse2
import nimsimd/runtimecheck, nimsimd/sse2, simd/avx
let cpuHasAvx* = checkInstructionSets({AVX})
template currentExceptionAsPixieError*(): untyped =
## Gets the current exception and returns it as a PixieError with stack trace.
@ -63,6 +64,13 @@ proc fillUnsafe*(
## Fills the image data with the color starting at index start and
## continuing for len indices.
let rgbx = color.asRgbx()
# If we can use AVX, do so
when defined(amd64) and allowSimd:
if cpuHasAvx and len >= 64:
fillUnsafeAvx(data, rgbx, start, len)
return
# Use memset when every byte has the same value
if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a:
nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
@ -70,14 +78,15 @@ proc fillUnsafe*(
var i = start
when defined(amd64) and allowSimd:
# Align to 16 bytes
while i < (start + len) and (cast[uint](data[i].addr) and 15) != 0:
var p = cast[uint](data[i].addr)
while i < (start + len) and (p and 15) != 0:
data[i] = rgbx
inc i
p += 4
# When supported, SIMD fill until we run out of room
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
iterations = (start + len - i) div 8
var p = cast[uint](data[i].addr)
for _ in 0 ..< iterations:
mm_store_si128(cast[pointer](p), colorVec)
mm_store_si128(cast[pointer](p + 16), colorVec)
@ -93,8 +102,8 @@ proc fillUnsafe*(
copyMem(data[i].addr, u64.addr, 8)
i += 2
# Fill whatever is left the slow way
for j in i ..< start + len:
data[j] = rgbx
for i in i ..< start + len:
data[i] = rgbx
const straightAlphaTable = block:
var table: array[256, array[256, uint8]]

35
src/pixie/simd/avx.nim Normal file
View file

@ -0,0 +1,35 @@
import chroma, nimsimd/avx
when defined(gcc) or defined(clang):
{.localPassc: "-mavx".}
when defined(release):
{.push checks: off.}
proc fillUnsafeAvx*(
  data: var seq[ColorRGBX],
  rgbx: ColorRGBX,
  start, len: int
) =
  ## Writes `rgbx` into `len` consecutive entries of `data`, beginning at
  ## index `start`, using 256-bit AVX stores for the bulk of the range.
  ##
  ## `start` and `len` are not validated against `data.len` here; the caller
  ## is responsible for passing a range that lies inside `data`.
  # Nothing to write. Returning here also keeps `data[start]` below from
  # being evaluated for an empty range whose `start` is one past the end.
  if len <= 0:
    return

  let stop = start + len # One past the last index to fill
  var
    i = start
    p = cast[uint](data[i].addr) # Address of the next entry to write
  # Scalar stores (4 bytes per entry) until the write address is a multiple
  # of 32, since the store used below is the aligned variant
  while i < stop and (p and 31) != 0:
    data[i] = rgbx
    inc i
    p += 4
  # SIMD fill until we run out of room; each 32-byte store covers 8 entries
  let
    iterations = (stop - i) div 8
    colorVec = mm256_set1_epi32(cast[int32](rgbx))
  for _ in 0 ..< iterations:
    mm256_store_si256(cast[pointer](p), colorVec)
    p += 32
  i += iterations * 8
  # Fill whatever is left (fewer than 8 entries) the slow way
  for j in i ..< stop:
    data[j] = rgbx
when defined(release):
{.pop.}