Update the hasSimd macro to always call the SIMD procs instead of inlining their bodies

This commit is contained in:
Ryan Oldenburg 2022-08-02 09:57:24 -05:00
parent 52fb3b3928
commit 8b0fdf435f
4 changed files with 41 additions and 29 deletions

View file

@ -415,7 +415,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
# Set src as this result for if we do another power
src = result
template applyCoverage*(rgbxVec: M256i, coverage: M128i): M256i =
template applyCoverage(rgbxVec: M256i, coverage: M128i): M256i =
## Unpack the first 8 coverage bytes.
let
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)

View file

@ -63,46 +63,58 @@ macro simd*(procedure: untyped) =
macro hasSimd*(procedure: untyped) =
let
name = procedure.procName()
originalBody = procedure[6]
nameNeon = name & "Neon"
nameSse2 = name & "Sse2"
nameAvx = name & "Avx"
nameAvx2 = name & "Avx2"
callNeon = callAndReturn(ident(nameNeon), procedure)
callSse2 = callAndReturn(ident(nameSse2), procedure)
callAvx = callAndReturn(ident(nameAvx), procedure)
callAvx2 = callAndReturn(ident(nameAvx2), procedure)
var
foundSimd: bool
body = newStmtList()
if procedure[6].kind != nnkStmtList:
error("hasSimd proc body must start with nnkStmtList")
var insertIdx = 0
if procedure[6][0].kind == nnkCommentStmt:
insertIdx = 1
when defined(amd64) and not defined(pixieNoAvx):
if nameAvx2 & procSignature(procedure) in simdProcs:
foundSimd = true
body.add quote do:
procedure[6].insert(insertIdx, quote do:
if cpuHasAvx2:
`callAvx2`
)
inc insertIdx
if nameAvx & procSignature(procedure) in simdProcs:
foundSimd = true
body.add quote do:
if cpuHasAvx2:
procedure[6].insert(insertIdx, quote do:
if cpuHasAvx:
`callAvx`
if nameSse2 & procSignature(procedure) in simdProcs:
foundSimd = true
let bodySse2 = simdProcs[nameSse2 & procSignature(procedure)][6]
body.add quote do:
`bodySse2`
elif nameNeon & procSignature(procedure) in simdProcs:
foundSimd = true
let bodyNeon = simdProcs[nameNeon & procSignature(procedure)][6]
body.add quote do:
`bodyNeon`
else:
body.add quote do:
`originalBody`
procedure[6] = body
)
inc insertIdx
when defined(amd64):
if nameSse2 & procSignature(procedure) in simdProcs:
foundSimd = true
procedure[6].insert(insertIdx, quote do:
`callSse2`
)
inc insertIdx
while procedure[6].len > insertIdx:
procedure[6].del(insertIdx)
elif defined(arm64):
if nameNeon & procSignature(procedure) in simdProcs:
foundSimd = true
procedure[6].insert(insertIdx, quote do:
`callNeon`
)
inc insertIdx
while procedure[6].len > insertIdx:
procedure[6].del(insertIdx)
when not defined(pixieNoSimd):
if not foundSimd:

View file

@ -3,17 +3,17 @@ import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
when defined(release):
{.push checks: off.}
template multiplyDiv255*(c, a: uint8x8): uint8x8 =
template multiplyDiv255(c, a: uint8x8): uint8x8 =
let ca = vmull_u8(c, a)
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
template multiplyDiv255*(c, a: uint8x16): uint8x16 =
template multiplyDiv255(c, a: uint8x16): uint8x16 =
vcombine_u8(
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
)
template blendNormalSimd*(backdrop, source: uint8x16x4): uint8x16x4 =
template blendNormalSimd(backdrop, source: uint8x16x4): uint8x16x4 =
let multiplier = vsubq_u8(vec255, source.val[3])
var blended: uint8x16x4

View file

@ -10,7 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
template blendNormalSimd*(backdrop, source: M128i): M128i =
template blendNormalSimd(backdrop, source: M128i): M128i =
var
sourceAlpha = mm_and_si128(source, alphaMask)
backdropEven = mm_slli_epi16(backdrop, 8)
@ -30,7 +30,7 @@ template blendNormalSimd*(backdrop, source: M128i): M128i =
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
)
template blendMaskSimd*(backdrop, source: M128i): M128i =
template blendMaskSimd(backdrop, source: M128i): M128i =
var
sourceAlpha = mm_and_si128(source, alphaMask)
backdropEven = mm_slli_epi16(backdrop, 8)
@ -507,7 +507,7 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
result.width * 4
)
template applyCoverage*(rgbxVec, coverage: M128i): M128i =
template applyCoverage(rgbxVec, coverage: M128i): M128i =
## Unpack the first 4 coverage bytes.
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)