Update the hasSimd macro to always call the SIMD procs instead of inlining their bodies

This commit is contained in:
Ryan Oldenburg 2022-08-02 09:57:24 -05:00
parent 52fb3b3928
commit 8b0fdf435f
4 changed files with 41 additions and 29 deletions

View file

@ -415,7 +415,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
# Set src as this result for if we do another power # Set src as this result for if we do another power
src = result src = result
template applyCoverage*(rgbxVec: M256i, coverage: M128i): M256i = template applyCoverage(rgbxVec: M256i, coverage: M128i): M256i =
## Unpack the first 8 coverage bytes. ## Unpack the first 8 coverage bytes.
let let
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle) unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)

View file

@ -63,46 +63,58 @@ macro simd*(procedure: untyped) =
macro hasSimd*(procedure: untyped) = macro hasSimd*(procedure: untyped) =
let let
name = procedure.procName() name = procedure.procName()
originalBody = procedure[6]
nameNeon = name & "Neon" nameNeon = name & "Neon"
nameSse2 = name & "Sse2" nameSse2 = name & "Sse2"
nameAvx = name & "Avx" nameAvx = name & "Avx"
nameAvx2 = name & "Avx2" nameAvx2 = name & "Avx2"
callNeon = callAndReturn(ident(nameNeon), procedure)
callSse2 = callAndReturn(ident(nameSse2), procedure)
callAvx = callAndReturn(ident(nameAvx), procedure) callAvx = callAndReturn(ident(nameAvx), procedure)
callAvx2 = callAndReturn(ident(nameAvx2), procedure) callAvx2 = callAndReturn(ident(nameAvx2), procedure)
var var
foundSimd: bool foundSimd: bool
body = newStmtList()
if procedure[6].kind != nnkStmtList:
error("hasSimd proc body must start with nnkStmtList")
var insertIdx = 0
if procedure[6][0].kind == nnkCommentStmt:
insertIdx = 1
when defined(amd64) and not defined(pixieNoAvx): when defined(amd64) and not defined(pixieNoAvx):
if nameAvx2 & procSignature(procedure) in simdProcs: if nameAvx2 & procSignature(procedure) in simdProcs:
foundSimd = true foundSimd = true
body.add quote do: procedure[6].insert(insertIdx, quote do:
if cpuHasAvx2: if cpuHasAvx2:
`callAvx2` `callAvx2`
)
inc insertIdx
if nameAvx & procSignature(procedure) in simdProcs: if nameAvx & procSignature(procedure) in simdProcs:
foundSimd = true foundSimd = true
body.add quote do: procedure[6].insert(insertIdx, quote do:
if cpuHasAvx2: if cpuHasAvx:
`callAvx` `callAvx`
)
if nameSse2 & procSignature(procedure) in simdProcs: inc insertIdx
foundSimd = true when defined(amd64):
let bodySse2 = simdProcs[nameSse2 & procSignature(procedure)][6] if nameSse2 & procSignature(procedure) in simdProcs:
body.add quote do: foundSimd = true
`bodySse2` procedure[6].insert(insertIdx, quote do:
elif nameNeon & procSignature(procedure) in simdProcs: `callSse2`
foundSimd = true )
let bodyNeon = simdProcs[nameNeon & procSignature(procedure)][6] inc insertIdx
body.add quote do: while procedure[6].len > insertIdx:
`bodyNeon` procedure[6].del(insertIdx)
else: elif defined(arm64):
body.add quote do: if nameNeon & procSignature(procedure) in simdProcs:
`originalBody` foundSimd = true
procedure[6].insert(insertIdx, quote do:
procedure[6] = body `callNeon`
)
inc insertIdx
while procedure[6].len > insertIdx:
procedure[6].del(insertIdx)
when not defined(pixieNoSimd): when not defined(pixieNoSimd):
if not foundSimd: if not foundSimd:

View file

@ -3,17 +3,17 @@ import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
when defined(release): when defined(release):
{.push checks: off.} {.push checks: off.}
template multiplyDiv255*(c, a: uint8x8): uint8x8 = template multiplyDiv255(c, a: uint8x8): uint8x8 =
let ca = vmull_u8(c, a) let ca = vmull_u8(c, a)
vraddhn_u16(ca, vrshrq_n_u16(ca, 8)) vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
template multiplyDiv255*(c, a: uint8x16): uint8x16 = template multiplyDiv255(c, a: uint8x16): uint8x16 =
vcombine_u8( vcombine_u8(
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)), multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
multiplyDiv255(vget_high_u8(c), vget_high_u8(a)) multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
) )
template blendNormalSimd*(backdrop, source: uint8x16x4): uint8x16x4 = template blendNormalSimd(backdrop, source: uint8x16x4): uint8x16x4 =
let multiplier = vsubq_u8(vec255, source.val[3]) let multiplier = vsubq_u8(vec255, source.val[3])
var blended: uint8x16x4 var blended: uint8x16x4

View file

@ -10,7 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128()) finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
cast[ColorRGBX](mm_cvtsi128_si32(finalColor)) cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
template blendNormalSimd*(backdrop, source: M128i): M128i = template blendNormalSimd(backdrop, source: M128i): M128i =
var var
sourceAlpha = mm_and_si128(source, alphaMask) sourceAlpha = mm_and_si128(source, alphaMask)
backdropEven = mm_slli_epi16(backdrop, 8) backdropEven = mm_slli_epi16(backdrop, 8)
@ -30,7 +30,7 @@ template blendNormalSimd*(backdrop, source: M128i): M128i =
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
) )
template blendMaskSimd*(backdrop, source: M128i): M128i = template blendMaskSimd(backdrop, source: M128i): M128i =
var var
sourceAlpha = mm_and_si128(source, alphaMask) sourceAlpha = mm_and_si128(source, alphaMask)
backdropEven = mm_slli_epi16(backdrop, 8) backdropEven = mm_slli_epi16(backdrop, 8)
@ -507,7 +507,7 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
result.width * 4 result.width * 4
) )
template applyCoverage*(rgbxVec, coverage: M128i): M128i = template applyCoverage(rgbxVec, coverage: M128i): M128i =
## Unpack the first 4 coverage bytes. ## Unpack the first 4 coverage bytes.
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage) var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked) unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)