Merge pull request #491 from treeform/guzba

update bindings, fix hasSimd macro + comments
This commit is contained in:
Andre von Houck 2022-08-02 11:34:21 -07:00 committed by GitHub
commit 0b32a1d12e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 29 deletions

View file

@ -117,13 +117,18 @@ exportRefObject Image:
setColor setColor
fill(Image, Color) fill(Image, Color)
fill(Image, Paint) fill(Image, Paint)
isOneColor(Image)
isTransparent(Image)
isOpaque(Image)
flipHorizontal flipHorizontal
flipVertical flipVertical
rotate90
subImage subImage
minifyBy2(Image, int) minifyBy2(Image, int)
magnifyBy2(Image, int) magnifyBy2(Image, int)
applyOpacity(Image, float32) applyOpacity(Image, float32)
invert(Image) invert(Image)
ceil(Image)
blur(Image, float32, Color) blur(Image, float32, Color)
resize(Image, int, int) resize(Image, int, int)
shadow(Image, Vec2, float32, float32, Color) shadow(Image, Vec2, float32, float32, Color)
@ -298,6 +303,7 @@ exportProcs:
rotate(float32) rotate(float32)
scale(float32, float32) scale(float32, float32)
inverse(Matrix3) inverse(Matrix3)
snapToPixels
writeFiles("bindings/generated", "Pixie") writeFiles("bindings/generated", "Pixie")

View file

@ -415,7 +415,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
# Set src as this result for if we do another power # Set src as this result for if we do another power
src = result src = result
template applyCoverage*(rgbxVec: M256i, coverage: M128i): M256i = template applyCoverage(rgbxVec: M256i, coverage: M128i): M256i =
## Unpack the first 8 coverage bytes. ## Unpack the first 8 coverage bytes.
let let
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle) unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)

View file

@ -63,46 +63,58 @@ macro simd*(procedure: untyped) =
macro hasSimd*(procedure: untyped) = macro hasSimd*(procedure: untyped) =
let let
name = procedure.procName() name = procedure.procName()
originalBody = procedure[6]
nameNeon = name & "Neon" nameNeon = name & "Neon"
nameSse2 = name & "Sse2" nameSse2 = name & "Sse2"
nameAvx = name & "Avx" nameAvx = name & "Avx"
nameAvx2 = name & "Avx2" nameAvx2 = name & "Avx2"
callNeon = callAndReturn(ident(nameNeon), procedure)
callSse2 = callAndReturn(ident(nameSse2), procedure)
callAvx = callAndReturn(ident(nameAvx), procedure) callAvx = callAndReturn(ident(nameAvx), procedure)
callAvx2 = callAndReturn(ident(nameAvx2), procedure) callAvx2 = callAndReturn(ident(nameAvx2), procedure)
var var
foundSimd: bool foundSimd: bool
body = newStmtList()
if procedure[6].kind != nnkStmtList:
error("hasSimd proc body must start with nnkStmtList")
var insertIdx = 0
if procedure[6][0].kind == nnkCommentStmt:
insertIdx = 1
when defined(amd64) and not defined(pixieNoAvx): when defined(amd64) and not defined(pixieNoAvx):
if nameAvx2 & procSignature(procedure) in simdProcs: if nameAvx2 & procSignature(procedure) in simdProcs:
foundSimd = true foundSimd = true
body.add quote do: procedure[6].insert(insertIdx, quote do:
if cpuHasAvx2: if cpuHasAvx2:
`callAvx2` `callAvx2`
)
inc insertIdx
if nameAvx & procSignature(procedure) in simdProcs: if nameAvx & procSignature(procedure) in simdProcs:
foundSimd = true foundSimd = true
body.add quote do: procedure[6].insert(insertIdx, quote do:
if cpuHasAvx2: if cpuHasAvx:
`callAvx` `callAvx`
)
if nameSse2 & procSignature(procedure) in simdProcs: inc insertIdx
foundSimd = true when defined(amd64):
let bodySse2 = simdProcs[nameSse2 & procSignature(procedure)][6] if nameSse2 & procSignature(procedure) in simdProcs:
body.add quote do: foundSimd = true
`bodySse2` procedure[6].insert(insertIdx, quote do:
elif nameNeon & procSignature(procedure) in simdProcs: `callSse2`
foundSimd = true )
let bodyNeon = simdProcs[nameNeon & procSignature(procedure)][6] inc insertIdx
body.add quote do: while procedure[6].len > insertIdx:
`bodyNeon` procedure[6].del(insertIdx)
else: elif defined(arm64):
body.add quote do: if nameNeon & procSignature(procedure) in simdProcs:
`originalBody` foundSimd = true
procedure[6].insert(insertIdx, quote do:
procedure[6] = body `callNeon`
)
inc insertIdx
while procedure[6].len > insertIdx:
procedure[6].del(insertIdx)
when not defined(pixieNoSimd): when not defined(pixieNoSimd):
if not foundSimd: if not foundSimd:

View file

@ -3,17 +3,17 @@ import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
when defined(release): when defined(release):
{.push checks: off.} {.push checks: off.}
template multiplyDiv255*(c, a: uint8x8): uint8x8 = template multiplyDiv255(c, a: uint8x8): uint8x8 =
let ca = vmull_u8(c, a) let ca = vmull_u8(c, a)
vraddhn_u16(ca, vrshrq_n_u16(ca, 8)) vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
template multiplyDiv255*(c, a: uint8x16): uint8x16 = template multiplyDiv255(c, a: uint8x16): uint8x16 =
vcombine_u8( vcombine_u8(
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)), multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
multiplyDiv255(vget_high_u8(c), vget_high_u8(a)) multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
) )
template blendNormalSimd*(backdrop, source: uint8x16x4): uint8x16x4 = template blendNormalSimd(backdrop, source: uint8x16x4): uint8x16x4 =
let multiplier = vsubq_u8(vec255, source.val[3]) let multiplier = vsubq_u8(vec255, source.val[3])
var blended: uint8x16x4 var blended: uint8x16x4

View file

@ -10,7 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128()) finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
cast[ColorRGBX](mm_cvtsi128_si32(finalColor)) cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
template blendNormalSimd*(backdrop, source: M128i): M128i = template blendNormalSimd(backdrop, source: M128i): M128i =
var var
sourceAlpha = mm_and_si128(source, alphaMask) sourceAlpha = mm_and_si128(source, alphaMask)
backdropEven = mm_slli_epi16(backdrop, 8) backdropEven = mm_slli_epi16(backdrop, 8)
@ -30,7 +30,7 @@ template blendNormalSimd*(backdrop, source: M128i): M128i =
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
) )
template blendMaskSimd*(backdrop, source: M128i): M128i = template blendMaskSimd(backdrop, source: M128i): M128i =
var var
sourceAlpha = mm_and_si128(source, alphaMask) sourceAlpha = mm_and_si128(source, alphaMask)
backdropEven = mm_slli_epi16(backdrop, 8) backdropEven = mm_slli_epi16(backdrop, 8)
@ -507,7 +507,7 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
result.width * 4 result.width * 4
) )
template applyCoverage*(rgbxVec, coverage: M128i): M128i = template applyCoverage(rgbxVec, coverage: M128i): M128i =
## Unpack the first 4 coverage bytes. ## Unpack the first 4 coverage bytes.
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage) var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked) unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)