Merge pull request #491 from treeform/guzba
update bindings, fix hasSimd macro + comments
This commit is contained in:
commit
0b32a1d12e
|
@ -117,13 +117,18 @@ exportRefObject Image:
|
|||
setColor
|
||||
fill(Image, Color)
|
||||
fill(Image, Paint)
|
||||
isOneColor(Image)
|
||||
isTransparent(Image)
|
||||
isOpaque(Image)
|
||||
flipHorizontal
|
||||
flipVertical
|
||||
rotate90
|
||||
subImage
|
||||
minifyBy2(Image, int)
|
||||
magnifyBy2(Image, int)
|
||||
applyOpacity(Image, float32)
|
||||
invert(Image)
|
||||
ceil(Image)
|
||||
blur(Image, float32, Color)
|
||||
resize(Image, int, int)
|
||||
shadow(Image, Vec2, float32, float32, Color)
|
||||
|
@ -298,6 +303,7 @@ exportProcs:
|
|||
rotate(float32)
|
||||
scale(float32, float32)
|
||||
inverse(Matrix3)
|
||||
snapToPixels
|
||||
|
||||
writeFiles("bindings/generated", "Pixie")
|
||||
|
||||
|
|
|
@ -415,7 +415,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
|||
# Set src as this result for if we do another power
|
||||
src = result
|
||||
|
||||
template applyCoverage*(rgbxVec: M256i, coverage: M128i): M256i =
|
||||
template applyCoverage(rgbxVec: M256i, coverage: M128i): M256i =
|
||||
## Unpack the first 8 coverage bytes.
|
||||
let
|
||||
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)
|
||||
|
|
|
@ -63,46 +63,58 @@ macro simd*(procedure: untyped) =
|
|||
macro hasSimd*(procedure: untyped) =
|
||||
let
|
||||
name = procedure.procName()
|
||||
originalBody = procedure[6]
|
||||
nameNeon = name & "Neon"
|
||||
nameSse2 = name & "Sse2"
|
||||
nameAvx = name & "Avx"
|
||||
nameAvx2 = name & "Avx2"
|
||||
callNeon = callAndReturn(ident(nameNeon), procedure)
|
||||
callSse2 = callAndReturn(ident(nameSse2), procedure)
|
||||
callAvx = callAndReturn(ident(nameAvx), procedure)
|
||||
callAvx2 = callAndReturn(ident(nameAvx2), procedure)
|
||||
|
||||
var
|
||||
foundSimd: bool
|
||||
body = newStmtList()
|
||||
|
||||
if procedure[6].kind != nnkStmtList:
|
||||
error("hasSimd proc body must start with nnkStmtList")
|
||||
|
||||
var insertIdx = 0
|
||||
if procedure[6][0].kind == nnkCommentStmt:
|
||||
insertIdx = 1
|
||||
|
||||
when defined(amd64) and not defined(pixieNoAvx):
|
||||
if nameAvx2 & procSignature(procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
body.add quote do:
|
||||
procedure[6].insert(insertIdx, quote do:
|
||||
if cpuHasAvx2:
|
||||
`callAvx2`
|
||||
|
||||
)
|
||||
inc insertIdx
|
||||
if nameAvx & procSignature(procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
body.add quote do:
|
||||
if cpuHasAvx2:
|
||||
procedure[6].insert(insertIdx, quote do:
|
||||
if cpuHasAvx:
|
||||
`callAvx`
|
||||
|
||||
if nameSse2 & procSignature(procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
let bodySse2 = simdProcs[nameSse2 & procSignature(procedure)][6]
|
||||
body.add quote do:
|
||||
`bodySse2`
|
||||
elif nameNeon & procSignature(procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
let bodyNeon = simdProcs[nameNeon & procSignature(procedure)][6]
|
||||
body.add quote do:
|
||||
`bodyNeon`
|
||||
else:
|
||||
body.add quote do:
|
||||
`originalBody`
|
||||
|
||||
procedure[6] = body
|
||||
)
|
||||
inc insertIdx
|
||||
when defined(amd64):
|
||||
if nameSse2 & procSignature(procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
procedure[6].insert(insertIdx, quote do:
|
||||
`callSse2`
|
||||
)
|
||||
inc insertIdx
|
||||
while procedure[6].len > insertIdx:
|
||||
procedure[6].del(insertIdx)
|
||||
elif defined(arm64):
|
||||
if nameNeon & procSignature(procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
procedure[6].insert(insertIdx, quote do:
|
||||
`callNeon`
|
||||
)
|
||||
inc insertIdx
|
||||
while procedure[6].len > insertIdx:
|
||||
procedure[6].del(insertIdx)
|
||||
|
||||
when not defined(pixieNoSimd):
|
||||
if not foundSimd:
|
||||
|
|
|
@ -3,17 +3,17 @@ import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
|
|||
when defined(release):
|
||||
{.push checks: off.}
|
||||
|
||||
template multiplyDiv255*(c, a: uint8x8): uint8x8 =
|
||||
template multiplyDiv255(c, a: uint8x8): uint8x8 =
|
||||
let ca = vmull_u8(c, a)
|
||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
||||
|
||||
template multiplyDiv255*(c, a: uint8x16): uint8x16 =
|
||||
template multiplyDiv255(c, a: uint8x16): uint8x16 =
|
||||
vcombine_u8(
|
||||
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
|
||||
multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
|
||||
)
|
||||
|
||||
template blendNormalSimd*(backdrop, source: uint8x16x4): uint8x16x4 =
|
||||
template blendNormalSimd(backdrop, source: uint8x16x4): uint8x16x4 =
|
||||
let multiplier = vsubq_u8(vec255, source.val[3])
|
||||
|
||||
var blended: uint8x16x4
|
||||
|
|
|
@ -10,7 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
|||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||
|
||||
template blendNormalSimd*(backdrop, source: M128i): M128i =
|
||||
template blendNormalSimd(backdrop, source: M128i): M128i =
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
|
@ -30,7 +30,7 @@ template blendNormalSimd*(backdrop, source: M128i): M128i =
|
|||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
template blendMaskSimd*(backdrop, source: M128i): M128i =
|
||||
template blendMaskSimd(backdrop, source: M128i): M128i =
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
|
@ -507,7 +507,7 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
|||
result.width * 4
|
||||
)
|
||||
|
||||
template applyCoverage*(rgbxVec, coverage: M128i): M128i =
|
||||
template applyCoverage(rgbxVec, coverage: M128i): M128i =
|
||||
## Unpack the first 4 coverage bytes.
|
||||
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
|
||||
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)
|
||||
|
|
Loading…
Reference in a new issue