Merge pull request #491 from treeform/guzba
update bindings, fix hasSimd macro + comments
This commit is contained in:
commit
0b32a1d12e
5 changed files with 47 additions and 29 deletions
|
@ -117,13 +117,18 @@ exportRefObject Image:
|
||||||
setColor
|
setColor
|
||||||
fill(Image, Color)
|
fill(Image, Color)
|
||||||
fill(Image, Paint)
|
fill(Image, Paint)
|
||||||
|
isOneColor(Image)
|
||||||
|
isTransparent(Image)
|
||||||
|
isOpaque(Image)
|
||||||
flipHorizontal
|
flipHorizontal
|
||||||
flipVertical
|
flipVertical
|
||||||
|
rotate90
|
||||||
subImage
|
subImage
|
||||||
minifyBy2(Image, int)
|
minifyBy2(Image, int)
|
||||||
magnifyBy2(Image, int)
|
magnifyBy2(Image, int)
|
||||||
applyOpacity(Image, float32)
|
applyOpacity(Image, float32)
|
||||||
invert(Image)
|
invert(Image)
|
||||||
|
ceil(Image)
|
||||||
blur(Image, float32, Color)
|
blur(Image, float32, Color)
|
||||||
resize(Image, int, int)
|
resize(Image, int, int)
|
||||||
shadow(Image, Vec2, float32, float32, Color)
|
shadow(Image, Vec2, float32, float32, Color)
|
||||||
|
@ -298,6 +303,7 @@ exportProcs:
|
||||||
rotate(float32)
|
rotate(float32)
|
||||||
scale(float32, float32)
|
scale(float32, float32)
|
||||||
inverse(Matrix3)
|
inverse(Matrix3)
|
||||||
|
snapToPixels
|
||||||
|
|
||||||
writeFiles("bindings/generated", "Pixie")
|
writeFiles("bindings/generated", "Pixie")
|
||||||
|
|
||||||
|
|
|
@ -415,7 +415,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
||||||
# Set src as this result for if we do another power
|
# Set src as this result for if we do another power
|
||||||
src = result
|
src = result
|
||||||
|
|
||||||
template applyCoverage*(rgbxVec: M256i, coverage: M128i): M256i =
|
template applyCoverage(rgbxVec: M256i, coverage: M128i): M256i =
|
||||||
## Unpack the first 8 coverage bytes.
|
## Unpack the first 8 coverage bytes.
|
||||||
let
|
let
|
||||||
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)
|
unpacked0 = mm_shuffle_epi8(coverage, coverageShuffle)
|
||||||
|
|
|
@ -63,46 +63,58 @@ macro simd*(procedure: untyped) =
|
||||||
macro hasSimd*(procedure: untyped) =
|
macro hasSimd*(procedure: untyped) =
|
||||||
let
|
let
|
||||||
name = procedure.procName()
|
name = procedure.procName()
|
||||||
originalBody = procedure[6]
|
|
||||||
nameNeon = name & "Neon"
|
nameNeon = name & "Neon"
|
||||||
nameSse2 = name & "Sse2"
|
nameSse2 = name & "Sse2"
|
||||||
nameAvx = name & "Avx"
|
nameAvx = name & "Avx"
|
||||||
nameAvx2 = name & "Avx2"
|
nameAvx2 = name & "Avx2"
|
||||||
|
callNeon = callAndReturn(ident(nameNeon), procedure)
|
||||||
|
callSse2 = callAndReturn(ident(nameSse2), procedure)
|
||||||
callAvx = callAndReturn(ident(nameAvx), procedure)
|
callAvx = callAndReturn(ident(nameAvx), procedure)
|
||||||
callAvx2 = callAndReturn(ident(nameAvx2), procedure)
|
callAvx2 = callAndReturn(ident(nameAvx2), procedure)
|
||||||
|
|
||||||
var
|
var
|
||||||
foundSimd: bool
|
foundSimd: bool
|
||||||
body = newStmtList()
|
|
||||||
|
if procedure[6].kind != nnkStmtList:
|
||||||
|
error("hasSimd proc body must start with nnkStmtList")
|
||||||
|
|
||||||
|
var insertIdx = 0
|
||||||
|
if procedure[6][0].kind == nnkCommentStmt:
|
||||||
|
insertIdx = 1
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoAvx):
|
when defined(amd64) and not defined(pixieNoAvx):
|
||||||
if nameAvx2 & procSignature(procedure) in simdProcs:
|
if nameAvx2 & procSignature(procedure) in simdProcs:
|
||||||
foundSimd = true
|
foundSimd = true
|
||||||
body.add quote do:
|
procedure[6].insert(insertIdx, quote do:
|
||||||
if cpuHasAvx2:
|
if cpuHasAvx2:
|
||||||
`callAvx2`
|
`callAvx2`
|
||||||
|
)
|
||||||
|
inc insertIdx
|
||||||
if nameAvx & procSignature(procedure) in simdProcs:
|
if nameAvx & procSignature(procedure) in simdProcs:
|
||||||
foundSimd = true
|
foundSimd = true
|
||||||
body.add quote do:
|
procedure[6].insert(insertIdx, quote do:
|
||||||
if cpuHasAvx2:
|
if cpuHasAvx:
|
||||||
`callAvx`
|
`callAvx`
|
||||||
|
)
|
||||||
if nameSse2 & procSignature(procedure) in simdProcs:
|
inc insertIdx
|
||||||
foundSimd = true
|
when defined(amd64):
|
||||||
let bodySse2 = simdProcs[nameSse2 & procSignature(procedure)][6]
|
if nameSse2 & procSignature(procedure) in simdProcs:
|
||||||
body.add quote do:
|
foundSimd = true
|
||||||
`bodySse2`
|
procedure[6].insert(insertIdx, quote do:
|
||||||
elif nameNeon & procSignature(procedure) in simdProcs:
|
`callSse2`
|
||||||
foundSimd = true
|
)
|
||||||
let bodyNeon = simdProcs[nameNeon & procSignature(procedure)][6]
|
inc insertIdx
|
||||||
body.add quote do:
|
while procedure[6].len > insertIdx:
|
||||||
`bodyNeon`
|
procedure[6].del(insertIdx)
|
||||||
else:
|
elif defined(arm64):
|
||||||
body.add quote do:
|
if nameNeon & procSignature(procedure) in simdProcs:
|
||||||
`originalBody`
|
foundSimd = true
|
||||||
|
procedure[6].insert(insertIdx, quote do:
|
||||||
procedure[6] = body
|
`callNeon`
|
||||||
|
)
|
||||||
|
inc insertIdx
|
||||||
|
while procedure[6].len > insertIdx:
|
||||||
|
procedure[6].del(insertIdx)
|
||||||
|
|
||||||
when not defined(pixieNoSimd):
|
when not defined(pixieNoSimd):
|
||||||
if not foundSimd:
|
if not foundSimd:
|
||||||
|
|
|
@ -3,17 +3,17 @@ import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
template multiplyDiv255*(c, a: uint8x8): uint8x8 =
|
template multiplyDiv255(c, a: uint8x8): uint8x8 =
|
||||||
let ca = vmull_u8(c, a)
|
let ca = vmull_u8(c, a)
|
||||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
||||||
|
|
||||||
template multiplyDiv255*(c, a: uint8x16): uint8x16 =
|
template multiplyDiv255(c, a: uint8x16): uint8x16 =
|
||||||
vcombine_u8(
|
vcombine_u8(
|
||||||
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
|
multiplyDiv255(vget_low_u8(c), vget_low_u8(a)),
|
||||||
multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
|
multiplyDiv255(vget_high_u8(c), vget_high_u8(a))
|
||||||
)
|
)
|
||||||
|
|
||||||
template blendNormalSimd*(backdrop, source: uint8x16x4): uint8x16x4 =
|
template blendNormalSimd(backdrop, source: uint8x16x4): uint8x16x4 =
|
||||||
let multiplier = vsubq_u8(vec255, source.val[3])
|
let multiplier = vsubq_u8(vec255, source.val[3])
|
||||||
|
|
||||||
var blended: uint8x16x4
|
var blended: uint8x16x4
|
||||||
|
|
|
@ -10,7 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
||||||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||||
|
|
||||||
template blendNormalSimd*(backdrop, source: M128i): M128i =
|
template blendNormalSimd(backdrop, source: M128i): M128i =
|
||||||
var
|
var
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||||
|
@ -30,7 +30,7 @@ template blendNormalSimd*(backdrop, source: M128i): M128i =
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
)
|
)
|
||||||
|
|
||||||
template blendMaskSimd*(backdrop, source: M128i): M128i =
|
template blendMaskSimd(backdrop, source: M128i): M128i =
|
||||||
var
|
var
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||||
|
@ -507,7 +507,7 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
template applyCoverage*(rgbxVec, coverage: M128i): M128i =
|
template applyCoverage(rgbxVec, coverage: M128i): M128i =
|
||||||
## Unpack the first 4 coverage bytes.
|
## Unpack the first 4 coverage bytes.
|
||||||
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
|
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
|
||||||
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)
|
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)
|
||||||
|
|
Loading…
Reference in a new issue