neon line blends
This commit is contained in:
parent
0245eeebea
commit
6fb4b02eb6
2 changed files with 76 additions and 3 deletions
|
@ -10,7 +10,7 @@ requires "vmath >= 1.1.4"
|
||||||
requires "chroma >= 0.2.6"
|
requires "chroma >= 0.2.6"
|
||||||
requires "zippy >= 0.10.3"
|
requires "zippy >= 0.10.3"
|
||||||
requires "flatty >= 0.3.4"
|
requires "flatty >= 0.3.4"
|
||||||
requires "nimsimd >= 1.1.9"
|
requires "nimsimd >= 1.1.10"
|
||||||
requires "bumpy >= 1.1.1"
|
requires "bumpy >= 1.1.1"
|
||||||
|
|
||||||
task bindings, "Generate bindings":
|
task bindings, "Generate bindings":
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import chroma, internal, nimsimd/neon, pixie/common, vmath
|
import chroma, internal, nimsimd/neon, pixie/blends, pixie/common, vmath
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
@ -146,7 +146,7 @@ proc toPremultipliedAlphaNeon*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
||||||
inc i
|
inc i
|
||||||
p += 4
|
p += 4
|
||||||
|
|
||||||
proc premultiply(c, a: uint8x8): uint8x8 {.inline.} =
|
template premultiply(c, a: uint8x8): uint8x8 =
|
||||||
let ca = vmull_u8(c, a)
|
let ca = vmull_u8(c, a)
|
||||||
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
||||||
|
|
||||||
|
@ -408,5 +408,78 @@ proc magnifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
|
proc blitLineNormalNeon*(
|
||||||
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while (cast[uint](a[i].addr) and 15) != 0:
|
||||||
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let vec255 = vmov_n_u8(255)
|
||||||
|
while i < len - 8:
|
||||||
|
let
|
||||||
|
source = vld4_u8(b[i].addr)
|
||||||
|
eq255 = vceq_u8(source.val[3], vec255)
|
||||||
|
if vget_lane_u64(cast[uint64x1](eq255), 0) == uint64.high:
|
||||||
|
vst4_u8(a[i].addr, source)
|
||||||
|
else:
|
||||||
|
template multiply(c, a: uint8x8): uint8x8 =
|
||||||
|
let ca = vmull_u8(c, a)
|
||||||
|
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
||||||
|
|
||||||
|
let
|
||||||
|
backdrop = vld4_u8(a[i].addr)
|
||||||
|
multiplier = vsub_u8(vec255, source.val[3])
|
||||||
|
|
||||||
|
var blended: uint8x8x4
|
||||||
|
blended.val[0] = multiply(backdrop.val[0], multiplier)
|
||||||
|
blended.val[1] = multiply(backdrop.val[1], multiplier)
|
||||||
|
blended.val[2] = multiply(backdrop.val[2], multiplier)
|
||||||
|
blended.val[3] = multiply(backdrop.val[3], multiplier)
|
||||||
|
blended.val[0] = vadd_u8(blended.val[0], source.val[0])
|
||||||
|
blended.val[1] = vadd_u8(blended.val[1], source.val[1])
|
||||||
|
blended.val[2] = vadd_u8(blended.val[2], source.val[2])
|
||||||
|
blended.val[3] = vadd_u8(blended.val[3], source.val[3])
|
||||||
|
vst4_u8(a[i].addr, blended)
|
||||||
|
|
||||||
|
i += 8
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
|
proc blitLineMaskNeon*(
|
||||||
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while (cast[uint](a[i].addr) and 15) != 0:
|
||||||
|
a[i] = blendMask(a[i], b[i])
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let vec255 = vmov_n_u8(255)
|
||||||
|
while i < len - 8:
|
||||||
|
let
|
||||||
|
source = vld4_u8(b[i].addr)
|
||||||
|
eq255 = vceq_u8(source.val[3], vec255)
|
||||||
|
if vget_lane_u64(cast[uint64x1](eq255), 0) == uint64.high:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
template multiply(c, a: uint8x8): uint8x8 =
|
||||||
|
let ca = vmull_u8(c, a)
|
||||||
|
vraddhn_u16(ca, vrshrq_n_u16(ca, 8))
|
||||||
|
|
||||||
|
let backdrop = vld4_u8(a[i].addr)
|
||||||
|
var blended: uint8x8x4
|
||||||
|
blended.val[0] = multiply(backdrop.val[0], source.val[3])
|
||||||
|
blended.val[1] = multiply(backdrop.val[1], source.val[3])
|
||||||
|
blended.val[2] = multiply(backdrop.val[2], source.val[3])
|
||||||
|
blended.val[3] = multiply(backdrop.val[3], source.val[3])
|
||||||
|
vst4_u8(a[i].addr, blended)
|
||||||
|
|
||||||
|
i += 8
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
a[i] = blendMask(a[i], b[i])
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
Loading…
Reference in a new issue