neon minifyBy2

This commit is contained in:
Ryan Oldenburg 2022-07-28 21:49:26 -05:00
parent 763a16bec4
commit 1239dbbf43

View file

@ -261,6 +261,104 @@ proc ceilNeon*(image: Image) {.simd.} =
rgbx.a = if rgbx.a == 0: 0 else: 255 rgbx.a = if rgbx.a == 0: 0 else: 255
image.data[i] = rgbx image.data[i] = rgbx
proc minifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
## Scales the image down by an integer scale.
if power < 0:
raise newException(PixieError, "Cannot minifyBy2 with negative power")
if power == 0:
return image.copy()
var src = image
for _ in 1 .. power:
# When minifying an image of odd size, round the result image size up
# so a 99 x 99 src image returns a 50 x 50 image.
let
srcWidthIsOdd = (src.width mod 2) != 0
srcHeightIsOdd = (src.height mod 2) != 0
resultEvenWidth = src.width div 2
resultEvenHeight = src.height div 2
result = newImage(
if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
)
let
evenLanes = [0.uint8, 2, 4, 6, 255, 255, 255, 255]
tblIdx = vld1_u8(evenLanes.unsafeAddr)
for y in 0 ..< resultEvenHeight:
let
topRowStart = src.dataIndex(0, y * 2)
bottomRowStart = src.dataIndex(0, y * 2 + 1)
var x: int
while x <= resultEvenWidth - 9:
let
top = vld4_u8(src.data[topRowStart + x * 2].addr)
topNext = vld4_u8(src.data[topRowStart + x * 2 + 1].addr)
bottom = vld4_u8(src.data[bottomRowStart + x * 2].addr)
bottomNext = vld4_u8(src.data[bottomRowStart + x * 2 + 1].addr)
r = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[0], topNext.val[0]),
vaddl_u8(bottom.val[0], bottomNext.val[0])
), 2)
g = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[1], topNext.val[1]),
vaddl_u8(bottom.val[1], bottomNext.val[1])
), 2)
b = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[2], topNext.val[2]),
vaddl_u8(bottom.val[2], bottomNext.val[2])
), 2)
a = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[3], topNext.val[3]),
vaddl_u8(bottom.val[3], bottomNext.val[3])
), 2)
# The correct values are in the even lanes 0, 2, 4, 6
var correct: uint8x8x4
correct.val[0] = vtbl1_u8(r, tblIdx)
correct.val[1] = vtbl1_u8(g, tblIdx)
correct.val[2] = vtbl1_u8(b, tblIdx)
correct.val[3] = vtbl1_u8(a, tblIdx)
vst4_u8(result.data[result.dataIndex(x, y)].addr, correct)
x += 4
for x in x ..< resultEvenWidth:
let
a = src.data[topRowStart + x * 2]
b = src.data[topRowStart + x * 2 + 1]
c = src.data[bottomRowStart + x * 2 + 1]
d = src.data[bottomRowStart + x * 2]
mixed = rgbx(
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
)
result.data[result.dataIndex(x, y)] = mixed
if srcWidthIsOdd:
let rgbx = mix(
src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
0.5
) * 0.5
result.data[result.dataIndex(result.width - 1, y)] = rgbx
if srcHeightIsOdd:
for x in 0 ..< resultEvenWidth:
let rgbx = mix(
src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
0.5
) * 0.5
result.data[result.dataIndex(x, result.height - 1)] = rgbx
if srcWidthIsOdd:
result.data[result.dataIndex(result.width - 1, result.height - 1)] =
src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
# Set src as this result for if we do another power
src = result
proc magnifyBy2Neon*(image: Image, power = 1): Image {.simd.} = proc magnifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
## Scales image up by 2 ^ power. ## Scales image up by 2 ^ power.
if power < 0: if power < 0: