bit of neon in png
This commit is contained in:
parent
93e9ef79b0
commit
e4afb30f2d
1 changed file with 17 additions and 8 deletions
|
@ -129,16 +129,25 @@ proc unfilter(
|
||||||
uncompressedStartIdx = uncompressedIdx(1, y)
|
uncompressedStartIdx = uncompressedIdx(1, y)
|
||||||
unfilteredStartIx = unfiteredIdx(0, y)
|
unfilteredStartIx = unfiteredIdx(0, y)
|
||||||
var x: int
|
var x: int
|
||||||
when allowSimd and defined(amd64):
|
when allowSimd and (defined(amd64) or defined(arm64)):
|
||||||
if y - 1 >= 0:
|
if y - 1 >= 0:
|
||||||
for _ in 0 ..< rowBytes div 16:
|
for _ in 0 ..< rowBytes div 16:
|
||||||
let
|
when defined(amd64):
|
||||||
bytes = mm_loadu_si128(uncompressed[uncompressedStartIdx + x].addr)
|
let
|
||||||
up = mm_loadu_si128(result[unfilteredStartIx + x - rowBytes].addr)
|
bytes = mm_loadu_si128(uncompressed[uncompressedStartIdx + x].addr)
|
||||||
mm_storeu_si128(
|
up = mm_loadu_si128(result[unfilteredStartIx + x - rowBytes].addr)
|
||||||
result[unfilteredStartIx + x].addr,
|
mm_storeu_si128(
|
||||||
mm_add_epi8(bytes, up)
|
result[unfilteredStartIx + x].addr,
|
||||||
)
|
mm_add_epi8(bytes, up)
|
||||||
|
)
|
||||||
|
else: # arm64
|
||||||
|
let
|
||||||
|
bytes = vld1q_u8(uncompressed[uncompressedStartIdx + x].addr)
|
||||||
|
up = vld1q_u8(result[unfilteredStartIx + x - rowBytes].addr)
|
||||||
|
vst1q_u8(
|
||||||
|
result[unfilteredStartIx + x].addr,
|
||||||
|
vaddq_u8(bytes, up)
|
||||||
|
)
|
||||||
x += 16
|
x += 16
|
||||||
for x in x ..< rowBytes:
|
for x in x ..< rowBytes:
|
||||||
var value = uncompressed[uncompressedStartIdx + x]
|
var value = uncompressed[uncompressedStartIdx + x]
|
||||||
|
|
Loading…
Reference in a new issue