better

2022-07-30 23:49:05 -05:00 · 2022-07-30 23:49:05 -05:00 · 0245eeebea
commit 0245eeebea
parent c4cd380676
1 changed files with 17 additions and 9 deletions
--- a/src/pixie/simd/neon.nim
+++ b/src/pixie/simd/neon.nim
@@ -58,9 +58,9 @@ proc isOneColorNeon*(image: Image): bool {.simd.} =
      rgEq = vandq_u8(rEq, gEq)
      baEq = vandq_u8(bEq, aEq)
      rgbaEq = vandq_u8(rgEq, baEq)
-      mask =
+      mask = vget_lane_u64(cast[uint64x1](
-        cast[uint64](vget_low_u64(cast[uint64x2](rgbaEq))) and
+        vand_u8(vget_low_u8(rgbaEq), vget_high_u8(rgbaEq)
-        cast[uint64](vget_high_u64(cast[uint64x2](rgbaEq)))
+      )), 0)
    if mask != uint64.high:
      return false
    i += 16
@@ -82,12 +82,16 @@ proc isTransparentNeon*(image: Image): bool {.simd.} =
  result = true
-  let iterations = (image.data.len - i) div 16
+  let
+    vecZero = vmovq_n_u8(0)
+    iterations = (image.data.len - i) div 16
  for _ in 0 ..< iterations:
    let
      alphas = vld4q_u8(image.data[i].addr).val[3]
-      eq = vceqq_u64(cast[uint64x2](alphas), vmovq_n_u64(0))
+      eq = vceqq_u8(alphas, vecZero)
-      mask = cast[uint64](vget_low_u64(eq)) and cast[uint64](vget_high_u64(eq))
+      mask = vget_lane_u64(cast[uint64x1](
+        vand_u8(vget_low_u8(eq), vget_high_u8(eq)
+      )), 0)
    if mask != uint64.high:
      return false
    i += 16
@@ -109,12 +113,16 @@ proc isOpaqueNeon*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
    inc i
    p += 4
-  let iterations = (start + len - i) div 16
+  let
+    vec255 = vmovq_n_u8(255)
+    iterations = (start + len - i) div 16
  for _ in 0 ..< iterations:
    let
      alphas = vld4q_u8(data[i].addr).val[3]
-      eq = vceqq_u64(cast[uint64x2](alphas), vmovq_n_u64(uint64.high))
+      eq = vceqq_u8(alphas, vec255)
-      mask = cast[uint64](vget_low_u64(eq)) and cast[uint64](vget_high_u64(eq))
+      mask = vget_lane_u64(cast[uint64x1](
+        vand_u8(vget_low_u8(eq), vget_high_u8(eq)
+      )), 0)
    if mask != uint64.high:
      return false
    i += 16