From 5641e0dc078fa893f937adf7369109cf8cfa7c43 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Fri, 29 Jul 2022 13:20:14 -0500 Subject: [PATCH] rounding minifyBy2 --- src/pixie/common.nim | 21 ++++++++------------- src/pixie/images.nim | 8 ++++---- src/pixie/simd/avx2.nim | 15 +++++++++------ src/pixie/simd/sse2.nim | 15 +++++++++------ 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/pixie/common.nim b/src/pixie/common.nim index f558666..902d55f 100644 --- a/src/pixie/common.nim +++ b/src/pixie/common.nim @@ -56,18 +56,13 @@ proc copy*(image: Image): Image {.raises: [].} = template dataIndex*(image: Image, x, y: int): int = image.width * y + x -proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} = - ## Linearly interpolate between a and b using t. - let t = round(t * 255).uint32 - ((a * (255 - t) + b * t) div 255).uint8 - proc mix*(a, b: ColorRGBX, t: float32): ColorRGBX {.inline, raises: [].} = ## Linearly interpolate between a and b using t. let x = round(t * 255).uint32 - result.r = ((a.r.uint32 * (255 - x) + b.r.uint32 * x) div 255).uint8 - result.g = ((a.g.uint32 * (255 - x) + b.g.uint32 * x) div 255).uint8 - result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8 - result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8 + result.r = ((a.r.uint32 * (255 - x) + b.r.uint32 * x + 127) div 255).uint8 + result.g = ((a.g.uint32 * (255 - x) + b.g.uint32 * x + 127) div 255).uint8 + result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x + 127) div 255).uint8 + result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x + 127) div 255).uint8 proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} = if opacity == 0: @@ -75,10 +70,10 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} = else: let x = round(opacity * 255).uint32 - r = ((color.r * x) div 255).uint8 - g = ((color.g * x) div 255).uint8 - b = ((color.b * x) div 255).uint8 - a = ((color.a * x) div 255).uint8 + r = ((color.r * x + 127) div 255).uint8 + g = ((color.g * x + 127) div 255).uint8 + b = ((color.b * x + 127) div 255).uint8 + a = ((color.a * x + 127) div 255).uint8 rgbx(r, g, b, a) proc snapToPixels*(rect: Rect): Rect {.raises: [].} = diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 8212040..bcce99b 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -192,10 +192,10 @@ proc minifyBy2*( c = src.data[bottomRowStart + x * 2 + 1] d = src.data[bottomRowStart + x * 2] mixed = rgbx( - ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8, - ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8, - ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8, - ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8 + ((a.r.uint32 + b.r + c.r + d.r + 2) div 4).uint8, + ((a.g.uint32 + b.g + c.g + d.g + 2) div 4).uint8, + ((a.b.uint32 + b.b + c.b + d.b + 2) div 4).uint8, + ((a.a.uint32 + b.a + c.a + d.a + 2) div 4).uint8 ) result.data[result.dataIndex(x, y)] = mixed diff --git a/src/pixie/simd/avx2.nim b/src/pixie/simd/avx2.nim index 6b652d0..db06db8 100644 --- a/src/pixie/simd/avx2.nim +++ b/src/pixie/simd/avx2.nim @@ -296,6 +296,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} = ) let oddMask = mm256_set1_epi16(0xff00) + vec2 = mm256_set1_epi16(2) permuteControl = mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0) for y in 0 ..< resultEvenHeight: let @@ -323,8 +324,10 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} = bottomAddedOdd = mm256_add_epi16(bottomOdd, bottomShiftedOdd) addedEven = mm256_add_epi16(topAddedEven, bottomAddedEven) addedOdd = mm256_add_epi16(topAddedOdd, bottomAddedOdd) - addedEvenDiv4 = mm256_srli_epi16(addedEven, 2) - addedOddDiv4 = mm256_srli_epi16(addedOdd, 2) + addedEvenRounding = mm256_add_epi16(addedEven, vec2) + addedOddRounding = mm256_add_epi16(addedOdd, vec2) + addedEvenDiv4 = mm256_srli_epi16(addedEvenRounding, 2) + addedOddDiv4 = mm256_srli_epi16(addedOddRounding, 2) merged = mm256_or_si256(addedEvenDiv4, mm256_slli_epi16(addedOddDiv4, 8)) # Merged has the correct values for the next two pixels at # index 0, 2, 4, 6 so permute into position and store @@ -342,10 +345,10 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} = c = src.data[bottomRowStart + x * 2 + 1] d = src.data[bottomRowStart + x * 2] mixed = rgbx( - ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8, - ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8, - ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8, - ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8 + ((a.r.uint32 + b.r + c.r + d.r + 2) div 4).uint8, + ((a.g.uint32 + b.g + c.g + d.g + 2) div 4).uint8, + ((a.b.uint32 + b.b + c.b + d.b + 2) div 4).uint8, + ((a.a.uint32 + b.a + c.a + d.a + 2) div 4).uint8 ) result.data[result.dataIndex(x, y)] = mixed diff --git a/src/pixie/simd/sse2.nim b/src/pixie/simd/sse2.nim index 08b4dc1..062b972 100644 --- a/src/pixie/simd/sse2.nim +++ b/src/pixie/simd/sse2.nim @@ -354,6 +354,7 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} = oddMask = mm_set1_epi16(0xff00) loMask = mm_set_epi32(0, 0, uint32.high, uint32.high) hiMask = mm_set_epi32(uint32.high, uint32.high, 0, 0) + vec2 = mm_set1_epi16(2) for y in 0 ..< resultEvenHeight: let topRowStart = src.dataIndex(0, y * 2) @@ -390,8 +391,10 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} = bottomAddedOdd = mm_add_epi16(bottomOdd, bottomShiftedOdd) addedEven = mm_add_epi16(topAddedEven, bottomAddedEven) addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd) - addedEvenDiv4 = mm_srli_epi16(addedEven, 2) - addedOddDiv4 = mm_srli_epi16(addedOdd, 2) + addedEvenRounding = mm_add_epi16(addedEven, vec2) + addedOddRounding = mm_add_epi16(addedOdd, vec2) + addedEvenDiv4 = mm_srli_epi16(addedEvenRounding, 2) + addedOddDiv4 = mm_srli_epi16(addedOddRounding, 2) merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8)) mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, merged) x += 4 @@ -403,10 +406,10 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} = c = src.data[bottomRowStart + x * 2 + 1] d = src.data[bottomRowStart + x * 2] mixed = rgbx( - ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8, - ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8, - ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8, - ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8 + ((a.r.uint32 + b.r + c.r + d.r + 2) div 4).uint8, + ((a.g.uint32 + b.g + c.g + d.g + 2) div 4).uint8, + ((a.b.uint32 + b.b + c.b + d.b + 2) div 4).uint8, + ((a.a.uint32 + b.a + c.a + d.a + 2) div 4).uint8 ) result.data[result.dataIndex(x, y)] = mixed