Merge pull request #484 from treeform/guzba
rounding minifyBy2, updated masters
|
@ -56,18 +56,13 @@ proc copy*(image: Image): Image {.raises: [].} =
|
|||
template dataIndex*(image: Image, x, y: int): int =
|
||||
image.width * y + x
|
||||
|
||||
proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} =
|
||||
## Linearly interpolate between a and b using t.
|
||||
let t = round(t * 255).uint32
|
||||
((a * (255 - t) + b * t) div 255).uint8
|
||||
|
||||
proc mix*(a, b: ColorRGBX, t: float32): ColorRGBX {.inline, raises: [].} =
|
||||
## Linearly interpolate between a and b using t.
|
||||
let x = round(t * 255).uint32
|
||||
result.r = ((a.r.uint32 * (255 - x) + b.r.uint32 * x) div 255).uint8
|
||||
result.g = ((a.g.uint32 * (255 - x) + b.g.uint32 * x) div 255).uint8
|
||||
result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8
|
||||
result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8
|
||||
result.r = ((a.r.uint32 * (255 - x) + b.r.uint32 * x + 127) div 255).uint8
|
||||
result.g = ((a.g.uint32 * (255 - x) + b.g.uint32 * x + 127) div 255).uint8
|
||||
result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x + 127) div 255).uint8
|
||||
result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x + 127) div 255).uint8
|
||||
|
||||
proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
|
||||
if opacity == 0:
|
||||
|
@ -75,10 +70,10 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
|
|||
else:
|
||||
let
|
||||
x = round(opacity * 255).uint32
|
||||
r = ((color.r * x) div 255).uint8
|
||||
g = ((color.g * x) div 255).uint8
|
||||
b = ((color.b * x) div 255).uint8
|
||||
a = ((color.a * x) div 255).uint8
|
||||
r = ((color.r * x + 127) div 255).uint8
|
||||
g = ((color.g * x + 127) div 255).uint8
|
||||
b = ((color.b * x + 127) div 255).uint8
|
||||
a = ((color.a * x + 127) div 255).uint8
|
||||
rgbx(r, g, b, a)
|
||||
|
||||
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
||||
|
|
|
@ -192,10 +192,10 @@ proc minifyBy2*(
|
|||
c = src.data[bottomRowStart + x * 2 + 1]
|
||||
d = src.data[bottomRowStart + x * 2]
|
||||
mixed = rgbx(
|
||||
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
|
||||
((a.r.uint32 + b.r + c.r + d.r + 2) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g + 2) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b + 2) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a + 2) div 4).uint8
|
||||
)
|
||||
result.data[result.dataIndex(x, y)] = mixed
|
||||
|
||||
|
|
|
@ -296,6 +296,7 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
|||
)
|
||||
let
|
||||
oddMask = mm256_set1_epi16(0xff00)
|
||||
vec2 = mm256_set1_epi16(2)
|
||||
permuteControl = mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0)
|
||||
for y in 0 ..< resultEvenHeight:
|
||||
let
|
||||
|
@ -323,8 +324,10 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
|||
bottomAddedOdd = mm256_add_epi16(bottomOdd, bottomShiftedOdd)
|
||||
addedEven = mm256_add_epi16(topAddedEven, bottomAddedEven)
|
||||
addedOdd = mm256_add_epi16(topAddedOdd, bottomAddedOdd)
|
||||
addedEvenDiv4 = mm256_srli_epi16(addedEven, 2)
|
||||
addedOddDiv4 = mm256_srli_epi16(addedOdd, 2)
|
||||
addedEvenRounding = mm256_add_epi16(addedEven, vec2)
|
||||
addedOddRounding = mm256_add_epi16(addedOdd, vec2)
|
||||
addedEvenDiv4 = mm256_srli_epi16(addedEvenRounding, 2)
|
||||
addedOddDiv4 = mm256_srli_epi16(addedOddRounding, 2)
|
||||
merged = mm256_or_si256(addedEvenDiv4, mm256_slli_epi16(addedOddDiv4, 8))
|
||||
# Merged has the correct values for the next two pixels at
|
||||
# index 0, 2, 4, 6 so permute into position and store
|
||||
|
@ -342,10 +345,10 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
|||
c = src.data[bottomRowStart + x * 2 + 1]
|
||||
d = src.data[bottomRowStart + x * 2]
|
||||
mixed = rgbx(
|
||||
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
|
||||
((a.r.uint32 + b.r + c.r + d.r + 2) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g + 2) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b + 2) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a + 2) div 4).uint8
|
||||
)
|
||||
result.data[result.dataIndex(x, y)] = mixed
|
||||
|
||||
|
|
|
@ -350,19 +350,33 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
|||
if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
|
||||
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
|
||||
)
|
||||
let oddMask = mm_set1_epi16(0xff00)
|
||||
let
|
||||
oddMask = mm_set1_epi16(0xff00)
|
||||
loMask = mm_set_epi32(0, 0, uint32.high, uint32.high)
|
||||
hiMask = mm_set_epi32(uint32.high, uint32.high, 0, 0)
|
||||
vec2 = mm_set1_epi16(2)
|
||||
for y in 0 ..< resultEvenHeight:
|
||||
let
|
||||
topRowStart = src.dataIndex(0, y * 2)
|
||||
bottomRowStart = src.dataIndex(0, y * 2 + 1)
|
||||
|
||||
template loadEven(src: Image, idx: int): M128i =
|
||||
var
|
||||
a = mm_loadu_si128(src.data[idx].addr)
|
||||
b = mm_loadu_si128(src.data[idx + 4].addr)
|
||||
a = mm_shuffle_epi32(a, MM_SHUFFLE(3, 3, 2, 0))
|
||||
b = mm_shuffle_epi32(b, MM_SHUFFLE(2, 0, 3, 3))
|
||||
a = mm_and_si128(a, loMask)
|
||||
b = mm_and_si128(b, hiMask)
|
||||
mm_or_si128(a, b)
|
||||
|
||||
var x: int
|
||||
while x <= resultEvenWidth - 4:
|
||||
while x <= resultEvenWidth - 9:
|
||||
let
|
||||
top = mm_loadu_si128(src.data[topRowStart + x * 2].addr)
|
||||
bottom = mm_loadu_si128(src.data[bottomRowStart + x * 2].addr)
|
||||
topShifted = mm_srli_si128(top, 4)
|
||||
bottomShifted = mm_srli_si128(bottom, 4)
|
||||
top = loadEven(src, topRowStart + x * 2)
|
||||
bottom = loadEven(src, bottomRowStart + x * 2)
|
||||
topShifted = loadEven(src, topRowStart + x * 2 + 1)
|
||||
bottomShifted = loadEven(src, bottomRowStart + x * 2 + 1)
|
||||
topEven = mm_andnot_si128(oddMask, top)
|
||||
topOdd = mm_srli_epi16(top, 8)
|
||||
bottomEven = mm_andnot_si128(oddMask, bottom)
|
||||
|
@ -377,15 +391,13 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
|||
bottomAddedOdd = mm_add_epi16(bottomOdd, bottomShiftedOdd)
|
||||
addedEven = mm_add_epi16(topAddedEven, bottomAddedEven)
|
||||
addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd)
|
||||
addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
|
||||
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
|
||||
addedEvenRounding = mm_add_epi16(addedEven, vec2)
|
||||
addedOddRounding = mm_add_epi16(addedOdd, vec2)
|
||||
addedEvenDiv4 = mm_srli_epi16(addedEvenRounding, 2)
|
||||
addedOddDiv4 = mm_srli_epi16(addedOddRounding, 2)
|
||||
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
|
||||
# Merged has the correct values for the next two pixels at
|
||||
# index 0 and 2 so shift 0 and 2 into position and store
|
||||
shuffled = mm_shuffle_epi32(merged, MM_SHUFFLE(3, 3, 2, 0))
|
||||
lower = mm_cvtsi128_si64(shuffled)
|
||||
copyMem(result.data[result.dataIndex(x, y)].addr, lower.unsafeAddr, 8)
|
||||
x += 2
|
||||
mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, merged)
|
||||
x += 4
|
||||
|
||||
for x in x ..< resultEvenWidth:
|
||||
let
|
||||
|
@ -394,10 +406,10 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
|||
c = src.data[bottomRowStart + x * 2 + 1]
|
||||
d = src.data[bottomRowStart + x * 2]
|
||||
mixed = rgbx(
|
||||
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
|
||||
((a.r.uint32 + b.r + c.r + d.r + 2) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g + 2) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b + 2) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a + 2) div 4).uint8
|
||||
)
|
||||
result.data[result.dataIndex(x, y)] = mixed
|
||||
|
||||
|
|
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 2.7 KiB After Width: | Height: | Size: 2.5 KiB |
Before Width: | Height: | Size: 880 B After Width: | Height: | Size: 609 B |
Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 2.9 KiB |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 2.9 KiB |
Before Width: | Height: | Size: 617 B After Width: | Height: | Size: 596 B |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 2.9 KiB |
Before Width: | Height: | Size: 617 B After Width: | Height: | Size: 596 B |
Before Width: | Height: | Size: 3.3 KiB After Width: | Height: | Size: 2.6 KiB |
Before Width: | Height: | Size: 1,005 B After Width: | Height: | Size: 629 B |
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 4.5 KiB After Width: | Height: | Size: 3.4 KiB |
Before Width: | Height: | Size: 2.6 KiB After Width: | Height: | Size: 2.6 KiB |
Before Width: | Height: | Size: 3.2 KiB After Width: | Height: | Size: 3.9 KiB |
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 3.3 KiB |
Before Width: | Height: | Size: 806 B After Width: | Height: | Size: 777 B |
Before Width: | Height: | Size: 3.2 KiB After Width: | Height: | Size: 3.9 KiB |
Before Width: | Height: | Size: 3.7 KiB After Width: | Height: | Size: 3.4 KiB |
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 4.2 KiB |
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.1 KiB |
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 628 B |
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 327 B After Width: | Height: | Size: 317 B |
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 2.7 KiB After Width: | Height: | Size: 2.6 KiB |
Before Width: | Height: | Size: 1.2 KiB After Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 1.2 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 1 KiB After Width: | Height: | Size: 623 B |
Before Width: | Height: | Size: 956 B After Width: | Height: | Size: 632 B |
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 1,008 B After Width: | Height: | Size: 723 B |
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1,014 B |
Before Width: | Height: | Size: 6.9 KiB After Width: | Height: | Size: 5.6 KiB |
Before Width: | Height: | Size: 988 B After Width: | Height: | Size: 697 B |
Before Width: | Height: | Size: 968 B After Width: | Height: | Size: 751 B |
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 668 B |
Before Width: | Height: | Size: 68 KiB After Width: | Height: | Size: 53 KiB |
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 628 B |
Before Width: | Height: | Size: 111 B After Width: | Height: | Size: 104 B |
Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 48 KiB |
Before Width: | Height: | Size: 89 KiB After Width: | Height: | Size: 67 KiB |
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 355 B After Width: | Height: | Size: 337 B |
Before Width: | Height: | Size: 913 B After Width: | Height: | Size: 787 B |
Before Width: | Height: | Size: 1.2 KiB After Width: | Height: | Size: 742 B |
Before Width: | Height: | Size: 130 B After Width: | Height: | Size: 111 B |
Before Width: | Height: | Size: 441 B After Width: | Height: | Size: 352 B |
Before Width: | Height: | Size: 358 B After Width: | Height: | Size: 327 B |
Before Width: | Height: | Size: 352 B After Width: | Height: | Size: 326 B |
Before Width: | Height: | Size: 344 B After Width: | Height: | Size: 326 B |
Before Width: | Height: | Size: 5.7 KiB After Width: | Height: | Size: 4.7 KiB |
Before Width: | Height: | Size: 4 KiB After Width: | Height: | Size: 3.7 KiB |
Before Width: | Height: | Size: 412 B After Width: | Height: | Size: 346 B |
Before Width: | Height: | Size: 475 B After Width: | Height: | Size: 372 B |
Before Width: | Height: | Size: 211 B After Width: | Height: | Size: 168 B |
Before Width: | Height: | Size: 178 B After Width: | Height: | Size: 138 B |
Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 184 KiB |
Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 6.6 KiB |
Before Width: | Height: | Size: 8.3 KiB After Width: | Height: | Size: 6.7 KiB |
Before Width: | Height: | Size: 9.6 KiB After Width: | Height: | Size: 6.7 KiB |
Before Width: | Height: | Size: 8.3 KiB After Width: | Height: | Size: 6.7 KiB |
Before Width: | Height: | Size: 8.3 KiB After Width: | Height: | Size: 6.7 KiB |
Before Width: | Height: | Size: 2.6 KiB After Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 2.2 KiB After Width: | Height: | Size: 2 KiB |
Before Width: | Height: | Size: 388 B After Width: | Height: | Size: 353 B |
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 106 B After Width: | Height: | Size: 96 B |
Before Width: | Height: | Size: 105 B After Width: | Height: | Size: 95 B |
Before Width: | Height: | Size: 393 B After Width: | Height: | Size: 375 B |
Before Width: | Height: | Size: 388 B After Width: | Height: | Size: 375 B |
Before Width: | Height: | Size: 514 B After Width: | Height: | Size: 387 B |
Before Width: | Height: | Size: 129 B After Width: | Height: | Size: 108 B |
Before Width: | Height: | Size: 305 B After Width: | Height: | Size: 252 B |
Before Width: | Height: | Size: 471 B After Width: | Height: | Size: 408 B |
Before Width: | Height: | Size: 305 B After Width: | Height: | Size: 250 B |
Before Width: | Height: | Size: 8.3 KiB After Width: | Height: | Size: 7.3 KiB |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.6 KiB |
Before Width: | Height: | Size: 4.2 KiB After Width: | Height: | Size: 3.9 KiB |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.3 KiB |
Before Width: | Height: | Size: 355 B After Width: | Height: | Size: 300 B |
Before Width: | Height: | Size: 261 B After Width: | Height: | Size: 190 B |
Before Width: | Height: | Size: 520 B After Width: | Height: | Size: 426 B |
Before Width: | Height: | Size: 837 B After Width: | Height: | Size: 430 B |
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 27 KiB |
Before Width: | Height: | Size: 40 KiB After Width: | Height: | Size: 40 KiB |
Before Width: | Height: | Size: 4.3 KiB After Width: | Height: | Size: 3.7 KiB |
Before Width: | Height: | Size: 3.9 KiB After Width: | Height: | Size: 3.3 KiB |
Before Width: | Height: | Size: 2.6 KiB After Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 2 KiB After Width: | Height: | Size: 1.7 KiB |