minifyBy2 magnifyBy2 simd

This commit is contained in:
Ryan Oldenburg 2022-07-26 13:56:13 -05:00
parent 5ca6f57aae
commit e5c4ba1605
5 changed files with 297 additions and 104 deletions

View file

@ -46,6 +46,16 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
result.height = height
result.data = newSeq[ColorRGBX](width * height)
proc copy*(image: Image): Image {.raises: [].} =
  ## Returns a new image with the same width and height as `image` and a
  ## copy of its pixel data.
  Image(width: image.width, height: image.height, data: image.data)
template dataIndex*(image: Image, x, y: int): int =
## Flat index of pixel (x, y): rows are laid out one after another, so row
## y starts at image.width * y. Callers in this file use the value to
## index image.data. No bounds checking is performed here.
image.width * y + x
proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} =
## Linearly interpolate between a and b using t.
let t = round(t * 255).uint32
@ -59,6 +69,18 @@ proc mix*(a, b: ColorRGBX, t: float32): ColorRGBX {.inline, raises: [].} =
result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8
result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8
proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
  ## Multiplies every channel of `color` (r, g, b and a) by `opacity`.
  ## The opacity is first quantised to round(opacity * 255) and each channel
  ## is then scaled with integer math (channel * factor div 255).
  ## An opacity of exactly 0 yields rgbx(0, 0, 0, 0).
  if opacity == 0:
    return rgbx(0, 0, 0, 0)

  let factor = round(opacity * 255).uint32
  rgbx(
    ((color.r * factor) div 255).uint8,
    ((color.g * factor) div 255).uint8,
    ((color.b * factor) div 255).uint8,
    ((color.a * factor) div 255).uint8
  )
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
let
xMin = rect.x

View file

@ -1,6 +1,6 @@
import blends, bumpy, chroma, common, internal, simd, vmath
export Image, newImage
export Image, newImage, copy, dataIndex
const h = 0.5.float32
@ -9,13 +9,6 @@ type UnsafeImage = distinct Image
# Release builds only: turn runtime checks off for the code that follows.
when defined(release):
{.push checks: off.}
proc copy*(image: Image): Image {.raises: [].} =
  ## Builds a fresh Image of the same size as `image` and copies the pixel
  ## data into it.
  result = Image(width: image.width, height: image.height)
  result.data = image.data
proc `$`*(image: Image): string {.raises: [].} =
  ## Short description of the image in the form "<Image WIDTHxHEIGHT>".
  result = "<Image "
  result.add $image.width
  result.add "x"
  result.add $image.height
  result.add ">"
@ -24,9 +17,6 @@ proc inside*(image: Image, x, y: int): bool {.inline, raises: [].} =
## Returns true if (x, y) is inside the image.
x >= 0 and x < image.width and y >= 0 and y < image.height
proc dataIndex*(image: Image, x, y: int): int {.inline, raises: [].} =
  ## Flat index of pixel (x, y) for row-major storage: each row holds
  ## image.width entries. No bounds checking is performed.
  let rowStart = image.width * y
  rowStart + x
template unsafe*(src: Image): UnsafeImage =
## Reinterprets `src` as an UnsafeImage (declared as `distinct Image`) with
## a plain cast; no data is copied or converted.
## NOTE(review): presumably this selects unchecked `[]` / `[]=` accessors
## defined for UnsafeImage elsewhere in the file - confirm.
cast[UnsafeImage](src)
@ -167,7 +157,9 @@ proc diff*(master, image: Image): (float32, Image) {.raises: [PixieError].} =
(100 * diffScore.float32 / diffTotal.float32, diffImage)
proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
proc minifyBy2*(
image: Image, power = 1
): Image {.hasSimd, raises: [PixieError].} =
## Scales the image down by an integer scale.
if power < 0:
raise newException(PixieError, "Cannot minifyBy2 with negative power")
@ -188,90 +180,50 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
)
for y in 0 ..< resultEvenHeight:
var x: int
when defined(amd64) and allowSimd:
let
topRowStart = src.dataIndex(0, y * 2)
bottomRowStart = src.dataIndex(0, y * 2 + 1)
for x in 0 ..< resultEvenWidth:
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
for _ in countup(0, resultEvenWidth - 4, 2):
let
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
btm = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 1)].addr)
topShifted = mm_srli_si128(top, 4)
btmShifted = mm_srli_si128(btm, 4)
topEven = mm_andnot_si128(oddMask, top)
topOdd = mm_srli_epi16(top, 8)
btmEven = mm_andnot_si128(oddMask, btm)
btmOdd = mm_srli_epi16(btm, 8)
topShiftedEven = mm_andnot_si128(oddMask, topShifted)
topShiftedOdd = mm_srli_epi16(topShifted, 8)
btmShiftedEven = mm_andnot_si128(oddMask, btmShifted)
btmShiftedOdd = mm_srli_epi16(btmShifted, 8)
topAddedEven = mm_add_epi16(topEven, topShiftedEven)
btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven)
topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
addedEven = mm_add_epi16(topAddedEven, btmAddedEven)
addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd)
addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0 and 2 so mask the others out and shift 0 and 2 into
# position and store
masked = mm_and_si128(merged, mergedMask)
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm_shuffle_epi32(masked, MM_SHUFFLE(0, 0, 2, 0))
)
x += 2
for x in x ..< resultEvenWidth:
let
a = src.unsafe[x * 2 + 0, y * 2 + 0]
b = src.unsafe[x * 2 + 1, y * 2 + 0]
c = src.unsafe[x * 2 + 1, y * 2 + 1]
d = src.unsafe[x * 2 + 0, y * 2 + 1]
a = src.data[topRowStart + x * 2]
b = src.data[topRowStart + x * 2 + 1]
c = src.data[bottomRowStart + x * 2 + 1]
d = src.data[bottomRowStart + x * 2]
mixed = rgbx(
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
)
result.unsafe[x, y] = mixed
result.data[result.dataIndex(x, y)] = mixed
if srcWidthIsOdd:
let rgbx = mix(
src.unsafe[src.width - 1, y * 2 + 0],
src.unsafe[src.width - 1, y * 2 + 1],
src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
0.5
) * 0.5
result.unsafe[result.width - 1, y] = rgbx
result.data[result.dataIndex(result.width - 1, y)] = rgbx
if srcHeightIsOdd:
for x in 0 ..< resultEvenWidth:
let rgbx = mix(
src.unsafe[x * 2 + 0, src.height - 1],
src.unsafe[x * 2 + 1, src.height - 1],
src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
0.5
) * 0.5
result.unsafe[x, result.height - 1] = rgbx
result.data[result.dataIndex(x, result.height - 1)] = rgbx
if srcWidthIsOdd:
result.unsafe[result.width - 1, result.height - 1] =
src.unsafe[src.width - 1, src.height - 1] * 0.25
result.data[result.dataIndex(result.width - 1, result.height - 1)] =
src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
# Set src as this result for if we do another power
src = result
proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
proc magnifyBy2*(
image: Image, power = 1
): Image {.hasSimd, raises: [PixieError].} =
## Scales image up by 2 ^ power.
if power < 0:
raise newException(PixieError, "Cannot magnifyBy2 with negative power")
@ -281,32 +233,20 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
for y in 0 ..< image.height:
# Write one row of pixels duplicated by scale
var x: int
when defined(amd64) and allowSimd:
if scale == 2:
while x <= image.width - 4:
let values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
mm_storeu_si128(
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
mm_unpacklo_epi32(values, values)
)
mm_storeu_si128(
result.data[result.dataIndex(x * scale + 4, y * scale)].addr,
mm_unpackhi_epi32(values, values)
)
x += 4
for x in x ..< image.width:
let
sourceRowStart = image.dataIndex(0, y)
resultRowStart = result.dataIndex(0, y * scale)
for x in 0 ..< image.width:
let
rgbx = image.unsafe[x, y]
resultIdx = result.dataIndex(x * scale, y * scale)
rgbx = image.data[sourceRowStart + x]
resultIdx = resultRowStart + x * scale
for i in 0 ..< scale:
result.data[resultIdx + i] = rgbx
# Copy that row of pixels into (scale - 1) more rows
let rowStart = result.dataIndex(0, y * scale)
for i in 1 ..< scale:
copyMem(
result.data[rowStart + result.width * i].addr,
result.data[rowStart].addr,
result.data[resultRowStart + result.width * i].addr,
result.data[resultRowStart].addr,
result.width * 4
)

View file

@ -33,18 +33,6 @@ proc gaussianKernel*(radius: int): seq[uint16] {.raises: [].} =
for i, f in floats:
result[i] = round(f * 255 * 256).uint16
proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
  ## Scales all four channels of `color` by `opacity`.
  ## The opacity is quantised to round(opacity * 255); every channel is then
  ## computed as (channel * weight) div 255. Exactly-zero opacity returns
  ## rgbx(0, 0, 0, 0) without doing the multiplication.
  if opacity == 0:
    return rgbx(0, 0, 0, 0)

  let weight = round(opacity * 255).uint32

  template scaled(channel: untyped): uint8 =
    ((channel * weight) div 255).uint8

  rgbx(scaled(color.r), scaled(color.g), scaled(color.b), scaled(color.a))
proc intersectsInside*(a, b: Segment, at: var Vec2): bool {.inline.} =
## Checks if the a segment intersects b segment (excluding endpoints).
## If it returns true, at will have point of intersection

View file

@ -274,5 +274,108 @@ proc ceilAvx2*(image: Image) {.simd.} =
rgbx.a = if rgbx.a == 0: 0 else: 255
image.data[i] = rgbx
proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
  ## Scales the image down by 2, `power` times in a row, using AVX2 for the
  ## bulk of each row.
  ##
  ## Every result pixel is the per-channel average of a 2x2 block of source
  ## pixels. When a source dimension is odd the result size is rounded up
  ## and the extra edge column / row is built from the source pixels that
  ## do exist (see the edge handling below).
  ##
  ## * `power` < 0 raises PixieError.
  ## * `power` == 0 returns `image.copy()`.
  if power < 0:
    raise newException(PixieError, "Cannot minifyBy2 with negative power")
  if power == 0:
    return image.copy()

  var src = image
  for _ in 1 .. power:
    # When minifying an image of odd size, round the result image size up
    # so a 99 x 99 src image returns a 50 x 50 image.
    let
      srcWidthIsOdd = (src.width mod 2) != 0
      srcHeightIsOdd = (src.height mod 2) != 0
      resultEvenWidth = src.width div 2
      resultEvenHeight = src.height div 2
    result = newImage(
      if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
      if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
    )

    let
      # High byte of every 16-bit lane. `andnot` with this mask keeps the low
      # (even) bytes; the odd bytes are extracted with a 16-bit shift instead.
      oddMask = mm256_set1_epi16(0xff00)
      # Keeps the 32-bit elements 0, 2, 4 and 6 and zeroes the others.
      mergedMask = mm256_set_epi32(
        0, uint32.high, 0, uint32.high, 0, uint32.high, 0, uint32.high
      )
      # Gathers elements 0, 2, 4, 6 into the low four 32-bit slots.
      permuteControl = mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0)

    for y in 0 ..< resultEvenHeight:
      let
        topRowStart = src.dataIndex(0, y * 2)
        bottomRowStart = src.dataIndex(0, y * 2 + 1)

      var x: int
      # Each iteration loads 8 source pixels from the top and bottom rows and
      # writes 4 result pixels (assumes 4-byte ColorRGBX).
      # NOTE(review): the `- 8` bound looks conservative; `- 4` would also keep
      # these loads and the store in range - confirm before tightening.
      while x <= resultEvenWidth - 8:
        let
          top = mm256_loadu_si256(src.data[topRowStart + x * 2].addr)
          bottom = mm256_loadu_si256(src.data[bottomRowStart + x * 2].addr)
          # Byte shift by one pixel so pixel i lines up with pixel i + 1.
          topShifted = mm256_srli_si256(top, 4)
          bottomShifted = mm256_srli_si256(bottom, 4)
          # Split into even / odd bytes widened to 16 bits so the four-way
          # sums below cannot overflow.
          topEven = mm256_andnot_si256(oddMask, top)
          topOdd = mm256_srli_epi16(top, 8)
          bottomEven = mm256_andnot_si256(oddMask, bottom)
          bottomOdd = mm256_srli_epi16(bottom, 8)
          topShiftedEven = mm256_andnot_si256(oddMask, topShifted)
          topShiftedOdd = mm256_srli_epi16(topShifted, 8)
          bottomShiftedEven = mm256_andnot_si256(oddMask, bottomShifted)
          bottomShiftedOdd = mm256_srli_epi16(bottomShifted, 8)
          topAddedEven = mm256_add_epi16(topEven, topShiftedEven)
          bottomAddedEven = mm256_add_epi16(bottomEven, bottomShiftedEven)
          topAddedOdd = mm256_add_epi16(topOdd, topShiftedOdd)
          bottomAddedOdd = mm256_add_epi16(bottomOdd, bottomShiftedOdd)
          addedEven = mm256_add_epi16(topAddedEven, bottomAddedEven)
          addedOdd = mm256_add_epi16(topAddedOdd, bottomAddedOdd)
          # Divide the sums of four by 4.
          addedEvenDiv4 = mm256_srli_epi16(addedEven, 2)
          addedOddDiv4 = mm256_srli_epi16(addedOdd, 2)
          merged = mm256_or_si256(
            addedEvenDiv4, mm256_slli_epi16(addedOddDiv4, 8)
          )
          # Merged has the correct values for the next four pixels at
          # index 0, 2, 4, 6 so mask the others out and permute into position
          masked = mm256_and_si256(merged, mergedMask)
          permuted = mm256_permutevar8x32_epi32(masked, permuteControl)
        # Only the low 128 bits (4 pixels) hold results, so store just those.
        mm_storeu_si128(
          result.data[result.dataIndex(x, y)].addr,
          mm256_castsi256_si128(permuted)
        )
        x += 4

      # Scalar path for whatever the SIMD loop did not cover.
      for x in x ..< resultEvenWidth:
        let
          a = src.data[topRowStart + x * 2]
          b = src.data[topRowStart + x * 2 + 1]
          c = src.data[bottomRowStart + x * 2 + 1]
          d = src.data[bottomRowStart + x * 2]
          mixed = rgbx(
            ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
            ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
            ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
            ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
          )
        result.data[result.dataIndex(x, y)] = mixed

      if srcWidthIsOdd:
        # Last column: only two source pixels exist (top and bottom), so mix
        # them and scale the result by 0.5.
        let rgbx = mix(
          src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
          src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
          0.5
        ) * 0.5
        result.data[result.dataIndex(result.width - 1, y)] = rgbx

    if srcHeightIsOdd:
      # Last row: only two source pixels exist (left and right) per result
      # pixel, so mix them and scale the result by 0.5.
      for x in 0 ..< resultEvenWidth:
        let rgbx = mix(
          src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
          src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
          0.5
        ) * 0.5
        result.data[result.dataIndex(x, result.height - 1)] = rgbx

      if srcWidthIsOdd:
        # Bottom-right corner: a single source pixel, scaled by 0.25.
        result.data[result.dataIndex(result.width - 1, result.height - 1)] =
          src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25

    # Set src as this result for if we do another power
    src = result
# Release builds only: pop the most recently pushed pragma state.
# NOTE(review): the matching {.push.} is outside this view - confirm it exists.
when defined(release):
{.pop.}

View file

@ -330,6 +330,146 @@ proc ceilSse2*(image: Image) {.simd.} =
rgbx.a = if rgbx.a == 0: 0 else: 255
image.data[i] = rgbx
proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
## Scales the image down by 2, `power` times in a row, using SSE2 for the
## bulk of each row. Every result pixel is the per-channel average of a
## 2x2 block of source pixels; odd source dimensions round the result size
## up and get dedicated edge handling below.
## Raises PixieError when `power` is negative; `power` == 0 returns
## `image.copy()`.
if power < 0:
raise newException(PixieError, "Cannot minifyBy2 with negative power")
if power == 0:
return image.copy()
var src = image
for _ in 1 .. power:
# When minifying an image of odd size, round the result image size up
# so a 99 x 99 src image returns a 50 x 50 image.
let
srcWidthIsOdd = (src.width mod 2) != 0
srcHeightIsOdd = (src.height mod 2) != 0
resultEvenWidth = src.width div 2
resultEvenHeight = src.height div 2
result = newImage(
if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
)
let
# High byte of every 16-bit lane; `andnot` with it keeps the low (even)
# bytes, while the odd bytes are extracted with a 16-bit right shift.
oddMask = mm_set1_epi16(0xff00)
# Keeps 32-bit elements 0 and 2, zeroes elements 1 and 3.
mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
for y in 0 ..< resultEvenHeight:
let
topRowStart = src.dataIndex(0, y * 2)
bottomRowStart = src.dataIndex(0, y * 2 + 1)
var x: int
# Each iteration loads 4 source pixels from the top and bottom rows and
# produces 2 result pixels (assumes 4-byte ColorRGBX). The store below
# still writes a full 16 bytes at result x, which is why the bound keeps
# x + 4 within resultEvenWidth.
while x <= resultEvenWidth - 4:
let
top = mm_loadu_si128(src.data[topRowStart + x * 2].addr)
bottom = mm_loadu_si128(src.data[bottomRowStart + x * 2].addr)
# Byte shift by one pixel so pixel i lines up with pixel i + 1.
topShifted = mm_srli_si128(top, 4)
bottomShifted = mm_srli_si128(bottom, 4)
# Split into even / odd bytes widened to 16 bits so the four-way sums
# below cannot overflow.
topEven = mm_andnot_si128(oddMask, top)
topOdd = mm_srli_epi16(top, 8)
bottomEven = mm_andnot_si128(oddMask, bottom)
bottomOdd = mm_srli_epi16(bottom, 8)
topShiftedEven = mm_andnot_si128(oddMask, topShifted)
topShiftedOdd = mm_srli_epi16(topShifted, 8)
bottomShiftedEven = mm_andnot_si128(oddMask, bottomShifted)
bottomShiftedOdd = mm_srli_epi16(bottomShifted, 8)
topAddedEven = mm_add_epi16(topEven, topShiftedEven)
bottomAddedEven = mm_add_epi16(bottomEven, bottomShiftedEven)
topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
bottomAddedOdd = mm_add_epi16(bottomOdd, bottomShiftedOdd)
addedEven = mm_add_epi16(topAddedEven, bottomAddedEven)
addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd)
# Divide the sums of four by 4.
addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0 and 2 so mask the others out and shift 0 and 2 into
# position and store
masked = mm_and_si128(merged, mergedMask)
# The shuffle places elements 0 and 2 in slots 0 and 1; slots 2 and 3
# receive element 3, which the mask above has zeroed.
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm_shuffle_epi32(masked, MM_SHUFFLE(3, 3, 2, 0))
)
x += 2
# Scalar path for whatever the SIMD loop did not cover.
for x in x ..< resultEvenWidth:
let
a = src.data[topRowStart + x * 2]
b = src.data[topRowStart + x * 2 + 1]
c = src.data[bottomRowStart + x * 2 + 1]
d = src.data[bottomRowStart + x * 2]
mixed = rgbx(
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
)
result.data[result.dataIndex(x, y)] = mixed
# Last column of an odd-width source: only the top and bottom pixels exist,
# so mix them and scale the result by 0.5.
if srcWidthIsOdd:
let rgbx = mix(
src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
0.5
) * 0.5
result.data[result.dataIndex(result.width - 1, y)] = rgbx
# Last row of an odd-height source: only the left and right pixels exist
# per result pixel, so mix them and scale the result by 0.5.
if srcHeightIsOdd:
for x in 0 ..< resultEvenWidth:
let rgbx = mix(
src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
0.5
) * 0.5
result.data[result.dataIndex(x, result.height - 1)] = rgbx
# Bottom-right corner when both dimensions are odd: a single source pixel,
# scaled by 0.25.
if srcWidthIsOdd:
result.data[result.dataIndex(result.width - 1, result.height - 1)] =
src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
# Set src as this result for if we do another power
src = result
proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
## Scales image up by 2 ^ power.
## Every source pixel becomes a scale x scale square of identical pixels in
## the result. Raises PixieError when `power` is negative.
if power < 0:
raise newException(PixieError, "Cannot magnifyBy2 with negative power")
let scale = 2 ^ power
result = newImage(image.width * scale, image.height * scale)
for y in 0 ..< image.height:
# Write one row of pixels duplicated by scale
let
sourceRowStart = image.dataIndex(0, y)
resultRowStart = result.dataIndex(0, y * scale)
var x: int
# SSE2 fast path, only for the plain 2x case: load 4 source pixels and
# store 8 result pixels per iteration (assumes 4-byte ColorRGBX).
if scale == 2:
while x <= image.width - 4:
let values = mm_loadu_si128(image.data[sourceRowStart + x].addr)
# Interleaving the register with itself duplicates each 32-bit element:
# the low half yields p0 p0 p1 p1, the high half p2 p2 p3 p3.
mm_storeu_si128(
result.data[resultRowStart + x * scale].addr,
mm_unpacklo_epi32(values, values)
)
mm_storeu_si128(
result.data[resultRowStart + x * scale + 4].addr,
mm_unpackhi_epi32(values, values)
)
x += 4
# Scalar path: the remaining pixels of the row, or the whole row when
# scale is not 2.
for x in x ..< image.width:
let
rgbx = image.data[sourceRowStart + x]
resultIdx = resultRowStart + x * scale
for i in 0 ..< scale:
result.data[resultIdx + i] = rgbx
# Copy that row of pixels into (scale - 1) more rows
for i in 1 ..< scale:
copyMem(
result.data[resultRowStart + result.width * i].addr,
result.data[resultRowStart].addr,
# Byte count for one result row; presumably 4 bytes per ColorRGBX.
result.width * 4
)
proc blitLineNormalSse2*(
a, b: ptr UncheckedArray[ColorRGBX], len: int
) {.simd.} =