commit
2f746876b6
|
@ -174,7 +174,7 @@ block: # Tiger
|
|||
props.strokeDashArray,
|
||||
props.transform.pixelScale
|
||||
)
|
||||
let paint = newPaint(props.stroke)
|
||||
let paint = props.stroke.copy()
|
||||
paint.color.a *= (props.opacity * props.strokeOpacity)
|
||||
fills.add(Fill(
|
||||
shapes: strokeShapes,
|
||||
|
|
|
@ -46,6 +46,16 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
|
|||
result.height = height
|
||||
result.data = newSeq[ColorRGBX](width * height)
|
||||
|
||||
proc copy*(image: Image): Image {.raises: [].} =
  ## Returns a new image with the same width and height as `image` and
  ## with `image.data` assigned to its own `data` field.
  Image(width: image.width, height: image.height, data: image.data)
|
||||
|
||||
template dataIndex*(image: Image, x, y: int): int =
  ## Index of pixel (x, y) in `image.data`: `y` full rows of `image.width`
  ## entries, plus `x`. No bounds checking is performed here.
  (image.width * y) + x
|
||||
|
||||
proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} =
|
||||
## Linearly interpolate between a and b using t.
|
||||
let t = round(t * 255).uint32
|
||||
|
@ -59,6 +69,18 @@ proc mix*(a, b: ColorRGBX, t: float32): ColorRGBX {.inline, raises: [].} =
|
|||
result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8
|
||||
result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8
|
||||
|
||||
proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
  ## Scales all four channels of `color` by `opacity`.
  ##
  ## `opacity` is expected to be in 0.0 .. 1.0. Values that are not greater
  ## than zero (0, negatives, NaN) yield `rgbx(0, 0, 0, 0)`; values above 1
  ## are treated as 1 so a channel can never exceed its 8-bit range.
  if not (opacity > 0):
    # Also catches NaN and negatives, which must never reach the
    # float -> uint32 conversion below.
    rgbx(0, 0, 0, 0)
  else:
    let x = round(min(opacity, 1'f32) * 255).uint32
    rgbx(
      ((color.r.uint32 * x) div 255).uint8,
      ((color.g.uint32 * x) div 255).uint8,
      ((color.b.uint32 * x) div 255).uint8,
      ((color.a.uint32 * x) div 255).uint8
    )
|
||||
|
||||
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
||||
let
|
||||
xMin = rect.x
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import blends, bumpy, chroma, common, internal, simd, vmath
|
||||
|
||||
export Image, newImage
|
||||
export Image, newImage, copy, dataIndex
|
||||
|
||||
const h = 0.5.float32
|
||||
|
||||
|
@ -9,13 +9,6 @@ type UnsafeImage = distinct Image
|
|||
when defined(release):
|
||||
{.push checks: off.}
|
||||
|
||||
proc copy*(image: Image): Image {.raises: [].} =
  ## Builds a fresh image of identical size and assigns it the pixel data
  ## of `image`.
  result = Image(width: image.width, height: image.height)
  result.data = image.data
|
||||
|
||||
proc `$`*(image: Image): string {.raises: [].} =
|
||||
## Prints the image size.
|
||||
"<Image " & $image.width & "x" & $image.height & ">"
|
||||
|
@ -24,9 +17,6 @@ proc inside*(image: Image, x, y: int): bool {.inline, raises: [].} =
|
|||
## Returns true if (x, y) is inside the image.
|
||||
x >= 0 and x < image.width and y >= 0 and y < image.height
|
||||
|
||||
proc dataIndex*(image: Image, x, y: int): int {.inline, raises: [].} =
  ## Index of pixel (x, y) in `image.data`: `y` full rows of `image.width`
  ## entries, plus `x`. No bounds checking is performed here.
  let rowStart = image.width * y
  rowStart + x
|
||||
|
||||
template unsafe*(src: Image): UnsafeImage =
|
||||
cast[UnsafeImage](src)
|
||||
|
||||
|
@ -167,7 +157,9 @@ proc diff*(master, image: Image): (float32, Image) {.raises: [PixieError].} =
|
|||
|
||||
(100 * diffScore.float32 / diffTotal.float32, diffImage)
|
||||
|
||||
proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
||||
proc minifyBy2*(
|
||||
image: Image, power = 1
|
||||
): Image {.hasSimd, raises: [PixieError].} =
|
||||
## Scales the image down by an integer scale.
|
||||
if power < 0:
|
||||
raise newException(PixieError, "Cannot minifyBy2 with negative power")
|
||||
|
@ -188,90 +180,50 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
|||
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
|
||||
)
|
||||
for y in 0 ..< resultEvenHeight:
|
||||
var x: int
|
||||
when defined(amd64) and allowSimd:
|
||||
let
|
||||
topRowStart = src.dataIndex(0, y * 2)
|
||||
bottomRowStart = src.dataIndex(0, y * 2 + 1)
|
||||
for x in 0 ..< resultEvenWidth:
|
||||
let
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
|
||||
for _ in countup(0, resultEvenWidth - 4, 2):
|
||||
let
|
||||
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
|
||||
btm = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 1)].addr)
|
||||
topShifted = mm_srli_si128(top, 4)
|
||||
btmShifted = mm_srli_si128(btm, 4)
|
||||
|
||||
topEven = mm_andnot_si128(oddMask, top)
|
||||
topOdd = mm_srli_epi16(top, 8)
|
||||
btmEven = mm_andnot_si128(oddMask, btm)
|
||||
btmOdd = mm_srli_epi16(btm, 8)
|
||||
|
||||
topShiftedEven = mm_andnot_si128(oddMask, topShifted)
|
||||
topShiftedOdd = mm_srli_epi16(topShifted, 8)
|
||||
btmShiftedEven = mm_andnot_si128(oddMask, btmShifted)
|
||||
btmShiftedOdd = mm_srli_epi16(btmShifted, 8)
|
||||
|
||||
topAddedEven = mm_add_epi16(topEven, topShiftedEven)
|
||||
btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven)
|
||||
topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
|
||||
btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
|
||||
|
||||
addedEven = mm_add_epi16(topAddedEven, btmAddedEven)
|
||||
addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd)
|
||||
|
||||
addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
|
||||
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
|
||||
|
||||
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
|
||||
# Merged has the correct values for the next two pixels at
|
||||
# index 0 and 2 so mask the others out and shift 0 and 2 into
|
||||
# position and store
|
||||
masked = mm_and_si128(merged, mergedMask)
|
||||
|
||||
mm_storeu_si128(
|
||||
result.data[result.dataIndex(x, y)].addr,
|
||||
mm_shuffle_epi32(masked, MM_SHUFFLE(0, 0, 2, 0))
|
||||
)
|
||||
x += 2
|
||||
|
||||
for x in x ..< resultEvenWidth:
|
||||
let
|
||||
a = src.unsafe[x * 2 + 0, y * 2 + 0]
|
||||
b = src.unsafe[x * 2 + 1, y * 2 + 0]
|
||||
c = src.unsafe[x * 2 + 1, y * 2 + 1]
|
||||
d = src.unsafe[x * 2 + 0, y * 2 + 1]
|
||||
a = src.data[topRowStart + x * 2]
|
||||
b = src.data[topRowStart + x * 2 + 1]
|
||||
c = src.data[bottomRowStart + x * 2 + 1]
|
||||
d = src.data[bottomRowStart + x * 2]
|
||||
mixed = rgbx(
|
||||
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
|
||||
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
|
||||
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
|
||||
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
|
||||
)
|
||||
result.unsafe[x, y] = mixed
|
||||
result.data[result.dataIndex(x, y)] = mixed
|
||||
|
||||
if srcWidthIsOdd:
|
||||
let rgbx = mix(
|
||||
src.unsafe[src.width - 1, y * 2 + 0],
|
||||
src.unsafe[src.width - 1, y * 2 + 1],
|
||||
src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
|
||||
src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
|
||||
0.5
|
||||
) * 0.5
|
||||
result.unsafe[result.width - 1, y] = rgbx
|
||||
result.data[result.dataIndex(result.width - 1, y)] = rgbx
|
||||
|
||||
if srcHeightIsOdd:
|
||||
for x in 0 ..< resultEvenWidth:
|
||||
let rgbx = mix(
|
||||
src.unsafe[x * 2 + 0, src.height - 1],
|
||||
src.unsafe[x * 2 + 1, src.height - 1],
|
||||
src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
|
||||
src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
|
||||
0.5
|
||||
) * 0.5
|
||||
result.unsafe[x, result.height - 1] = rgbx
|
||||
result.data[result.dataIndex(x, result.height - 1)] = rgbx
|
||||
|
||||
if srcWidthIsOdd:
|
||||
result.unsafe[result.width - 1, result.height - 1] =
|
||||
src.unsafe[src.width - 1, src.height - 1] * 0.25
|
||||
result.data[result.dataIndex(result.width - 1, result.height - 1)] =
|
||||
src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
|
||||
|
||||
# Set src as this result for if we do another power
|
||||
src = result
|
||||
|
||||
proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
||||
proc magnifyBy2*(
|
||||
image: Image, power = 1
|
||||
): Image {.hasSimd, raises: [PixieError].} =
|
||||
## Scales image up by 2 ^ power.
|
||||
if power < 0:
|
||||
raise newException(PixieError, "Cannot magnifyBy2 with negative power")
|
||||
|
@ -281,32 +233,20 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
|||
|
||||
for y in 0 ..< image.height:
|
||||
# Write one row of pixels duplicated by scale
|
||||
var x: int
|
||||
when defined(amd64) and allowSimd:
|
||||
if scale == 2:
|
||||
while x <= image.width - 4:
|
||||
let values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
|
||||
mm_storeu_si128(
|
||||
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
|
||||
mm_unpacklo_epi32(values, values)
|
||||
)
|
||||
mm_storeu_si128(
|
||||
result.data[result.dataIndex(x * scale + 4, y * scale)].addr,
|
||||
mm_unpackhi_epi32(values, values)
|
||||
)
|
||||
x += 4
|
||||
for x in x ..< image.width:
|
||||
let
|
||||
sourceRowStart = image.dataIndex(0, y)
|
||||
resultRowStart = result.dataIndex(0, y * scale)
|
||||
for x in 0 ..< image.width:
|
||||
let
|
||||
rgbx = image.unsafe[x, y]
|
||||
resultIdx = result.dataIndex(x * scale, y * scale)
|
||||
rgbx = image.data[sourceRowStart + x]
|
||||
resultIdx = resultRowStart + x * scale
|
||||
for i in 0 ..< scale:
|
||||
result.data[resultIdx + i] = rgbx
|
||||
# Copy that row of pixels into (scale - 1) more rows
|
||||
let rowStart = result.dataIndex(0, y * scale)
|
||||
for i in 1 ..< scale:
|
||||
copyMem(
|
||||
result.data[rowStart + result.width * i].addr,
|
||||
result.data[rowStart].addr,
|
||||
result.data[resultRowStart + result.width * i].addr,
|
||||
result.data[resultRowStart].addr,
|
||||
result.width * 4
|
||||
)
|
||||
|
||||
|
@ -853,7 +793,7 @@ proc spread(image: Image, spread: float32) {.raises: [PixieError].} =
|
|||
maxValue = value
|
||||
if maxValue == 255:
|
||||
break
|
||||
spreadX.unsafe[y, x] = rgbx(0, 0, 0, maxValue)
|
||||
spreadX.unsafe[y, x].a = maxValue
|
||||
|
||||
# Spread in the Y direction and modify mask.
|
||||
for y in 0 ..< image.height:
|
||||
|
|
|
@ -33,18 +33,6 @@ proc gaussianKernel*(radius: int): seq[uint16] {.raises: [].} =
|
|||
for i, f in floats:
|
||||
result[i] = round(f * 255 * 256).uint16
|
||||
|
||||
proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
  ## Scales all four channels of `color` by `opacity`.
  ##
  ## `opacity` is expected to be in 0.0 .. 1.0. Values that are not greater
  ## than zero (0, negatives, NaN) yield `rgbx(0, 0, 0, 0)`; values above 1
  ## are treated as 1 so a channel can never exceed its 8-bit range.
  if not (opacity > 0):
    # Also catches NaN and negatives, which must never reach the
    # float -> uint32 conversion below.
    rgbx(0, 0, 0, 0)
  else:
    let x = round(min(opacity, 1'f32) * 255).uint32
    rgbx(
      ((color.r.uint32 * x) div 255).uint8,
      ((color.g.uint32 * x) div 255).uint8,
      ((color.b.uint32 * x) div 255).uint8,
      ((color.a.uint32 * x) div 255).uint8
    )
|
||||
|
||||
proc intersectsInside*(a, b: Segment, at: var Vec2): bool {.inline.} =
|
||||
## Checks if the a segment intersects b segment (excluding endpoints).
|
||||
## If it returns true, at will have point of intersection
|
||||
|
|
|
@ -91,7 +91,19 @@ proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
|
|||
return false
|
||||
|
||||
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
||||
var i: int
|
||||
var
|
||||
i: int
|
||||
p = cast[uint](data[0].addr)
|
||||
# Align to 32 bytes
|
||||
while i < data.len and (p and 31) != 0:
|
||||
var rgbx = data[i]
|
||||
if rgbx.a != 255:
|
||||
rgbx.r = ((rgbx.r.uint32 * rgbx.a + 127) div 255).uint8
|
||||
rgbx.g = ((rgbx.g.uint32 * rgbx.a + 127) div 255).uint8
|
||||
rgbx.b = ((rgbx.b.uint32 * rgbx.a + 127) div 255).uint8
|
||||
data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
|
||||
let
|
||||
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||
|
@ -101,7 +113,7 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
|||
iterations = data.len div 8
|
||||
for _ in 0 ..< iterations:
|
||||
let
|
||||
values = mm256_loadu_si256(data[i].addr)
|
||||
values = mm256_load_si256(cast[pointer](p))
|
||||
alpha = mm256_and_si256(values, alphaMask)
|
||||
eq = mm256_cmpeq_epi8(values, alphaMask)
|
||||
if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888:
|
||||
|
@ -122,16 +134,17 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
|||
colorsOdd = mm256_add_epi16(colorsOdd, tmpOdd)
|
||||
colorsEven = mm256_srli_epi16(colorsEven, 8)
|
||||
colorsOdd = mm256_and_si256(colorsOdd, hiMask)
|
||||
mm256_storeu_si256(data[i].addr, mm256_or_si256(colorsEven, colorsOdd))
|
||||
i += 8
|
||||
mm256_store_si256(cast[pointer](p), mm256_or_si256(colorsEven, colorsOdd))
|
||||
p += 32
|
||||
i += 8 * iterations
|
||||
|
||||
for i in i ..< data.len:
|
||||
var c = data[i]
|
||||
if c.a != 255:
|
||||
c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
|
||||
c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
|
||||
c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
|
||||
data[i] = c
|
||||
var rgbx = data[i]
|
||||
if rgbx.a != 255:
|
||||
rgbx.r = ((rgbx.r.uint32 * rgbx.a + 127) div 255).uint8
|
||||
rgbx.g = ((rgbx.g.uint32 * rgbx.a + 127) div 255).uint8
|
||||
rgbx.b = ((rgbx.b.uint32 * rgbx.a + 127) div 255).uint8
|
||||
data[i] = rgbx
|
||||
|
||||
proc invertAvx2*(image: Image) {.simd.} =
|
||||
var
|
||||
|
@ -182,6 +195,16 @@ proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} =
|
|||
var
|
||||
i: int
|
||||
p = cast[uint](image.data[0].addr)
|
||||
# Align to 32 bytes
|
||||
while i < image.data.len and (p and 31) != 0:
|
||||
var rgbx = image.data[i]
|
||||
rgbx.r = ((rgbx.r * opacity) div 255).uint8
|
||||
rgbx.g = ((rgbx.g * opacity) div 255).uint8
|
||||
rgbx.b = ((rgbx.b * opacity) div 255).uint8
|
||||
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||
image.data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
|
||||
let
|
||||
oddMask = mm256_set1_epi16(0xff00)
|
||||
|
@ -191,7 +214,7 @@ proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} =
|
|||
iterations = image.data.len div 8
|
||||
for _ in 0 ..< iterations:
|
||||
let
|
||||
values = mm256_loadu_si256(cast[pointer](p))
|
||||
values = mm256_load_si256(cast[pointer](p))
|
||||
eqZero = mm256_cmpeq_epi16(values, zeroVec)
|
||||
if mm256_movemask_epi8(eqZero) != cast[int32](0xffffffff):
|
||||
var
|
||||
|
@ -201,7 +224,7 @@ proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} =
|
|||
valuesOdd = mm256_mulhi_epu16(valuesOdd, opacityVec)
|
||||
valuesEven = mm256_srli_epi16(mm256_mulhi_epu16(valuesEven, div255), 7)
|
||||
valuesOdd = mm256_srli_epi16(mm256_mulhi_epu16(valuesOdd, div255), 7)
|
||||
mm256_storeu_si256(
|
||||
mm256_store_si256(
|
||||
cast[pointer](p),
|
||||
mm256_or_si256(valuesEven, mm256_slli_epi16(valuesOdd, 8))
|
||||
)
|
||||
|
@ -216,5 +239,143 @@ proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} =
|
|||
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||
image.data[i] = rgbx
|
||||
|
||||
proc ceilAvx2*(image: Image) {.simd.} =
  ## Rewrites `image.data` in place so that every channel byte that is
  ## non-zero becomes 255 and every zero byte stays 0.
  ##
  ## Runs in three phases: a scalar head until the write pointer is
  ## 32-byte aligned, an aligned AVX2 loop over 8 pixels at a time, and a
  ## scalar tail for whatever is left.
  if image.data.len == 0:
    # Nothing to process, and `image.data[0]` must not be evaluated.
    return

  var
    i: int
    p = cast[uint](image.data[0].addr)
  # Scalar head: one 4-byte pixel at a time until `p` is 32-byte aligned.
  while i < image.data.len and (p and 31) != 0:
    var rgbx = image.data[i]
    rgbx.r = if rgbx.r == 0: 0 else: 255
    rgbx.g = if rgbx.g == 0: 0 else: 255
    rgbx.b = if rgbx.b == 0: 0 else: 255
    rgbx.a = if rgbx.a == 0: 0 else: 255
    image.data[i] = rgbx
    inc i
    p += 4

  let
    vecZero = mm256_setzero_si256()
    vec255 = mm256_set1_epi8(255)
    # Count only the pixels that remain after the head. Using the full
    # length here would run the aligned loop past the end of the buffer
    # whenever the head consumed any pixels.
    iterations = (image.data.len - i) div 8
  for _ in 0 ..< iterations:
    var values = mm256_load_si256(cast[pointer](p))
    # cmpeq gives 0xff for bytes equal to zero; andnot with 255 flips
    # that into 0 for zero bytes and 255 for everything else.
    values = mm256_cmpeq_epi8(values, vecZero)
    values = mm256_andnot_si256(values, vec255)
    mm256_store_si256(cast[pointer](p), values)
    p += 32
  i += 8 * iterations

  # Scalar tail: fewer than 8 pixels remain.
  for j in i ..< image.data.len:
    var rgbx = image.data[j]
    rgbx.r = if rgbx.r == 0: 0 else: 255
    rgbx.g = if rgbx.g == 0: 0 else: 255
    rgbx.b = if rgbx.b == 0: 0 else: 255
    rgbx.a = if rgbx.a == 0: 0 else: 255
    image.data[j] = rgbx
|
||||
|
||||
proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
  ## Halves the image size `power` times. Each result pixel is the
  ## per-channel sum of a 2x2 block of source pixels divided by 4.
  ## Odd source dimensions round up, so a 99 x 99 source gives 50 x 50.
  ##
  ## Raises `PixieError` when `power` is negative; `power == 0` returns
  ## `image.copy()`.
  if power < 0:
    raise newException(PixieError, "Cannot minifyBy2 with negative power")
  if power == 0:
    return image.copy()

  var src = image
  for _ in 1 .. power:
    let
      oddWidth = (src.width mod 2) != 0
      oddHeight = (src.height mod 2) != 0
      evenWidth = src.width div 2
      evenHeight = src.height div 2
    result = newImage(
      if oddWidth: evenWidth + 1 else: evenWidth,
      if oddHeight: evenHeight + 1 else: evenHeight
    )
    let
      # High byte of every 16-bit lane.
      oddMask = mm256_set1_epi16(0xff00)
      mergedMask = mm256_set_epi32(
        0, uint32.high, 0, uint32.high, 0, uint32.high, 0, uint32.high
      )
      permuteControl = mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0)
    for y in 0 ..< evenHeight:
      let
        topRowStart = src.dataIndex(0, y * 2)
        bottomRowStart = src.dataIndex(0, y * 2 + 1)

      var x: int
      while x <= evenWidth - 8:
        let
          top = mm256_loadu_si256(src.data[topRowStart + x * 2].addr)
          bottom = mm256_loadu_si256(src.data[bottomRowStart + x * 2].addr)
          # Each pixel's right-hand neighbour, moved down one pixel slot.
          topNext = mm256_srli_si256(top, 4)
          bottomNext = mm256_srli_si256(bottom, 4)
          # Sum the low bytes and the high bytes of the 16-bit lanes
          # separately so the four-way sums have room to grow.
          evenSum = mm256_add_epi16(
            mm256_add_epi16(
              mm256_andnot_si256(oddMask, top),
              mm256_andnot_si256(oddMask, topNext)
            ),
            mm256_add_epi16(
              mm256_andnot_si256(oddMask, bottom),
              mm256_andnot_si256(oddMask, bottomNext)
            )
          )
          oddSum = mm256_add_epi16(
            mm256_add_epi16(
              mm256_srli_epi16(top, 8),
              mm256_srli_epi16(topNext, 8)
            ),
            mm256_add_epi16(
              mm256_srli_epi16(bottom, 8),
              mm256_srli_epi16(bottomNext, 8)
            )
          )
          averaged = mm256_or_si256(
            mm256_srli_epi16(evenSum, 2),
            mm256_slli_epi16(mm256_srli_epi16(oddSum, 2), 8)
          )
          # Only 32-bit slots 0, 2, 4 and 6 hold finished pixels; keep
          # those and gather them into the low four slots.
          gathered = mm_256_permutevar8x32_epi32(
            mm256_and_si256(averaged, mergedMask),
            permuteControl
          )
        mm_storeu_si128(
          result.data[result.dataIndex(x, y)].addr,
          mm256_castsi256_si128(gathered)
        )
        x += 4

      # Scalar remainder of the row.
      while x < evenWidth:
        let
          topLeft = src.data[topRowStart + x * 2]
          topRight = src.data[topRowStart + x * 2 + 1]
          bottomRight = src.data[bottomRowStart + x * 2 + 1]
          bottomLeft = src.data[bottomRowStart + x * 2]
        result.data[result.dataIndex(x, y)] = rgbx(
          ((topLeft.r.uint32 + topRight.r + bottomRight.r +
            bottomLeft.r) div 4).uint8,
          ((topLeft.g.uint32 + topRight.g + bottomRight.g +
            bottomLeft.g) div 4).uint8,
          ((topLeft.b.uint32 + topRight.b + bottomRight.b +
            bottomLeft.b) div 4).uint8,
          ((topLeft.a.uint32 + topRight.a + bottomRight.a +
            bottomLeft.a) div 4).uint8
        )
        inc x

      if oddWidth:
        # Last source column has no right-hand neighbour: mix the two
        # rows, then scale by 0.5.
        let
          upper = src.data[src.dataIndex(src.width - 1, y * 2 + 0)]
          lower = src.data[src.dataIndex(src.width - 1, y * 2 + 1)]
        result.data[result.dataIndex(result.width - 1, y)] =
          mix(upper, lower, 0.5) * 0.5

    if oddHeight:
      # Last source row has no row below it: mix horizontal pairs, then
      # scale by 0.5.
      for x in 0 ..< evenWidth:
        let
          left = src.data[src.dataIndex(x * 2 + 0, src.height - 1)]
          right = src.data[src.dataIndex(x * 2 + 1, src.height - 1)]
        result.data[result.dataIndex(x, result.height - 1)] =
          mix(left, right, 0.5) * 0.5

      if oddWidth:
        # Bottom-right corner is a single source pixel scaled by 0.25.
        result.data[result.dataIndex(result.width - 1, result.height - 1)] =
          src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25

    # The next power (if any) minifies what was just produced.
    src = result
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
|
@ -10,20 +10,6 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
|||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||
|
||||
proc packAlphaValues(v: M128i): M128i {.inline.} =
  ## Moves the alpha value of each of the 4 colors in `v` into the first
  ## 4 bytes of the result.
  let
    zero = mm_setzero_si128()
    # Top byte of every 32-bit color, shifted down to the low byte.
    alphas = mm_srli_epi32(v, 24)
  mm_packus_epi16(mm_packus_epi16(alphas, zero), zero)
|
||||
|
||||
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline.} =
  ## Packs the alpha values of four vectors into one: `i` contributes at
  ## byte offset 0, `j` at 4, `k` at 8 and `l` at 12.
  result = packAlphaValues(i)
  result = mm_or_si128(result, mm_slli_si128(packAlphaValues(j), 4))
  result = mm_or_si128(result, mm_slli_si128(packAlphaValues(k), 8))
  result = mm_or_si128(result, mm_slli_si128(packAlphaValues(l), 12))
|
||||
|
||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
|
||||
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
|
||||
|
@ -167,6 +153,8 @@ proc isOpaqueSse2*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
|
|||
proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
||||
var i: int
|
||||
|
||||
# Not worth aligning
|
||||
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm_set1_epi16(0xff00)
|
||||
|
@ -200,12 +188,12 @@ proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
|||
i += 4
|
||||
|
||||
for i in i ..< data.len:
|
||||
var c = data[i]
|
||||
if c.a != 255:
|
||||
c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
|
||||
c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
|
||||
c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
|
||||
data[i] = c
|
||||
var rgbx = data[i]
|
||||
if rgbx.a != 255:
|
||||
rgbx.r = ((rgbx.r.uint32 * rgbx.a + 127) div 255).uint8
|
||||
rgbx.g = ((rgbx.g.uint32 * rgbx.a + 127) div 255).uint8
|
||||
rgbx.b = ((rgbx.b.uint32 * rgbx.a + 127) div 255).uint8
|
||||
data[i] = rgbx
|
||||
|
||||
proc invertSse2*(image: Image) {.simd.} =
|
||||
var
|
||||
|
@ -260,6 +248,16 @@ proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
|
|||
var
|
||||
i: int
|
||||
p = cast[uint](image.data[0].addr)
|
||||
# Align to 16 bytes
|
||||
while i < image.data.len and (p and 15) != 0:
|
||||
var rgbx = image.data[i]
|
||||
rgbx.r = ((rgbx.r * opacity) div 255).uint8
|
||||
rgbx.g = ((rgbx.g * opacity) div 255).uint8
|
||||
rgbx.b = ((rgbx.b * opacity) div 255).uint8
|
||||
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||
image.data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
|
||||
let
|
||||
oddMask = mm_set1_epi16(0xff00)
|
||||
|
@ -292,6 +290,186 @@ proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
|
|||
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||
image.data[i] = rgbx
|
||||
|
||||
proc ceilSse2*(image: Image) {.simd.} =
  ## Rewrites `image.data` in place so that every channel byte that is
  ## non-zero becomes 255 and every zero byte stays 0.
  ##
  ## Runs in three phases: a scalar head until the write pointer is
  ## 16-byte aligned, an SSE2 loop over 8 pixels (two vectors) at a time,
  ## and a scalar tail for whatever is left.
  if image.data.len == 0:
    # Nothing to process, and `image.data[0]` must not be evaluated.
    return

  var
    i: int
    p = cast[uint](image.data[0].addr)
  # Scalar head: one 4-byte pixel at a time until `p` is 16-byte aligned.
  while i < image.data.len and (p and 15) != 0:
    var rgbx = image.data[i]
    rgbx.r = if rgbx.r == 0: 0 else: 255
    rgbx.g = if rgbx.g == 0: 0 else: 255
    rgbx.b = if rgbx.b == 0: 0 else: 255
    rgbx.a = if rgbx.a == 0: 0 else: 255
    image.data[i] = rgbx
    inc i
    p += 4

  let
    vecZero = mm_setzero_si128()
    vec255 = mm_set1_epi8(255)
    # Count only the pixels that remain after the head. Using the full
    # length here would run the vector loop past the end of the buffer
    # whenever the head consumed any pixels.
    iterations = (image.data.len - i) div 8
  for _ in 0 ..< iterations:
    var
      values0 = mm_loadu_si128(cast[pointer](p))
      values1 = mm_loadu_si128(cast[pointer](p + 16))
    # cmpeq gives 0xff for bytes equal to zero; andnot with 255 flips
    # that into 0 for zero bytes and 255 for everything else.
    values0 = mm_cmpeq_epi8(values0, vecZero)
    values1 = mm_cmpeq_epi8(values1, vecZero)
    values0 = mm_andnot_si128(values0, vec255)
    values1 = mm_andnot_si128(values1, vec255)
    mm_storeu_si128(cast[pointer](p), values0)
    mm_storeu_si128(cast[pointer](p + 16), values1)
    p += 32
  i += 8 * iterations

  # Scalar tail: fewer than 8 pixels remain.
  for j in i ..< image.data.len:
    var rgbx = image.data[j]
    rgbx.r = if rgbx.r == 0: 0 else: 255
    rgbx.g = if rgbx.g == 0: 0 else: 255
    rgbx.b = if rgbx.b == 0: 0 else: 255
    rgbx.a = if rgbx.a == 0: 0 else: 255
    image.data[j] = rgbx
|
||||
|
||||
proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
  ## Halves the image size `power` times. Each result pixel is the
  ## per-channel sum of a 2x2 block of source pixels divided by 4.
  ## Odd source dimensions round up, so a 99 x 99 source gives 50 x 50.
  ##
  ## Raises `PixieError` when `power` is negative; `power == 0` returns
  ## `image.copy()`.
  if power < 0:
    raise newException(PixieError, "Cannot minifyBy2 with negative power")
  if power == 0:
    return image.copy()

  var src = image
  for _ in 1 .. power:
    let
      oddWidth = (src.width mod 2) != 0
      oddHeight = (src.height mod 2) != 0
      evenWidth = src.width div 2
      evenHeight = src.height div 2
    result = newImage(
      if oddWidth: evenWidth + 1 else: evenWidth,
      if oddHeight: evenHeight + 1 else: evenHeight
    )
    let
      # High byte of every 16-bit lane.
      oddMask = mm_set1_epi16(0xff00)
      mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
    for y in 0 ..< evenHeight:
      let
        topRowStart = src.dataIndex(0, y * 2)
        bottomRowStart = src.dataIndex(0, y * 2 + 1)

      var x: int
      while x <= evenWidth - 4:
        let
          top = mm_loadu_si128(src.data[topRowStart + x * 2].addr)
          bottom = mm_loadu_si128(src.data[bottomRowStart + x * 2].addr)
          # Each pixel's right-hand neighbour, moved down one pixel slot.
          topNext = mm_srli_si128(top, 4)
          bottomNext = mm_srli_si128(bottom, 4)
          # Sum the low bytes and the high bytes of the 16-bit lanes
          # separately so the four-way sums have room to grow.
          evenSum = mm_add_epi16(
            mm_add_epi16(
              mm_andnot_si128(oddMask, top),
              mm_andnot_si128(oddMask, topNext)
            ),
            mm_add_epi16(
              mm_andnot_si128(oddMask, bottom),
              mm_andnot_si128(oddMask, bottomNext)
            )
          )
          oddSum = mm_add_epi16(
            mm_add_epi16(
              mm_srli_epi16(top, 8),
              mm_srli_epi16(topNext, 8)
            ),
            mm_add_epi16(
              mm_srli_epi16(bottom, 8),
              mm_srli_epi16(bottomNext, 8)
            )
          )
          averaged = mm_or_si128(
            mm_srli_epi16(evenSum, 2),
            mm_slli_epi16(mm_srli_epi16(oddSum, 2), 8)
          )
          # Only 32-bit slots 0 and 2 hold finished pixels; mask the
          # others out before shuffling 0 and 2 into position.
          kept = mm_and_si128(averaged, mergedMask)
        mm_storeu_si128(
          result.data[result.dataIndex(x, y)].addr,
          mm_shuffle_epi32(kept, MM_SHUFFLE(3, 3, 2, 0))
        )
        x += 2

      # Scalar remainder of the row.
      while x < evenWidth:
        let
          topLeft = src.data[topRowStart + x * 2]
          topRight = src.data[topRowStart + x * 2 + 1]
          bottomRight = src.data[bottomRowStart + x * 2 + 1]
          bottomLeft = src.data[bottomRowStart + x * 2]
        result.data[result.dataIndex(x, y)] = rgbx(
          ((topLeft.r.uint32 + topRight.r + bottomRight.r +
            bottomLeft.r) div 4).uint8,
          ((topLeft.g.uint32 + topRight.g + bottomRight.g +
            bottomLeft.g) div 4).uint8,
          ((topLeft.b.uint32 + topRight.b + bottomRight.b +
            bottomLeft.b) div 4).uint8,
          ((topLeft.a.uint32 + topRight.a + bottomRight.a +
            bottomLeft.a) div 4).uint8
        )
        inc x

      if oddWidth:
        # Last source column has no right-hand neighbour: mix the two
        # rows, then scale by 0.5.
        let
          upper = src.data[src.dataIndex(src.width - 1, y * 2 + 0)]
          lower = src.data[src.dataIndex(src.width - 1, y * 2 + 1)]
        result.data[result.dataIndex(result.width - 1, y)] =
          mix(upper, lower, 0.5) * 0.5

    if oddHeight:
      # Last source row has no row below it: mix horizontal pairs, then
      # scale by 0.5.
      for x in 0 ..< evenWidth:
        let
          left = src.data[src.dataIndex(x * 2 + 0, src.height - 1)]
          right = src.data[src.dataIndex(x * 2 + 1, src.height - 1)]
        result.data[result.dataIndex(x, result.height - 1)] =
          mix(left, right, 0.5) * 0.5

      if oddWidth:
        # Bottom-right corner is a single source pixel scaled by 0.25.
        result.data[result.dataIndex(result.width - 1, result.height - 1)] =
          src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25

    # The next power (if any) minifies what was just produced.
    src = result
|
||||
|
||||
proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
  ## Scales the image up by `2 ^ power` by repeating every source pixel
  ## `scale` times horizontally and every expanded row `scale` times
  ## vertically.
  ##
  ## Raises `PixieError` when `power` is negative.
  if power < 0:
    raise newException(PixieError, "Cannot magnifyBy2 with negative power")

  let scale = 2 ^ power
  result = newImage(image.width * scale, image.height * scale)

  for y in 0 ..< image.height:
    let
      srcRow = image.dataIndex(0, y)
      dstRow = result.dataIndex(0, y * scale)

    # Expand one source row into the first of its result rows.
    var x: int
    if scale == 2:
      # Four source pixels per step: unpacklo/unpackhi of a vector with
      # itself doubles the lower and upper pixel pairs respectively.
      while x <= image.width - 4:
        let quad = mm_loadu_si128(image.data[srcRow + x].addr)
        mm_storeu_si128(
          result.data[dstRow + x * scale].addr,
          mm_unpacklo_epi32(quad, quad)
        )
        mm_storeu_si128(
          result.data[dstRow + x * scale + 4].addr,
          mm_unpackhi_epi32(quad, quad)
        )
        x += 4
    while x < image.width:
      let
        pixel = image.data[srcRow + x]
        dstStart = dstRow + x * scale
      for i in 0 ..< scale:
        result.data[dstStart + i] = pixel
      inc x

    # Duplicate the expanded row into the (scale - 1) rows below it;
    # each pixel is 4 bytes.
    for i in 1 ..< scale:
      copyMem(
        result.data[dstRow + result.width * i].addr,
        result.data[dstRow].addr,
        result.width * 4
      )
|
||||
|
||||
proc blitLineNormalSse2*(
|
||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||
) {.simd.} =
|
||||
|
|
|
@ -89,6 +89,10 @@ timeIt "toStraightAlpha":
|
|||
|
||||
reset()
|
||||
|
||||
timeIt "ceil":
|
||||
reset()
|
||||
image.ceil()
|
||||
|
||||
block:
|
||||
let image = newImage(200, 200)
|
||||
image.fill(rgbx(255, 0, 0, 255))
|
||||
|
|
Loading…
Reference in a new issue