Merge pull request #105 from guzba/master
simd image blend when possible
This commit is contained in:
commit
08064673b7
8 changed files with 254 additions and 118 deletions
|
@ -534,16 +534,41 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
proc blendMaskSimd*(backdrop, source: M128i): M128i =
  ## Scales every byte of `backdrop` by the top byte of the matching 32-bit
  ## lane of `source` (named sourceAlpha in the original; presumably the
  ## pixel's alpha channel), dividing by 255.
  let
    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))

    # Keep only the top byte of each 32-bit lane, then copy it into the
    # lower 16-bit half so both 16-bit lanes hold that byte shifted left 8.
    alphaHigh = mm_and_si128(source, alphaMask)
    alpha = mm_or_si128(alphaHigh, mm_srli_epi32(alphaHigh, 16))

    # Both operands sit in the high byte of their 16-bit lane, so the high
    # half of the product is the plain byte * byte product.
    evenProduct = mm_mulhi_epu16(mm_slli_epi16(backdrop, 8), alpha)
    oddProduct = mm_mulhi_epu16(mm_and_si128(backdrop, oddMask), alpha)

    # div 255
    evenResult = mm_srli_epi16(mm_mulhi_epu16(evenProduct, div255), 7)
    oddResult = mm_srli_epi16(mm_mulhi_epu16(oddProduct, div255), 7)

  # Move the odd results back into the high byte and merge with the evens.
  mm_or_si128(evenResult, mm_slli_epi16(oddResult, 8))
|
||||||
|
|
||||||
proc blendOverwriteSimd*(backdrop, source: M128i): M128i =
  ## Overwrite blend: the result is `source` unchanged; `backdrop` is ignored.
  result = source
|
||||||
|
|
||||||
proc blenderSimd*(blendMode: BlendMode): BlenderSimd =
  ## Returns the SIMD blend proc for `blendMode`.
  ## Raises PixieError for blend modes that have no SIMD implementation.
  case blendMode:
  of bmNormal:
    result = blendNormalSimd
  of bmMask:
    result = blendMaskSimd
  of bmOverwrite:
    result = blendOverwriteSimd
  else:
    raise newException(PixieError, "No SIMD blender for " & $blendMode)
|
||||||
|
|
||||||
|
proc hasSimdBlender*(blendMode: BlendMode): bool =
  ## True for the blend modes bmNormal, bmMask and bmOverwrite.
  case blendMode:
  of bmNormal, bmMask, bmOverwrite: true
  else: false
|
||||||
|
|
||||||
proc maskNormalSimd*(backdrop, source: M128i): M128i =
|
proc maskNormalSimd*(backdrop, source: M128i): M128i =
|
||||||
## Blending masks
|
## Blending masks
|
||||||
let
|
let
|
||||||
|
@ -583,12 +608,46 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
|
||||||
mm_or_si128(blendedEven, blendedOdd)
|
mm_or_si128(blendedEven, blendedOdd)
|
||||||
|
|
||||||
|
proc maskMaskSimd*(backdrop, source: M128i): M128i =
  ## Mask-blends sixteen 8-bit mask values at once: every output byte is
  ## `backdrop * source div 255`.
  ##
  ## The earlier version multiplied the backdrop by `255 - source`, which
  ## erased the backdrop wherever the source was fully opaque. That is the
  ## opposite of what blendMaskSimd does for bmMask, so it is fixed here.
  let
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))

    # Put each byte in the high half of its 16-bit lane. The left shift
    # already discards the odd byte, so no extra masking is needed.
    sourceEven = mm_slli_epi16(source, 8)
    sourceOdd = mm_and_si128(source, oddMask)

  var
    backdropEven = mm_slli_epi16(backdrop, 8)
    backdropOdd = mm_and_si128(backdrop, oddMask)

  # backdrop * source: with both operands in the high byte, the high 16 bits
  # of the product are the byte * byte product (at most 255 * 255).
  backdropEven = mm_mulhi_epu16(backdropEven, sourceEven)
  backdropOdd = mm_mulhi_epu16(backdropOdd, sourceOdd)

  # div 255
  backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
  backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)

  # Move the odd results back into the high byte and merge with the evens.
  mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
|
|
||||||
proc maskerSimd*(blendMode: BlendMode): MaskerSimd =
  ## Returns the SIMD mask-blend proc for `blendMode`.
  ## Raises PixieError for blend modes that have no SIMD implementation.
  case blendMode:
  of bmNormal:
    result = maskNormalSimd
  of bmMask:
    result = maskMaskSimd
  of bmOverwrite:
    result = blendOverwriteSimd
  else:
    raise newException(PixieError, "No SIMD masker for " & $blendMode)
|
||||||
|
|
||||||
|
proc hasSimdMasker*(blendMode: BlendMode): bool =
  ## True for the blend modes bmNormal, bmMask and bmOverwrite.
  case blendMode:
  of bmNormal, bmMask, bmOverwrite: true
  else: false
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -369,6 +369,107 @@ proc invert*(target: Image | Mask) =
|
||||||
for j in i ..< target.data.len:
|
for j in i ..< target.data.len:
|
||||||
target.data[j] = (255 - target.data[j]).uint8
|
target.data[j] = (255 - target.data[j]).uint8
|
||||||
|
|
||||||
|
proc blur*(target: Image | Mask, radius: float32) =
  ## Applies Gaussian blur to `target` in place. The blur runs as two 1D
  ## passes: a horizontal pass into a temporary, then a vertical pass that
  ## writes back into `target`. `radius` is rounded to an integer first and
  ## a rounded radius of 0 returns without touching `target`.
  let radius = round(radius).int
  if radius == 0:
    return

  proc gaussianLookup(radius: int): seq[uint32] =
    ## Compute lookup table for 1d Gaussian kernel, one entry per offset in
    ## -radius .. radius. Values are [0, 255] * 1024.
    let
      size = radius * 2 + 1
      s = radius.float32 / 2.2 # 2.2 matches Figma.
    result.setLen(size)

    var
      weights = newSeq[float32](size)
      total = 0.0
    for offset in -radius .. radius:
      let
        x = offset.float32
        weight = 1 / sqrt(2 * PI * s^2) * exp(-1 * x^2 / (2 * s^2))
      weights[offset + radius] = weight
      total += weight

    # Normalize by the accumulated total before converting to fixed point.
    for weight in weights.mitems:
      weight = weight / total

    for i, weight in weights:
      result[i] = round(weight * 255 * 1024).uint32

  let lookup = gaussianLookup(radius)

  when type(target) is Image:

    template `*`(pixel: ColorRGBA, weight: uint32): array[4, uint32] =
      ## Per-channel product of a pixel and a fixed-point kernel weight.
      [
        pixel.r * weight,
        pixel.g * weight,
        pixel.b * weight,
        pixel.a * weight
      ]

    template `+=`(sums: var array[4, uint32], addend: array[4, uint32]) =
      ## Element-wise accumulation of the four channels.
      sums[0] += addend[0]
      sums[1] += addend[1]
      sums[2] += addend[2]
      sums[3] += addend[3]

    template rgba(sums: array[4, uint32]): ColorRGBA =
      ## Converts fixed-point channel sums back to 8-bit channels.
      rgba(
        (sums[0] div 1024 div 255).uint8,
        (sums[1] div 1024 div 255).uint8,
        (sums[2] div 1024 div 255).uint8,
        (sums[3] div 1024 div 255).uint8
      )

    # Blur in the X direction.
    var blurX = newImage(target.width, target.height)
    for y in 0 ..< target.height:
      for x in 0 ..< target.width:
        var sums: array[4, uint32]
        for i, weight in lookup:
          # Kernel index i corresponds to the offset i - radius.
          let pixel = target[x + i - radius, y]
          sums += pixel * weight
        blurX.setRgbaUnsafe(x, y, rgba(sums))

    # Blur in the Y direction.
    for y in 0 ..< target.height:
      for x in 0 ..< target.width:
        var sums: array[4, uint32]
        for i, weight in lookup:
          let pixel = blurX[x, y + i - radius]
          sums += pixel * weight
        target.setRgbaUnsafe(x, y, rgba(sums))

  else: # target is a Mask

    # Blur in the X direction.
    var blurX = newMask(target.width, target.height)
    for y in 0 ..< target.height:
      for x in 0 ..< target.width:
        var sum: uint32
        for i, weight in lookup:
          let coverage = target[x + i - radius, y]
          sum += coverage * weight
        blurX.setValueUnsafe(x, y, (sum div 1024 div 255).uint8)

    # Blur in the Y direction and modify image.
    for y in 0 ..< target.height:
      for x in 0 ..< target.width:
        var sum: uint32
        for i, weight in lookup:
          let coverage = blurX[x, y + i - radius]
          sum += coverage * weight
        target.setValueUnsafe(x, y, (sum div 1024 div 255).uint8)
|
||||||
|
|
||||||
proc newMask*(image: Image): Mask =
|
proc newMask*(image: Image): Mask =
|
||||||
## Returns a new mask using the alpha values of the parameter image.
|
## Returns a new mask using the alpha values of the parameter image.
|
||||||
result = newMask(image.width, image.height)
|
result = newMask(image.width, image.height)
|
||||||
|
@ -508,110 +609,6 @@ proc draw*(
|
||||||
) {.inline.} =
|
) {.inline.} =
|
||||||
mask.draw(image, translate(pos), blendMode)
|
mask.draw(image, translate(pos), blendMode)
|
||||||
|
|
||||||
proc blur*(target: Image | Mask, radius: float32) =
|
|
||||||
## Applies Gaussian blur to the image given a radius.
|
|
||||||
let radius = round(radius).int
|
|
||||||
if radius == 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
proc gaussianLookup(radius: int): seq[uint32] =
|
|
||||||
## Compute lookup table for 1d Gaussian kernel.
|
|
||||||
## Values are [0, 255] * 1024.
|
|
||||||
result.setLen(radius * 2 + 1)
|
|
||||||
|
|
||||||
var
|
|
||||||
floats = newSeq[float32](result.len)
|
|
||||||
total = 0.0
|
|
||||||
for xb in -radius .. radius:
|
|
||||||
let
|
|
||||||
s = radius.float32 / 2.2 # 2.2 matches Figma.
|
|
||||||
x = xb.float32
|
|
||||||
a = 1 / sqrt(2 * PI * s^2) * exp(-1 * x^2 / (2 * s^2))
|
|
||||||
floats[xb + radius] = a
|
|
||||||
total += a
|
|
||||||
for xb in -radius .. radius:
|
|
||||||
floats[xb + radius] = floats[xb + radius] / total
|
|
||||||
|
|
||||||
for i, f in floats:
|
|
||||||
result[i] = round(f * 255 * 1024).uint32
|
|
||||||
|
|
||||||
let lookup = gaussianLookup(radius)
|
|
||||||
|
|
||||||
when type(target) is Image:
|
|
||||||
|
|
||||||
template `*`(sample: ColorRGBA, a: uint32): array[4, uint32] =
|
|
||||||
[
|
|
||||||
sample.r * a,
|
|
||||||
sample.g * a,
|
|
||||||
sample.b * a,
|
|
||||||
sample.a * a
|
|
||||||
]
|
|
||||||
|
|
||||||
template `+=`(values: var array[4, uint32], sample: array[4, uint32]) =
|
|
||||||
values[0] += sample[0]
|
|
||||||
values[1] += sample[1]
|
|
||||||
values[2] += sample[2]
|
|
||||||
values[3] += sample[3]
|
|
||||||
|
|
||||||
template rgba(values: array[4, uint32]): ColorRGBA =
|
|
||||||
rgba(
|
|
||||||
(values[0] div 1024 div 255).uint8,
|
|
||||||
(values[1] div 1024 div 255).uint8,
|
|
||||||
(values[2] div 1024 div 255).uint8,
|
|
||||||
(values[3] div 1024 div 255).uint8
|
|
||||||
)
|
|
||||||
|
|
||||||
# Blur in the X direction.
|
|
||||||
var blurX = newImage(target.width, target.height)
|
|
||||||
for y in 0 ..< target.height:
|
|
||||||
for x in 0 ..< target.width:
|
|
||||||
var values: array[4, uint32]
|
|
||||||
for xb in -radius .. radius:
|
|
||||||
let
|
|
||||||
sample = target[x + xb, y]
|
|
||||||
a = lookup[xb + radius].uint32
|
|
||||||
values += sample * a
|
|
||||||
blurX.setRgbaUnsafe(x, y, values.rgba())
|
|
||||||
|
|
||||||
# Blur in the Y direction.
|
|
||||||
for y in 0 ..< target.height:
|
|
||||||
for x in 0 ..< target.width:
|
|
||||||
var values: array[4, uint32]
|
|
||||||
for yb in -radius .. radius:
|
|
||||||
let
|
|
||||||
sample = blurX[x, y + yb]
|
|
||||||
a = lookup[yb + radius].uint32
|
|
||||||
values += sample * a
|
|
||||||
target.setRgbaUnsafe(x, y, values.rgba())
|
|
||||||
|
|
||||||
else: # target is a Mask
|
|
||||||
|
|
||||||
# Blur in the X direction.
|
|
||||||
var blurX = newMask(target.width, target.height)
|
|
||||||
for y in 0 ..< target.height:
|
|
||||||
for x in 0 ..< target.width:
|
|
||||||
var value: uint32
|
|
||||||
for xb in -radius .. radius:
|
|
||||||
let
|
|
||||||
sample = target[x + xb, y]
|
|
||||||
a = lookup[xb + radius].uint32
|
|
||||||
value += sample * a
|
|
||||||
blurX.setValueUnsafe(x, y, (value div 1024 div 255).uint8)
|
|
||||||
|
|
||||||
# Blur in the Y direction and modify image.
|
|
||||||
for y in 0 ..< target.height:
|
|
||||||
for x in 0 ..< target.width:
|
|
||||||
var value: uint32
|
|
||||||
for yb in -radius .. radius:
|
|
||||||
let
|
|
||||||
sample = blurX[x, y + yb]
|
|
||||||
a = lookup[yb + radius].uint32
|
|
||||||
value += sample * a
|
|
||||||
target.setValueUnsafe(x, y, (value div 1024 div 255).uint8)
|
|
||||||
|
|
||||||
when defined(release):
|
|
||||||
{.pop.}
|
|
||||||
|
|
||||||
proc drawUber(
|
proc drawUber(
|
||||||
a, b: Image,
|
a, b: Image,
|
||||||
p, dx, dy: Vec2,
|
p, dx, dy: Vec2,
|
||||||
|
@ -659,18 +656,44 @@ proc drawUber(
|
||||||
if xMin > 0:
|
if xMin > 0:
|
||||||
zeroMem(a.data[a.dataIndex(0, y)].addr, 4 * xMin)
|
zeroMem(a.data[a.dataIndex(0, y)].addr, 4 * xMin)
|
||||||
|
|
||||||
|
if smooth:
|
||||||
for x in xMin ..< xMax:
|
for x in xMin ..< xMax:
|
||||||
let
|
let
|
||||||
srcPos = p + dx * float32(x) + dy * float32(y)
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
xFloat = srcPos.x - h
|
xFloat = srcPos.x - h
|
||||||
yFloat = srcPos.y - h
|
yFloat = srcPos.y - h
|
||||||
rgba = a.getRgbaUnsafe(x, y)
|
backdrop = a.getRgbaUnsafe(x, y)
|
||||||
rgba2 =
|
source = b.getRgbaSmooth(xFloat, yFloat)
|
||||||
if smooth:
|
a.setRgbaUnsafe(x, y, blender(backdrop, source))
|
||||||
b.getRgbaSmooth(xFloat, yFloat)
|
|
||||||
else:
|
else:
|
||||||
b.getRgbaUnsafe(xFloat.int, yFloat.int)
|
var x = xMin
|
||||||
a.setRgbaUnsafe(x, y, blender(rgba, rgba2))
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
if blendMode.hasSimdBlender():
|
||||||
|
if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1:
|
||||||
|
# Check we are not rotated before using SIMD blends
|
||||||
|
let blenderSimd = blendMode.blenderSimd()
|
||||||
|
for _ in countup(x, xMax - 4, 4):
|
||||||
|
let
|
||||||
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
|
sx = srcPos.x.int
|
||||||
|
sy = srcPos.y.int
|
||||||
|
backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
|
||||||
|
source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
|
||||||
|
mm_storeu_si128(
|
||||||
|
a.data[a.dataIndex(x, y)].addr,
|
||||||
|
blenderSimd(backdrop, source)
|
||||||
|
)
|
||||||
|
x += 4
|
||||||
|
|
||||||
|
for _ in x ..< xMax:
|
||||||
|
let
|
||||||
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
|
xFloat = srcPos.x - h
|
||||||
|
yFloat = srcPos.y - h
|
||||||
|
backdrop = a.getRgbaUnsafe(x, y)
|
||||||
|
source = b.getRgbaUnsafe(xFloat.int, yFloat.int)
|
||||||
|
a.setRgbaUnsafe(x, y, blender(backdrop, source))
|
||||||
|
inc x
|
||||||
|
|
||||||
if blendMode == bmIntersectMask:
|
if blendMode == bmIntersectMask:
|
||||||
if a.width - xMax > 0:
|
if a.width - xMax > 0:
|
||||||
|
@ -761,3 +784,6 @@ proc shadow*(
|
||||||
result = newImage(mask.width, mask.height)
|
result = newImage(mask.width, mask.height)
|
||||||
result.fill(color)
|
result.fill(color)
|
||||||
result.draw(mask, blendMode = bmMask)
|
result.draw(mask, blendMode = bmMask)
|
||||||
|
|
||||||
|
when defined(release):
|
||||||
|
{.pop.}
|
||||||
|
|
BIN
tests/images/rotate0.png
Normal file
BIN
tests/images/rotate0.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.8 KiB |
BIN
tests/images/rotate180.png
Normal file
BIN
tests/images/rotate180.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 7.7 KiB |
BIN
tests/images/rotate270.png
Normal file
BIN
tests/images/rotate270.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.4 KiB |
BIN
tests/images/rotate360.png
Normal file
BIN
tests/images/rotate360.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.7 KiB |
BIN
tests/images/rotate90.png
Normal file
BIN
tests/images/rotate90.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.4 KiB |
51
tests/test_images_draw.nim
Normal file
51
tests/test_images_draw.nim
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
import chroma, pixie, vmath
|
||||||
|
|
||||||
|
template drawAndSave(transform: untyped, path: string) =
  ## Fills a 1000x1000 image with rgba(255, 0, 0, 255), draws a 500x500
  ## image filled with rgba(0, 255, 0, 255) onto it using `transform`, and
  ## writes the result to `path`.
  block:
    let
      backdrop = newImage(1000, 1000)
      overlay = newImage(500, 500)
    backdrop.fill(rgba(255, 0, 0, 255))
    overlay.fill(rgba(0, 255, 0, 255))

    backdrop.draw(overlay, transform)
    backdrop.writeFile(path)

drawAndSave(
  translate(vec2(250, 250)),
  "tests/images/rotate0.png"
)

drawAndSave(
  translate(vec2(250, 250)) * rotationMat3(90 * PI / 180),
  "tests/images/rotate90.png"
)

drawAndSave(
  translate(vec2(250, 250)) * rotationMat3(180 * PI / 180),
  "tests/images/rotate180.png"
)

drawAndSave(
  translate(vec2(250, 250)) * rotationMat3(270 * PI / 180),
  "tests/images/rotate270.png"
)

drawAndSave(
  translate(vec2(250, 250)) * rotationMat3(360 * PI / 180),
  "tests/images/rotate360.png"
)
|
Loading…
Reference in a new issue