Merge pull request from guzba/master

paths and simd perf improvements
This commit is contained in:
Andre von Houck 2022-06-21 08:29:39 -07:00 committed by GitHub
commit a9dc2d7bb6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 766 additions and 1164 deletions

View file

@ -1,264 +1,55 @@
import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat
import benchy, cairo, pixie, pixie/fileformats/svg {.all.}, pixie/paths {.all.}
when defined(amd64) and not defined(pixieNoSimd):
import nimsimd/sse2, pixie/internal
type
Fill = object
shapes: seq[Polygon]
transform: Mat3
paint: Paint
windingRule: WindingRule
proc doDiff(a, b: Image, name: string) =
let (diffScore, diffImage) = diff(a, b)
echo &"{name} score: {diffScore}"
diffImage.writeFile(&"{name}_diff.png")
Benchmark = object
name: string
fills: seq[Fill]
when defined(release):
{.push checks: off.}
var benchmarks: seq[Benchmark]
proc fillMask(
shapes: seq[seq[Vec2]], width, height: int, windingRule = NonZero
): Mask =
result = newMask(width, height)
let
segments = shapes.shapesToSegments()
bounds = computeBounds(segments).snapToPixels()
startY = max(0, bounds.y.int)
pathHeight = min(height, (bounds.y + bounds.h).int)
partitioning = partitionSegments(segments, startY, pathHeight)
width = width.float32
var
hits = newSeq[(float32, int16)](partitioning.maxEntryCount)
numHits: int
aa: bool
for y in startY ..< pathHeight:
computeCoverage(
cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr),
hits,
numHits,
aa,
width,
y,
0,
partitioning,
windingRule
)
if not aa:
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
let
startIndex = result.dataIndex(prevAt.int, y)
len = at.int - prevAt.int
fillUnsafe(result.data, 255, startIndex, len)
proc fillMask*(
path: SomePath, width, height: int, windingRule = NonZero
): Mask =
## Returns a new mask with the path filled. This is a faster alternative
## to `newMask` + `fillPath`.
let shapes = parseSomePath(path, true, 1)
shapes.fillMask(width, height, windingRule)
proc fillImage(
shapes: seq[seq[Vec2]],
width, height: int,
color: SomeColor,
windingRule = NonZero
): Image =
result = newImage(width, height)
let
mask = shapes.fillMask(width, height, windingRule)
rgbx = color.rgbx()
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
vec255 = mm_set1_epi32(cast[int32](uint32.high))
vecZero = mm_setzero_si128()
colorVecEven = mm_slli_epi16(colorVec, 8)
colorVecOdd = mm_and_si128(colorVec, oddMask)
iterations = result.data.len div 16
for _ in 0 ..< iterations:
var coverageVec = mm_loadu_si128(mask.data[i].addr)
if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff:
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
for q in [0, 4, 8, 12]:
mm_storeu_si128(result.data[i + q].addr, colorVec)
else:
for q in [0, 4, 8, 12]:
var unpacked = unpackAlphaValues(coverageVec)
# Shift the coverages from `a` to `g` and `a` for multiplying
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
var
sourceEven = mm_mulhi_epu16(colorVecEven, unpacked)
sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked)
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
mm_storeu_si128(
result.data[i + q].addr,
mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
)
coverageVec = mm_srli_si128(coverageVec, 4)
i += 16
let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32]
for i in i ..< result.data.len:
let coverage = mask.data[i]
if coverage == 255:
result.data[i] = rgbx
elif coverage != 0:
result.data[i].r = ((channels[0] * coverage) div 255).uint8
result.data[i].g = ((channels[1] * coverage) div 255).uint8
result.data[i].b = ((channels[2] * coverage) div 255).uint8
result.data[i].a = ((channels[3] * coverage) div 255).uint8
proc fillImage*(
path: SomePath, width, height: int, color: SomeColor, windingRule = NonZero
): Image =
## Returns a new image with the path filled. This is a faster alternative
## to `newImage` + `fillPath`.
let shapes = parseSomePath(path, false, 1)
shapes.fillImage(width, height, color, windingRule)
proc strokeMask*(
path: SomePath,
width, height: int,
strokeWidth: float32 = 1.0,
lineCap = ButtCap,
lineJoin = MiterJoin,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Mask =
## Returns a new mask with the path stroked. This is a faster alternative
## to `newImage` + `strokePath`.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillMask(width, height, NonZero)
proc strokeImage*(
path: SomePath,
width, height: int,
color: SomeColor,
strokeWidth: float32 = 1.0,
lineCap = ButtCap,
lineJoin = MiterJoin,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Image =
## Returns a new image with the path stroked. This is a faster alternative
## to `newImage` + `strokePath`.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillImage(width, height, color, NonZero)
when defined(release):
{.pop.}
block:
block: # Basic rect
let path = newPath()
path.moveTo(0, 0)
path.lineTo(1920, 0)
path.lineTo(1920, 1080)
path.lineTo(0, 1080)
path.closePath()
let shapes = path.commandsToShapes(true, 1)
path.rect(rect(0, 0, 900, 900))
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
ctx = surface.create()
ctx.setSourceRgba(0, 0, 1, 1)
shapes = path.commandsToShapes(true, 1)
paint = newPaint(SolidPaint)
paint.color = color(0, 0, 0, 1)
timeIt "cairo1":
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
benchmarks.add(Benchmark(
name: "rect",
fills: @[Fill(
shapes: shapes,
transform: mat3(),
paint: paint,
windingRule: NonZero
)]))
# discard surface.writeToPng("cairo1.png")
let a = newImage(1920, 1080)
timeIt "pixie1":
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(0, 0, 255, 255))
# a.writeFile("pixie1.png")
block:
block: # Rounded rect
let path = newPath()
path.moveTo(500, 240)
path.lineTo(1500, 240)
path.lineTo(1920, 600)
path.lineTo(0, 600)
path.closePath()
let shapes = path.commandsToShapes(true, 1)
path.roundedRect(rect(0, 0, 900, 900), 20, 20, 20, 20)
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
ctx = surface.create()
shapes = path.commandsToShapes(true, 1)
paint = newPaint(SolidPaint)
paint.color = color(0, 0, 0, 1)
timeIt "cairo2":
ctx.setSourceRgba(1, 1, 1, 1)
let operator = ctx.getOperator()
ctx.setOperator(OperatorSource)
ctx.paint()
ctx.setOperator(operator)
benchmarks.add(Benchmark(
name: "roundedRect",
fills: @[Fill(
shapes: shapes,
transform: mat3(),
paint: paint,
windingRule: NonZero
)]))
ctx.setSourceRgba(0, 0, 1, 1)
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
# discard surface.writeToPng("cairo2.png")
let a = newImage(1920, 1080)
timeIt "pixie2":
a.fill(rgbx(255, 255, 255, 255))
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(0, 0, 255, 255))
# a.writeFile("pixie2.png")
block:
block: # Heart
let path = parsePath("""
M 100,300
A 200,200 0,0,1 500,300
@ -267,131 +58,114 @@ block:
Q 100,600 100,300 z
""")
let shapes = path.commandsToShapes(true, 1)
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
ctx = surface.create()
shapes = path.commandsToShapes(true, 1)
paint = newPaint(SolidPaint)
paint.color = color(0, 0, 0, 1)
timeIt "cairo3":
ctx.setSourceRgba(1, 1, 1, 1)
let operator = ctx.getOperator()
ctx.setOperator(OperatorSource)
ctx.paint()
ctx.setOperator(operator)
benchmarks.add(Benchmark(
name: "Heart",
fills: @[Fill(
shapes: shapes,
transform: mat3(),
paint: paint,
windingRule: NonZero
)]))
ctx.setSourceRgba(1, 0, 0, 1)
block: # Tiger
let
data = readFile("tests/fileformats/svg/Ghostscript_Tiger.svg")
parsed = parseSvg(data)
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
var fills: seq[Fill]
# discard surface.writeToPng("cairo3.png")
for (path, props) in parsed.elements:
if props.display and props.opacity > 0:
if props.fill != "none":
let
shapes = path.commandsToShapes(true, 1)
paint = parseSomePaint(props.fill)
fills.add(Fill(
shapes: shapes,
transform: props.transform,
paint: paint,
windingRule: props.fillRule
))
let a = newImage(1000, 1000)
if props.stroke != rgbx(0, 0, 0, 0) and props.strokeWidth > 0:
let strokeShapes = strokeShapes(
parseSomePath(path, false, props.transform.pixelScale),
props.strokeWidth,
props.strokeLineCap,
props.strokeLineJoin,
props.strokeMiterLimit,
props.strokeDashArray,
props.transform.pixelScale
)
let paint = newPaint(props.stroke)
paint.color.a *= (props.opacity * props.strokeOpacity)
fills.add(Fill(
shapes: strokeShapes,
transform: props.transform,
paint: paint,
windingRule: NonZero
))
timeIt "pixie3":
a.fill(rgbx(255, 255, 255, 255))
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(255, 0, 0, 255))
# a.writeFile("pixie3.png")
# doDiff(readImage("cairo3.png"), a, "cairo3")
# benchmarks.add(fills)
block:
let path = newPath()
path.roundedRect(200, 200, 600, 600, 10, 10, 10, 10)
let shapes = path.commandsToShapes(true, 1)
# let
# surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
# ctx = surface.create()
# timeIt "cairo4":
# ctx.setSourceRgba(0, 0, 0, 0)
# let operator = ctx.getOperator()
# ctx.setOperator(OperatorSource)
# ctx.paint()
# ctx.setOperator(operator)
timeIt "cairo4":
for benchmark in benchmarks:
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
surface = imageSurfaceCreate(FORMAT_ARGB32, 900, 900)
ctx = surface.create()
ctx.setSourceRgba(1, 0, 0, 0.5)
timeIt "[cairo] " & benchmark.name:
for fill in benchmark.fills:
if fill.shapes.len > 0:
ctx.newPath()
for shape in fill.shapes:
ctx.moveTo(shape[0].x, shape[0].y)
for v in shape:
ctx.lineTo(v.x, v.y)
let
color = fill.paint.color
matrix = Matrix(
xx: fill.transform[0, 0],
yx: fill.transform[0, 1],
xy: fill.transform[1, 0],
yy: fill.transform[1, 1],
x0: fill.transform[2, 0],
y0: fill.transform[2, 1],
)
ctx.setSourceRgba(color.r, color.g, color.b, color.a)
ctx.setMatrix(matrix.unsafeAddr)
ctx.setFillRule(
if fill.windingRule == NonZero:
FillRuleWinding
else:
FillRuleEvenOdd
)
ctx.fill()
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
# discard surface.writeToPng(("cairo_" & benchmark.name & ".png").cstring)
# discard surface.writeToPng("cairo4.png")
block:
for benchmark in benchmarks:
let image = newImage(900, 900)
var a: Image
timeIt "pixie4":
a = newImage(1000, 1000)
timeIt "[pixie] " & benchmark.name:
for fill in benchmark.fills:
if fill.shapes.len > 0:
let p = newPath()
for shape in fill.shapes:
p.moveTo(shape[0])
for v in shape:
p.lineTo(v)
image.fillPath(
p,
fill.paint,
fill.transform,
fill.windingRule
)
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(127, 0, 0, 127))
# a.writeFile("pixie4.png")
# doDiff(readImage("cairo4.png"), a, "4")
var b: Image
let paint = newPaint(SolidPaint)
paint.color = color(1, 0, 0, 0.5)
paint.blendMode = OverwriteBlend
timeIt "pixie4 overwrite":
b = newImage(1000, 1000)
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
b.fillPath(p, paint)
# b.writeFile("b.png")
timeIt "pixie4 mask":
let mask = newMask(1000, 1000)
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
mask.fillPath(p)
var tmp: Image
timeIt "pixie fillImage":
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
tmp = p.fillImage(1000, 1000, rgbx(127, 0, 0, 127))
# tmp.writeFile("tmp.png")
# image.writeFile("pixie_" & benchmark.name & ".png")

View file

@ -0,0 +1,397 @@
import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat
when defined(amd64) and not defined(pixieNoSimd):
import nimsimd/sse2, pixie/internal
proc doDiff(a, b: Image, name: string) =
  ## Diffs images `a` and `b`, prints the resulting score prefixed with
  ## `name`, and writes the diff image to `<name>_diff.png`.
  let
    comparison = diff(a, b)
    diffScore = comparison[0]
    diffImage = comparison[1]
  echo &"{name} score: {diffScore}"
  writeFile(diffImage, &"{name}_diff.png")
when defined(release):
{.push checks: off.}
proc fillMask(
shapes: seq[seq[Vec2]], width, height: int, windingRule = NonZero
): Mask =
## Fills already-flattened `shapes` into a newly created `width` x `height`
## mask using `windingRule`, working one scanline at a time.
result = newMask(width, height)
let
segments = shapes.shapesToSegments()
bounds = computeBounds(segments).snapToPixels()
# Visit only the rows covered by both the path bounds and the mask.
startY = max(0, bounds.y.int)
pathHeight = min(height, (bounds.y + bounds.h).int)
partitioning = partitionSegments(segments, startY, pathHeight)
# Shadows the integer parameter with its float32 value for the calls below.
width = width.float32
var
# Scratch buffer allocated once and reused for every scanline.
hits = newSeq[(float32, int16)](partitioning.maxEntryCount)
numHits: int
aa: bool
for y in startY ..< pathHeight:
# The first argument points at the first byte of row `y` of the mask.
# NOTE(review): `numHits` and `aa` are only read after this call, so they
# look like outputs of computeCoverage -- confirm in pixie/paths.
computeCoverage(
cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr),
hits,
numHits,
aa,
width,
y,
0,
partitioning,
windingRule
)
if not aa:
# Write solid 255 coverage over each [prevAt, at) span yielded by `walk`.
# The `count` element of the yielded tuple is not used here.
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
let
startIndex = result.dataIndex(prevAt.int, y)
len = at.int - prevAt.int
fillUnsafe(result.data, 255, startIndex, len)
proc fillMask*(
  path: SomePath, width, height: int, windingRule = NonZero
): Mask =
  ## Builds a fresh `width` x `height` mask with `path` filled into it.
  ## Intended as a faster alternative to `newMask` + `fillPath`.
  # The path is flattened first (with `true` and `1` forwarded to
  # `parseSomePath`), then handed to the shape-based overload.
  result = fillMask(parseSomePath(path, true, 1), width, height, windingRule)
proc fillImage(
shapes: seq[seq[Vec2]],
width, height: int,
color: SomeColor,
windingRule = NonZero
): Image =
## Fills already-flattened `shapes` with `color` into a newly created
## `width` x `height` image. A coverage mask is rasterized first and the
## color is then scaled by each pixel's coverage.
result = newImage(width, height)
let
mask = shapes.fillMask(width, height, windingRule)
rgbx = color.rgbx()
# Index of the next pixel to process; advanced by the SIMD loop so the scalar
# loop below only handles what is left.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
oddMask = mm_set1_epi16(cast[int16](0xff00))
# Multiply-high by 0x8081 followed by a 7-bit right shift is
# (x * 0x8081) shr 23, the usual fixed-point form of `x div 255`.
div255 = mm_set1_epi16(cast[int16](0x8081))
vec255 = mm_set1_epi32(cast[int32](uint32.high))
vecZero = mm_setzero_si128()
# Each 16-bit lane of `colorVec` shifted left by 8 / masked with 0xff00.
colorVecEven = mm_slli_epi16(colorVec, 8)
colorVecOdd = mm_and_si128(colorVec, oddMask)
iterations = result.data.len div 16
for _ in 0 ..< iterations:
# Load 16 coverage bytes starting at mask index `i`.
var coverageVec = mm_loadu_si128(mask.data[i].addr)
# Skip the stores entirely when all 16 coverage bytes are zero.
if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff:
# All 16 coverage bytes are 255: store the plain color.
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
# Each 128-bit store starts at pixel i + q; presumably 4 RGBX pixels.
for q in [0, 4, 8, 12]:
mm_storeu_si128(result.data[i + q].addr, colorVec)
else:
for q in [0, 4, 8, 12]:
# NOTE(review): unpackAlphaValues comes from pixie/internal; presumably
# it spreads the low 4 coverage bytes into per-pixel lanes -- confirm.
var unpacked = unpackAlphaValues(coverageVec)
# Shift the coverages from `a` to `g` and `a` for multiplying
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
var
sourceEven = mm_mulhi_epu16(colorVecEven, unpacked)
sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked)
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
mm_storeu_si128(
result.data[i + q].addr,
mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
)
# Drop the 4 coverage bytes just consumed.
coverageVec = mm_srli_si128(coverageVec, 4)
i += 16
# Scalar path for every index from `i` to the end of the image.
# NOTE(review): indentation is lost in this view; this loop presumably sits
# outside the `when` so it also serves builds without SIMD -- confirm.
let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32]
for i in i ..< result.data.len:
let coverage = mask.data[i]
if coverage == 255:
result.data[i] = rgbx
elif coverage != 0:
# Scale each channel by coverage / 255; zero coverage leaves the pixel as
# created by newImage.
result.data[i].r = ((channels[0] * coverage) div 255).uint8
result.data[i].g = ((channels[1] * coverage) div 255).uint8
result.data[i].b = ((channels[2] * coverage) div 255).uint8
result.data[i].a = ((channels[3] * coverage) div 255).uint8
proc fillImage*(
  path: SomePath, width, height: int, color: SomeColor, windingRule = NonZero
): Image =
  ## Returns a new image with the path filled. This is a faster alternative
  ## to `newImage` + `fillPath`.
  # Subpaths must be closed before filling, otherwise an open subpath is
  # missing its final edge. This matches what `fillMask` passes to
  # `parseSomePath` (the previous `false` is what the stroke procs use).
  let shapes = parseSomePath(path, true, 1)
  shapes.fillImage(width, height, color, windingRule)
proc strokeMask*(
path: SomePath,
width, height: int,
strokeWidth: float32 = 1.0,
lineCap = ButtCap,
lineJoin = MiterJoin,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Mask =
## Returns a new mask with the path stroked. This is a faster alternative
## to `newMask` + `strokePath`.
# The path is parsed without closing subpaths, expanded into stroke outline
# shapes, and those shapes are filled with the NonZero rule.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillMask(width, height, NonZero)
proc strokeImage*(
  path: SomePath,
  width, height: int,
  color: SomeColor,
  strokeWidth: float32 = 1.0,
  lineCap = ButtCap,
  lineJoin = MiterJoin,
  miterLimit = defaultMiterLimit,
  dashes: seq[float32] = @[]
): Image =
  ## Builds a fresh `width` x `height` image with `path` stroked in `color`.
  ## Intended as a faster alternative to `newImage` + `strokePath`.
  let
    # Subpaths are left open for stroking.
    flattened = parseSomePath(path, false, 1)
    # Stroke outline geometry, later filled like any other set of shapes.
    outline = strokeShapes(
      flattened, strokeWidth, lineCap, lineJoin, miterLimit, dashes, 1
    )
  outline.fillImage(width, height, color, NonZero)
when defined(release):
{.pop.}
# Benchmark 1: an opaque blue rectangle covering a 1920x1080 target, drawn
# with cairo ("cairo1") and with pixie ("pixie1") from the same flattened
# shapes.
# NOTE(review): indentation is lost in this view, so the exact extent of each
# `timeIt` body cannot be read off the text -- check the real file.
block:
let path = newPath()
path.moveTo(0, 0)
path.lineTo(1920, 0)
path.lineTo(1920, 1080)
path.lineTo(0, 1080)
path.closePath()
# Flatten once up front so both libraries receive identical points.
let shapes = path.commandsToShapes(true, 1)
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
ctx = surface.create()
ctx.setSourceRgba(0, 0, 1, 1)
timeIt "cairo1":
ctx.newPath()
# Only the first shape gets a moveTo; every point after that is a lineTo.
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
# discard surface.writeToPng("cairo1.png")
let a = newImage(1920, 1080)
timeIt "pixie1":
# The pixie path is rebuilt from the flattened points, mirroring the cairo side.
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(0, 0, 255, 255))
# a.writeFile("pixie1.png")
# Benchmark 2: a four-point polygon with slanted sides on a 1920x1080 target.
# Both sides clear the target to opaque white and then fill the polygon in
# opaque blue ("cairo2" / "pixie2").
# NOTE(review): indentation is lost in this view, so the exact extent of each
# `timeIt` body cannot be read off the text -- check the real file.
block:
let path = newPath()
path.moveTo(500, 240)
path.lineTo(1500, 240)
path.lineTo(1920, 600)
path.lineTo(0, 600)
path.closePath()
let shapes = path.commandsToShapes(true, 1)
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
ctx = surface.create()
timeIt "cairo2":
ctx.setSourceRgba(1, 1, 1, 1)
# Paint the white background with OperatorSource, then restore the
# previously active operator for the fill.
let operator = ctx.getOperator()
ctx.setOperator(OperatorSource)
ctx.paint()
ctx.setOperator(operator)
ctx.setSourceRgba(0, 0, 1, 1)
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
# discard surface.writeToPng("cairo2.png")
let a = newImage(1920, 1080)
timeIt "pixie2":
# Same sequence on the pixie side: white clear, then blue fill.
a.fill(rgbx(255, 255, 255, 255))
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(0, 0, 255, 255))
# a.writeFile("pixie2.png")
# Benchmark 3: a curved path (two arcs and two quadratic curves) parsed from
# SVG path syntax, filled in opaque red over a white clear on a 1000x1000
# target ("cairo3" / "pixie3").
# NOTE(review): indentation is lost in this view, so the exact extent of each
# `timeIt` body cannot be read off the text -- check the real file.
block:
let path = parsePath("""
M 100,300
A 200,200 0,0,1 500,300
A 200,200 0,0,1 900,300
Q 900,600 500,900
Q 100,600 100,300 z
""")
# Curves are flattened to points here, so both libraries fill the same polygon.
let shapes = path.commandsToShapes(true, 1)
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
ctx = surface.create()
timeIt "cairo3":
ctx.setSourceRgba(1, 1, 1, 1)
let operator = ctx.getOperator()
ctx.setOperator(OperatorSource)
ctx.paint()
ctx.setOperator(operator)
ctx.setSourceRgba(1, 0, 0, 1)
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
# discard surface.writeToPng("cairo3.png")
let a = newImage(1000, 1000)
timeIt "pixie3":
a.fill(rgbx(255, 255, 255, 255))
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
a.fillPath(p, rgbx(255, 0, 0, 255))
# a.writeFile("pixie3.png")
# doDiff(readImage("cairo3.png"), a, "cairo3")
# Benchmark 4: a rounded rectangle filled in half-transparent red on a
# 1000x1000 target. Besides cairo ("cairo4") it times several pixie routes:
# plain fillPath, fillPath with OverwriteBlend, a mask fill, and fillImage.
# NOTE(review): indentation is lost in this view, so the exact extent of each
# `timeIt` body cannot be read off the text -- check the real file.
block:
let path = newPath()
path.roundedRect(200, 200, 600, 600, 10, 10, 10, 10)
let shapes = path.commandsToShapes(true, 1)
# let
# surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
# ctx = surface.create()
# timeIt "cairo4":
# ctx.setSourceRgba(0, 0, 0, 0)
# let operator = ctx.getOperator()
# ctx.setOperator(OperatorSource)
# ctx.paint()
# ctx.setOperator(operator)
timeIt "cairo4":
# Unlike benchmarks 1-3, the surface and context are created after the
# `timeIt` line here.
let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
ctx = surface.create()
ctx.setSourceRgba(1, 0, 0, 0.5)
ctx.newPath()
ctx.moveTo(shapes[0][0].x, shapes[0][0].y)
for shape in shapes:
for v in shape:
ctx.lineTo(v.x, v.y)
ctx.fill()
surface.flush()
# discard surface.writeToPng("cairo4.png")
var a: Image
timeIt "pixie4":
# Likewise the pixie image is assigned after the `timeIt` line.
a = newImage(1000, 1000)
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
# rgbx(127, 0, 0, 127) is the pixie-side counterpart of cairo's
# (1, 0, 0, 0.5) source; presumably premultiplied -- confirm.
a.fillPath(p, rgbx(127, 0, 0, 127))
# a.writeFile("pixie4.png")
# doDiff(readImage("cairo4.png"), a, "4")
var b: Image
# Same fill, but through a Paint whose blend mode is OverwriteBlend.
let paint = newPaint(SolidPaint)
paint.color = color(1, 0, 0, 0.5)
paint.blendMode = OverwriteBlend
timeIt "pixie4 overwrite":
b = newImage(1000, 1000)
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
b.fillPath(p, paint)
# b.writeFile("b.png")
timeIt "pixie4 mask":
# Coverage only: fills a Mask instead of an Image.
let mask = newMask(1000, 1000)
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
mask.fillPath(p)
var tmp: Image
timeIt "pixie fillImage":
# Uses the fillImage proc defined earlier in this file, which creates the
# image itself.
let p = newPath()
p.moveTo(shapes[0][0])
for shape in shapes:
for v in shape:
p.lineTo(v)
tmp = p.fillImage(1000, 1000, rgbx(127, 0, 0, 127))
# tmp.writeFile("tmp.png")

View file

@ -1,6 +0,0 @@
import benchy, svg_cairo
# Times decoding of the Ghostscript Tiger SVG through `decodeSvg` from the
# imported svg_cairo module. The file is read once, before the `timeIt` line.
let data = readFile("tests/fileformats/svg/Ghostscript_Tiger.svg")
timeIt "svg decode":
discard decodeSvg(data)

View file

@ -1,587 +0,0 @@
## Load and Save SVG files.
import cairo, chroma, pixie/common, pixie/images, pixie/paints, strutils,
tables, vmath, xmlparser, xmltree
include pixie/paths
proc processCommands(
  c: ptr Context, path: Path, closeSubpaths: bool, mat: Mat3
) =
  ## Flattens `path` into polylines (using the pixel scale of `mat`) and
  ## replaces the current cairo path on `c` with them. When the path produces
  ## no shapes the cairo context is left untouched.
  let shapes = path.commandsToShapes(closeSubpaths, mat.pixelScale())
  if shapes.len == 0:
    return

  c.newPath()
  for shape in shapes:
    if shape.len == 0:
      continue
    # Every shape starts its own cairo sub-path. Issuing a single moveTo for
    # the first shape only would join separate subpaths with spurious edges.
    c.moveTo(shape[0].x, shape[0].y)
    for v in shape:
      c.lineTo(v.x, v.y)
proc prepare(
  c: ptr Context,
  path: Path,
  paint: Paint,
  mat: Mat3,
  closeSubpaths: bool,
  windingRule = NonZero
) =
  ## Configures cairo context `c` for drawing `path`: source color taken from
  ## `paint.color`, transform taken from `mat`, fill rule taken from
  ## `windingRule`, and finally the flattened path itself.
  let
    rgba = paint.color
    # Copy the six affine entries of `mat` into a cairo matrix.
    cairoMatrix = Matrix(
      xx: mat[0, 0],
      yx: mat[0, 1],
      xy: mat[1, 0],
      yy: mat[1, 1],
      x0: mat[2, 0],
      y0: mat[2, 1],
    )
  c.setSourceRgba(rgba.r, rgba.g, rgba.b, rgba.a)
  c.setMatrix(cairoMatrix.unsafeAddr)
  # Anything other than NonZero is treated as even-odd.
  c.setFillRule(
    if windingRule == NonZero:
      FillRuleWinding
    else:
      FillRuleEvenOdd
  )
  c.processCommands(path, closeSubpaths, mat)
type
# A linear gradient definition: two endpoints plus its color stops. The
# endpoints are transformed and used as gradient handle positions when a
# fill references the gradient.
LinearGradient = object
x1, y1, x2, y2: float32
stops: seq[ColorStop]
# Drawing state for one SVG element. Each element starts from a copy of its
# parent's state and overrides what its own attributes specify.
Ctx = object
# False once `display` is "none".
display: bool
fillRule: WindingRule
fill: Paint
stroke: ColorRGBX
strokeWidth: float32
strokeLineCap: LineCap
strokeLineJoin: LineJoin
strokeMiterLimit: float32
strokeDashArray: seq[float32]
# Accumulated transform; each `transform` attribute is multiplied onto it.
transform: Mat3
# Set when a stroke color or width is specified, cleared again when the
# stroke color is empty or the width is not positive.
shouldStroke: bool
# Both are clamped to 0..1 when parsed.
opacity, strokeOpacity: float32
# Gradients looked up by id for `fill="url(#id)"`. This is a ref table, so
# copies of a Ctx share the same table.
linearGradients: TableRef[string, LinearGradient]
template failInvalid() =
## Raises a PixieError with the generic "Invalid SVG data" message.
raise newException(PixieError, "Invalid SVG data")
proc attrOrDefault(node: XmlNode, name, default: string): string =
  ## Looks up attribute `name` on `node`; an absent or empty attribute yields
  ## `default` instead.
  let value = node.attr(name)
  if value.len == 0: default else: value
proc initCtx(): Ctx =
  ## Creates the root drawing state: visible, black fill and stroke color,
  ## stroke width 1, identity transform, default miter limit, full opacity,
  ## and an empty gradient table.
  result.display = true
  try:
    let black = parseHtmlColor("black").rgbx
    result.fill = black
    result.stroke = black
  except:
    raise currentExceptionAsPixieError()
  result.opacity = 1
  result.strokeOpacity = 1
  result.strokeWidth = 1
  result.strokeMiterLimit = defaultMiterLimit
  result.transform = mat3()
  result.linearGradients = newTable[string, LinearGradient]()
proc decodeCtxInternal(inherited: Ctx, node: XmlNode): Ctx =
  ## Derives the drawing state for `node` from its parent's state.
  ##
  ## Starts from a copy of `inherited` and applies the presentation
  ## attributes found on `node`. Declarations inside the `style` attribute
  ## are used only for properties that are not also given as attributes.
  ##
  ## Raises `PixieError` for an invalid fill-rule, stroke-linecap or
  ## stroke-linejoin value, for a reference to an unknown gradient, and for an
  ## unsupported transform. Number and color parsing errors propagate as
  ## raised by the parsing procs.
  result = inherited

  proc splitArgs(s: string): seq[string] =
    # Handles (1,1) or (1 1) or (1, 1) or (1,1 2,2) etc
    let tmp = s.replace(',', ' ').split(' ')
    for entry in tmp:
      if entry.len > 0:
        result.add(entry)

  var
    fillRule = node.attr("fill-rule")
    fill = node.attr("fill")
    stroke = node.attr("stroke")
    strokeWidth = node.attr("stroke-width")
    strokeLineCap = node.attr("stroke-linecap")
    strokeLineJoin = node.attr("stroke-linejoin")
    strokeMiterLimit = node.attr("stroke-miterlimit")
    strokeDashArray = node.attr("stroke-dasharray")
    transform = node.attr("transform")
    style = node.attr("style")
    display = node.attr("display")
    opacity = node.attr("opacity")
    fillOpacity = node.attr("fill-opacity")
    strokeOpacity = node.attr("stroke-opacity")

  let pairs = style.split(';')
  for pair in pairs:
    let parts = pair.split(':')
    if parts.len == 2:
      # Do not override element properties
      case parts[0].strip():
      of "fill-rule":
        if fillRule.len == 0:
          fillRule = parts[1].strip()
      of "fill":
        if fill.len == 0:
          fill = parts[1].strip()
      of "stroke":
        if stroke.len == 0:
          stroke = parts[1].strip()
      of "stroke-linecap":
        if strokeLineCap.len == 0:
          strokeLineCap = parts[1].strip()
      of "stroke-linejoin":
        if strokeLineJoin.len == 0:
          strokeLineJoin = parts[1].strip()
      of "stroke-width":
        if strokeWidth.len == 0:
          strokeWidth = parts[1].strip()
      of "stroke-miterlimit":
        if strokeMiterLimit.len == 0:
          strokeMiterLimit = parts[1].strip()
      of "stroke-dasharray":
        if strokeDashArray.len == 0:
          strokeDashArray = parts[1].strip()
      of "display":
        if display.len == 0:
          display = parts[1].strip()
      of "opacity":
        if opacity.len == 0:
          opacity = parts[1].strip()
      # The CSS property names are hyphenated, like the attributes read
      # above. The camelCase spellings are kept only so input that happened
      # to rely on them keeps working.
      of "fill-opacity", "fillOpacity":
        if fillOpacity.len == 0:
          fillOpacity = parts[1].strip()
      of "stroke-opacity", "strokeOpacity":
        if strokeOpacity.len == 0:
          strokeOpacity = parts[1].strip()
      else:
        discard
    elif pair.len > 0:
      when defined(pixieDebugSvg):
        echo "Invalid style pair: ", pair

  if display.len > 0:
    result.display = display.strip() != "none"

  if opacity.len > 0:
    result.opacity = clamp(parseFloat(opacity), 0, 1)

  if fillOpacity.len > 0:
    # NOTE(review): this writes to the paint held in `result.fill` before the
    # `fill` value below is applied, and that paint was copied from
    # `inherited` -- confirm this interacts as intended when an element sets
    # both `fill` and `fill-opacity`.
    result.fill.opacity = clamp(parseFloat(fillOpacity), 0, 1)

  if strokeOpacity.len > 0:
    result.strokeOpacity = clamp(parseFloat(strokeOpacity), 0, 1)

  if fillRule == "":
    discard # Inherit
  elif fillRule == "nonzero":
    result.fillRule = NonZero
  elif fillRule == "evenodd":
    result.fillRule = EvenOdd
  else:
    raise newException(
      PixieError, "Invalid fill-rule value " & fillRule
    )

  if fill == "" or fill == "currentColor":
    discard # Inherit
  elif fill == "none":
    result.fill = ColorRGBX()
  elif fill.startsWith("url("):
    # Strip the leading `url(#` and the trailing `)`.
    let id = fill[5 .. ^2]
    if id in result.linearGradients:
      let linearGradient = result.linearGradients[id]
      result.fill = newPaint(LinearGradientPaint)
      result.fill.gradientHandlePositions = @[
        result.transform * vec2(linearGradient.x1, linearGradient.y1),
        result.transform * vec2(linearGradient.x2, linearGradient.y2)
      ]
      result.fill.gradientStops = linearGradient.stops
    else:
      raise newException(PixieError, "Missing SVG resource " & id)
  else:
    result.fill = parseHtmlColor(fill).rgbx

  if stroke == "":
    discard # Inherit
  elif stroke == "currentColor":
    result.shouldStroke = true
  elif stroke == "none":
    result.stroke = ColorRGBX()
  else:
    result.stroke = parseHtmlColor(stroke).rgbx
    result.shouldStroke = true

  if strokeWidth == "":
    discard # Inherit
  else:
    if strokeWidth.endsWith("px"):
      strokeWidth = strokeWidth[0 .. ^3]
    result.strokeWidth = parseFloat(strokeWidth)
    result.shouldStroke = true

  # An empty stroke color or a non-positive width always disables stroking,
  # whatever was decided above.
  if result.stroke == ColorRGBX() or result.strokeWidth <= 0:
    result.shouldStroke = false

  if strokeLineCap == "":
    discard # Inherit
  else:
    case strokeLineCap:
    of "butt":
      result.strokeLineCap = ButtCap
    of "round":
      result.strokeLineCap = RoundCap
    of "square":
      result.strokeLineCap = SquareCap
    of "inherit":
      discard
    else:
      raise newException(
        PixieError, "Invalid stroke-linecap value " & strokeLineCap
      )

  if strokeLineJoin == "":
    discard # Inherit
  else:
    case strokeLineJoin:
    of "miter":
      result.strokeLineJoin = MiterJoin
    of "round":
      result.strokeLineJoin = RoundJoin
    of "bevel":
      result.strokeLineJoin = BevelJoin
    of "inherit":
      discard
    else:
      raise newException(
        PixieError, "Invalid stroke-linejoin value " & strokeLineJoin
      )

  if strokeMiterLimit == "":
    discard
  else:
    result.strokeMiterLimit = parseFloat(strokeMiterLimit)

  if strokeDashArray == "":
    discard
  else:
    # A dash array given on this element replaces the inherited one rather
    # than being appended to it.
    result.strokeDashArray.setLen(0)
    for value in splitArgs(strokeDashArray):
      result.strokeDashArray.add(parseFloat(value))

  if transform == "":
    discard # Inherit
  else:
    template failInvalidTransform(transform: string) =
      raise newException(
        PixieError, "Unsupported SVG transform: " & transform
      )

    # Consume one `name(args)` item at a time, multiplying each onto the
    # accumulated transform in the order written.
    var remaining = transform
    while remaining.len > 0:
      let index = remaining.find(")")
      if index == -1:
        failInvalidTransform(transform)
      let f = remaining[0 .. index].strip()
      remaining = remaining[index + 1 .. ^1]

      if f.startsWith("matrix("):
        let arr = splitArgs(f[7 .. ^2])
        if arr.len != 6:
          failInvalidTransform(transform)
        var m = mat3()
        m[0, 0] = parseFloat(arr[0])
        m[0, 1] = parseFloat(arr[1])
        m[1, 0] = parseFloat(arr[2])
        m[1, 1] = parseFloat(arr[3])
        m[2, 0] = parseFloat(arr[4])
        m[2, 1] = parseFloat(arr[5])
        result.transform = result.transform * m
      elif f.startsWith("translate("):
        let
          components = splitArgs(f[10 .. ^2])
          tx = parseFloat(components[0])
          # A missing second argument means no vertical translation.
          ty =
            if components.len == 1:
              0.0
            else:
              parseFloat(components[1])
        result.transform = result.transform * translate(vec2(tx, ty))
      elif f.startsWith("rotate("):
        let
          values = splitArgs(f[7 .. ^2])
          # Degrees to radians, with the sign flipped.
          angle: float32 = parseFloat(values[0]) * -PI / 180
        var cx, cy: float32
        if values.len > 1:
          cx = parseFloat(values[1])
        if values.len > 2:
          cy = parseFloat(values[2])
        let center = vec2(cx, cy)
        result.transform = result.transform *
          translate(center) * rotate(angle) * translate(-center)
      elif f.startsWith("scale("):
        let
          values = splitArgs(f[6 .. ^2])
          sx: float32 = parseFloat(values[0])
          # A single argument scales uniformly.
          sy: float32 =
            if values.len > 1:
              parseFloat(values[1])
            else:
              sx
        result.transform = result.transform * scale(vec2(sx, sy))
      else:
        failInvalidTransform(transform)
proc decodeCtx(inherited: Ctx, node: XmlNode): Ctx =
  ## Wrapper around `decodeCtxInternal` that lets `PixieError` through as-is
  ## and converts any other exception into a `PixieError`.
  try:
    result = decodeCtxInternal(inherited, node)
  except PixieError:
    raise
  except:
    raise currentExceptionAsPixieError()
proc cairoLineCap(lineCap: LineCap): cairo.LineCap =
  ## Maps a pixie line cap onto the matching cairo line cap constant.
  result =
    case lineCap
    of ButtCap: LineCapButt
    of RoundCap: LineCapRound
    of SquareCap: LineCapSquare
proc cairoLineJoin(lineJoin: LineJoin): cairo.LineJoin =
  ## Maps a pixie line join onto the matching cairo line join constant.
  result =
    case lineJoin
    of MiterJoin: LineJoinMiter
    of BevelJoin: LineJoinBevel
    of RoundJoin: LineJoinRound
proc fill(c: ptr Context, ctx: Ctx, path: Path) {.inline.} =
  ## Fills `path` on cairo context `c` using the fill paint of `ctx`, with the
  ## paint opacity multiplied by the element opacity. Subpaths are closed.
  ## Does nothing when the element is hidden or its opacity is not above zero.
  if not (ctx.display and ctx.opacity > 0):
    return
  let scaled = newPaint(ctx.fill)
  scaled.opacity = scaled.opacity * ctx.opacity
  c.prepare(path, scaled, ctx.transform, true, ctx.fillRule)
  c.fill()
proc stroke(c: ptr Context, ctx: Ctx, path: Path) {.inline.} =
  ## Strokes `path` on cairo context `c` with the stroke color of `ctx`; the
  ## color's alpha is multiplied by the element opacity and stroke opacity.
  ## Width, cap, join and miter limit are forwarded to cairo; the dash array
  ## of `ctx` is not referenced here. Subpaths are left open.
  ## Does nothing when the element is hidden or its opacity is not above zero.
  if not (ctx.display and ctx.opacity > 0):
    return
  let pen = newPaint(ctx.stroke)
  pen.color.a *= (ctx.opacity * ctx.strokeOpacity)
  c.prepare(path, pen, ctx.transform, false)
  c.setLineWidth(ctx.strokeWidth)
  c.setLineCap(cairoLineCap(ctx.strokeLineCap))
  c.setLineJoin(cairoLineJoin(ctx.strokeLineJoin))
  c.setMiterLimit(ctx.strokeMiterLimit)
  c.stroke()
proc drawInternal(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) =
## Draws one SVG node onto cairo context `img`, dispatching on the tag name.
## `ctxStack[^1]` is the drawing state inherited from the enclosing element.
## Groups recurse into their children; shape tags build a Path and fill
## and/or stroke it. Raises PixieError for an unsupported tag and for
## malformed `points` data; number parsing errors propagate from parseFloat.
if node.kind != xnElement:
# Skip <!-- comments -->
return
case node.tag:
of "title", "desc", "defs":
discard
of "g":
# The group's state is pushed for its children and popped afterwards.
let ctx = decodeCtx(ctxStack[^1], node)
ctxStack.add(ctx)
for child in node:
img.drawInternal(child, ctxStack)
discard ctxStack.pop()
of "path":
let
d = node.attr("d")
ctx = decodeCtx(ctxStack[^1], node)
path = parsePath(d)
img.fill(ctx, path)
if ctx.shouldStroke:
img.stroke(ctx, path)
of "line":
# Lines are only ever stroked.
let
ctx = decodeCtx(ctxStack[^1], node)
x1 = parseFloat(node.attrOrDefault("x1", "0"))
y1 = parseFloat(node.attrOrDefault("y1", "0"))
x2 = parseFloat(node.attrOrDefault("x2", "0"))
y2 = parseFloat(node.attrOrDefault("y2", "0"))
let path = newPath()
path.moveTo(x1, y1)
path.lineTo(x2, y2)
if ctx.shouldStroke:
img.stroke(ctx, path)
of "polyline", "polygon":
let
ctx = decodeCtx(ctxStack[^1], node)
points = node.attr("points")
var vecs: seq[Vec2]
# Two accepted layouts: "x,y x,y ..." when a comma is present, otherwise
# a flat space-separated "x y x y ..." list with an even number of entries.
if points.contains(","):
for pair in points.split(" "):
let parts = pair.split(",")
if parts.len != 2:
failInvalid()
vecs.add(vec2(parseFloat(parts[0]), parseFloat(parts[1])))
else:
let points = points.split(" ")
if points.len mod 2 != 0:
failInvalid()
for i in 0 ..< points.len div 2:
vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1])))
if vecs.len == 0:
failInvalid()
let path = newPath()
path.moveTo(vecs[0])
for i in 1 ..< vecs.len:
path.lineTo(vecs[i])
# The difference between polyline and polygon is whether we close the path
# and fill or not
if node.tag == "polygon":
path.closePath()
img.fill(ctx, path)
if ctx.shouldStroke:
img.stroke(ctx, path)
of "rect":
let
ctx = decodeCtx(ctxStack[^1], node)
x = parseFloat(node.attrOrDefault("x", "0"))
y = parseFloat(node.attrOrDefault("y", "0"))
width = parseFloat(node.attrOrDefault("width", "0"))
height = parseFloat(node.attrOrDefault("height", "0"))
# A rect with zero width or height draws nothing.
if width == 0 or height == 0:
return
var
rx = max(parseFloat(node.attrOrDefault("rx", "0")), 0)
ry = max(parseFloat(node.attrOrDefault("ry", "0")), 0)
let path = newPath()
if rx > 0 or ry > 0:
# When only one radius is given, the other one takes the same value.
if rx == 0:
rx = ry
elif ry == 0:
ry = rx
# Radii are limited to half the rect's size.
rx = min(rx, width / 2)
ry = min(ry, height / 2)
# Straight edges joined by one elliptical arc per corner.
path.moveTo(x + rx, y)
path.lineTo(x + width - rx, y)
path.ellipticalArcTo(rx, ry, 0, false, true, x + width, y + ry)
path.lineTo(x + width, y + height - ry)
path.ellipticalArcTo(rx, ry, 0, false, true, x + width - rx, y + height)
path.lineTo(x + rx, y + height)
path.ellipticalArcTo(rx, ry, 0, false, true, x, y + height - ry)
path.lineTo(x, y + ry)
path.ellipticalArcTo(rx, ry, 0, false, true, x + rx, y)
else:
path.rect(x, y, width, height)
img.fill(ctx, path)
if ctx.shouldStroke:
img.stroke(ctx, path)
of "circle", "ellipse":
let
ctx = decodeCtx(ctxStack[^1], node)
cx = parseFloat(node.attrOrDefault("cx", "0"))
cy = parseFloat(node.attrOrDefault("cy", "0"))
var rx, ry: float32
if node.tag == "circle":
# `r` has no default here, so a missing `r` makes parseFloat fail.
rx = parseFloat(node.attr("r"))
ry = rx
else:
rx = parseFloat(node.attrOrDefault("rx", "0"))
ry = parseFloat(node.attrOrDefault("ry", "0"))
let path = newPath()
path.ellipse(cx, cy, rx, ry)
img.fill(ctx, path)
if ctx.shouldStroke:
img.stroke(ctx, path)
else:
raise newException(PixieError, "Unsupported SVG tag: " & node.tag & ".")
proc draw(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) =
  ## Draws `node` through `drawInternal`. A `PixieError` is passed on to the
  ## caller unchanged; anything else that is raised is re-raised as the value
  ## returned by `currentExceptionAsPixieError()`.
  try:
    img.drawInternal(node, ctxStack)
  except PixieError as pixieError:
    raise pixieError
  except:
    raise currentExceptionAsPixieError()
proc decodeSvg*(data: string, width = 0, height = 0): Image =
  ## Render SVG file and return the image. Defaults to the SVG's view box size.
  ##
  ## `data` is the SVG document text. When both `width` and `height` are 0 the
  ## image takes the size given by the root `viewBox`; otherwise the drawing
  ## is scaled from the view box to `width` x `height`.
  ##
  ## The document is rendered with cairo into a temporary ARGB32 surface whose
  ## pixels are then copied into the returned image. The surface and its
  ## context are released before returning, also when rendering fails.
  ##
  ## Raises `PixieError` when the root tag is not `svg`, when `viewBox` has
  ## fewer than four fields, or when anything else goes wrong while loading.
  try:
    let root = parseXml(data)
    if root.tag != "svg":
      failInvalid()

    let box = root.attr("viewBox").split(" ")
    # Reject a missing or truncated viewBox explicitly rather than relying on
    # an out-of-range index further down.
    if box.len < 4:
      failInvalid()

    let
      viewBoxMinX = parseInt(box[0])
      viewBoxMinY = parseInt(box[1])
      viewBoxWidth = parseInt(box[2])
      viewBoxHeight = parseInt(box[3])

    var rootCtx = initCtx()
    rootCtx = decodeCtx(rootCtx, root)

    # Move the view box origin to the image origin.
    if viewBoxMinX != 0 or viewBoxMinY != 0:
      rootCtx.transform = rootCtx.transform * translate(
        vec2(-viewBoxMinX.float32, -viewBoxMinY.float32)
      )

    var
      width = width
      height = height

    if width == 0 and height == 0: # Default to the view box size
      width = viewBoxWidth.int32
      height = viewBoxHeight.int32
    else:
      let
        scaleX = width.float32 / viewBoxWidth.float32
        scaleY = height.float32 / viewBoxHeight.float32
      rootCtx.transform = rootCtx.transform * scale(vec2(scaleX, scaleY))

    let
      surface = imageSurfaceCreate(FORMAT_ARGB32, width.int32, height.int32)
      c = surface.create()
    try:
      var ctxStack = @[rootCtx]
      for node in root:
        c.draw(node, ctxStack)

      # Make sure all pending drawing has reached the pixel buffer.
      surface.flush()

      result = newImage(width, height)

      let pixels = cast[ptr UncheckedArray[array[4, uint8]]](surface.getData())
      for y in 0 ..< result.height:
        for x in 0 ..< result.width:
          let bgra = pixels[result.dataIndex(x, y)]
          # Cairo's ARGB32 format already stores premultiplied alpha, so the
          # channels are only reordered (B, G, R, A -> R, G, B, A). Converting
          # through a straight-alpha rgba() would premultiply a second time.
          result.unsafe[x, y] = rgbx(bgra[2], bgra[1], bgra[0], bgra[3])
    finally:
      # Both objects are owned by this proc; release them on every path so
      # repeated calls do not leak a surface per decode.
      c.destroy()
      surface.destroy()
  except PixieError as e:
    raise e
  except:
    # Kept as a catch-all so every load failure surfaces as a PixieError.
    raise newException(PixieError, "Unable to load SVG")

View file

@ -10,7 +10,7 @@ requires "vmath >= 1.1.4"
requires "chroma >= 0.2.5"
requires "zippy >= 0.10.0"
requires "flatty >= 0.3.4"
requires "nimsimd >= 1.0.0"
requires "nimsimd >= 1.1.1"
requires "bumpy >= 1.1.1"
task bindings, "Generate bindings":

View file

@ -108,15 +108,20 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
var i: int
when defined(amd64) and allowSimd:
let colorVec = mm_set1_epi32(cast[int32](color))
for _ in 0 ..< image.data.len div 8:
for _ in 0 ..< image.data.len div 16:
let
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
mask0 = mm_movemask_epi8(mm_cmpeq_epi8(values0, colorVec))
mask1 = mm_movemask_epi8(mm_cmpeq_epi8(values1, colorVec))
if mask0 != 0xffff or mask1 != 0xffff:
values2 = mm_loadu_si128(image.data[i + 8].addr)
values3 = mm_loadu_si128(image.data[i + 12].addr)
eq0 = mm_cmpeq_epi8(values0, colorVec)
eq1 = mm_cmpeq_epi8(values1, colorVec)
eq2 = mm_cmpeq_epi8(values2, colorVec)
eq3 = mm_cmpeq_epi8(values3, colorVec)
eq = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3))
if mm_movemask_epi8(eq) != 0xffff:
return false
i += 8
i += 16
for j in i ..< image.data.len:
if image.data[j] != color:
@ -257,7 +262,7 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
when defined(amd64) and allowSimd:
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
first32 = cast[M128i]([uint32.high, 0, 0, 0])
mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
for _ in countup(0, resultEvenWidth - 4, 2):
let
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
@ -266,36 +271,36 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
btmShifted = mm_srli_si128(btm, 4)
topEven = mm_andnot_si128(oddMask, top)
topOdd = mm_srli_epi16(mm_and_si128(top, oddMask), 8)
topOdd = mm_srli_epi16(top, 8)
btmEven = mm_andnot_si128(oddMask, btm)
btmOdd = mm_srli_epi16(mm_and_si128(btm, oddMask), 8)
btmOdd = mm_srli_epi16(btm, 8)
topShiftedEven = mm_andnot_si128(oddMask, topShifted)
topShiftedOdd = mm_srli_epi16(mm_and_si128(topShifted, oddMask), 8)
topShiftedOdd = mm_srli_epi16(topShifted, 8)
btmShiftedEven = mm_andnot_si128(oddMask, btmShifted)
btmShiftedOdd = mm_srli_epi16(mm_and_si128(btmShifted, oddMask), 8)
btmShiftedOdd = mm_srli_epi16(btmShifted, 8)
topAddedEven = mm_add_epi16(topEven, topShiftedEven)
btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven)
topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
bottomAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
addedEven = mm_add_epi16(topAddedEven, btmAddedEven)
addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd)
addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd)
addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0 and 2 so mask the others out and shift 0 and 2 into
# position and store
masked = mm_and_si128(merged, mergedMask)
# merged [0, 1, 2, 3] has the correct values for the next two pixels
# at index 0 and 2 so shift those into position and store
zero = mm_and_si128(merged, first32)
two = mm_and_si128(mm_srli_si128(merged, 8), first32)
zeroTwo = mm_or_si128(zero, mm_slli_si128(two, 4))
mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, zeroTwo)
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm_shuffle_epi32(masked, MM_SHUFFLE(0, 0, 2, 0))
)
x += 2
for x in x ..< resultEvenWidth:
@ -350,17 +355,14 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
when defined(amd64) and allowSimd:
if scale == 2:
while x <= image.width - 4:
let
values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
lo = mm_unpacklo_epi32(values, mm_setzero_si128())
hi = mm_unpackhi_epi32(values, mm_setzero_si128())
let values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
mm_storeu_si128(
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
mm_or_si128(lo, mm_slli_si128(lo, 4))
mm_unpacklo_epi32(values, values)
)
mm_storeu_si128(
result.data[result.dataIndex(x * scale + 4, y * scale)].addr,
mm_or_si128(hi, mm_slli_si128(hi, 4))
mm_unpackhi_epi32(values, values)
)
x += 4
for x in x ..< image.width:
@ -410,25 +412,17 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
let index = i
let values = mm_loadu_si128(target.data[index].addr)
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
var
valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8)
valuesEven = mm_slli_epi16(values, 8)
valuesOdd = mm_and_si128(values, oddMask)
# values * opacity
valuesEven = mm_mulhi_epu16(valuesEven, opacityVec)
valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
# div 255
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
valuesOdd = mm_slli_epi16(valuesOdd, 8)
mm_storeu_si128(
target.data[index].addr,
mm_or_si128(valuesEven, valuesOdd)
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
)
i += 16
@ -445,31 +439,35 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
for j in i ..< target.data.len:
target.data[j] = ((target.data[j] * opacity) div 255).uint8
proc invert*(target: Image) {.raises: [].} =
proc invert*(image: Image) {.raises: [].} =
## Inverts all of the colors and alpha.
var i: int
when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi8(cast[int8](255))
let byteLen = target.data.len * 4
for _ in 0 ..< byteLen div 16:
let index = i div 4
var values = mm_loadu_si128(target.data[index].addr)
values = mm_sub_epi8(vec255, values)
mm_storeu_si128(target.data[index].addr, values)
for _ in 0 ..< image.data.len div 16:
let
a = mm_loadu_si128(image.data[i + 0].addr)
b = mm_loadu_si128(image.data[i + 4].addr)
c = mm_loadu_si128(image.data[i + 8].addr)
d = mm_loadu_si128(image.data[i + 12].addr)
mm_storeu_si128(image.data[i + 0].addr, mm_sub_epi8(vec255, a))
mm_storeu_si128(image.data[i + 4].addr, mm_sub_epi8(vec255, b))
mm_storeu_si128(image.data[i + 8].addr, mm_sub_epi8(vec255, c))
mm_storeu_si128(image.data[i + 12].addr, mm_sub_epi8(vec255, d))
i += 16
for j in i div 4 ..< target.data.len:
var rgba = target.data[j]
rgba.r = 255 - rgba.r
rgba.g = 255 - rgba.g
rgba.b = 255 - rgba.b
rgba.a = 255 - rgba.a
target.data[j] = rgba
for j in i ..< image.data.len:
var rgbx = image.data[j]
rgbx.r = 255 - rgbx.r
rgbx.g = 255 - rgbx.g
rgbx.b = 255 - rgbx.b
rgbx.a = 255 - rgbx.a
image.data[j] = rgbx
# Inverting rgbx(50, 100, 150, 200) becomes rgbx(205, 155, 105, 55). This
# is not a valid premultiplied alpha color.
# We need to convert back to premultiplied alpha after inverting.
target.data.toPremultipliedAlpha()
image.data.toPremultipliedAlpha()
proc blur*(
image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0)

View file

@ -121,34 +121,28 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
# When supported, SIMD convert as much as possible
let
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
notAlphaMask = mm_set1_epi32(0x00ffffff)
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
for _ in 0 ..< data.len div 4:
var
color = mm_loadu_si128(data[i].addr)
alpha = mm_and_si128(color, alphaMask)
if mm_movemask_epi8(mm_cmpeq_epi16(alpha, alphaMask)) != 0xffff:
# If not all of the alpha values are 255, premultiply
let
values = mm_loadu_si128(data[i].addr)
alpha = mm_and_si128(values, alphaMask)
eq = mm_cmpeq_epi8(values, alphaMask)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
let
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
var
colorEven = mm_slli_epi16(color, 8)
colorOdd = mm_and_si128(color, oddMask)
alpha = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
colorEven = mm_mulhi_epu16(colorEven, alpha)
colorOdd = mm_mulhi_epu16(colorOdd, alpha)
colorEven = mm_srli_epi16(mm_mulhi_epu16(colorEven, div255), 7)
colorOdd = mm_srli_epi16(mm_mulhi_epu16(colorOdd, div255), 7)
color = mm_or_si128(colorEven, mm_slli_epi16(colorOdd, 8))
color = mm_or_si128(
mm_and_si128(alpha, alphaMask), mm_and_si128(color, notAlphaMask)
colorsEven = mm_slli_epi16(values, 8)
colorsOdd = mm_and_si128(values, oddMask)
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
mm_storeu_si128(
data[i].addr,
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
)
mm_storeu_si128(data[i].addr, color)
i += 4
# Convert whatever is left
@ -165,9 +159,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
var i = start
when defined(amd64) and allowSimd:
let
vec255 = mm_set1_epi32(cast[int32](uint32.high))
colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0]))
let vec255 = mm_set1_epi32(cast[int32](uint32.high))
for _ in start ..< (start + len) div 16:
let
values0 = mm_loadu_si128(data[i + 0].addr)
@ -176,8 +168,9 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
values3 = mm_loadu_si128(data[i + 12].addr)
values01 = mm_and_si128(values0, values1)
values23 = mm_and_si128(values2, values3)
values = mm_or_si128(mm_and_si128(values01, values23), colorMask)
if mm_movemask_epi8(mm_cmpeq_epi8(values, vec255)) != 0xffff:
values = mm_and_si128(values01, values23)
eq = mm_cmpeq_epi8(values, vec255)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
return false
i += 16
@ -186,14 +179,11 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
return false
when defined(amd64) and allowSimd:
proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} =
## Shuffle the alpha values for these 4 colors to the first 4 bytes
let mask = mm_set1_epi32(cast[int32](0xff000000))
result = mm_and_si128(v, mask)
result = mm_srli_epi32(result, 24)
result = mm_packus_epi16(result, result)
result = mm_packus_epi16(result, result)
result = mm_srli_si128(result, 12)
result = mm_srli_epi32(v, 24)
result = mm_packus_epi16(result, mm_setzero_si128())
result = mm_packus_epi16(result, mm_setzero_si128())
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
let
@ -205,10 +195,8 @@ when defined(amd64) and allowSimd:
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value)
let
a = mm_unpacklo_epi8(v, mm_setzero_si128())
b = mm_unpacklo_epi8(a, mm_setzero_si128())
result = mm_slli_epi32(b, 24) # Shift the values to uint32 `a`
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
when defined(release):
{.pop.}

View file

@ -88,12 +88,8 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
for y in 0 ..< result.height:
var x: int
when defined(amd64) and allowSimd:
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
firstByte = cast[M128i](
[uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
)
for _ in countup(0, result.width - 16, 8):
let oddMask = mm_set1_epi16(cast[int16](0xff00))
while x <= result.width - 16:
let
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
btm = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 1)].addr)
@ -101,50 +97,35 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
btmShifted = mm_srli_si128(btm, 1)
topEven = mm_andnot_si128(oddMask, top)
topOdd = mm_srli_epi16(mm_and_si128(top, oddMask), 8)
topOdd = mm_srli_epi16(top, 8)
btmEven = mm_andnot_si128(oddMask, btm)
btmOdd = mm_srli_epi16(mm_and_si128(btm, oddMask), 8)
btmOdd = mm_srli_epi16(btm, 8)
topShiftedEven = mm_andnot_si128(oddMask, topShifted)
topShiftedOdd = mm_srli_epi16(mm_and_si128(topShifted, oddMask), 8)
topShiftedOdd = mm_srli_epi16(topShifted, 8)
btmShiftedEven = mm_andnot_si128(oddMask, btmShifted)
btmShiftedOdd = mm_srli_epi16(mm_and_si128(btmShifted, oddMask), 8)
btmShiftedOdd = mm_srli_epi16(btmShifted, 8)
topAddedEven = mm_add_epi16(topEven, topShiftedEven)
btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven)
topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
bottomAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
addedEven = mm_add_epi16(topAddedEven, btmAddedEven)
addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd)
addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd)
addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values in the even indices
# Mask out the odd values for packing
masked = mm_andnot_si128(oddMask, merged)
# merged has the correct values in the even indices
a = mm_and_si128(merged, firstByte)
b = mm_and_si128(mm_srli_si128(merged, 2), firstByte)
c = mm_and_si128(mm_srli_si128(merged, 4), firstByte)
d = mm_and_si128(mm_srli_si128(merged, 6), firstByte)
e = mm_and_si128(mm_srli_si128(merged, 8), firstByte)
f = mm_and_si128(mm_srli_si128(merged, 10), firstByte)
g = mm_and_si128(mm_srli_si128(merged, 12), firstByte)
h = mm_and_si128(mm_srli_si128(merged, 14), firstByte)
ab = mm_or_si128(a, mm_slli_si128(b, 1))
cd = mm_or_si128(c, mm_slli_si128(d, 1))
ef = mm_or_si128(e, mm_slli_si128(f, 1))
gh = mm_or_si128(g, mm_slli_si128(h, 1))
abcd = mm_or_si128(ab, mm_slli_si128(cd, 2))
efgh = mm_or_si128(ef, mm_slli_si128(gh, 2))
abcdefgh = mm_or_si128(abcd, mm_slli_si128(efgh, 4))
mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, abcdefgh)
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm_packus_epi16(masked, mm_setzero_si128())
)
x += 8
for x in x ..< result.width:
@ -172,17 +153,14 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
when defined(amd64) and allowSimd:
if scale == 2:
while x <= mask.width - 16:
let
values = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
lo = mm_unpacklo_epi8(values, mm_setzero_si128())
hi = mm_unpacklo_epi8(values, mm_setzero_si128())
let values = mm_loadu_si128(mask.unsafe[x, y].addr)
mm_storeu_si128(
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
mm_or_si128(lo, mm_slli_si128(lo, 1))
mm_unpacklo_epi8(values, values)
)
mm_storeu_si128(
result.data[result.dataIndex(x * scale + 16, y * scale)].addr,
mm_or_si128(hi, mm_slli_si128(hi, 1))
mm_unpackhi_epi8(values, values)
)
x += 16
for x in x ..< mask.width:
@ -237,17 +215,15 @@ proc invert*(mask: Mask) {.raises: [].} =
## Inverts all of the values - creates a negative of the mask.
var i: int
when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi8(cast[int8](255))
let byteLen = mask.data.len
for _ in 0 ..< byteLen div 16:
let index = i
var values = mm_loadu_si128(mask.data[index].addr)
let vec255 = mm_set1_epi8(255)
for _ in 0 ..< mask.data.len div 16:
var values = mm_loadu_si128(mask.data[i].addr)
values = mm_sub_epi8(vec255, values)
mm_storeu_si128(mask.data[index].addr, values)
mm_storeu_si128(mask.data[i].addr, values)
i += 16
for j in i ..< mask.data.len:
mask.data[j] = (255 - mask.data[j]).uint8
mask.data[j] = 255 - mask.data[j]
proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
## Grows the mask by spread.
@ -316,7 +292,7 @@ proc ceil*(mask: Mask) {.raises: [].} =
when defined(amd64) and allowSimd:
let
zeroVec = mm_setzero_si128()
vec255 = mm_set1_epi32(cast[int32](uint32.high))
vec255 = mm_set1_epi8(255)
for _ in 0 ..< mask.data.len div 16:
var values = mm_loadu_si128(mask.data[i].addr)
values = mm_cmpeq_epi8(values, zeroVec)

View file

@ -1336,7 +1336,9 @@ proc fillCoverage(
coverages: seq[uint8],
blendMode: BlendMode
) =
var x = startX
var
x = startX
dataIndex = image.dataIndex(x, y)
when allowSimd:
when defined(amd64):
@ -1373,67 +1375,78 @@ proc fillCoverage(
let colorVec = mm_set1_epi32(cast[int32](rgbx))
proc source(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
if coverage > 0:
if coverage == 255:
result = rgbx
else:
result = rgbx(
((rgbx.r.uint32 * coverage) div 255).uint8,
((rgbx.g.uint32 * coverage) div 255).uint8,
((rgbx.b.uint32 * coverage) div 255).uint8,
((rgbx.a.uint32 * coverage) div 255).uint8
)
if coverage == 0:
discard
elif coverage == 255:
result = rgbx
else:
result = rgbx(
((rgbx.r.uint32 * coverage) div 255).uint8,
((rgbx.g.uint32 * coverage) div 255).uint8,
((rgbx.b.uint32 * coverage) div 255).uint8,
((rgbx.a.uint32 * coverage) div 255).uint8
)
case blendMode:
of OverwriteBlend:
when allowSimd:
when defined(amd64):
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
if not allZeroes:
if allZeroes:
dataIndex += 16
else:
if all255:
for i in 0 ..< 4:
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, colorVec)
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
dataIndex += 4
else:
var coverageVec = coverageVec
for i in 0 ..< 4:
let source = source(colorVec, coverageVec)
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, source)
mm_storeu_si128(image.data[dataIndex].addr, source)
coverageVec = mm_srli_si128(coverageVec, 4)
dataIndex += 4
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0:
image.unsafe[x, y] = source(rgbx, coverage)
image.data[dataIndex] = source(rgbx, coverage)
inc dataIndex
of NormalBlend:
when allowSimd:
when defined(amd64):
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
if not allZeroes:
if allZeroes:
dataIndex += 16
else:
if all255 and rgbx.a == 255:
for i in 0 ..< 4:
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, colorVec)
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
dataIndex += 4
else:
var coverageVec = coverageVec
for i in 0 ..< 4:
let
backdrop = mm_loadu_si128(image.unsafe[x + i * 4, y].addr)
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
source = source(colorVec, coverageVec)
mm_storeu_si128(
image.unsafe[x + i * 4, y].addr,
image.data[dataIndex].addr,
blendNormalSimd(backdrop, source)
)
coverageVec = mm_srli_si128(coverageVec, 4)
dataIndex += 4
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage == 255 and rgbx.a == 255:
image.unsafe[x, y] = rgbx
image.data[dataIndex] = rgbx
elif coverage == 0:
discard
else:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendNormal(backdrop, source(rgbx, coverage))
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendNormal(backdrop, source(rgbx, coverage))
inc dataIndex
of MaskBlend:
{.linearScanEnd.}
@ -1443,31 +1456,34 @@ proc fillCoverage(
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
if not allZeroes:
if all255:
discard
dataIndex += 16
else:
var coverageVec = coverageVec
for i in 0 ..< 4:
let
backdrop = mm_loadu_si128(image.unsafe[x + i * 4, y].addr)
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
source = source(colorVec, coverageVec)
mm_storeu_si128(
image.unsafe[x + i * 4, y].addr,
image.data[dataIndex].addr,
blendMaskSimd(backdrop, source)
)
coverageVec = mm_srli_si128(coverageVec, 4)
dataIndex += 4
else:
for i in 0 ..< 4:
mm_storeu_si128(image.unsafe[x + i * 4, y].addr, mm_setzero_si128())
mm_storeu_si128(image.data[dataIndex].addr, mm_setzero_si128())
dataIndex += 4
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage == 0:
image.unsafe[x, y] = rgbx(0, 0, 0, 0)
image.data[dataIndex] = rgbx(0, 0, 0, 0)
elif coverage == 255:
discard
else:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendMask(backdrop, source(rgbx, coverage))
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendMask(backdrop, source(rgbx, coverage))
inc dataIndex
image.clearUnsafe(0, y, startX, y)
image.clearUnsafe(startX + coverages.len, y, image.width, y)
@ -1476,25 +1492,28 @@ proc fillCoverage(
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage == 255 and rgbx.a == 255:
image.unsafe[x, y] = rgbx(0, 0, 0, 0)
image.data[dataIndex] = rgbx(0, 0, 0, 0)
elif coverage != 0:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendSubtractMask(backdrop, source(rgbx, coverage))
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendSubtractMask(backdrop, source(rgbx, coverage))
inc dataIndex
of ExcludeMaskBlend:
for x in x ..< startX + coverages.len:
let
coverage = coverages[x - startX]
backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendExcludeMask(backdrop, source(rgbx, coverage))
backdrop = image.data[dataIndex]
image.data[dataIndex] = blendExcludeMask(backdrop, source(rgbx, coverage))
inc dataIndex
else:
let blender = blendMode.blender()
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blender(backdrop, source(rgbx, coverage))
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blender(backdrop, source(rgbx, coverage))
inc dataIndex
proc fillCoverage(
mask: Mask,
@ -1502,7 +1521,9 @@ proc fillCoverage(
coverages: seq[uint8],
blendMode: BlendMode
) =
var x = startX
var
x = startX
dataIndex = mask.dataIndex(x, y)
template simdBlob(blendProc: untyped) =
when allowSimd:
@ -1513,19 +1534,21 @@ proc fillCoverage(
eqZero = mm_cmpeq_epi8(coveragesVec, mm_setzero_si128())
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
if not allZeroes:
let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
let backdrop = mm_loadu_si128(mask.data[dataIndex].addr)
mm_storeu_si128(
mask.unsafe[x, y].addr,
mask.data[dataIndex].addr,
blendProc(backdrop, coveragesVec)
)
x += 16
dataIndex += 16
template blendBlob(blendProc: untyped) =
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = blendProc(backdrop, coverage)
let backdrop = mask.data[dataIndex]
mask.data[dataIndex] = blendProc(backdrop, coverage)
inc dataIndex
case blendMode:
of OverwriteBlend:
@ -1540,6 +1563,8 @@ proc fillCoverage(
blendBlob(maskBlendNormal)
of MaskBlend:
{.linearScanEnd.}
when allowSimd:
when defined(amd64):
for _ in 0 ..< coverages.len div 16:
@ -1548,22 +1573,24 @@ proc fillCoverage(
eqZero = mm_cmpeq_epi8(coveragesVec, mm_setzero_si128())
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
if not allZeroes:
let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
let backdrop = mm_loadu_si128(mask.data[dataIndex].addr)
mm_storeu_si128(
mask.unsafe[x, y].addr,
mask.data[dataIndex].addr,
maskBlendMaskSimd(backdrop, coveragesVec)
)
else:
mm_storeu_si128(mask.unsafe[x, y].addr, mm_setzero_si128())
mm_storeu_si128(mask.data[dataIndex].addr, mm_setzero_si128())
x += 16
dataIndex += 16
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlendMask(backdrop, coverage)
let backdrop = mask.data[dataIndex]
mask.data[dataIndex] = maskBlendMask(backdrop, coverage)
else:
mask.unsafe[x, y] = 0
mask.data[dataIndex] = 0
inc dataIndex
mask.clearUnsafe(0, y, startX, y)
mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
@ -1593,10 +1620,15 @@ proc fillHits(
when allowSimd:
when defined(amd64):
let colorVec = mm_set1_epi32(cast[int32](rgbx))
var dataIndex = image.dataIndex(x, y)
for _ in 0 ..< len div 4:
let backdrop = mm_loadu_si128(image.unsafe[x, y].addr)
mm_storeu_si128(image.unsafe[x, y].addr, blendProc(backdrop, colorVec))
let backdrop = mm_loadu_si128(image.data[dataIndex].addr)
mm_storeu_si128(
image.data[dataIndex].addr,
blendProc(backdrop, colorVec)
)
x += 4
dataIndex += 4
case blendMode:
of OverwriteBlend:
@ -1610,9 +1642,11 @@ proc fillHits(
else:
var x = start
simdBlob(image, x, len, blendNormalSimd)
for x in x ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendNormal(backdrop, rgbx)
var dataIndex = image.dataIndex(x, y)
for _ in x ..< start + len:
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendNormal(backdrop, rgbx)
inc dataIndex
of MaskBlend:
{.linearScanEnd.}
@ -1633,34 +1667,41 @@ proc fillHits(
if rgbx.a != 255:
var x = start
simdBlob(image, x, len, blendMaskSimd)
for x in x ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendMask(backdrop, rgbx)
var dataIndex = image.dataIndex(x, y)
for _ in x ..< start + len:
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendMask(backdrop, rgbx)
image.clearUnsafe(0, y, startX, y)
image.clearUnsafe(filledTo, y, image.width, y)
of SubtractMaskBlend:
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
for x in start ..< start + len:
var dataIndex = image.dataIndex(start, y)
for _ in 0 ..< len:
if rgbx.a == 255:
image.unsafe[x, y] = rgbx(0, 0, 0, 0)
image.data[dataIndex] = rgbx(0, 0, 0, 0)
else:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendSubtractMask(backdrop, rgbx)
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendSubtractMask(backdrop, rgbx)
inc dataIndex
of ExcludeMaskBlend:
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
for x in start ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendExcludeMask(backdrop, rgbx)
var dataIndex = image.dataIndex(start, y)
for _ in 0 ..< len:
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blendExcludeMask(backdrop, rgbx)
inc dataIndex
else:
let blender = blendMode.blender()
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
for x in start ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blender(backdrop, rgbx)
var dataIndex = image.dataIndex(start, y)
for _ in 0 ..< len:
let backdrop = image.data[dataIndex]
image.data[dataIndex] = blender(backdrop, rgbx)
inc dataIndex
proc fillHits(
mask: Mask,
@ -1674,10 +1715,15 @@ proc fillHits(
when allowSimd:
when defined(amd64):
let vec255 = mm_set1_epi8(255)
var dataIndex = mask.dataIndex(x, y)
for _ in 0 ..< len div 16:
let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
mm_storeu_si128(mask.unsafe[x, y].addr, blendProc(backdrop, vec255))
let backdrop = mm_loadu_si128(mask.data[dataIndex].addr)
mm_storeu_si128(
mask.data[dataIndex].addr,
blendProc(backdrop, vec255)
)
x += 16
dataIndex += 16
case blendMode:
of NormalBlend, OverwriteBlend:
@ -1685,6 +1731,8 @@ proc fillHits(
fillUnsafe(mask.data, 255, mask.dataIndex(start, y), len)
of MaskBlend:
{.linearScanEnd.}
var filledTo = startX
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
let gapBetween = start - filledTo
@ -1699,17 +1747,21 @@ proc fillHits(
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
var x = start
simdBlob(mask, x, len, maskBlendSubtractSimd)
for x in x ..< start + len:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlendSubtract(backdrop, 255)
var dataIndex = mask.dataIndex(x, y)
for _ in x ..< start + len:
let backdrop = mask.data[dataIndex]
mask.data[dataIndex] = maskBlendSubtract(backdrop, 255)
inc dataIndex
of ExcludeMaskBlend:
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
var x = start
simdBlob(mask, x, len, maskBlendExcludeSimd)
for x in x ..< start + len:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlendExclude(backdrop, 255)
var dataIndex = mask.dataIndex(x, y)
for _ in x ..< start + len:
let backdrop = mask.data[dataIndex]
mask.data[dataIndex] = maskBlendExclude(backdrop, 255)
inc dataIndex
else:
failUnsupportedBlendMode(blendMode)

Binary file not shown.

After

(image error) Size: 1.1 KiB

View file

@ -205,3 +205,13 @@ block:
let minified = mask.minifyBy2()
minified.writeFile("tests/masks/minifiedBlur.png")
# Fill a polygon path into a 100x100 mask, magnify the mask with magnifyBy2
# and write the result to tests/masks/drawPolygonMagnified.png.
block:
let path = newPath()
path.polygon(vec2(50, 50), 30, 6)
let mask = newMask(100, 100)
mask.fillPath(path)
let magnified = mask.magnifyBy2()
magnified.writeFile("tests/masks/drawPolygonMagnified.png")