From 412757d4c075fa9001f59bb3948b0e6d887200dd Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 16:12:52 -0500 Subject: [PATCH 01/20] linearScanEnd --- src/pixie/paths.nim | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 644eaa1..ebb8bdb 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1540,6 +1540,8 @@ proc fillCoverage( blendBlob(maskBlendNormal) of MaskBlend: + {.linearScanEnd.} + when allowSimd: when defined(amd64): for _ in 0 ..< coverages.len div 16: @@ -1685,6 +1687,8 @@ proc fillHits( fillUnsafe(mask.data, 255, mask.dataIndex(start, y), len) of MaskBlend: + {.linearScanEnd.} + var filledTo = startX for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): let gapBetween = start - filledTo From d2b84e76ca9785a607cc03c1b3f11956087124ec Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 19:19:32 -0500 Subject: [PATCH 02/20] better cairo benchmarking --- experiments/benchmark_cairo.nim | 492 +++++++---------------- experiments/benchmark_cairo_old.nim | 397 +++++++++++++++++++ experiments/benchmark_svg_cairo.nim | 6 - experiments/svg_cairo.nim | 587 ---------------------------- 4 files changed, 530 insertions(+), 952 deletions(-) create mode 100644 experiments/benchmark_cairo_old.nim delete mode 100644 experiments/benchmark_svg_cairo.nim delete mode 100644 experiments/svg_cairo.nim diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index 52f63b4..f8bbeae 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -1,264 +1,55 @@ -import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat +import benchy, cairo, pixie, pixie/fileformats/svg {.all.}, pixie/paths {.all.} -when defined(amd64) and not defined(pixieNoSimd): - import nimsimd/sse2, pixie/internal +type + Fill = object + shapes: seq[Polygon] + transform: Mat3 + paint: Paint + windingRule: WindingRule -proc doDiff(a, b: Image, name: string) = - let (diffScore, diffImage) = diff(a, b) - echo &"{name} score: {diffScore}" - diffImage.writeFile(&"{name}_diff.png") + Benchmark = object + name: string + fills: seq[Fill] -when defined(release): - {.push checks: off.} +var benchmarks: seq[Benchmark] -proc fillMask( - shapes: seq[seq[Vec2]], width, height: int, windingRule = NonZero -): Mask = - result = newMask(width, height) - - let - segments = shapes.shapesToSegments() - bounds = computeBounds(segments).snapToPixels() - startY = max(0, bounds.y.int) - pathHeight = min(height, (bounds.y + bounds.h).int) - partitioning = partitionSegments(segments, startY, pathHeight) - width = width.float32 - - var - hits = newSeq[(float32, int16)](partitioning.maxEntryCount) - numHits: int - aa: bool - for y in startY ..< pathHeight: - computeCoverage( - cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), - hits, - numHits, - aa, - width, - y, - 0, - partitioning, - windingRule - ) - if not aa: - for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): - let - startIndex = result.dataIndex(prevAt.int, y) - len = at.int - prevAt.int - fillUnsafe(result.data, 255, startIndex, len) - -proc fillMask*( - path: SomePath, width, height: int, windingRule = NonZero -): Mask = - ## Returns a new mask with the path filled. This is a faster alternative - ## to `newMask` + `fillPath`. - let shapes = parseSomePath(path, true, 1) - shapes.fillMask(width, height, windingRule) - -proc fillImage( - shapes: seq[seq[Vec2]], - width, height: int, - color: SomeColor, - windingRule = NonZero -): Image = - result = newImage(width, height) - - let - mask = shapes.fillMask(width, height, windingRule) - rgbx = color.rgbx() - - var i: int - when defined(amd64) and not defined(pixieNoSimd): - let - colorVec = mm_set1_epi32(cast[int32](rgbx)) - oddMask = mm_set1_epi16(cast[int16](0xff00)) - div255 = mm_set1_epi16(cast[int16](0x8081)) - vec255 = mm_set1_epi32(cast[int32](uint32.high)) - vecZero = mm_setzero_si128() - colorVecEven = mm_slli_epi16(colorVec, 8) - colorVecOdd = mm_and_si128(colorVec, oddMask) - iterations = result.data.len div 16 - for _ in 0 ..< iterations: - var coverageVec = mm_loadu_si128(mask.data[i].addr) - if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: - if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: - for q in [0, 4, 8, 12]: - mm_storeu_si128(result.data[i + q].addr, colorVec) - else: - for q in [0, 4, 8, 12]: - var unpacked = unpackAlphaValues(coverageVec) - # Shift the coverages from `a` to `g` and `a` for multiplying - unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) - - var - sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) - sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) - sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) - sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) - - mm_storeu_si128( - result.data[i + q].addr, - mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) - ) - - coverageVec = mm_srli_si128(coverageVec, 4) - - i += 16 - - let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] - for i in i ..< result.data.len: - let coverage = mask.data[i] - if coverage == 255: - result.data[i] = rgbx - elif coverage != 0: - result.data[i].r = ((channels[0] * coverage) div 255).uint8 - result.data[i].g = ((channels[1] * coverage) div 255).uint8 - result.data[i].b = ((channels[2] * coverage) div 255).uint8 - result.data[i].a = ((channels[3] * coverage) div 255).uint8 - -proc fillImage*( - path: SomePath, width, height: int, color: SomeColor, windingRule = NonZero -): Image = - ## Returns a new image with the path filled. This is a faster alternative - ## to `newImage` + `fillPath`. - let shapes = parseSomePath(path, false, 1) - shapes.fillImage(width, height, color, windingRule) - -proc strokeMask*( - path: SomePath, - width, height: int, - strokeWidth: float32 = 1.0, - lineCap = ButtCap, - lineJoin = MiterJoin, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Mask = - ## Returns a new mask with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillMask(width, height, NonZero) - -proc strokeImage*( - path: SomePath, - width, height: int, - color: SomeColor, - strokeWidth: float32 = 1.0, - lineCap = ButtCap, - lineJoin = MiterJoin, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Image = - ## Returns a new image with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillImage(width, height, color, NonZero) - -when defined(release): - {.pop.} - - -block: +block: # Basic rect let path = newPath() - path.moveTo(0, 0) - path.lineTo(1920, 0) - path.lineTo(1920, 1080) - path.lineTo(0, 1080) - path.closePath() - - let shapes = path.commandsToShapes(true, 1) + path.rect(rect(0, 0, 900, 900)) let - surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080) - ctx = surface.create() - ctx.setSourceRgba(0, 0, 1, 1) + shapes = path.commandsToShapes(true, 1) + paint = newPaint(SolidPaint) + paint.color = color(0, 0, 0, 1) - timeIt "cairo1": - ctx.newPath() - ctx.moveTo(shapes[0][0].x, shapes[0][0].y) - for shape in shapes: - for v in shape: - ctx.lineTo(v.x, v.y) - ctx.fill() - surface.flush() + benchmarks.add(Benchmark( + name: "rect", + fills: @[Fill( + shapes: shapes, + transform: mat3(), + paint: paint, + windingRule: NonZero + )])) - # discard surface.writeToPng("cairo1.png") - - let a = newImage(1920, 1080) - - timeIt "pixie1": - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - a.fillPath(p, rgbx(0, 0, 255, 255)) - - # a.writeFile("pixie1.png") - -block: +block: # Rounded rect let path = newPath() - path.moveTo(500, 240) - path.lineTo(1500, 240) - path.lineTo(1920, 600) - path.lineTo(0, 600) - path.closePath() - - let shapes = path.commandsToShapes(true, 1) + path.roundedRect(rect(0, 0, 900, 900), 20, 20, 20, 20) let - surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080) - ctx = surface.create() + shapes = path.commandsToShapes(true, 1) + paint = newPaint(SolidPaint) + paint.color = color(0, 0, 0, 1) - timeIt "cairo2": - ctx.setSourceRgba(1, 1, 1, 1) - let operator = ctx.getOperator() - ctx.setOperator(OperatorSource) - ctx.paint() - ctx.setOperator(operator) + benchmarks.add(Benchmark( + name: "roundedRect", + fills: @[Fill( + shapes: shapes, + transform: mat3(), + paint: paint, + windingRule: NonZero + )])) - ctx.setSourceRgba(0, 0, 1, 1) - - ctx.newPath() - ctx.moveTo(shapes[0][0].x, shapes[0][0].y) - for shape in shapes: - for v in shape: - ctx.lineTo(v.x, v.y) - ctx.fill() - surface.flush() - - # discard surface.writeToPng("cairo2.png") - - let a = newImage(1920, 1080) - - timeIt "pixie2": - a.fill(rgbx(255, 255, 255, 255)) - - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - a.fillPath(p, rgbx(0, 0, 255, 255)) - - # a.writeFile("pixie2.png") - -block: +block: # Heart let path = parsePath(""" M 100,300 A 200,200 0,0,1 500,300 @@ -267,131 +58,114 @@ block: Q 100,600 100,300 z """) - let shapes = path.commandsToShapes(true, 1) - let - surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) - ctx = surface.create() + shapes = path.commandsToShapes(true, 1) + paint = newPaint(SolidPaint) + paint.color = color(0, 0, 0, 1) - timeIt "cairo3": - ctx.setSourceRgba(1, 1, 1, 1) - let operator = ctx.getOperator() - ctx.setOperator(OperatorSource) - ctx.paint() - ctx.setOperator(operator) + benchmarks.add(Benchmark( + name: "Heart", + fills: @[Fill( + shapes: shapes, + transform: mat3(), + paint: paint, + windingRule: NonZero + )])) - ctx.setSourceRgba(1, 0, 0, 1) +block: # Tiger + let + data = readFile("tests/fileformats/svg/Ghostscript_Tiger.svg") + parsed = parseSvg(data) - ctx.newPath() - ctx.moveTo(shapes[0][0].x, shapes[0][0].y) - for shape in shapes: - for v in shape: - ctx.lineTo(v.x, v.y) - ctx.fill() - surface.flush() + var fills: seq[Fill] - # discard surface.writeToPng("cairo3.png") + for (path, props) in parsed.elements: + if props.display and props.opacity > 0: + if props.fill != "none": + let + shapes = path.commandsToShapes(true, 1) + paint = parseSomePaint(props.fill) + fills.add(Fill( + shapes: shapes, + transform: props.transform, + paint: paint, + windingRule: props.fillRule + )) - let a = newImage(1000, 1000) + if props.stroke != rgbx(0, 0, 0, 0) and props.strokeWidth > 0: + let strokeShapes = strokeShapes( + parseSomePath(path, false, props.transform.pixelScale), + props.strokeWidth, + props.strokeLineCap, + props.strokeLineJoin, + props.strokeMiterLimit, + props.strokeDashArray, + props.transform.pixelScale + ) + let paint = newPaint(props.stroke) + paint.color.a *= (props.opacity * props.strokeOpacity) + fills.add(Fill( + shapes: strokeShapes, + transform: props.transform, + paint: paint, + windingRule: NonZero + )) - timeIt "pixie3": - a.fill(rgbx(255, 255, 255, 255)) - - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - a.fillPath(p, rgbx(255, 0, 0, 255)) - - # a.writeFile("pixie3.png") - - # doDiff(readImage("cairo3.png"), a, "cairo3") + # benchmarks.add(fills) block: - let path = newPath() - path.roundedRect(200, 200, 600, 600, 10, 10, 10, 10) - - let shapes = path.commandsToShapes(true, 1) - - # let - # surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) - # ctx = surface.create() - - # timeIt "cairo4": - # ctx.setSourceRgba(0, 0, 0, 0) - # let operator = ctx.getOperator() - # ctx.setOperator(OperatorSource) - # ctx.paint() - # ctx.setOperator(operator) - - timeIt "cairo4": + for benchmark in benchmarks: let - surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) + surface = imageSurfaceCreate(FORMAT_ARGB32, 900, 900) ctx = surface.create() - ctx.setSourceRgba(1, 0, 0, 0.5) + timeIt "[cairo] " & benchmark.name: + for fill in benchmark.fills: + if fill.shapes.len > 0: + ctx.newPath() + for shape in fill.shapes: + ctx.moveTo(shape[0].x, shape[0].y) + for v in shape: + ctx.lineTo(v.x, v.y) + let + color = fill.paint.color + matrix = Matrix( + xx: fill.transform[0, 0], + yx: fill.transform[0, 1], + xy: fill.transform[1, 0], + yy: fill.transform[1, 1], + x0: fill.transform[2, 0], + y0: fill.transform[2, 1], + ) + ctx.setSourceRgba(color.r, color.g, color.b, color.a) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setFillRule( + if fill.windingRule == NonZero: + FillRuleWinding + else: + FillRuleEvenOdd + ) + ctx.fill() - ctx.newPath() - ctx.moveTo(shapes[0][0].x, shapes[0][0].y) - for shape in shapes: - for v in shape: - ctx.lineTo(v.x, v.y) - ctx.fill() - surface.flush() + # discard surface.writeToPng(("cairo_" & benchmark.name & ".png").cstring) - # discard surface.writeToPng("cairo4.png") +block: + for benchmark in benchmarks: + let image = newImage(900, 900) - var a: Image - timeIt "pixie4": - a = newImage(1000, 1000) + timeIt "[pixie] " & benchmark.name: + for fill in benchmark.fills: + if fill.shapes.len > 0: + let p = newPath() + for shape in fill.shapes: + p.moveTo(shape[0]) + for v in shape: + p.lineTo(v) + image.fillPath( + p, + fill.paint, + fill.transform, + fill.windingRule + ) - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - a.fillPath(p, rgbx(127, 0, 0, 127)) - - # a.writeFile("pixie4.png") - - # doDiff(readImage("cairo4.png"), a, "4") - - var b: Image - let paint = newPaint(SolidPaint) - paint.color = color(1, 0, 0, 0.5) - paint.blendMode = OverwriteBlend - - timeIt "pixie4 overwrite": - b = newImage(1000, 1000) - - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - b.fillPath(p, paint) - - # b.writeFile("b.png") - - timeIt "pixie4 mask": - let mask = newMask(1000, 1000) - - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - mask.fillPath(p) - - var tmp: Image - timeIt "pixie fillImage": - let p = newPath() - p.moveTo(shapes[0][0]) - for shape in shapes: - for v in shape: - p.lineTo(v) - - tmp = p.fillImage(1000, 1000, rgbx(127, 0, 0, 127)) - - # tmp.writeFile("tmp.png") + # image.writeFile("pixie_" & benchmark.name & ".png") diff --git a/experiments/benchmark_cairo_old.nim b/experiments/benchmark_cairo_old.nim new file mode 100644 index 0000000..52f63b4 --- /dev/null +++ b/experiments/benchmark_cairo_old.nim @@ -0,0 +1,397 @@ +import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat + +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2, pixie/internal + +proc doDiff(a, b: Image, name: string) = + let (diffScore, diffImage) = diff(a, b) + echo &"{name} score: {diffScore}" + diffImage.writeFile(&"{name}_diff.png") + +when defined(release): + {.push checks: off.} + +proc fillMask( + shapes: seq[seq[Vec2]], width, height: int, windingRule = NonZero +): Mask = + result = newMask(width, height) + + let + segments = shapes.shapesToSegments() + bounds = computeBounds(segments).snapToPixels() + startY = max(0, bounds.y.int) + pathHeight = min(height, (bounds.y + bounds.h).int) + partitioning = partitionSegments(segments, startY, pathHeight) + width = width.float32 + + var + hits = newSeq[(float32, int16)](partitioning.maxEntryCount) + numHits: int + aa: bool + for y in startY ..< pathHeight: + computeCoverage( + cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), + hits, + numHits, + aa, + width, + y, + 0, + partitioning, + windingRule + ) + if not aa: + for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): + let + startIndex = result.dataIndex(prevAt.int, y) + len = at.int - prevAt.int + fillUnsafe(result.data, 255, startIndex, len) + +proc fillMask*( + path: SomePath, width, height: int, windingRule = NonZero +): Mask = + ## Returns a new mask with the path filled. This is a faster alternative + ## to `newMask` + `fillPath`. + let shapes = parseSomePath(path, true, 1) + shapes.fillMask(width, height, windingRule) + +proc fillImage( + shapes: seq[seq[Vec2]], + width, height: int, + color: SomeColor, + windingRule = NonZero +): Image = + result = newImage(width, height) + + let + mask = shapes.fillMask(width, height, windingRule) + rgbx = color.rgbx() + + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let + colorVec = mm_set1_epi32(cast[int32](rgbx)) + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + vec255 = mm_set1_epi32(cast[int32](uint32.high)) + vecZero = mm_setzero_si128() + colorVecEven = mm_slli_epi16(colorVec, 8) + colorVecOdd = mm_and_si128(colorVec, oddMask) + iterations = result.data.len div 16 + for _ in 0 ..< iterations: + var coverageVec = mm_loadu_si128(mask.data[i].addr) + if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: + if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: + for q in [0, 4, 8, 12]: + mm_storeu_si128(result.data[i + q].addr, colorVec) + else: + for q in [0, 4, 8, 12]: + var unpacked = unpackAlphaValues(coverageVec) + # Shift the coverages from `a` to `g` and `a` for multiplying + unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) + + var + sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) + sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) + sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) + sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) + + mm_storeu_si128( + result.data[i + q].addr, + mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) + ) + + coverageVec = mm_srli_si128(coverageVec, 4) + + i += 16 + + let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] + for i in i ..< result.data.len: + let coverage = mask.data[i] + if coverage == 255: + result.data[i] = rgbx + elif coverage != 0: + result.data[i].r = ((channels[0] * coverage) div 255).uint8 + result.data[i].g = ((channels[1] * coverage) div 255).uint8 + result.data[i].b = ((channels[2] * coverage) div 255).uint8 + result.data[i].a = ((channels[3] * coverage) div 255).uint8 + +proc fillImage*( + path: SomePath, width, height: int, color: SomeColor, windingRule = NonZero +): Image = + ## Returns a new image with the path filled. This is a faster alternative + ## to `newImage` + `fillPath`. + let shapes = parseSomePath(path, false, 1) + shapes.fillImage(width, height, color, windingRule) + +proc strokeMask*( + path: SomePath, + width, height: int, + strokeWidth: float32 = 1.0, + lineCap = ButtCap, + lineJoin = MiterJoin, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Mask = + ## Returns a new mask with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillMask(width, height, NonZero) + +proc strokeImage*( + path: SomePath, + width, height: int, + color: SomeColor, + strokeWidth: float32 = 1.0, + lineCap = ButtCap, + lineJoin = MiterJoin, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Image = + ## Returns a new image with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillImage(width, height, color, NonZero) + +when defined(release): + {.pop.} + + +block: + let path = newPath() + path.moveTo(0, 0) + path.lineTo(1920, 0) + path.lineTo(1920, 1080) + path.lineTo(0, 1080) + path.closePath() + + let shapes = path.commandsToShapes(true, 1) + + let + surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080) + ctx = surface.create() + ctx.setSourceRgba(0, 0, 1, 1) + + timeIt "cairo1": + ctx.newPath() + ctx.moveTo(shapes[0][0].x, shapes[0][0].y) + for shape in shapes: + for v in shape: + ctx.lineTo(v.x, v.y) + ctx.fill() + surface.flush() + + # discard surface.writeToPng("cairo1.png") + + let a = newImage(1920, 1080) + + timeIt "pixie1": + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + a.fillPath(p, rgbx(0, 0, 255, 255)) + + # a.writeFile("pixie1.png") + +block: + let path = newPath() + path.moveTo(500, 240) + path.lineTo(1500, 240) + path.lineTo(1920, 600) + path.lineTo(0, 600) + path.closePath() + + let shapes = path.commandsToShapes(true, 1) + + let + surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080) + ctx = surface.create() + + timeIt "cairo2": + ctx.setSourceRgba(1, 1, 1, 1) + let operator = ctx.getOperator() + ctx.setOperator(OperatorSource) + ctx.paint() + ctx.setOperator(operator) + + ctx.setSourceRgba(0, 0, 1, 1) + + ctx.newPath() + ctx.moveTo(shapes[0][0].x, shapes[0][0].y) + for shape in shapes: + for v in shape: + ctx.lineTo(v.x, v.y) + ctx.fill() + surface.flush() + + # discard surface.writeToPng("cairo2.png") + + let a = newImage(1920, 1080) + + timeIt "pixie2": + a.fill(rgbx(255, 255, 255, 255)) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + a.fillPath(p, rgbx(0, 0, 255, 255)) + + # a.writeFile("pixie2.png") + +block: + let path = parsePath(""" + M 100,300 + A 200,200 0,0,1 500,300 + A 200,200 0,0,1 900,300 + Q 900,600 500,900 + Q 100,600 100,300 z + """) + + let shapes = path.commandsToShapes(true, 1) + + let + surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) + ctx = surface.create() + + timeIt "cairo3": + ctx.setSourceRgba(1, 1, 1, 1) + let operator = ctx.getOperator() + ctx.setOperator(OperatorSource) + ctx.paint() + ctx.setOperator(operator) + + ctx.setSourceRgba(1, 0, 0, 1) + + ctx.newPath() + ctx.moveTo(shapes[0][0].x, shapes[0][0].y) + for shape in shapes: + for v in shape: + ctx.lineTo(v.x, v.y) + ctx.fill() + surface.flush() + + # discard surface.writeToPng("cairo3.png") + + let a = newImage(1000, 1000) + + timeIt "pixie3": + a.fill(rgbx(255, 255, 255, 255)) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + a.fillPath(p, rgbx(255, 0, 0, 255)) + + # a.writeFile("pixie3.png") + + # doDiff(readImage("cairo3.png"), a, "cairo3") + +block: + let path = newPath() + path.roundedRect(200, 200, 600, 600, 10, 10, 10, 10) + + let shapes = path.commandsToShapes(true, 1) + + # let + # surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) + # ctx = surface.create() + + # timeIt "cairo4": + # ctx.setSourceRgba(0, 0, 0, 0) + # let operator = ctx.getOperator() + # ctx.setOperator(OperatorSource) + # ctx.paint() + # ctx.setOperator(operator) + + timeIt "cairo4": + let + surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) + ctx = surface.create() + + ctx.setSourceRgba(1, 0, 0, 0.5) + + ctx.newPath() + ctx.moveTo(shapes[0][0].x, shapes[0][0].y) + for shape in shapes: + for v in shape: + ctx.lineTo(v.x, v.y) + ctx.fill() + surface.flush() + + # discard surface.writeToPng("cairo4.png") + + var a: Image + timeIt "pixie4": + a = newImage(1000, 1000) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + a.fillPath(p, rgbx(127, 0, 0, 127)) + + # a.writeFile("pixie4.png") + + # doDiff(readImage("cairo4.png"), a, "4") + + var b: Image + let paint = newPaint(SolidPaint) + paint.color = color(1, 0, 0, 0.5) + paint.blendMode = OverwriteBlend + + timeIt "pixie4 overwrite": + b = newImage(1000, 1000) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + b.fillPath(p, paint) + + # b.writeFile("b.png") + + timeIt "pixie4 mask": + let mask = newMask(1000, 1000) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + mask.fillPath(p) + + var tmp: Image + timeIt "pixie fillImage": + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + + tmp = p.fillImage(1000, 1000, rgbx(127, 0, 0, 127)) + + # tmp.writeFile("tmp.png") diff --git a/experiments/benchmark_svg_cairo.nim b/experiments/benchmark_svg_cairo.nim deleted file mode 100644 index 6355169..0000000 --- a/experiments/benchmark_svg_cairo.nim +++ /dev/null @@ -1,6 +0,0 @@ -import benchy, svg_cairo - -let data = readFile("tests/fileformats/svg/Ghostscript_Tiger.svg") - -timeIt "svg decode": - discard decodeSvg(data) diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim deleted file mode 100644 index ca0020c..0000000 --- a/experiments/svg_cairo.nim +++ /dev/null @@ -1,587 +0,0 @@ -## Load and Save SVG files. - -import cairo, chroma, pixie/common, pixie/images, pixie/paints, strutils, - tables, vmath, xmlparser, xmltree - -include pixie/paths - -proc processCommands( - c: ptr Context, path: Path, closeSubpaths: bool, mat: Mat3 -) = - let shapes = path.commandsToShapes(closeSubpaths, mat.pixelScale()) - if shapes.len == 0: - return - - c.newPath() - c.moveTo(shapes[0][0].x, shapes[0][0].y) - for shape in shapes: - for v in shape: - c.lineTo(v.x, v.y) - -proc prepare( - c: ptr Context, - path: Path, - paint: Paint, - mat: Mat3, - closeSubpaths: bool, - windingRule = NonZero -) = - let - color = paint.color - matrix = Matrix( - xx: mat[0, 0], - yx: mat[0, 1], - xy: mat[1, 0], - yy: mat[1, 1], - x0: mat[2, 0], - y0: mat[2, 1], - ) - c.setSourceRgba(color.r, color.g, color.b, color.a) - c.setMatrix(matrix.unsafeAddr) - case windingRule: - of NonZero: - c.setFillRule(FillRuleWinding) - else: - c.setFillRule(FillRuleEvenOdd) - c.processCommands(path, closeSubpaths, mat) - -type - LinearGradient = object - x1, y1, x2, y2: float32 - stops: seq[ColorStop] - - Ctx = object - display: bool - fillRule: WindingRule - fill: Paint - stroke: ColorRGBX - strokeWidth: float32 - strokeLineCap: LineCap - strokeLineJoin: LineJoin - strokeMiterLimit: float32 - strokeDashArray: seq[float32] - transform: Mat3 - shouldStroke: bool - opacity, strokeOpacity: float32 - linearGradients: TableRef[string, LinearGradient] - -template failInvalid() = - raise newException(PixieError, "Invalid SVG data") - -proc attrOrDefault(node: XmlNode, name, default: string): string = - result = node.attr(name) - if result.len == 0: - result = default - -proc initCtx(): Ctx = - result.display = true - try: - result.fill = parseHtmlColor("black").rgbx - result.stroke = parseHtmlColor("black").rgbx - except: - raise currentExceptionAsPixieError() - result.strokeWidth = 1 - result.transform = mat3() - result.strokeMiterLimit = defaultMiterLimit - result.opacity = 1 - result.strokeOpacity = 1 - result.linearGradients = newTable[string, LinearGradient]() - -proc decodeCtxInternal(inherited: Ctx, node: XmlNode): Ctx = - result = inherited - - proc splitArgs(s: string): seq[string] = - # Handles (1,1) or (1 1) or (1, 1) or (1,1 2,2) etc - let tmp = s.replace(',', ' ').split(' ') - for entry in tmp: - if entry.len > 0: - result.add(entry) - - var - fillRule = node.attr("fill-rule") - fill = node.attr("fill") - stroke = node.attr("stroke") - strokeWidth = node.attr("stroke-width") - strokeLineCap = node.attr("stroke-linecap") - strokeLineJoin = node.attr("stroke-linejoin") - strokeMiterLimit = node.attr("stroke-miterlimit") - strokeDashArray = node.attr("stroke-dasharray") - transform = node.attr("transform") - style = node.attr("style") - display = node.attr("display") - opacity = node.attr("opacity") - fillOpacity = node.attr("fill-opacity") - strokeOpacity = node.attr("stroke-opacity") - - let pairs = style.split(';') - for pair in pairs: - let parts = pair.split(':') - if parts.len == 2: - # Do not override element properties - case parts[0].strip(): - of "fill-rule": - if fillRule.len == 0: - fillRule = parts[1].strip() - of "fill": - if fill.len == 0: - fill = parts[1].strip() - of "stroke": - if stroke.len == 0: - stroke = parts[1].strip() - of "stroke-linecap": - if strokeLineCap.len == 0: - strokeLineCap = parts[1].strip() - of "stroke-linejoin": - if strokeLineJoin.len == 0: - strokeLineJoin = parts[1].strip() - of "stroke-width": - if strokeWidth.len == 0: - strokeWidth = parts[1].strip() - of "stroke-miterlimit": - if strokeMiterLimit.len == 0: - strokeMiterLimit = parts[1].strip() - of "stroke-dasharray": - if strokeDashArray.len == 0: - strokeDashArray = parts[1].strip() - of "display": - if display.len == 0: - display = parts[1].strip() - of "opacity": - if opacity.len == 0: - opacity = parts[1].strip() - of "fillOpacity": - if fillOpacity.len == 0: - fillOpacity = parts[1].strip() - of "strokeOpacity": - if strokeOpacity.len == 0: - strokeOpacity = parts[1].strip() - else: - discard - elif pair.len > 0: - when defined(pixieDebugSvg): - echo "Invalid style pair: ", pair - - if display.len > 0: - result.display = display.strip() != "none" - - if opacity.len > 0: - result.opacity = clamp(parseFloat(opacity), 0, 1) - - if fillOpacity.len > 0: - result.fill.opacity = clamp(parseFloat(fillOpacity), 0, 1) - - if strokeOpacity.len > 0: - result.strokeOpacity = clamp(parseFloat(strokeOpacity), 0, 1) - - if fillRule == "": - discard # Inherit - elif fillRule == "nonzero": - result.fillRule = NonZero - elif fillRule == "evenodd": - result.fillRule = EvenOdd - else: - raise newException( - PixieError, "Invalid fill-rule value " & fillRule - ) - - if fill == "" or fill == "currentColor": - discard # Inherit - elif fill == "none": - result.fill = ColorRGBX() - elif fill.startsWith("url("): - let id = fill[5 .. ^2] - if id in result.linearGradients: - let linearGradient = result.linearGradients[id] - result.fill = newPaint(LinearGradientPaint) - result.fill.gradientHandlePositions = @[ - result.transform * vec2(linearGradient.x1, linearGradient.y1), - result.transform * vec2(linearGradient.x2, linearGradient.y2) - ] - result.fill.gradientStops = linearGradient.stops - else: - raise newException(PixieError, "Missing SVG resource " & id) - else: - result.fill = parseHtmlColor(fill).rgbx - - if stroke == "": - discard # Inherit - elif stroke == "currentColor": - result.shouldStroke = true - elif stroke == "none": - result.stroke = ColorRGBX() - else: - result.stroke = parseHtmlColor(stroke).rgbx - result.shouldStroke = true - - if strokeWidth == "": - discard # Inherit - else: - if strokeWidth.endsWith("px"): - strokeWidth = strokeWidth[0 .. ^3] - result.strokeWidth = parseFloat(strokeWidth) - result.shouldStroke = true - - if result.stroke == ColorRGBX() or result.strokeWidth <= 0: - result.shouldStroke = false - - if strokeLineCap == "": - discard # Inherit - else: - case strokeLineCap: - of "butt": - result.strokeLineCap = ButtCap - of "round": - result.strokeLineCap = RoundCap - of "square": - result.strokeLineCap = SquareCap - of "inherit": - discard - else: - raise newException( - PixieError, "Invalid stroke-linecap value " & strokeLineCap - ) - - if strokeLineJoin == "": - discard # Inherit - else: - case strokeLineJoin: - of "miter": - result.strokeLineJoin = MiterJoin - of "round": - result.strokeLineJoin = RoundJoin - of "bevel": - result.strokeLineJoin = BevelJoin - of "inherit": - discard - else: - raise newException( - PixieError, "Invalid stroke-linejoin value " & strokeLineJoin - ) - - if strokeMiterLimit == "": - discard - else: - result.strokeMiterLimit = parseFloat(strokeMiterLimit) - - if strokeDashArray == "": - discard - else: - var values = splitArgs(strokeDashArray) - for value in values: - result.strokeDashArray.add(parseFloat(value)) - - if transform == "": - discard # Inherit - else: - template failInvalidTransform(transform: string) = - raise newException( - PixieError, "Unsupported SVG transform: " & transform - ) - - var remaining = transform - while remaining.len > 0: - let index = remaining.find(")") - if index == -1: - failInvalidTransform(transform) - let f = remaining[0 .. index].strip() - remaining = remaining[index + 1 .. ^1] - - if f.startsWith("matrix("): - let arr = splitArgs(f[7 .. ^2]) - if arr.len != 6: - failInvalidTransform(transform) - var m = mat3() - m[0, 0] = parseFloat(arr[0]) - m[0, 1] = parseFloat(arr[1]) - m[1, 0] = parseFloat(arr[2]) - m[1, 1] = parseFloat(arr[3]) - m[2, 0] = parseFloat(arr[4]) - m[2, 1] = parseFloat(arr[5]) - result.transform = result.transform * m - elif f.startsWith("translate("): - let - components = splitArgs(f[10 .. ^2]) - tx = parseFloat(components[0]) - ty = - if components.len == 1: - 0.0 - else: - parseFloat(components[1]) - result.transform = result.transform * translate(vec2(tx, ty)) - elif f.startsWith("rotate("): - let - values = splitArgs(f[7 .. ^2]) - angle: float32 = parseFloat(values[0]) * -PI / 180 - var cx, cy: float32 - if values.len > 1: - cx = parseFloat(values[1]) - if values.len > 2: - cy = parseFloat(values[2]) - let center = vec2(cx, cy) - result.transform = result.transform * - translate(center) * rotate(angle) * translate(-center) - elif f.startsWith("scale("): - let - values = splitArgs(f[6 .. ^2]) - sx: float32 = parseFloat(values[0]) - sy: float32 = - if values.len > 1: - parseFloat(values[1]) - else: - sx - result.transform = result.transform * scale(vec2(sx, sy)) - else: - failInvalidTransform(transform) - -proc decodeCtx(inherited: Ctx, node: XmlNode): Ctx = - try: - decodeCtxInternal(inherited, node) - except PixieError as e: - raise e - except: - raise currentExceptionAsPixieError() - -proc cairoLineCap(lineCap: LineCap): cairo.LineCap = - case lineCap: - of ButtCap: - LineCapButt - of RoundCap: - LineCapRound - of SquareCap: - LineCapSquare - -proc cairoLineJoin(lineJoin: LineJoin): cairo.LineJoin = - case lineJoin: - of MiterJoin: - LineJoinMiter - of BevelJoin: - LineJoinBevel - of RoundJoin: - LineJoinRound - -proc fill(c: ptr Context, ctx: Ctx, path: Path) {.inline.} = - if ctx.display and ctx.opacity > 0: - let paint = newPaint(ctx.fill) - paint.opacity = paint.opacity * ctx.opacity - prepare(c, path, paint, ctx.transform, true, ctx.fillRule) - c.fill() - -proc stroke(c: ptr Context, ctx: Ctx, path: Path) {.inline.} = - if ctx.display and ctx.opacity > 0: - let paint = newPaint(ctx.stroke) - paint.color.a *= (ctx.opacity * ctx.strokeOpacity) - prepare(c, path, paint, ctx.transform, false) - c.setLineWidth(ctx.strokeWidth) - c.setLineCap(ctx.strokeLineCap.cairoLineCap()) - c.setLineJoin(ctx.strokeLineJoin.cairoLineJoin()) - c.setMiterLimit(ctx.strokeMiterLimit) - c.stroke() - -proc drawInternal(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) = - if node.kind != xnElement: - # Skip - return - - case node.tag: - of "title", "desc", "defs": - discard - - of "g": - let ctx = decodeCtx(ctxStack[^1], node) - ctxStack.add(ctx) - for child in node: - img.drawInternal(child, ctxStack) - discard ctxStack.pop() - - of "path": - let - d = node.attr("d") - ctx = decodeCtx(ctxStack[^1], node) - path = parsePath(d) - - img.fill(ctx, path) - if ctx.shouldStroke: - img.stroke(ctx, path) - - of "line": - let - ctx = decodeCtx(ctxStack[^1], node) - x1 = parseFloat(node.attrOrDefault("x1", "0")) - y1 = parseFloat(node.attrOrDefault("y1", "0")) - x2 = parseFloat(node.attrOrDefault("x2", "0")) - y2 = parseFloat(node.attrOrDefault("y2", "0")) - - let path = newPath() - path.moveTo(x1, y1) - path.lineTo(x2, y2) - - if ctx.shouldStroke: - img.stroke(ctx, path) - - of "polyline", "polygon": - let - ctx = decodeCtx(ctxStack[^1], node) - points = node.attr("points") - - var vecs: seq[Vec2] - if points.contains(","): - for pair in points.split(" "): - let parts = pair.split(",") - if parts.len != 2: - failInvalid() - vecs.add(vec2(parseFloat(parts[0]), parseFloat(parts[1]))) - else: - let points = points.split(" ") - if points.len mod 2 != 0: - failInvalid() - for i in 0 ..< points.len div 2: - vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1]))) - - if vecs.len == 0: - failInvalid() - - let path = newPath() - path.moveTo(vecs[0]) - for i in 1 ..< vecs.len: - path.lineTo(vecs[i]) - - # The difference between polyline and polygon is whether we close the path - # and fill or not - if node.tag == "polygon": - path.closePath() - img.fill(ctx, path) - - if ctx.shouldStroke: - img.stroke(ctx, path) - - of "rect": - let - ctx = decodeCtx(ctxStack[^1], node) - x = parseFloat(node.attrOrDefault("x", "0")) - y = parseFloat(node.attrOrDefault("y", "0")) - width = parseFloat(node.attrOrDefault("width", "0")) - height = parseFloat(node.attrOrDefault("height", "0")) - - if width == 0 or height == 0: - return - - var - rx = max(parseFloat(node.attrOrDefault("rx", "0")), 0) - ry = max(parseFloat(node.attrOrDefault("ry", "0")), 0) - - let path = newPath() - if rx > 0 or ry > 0: - if rx == 0: - rx = ry - elif ry == 0: - ry = rx - rx = min(rx, width / 2) - ry = min(ry, height / 2) - - path.moveTo(x + rx, y) - path.lineTo(x + width - rx, y) - path.ellipticalArcTo(rx, ry, 0, false, true, x + width, y + ry) - path.lineTo(x + width, y + height - ry) - path.ellipticalArcTo(rx, ry, 0, false, true, x + width - rx, y + height) - path.lineTo(x + rx, y + height) - path.ellipticalArcTo(rx, ry, 0, false, true, x, y + height - ry) - path.lineTo(x, y + ry) - path.ellipticalArcTo(rx, ry, 0, false, true, x + rx, y) - else: - path.rect(x, y, width, height) - - img.fill(ctx, path) - if ctx.shouldStroke: - img.stroke(ctx, path) - - of "circle", "ellipse": - let - ctx = decodeCtx(ctxStack[^1], node) - cx = parseFloat(node.attrOrDefault("cx", "0")) - cy = parseFloat(node.attrOrDefault("cy", "0")) - - var rx, ry: float32 - if node.tag == "circle": - rx = parseFloat(node.attr("r")) - ry = rx - else: - rx = parseFloat(node.attrOrDefault("rx", "0")) - ry = parseFloat(node.attrOrDefault("ry", "0")) - - let path = newPath() - path.ellipse(cx, cy, rx, ry) - - img.fill(ctx, path) - if ctx.shouldStroke: - img.stroke(ctx, path) - - else: - raise newException(PixieError, "Unsupported SVG tag: " & node.tag & ".") - -proc draw(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) = - try: - drawInternal(img, node, ctxStack) - except PixieError as e: - raise e - except: - raise currentExceptionAsPixieError() - -proc decodeSvg*(data: string, width = 0, height = 0): Image = - ## Render SVG file and return the image. Defaults to the SVG's view box size. - try: - let root = parseXml(data) - if root.tag != "svg": - failInvalid() - - let - viewBox = root.attr("viewBox") - box = viewBox.split(" ") - viewBoxMinX = parseInt(box[0]) - viewBoxMinY = parseInt(box[1]) - viewBoxWidth = parseInt(box[2]) - viewBoxHeight = parseInt(box[3]) - - var rootCtx = initCtx() - rootCtx = decodeCtx(rootCtx, root) - - if viewBoxMinX != 0 or viewBoxMinY != 0: - rootCtx.transform = rootCtx.transform * translate( - vec2(-viewBoxMinX.float32, -viewBoxMinY.float32) - ) - - var - width = width - height = height - surface: ptr Surface - if width == 0 and height == 0: # Default to the view box size - width = viewBoxWidth.int32 - height = viewBoxHeight.int32 - else: - let - scaleX = width.float32 / viewBoxWidth.float32 - scaleY = height.float32 / viewBoxHeight.float32 - rootCtx.transform = rootCtx.transform * scale(vec2(scaleX, scaleY)) - - surface = imageSurfaceCreate(FORMAT_ARGB32, width.int32, height.int32) - - let c = surface.create() - - var ctxStack = @[rootCtx] - for node in root: - c.draw(node, ctxStack) - - surface.flush() - - result = newImage(width, height) - - let pixels = cast[ptr UncheckedArray[array[4, uint8]]](surface.getData()) - for y in 0 ..< result.height: - for x in 0 ..< result.width: - let - bgra = pixels[result.dataIndex(x, y)] - rgba = rgba(bgra[2], bgra[1], bgra[0], bgra[3]) - result.unsafe[x, y] = rgba.rgbx() - except PixieError as e: - raise e - except: - raise newException(PixieError, "Unable to load SVG") From ee442977faed95d8f953bd1cddcb47921497e1de Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 20:00:29 -0500 Subject: [PATCH 03/20] avoid unsafe[] in loops --- src/pixie/paths.nim | 173 ++++++++++++++++++++++++++++---------------- 1 file changed, 110 insertions(+), 63 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index ebb8bdb..916144f 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1336,7 +1336,9 @@ proc fillCoverage( coverages: seq[uint8], blendMode: BlendMode ) = - var x = startX + var + x = startX + dataIndex = image.dataIndex(x, y) when allowSimd: when defined(amd64): @@ -1389,51 +1391,61 @@ proc fillCoverage( when allowSimd: when defined(amd64): for (coverageVec, allZeroes, all255) in simd(coverages, x, startX): - if not allZeroes: + if allZeroes: + dataIndex += 16 + else: if all255: for i in 0 ..< 4: - mm_storeu_si128(image.unsafe[x + i * 4, y].addr, colorVec) + mm_storeu_si128(image.data[dataIndex].addr, colorVec) + dataIndex += 4 else: var coverageVec = coverageVec for i in 0 ..< 4: let source = source(colorVec, coverageVec) - mm_storeu_si128(image.unsafe[x + i * 4, y].addr, source) + mm_storeu_si128(image.data[dataIndex].addr, source) coverageVec = mm_srli_si128(coverageVec, 4) + dataIndex += 4 for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0: - image.unsafe[x, y] = source(rgbx, coverage) + image.data[dataIndex] = source(rgbx, coverage) + inc dataIndex of NormalBlend: when allowSimd: when defined(amd64): for (coverageVec, allZeroes, all255) in simd(coverages, x, startX): - if not allZeroes: + if allZeroes: + dataIndex += 16 + else: if all255 and rgbx.a == 255: for i in 0 ..< 4: - mm_storeu_si128(image.unsafe[x + i * 4, y].addr, colorVec) + mm_storeu_si128(image.data[dataIndex].addr, colorVec) + dataIndex += 4 else: var coverageVec = coverageVec for i in 0 ..< 4: let - backdrop = mm_loadu_si128(image.unsafe[x + i * 4, y].addr) + backdrop = mm_loadu_si128(image.data[dataIndex].addr) source = source(colorVec, coverageVec) mm_storeu_si128( - image.unsafe[x + i * 4, y].addr, + image.data[dataIndex].addr, blendNormalSimd(backdrop, source) ) coverageVec = mm_srli_si128(coverageVec, 4) + dataIndex += 4 for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage == 255 and rgbx.a == 255: - image.unsafe[x, y] = rgbx + image.data[dataIndex] = rgbx elif coverage == 0: discard else: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendNormal(backdrop, source(rgbx, coverage)) + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendNormal(backdrop, source(rgbx, coverage)) + inc dataIndex of MaskBlend: {.linearScanEnd.} @@ -1443,31 +1455,34 @@ proc fillCoverage( for (coverageVec, allZeroes, all255) in simd(coverages, x, startX): if not allZeroes: if all255: - discard + dataIndex += 16 else: var coverageVec = coverageVec for i in 0 ..< 4: let - backdrop = mm_loadu_si128(image.unsafe[x + i * 4, y].addr) + backdrop = mm_loadu_si128(image.data[dataIndex].addr) source = source(colorVec, coverageVec) mm_storeu_si128( - image.unsafe[x + i * 4, y].addr, + image.data[dataIndex].addr, blendMaskSimd(backdrop, source) ) coverageVec = mm_srli_si128(coverageVec, 4) + dataIndex += 4 else: for i in 0 ..< 4: - mm_storeu_si128(image.unsafe[x + i * 4, y].addr, mm_setzero_si128()) + mm_storeu_si128(image.data[dataIndex].addr, mm_setzero_si128()) + dataIndex += 4 for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage == 0: - image.unsafe[x, y] = rgbx(0, 0, 0, 0) + image.data[dataIndex] = rgbx(0, 0, 0, 0) elif coverage == 255: discard else: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendMask(backdrop, source(rgbx, coverage)) + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendMask(backdrop, source(rgbx, coverage)) + inc dataIndex image.clearUnsafe(0, y, startX, y) image.clearUnsafe(startX + coverages.len, y, image.width, y) @@ -1476,25 +1491,28 @@ proc fillCoverage( for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage == 255 and rgbx.a == 255: - image.unsafe[x, y] = rgbx(0, 0, 0, 0) + image.data[dataIndex] = rgbx(0, 0, 0, 0) elif coverage != 0: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendSubtractMask(backdrop, source(rgbx, coverage)) + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendSubtractMask(backdrop, source(rgbx, coverage)) + inc dataIndex of ExcludeMaskBlend: for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] - backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendExcludeMask(backdrop, source(rgbx, coverage)) + backdrop = image.data[dataIndex] + image.data[dataIndex] = blendExcludeMask(backdrop, source(rgbx, coverage)) + inc dataIndex else: let blender = blendMode.blender() for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blender(backdrop, source(rgbx, coverage)) + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blender(backdrop, source(rgbx, coverage)) + inc dataIndex proc fillCoverage( mask: Mask, @@ -1502,7 +1520,9 @@ proc fillCoverage( coverages: seq[uint8], blendMode: BlendMode ) = - var x = startX + var + x = startX + dataIndex = mask.dataIndex(x, y) template simdBlob(blendProc: untyped) = when allowSimd: @@ -1513,19 +1533,21 @@ proc fillCoverage( eqZero = mm_cmpeq_epi8(coveragesVec, mm_setzero_si128()) allZeroes = mm_movemask_epi8(eqZero) == 0xffff if not allZeroes: - let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr) + let backdrop = mm_loadu_si128(mask.data[dataIndex].addr) mm_storeu_si128( - mask.unsafe[x, y].addr, + mask.data[dataIndex].addr, blendProc(backdrop, coveragesVec) ) x += 16 + dataIndex += 16 template blendBlob(blendProc: untyped) = for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0: - let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = blendProc(backdrop, coverage) + let backdrop = mask.data[dataIndex] + mask.data[dataIndex] = blendProc(backdrop, coverage) + inc dataIndex case blendMode: of OverwriteBlend: @@ -1550,22 +1572,24 @@ proc fillCoverage( eqZero = mm_cmpeq_epi8(coveragesVec, mm_setzero_si128()) allZeroes = mm_movemask_epi8(eqZero) == 0xffff if not allZeroes: - let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr) + let backdrop = mm_loadu_si128(mask.data[dataIndex].addr) mm_storeu_si128( - mask.unsafe[x, y].addr, + mask.data[dataIndex].addr, maskBlendMaskSimd(backdrop, coveragesVec) ) else: - mm_storeu_si128(mask.unsafe[x, y].addr, mm_setzero_si128()) + mm_storeu_si128(mask.data[dataIndex].addr, mm_setzero_si128()) x += 16 + dataIndex += 16 for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0: - let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = maskBlendMask(backdrop, coverage) + let backdrop = mask.data[dataIndex] + mask.data[dataIndex] = maskBlendMask(backdrop, coverage) else: - mask.unsafe[x, y] = 0 + mask.data[dataIndex] = 0 + inc dataIndex mask.clearUnsafe(0, y, startX, y) mask.clearUnsafe(startX + coverages.len, y, mask.width, y) @@ -1595,10 +1619,15 @@ proc fillHits( when allowSimd: when defined(amd64): let colorVec = mm_set1_epi32(cast[int32](rgbx)) + var dataIndex = image.dataIndex(x, y) for _ in 0 ..< len div 4: - let backdrop = mm_loadu_si128(image.unsafe[x, y].addr) - mm_storeu_si128(image.unsafe[x, y].addr, blendProc(backdrop, colorVec)) + let backdrop = mm_loadu_si128(image.data[dataIndex].addr) + mm_storeu_si128( + image.data[dataIndex].addr, + blendProc(backdrop, colorVec) + ) x += 4 + dataIndex += 4 case blendMode: of OverwriteBlend: @@ -1612,9 +1641,11 @@ proc fillHits( else: var x = start simdBlob(image, x, len, blendNormalSimd) - for x in x ..< start + len: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendNormal(backdrop, rgbx) + var dataIndex = image.dataIndex(x, y) + for _ in x ..< start + len: + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendNormal(backdrop, rgbx) + inc dataIndex of MaskBlend: {.linearScanEnd.} @@ -1635,34 +1666,41 @@ proc fillHits( if rgbx.a != 255: var x = start simdBlob(image, x, len, blendMaskSimd) - for x in x ..< start + len: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendMask(backdrop, rgbx) + var dataIndex = image.dataIndex(x, y) + for _ in x ..< start + len: + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendMask(backdrop, rgbx) image.clearUnsafe(0, y, startX, y) image.clearUnsafe(filledTo, y, image.width, y) of SubtractMaskBlend: for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): - for x in start ..< start + len: + var dataIndex = image.dataIndex(start, y) + for _ in 0 ..< len: if rgbx.a == 255: - image.unsafe[x, y] = rgbx(0, 0, 0, 0) + image.data[dataIndex] = rgbx(0, 0, 0, 0) else: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendSubtractMask(backdrop, rgbx) + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendSubtractMask(backdrop, rgbx) + inc dataIndex of ExcludeMaskBlend: for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): - for x in start ..< start + len: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blendExcludeMask(backdrop, rgbx) + var dataIndex = image.dataIndex(start, y) + for _ in 0 ..< len: + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blendExcludeMask(backdrop, rgbx) + inc dataIndex else: let blender = blendMode.blender() for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width): - for x in start ..< start + len: - let backdrop = image.unsafe[x, y] - image.unsafe[x, y] = blender(backdrop, rgbx) + var dataIndex = image.dataIndex(start, y) + for _ in 0 ..< len: + let backdrop = image.data[dataIndex] + image.data[dataIndex] = blender(backdrop, rgbx) + inc dataIndex proc fillHits( mask: Mask, @@ -1676,10 +1714,15 @@ proc fillHits( when allowSimd: when defined(amd64): let vec255 = mm_set1_epi8(255) + var dataIndex = mask.dataIndex(x, y) for _ in 0 ..< len div 16: - let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr) - mm_storeu_si128(mask.unsafe[x, y].addr, blendProc(backdrop, vec255)) + let backdrop = mm_loadu_si128(mask.data[dataIndex].addr) + mm_storeu_si128( + mask.data[dataIndex].addr, + blendProc(backdrop, vec255) + ) x += 16 + dataIndex += 16 case blendMode: of NormalBlend, OverwriteBlend: @@ -1703,17 +1746,21 @@ proc fillHits( for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): var x = start simdBlob(mask, x, len, maskBlendSubtractSimd) - for x in x ..< start + len: - let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = maskBlendSubtract(backdrop, 255) + var dataIndex = mask.dataIndex(x, y) + for _ in x ..< start + len: + let backdrop = mask.data[dataIndex] + mask.data[dataIndex] = maskBlendSubtract(backdrop, 255) + inc dataIndex of ExcludeMaskBlend: for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width): var x = start simdBlob(mask, x, len, maskBlendExcludeSimd) - for x in x ..< start + len: - let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = maskBlendExclude(backdrop, 255) + var dataIndex = mask.dataIndex(x, y) + for _ in x ..< start + len: + let backdrop = mask.data[dataIndex] + mask.data[dataIndex] = maskBlendExclude(backdrop, 255) + inc dataIndex else: failUnsupportedBlendMode(blendMode) From 380c9be1815c6318b81f6b2280e443ed3a40a421 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 20:20:29 -0500 Subject: [PATCH 04/20] f --- src/pixie/paths.nim | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 916144f..1a2c90e 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1375,16 +1375,17 @@ proc fillCoverage( let colorVec = mm_set1_epi32(cast[int32](rgbx)) proc source(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} = - if coverage > 0: - if coverage == 255: - result = rgbx - else: - result = rgbx( - ((rgbx.r.uint32 * coverage) div 255).uint8, - ((rgbx.g.uint32 * coverage) div 255).uint8, - ((rgbx.b.uint32 * coverage) div 255).uint8, - ((rgbx.a.uint32 * coverage) div 255).uint8 - ) + if coverage == 0: + discard + elif coverage == 255: + result = rgbx + else: + result = rgbx( + ((rgbx.r.uint32 * coverage) div 255).uint8, + ((rgbx.g.uint32 * coverage) div 255).uint8, + ((rgbx.b.uint32 * coverage) div 255).uint8, + ((rgbx.a.uint32 * coverage) div 255).uint8 + ) case blendMode: of OverwriteBlend: From 97d69b1473b8a15c2ccf11c4b3a9b67586429fe4 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 20:46:38 -0500 Subject: [PATCH 05/20] save a cycle --- src/pixie/internal.nim | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index a2585f6..444b205 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -205,10 +205,8 @@ when defined(amd64) and allowSimd: proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} = ## Unpack the first 32 bits into 4 rgba(0, 0, 0, value) - let - a = mm_unpacklo_epi8(v, mm_setzero_si128()) - b = mm_unpacklo_epi8(a, mm_setzero_si128()) - result = mm_slli_epi32(b, 24) # Shift the values to uint32 `a` + result = mm_unpacklo_epi8(mm_setzero_si128(), v) + result = mm_unpacklo_epi8(mm_setzero_si128(), result) when defined(release): {.pop.} From 53e5185360b3d24b93ebac42aa7e6267f11a318b Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:05:58 -0500 Subject: [PATCH 06/20] less instructions --- src/pixie/internal.nim | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 444b205..14308d4 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -186,14 +186,11 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool = return false when defined(amd64) and allowSimd: - proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} = + proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} = ## Shuffle the alpha values for these 4 colors to the first 4 bytes - let mask = mm_set1_epi32(cast[int32](0xff000000)) - result = mm_and_si128(v, mask) - result = mm_srli_epi32(result, 24) - result = mm_packus_epi16(result, result) - result = mm_packus_epi16(result, result) - result = mm_srli_si128(result, 12) + result = mm_srli_epi32(v, 24) + result = mm_packus_epi16(result, mm_setzero_si128()) + result = mm_packus_epi16(result, mm_setzero_si128()) proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} = let From 7c91a69bcad34b40df42031085353d8a54831406 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:10:23 -0500 Subject: [PATCH 07/20] less instructions --- src/pixie/internal.nim | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 14308d4..64c7dde 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -165,9 +165,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool = var i = start when defined(amd64) and allowSimd: - let - vec255 = mm_set1_epi32(cast[int32](uint32.high)) - colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0])) + let vec255 = mm_set1_epi32(cast[int32](uint32.high)) for _ in start ..< (start + len) div 16: let values0 = mm_loadu_si128(data[i + 0].addr) @@ -176,8 +174,9 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool = values3 = mm_loadu_si128(data[i + 12].addr) values01 = mm_and_si128(values0, values1) values23 = mm_and_si128(values2, values3) - values = mm_or_si128(mm_and_si128(values01, values23), colorMask) - if mm_movemask_epi8(mm_cmpeq_epi8(values, vec255)) != 0xffff: + values = mm_and_si128(values01, values23) + eq = mm_cmpeq_epi8(values, vec255) + if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888: return false i += 16 From e676e6cf22ef01bca518fe6c329000db35e3fe36 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:16:26 -0500 Subject: [PATCH 08/20] less instructions --- src/pixie/internal.nim | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 64c7dde..36f1041 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -125,30 +125,25 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} oddMask = mm_set1_epi16(cast[int16](0xff00)) div255 = mm_set1_epi16(cast[int16](0x8081)) for _ in 0 ..< data.len div 4: - var - color = mm_loadu_si128(data[i].addr) - alpha = mm_and_si128(color, alphaMask) - if mm_movemask_epi8(mm_cmpeq_epi16(alpha, alphaMask)) != 0xffff: - # If not all of the alpha values are 255, premultiply + let + values = mm_loadu_si128(data[i].addr) + alpha = mm_and_si128(values, alphaMask) + eq = mm_cmpeq_epi8(values, alphaMask) + if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888: + let + evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16)) + oddMultiplier = mm_or_si128(evenMultiplier, alphaMask) var - colorEven = mm_slli_epi16(color, 8) - colorOdd = mm_and_si128(color, oddMask) - - alpha = mm_or_si128(alpha, mm_srli_epi32(alpha, 16)) - - colorEven = mm_mulhi_epu16(colorEven, alpha) - colorOdd = mm_mulhi_epu16(colorOdd, alpha) - - colorEven = mm_srli_epi16(mm_mulhi_epu16(colorEven, div255), 7) - colorOdd = mm_srli_epi16(mm_mulhi_epu16(colorOdd, div255), 7) - - color = mm_or_si128(colorEven, mm_slli_epi16(colorOdd, 8)) - color = mm_or_si128( - mm_and_si128(alpha, alphaMask), mm_and_si128(color, notAlphaMask) + colorsEven = mm_slli_epi16(values, 8) + colorsOdd = mm_and_si128(values, oddMask) + colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier) + colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier) + colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7) + colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7) + mm_storeu_si128( + data[i].addr, + mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8)) ) - - mm_storeu_si128(data[i].addr, color) - i += 4 # Convert whatever is left From 38ab18bcdfe61542889516437538fb6d3fd08d9a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:26:56 -0500 Subject: [PATCH 09/20] rm --- src/pixie/internal.nim | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 36f1041..d850a36 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -121,7 +121,6 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} # When supported, SIMD convert as much as possible let alphaMask = mm_set1_epi32(cast[int32](0xff000000)) - notAlphaMask = mm_set1_epi32(0x00ffffff) oddMask = mm_set1_epi16(cast[int16](0xff00)) div255 = mm_set1_epi16(cast[int16](0x8081)) for _ in 0 ..< data.len div 4: From 37041d6790a74d170fda9c9f8952f823b70563f2 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:31:12 -0500 Subject: [PATCH 10/20] f --- src/pixie/masks.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index aa42526..6a45862 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -316,7 +316,7 @@ proc ceil*(mask: Mask) {.raises: [].} = when defined(amd64) and allowSimd: let zeroVec = mm_setzero_si128() - vec255 = mm_set1_epi32(cast[int32](uint32.high)) + vec255 = mm_set1_epi8(255) for _ in 0 ..< mask.data.len div 16: var values = mm_loadu_si128(mask.data[i].addr) values = mm_cmpeq_epi8(values, zeroVec) From 7cfb124c2e151f492b1c40a806cf0f0d83806210 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:40:09 -0500 Subject: [PATCH 11/20] simpler + bugfix + test --- src/pixie/masks.nim | 21 ++++++++------------- tests/masks/drawPolygonMagnified.png | Bin 0 -> 1173 bytes tests/test_masks.nim | 10 ++++++++++ 3 files changed, 18 insertions(+), 13 deletions(-) create mode 100644 tests/masks/drawPolygonMagnified.png diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 6a45862..bff1c81 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -172,17 +172,14 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = when defined(amd64) and allowSimd: if scale == 2: while x <= mask.width - 16: - let - values = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr) - lo = mm_unpacklo_epi8(values, mm_setzero_si128()) - hi = mm_unpacklo_epi8(values, mm_setzero_si128()) + let values = mm_loadu_si128(mask.unsafe[x, y].addr) mm_storeu_si128( result.data[result.dataIndex(x * scale + 0, y * scale)].addr, - mm_or_si128(lo, mm_slli_si128(lo, 1)) + mm_unpacklo_epi8(values, values) ) mm_storeu_si128( result.data[result.dataIndex(x * scale + 16, y * scale)].addr, - mm_or_si128(hi, mm_slli_si128(hi, 1)) + mm_unpackhi_epi8(values, values) ) x += 16 for x in x ..< mask.width: @@ -237,17 +234,15 @@ proc invert*(mask: Mask) {.raises: [].} = ## Inverts all of the values - creates a negative of the mask. var i: int when defined(amd64) and allowSimd: - let vec255 = mm_set1_epi8(cast[int8](255)) - let byteLen = mask.data.len - for _ in 0 ..< byteLen div 16: - let index = i - var values = mm_loadu_si128(mask.data[index].addr) + let vec255 = mm_set1_epi8(255) + for _ in 0 ..< mask.data.len div 16: + var values = mm_loadu_si128(mask.data[i].addr) values = mm_sub_epi8(vec255, values) - mm_storeu_si128(mask.data[index].addr, values) + mm_storeu_si128(mask.data[i].addr, values) i += 16 for j in i ..< mask.data.len: - mask.data[j] = (255 - mask.data[j]).uint8 + mask.data[j] = 255 - mask.data[j] proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} = ## Grows the mask by spread. diff --git a/tests/masks/drawPolygonMagnified.png b/tests/masks/drawPolygonMagnified.png new file mode 100644 index 0000000000000000000000000000000000000000..a79b040c9b9e4259d61e923ce8e778d994363405 GIT binary patch literal 1173 zcmeAS@N?(olHy`uVBq!ia0vp^CqS43NHBC5e{^DCV2SZ`aSW+oe0$N~V{$#i@sBGz zJ5;&kTAGp;&&+dW$`)POnbE*8b=cPsGKiA5AziEAUijoBjZ});k zK@**(HrERqIHbfJ4=ZIv;1ygn?dq$qReQr$-})T2u|sO-&bMz5E;#+Ph_(6c+nqbJ z=3VS^74r06y}H{)O4`_Xx_?@lnVFkXX=!HW>G>%M6Te!{ef;?DT~^kQ%elCA?LK~d z?ztb?#}X%QeS5>uz`*25fyUedi?+63zqjXy?^@2^t#SHfW-f25-@K50yB}RyS^hpM zs;DT;PO{Z?%9SfVesMb|&0SlfA6NG6>gxCR!^4lqU+#7d)7F)hto)Gh`r6g&*Ei)x z$KO9a{rC6vrST%&N3?a`ysiEE>cPPu*;!Mj_V(W09UigbS&ov9(B!FC@BDdq_~zz~ z=NDyVAAaaPebXkF`O&?uVId(cPUd+z_x619oUI#u>(;$}wuMf&FB|G48Qpwa_kXc_ zTJan6i-(gWWoo{i$-DlquYRvbU0wL~BS$7G->N^jf9iC0rt)`Bp8v0P@2%f?Vwta; zze&}RB@y*T8Usk^O-P*q3lEIVb=g;|V{`u~1{Qe%fty`{KO*Jbo)6?zT zA9+~7N2>kI855H!Q*7$XAdZN2rmQ^HK;CXei;>)|0+ZduvS&$#lZ#@jpa;^FqrbDahy z9!8QjPkw*TzTVZ%&US6~mHYd%uYY}QU$>^*ZO%dGN$!Ux^vnCtx2v%b2#^xxu$UVf zI^Rz7?q#6(lL;m%6&3gQ{rdIt<&VJ2m%o0k{=Tn5u>P#Yq_!NJOy|1)?_O?p zpYE=!S5^|CV{3c5{vd$@?2>><% BI)wlL literal 0 HcmV?d00001 diff --git a/tests/test_masks.nim b/tests/test_masks.nim index 0b9d105..0be2fc1 100644 --- a/tests/test_masks.nim +++ b/tests/test_masks.nim @@ -205,3 +205,13 @@ block: let minified = mask.minifyBy2() minified.writeFile("tests/masks/minifiedBlur.png") + +block: + let path = newPath() + path.polygon(vec2(50, 50), 30, 6) + + let mask = newMask(100, 100) + mask.fillPath(path) + + let magnified = mask.magnifyBy2() + magnified.writeFile("tests/masks/drawPolygonMagnified.png") From 432dce490209725eec48d566b6c1f848e8ada02a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:48:18 -0500 Subject: [PATCH 12/20] less instructions --- src/pixie/masks.nim | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index bff1c81..0ac4321 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -93,7 +93,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = firstByte = cast[M128i]( [uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ) - for _ in countup(0, result.width - 16, 8): + while x <= result.width - 16: let top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr) btm = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 1)].addr) @@ -101,22 +101,22 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = btmShifted = mm_srli_si128(btm, 1) topEven = mm_andnot_si128(oddMask, top) - topOdd = mm_srli_epi16(mm_and_si128(top, oddMask), 8) + topOdd = mm_srli_epi16(top, 8) btmEven = mm_andnot_si128(oddMask, btm) - btmOdd = mm_srli_epi16(mm_and_si128(btm, oddMask), 8) + btmOdd = mm_srli_epi16(btm, 8) topShiftedEven = mm_andnot_si128(oddMask, topShifted) - topShiftedOdd = mm_srli_epi16(mm_and_si128(topShifted, oddMask), 8) + topShiftedOdd = mm_srli_epi16(topShifted, 8) btmShiftedEven = mm_andnot_si128(oddMask, btmShifted) - btmShiftedOdd = mm_srli_epi16(mm_and_si128(btmShifted, oddMask), 8) + btmShiftedOdd = mm_srli_epi16(btmShifted, 8) topAddedEven = mm_add_epi16(topEven, topShiftedEven) btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven) topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd) - bottomAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd) + btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd) addedEven = mm_add_epi16(topAddedEven, btmAddedEven) - addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd) + addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd) addedEvenDiv4 = mm_srli_epi16(addedEven, 2) addedOddDiv4 = mm_srli_epi16(addedOdd, 2) From 14576fcd27c6d68a5a74eee3d99b28ffcfd864ae Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 21:57:25 -0500 Subject: [PATCH 13/20] much simpler --- src/pixie/masks.nim | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 0ac4321..7644bba 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -88,11 +88,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = for y in 0 ..< result.height: var x: int when defined(amd64) and allowSimd: - let - oddMask = mm_set1_epi16(cast[int16](0xff00)) - firstByte = cast[M128i]( - [uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - ) + let oddMask = mm_set1_epi16(cast[int16](0xff00)) while x <= result.width - 16: let top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr) @@ -122,29 +118,14 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = addedOddDiv4 = mm_srli_epi16(addedOdd, 2) merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8)) + # Merged has the correct values in the even indices + # Mask out the odd values for packing + masked = mm_andnot_si128(oddMask, merged) - # merged has the correct values in the even indices - - a = mm_and_si128(merged, firstByte) - b = mm_and_si128(mm_srli_si128(merged, 2), firstByte) - c = mm_and_si128(mm_srli_si128(merged, 4), firstByte) - d = mm_and_si128(mm_srli_si128(merged, 6), firstByte) - e = mm_and_si128(mm_srli_si128(merged, 8), firstByte) - f = mm_and_si128(mm_srli_si128(merged, 10), firstByte) - g = mm_and_si128(mm_srli_si128(merged, 12), firstByte) - h = mm_and_si128(mm_srli_si128(merged, 14), firstByte) - - ab = mm_or_si128(a, mm_slli_si128(b, 1)) - cd = mm_or_si128(c, mm_slli_si128(d, 1)) - ef = mm_or_si128(e, mm_slli_si128(f, 1)) - gh = mm_or_si128(g, mm_slli_si128(h, 1)) - - abcd = mm_or_si128(ab, mm_slli_si128(cd, 2)) - efgh = mm_or_si128(ef, mm_slli_si128(gh, 2)) - - abcdefgh = mm_or_si128(abcd, mm_slli_si128(efgh, 4)) - - mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, abcdefgh) + mm_storeu_si128( + result.data[result.dataIndex(x, y)].addr, + mm_packus_epi16(masked, mm_setzero_si128()) + ) x += 8 for x in x ..< result.width: From 3c769291af59663c3bba578a5d7a92212d7143ad Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 22:00:20 -0500 Subject: [PATCH 14/20] faster --- src/pixie/images.nim | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index ac82d87..072a8cb 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -108,15 +108,20 @@ proc isOneColor*(image: Image): bool {.raises: [].} = var i: int when defined(amd64) and allowSimd: let colorVec = mm_set1_epi32(cast[int32](color)) - for _ in 0 ..< image.data.len div 8: + for _ in 0 ..< image.data.len div 16: let values0 = mm_loadu_si128(image.data[i + 0].addr) values1 = mm_loadu_si128(image.data[i + 4].addr) - mask0 = mm_movemask_epi8(mm_cmpeq_epi8(values0, colorVec)) - mask1 = mm_movemask_epi8(mm_cmpeq_epi8(values1, colorVec)) - if mask0 != 0xffff or mask1 != 0xffff: + values2 = mm_loadu_si128(image.data[i + 8].addr) + values3 = mm_loadu_si128(image.data[i + 12].addr) + eq0 = mm_cmpeq_epi8(values0, colorVec) + eq1 = mm_cmpeq_epi8(values1, colorVec) + eq2 = mm_cmpeq_epi8(values2, colorVec) + eq3 = mm_cmpeq_epi8(values3, colorVec) + eq = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3)) + if mm_movemask_epi8(eq) != 0xffff: return false - i += 8 + i += 16 for j in i ..< image.data.len: if image.data[j] != color: From 82881ae75b65f162290432e65e53e2e03ab8687e Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 22:19:11 -0500 Subject: [PATCH 15/20] simpler, faster --- src/pixie/images.nim | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 072a8cb..e77f93c 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -262,7 +262,7 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} = when defined(amd64) and allowSimd: let oddMask = mm_set1_epi16(cast[int16](0xff00)) - first32 = cast[M128i]([uint32.high, 0, 0, 0]) + mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high) for _ in countup(0, resultEvenWidth - 4, 2): let top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr) @@ -271,36 +271,36 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} = btmShifted = mm_srli_si128(btm, 4) topEven = mm_andnot_si128(oddMask, top) - topOdd = mm_srli_epi16(mm_and_si128(top, oddMask), 8) + topOdd = mm_srli_epi16(top, 8) btmEven = mm_andnot_si128(oddMask, btm) - btmOdd = mm_srli_epi16(mm_and_si128(btm, oddMask), 8) + btmOdd = mm_srli_epi16(btm, 8) topShiftedEven = mm_andnot_si128(oddMask, topShifted) - topShiftedOdd = mm_srli_epi16(mm_and_si128(topShifted, oddMask), 8) + topShiftedOdd = mm_srli_epi16(topShifted, 8) btmShiftedEven = mm_andnot_si128(oddMask, btmShifted) - btmShiftedOdd = mm_srli_epi16(mm_and_si128(btmShifted, oddMask), 8) + btmShiftedOdd = mm_srli_epi16(btmShifted, 8) topAddedEven = mm_add_epi16(topEven, topShiftedEven) btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven) topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd) - bottomAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd) + btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd) addedEven = mm_add_epi16(topAddedEven, btmAddedEven) - addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd) + addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd) addedEvenDiv4 = mm_srli_epi16(addedEven, 2) addedOddDiv4 = mm_srli_epi16(addedOdd, 2) merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8)) + # Merged has the correct values for the next two pixels at + # index 0 and 2 so mask the others out and shift 0 and 2 into + # position and store + masked = mm_and_si128(merged, mergedMask) - # merged [0, 1, 2, 3] has the correct values for the next two pixels - # at index 0 and 2 so shift those into position and store - - zero = mm_and_si128(merged, first32) - two = mm_and_si128(mm_srli_si128(merged, 8), first32) - zeroTwo = mm_or_si128(zero, mm_slli_si128(two, 4)) - - mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, zeroTwo) + mm_storeu_si128( + result.data[result.dataIndex(x, y)].addr, + mm_shuffle_epi32(masked, MM_SHUFFLE(0, 0, 2, 0)) + ) x += 2 for x in x ..< resultEvenWidth: From 4870c8f283c0cc69edfd253404718ff8c6300053 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 22:21:49 -0500 Subject: [PATCH 16/20] simpler --- src/pixie/images.nim | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index e77f93c..2518cab 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -355,17 +355,14 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} = when defined(amd64) and allowSimd: if scale == 2: while x <= image.width - 4: - let - values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr) - lo = mm_unpacklo_epi32(values, mm_setzero_si128()) - hi = mm_unpackhi_epi32(values, mm_setzero_si128()) + let values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr) mm_storeu_si128( result.data[result.dataIndex(x * scale + 0, y * scale)].addr, - mm_or_si128(lo, mm_slli_si128(lo, 4)) + mm_unpacklo_epi32(values, values) ) mm_storeu_si128( result.data[result.dataIndex(x * scale + 4, y * scale)].addr, - mm_or_si128(hi, mm_slli_si128(hi, 4)) + mm_unpackhi_epi32(values, values) ) x += 4 for x in x ..< image.width: From b302c9b38edd820c433c717d0335e36d50204c74 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 22:24:31 -0500 Subject: [PATCH 17/20] simpler --- src/pixie/images.nim | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 2518cab..8f58f38 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -415,22 +415,18 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff: var - valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8) + valuesEven = mm_slli_epi16(values, 8) valuesOdd = mm_and_si128(values, oddMask) - # values * opacity valuesEven = mm_mulhi_epu16(valuesEven, opacityVec) valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec) - # div 255 valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7) - valuesOdd = mm_slli_epi16(valuesOdd, 8) - mm_storeu_si128( target.data[index].addr, - mm_or_si128(valuesEven, valuesOdd) + mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8)) ) i += 16 From 67938a0597be7b837adbe9da6de2f8295b92092f Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 22:28:15 -0500 Subject: [PATCH 18/20] f --- src/pixie/images.nim | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 8f58f38..d448c18 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -443,31 +443,35 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = for j in i ..< target.data.len: target.data[j] = ((target.data[j] * opacity) div 255).uint8 -proc invert*(target: Image) {.raises: [].} = +proc invert*(image: Image) {.raises: [].} = ## Inverts all of the colors and alpha. var i: int when defined(amd64) and allowSimd: let vec255 = mm_set1_epi8(cast[int8](255)) - let byteLen = target.data.len * 4 - for _ in 0 ..< byteLen div 16: - let index = i div 4 - var values = mm_loadu_si128(target.data[index].addr) - values = mm_sub_epi8(vec255, values) - mm_storeu_si128(target.data[index].addr, values) + for _ in 0 ..< image.data.len div 16: + let + a = mm_loadu_si128(image.data[i + 0].addr) + b = mm_loadu_si128(image.data[i + 4].addr) + c = mm_loadu_si128(image.data[i + 8].addr) + d = mm_loadu_si128(image.data[i + 12].addr) + mm_storeu_si128(image.data[i + 0].addr, mm_sub_epi8(vec255, a)) + mm_storeu_si128(image.data[i + 4].addr, mm_sub_epi8(vec255, b)) + mm_storeu_si128(image.data[i + 8].addr, mm_sub_epi8(vec255, c)) + mm_storeu_si128(image.data[i + 12].addr, mm_sub_epi8(vec255, d)) i += 16 - for j in i div 4 ..< target.data.len: - var rgba = target.data[j] - rgba.r = 255 - rgba.r - rgba.g = 255 - rgba.g - rgba.b = 255 - rgba.b - rgba.a = 255 - rgba.a - target.data[j] = rgba + for j in i ..< image.data.len: + var rgbx = image.data[j] + rgbx.r = 255 - rgbx.r + rgbx.g = 255 - rgbx.g + rgbx.b = 255 - rgbx.b + rgbx.a = 255 - rgbx.a + image.data[j] = rgbx # Inverting rgbx(50, 100, 150, 200) becomes rgbx(205, 155, 105, 55). This # is not a valid premultiplied alpha color. # We need to convert back to premultiplied alpha after inverting. - target.data.toPremultipliedAlpha() + image.data.toPremultipliedAlpha() proc blur*( image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0) From f54d46bee0549011c17033d72f6d999c89b5ab93 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Mon, 20 Jun 2022 22:30:56 -0500 Subject: [PATCH 19/20] nimsimd bump --- pixie.nimble | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixie.nimble b/pixie.nimble index 94fa28e..767bf01 100644 --- a/pixie.nimble +++ b/pixie.nimble @@ -10,7 +10,7 @@ requires "vmath >= 1.1.4" requires "chroma >= 0.2.5" requires "zippy >= 0.10.0" requires "flatty >= 0.3.4" -requires "nimsimd >= 1.0.0" +requires "nimsimd >= 1.1.1" requires "bumpy >= 1.1.1" task bindings, "Generate bindings": From 1821c8ea9343aa672c6a1704f9bd51ed570bcf0e Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 21 Jun 2022 00:00:15 -0500 Subject: [PATCH 20/20] f --- src/pixie/images.nim | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index d448c18..250721f 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -412,18 +412,14 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = let index = i let values = mm_loadu_si128(target.data[index].addr) - if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff: var valuesEven = mm_slli_epi16(values, 8) valuesOdd = mm_and_si128(values, oddMask) - valuesEven = mm_mulhi_epu16(valuesEven, opacityVec) valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec) - valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7) - mm_storeu_si128( target.data[index].addr, mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))