From ecdf9f2f7e69c8353826883acd9207fb7f45747d Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sat, 11 Dec 2021 23:18:59 -0600 Subject: [PATCH 01/13] benchmark cairo draw --- experiments/benchmark_cairo_draw.nim | 130 +++++++++++++++++++++++++++ experiments/svg_cairo.nim | 2 +- 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 experiments/benchmark_cairo_draw.nim diff --git a/experiments/benchmark_cairo_draw.nim b/experiments/benchmark_cairo_draw.nim new file mode 100644 index 0000000..9798b35 --- /dev/null +++ b/experiments/benchmark_cairo_draw.nim @@ -0,0 +1,130 @@ +import benchy, cairo, pixie + +block: + let + backdrop = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/dragon2.png") + source = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = imageSurfaceCreate(FORMAT_ARGB32, 1568, 940) + ctx = tmp.create() + + timeIt "cairo draw basic": + ctx.setSource(backdrop, 0, 0) + ctx.paint() + ctx.setSource(source, 0, 0) + ctx.paint() + tmp.flush() + + # echo tmp.writeToPng("tmp.png") + +block: + let + backdrop = readImage("tests/fileformats/svg/masters/dragon2.png") + source = readImage("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = newImage(1568, 940) + + timeIt "isOneColor": + doAssert not backdrop.isOneColor() + + timeIt "pixie draw basic": + tmp.draw(backdrop) + tmp.draw(source) + + # tmp.writeFile("tmp2.png") + +block: + let + backdrop = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/dragon2.png") + source = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = imageSurfaceCreate(FORMAT_ARGB32, 1568, 940) + ctx = tmp.create() + + timeIt "cairo draw smooth": + var + mat = mat3() + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(backdrop, 0, 0) + ctx.paint() + mat = rotate(15.toRadians) + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(source, 0, 0) + ctx.paint() + tmp.flush() + + # echo tmp.writeToPng("tmp.png") + +block: + let + backdrop = readImage("tests/fileformats/svg/masters/dragon2.png") + source = readImage("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = newImage(1568, 940) + + timeIt "pixie draw smooth": + tmp.draw(backdrop) + tmp.draw(source, translate(vec2(0.5, 0.5))) + + # tmp.writeFile("tmp2.png") + +block: + let + backdrop = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/dragon2.png") + source = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = imageSurfaceCreate(FORMAT_ARGB32, 1568, 940) + ctx = tmp.create() + + timeIt "cairo draw smooth rotated": + var + mat = mat3() + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(backdrop, 0, 0) + ctx.paint() + mat = rotate(15.toRadians) + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(source, 0, 0) + ctx.paint() + tmp.flush() + + # echo tmp.writeToPng("tmp.png") + +block: + let + backdrop = readImage("tests/fileformats/svg/masters/dragon2.png") + source = readImage("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = newImage(1568, 940) + + timeIt "pixie draw smooth rotated": + tmp.draw(backdrop) + tmp.draw(source, rotate(15.toRadians)) + + # tmp.writeFile("tmp2.png") diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim index 916b01d..d246aa2 100644 --- a/experiments/svg_cairo.nim +++ b/experiments/svg_cairo.nim @@ -580,7 +580,7 @@ proc decodeSvg*(data: string, width = 0, height = 0): Image = let bgra = pixels[result.dataIndex(x, y)] rgba = rgba(bgra[2], bgra[1], bgra[0], bgra[3]) - result.setRgbaUnsafe(x, y, rgba.rgbx()) + result.unsafe[x, y] = rgba.rgbx() except PixieError as e: raise e except: From 720e5ae0195cd453cfd115ae3e264555f129bb3f Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sat, 11 Dec 2021 23:24:00 -0600 Subject: [PATCH 02/13] morepretty --- experiments/svg_cairo.nim | 4 ++-- src/pixie/paths.nim | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim index d246aa2..5dffdd1 100644 --- a/experiments/svg_cairo.nim +++ b/experiments/svg_cairo.nim @@ -1,7 +1,7 @@ ## Load and Save SVG files. -import cairo, chroma, pixie/common, pixie/images, pixie/paints, pixie/paths {.all.}, - strutils, tables, vmath, xmlparser, xmltree +import cairo, chroma, pixie/common, pixie/images, pixie/paints, + pixie/paths {.all.}, strutils, tables, vmath, xmlparser, xmltree include pixie/paths diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 5af66e1..c195d8e 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -2103,7 +2103,9 @@ when defined(pixieSweeps): line.winding = s[1] return line - proc intersectsYLine(y: float32, s: Segment, atx: var float32): bool {.inline.} = + proc intersectsYLine( + y: float32, s: Segment, atx: var float32 + ): bool {.inline.} = let s2y = s.to.y - s.at.y denominator = -s2y @@ -2372,11 +2374,11 @@ when defined(pixieSweeps): swX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracBottom) seX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracBottom) - minWi = min(nwX, swX).int#.clamp(startX, coverages.len + startX) - maxWi = max(nwX, swX).ceil.int#.clamp(startX, coverages.len + startX) + minWi = min(nwX, swX).int #.clamp(startX, coverages.len + startX) + maxWi = max(nwX, swX).ceil.int #.clamp(startX, coverages.len + startX) - minEi = min(neX, seX).int#.clamp(startX, coverages.len + startX) - maxEi = max(neX, seX).ceil.int#.clamp(startX, coverages.len + startX) + minEi = min(neX, seX).int #.clamp(startX, coverages.len + startX) + maxEi = max(neX, seX).ceil.int #.clamp(startX, coverages.len + startX) let nw = vec2(sweep[i+0].atx, cutLines[currCutLine]) From 790146e55378c86c2c08b6931b27c4fd30d66f95 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 00:28:17 -0600 Subject: [PATCH 03/13] faster --- src/pixie/paths.nim | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index c195d8e..71fbc14 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1394,25 +1394,29 @@ proc fillCoverage( x += 16 + proc applyCoverage(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} = + if coverage != 255: + result.r = ((rgbx.r.uint32 * coverage) div 255).uint8 + result.g = ((rgbx.g.uint32 * coverage) div 255).uint8 + result.b = ((rgbx.b.uint32 * coverage) div 255).uint8 + result.a = ((rgbx.a.uint32 * coverage) div 255).uint8 + else: + result = rgbx + let blender = blendMode.blender() - while x < startX + coverages.len: + for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0 or blendMode == bmExcludeMask: if blendMode == bmNormal and coverage == 255 and rgbx.a == 255: # Skip blending image.unsafe[x, y] = rgbx else: - var source = rgbx - if coverage != 255: - source.r = ((source.r.uint32 * coverage) div 255).uint8 - source.g = ((source.g.uint32 * coverage) div 255).uint8 - source.b = ((source.b.uint32 * coverage) div 255).uint8 - source.a = ((source.a.uint32 * coverage) div 255).uint8 - let backdrop = image.unsafe[x, y] + let + source = rgbx.applyCoverage(coverage) + backdrop = image.unsafe[x, y] image.unsafe[x, y] = blender(backdrop, source) elif blendMode == bmMask: image.unsafe[x, y] = rgbx(0, 0, 0, 0) - inc x if blendMode == bmMask: image.clearUnsafe(0, y, startX, y) @@ -1446,14 +1450,13 @@ proc fillCoverage( x += 16 let masker = blendMode.masker() - while x < startX + coverages.len: + for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0 or blendMode == bmExcludeMask: let backdrop = mask.unsafe[x, y] mask.unsafe[x, y] = masker(backdrop, coverage) elif blendMode == bmMask: mask.unsafe[x, y] = 0 - inc x if blendMode == bmMask: mask.clearUnsafe(0, y, startX, y) From 4f81ae9f818ec9e3c33fc1f1774a37efde6b25bf Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:08:40 -0600 Subject: [PATCH 04/13] f --- experiments/benchmark_cairo_draw.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/benchmark_cairo_draw.nim b/experiments/benchmark_cairo_draw.nim index 9798b35..0538d0c 100644 --- a/experiments/benchmark_cairo_draw.nim +++ b/experiments/benchmark_cairo_draw.nim @@ -52,7 +52,7 @@ block: ctx.setMatrix(matrix.unsafeAddr) ctx.setSource(backdrop, 0, 0) ctx.paint() - mat = rotate(15.toRadians) + mat = translate(vec2(0.5, 0.5)) matrix = cairo.Matrix( xx: mat[0, 0], yx: mat[0, 1], From a5a6ec10abd6da1c78be2270f4401211a397bafa Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:11:41 -0600 Subject: [PATCH 05/13] f --- experiments/svg_cairo.nim | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim index 5dffdd1..b7f5cfb 100644 --- a/experiments/svg_cairo.nim +++ b/experiments/svg_cairo.nim @@ -1,7 +1,6 @@ ## Load and Save SVG files. -import cairo, chroma, pixie/common, pixie/images, pixie/paints, - pixie/paths {.all.}, strutils, tables, vmath, xmlparser, xmltree +import cairo, chroma, pixie/common, pixie/images, pixie/paints, strutils, tables, vmath, xmlparser, xmltree include pixie/paths From 152be5c902b3932313d9903c3ab5576f7add4dd2 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:12:06 -0600 Subject: [PATCH 06/13] f --- experiments/svg_cairo.nim | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim index b7f5cfb..8bd5700 100644 --- a/experiments/svg_cairo.nim +++ b/experiments/svg_cairo.nim @@ -1,6 +1,7 @@ ## Load and Save SVG files. -import cairo, chroma, pixie/common, pixie/images, pixie/paints, strutils, tables, vmath, xmlparser, xmltree +import cairo, chroma, pixie/common, pixie/images, pixie/paints, strutils, + tables, vmath, xmlparser, xmltree include pixie/paths From 2e163e6e6c8598f0839c3cb852a947449fde6baa Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:15:05 -0600 Subject: [PATCH 07/13] move pixieSweeps --- experiments/sweeps4.nim | 426 +++++++++++++++++++++++++++++++ src/pixie/paths.nim | 539 ++++------------------------------------ 2 files changed, 479 insertions(+), 486 deletions(-) create mode 100644 experiments/sweeps4.nim diff --git a/experiments/sweeps4.nim b/experiments/sweeps4.nim new file mode 100644 index 0000000..6603843 --- /dev/null +++ b/experiments/sweeps4.nim @@ -0,0 +1,426 @@ + +when defined(pixieSweeps): + import algorithm + + proc pixelCover(a0, b0: Vec2): float32 = + ## Returns the amount of area a given segment sweeps to the right + ## in a [0,0 to 1,1] box. + var + a = a0 + b = b0 + aI: Vec2 + bI: Vec2 + area: float32 = 0.0 + + if (a.x < 0 and b.x < 0) or # Both to the left. + (a.x == b.x): # Vertical line + # Area of the rectangle: + return (1 - clamp(a.x, 0, 1)) * (min(b.y, 1) - max(a.y, 0)) + + else: + # y = mm*x + bb + let + mm: float32 = (b.y - a.y) / (b.x - a.x) + bb: float32 = a.y - mm * a.x + + if a.x >= 0 and a.x <= 1 and a.y >= 0 and a.y <= 1: + # A is in pixel bounds. + aI = a + else: + aI = vec2((0 - bb) / mm, 0) + if aI.x < 0: + let y = mm * 0 + bb + # Area of the extra rectangle. + area += (min(bb, 1) - max(a.y, 0)).clamp(0, 1) + aI = vec2(0, y.clamp(0, 1)) + elif aI.x > 1: + let y = mm * 1 + bb + aI = vec2(1, y.clamp(0, 1)) + + if b.x >= 0 and b.x <= 1 and b.y >= 0 and b.y <= 1: + # B is in pixel bounds. + bI = b + else: + bI = vec2((1 - bb) / mm, 1) + if bI.x < 0: + let y = mm * 0 + bb + # Area of the extra rectangle. + area += (min(b.y, 1) - max(bb, 0)).clamp(0, 1) + bI = vec2(0, y.clamp(0, 1)) + elif bI.x > 1: + let y = mm * 1 + bb + bI = vec2(1, y.clamp(0, 1)) + + area += ((1 - aI.x) + (1 - bI.x)) / 2 * (bI.y - aI.y) + return area + + proc intersectsInner*(a, b: Segment, at: var Vec2): bool {.inline.} = + ## Checks if the a segment intersects b segment. + ## If it returns true, at will have point of intersection + let + s1 = a.to - a.at + s2 = b.to - b.at + denominator = (-s2.x * s1.y + s1.x * s2.y) + s = (-s1.y * (a.at.x - b.at.x) + s1.x * (a.at.y - b.at.y)) / denominator + t = (s2.x * (a.at.y - b.at.y) - s2.y * (a.at.x - b.at.x)) / denominator + + if s > 0 and s < 1 and t > 0 and t < 1: + at = a.at + (t * s1) + return true + + type + + Trapezoid = object + nw, ne, se, sw: Vec2 + + SweepLine = object + #m, x, b: float32 + atx, tox: float32 + winding: int16 + + proc toLine(s: (Segment, int16)): SweepLine = + var line = SweepLine() + line.atx = s[0].at.x + line.tox = s[0].to.x + # y = mx + b + # line.m = (s.at.y - s.to.y) / (s.at.x - s.to.x) + # line.b = s.at.y - line.m * s.at.x + line.winding = s[1] + return line + + proc intersectsYLine( + y: float32, s: Segment, atx: var float32 + ): bool {.inline.} = + let + s2y = s.to.y - s.at.y + denominator = -s2y + numerator = s.at.y - y + u = numerator / denominator + if u >= 0 and u <= 1: + let at = s.at + (u * vec2(s.to.x - s.at.x, s2y)) + atx = at.x + return true + + proc binaryInsert(arr: var seq[float32], v: float32) = + if arr.len == 0: + arr.add(v) + return + var + L = 0 + R = arr.len - 1 + while L < R: + let m = (L + R) div 2 + if arr[m] ~= v: + return + elif arr[m] < v: + L = m + 1 + else: # arr[m] > v: + R = m - 1 + if arr[L] ~= v: + return + elif arr[L] > v: + arr.insert(v, L) + else: + arr.insert(v, L + 1) + + proc sortSegments(segments: var seq[(Segment, int16)], inl, inr: int) = + ## Quicksort + insertion sort, in-place and faster than standard lib sort. + + let n = inr - inl + 1 + if n < 32: # Use insertion sort for the rest + for i in inl + 1 .. inr: + var + j = i - 1 + k = i + while j >= 0 and segments[j][0].at.y > segments[k][0].at.y: + swap(segments[j + 1], segments[j]) + dec j + dec k + return + var + l = inl + r = inr + let p = segments[l + n div 2][0].at.y + while l <= r: + if segments[l][0].at.y < p: + inc l + elif segments[r][0].at.y > p: + dec r + else: + swap(segments[l], segments[r]) + inc l + dec r + sortSegments(segments, inl, r) + sortSegments(segments, l, inr) + + proc sortSweepLines(segments: var seq[SweepLine], inl, inr: int) = + ## Quicksort + insertion sort, in-place and faster than standard lib sort. + + proc avg(line: SweepLine): float32 {.inline.} = + (line.tox + line.atx) / 2.float32 + + let n = inr - inl + 1 + if n < 32: # Use insertion sort for the rest + for i in inl + 1 .. inr: + var + j = i - 1 + k = i + while j >= 0 and segments[j].avg > segments[k].avg: + swap(segments[j + 1], segments[j]) + dec j + dec k + return + var + l = inl + r = inr + let p = segments[l + n div 2].avg + while l <= r: + if segments[l].avg < p: + inc l + elif segments[r].avg > p: + dec r + else: + swap(segments[l], segments[r]) + inc l + dec r + sortSweepLines(segments, inl, r) + sortSweepLines(segments, l, inr) + + proc fillShapes( + image: Image, + shapes: seq[seq[Vec2]], + color: SomeColor, + windingRule: WindingRule, + blendMode: BlendMode + ) = + + let rgbx = color.rgbx + var segments = shapes.shapesToSegments() + let + bounds = computeBounds(segments).snapToPixels() + startX = max(0, bounds.x.int) + + if segments.len == 0 or bounds.w.int == 0 or bounds.h.int == 0: + return + + # const q = 1/10 + # for i in 0 ..< segments.len: + # segments[i][0].at.x = quantize(segments[i][0].at.x, q) + # segments[i][0].at.y = quantize(segments[i][0].at.y, q) + # segments[i][0].to.x = quantize(segments[i][0].to.x, q) + # segments[i][0].to.y = quantize(segments[i][0].to.y, q) + + # Create sorted segments. + segments.sortSegments(0, segments.high) + + # Compute cut lines + var cutLines: seq[float32] + for s in segments: + cutLines.binaryInsert(s[0].at.y) + cutLines.binaryInsert(s[0].to.y) + + var + # Dont add bottom cutLine. + sweeps = newSeq[seq[SweepLine]](cutLines.len - 1) + lastSeg = 0 + i = 0 + while i < sweeps.len: + + if lastSeg < segments.len: + + while segments[lastSeg][0].at.y == cutLines[i]: + let s = segments[lastSeg] + + if s[0].to.y != cutLines[i + 1]: + var atx: float32 + var seg = s[0] + for j in i ..< sweeps.len: + let y = cutLines[j + 1] + if intersectsYLine(y, seg, atx): + sweeps[j].add(toLine((segment(seg.at, vec2(atx, y)), s[1]))) + seg = segment(vec2(atx, y), seg.to) + else: + if seg.at.y != seg.to.y: + sweeps[j].add(toLine(s)) + break + else: + sweeps[i].add(toLine(s)) + + inc lastSeg + if lastSeg >= segments.len: + break + inc i + + # i = 0 + # while i < sweeps.len: + # # TODO: Maybe finds all cuts first, add them to array, cut all lines at once. + # var crossCuts: seq[float32] + + # # echo i, " cut?" + + # for aIndex in 0 ..< sweeps[i].len: + # let a = sweeps[i][aIndex] + # # echo i, ":", sweeps.len, ":", cutLines.len + # let aSeg = segment(vec2(a.atx, cutLines[i]), vec2(a.tox, cutLines[i+1])) + # for bIndex in aIndex + 1 ..< sweeps[i].len: + # let b = sweeps[i][bIndex] + # let bSeg = segment(vec2(b.atx, cutLines[i]), vec2(b.tox, cutLines[i+1])) + # var at: Vec2 + # if intersectsInner(aSeg, bSeg, at): + # crossCuts.binaryInsert(at.y) + + # if crossCuts.len > 0: + # var + # thisSweep = sweeps[i] + # yTop = cutLines[i] + # yBottom = cutLines[i + 1] + # sweeps[i].setLen(0) + + # for k in crossCuts: + # let prevLen = cutLines.len + # cutLines.binaryInsert(k) + # if prevLen != cutLines.len: + # sweeps.insert(newSeq[SweepLine](), i + 1) + + # for a in thisSweep: + # var seg = segment(vec2(a.atx, yTop), vec2(a.tox, yBottom)) + # var at: Vec2 + # for j, cutterLine in crossCuts: + # if intersects(line(vec2(0, cutterLine), vec2(1, cutterLine)), seg, at): + # sweeps[i+j].add(toLine((segment(seg.at, at), a.winding))) + # seg = segment(at, seg.to) + # sweeps[i+crossCuts.len].add(toLine((seg, a.winding))) + + # i += crossCuts.len + + # inc i + + i = 0 + while i < sweeps.len: + # Sort the sweep by X + sweeps[i].sortSweepLines(0, sweeps[i].high) + # Do winding order + var + pen = 0 + prevFill = false + j = 0 + while j < sweeps[i].len: + let a = sweeps[i][j] + if a.winding == 1: + inc pen + if a.winding == -1: + dec pen + let thisFill = shouldFill(windingRule, pen) + if prevFill == thisFill: + # Remove this sweep line. + sweeps[i].delete(j) + continue + prevFill = thisFill + inc j + inc i + + # Used to debug sweeps: + # for s in 0 ..< sweeps.len: + # let + # y1 = cutLines[s] + # echo "M -100 ", y1 + # echo "L 300 ", y1 + # for line in sweeps[s]: + # let + # nw = vec2(line.atx, cutLines[s]) + # sw = vec2(line.tox, cutLines[s + 1]) + # echo "M ", nw.x, " ", nw.y + # echo "L ", sw.x, " ", sw.y + + proc computeCoverage( + coverages: var seq[uint16], + y: int, + startX: int, + cutLines: seq[float32], + currCutLine: int, + sweep: seq[SweepLine] + ) = + + if cutLines[currCutLine + 1] - cutLines[currCutLine] < 1/256: + # TODO some thing about micro sweeps + return + + let + sweepHeight = cutLines[currCutLine + 1] - cutLines[currCutLine] + yFracTop = ((y.float32 - cutLines[currCutLine]) / sweepHeight).clamp(0, 1) + yFracBottom = ((y.float32 + 1 - cutLines[currCutLine]) / + sweepHeight).clamp(0, 1) + var i = 0 + while i < sweep.len: + let + nwX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracTop) + neX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracTop) + + swX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracBottom) + seX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracBottom) + + minWi = min(nwX, swX).int #.clamp(startX, coverages.len + startX) + maxWi = max(nwX, swX).ceil.int #.clamp(startX, coverages.len + startX) + + minEi = min(neX, seX).int #.clamp(startX, coverages.len + startX) + maxEi = max(neX, seX).ceil.int #.clamp(startX, coverages.len + startX) + + let + nw = vec2(sweep[i+0].atx, cutLines[currCutLine]) + sw = vec2(sweep[i+0].tox, cutLines[currCutLine + 1]) + f16 = (256 * 256 - 1).float32 + for x in minWi ..< maxWi: + var area = pixelCover( + nw - vec2(x.float32, y.float32), + sw - vec2(x.float32, y.float32) + ) + coverages[x - startX] += (area * f16).uint16 + + let x = maxWi + var midArea = pixelCover( + nw - vec2(x.float32, y.float32), + sw - vec2(x.float32, y.float32) + ) + for x in maxWi ..< maxEi: + coverages[x - startX] += (midArea * f16).uint16 + + let + ne = vec2(sweep[i+1].atx, cutLines[currCutLine]) + se = vec2(sweep[i+1].tox, cutLines[currCutLine + 1]) + for x in minEi ..< maxEi: + var area = pixelCover( + ne - vec2(x.float32, y.float32), + se - vec2(x.float32, y.float32) + ) + coverages[x - startX] -= (area * f16).uint16 + + i += 2 + + var + currCutLine = 0 + coverages16 = newSeq[uint16](bounds.w.int) + coverages8 = newSeq[uint8](bounds.w.int) + for scanLine in max(cutLines[0].int, 0) ..< min(cutLines[^1].ceil.int, image.height): + + zeroMem(coverages16[0].addr, coverages16.len * 2) + + coverages16.computeCoverage( + scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) + while cutLines[currCutLine + 1] < scanLine.float + 1.0: + inc currCutLine + if currCutLine == sweeps.len: + break + coverages16.computeCoverage( + scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) + + for i in 0 ..< coverages16.len: + coverages8[i] = (coverages16[i] shr 8).uint8 + image.fillCoverage( + rgbx, + startX = startX, + y = scanLine, + coverages8, + blendMode + ) + +else: diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 71fbc14..4e655cd 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1580,7 +1580,59 @@ proc fillShapes( color: SomeColor, windingRule: WindingRule, blendMode: BlendMode -) +) = + # Figure out the total bounds of all the shapes, + # rasterize only within the total bounds + let + rgbx = color.asRgbx() + segments = shapes.shapesToSegments() + bounds = computeBounds(segments).snapToPixels() + startX = max(0, bounds.x.int) + startY = max(0, bounds.y.int) + pathHeight = min(image.height, (bounds.y + bounds.h).int) + partitioning = partitionSegments(segments, startY, pathHeight - startY) + + var + coverages = newSeq[uint8](bounds.w.int) + hits = newSeq[(float32, int16)](partitioning.maxEntryCount) + numHits: int + aa: bool + + for y in startY ..< pathHeight: + computeCoverage( + cast[ptr UncheckedArray[uint8]](coverages[0].addr), + hits, + numHits, + aa, + image.width.float32, + y, + startX, + partitioning, + windingRule + ) + if aa: + image.fillCoverage( + rgbx, + startX, + y, + coverages, + blendMode + ) + zeroMem(coverages[0].addr, coverages.len) + else: + image.fillHits( + rgbx, + startX, + y, + hits, + numHits, + windingRule, + blendMode + ) + + if blendMode == bmMask: + image.clearUnsafe(0, 0, 0, startY) + image.clearUnsafe(0, pathHeight, 0, image.height) proc fillShapes( mask: Mask, @@ -2017,491 +2069,6 @@ proc strokeOverlaps*( strokeShapes.transform(transform) strokeShapes.overlaps(test, wrNonZero) -when defined(pixieSweeps): - import algorithm - - proc pixelCover(a0, b0: Vec2): float32 = - ## Returns the amount of area a given segment sweeps to the right - ## in a [0,0 to 1,1] box. - var - a = a0 - b = b0 - aI: Vec2 - bI: Vec2 - area: float32 = 0.0 - - if (a.x < 0 and b.x < 0) or # Both to the left. - (a.x == b.x): # Vertical line - # Area of the rectangle: - return (1 - clamp(a.x, 0, 1)) * (min(b.y, 1) - max(a.y, 0)) - - else: - # y = mm*x + bb - let - mm: float32 = (b.y - a.y) / (b.x - a.x) - bb: float32 = a.y - mm * a.x - - if a.x >= 0 and a.x <= 1 and a.y >= 0 and a.y <= 1: - # A is in pixel bounds. - aI = a - else: - aI = vec2((0 - bb) / mm, 0) - if aI.x < 0: - let y = mm * 0 + bb - # Area of the extra rectangle. - area += (min(bb, 1) - max(a.y, 0)).clamp(0, 1) - aI = vec2(0, y.clamp(0, 1)) - elif aI.x > 1: - let y = mm * 1 + bb - aI = vec2(1, y.clamp(0, 1)) - - if b.x >= 0 and b.x <= 1 and b.y >= 0 and b.y <= 1: - # B is in pixel bounds. - bI = b - else: - bI = vec2((1 - bb) / mm, 1) - if bI.x < 0: - let y = mm * 0 + bb - # Area of the extra rectangle. - area += (min(b.y, 1) - max(bb, 0)).clamp(0, 1) - bI = vec2(0, y.clamp(0, 1)) - elif bI.x > 1: - let y = mm * 1 + bb - bI = vec2(1, y.clamp(0, 1)) - - area += ((1 - aI.x) + (1 - bI.x)) / 2 * (bI.y - aI.y) - return area - - proc intersectsInner*(a, b: Segment, at: var Vec2): bool {.inline.} = - ## Checks if the a segment intersects b segment. - ## If it returns true, at will have point of intersection - let - s1 = a.to - a.at - s2 = b.to - b.at - denominator = (-s2.x * s1.y + s1.x * s2.y) - s = (-s1.y * (a.at.x - b.at.x) + s1.x * (a.at.y - b.at.y)) / denominator - t = (s2.x * (a.at.y - b.at.y) - s2.y * (a.at.x - b.at.x)) / denominator - - if s > 0 and s < 1 and t > 0 and t < 1: - at = a.at + (t * s1) - return true - - type - - Trapezoid = object - nw, ne, se, sw: Vec2 - - SweepLine = object - #m, x, b: float32 - atx, tox: float32 - winding: int16 - - proc toLine(s: (Segment, int16)): SweepLine = - var line = SweepLine() - line.atx = s[0].at.x - line.tox = s[0].to.x - # y = mx + b - # line.m = (s.at.y - s.to.y) / (s.at.x - s.to.x) - # line.b = s.at.y - line.m * s.at.x - line.winding = s[1] - return line - - proc intersectsYLine( - y: float32, s: Segment, atx: var float32 - ): bool {.inline.} = - let - s2y = s.to.y - s.at.y - denominator = -s2y - numerator = s.at.y - y - u = numerator / denominator - if u >= 0 and u <= 1: - let at = s.at + (u * vec2(s.to.x - s.at.x, s2y)) - atx = at.x - return true - - proc binaryInsert(arr: var seq[float32], v: float32) = - if arr.len == 0: - arr.add(v) - return - var - L = 0 - R = arr.len - 1 - while L < R: - let m = (L + R) div 2 - if arr[m] ~= v: - return - elif arr[m] < v: - L = m + 1 - else: # arr[m] > v: - R = m - 1 - if arr[L] ~= v: - return - elif arr[L] > v: - arr.insert(v, L) - else: - arr.insert(v, L + 1) - - proc sortSegments(segments: var seq[(Segment, int16)], inl, inr: int) = - ## Quicksort + insertion sort, in-place and faster than standard lib sort. - - let n = inr - inl + 1 - if n < 32: # Use insertion sort for the rest - for i in inl + 1 .. inr: - var - j = i - 1 - k = i - while j >= 0 and segments[j][0].at.y > segments[k][0].at.y: - swap(segments[j + 1], segments[j]) - dec j - dec k - return - var - l = inl - r = inr - let p = segments[l + n div 2][0].at.y - while l <= r: - if segments[l][0].at.y < p: - inc l - elif segments[r][0].at.y > p: - dec r - else: - swap(segments[l], segments[r]) - inc l - dec r - sortSegments(segments, inl, r) - sortSegments(segments, l, inr) - - proc sortSweepLines(segments: var seq[SweepLine], inl, inr: int) = - ## Quicksort + insertion sort, in-place and faster than standard lib sort. - - proc avg(line: SweepLine): float32 {.inline.} = - (line.tox + line.atx) / 2.float32 - - let n = inr - inl + 1 - if n < 32: # Use insertion sort for the rest - for i in inl + 1 .. inr: - var - j = i - 1 - k = i - while j >= 0 and segments[j].avg > segments[k].avg: - swap(segments[j + 1], segments[j]) - dec j - dec k - return - var - l = inl - r = inr - let p = segments[l + n div 2].avg - while l <= r: - if segments[l].avg < p: - inc l - elif segments[r].avg > p: - dec r - else: - swap(segments[l], segments[r]) - inc l - dec r - sortSweepLines(segments, inl, r) - sortSweepLines(segments, l, inr) - - proc fillShapes( - image: Image, - shapes: seq[seq[Vec2]], - color: SomeColor, - windingRule: WindingRule, - blendMode: BlendMode - ) = - - let rgbx = color.rgbx - var segments = shapes.shapesToSegments() - let - bounds = computeBounds(segments).snapToPixels() - startX = max(0, bounds.x.int) - - if segments.len == 0 or bounds.w.int == 0 or bounds.h.int == 0: - return - - # const q = 1/10 - # for i in 0 ..< segments.len: - # segments[i][0].at.x = quantize(segments[i][0].at.x, q) - # segments[i][0].at.y = quantize(segments[i][0].at.y, q) - # segments[i][0].to.x = quantize(segments[i][0].to.x, q) - # segments[i][0].to.y = quantize(segments[i][0].to.y, q) - - # Create sorted segments. - segments.sortSegments(0, segments.high) - - # Compute cut lines - var cutLines: seq[float32] - for s in segments: - cutLines.binaryInsert(s[0].at.y) - cutLines.binaryInsert(s[0].to.y) - - var - # Dont add bottom cutLine. - sweeps = newSeq[seq[SweepLine]](cutLines.len - 1) - lastSeg = 0 - i = 0 - while i < sweeps.len: - - if lastSeg < segments.len: - - while segments[lastSeg][0].at.y == cutLines[i]: - let s = segments[lastSeg] - - if s[0].to.y != cutLines[i + 1]: - var atx: float32 - var seg = s[0] - for j in i ..< sweeps.len: - let y = cutLines[j + 1] - if intersectsYLine(y, seg, atx): - sweeps[j].add(toLine((segment(seg.at, vec2(atx, y)), s[1]))) - seg = segment(vec2(atx, y), seg.to) - else: - if seg.at.y != seg.to.y: - sweeps[j].add(toLine(s)) - break - else: - sweeps[i].add(toLine(s)) - - inc lastSeg - if lastSeg >= segments.len: - break - inc i - - # i = 0 - # while i < sweeps.len: - # # TODO: Maybe finds all cuts first, add them to array, cut all lines at once. - # var crossCuts: seq[float32] - - # # echo i, " cut?" - - # for aIndex in 0 ..< sweeps[i].len: - # let a = sweeps[i][aIndex] - # # echo i, ":", sweeps.len, ":", cutLines.len - # let aSeg = segment(vec2(a.atx, cutLines[i]), vec2(a.tox, cutLines[i+1])) - # for bIndex in aIndex + 1 ..< sweeps[i].len: - # let b = sweeps[i][bIndex] - # let bSeg = segment(vec2(b.atx, cutLines[i]), vec2(b.tox, cutLines[i+1])) - # var at: Vec2 - # if intersectsInner(aSeg, bSeg, at): - # crossCuts.binaryInsert(at.y) - - # if crossCuts.len > 0: - # var - # thisSweep = sweeps[i] - # yTop = cutLines[i] - # yBottom = cutLines[i + 1] - # sweeps[i].setLen(0) - - # for k in crossCuts: - # let prevLen = cutLines.len - # cutLines.binaryInsert(k) - # if prevLen != cutLines.len: - # sweeps.insert(newSeq[SweepLine](), i + 1) - - # for a in thisSweep: - # var seg = segment(vec2(a.atx, yTop), vec2(a.tox, yBottom)) - # var at: Vec2 - # for j, cutterLine in crossCuts: - # if intersects(line(vec2(0, cutterLine), vec2(1, cutterLine)), seg, at): - # sweeps[i+j].add(toLine((segment(seg.at, at), a.winding))) - # seg = segment(at, seg.to) - # sweeps[i+crossCuts.len].add(toLine((seg, a.winding))) - - # i += crossCuts.len - - # inc i - - i = 0 - while i < sweeps.len: - # Sort the sweep by X - sweeps[i].sortSweepLines(0, sweeps[i].high) - # Do winding order - var - pen = 0 - prevFill = false - j = 0 - while j < sweeps[i].len: - let a = sweeps[i][j] - if a.winding == 1: - inc pen - if a.winding == -1: - dec pen - let thisFill = shouldFill(windingRule, pen) - if prevFill == thisFill: - # Remove this sweep line. - sweeps[i].delete(j) - continue - prevFill = thisFill - inc j - inc i - - # Used to debug sweeps: - # for s in 0 ..< sweeps.len: - # let - # y1 = cutLines[s] - # echo "M -100 ", y1 - # echo "L 300 ", y1 - # for line in sweeps[s]: - # let - # nw = vec2(line.atx, cutLines[s]) - # sw = vec2(line.tox, cutLines[s + 1]) - # echo "M ", nw.x, " ", nw.y - # echo "L ", sw.x, " ", sw.y - - proc computeCoverage( - coverages: var seq[uint16], - y: int, - startX: int, - cutLines: seq[float32], - currCutLine: int, - sweep: seq[SweepLine] - ) = - - if cutLines[currCutLine + 1] - cutLines[currCutLine] < 1/256: - # TODO some thing about micro sweeps - return - - let - sweepHeight = cutLines[currCutLine + 1] - cutLines[currCutLine] - yFracTop = ((y.float32 - cutLines[currCutLine]) / sweepHeight).clamp(0, 1) - yFracBottom = ((y.float32 + 1 - cutLines[currCutLine]) / - sweepHeight).clamp(0, 1) - var i = 0 - while i < sweep.len: - let - nwX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracTop) - neX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracTop) - - swX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracBottom) - seX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracBottom) - - minWi = min(nwX, swX).int #.clamp(startX, coverages.len + startX) - maxWi = max(nwX, swX).ceil.int #.clamp(startX, coverages.len + startX) - - minEi = min(neX, seX).int #.clamp(startX, coverages.len + startX) - maxEi = max(neX, seX).ceil.int #.clamp(startX, coverages.len + startX) - - let - nw = vec2(sweep[i+0].atx, cutLines[currCutLine]) - sw = vec2(sweep[i+0].tox, cutLines[currCutLine + 1]) - f16 = (256 * 256 - 1).float32 - for x in minWi ..< maxWi: - var area = pixelCover( - nw - vec2(x.float32, y.float32), - sw - vec2(x.float32, y.float32) - ) - coverages[x - startX] += (area * f16).uint16 - - let x = maxWi - var midArea = pixelCover( - nw - vec2(x.float32, y.float32), - sw - vec2(x.float32, y.float32) - ) - for x in maxWi ..< maxEi: - coverages[x - startX] += (midArea * f16).uint16 - - let - ne = vec2(sweep[i+1].atx, cutLines[currCutLine]) - se = vec2(sweep[i+1].tox, cutLines[currCutLine + 1]) - for x in minEi ..< maxEi: - var area = pixelCover( - ne - vec2(x.float32, y.float32), - se - vec2(x.float32, y.float32) - ) - coverages[x - startX] -= (area * f16).uint16 - - i += 2 - - var - currCutLine = 0 - coverages16 = newSeq[uint16](bounds.w.int) - coverages8 = newSeq[uint8](bounds.w.int) - for scanLine in max(cutLines[0].int, 0) ..< min(cutLines[^1].ceil.int, image.height): - - zeroMem(coverages16[0].addr, coverages16.len * 2) - - coverages16.computeCoverage( - scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) - while cutLines[currCutLine + 1] < scanLine.float + 1.0: - inc currCutLine - if currCutLine == sweeps.len: - break - coverages16.computeCoverage( - scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) - - for i in 0 ..< coverages16.len: - coverages8[i] = (coverages16[i] shr 8).uint8 - image.fillCoverage( - rgbx, - startX = startX, - y = scanLine, - coverages8, - blendMode - ) - -else: - proc fillShapes( - image: Image, - shapes: seq[seq[Vec2]], - color: SomeColor, - windingRule: WindingRule, - blendMode: BlendMode - ) = - # Figure out the total bounds of all the shapes, - # rasterize only within the total bounds - let - rgbx = color.asRgbx() - segments = shapes.shapesToSegments() - bounds = computeBounds(segments).snapToPixels() - startX = max(0, bounds.x.int) - startY = max(0, bounds.y.int) - pathHeight = min(image.height, (bounds.y + bounds.h).int) - partitioning = partitionSegments(segments, startY, pathHeight - startY) - - var - coverages = newSeq[uint8](bounds.w.int) - hits = newSeq[(float32, int16)](partitioning.maxEntryCount) - numHits: int - aa: bool - - for y in startY ..< pathHeight: - computeCoverage( - cast[ptr UncheckedArray[uint8]](coverages[0].addr), - hits, - numHits, - aa, - image.width.float32, - y, - startX, - partitioning, - windingRule - ) - if aa: - image.fillCoverage( - rgbx, - startX, - y, - coverages, - blendMode - ) - zeroMem(coverages[0].addr, coverages.len) - else: - image.fillHits( - rgbx, - startX, - y, - hits, - numHits, - windingRule, - blendMode - ) - - if blendMode == bmMask: - image.clearUnsafe(0, 0, 0, startY) - image.clearUnsafe(0, pathHeight, 0, image.height) - proc fillMask( shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero ): Mask = From afc7e8d8162446b99163b2398f5e91f8d6affc26 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:19:07 -0600 Subject: [PATCH 08/13] move fillImage/fillMask strokeImage/strokeMask --- experiments/benchmark_cairo.nim | 168 ++++++++++++++++++++++++++++++++ src/pixie/paths.nim | 161 ------------------------------ 2 files changed, 168 insertions(+), 161 deletions(-) diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index 4afb63d..80ff9ef 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -1,10 +1,178 @@ import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2, pixie/internal + proc doDiff(a, b: Image, name: string) = let (diffScore, diffImage) = diff(a, b) echo &"{name} score: {diffScore}" diffImage.writeFile(&"{name}_diff.png") +when defined(release): + {.push checks: off.} + +proc fillMask( + shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero +): Mask = + result = newMask(width, height) + + let + segments = shapes.shapesToSegments() + bounds = computeBounds(segments).snapToPixels() + startY = max(0, bounds.y.int) + pathHeight = min(height, (bounds.y + bounds.h).int) + partitioning = partitionSegments(segments, startY, pathHeight) + width = width.float32 + + var + hits = newSeq[(float32, int16)](partitioning.maxEntryCount) + numHits: int + aa: bool + for y in startY ..< pathHeight: + computeCoverage( + cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), + hits, + numHits, + aa, + width, + y, + 0, + partitioning, + windingRule + ) + if not aa: + for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): + let + startIndex = result.dataIndex(prevAt.int, y) + len = at.int - prevAt.int + fillUnsafe(result.data, 255, startIndex, len) + +proc fillMask*( + path: SomePath, width, height: int, windingRule = wrNonZero +): Mask = + ## Returns a new mask with the path filled. This is a faster alternative + ## to `newMask` + `fillPath`. + let shapes = parseSomePath(path, true, 1) + shapes.fillMask(width, height, windingRule) + +proc fillImage( + shapes: seq[seq[Vec2]], + width, height: int, + color: SomeColor, + windingRule = wrNonZero +): Image = + result = newImage(width, height) + + let + mask = shapes.fillMask(width, height, windingRule) + rgbx = color.rgbx() + + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let + colorVec = mm_set1_epi32(cast[int32](rgbx)) + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + vec255 = mm_set1_epi32(cast[int32](uint32.high)) + vecZero = mm_setzero_si128() + colorVecEven = mm_slli_epi16(colorVec, 8) + colorVecOdd = mm_and_si128(colorVec, oddMask) + iterations = result.data.len div 16 + for _ in 0 ..< iterations: + var coverageVec = mm_loadu_si128(mask.data[i].addr) + if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: + if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: + for q in [0, 4, 8, 12]: + mm_storeu_si128(result.data[i + q].addr, colorVec) + else: + for q in [0, 4, 8, 12]: + var unpacked = unpackAlphaValues(coverageVec) + # Shift the coverages from `a` to `g` and `a` for multiplying + unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) + + var + sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) + sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) + sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) + sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) + + mm_storeu_si128( + result.data[i + q].addr, + mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) + ) + + coverageVec = mm_srli_si128(coverageVec, 4) + + i += 16 + + let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] + for i in i ..< result.data.len: + let coverage = mask.data[i] + if coverage == 255: + result.data[i] = rgbx + elif coverage != 0: + result.data[i].r = ((channels[0] * coverage) div 255).uint8 + result.data[i].g = ((channels[1] * coverage) div 255).uint8 + result.data[i].b = ((channels[2] * coverage) div 255).uint8 + result.data[i].a = ((channels[3] * coverage) div 255).uint8 + +proc fillImage*( + path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero +): Image = + ## Returns a new image with the path filled. This is a faster alternative + ## to `newImage` + `fillPath`. + let shapes = parseSomePath(path, false, 1) + shapes.fillImage(width, height, color, windingRule) + +proc strokeMask*( + path: SomePath, + width, height: int, + strokeWidth: float32 = 1.0, + lineCap = lcButt, + lineJoin = ljMiter, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Mask = + ## Returns a new mask with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillMask(width, height, wrNonZero) + +proc strokeImage*( + path: SomePath, + width, height: int, + color: SomeColor, + strokeWidth: float32 = 1.0, + lineCap = lcButt, + lineJoin = ljMiter, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Image = + ## Returns a new image with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillImage(width, height, color, wrNonZero) + +when defined(release): + {.pop.} + + block: let path = newPath() path.moveTo(0, 0) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 4e655cd..14e9674 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -2068,164 +2068,3 @@ proc strokeOverlaps*( ) strokeShapes.transform(transform) strokeShapes.overlaps(test, wrNonZero) - -proc fillMask( - shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero -): Mask = - result = newMask(width, height) - - let - segments = shapes.shapesToSegments() - bounds = computeBounds(segments).snapToPixels() - startY = max(0, bounds.y.int) - pathHeight = min(height, (bounds.y + bounds.h).int) - partitioning = partitionSegments(segments, startY, pathHeight) - width = width.float32 - - var - hits = newSeq[(float32, int16)](partitioning.maxEntryCount) - numHits: int - aa: bool - for y in startY ..< pathHeight: - computeCoverage( - cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), - hits, - numHits, - aa, - width, - y, - 0, - partitioning, - windingRule - ) - if not aa: - for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): - let - startIndex = result.dataIndex(prevAt.int, y) - len = at.int - prevAt.int - fillUnsafe(result.data, 255, startIndex, len) - -proc fillMask*( - path: SomePath, width, height: int, windingRule = wrNonZero -): Mask = - ## Returns a new mask with the path filled. This is a faster alternative - ## to `newMask` + `fillPath`. - let shapes = parseSomePath(path, true, 1) - shapes.fillMask(width, height, windingRule) - -proc fillImage( - shapes: seq[seq[Vec2]], - width, height: int, - color: SomeColor, - windingRule = wrNonZero -): Image = - result = newImage(width, height) - - let - mask = shapes.fillMask(width, height, windingRule) - rgbx = color.rgbx() - - var i: int - when defined(amd64) and not defined(pixieNoSimd): - let - colorVec = mm_set1_epi32(cast[int32](rgbx)) - oddMask = mm_set1_epi16(cast[int16](0xff00)) - div255 = mm_set1_epi16(cast[int16](0x8081)) - vec255 = mm_set1_epi32(cast[int32](uint32.high)) - vecZero = mm_setzero_si128() - colorVecEven = mm_slli_epi16(colorVec, 8) - colorVecOdd = mm_and_si128(colorVec, oddMask) - iterations = result.data.len div 16 - for _ in 0 ..< iterations: - var coverageVec = mm_loadu_si128(mask.data[i].addr) - if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: - if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: - for q in [0, 4, 8, 12]: - mm_storeu_si128(result.data[i + q].addr, colorVec) - else: - for q in [0, 4, 8, 12]: - var unpacked = unpackAlphaValues(coverageVec) - # Shift the coverages from `a` to `g` and `a` for multiplying - unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) - - var - sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) - sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) - sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) - sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) - - mm_storeu_si128( - result.data[i + q].addr, - mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) - ) - - coverageVec = mm_srli_si128(coverageVec, 4) - - i += 16 - - let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] - for i in i ..< result.data.len: - let coverage = mask.data[i] - if coverage == 255: - result.data[i] = rgbx - elif coverage != 0: - result.data[i].r = ((channels[0] * coverage) div 255).uint8 - result.data[i].g = ((channels[1] * coverage) div 255).uint8 - result.data[i].b = ((channels[2] * coverage) div 255).uint8 - result.data[i].a = ((channels[3] * coverage) div 255).uint8 - -proc fillImage*( - path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero -): Image = - ## Returns a new image with the path filled. This is a faster alternative - ## to `newImage` + `fillPath`. - let shapes = parseSomePath(path, false, 1) - shapes.fillImage(width, height, color, windingRule) - -proc strokeMask*( - path: SomePath, - width, height: int, - strokeWidth: float32 = 1.0, - lineCap = lcButt, - lineJoin = ljMiter, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Mask = - ## Returns a new mask with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillMask(width, height, wrNonZero) - -proc strokeImage*( - path: SomePath, - width, height: int, - color: SomeColor, - strokeWidth: float32 = 1.0, - lineCap = lcButt, - lineJoin = ljMiter, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Image = - ## Returns a new image with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillImage(width, height, color, wrNonZero) - -when defined(release): - {.pop.} From 97ed037d2aff722dbd296907555247fa0f05cba9 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:21:21 -0600 Subject: [PATCH 09/13] f --- src/pixie/blends.nim | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index f4777c9..371b50f 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -502,8 +502,6 @@ proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} = raise newException(PixieError, "No masker for " & $blendMode) when defined(amd64) and not defined(pixieNoSimd): - import nimsimd/sse2 - type BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].} ## Function signature returned by blenderSimd. From 5da824123122e24118c1d48b0d14ce719a6a06b3 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:21:54 -0600 Subject: [PATCH 10/13] rm --- tests/paths/pathHeart2.png | Bin 2440 -> 0 bytes tests/test_paths.nim | 10 ---------- 2 files changed, 10 deletions(-) delete mode 100644 tests/paths/pathHeart2.png diff --git a/tests/paths/pathHeart2.png b/tests/paths/pathHeart2.png deleted file mode 100644 index 560cd6648020522cf54e28e701015f4cfddca69c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2440 zcmV;333v91P)$hbEa z*2`9uRqk(S*?YEo*?b~%et8vg?4}jN6E{p-6$}Jk&@^op09@^KLsA_8_(s6*eH{rm_DpTTNE#k9yC+Q z_i~9Rdw$*IMacQ7Yu0K$?O6aA(H%HnP)yL^{+f{9VcJ%5AaP~SuO~SPIWlE=$@+?E zztc4BM?HhO0RYU1h4e{_%PMz1*|PV7NE@=}myVo-tpC#V=Rwo%OBdJ;0N^Vl{Qe5l zPQEA-U-tacF<>DVzXm+mGx`DmUlsZKl0Cov7?6;TX{+xB(6#^o^cUD=rpF^y`%K$9 zn64e!^UH)|A#~_CY?%K~09t9Lz;4p@L72L9h{NXTPf zj?&&SM4EUP2#`)z{qqTy=TX6*A{1I*KhUWL-t{`v7pKmm6CY@WH<|pLV^>`I)xWEV4er zLgL}*G61+yB&g>g`!6t zu#k8}Ukw0Pi3If?JO}Ke5NY!>ZR-~z+eKJNEMnBrVWvn>uR(?Zc1eh|`I)xWAhKPA zg~TJqS~_VY64YzxgF+9l+Z6}tT#nEYs<@DFbP)iI5>G<$Dacw(JNffZUuQsENW}Oq z08GdbOVK}vOgs5(|8IYWu#i|pr*q}keO6T=3NoZq1zSay7Zwr^8#4jm7LlNeJ|ONg zZR@2zrD+is5(`If2LL^76h%f5*&j_i`R9xab>TpWelq~f>Z+%303z-%ZEKH66=5OO zAtR(|aF0k(MQ=JeK3AKv-sxSM9$_JC%O{`k`2x$+hf!qWLTCF=>W=I=nQ57BgoRMR zSlFNu-z&Q-E3$)#?-w_%H?uE!31J~bJYw8LlcwppLYVEQofJC4A{+^+iRfR@0OnCe zg&3US##-CjFSayuCS>iz$;VYc;*1Ly+LzJSy=W~*dk6(q{fIbD`wzmA5JL4Mj&Rg} z5YB~!s?LL^eUB)^4Y3?pO%1qO9^@!0-Pd0MMyZMIR8ct*yO%;Xp209gq-e zC?3|osD|VF0r9uPttYni^KM24B!q}X^i}|E2|ZWz1{q&$+Sak&rD6m9LEO`{vS($msP@1(CmV zti64n=$(l~$-59ru)6$?MgZ+W00@dEQb-`;nawqxF%Bd zfd<-;oT0)Qf4I2maiO`oPH{yDQEOXo55@k3`i)@Yzy}Yr;hGR)sP!jq^&35LRmf24 zPu%P`dg7`Og8Gx{kp6z5&z$e%xUl&3#~W##+{ASu1Wm=S3F)T`b!dfizOtw(@eHkt z+jt^`pbJ)NLIz#FTu@hGagGha#>9Zn06N7pAp~94RY*r(myW0suHOQ;C!Pu+ZnVek zye>S7E^D}+Udswc+_9ta@PZfE0rWCQ&xOzmJ`}q452b8`0RUkyS5_3xc~dJMou{XM zpO<^iJHc}y#KzSA3(Ky)WqU9Xq**KDs7w|(;=Yq@r)Q7fz2(ecZEVJs%^QwWj)TFV2_X`#)|u61(d|AzqdjE|l?@h%c%b7f%oy?4 zMBWDsoiM0Ei29b~sg+}@w)ul>Dgcb9GOyvto6nxcnWfE%54~=?FM}?Gp!@DF9eLZ< zl49Q!ukQ-SIsWC7TEyEZ1Z{BuAX>bw%+(2b+!#QrZTTaan z|MO=TZ^SGE(g-0o9dGW`?Bt8lus#j|ZlyA}K;&EM8WRieIo>SvUnE{3l@Nk12r_N! zWjBXe_@&{HA5&Y@rc6jDq#GJ$abrM|1`5T;rkLmWJa5qfrfu!1E;mvD>`nmiJE;0O z5b+%6Y;jRj;;GNQDG^c&A!<{}zsEwxs|=8&jHEKBAo5n^@U5bz`j?&7^&07g5N0ab z^613qRt?w`04S$2Q8>q4=R4t>C9l`ND_UnTDF`7%{gLG9`-Y8ue)RBBV*udmRF(}y z{`7nQIduPoH#eWiwyZ1^j1c0NA06m4ZEI(BxzP&1X7|^xboCdH1RE13)uFnDLO}`X zrZ$y)tG|AwM}P5B_wt2AK?)(#^(%$eU!LaLMp$9#u zvO=cdg$y;65Xn$NBtr?23?)P|ln}{KLL@^8kqjk7GX4)0^0JDb(*ceE0000 Date: Sun, 12 Dec 2021 18:23:31 -0600 Subject: [PATCH 11/13] add overwrite benchmark --- experiments/benchmark_cairo.nim | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index 80ff9ef..ea66bdc 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -357,6 +357,23 @@ block: # doDiff(readImage("cairo4.png"), a, "4") + var b: Image + let paint = newPaint(pkSolid) + paint.color = color(1, 0, 0, 0.5) + paint.blendMode = bmOverwrite + + timeIt "pixie4 overwrite": + b = newImage(1000, 1000) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + b.fillPath(p, paint) + + # b.writeFile("b.png") + timeIt "pixie4 mask": let mask = newMask(1000, 1000) From 7abe128124f9bd2f8b58d95b1753d46f54014543 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:29:18 -0600 Subject: [PATCH 12/13] f --- src/pixie/paths.nim | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 14e9674..f98cae0 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1394,15 +1394,6 @@ proc fillCoverage( x += 16 - proc applyCoverage(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} = - if coverage != 255: - result.r = ((rgbx.r.uint32 * coverage) div 255).uint8 - result.g = ((rgbx.g.uint32 * coverage) div 255).uint8 - result.b = ((rgbx.b.uint32 * coverage) div 255).uint8 - result.a = ((rgbx.a.uint32 * coverage) div 255).uint8 - else: - result = rgbx - let blender = blendMode.blender() for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] @@ -1411,9 +1402,13 @@ proc fillCoverage( # Skip blending image.unsafe[x, y] = rgbx else: - let - source = rgbx.applyCoverage(coverage) - backdrop = image.unsafe[x, y] + var source = rgbx + if coverage != 255: + source.r = ((source.r.uint32 * coverage) div 255).uint8 + source.g = ((source.g.uint32 * coverage) div 255).uint8 + source.b = ((source.b.uint32 * coverage) div 255).uint8 + source.a = ((source.a.uint32 * coverage) div 255).uint8 + let backdrop = image.unsafe[x, y] image.unsafe[x, y] = blender(backdrop, source) elif blendMode == bmMask: image.unsafe[x, y] = rgbx(0, 0, 0, 0) From 6280af09f05f421f6078dc6e83e642dd038fce2a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:42:01 -0600 Subject: [PATCH 13/13] bmOverwrite --- src/pixie/paths.nim | 67 ++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index f98cae0..1e37fbe 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1335,7 +1335,10 @@ proc fillCoverage( # If the coverages are not all zero if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: # If the coverages are all 255 - if blendMode == bmNormal: + if blendMode == bmOverwrite: + for i in 0 ..< 4: + mm_storeu_si128(image.data[index + i * 4].addr, colorVec) + elif blendMode == bmNormal: if rgbx.a == 255: for i in 0 ..< 4: mm_storeu_si128(image.data[index + i * 4].addr, colorVec) @@ -1375,11 +1378,14 @@ proc fillCoverage( source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) - let backdrop = mm_loadu_si128(image.data[index + i * 4].addr) - mm_storeu_si128( - image.data[index + i * 4].addr, - blendProc(backdrop, source) - ) + if blendMode == bmOverwrite: + mm_storeu_si128(image.data[index + i * 4].addr, source) + else: + let backdrop = mm_loadu_si128(image.data[index + i * 4].addr) + mm_storeu_si128( + image.data[index + i * 4].addr, + blendProc(backdrop, source) + ) coverageVec = mm_srli_si128(coverageVec, 4) @@ -1401,13 +1407,18 @@ proc fillCoverage( if blendMode == bmNormal and coverage == 255 and rgbx.a == 255: # Skip blending image.unsafe[x, y] = rgbx + continue + + var source = rgbx + if coverage != 255: + source.r = ((source.r.uint32 * coverage) div 255).uint8 + source.g = ((source.g.uint32 * coverage) div 255).uint8 + source.b = ((source.b.uint32 * coverage) div 255).uint8 + source.a = ((source.a.uint32 * coverage) div 255).uint8 + + if blendMode == bmOverwrite: + image.unsafe[x, y] = source else: - var source = rgbx - if coverage != 255: - source.r = ((source.r.uint32 * coverage) div 255).uint8 - source.g = ((source.g.uint32 * coverage) div 255).uint8 - source.b = ((source.b.uint32 * coverage) div 255).uint8 - source.a = ((source.a.uint32 * coverage) div 255).uint8 let backdrop = image.unsafe[x, y] image.unsafe[x, y] = blender(backdrop, source) elif blendMode == bmMask: @@ -1428,28 +1439,34 @@ proc fillCoverage( if blendMode.hasSimdMasker(): let maskerSimd = blendMode.maskerSimd() - zeroVec = mm_setzero_si128() + vecZero = mm_setzero_si128() for _ in 0 ..< coverages.len div 16: let index = mask.dataIndex(x, y) - coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr) - if mm_movemask_epi8(mm_cmpeq_epi16(coverage, zeroVec)) != 0xffff: + coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr) + if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: # If the coverages are not all zero - let backdrop = mm_loadu_si128(mask.data[index].addr) - mm_storeu_si128( - mask.data[index].addr, - maskerSimd(backdrop, coverage) - ) + if blendMode == bmOverwrite: + mm_storeu_si128(mask.data[index].addr, coverageVec) + else: + let backdrop = mm_loadu_si128(mask.data[index].addr) + mm_storeu_si128( + mask.data[index].addr, + maskerSimd(backdrop, coverageVec) + ) elif blendMode == bmMask: - mm_storeu_si128(mask.data[index].addr, zeroVec) + mm_storeu_si128(mask.data[index].addr, vecZero) x += 16 let masker = blendMode.masker() for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0 or blendMode == bmExcludeMask: - let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = masker(backdrop, coverage) + if blendMode == bmOverwrite: + mask.unsafe[x, y] = coverage + else: + let backdrop = mask.unsafe[x, y] + mask.unsafe[x, y] = masker(backdrop, coverage) elif blendMode == bmMask: mask.unsafe[x, y] = 0 @@ -1479,7 +1496,7 @@ proc fillHits( filledTo = fillStart + fillLen - if blendMode == bmNormal and rgbx.a == 255: + if blendMode == bmOverwrite or (blendMode == bmNormal and rgbx.a == 255): fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen) continue @@ -1541,7 +1558,7 @@ proc fillHits( filledTo = fillStart + fillLen - if blendMode == bmNormal or blendMode == bmOverwrite: + if blendMode in {bmNormal, bmOverwrite}: fillUnsafe(mask.data, 255, mask.dataIndex(fillStart, y), fillLen) continue