diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index 4afb63d..ea66bdc 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -1,10 +1,178 @@ import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2, pixie/internal + proc doDiff(a, b: Image, name: string) = let (diffScore, diffImage) = diff(a, b) echo &"{name} score: {diffScore}" diffImage.writeFile(&"{name}_diff.png") +when defined(release): + {.push checks: off.} + +proc fillMask( + shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero +): Mask = + result = newMask(width, height) + + let + segments = shapes.shapesToSegments() + bounds = computeBounds(segments).snapToPixels() + startY = max(0, bounds.y.int) + pathHeight = min(height, (bounds.y + bounds.h).int) + partitioning = partitionSegments(segments, startY, pathHeight) + width = width.float32 + + var + hits = newSeq[(float32, int16)](partitioning.maxEntryCount) + numHits: int + aa: bool + for y in startY ..< pathHeight: + computeCoverage( + cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), + hits, + numHits, + aa, + width, + y, + 0, + partitioning, + windingRule + ) + if not aa: + for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): + let + startIndex = result.dataIndex(prevAt.int, y) + len = at.int - prevAt.int + fillUnsafe(result.data, 255, startIndex, len) + +proc fillMask*( + path: SomePath, width, height: int, windingRule = wrNonZero +): Mask = + ## Returns a new mask with the path filled. This is a faster alternative + ## to `newMask` + `fillPath`. + let shapes = parseSomePath(path, true, 1) + shapes.fillMask(width, height, windingRule) + +proc fillImage( + shapes: seq[seq[Vec2]], + width, height: int, + color: SomeColor, + windingRule = wrNonZero +): Image = + result = newImage(width, height) + + let + mask = shapes.fillMask(width, height, windingRule) + rgbx = color.rgbx() + + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let + colorVec = mm_set1_epi32(cast[int32](rgbx)) + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + vec255 = mm_set1_epi32(cast[int32](uint32.high)) + vecZero = mm_setzero_si128() + colorVecEven = mm_slli_epi16(colorVec, 8) + colorVecOdd = mm_and_si128(colorVec, oddMask) + iterations = result.data.len div 16 + for _ in 0 ..< iterations: + var coverageVec = mm_loadu_si128(mask.data[i].addr) + if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: + if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: + for q in [0, 4, 8, 12]: + mm_storeu_si128(result.data[i + q].addr, colorVec) + else: + for q in [0, 4, 8, 12]: + var unpacked = unpackAlphaValues(coverageVec) + # Shift the coverages from `a` to `g` and `a` for multiplying + unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) + + var + sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) + sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) + sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) + sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) + + mm_storeu_si128( + result.data[i + q].addr, + mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) + ) + + coverageVec = mm_srli_si128(coverageVec, 4) + + i += 16 + + let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] + for i in i ..< result.data.len: + let coverage = mask.data[i] + if coverage == 255: + result.data[i] = rgbx + elif coverage != 0: + result.data[i].r = ((channels[0] * coverage) div 255).uint8 + result.data[i].g = ((channels[1] * coverage) div 255).uint8 + result.data[i].b = ((channels[2] * coverage) div 255).uint8 + result.data[i].a = ((channels[3] * coverage) div 255).uint8 + +proc fillImage*( + path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero +): Image = + ## Returns a new image with the path filled. This is a faster alternative + ## to `newImage` + `fillPath`. + let shapes = parseSomePath(path, false, 1) + shapes.fillImage(width, height, color, windingRule) + +proc strokeMask*( + path: SomePath, + width, height: int, + strokeWidth: float32 = 1.0, + lineCap = lcButt, + lineJoin = ljMiter, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Mask = + ## Returns a new mask with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillMask(width, height, wrNonZero) + +proc strokeImage*( + path: SomePath, + width, height: int, + color: SomeColor, + strokeWidth: float32 = 1.0, + lineCap = lcButt, + lineJoin = ljMiter, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Image = + ## Returns a new image with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillImage(width, height, color, wrNonZero) + +when defined(release): + {.pop.} + + block: let path = newPath() path.moveTo(0, 0) @@ -189,6 +357,23 @@ block: # doDiff(readImage("cairo4.png"), a, "4") + var b: Image + let paint = newPaint(pkSolid) + paint.color = color(1, 0, 0, 0.5) + paint.blendMode = bmOverwrite + + timeIt "pixie4 overwrite": + b = newImage(1000, 1000) + + let p = newPath() + p.moveTo(shapes[0][0]) + for shape in shapes: + for v in shape: + p.lineTo(v) + b.fillPath(p, paint) + + # b.writeFile("b.png") + timeIt "pixie4 mask": let mask = newMask(1000, 1000) diff --git a/experiments/benchmark_cairo_draw.nim b/experiments/benchmark_cairo_draw.nim new file mode 100644 index 0000000..0538d0c --- /dev/null +++ b/experiments/benchmark_cairo_draw.nim @@ -0,0 +1,130 @@ +import benchy, cairo, pixie + +block: + let + backdrop = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/dragon2.png") + source = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = imageSurfaceCreate(FORMAT_ARGB32, 1568, 940) + ctx = tmp.create() + + timeIt "cairo draw basic": + ctx.setSource(backdrop, 0, 0) + ctx.paint() + ctx.setSource(source, 0, 0) + ctx.paint() + tmp.flush() + + # echo tmp.writeToPng("tmp.png") + +block: + let + backdrop = readImage("tests/fileformats/svg/masters/dragon2.png") + source = readImage("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = newImage(1568, 940) + + timeIt "isOneColor": + doAssert not backdrop.isOneColor() + + timeIt "pixie draw basic": + tmp.draw(backdrop) + tmp.draw(source) + + # tmp.writeFile("tmp2.png") + +block: + let + backdrop = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/dragon2.png") + source = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = imageSurfaceCreate(FORMAT_ARGB32, 1568, 940) + ctx = tmp.create() + + timeIt "cairo draw smooth": + var + mat = mat3() + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(backdrop, 0, 0) + ctx.paint() + mat = translate(vec2(0.5, 0.5)) + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(source, 0, 0) + ctx.paint() + tmp.flush() + + # echo tmp.writeToPng("tmp.png") + +block: + let + backdrop = readImage("tests/fileformats/svg/masters/dragon2.png") + source = readImage("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = newImage(1568, 940) + + timeIt "pixie draw smooth": + tmp.draw(backdrop) + tmp.draw(source, translate(vec2(0.5, 0.5))) + + # tmp.writeFile("tmp2.png") + +block: + let + backdrop = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/dragon2.png") + source = imageSurfaceCreateFromPng("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = imageSurfaceCreate(FORMAT_ARGB32, 1568, 940) + ctx = tmp.create() + + timeIt "cairo draw smooth rotated": + var + mat = mat3() + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(backdrop, 0, 0) + ctx.paint() + mat = rotate(15.toRadians) + matrix = cairo.Matrix( + xx: mat[0, 0], + yx: mat[0, 1], + xy: mat[1, 0], + yy: mat[1, 1], + x0: mat[2, 0], + y0: mat[2, 1], + ) + ctx.setMatrix(matrix.unsafeAddr) + ctx.setSource(source, 0, 0) + ctx.paint() + tmp.flush() + + # echo tmp.writeToPng("tmp.png") + +block: + let + backdrop = readImage("tests/fileformats/svg/masters/dragon2.png") + source = readImage("tests/fileformats/svg/masters/Ghostscript_Tiger.png") + tmp = newImage(1568, 940) + + timeIt "pixie draw smooth rotated": + tmp.draw(backdrop) + tmp.draw(source, rotate(15.toRadians)) + + # tmp.writeFile("tmp2.png") diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim index 916b01d..8bd5700 100644 --- a/experiments/svg_cairo.nim +++ b/experiments/svg_cairo.nim @@ -1,7 +1,7 @@ ## Load and Save SVG files. -import cairo, chroma, pixie/common, pixie/images, pixie/paints, pixie/paths {.all.}, - strutils, tables, vmath, xmlparser, xmltree +import cairo, chroma, pixie/common, pixie/images, pixie/paints, strutils, + tables, vmath, xmlparser, xmltree include pixie/paths @@ -580,7 +580,7 @@ proc decodeSvg*(data: string, width = 0, height = 0): Image = let bgra = pixels[result.dataIndex(x, y)] rgba = rgba(bgra[2], bgra[1], bgra[0], bgra[3]) - result.setRgbaUnsafe(x, y, rgba.rgbx()) + result.unsafe[x, y] = rgba.rgbx() except PixieError as e: raise e except: diff --git a/experiments/sweeps4.nim b/experiments/sweeps4.nim new file mode 100644 index 0000000..6603843 --- /dev/null +++ b/experiments/sweeps4.nim @@ -0,0 +1,426 @@ + +when defined(pixieSweeps): + import algorithm + + proc pixelCover(a0, b0: Vec2): float32 = + ## Returns the amount of area a given segment sweeps to the right + ## in a [0,0 to 1,1] box. + var + a = a0 + b = b0 + aI: Vec2 + bI: Vec2 + area: float32 = 0.0 + + if (a.x < 0 and b.x < 0) or # Both to the left. + (a.x == b.x): # Vertical line + # Area of the rectangle: + return (1 - clamp(a.x, 0, 1)) * (min(b.y, 1) - max(a.y, 0)) + + else: + # y = mm*x + bb + let + mm: float32 = (b.y - a.y) / (b.x - a.x) + bb: float32 = a.y - mm * a.x + + if a.x >= 0 and a.x <= 1 and a.y >= 0 and a.y <= 1: + # A is in pixel bounds. + aI = a + else: + aI = vec2((0 - bb) / mm, 0) + if aI.x < 0: + let y = mm * 0 + bb + # Area of the extra rectangle. + area += (min(bb, 1) - max(a.y, 0)).clamp(0, 1) + aI = vec2(0, y.clamp(0, 1)) + elif aI.x > 1: + let y = mm * 1 + bb + aI = vec2(1, y.clamp(0, 1)) + + if b.x >= 0 and b.x <= 1 and b.y >= 0 and b.y <= 1: + # B is in pixel bounds. + bI = b + else: + bI = vec2((1 - bb) / mm, 1) + if bI.x < 0: + let y = mm * 0 + bb + # Area of the extra rectangle. + area += (min(b.y, 1) - max(bb, 0)).clamp(0, 1) + bI = vec2(0, y.clamp(0, 1)) + elif bI.x > 1: + let y = mm * 1 + bb + bI = vec2(1, y.clamp(0, 1)) + + area += ((1 - aI.x) + (1 - bI.x)) / 2 * (bI.y - aI.y) + return area + + proc intersectsInner*(a, b: Segment, at: var Vec2): bool {.inline.} = + ## Checks if the a segment intersects b segment. + ## If it returns true, at will have point of intersection + let + s1 = a.to - a.at + s2 = b.to - b.at + denominator = (-s2.x * s1.y + s1.x * s2.y) + s = (-s1.y * (a.at.x - b.at.x) + s1.x * (a.at.y - b.at.y)) / denominator + t = (s2.x * (a.at.y - b.at.y) - s2.y * (a.at.x - b.at.x)) / denominator + + if s > 0 and s < 1 and t > 0 and t < 1: + at = a.at + (t * s1) + return true + + type + + Trapezoid = object + nw, ne, se, sw: Vec2 + + SweepLine = object + #m, x, b: float32 + atx, tox: float32 + winding: int16 + + proc toLine(s: (Segment, int16)): SweepLine = + var line = SweepLine() + line.atx = s[0].at.x + line.tox = s[0].to.x + # y = mx + b + # line.m = (s.at.y - s.to.y) / (s.at.x - s.to.x) + # line.b = s.at.y - line.m * s.at.x + line.winding = s[1] + return line + + proc intersectsYLine( + y: float32, s: Segment, atx: var float32 + ): bool {.inline.} = + let + s2y = s.to.y - s.at.y + denominator = -s2y + numerator = s.at.y - y + u = numerator / denominator + if u >= 0 and u <= 1: + let at = s.at + (u * vec2(s.to.x - s.at.x, s2y)) + atx = at.x + return true + + proc binaryInsert(arr: var seq[float32], v: float32) = + if arr.len == 0: + arr.add(v) + return + var + L = 0 + R = arr.len - 1 + while L < R: + let m = (L + R) div 2 + if arr[m] ~= v: + return + elif arr[m] < v: + L = m + 1 + else: # arr[m] > v: + R = m - 1 + if arr[L] ~= v: + return + elif arr[L] > v: + arr.insert(v, L) + else: + arr.insert(v, L + 1) + + proc sortSegments(segments: var seq[(Segment, int16)], inl, inr: int) = + ## Quicksort + insertion sort, in-place and faster than standard lib sort. + + let n = inr - inl + 1 + if n < 32: # Use insertion sort for the rest + for i in inl + 1 .. inr: + var + j = i - 1 + k = i + while j >= 0 and segments[j][0].at.y > segments[k][0].at.y: + swap(segments[j + 1], segments[j]) + dec j + dec k + return + var + l = inl + r = inr + let p = segments[l + n div 2][0].at.y + while l <= r: + if segments[l][0].at.y < p: + inc l + elif segments[r][0].at.y > p: + dec r + else: + swap(segments[l], segments[r]) + inc l + dec r + sortSegments(segments, inl, r) + sortSegments(segments, l, inr) + + proc sortSweepLines(segments: var seq[SweepLine], inl, inr: int) = + ## Quicksort + insertion sort, in-place and faster than standard lib sort. + + proc avg(line: SweepLine): float32 {.inline.} = + (line.tox + line.atx) / 2.float32 + + let n = inr - inl + 1 + if n < 32: # Use insertion sort for the rest + for i in inl + 1 .. inr: + var + j = i - 1 + k = i + while j >= 0 and segments[j].avg > segments[k].avg: + swap(segments[j + 1], segments[j]) + dec j + dec k + return + var + l = inl + r = inr + let p = segments[l + n div 2].avg + while l <= r: + if segments[l].avg < p: + inc l + elif segments[r].avg > p: + dec r + else: + swap(segments[l], segments[r]) + inc l + dec r + sortSweepLines(segments, inl, r) + sortSweepLines(segments, l, inr) + + proc fillShapes( + image: Image, + shapes: seq[seq[Vec2]], + color: SomeColor, + windingRule: WindingRule, + blendMode: BlendMode + ) = + + let rgbx = color.rgbx + var segments = shapes.shapesToSegments() + let + bounds = computeBounds(segments).snapToPixels() + startX = max(0, bounds.x.int) + + if segments.len == 0 or bounds.w.int == 0 or bounds.h.int == 0: + return + + # const q = 1/10 + # for i in 0 ..< segments.len: + # segments[i][0].at.x = quantize(segments[i][0].at.x, q) + # segments[i][0].at.y = quantize(segments[i][0].at.y, q) + # segments[i][0].to.x = quantize(segments[i][0].to.x, q) + # segments[i][0].to.y = quantize(segments[i][0].to.y, q) + + # Create sorted segments. + segments.sortSegments(0, segments.high) + + # Compute cut lines + var cutLines: seq[float32] + for s in segments: + cutLines.binaryInsert(s[0].at.y) + cutLines.binaryInsert(s[0].to.y) + + var + # Dont add bottom cutLine. + sweeps = newSeq[seq[SweepLine]](cutLines.len - 1) + lastSeg = 0 + i = 0 + while i < sweeps.len: + + if lastSeg < segments.len: + + while segments[lastSeg][0].at.y == cutLines[i]: + let s = segments[lastSeg] + + if s[0].to.y != cutLines[i + 1]: + var atx: float32 + var seg = s[0] + for j in i ..< sweeps.len: + let y = cutLines[j + 1] + if intersectsYLine(y, seg, atx): + sweeps[j].add(toLine((segment(seg.at, vec2(atx, y)), s[1]))) + seg = segment(vec2(atx, y), seg.to) + else: + if seg.at.y != seg.to.y: + sweeps[j].add(toLine(s)) + break + else: + sweeps[i].add(toLine(s)) + + inc lastSeg + if lastSeg >= segments.len: + break + inc i + + # i = 0 + # while i < sweeps.len: + # # TODO: Maybe finds all cuts first, add them to array, cut all lines at once. + # var crossCuts: seq[float32] + + # # echo i, " cut?" + + # for aIndex in 0 ..< sweeps[i].len: + # let a = sweeps[i][aIndex] + # # echo i, ":", sweeps.len, ":", cutLines.len + # let aSeg = segment(vec2(a.atx, cutLines[i]), vec2(a.tox, cutLines[i+1])) + # for bIndex in aIndex + 1 ..< sweeps[i].len: + # let b = sweeps[i][bIndex] + # let bSeg = segment(vec2(b.atx, cutLines[i]), vec2(b.tox, cutLines[i+1])) + # var at: Vec2 + # if intersectsInner(aSeg, bSeg, at): + # crossCuts.binaryInsert(at.y) + + # if crossCuts.len > 0: + # var + # thisSweep = sweeps[i] + # yTop = cutLines[i] + # yBottom = cutLines[i + 1] + # sweeps[i].setLen(0) + + # for k in crossCuts: + # let prevLen = cutLines.len + # cutLines.binaryInsert(k) + # if prevLen != cutLines.len: + # sweeps.insert(newSeq[SweepLine](), i + 1) + + # for a in thisSweep: + # var seg = segment(vec2(a.atx, yTop), vec2(a.tox, yBottom)) + # var at: Vec2 + # for j, cutterLine in crossCuts: + # if intersects(line(vec2(0, cutterLine), vec2(1, cutterLine)), seg, at): + # sweeps[i+j].add(toLine((segment(seg.at, at), a.winding))) + # seg = segment(at, seg.to) + # sweeps[i+crossCuts.len].add(toLine((seg, a.winding))) + + # i += crossCuts.len + + # inc i + + i = 0 + while i < sweeps.len: + # Sort the sweep by X + sweeps[i].sortSweepLines(0, sweeps[i].high) + # Do winding order + var + pen = 0 + prevFill = false + j = 0 + while j < sweeps[i].len: + let a = sweeps[i][j] + if a.winding == 1: + inc pen + if a.winding == -1: + dec pen + let thisFill = shouldFill(windingRule, pen) + if prevFill == thisFill: + # Remove this sweep line. + sweeps[i].delete(j) + continue + prevFill = thisFill + inc j + inc i + + # Used to debug sweeps: + # for s in 0 ..< sweeps.len: + # let + # y1 = cutLines[s] + # echo "M -100 ", y1 + # echo "L 300 ", y1 + # for line in sweeps[s]: + # let + # nw = vec2(line.atx, cutLines[s]) + # sw = vec2(line.tox, cutLines[s + 1]) + # echo "M ", nw.x, " ", nw.y + # echo "L ", sw.x, " ", sw.y + + proc computeCoverage( + coverages: var seq[uint16], + y: int, + startX: int, + cutLines: seq[float32], + currCutLine: int, + sweep: seq[SweepLine] + ) = + + if cutLines[currCutLine + 1] - cutLines[currCutLine] < 1/256: + # TODO some thing about micro sweeps + return + + let + sweepHeight = cutLines[currCutLine + 1] - cutLines[currCutLine] + yFracTop = ((y.float32 - cutLines[currCutLine]) / sweepHeight).clamp(0, 1) + yFracBottom = ((y.float32 + 1 - cutLines[currCutLine]) / + sweepHeight).clamp(0, 1) + var i = 0 + while i < sweep.len: + let + nwX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracTop) + neX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracTop) + + swX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracBottom) + seX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracBottom) + + minWi = min(nwX, swX).int #.clamp(startX, coverages.len + startX) + maxWi = max(nwX, swX).ceil.int #.clamp(startX, coverages.len + startX) + + minEi = min(neX, seX).int #.clamp(startX, coverages.len + startX) + maxEi = max(neX, seX).ceil.int #.clamp(startX, coverages.len + startX) + + let + nw = vec2(sweep[i+0].atx, cutLines[currCutLine]) + sw = vec2(sweep[i+0].tox, cutLines[currCutLine + 1]) + f16 = (256 * 256 - 1).float32 + for x in minWi ..< maxWi: + var area = pixelCover( + nw - vec2(x.float32, y.float32), + sw - vec2(x.float32, y.float32) + ) + coverages[x - startX] += (area * f16).uint16 + + let x = maxWi + var midArea = pixelCover( + nw - vec2(x.float32, y.float32), + sw - vec2(x.float32, y.float32) + ) + for x in maxWi ..< maxEi: + coverages[x - startX] += (midArea * f16).uint16 + + let + ne = vec2(sweep[i+1].atx, cutLines[currCutLine]) + se = vec2(sweep[i+1].tox, cutLines[currCutLine + 1]) + for x in minEi ..< maxEi: + var area = pixelCover( + ne - vec2(x.float32, y.float32), + se - vec2(x.float32, y.float32) + ) + coverages[x - startX] -= (area * f16).uint16 + + i += 2 + + var + currCutLine = 0 + coverages16 = newSeq[uint16](bounds.w.int) + coverages8 = newSeq[uint8](bounds.w.int) + for scanLine in max(cutLines[0].int, 0) ..< min(cutLines[^1].ceil.int, image.height): + + zeroMem(coverages16[0].addr, coverages16.len * 2) + + coverages16.computeCoverage( + scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) + while cutLines[currCutLine + 1] < scanLine.float + 1.0: + inc currCutLine + if currCutLine == sweeps.len: + break + coverages16.computeCoverage( + scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) + + for i in 0 ..< coverages16.len: + coverages8[i] = (coverages16[i] shr 8).uint8 + image.fillCoverage( + rgbx, + startX = startX, + y = scanLine, + coverages8, + blendMode + ) + +else: diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index f4777c9..371b50f 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -502,8 +502,6 @@ proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} = raise newException(PixieError, "No masker for " & $blendMode) when defined(amd64) and not defined(pixieNoSimd): - import nimsimd/sse2 - type BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].} ## Function signature returned by blenderSimd. diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 5af66e1..1e37fbe 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1335,7 +1335,10 @@ proc fillCoverage( # If the coverages are not all zero if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: # If the coverages are all 255 - if blendMode == bmNormal: + if blendMode == bmOverwrite: + for i in 0 ..< 4: + mm_storeu_si128(image.data[index + i * 4].addr, colorVec) + elif blendMode == bmNormal: if rgbx.a == 255: for i in 0 ..< 4: mm_storeu_si128(image.data[index + i * 4].addr, colorVec) @@ -1375,11 +1378,14 @@ proc fillCoverage( source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) - let backdrop = mm_loadu_si128(image.data[index + i * 4].addr) - mm_storeu_si128( - image.data[index + i * 4].addr, - blendProc(backdrop, source) - ) + if blendMode == bmOverwrite: + mm_storeu_si128(image.data[index + i * 4].addr, source) + else: + let backdrop = mm_loadu_si128(image.data[index + i * 4].addr) + mm_storeu_si128( + image.data[index + i * 4].addr, + blendProc(backdrop, source) + ) coverageVec = mm_srli_si128(coverageVec, 4) @@ -1395,24 +1401,28 @@ proc fillCoverage( x += 16 let blender = blendMode.blender() - while x < startX + coverages.len: + for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0 or blendMode == bmExcludeMask: if blendMode == bmNormal and coverage == 255 and rgbx.a == 255: # Skip blending image.unsafe[x, y] = rgbx + continue + + var source = rgbx + if coverage != 255: + source.r = ((source.r.uint32 * coverage) div 255).uint8 + source.g = ((source.g.uint32 * coverage) div 255).uint8 + source.b = ((source.b.uint32 * coverage) div 255).uint8 + source.a = ((source.a.uint32 * coverage) div 255).uint8 + + if blendMode == bmOverwrite: + image.unsafe[x, y] = source else: - var source = rgbx - if coverage != 255: - source.r = ((source.r.uint32 * coverage) div 255).uint8 - source.g = ((source.g.uint32 * coverage) div 255).uint8 - source.b = ((source.b.uint32 * coverage) div 255).uint8 - source.a = ((source.a.uint32 * coverage) div 255).uint8 let backdrop = image.unsafe[x, y] image.unsafe[x, y] = blender(backdrop, source) elif blendMode == bmMask: image.unsafe[x, y] = rgbx(0, 0, 0, 0) - inc x if blendMode == bmMask: image.clearUnsafe(0, y, startX, y) @@ -1429,31 +1439,36 @@ proc fillCoverage( if blendMode.hasSimdMasker(): let maskerSimd = blendMode.maskerSimd() - zeroVec = mm_setzero_si128() + vecZero = mm_setzero_si128() for _ in 0 ..< coverages.len div 16: let index = mask.dataIndex(x, y) - coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr) - if mm_movemask_epi8(mm_cmpeq_epi16(coverage, zeroVec)) != 0xffff: + coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr) + if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: # If the coverages are not all zero - let backdrop = mm_loadu_si128(mask.data[index].addr) - mm_storeu_si128( - mask.data[index].addr, - maskerSimd(backdrop, coverage) - ) + if blendMode == bmOverwrite: + mm_storeu_si128(mask.data[index].addr, coverageVec) + else: + let backdrop = mm_loadu_si128(mask.data[index].addr) + mm_storeu_si128( + mask.data[index].addr, + maskerSimd(backdrop, coverageVec) + ) elif blendMode == bmMask: - mm_storeu_si128(mask.data[index].addr, zeroVec) + mm_storeu_si128(mask.data[index].addr, vecZero) x += 16 let masker = blendMode.masker() - while x < startX + coverages.len: + for x in x ..< startX + coverages.len: let coverage = coverages[x - startX] if coverage != 0 or blendMode == bmExcludeMask: - let backdrop = mask.unsafe[x, y] - mask.unsafe[x, y] = masker(backdrop, coverage) + if blendMode == bmOverwrite: + mask.unsafe[x, y] = coverage + else: + let backdrop = mask.unsafe[x, y] + mask.unsafe[x, y] = masker(backdrop, coverage) elif blendMode == bmMask: mask.unsafe[x, y] = 0 - inc x if blendMode == bmMask: mask.clearUnsafe(0, y, startX, y) @@ -1481,7 +1496,7 @@ proc fillHits( filledTo = fillStart + fillLen - if blendMode == bmNormal and rgbx.a == 255: + if blendMode == bmOverwrite or (blendMode == bmNormal and rgbx.a == 255): fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen) continue @@ -1543,7 +1558,7 @@ proc fillHits( filledTo = fillStart + fillLen - if blendMode == bmNormal or blendMode == bmOverwrite: + if blendMode in {bmNormal, bmOverwrite}: fillUnsafe(mask.data, 255, mask.dataIndex(fillStart, y), fillLen) continue @@ -1577,7 +1592,59 @@ proc fillShapes( color: SomeColor, windingRule: WindingRule, blendMode: BlendMode -) +) = + # Figure out the total bounds of all the shapes, + # rasterize only within the total bounds + let + rgbx = color.asRgbx() + segments = shapes.shapesToSegments() + bounds = computeBounds(segments).snapToPixels() + startX = max(0, bounds.x.int) + startY = max(0, bounds.y.int) + pathHeight = min(image.height, (bounds.y + bounds.h).int) + partitioning = partitionSegments(segments, startY, pathHeight - startY) + + var + coverages = newSeq[uint8](bounds.w.int) + hits = newSeq[(float32, int16)](partitioning.maxEntryCount) + numHits: int + aa: bool + + for y in startY ..< pathHeight: + computeCoverage( + cast[ptr UncheckedArray[uint8]](coverages[0].addr), + hits, + numHits, + aa, + image.width.float32, + y, + startX, + partitioning, + windingRule + ) + if aa: + image.fillCoverage( + rgbx, + startX, + y, + coverages, + blendMode + ) + zeroMem(coverages[0].addr, coverages.len) + else: + image.fillHits( + rgbx, + startX, + y, + hits, + numHits, + windingRule, + blendMode + ) + + if blendMode == bmMask: + image.clearUnsafe(0, 0, 0, startY) + image.clearUnsafe(0, pathHeight, 0, image.height) proc fillShapes( mask: Mask, @@ -2013,647 +2080,3 @@ proc strokeOverlaps*( ) strokeShapes.transform(transform) strokeShapes.overlaps(test, wrNonZero) - -when defined(pixieSweeps): - import algorithm - - proc pixelCover(a0, b0: Vec2): float32 = - ## Returns the amount of area a given segment sweeps to the right - ## in a [0,0 to 1,1] box. - var - a = a0 - b = b0 - aI: Vec2 - bI: Vec2 - area: float32 = 0.0 - - if (a.x < 0 and b.x < 0) or # Both to the left. - (a.x == b.x): # Vertical line - # Area of the rectangle: - return (1 - clamp(a.x, 0, 1)) * (min(b.y, 1) - max(a.y, 0)) - - else: - # y = mm*x + bb - let - mm: float32 = (b.y - a.y) / (b.x - a.x) - bb: float32 = a.y - mm * a.x - - if a.x >= 0 and a.x <= 1 and a.y >= 0 and a.y <= 1: - # A is in pixel bounds. - aI = a - else: - aI = vec2((0 - bb) / mm, 0) - if aI.x < 0: - let y = mm * 0 + bb - # Area of the extra rectangle. - area += (min(bb, 1) - max(a.y, 0)).clamp(0, 1) - aI = vec2(0, y.clamp(0, 1)) - elif aI.x > 1: - let y = mm * 1 + bb - aI = vec2(1, y.clamp(0, 1)) - - if b.x >= 0 and b.x <= 1 and b.y >= 0 and b.y <= 1: - # B is in pixel bounds. - bI = b - else: - bI = vec2((1 - bb) / mm, 1) - if bI.x < 0: - let y = mm * 0 + bb - # Area of the extra rectangle. - area += (min(b.y, 1) - max(bb, 0)).clamp(0, 1) - bI = vec2(0, y.clamp(0, 1)) - elif bI.x > 1: - let y = mm * 1 + bb - bI = vec2(1, y.clamp(0, 1)) - - area += ((1 - aI.x) + (1 - bI.x)) / 2 * (bI.y - aI.y) - return area - - proc intersectsInner*(a, b: Segment, at: var Vec2): bool {.inline.} = - ## Checks if the a segment intersects b segment. - ## If it returns true, at will have point of intersection - let - s1 = a.to - a.at - s2 = b.to - b.at - denominator = (-s2.x * s1.y + s1.x * s2.y) - s = (-s1.y * (a.at.x - b.at.x) + s1.x * (a.at.y - b.at.y)) / denominator - t = (s2.x * (a.at.y - b.at.y) - s2.y * (a.at.x - b.at.x)) / denominator - - if s > 0 and s < 1 and t > 0 and t < 1: - at = a.at + (t * s1) - return true - - type - - Trapezoid = object - nw, ne, se, sw: Vec2 - - SweepLine = object - #m, x, b: float32 - atx, tox: float32 - winding: int16 - - proc toLine(s: (Segment, int16)): SweepLine = - var line = SweepLine() - line.atx = s[0].at.x - line.tox = s[0].to.x - # y = mx + b - # line.m = (s.at.y - s.to.y) / (s.at.x - s.to.x) - # line.b = s.at.y - line.m * s.at.x - line.winding = s[1] - return line - - proc intersectsYLine(y: float32, s: Segment, atx: var float32): bool {.inline.} = - let - s2y = s.to.y - s.at.y - denominator = -s2y - numerator = s.at.y - y - u = numerator / denominator - if u >= 0 and u <= 1: - let at = s.at + (u * vec2(s.to.x - s.at.x, s2y)) - atx = at.x - return true - - proc binaryInsert(arr: var seq[float32], v: float32) = - if arr.len == 0: - arr.add(v) - return - var - L = 0 - R = arr.len - 1 - while L < R: - let m = (L + R) div 2 - if arr[m] ~= v: - return - elif arr[m] < v: - L = m + 1 - else: # arr[m] > v: - R = m - 1 - if arr[L] ~= v: - return - elif arr[L] > v: - arr.insert(v, L) - else: - arr.insert(v, L + 1) - - proc sortSegments(segments: var seq[(Segment, int16)], inl, inr: int) = - ## Quicksort + insertion sort, in-place and faster than standard lib sort. - - let n = inr - inl + 1 - if n < 32: # Use insertion sort for the rest - for i in inl + 1 .. inr: - var - j = i - 1 - k = i - while j >= 0 and segments[j][0].at.y > segments[k][0].at.y: - swap(segments[j + 1], segments[j]) - dec j - dec k - return - var - l = inl - r = inr - let p = segments[l + n div 2][0].at.y - while l <= r: - if segments[l][0].at.y < p: - inc l - elif segments[r][0].at.y > p: - dec r - else: - swap(segments[l], segments[r]) - inc l - dec r - sortSegments(segments, inl, r) - sortSegments(segments, l, inr) - - proc sortSweepLines(segments: var seq[SweepLine], inl, inr: int) = - ## Quicksort + insertion sort, in-place and faster than standard lib sort. - - proc avg(line: SweepLine): float32 {.inline.} = - (line.tox + line.atx) / 2.float32 - - let n = inr - inl + 1 - if n < 32: # Use insertion sort for the rest - for i in inl + 1 .. inr: - var - j = i - 1 - k = i - while j >= 0 and segments[j].avg > segments[k].avg: - swap(segments[j + 1], segments[j]) - dec j - dec k - return - var - l = inl - r = inr - let p = segments[l + n div 2].avg - while l <= r: - if segments[l].avg < p: - inc l - elif segments[r].avg > p: - dec r - else: - swap(segments[l], segments[r]) - inc l - dec r - sortSweepLines(segments, inl, r) - sortSweepLines(segments, l, inr) - - proc fillShapes( - image: Image, - shapes: seq[seq[Vec2]], - color: SomeColor, - windingRule: WindingRule, - blendMode: BlendMode - ) = - - let rgbx = color.rgbx - var segments = shapes.shapesToSegments() - let - bounds = computeBounds(segments).snapToPixels() - startX = max(0, bounds.x.int) - - if segments.len == 0 or bounds.w.int == 0 or bounds.h.int == 0: - return - - # const q = 1/10 - # for i in 0 ..< segments.len: - # segments[i][0].at.x = quantize(segments[i][0].at.x, q) - # segments[i][0].at.y = quantize(segments[i][0].at.y, q) - # segments[i][0].to.x = quantize(segments[i][0].to.x, q) - # segments[i][0].to.y = quantize(segments[i][0].to.y, q) - - # Create sorted segments. - segments.sortSegments(0, segments.high) - - # Compute cut lines - var cutLines: seq[float32] - for s in segments: - cutLines.binaryInsert(s[0].at.y) - cutLines.binaryInsert(s[0].to.y) - - var - # Dont add bottom cutLine. - sweeps = newSeq[seq[SweepLine]](cutLines.len - 1) - lastSeg = 0 - i = 0 - while i < sweeps.len: - - if lastSeg < segments.len: - - while segments[lastSeg][0].at.y == cutLines[i]: - let s = segments[lastSeg] - - if s[0].to.y != cutLines[i + 1]: - var atx: float32 - var seg = s[0] - for j in i ..< sweeps.len: - let y = cutLines[j + 1] - if intersectsYLine(y, seg, atx): - sweeps[j].add(toLine((segment(seg.at, vec2(atx, y)), s[1]))) - seg = segment(vec2(atx, y), seg.to) - else: - if seg.at.y != seg.to.y: - sweeps[j].add(toLine(s)) - break - else: - sweeps[i].add(toLine(s)) - - inc lastSeg - if lastSeg >= segments.len: - break - inc i - - # i = 0 - # while i < sweeps.len: - # # TODO: Maybe finds all cuts first, add them to array, cut all lines at once. - # var crossCuts: seq[float32] - - # # echo i, " cut?" - - # for aIndex in 0 ..< sweeps[i].len: - # let a = sweeps[i][aIndex] - # # echo i, ":", sweeps.len, ":", cutLines.len - # let aSeg = segment(vec2(a.atx, cutLines[i]), vec2(a.tox, cutLines[i+1])) - # for bIndex in aIndex + 1 ..< sweeps[i].len: - # let b = sweeps[i][bIndex] - # let bSeg = segment(vec2(b.atx, cutLines[i]), vec2(b.tox, cutLines[i+1])) - # var at: Vec2 - # if intersectsInner(aSeg, bSeg, at): - # crossCuts.binaryInsert(at.y) - - # if crossCuts.len > 0: - # var - # thisSweep = sweeps[i] - # yTop = cutLines[i] - # yBottom = cutLines[i + 1] - # sweeps[i].setLen(0) - - # for k in crossCuts: - # let prevLen = cutLines.len - # cutLines.binaryInsert(k) - # if prevLen != cutLines.len: - # sweeps.insert(newSeq[SweepLine](), i + 1) - - # for a in thisSweep: - # var seg = segment(vec2(a.atx, yTop), vec2(a.tox, yBottom)) - # var at: Vec2 - # for j, cutterLine in crossCuts: - # if intersects(line(vec2(0, cutterLine), vec2(1, cutterLine)), seg, at): - # sweeps[i+j].add(toLine((segment(seg.at, at), a.winding))) - # seg = segment(at, seg.to) - # sweeps[i+crossCuts.len].add(toLine((seg, a.winding))) - - # i += crossCuts.len - - # inc i - - i = 0 - while i < sweeps.len: - # Sort the sweep by X - sweeps[i].sortSweepLines(0, sweeps[i].high) - # Do winding order - var - pen = 0 - prevFill = false - j = 0 - while j < sweeps[i].len: - let a = sweeps[i][j] - if a.winding == 1: - inc pen - if a.winding == -1: - dec pen - let thisFill = shouldFill(windingRule, pen) - if prevFill == thisFill: - # Remove this sweep line. - sweeps[i].delete(j) - continue - prevFill = thisFill - inc j - inc i - - # Used to debug sweeps: - # for s in 0 ..< sweeps.len: - # let - # y1 = cutLines[s] - # echo "M -100 ", y1 - # echo "L 300 ", y1 - # for line in sweeps[s]: - # let - # nw = vec2(line.atx, cutLines[s]) - # sw = vec2(line.tox, cutLines[s + 1]) - # echo "M ", nw.x, " ", nw.y - # echo "L ", sw.x, " ", sw.y - - proc computeCoverage( - coverages: var seq[uint16], - y: int, - startX: int, - cutLines: seq[float32], - currCutLine: int, - sweep: seq[SweepLine] - ) = - - if cutLines[currCutLine + 1] - cutLines[currCutLine] < 1/256: - # TODO some thing about micro sweeps - return - - let - sweepHeight = cutLines[currCutLine + 1] - cutLines[currCutLine] - yFracTop = ((y.float32 - cutLines[currCutLine]) / sweepHeight).clamp(0, 1) - yFracBottom = ((y.float32 + 1 - cutLines[currCutLine]) / - sweepHeight).clamp(0, 1) - var i = 0 - while i < sweep.len: - let - nwX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracTop) - neX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracTop) - - swX = mix(sweep[i+0].atx, sweep[i+0].tox, yFracBottom) - seX = mix(sweep[i+1].atx, sweep[i+1].tox, yFracBottom) - - minWi = min(nwX, swX).int#.clamp(startX, coverages.len + startX) - maxWi = max(nwX, swX).ceil.int#.clamp(startX, coverages.len + startX) - - minEi = min(neX, seX).int#.clamp(startX, coverages.len + startX) - maxEi = max(neX, seX).ceil.int#.clamp(startX, coverages.len + startX) - - let - nw = vec2(sweep[i+0].atx, cutLines[currCutLine]) - sw = vec2(sweep[i+0].tox, cutLines[currCutLine + 1]) - f16 = (256 * 256 - 1).float32 - for x in minWi ..< maxWi: - var area = pixelCover( - nw - vec2(x.float32, y.float32), - sw - vec2(x.float32, y.float32) - ) - coverages[x - startX] += (area * f16).uint16 - - let x = maxWi - var midArea = pixelCover( - nw - vec2(x.float32, y.float32), - sw - vec2(x.float32, y.float32) - ) - for x in maxWi ..< maxEi: - coverages[x - startX] += (midArea * f16).uint16 - - let - ne = vec2(sweep[i+1].atx, cutLines[currCutLine]) - se = vec2(sweep[i+1].tox, cutLines[currCutLine + 1]) - for x in minEi ..< maxEi: - var area = pixelCover( - ne - vec2(x.float32, y.float32), - se - vec2(x.float32, y.float32) - ) - coverages[x - startX] -= (area * f16).uint16 - - i += 2 - - var - currCutLine = 0 - coverages16 = newSeq[uint16](bounds.w.int) - coverages8 = newSeq[uint8](bounds.w.int) - for scanLine in max(cutLines[0].int, 0) ..< min(cutLines[^1].ceil.int, image.height): - - zeroMem(coverages16[0].addr, coverages16.len * 2) - - coverages16.computeCoverage( - scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) - while cutLines[currCutLine + 1] < scanLine.float + 1.0: - inc currCutLine - if currCutLine == sweeps.len: - break - coverages16.computeCoverage( - scanLine, startX, cutLines, currCutLine, sweeps[currCutLine]) - - for i in 0 ..< coverages16.len: - coverages8[i] = (coverages16[i] shr 8).uint8 - image.fillCoverage( - rgbx, - startX = startX, - y = scanLine, - coverages8, - blendMode - ) - -else: - proc fillShapes( - image: Image, - shapes: seq[seq[Vec2]], - color: SomeColor, - windingRule: WindingRule, - blendMode: BlendMode - ) = - # Figure out the total bounds of all the shapes, - # rasterize only within the total bounds - let - rgbx = color.asRgbx() - segments = shapes.shapesToSegments() - bounds = computeBounds(segments).snapToPixels() - startX = max(0, bounds.x.int) - startY = max(0, bounds.y.int) - pathHeight = min(image.height, (bounds.y + bounds.h).int) - partitioning = partitionSegments(segments, startY, pathHeight - startY) - - var - coverages = newSeq[uint8](bounds.w.int) - hits = newSeq[(float32, int16)](partitioning.maxEntryCount) - numHits: int - aa: bool - - for y in startY ..< pathHeight: - computeCoverage( - cast[ptr UncheckedArray[uint8]](coverages[0].addr), - hits, - numHits, - aa, - image.width.float32, - y, - startX, - partitioning, - windingRule - ) - if aa: - image.fillCoverage( - rgbx, - startX, - y, - coverages, - blendMode - ) - zeroMem(coverages[0].addr, coverages.len) - else: - image.fillHits( - rgbx, - startX, - y, - hits, - numHits, - windingRule, - blendMode - ) - - if blendMode == bmMask: - image.clearUnsafe(0, 0, 0, startY) - image.clearUnsafe(0, pathHeight, 0, image.height) - -proc fillMask( - shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero -): Mask = - result = newMask(width, height) - - let - segments = shapes.shapesToSegments() - bounds = computeBounds(segments).snapToPixels() - startY = max(0, bounds.y.int) - pathHeight = min(height, (bounds.y + bounds.h).int) - partitioning = partitionSegments(segments, startY, pathHeight) - width = width.float32 - - var - hits = newSeq[(float32, int16)](partitioning.maxEntryCount) - numHits: int - aa: bool - for y in startY ..< pathHeight: - computeCoverage( - cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), - hits, - numHits, - aa, - width, - y, - 0, - partitioning, - windingRule - ) - if not aa: - for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): - let - startIndex = result.dataIndex(prevAt.int, y) - len = at.int - prevAt.int - fillUnsafe(result.data, 255, startIndex, len) - -proc fillMask*( - path: SomePath, width, height: int, windingRule = wrNonZero -): Mask = - ## Returns a new mask with the path filled. This is a faster alternative - ## to `newMask` + `fillPath`. - let shapes = parseSomePath(path, true, 1) - shapes.fillMask(width, height, windingRule) - -proc fillImage( - shapes: seq[seq[Vec2]], - width, height: int, - color: SomeColor, - windingRule = wrNonZero -): Image = - result = newImage(width, height) - - let - mask = shapes.fillMask(width, height, windingRule) - rgbx = color.rgbx() - - var i: int - when defined(amd64) and not defined(pixieNoSimd): - let - colorVec = mm_set1_epi32(cast[int32](rgbx)) - oddMask = mm_set1_epi16(cast[int16](0xff00)) - div255 = mm_set1_epi16(cast[int16](0x8081)) - vec255 = mm_set1_epi32(cast[int32](uint32.high)) - vecZero = mm_setzero_si128() - colorVecEven = mm_slli_epi16(colorVec, 8) - colorVecOdd = mm_and_si128(colorVec, oddMask) - iterations = result.data.len div 16 - for _ in 0 ..< iterations: - var coverageVec = mm_loadu_si128(mask.data[i].addr) - if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: - if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: - for q in [0, 4, 8, 12]: - mm_storeu_si128(result.data[i + q].addr, colorVec) - else: - for q in [0, 4, 8, 12]: - var unpacked = unpackAlphaValues(coverageVec) - # Shift the coverages from `a` to `g` and `a` for multiplying - unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) - - var - sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) - sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) - sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) - sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) - - mm_storeu_si128( - result.data[i + q].addr, - mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) - ) - - coverageVec = mm_srli_si128(coverageVec, 4) - - i += 16 - - let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] - for i in i ..< result.data.len: - let coverage = mask.data[i] - if coverage == 255: - result.data[i] = rgbx - elif coverage != 0: - result.data[i].r = ((channels[0] * coverage) div 255).uint8 - result.data[i].g = ((channels[1] * coverage) div 255).uint8 - result.data[i].b = ((channels[2] * coverage) div 255).uint8 - result.data[i].a = ((channels[3] * coverage) div 255).uint8 - -proc fillImage*( - path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero -): Image = - ## Returns a new image with the path filled. This is a faster alternative - ## to `newImage` + `fillPath`. - let shapes = parseSomePath(path, false, 1) - shapes.fillImage(width, height, color, windingRule) - -proc strokeMask*( - path: SomePath, - width, height: int, - strokeWidth: float32 = 1.0, - lineCap = lcButt, - lineJoin = ljMiter, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Mask = - ## Returns a new mask with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillMask(width, height, wrNonZero) - -proc strokeImage*( - path: SomePath, - width, height: int, - color: SomeColor, - strokeWidth: float32 = 1.0, - lineCap = lcButt, - lineJoin = ljMiter, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Image = - ## Returns a new image with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillImage(width, height, color, wrNonZero) - -when defined(release): - {.pop.} diff --git a/tests/paths/pathHeart2.png b/tests/paths/pathHeart2.png deleted file mode 100644 index 560cd66..0000000 Binary files a/tests/paths/pathHeart2.png and /dev/null differ diff --git a/tests/test_paths.nim b/tests/test_paths.nim index 8d973a7..8c81a5a 100644 --- a/tests/test_paths.nim +++ b/tests/test_paths.nim @@ -131,16 +131,6 @@ block: ) image.writeFile("tests/paths/pathHeart.png") -block: - let image = """ - M 10,30 - A 20,20 0,0,1 50,30 - A 20,20 0,0,1 90,30 - Q 90,60 50,90 - Q 10,60 10,30 z - """.fillImage(100, 100, parseHtmlColor("#FC427B").rgba) - image.writeFile("tests/paths/pathHeart2.png") - block: let image = newImage(100, 100) image.fillPath(