From afc7e8d8162446b99163b2398f5e91f8d6affc26 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 12 Dec 2021 18:19:07 -0600 Subject: [PATCH] move fillImage/fillMask strokeImage/strokeMask --- experiments/benchmark_cairo.nim | 168 ++++++++++++++++++++++++++++++++ src/pixie/paths.nim | 161 ------------------------------ 2 files changed, 168 insertions(+), 161 deletions(-) diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index 4afb63d..80ff9ef 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -1,10 +1,178 @@ import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2, pixie/internal + proc doDiff(a, b: Image, name: string) = let (diffScore, diffImage) = diff(a, b) echo &"{name} score: {diffScore}" diffImage.writeFile(&"{name}_diff.png") +when defined(release): + {.push checks: off.} + +proc fillMask( + shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero +): Mask = + result = newMask(width, height) + + let + segments = shapes.shapesToSegments() + bounds = computeBounds(segments).snapToPixels() + startY = max(0, bounds.y.int) + pathHeight = min(height, (bounds.y + bounds.h).int) + partitioning = partitionSegments(segments, startY, pathHeight) + width = width.float32 + + var + hits = newSeq[(float32, int16)](partitioning.maxEntryCount) + numHits: int + aa: bool + for y in startY ..< pathHeight: + computeCoverage( + cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), + hits, + numHits, + aa, + width, + y, + 0, + partitioning, + windingRule + ) + if not aa: + for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): + let + startIndex = result.dataIndex(prevAt.int, y) + len = at.int - prevAt.int + fillUnsafe(result.data, 255, startIndex, len) + +proc fillMask*( + path: SomePath, width, height: int, windingRule = wrNonZero +): Mask = + ## Returns a new mask with the 
path filled. This is a faster alternative + ## to `newMask` + `fillPath`. + let shapes = parseSomePath(path, true, 1) + shapes.fillMask(width, height, windingRule) + +proc fillImage( + shapes: seq[seq[Vec2]], + width, height: int, + color: SomeColor, + windingRule = wrNonZero +): Image = + result = newImage(width, height) + + let + mask = shapes.fillMask(width, height, windingRule) + rgbx = color.rgbx() + + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let + colorVec = mm_set1_epi32(cast[int32](rgbx)) + oddMask = mm_set1_epi16(cast[int16](0xff00)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + vec255 = mm_set1_epi32(cast[int32](uint32.high)) + vecZero = mm_setzero_si128() + colorVecEven = mm_slli_epi16(colorVec, 8) + colorVecOdd = mm_and_si128(colorVec, oddMask) + iterations = result.data.len div 16 + for _ in 0 ..< iterations: + var coverageVec = mm_loadu_si128(mask.data[i].addr) + if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: + if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: + for q in [0, 4, 8, 12]: + mm_storeu_si128(result.data[i + q].addr, colorVec) + else: + for q in [0, 4, 8, 12]: + var unpacked = unpackAlphaValues(coverageVec) + # Shift the coverages from `a` to `g` and `a` for multiplying + unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) + + var + sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) + sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) + sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) + sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) + + mm_storeu_si128( + result.data[i + q].addr, + mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) + ) + + coverageVec = mm_srli_si128(coverageVec, 4) + + i += 16 + + let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] + for i in i ..< result.data.len: + let coverage = mask.data[i] + if coverage == 255: + result.data[i] = rgbx + elif coverage != 0: + result.data[i].r = 
((channels[0] * coverage) div 255).uint8 + result.data[i].g = ((channels[1] * coverage) div 255).uint8 + result.data[i].b = ((channels[2] * coverage) div 255).uint8 + result.data[i].a = ((channels[3] * coverage) div 255).uint8 + +proc fillImage*( + path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero +): Image = + ## Returns a new image with the path filled. This is a faster alternative + ## to `newImage` + `fillPath`. + let shapes = parseSomePath(path, false, 1) + shapes.fillImage(width, height, color, windingRule) + +proc strokeMask*( + path: SomePath, + width, height: int, + strokeWidth: float32 = 1.0, + lineCap = lcButt, + lineJoin = ljMiter, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Mask = + ## Returns a new mask with the path stroked. This is a faster alternative + ## to `newMask` + `strokePath`. + let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillMask(width, height, wrNonZero) + +proc strokeImage*( + path: SomePath, + width, height: int, + color: SomeColor, + strokeWidth: float32 = 1.0, + lineCap = lcButt, + lineJoin = ljMiter, + miterLimit = defaultMiterLimit, + dashes: seq[float32] = @[] +): Image = + ## Returns a new image with the path stroked. This is a faster alternative + ## to `newImage` + `strokePath`. 
+ let strokeShapes = strokeShapes( + parseSomePath(path, false, 1), + strokeWidth, + lineCap, + lineJoin, + miterLimit, + dashes, + 1 + ) + result = strokeShapes.fillImage(width, height, color, wrNonZero) + +when defined(release): + {.pop.} + + block: let path = newPath() path.moveTo(0, 0) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 4e655cd..14e9674 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -2068,164 +2068,3 @@ proc strokeOverlaps*( ) strokeShapes.transform(transform) strokeShapes.overlaps(test, wrNonZero) - -proc fillMask( - shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero -): Mask = - result = newMask(width, height) - - let - segments = shapes.shapesToSegments() - bounds = computeBounds(segments).snapToPixels() - startY = max(0, bounds.y.int) - pathHeight = min(height, (bounds.y + bounds.h).int) - partitioning = partitionSegments(segments, startY, pathHeight) - width = width.float32 - - var - hits = newSeq[(float32, int16)](partitioning.maxEntryCount) - numHits: int - aa: bool - for y in startY ..< pathHeight: - computeCoverage( - cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr), - hits, - numHits, - aa, - width, - y, - 0, - partitioning, - windingRule - ) - if not aa: - for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width): - let - startIndex = result.dataIndex(prevAt.int, y) - len = at.int - prevAt.int - fillUnsafe(result.data, 255, startIndex, len) - -proc fillMask*( - path: SomePath, width, height: int, windingRule = wrNonZero -): Mask = - ## Returns a new mask with the path filled. This is a faster alternative - ## to `newMask` + `fillPath`. 
- let shapes = parseSomePath(path, true, 1) - shapes.fillMask(width, height, windingRule) - -proc fillImage( - shapes: seq[seq[Vec2]], - width, height: int, - color: SomeColor, - windingRule = wrNonZero -): Image = - result = newImage(width, height) - - let - mask = shapes.fillMask(width, height, windingRule) - rgbx = color.rgbx() - - var i: int - when defined(amd64) and not defined(pixieNoSimd): - let - colorVec = mm_set1_epi32(cast[int32](rgbx)) - oddMask = mm_set1_epi16(cast[int16](0xff00)) - div255 = mm_set1_epi16(cast[int16](0x8081)) - vec255 = mm_set1_epi32(cast[int32](uint32.high)) - vecZero = mm_setzero_si128() - colorVecEven = mm_slli_epi16(colorVec, 8) - colorVecOdd = mm_and_si128(colorVec, oddMask) - iterations = result.data.len div 16 - for _ in 0 ..< iterations: - var coverageVec = mm_loadu_si128(mask.data[i].addr) - if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff: - if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff: - for q in [0, 4, 8, 12]: - mm_storeu_si128(result.data[i + q].addr, colorVec) - else: - for q in [0, 4, 8, 12]: - var unpacked = unpackAlphaValues(coverageVec) - # Shift the coverages from `a` to `g` and `a` for multiplying - unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) - - var - sourceEven = mm_mulhi_epu16(colorVecEven, unpacked) - sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked) - sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) - sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) - - mm_storeu_si128( - result.data[i + q].addr, - mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) - ) - - coverageVec = mm_srli_si128(coverageVec, 4) - - i += 16 - - let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32] - for i in i ..< result.data.len: - let coverage = mask.data[i] - if coverage == 255: - result.data[i] = rgbx - elif coverage != 0: - result.data[i].r = ((channels[0] * coverage) div 255).uint8 - result.data[i].g = ((channels[1] * 
coverage) div 255).uint8 - result.data[i].b = ((channels[2] * coverage) div 255).uint8 - result.data[i].a = ((channels[3] * coverage) div 255).uint8 - -proc fillImage*( - path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero -): Image = - ## Returns a new image with the path filled. This is a faster alternative - ## to `newImage` + `fillPath`. - let shapes = parseSomePath(path, false, 1) - shapes.fillImage(width, height, color, windingRule) - -proc strokeMask*( - path: SomePath, - width, height: int, - strokeWidth: float32 = 1.0, - lineCap = lcButt, - lineJoin = ljMiter, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Mask = - ## Returns a new mask with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillMask(width, height, wrNonZero) - -proc strokeImage*( - path: SomePath, - width, height: int, - color: SomeColor, - strokeWidth: float32 = 1.0, - lineCap = lcButt, - lineJoin = ljMiter, - miterLimit = defaultMiterLimit, - dashes: seq[float32] = @[] -): Image = - ## Returns a new image with the path stroked. This is a faster alternative - ## to `newImage` + `strokePath`. - let strokeShapes = strokeShapes( - parseSomePath(path, false, 1), - strokeWidth, - lineCap, - lineJoin, - miterLimit, - dashes, - 1 - ) - result = strokeShapes.fillImage(width, height, color, wrNonZero) - -when defined(release): - {.pop.}