move fillImage/fillMask strokeImage/strokeMask

This commit is contained in:
Ryan Oldenburg 2021-12-12 18:19:07 -06:00
parent 2e163e6e6c
commit afc7e8d816
2 changed files with 168 additions and 161 deletions

View file

@ -1,10 +1,178 @@
import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat import benchy, cairo, chroma, math, pixie, pixie/paths {.all.}, strformat
when defined(amd64) and not defined(pixieNoSimd):
import nimsimd/sse2, pixie/internal
proc doDiff(a, b: Image, name: string) = proc doDiff(a, b: Image, name: string) =
let (diffScore, diffImage) = diff(a, b) let (diffScore, diffImage) = diff(a, b)
echo &"{name} score: {diffScore}" echo &"{name} score: {diffScore}"
diffImage.writeFile(&"{name}_diff.png") diffImage.writeFile(&"{name}_diff.png")
when defined(release):
{.push checks: off.}
proc fillMask(
shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero
): Mask =
result = newMask(width, height)
let
segments = shapes.shapesToSegments()
bounds = computeBounds(segments).snapToPixels()
startY = max(0, bounds.y.int)
pathHeight = min(height, (bounds.y + bounds.h).int)
partitioning = partitionSegments(segments, startY, pathHeight)
width = width.float32
var
hits = newSeq[(float32, int16)](partitioning.maxEntryCount)
numHits: int
aa: bool
for y in startY ..< pathHeight:
computeCoverage(
cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr),
hits,
numHits,
aa,
width,
y,
0,
partitioning,
windingRule
)
if not aa:
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
let
startIndex = result.dataIndex(prevAt.int, y)
len = at.int - prevAt.int
fillUnsafe(result.data, 255, startIndex, len)
proc fillMask*(
path: SomePath, width, height: int, windingRule = wrNonZero
): Mask =
## Returns a new mask with the path filled. This is a faster alternative
## to `newMask` + `fillPath`.
let shapes = parseSomePath(path, true, 1)
shapes.fillMask(width, height, windingRule)
proc fillImage(
shapes: seq[seq[Vec2]],
width, height: int,
color: SomeColor,
windingRule = wrNonZero
): Image =
result = newImage(width, height)
let
mask = shapes.fillMask(width, height, windingRule)
rgbx = color.rgbx()
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
vec255 = mm_set1_epi32(cast[int32](uint32.high))
vecZero = mm_setzero_si128()
colorVecEven = mm_slli_epi16(colorVec, 8)
colorVecOdd = mm_and_si128(colorVec, oddMask)
iterations = result.data.len div 16
for _ in 0 ..< iterations:
var coverageVec = mm_loadu_si128(mask.data[i].addr)
if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff:
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
for q in [0, 4, 8, 12]:
mm_storeu_si128(result.data[i + q].addr, colorVec)
else:
for q in [0, 4, 8, 12]:
var unpacked = unpackAlphaValues(coverageVec)
# Shift the coverages from `a` to `g` and `a` for multiplying
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
var
sourceEven = mm_mulhi_epu16(colorVecEven, unpacked)
sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked)
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
mm_storeu_si128(
result.data[i + q].addr,
mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
)
coverageVec = mm_srli_si128(coverageVec, 4)
i += 16
let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32]
for i in i ..< result.data.len:
let coverage = mask.data[i]
if coverage == 255:
result.data[i] = rgbx
elif coverage != 0:
result.data[i].r = ((channels[0] * coverage) div 255).uint8
result.data[i].g = ((channels[1] * coverage) div 255).uint8
result.data[i].b = ((channels[2] * coverage) div 255).uint8
result.data[i].a = ((channels[3] * coverage) div 255).uint8
proc fillImage*(
path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero
): Image =
## Returns a new image with the path filled. This is a faster alternative
## to `newImage` + `fillPath`.
let shapes = parseSomePath(path, false, 1)
shapes.fillImage(width, height, color, windingRule)
proc strokeMask*(
path: SomePath,
width, height: int,
strokeWidth: float32 = 1.0,
lineCap = lcButt,
lineJoin = ljMiter,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Mask =
## Returns a new mask with the path stroked. This is a faster alternative
## to `newImage` + `strokePath`.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillMask(width, height, wrNonZero)
proc strokeImage*(
path: SomePath,
width, height: int,
color: SomeColor,
strokeWidth: float32 = 1.0,
lineCap = lcButt,
lineJoin = ljMiter,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Image =
## Returns a new image with the path stroked. This is a faster alternative
## to `newImage` + `strokePath`.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillImage(width, height, color, wrNonZero)
when defined(release):
{.pop.}
block: block:
let path = newPath() let path = newPath()
path.moveTo(0, 0) path.moveTo(0, 0)

View file

@ -2068,164 +2068,3 @@ proc strokeOverlaps*(
) )
strokeShapes.transform(transform) strokeShapes.transform(transform)
strokeShapes.overlaps(test, wrNonZero) strokeShapes.overlaps(test, wrNonZero)
proc fillMask(
shapes: seq[seq[Vec2]], width, height: int, windingRule = wrNonZero
): Mask =
result = newMask(width, height)
let
segments = shapes.shapesToSegments()
bounds = computeBounds(segments).snapToPixels()
startY = max(0, bounds.y.int)
pathHeight = min(height, (bounds.y + bounds.h).int)
partitioning = partitionSegments(segments, startY, pathHeight)
width = width.float32
var
hits = newSeq[(float32, int16)](partitioning.maxEntryCount)
numHits: int
aa: bool
for y in startY ..< pathHeight:
computeCoverage(
cast[ptr UncheckedArray[uint8]](result.data[result.dataIndex(0, y)].addr),
hits,
numHits,
aa,
width,
y,
0,
partitioning,
windingRule
)
if not aa:
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
let
startIndex = result.dataIndex(prevAt.int, y)
len = at.int - prevAt.int
fillUnsafe(result.data, 255, startIndex, len)
proc fillMask*(
path: SomePath, width, height: int, windingRule = wrNonZero
): Mask =
## Returns a new mask with the path filled. This is a faster alternative
## to `newMask` + `fillPath`.
let shapes = parseSomePath(path, true, 1)
shapes.fillMask(width, height, windingRule)
proc fillImage(
shapes: seq[seq[Vec2]],
width, height: int,
color: SomeColor,
windingRule = wrNonZero
): Image =
result = newImage(width, height)
let
mask = shapes.fillMask(width, height, windingRule)
rgbx = color.rgbx()
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
vec255 = mm_set1_epi32(cast[int32](uint32.high))
vecZero = mm_setzero_si128()
colorVecEven = mm_slli_epi16(colorVec, 8)
colorVecOdd = mm_and_si128(colorVec, oddMask)
iterations = result.data.len div 16
for _ in 0 ..< iterations:
var coverageVec = mm_loadu_si128(mask.data[i].addr)
if mm_movemask_epi8(mm_cmpeq_epi16(coverageVec, vecZero)) != 0xffff:
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
for q in [0, 4, 8, 12]:
mm_storeu_si128(result.data[i + q].addr, colorVec)
else:
for q in [0, 4, 8, 12]:
var unpacked = unpackAlphaValues(coverageVec)
# Shift the coverages from `a` to `g` and `a` for multiplying
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
var
sourceEven = mm_mulhi_epu16(colorVecEven, unpacked)
sourceOdd = mm_mulhi_epu16(colorVecOdd, unpacked)
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
mm_storeu_si128(
result.data[i + q].addr,
mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
)
coverageVec = mm_srli_si128(coverageVec, 4)
i += 16
let channels = [rgbx.r.uint32, rgbx.g.uint32, rgbx.b.uint32, rgbx.a.uint32]
for i in i ..< result.data.len:
let coverage = mask.data[i]
if coverage == 255:
result.data[i] = rgbx
elif coverage != 0:
result.data[i].r = ((channels[0] * coverage) div 255).uint8
result.data[i].g = ((channels[1] * coverage) div 255).uint8
result.data[i].b = ((channels[2] * coverage) div 255).uint8
result.data[i].a = ((channels[3] * coverage) div 255).uint8
proc fillImage*(
path: SomePath, width, height: int, color: SomeColor, windingRule = wrNonZero
): Image =
## Returns a new image with the path filled. This is a faster alternative
## to `newImage` + `fillPath`.
let shapes = parseSomePath(path, false, 1)
shapes.fillImage(width, height, color, windingRule)
proc strokeMask*(
path: SomePath,
width, height: int,
strokeWidth: float32 = 1.0,
lineCap = lcButt,
lineJoin = ljMiter,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Mask =
## Returns a new mask with the path stroked. This is a faster alternative
## to `newImage` + `strokePath`.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillMask(width, height, wrNonZero)
proc strokeImage*(
path: SomePath,
width, height: int,
color: SomeColor,
strokeWidth: float32 = 1.0,
lineCap = lcButt,
lineJoin = ljMiter,
miterLimit = defaultMiterLimit,
dashes: seq[float32] = @[]
): Image =
## Returns a new image with the path stroked. This is a faster alternative
## to `newImage` + `strokePath`.
let strokeShapes = strokeShapes(
parseSomePath(path, false, 1),
strokeWidth,
lineCap,
lineJoin,
miterLimit,
dashes,
1
)
result = strokeShapes.fillImage(width, height, color, wrNonZero)
when defined(release):
{.pop.}