fill faster when not doing aa

This commit is contained in:
Ryan Oldenburg 2021-06-04 01:05:15 -05:00
parent 2e1f9a81b8
commit f157a11ebf

View file

@ -1142,6 +1142,7 @@ iterator walk(
proc computeCoverages( proc computeCoverages(
coverages: var seq[uint8], coverages: var seq[uint8],
hits: var seq[(float32, int16)], hits: var seq[(float32, int16)],
numHits: var int,
size: Vec2, size: Vec2,
y: int, y: int,
aa: bool, aa: bool,
@ -1154,6 +1155,7 @@ proc computeCoverages(
offset = 1 / quality.float32 offset = 1 / quality.float32
initialOffset = offset / 2 + epsilon initialOffset = offset / 2 + epsilon
if aa: # Coverage is only used for anti-aliasing
zeroMem(coverages[0].addr, coverages.len) zeroMem(coverages[0].addr, coverages.len)
# Do scanlines for this row # Do scanlines for this row
@ -1161,7 +1163,6 @@ proc computeCoverages(
var var
yLine = y.float32 + initialOffset - offset yLine = y.float32 + initialOffset - offset
scanline = line(vec2(0, yLine), vec2(size.x, yLine)) scanline = line(vec2(0, yLine), vec2(size.x, yLine))
numHits: int
for m in 0 ..< quality: for m in 0 ..< quality:
yLine += offset yLine += offset
scanline.a.y = yLine scanline.a.y = yLine
@ -1184,6 +1185,7 @@ proc computeCoverages(
else: else:
insertionSort(hits, numHits - 1) insertionSort(hits, numHits - 1)
if aa:
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, size): for (prevAt, at, count) in hits.walk(numHits, windingRule, y, size):
var fillStart = prevAt.int var fillStart = prevAt.int
@ -1206,7 +1208,6 @@ proc computeCoverages(
let fillLen = at.int - fillStart let fillLen = at.int - fillStart
if fillLen > 0: if fillLen > 0:
var i = fillStart var i = fillStart
if aa:
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage)) let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
for j in countup(i, fillStart + fillLen - 16, 16): for j in countup(i, fillStart + fillLen - 16, 16):
@ -1216,45 +1217,14 @@ proc computeCoverages(
i += 16 i += 16
for j in i ..< fillStart + fillLen: for j in i ..< fillStart + fillLen:
coverages[j] += sampleCoverage coverages[j] += sampleCoverage
else:
nimSetMem(coverages[fillStart].addr, sampleCoverage.cint, fillLen)
proc fillShapes( proc fillCoverage(
image: Image, image: Image,
shapes: seq[seq[Vec2]], rgbx: ColorRGBX,
color: SomeColor, startX, y: int,
windingRule: WindingRule, coverages: seq[uint8],
blendMode: BlendMode blendMode: BlendMode
) = ) =
# Figure out the total bounds of all the shapes,
# rasterize only within the total bounds
let
rgbx = color.asRgbx()
blender = blendMode.blender()
segments = shapes.shapesToSegments()
aa = segments.requiresAntiAliasing()
bounds = computePixelBounds(segments)
startX = max(0, bounds.x.int)
startY = max(0, bounds.y.int)
pathHeight = min(image.height, (bounds.y + bounds.h).int)
partitioning = partitionSegments(segments, startY, pathHeight - startY)
var
coverages = newSeq[uint8](image.width)
hits = newSeq[(float32, int16)](4)
for y in startY ..< pathHeight:
computeCoverages(
coverages,
hits,
image.wh,
y,
aa,
partitioning,
windingRule
)
# Apply the coverage and blend
var x = startX var x = startX
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
if blendMode.hasSimdBlender(): if blendMode.hasSimdBlender():
@ -1266,7 +1236,7 @@ proc fillShapes(
div255 = mm_set1_epi16(cast[int16](0x8081)) div255 = mm_set1_epi16(cast[int16](0x8081))
vColor = mm_set1_epi32(cast[int32](rgbx)) vColor = mm_set1_epi32(cast[int32](rgbx))
for _ in countup(x, image.width - 16, 4): for _ in countup(x, image.width - 16, 4):
var coverage = mm_loadu_si128(coverages[x].addr) var coverage = mm_loadu_si128(coverages[x].unsafeAddr)
coverage = mm_and_si128(coverage, first32) coverage = mm_and_si128(coverage, first32)
let let
@ -1310,6 +1280,7 @@ proc fillShapes(
) )
x += 4 x += 4
let blender = blendMode.blender()
while x < image.width: while x < image.width:
let coverage = coverages[x] let coverage = coverages[x]
if coverage != 0: if coverage != 0:
@ -1327,6 +1298,138 @@ proc fillShapes(
image.setRgbaUnsafe(x, y, blender(backdrop, source)) image.setRgbaUnsafe(x, y, blender(backdrop, source))
inc x inc x
proc fillCoverage(mask: Mask, startX, y: int, coverages: seq[uint8]) =
  ## Blends the per-column values in `coverages` into row `y` of `mask`,
  ## beginning at column `startX`. Columns whose coverage is zero are left
  ## untouched.
  var col = startX
  when defined(amd64) and not defined(pixieNoSimd):
    # Handle 16 coverage bytes per iteration for as long as a whole vector
    # still fits inside `coverages`.
    let
      simdMasker = bmNormal.maskerSimd()
      vZero = mm_setzero_si128()
    for _ in countup(col, coverages.len - 16, 16):
      let vCoverage = mm_loadu_si128(coverages[col].unsafeAddr)
      # The compare mask is 0xffff only when every lane is zero; such a
      # vector needs no blending at all.
      if mm_movemask_epi8(mm_cmpeq_epi16(vCoverage, vZero)) != 0xffff:
        let
          index = mask.dataIndex(col, y)
          vBackdrop = mm_loadu_si128(mask.data[index].addr)
        mm_storeu_si128(
          mask.data[index].addr,
          simdMasker(vBackdrop, vCoverage)
        )
      col += 16

  # Scalar pass over whatever the vector loop did not reach (or the whole
  # row when SIMD is compiled out).
  for px in col ..< mask.width:
    let amount = coverages[px]
    if amount != 0:
      mask.setValueUnsafe(
        px, y, blendAlpha(mask.getValueUnsafe(px, y), amount)
      )
proc fillHits(
  image: Image,
  rgbx: ColorRGBX,
  startX, y: int,
  hits: seq[(float32, int16)],
  numHits: int,
  windingRule: WindingRule,
  blendMode: BlendMode
) =
  ## Fills row `y` of `image` with the solid color `rgbx` over every span
  ## yielded by walking the first `numHits` entries of `hits` under
  ## `windingRule`. No per-pixel coverage is involved.
  ##
  ## `startX` is accepted for signature parity with `fillCoverage` but is
  ## not read here; each span carries its own start column.
  let blender = blendMode.blender()
  for (prevAt, at, count) in hits.walk(numHits, windingRule, y, image.wh):
    let
      fillStart = prevAt.int
      fillLen = at.int - fillStart
    if fillLen > 0:
      if blendMode == bmNormal and rgbx.a == 255:
        # Fully opaque normal blending just overwrites the backdrop, so
        # the span can be written directly without reading it back.
        fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
      else:
        let fillEnd = fillStart + fillLen
        var x = fillStart
        when defined(amd64) and not defined(pixieNoSimd):
          if blendMode.hasSimdBlender():
            # When supported, SIMD blend as much as possible
            let
              blenderSimd = blendMode.blenderSimd()
              vColor = mm_set1_epi32(cast[int32](rgbx))
            # One 128-bit load/store covers 4 pixels, so the last valid
            # vector start is `fillEnd - 4`. The bound has to be an end
            # position relative to `fillStart`, not the bare span length,
            # otherwise spans that do not begin near column 0 never take
            # the vector path.
            for _ in countup(fillStart, fillEnd - 4, 4):
              let
                index = image.dataIndex(x, y)
                backdrop = mm_loadu_si128(image.data[index].addr)
              mm_storeu_si128(
                image.data[index].addr,
                blenderSimd(backdrop, vColor)
              )
              x += 4

        # Scalar blend for the remaining (< 4) pixels, or for the whole
        # span when no SIMD blender is available.
        while x < fillEnd:
          let backdrop = image.getRgbaUnsafe(x, y)
          image.setRgbaUnsafe(x, y, blender(backdrop, rgbx))
          inc x
proc fillHits(
  mask: Mask,
  startX, y: int,
  hits: seq[(float32, int16)],
  numHits: int,
  windingRule: WindingRule
) =
  ## Writes the value 255 into row `y` of `mask` over every span yielded by
  ## walking the first `numHits` entries of `hits` under `windingRule`.
  ##
  ## `startX` is not read by this overload.
  for (spanFrom, spanTo, count) in hits.walk(numHits, windingRule, y, mask.wh):
    let
      firstCol = spanFrom.int
      spanLen = spanTo.int - firstCol
    if spanLen <= 0:
      # Nothing to write for an empty or inverted span.
      continue
    fillUnsafe(mask.data, 255, mask.dataIndex(firstCol, y), spanLen)
proc fillShapes(
  image: Image,
  shapes: seq[seq[Vec2]],
  color: SomeColor,
  windingRule: WindingRule,
  blendMode: BlendMode
) =
  ## Rasterizes `shapes` onto `image` in `color`, one row at a time, using
  ## `windingRule` to decide what is inside and `blendMode` to combine with
  ## the existing pixels.
  ##
  ## Only the rows inside the pixel bounds of all shapes (clamped to the
  ## image) are visited.
  let
    rgbx = color.asRgbx()
    segments = shapes.shapesToSegments()
    aa = segments.requiresAntiAliasing()
    bounds = computePixelBounds(segments)
    startX = max(0, bounds.x.int)
    startY = max(0, bounds.y.int)
    stopY = min(image.height, (bounds.y + bounds.h).int)
    partitioning = partitionSegments(segments, startY, stopY - startY)

  # Scratch buffers reused for every row.
  var
    coverages = newSeq[uint8](image.width)
    hits = newSeq[(float32, int16)](4)
    numHits: int

  for y in startY ..< stopY:
    computeCoverages(
      coverages, hits, numHits, image.wh, y, aa, partitioning, windingRule
    )
    if not aa:
      # No anti-aliasing needed: fill the hit spans directly instead of
      # going through the coverage buffer.
      fillHits(image, rgbx, startX, y, hits, numHits, windingRule, blendMode)
    else:
      fillCoverage(image, rgbx, startX, y, coverages, blendMode)
proc fillShapes( proc fillShapes(
mask: Mask, mask: Mask,
shapes: seq[seq[Vec2]], shapes: seq[seq[Vec2]],
@ -1344,47 +1447,26 @@ proc fillShapes(
pathHeight = stopY - startY pathHeight = stopY - startY
partitioning = partitionSegments(segments, startY, pathHeight) partitioning = partitionSegments(segments, startY, pathHeight)
when defined(amd64) and not defined(pixieNoSimd):
let maskerSimd = bmNormal.maskerSimd()
var var
coverages = newSeq[uint8](mask.width) coverages = newSeq[uint8](mask.width)
hits = newSeq[(float32, int16)](4) hits = newSeq[(float32, int16)](4)
numHits: int
for y in startY ..< stopY: for y in startY ..< stopY:
computeCoverages( computeCoverages(
coverages, coverages,
hits, hits,
numHits,
mask.wh, mask.wh,
y, y,
aa, aa,
partitioning, partitioning,
windingRule windingRule
) )
if aa:
# Apply the coverage and blend mask.fillCoverage(startX, y, coverages)
var x = startX else:
when defined(amd64) and not defined(pixieNoSimd): mask.fillHits(startX, y, hits, numHits, windingRule)
# When supported, SIMD blend as much as possible
for _ in countup(x, coverages.len - 16, 16):
let
coverage = mm_loadu_si128(coverages[x].addr)
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
if mm_movemask_epi8(eqZero) != 0xffff:
# If the coverages are not all zero
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
mm_storeu_si128(
mask.data[mask.dataIndex(x, y)].addr,
maskerSimd(backdrop, coverage)
)
x += 16
while x < mask.width:
let coverage = coverages[x]
if coverage != 0:
let backdrop = mask.getValueUnsafe(x, y)
mask.setValueUnsafe(x, y, blendAlpha(backdrop, coverage))
inc x
proc miterLimitToAngle*(limit: float32): float32 = proc miterLimitToAngle*(limit: float32): float32 =
## Converts miter-limit-ratio to miter-limit-angle. ## Converts miter-limit-ratio to miter-limit-angle.