fill faster when not doing aa
This commit is contained in:
parent
2e1f9a81b8
commit
f157a11ebf
1 changed files with 207 additions and 125 deletions
|
@ -1142,6 +1142,7 @@ iterator walk(
|
||||||
proc computeCoverages(
|
proc computeCoverages(
|
||||||
coverages: var seq[uint8],
|
coverages: var seq[uint8],
|
||||||
hits: var seq[(float32, int16)],
|
hits: var seq[(float32, int16)],
|
||||||
|
numHits: var int,
|
||||||
size: Vec2,
|
size: Vec2,
|
||||||
y: int,
|
y: int,
|
||||||
aa: bool,
|
aa: bool,
|
||||||
|
@ -1154,6 +1155,7 @@ proc computeCoverages(
|
||||||
offset = 1 / quality.float32
|
offset = 1 / quality.float32
|
||||||
initialOffset = offset / 2 + epsilon
|
initialOffset = offset / 2 + epsilon
|
||||||
|
|
||||||
|
if aa: # Coverage is only used for anti-aliasing
|
||||||
zeroMem(coverages[0].addr, coverages.len)
|
zeroMem(coverages[0].addr, coverages.len)
|
||||||
|
|
||||||
# Do scanlines for this row
|
# Do scanlines for this row
|
||||||
|
@ -1161,7 +1163,6 @@ proc computeCoverages(
|
||||||
var
|
var
|
||||||
yLine = y.float32 + initialOffset - offset
|
yLine = y.float32 + initialOffset - offset
|
||||||
scanline = line(vec2(0, yLine), vec2(size.x, yLine))
|
scanline = line(vec2(0, yLine), vec2(size.x, yLine))
|
||||||
numHits: int
|
|
||||||
for m in 0 ..< quality:
|
for m in 0 ..< quality:
|
||||||
yLine += offset
|
yLine += offset
|
||||||
scanline.a.y = yLine
|
scanline.a.y = yLine
|
||||||
|
@ -1184,6 +1185,7 @@ proc computeCoverages(
|
||||||
else:
|
else:
|
||||||
insertionSort(hits, numHits - 1)
|
insertionSort(hits, numHits - 1)
|
||||||
|
|
||||||
|
if aa:
|
||||||
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, size):
|
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, size):
|
||||||
var fillStart = prevAt.int
|
var fillStart = prevAt.int
|
||||||
|
|
||||||
|
@ -1206,7 +1208,6 @@ proc computeCoverages(
|
||||||
let fillLen = at.int - fillStart
|
let fillLen = at.int - fillStart
|
||||||
if fillLen > 0:
|
if fillLen > 0:
|
||||||
var i = fillStart
|
var i = fillStart
|
||||||
if aa:
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
|
let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
|
||||||
for j in countup(i, fillStart + fillLen - 16, 16):
|
for j in countup(i, fillStart + fillLen - 16, 16):
|
||||||
|
@ -1216,45 +1217,14 @@ proc computeCoverages(
|
||||||
i += 16
|
i += 16
|
||||||
for j in i ..< fillStart + fillLen:
|
for j in i ..< fillStart + fillLen:
|
||||||
coverages[j] += sampleCoverage
|
coverages[j] += sampleCoverage
|
||||||
else:
|
|
||||||
nimSetMem(coverages[fillStart].addr, sampleCoverage.cint, fillLen)
|
|
||||||
|
|
||||||
proc fillShapes(
|
proc fillCoverage(
|
||||||
image: Image,
|
image: Image,
|
||||||
shapes: seq[seq[Vec2]],
|
rgbx: ColorRGBX,
|
||||||
color: SomeColor,
|
startX, y: int,
|
||||||
windingRule: WindingRule,
|
coverages: seq[uint8],
|
||||||
blendMode: BlendMode
|
blendMode: BlendMode
|
||||||
) =
|
) =
|
||||||
# Figure out the total bounds of all the shapes,
|
|
||||||
# rasterize only within the total bounds
|
|
||||||
let
|
|
||||||
rgbx = color.asRgbx()
|
|
||||||
blender = blendMode.blender()
|
|
||||||
segments = shapes.shapesToSegments()
|
|
||||||
aa = segments.requiresAntiAliasing()
|
|
||||||
bounds = computePixelBounds(segments)
|
|
||||||
startX = max(0, bounds.x.int)
|
|
||||||
startY = max(0, bounds.y.int)
|
|
||||||
pathHeight = min(image.height, (bounds.y + bounds.h).int)
|
|
||||||
partitioning = partitionSegments(segments, startY, pathHeight - startY)
|
|
||||||
|
|
||||||
var
|
|
||||||
coverages = newSeq[uint8](image.width)
|
|
||||||
hits = newSeq[(float32, int16)](4)
|
|
||||||
|
|
||||||
for y in startY ..< pathHeight:
|
|
||||||
computeCoverages(
|
|
||||||
coverages,
|
|
||||||
hits,
|
|
||||||
image.wh,
|
|
||||||
y,
|
|
||||||
aa,
|
|
||||||
partitioning,
|
|
||||||
windingRule
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply the coverage and blend
|
|
||||||
var x = startX
|
var x = startX
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
if blendMode.hasSimdBlender():
|
if blendMode.hasSimdBlender():
|
||||||
|
@ -1266,7 +1236,7 @@ proc fillShapes(
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
vColor = mm_set1_epi32(cast[int32](rgbx))
|
vColor = mm_set1_epi32(cast[int32](rgbx))
|
||||||
for _ in countup(x, image.width - 16, 4):
|
for _ in countup(x, image.width - 16, 4):
|
||||||
var coverage = mm_loadu_si128(coverages[x].addr)
|
var coverage = mm_loadu_si128(coverages[x].unsafeAddr)
|
||||||
coverage = mm_and_si128(coverage, first32)
|
coverage = mm_and_si128(coverage, first32)
|
||||||
|
|
||||||
let
|
let
|
||||||
|
@ -1310,6 +1280,7 @@ proc fillShapes(
|
||||||
)
|
)
|
||||||
x += 4
|
x += 4
|
||||||
|
|
||||||
|
let blender = blendMode.blender()
|
||||||
while x < image.width:
|
while x < image.width:
|
||||||
let coverage = coverages[x]
|
let coverage = coverages[x]
|
||||||
if coverage != 0:
|
if coverage != 0:
|
||||||
|
@ -1327,6 +1298,138 @@ proc fillShapes(
|
||||||
image.setRgbaUnsafe(x, y, blender(backdrop, source))
|
image.setRgbaUnsafe(x, y, blender(backdrop, source))
|
||||||
inc x
|
inc x
|
||||||
|
|
||||||
|
proc fillCoverage(mask: Mask, startX, y: int, coverages: seq[uint8]) =
  ## Blends one row of `coverages` into row `y` of `mask`, starting at
  ## column `startX`.
  ##
  ## The scalar path skips pixels whose coverage is zero. On amd64 builds
  ## without `pixieNoSimd`, 16-wide runs are processed first and a run is
  ## skipped only when all of its coverage values are zero.
  var col = startX
  when defined(amd64) and not defined(pixieNoSimd):
    let
      simdMasker = bmNormal.maskerSimd()
      lastVectorStart = coverages.len - 16
    # Consume 16 coverage bytes per step while a full vector still fits.
    while col <= lastVectorStart:
      let
        vCoverage = mm_loadu_si128(coverages[col].unsafeAddr)
        vIsZero = mm_cmpeq_epi16(vCoverage, mm_setzero_si128())
      if mm_movemask_epi8(vIsZero) != 0xffff:
        # Some lane carries coverage, so blend the whole run.
        let dst = mask.data[mask.dataIndex(col, y)].addr
        mm_storeu_si128(dst, simdMasker(mm_loadu_si128(dst), vCoverage))
      col += 16

  # Scalar path: the tail on SIMD builds, the entire row otherwise.
  while col < mask.width:
    let amount = coverages[col]
    if amount != 0:
      let current = mask.getValueUnsafe(col, y)
      mask.setValueUnsafe(col, y, blendAlpha(current, amount))
    inc col
|
||||||
|
|
||||||
|
proc fillHits(
  image: Image,
  rgbx: ColorRGBX,
  startX, y: int,
  hits: seq[(float32, int16)],
  numHits: int,
  windingRule: WindingRule,
  blendMode: BlendMode
) =
  ## Fills row `y` of `image` with the solid color `rgbx` over every span
  ## yielded by `hits.walk(numHits, windingRule, y, image.wh)`.
  ##
  ## `fillShapes` calls this for the non-anti-aliased case, so no per-pixel
  ## coverage is applied. `startX` is not read by this proc.
  let blender = blendMode.blender()
  for (prevAt, at, count) in hits.walk(numHits, windingRule, y, image.wh):
    let
      fillStart = prevAt.int
      fillLen = at.int - fillStart
    if fillLen > 0:
      if blendMode == bmNormal and rgbx.a == 255:
        # Opaque normal blend: write the color directly over the span.
        fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
      else:
        let fillEnd = fillStart + fillLen # One past the last pixel to fill
        var x = fillStart
        when defined(amd64) and not defined(pixieNoSimd):
          if blendMode.hasSimdBlender():
            # When supported, SIMD blend as much as possible
            let
              blenderSimd = blendMode.blenderSimd()
              vColor = mm_set1_epi32(cast[int32](rgbx))
            # Each step loads and stores 16 bytes (4 pixels), so the last
            # vector may start at `fillEnd - 4`. The limit has to be an
            # absolute x position; the previous `fillLen - 16` was a length
            # and left most spans to the scalar loop.
            for _ in countup(fillStart, fillEnd - 4, 4):
              let
                index = image.dataIndex(x, y)
                backdrop = mm_loadu_si128(image.data[index].addr)
              mm_storeu_si128(
                image.data[index].addr,
                blenderSimd(backdrop, vColor)
              )
              x += 4
        # Scalar path: the tail on SIMD builds, the whole span otherwise.
        while x < fillEnd:
          let backdrop = image.getRgbaUnsafe(x, y)
          image.setRgbaUnsafe(x, y, blender(backdrop, rgbx))
          inc x
|
||||||
|
|
||||||
|
proc fillHits(
  mask: Mask,
  startX, y: int,
  hits: seq[(float32, int16)],
  numHits: int,
  windingRule: WindingRule
) =
  ## Writes the value 255 into row `y` of `mask` over every span yielded by
  ## `hits.walk(numHits, windingRule, y, mask.wh)`. Spans whose integer
  ## length is not positive are ignored. `startX` is not read by this proc.
  for (lo, hi, winding) in hits.walk(numHits, windingRule, y, mask.wh):
    let
      first = lo.int
      length = hi.int - first
    if length <= 0:
      continue
    fillUnsafe(mask.data, 255, mask.dataIndex(first, y), length)
|
||||||
|
|
||||||
|
proc fillShapes(
  image: Image,
  shapes: seq[seq[Vec2]],
  color: SomeColor,
  windingRule: WindingRule,
  blendMode: BlendMode
) =
  ## Rasterizes `shapes` onto `image` in `color`, one scanline at a time.
  ##
  ## Only rows inside the pixel bounds of the shapes, clamped to the image,
  ## are visited. Rows are painted through `fillCoverage` when the segments
  ## require anti-aliasing and through `fillHits` otherwise.
  let
    rgbx = color.asRgbx()
    segments = shapes.shapesToSegments()
    aa = segments.requiresAntiAliasing()
    bounds = computePixelBounds(segments)
    # Clamp the rasterized region to the image.
    startX = max(0, bounds.x.int)
    startY = max(0, bounds.y.int)
    pathHeight = min(image.height, (bounds.y + bounds.h).int)
    partitioning = partitionSegments(segments, startY, pathHeight - startY)

  # Scratch buffers shared by every row.
  var
    numHits: int
    hits = newSeq[(float32, int16)](4)
    coverages = newSeq[uint8](image.width)

  for y in startY ..< pathHeight:
    computeCoverages(
      coverages, hits, numHits, image.wh, y, aa, partitioning, windingRule
    )
    if not aa:
      image.fillHits(rgbx, startX, y, hits, numHits, windingRule, blendMode)
    else:
      image.fillCoverage(rgbx, startX, y, coverages, blendMode)
|
||||||
|
|
||||||
proc fillShapes(
|
proc fillShapes(
|
||||||
mask: Mask,
|
mask: Mask,
|
||||||
shapes: seq[seq[Vec2]],
|
shapes: seq[seq[Vec2]],
|
||||||
|
@ -1344,47 +1447,26 @@ proc fillShapes(
|
||||||
pathHeight = stopY - startY
|
pathHeight = stopY - startY
|
||||||
partitioning = partitionSegments(segments, startY, pathHeight)
|
partitioning = partitionSegments(segments, startY, pathHeight)
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
|
||||||
let maskerSimd = bmNormal.maskerSimd()
|
|
||||||
|
|
||||||
var
|
var
|
||||||
coverages = newSeq[uint8](mask.width)
|
coverages = newSeq[uint8](mask.width)
|
||||||
hits = newSeq[(float32, int16)](4)
|
hits = newSeq[(float32, int16)](4)
|
||||||
|
numHits: int
|
||||||
|
|
||||||
for y in startY ..< stopY:
|
for y in startY ..< stopY:
|
||||||
computeCoverages(
|
computeCoverages(
|
||||||
coverages,
|
coverages,
|
||||||
hits,
|
hits,
|
||||||
|
numHits,
|
||||||
mask.wh,
|
mask.wh,
|
||||||
y,
|
y,
|
||||||
aa,
|
aa,
|
||||||
partitioning,
|
partitioning,
|
||||||
windingRule
|
windingRule
|
||||||
)
|
)
|
||||||
|
if aa:
|
||||||
# Apply the coverage and blend
|
mask.fillCoverage(startX, y, coverages)
|
||||||
var x = startX
|
else:
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
mask.fillHits(startX, y, hits, numHits, windingRule)
|
||||||
# When supported, SIMD blend as much as possible
|
|
||||||
for _ in countup(x, coverages.len - 16, 16):
|
|
||||||
let
|
|
||||||
coverage = mm_loadu_si128(coverages[x].addr)
|
|
||||||
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
|
|
||||||
if mm_movemask_epi8(eqZero) != 0xffff:
|
|
||||||
# If the coverages are not all zero
|
|
||||||
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
|
||||||
mm_storeu_si128(
|
|
||||||
mask.data[mask.dataIndex(x, y)].addr,
|
|
||||||
maskerSimd(backdrop, coverage)
|
|
||||||
)
|
|
||||||
x += 16
|
|
||||||
|
|
||||||
while x < mask.width:
|
|
||||||
let coverage = coverages[x]
|
|
||||||
if coverage != 0:
|
|
||||||
let backdrop = mask.getValueUnsafe(x, y)
|
|
||||||
mask.setValueUnsafe(x, y, blendAlpha(backdrop, coverage))
|
|
||||||
inc x
|
|
||||||
|
|
||||||
proc miterLimitToAngle*(limit: float32): float32 =
|
proc miterLimitToAngle*(limit: float32): float32 =
|
||||||
## Converts miter-limit-ratio to miter-limit-angle.
|
## Converts miter-limit-ratio to miter-limit-angle.
|
||||||
|
|
Loading…
Reference in a new issue