performance improvements

This commit is contained in:
Ryan Oldenburg 2021-02-14 12:22:38 -06:00
parent 955d813a27
commit 68e2522074
3 changed files with 87 additions and 66 deletions

View file

@ -68,7 +68,7 @@ proc `[]=`*(image: Image, x, y: int, rgba: ColorRGBA) {.inline.} =
if image.inside(x, y):
image.setRgbaUnsafe(x, y, rgba)
proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
proc fillUnsafe*(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
## Fills the image data with the parameter color starting at index start and
## continuing for len indices.

View file

@ -84,7 +84,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask =
mask.getValueUnsafe(x * 2 + 0, y * 2 + 1)
result.setValueUnsafe(x, y, (value div 4).uint8)
proc fillUnsafe(data: var seq[uint8], value: uint8, start, len: int) =
proc fillUnsafe*(data: var seq[uint8], value: uint8, start, len: int) =
## Fills the mask data with the parameter value starting at index start and
## continuing for len indices.
nimSetMem(data[start].addr, value.cint, len)

View file

@ -382,7 +382,7 @@ proc roundedRect*(
h = wh.y
s = splineCircleK
maxRadius = min(w/2, h/2)
maxRadius = min(w / 2, h / 2)
nw = min(nw, maxRadius)
ne = min(ne, maxRadius)
se = min(se, maxRadius)
@ -397,14 +397,14 @@ proc roundedRect*(
l1 = vec2(x, y + h - sw)
l2 = vec2(x, y + nw)
t1h = t1 + vec2(-nw*s, 0)
t2h = t2 + vec2(+ne*s, 0)
r1h = r1 + vec2(0, -ne*s)
r2h = r2 + vec2(0, +se*s)
b1h = b1 + vec2(+se*s, 0)
b2h = b2 + vec2(-sw*s, 0)
l1h = l1 + vec2(0, +sw*s)
l2h = l2 + vec2(0, -nw*s)
t1h = t1 + vec2(-nw * s, 0)
t2h = t2 + vec2(+ne * s, 0)
r1h = r1 + vec2(0, -ne * s)
r2h = r2 + vec2(0, +se * s)
b1h = b1 + vec2(+se * s, 0)
b2h = b2 + vec2(-sw * s, 0)
l1h = l1 + vec2(0, +sw * s)
l2h = l2 + vec2(0, -nw * s)
if clockwise:
path.moveTo(t1)
@ -865,13 +865,11 @@ proc partitionSegments(
segmentCount += shape.len - 1
let
maxPartitions = max(1, height div 10)
numPartitions = min(maxPartitions, max(1, segmentCount div 10))
result.setLen(numPartitions)
let partitionHeight = height div numPartitions
maxPartitions = max(1, height div 10).uint32
numPartitions = min(maxPartitions, max(1, segmentCount div 10).uint32)
partitionHeight = (height.uint32 div numPartitions)
var partitions = newSeq[seq[(Segment, int16)]](numPartitions)
for shape in shapes:
for segment in shape.segments:
if segment.at.y == segment.to.y: # Skip horizontal
@ -884,41 +882,46 @@ proc partitionSegments(
winding = -1
if partitionHeight == 0:
result[0].add((segment, winding))
partitions[0].add((segment, winding))
else:
let
atPartition = max(0, segment.at.y).int div partitionHeight
toPartition = max(0, ceil(segment.to.y)).int div partitionHeight
for i in min(atPartition, result.high) .. min(toPartition, result.high):
result[i].add((segment, winding))
var
atPartition = max(0, segment.at.y).uint32 div partitionHeight
toPartition = max(0, ceil(segment.to.y)).uint32 div partitionHeight
atPartition = clamp(atPartition, 0, partitions.high.uint32)
toPartition = clamp(toPartition, 0, partitions.high.uint32)
for i in atPartition .. toPartition:
partitions[i].add((segment, winding))
partitions
proc computeCoverages(
coverages: var seq[uint8],
hits: var seq[(float32, int16)],
numHits: var int,
size: Vec2,
y: int,
partitions: seq[seq[(Segment, int16)]],
partitionHeight: uint32,
windingRule: WindingRule
) =
) {.inline.} =
const
quality = 5 # Must divide 255 cleanly (1, 3, 5, 15, 17, 51, 85)
sampleCoverage = 255.uint8 div quality
ep = 0.0001 * PI
sampleCoverage = (255 div quality).uint8
offset = 1 / quality.float32
initialOffset = offset / 2
var numHits: int
let
partitionHeight = size.y.int div partitions.len
partition =
if partitionHeight == 0:
0
0.uint32
else:
min(y div partitionHeight, partitions.high)
min(y.uint32 div partitionHeight, partitions.high.uint32)
zeroMem(coverages[0].addr, coverages.len)
# Do scanlines for this row
for m in 0 ..< quality:
const ep = 0.0001 * PI
let
yLine = y.float32 + initialOffset + offset * m.float32 + ep
scanline = Line(a: vec2(0, yLine), b: vec2(size.x, yLine))
@ -940,9 +943,9 @@ proc computeCoverages(
for i in 0 ..< numHits:
let (at, winding) = hits[i]
var
fillStart = x.int
leftCover = if at.int - x.int > 0: trunc(x) + 1 - x else: at - x
var fillStart = x.int
let leftCover = if at.int - x.int > 0: trunc(x) + 1 - x else: at - x
if leftCover != 0:
inc fillStart
if shouldFill(windingRule, count):
@ -978,7 +981,9 @@ proc fillShapes(
windingRule: WindingRule,
blendMode: BlendMode
) =
let partitions = partitionSegments(shapes, image.height)
let
partitions = partitionSegments(shapes, image.height)
partitionHeight = image.height.uint32 div partitions.len.uint32
# Figure out the total bounds of all the shapes,
# rasterize only within the total bounds
@ -995,17 +1000,17 @@ proc fillShapes(
var
coverages = newSeq[uint8](image.width)
hits = newSeq[(float32, int16)](4)
numHits: int
for y in startY ..< stopY:
# Reset buffer for this row
zeroMem(coverages[0].addr, coverages.len)
computeCoverages(
coverages,
hits,
numHits,
image.wh,
y,
partitions,
partitionHeight,
windingRule
)
@ -1023,34 +1028,45 @@ proc fillShapes(
var coverage = mm_loadu_si128(coverages[x].addr)
coverage = mm_and_si128(coverage, first32)
let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
let
index = image.dataIndex(x, y)
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
if mm_movemask_epi8(eqZero) != 0xffff:
# If the coverages are not all zero
var source = vColor
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) != 0xffff:
# If the coverages are not all 255
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) == 0xffff:
# Coverages are all 255
if color.a == 255 and blendMode == bmNormal:
mm_storeu_si128(image.data[index].addr, vColor)
else:
let backdrop = mm_loadu_si128(image.data[index].addr)
mm_storeu_si128(
image.data[index].addr,
blenderSimd(backdrop, vColor)
)
else:
# Coverages are not all 255
coverage = unpackAlphaValues(coverage)
# Shift the coverages from `a` to `g` and `a` for multiplying
coverage = mm_or_si128(coverage, mm_srli_epi32(coverage, 16))
var
colorEven = mm_slli_epi16(source, 8)
colorOdd = mm_and_si128(source, oddMask)
source = vColor
sourceEven = mm_slli_epi16(source, 8)
sourceOdd = mm_and_si128(source, oddMask)
colorEven = mm_mulhi_epu16(colorEven, coverage)
colorOdd = mm_mulhi_epu16(colorOdd, coverage)
sourceEven = mm_mulhi_epu16(sourceEven, coverage)
sourceOdd = mm_mulhi_epu16(sourceOdd, coverage)
colorEven = mm_srli_epi16(mm_mulhi_epu16(colorEven, div255), 7)
colorOdd = mm_srli_epi16(mm_mulhi_epu16(colorOdd, div255), 7)
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
source = mm_or_si128(colorEven, mm_slli_epi16(colorOdd, 8))
let
index = image.dataIndex(x, y)
backdrop = mm_loadu_si128(image.data[index].addr)
mm_storeu_si128(image.data[index].addr, blenderSimd(backdrop, source))
source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
let backdrop = mm_loadu_si128(image.data[index].addr)
mm_storeu_si128(
image.data[index].addr,
blenderSimd(backdrop, source)
)
x += 4
while x < image.width:
@ -1069,8 +1085,12 @@ proc fillShapes(
source.b = ((color.b.uint16 * coverage) div 255).uint8
source.a = ((color.a.uint16 * coverage) div 255).uint8
let backdrop = image.getRgbaUnsafe(x, y)
image.setRgbaUnsafe(x, y, blender(backdrop, source))
if source.a == 255 and blendMode == bmNormal:
# Skip blending
image.setRgbaUnsafe(x, y, source)
else:
let backdrop = image.getRgbaUnsafe(x, y)
image.setRgbaUnsafe(x, y, blender(backdrop, source))
inc x
proc fillShapes(
@ -1078,7 +1098,9 @@ proc fillShapes(
shapes: seq[seq[Vec2]],
windingRule: WindingRule
) =
let partitions = partitionSegments(shapes, mask.height)
let
partitions = partitionSegments(shapes, mask.height)
partitionHeight = mask.height.uint32 div partitions.len.uint32
# Figure out the total bounds of all the shapes,
# rasterize only within the total bounds
@ -1088,24 +1110,23 @@ proc fillShapes(
startY = max(0, bounds.y.int)
stopY = min(mask.height, (bounds.y + bounds.h).int)
var
coverages = newSeq[uint8](mask.width)
hits = newSeq[(float32, int16)](4)
when defined(amd64) and not defined(pixieNoSimd):
let maskerSimd = bmNormal.maskerSimd()
for y in startY ..< stopY:
# Reset buffer for this row
zeroMem(coverages[0].addr, coverages.len)
var
coverages = newSeq[uint8](mask.width)
hits = newSeq[(float32, int16)](4)
numHits: int
for y in startY ..< stopY:
computeCoverages(
coverages,
hits,
numHits,
mask.wh,
y,
partitions,
partitionHeight,
windingRule
)