Merge pull request #449 from guzba/master

faster simple path filling
This commit is contained in:
Andre von Houck 2022-06-22 10:45:23 -07:00 committed by GitHub
commit b2e145130a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 87 deletions

View file

@ -13,21 +13,33 @@ type
var benchmarks: seq[Benchmark] var benchmarks: seq[Benchmark]
let
opaque = newPaint(SolidPaint)
notOpaque = newPaint(SolidPaint)
opaque.color = color(0, 0, 0, 1)
notOpaque.color = color(0, 0, 0, 0.5)
block: # Basic rect block: # Basic rect
let path = newPath() let path = newPath()
path.rect(rect(0, 0, 900, 900)) path.rect(rect(50, 50, 800, 800))
let let shapes = path.commandsToShapes(true, 1)
shapes = path.commandsToShapes(true, 1)
paint = newPaint(SolidPaint)
paint.color = color(0, 0, 0, 1)
benchmarks.add(Benchmark( benchmarks.add(Benchmark(
name: "rect", name: "rect opaque",
fills: @[Fill( fills: @[Fill(
shapes: shapes, shapes: shapes,
transform: mat3(), transform: mat3(),
paint: paint, paint: opaque,
windingRule: NonZero
)]))
benchmarks.add(Benchmark(
name: "rect not opaque",
fills: @[Fill(
shapes: shapes,
transform: mat3(),
paint: notOpaque,
windingRule: NonZero windingRule: NonZero
)])) )]))
@ -35,17 +47,23 @@ block: # Rounded rect
let path = newPath() let path = newPath()
path.roundedRect(rect(0, 0, 900, 900), 20, 20, 20, 20) path.roundedRect(rect(0, 0, 900, 900), 20, 20, 20, 20)
let let shapes = path.commandsToShapes(true, 1)
shapes = path.commandsToShapes(true, 1)
paint = newPaint(SolidPaint)
paint.color = color(0, 0, 0, 1)
benchmarks.add(Benchmark( benchmarks.add(Benchmark(
name: "roundedRect", name: "roundedRect opaque",
fills: @[Fill( fills: @[Fill(
shapes: shapes, shapes: shapes,
transform: mat3(), transform: mat3(),
paint: paint, paint: opaque,
windingRule: NonZero
)]))
benchmarks.add(Benchmark(
name: "roundedRect not opaque",
fills: @[Fill(
shapes: shapes,
transform: mat3(),
paint: notOpaque,
windingRule: NonZero windingRule: NonZero
)])) )]))
@ -58,17 +76,23 @@ block: # Heart
Q 100,600 100,300 z Q 100,600 100,300 z
""") """)
let let shapes = path.commandsToShapes(true, 1)
shapes = path.commandsToShapes(true, 1)
paint = newPaint(SolidPaint)
paint.color = color(0, 0, 0, 1)
benchmarks.add(Benchmark( benchmarks.add(Benchmark(
name: "Heart", name: "heart opaque",
fills: @[Fill( fills: @[Fill(
shapes: shapes, shapes: shapes,
transform: mat3(), transform: mat3(),
paint: paint, paint: opaque,
windingRule: NonZero
)]))
benchmarks.add(Benchmark(
name: "heart not opaque",
fills: @[Fill(
shapes: shapes,
transform: mat3(),
paint: notOpaque,
windingRule: NonZero windingRule: NonZero
)])) )]))
@ -111,7 +135,10 @@ block: # Tiger
windingRule: NonZero windingRule: NonZero
)) ))
# benchmarks.add(fills) benchmarks.add(Benchmark(
name: "tiger",
fills: fills
))
block: block:
for benchmark in benchmarks: for benchmark in benchmarks:

View file

@ -52,7 +52,7 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
proc fillUnsafe*( proc fillUnsafe*(
data: var seq[uint8], value: uint8, start, len: int data: var seq[uint8], value: uint8, start, len: int
) {.raises: [].} = ) {.inline, raises: [].} =
## Fills the mask data with the value starting at index start and ## Fills the mask data with the value starting at index start and
## continuing for len indices. ## continuing for len indices.
nimSetMem(data[start].addr, value.cint, len) nimSetMem(data[start].addr, value.cint, len)
@ -62,9 +62,7 @@ proc fillUnsafe*(
) {.raises: [].} = ) {.raises: [].} =
## Fills the image data with the color starting at index start and ## Fills the image data with the color starting at index start and
## continuing for len indices. ## continuing for len indices.
let rgbx = color.asRgbx() let rgbx = color.asRgbx()
# Use memset when every byte has the same value # Use memset when every byte has the same value
if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a: if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a:
nimSetMem(data[start].addr, rgbx.r.cint, len * 4) nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
@ -78,11 +76,13 @@ proc fillUnsafe*(
# When supported, SIMD fill until we run out of room # When supported, SIMD fill until we run out of room
let let
colorVec = mm_set1_epi32(cast[int32](rgbx)) colorVec = mm_set1_epi32(cast[int32](rgbx))
remaining = start + len - i iterations = (start + len - i) div 8
for _ in 0 ..< remaining div 8: var p = cast[uint](data[i].addr)
mm_store_si128(data[i + 0].addr, colorVec) for _ in 0 ..< iterations:
mm_store_si128(data[i + 4].addr, colorVec) mm_store_si128(cast[pointer](p), colorVec)
i += 8 mm_store_si128(cast[pointer](p + 16), colorVec)
p += 32
i += iterations * 8
else: else:
when sizeof(int) == 8: when sizeof(int) == 8:
# Fill 8 bytes at a time when possible # Fill 8 bytes at a time when possible

View file

@ -44,10 +44,7 @@ type
Partition = object Partition = object
entries: seq[PartitionEntry] entries: seq[PartitionEntry]
requiresAntiAliasing: bool requiresAntiAliasing: bool
bottom: int
Partitioning = object
partitions: seq[Partition]
startY, partitionHeight: uint32
Fixed32 = int32 ## 24.8 fixed point Fixed32 = int32 ## 24.8 fixed point
@ -1122,38 +1119,51 @@ proc requiresAntiAliasing(entries: var seq[PartitionEntry]): bool =
proc partitionSegments( proc partitionSegments(
segments: seq[(Segment, int16)], top, height: int segments: seq[(Segment, int16)], top, height: int
): Partitioning = ): seq[Partition] =
## Puts segments into the height partitions they intersect with. ## Puts segments into the height partitions they intersect with.
let let
maxPartitions = max(1, height div 4).uint32 maxPartitions = max(1, height div 4).uint32
numPartitions = min(maxPartitions, max(1, segments.len div 2).uint32) numPartitions = min(maxPartitions, max(1, segments.len div 2).uint32)
result.partitions.setLen(numPartitions) result.setLen(numPartitions)
result.startY = top.uint32
result.partitionHeight = height.uint32 div numPartitions let
startY = top.uint32
partitionHeight = height.uint32 div numPartitions
for (segment, winding) in segments: for (segment, winding) in segments:
let entry = initPartitionEntry(segment, winding) var entry = initPartitionEntry(segment, winding)
if result.partitionHeight == 0: if partitionHeight == 0:
result.partitions[0].entries.add(entry) result[0].entries.add(move entry)
else: else:
var var
atPartition = max(0, segment.at.y - result.startY.float32).uint32 atPartition = max(0, segment.at.y - startY.float32).uint32
toPartition = max(0, segment.to.y - result.startY.float32).uint32 toPartition = max(0, segment.to.y - startY.float32).uint32
atPartition = atPartition div result.partitionHeight atPartition = atPartition div partitionHeight
toPartition = toPartition div result.partitionHeight toPartition = toPartition div partitionHeight
atPartition = min(atPartition, result.partitions.high.uint32) atPartition = min(atPartition, result.high.uint32)
toPartition = min(toPartition, result.partitions.high.uint32) toPartition = min(toPartition, result.high.uint32)
for i in atPartition .. toPartition: for i in atPartition .. toPartition:
result.partitions[i].entries.add(entry) result[i].entries.add(entry)
for partition in result.partitions.mitems: # Set the bottom values for the partitions (y value where this partition ends)
var partitionBottom = top + partitionHeight.int
for partition in result.mitems:
partition.bottom = partitionBottom
partition.requiresAntiAliasing = partition.requiresAntiAliasing =
requiresAntiAliasing(partition.entries) requiresAntiAliasing(partition.entries)
partitionBottom += partitionHeight.int
proc maxEntryCount(partitioning: var Partitioning): int = # Ensure the final partition goes to the actual bottom
for i in 0 ..< partitioning.partitions.len: # This is needed since the final partition includes
result = max(result, partitioning.partitions[i].entries.len) # height - (height div numPartitions) * numPartitions
result[^1].bottom = top + height
proc maxEntryCount(partitions: var seq[Partition]): int =
for i in 0 ..< partitions.len:
result = max(result, partitions[i].entries.len)
proc fixed32(f: float32): Fixed32 {.inline.} = proc fixed32(f: float32): Fixed32 {.inline.} =
Fixed32(f * 256) Fixed32(f * 256)
@ -1244,19 +1254,14 @@ proc computeCoverage(
aa: var bool, aa: var bool,
width: int, width: int,
y, startX: int, y, startX: int,
partitioning: var Partitioning, partitions: var seq[Partition],
partitionIndex: var int,
windingRule: WindingRule windingRule: WindingRule
) {.inline.} = ) {.inline.} =
let partitionIndex = if y >= partitions[partitionIndex].bottom:
if partitioning.partitions.len == 1: inc partitionIndex
0.uint32
else:
min(
(y.uint32 - partitioning.startY) div partitioning.partitionHeight,
partitioning.partitions.high.uint32
)
aa = partitioning.partitions[partitionIndex].requiresAntiAliasing aa = partitions[partitionIndex].requiresAntiAliasing
let let
quality = if aa: 5 else: 1 # Must divide 255 cleanly (1, 3, 5, 15, 17, 51, 85) quality = if aa: 5 else: 1 # Must divide 255 cleanly (1, 3, 5, 15, 17, 51, 85)
@ -1268,7 +1273,7 @@ proc computeCoverage(
for m in 0 ..< quality: for m in 0 ..< quality:
yLine += offset yLine += offset
numHits = 0 numHits = 0
for entry in partitioning.partitions[partitionIndex].entries.mitems: for entry in partitions[partitionIndex].entries.mitems:
if entry.segment.at.y <= yLine and entry.segment.to.y >= yLine: if entry.segment.at.y <= yLine and entry.segment.to.y >= yLine:
let x = let x =
if entry.m == 0: if entry.m == 0:
@ -1619,16 +1624,15 @@ proc fillHits(
template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) = template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) =
when allowSimd: when allowSimd:
when defined(amd64): when defined(amd64):
let colorVec = mm_set1_epi32(cast[int32](rgbx)) var p = cast[uint](image.data[image.dataIndex(x, y)].addr)
var dataIndex = image.dataIndex(x, y) let
for _ in 0 ..< len div 4: iterations = len div 4
let backdrop = mm_loadu_si128(image.data[dataIndex].addr) colorVec = mm_set1_epi32(cast[int32](rgbx))
mm_storeu_si128( for _ in 0 ..< iterations:
image.data[dataIndex].addr, let backdrop = mm_loadu_si128(cast[pointer](p))
blendProc(backdrop, colorVec) mm_storeu_si128(cast[pointer](p), blendProc(backdrop, colorVec))
) p += 16
x += 4 x += iterations * 4
dataIndex += 4
case blendMode: case blendMode:
of OverwriteBlend: of OverwriteBlend:
@ -1714,16 +1718,15 @@ proc fillHits(
template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) = template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) =
when allowSimd: when allowSimd:
when defined(amd64): when defined(amd64):
let vec255 = mm_set1_epi8(255) var p = cast[uint](mask.data[mask.dataIndex(x, y)].addr)
var dataIndex = mask.dataIndex(x, y) let
for _ in 0 ..< len div 16: iterations = len div 16
let backdrop = mm_loadu_si128(mask.data[dataIndex].addr) vec255 = mm_set1_epi8(255)
mm_storeu_si128( for _ in 0 ..< iterations:
mask.data[dataIndex].addr, let backdrop = mm_loadu_si128(cast[pointer](p))
blendProc(backdrop, vec255) mm_storeu_si128(cast[pointer](p), blendProc(backdrop, vec255))
) p += 16
x += 16 x += iterations * 16
dataIndex += 16
case blendMode: case blendMode:
of NormalBlend, OverwriteBlend: of NormalBlend, OverwriteBlend:
@ -1795,9 +1798,10 @@ proc fillShapes(
raise newException(PixieError, "Path int overflow detected") raise newException(PixieError, "Path int overflow detected")
var var
partitioning = partitionSegments(segments, startY, pathHeight - startY) partitions = partitionSegments(segments, startY, pathHeight - startY)
partitionIndex: int
coverages = newSeq[uint8](pathWidth) coverages = newSeq[uint8](pathWidth)
hits = newSeq[(Fixed32, int16)](partitioning.maxEntryCount) hits = newSeq[(Fixed32, int16)](partitions.maxEntryCount)
numHits: int numHits: int
aa: bool aa: bool
@ -1810,7 +1814,8 @@ proc fillShapes(
image.width, image.width,
y, y,
startX, startX,
partitioning, partitions,
partitionIndex,
windingRule windingRule
) )
if aa: if aa:
@ -1864,9 +1869,10 @@ proc fillShapes(
raise newException(PixieError, "Path int overflow detected") raise newException(PixieError, "Path int overflow detected")
var var
partitioning = partitionSegments(segments, startY, pathHeight) partitions = partitionSegments(segments, startY, pathHeight)
partitionIndex: int
coverages = newSeq[uint8](pathWidth) coverages = newSeq[uint8](pathWidth)
hits = newSeq[(Fixed32, int16)](partitioning.maxEntryCount) hits = newSeq[(Fixed32, int16)](partitions.maxEntryCount)
numHits: int numHits: int
aa: bool aa: bool
@ -1879,7 +1885,8 @@ proc fillShapes(
mask.width, mask.width,
y, y,
startX, startX,
partitioning, partitions,
partitionIndex,
windingRule windingRule
) )
if aa: if aa: