From 34158f3f28099536816729f625f53c6fa2af9da4 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 22 Jun 2022 01:00:49 -0500 Subject: [PATCH 1/4] better benchmarking --- experiments/benchmark_cairo.nim | 67 +++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index f8bbeae..82da4e9 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -13,21 +13,33 @@ type var benchmarks: seq[Benchmark] +let + opaque = newPaint(SolidPaint) + notOpaque = newPaint(SolidPaint) +opaque.color = color(0, 0, 0, 1) +notOpaque.color = color(0, 0, 0, 0.5) + block: # Basic rect let path = newPath() - path.rect(rect(0, 0, 900, 900)) + path.rect(rect(50, 50, 800, 800)) - let - shapes = path.commandsToShapes(true, 1) - paint = newPaint(SolidPaint) - paint.color = color(0, 0, 0, 1) + let shapes = path.commandsToShapes(true, 1) benchmarks.add(Benchmark( - name: "rect", + name: "rect opaque", fills: @[Fill( shapes: shapes, transform: mat3(), - paint: paint, + paint: opaque, + windingRule: NonZero + )])) + + benchmarks.add(Benchmark( + name: "rect not opaque", + fills: @[Fill( + shapes: shapes, + transform: mat3(), + paint: notOpaque, windingRule: NonZero )])) @@ -35,17 +47,23 @@ block: # Rounded rect let path = newPath() path.roundedRect(rect(0, 0, 900, 900), 20, 20, 20, 20) - let - shapes = path.commandsToShapes(true, 1) - paint = newPaint(SolidPaint) - paint.color = color(0, 0, 0, 1) + let shapes = path.commandsToShapes(true, 1) benchmarks.add(Benchmark( - name: "roundedRect", + name: "roundedRect opaque", fills: @[Fill( shapes: shapes, transform: mat3(), - paint: paint, + paint: opaque, + windingRule: NonZero + )])) + + benchmarks.add(Benchmark( + name: "roundedRect not opaque", + fills: @[Fill( + shapes: shapes, + transform: mat3(), + paint: notOpaque, windingRule: NonZero )])) @@ -58,17 +76,23 @@ block: # Heart Q 100,600 100,300 z """) - let - shapes = path.commandsToShapes(true, 1) - paint = newPaint(SolidPaint) - paint.color = color(0, 0, 0, 1) + let shapes = path.commandsToShapes(true, 1) benchmarks.add(Benchmark( - name: "Heart", + name: "heart opaque", fills: @[Fill( shapes: shapes, transform: mat3(), - paint: paint, + paint: opaque, + windingRule: NonZero + )])) + + benchmarks.add(Benchmark( + name: "heart not opaque", + fills: @[Fill( + shapes: shapes, + transform: mat3(), + paint: notOpaque, windingRule: NonZero )])) @@ -111,7 +135,10 @@ block: # Tiger windingRule: NonZero )) - # benchmarks.add(fills) + benchmarks.add(Benchmark( + name: "tiger", + fills: fills + )) block: for benchmark in benchmarks: From 82c7c8b8643078b5b48416e665ce7d7b04631724 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 22 Jun 2022 01:03:32 -0500 Subject: [PATCH 2/4] faster --- src/pixie/internal.nim | 16 ++++++++-------- src/pixie/paths.nim | 42 ++++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index d850a36..f9b255d 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -52,7 +52,7 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} = proc fillUnsafe*( data: var seq[uint8], value: uint8, start, len: int -) {.raises: [].} = +) {.inline, raises: [].} = ## Fills the mask data with the value starting at index start and ## continuing for len indices. nimSetMem(data[start].addr, value.cint, len) @@ -62,9 +62,7 @@ proc fillUnsafe*( ) {.raises: [].} = ## Fills the image data with the color starting at index start and ## continuing for len indices. - let rgbx = color.asRgbx() - # Use memset when every byte has the same value if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a: nimSetMem(data[start].addr, rgbx.r.cint, len * 4) @@ -78,11 +76,13 @@ proc fillUnsafe*( # When supported, SIMD fill until we run out of room let colorVec = mm_set1_epi32(cast[int32](rgbx)) - remaining = start + len - i - for _ in 0 ..< remaining div 8: - mm_store_si128(data[i + 0].addr, colorVec) - mm_store_si128(data[i + 4].addr, colorVec) - i += 8 + iterations = (start + len - i) div 8 + var p = cast[uint](data[i].addr) + for _ in 0 ..< iterations: + mm_store_si128(cast[pointer](p), colorVec) + mm_store_si128(cast[pointer](p + 16), colorVec) + p += 32 + i += iterations * 8 else: when sizeof(int) == 8: # Fill 8 bytes at a time when possible diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 1a2c90e..22afc69 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1133,9 +1133,9 @@ proc partitionSegments( result.partitionHeight = height.uint32 div numPartitions for (segment, winding) in segments: - let entry = initPartitionEntry(segment, winding) + var entry = initPartitionEntry(segment, winding) if result.partitionHeight == 0: - result.partitions[0].entries.add(entry) + result.partitions[0].entries.add(move entry) else: var atPartition = max(0, segment.at.y - result.startY.float32).uint32 @@ -1619,16 +1619,15 @@ proc fillHits( template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) = when allowSimd: when defined(amd64): - let colorVec = mm_set1_epi32(cast[int32](rgbx)) - var dataIndex = image.dataIndex(x, y) - for _ in 0 ..< len div 4: - let backdrop = mm_loadu_si128(image.data[dataIndex].addr) - mm_storeu_si128( - image.data[dataIndex].addr, - blendProc(backdrop, colorVec) - ) - x += 4 - dataIndex += 4 + var p = cast[uint](image.data[image.dataIndex(x, y)].addr) + let + iterations = len div 4 + colorVec = mm_set1_epi32(cast[int32](rgbx)) + for _ in 0 ..< iterations: + let backdrop = mm_loadu_si128(cast[pointer](p)) + mm_storeu_si128(cast[pointer](p), blendProc(backdrop, colorVec)) + p += 16 + x += iterations * 4 case blendMode: of OverwriteBlend: @@ -1714,16 +1713,15 @@ proc fillHits( template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) = when allowSimd: when defined(amd64): - let vec255 = mm_set1_epi8(255) - var dataIndex = mask.dataIndex(x, y) - for _ in 0 ..< len div 16: - let backdrop = mm_loadu_si128(mask.data[dataIndex].addr) - mm_storeu_si128( - mask.data[dataIndex].addr, - blendProc(backdrop, vec255) - ) - x += 16 - dataIndex += 16 + var p = cast[uint](mask.data[mask.dataIndex(x, y)].addr) + let + iterations = len div 16 + vec255 = mm_set1_epi8(255) + for _ in 0 ..< iterations: + let backdrop = mm_loadu_si128(cast[pointer](p)) + mm_storeu_si128(cast[pointer](p), blendProc(backdrop, vec255)) + p += 16 + x += iterations * 16 case blendMode: of NormalBlend, OverwriteBlend: From a4fbb5365eab14e095e6b494dc1e91afa5e8ef00 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 22 Jun 2022 02:02:08 -0500 Subject: [PATCH 3/4] partition heights can now vary --- src/pixie/paths.nim | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 22afc69..694f1af 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -44,6 +44,7 @@ type Partition = object entries: seq[PartitionEntry] requiresAntiAliasing: bool + bottom: int Partitioning = object partitions: seq[Partition] @@ -1147,9 +1148,20 @@ proc partitionSegments( for i in atPartition .. toPartition: result.partitions[i].entries.add(entry) + # Set the bottom values for the partitions (y value where this partition ends) + + var partitionBottom = top + result.partitionHeight.int + for partition in result.partitions.mitems: + partition.bottom = partitionBottom partition.requiresAntiAliasing = requiresAntiAliasing(partition.entries) + partitionBottom += result.partitionHeight.int + + # Ensure the final partition goes to the actual bottom + # This is needed since the final partition includes + # height - (height div numPartitions) * numPartitions + result.partitions[^1].bottom = top + height proc maxEntryCount(partitioning: var Partitioning): int = for i in 0 ..< partitioning.partitions.len: @@ -1245,16 +1257,11 @@ proc computeCoverage( width: int, y, startX: int, partitioning: var Partitioning, + partitionIndex: var int, windingRule: WindingRule ) {.inline.} = - let partitionIndex = - if partitioning.partitions.len == 1: - 0.uint32 - else: - min( - (y.uint32 - partitioning.startY) div partitioning.partitionHeight, - partitioning.partitions.high.uint32 - ) + if y >= partitioning.partitions[partitionIndex].bottom: + inc partitionIndex aa = partitioning.partitions[partitionIndex].requiresAntiAliasing @@ -1794,6 +1801,7 @@ proc fillShapes( var partitioning = partitionSegments(segments, startY, pathHeight - startY) + partitionIndex: int coverages = newSeq[uint8](pathWidth) hits = newSeq[(Fixed32, int16)](partitioning.maxEntryCount) numHits: int @@ -1809,6 +1817,7 @@ proc fillShapes( y, startX, partitioning, + partitionIndex, windingRule ) if aa: @@ -1863,6 +1872,7 @@ proc fillShapes( var partitioning = partitionSegments(segments, startY, pathHeight) + partitionIndex: int coverages = newSeq[uint8](pathWidth) hits = newSeq[(Fixed32, int16)](partitioning.maxEntryCount) numHits: int @@ -1878,6 +1888,7 @@ proc fillShapes( y, startX, partitioning, + partitionIndex, windingRule ) if aa: From 23deffc7619adc9a6c0b188b48556066ab4b97d0 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 22 Jun 2022 02:12:15 -0500 Subject: [PATCH 4/4] simplify --- src/pixie/paths.nim | 66 ++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 694f1af..1c8efe1 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -46,10 +46,6 @@ type requiresAntiAliasing: bool bottom: int - Partitioning = object - partitions: seq[Partition] - startY, partitionHeight: uint32 - Fixed32 = int32 ## 24.8 fixed point const @@ -1123,49 +1119,51 @@ proc requiresAntiAliasing(entries: var seq[PartitionEntry]): bool = proc partitionSegments( segments: seq[(Segment, int16)], top, height: int -): Partitioning = +): seq[Partition] = ## Puts segments into the height partitions they intersect with. let maxPartitions = max(1, height div 4).uint32 numPartitions = min(maxPartitions, max(1, segments.len div 2).uint32) - result.partitions.setLen(numPartitions) - result.startY = top.uint32 - result.partitionHeight = height.uint32 div numPartitions + result.setLen(numPartitions) + + let + startY = top.uint32 + partitionHeight = height.uint32 div numPartitions for (segment, winding) in segments: var entry = initPartitionEntry(segment, winding) - if result.partitionHeight == 0: - result.partitions[0].entries.add(move entry) + if partitionHeight == 0: + result[0].entries.add(move entry) else: var - atPartition = max(0, segment.at.y - result.startY.float32).uint32 - toPartition = max(0, segment.to.y - result.startY.float32).uint32 - atPartition = atPartition div result.partitionHeight - toPartition = toPartition div result.partitionHeight - atPartition = min(atPartition, result.partitions.high.uint32) - toPartition = min(toPartition, result.partitions.high.uint32) + atPartition = max(0, segment.at.y - startY.float32).uint32 + toPartition = max(0, segment.to.y - startY.float32).uint32 + atPartition = atPartition div partitionHeight + toPartition = toPartition div partitionHeight + atPartition = min(atPartition, result.high.uint32) + toPartition = min(toPartition, result.high.uint32) for i in atPartition .. toPartition: - result.partitions[i].entries.add(entry) + result[i].entries.add(entry) # Set the bottom values for the partitions (y value where this partition ends) - var partitionBottom = top + result.partitionHeight.int + var partitionBottom = top + partitionHeight.int - for partition in result.partitions.mitems: + for partition in result.mitems: partition.bottom = partitionBottom partition.requiresAntiAliasing = requiresAntiAliasing(partition.entries) - partitionBottom += result.partitionHeight.int + partitionBottom += partitionHeight.int # Ensure the final partition goes to the actual bottom # This is needed since the final partition includes # height - (height div numPartitions) * numPartitions - result.partitions[^1].bottom = top + height + result[^1].bottom = top + height -proc maxEntryCount(partitioning: var Partitioning): int = - for i in 0 ..< partitioning.partitions.len: - result = max(result, partitioning.partitions[i].entries.len) +proc maxEntryCount(partitions: var seq[Partition]): int = + for i in 0 ..< partitions.len: + result = max(result, partitions[i].entries.len) proc fixed32(f: float32): Fixed32 {.inline.} = Fixed32(f * 256) @@ -1256,14 +1254,14 @@ proc computeCoverage( aa: var bool, width: int, y, startX: int, - partitioning: var Partitioning, + partitions: var seq[Partition], partitionIndex: var int, windingRule: WindingRule ) {.inline.} = - if y >= partitioning.partitions[partitionIndex].bottom: + if y >= partitions[partitionIndex].bottom: inc partitionIndex - aa = partitioning.partitions[partitionIndex].requiresAntiAliasing + aa = partitions[partitionIndex].requiresAntiAliasing let quality = if aa: 5 else: 1 # Must divide 255 cleanly (1, 3, 5, 15, 17, 51, 85) @@ -1275,7 +1273,7 @@ proc computeCoverage( for m in 0 ..< quality: yLine += offset numHits = 0 - for entry in partitioning.partitions[partitionIndex].entries.mitems: + for entry in partitions[partitionIndex].entries.mitems: if entry.segment.at.y <= yLine and entry.segment.to.y >= yLine: let x = if entry.m == 0: @@ -1800,10 +1798,10 @@ proc fillShapes( raise newException(PixieError, "Path int overflow detected") var - partitioning = partitionSegments(segments, startY, pathHeight - startY) + partitions = partitionSegments(segments, startY, pathHeight - startY) partitionIndex: int coverages = newSeq[uint8](pathWidth) - hits = newSeq[(Fixed32, int16)](partitioning.maxEntryCount) + hits = newSeq[(Fixed32, int16)](partitions.maxEntryCount) numHits: int aa: bool @@ -1816,7 +1814,7 @@ proc fillShapes( image.width, y, startX, - partitioning, + partitions, partitionIndex, windingRule ) @@ -1871,10 +1869,10 @@ proc fillShapes( raise newException(PixieError, "Path int overflow detected") var - partitioning = partitionSegments(segments, startY, pathHeight) + partitions = partitionSegments(segments, startY, pathHeight) partitionIndex: int coverages = newSeq[uint8](pathWidth) - hits = newSeq[(Fixed32, int16)](partitioning.maxEntryCount) + hits = newSeq[(Fixed32, int16)](partitions.maxEntryCount) numHits: int aa: bool @@ -1887,7 +1885,7 @@ proc fillShapes( mask.width, y, startX, - partitioning, + partitions, partitionIndex, windingRule )