From 68e2522074a6d8fdb557a585064a95905c9058f7 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 14 Feb 2021 12:22:38 -0600 Subject: [PATCH] performance improvements --- src/pixie/images.nim | 2 +- src/pixie/masks.nim | 2 +- src/pixie/paths.nim | 149 ++++++++++++++++++++++++------------------- 3 files changed, 87 insertions(+), 66 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 3a2efd2..2f6e876 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -68,7 +68,7 @@ proc `[]=`*(image: Image, x, y: int, rgba: ColorRGBA) {.inline.} = if image.inside(x, y): image.setRgbaUnsafe(x, y, rgba) -proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) = +proc fillUnsafe*(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) = ## Fills the image data with the parameter color starting at index start and ## continuing for len indices. diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index e5af319..974983c 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -84,7 +84,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask = mask.getValueUnsafe(x * 2 + 0, y * 2 + 1) result.setValueUnsafe(x, y, (value div 4).uint8) -proc fillUnsafe(data: var seq[uint8], value: uint8, start, len: int) = +proc fillUnsafe*(data: var seq[uint8], value: uint8, start, len: int) = ## Fills the mask data with the parameter value starting at index start and ## continuing for len indices. nimSetMem(data[start].addr, value.cint, len) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 9d10e28..7081239 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -382,7 +382,7 @@ proc roundedRect*( h = wh.y s = splineCircleK - maxRadius = min(w/2, h/2) + maxRadius = min(w / 2, h / 2) nw = min(nw, maxRadius) ne = min(ne, maxRadius) se = min(se, maxRadius) @@ -397,14 +397,14 @@ proc roundedRect*( l1 = vec2(x, y + h - sw) l2 = vec2(x, y + nw) - t1h = t1 + vec2(-nw*s, 0) - t2h = t2 + vec2(+ne*s, 0) - r1h = r1 + vec2(0, -ne*s) - r2h = r2 + vec2(0, +se*s) - b1h = b1 + vec2(+se*s, 0) - b2h = b2 + vec2(-sw*s, 0) - l1h = l1 + vec2(0, +sw*s) - l2h = l2 + vec2(0, -nw*s) + t1h = t1 + vec2(-nw * s, 0) + t2h = t2 + vec2(+ne * s, 0) + r1h = r1 + vec2(0, -ne * s) + r2h = r2 + vec2(0, +se * s) + b1h = b1 + vec2(+se * s, 0) + b2h = b2 + vec2(-sw * s, 0) + l1h = l1 + vec2(0, +sw * s) + l2h = l2 + vec2(0, -nw * s) if clockwise: path.moveTo(t1) @@ -865,13 +865,11 @@ proc partitionSegments( segmentCount += shape.len - 1 let - maxPartitions = max(1, height div 10) - numPartitions = min(maxPartitions, max(1, segmentCount div 10)) - - result.setLen(numPartitions) - - let partitionHeight = height div numPartitions + maxPartitions = max(1, height div 10).uint32 + numPartitions = min(maxPartitions, max(1, segmentCount div 10).uint32) + partitionHeight = (height.uint32 div numPartitions) + var partitions = newSeq[seq[(Segment, int16)]](numPartitions) for shape in shapes: for segment in shape.segments: if segment.at.y == segment.to.y: # Skip horizontal @@ -884,41 +882,46 @@ proc partitionSegments( winding = -1 if partitionHeight == 0: - result[0].add((segment, winding)) + partitions[0].add((segment, winding)) else: - let - atPartition = max(0, segment.at.y).int div partitionHeight - toPartition = max(0, ceil(segment.to.y)).int div partitionHeight - for i in min(atPartition, result.high) .. min(toPartition, result.high): - result[i].add((segment, winding)) + var + atPartition = max(0, segment.at.y).uint32 div partitionHeight + toPartition = max(0, ceil(segment.to.y)).uint32 div partitionHeight + atPartition = clamp(atPartition, 0, partitions.high.uint32) + toPartition = clamp(toPartition, 0, partitions.high.uint32) + for i in atPartition .. toPartition: + partitions[i].add((segment, winding)) + + partitions proc computeCoverages( coverages: var seq[uint8], hits: var seq[(float32, int16)], + numHits: var int, size: Vec2, y: int, partitions: seq[seq[(Segment, int16)]], + partitionHeight: uint32, windingRule: WindingRule -) = +) {.inline.} = const quality = 5 # Must divide 255 cleanly (1, 3, 5, 15, 17, 51, 85) - sampleCoverage = 255.uint8 div quality - ep = 0.0001 * PI + sampleCoverage = (255 div quality).uint8 offset = 1 / quality.float32 initialOffset = offset / 2 - var numHits: int - let - partitionHeight = size.y.int div partitions.len partition = if partitionHeight == 0: - 0 + 0.uint32 else: - min(y div partitionHeight, partitions.high) + min(y.uint32 div partitionHeight, partitions.high.uint32) + + zeroMem(coverages[0].addr, coverages.len) # Do scanlines for this row for m in 0 ..< quality: + const ep = 0.0001 * PI let yLine = y.float32 + initialOffset + offset * m.float32 + ep scanline = Line(a: vec2(0, yLine), b: vec2(size.x, yLine)) @@ -940,9 +943,9 @@ proc computeCoverages( for i in 0 ..< numHits: let (at, winding) = hits[i] - var - fillStart = x.int - leftCover = if at.int - x.int > 0: trunc(x) + 1 - x else: at - x + var fillStart = x.int + + let leftCover = if at.int - x.int > 0: trunc(x) + 1 - x else: at - x if leftCover != 0: inc fillStart if shouldFill(windingRule, count): @@ -978,7 +981,9 @@ proc fillShapes( windingRule: WindingRule, blendMode: BlendMode ) = - let partitions = partitionSegments(shapes, image.height) + let + partitions = partitionSegments(shapes, image.height) + partitionHeight = image.height.uint32 div partitions.len.uint32 # Figure out the total bounds of all the shapes, # rasterize only within the total bounds @@ -995,17 +1000,17 @@ proc fillShapes( var coverages = newSeq[uint8](image.width) hits = newSeq[(float32, int16)](4) + numHits: int for y in startY ..< stopY: - # Reset buffer for this row - zeroMem(coverages[0].addr, coverages.len) - computeCoverages( coverages, hits, + numHits, image.wh, y, partitions, + partitionHeight, windingRule ) @@ -1023,34 +1028,45 @@ proc fillShapes( var coverage = mm_loadu_si128(coverages[x].addr) coverage = mm_and_si128(coverage, first32) - let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128()) + let + index = image.dataIndex(x, y) + eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128()) if mm_movemask_epi8(eqZero) != 0xffff: # If the coverages are not all zero - var source = vColor - - if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) != 0xffff: - # If the coverages are not all 255 + if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) == 0xffff: + # Coverages are all 255 + if color.a == 255 and blendMode == bmNormal: + mm_storeu_si128(image.data[index].addr, vColor) + else: + let backdrop = mm_loadu_si128(image.data[index].addr) + mm_storeu_si128( + image.data[index].addr, + blenderSimd(backdrop, vColor) + ) + else: + # Coverages are not all 255 coverage = unpackAlphaValues(coverage) # Shift the coverages from `a` to `g` and `a` for multiplying coverage = mm_or_si128(coverage, mm_srli_epi32(coverage, 16)) var - colorEven = mm_slli_epi16(source, 8) - colorOdd = mm_and_si128(source, oddMask) + source = vColor + sourceEven = mm_slli_epi16(source, 8) + sourceOdd = mm_and_si128(source, oddMask) - colorEven = mm_mulhi_epu16(colorEven, coverage) - colorOdd = mm_mulhi_epu16(colorOdd, coverage) + sourceEven = mm_mulhi_epu16(sourceEven, coverage) + sourceOdd = mm_mulhi_epu16(sourceOdd, coverage) - colorEven = mm_srli_epi16(mm_mulhi_epu16(colorEven, div255), 7) - colorOdd = mm_srli_epi16(mm_mulhi_epu16(colorOdd, div255), 7) + sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7) + sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7) - source = mm_or_si128(colorEven, mm_slli_epi16(colorOdd, 8)) - - let - index = image.dataIndex(x, y) - backdrop = mm_loadu_si128(image.data[index].addr) - mm_storeu_si128(image.data[index].addr, blenderSimd(backdrop, source)) + source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8)) + let backdrop = mm_loadu_si128(image.data[index].addr) + mm_storeu_si128( + image.data[index].addr, + blenderSimd(backdrop, source) + ) x += 4 while x < image.width: @@ -1069,8 +1085,12 @@ proc fillShapes( source.b = ((color.b.uint16 * coverage) div 255).uint8 source.a = ((color.a.uint16 * coverage) div 255).uint8 - let backdrop = image.getRgbaUnsafe(x, y) - image.setRgbaUnsafe(x, y, blender(backdrop, source)) + if source.a == 255 and blendMode == bmNormal: + # Skip blending + image.setRgbaUnsafe(x, y, source) + else: + let backdrop = image.getRgbaUnsafe(x, y) + image.setRgbaUnsafe(x, y, blender(backdrop, source)) inc x proc fillShapes( @@ -1078,7 +1098,9 @@ proc fillShapes( shapes: seq[seq[Vec2]], windingRule: WindingRule ) = - let partitions = partitionSegments(shapes, mask.height) + let + partitions = partitionSegments(shapes, mask.height) + partitionHeight = mask.height.uint32 div partitions.len.uint32 # Figure out the total bounds of all the shapes, # rasterize only within the total bounds @@ -1088,24 +1110,23 @@ proc fillShapes( startY = max(0, bounds.y.int) stopY = min(mask.height, (bounds.y + bounds.h).int) - var - coverages = newSeq[uint8](mask.width) - hits = newSeq[(float32, int16)](4) - - when defined(amd64) and not defined(pixieNoSimd): let maskerSimd = bmNormal.maskerSimd() - for y in startY ..< stopY: - # Reset buffer for this row - zeroMem(coverages[0].addr, coverages.len) + var + coverages = newSeq[uint8](mask.width) + hits = newSeq[(float32, int16)](4) + numHits: int + for y in startY ..< stopY: computeCoverages( coverages, hits, + numHits, mask.wh, y, partitions, + partitionHeight, windingRule )