From 9e132b64575fccdfbd6d68f622905ee8dbec0180 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Fri, 25 Jun 2021 16:54:32 -0500
Subject: [PATCH 1/9] small stuff

---
 experiments/benchmark_cairo.nim |  8 ++++----
 src/pixie/images.nim            |  3 +--
 tests/benchmark_png.nim         | 17 +++++++++++++++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim
index 02c0677..8c4e71e 100644
--- a/experiments/benchmark_cairo.nim
+++ b/experiments/benchmark_cairo.nim
@@ -1,7 +1,7 @@
 import benchy, cairo, chroma, math, pixie
 
 block:
-  var
+  let
     surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
     ctx = surface.create()
 
@@ -19,7 +19,7 @@ block:
 
   # discard surface.writeToPng("cairo1.png")
 
-  var a = newImage(1920, 1080)
+  let a = newImage(1920, 1080)
   a.fill(rgba(255, 255, 255, 255))
 
   timeIt "pixie1":
@@ -34,7 +34,7 @@ block:
   # a.writeFile("pixie1.png")
 
 block:
-  var
+  let
     surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
     ctx = surface.create()
 
@@ -52,7 +52,7 @@ block:
 
   # discard surface.writeToPng("cairo2.png")
 
-  var a = newImage(1920, 1080)
+  let a = newImage(1920, 1080)
   a.fill(rgba(255, 255, 255, 255))
 
   timeIt "pixie2":
diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index bf174b7..7097630 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -709,7 +709,7 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
     else:
       var x = xMin
       when defined(amd64) and not defined(pixieNoSimd):
-        if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1:
+        if dx == vec2(1, 0) and dy == vec2(0, 1):
           # Check we are not rotated before using SIMD blends
           when type(a) is Image:
             if blendMode.hasSimdBlender():
@@ -732,7 +732,6 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
                   blenderSimd(backdrop, source)
                 )
                 x += 4
-
           else: # is a Mask
             if blendMode.hasSimdMasker():
               let maskerSimd = blendMode.maskerSimd()
diff --git a/tests/benchmark_png.nim b/tests/benchmark_png.nim
index eb0b120..ec13a7e 100644
--- a/tests/benchmark_png.nim
+++ b/tests/benchmark_png.nim
@@ -1,7 +1,9 @@
-import benchy, nimPNG, pixie/fileformats/png, stb_image/read as stbi,
+import benchy, cairo, nimPNG, pixie/fileformats/png, stb_image/read as stbi,
     stb_image/write as stbr
 
-let data = readFile("tests/images/png/lenna.png")
+let
+  filePath = "tests/images/png/lenna.png"
+  data = readFile(filePath)
 
 timeIt "pixie decode":
   keep decodePng(cast[seq[uint8]](data))
@@ -37,3 +39,14 @@ timeIt "stb_image encode":
     stbi.RGBA
   )
   keep writePNG(width, height, channels, decoded).len
+
+timeIt "cairo decode":
+  keep imageSurfaceCreateFromPng(filePath)
+
+timeIt "cairo encode":
+  let decoded = imageSurfaceCreateFromPng(filePath)
+
+  var write: WriteFunc = proc(closure: pointer, data: cstring, len: int32): Status {.cdecl.} =
+    StatusSuccess
+
+  discard decoded.writeToPng(write, nil)

From 205f091012eb22dd1325ea95b27ebd1299336898 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Fri, 25 Jun 2021 17:07:14 -0500
Subject: [PATCH 2/9] shorter

---
 src/pixie/images.nim | 22 +++++++++-------------
 src/pixie/masks.nim  | 26 +++++++++++++-------------
 2 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 7097630..68c1bb4 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -517,16 +517,12 @@ proc getRgbaSmooth*(image: Image, x, y: float32, wrapped = false): ColorRGBX =
   ## Gets a interpolated color with float point coordinates.
   ## Pixes outside the image are transparent.
   let
-    minX = floor(x)
-    minY = floor(y)
-    diffX = x - minX
-    diffY = y - minY
-    x = minX.int
-    y = minY.int
-    x0 = (x + 0)
-    y0 = (y + 0)
-    x1 = (x + 1)
-    y1 = (y + 1)
+    x0 = x.floor.int # floor, not truncation: x = -0.25 must pick pixel -1
+    y0 = y.floor.int # (float -> int conversion rounds toward zero)
+    x1 = x0 + 1
+    y1 = y0 + 1
+    xFractional = x - x.floor # weight toward x1, in [0, 1) for any sign
+    yFractional = y - y.floor # weight toward y1, in [0, 1) for any sign
 
   var x0y0, x1y0, x0y1, x1y1: ColorRGBX
   if wrapped:
@@ -541,10 +537,10 @@ proc getRgbaSmooth*(image: Image, x, y: float32, wrapped = false): ColorRGBX =
     x1y1 = image[x1, y1]
 
   let
-    bottomMix = lerp(x0y0, x1y0, diffX)
-    topMix = lerp(x0y1, x1y1, diffX)
+    bottomMix = lerp(x0y0, x1y0, xFractional)
+    topMix = lerp(x0y1, x1y1, xFractional)
 
-  lerp(bottomMix, topMix, diffY)
+  lerp(bottomMix, topMix, yFractional)
 
 proc drawCorrect(
   a, b: Image | Mask, mat = mat3(), tiled = false, blendMode = bmNormal
diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 9887e6e..30cafcf 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -159,22 +159,22 @@ proc fill*(mask: Mask, value: uint8) {.inline.} =
 proc getValueSmooth*(mask: Mask, x, y: float32): uint8 =
   ## Gets a interpolated value with float point coordinates.
   let
-    minX = floor(x)
-    minY = floor(y)
-    diffX = x - minX
-    diffY = y - minY
-    x = minX.int
-    y = minY.int
+    x0 = x.floor.int # floor, not truncation: x = -0.25 must pick pixel -1
+    y0 = y.floor.int # (float -> int conversion rounds toward zero)
+    x1 = x0 + 1
+    y1 = y0 + 1
+    xFractional = x - x.floor # weight toward x1, in [0, 1) for any sign
+    yFractional = y - y.floor # weight toward y1, in [0, 1) for any sign
 
-    x0y0 = mask[x + 0, y + 0]
-    x1y0 = mask[x + 1, y + 0]
-    x0y1 = mask[x + 0, y + 1]
-    x1y1 = mask[x + 1, y + 1]
+    x0y0 = mask[x0, y0]
+    x1y0 = mask[x1, y0]
+    x0y1 = mask[x0, y1]
+    x1y1 = mask[x1, y1]
 
-    bottomMix = lerp(x0y0, x1y0, diffX)
-    topMix = lerp(x0y1, x1y1, diffX)
+    bottomMix = lerp(x0y0, x1y0, xFractional)
+    topMix = lerp(x0y1, x1y1, xFractional)
 
-  lerp(bottomMix, topMix, diffY)
+  lerp(bottomMix, topMix, yFractional)
 
 proc spread*(mask: Mask, spread: float32) =
   ## Grows the mask by spread.

From 14cfd34750ef5486a76d28a31217513ccd818f38 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Fri, 25 Jun 2021 20:49:05 -0500
Subject: [PATCH 3/9] add benchmark

---
 experiments/benchmark_cairo.nim | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim
index 8c4e71e..1bb3bc0 100644
--- a/experiments/benchmark_cairo.nim
+++ b/experiments/benchmark_cairo.nim
@@ -65,3 +65,32 @@ block:
     a.fillPath(p, rgba(0, 0, 255, 255))
 
   # a.writeFile("pixie2.png")
+
+block:
+  let
+    a = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
+    b = imageSurfaceCreate(FORMAT_ARGB32, 500, 500)
+    ac = a.create()
+    bc = b.create()
+
+  ac.setSourceRgba(1, 0, 0, 1)
+  ac.newPath()
+  ac.rectangle(0, 0, 1000, 1000)
+  ac.fill()
+
+  bc.setSourceRgba(0, 1, 0, 1)
+  bc.newPath()
+  bc.rectangle(0, 0, 500, 500)
+  bc.fill()
+
+  let pattern = patternCreateForSurface(b)
+
+  timeIt "a":
+    ac.setSource(pattern)
+    ac.save()
+    ac.translate(25.2, 25.2)
+    ac.rectangle(0, 0, 500, 500)
+    ac.fill()
+    ac.restore()
+
+  discard a.writeToPng("a.png")

From b1bc2d048cafca8c016cfa8aa1e636e8374d0256 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Fri, 25 Jun 2021 22:24:45 -0500
Subject: [PATCH 4/9] name fix

---
 src/pixie/images.nim | 6 +++---
 src/pixie/masks.nim  | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 68c1bb4..868320d 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -537,10 +537,10 @@ proc getRgbaSmooth*(image: Image, x, y: float32, wrapped = false): ColorRGBX =
     x1y1 = image[x1, y1]
 
   let
-    bottomMix = lerp(x0y0, x1y0, xFractional)
-    topMix = lerp(x0y1, x1y1, xFractional)
+    topMix = lerp(x0y0, x1y0, xFractional)
+    bottomMix = lerp(x0y1, x1y1, xFractional)
 
-  lerp(bottomMix, topMix, yFractional)
+  lerp(topMix, bottomMix, yFractional)
 
 proc drawCorrect(
   a, b: Image | Mask, mat = mat3(), tiled = false, blendMode = bmNormal
diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 30cafcf..92ae347 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -171,10 +171,10 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 =
     x0y1 = mask[x0, y1]
     x1y1 = mask[x1, y1]
 
-    bottomMix = lerp(x0y0, x1y0, xFractional)
-    topMix = lerp(x0y1, x1y1, xFractional)
+    topMix = lerp(x0y0, x1y0, xFractional)
+    bottomMix = lerp(x0y1, x1y1, xFractional)
 
-  lerp(bottomMix, topMix, yFractional)
+  lerp(topMix, bottomMix, yFractional)
 
 proc spread*(mask: Mask, spread: float32) =
   ## Grows the mask by spread.

From 42c1a5e2be25e4455ca1baa1de135227f34dc1ae Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Fri, 25 Jun 2021 22:30:49 -0500
Subject: [PATCH 5/9] f

---
 src/pixie/images.nim | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 868320d..b1bfe7e 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -29,8 +29,7 @@ proc newImage*(mask: Mask): Image =
   var i: int
   when defined(amd64) and not defined(pixieNoSimd):
     for _ in countup(0, mask.data.len - 16, 4):
-      let values = mm_loadu_si128(mask.data[i].addr)
-      var alphas = unpackAlphaValues(values)
+      var alphas = unpackAlphaValues(mm_loadu_si128(mask.data[i].addr))
       alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 8))
       alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 16))
       mm_storeu_si128(result.data[i].addr, alphas)

From fbc6df5fa0e2ef134d1aa0b8d2508675e6d79080 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Fri, 25 Jun 2021 22:41:09 -0500
Subject: [PATCH 6/9] not inline

---
 src/pixie/contexts.nim | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/pixie/contexts.nim b/src/pixie/contexts.nim
index 85c16a7..4bba302 100644
--- a/src/pixie/contexts.nim
+++ b/src/pixie/contexts.nim
@@ -158,7 +158,7 @@ proc stroke(ctx: Context, image: Image, path: Path) =
     ctx.layer.applyOpacity(ctx.globalAlpha)
     ctx.restore()
 
-proc fillText(ctx: Context, image: Image, text: string, at: Vec2) {.inline.} =
+proc fillText(ctx: Context, image: Image, text: string, at: Vec2) =
   if ctx.font.typeface == nil:
     raise newException(PixieError, "No font has been set on this Context")
 
@@ -185,7 +185,7 @@ proc fillText(ctx: Context, image: Image, text: string, at: Vec2) {.inline.} =
     ctx.layer.applyOpacity(ctx.globalAlpha)
     ctx.restore()
 
-proc strokeText(ctx: Context, image: Image, text: string, at: Vec2) {.inline.} =
+proc strokeText(ctx: Context, image: Image, text: string, at: Vec2) =
   if ctx.font.typeface == nil:
     raise newException(PixieError, "No font has been set on this Context")
 
@@ -293,7 +293,7 @@ proc ellipse*(ctx: Context, x, y, rx, ry: float32) {.inline.} =
   ## Adds an ellipse to the current sub-path.
   ctx.path.ellipse(x, y, rx, ry)
 
-proc fill*(ctx: Context, path: Path, windingRule = wrNonZero) {.inline.} =
+proc fill*(ctx: Context, path: Path, windingRule = wrNonZero) =
   ## Fills the path with the current fillStyle.
   if ctx.mask != nil and ctx.layer == nil:
     ctx.saveLayer()
@@ -308,7 +308,7 @@ proc fill*(ctx: Context, windingRule = wrNonZero) {.inline.} =
   ## Fills the current path with the current fillStyle.
   ctx.fill(ctx.path, windingRule)
 
-proc clip*(ctx: Context, path: Path, windingRule = wrNonZero) {.inline.} =
+proc clip*(ctx: Context, path: Path, windingRule = wrNonZero) =
   ## Turns the path into the current clipping region. The previous clipping
   ## region, if any, is intersected with the current or given path to create
   ## the new clipping region.
@@ -326,7 +326,7 @@ proc clip*(ctx: Context, windingRule = wrNonZero) {.inline.} =
   ## to create the new clipping region.
   ctx.clip(ctx.path, windingRule)
 
-proc stroke*(ctx: Context, path: Path) {.inline.} =
+proc stroke*(ctx: Context, path: Path) =
   ## Strokes (outlines) the current or given path with the current strokeStyle.
   if ctx.mask != nil and ctx.layer == nil:
     ctx.saveLayer()

From 0656d7566a0e9f2f1437d6f6fe64119e1c27d144 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Sat, 26 Jun 2021 20:46:10 -0500
Subject: [PATCH 7/9] f

---
 src/pixie/paths.nim | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index e3f8bd3..ab0d9d3 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -26,8 +26,8 @@ type
 
   PathCommand* = object
     ## Binary version of an SVG command.
-    kind*: PathCommandKind
-    numbers*: seq[float32]
+    kind: PathCommandKind
+    numbers: seq[float32]
 
   Path* = object
     ## Used to hold paths and create paths.

From 60f5801b71c96674735c5236a71eab9a2ec989aa Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Sat, 26 Jun 2021 23:11:18 -0500
Subject: [PATCH 8/9] fill paths faster

---
 src/pixie/paths.nim | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index ab0d9d3..dc5a8e3 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -1152,7 +1152,7 @@ proc computeCoverages(
   hits: var seq[(float32, int16)],
   numHits: var int,
   size: Vec2,
-  y: int,
+  y, startX: int,
   aa: bool,
   partitioning: Partitioning,
   windingRule: WindingRule
@@ -1206,12 +1206,12 @@ proc computeCoverages(
               at - prevAt
         if leftCover != 0:
           inc fillStart
-          coverages[prevAt.int] += (leftCover * sampleCoverage.float32).uint8
+          coverages[prevAt.int - startX] += (leftCover * sampleCoverage.float32).uint8
 
         if pixelCrossed:
           let rightCover = at - trunc(at)
           if rightCover > 0:
-            coverages[at.int] += (rightCover * sampleCoverage.float32).uint8
+            coverages[at.int - startX] += (rightCover * sampleCoverage.float32).uint8
 
         let fillLen = at.int - fillStart
         if fillLen > 0:
@@ -1219,12 +1219,12 @@ proc computeCoverages(
           when defined(amd64) and not defined(pixieNoSimd):
             let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
             for j in countup(i, fillStart + fillLen - 16, 16):
-              var coverage = mm_loadu_si128(coverages[j].addr)
+              var coverage = mm_loadu_si128(coverages[j - startX].addr)
               coverage = mm_add_epi8(coverage, vSampleCoverage)
-              mm_storeu_si128(coverages[j].addr, coverage)
+              mm_storeu_si128(coverages[j - startX].addr, coverage)
               i += 16
           for j in i ..< fillStart + fillLen:
-            coverages[j] += sampleCoverage
+            coverages[j - startX] += sampleCoverage
 
 proc clearUnsafe(target: Image | Mask, startX, startY, toX, toY: int) =
   ## Clears data from [start, to).
@@ -1253,8 +1253,8 @@ proc fillCoverage(
         oddMask = mm_set1_epi16(cast[int16](0xff00))
         div255 = mm_set1_epi16(cast[int16](0x8081))
         vColor = mm_set1_epi32(cast[int32](rgbx))
-      for _ in countup(x, image.width - 16, 4):
-        var coverage = mm_loadu_si128(coverages[x].unsafeAddr)
+      for _ in countup(x, startX + coverages.len - 16, 4):
+        var coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
         coverage = mm_and_si128(coverage, first32)
 
         let
@@ -1301,8 +1301,8 @@ proc fillCoverage(
         x += 4
 
   let blender = blendMode.blender()
-  while x < image.width:
-    let coverage = coverages[x]
+  while x < startX + coverages.len:
+    let coverage = coverages[x - startX]
     if coverage != 0 or blendMode == bmExcludeMask:
       if blendMode == bmNormal and coverage == 255 and rgbx.a == 255:
         # Skip blending
@@ -1322,6 +1322,7 @@ proc fillCoverage(
 
   if blendMode == bmMask:
     image.clearUnsafe(0, y, startX, y)
+    image.clearUnsafe(startX + coverages.len, y, image.width, y)
 
 proc fillCoverage(
   mask: Mask,
@@ -1333,10 +1334,10 @@ proc fillCoverage(
   when defined(amd64) and not defined(pixieNoSimd):
     if blendMode.hasSimdMasker():
       let maskerSimd = blendMode.maskerSimd()
-      for _ in countup(x, coverages.len - 16, 16):
+      for _ in countup(x, startX + coverages.len - 16, 16):
         let
           index = mask.dataIndex(x, y)
-          coverage = mm_loadu_si128(coverages[x].unsafeAddr)
+          coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
           eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
         if mm_movemask_epi8(eqZero) != 0xffff: # or blendMode == bmExcludeMask:
           # If the coverages are not all zero
@@ -1350,8 +1351,8 @@ proc fillCoverage(
         x += 16
 
   let masker = blendMode.masker()
-  while x < mask.width:
-    let coverage = coverages[x]
+  while x < startX + coverages.len:
+    let coverage = coverages[x - startX]
     if coverage != 0 or blendMode == bmExcludeMask:
       let backdrop = mask.getValueUnsafe(x, y)
       mask.setValueUnsafe(x, y, masker(backdrop, coverage))
@@ -1361,6 +1362,7 @@ proc fillCoverage(
 
   if blendMode == bmMask:
     mask.clearUnsafe(0, y, startX, y)
+    mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
 
 proc fillHits(
   image: Image,
@@ -1476,7 +1478,7 @@ proc fillShapes(
     partitioning = partitionSegments(segments, startY, pathHeight - startY)
 
   var
-    coverages = newSeq[uint8](image.width)
+    coverages = newSeq[uint8](bounds.w.int)
     hits = newSeq[(float32, int16)](4)
     numHits: int
 
@@ -1487,6 +1489,7 @@ proc fillShapes(
       numHits,
       image.wh,
       y,
+      startX,
       aa,
       partitioning,
       windingRule
@@ -1532,7 +1535,7 @@ proc fillShapes(
     partitioning = partitionSegments(segments, startY, pathHeight)
 
   var
-    coverages = newSeq[uint8](mask.width)
+    coverages = newSeq[uint8](bounds.w.int)
     hits = newSeq[(float32, int16)](4)
     numHits: int
 
@@ -1543,6 +1546,7 @@ proc fillShapes(
       numHits,
       mask.wh,
       y,
+      startX,
       aa,
       partitioning,
       windingRule

From c765026b672d3690da4db7bf682a6743f57ae1f7 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg <ryan@guzba.com>
Date: Sat, 26 Jun 2021 23:16:35 -0500
Subject: [PATCH 9/9] morepretty

---
 src/pixie/masks.nim     | 20 +++++++++++---------
 src/pixie/paths.nim     |  6 ++++--
 tests/benchmark_png.nim |  5 +++--
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 92ae347..e81d16a 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -81,7 +81,9 @@ proc minifyBy2*(mask: Mask, power = 1): Mask =
       when defined(amd64) and not defined(pixieNoSimd):
         let
           oddMask = mm_set1_epi16(cast[int16](0xff00))
-          first8 = cast[M128i]([uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+          firstByte = cast[M128i](
+            [uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+          )
         for _ in countup(0, result.width - 16, 8):
           let
             top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
@@ -114,14 +116,14 @@ proc minifyBy2*(mask: Mask, power = 1): Mask =
 
             # merged has the correct values in the even indices
 
-            a = mm_and_si128(merged, first8)
-            b = mm_and_si128(mm_srli_si128(merged, 2), first8)
-            c = mm_and_si128(mm_srli_si128(merged, 4), first8)
-            d = mm_and_si128(mm_srli_si128(merged, 6), first8)
-            e = mm_and_si128(mm_srli_si128(merged, 8), first8)
-            f = mm_and_si128(mm_srli_si128(merged, 10), first8)
-            g = mm_and_si128(mm_srli_si128(merged, 12), first8)
-            h = mm_and_si128(mm_srli_si128(merged, 14), first8)
+            a = mm_and_si128(merged, firstByte)
+            b = mm_and_si128(mm_srli_si128(merged, 2), firstByte)
+            c = mm_and_si128(mm_srli_si128(merged, 4), firstByte)
+            d = mm_and_si128(mm_srli_si128(merged, 6), firstByte)
+            e = mm_and_si128(mm_srli_si128(merged, 8), firstByte)
+            f = mm_and_si128(mm_srli_si128(merged, 10), firstByte)
+            g = mm_and_si128(mm_srli_si128(merged, 12), firstByte)
+            h = mm_and_si128(mm_srli_si128(merged, 14), firstByte)
 
             ab = mm_or_si128(a, mm_slli_si128(b, 1))
             cd = mm_or_si128(c, mm_slli_si128(d, 1))
diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index dc5a8e3..c911417 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -1206,12 +1206,14 @@ proc computeCoverages(
               at - prevAt
         if leftCover != 0:
           inc fillStart
-          coverages[prevAt.int - startX] += (leftCover * sampleCoverage.float32).uint8
+          coverages[prevAt.int - startX] +=
+            (leftCover * sampleCoverage.float32).uint8
 
         if pixelCrossed:
           let rightCover = at - trunc(at)
           if rightCover > 0:
-            coverages[at.int - startX] += (rightCover * sampleCoverage.float32).uint8
+            coverages[at.int - startX] +=
+              (rightCover * sampleCoverage.float32).uint8
 
         let fillLen = at.int - fillStart
         if fillLen > 0:
diff --git a/tests/benchmark_png.nim b/tests/benchmark_png.nim
index ec13a7e..637db62 100644
--- a/tests/benchmark_png.nim
+++ b/tests/benchmark_png.nim
@@ -46,7 +46,8 @@ timeIt "cairo decode":
 timeIt "cairo encode":
   let decoded = imageSurfaceCreateFromPng(filePath)
 
-  var write: WriteFunc = proc(closure: pointer, data: cstring, len: int32): Status {.cdecl.} =
-    StatusSuccess
+  var write: WriteFunc =
+    proc(closure: pointer, data: cstring, len: int32): Status {.cdecl.} =
+      StatusSuccess
 
   discard decoded.writeToPng(write, nil)