diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim
index 9f0e652..4a5e49b 100644
--- a/src/pixie/blends.nim
+++ b/src/pixie/blends.nim
@@ -11,8 +11,8 @@ when defined(amd64) and allowSimd:
 type
   Blender* = proc(backdrop, source: ColorRGBX): ColorRGBX {.gcsafe, raises: [].}
     ## Function signature returned by blender.
-  Masker* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].}
-    ## Function signature returned by masker.
+  MaskBlender* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].}
+    ## Function signature returned by maskBlender.
 
 when defined(release):
   {.push checks: off.}
@@ -484,29 +484,29 @@ proc maskBlendExclude*(backdrop, source: uint8): uint8 {.inline.} =
   ## Exclude blend masks
   max(backdrop, source) - min(backdrop, source)
 
-proc maskBlendNormalMasker(backdrop, source: uint8): uint8 =
+proc maskBlendNormalMaskBlender(backdrop, source: uint8): uint8 =
   maskBlendNormal(backdrop, source)
 
-proc maskBlendMaskMasker(backdrop, source: uint8): uint8 =
+proc maskBlendMaskMaskBlender(backdrop, source: uint8): uint8 =
   maskBlendMask(backdrop, source)
 
-proc maskBlendSubtractMasker(backdrop, source: uint8): uint8 =
+proc maskBlendSubtractMaskBlender(backdrop, source: uint8): uint8 =
   maskBlendSubtract(backdrop, source)
 
-proc maskBlendExcludeMasker(backdrop, source: uint8): uint8 =
+proc maskBlendExcludeMaskBlender(backdrop, source: uint8): uint8 =
   maskBlendExclude(backdrop, source)
 
-proc maskBlendOverwriteMasker(backdrop, source: uint8): uint8 =
+proc maskBlendOverwriteMaskBlender(backdrop, source: uint8): uint8 =
   source
 
-proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} =
+proc maskBlender*(blendMode: BlendMode): MaskBlender {.raises: [PixieError].} =
   ## Returns a blend masking function for a given blend masking mode.
   case blendMode:
-  of NormalBlend: maskBlendNormalMasker
-  of MaskBlend: maskBlendMaskMasker
-  of OverwriteBlend: maskBlendOverwriteMasker
-  of SubtractMaskBlend: maskBlendSubtractMasker
-  of ExcludeMaskBlend: maskBlendExcludeMasker
+  of NormalBlend: maskBlendNormalMaskBlender
+  of MaskBlend: maskBlendMaskMaskBlender
+  of OverwriteBlend: maskBlendOverwriteMaskBlender
+  of SubtractMaskBlend: maskBlendSubtractMaskBlender
+  of ExcludeMaskBlend: maskBlendExcludeMaskBlender
   else:
     raise newException(PixieError, "No masker for " & $blendMode)
 
@@ -647,24 +647,63 @@ when defined(amd64) and allowSimd:
 
     mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
 
-  proc maskBlendNormalSimdMasker(backdrop, source: M128i): M128i =
+  proc maskBlendSubtractSimd*(backdrop, source: M128i): M128i {.inline.} =
+    let
+      oddMask = mm_set1_epi16(cast[int16](0xff00))
+      vec255 = mm_set1_epi8(255)
+      div255 = mm_set1_epi16(cast[int16](0x8081))
+
+    let sourceMinus255 = mm_sub_epi8(vec255, source)
+
+    var
+      multiplierEven = mm_slli_epi16(sourceMinus255, 8)
+      multiplierOdd = mm_and_si128(sourceMinus255, oddMask)
+      backdropEven = mm_slli_epi16(backdrop, 8)
+      backdropOdd = mm_and_si128(backdrop, oddMask)
+
+    backdropEven = mm_mulhi_epu16(backdropEven, multiplierEven)
+    backdropOdd = mm_mulhi_epu16(backdropOdd, multiplierOdd)
+
+    backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
+    backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
+
+    mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
+
+  proc maskBlendExcludeSimd*(backdrop, source: M128i): M128i {.inline.} =
+    mm_sub_epi8(mm_max_epu8(backdrop, source), mm_min_epu8(backdrop, source))
+
+  proc maskBlendNormalSimdMaskBlender(backdrop, source: M128i): M128i =
     maskBlendNormalSimd(backdrop, source)
 
-  proc maskBlendMaskSimdMasker(backdrop, source: M128i): M128i =
+  proc maskBlendMaskSimdMaskBlender(backdrop, source: M128i): M128i =
     maskBlendMaskSimd(backdrop, source)
 
-  proc maskerSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
+  proc maskBlendExcludeSimdMaskBlender(backdrop, source: M128i): M128i =
+    maskBlendExcludeSimd(backdrop, source)
+
+  proc maskBlendSubtractSimdMaskBlender(backdrop, source: M128i): M128i =
+    maskBlendSubtractSimd(backdrop, source)
+
+  proc maskBlenderSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
     ## Returns a blend masking function with SIMD support.
     case blendMode:
-    of NormalBlend: maskBlendNormalSimdMasker
-    of MaskBlend: maskBlendMaskSimdMasker
+    of NormalBlend: maskBlendNormalSimdMaskBlender
+    of MaskBlend: maskBlendMaskSimdMaskBlender
     of OverwriteBlend: overwriteSimdBlender
+    of SubtractMaskBlend: maskBlendSubtractSimdMaskBlender
+    of ExcludeMaskBlend: maskBlendExcludeSimdMaskBlender
     else:
       raise newException(PixieError, "No SIMD masker for " & $blendMode)
 
-  proc hasSimdMasker*(blendMode: BlendMode): bool {.inline, raises: [].} =
+  proc hasSimdMaskBlender*(blendMode: BlendMode): bool {.inline, raises: [].} =
     ## Is there a blend masking function with SIMD support?
-    blendMode in {NormalBlend, MaskBlend, OverwriteBlend}
+    blendMode in {
+      NormalBlend,
+      MaskBlend,
+      OverwriteBlend,
+      SubtractMaskBlend,
+      ExcludeMaskBlend
+    }
 
 when defined(release):
   {.pop.}
diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 1f0e26d..ac82d87 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -714,7 +714,7 @@ proc drawUber(
   when type(a) is Image:
     let blender = blendMode.blender()
   else: # a is a Mask
-    let masker = blendMode.masker()
+    let maskBlender = blendMode.maskBlender()
 
   if blendMode == MaskBlend:
     if yMin > 0:
@@ -777,7 +777,7 @@ proc drawUber(
             let sample = b.getRgbaSmooth(srcPos.x, srcPos.y).a
           else: # b is a Mask
             let sample = b.getValueSmooth(srcPos.x, srcPos.y)
-          a.unsafe[x, y] = masker(backdrop, sample)
+          a.unsafe[x, y] = maskBlender(backdrop, sample)
 
         srcPos += dx
 
@@ -972,8 +972,8 @@ proc drawUber(
                   x += 16
                   sx += 16
             else: # is a Mask
-              if blendMode.hasSimdMasker():
-                let maskerSimd = blendMode.maskerSimd()
+              if blendMode.hasSimdMaskBlender():
+                let maskerSimd = blendMode.maskBlenderSimd()
                 for _ in 0 ..< (xStop - xStart) div 16:
                   let backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
                   when type(b) is Image:
@@ -1089,7 +1089,7 @@ proc drawUber(
               let sample = b.unsafe[samplePos.x, samplePos.y].a
             else: # b is a Mask
               let sample = b.unsafe[samplePos.x, samplePos.y]
-            a.unsafe[x, y] = masker(backdrop, sample)
+            a.unsafe[x, y] = maskBlender(backdrop, sample)
           srcPos += dx
 
     if blendMode == MaskBlend:
diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index a75582b..d1f2637 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -1223,6 +1223,20 @@ iterator walk(
     if prevAt != width.float32.fixed32 and count != 0:
       echo "Leak detected: ", count, " @ (", prevAt, ", ", y, ")"
 
+iterator walkInteger(
+  hits: seq[(int32, int16)],
+  numHits: int,
+  windingRule: WindingRule,
+  y, width: int
+): (int, int) =
+  for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
+    let
+      fillStart = prevAt.integer
+      fillLen = at.integer - fillStart
+    if fillLen <= 0:
+      continue
+    yield (fillStart, fillLen)
+
 proc computeCoverage(
   coverages: ptr UncheckedArray[uint8],
   hits: var seq[(Fixed32, int16)],
@@ -1443,9 +1457,9 @@ proc fillCoverage(
 ) =
   var x = startX
   when defined(amd64) and allowSimd:
-    if blendMode.hasSimdMasker():
+    if blendMode.hasSimdMaskBlender():
       let
-        maskerSimd = blendMode.maskerSimd()
+        maskerSimd = blendMode.maskBlenderSimd()
         vecZero = mm_setzero_si128()
       for _ in 0 ..< coverages.len div 16:
         let
@@ -1465,7 +1479,7 @@ proc fillCoverage(
           mm_storeu_si128(mask.data[index].addr, vecZero)
         x += 16
 
-  let masker = blendMode.masker()
+  let maskBlender = blendMode.maskBlender()
   for x in x ..< startX + coverages.len:
     let coverage = coverages[x - startX]
     if coverage != 0 or blendMode == ExcludeMaskBlend:
@@ -1473,7 +1487,7 @@ proc fillCoverage(
         mask.unsafe[x, y] = coverage
       else:
         let backdrop = mask.unsafe[x, y]
-        mask.unsafe[x, y] = masker(backdrop, coverage)
+        mask.unsafe[x, y] = maskBlender(backdrop, coverage)
     elif blendMode == MaskBlend:
       mask.unsafe[x, y] = 0
 
@@ -1481,22 +1495,6 @@ proc fillCoverage(
     mask.clearUnsafe(0, y, startX, y)
     mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
 
-template walkHits(
-  hits: seq[(int32, int16)],
-  numHits: int,
-  windingRule: WindingRule,
-  y, width: int,
-  inner: untyped
-) =
-  for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
-    let
-      fillStart {.inject.} = prevAt.integer
-      fillLen {.inject.} = at.integer - fillStart
-    if fillLen <= 0:
-      continue
-
-    inner
-
 proc fillHits(
   image: Image,
   rgbx: ColorRGBX,
@@ -1506,38 +1504,36 @@ proc fillHits(
   windingRule: WindingRule,
   blendMode: BlendMode
 ) =
-  template simdBlob(image: Image, x: var int, blendProc: untyped) =
+  template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) =
     when allowSimd:
       when defined(amd64):
         let colorVec = mm_set1_epi32(cast[int32](rgbx))
-        for _ in 0 ..< fillLen div 4:
-          let
-            index = image.dataIndex(x, y)
-            backdrop = mm_loadu_si128(image.data[index].addr)
-          mm_storeu_si128(image.data[index].addr, blendProc(backdrop, colorVec))
+        for _ in 0 ..< len div 4:
+          let backdrop = mm_loadu_si128(image.unsafe[x, y].addr)
+          mm_storeu_si128(image.unsafe[x, y].addr, blendProc(backdrop, colorVec))
           x += 4
 
   case blendMode:
   of OverwriteBlend:
-    walkHits hits, numHits, windingRule, y, image.width:
-      fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
+      fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
 
   of NormalBlend:
-    walkHits hits, numHits, windingRule, y, image.width:
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
       if rgbx.a == 255:
-        fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
+        fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
       else:
-        var x = fillStart
-        simdBlob(image, x, blendNormalSimd)
-        for x in x ..< fillStart + fillLen:
+        var x = start
+        simdBlob(image, x, len, blendNormalSimd)
+        for x in x ..< start + len:
           let backdrop = image.unsafe[x, y]
           image.unsafe[x, y] = blendNormal(backdrop, rgbx)
 
   of MaskBlend:
     var filledTo = startX
-    walkHits hits, numHits, windingRule, y, image.width:
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
       block: # Clear any gap between this fill and the previous fill
-        let gapBetween = fillStart - filledTo
+        let gapBetween = start - filledTo
         if gapBetween > 0:
           fillUnsafe(
             image.data,
@@ -1545,12 +1541,12 @@ proc fillHits(
             image.dataIndex(filledTo, y),
             gapBetween
           )
-        filledTo = fillStart + fillLen
+        filledTo = start + len
       block: # Handle this fill
         if rgbx.a != 255:
-          var x = fillStart
-          simdBlob(image, x, blendMaskSimd)
-          for x in x ..< fillStart + fillLen:
+          var x = start
+          simdBlob(image, x, len, blendMaskSimd)
+          for x in x ..< start + len:
             let backdrop = image.unsafe[x, y]
             image.unsafe[x, y] = blendMask(backdrop, rgbx)
 
@@ -1559,8 +1555,8 @@ proc fillHits(
 
   else:
     let blender = blendMode.blender()
-    walkHits hits, numHits, windingRule, y, image.width:
-      for x in fillStart ..< fillStart + fillLen:
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
+      for x in start ..< start + len:
         let backdrop = image.unsafe[x, y]
         image.unsafe[x, y] = blender(backdrop, rgbx)
 
@@ -1572,31 +1568,44 @@ proc fillHits(
   windingRule: WindingRule,
   blendMode: BlendMode
 ) =
+  template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) =
+    when allowSimd:
+      when defined(amd64):
+        let vec255 = mm_set1_epi8(255)
+        for _ in 0 ..< len div 16:
+          let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
+          mm_storeu_si128(mask.unsafe[x, y].addr, blendProc(backdrop, vec255))
+          x += 16
+
   case blendMode:
   of NormalBlend, OverwriteBlend:
-    walkHits hits, numHits, windingRule, y, mask.width:
-      fillUnsafe(mask.data, 255, mask.dataIndex(fillStart, y), fillLen)
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
+      fillUnsafe(mask.data, 255, mask.dataIndex(start, y), len)
 
   of MaskBlend:
     var filledTo = startX
-    walkHits hits, numHits, windingRule,y,  mask.width:
-      let gapBetween = fillStart - filledTo
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
+      let gapBetween = start - filledTo
       if gapBetween > 0:
         fillUnsafe(mask.data, 0, mask.dataIndex(filledTo, y), gapBetween)
-      filledTo = fillStart + fillLen
+      filledTo = start + len
 
     mask.clearUnsafe(0, y, startX, y)
     mask.clearUnsafe(filledTo, y, mask.width, y)
 
   of SubtractMaskBlend:
-    walkHits hits, numHits, windingRule, y, mask.width:
-      for x in fillStart ..< fillStart + fillLen:
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
+      var x = start
+      simdBlob(mask, x, len, maskBlendSubtractSimd)
+      for x in x ..< start + len:
         let backdrop = mask.unsafe[x, y]
         mask.unsafe[x, y] = maskBlendSubtract(backdrop, 255)
 
   of ExcludeMaskBlend:
-    walkHits hits, numHits, windingRule, y, mask.width:
-      for x in fillStart ..< fillStart + fillLen:
+    for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
+      var x = start
+      simdBlob(mask, x, len, maskBlendExcludeSimd)
+      for x in x ..< start + len:
         let backdrop = mask.unsafe[x, y]
         mask.unsafe[x, y] = maskBlendExclude(backdrop, 255)