diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim
index 4b4553e..abcba6b 100644
--- a/src/pixie/blends.nim
+++ b/src/pixie/blends.nim
@@ -1,9 +1,6 @@
 ## Blending modes.
 import chroma, math, common
 
-when defined(amd64) and not defined(pixieNoSimd):
-  import nimsimd/sse2
-
 # See https://www.w3.org/TR/compositing-1/
 # See https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_blend_equation_advanced.txt
 
@@ -34,12 +31,20 @@ type
     bmIntersectMask
     bmExcludeMask
 
-  Blender* = proc(a, b: ColorRGBA): ColorRGBA
+  Blender* = proc(backdrop, source: ColorRGBA): ColorRGBA
+
+when defined(amd64) and not defined(pixieNoSimd):
+  import nimsimd/sse2
+  # Same (backdrop, source) shape as Blender, but on M128i values.
+  type BlenderSimd* = proc(backdrop, source: M128i): M128i
 
 when defined(release):
   {.push checks: off.}
 
-proc blendNormalPremultiplied*(backdrop, source: ColorRGBA): ColorRGBA {.inline.} =
+proc blendAlpha(backdrop, source: uint8): uint8 {.inline.} =
+  uint8(uint32(backdrop) * (255 - source) div 255) + source # src + bd*(255-src)/255
+
+proc blendNormalPremultiplied*(backdrop, source: ColorRGBA): ColorRGBA =
   if backdrop.a == 0:
     return source
   if source.a == 255:
@@ -51,17 +56,21 @@ proc blendNormalPremultiplied*(backdrop, source: ColorRGBA): ColorRGBA {.inline.
   result.r = source.r + ((backdrop.r.uint32 * k) div 255).uint8
   result.g = source.g + ((backdrop.g.uint32 * k) div 255).uint8
   result.b = source.b + ((backdrop.b.uint32 * k) div 255).uint8
-  result.a = source.a + ((backdrop.a.uint32 * k) div 255).uint8
+  result.a = blendAlpha(backdrop.a, source.a)
+
+proc blenderPremultiplied*(blendMode: BlendMode): Blender =
+  ## Only bmNormal has a premultiplied blender; other modes raise PixieError.
+  if blendMode == bmNormal:
+    return blendNormalPremultiplied
+  raise newException(PixieError, "No premultiplied blender for " & $blendMode)
 
 when defined(amd64) and not defined(pixieNoSimd):
-  proc blendNormalPremultiplied*(backdrop, source: M128i): M128i {.inline.} =
+  proc blendNormalPremultipliedSimd*(backdrop, source: M128i): M128i =
     let
       alphaMask = mm_set1_epi32(cast[int32](0xff000000))
       oddMask = mm_set1_epi16(cast[int16](0xff00))
       div255 = mm_set1_epi16(cast[int16](0x8081))
 
-    # Shortcuts didn't help (backdrop.a == 0, source.a == 0, source.a == 255)
-
     var
       sourceAlpha = mm_and_si128(source, alphaMask)
       backdropEven = mm_slli_epi16(backdrop, 8)
@@ -85,6 +94,13 @@ when defined(amd64) and not defined(pixieNoSimd):
       mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
     )
 
+  proc blenderSimd*(blendMode: BlendMode): BlenderSimd =
+    ## Only bmNormal has a SIMD blender; other modes raise PixieError.
+    if blendMode == bmNormal:
+      return blendNormalPremultipliedSimd
+    raise newException(PixieError, "No SIMD blender for " & $blendMode)
+
+
 when defined(release):
   {.pop.}
 
@@ -534,7 +550,7 @@ proc blendOverwrite(backdrop, source: ColorRGBA): ColorRGBA =
   source
 
 proc blender*(blendMode: BlendMode): Blender =
-  case blendMode
+  case blendMode:
   of bmNormal: blendNormal
   of bmDarken: blendDarken
   of bmMultiply: blendMultiply
diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index 5e69d8b..61650ec 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -360,6 +360,9 @@ proc rect*(path: var Path, x, y, w, h: float32) =
   path.lineTo(x, y + h)
   path.closePath()
 
+proc rect*(path: var Path, pos: Vec2, wh: Vec2) {.inline.} =
+  rect(path, pos.x, pos.y, wh.x, wh.y) # forwards to the x, y, w, h overload
+
 proc polygon*(path: var Path, x, y, size: float32, sides: int) =
   ## Draws a n sided regular polygon at (x, y) with size.
   path.moveTo(x + size * cos(0.0), y + size * sin(0.0))
@@ -886,6 +889,7 @@ proc fillShapes(
   image: Image,
   shapes: seq[seq[Vec2]],
   color: ColorRGBA,
+  blendMode: BlendMode,
   windingRule: WindingRule
 ) =
   let (topHalf, bottomHalf, fullHeight) =
@@ -898,6 +902,10 @@ proc fillShapes(
     startX = max(0, bounds.x.int)
     startY = max(0, bounds.y.int)
     stopY = min(image.height, (bounds.y + bounds.h).int)
+    blender = blendMode.blenderPremultiplied()
+
+  when defined(amd64) and not defined(pixieNoSimd):
+    let blenderSimd = blendMode.blenderSimd()
 
   var
     coverages = newSeq[uint8](image.width)
@@ -979,10 +987,7 @@ proc fillShapes(
           let
             index = image.dataIndex(x, y)
             backdrop = mm_loadu_si128(image.data[index].addr)
-          mm_storeu_si128(
-            image.data[index].addr,
-            blendNormalPremultiplied(backdrop, source)
-          )
+          mm_storeu_si128(image.data[index].addr, blenderSimd(backdrop, source))
 
         x += 4
 
@@ -1003,7 +1008,7 @@ proc fillShapes(
           source.a = ((color.a.uint16 * coverage) div 255).uint8
 
         let backdrop = image.getRgbaUnsafe(x, y)
-        image.setRgbaUnsafe(x, y, blendNormalPremultiplied(backdrop, source))
+        image.setRgbaUnsafe(x, y, blender(backdrop, source))
       inc x
 
 proc fillShapes(
@@ -1160,35 +1165,38 @@ proc fillPath*(
   image: Image,
   path: SomePath,
   color: ColorRGBA,
+  blendMode = bmNormal,
   windingRule = wrNonZero
 ) {.inline.} =
-  image.fillShapes(parseSomePath(path), color, windingRule)
+  image.fillShapes(parseSomePath(path), color, blendMode, windingRule)
 
 proc fillPath*(
   image: Image,
   path: SomePath,
   color: ColorRGBA,
   pos: Vec2,
+  blendMode = bmNormal,
   windingRule = wrNonZero
 ) =
   var shapes = parseSomePath(path)
   for shape in shapes.mitems:
     for segment in shape.mitems:
       segment += pos
-  image.fillShapes(shapes, color, windingRule)
+  image.fillShapes(shapes, color, blendMode, windingRule)
 
 proc fillPath*(
   image: Image,
   path: SomePath,
   color: ColorRGBA,
   mat: Mat3,
+  blendMode = bmNormal,
   windingRule = wrNonZero
 ) =
   var shapes = parseSomePath(path)
   for shape in shapes.mitems:
     for segment in shape.mitems:
       segment = mat * segment
-  image.fillShapes(shapes, color, windingRule)
+  image.fillShapes(shapes, color, blendMode, windingRule)
 
 proc fillPath*(
   mask: Mask,
@@ -1226,6 +1234,7 @@ proc strokePath*(
   path: SomePath,
   color: ColorRGBA,
   strokeWidth = 1.0,
+  blendMode = bmNormal,
   windingRule = wrNonZero
 ) =
   let strokeShapes = strokeShapes(
@@ -1233,7 +1242,7 @@ proc strokePath*(
     strokeWidth,
     windingRule
   )
-  image.fillShapes(strokeShapes, color, windingRule)
+  image.fillShapes(strokeShapes, color, blendMode, windingRule)
 
 proc strokePath*(
   image: Image,
@@ -1241,6 +1250,7 @@ proc strokePath*(
   color: ColorRGBA,
   strokeWidth = 1.0,
   pos: Vec2,
+  blendMode = bmNormal,
   windingRule = wrNonZero
 ) =
   var strokeShapes = strokeShapes(
@@ -1251,7 +1261,7 @@ proc strokePath*(
   for shape in strokeShapes.mitems:
     for segment in shape.mitems:
       segment += pos
-  image.fillShapes(strokeShapes, color, windingRule)
+  image.fillShapes(strokeShapes, color, blendMode, windingRule)
 
 proc strokePath*(
   image: Image,
@@ -1259,6 +1269,7 @@ proc strokePath*(
   color: ColorRGBA,
   strokeWidth = 1.0,
   mat: Mat3,
+  blendMode = bmNormal,
   windingRule = wrNonZero
 ) =
   var strokeShapes = strokeShapes(
@@ -1269,7 +1280,7 @@ proc strokePath*(
   for shape in strokeShapes.mitems:
     for segment in shape.mitems:
       segment = mat * segment
-  image.fillShapes(strokeShapes, color, windingRule)
+  image.fillShapes(strokeShapes, color, blendMode, windingRule)
 
 proc strokePath*(
   mask: Mask,