diff --git a/src/pixie/common.nim b/src/pixie/common.nim
index 8b04458..f558666 100644
--- a/src/pixie/common.nim
+++ b/src/pixie/common.nim
@@ -46,6 +46,16 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
   result.height = height
   result.data = newSeq[ColorRGBX](width * height)
 
+proc copy*(image: Image): Image {.raises: [].} =
+  ## Returns a new image with the same size and a copy of the pixel data.
+  Image(
+    width: image.width, height: image.height,
+    data: image.data
+  )
+
+template dataIndex*(image: Image, x, y: int): int =
+  y * image.width + x # Offset of pixel (x, y) in the row-major image.data.
+
 proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} =
   ## Linearly interpolate between a and b using t.
   let t = round(t * 255).uint32
@@ -59,6 +69,18 @@ proc mix*(a, b: ColorRGBX, t: float32): ColorRGBX {.inline, raises: [].} =
   result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8
   result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8
 
+proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
+  ## Scales every channel of color by opacity, rounded to 1/255 steps.
+  if opacity != 0:
+    let scale = round(opacity * 255).uint32
+    rgbx(
+      ((color.r * scale) div 255).uint8,
+      ((color.g * scale) div 255).uint8,
+      ((color.b * scale) div 255).uint8,
+      ((color.a * scale) div 255).uint8
+    )
+  else: rgbx(0, 0, 0, 0)
+
 proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
   let
     xMin = rect.x
diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index ba48e7a..d2871e4 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -1,6 +1,6 @@
 import blends, bumpy, chroma, common, internal, simd, vmath
 
-export Image, newImage
+export Image, newImage, copy, dataIndex
 
 const h = 0.5.float32
 
@@ -9,13 +9,6 @@ type UnsafeImage = distinct Image
 when defined(release):
   {.push checks: off.}
 
-proc copy*(image: Image): Image {.raises: [].} =
-  ## Copies the image data into a new image.
-  result = Image()
-  result.width = image.width
-  result.height = image.height
-  result.data = image.data
-
 proc `$`*(image: Image): string {.raises: [].} =
   ## Prints the image size.
   "<Image " & $image.width & "x" & $image.height & ">"
@@ -24,9 +17,6 @@ proc inside*(image: Image, x, y: int): bool {.inline, raises: [].} =
   ## Returns true if (x, y) is inside the image.
   x >= 0 and x < image.width and y >= 0 and y < image.height
 
-proc dataIndex*(image: Image, x, y: int): int {.inline, raises: [].} =
-  image.width * y + x
-
 template unsafe*(src: Image): UnsafeImage =
   cast[UnsafeImage](src)
 
@@ -167,7 +157,9 @@ proc diff*(master, image: Image): (float32, Image) {.raises: [PixieError].} =
 
   (100 * diffScore.float32 / diffTotal.float32, diffImage)
 
-proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
+proc minifyBy2*(
+  image: Image, power = 1
+): Image {.hasSimd, raises: [PixieError].} =
   ## Scales the image down by an integer scale.
   if power < 0:
     raise newException(PixieError, "Cannot minifyBy2 with negative power")
@@ -188,90 +180,50 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
       if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
     )
     for y in 0 ..< resultEvenHeight:
-      var x: int
-      when defined(amd64) and allowSimd:
+      let
+        topRowStart = src.dataIndex(0, y * 2)
+        bottomRowStart = src.dataIndex(0, y * 2 + 1)
+      for x in 0 ..< resultEvenWidth:
         let
-          oddMask = mm_set1_epi16(cast[int16](0xff00))
-          mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
-        for _ in countup(0, resultEvenWidth - 4, 2):
-          let
-            top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
-            btm = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 1)].addr)
-            topShifted = mm_srli_si128(top, 4)
-            btmShifted = mm_srli_si128(btm, 4)
-
-            topEven = mm_andnot_si128(oddMask, top)
-            topOdd = mm_srli_epi16(top, 8)
-            btmEven = mm_andnot_si128(oddMask, btm)
-            btmOdd = mm_srli_epi16(btm, 8)
-
-            topShiftedEven = mm_andnot_si128(oddMask, topShifted)
-            topShiftedOdd = mm_srli_epi16(topShifted, 8)
-            btmShiftedEven = mm_andnot_si128(oddMask, btmShifted)
-            btmShiftedOdd = mm_srli_epi16(btmShifted, 8)
-
-            topAddedEven = mm_add_epi16(topEven, topShiftedEven)
-            btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven)
-            topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
-            btmAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd)
-
-            addedEven = mm_add_epi16(topAddedEven, btmAddedEven)
-            addedOdd = mm_add_epi16(topAddedOdd, btmAddedOdd)
-
-            addedEvenDiv4 = mm_srli_epi16(addedEven, 2)
-            addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
-
-            merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
-            # Merged has the correct values for the next two pixels at
-            # index 0 and 2 so mask the others out and shift 0 and 2 into
-            # position and store
-            masked = mm_and_si128(merged, mergedMask)
-
-          mm_storeu_si128(
-            result.data[result.dataIndex(x, y)].addr,
-            mm_shuffle_epi32(masked, MM_SHUFFLE(0, 0, 2, 0))
-          )
-          x += 2
-
-      for x in x ..< resultEvenWidth:
-        let
-          a = src.unsafe[x * 2 + 0, y * 2 + 0]
-          b = src.unsafe[x * 2 + 1, y * 2 + 0]
-          c = src.unsafe[x * 2 + 1, y * 2 + 1]
-          d = src.unsafe[x * 2 + 0, y * 2 + 1]
+          a = src.data[topRowStart + x * 2]
+          b = src.data[topRowStart + x * 2 + 1]
+          c = src.data[bottomRowStart + x * 2 + 1]
+          d = src.data[bottomRowStart + x * 2]
           mixed = rgbx(
             ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
             ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
             ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
             ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
           )
-        result.unsafe[x, y] = mixed
+        result.data[result.dataIndex(x, y)] = mixed
 
       if srcWidthIsOdd:
         let rgbx = mix(
-          src.unsafe[src.width - 1, y * 2 + 0],
-          src.unsafe[src.width - 1, y * 2 + 1],
+          src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
+          src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
           0.5
         ) * 0.5
-        result.unsafe[result.width - 1, y] = rgbx
+        result.data[result.dataIndex(result.width - 1, y)] = rgbx
 
     if srcHeightIsOdd:
       for x in 0 ..< resultEvenWidth:
         let rgbx = mix(
-          src.unsafe[x * 2 + 0, src.height - 1],
-          src.unsafe[x * 2 + 1, src.height - 1],
+          src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
+          src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
           0.5
         ) * 0.5
-        result.unsafe[x, result.height - 1] = rgbx
+        result.data[result.dataIndex(x, result.height - 1)] = rgbx
 
       if srcWidthIsOdd:
-        result.unsafe[result.width - 1, result.height - 1] =
-          src.unsafe[src.width - 1, src.height - 1] * 0.25
+        result.data[result.dataIndex(result.width - 1, result.height - 1)] =
+          src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
 
     # Set src as this result for if we do another power
     src = result
 
-proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
+proc magnifyBy2*(
+  image: Image, power = 1
+): Image {.hasSimd, raises: [PixieError].} =
   ## Scales image up by 2 ^ power.
   if power < 0:
     raise newException(PixieError, "Cannot magnifyBy2 with negative power")
@@ -281,32 +233,20 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
 
   for y in 0 ..< image.height:
     # Write one row of pixels duplicated by scale
-    var x: int
-    when defined(amd64) and allowSimd:
-      if scale == 2:
-        while x <= image.width - 4:
-          let values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
-          mm_storeu_si128(
-            result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
-            mm_unpacklo_epi32(values, values)
-          )
-          mm_storeu_si128(
-            result.data[result.dataIndex(x * scale + 4, y * scale)].addr,
-            mm_unpackhi_epi32(values, values)
-          )
-          x += 4
-    for x in x ..< image.width:
+    let
+      sourceRowStart = image.dataIndex(0, y)
+      resultRowStart = result.dataIndex(0, y * scale)
+    for x in 0 ..< image.width:
       let
-        rgbx = image.unsafe[x, y]
-        resultIdx = result.dataIndex(x * scale, y * scale)
+        rgbx = image.data[sourceRowStart + x]
+        resultIdx = resultRowStart + x * scale
       for i in 0 ..< scale:
         result.data[resultIdx + i] = rgbx
     # Copy that row of pixels into (scale - 1) more rows
-    let rowStart = result.dataIndex(0, y * scale)
     for i in 1 ..< scale:
       copyMem(
-        result.data[rowStart + result.width * i].addr,
-        result.data[rowStart].addr,
+        result.data[resultRowStart + result.width * i].addr,
+        result.data[resultRowStart].addr,
         result.width * 4
       )
 
diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim
index 3f00fca..0120333 100644
--- a/src/pixie/internal.nim
+++ b/src/pixie/internal.nim
@@ -33,18 +33,6 @@ proc gaussianKernel*(radius: int): seq[uint16] {.raises: [].} =
   for i, f in floats:
     result[i] = round(f * 255 * 256).uint16
 
-proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
-  if opacity == 0:
-    rgbx(0, 0, 0, 0)
-  else:
-    let
-      x = round(opacity * 255).uint32
-      r = ((color.r * x) div 255).uint8
-      g = ((color.g * x) div 255).uint8
-      b = ((color.b * x) div 255).uint8
-      a = ((color.a * x) div 255).uint8
-    rgbx(r, g, b, a)
-
 proc intersectsInside*(a, b: Segment, at: var Vec2): bool {.inline.} =
   ## Checks if the a segment intersects b segment (excluding endpoints).
   ## If it returns true, at will have point of intersection
diff --git a/src/pixie/simd/avx2.nim b/src/pixie/simd/avx2.nim
index ea07247..3e36f8f 100644
--- a/src/pixie/simd/avx2.nim
+++ b/src/pixie/simd/avx2.nim
@@ -274,5 +274,108 @@ proc ceilAvx2*(image: Image) {.simd.} =
     rgbx.a = if rgbx.a == 0: 0 else: 255
     image.data[i] = rgbx
 
+proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
+  ## AVX2 minifyBy2: halves the image size `power` times (2x2 box average).
+  if power < 0:
+    raise newException(PixieError, "Cannot minifyBy2 with negative power")
+  if power == 0:
+    return image.copy()
+
+  var src = image
+  for _ in 1 .. power:
+    # When minifying an image of odd size, round the result image size up
+    # so a 99 x 99 src image returns a 50 x 50 image.
+    let
+      srcWidthIsOdd = (src.width mod 2) != 0
+      srcHeightIsOdd = (src.height mod 2) != 0
+      resultEvenWidth = src.width div 2
+      resultEvenHeight = src.height div 2
+    result = newImage(
+      if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
+      if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
+    )
+    let
+      oddMask = mm256_set1_epi16(0xff00) # high byte of every 16-bit lane
+      mergedMask = mm256_set_epi32(
+        0, uint32.high, 0, uint32.high, 0, uint32.high, 0, uint32.high
+      ) # keeps 32-bit lanes 0, 2, 4 and 6
+      permuteControl = mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0)
+    for y in 0 ..< resultEvenHeight:
+      let
+        topRowStart = src.dataIndex(0, y * 2)
+        bottomRowStart = src.dataIndex(0, y * 2 + 1)
+      # A pass loads 8 src pixels per row and stores 4 result pixels.
+      var x: int
+      while x <= resultEvenWidth - 4: # 4 result (8 src) pixels must remain
+        let
+          top = mm256_loadu_si256(src.data[topRowStart + x * 2].addr)
+          bottom = mm256_loadu_si256(src.data[bottomRowStart + x * 2].addr)
+          topShifted = mm256_srli_si256(top, 4)
+          bottomShifted = mm256_srli_si256(bottom, 4)
+          topEven = mm256_andnot_si256(oddMask, top)
+          topOdd = mm256_srli_epi16(top, 8)
+          bottomEven = mm256_andnot_si256(oddMask, bottom)
+          bottomOdd = mm256_srli_epi16(bottom, 8)
+          topShiftedEven = mm256_andnot_si256(oddMask, topShifted)
+          topShiftedOdd = mm256_srli_epi16(topShifted, 8)
+          bottomShiftedEven = mm256_andnot_si256(oddMask, bottomShifted)
+          bottomShiftedOdd = mm256_srli_epi16(bottomShifted, 8)
+          topAddedEven = mm256_add_epi16(topEven, topShiftedEven)
+          bottomAddedEven = mm256_add_epi16(bottomEven, bottomShiftedEven)
+          topAddedOdd = mm256_add_epi16(topOdd, topShiftedOdd)
+          bottomAddedOdd = mm256_add_epi16(bottomOdd, bottomShiftedOdd)
+          addedEven = mm256_add_epi16(topAddedEven, bottomAddedEven)
+          addedOdd = mm256_add_epi16(topAddedOdd, bottomAddedOdd)
+          addedEvenDiv4 = mm256_srli_epi16(addedEven, 2)
+          addedOddDiv4 = mm256_srli_epi16(addedOdd, 2)
+          merged = mm256_or_si256(addedEvenDiv4, mm256_slli_epi16(addedOddDiv4, 8))
+          # Merged has the correct values for the next four pixels at
+          # index 0, 2, 4, 6 so mask the others out and permute into position
+          masked = mm256_and_si256(merged, mergedMask)
+          permuted = mm_256_permutevar8x32_epi32(masked, permuteControl)
+        mm_storeu_si128(
+          result.data[result.dataIndex(x, y)].addr,
+          mm256_castsi256_si128(permuted)
+        )
+        x += 4
+
+      for x in x ..< resultEvenWidth: # scalar path for the remaining pixels
+        let
+          a = src.data[topRowStart + x * 2]
+          b = src.data[topRowStart + x * 2 + 1]
+          c = src.data[bottomRowStart + x * 2 + 1]
+          d = src.data[bottomRowStart + x * 2]
+          mixed = rgbx(
+            ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
+            ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
+            ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
+            ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
+          )
+        result.data[result.dataIndex(x, y)] = mixed
+
+      if srcWidthIsOdd: # odd last column: blend the two rows, then halve
+        let rgbx = mix(
+          src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
+          src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
+          0.5
+        ) * 0.5
+        result.data[result.dataIndex(result.width - 1, y)] = rgbx
+
+    if srcHeightIsOdd: # odd last row: blend horizontal pairs, then halve
+      for x in 0 ..< resultEvenWidth:
+        let rgbx = mix(
+          src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
+          src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
+          0.5
+        ) * 0.5
+        result.data[result.dataIndex(x, result.height - 1)] = rgbx
+
+      if srcWidthIsOdd: # bottom-right corner has a single source pixel
+        result.data[result.dataIndex(result.width - 1, result.height - 1)] =
+          src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
+
+    # Set src as this result for if we do another power
+    src = result
+
 when defined(release):
   {.pop.}
diff --git a/src/pixie/simd/sse2.nim b/src/pixie/simd/sse2.nim
index 6b4f78c..815b880 100644
--- a/src/pixie/simd/sse2.nim
+++ b/src/pixie/simd/sse2.nim
@@ -330,6 +330,146 @@ proc ceilSse2*(image: Image) {.simd.} =
     rgbx.a = if rgbx.a == 0: 0 else: 255
     image.data[i] = rgbx
 
+proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
+  ## SSE2 minifyBy2: halves the image size `power` times (2x2 box average).
+  if power < 0:
+    raise newException(PixieError, "Cannot minifyBy2 with negative power")
+  if power == 0:
+    return image.copy()
+
+  var src = image
+  for _ in 1 .. power:
+    # When minifying an image of odd size, round the result image size up
+    # so a 99 x 99 src image returns a 50 x 50 image.
+    let
+      srcWidthIsOdd = (src.width mod 2) != 0
+      srcHeightIsOdd = (src.height mod 2) != 0
+      resultEvenWidth = src.width div 2
+      resultEvenHeight = src.height div 2
+    result = newImage(
+      if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
+      if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
+    )
+    let
+      oddMask = mm_set1_epi16(0xff00) # high byte of every 16-bit lane
+      mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high) # lanes 0, 2
+    for y in 0 ..< resultEvenHeight:
+      let
+        topRowStart = src.dataIndex(0, y * 2)
+        bottomRowStart = src.dataIndex(0, y * 2 + 1)
+      # A pass loads 4 src pixels per row and yields 2 result pixels.
+      var x: int
+      while x <= resultEvenWidth - 4: # the store below writes 4 pixels
+        let
+          top = mm_loadu_si128(src.data[topRowStart + x * 2].addr)
+          bottom = mm_loadu_si128(src.data[bottomRowStart + x * 2].addr)
+          topShifted = mm_srli_si128(top, 4) # shift by one 4-byte pixel
+          bottomShifted = mm_srli_si128(bottom, 4)
+          topEven = mm_andnot_si128(oddMask, top)
+          topOdd = mm_srli_epi16(top, 8)
+          bottomEven = mm_andnot_si128(oddMask, bottom)
+          bottomOdd = mm_srli_epi16(bottom, 8)
+          topShiftedEven = mm_andnot_si128(oddMask, topShifted)
+          topShiftedOdd = mm_srli_epi16(topShifted, 8)
+          bottomShiftedEven = mm_andnot_si128(oddMask, bottomShifted)
+          bottomShiftedOdd = mm_srli_epi16(bottomShifted, 8)
+          topAddedEven = mm_add_epi16(topEven, topShiftedEven)
+          bottomAddedEven = mm_add_epi16(bottomEven, bottomShiftedEven)
+          topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd)
+          bottomAddedOdd = mm_add_epi16(bottomOdd, bottomShiftedOdd)
+          addedEven = mm_add_epi16(topAddedEven, bottomAddedEven)
+          addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd)
+          addedEvenDiv4 = mm_srli_epi16(addedEven, 2) # sum of 4 samples / 4
+          addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
+          merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
+          # Only 32-bit lanes 0 and 2 of merged hold complete 2x2 averages,
+          # so mask the others out; the shuffle below packs them into lanes
+          # 0 and 1 (lanes 2 and 3 are stored as zeros and overwritten later).
+          masked = mm_and_si128(merged, mergedMask)
+        mm_storeu_si128(
+          result.data[result.dataIndex(x, y)].addr,
+          mm_shuffle_epi32(masked, MM_SHUFFLE(3, 3, 2, 0))
+        )
+        x += 2
+      # Scalar path for the pixels the SIMD loop did not cover.
+      for x in x ..< resultEvenWidth:
+        let
+          a = src.data[topRowStart + x * 2]
+          b = src.data[topRowStart + x * 2 + 1]
+          c = src.data[bottomRowStart + x * 2 + 1]
+          d = src.data[bottomRowStart + x * 2]
+          mixed = rgbx(
+            ((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
+            ((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
+            ((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
+            ((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
+          )
+        result.data[result.dataIndex(x, y)] = mixed
+
+      if srcWidthIsOdd: # odd last column: blend the two rows, then halve
+        let rgbx = mix(
+          src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
+          src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
+          0.5
+        ) * 0.5
+        result.data[result.dataIndex(result.width - 1, y)] = rgbx
+
+    if srcHeightIsOdd: # odd last row: blend horizontal pairs, then halve
+      for x in 0 ..< resultEvenWidth:
+        let rgbx = mix(
+          src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
+          src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
+          0.5
+        ) * 0.5
+        result.data[result.dataIndex(x, result.height - 1)] = rgbx
+
+      if srcWidthIsOdd: # bottom-right corner has a single source pixel
+        result.data[result.dataIndex(result.width - 1, result.height - 1)] =
+          src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25
+
+    # Set src as this result for if we do another power
+    src = result
+
+proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
+  ## SSE2 magnifyBy2: scales the image up by 2 ^ power, repeating pixels.
+  if power < 0:
+    raise newException(PixieError, "Cannot magnifyBy2 with negative power")
+
+  let factor = 2 ^ power
+  result = newImage(image.width * factor, image.height * factor)
+
+  for y in 0 ..< image.height:
+    let
+      srcRow = image.dataIndex(0, y)
+      dstRow = result.dataIndex(0, y * factor)
+    # Fill the first destination row for this source row.
+    var x = 0
+    if factor == 2:
+      # Four source pixels per pass become eight destination pixels.
+      while x + 4 <= image.width:
+        let
+          quad = mm_loadu_si128(image.data[srcRow + x].addr)
+          dst = dstRow + x * factor
+        mm_storeu_si128(
+          result.data[dst].addr, mm_unpacklo_epi32(quad, quad)
+        )
+        mm_storeu_si128(
+          result.data[dst + 4].addr, mm_unpackhi_epi32(quad, quad)
+        )
+        x += 4
+    while x < image.width:
+      let pixel = image.data[srcRow + x]
+      for i in 0 ..< factor:
+        result.data[dstRow + x * factor + i] = pixel
+      inc x
+    # Duplicate the finished row into the (factor - 1) rows below it.
+    for i in 1 ..< factor:
+      copyMem(
+        result.data[dstRow + result.width * i].addr,
+        result.data[dstRow].addr,
+        result.width * 4
+      )
+
 proc blitLineNormalSse2*(
   a, b: ptr UncheckedArray[ColorRGBX], len: int
 ) {.simd.} =