Merge pull request #70 from guzba/master

pixieNoSimd, perf improvements
2021-01-24 16:44:28 -08:00 · 2021-01-24 16:44:28 -08:00 · cb70bee1ec
commit cb70bee1ec
parent a4b9e524c3 9e834ce8e4
7 changed files with 102 additions and 70 deletions
--- a/examples/blur.png
+++ b/examples/blur.png
--- a/src/pixie/blends.nim
+++ b/src/pixie/blends.nim
@ -1,7 +1,7 @@
 ## Blending modes.
 import chroma, math

-when defined(amd64):
+when defined(amd64) and not defined(pixieNoSimd):
  import nimsimd/sse2

 # See https://www.w3.org/TR/compositing-1/
@ -264,7 +264,7 @@ proc blendExcludeMaskFloats*(backdrop, source: Color): Color {.inline.} =
 proc blendOverwriteFloats*(backdrop, source: Color): Color {.inline.} =
  source

-when defined(amd64):
+when defined(amd64) and not defined(pixieNoSimd):
  proc alphaFix(backdrop, source: ColorRGBA, vb, vs, vm: M128): ColorRGBA =
    let
      sa = source.a.float32
@ -417,7 +417,7 @@ proc blendSoftLight(backdrop, source: ColorRGBA): ColorRGBA =
  #     (2 * source * backdrop) div 255
  #   ).uint8

-  when defined(amd64):
+  when defined(amd64) and not defined(pixieNoSimd):
    let
      vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
      vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
--- a/src/pixie/common.nim
+++ b/src/pixie/common.nim
@ -9,20 +9,43 @@ proc fractional*(v: float32): float32 {.inline.} =
  result = abs(v)
  result = result - floor(result)

+proc lerp*(a, b: ColorRGBA, t: float32): ColorRGBA {.inline.} =
+  let x = round(t * 255).uint32 # integer weight of b; presumably t is in [0, 1], since 255 - x wraps (unsigned) when x > 255
+  result.r = ((a.r.uint32 * (255 - x) + b.r.uint32 * x) div 255).uint8
+  result.g = ((a.g.uint32 * (255 - x) + b.g.uint32 * x) div 255).uint8
+  result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8
+  result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8
+
+proc toPremultipliedAlpha*(c: ColorRGBA): ColorRGBA {.inline.} =
+  ## Converts a color to premultiplied alpha from straight alpha.
+  result.r = ((c.r.uint16 * c.a.uint16) div 255).uint8 # uint16 is enough: 255 * 255 = 65025
+  result.g = ((c.g.uint16 * c.a.uint16) div 255).uint8
+  result.b = ((c.b.uint16 * c.a.uint16) div 255).uint8
+  result.a = c.a
+
+proc toStraightAlpha*(c: ColorRGBA): ColorRGBA {.inline.} =
+  ## Converts a color from premultiplied alpha to straight alpha.
+  result = c
+  if result.a != 0 and result.a != 255: # alpha 0 and alpha 255 return c unchanged
+    let multiplier = ((255 / c.a.float32) * 255).uint32 # 255 / a, scaled by 255 so the channel math below stays in integers
+    result.r = ((result.r.uint32 * multiplier) div 255).uint8
+    result.g = ((result.g.uint32 * multiplier) div 255).uint8
+    result.b = ((result.b.uint32 * multiplier) div 255).uint8
+
 func lerp*(a, b: Color, v: float32): Color {.inline.} =
  result.r = lerp(a.r, b.r, v)
  result.g = lerp(a.g, b.g, v)
  result.b = lerp(a.b, b.b, v)
  result.a = lerp(a.a, b.a, v)

-proc toAlphy*(c: Color): Color =
+proc toPremultipliedAlpha*(c: Color): Color {.inline.} =
  ## Converts a color to premultiplied alpha from straight.
  result.r = c.r * c.a
  result.g = c.g * c.a
  result.b = c.b * c.a
  result.a = c.a

-proc fromAlphy*(c: Color): Color =
+proc toStraightAlpha*(c: Color): Color {.inline.} =
  ## Converts a color from premultiplied alpha to straight.
  if c.a == 0:
    return
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@ -1,4 +1,7 @@
-import chroma, blends, bumpy, vmath, common, nimsimd/sse2, system/memory
+import chroma, blends, bumpy, vmath, common, system/memory
+
+when defined(amd64) and not defined(pixieNoSimd):
+  import nimsimd/sse2

 const h = 0.5.float32

@ -71,7 +74,7 @@ proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
    nimSetMem(data[start].addr, rgba.r.cint, len * 4)
  else:
    var i = start
-    when defined(amd64):
+    when defined(amd64) and not defined(pixieNoSimd):
      # When supported, SIMD fill until we run out of room
      let m = mm_set1_epi32(cast[int32](rgba))
      for j in countup(i, start + len - 8, 8):
@ -172,31 +175,10 @@ proc magnifyBy2*(image: Image): Image =
 when defined(release):
  {.pop.}

-proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal)
-proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.}
-
-proc invert*(image: Image) =
-  ## Inverts all of the colors and alpha.
-  var i: int
-  when defined(amd64):
-    let vec255 = mm_set1_epi8(255)
-    while i < image.data.len - 4:
-      var m = mm_loadu_si128(image.data[i].addr)
-      m = mm_sub_epi8(vec255, m)
-      mm_storeu_si128(image.data[i].addr, m)
-      i += 4
-  for j in i ..< image.data.len:
-    var rgba = image.data[j]
-    rgba.r = 255 - rgba.r
-    rgba.g = 255 - rgba.g
-    rgba.b = 255 - rgba.b
-    rgba.a = 255 - rgba.a
-    image.data[j] = rgba
-
-proc toAlphy*(image: Image) =
+proc toPremultipliedAlpha*(image: Image) =
  ## Converts an image to premultiplied alpha from straight.
  var i: int
-  when defined(amd64):
+  when defined(amd64) and not defined(pixieNoSimd):
    # When supported, SIMD convert as much as possible
    let
      alphaMask = mm_set1_epi32(cast[int32](0xff000000))
@ -235,33 +217,55 @@ proc toAlphy*(image: Image) =
    c.b = ((c.b.uint32 * c.a.uint32) div 255).uint8
    image.data[j] = c

-proc fromAlphy*(image: Image) =
-  ## Converts an image to from premultiplied alpha to straight.
+proc toStraightAlpha*(image: Image) =
+  ## Converts an image from premultiplied alpha to straight alpha.
+  ## This is expensive for large images.
  for c in image.data.mitems:
-    if c.a == 0:
+    if c.a == 0 or c.a == 255: # these pixels are left untouched
      continue
-    c.r = ((c.r.uint32 * 255) div c.a.uint32).uint8
-    c.g = ((c.g.uint32 * 255) div c.a.uint32).uint8
-    c.b = ((c.b.uint32 * 255) div c.a.uint32).uint8
+    let multiplier = ((255 / c.a.float32) * 255).uint32 # 255 / a, scaled by 255 so the channel math below stays in integers
+    c.r = ((c.r.uint32 * multiplier) div 255).uint8
+    c.g = ((c.g.uint32 * multiplier) div 255).uint8
+    c.b = ((c.b.uint32 * multiplier) div 255).uint8
+
+proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal)
+proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.}
+
+proc invert*(image: Image) =
+  ## Inverts all of the colors and alpha.
+  var i: int
+  when defined(amd64) and not defined(pixieNoSimd):
+    let vec255 = mm_set1_epi8(255)
+    while i < image.data.len - 4:
+      var m = mm_loadu_si128(image.data[i].addr)
+      m = mm_sub_epi8(vec255, m)
+      mm_storeu_si128(image.data[i].addr, m)
+      i += 4
+  for j in i ..< image.data.len:
+    var rgba = image.data[j]
+    rgba.r = 255 - rgba.r
+    rgba.g = 255 - rgba.g
+    rgba.b = 255 - rgba.b
+    rgba.a = 255 - rgba.a
+    image.data[j] = rgba

 proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA {.inline.} =
-  ## Gets a pixel as (x, y) floats.
  let
    minX = x.floor.int
    difX = x - x.floor
    minY = y.floor.int
    difY = y - y.floor

-    vX0Y0 = image[minX, minY].color().toAlphy()
-    vX1Y0 = image[minX + 1, minY].color().toAlphy()
-    vX0Y1 = image[minX, minY + 1].color().toAlphy()
-    vX1Y1 = image[minX + 1, minY + 1].color().toAlphy()
+    vX0Y0 = image[minX, minY].toPremultipliedAlpha()
+    vX1Y0 = image[minX + 1, minY].toPremultipliedAlpha()
+    vX0Y1 = image[minX, minY + 1].toPremultipliedAlpha()
+    vX1Y1 = image[minX + 1, minY + 1].toPremultipliedAlpha()

    bottomMix = lerp(vX0Y0, vX1Y0, difX)
    topMix = lerp(vX0Y1, vX1Y1, difX)
    finalMix = lerp(bottomMix, topMix, difY)

-  return finalMix.fromAlphy().rgba()
+  finalMix.toStraightAlpha()

 proc resize*(srcImage: Image, width, height: int): Image =
  result = newImage(width, height)
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@ -415,36 +415,34 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
    discretize(arc, 1, 1)

  for command in commands:
+    if command.numbers.len != command.kind.parameterCount():
+      raise newException(PixieError, "Invalid path")
+
    case command.kind
      of Move:
-        assert command.numbers.len == 2
        at.x = command.numbers[0]
        at.y = command.numbers[1]
        start = at

      of Line:
-        assert command.numbers.len == 2
        to.x = command.numbers[0]
        to.y = command.numbers[1]
        drawLine(at, to)
        at = to

      of VLine:
-        assert command.numbers.len == 1
        to.x = at.x
        to.y = command.numbers[0]
        drawLine(at, to)
        at = to

      of HLine:
-        assert command.numbers.len == 1
        to.x = command.numbers[0]
        to.y = at.y
        drawLine(at, to)
        at = to

      of Quad:
-        assert command.numbers.len mod 4 == 0
        var i = 0
        while i < command.numbers.len:
          ctr.x = command.numbers[i+0]
@ -459,7 +457,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
      of TQuad:
        if prevCommand != Quad and prevCommand != TQuad:
          ctr = at
-        assert command.numbers.len == 2
        to.x = command.numbers[0]
        to.y = command.numbers[1]
        ctr = at - (ctr - at)
@ -467,7 +464,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
        at = to

      of Cubic:
-        assert command.numbers.len == 6
        ctr.x = command.numbers[0]
        ctr.y = command.numbers[1]
        ctr2.x = command.numbers[2]
@ -488,7 +484,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
        at = to

      of Close:
-        assert command.numbers.len == 0
        if at != start:
          if prevCommand == Quad or prevCommand == TQuad:
            drawQuad(at, ctr, start)
@ -500,34 +495,29 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
        at = start

      of RMove:
-        assert command.numbers.len == 2
        at.x += command.numbers[0]
        at.y += command.numbers[1]
        start = at

      of RLine:
-        assert command.numbers.len == 2
        to.x = at.x + command.numbers[0]
        to.y = at.y + command.numbers[1]
        drawLine(at, to)
        at = to

      of RVLine:
-        assert command.numbers.len == 1
        to.x = at.x
        to.y = at.y + command.numbers[0]
        drawLine(at, to)
        at = to

      of RHLine:
-        assert command.numbers.len == 1
        to.x = at.x + command.numbers[0]
        to.y = at.y
        drawLine(at, to)
        at = to

      of RQuad:
-        assert command.numbers.len == 4
        ctr.x = at.x + command.numbers[0]
        ctr.y = at.y + command.numbers[1]
        to.x = at.x + command.numbers[2]
@ -538,7 +528,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
      of RTQuad:
        if prevCommand != RQuad and prevCommand != RTQuad:
          ctr = at
-        assert command.numbers.len == 2
        to.x = at.x + command.numbers[0]
        to.y = at.y + command.numbers[1]
        ctr = at - (ctr - at)
@ -546,7 +535,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
        at = to

      of RCubic:
-        assert command.numbers.len == 6
        ctr.x = at.x + command.numbers[0]
        ctr.y = at.y + command.numbers[1]
        ctr2.x = at.x + command.numbers[2]
@ -557,7 +545,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
        at = to

      of RSCubic:
-        assert command.numbers.len == 4
        if prevCommand in {Cubic, SCubic, RCubic, RSCubic}:
          ctr = 2 * at - ctr2
        else:
@ -1005,11 +992,7 @@ proc rect*(path: Path, x, y, w, h: float32) =
  path.closePath()

 proc polygon*(path: Path, x, y, size: float32, sides: int) =
-  ## Draws a n sided regular polygon at x,y with size.
-  let
-    size = 80.0
-    x = 100.0
-    y = 100.0
+  ## Draws an n-sided regular polygon at (x, y) with size.
  path.moveTo(x + size * cos(0.0), y + size * sin(0.0))
  for side in 0 .. sides:
    path.lineTo(
--- a/tests/benchmark_images.nim
+++ b/tests/benchmark_images.nim
@ -5,12 +5,10 @@ let a = newImage(2560, 1440)
 timeIt "fill":
  a.fill(rgba(255, 255, 255, 255))
  doAssert a[0, 0] == rgba(255, 255, 255, 255)
-  keep(a)

 timeIt "fill_rgba":
-  a.fill(rgba(63, 127, 191, 255))
-  doAssert a[0, 0] == rgba(63, 127, 191, 255)
-  keep(a)
+  a.fill(rgba(63, 127, 191, 191))
+  doAssert a[0, 0] == rgba(63, 127, 191, 191)

 timeIt "subImage":
  keep a.subImage(0, 0, 256, 256)
@ -26,3 +24,27 @@ timeIt "applyOpacity":
 timeIt "sharpOpacity":
  a.sharpOpacity()
  keep(a)
+
+a.fill(rgba(63, 127, 191, 191))
+
+timeIt "toAlphy":
+  a.toPremultipliedAlpha() # the Image proc toAlphy is renamed to toPremultipliedAlpha in images.nim
+
+timeIt "fromAlphy":
+  a.toStraightAlpha() # the Image proc fromAlphy is renamed to toStraightAlpha in images.nim
+
+timeIt "lerp integers":
+  for i in 0 ..< 100000:
+    let c = a[0, 0]
+    var z: int
+    for t in 0 .. 100:
+      z += lerp(c, c, t.float32 / 100).a.int
+    doAssert z > 0
+
+timeIt "lerp floats":
+  for i in 0 ..< 100000:
+    let c = a[0, 0]
+    var z: int
+    for t in 0 .. 100:
+      z += lerp(c.color, c.color, t.float32 / 100).rgba().a.int
+    doAssert z > 0
--- a/tests/test_images.nim
+++ b/tests/test_images.nim
@ -19,14 +19,14 @@ block:
 block:
  let image = newImage(10, 10)
  image.fill(rgba(255, 0, 0, 128))
-  image.toAlphy()
+  image.toPremultipliedAlpha()
  doAssert image[9, 9] == rgba(128, 0, 0, 128)

 block:
  let image = newImage(10, 10)
  image.fill(rgba(128, 0, 0, 128))
-  image.fromAlphy()
-  doAssert image[9, 9] == rgba(255, 0, 0, 128)
+  image.toStraightAlpha()
+  doAssert image[9, 9] == rgba(254, 0, 0, 128)

 block:
  let