Merge pull request #70 from guzba/master

pixieNoSimd, perf improvements
This commit is contained in:
treeform 2021-01-24 16:44:28 -08:00 committed by GitHub
commit cb70bee1ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 70 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 85 KiB

After

Width:  |  Height:  |  Size: 88 KiB

View file

@ -1,7 +1,7 @@
## Blending modes.
import chroma, math
when defined(amd64):
when defined(amd64) and not defined(pixieNoSimd):
import nimsimd/sse2
# See https://www.w3.org/TR/compositing-1/
@ -264,7 +264,7 @@ proc blendExcludeMaskFloats*(backdrop, source: Color): Color {.inline.} =
proc blendOverwriteFloats*(backdrop, source: Color): Color {.inline.} =
source
when defined(amd64):
when defined(amd64) and not defined(pixieNoSimd):
proc alphaFix(backdrop, source: ColorRGBA, vb, vs, vm: M128): ColorRGBA =
let
sa = source.a.float32
@ -417,7 +417,7 @@ proc blendSoftLight(backdrop, source: ColorRGBA): ColorRGBA =
# (2 * source * backdrop) div 255
# ).uint8
when defined(amd64):
when defined(amd64) and not defined(pixieNoSimd):
let
vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)

View file

@ -9,20 +9,43 @@ proc fractional*(v: float32): float32 {.inline.} =
result = abs(v)
result = result - floor(result)
proc lerp*(a, b: ColorRGBA, t: float32): ColorRGBA {.inline.} =
  ## Linearly interpolates each 8-bit channel (alpha included) from `a`
  ## (t = 0) to `b` (t = 1) using integer math.
  ##
  ## `t` is clamped to [0, 1] first. The weights are unsigned, so a weight
  ## above 255 would make `255 - weight` wrap around, and a negative `t`
  ## has no meaningful unsigned weight at all.
  let
    weightB = round(clamp(t, 0'f32, 1'f32) * 255).uint32
    weightA = 255 - weightB
  result.r = ((a.r.uint32 * weightA + b.r.uint32 * weightB) div 255).uint8
  result.g = ((a.g.uint32 * weightA + b.g.uint32 * weightB) div 255).uint8
  result.b = ((a.b.uint32 * weightA + b.b.uint32 * weightB) div 255).uint8
  result.a = ((a.a.uint32 * weightA + b.a.uint32 * weightB) div 255).uint8
proc toPremultipliedAlpha*(c: ColorRGBA): ColorRGBA {.inline.} =
  ## Returns `c` converted from straight alpha to premultiplied alpha:
  ## red, green and blue are scaled by alpha / 255 (integer division),
  ## alpha itself is copied as is.
  let alpha = c.a.uint16

  template scaled(channel: uint8): uint8 =
    # 255 * 255 still fits in 16 bits, so uint16 is wide enough here.
    ((channel.uint16 * alpha) div 255).uint8

  result.a = c.a
  result.r = scaled(c.r)
  result.g = scaled(c.g)
  result.b = scaled(c.b)
proc toStraightAlpha*(c: ColorRGBA): ColorRGBA {.inline.} =
  ## Converts a color from premultiplied alpha to straight alpha.
  ##
  ## Fully transparent (alpha 0) and fully opaque (alpha 255) colors are
  ## returned unchanged. The scale factor is truncated to an integer, so a
  ## channel can come out slightly low (128 at alpha 128 becomes 254).
  result = c
  if c.a == 0 or c.a == 255:
    return
  # 255 / alpha expressed as an integer factor over a denominator of 255.
  let scale = ((255 / c.a.float32) * 255).uint32
  result.r = ((c.r.uint32 * scale) div 255).uint8
  result.g = ((c.g.uint32 * scale) div 255).uint8
  result.b = ((c.b.uint32 * scale) div 255).uint8
func lerp*(a, b: Color, v: float32): Color {.inline.} =
  ## Interpolates every channel, alpha included, from `a` towards `b` by
  ## `v`, delegating to the `lerp` overload for the channel type.
  Color(
    r: lerp(a.r, b.r, v),
    g: lerp(a.g, b.g, v),
    b: lerp(a.b, b.b, v),
    a: lerp(a.a, b.a, v)
  )
proc toAlphy*(c: Color): Color =
proc toPremultipliedAlpha*(c: Color): Color {.inline.} =
  ## Converts a straight alpha color to premultiplied alpha: red, green
  ## and blue are multiplied by alpha, alpha itself is kept as is.
  Color(r: c.r * c.a, g: c.g * c.a, b: c.b * c.a, a: c.a)
proc fromAlphy*(c: Color): Color =
proc toStraightAlpha*(c: Color): Color {.inline.} =
## Converts a color from premultiplied alpha to straight.
if c.a == 0:
return

View file

@ -1,4 +1,7 @@
import chroma, blends, bumpy, vmath, common, nimsimd/sse2, system/memory
import chroma, blends, bumpy, vmath, common, system/memory
when defined(amd64) and not defined(pixieNoSimd):
import nimsimd/sse2
const h = 0.5.float32
@ -71,7 +74,7 @@ proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
nimSetMem(data[start].addr, rgba.r.cint, len * 4)
else:
var i = start
when defined(amd64):
when defined(amd64) and not defined(pixieNoSimd):
# When supported, SIMD fill until we run out of room
let m = mm_set1_epi32(cast[int32](rgba))
for j in countup(i, start + len - 8, 8):
@ -172,31 +175,10 @@ proc magnifyBy2*(image: Image): Image =
when defined(release):
{.pop.}
proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal)
proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.}
proc invert*(image: Image) =
  ## Inverts all of the colors and alpha.
  ##
  ## Every channel of every pixel in `image.data` is replaced in place by
  ## `255 - value`.
  var i: int
  when defined(amd64):
    # SIMD path: one 128-bit load/store handles 4 pixels at a time.
    let vec255 = mm_set1_epi8(255)
    # `<=` so that a final group of exactly 4 pixels is still handled here
    # instead of falling through to the scalar loop.
    while i <= image.data.len - 4:
      var m = mm_loadu_si128(image.data[i].addr)
      m = mm_sub_epi8(vec255, m)
      mm_storeu_si128(image.data[i].addr, m)
      i += 4
  # Scalar path for whatever the SIMD loop did not cover (every pixel when
  # the SIMD branch is compiled out).
  for j in i ..< image.data.len:
    var rgba = image.data[j]
    rgba.r = 255 - rgba.r
    rgba.g = 255 - rgba.g
    rgba.b = 255 - rgba.b
    rgba.a = 255 - rgba.a
    image.data[j] = rgba
proc toAlphy*(image: Image) =
proc toPremultipliedAlpha*(image: Image) =
## Converts an image to premultiplied alpha from straight.
var i: int
when defined(amd64):
when defined(amd64) and not defined(pixieNoSimd):
# When supported, SIMD convert as much as possible
let
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
@ -235,33 +217,55 @@ proc toAlphy*(image: Image) =
c.b = ((c.b.uint32 * c.a.uint32) div 255).uint8
image.data[j] = c
proc fromAlphy*(image: Image) =
## Converts an image to from premultiplied alpha to straight.
proc toStraightAlpha*(image: Image) =
## Converts an image from premultiplied alpha to straight alpha.
## This is expensive for large images.
for c in image.data.mitems:
if c.a == 0:
if c.a == 0 or c.a == 255:
continue
c.r = ((c.r.uint32 * 255) div c.a.uint32).uint8
c.g = ((c.g.uint32 * 255) div c.a.uint32).uint8
c.b = ((c.b.uint32 * 255) div c.a.uint32).uint8
let multiplier = ((255 / c.a.float32) * 255).uint32
c.r = ((c.r.uint32 * multiplier) div 255).uint8
c.g = ((c.g.uint32 * multiplier) div 255).uint8
c.b = ((c.b.uint32 * multiplier) div 255).uint8
proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal)
proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.}
proc invert*(image: Image) =
  ## Inverts all of the colors and alpha.
  ##
  ## Every channel of every pixel in `image.data` is replaced in place by
  ## `255 - value`.
  var i: int
  when defined(amd64) and not defined(pixieNoSimd):
    # SIMD path: one 128-bit load/store handles 4 pixels at a time.
    let vec255 = mm_set1_epi8(255)
    # `<=` so that a final group of exactly 4 pixels is still handled here
    # instead of falling through to the scalar loop.
    while i <= image.data.len - 4:
      var m = mm_loadu_si128(image.data[i].addr)
      m = mm_sub_epi8(vec255, m)
      mm_storeu_si128(image.data[i].addr, m)
      i += 4
  # Scalar path for whatever the SIMD loop did not cover (every pixel when
  # the SIMD branch is compiled out).
  for j in i ..< image.data.len:
    var rgba = image.data[j]
    rgba.r = 255 - rgba.r
    rgba.g = 255 - rgba.g
    rgba.b = 255 - rgba.b
    rgba.a = 255 - rgba.a
    image.data[j] = rgba
proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA {.inline.} =
## Gets a pixel as (x, y) floats.
let
minX = x.floor.int
difX = x - x.floor
minY = y.floor.int
difY = y - y.floor
vX0Y0 = image[minX, minY].color().toAlphy()
vX1Y0 = image[minX + 1, minY].color().toAlphy()
vX0Y1 = image[minX, minY + 1].color().toAlphy()
vX1Y1 = image[minX + 1, minY + 1].color().toAlphy()
vX0Y0 = image[minX, minY].toPremultipliedAlpha()
vX1Y0 = image[minX + 1, minY].toPremultipliedAlpha()
vX0Y1 = image[minX, minY + 1].toPremultipliedAlpha()
vX1Y1 = image[minX + 1, minY + 1].toPremultipliedAlpha()
bottomMix = lerp(vX0Y0, vX1Y0, difX)
topMix = lerp(vX0Y1, vX1Y1, difX)
finalMix = lerp(bottomMix, topMix, difY)
return finalMix.fromAlphy().rgba()
finalMix.toStraightAlpha()
proc resize*(srcImage: Image, width, height: int): Image =
result = newImage(width, height)

View file

@ -415,36 +415,34 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
discretize(arc, 1, 1)
for command in commands:
if command.numbers.len != command.kind.parameterCount():
raise newException(PixieError, "Invalid path")
case command.kind
of Move:
assert command.numbers.len == 2
at.x = command.numbers[0]
at.y = command.numbers[1]
start = at
of Line:
assert command.numbers.len == 2
to.x = command.numbers[0]
to.y = command.numbers[1]
drawLine(at, to)
at = to
of VLine:
assert command.numbers.len == 1
to.x = at.x
to.y = command.numbers[0]
drawLine(at, to)
at = to
of HLine:
assert command.numbers.len == 1
to.x = command.numbers[0]
to.y = at.y
drawLine(at, to)
at = to
of Quad:
assert command.numbers.len mod 4 == 0
var i = 0
while i < command.numbers.len:
ctr.x = command.numbers[i+0]
@ -459,7 +457,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
of TQuad:
if prevCommand != Quad and prevCommand != TQuad:
ctr = at
assert command.numbers.len == 2
to.x = command.numbers[0]
to.y = command.numbers[1]
ctr = at - (ctr - at)
@ -467,7 +464,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
at = to
of Cubic:
assert command.numbers.len == 6
ctr.x = command.numbers[0]
ctr.y = command.numbers[1]
ctr2.x = command.numbers[2]
@ -488,7 +484,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
at = to
of Close:
assert command.numbers.len == 0
if at != start:
if prevCommand == Quad or prevCommand == TQuad:
drawQuad(at, ctr, start)
@ -500,34 +495,29 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
at = start
of RMove:
assert command.numbers.len == 2
at.x += command.numbers[0]
at.y += command.numbers[1]
start = at
of RLine:
assert command.numbers.len == 2
to.x = at.x + command.numbers[0]
to.y = at.y + command.numbers[1]
drawLine(at, to)
at = to
of RVLine:
assert command.numbers.len == 1
to.x = at.x
to.y = at.y + command.numbers[0]
drawLine(at, to)
at = to
of RHLine:
assert command.numbers.len == 1
to.x = at.x + command.numbers[0]
to.y = at.y
drawLine(at, to)
at = to
of RQuad:
assert command.numbers.len == 4
ctr.x = at.x + command.numbers[0]
ctr.y = at.y + command.numbers[1]
to.x = at.x + command.numbers[2]
@ -538,7 +528,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
of RTQuad:
if prevCommand != RQuad and prevCommand != RTQuad:
ctr = at
assert command.numbers.len == 2
to.x = at.x + command.numbers[0]
to.y = at.y + command.numbers[1]
ctr = at - (ctr - at)
@ -546,7 +535,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
at = to
of RCubic:
assert command.numbers.len == 6
ctr.x = at.x + command.numbers[0]
ctr.y = at.y + command.numbers[1]
ctr2.x = at.x + command.numbers[2]
@ -557,7 +545,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
at = to
of RSCubic:
assert command.numbers.len == 4
if prevCommand in {Cubic, SCubic, RCubic, RSCubic}:
ctr = 2 * at - ctr2
else:
@ -1005,11 +992,7 @@ proc rect*(path: Path, x, y, w, h: float32) =
path.closePath()
proc polygon*(path: Path, x, y, size: float32, sides: int) =
## Draws a n sided regular polygon at x,y with size.
let
size = 80.0
x = 100.0
y = 100.0
## Draws an n-sided regular polygon at (x, y) with size.
path.moveTo(x + size * cos(0.0), y + size * sin(0.0))
for side in 0 .. sides:
path.lineTo(

View file

@ -5,12 +5,10 @@ let a = newImage(2560, 1440)
timeIt "fill":
a.fill(rgba(255, 255, 255, 255))
doAssert a[0, 0] == rgba(255, 255, 255, 255)
keep(a)
timeIt "fill_rgba":
a.fill(rgba(63, 127, 191, 255))
doAssert a[0, 0] == rgba(63, 127, 191, 255)
keep(a)
a.fill(rgba(63, 127, 191, 191))
doAssert a[0, 0] == rgba(63, 127, 191, 191)
timeIt "subImage":
keep a.subImage(0, 0, 256, 256)
@ -26,3 +24,27 @@ timeIt "applyOpacity":
timeIt "sharpOpacity":
a.sharpOpacity()
keep(a)
# Alpha 191 is neither 0 nor 255, so toStraightAlpha does not skip the
# pixels and the conversion is actually measured.
a.fill(rgba(63, 127, 191, 191))

# The image-level conversions are named toPremultipliedAlpha and
# toStraightAlpha (formerly toAlphy / fromAlphy).
timeIt "toPremultipliedAlpha":
  a.toPremultipliedAlpha()

timeIt "toStraightAlpha":
  a.toStraightAlpha()

# Integer lerp on ColorRGBA; the sum is asserted so the work is not
# optimized away.
timeIt "lerp integers":
  for i in 0 ..< 100000:
    let c = a[0, 0]
    var z: int
    for t in 0 .. 100:
      z += lerp(c, c, t.float32 / 100).a.int
    doAssert z > 0

# Same workload through the float Color lerp, converting back to RGBA.
timeIt "lerp floats":
  for i in 0 ..< 100000:
    let c = a[0, 0]
    var z: int
    for t in 0 .. 100:
      z += lerp(c.color, c.color, t.float32 / 100).rgba().a.int
    doAssert z > 0

View file

@ -19,14 +19,14 @@ block:
block:
let image = newImage(10, 10)
image.fill(rgba(255, 0, 0, 128))
image.toAlphy()
image.toPremultipliedAlpha()
doAssert image[9, 9] == rgba(128, 0, 0, 128)
block:
let image = newImage(10, 10)
image.fill(rgba(128, 0, 0, 128))
image.fromAlphy()
doAssert image[9, 9] == rgba(255, 0, 0, 128)
image.toStraightAlpha()
doAssert image[9, 9] == rgba(254, 0, 0, 128)
block:
let