Merge pull request #70 from guzba/master
pixieNoSimd, perf improvements
This commit is contained in:
commit
cb70bee1ec
7 changed files with 102 additions and 70 deletions
Binary file not shown.
Before Width: | Height: | Size: 85 KiB After Width: | Height: | Size: 88 KiB |
|
@ -1,7 +1,7 @@
|
|||
## Blending modes.
|
||||
import chroma, math
|
||||
|
||||
when defined(amd64):
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
import nimsimd/sse2
|
||||
|
||||
# See https://www.w3.org/TR/compositing-1/
|
||||
|
@ -264,7 +264,7 @@ proc blendExcludeMaskFloats*(backdrop, source: Color): Color {.inline.} =
|
|||
proc blendOverwriteFloats*(backdrop, source: Color): Color {.inline.} =
|
||||
source
|
||||
|
||||
when defined(amd64):
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
proc alphaFix(backdrop, source: ColorRGBA, vb, vs, vm: M128): ColorRGBA =
|
||||
let
|
||||
sa = source.a.float32
|
||||
|
@ -417,7 +417,7 @@ proc blendSoftLight(backdrop, source: ColorRGBA): ColorRGBA =
|
|||
# (2 * source * backdrop) div 255
|
||||
# ).uint8
|
||||
|
||||
when defined(amd64):
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
let
|
||||
vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
|
||||
vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
|
||||
|
|
|
@ -9,20 +9,43 @@ proc fractional*(v: float32): float32 {.inline.} =
|
|||
result = abs(v)
|
||||
result = result - floor(result)
|
||||
|
||||
proc lerp*(a, b: ColorRGBA, t: float32): ColorRGBA {.inline.} =
|
||||
let x = round(t * 255).uint32
|
||||
result.r = ((a.r.uint32 * (255 - x) + b.r.uint32 * x) div 255).uint8
|
||||
result.g = ((a.g.uint32 * (255 - x) + b.g.uint32 * x) div 255).uint8
|
||||
result.b = ((a.b.uint32 * (255 - x) + b.b.uint32 * x) div 255).uint8
|
||||
result.a = ((a.a.uint32 * (255 - x) + b.a.uint32 * x) div 255).uint8
|
||||
|
||||
proc toPremultipliedAlpha*(c: ColorRGBA): ColorRGBA {.inline.} =
|
||||
## Converts a color to premultiplied alpha from straight alpha.
|
||||
result.r = ((c.r.uint16 * c.a.uint16) div 255).uint8
|
||||
result.g = ((c.g.uint16 * c.a.uint16) div 255).uint8
|
||||
result.b = ((c.b.uint16 * c.a.uint16) div 255).uint8
|
||||
result.a = c.a
|
||||
|
||||
proc toStraightAlpha*(c: ColorRGBA): ColorRGBA {.inline.} =
|
||||
## Converts a color from premultiplied alpha to straight alpha.
|
||||
result = c
|
||||
if result.a != 0 and result.a != 255:
|
||||
let multiplier = ((255 / c.a.float32) * 255).uint32
|
||||
result.r = ((result.r.uint32 * multiplier) div 255).uint8
|
||||
result.g = ((result.g.uint32 * multiplier) div 255).uint8
|
||||
result.b = ((result.b.uint32 * multiplier) div 255).uint8
|
||||
|
||||
func lerp*(a, b: Color, v: float32): Color {.inline.} =
|
||||
result.r = lerp(a.r, b.r, v)
|
||||
result.g = lerp(a.g, b.g, v)
|
||||
result.b = lerp(a.b, b.b, v)
|
||||
result.a = lerp(a.a, b.a, v)
|
||||
|
||||
proc toAlphy*(c: Color): Color =
|
||||
proc toPremultipliedAlpha*(c: Color): Color {.inline.} =
|
||||
## Converts a color to premultiplied alpha from straight.
|
||||
result.r = c.r * c.a
|
||||
result.g = c.g * c.a
|
||||
result.b = c.b * c.a
|
||||
result.a = c.a
|
||||
|
||||
proc fromAlphy*(c: Color): Color =
|
||||
proc toStraightAlpha*(c: Color): Color {.inline.} =
|
||||
## Converts a color from premultiplied alpha to straight.
|
||||
if c.a == 0:
|
||||
return
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
import chroma, blends, bumpy, vmath, common, nimsimd/sse2, system/memory
|
||||
import chroma, blends, bumpy, vmath, common, system/memory
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
import nimsimd/sse2
|
||||
|
||||
const h = 0.5.float32
|
||||
|
||||
|
@ -71,7 +74,7 @@ proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
|
|||
nimSetMem(data[start].addr, rgba.r.cint, len * 4)
|
||||
else:
|
||||
var i = start
|
||||
when defined(amd64):
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
# When supported, SIMD fill until we run out of room
|
||||
let m = mm_set1_epi32(cast[int32](rgba))
|
||||
for j in countup(i, start + len - 8, 8):
|
||||
|
@ -172,31 +175,10 @@ proc magnifyBy2*(image: Image): Image =
|
|||
when defined(release):
|
||||
{.pop.}
|
||||
|
||||
proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal)
|
||||
proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.}
|
||||
|
||||
proc invert*(image: Image) =
|
||||
## Inverts all of the colors and alpha.
|
||||
var i: int
|
||||
when defined(amd64):
|
||||
let vec255 = mm_set1_epi8(255)
|
||||
while i < image.data.len - 4:
|
||||
var m = mm_loadu_si128(image.data[i].addr)
|
||||
m = mm_sub_epi8(vec255, m)
|
||||
mm_storeu_si128(image.data[i].addr, m)
|
||||
i += 4
|
||||
for j in i ..< image.data.len:
|
||||
var rgba = image.data[j]
|
||||
rgba.r = 255 - rgba.r
|
||||
rgba.g = 255 - rgba.g
|
||||
rgba.b = 255 - rgba.b
|
||||
rgba.a = 255 - rgba.a
|
||||
image.data[j] = rgba
|
||||
|
||||
proc toAlphy*(image: Image) =
|
||||
proc toPremultipliedAlpha*(image: Image) =
|
||||
## Converts an image to premultiplied alpha from straight.
|
||||
var i: int
|
||||
when defined(amd64):
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
# When supported, SIMD convert as much as possible
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
|
@ -235,33 +217,55 @@ proc toAlphy*(image: Image) =
|
|||
c.b = ((c.b.uint32 * c.a.uint32) div 255).uint8
|
||||
image.data[j] = c
|
||||
|
||||
proc fromAlphy*(image: Image) =
|
||||
## Converts an image to from premultiplied alpha to straight.
|
||||
proc toStraightAlpha*(image: Image) =
|
||||
## Converts an image from premultiplied alpha to straight alpha.
|
||||
## This is expensive for large images.
|
||||
for c in image.data.mitems:
|
||||
if c.a == 0:
|
||||
if c.a == 0 or c.a == 255:
|
||||
continue
|
||||
c.r = ((c.r.uint32 * 255) div c.a.uint32).uint8
|
||||
c.g = ((c.g.uint32 * 255) div c.a.uint32).uint8
|
||||
c.b = ((c.b.uint32 * 255) div c.a.uint32).uint8
|
||||
let multiplier = ((255 / c.a.float32) * 255).uint32
|
||||
c.r = ((c.r.uint32 * multiplier) div 255).uint8
|
||||
c.g = ((c.g.uint32 * multiplier) div 255).uint8
|
||||
c.b = ((c.b.uint32 * multiplier) div 255).uint8
|
||||
|
||||
proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal)
|
||||
proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.}
|
||||
|
||||
proc invert*(image: Image) =
|
||||
## Inverts all of the colors and alpha.
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
let vec255 = mm_set1_epi8(255)
|
||||
while i < image.data.len - 4:
|
||||
var m = mm_loadu_si128(image.data[i].addr)
|
||||
m = mm_sub_epi8(vec255, m)
|
||||
mm_storeu_si128(image.data[i].addr, m)
|
||||
i += 4
|
||||
for j in i ..< image.data.len:
|
||||
var rgba = image.data[j]
|
||||
rgba.r = 255 - rgba.r
|
||||
rgba.g = 255 - rgba.g
|
||||
rgba.b = 255 - rgba.b
|
||||
rgba.a = 255 - rgba.a
|
||||
image.data[j] = rgba
|
||||
|
||||
proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA {.inline.} =
|
||||
## Gets a pixel as (x, y) floats.
|
||||
let
|
||||
minX = x.floor.int
|
||||
difX = x - x.floor
|
||||
minY = y.floor.int
|
||||
difY = y - y.floor
|
||||
|
||||
vX0Y0 = image[minX, minY].color().toAlphy()
|
||||
vX1Y0 = image[minX + 1, minY].color().toAlphy()
|
||||
vX0Y1 = image[minX, minY + 1].color().toAlphy()
|
||||
vX1Y1 = image[minX + 1, minY + 1].color().toAlphy()
|
||||
vX0Y0 = image[minX, minY].toPremultipliedAlpha()
|
||||
vX1Y0 = image[minX + 1, minY].toPremultipliedAlpha()
|
||||
vX0Y1 = image[minX, minY + 1].toPremultipliedAlpha()
|
||||
vX1Y1 = image[minX + 1, minY + 1].toPremultipliedAlpha()
|
||||
|
||||
bottomMix = lerp(vX0Y0, vX1Y0, difX)
|
||||
topMix = lerp(vX0Y1, vX1Y1, difX)
|
||||
finalMix = lerp(bottomMix, topMix, difY)
|
||||
|
||||
return finalMix.fromAlphy().rgba()
|
||||
finalMix.toStraightAlpha()
|
||||
|
||||
proc resize*(srcImage: Image, width, height: int): Image =
|
||||
result = newImage(width, height)
|
||||
|
|
|
@ -415,36 +415,34 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
discretize(arc, 1, 1)
|
||||
|
||||
for command in commands:
|
||||
if command.numbers.len != command.kind.parameterCount():
|
||||
raise newException(PixieError, "Invalid path")
|
||||
|
||||
case command.kind
|
||||
of Move:
|
||||
assert command.numbers.len == 2
|
||||
at.x = command.numbers[0]
|
||||
at.y = command.numbers[1]
|
||||
start = at
|
||||
|
||||
of Line:
|
||||
assert command.numbers.len == 2
|
||||
to.x = command.numbers[0]
|
||||
to.y = command.numbers[1]
|
||||
drawLine(at, to)
|
||||
at = to
|
||||
|
||||
of VLine:
|
||||
assert command.numbers.len == 1
|
||||
to.x = at.x
|
||||
to.y = command.numbers[0]
|
||||
drawLine(at, to)
|
||||
at = to
|
||||
|
||||
of HLine:
|
||||
assert command.numbers.len == 1
|
||||
to.x = command.numbers[0]
|
||||
to.y = at.y
|
||||
drawLine(at, to)
|
||||
at = to
|
||||
|
||||
of Quad:
|
||||
assert command.numbers.len mod 4 == 0
|
||||
var i = 0
|
||||
while i < command.numbers.len:
|
||||
ctr.x = command.numbers[i+0]
|
||||
|
@ -459,7 +457,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
of TQuad:
|
||||
if prevCommand != Quad and prevCommand != TQuad:
|
||||
ctr = at
|
||||
assert command.numbers.len == 2
|
||||
to.x = command.numbers[0]
|
||||
to.y = command.numbers[1]
|
||||
ctr = at - (ctr - at)
|
||||
|
@ -467,7 +464,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
at = to
|
||||
|
||||
of Cubic:
|
||||
assert command.numbers.len == 6
|
||||
ctr.x = command.numbers[0]
|
||||
ctr.y = command.numbers[1]
|
||||
ctr2.x = command.numbers[2]
|
||||
|
@ -488,7 +484,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
at = to
|
||||
|
||||
of Close:
|
||||
assert command.numbers.len == 0
|
||||
if at != start:
|
||||
if prevCommand == Quad or prevCommand == TQuad:
|
||||
drawQuad(at, ctr, start)
|
||||
|
@ -500,34 +495,29 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
at = start
|
||||
|
||||
of RMove:
|
||||
assert command.numbers.len == 2
|
||||
at.x += command.numbers[0]
|
||||
at.y += command.numbers[1]
|
||||
start = at
|
||||
|
||||
of RLine:
|
||||
assert command.numbers.len == 2
|
||||
to.x = at.x + command.numbers[0]
|
||||
to.y = at.y + command.numbers[1]
|
||||
drawLine(at, to)
|
||||
at = to
|
||||
|
||||
of RVLine:
|
||||
assert command.numbers.len == 1
|
||||
to.x = at.x
|
||||
to.y = at.y + command.numbers[0]
|
||||
drawLine(at, to)
|
||||
at = to
|
||||
|
||||
of RHLine:
|
||||
assert command.numbers.len == 1
|
||||
to.x = at.x + command.numbers[0]
|
||||
to.y = at.y
|
||||
drawLine(at, to)
|
||||
at = to
|
||||
|
||||
of RQuad:
|
||||
assert command.numbers.len == 4
|
||||
ctr.x = at.x + command.numbers[0]
|
||||
ctr.y = at.y + command.numbers[1]
|
||||
to.x = at.x + command.numbers[2]
|
||||
|
@ -538,7 +528,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
of RTQuad:
|
||||
if prevCommand != RQuad and prevCommand != RTQuad:
|
||||
ctr = at
|
||||
assert command.numbers.len == 2
|
||||
to.x = at.x + command.numbers[0]
|
||||
to.y = at.y + command.numbers[1]
|
||||
ctr = at - (ctr - at)
|
||||
|
@ -546,7 +535,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
at = to
|
||||
|
||||
of RCubic:
|
||||
assert command.numbers.len == 6
|
||||
ctr.x = at.x + command.numbers[0]
|
||||
ctr.y = at.y + command.numbers[1]
|
||||
ctr2.x = at.x + command.numbers[2]
|
||||
|
@ -557,7 +545,6 @@ proc commandsToPolygons*(commands: seq[PathCommand]): seq[seq[Vec2]] =
|
|||
at = to
|
||||
|
||||
of RSCubic:
|
||||
assert command.numbers.len == 4
|
||||
if prevCommand in {Cubic, SCubic, RCubic, RSCubic}:
|
||||
ctr = 2 * at - ctr2
|
||||
else:
|
||||
|
@ -1005,11 +992,7 @@ proc rect*(path: Path, x, y, w, h: float32) =
|
|||
path.closePath()
|
||||
|
||||
proc polygon*(path: Path, x, y, size: float32, sides: int) =
|
||||
## Draws a n sided regular polygon at x,y with size.
|
||||
let
|
||||
size = 80.0
|
||||
x = 100.0
|
||||
y = 100.0
|
||||
## Draws an n-sided regular polygon at (x, y) with size.
|
||||
path.moveTo(x + size * cos(0.0), y + size * sin(0.0))
|
||||
for side in 0 .. sides:
|
||||
path.lineTo(
|
||||
|
|
|
@ -5,12 +5,10 @@ let a = newImage(2560, 1440)
|
|||
timeIt "fill":
|
||||
a.fill(rgba(255, 255, 255, 255))
|
||||
doAssert a[0, 0] == rgba(255, 255, 255, 255)
|
||||
keep(a)
|
||||
|
||||
timeIt "fill_rgba":
|
||||
a.fill(rgba(63, 127, 191, 255))
|
||||
doAssert a[0, 0] == rgba(63, 127, 191, 255)
|
||||
keep(a)
|
||||
a.fill(rgba(63, 127, 191, 191))
|
||||
doAssert a[0, 0] == rgba(63, 127, 191, 191)
|
||||
|
||||
timeIt "subImage":
|
||||
keep a.subImage(0, 0, 256, 256)
|
||||
|
@ -26,3 +24,27 @@ timeIt "applyOpacity":
|
|||
timeIt "sharpOpacity":
|
||||
a.sharpOpacity()
|
||||
keep(a)
|
||||
|
||||
a.fill(rgba(63, 127, 191, 191))
|
||||
|
||||
timeIt "toAlphy":
|
||||
a.toAlphy()
|
||||
|
||||
timeIt "fromAlphy":
|
||||
a.fromAlphy()
|
||||
|
||||
timeIt "lerp integers":
|
||||
for i in 0 ..< 100000:
|
||||
let c = a[0, 0]
|
||||
var z: int
|
||||
for t in 0 .. 100:
|
||||
z += lerp(c, c, t.float32 / 100).a.int
|
||||
doAssert z > 0
|
||||
|
||||
timeIt "lerp floats":
|
||||
for i in 0 ..< 100000:
|
||||
let c = a[0, 0]
|
||||
var z: int
|
||||
for t in 0 .. 100:
|
||||
z += lerp(c.color, c.color, t.float32 / 100).rgba().a.int
|
||||
doAssert z > 0
|
||||
|
|
|
@ -19,14 +19,14 @@ block:
|
|||
block:
|
||||
let image = newImage(10, 10)
|
||||
image.fill(rgba(255, 0, 0, 128))
|
||||
image.toAlphy()
|
||||
image.toPremultipliedAlpha()
|
||||
doAssert image[9, 9] == rgba(128, 0, 0, 128)
|
||||
|
||||
block:
|
||||
let image = newImage(10, 10)
|
||||
image.fill(rgba(128, 0, 0, 128))
|
||||
image.fromAlphy()
|
||||
doAssert image[9, 9] == rgba(255, 0, 0, 128)
|
||||
image.toStraightAlpha()
|
||||
doAssert image[9, 9] == rgba(254, 0, 0, 128)
|
||||
|
||||
block:
|
||||
let
|
||||
|
|
Loading…
Reference in a new issue