Merge pull request #67 from guzba/master

0.0.15, gate simd behind amd64
This commit is contained in:
treeform 2021-01-23 11:21:19 -08:00 committed by GitHub
commit 45da7c4db9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 71 additions and 42 deletions

View file

@ -1,4 +1,4 @@
version = "0.0.14" version = "0.0.15"
author = "Andre von Houck and Ryan Oldenburg" author = "Andre von Houck and Ryan Oldenburg"
description = "Full-featured 2d graphics library for Nim." description = "Full-featured 2d graphics library for Nim."
license = "MIT" license = "MIT"

View file

@ -1,5 +1,8 @@
## Blending modes. ## Blending modes.
import chroma, math, nimsimd/sse2 import chroma, math
when defined(amd64):
import nimsimd/sse2
# See https://www.w3.org/TR/compositing-1/ # See https://www.w3.org/TR/compositing-1/
# See https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_blend_equation_advanced.txt # See https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_blend_equation_advanced.txt
@ -261,37 +264,60 @@ proc blendExcludeMaskFloats*(backdrop, source: Color): Color {.inline.} =
proc blendOverwriteFloats*(backdrop, source: Color): Color {.inline.} = proc blendOverwriteFloats*(backdrop, source: Color): Color {.inline.} =
source source
proc alphaFix(backdrop, source: ColorRGBA, vb, vs, vm: M128): ColorRGBA = when defined(amd64):
let proc alphaFix(backdrop, source: ColorRGBA, vb, vs, vm: M128): ColorRGBA =
sa = source.a.float32 let
ba = backdrop.a.float32 sa = source.a.float32
a = sa + ba * (255 - sa) / 255 ba = backdrop.a.float32
if a == 0: a = sa + ba * (255 - sa) / 255
return if a == 0:
return
let let
t0 = mm_set1_ps(sa * (255 - ba)) t0 = mm_set1_ps(sa * (255 - ba))
t1 = mm_set1_ps(sa * ba) t1 = mm_set1_ps(sa * ba)
t2 = mm_set1_ps((255 - sa) * ba) t2 = mm_set1_ps((255 - sa) * ba)
va = mm_set1_ps(a) va = mm_set1_ps(a)
v255 = mm_set1_ps(255) v255 = mm_set1_ps(255)
values = cast[array[4, uint32]]( values = cast[array[4, uint32]](
mm_cvtps_epi32((t0 * vs + t1 * vm + t2 * vb) / va / v255) mm_cvtps_epi32((t0 * vs + t1 * vm + t2 * vb) / va / v255)
) )
result.r = values[0].uint8 result.r = values[0].uint8
result.g = values[1].uint8 result.g = values[1].uint8
result.b = values[2].uint8 result.b = values[2].uint8
result.a = a.uint8 result.a = a.uint8
proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} = proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} =
if backdrop.a == 0 and source.a == 0: if backdrop.a == 0 and source.a == 0:
return return
let let
vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0) vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0) vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
vm = mm_setr_ps(mixed.r.float32, mixed.g.float32, mixed.b.float32, 0) vm = mm_setr_ps(mixed.r.float32, mixed.g.float32, mixed.b.float32, 0)
alphaFix(backdrop, source, vb, vs, vm) alphaFix(backdrop, source, vb, vs, vm)
else:
proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} =
  ## Integer (non-SIMD) fallback that combines the already blended colour
  ## `mixed` with `backdrop` and `source` according to their alpha values.
  ##
  ## Each output channel is the weighted average of the `source`, `mixed`
  ## and `backdrop` channels with weights `t0`, `t1` and `t2`. When both
  ## input alphas are 0 the zero-initialised `result` is returned.
  let
    sa = source.a.int32
    ba = backdrop.a.int32
    t0 = sa * (255 - ba) # weight applied to the source channels
    t1 = sa * ba         # weight applied to the mixed channels
    t2 = (255 - sa) * ba # weight applied to the backdrop channels

  # t0 + t1 + t2 == 255 * sa + ba * (255 - sa), i.e. the resulting alpha
  # scaled by 255. Dividing by this exact value, rather than by a truncated
  # alpha followed by 255, keeps every channel within 0..255. The previous
  # form could yield e.g. 256 for sa = ba = 128 with all channels at 255.
  let a255 = t0 + t1 + t2

  if a255 == 0:
    # Only reachable when source.a == 0 and backdrop.a == 0.
    return

  let
    r = t0 * source.r.int32 + t1 * mixed.r.int32 + t2 * backdrop.r.int32
    g = t0 * source.g.int32 + t1 * mixed.g.int32 + t2 * backdrop.g.int32
    b = t0 * source.b.int32 + t1 * mixed.b.int32 + t2 * backdrop.b.int32

  result.r = (r div a255).uint8
  result.g = (g div a255).uint8
  result.b = (b div a255).uint8
  # Same value as sa + ba * (255 - sa) div 255, since 255 * sa is an exact
  # multiple of 255.
  result.a = (a255 div 255).uint8
proc min(a, b: uint32): uint32 {.inline.} = proc min(a, b: uint32): uint32 {.inline.} =
if a < b: a else: b if a < b: a else: b
@ -391,19 +417,22 @@ proc blendSoftLight(backdrop, source: ColorRGBA): ColorRGBA =
# (2 * source * backdrop) div 255 # (2 * source * backdrop) div 255
# ).uint8 # ).uint8
let when defined(amd64):
vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0) let
vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0) vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
v2 = mm_set1_ps(2) vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
v255 = mm_set1_ps(255) v2 = mm_set1_ps(2)
v255sq = mm_set1_ps(255 * 255) v255 = mm_set1_ps(255)
vm = ((v255 - v2 * vs) * vb * vb) / v255sq + (v2 * vs * vb) / v255 v255sq = mm_set1_ps(255 * 255)
values = cast[array[4, uint32]](mm_cvtps_epi32(vm)) vm = ((v255 - v2 * vs) * vb * vb) / v255sq + (v2 * vs * vb) / v255
values = cast[array[4, uint32]](mm_cvtps_epi32(vm))
result.r = values[0].uint8 result.r = values[0].uint8
result.g = values[1].uint8 result.g = values[1].uint8
result.b = values[2].uint8 result.b = values[2].uint8
result = alphaFix(backdrop, source, vb, vs, vm) result = alphaFix(backdrop, source, vb, vs, vm)
else:
blendSoftLightFloats(backdrop.color, source.color).rgba
proc blendDifference(backdrop, source: ColorRGBA): ColorRGBA = proc blendDifference(backdrop, source: ColorRGBA): ColorRGBA =
result.r = max(backdrop.r, source.r) - min(backdrop.r, source.r) result.r = max(backdrop.r, source.r) - min(backdrop.r, source.r)