From b895030088d6a540d173146359f8765cb4db9997 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg
Date: Wed, 9 Dec 2020 07:47:58 -0600
Subject: [PATCH] all int blends, simd alphafix + softlight, benchmarks

---
Reviewer note (text in this section is not part of the commit message):
blendLinearBurn now clamps with max(0, backdrop + source - 255). The
posted version used min(0, ...), which evaluates to 0 whenever the sum
exceeds 255 and to a negative number otherwise, so it could never yield
a valid linear-burn channel value. Line counts are unchanged.

 src/pixie/blends.nim       | 140 ++++++++++++++++++++-----
 tests/benchmark_blends.nim | 208 ++++++++++++++++++++++++++++---------
 2 files changed, 276 insertions(+), 72 deletions(-)

diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim
index df2865a..f71660f 100644
--- a/src/pixie/blends.nim
+++ b/src/pixie/blends.nim
@@ -111,6 +111,10 @@ proc SetSat(C: Color, s: float32): Color {.inline.} =
     result = (C - min([C.r, C.g, C.b])) * s / satC
 
 proc alphaFix(backdrop, source, mixed: Color): Color =
+  result.a = (source.a + backdrop.a * (1.0 - source.a))
+  if result.a == 0:
+    return
+
   let
     t0 = source.a * (1 - backdrop.a)
     t1 = source.a * backdrop.a
@@ -120,7 +124,6 @@ proc alphaFix(backdrop, source, mixed: Color): Color =
   result.g = t0 * source.g + t1 * mixed.g + t2 * backdrop.g
   result.b = t0 * source.b + t1 * mixed.b + t2 * backdrop.b
 
-  result.a = (source.a + backdrop.a * (1.0 - source.a))
   result.r /= result.a
   result.g /= result.a
   result.b /= result.a
@@ -258,25 +261,22 @@ proc blendExcludeMaskFloats(backdrop, source: Color): Color {.inline.} =
 proc blendOverwriteFloats(backdrop, source: Color): Color {.inline.} =
   source
 
-proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} =
+proc alphaFix(backdrop, source: ColorRGBA, vb, vs, vm: M128): ColorRGBA =
   let
     sa = source.a.float32
     ba = backdrop.a.float32
     a = sa + ba * (255 - sa) / 255
-  if a < 1:
+  if a == 0:
     return
 
   let
-    vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
-    vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
-    vm = mm_setr_ps(mixed.r.float32, mixed.g.float32, mixed.b.float32, 0)
-    vt0 = mm_set1_ps(sa * (255 - ba))
-    vt1 = mm_set1_ps(sa * ba)
-    vt2 = mm_set1_ps((255 - sa) * ba)
+    t0 = mm_set1_ps(sa * (255 - ba))
+    t1 = mm_set1_ps(sa * ba)
+    t2 = mm_set1_ps((255 - sa) * ba)
     va = mm_set1_ps(a)
     v255 = mm_set1_ps(255)
-    values = cast[array[4, int32]](
-      mm_cvtps_epi32((vt0 * vs + vt1 * vm + vt2 * vb) / va / v255)
+    values = cast[array[4, uint32]](
+      mm_cvtps_epi32((t0 * vs + t1 * vm + t2 * vb) / va / v255)
     )
 
   result.r = values[0].uint8
@@ -284,48 +284,140 @@ proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} =
   result.b = values[2].uint8
   result.a = a.uint8
 
+proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} =
+  if backdrop.a == 0 and source.a == 0:
+    return
+  let
+    vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
+    vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
+    vm = mm_setr_ps(mixed.r.float32, mixed.g.float32, mixed.b.float32, 0)
+  alphaFix(backdrop, source, vb, vs, vm)
+
+proc min(a, b: uint32): uint32 {.inline.} =
+  if a < b: a else: b
+
+proc screen(backdrop, source: uint32): uint8 {.inline.} =
+  (255 - ((255 - backdrop) * (255 - source)) div 255).uint8
+
+proc hardLight(backdrop, source: uint32): uint8 {.inline.} =
+  if source <= 127:
+    ((backdrop * 2 * source) div 255).uint8
+  else:
+    screen(backdrop, 2 * source - 255)
+
 proc blendNormal(backdrop, source: ColorRGBA): ColorRGBA =
   result = source
   result = alphaFix(backdrop, source, result)
 
 proc blendDarken(backdrop, source: ColorRGBA): ColorRGBA =
-  blendDarkenFloats(backdrop.color, source.color).rgba
+  result.r = min(backdrop.r, source.r)
+  result.g = min(backdrop.g, source.g)
+  result.b = min(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendMultiply(backdrop, source: ColorRGBA): ColorRGBA =
-  blendMultiplyFloats(backdrop.color, source.color).rgba
+  result.r = ((backdrop.r.uint32 * source.r) div 255).uint8
+  result.g = ((backdrop.g.uint32 * source.g) div 255).uint8
+  result.b = ((backdrop.b.uint32 * source.b) div 255).uint8
+  result = alphaFix(backdrop, source, result)
 
 proc blendLinearBurn(backdrop, source: ColorRGBA): ColorRGBA =
-  blendLinearBurnFloats(backdrop.color, source.color).rgba
+  result.r = max(0, backdrop.r.int16 + source.r.int16 - 255).uint8
+  result.g = max(0, backdrop.g.int16 + source.g.int16 - 255).uint8
+  result.b = max(0, backdrop.b.int16 + source.b.int16 - 255).uint8
+  result = alphaFix(backdrop, source, result)
 
 proc blendColorBurn(backdrop, source: ColorRGBA): ColorRGBA =
-  blendColorBurnFloats(backdrop.color, source.color).rgba
+  proc blend(backdrop, source: uint32): uint8 {.inline.} =
+    if backdrop == 255:
+      255.uint8
+    elif source == 0:
+      0
+    else:
+      255 - min(255, (255 * (255 - backdrop)) div source).uint8
+  result.r = blend(backdrop.r, source.r)
+  result.g = blend(backdrop.g, source.g)
+  result.b = blend(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendLighten(backdrop, source: ColorRGBA): ColorRGBA =
-  blendLightenFloats(backdrop.color, source.color).rgba
+  result.r = max(backdrop.r, source.r)
+  result.g = max(backdrop.g, source.g)
+  result.b = max(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendScreen(backdrop, source: ColorRGBA): ColorRGBA =
-  blendScreenFloats(backdrop.color, source.color).rgba
+  result.r = screen(backdrop.r, source.r)
+  result.g = screen(backdrop.g, source.g)
+  result.b = screen(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendLinearDodge(backdrop, source: ColorRGBA): ColorRGBA =
-  blendLinearDodgeFloats(backdrop.color, source.color).rgba
+  result.r = min(backdrop.r.uint32 + source.r, 255).uint8
+  result.g = min(backdrop.g.uint32 + source.g, 255).uint8
+  result.b = min(backdrop.b.uint32 + source.b, 255).uint8
+  result = alphaFix(backdrop, source, result)
 
 proc blendColorDodge(backdrop, source: ColorRGBA): ColorRGBA =
-  blendColorDodgeFloats(backdrop.color, source.color).rgba
+  proc blend(backdrop, source: uint32): uint8 {.inline.} =
+    if backdrop == 0:
+      0.uint8
+    elif source == 255:
+      255
+    else:
+      min(255, (255 * backdrop) div (255 - source)).uint8
+  result.r = blend(backdrop.r, source.r)
+  result.g = blend(backdrop.g, source.g)
+  result.b = blend(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendOverlay(backdrop, source: ColorRGBA): ColorRGBA =
-  blendOverlayFloats(backdrop.color, source.color).rgba
+  result.r = hardLight(source.r, backdrop.r)
+  result.g = hardLight(source.g, backdrop.g)
+  result.b = hardLight(source.b, backdrop.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendHardLight(backdrop, source: ColorRGBA): ColorRGBA =
-  blendHardLightFloats(backdrop.color, source.color).rgba
+  result.r = hardLight(backdrop.r, source.r)
+  result.g = hardLight(backdrop.g, source.g)
+  result.b = hardLight(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendSoftLight(backdrop, source: ColorRGBA): ColorRGBA =
-  blendSoftLightFloats(backdrop.color, source.color).rgba
+  # proc softLight(backdrop, source: int32): uint8 {.inline.} =
+  #   ## Pegtop
+  #   (
+  #     ((255 - 2 * source) * backdrop ^ 2) div 255 ^ 2 +
+  #     (2 * source * backdrop) div 255
+  #   ).uint8
+
+  let
+    vb = mm_setr_ps(backdrop.r.float32, backdrop.g.float32, backdrop.b.float32, 0)
+    vs = mm_setr_ps(source.r.float32, source.g.float32, source.b.float32, 0)
+    v2 = mm_set1_ps(2)
+    v255 = mm_set1_ps(255)
+    v255sq = mm_set1_ps(255 * 255)
+    vm = ((v255 - v2 * vs) * vb * vb) / v255sq + (v2 * vs * vb) / v255
+    values = cast[array[4, uint32]](mm_cvtps_epi32(vm))
+
+  result.r = values[0].uint8
+  result.g = values[1].uint8
+  result.b = values[2].uint8
+  result = alphaFix(backdrop, source, vb, vs, vm)
 
 proc blendDifference(backdrop, source: ColorRGBA): ColorRGBA =
-  blendDifferenceFloats(backdrop.color, source.color).rgba
+  result.r = max(backdrop.r, source.r) - min(backdrop.r, source.r)
+  result.g = max(backdrop.g, source.g) - min(backdrop.g, source.g)
+  result.b = max(backdrop.b, source.b) - min(backdrop.b, source.b)
+  result = alphaFix(backdrop, source, result)
 
 proc blendExclusion(backdrop, source: ColorRGBA): ColorRGBA =
-  blendExclusionFloats(backdrop.color, source.color).rgba
+  proc blend(backdrop, source: int32): uint8 {.inline.} =
+    max(0, backdrop + source - (2 * backdrop * source) div 255).uint8
+  result.r = blend(backdrop.r.int32, source.r.int32)
+  result.g = blend(backdrop.g.int32, source.g.int32)
+  result.b = blend(backdrop.b.int32, source.b.int32)
+  result = alphaFix(backdrop, source, result)
 
 proc blendColor(backdrop, source: ColorRGBA): ColorRGBA =
   blendColorFloats(backdrop.color, source.color).rgba
diff --git a/tests/benchmark_blends.nim b/tests/benchmark_blends.nim
index 629a0ae..666d51c 100644
--- a/tests/benchmark_blends.nim
+++ b/tests/benchmark_blends.nim
@@ -1,73 +1,185 @@
-import benchy, chroma, pixie, vmath
+import benchy, chroma, vmath
+
+include pixie/blends
+
+const iterations = 1_000_000
 
 let
-  a = newImage(1000, 1000)
-  b = newImage(1000, 1000)
+  a = rgba(100, 200, 100, 255)
+  b = rgba(25, 33, 100, 127)
 
-b.fill(rgba(127, 127, 127, 255))
+timeIt "blendNormal":
+  for i in 0 ..< iterations:
+    keep blendNormal(a, b)
 
-timeIt "bmNormal":
-  a.draw(b, vec2(0, 0), bmNormal)
+timeIt "blendNormalFloats":
+  for i in 0 ..< iterations:
+    keep blendNormalFloats(a.color, b.color).rgba
 
-timeIt "bmDarken":
-  a.draw(b, vec2(0, 0), bmDarken)
+timeIt "blendDarken":
+  for i in 0 ..< iterations:
+    keep blendDarken(a, b)
 
-timeIt "bmMultiply":
-  a.draw(b, vec2(0, 0), bmMultiply)
+timeIt "blendDarkenFloats":
+  for i in 0 ..< iterations:
+    keep blendDarkenFloats(a.color, b.color).rgba
 
-timeIt "bmLinearBurn":
-  a.draw(b, vec2(0, 0), bmLinearBurn)
+timeIt "blendMultiply":
+  for i in 0 ..< iterations:
+    keep blendMultiply(a, b)
 
-timeIt "bmColorBurn":
-  a.draw(b, vec2(0, 0), bmColorBurn)
+timeIt "blendMultiplyFloats":
+  for i in 0 ..< iterations:
+    keep blendMultiplyFloats(a.color, b.color).rgba
 
-timeIt "bmLighten":
-  a.draw(b, vec2(0, 0), bmLighten)
+timeIt "blendLinearBurn":
+  for i in 0 ..< iterations:
+    keep blendLinearBurn(a, b)
 
-timeIt "bmScreen":
-  a.draw(b, vec2(0, 0), bmScreen)
+timeIt "blendLinearBurnFloats":
+  for i in 0 ..< iterations:
+    keep blendLinearBurnFloats(a.color, b.color).rgba
 
-timeIt "bmLinearDodge":
-  a.draw(b, vec2(0, 0), bmLinearDodge)
+timeIt "blendColorBurn":
+  for i in 0 ..< iterations:
+    keep blendColorBurn(a, b)
 
-timeIt "bmColorDodge":
-  a.draw(b, vec2(0, 0), bmColorDodge)
+timeIt "blendColorBurnFloats":
+  for i in 0 ..< iterations:
+    keep blendColorBurnFloats(a.color, b.color).rgba
 
-timeIt "bmOverlay":
-  a.draw(b, vec2(0, 0), bmOverlay)
+timeIt "blendLighten":
+  for i in 0 ..< iterations:
+    keep blendLighten(a, b)
 
-timeIt "bmSoftLight":
-  a.draw(b, vec2(0, 0), bmSoftLight)
+timeIt "blendLightenFloats":
+  for i in 0 ..< iterations:
+    keep blendLightenFloats(a.color, b.color).rgba
 
-timeIt "bmHardLight":
-  a.draw(b, vec2(0, 0), bmHardLight)
+timeIt "blendScreen":
+  for i in 0 ..< iterations:
+    keep blendScreen(a, b)
 
-timeIt "bmDifference":
-  a.draw(b, vec2(0, 0), bmDifference)
+timeIt "blendScreenFloats":
+  for i in 0 ..< iterations:
+    keep blendScreenFloats(a.color, b.color).rgba
 
-timeIt "bmExclusion":
-  a.draw(b, vec2(0, 0), bmExclusion)
+timeIt "blendLinearDodge":
+  for i in 0 ..< iterations:
+    keep blendLinearDodge(a, b)
 
-timeIt "bmHue":
-  a.draw(b, vec2(0, 0), bmHue)
+timeIt "blendLinearDodgeFloats":
+  for i in 0 ..< iterations:
+    keep blendLinearDodgeFloats(a.color, b.color).rgba
 
-timeIt "bmSaturation":
-  a.draw(b, vec2(0, 0), bmSaturation)
+timeIt "blendColorDodge":
+  for i in 0 ..< iterations:
+    keep blendColorDodge(a, b)
 
-timeIt "bmColor":
-  a.draw(b, vec2(0, 0), bmColor)
+timeIt "blendColorDodgeFloats":
+  for i in 0 ..< iterations:
+    keep blendColorDodgeFloats(a.color, b.color).rgba
 
-timeIt "bmLuminosity":
-  a.draw(b, vec2(0, 0), bmLuminosity)
+timeIt "blendOverlay":
+  for i in 0 ..< iterations:
+    keep blendOverlay(a, b)
 
-timeIt "bmMask":
-  a.draw(b, vec2(0, 0), bmMask)
+timeIt "blendOverlayFloats":
+  for i in 0 ..< iterations:
+    keep blendOverlayFloats(a.color, b.color).rgba
 
-timeIt "bmSubtractMask":
-  a.draw(b, vec2(0, 0), bmSubtractMask)
+timeIt "blendSoftLight":
+  for i in 0 ..< iterations:
+    keep blendSoftLight(a, b)
 
-timeIt "bmIntersectMask":
-  a.draw(b, vec2(0, 0), bmIntersectMask)
+timeIt "blendSoftLightFloats":
+  for i in 0 ..< iterations:
+    keep blendSoftLightFloats(a.color, b.color).rgba
 
-timeIt "bmExcludeMask":
-  a.draw(b, vec2(0, 0), bmExcludeMask)
+timeIt "blendHardLight":
+  for i in 0 ..< iterations:
+    keep blendHardLight(a, b)
+
+timeIt "blendHardLightFloats":
+  for i in 0 ..< iterations:
+    keep blendHardLightFloats(a.color, b.color).rgba
+
+timeIt "blendDifference":
+  for i in 0 ..< iterations:
+    keep blendDifference(a, b)
+
+timeIt "blendDifferenceFloats":
+  for i in 0 ..< iterations:
+    keep blendDifferenceFloats(a.color, b.color).rgba
+
+timeIt "blendExclusion":
+  for i in 0 ..< iterations:
+    keep blendExclusion(a, b)
+
+timeIt "blendExclusionFloats":
+  for i in 0 ..< iterations:
+    keep blendExclusionFloats(a.color, b.color).rgba
+
+timeIt "blendHue":
+  for i in 0 ..< iterations:
+    keep blendHue(a, b)
+
+timeIt "blendHueFloats":
+  for i in 0 ..< iterations:
+    keep blendHueFloats(a.color, b.color).rgba
+
+timeIt "blendSaturation":
+  for i in 0 ..< iterations:
+    keep blendSaturation(a, b)
+
+timeIt "blendSaturationFloats":
+  for i in 0 ..< iterations:
+    keep blendSaturationFloats(a.color, b.color).rgba
+
+timeIt "blendColor":
+  for i in 0 ..< iterations:
+    keep blendColor(a, b)
+
+timeIt "blendColorFloats":
+  for i in 0 ..< iterations:
+    keep blendColorFloats(a.color, b.color).rgba
+
+timeIt "blendLuminosity":
+  for i in 0 ..< iterations:
+    keep blendLuminosity(a, b)
+
+timeIt "blendLuminosityFloats":
+  for i in 0 ..< iterations:
+    keep blendLuminosityFloats(a.color, b.color).rgba
+
+timeIt "blendMask":
+  for i in 0 ..< iterations:
+    keep blendMask(a, b)
+
+timeIt "blendMaskFloats":
+  for i in 0 ..< iterations:
+    keep blendMaskFloats(a.color, b.color).rgba
+
+timeIt "blendSubtractMask":
+  for i in 0 ..< iterations:
+    keep blendSubtractMask(a, b)
+
+timeIt "blendSubtractMaskFloats":
+  for i in 0 ..< iterations:
+    keep blendSubtractMaskFloats(a.color, b.color).rgba
+
+timeIt "blendIntersectMask":
+  for i in 0 ..< iterations:
+    keep blendIntersectMask(a, b)
+
+timeIt "blendIntersectMaskFloats":
+  for i in 0 ..< iterations:
+    keep blendIntersectMaskFloats(a.color, b.color).rgba
+
+timeIt "blendExcludeMask":
+  for i in 0 ..< iterations:
+    keep blendExcludeMask(a, b)
+
+timeIt "blendExcludeMaskFloats":
+  for i in 0 ..< iterations:
+    keep blendExcludeMaskFloats(a.color, b.color).rgba