From 913c7e94984216c64af9927633b8f2c4a8e3d770 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 14:01:13 -0600 Subject: [PATCH 1/9] more premul blends --- src/pixie/blends.nim | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index abcba6b..02a17c2 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -58,9 +58,21 @@ proc blendNormalPremultiplied*(backdrop, source: ColorRGBA): ColorRGBA = result.b = source.b + ((backdrop.b.uint32 * k) div 255).uint8 result.a = blendAlpha(backdrop.a, source.a) +proc blendMask(backdrop, source: ColorRGBA): ColorRGBA = + let k = source.a.uint32 + result.r = ((backdrop.r * k) div 255).uint8 + result.g = ((backdrop.g * k) div 255).uint8 + result.b = ((backdrop.b * k) div 255).uint8 + result.a = ((backdrop.a * k) div 255).uint8 + +proc blendOverwrite*(backdrop, source: ColorRGBA): ColorRGBA = + source + proc blenderPremultiplied*(blendMode: BlendMode): Blender = case blendMode: of bmNormal: blendNormalPremultiplied + of bmOverwrite: blendOverwrite + of bmMask: blendMask else: raise newException(PixieError, "No premultiplied blender for " & $blendMode) @@ -94,9 +106,13 @@ when defined(amd64) and not defined(pixieNoSimd): mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) ) + proc blendOverwriteSimd*(backdrop, source: M128i): M128i = + source + proc blenderSimd*(blendMode: BlendMode): BlenderSimd = case blendMode: of bmNormal: blendNormalPremultipliedSimd + of bmOverwrite: blendOverwriteSimd else: raise newException(PixieError, "No SIMD blender for " & $blendMode) @@ -530,10 +546,6 @@ proc blendHue(backdrop, source: ColorRGBA): ColorRGBA = proc blendSaturation(backdrop, source: ColorRGBA): ColorRGBA = blendSaturationFloats(backdrop.color, source.color).rgba -proc blendMask(backdrop, source: ColorRGBA): ColorRGBA = - result = backdrop - result.a = min(backdrop.a, source.a) - proc blendSubtractMask(backdrop, source: ColorRGBA): ColorRGBA = result = backdrop result.a = max(0, (backdrop.a.int32 * (255 - source.a.int32)) div 255).uint8 @@ -546,9 +558,6 @@ proc blendExcludeMask(backdrop, source: ColorRGBA): ColorRGBA = result = backdrop result.a = max(backdrop.a, source.a) - min(backdrop.a, source.a) -proc blendOverwrite(backdrop, source: ColorRGBA): ColorRGBA = - source - proc blender*(blendMode: BlendMode): Blender = case blendMode: of bmNormal: blendNormal From 29047d6a46c5559076a00591f4868db9308eeb14 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 15:22:22 -0600 Subject: [PATCH 2/9] blends, blurs, masks checkpoint --- src/pixie/blends.nim | 93 ++++++++------- src/pixie/images.nim | 205 ++++++++++++++++----------------- src/pixie/masks.nim | 23 ++++ src/pixie/paths.nim | 5 +- tests/images/masks/shifted.png | Bin 0 -> 163 bytes tests/images/masks/spread.png | Bin 0 -> 192 bytes tests/test_masks.nim | 19 ++- 7 files changed, 193 insertions(+), 152 deletions(-) create mode 100644 tests/images/masks/shifted.png create mode 100644 tests/images/masks/spread.png diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 02a17c2..5a38ef9 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -32,19 +32,15 @@ type bmExcludeMask Blender* = proc(backdrop, source: ColorRGBA): ColorRGBA - -when defined(amd64) and not defined(pixieNoSimd): - import nimsimd/sse2 - - type BlenderSimd* = proc(blackdrop, source: M128i): M128i + Masker* = proc(backdrop, source: uint8): uint8 when defined(release): {.push checks: off.} -proc blendAlpha(backdrop, source: uint8): uint8 {.inline.} = +proc blendAlpha*(backdrop, source: uint8): uint8 {.inline.} = source + ((backdrop.uint32 * (255 - source)) div 255).uint8 -proc blendNormalPremultiplied*(backdrop, source: ColorRGBA): ColorRGBA = +proc blendNormal(backdrop, source: ColorRGBA): ColorRGBA = if backdrop.a == 0: return source if source.a == 255: @@ -65,18 +61,36 @@ proc blendMask(backdrop, source: ColorRGBA): ColorRGBA = result.b = ((backdrop.b * k) div 255).uint8 result.a = ((backdrop.a * k) div 255).uint8 -proc blendOverwrite*(backdrop, source: ColorRGBA): ColorRGBA = +proc blendOverwrite(backdrop, source: ColorRGBA): ColorRGBA = source -proc blenderPremultiplied*(blendMode: BlendMode): Blender = +proc blender*(blendMode: BlendMode): Blender = case blendMode: - of bmNormal: blendNormalPremultiplied - of bmOverwrite: blendOverwrite + of bmNormal: blendNormal of bmMask: blendMask + of bmOverwrite: blendOverwrite else: - raise newException(PixieError, "No premultiplied blender for " & $blendMode) + blendNormal + # raise newException(PixieError, "No blender for " & $blendMode) + +proc maskMask(backdrop, source: uint8): uint8 = + ((backdrop.uint32 * source) div 255).uint8 + +proc maskOverwrite(backdrop, source: uint8): uint8 = + source + +proc masker*(blendMode: BlendMode): Masker = + case blendMode: + of bmMask: maskMask + of bmOverwrite: maskOverwrite + else: + raise newException(PixieError, "No masker for " & $blendMode) when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2 + + type BlenderSimd* = proc(blackdrop, source: M128i): M128i + proc blendNormalPremultipliedSimd*(backdrop, source: M128i): M128i = let alphaMask = mm_set1_epi32(cast[int32](0xff000000)) @@ -116,7 +130,6 @@ when defined(amd64) and not defined(pixieNoSimd): else: raise newException(PixieError, "No SIMD blender for " & $blendMode) - when defined(release): {.pop.} @@ -415,8 +428,8 @@ proc hardLight(backdrop, source: uint32): uint8 {.inline.} = else: screen(backdrop, 2 * source - 255) -proc blendNormal(backdrop, source: ColorRGBA): ColorRGBA = - blendNormalPremultiplied( +proc blendNormalOld(backdrop, source: ColorRGBA): ColorRGBA = + blendNormal( backdrop.toPremultipliedAlpha(), source.toPremultipliedAlpha() ).toStraightAlpha() @@ -558,28 +571,28 @@ proc blendExcludeMask(backdrop, source: ColorRGBA): ColorRGBA = result = backdrop result.a = max(backdrop.a, source.a) - min(backdrop.a, source.a) -proc blender*(blendMode: BlendMode): Blender = - case blendMode: - of bmNormal: blendNormal - of bmDarken: blendDarken - of bmMultiply: blendMultiply - of bmLinearBurn: blendLinearBurn - of bmColorBurn: blendColorBurn - of bmLighten: blendLighten - of bmScreen: blendScreen - of bmLinearDodge: blendLinearDodge - of bmColorDodge: blendColorDodge - of bmOverlay: blendOverlay - of bmSoftLight: blendSoftLight - of bmHardLight: blendHardLight - of bmDifference: blendDifference - of bmExclusion: blendExclusion - of bmHue: blendHue - of bmSaturation: blendSaturation - of bmColor: blendColor - of bmLuminosity: blendLuminosity - of bmMask: blendMask - of bmOverwrite: blendOverwrite - of bmSubtractMask: blendSubtractMask - of bmIntersectMask: blendIntersectMask - of bmExcludeMask: blendExcludeMask +# proc blender*(blendMode: BlendMode): Blender = +# case blendMode: +# of bmNormal: blendNormal +# of bmDarken: blendDarken +# of bmMultiply: blendMultiply +# of bmLinearBurn: blendLinearBurn +# of bmColorBurn: blendColorBurn +# of bmLighten: blendLighten +# of bmScreen: blendScreen +# of bmLinearDodge: blendLinearDodge +# of bmColorDodge: blendColorDodge +# of bmOverlay: blendOverlay +# of bmSoftLight: blendSoftLight +# of bmHardLight: blendHardLight +# of bmDifference: blendDifference +# of bmExclusion: blendExclusion +# of bmHue: blendHue +# of bmSaturation: blendSaturation +# of bmColor: blendColor +# of bmLuminosity: blendLuminosity +# of bmMask: blendMask +# of bmOverwrite: blendOverwrite +# of bmSubtractMask: blendSubtractMask +# of bmIntersectMask: blendIntersectMask +# of bmExcludeMask: blendExcludeMask diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 9d00bd6..fc47849 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -369,6 +369,13 @@ proc invert*(target: Image | Mask) = for j in i ..< target.data.len: target.data[j] = (255 - target.data[j]).uint8 +proc newMask*(image: Image): Mask = + ## Returns a new mask using the alpha values of the parameter image. + result = newMask(image.width, image.height) + + for i, rgba in image.data: + result.data[i] = rgba.a + proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA = let minX = floor(x) @@ -393,21 +400,10 @@ proc drawCorrect( ) = ## Draws one image onto another using matrix with color blending. - proc validateMaskBlendMode() = - if blendMode notin {bmMask}: - raise newException( - PixieError, - "Blend mode " & $blendMode & " not supported for masks" - ) - when type(a) is Image: - when type(b) is Image: - let blender = blendMode.blenderPremultiplied() - else: # b is a Mask - validateMaskBlendMode() + let blender = blendMode.blender() else: # a is a Mask - when type(b) is Mask: - validateMaskBlendMode() + let masker = blendMode.masker() var matInv = mat.inverse() @@ -435,27 +431,23 @@ proc drawCorrect( yFloat = samplePos.y - h when type(a) is Image: - let rgba = a.getRgbaUnsafe(x, y) - var blended: ColorRGBA + let backdrop = a.getRgbaUnsafe(x, y) when type(b) is Image: - let sample = b.getRgbaSmooth(xFloat, yFloat) - blended = blender(rgba, sample) + let + sample = b.getRgbaSmooth(xFloat, yFloat) + blended = blender(backdrop, sample) else: # b is a Mask - let sample = b.getValueSmooth(xFloat, yFloat).uint32 - blended = rgba( - ((rgba.r * sample) div 255).uint8, - ((rgba.g * sample) div 255).uint8, - ((rgba.b * sample) div 255).uint8, - ((rgba.a * sample) div 255).uint8 - ) + let + sample = b.getValueSmooth(xFloat, yFloat) + blended = blender(backdrop, rgba(0, 0, 0, sample)) a.setRgbaUnsafe(x, y, blended) - else: # a is a Mask, b must be a mask - let value = a.getValueUnsafe(x, y) + else: # a is a Mask + let backdrop = a.getValueUnsafe(x, y) when type(b) is Image: - let sample = b.getRgbaSmooth(xFloat, yFloat).a.uint32 - else: # a is a Mask - let sample = b.getValueSmooth(xFloat, yFloat).uint32 - a.setValueUnsafe(x, y, ((value * sample) div 255).uint8) + let sample = b.getRgbaSmooth(xFloat, yFloat).a + else: # b is a Mask + let sample = b.getValueSmooth(xFloat, yFloat) + a.setValueUnsafe(x, y, masker(backdrop, sample)) proc draw*(image: Image, mask: Mask, mat: Mat3, blendMode = bmMask) = image.drawCorrect(mask, mat, blendMode) @@ -496,7 +488,7 @@ proc gaussianLookup(radius: int): seq[float32] = when defined(release): {.pop.} -proc blur*(image: Image, radius: float32) = +proc blur*(target: Image | Mask, radius: float32) = ## Applies Gaussian blur to the image given a radius. let radius = round(radius).int if radius == 0: @@ -504,73 +496,68 @@ proc blur*(image: Image, radius: float32) = let lookup = gaussianLookup(radius) - # Blur in the X direction. - var blurX = newImage(image.width, image.height) - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var c: Color - var totalA = 0.0 - for xb in -radius .. radius: - let c2 = image[x + xb, y].color - let a = lookup[xb + radius] - let aa = c2.a * a - totalA += aa - c.r += c2.r * aa - c.g += c2.g * aa - c.b += c2.b * aa - c.a += c2.a * a - c.r = c.r / totalA - c.g = c.g / totalA - c.b = c.b / totalA - blurX.setRgbaUnsafe(x, y, c.rgba) + when type(target) is Image: + # Blur in the X direction. + var blurX = newImage(target.width, target.height) + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var c: Color + var totalA = 0.0 + for xb in -radius .. radius: + let c2 = target[x + xb, y].color + let a = lookup[xb + radius] + let aa = c2.a * a + totalA += aa + c.r += c2.r * aa + c.g += c2.g * aa + c.b += c2.b * aa + c.a += c2.a * a + c.r = c.r / totalA + c.g = c.g / totalA + c.b = c.b / totalA + blurX.setRgbaUnsafe(x, y, c.rgba) - # Blur in the Y direction. - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var c: Color - var totalA = 0.0 - for yb in -radius .. radius: - let c2 = blurX[x, y + yb].color - let a = lookup[yb + radius] - let aa = c2.a * a - totalA += aa - c.r += c2.r * aa - c.g += c2.g * aa - c.b += c2.b * aa - c.a += c2.a * a - c.r = c.r / totalA - c.g = c.g / totalA - c.b = c.b / totalA - image.setRgbaUnsafe(x, y, c.rgba) + # Blur in the Y direction. + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var c: Color + var totalA = 0.0 + for yb in -radius .. radius: + let c2 = blurX[x, y + yb].color + let a = lookup[yb + radius] + let aa = c2.a * a + totalA += aa + c.r += c2.r * aa + c.g += c2.g * aa + c.b += c2.b * aa + c.a += c2.a * a + c.r = c.r / totalA + c.g = c.g / totalA + c.b = c.b / totalA + target.setRgbaUnsafe(x, y, c.rgba) -proc blurAlpha*(image: Image, radius: float32) = - ## Applies Gaussian blur to the image given a radius. - let radius = round(radius).int - if radius == 0: - return + else: # target is a Mask - let lookup = gaussianLookup(radius) + # Blur in the X direction. + var blurX = newMask(target.width, target.height) + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var alpha: float32 + for xb in -radius .. radius: + let c2 = target[x + xb, y] + let a = lookup[xb + radius] + alpha += c2.float32 * a + blurX.setValueUnsafe(x, y, alpha.uint8) - # Blur in the X direction. - var blurX = newImage(image.width, image.height) - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var alpha: float32 - for xb in -radius .. radius: - let c2 = image[x + xb, y] - let a = lookup[xb + radius] - alpha += c2.a.float32 * a - blurX.setRgbaUnsafe(x, y, rgba(0, 0, 0, alpha.uint8)) - - # Blur in the Y direction and modify image. - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var alpha: float32 - for yb in -radius .. radius: - let c2 = blurX[x, y + yb] - let a = lookup[yb + radius] - alpha += c2.a.float32 * a - image.setRgbaUnsafe(x, y, rgba(0, 0, 0, alpha.uint8)) + # Blur in the Y direction and modify image. + for y in 0 ..< target.height: + for x in 0 ..< target.width: + var alpha: float32 + for yb in -radius .. radius: + let c2 = blurX[x, y + yb] + let a = lookup[yb + radius] + alpha += c2.float32 * a + target.setValueUnsafe(x, y, alpha.uint8) proc sharpOpacity*(image: Image) = ## Sharpens the opacity to extreme. @@ -688,15 +675,19 @@ proc resize*(srcImage: Image, width, height: int): Image = bmOverwrite ) -proc shift*(image: Image, offset: Vec2) = - ## Shifts the image by offset. +proc shift*(target: Image | Mask, offset: Vec2) = + ## Shifts the target by offset. if offset != vec2(0, 0): - let copy = image.copy() # Copy to read from. - image.fill(rgba(0, 0, 0, 0)) # Reset this for being drawn to. - image.draw(copy, offset, bmOverwrite) # Draw copy into image. + let copy = target.copy() # Copy to read from + # Reset target for being drawn to + when type(target) is Image: + target.fill(rgba(0, 0, 0, 0)) + else: + target.fill(0) + target.draw(copy, offset, bmOverwrite) # Draw copy at offset proc spread*(image: Image, spread: float32) = - ## Grows the image as a mask by spread. + ## Grows the target as a mask by spread. if spread == 0: return if spread < 0: @@ -719,18 +710,16 @@ proc spread*(image: Image, spread: float32) = image.setRgbaUnsafe(x, y, rgba(0, 0, 0, maxAlpha)) proc shadow*( - mask: Image, offset: Vec2, spread, blur: float32, color: ColorRGBA + image: Image, offset: Vec2, spread, blur: float32, color: ColorRGBA ): Image = ## Create a shadow of the image with the offset, spread and blur. - # TODO: copying is bad here due to this being slow already, - # we're doing it tho to avoid mutating param and returning new Image. - let copy = mask.copy() + let mask = image.newMask() if offset != vec2(0, 0): - copy.shift(offset) + mask.shift(offset) if spread > 0: - copy.spread(spread) + mask.spread(spread) if blur > 0: - copy.blurAlpha(blur) + mask.blur(blur) result = newImage(mask.width, mask.height) result.fill(color) - result.draw(copy, blendMode = bmMask) + result.draw(mask, blendMode = bmMask) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 50c076e..3497154 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -109,5 +109,28 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 = lerp(bottomMix, topMix, diffY) +proc spread*(mask: Mask, spread: float32) = + ## Grows the mask by spread. + if spread == 0: + return + if spread < 0: + raise newException(PixieError, "Cannot apply negative spread") + + let + copy = mask.copy() + spread = round(spread).int + for y in 0 ..< mask.height: + for x in 0 ..< mask.width: + var maxValue: uint8 + block blurBox: + for bx in -spread .. spread: + for by in -spread .. spread: + let value = copy[x + bx, y + by] + if value > maxValue: + maxValue = value + if maxValue == 255: + break blurBox + mask.setValueUnsafe(x, y, maxValue) + when defined(release): {.pop.} diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 71034d8..c4151d8 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -931,7 +931,7 @@ proc fillShapes( startX = max(0, bounds.x.int) startY = max(0, bounds.y.int) stopY = min(image.height, (bounds.y + bounds.h).int) - blender = blendMode.blenderPremultiplied() + blender = blendMode.blender() when defined(amd64) and not defined(pixieNoSimd): let blenderSimd = blendMode.blenderSimd() @@ -1136,8 +1136,7 @@ proc fillShapes( if coverage != 0: let backdrop = mask.getValueUnsafe(x, y) - blended = - coverage + ((backdrop.uint32 * (255 - coverage)) div 255).uint8 + blended = blendAlpha(backdrop, coverage) mask.setValueUnsafe(x, y, blended) inc x diff --git a/tests/images/masks/shifted.png b/tests/images/masks/shifted.png new file mode 100644 index 0000000000000000000000000000000000000000..2beb2a4f7e1e57407c0519a1f5e2fcb417ad40cc GIT binary patch literal 163 zcmeAS@N?(olHy`uVBq!ia0vp^DIm-NBp5q;ya7pem!Nn>BZcPmMPuj!J3n$3=a=Q>!{ZJ^aR=N>FVdQ I&MBb@03N+T@c;k- literal 0 HcmV?d00001 diff --git a/tests/images/masks/spread.png b/tests/images/masks/spread.png new file mode 100644 index 0000000000000000000000000000000000000000..96bfceb15b73197b5c7cd10325d8e1d87b5c11ac GIT binary patch literal 192 zcmeAS@N?(olHy`uVBq!ia0vp^DIm-NBp5rPg%K!iX literal 0 HcmV?d00001 diff --git a/tests/test_masks.nim b/tests/test_masks.nim index 43ffd2e..453dcce 100644 --- a/tests/test_masks.nim +++ b/tests/test_masks.nim @@ -1,4 +1,4 @@ -import chroma, pixie, pixie/fileformats/png +import chroma, pixie, pixie/fileformats/png, vmath block: let mask = newMask(100, 100) @@ -74,3 +74,20 @@ block: a.draw(b) writeFile("tests/images/masks/imageMaskedMask.png", a.encodePng()) + +block: + let a = newMask(100, 100) + a.fill(255) + a.shift(vec2(10, 10)) + writeFile("tests/images/masks/shifted.png", a.encodePng()) + +block: + var path: Path + path.rect(40, 40, 20, 20) + + let a = newMask(100, 100) + a.fillPath(path) + + a.spread(10) + + writeFile("tests/images/masks/spread.png", a.encodePng()) From c95df08cbe607a1bd047ffb6908b7c4022e9a252 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 15:28:47 -0600 Subject: [PATCH 3/9] remove old --- src/pixie/images.nim | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index fc47849..a8648fc 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -686,29 +686,6 @@ proc shift*(target: Image | Mask, offset: Vec2) = target.fill(0) target.draw(copy, offset, bmOverwrite) # Draw copy at offset -proc spread*(image: Image, spread: float32) = - ## Grows the target as a mask by spread. - if spread == 0: - return - if spread < 0: - raise newException(PixieError, "Cannot apply negative spread") - - let - copy = image.copy() - spread = round(spread).int - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var maxAlpha = 0.uint8 - block blurBox: - for bx in -spread .. spread: - for by in -spread .. spread: - let alpha = copy[x + bx, y + by].a - if alpha > maxAlpha: - maxAlpha = alpha - if maxAlpha == 255: - break blurBox - image.setRgbaUnsafe(x, y, rgba(0, 0, 0, maxAlpha)) - proc shadow*( image: Image, offset: Vec2, spread, blur: float32, color: ColorRGBA ): Image = From 264b30b118486c63c225d31c5b7cae1fdca0328f Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 16:08:08 -0600 Subject: [PATCH 4/9] image.newMask 35% faster with simd --- src/pixie/images.nim | 41 ++++++++++++++++++++++++++++++++++++-- tests/benchmark_images.nim | 6 ++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index a8648fc..6d900d5 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -373,8 +373,45 @@ proc newMask*(image: Image): Mask = ## Returns a new mask using the alpha values of the parameter image. result = newMask(image.width, image.height) - for i, rgba in image.data: - result.data[i] = rgba.a + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let mask32 = cast[M128i]([uint32.high, 0, 0, 0]) + + for _ in countup(0, image.data.len - 16, 16): + var + a = mm_loadu_si128(image.data[i + 0].addr) + b = mm_loadu_si128(image.data[i + 4].addr) + c = mm_loadu_si128(image.data[i + 8].addr) + d = mm_loadu_si128(image.data[i + 12].addr) + + template pack(v: var M128i) = + # Shuffle the alpha values for these 4 colors to the first 4 bytes + v = mm_srli_epi32(v, 24) + let + i = mm_srli_si128(v, 3) + j = mm_srli_si128(v, 6) + k = mm_srli_si128(v, 9) + v = mm_or_si128(mm_or_si128(v, i), mm_or_si128(j, k)) + v = mm_and_si128(v, mask32) + + pack(a) + pack(b) + pack(c) + pack(d) + + b = mm_slli_si128(b, 4) + c = mm_slli_si128(c, 8) + d = mm_slli_si128(d, 12) + + mm_storeu_si128( + result.data[i].addr, + mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d)) + ) + + i += 16 + + for j in i ..< image.data.len: + result.data[i] = image.data[j].a proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA = let diff --git a/tests/benchmark_images.nim b/tests/benchmark_images.nim index df46382..33a4adf 100644 --- a/tests/benchmark_images.nim +++ b/tests/benchmark_images.nim @@ -72,6 +72,12 @@ block: reset() +timeIt "newMask": + let mask = image.newMask() + doAssert mask[0, 0] == image[0, 0].a + +reset() + timeIt "lerp integers": for i in 0 ..< 100000: let c = image[0, 0] From b074e9bfc7f814c78bcf7f01fd2aeb9b93c13f10 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 16:37:07 -0600 Subject: [PATCH 5/9] mask blends --- src/pixie/blends.nim | 61 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 5a38ef9..f6a13d5 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -61,14 +61,51 @@ proc blendMask(backdrop, source: ColorRGBA): ColorRGBA = result.b = ((backdrop.b * k) div 255).uint8 result.a = ((backdrop.a * k) div 255).uint8 +proc blendSubtractMask(backdrop, source: ColorRGBA): ColorRGBA = + let a = (backdrop.a.uint32 * (255 - source.a)) div 255 + result.r = ((backdrop.r * a) div 255).uint8 + result.g = ((backdrop.g * a) div 255).uint8 + result.b = ((backdrop.b * a) div 255).uint8 + result.a = a.uint8 + +proc blendIntersectMask(backdrop, source: ColorRGBA): ColorRGBA = + blendMask(backdrop, source) + +proc blendExcludeMask(backdrop, source: ColorRGBA): ColorRGBA = + let a = max(backdrop.a, source.a).uint32 - min(backdrop.a, source.a) + result.r = ((backdrop.r * a) div 255).uint8 + result.g = ((backdrop.g * a) div 255).uint8 + result.b = ((backdrop.b * a) div 255).uint8 + result.a = a.uint8 + proc blendOverwrite(backdrop, source: ColorRGBA): ColorRGBA = source proc blender*(blendMode: BlendMode): Blender = case blendMode: of bmNormal: blendNormal + # of bmDarken: blendDarken + # of bmMultiply: blendMultiply + # of bmLinearBurn: blendLinearBurn + # of bmColorBurn: blendColorBurn + # of bmLighten: blendLighten + # of bmScreen: blendScreen + # of bmLinearDodge: blendLinearDodge + # of bmColorDodge: blendColorDodge + # of bmOverlay: blendOverlay + # of bmSoftLight: blendSoftLight + # of bmHardLight: blendHardLight + # of bmDifference: blendDifference + # of bmExclusion: blendExclusion + # of bmHue: blendHue + # of bmSaturation: blendSaturation + # of bmColor: blendColor + # of bmLuminosity: blendLuminosity of bmMask: blendMask of bmOverwrite: blendOverwrite + of bmSubtractMask: blendSubtractMask + of bmIntersectMask: blendIntersectMask + of bmExcludeMask: blendExcludeMask else: blendNormal # raise newException(PixieError, "No blender for " & $blendMode) @@ -76,6 +113,15 @@ proc blender*(blendMode: BlendMode): Blender = proc maskMask(backdrop, source: uint8): uint8 = ((backdrop.uint32 * source) div 255).uint8 +proc maskSubtract(backdrop, source: uint8): uint8 = + ((backdrop.uint32 * (255 - source)) div 255).uint8 + +proc maskIntersect(backdrop, source: uint8): uint8 = + maskMask(backdrop, source) + +proc maskExclude(backdrop, source: uint8): uint8 = + max(backdrop, source) - min(backdrop, source) + proc maskOverwrite(backdrop, source: uint8): uint8 = source @@ -83,6 +129,9 @@ proc masker*(blendMode: BlendMode): Masker = case blendMode: of bmMask: maskMask of bmOverwrite: maskOverwrite + of bmSubtractMask: maskSubtract + of bmIntersectMask: maskIntersect + of bmExcludeMask: maskExclude else: raise newException(PixieError, "No masker for " & $blendMode) @@ -559,18 +608,6 @@ proc blendHue(backdrop, source: ColorRGBA): ColorRGBA = proc blendSaturation(backdrop, source: ColorRGBA): ColorRGBA = blendSaturationFloats(backdrop.color, source.color).rgba -proc blendSubtractMask(backdrop, source: ColorRGBA): ColorRGBA = - result = backdrop - result.a = max(0, (backdrop.a.int32 * (255 - source.a.int32)) div 255).uint8 - -proc blendIntersectMask(backdrop, source: ColorRGBA): ColorRGBA = - result = backdrop - result.a = ((backdrop.a.uint32 * (source.a.uint32)) div 255).uint8 - -proc blendExcludeMask(backdrop, source: ColorRGBA): ColorRGBA = - result = backdrop - result.a = max(backdrop.a, source.a) - min(backdrop.a, source.a) - # proc blender*(blendMode: BlendMode): Blender = # case blendMode: # of bmNormal: blendNormal From 3ec1710e2e7347d1b4131dc430c0e23e83274236 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 21:13:23 -0600 Subject: [PATCH 6/9] blurs --- src/pixie/images.nim | 128 +++++++++++++++++++++---------------- tests/benchmark_images.nim | 5 ++ tests/benchmark_masks.nim | 5 ++ 3 files changed, 82 insertions(+), 56 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 6d900d5..a91968d 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -411,7 +411,7 @@ proc newMask*(image: Image): Mask = i += 16 for j in i ..< image.data.len: - result.data[i] = image.data[j].a + result.data[j] = image.data[j].a proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA = let @@ -508,70 +508,81 @@ proc draw*( ) {.inline.} = mask.draw(image, translate(pos), blendMode) -proc gaussianLookup(radius: int): seq[float32] = - ## Compute lookup table for 1d Gaussian kernel. - result.setLen(radius * 2 + 1) - var total = 0.0 - for xb in -radius .. radius: - let - s = radius.float32 / 2.2 # 2.2 matches Figma. - x = xb.float32 - a = 1 / sqrt(2 * PI * s^2) * exp(-1 * x^2 / (2 * s^2)) - result[xb + radius] = a - total += a - for xb in -radius .. radius: - result[xb + radius] = result[xb + radius] / total - -when defined(release): - {.pop.} - proc blur*(target: Image | Mask, radius: float32) = ## Applies Gaussian blur to the image given a radius. let radius = round(radius).int if radius == 0: return + proc gaussianLookup(radius: int): seq[uint32] = + ## Compute lookup table for 1d Gaussian kernel. + ## Values are [0, 255] * 1024. + result.setLen(radius * 2 + 1) + + var + floats = newSeq[float32](result.len) + total = 0.0 + for xb in -radius .. radius: + let + s = radius.float32 / 2.2 # 2.2 matches Figma. + x = xb.float32 + a = 1 / sqrt(2 * PI * s^2) * exp(-1 * x^2 / (2 * s^2)) + floats[xb + radius] = a + total += a + for xb in -radius .. radius: + floats[xb + radius] = floats[xb + radius] / total + + for i, f in floats: + result[i] = round(f * 255 * 1024).uint32 + let lookup = gaussianLookup(radius) when type(target) is Image: + + template `*`(sample: ColorRGBA, a: uint32): array[4, uint32] = + [ + sample.r * a, + sample.g * a, + sample.b * a, + sample.a * a + ] + + template `+=`(values: var array[4, uint32], sample: array[4, uint32]) = + values[0] += sample[0] + values[1] += sample[1] + values[2] += sample[2] + values[3] += sample[3] + + template rgba(values: array[4, uint32]): ColorRGBA = + rgba( + (values[0] div 1024 div 255).uint8, + (values[1] div 1024 div 255).uint8, + (values[2] div 1024 div 255).uint8, + (values[3] div 1024 div 255).uint8 + ) + # Blur in the X direction. var blurX = newImage(target.width, target.height) for y in 0 ..< target.height: for x in 0 ..< target.width: - var c: Color - var totalA = 0.0 + var values: array[4, uint32] for xb in -radius .. radius: - let c2 = target[x + xb, y].color - let a = lookup[xb + radius] - let aa = c2.a * a - totalA += aa - c.r += c2.r * aa - c.g += c2.g * aa - c.b += c2.b * aa - c.a += c2.a * a - c.r = c.r / totalA - c.g = c.g / totalA - c.b = c.b / totalA - blurX.setRgbaUnsafe(x, y, c.rgba) + let + sample = target[x + xb, y] + a = lookup[xb + radius].uint32 + values += sample * a + blurX.setRgbaUnsafe(x, y, values.rgba()) # Blur in the Y direction. for y in 0 ..< target.height: for x in 0 ..< target.width: - var c: Color - var totalA = 0.0 + var values: array[4, uint32] for yb in -radius .. radius: - let c2 = blurX[x, y + yb].color - let a = lookup[yb + radius] - let aa = c2.a * a - totalA += aa - c.r += c2.r * aa - c.g += c2.g * aa - c.b += c2.b * aa - c.a += c2.a * a - c.r = c.r / totalA - c.g = c.g / totalA - c.b = c.b / totalA - target.setRgbaUnsafe(x, y, c.rgba) + let + sample = blurX[x, y + yb] + a = lookup[yb + radius].uint32 + values += sample * a + target.setRgbaUnsafe(x, y, values.rgba()) else: # target is a Mask @@ -579,22 +590,27 @@ proc blur*(target: Image | Mask, radius: float32) = var blurX = newMask(target.width, target.height) for y in 0 ..< target.height: for x in 0 ..< target.width: - var alpha: float32 + var value: uint32 for xb in -radius .. radius: - let c2 = target[x + xb, y] - let a = lookup[xb + radius] - alpha += c2.float32 * a - blurX.setValueUnsafe(x, y, alpha.uint8) + let + sample = target[x + xb, y] + a = lookup[xb + radius].uint32 + value += sample * a + blurX.setValueUnsafe(x, y, (value div 1024 div 255).uint8) # Blur in the Y direction and modify image. for y in 0 ..< target.height: for x in 0 ..< target.width: - var alpha: float32 + var value: uint32 for yb in -radius .. radius: - let c2 = blurX[x, y + yb] - let a = lookup[yb + radius] - alpha += c2.float32 * a - target.setValueUnsafe(x, y, alpha.uint8) + let + sample = blurX[x, y + yb] + a = lookup[yb + radius].uint32 + value += sample * a + target.setValueUnsafe(x, y, (value div 1024 div 255).uint8) + +when defined(release): + {.pop.} proc sharpOpacity*(image: Image) = ## Sharpens the opacity to extreme. diff --git a/tests/benchmark_images.nim b/tests/benchmark_images.nim index 33a4adf..ed15cfd 100644 --- a/tests/benchmark_images.nim +++ b/tests/benchmark_images.nim @@ -78,6 +78,11 @@ timeIt "newMask": reset() +timeIt "blur": + image.blur(40) + +reset() + timeIt "lerp integers": for i in 0 ..< 100000: let c = image[0, 0] diff --git a/tests/benchmark_masks.nim b/tests/benchmark_masks.nim index 3693d7c..71c2e65 100644 --- a/tests/benchmark_masks.nim +++ b/tests/benchmark_masks.nim @@ -20,3 +20,8 @@ reset() timeIt "applyOpacity": mask.applyOpacity(0.5) + +reset() + +timeIt "blur": + mask.blur(40) From d9fcf71f43ee78879ec13fa4fa0cd1a78ee01a67 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 23:16:52 -0600 Subject: [PATCH 7/9] blendExclusion passes tests --- src/pixie/blends.nim | 55 +++++++++++--------------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index f6a13d5..c91aef1 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -54,6 +54,14 @@ proc blendNormal(backdrop, source: ColorRGBA): ColorRGBA = result.b = source.b + ((backdrop.b.uint32 * k) div 255).uint8 result.a = blendAlpha(backdrop.a, source.a) +proc blendExclusion(backdrop, source: ColorRGBA): ColorRGBA = + proc blend(backdrop, source: int32): uint8 {.inline.} = + max(0, backdrop + source - (2 * backdrop * source) div 255).uint8 + result.r = blend(backdrop.r.int32, source.r.int32) + result.g = blend(backdrop.g.int32, source.g.int32) + result.b = blend(backdrop.b.int32, source.b.int32) + result.a = blendAlpha(backdrop.a, source.a) + proc blendMask(backdrop, source: ColorRGBA): ColorRGBA = let k = source.a.uint32 result.r = ((backdrop.r * k) div 255).uint8 @@ -81,6 +89,10 @@ proc blendExcludeMask(backdrop, source: ColorRGBA): ColorRGBA = proc blendOverwrite(backdrop, source: ColorRGBA): ColorRGBA = source +proc blendWhite(backdrop, source: ColorRGBA): ColorRGBA = + ## For testing + rgba(255, 255, 255, 255) + proc blender*(blendMode: BlendMode): Blender = case blendMode: of bmNormal: blendNormal @@ -96,7 +108,7 @@ proc blender*(blendMode: BlendMode): Blender = # of bmSoftLight: blendSoftLight # of bmHardLight: blendHardLight # of bmDifference: blendDifference - # of bmExclusion: blendExclusion + of bmExclusion: blendExclusion # of bmHue: blendHue # of bmSaturation: blendSaturation # of bmColor: blendColor @@ -107,6 +119,7 @@ proc blender*(blendMode: BlendMode): Blender = of bmIntersectMask: blendIntersectMask of bmExcludeMask: blendExcludeMask else: + # blendWhite blendNormal # raise newException(PixieError, "No blender for " & $blendMode) @@ -477,12 +490,6 @@ proc hardLight(backdrop, source: uint32): uint8 {.inline.} = else: screen(backdrop, 2 * source - 255) -proc blendNormalOld(backdrop, source: ColorRGBA): ColorRGBA = - blendNormal( - backdrop.toPremultipliedAlpha(), - source.toPremultipliedAlpha() - ).toStraightAlpha() - proc blendDarken(backdrop, source: ColorRGBA): ColorRGBA = result.r = min(backdrop.r, source.r) result.g = min(backdrop.g, source.g) @@ -588,14 +595,6 @@ proc blendDifference(backdrop, source: ColorRGBA): ColorRGBA = result.b = max(backdrop.b, source.b) - min(backdrop.b, source.b) result = alphaFix(backdrop, source, result) -proc blendExclusion(backdrop, source: ColorRGBA): ColorRGBA = - proc blend(backdrop, source: int32): uint8 {.inline.} = - max(0, backdrop + source - (2 * backdrop * source) div 255).uint8 - result.r = blend(backdrop.r.int32, source.r.int32) - result.g = blend(backdrop.g.int32, source.g.int32) - result.b = blend(backdrop.b.int32, source.b.int32) - result = alphaFix(backdrop, source, result) - proc blendColor(backdrop, source: ColorRGBA): ColorRGBA = blendColorFloats(backdrop.color, source.color).rgba @@ -607,29 +606,3 @@ proc blendHue(backdrop, source: ColorRGBA): ColorRGBA = proc blendSaturation(backdrop, source: ColorRGBA): ColorRGBA = blendSaturationFloats(backdrop.color, source.color).rgba - -# proc blender*(blendMode: BlendMode): Blender = -# case blendMode: -# of bmNormal: blendNormal -# of bmDarken: blendDarken -# of bmMultiply: blendMultiply -# of bmLinearBurn: blendLinearBurn -# of bmColorBurn: blendColorBurn -# of bmLighten: blendLighten -# of bmScreen: blendScreen -# of bmLinearDodge: blendLinearDodge -# of bmColorDodge: blendColorDodge -# of bmOverlay: blendOverlay -# of bmSoftLight: blendSoftLight -# of bmHardLight: blendHardLight -# of bmDifference: blendDifference -# of bmExclusion: blendExclusion -# of bmHue: blendHue -# of bmSaturation: blendSaturation -# of bmColor: blendColor -# of bmLuminosity: blendLuminosity -# of bmMask: blendMask -# of bmOverwrite: blendOverwrite -# of bmSubtractMask: blendSubtractMask -# of bmIntersectMask: blendIntersectMask -# of bmExcludeMask: blendExcludeMask From 59b186b5357fddc2c4dcc5e384b00d17bafc956e Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 23:32:13 -0600 Subject: [PATCH 8/9] group float timings (to easily disable, they are going away) --- src/pixie/blends.nim | 4 +- tests/benchmark_blends.nim | 276 ++++++++++++++++++------------------- 2 files changed, 138 insertions(+), 142 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index c91aef1..ac3810c 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -153,7 +153,7 @@ when defined(amd64) and not defined(pixieNoSimd): type BlenderSimd* = proc(blackdrop, source: M128i): M128i - proc blendNormalPremultipliedSimd*(backdrop, source: M128i): M128i = + proc blendNormalSimd*(backdrop, source: M128i): M128i = let alphaMask = mm_set1_epi32(cast[int32](0xff000000)) oddMask = mm_set1_epi16(cast[int16](0xff00)) @@ -187,7 +187,7 @@ when defined(amd64) and not defined(pixieNoSimd): proc blenderSimd*(blendMode: BlendMode): BlenderSimd = case blendMode: - of bmNormal: blendNormalPremultipliedSimd + of bmNormal: blendNormalSimd of bmOverwrite: blendOverwriteSimd else: raise newException(PixieError, "No SIMD blender for " & $blendMode) diff --git a/tests/benchmark_blends.nim b/tests/benchmark_blends.nim index 98a0b07..6be6622 100644 --- a/tests/benchmark_blends.nim +++ b/tests/benchmark_blends.nim @@ -18,20 +18,144 @@ timeIt "blendNormal": reset() -timeIt "blendNormalFloats": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendNormalFloats( - backdrop.data[i].color, source.data[i].color - ).rgba - -reset() - timeIt "blendDarken": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendDarken(backdrop.data[i], source.data[i]) reset() +timeIt "blendMultiply": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendMultiply(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendLinearBurn": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendLinearBurn(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendColorBurn": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendColorBurn(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendLighten": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendLighten(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendScreen": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendScreen(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendLinearDodge": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendLinearDodge(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendColorDodge": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendColorDodge(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendOverlay": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendOverlay(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendSoftLight": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendSoftLight(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendHardLight": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendHardLight(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendDifference": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendDifference(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendExclusion": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendExclusion(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendHue": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendHue(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendSaturation": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendSaturation(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendColor": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendColor(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendLuminosity": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendLuminosity(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendMask": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendMask(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendSubtractMask": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendSubtractMask(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendIntersectMask": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendIntersectMask(backdrop.data[i], source.data[i]) + +reset() + +timeIt "blendExcludeMask": + for i in 0 ..< backdrop.data.len: + backdrop.data[i] = blendExcludeMask(backdrop.data[i], source.data[i]) + +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2 + + reset() + + timeIt "blendNormal [simd]": + for i in countup(0, backdrop.data.len - 4, 4): + let + b = mm_loadu_si128(backdrop.data[i].addr) + s = mm_loadu_si128(source.data[i].addr) + mm_storeu_si128(backdrop.data[i].addr, blendNormalSimd(b, s)) + +reset() + timeIt "blendDarkenFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendDarkenFloats( @@ -40,9 +164,11 @@ timeIt "blendDarkenFloats": reset() -timeIt "blendMultiply": +timeIt "blendNormalFloats": for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendMultiply(backdrop.data[i], source.data[i]) + backdrop.data[i] = blendNormalFloats( + backdrop.data[i].color, source.data[i].color + ).rgba reset() @@ -54,12 +180,6 @@ timeIt "blendMultiplyFloats": reset() -timeIt "blendLinearBurn": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendLinearBurn(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendLinearBurnFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendLinearBurnFloats( @@ -68,12 +188,6 @@ timeIt "blendLinearBurnFloats": reset() -timeIt "blendColorBurn": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendColorBurn(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendColorBurnFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendColorBurnFloats( @@ -82,12 +196,6 @@ timeIt "blendColorBurnFloats": reset() -timeIt "blendLighten": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendLighten(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendLightenFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendLightenFloats( @@ -96,12 +204,6 @@ timeIt "blendLightenFloats": reset() -timeIt "blendScreen": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendScreen(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendScreenFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendScreenFloats( @@ -110,12 +212,6 @@ timeIt "blendScreenFloats": reset() -timeIt "blendLinearDodge": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendLinearDodge(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendLinearDodgeFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendLinearDodgeFloats( @@ -124,12 +220,6 @@ timeIt "blendLinearDodgeFloats": reset() -timeIt "blendColorDodge": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendColorDodge(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendColorDodgeFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendColorDodgeFloats( @@ -138,12 +228,6 @@ timeIt "blendColorDodgeFloats": reset() -timeIt "blendOverlay": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendOverlay(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendOverlayFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendOverlayFloats( @@ -152,12 +236,6 @@ timeIt "blendOverlayFloats": reset() -timeIt "blendSoftLight": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendSoftLight(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendSoftLightFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendSoftLightFloats( @@ -166,12 +244,6 @@ timeIt "blendSoftLightFloats": reset() -timeIt "blendHardLight": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendHardLight(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendHardLightFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendHardLightFloats( @@ -180,12 +252,6 @@ timeIt "blendHardLightFloats": reset() -timeIt "blendDifference": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendDifference(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendDifferenceFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendDifferenceFloats( @@ -194,12 +260,6 @@ timeIt "blendDifferenceFloats": reset() -timeIt "blendExclusion": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendExclusion(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendExclusionFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendExclusionFloats( @@ -208,12 +268,6 @@ timeIt "blendExclusionFloats": reset() -timeIt "blendHue": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendHue(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendHueFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendHueFloats( @@ -222,12 +276,6 @@ timeIt "blendHueFloats": reset() -timeIt "blendSaturation": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendSaturation(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendSaturationFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendSaturationFloats( @@ -236,12 +284,6 @@ timeIt "blendSaturationFloats": reset() -timeIt "blendColor": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendColor(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendColorFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendColorFloats( @@ -250,12 +292,6 @@ timeIt "blendColorFloats": reset() -timeIt "blendLuminosity": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendLuminosity(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendLuminosityFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendLuminosityFloats( @@ -264,12 +300,6 @@ timeIt "blendLuminosityFloats": reset() -timeIt "blendMask": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendMask(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendMaskFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendMaskFloats( @@ -278,12 +308,6 @@ timeIt "blendMaskFloats": reset() -timeIt "blendSubtractMask": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendSubtractMask(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendSubtractMaskFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendSubtractMaskFloats( @@ -292,12 +316,6 @@ timeIt "blendSubtractMaskFloats": reset() -timeIt "blendIntersectMask": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendIntersectMask(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendIntersectMaskFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendIntersectMaskFloats( @@ -306,30 +324,8 @@ timeIt "blendIntersectMaskFloats": reset() -timeIt "blendExcludeMask": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendExcludeMask(backdrop.data[i], source.data[i]) - -reset() - timeIt "blendExcludeMaskFloats": for i in 0 ..< backdrop.data.len: backdrop.data[i] = blendExcludeMaskFloats( backdrop.data[i].color, source.data[i].color ).rgba - -reset() - -timeIt "blendNormalPremultiplied": - for i in 0 ..< backdrop.data.len: - backdrop.data[i] = blendNormalPremultiplied(backdrop.data[i], source.data[i]) - -when defined(amd64) and not defined(pixieNoSimd): - import nimsimd/sse2 - - timeIt "blendNormalPremultiplied [simd]": - for i in countup(0, backdrop.data.len - 4, 4): - let - b = mm_loadu_si128(backdrop.data[i].addr) - s = mm_loadu_si128(source.data[i].addr) - mm_storeu_si128(backdrop.data[i].addr, blendNormalPremultiplied(b, s)) From 81dbfacdda6b2e1902bcde584743a3c86dc614ca Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Tue, 9 Feb 2021 23:34:49 -0600 Subject: [PATCH 9/9] blendExclusion 50% faster --- src/pixie/blends.nim | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index ac3810c..9001e42 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -55,11 +55,12 @@ proc blendNormal(backdrop, source: ColorRGBA): ColorRGBA = result.a = blendAlpha(backdrop.a, source.a) proc blendExclusion(backdrop, source: ColorRGBA): ColorRGBA = - proc blend(backdrop, source: int32): uint8 {.inline.} = - max(0, backdrop + source - (2 * backdrop * source) div 255).uint8 - result.r = blend(backdrop.r.int32, source.r.int32) - result.g = blend(backdrop.g.int32, source.g.int32) - result.b = blend(backdrop.b.int32, source.b.int32) + proc blend(backdrop, source: uint32): uint8 {.inline.} = + let v = (backdrop + source).int32 - ((2 * backdrop * source) div 255).int32 + (cast[uint32](v) and uint8.high.uint32).uint8 + result.r = blend(backdrop.r.uint32, source.r.uint32) + result.g = blend(backdrop.g.uint32, source.g.uint32) + result.b = blend(backdrop.b.uint32, source.b.uint32) result.a = blendAlpha(backdrop.a, source.a) proc blendMask(backdrop, source: ColorRGBA): ColorRGBA =