From e17d0c742dbb2c2cbb744f0cbe327778872c2f1f Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 01:44:46 -0600 Subject: [PATCH 1/4] roundedRect clockwise vs counter --- src/pixie/paths.nim | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index c4151d8..c668c61 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -373,11 +373,18 @@ proc roundedRect*( se = min(se, maxRadius) sw = min(sw, maxRadius) - path.moveTo(pos.x + nw, pos.y) - path.arcTo(pos.x + wh.x, pos.y, pos.x + wh.x, pos.y + wh.y, ne) - path.arcTo(pos.x + wh.x, pos.y + wh.y, pos.x, pos.y + wh.y, se) - path.arcTo(pos.x, pos.y + wh.y, pos.x, pos.y, sw) - path.arcTo(pos.x, pos.y, pos.x + wh.x, pos.y, nw) + if clockwise: + path.moveTo(pos.x + nw, pos.y) + path.arcTo(pos.x + wh.x, pos.y, pos.x + wh.x, pos.y + wh.y, ne) + path.arcTo(pos.x + wh.x, pos.y + wh.y, pos.x, pos.y + wh.y, se) + path.arcTo(pos.x, pos.y + wh.y, pos.x, pos.y, sw) + path.arcTo(pos.x, pos.y, pos.x + wh.x, pos.y, nw) + else: + path.moveTo(pos.x + wh.x + ne, pos.y) + path.arcTo(pos.x, pos.y, pos.x, pos.y + wh.y, nw) + path.arcTo(pos.x, pos.y + wh.y, pos.x + wh.x, pos.y + wh.y, sw) + path.arcTo(pos.x + wh.x, pos.y + wh.y, pos.x + wh.x, pos.y, se) + path.arcTo(pos.x + wh.x, pos.y, pos.x, pos.y, ne) path.closePath() proc ellipse*(path: var Path, cx, cy, rx, ry: float32) = From a40683920d62322ae66715ea0c8ecff0d83881a8 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 02:38:27 -0600 Subject: [PATCH 2/4] sharpen masks + simd, blend masks --- src/pixie/blends.nim | 60 ++++++++++++++++++++++++++++++++++---- src/pixie/images.nim | 9 ------ src/pixie/masks.nim | 21 +++++++++++++ src/pixie/paths.nim | 38 ++---------------------- tests/benchmark_images.nim | 5 ---- tests/benchmark_masks.nim | 5 ++++ 6 files changed, 83 insertions(+), 55 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index b4c7ce6..4095346 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -466,12 +466,13 @@ proc blender*(blendMode: BlendMode): Blender = of bmSubtractMask: blendSubtractMask of bmIntersectMask: blendIntersectMask of bmExcludeMask: blendExcludeMask - else: - # blendWhite - # blendNormal - raise newException(PixieError, "No blender for " & $blendMode) + +proc maskNormal(backdrop, source: uint8): uint8 = + ## Blending masks + blendAlpha(backdrop, source) proc maskMask(backdrop, source: uint8): uint8 = + ## Masking masks ((backdrop.uint32 * source) div 255).uint8 proc maskSubtract(backdrop, source: uint8): uint8 = @@ -488,6 +489,7 @@ proc maskOverwrite(backdrop, source: uint8): uint8 = proc masker*(blendMode: BlendMode): Masker = case blendMode: + of bmNormal: maskNormal of bmMask: maskMask of bmOverwrite: maskOverwrite of bmSubtractMask: maskSubtract @@ -499,7 +501,9 @@ proc masker*(blendMode: BlendMode): Masker = when defined(amd64) and not defined(pixieNoSimd): import nimsimd/sse2 - type BlenderSimd* = proc(blackdrop, source: M128i): M128i + type + BlenderSimd* = proc(blackdrop, source: M128i): M128i + MaskerSimd* = proc(blackdrop, source: M128i): M128i proc blendNormalSimd*(backdrop, source: M128i): M128i = let @@ -540,5 +544,51 @@ when defined(amd64) and not defined(pixieNoSimd): else: raise newException(PixieError, "No SIMD blender for " & $blendMode) + proc maskNormalSimd*(backdrop, source: M128i): M128i = + ## Blending masks + let + oddMask = mm_set1_epi16(cast[int16](0xff00)) + v255high = mm_set1_epi16(cast[int16](255.uint16 shl 8)) + div255 = mm_set1_epi16(cast[int16](0x8081)) + + var + sourceEven = mm_slli_epi16(mm_andnot_si128(oddMask, source), 8) + sourceOdd = mm_and_si128(source, oddMask) + + let + evenK = mm_sub_epi16(v255high, sourceEven) + oddK = mm_sub_epi16(v255high, sourceOdd) + + var + backdropEven = mm_slli_epi16(mm_andnot_si128(oddMask, backdrop), 8) + backdropOdd = mm_and_si128(backdrop, oddMask) + + # backdrop * k + backdropEven = mm_mulhi_epu16(backdropEven, evenK) + backdropOdd = mm_mulhi_epu16(backdropOdd, oddK) + + # div 255 + backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) + backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) + + # Shift from high to low bits + sourceEven = mm_srli_epi16(sourceEven, 8) + sourceOdd = mm_srli_epi16(sourceOdd, 8) + + var + blendedEven = mm_add_epi16(sourceEven, backdropEven) + blendedOdd = mm_add_epi16(sourceOdd, backdropOdd) + + blendedOdd = mm_slli_epi16(blendedOdd, 8) + + mm_or_si128(blendedEven, blendedOdd) + + proc maskerSimd*(blendMode: BlendMode): MaskerSimd = + case blendMode: + of bmNormal: maskNormalSimd + of bmOverwrite: blendOverwriteSimd + else: + raise newException(PixieError, "No SIMD masker for " & $blendMode) + when defined(release): {.pop.} diff --git a/src/pixie/images.nim b/src/pixie/images.nim index a91968d..40a4d74 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -612,15 +612,6 @@ proc blur*(target: Image | Mask, radius: float32) = when defined(release): {.pop.} -proc sharpOpacity*(image: Image) = - ## Sharpens the opacity to extreme. - ## A = 0 stays 0. Anything else turns into 255. - for rgba in image.data.mitems: - if rgba.a == 0: - rgba = rgba(0, 0, 0, 0) - else: - rgba = rgba(255, 255, 255, 255) - proc drawUber( a, b: Image, p, dx, dy: Vec2, diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 3497154..96f74f0 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -1,5 +1,8 @@ import common, vmath, system/memory +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2 + type Mask* = ref object ## Mask object that holds mask opacity data. @@ -132,5 +135,23 @@ proc spread*(mask: Mask, spread: float32) = break blurBox mask.setValueUnsafe(x, y, maxValue) +proc sharpen*(mask: Mask) = + ## A value of 0 stays 0. Anything else turns into 255. + var i: int + when defined(amd64) and not defined(pixieNoSimd): + let + vZero = mm_setzero_si128() + vMax = mm_set1_epi32(cast[int32](uint32.high)) + for _ in countup(0, mask.data.len - 16, 16): + var values = mm_loadu_si128(mask.data[i].addr) + values = mm_cmpeq_epi8(values, vZero) + values = mm_andnot_si128(values, vMax) + mm_storeu_si128(mask.data[i].addr, values) + i += 16 + + for j in i ..< mask.data.len: + if mask.data[j] != 0: + mask.data[j] = 255 + when defined(release): {.pop.} diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index c668c61..1c41baa 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1084,51 +1084,17 @@ proc fillShapes( var x = startX when defined(amd64) and not defined(pixieNoSimd): # When supported, SIMD blend as much as possible - let - oddMask = mm_set1_epi16(cast[int16](0xff00)) - v255high = mm_set1_epi16(cast[int16](255.uint16 shl 8)) - div255 = mm_set1_epi16(cast[int16](0x8081)) - for _ in countup(x, coverages.len - 16, 16): var coverage = mm_loadu_si128(coverages[x].addr) let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128()) if mm_movemask_epi8(eqZero) != 0xffff: # If the coverages are not all zero - var - coverageEven = mm_slli_epi16(mm_andnot_si128(oddMask, coverage), 8) - coverageOdd = mm_and_si128(coverage, oddMask) - - let - evenK = mm_sub_epi16(v255high, coverageEven) - oddK = mm_sub_epi16(v255high, coverageOdd) - - var - backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr) - backdropEven = mm_slli_epi16(mm_andnot_si128(oddMask, backdrop), 8) - backdropOdd = mm_and_si128(backdrop, oddMask) - - # backdrop * k - backdropEven = mm_mulhi_epu16(backdropEven, evenK) - backdropOdd = mm_mulhi_epu16(backdropOdd, oddK) - - # div 255 - backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) - backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) - - # Shift from high to low bits - coverageEven = mm_srli_epi16(coverageEven, 8) - coverageOdd = mm_srli_epi16(coverageOdd, 8) - - var - blendedEven = mm_add_epi16(coverageEven, backdropEven) - blendedOdd = mm_add_epi16(coverageOdd, backdropOdd) - - blendedOdd = mm_slli_epi16(blendedOdd, 8) + let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr) mm_storeu_si128( mask.data[mask.dataIndex(x, y)].addr, - mm_or_si128(blendedEven, blendedOdd) + maskNormalSimd(backdrop, coverage) ) x += 16 diff --git a/tests/benchmark_images.nim b/tests/benchmark_images.nim index ed15cfd..7ace876 100644 --- a/tests/benchmark_images.nim +++ b/tests/benchmark_images.nim @@ -45,11 +45,6 @@ timeIt "applyOpacity": reset() -timeIt "sharpOpacity": - image.sharpOpacity() - -reset() - timeIt "toPremultipliedAlpha": image.toPremultipliedAlpha() diff --git a/tests/benchmark_masks.nim b/tests/benchmark_masks.nim index 71c2e65..db5347c 100644 --- a/tests/benchmark_masks.nim +++ b/tests/benchmark_masks.nim @@ -25,3 +25,8 @@ reset() timeIt "blur": mask.blur(40) + +reset() + +timeIt "sharpen": + mask.sharpen() From 7e06625c2710d9c01fea3f2adc23a164be38cc44 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 12:41:56 -0600 Subject: [PATCH 3/4] y min, max when drawing optimization --- src/pixie/images.nim | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 40a4d74..54b8eb0 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -620,12 +620,29 @@ proc drawUber( smooth: bool ) = let blender = blendMode.blender() - for y in 0 ..< a.height: + + # Determine where we should start and stop drawing in the y dimension + var yMin, yMax: int + if blendMode == bmIntersectMask: + yMin = 0 + yMax = a.height + else: + yMin = a.height + yMax = 0 + for segment in perimeter: + yMin = min(yMin, segment.at.y.floor.int) + yMax = max(yMax, segment.at.y.ceil.int) + + yMin = yMin.clamp(0, a.height) + yMax = yMax.clamp(0, a.height) + + for y in yMin ..< yMax: + # Determine where we should start and stop drawing in the x dimension var xMin = a.width xMax = 0 for yOffset in [0.float32, 1]: - var scanLine = Line( + let scanLine = Line( a: vec2(-1000, y.float32 + yOffset), b: vec2(1000, y.float32 + yOffset) ) From b6b1d3a44c5d5af0f4a1d3dace36d2bf3e479ecd Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 11 Feb 2021 13:13:09 -0600 Subject: [PATCH 4/4] sharpen -> ceil --- src/pixie/masks.nim | 2 +- tests/benchmark_masks.nim | 4 ++-- tests/images/masks/circleMaskSharpened.png | Bin 0 -> 380 bytes tests/test_masks.nim | 11 +++++++++++ 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tests/images/masks/circleMaskSharpened.png diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 96f74f0..e5af319 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -135,7 +135,7 @@ proc spread*(mask: Mask, spread: float32) = break blurBox mask.setValueUnsafe(x, y, maxValue) -proc sharpen*(mask: Mask) = +proc ceil*(mask: Mask) = ## A value of 0 stays 0. Anything else turns into 255. var i: int when defined(amd64) and not defined(pixieNoSimd): diff --git a/tests/benchmark_masks.nim b/tests/benchmark_masks.nim index db5347c..c7f22a2 100644 --- a/tests/benchmark_masks.nim +++ b/tests/benchmark_masks.nim @@ -28,5 +28,5 @@ timeIt "blur": reset() -timeIt "sharpen": - mask.sharpen() +timeIt "ceil": + mask.ceil() diff --git a/tests/images/masks/circleMaskSharpened.png b/tests/images/masks/circleMaskSharpened.png new file mode 100644 index 0000000000000000000000000000000000000000..88dc9b96ef1b135693625621f09355b3f7dfc3cf GIT binary patch literal 380 zcmeAS@N?(olHy`uVBq!ia0vp^DIm-NBp5IEGX(zMXQK>yUv6Yw(|s zPfjaue#k(~9=fXY73ymC<=~^*UD7Ya#|LUTZWHIHpMi zUl7o_sC&UtWfiOS3>O=*5`|vxCCQ4;FD99-c&1>@!f7jR<~;d9N>lUms4FKcR