diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim
index b4c7ce6..4095346 100644
--- a/src/pixie/blends.nim
+++ b/src/pixie/blends.nim
@@ -466,12 +466,13 @@ proc blender*(blendMode: BlendMode): Blender =
   of bmSubtractMask: blendSubtractMask
   of bmIntersectMask: blendIntersectMask
   of bmExcludeMask: blendExcludeMask
-  else:
-    # blendWhite
-    # blendNormal
-    raise newException(PixieError, "No blender for " & $blendMode)
+
+proc maskNormal(backdrop, source: uint8): uint8 =
+  ## Normal blend of two mask values; delegates to blendAlpha.
+  blendAlpha(backdrop, source)
 
 proc maskMask(backdrop, source: uint8): uint8 =
+  ## Scales backdrop by source: (backdrop * source) div 255.
   ((backdrop.uint32 * source) div 255).uint8
 
 proc maskSubtract(backdrop, source: uint8): uint8 =
@@ -488,6 +489,7 @@ proc maskOverwrite(backdrop, source: uint8): uint8 =
 
 proc masker*(blendMode: BlendMode): Masker =
   case blendMode:
+  of bmNormal: maskNormal
   of bmMask: maskMask
   of bmOverwrite: maskOverwrite
   of bmSubtractMask: maskSubtract
@@ -499,7 +501,9 @@ proc masker*(blendMode: BlendMode): Masker =
 when defined(amd64) and not defined(pixieNoSimd):
   import nimsimd/sse2
 
-  type BlenderSimd* = proc(blackdrop, source: M128i): M128i
+  type
+    BlenderSimd* = proc(backdrop, source: M128i): M128i
+    MaskerSimd* = proc(backdrop, source: M128i): M128i
 
   proc blendNormalSimd*(backdrop, source: M128i): M128i =
     let
@@ -540,5 +544,51 @@ when defined(amd64) and not defined(pixieNoSimd):
     else:
       raise newException(PixieError, "No SIMD blender for " & $blendMode)
 
+  proc maskNormalSimd*(backdrop, source: M128i): M128i =
+    ## Per byte (16 lanes): source + (backdrop * (255 - source)) div 255.
+    let
+      oddMask = mm_set1_epi16(cast[int16](0xff00))
+      v255high = mm_set1_epi16(cast[int16](255.uint16 shl 8))
+      div255 = mm_set1_epi16(cast[int16](0x8081))
+
+    var
+      sourceEven = mm_slli_epi16(mm_andnot_si128(oddMask, source), 8)
+      sourceOdd = mm_and_si128(source, oddMask)
+
+    let
+      evenK = mm_sub_epi16(v255high, sourceEven)
+      oddK = mm_sub_epi16(v255high, sourceOdd)
+
+    var
+      backdropEven = mm_slli_epi16(mm_andnot_si128(oddMask, backdrop), 8)
+      backdropOdd = mm_and_si128(backdrop, oddMask)
+
+    # backdrop * k
+    backdropEven = mm_mulhi_epu16(backdropEven, evenK)
+    backdropOdd = mm_mulhi_epu16(backdropOdd, oddK)
+
+    # div 255
+    backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
+    backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
+
+    # Shift from high to low bits
+    sourceEven = mm_srli_epi16(sourceEven, 8)
+    sourceOdd = mm_srli_epi16(sourceOdd, 8)
+
+    var
+      blendedEven = mm_add_epi16(sourceEven, backdropEven)
+      blendedOdd = mm_add_epi16(sourceOdd, backdropOdd)
+
+    blendedOdd = mm_slli_epi16(blendedOdd, 8)
+
+    mm_or_si128(blendedEven, blendedOdd)
+
+  proc maskerSimd*(blendMode: BlendMode): MaskerSimd =
+    case blendMode:
+    of bmNormal: maskNormalSimd
+    of bmOverwrite: blendOverwriteSimd
+    else:
+      raise newException(PixieError, "No SIMD masker for " & $blendMode)
+
 when defined(release):
   {.pop.}
diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index a91968d..40a4d74 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -612,15 +612,6 @@ proc blur*(target: Image | Mask, radius: float32) =
 when defined(release):
   {.pop.}
 
-proc sharpOpacity*(image: Image) =
-  ## Sharpens the opacity to extreme.
-  ## A = 0 stays 0. Anything else turns into 255.
-  for rgba in image.data.mitems:
-    if rgba.a == 0:
-      rgba = rgba(0, 0, 0, 0)
-    else:
-      rgba = rgba(255, 255, 255, 255)
-
 proc drawUber(
   a, b: Image,
   p, dx, dy: Vec2,
diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 3497154..96f74f0 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -1,5 +1,8 @@
 import common, vmath, system/memory
 
+when defined(amd64) and not defined(pixieNoSimd):
+  import nimsimd/sse2
+
 type
   Mask* = ref object
     ## Mask object that holds mask opacity data.
@@ -132,5 +135,23 @@ proc spread*(mask: Mask, spread: float32) =
               break blurBox
       mask.setValueUnsafe(x, y, maxValue)
 
+proc sharpen*(mask: Mask) =
+  ## A value of 0 stays 0. Anything else turns into 255.
+  var i: int
+  when defined(amd64) and not defined(pixieNoSimd):
+    let
+      vZero = mm_setzero_si128()
+      vMax = mm_set1_epi32(cast[int32](uint32.high))
+    for _ in countup(0, mask.data.len - 16, 16):
+      var values = mm_loadu_si128(mask.data[i].addr)
+      values = mm_cmpeq_epi8(values, vZero)
+      values = mm_andnot_si128(values, vMax)
+      mm_storeu_si128(mask.data[i].addr, values)
+      i += 16
+
+  for j in i ..< mask.data.len:
+    if mask.data[j] != 0:
+      mask.data[j] = 255
+
 when defined(release):
   {.pop.}
diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim
index c668c61..1c41baa 100644
--- a/src/pixie/paths.nim
+++ b/src/pixie/paths.nim
@@ -1084,51 +1084,17 @@ proc fillShapes(
     var x = startX
     when defined(amd64) and not defined(pixieNoSimd):
       # When supported, SIMD blend as much as possible
-      let
-        oddMask = mm_set1_epi16(cast[int16](0xff00))
-        v255high = mm_set1_epi16(cast[int16](255.uint16 shl 8))
-        div255 = mm_set1_epi16(cast[int16](0x8081))
-
       for _ in countup(x, coverages.len - 16, 16):
         var coverage = mm_loadu_si128(coverages[x].addr)
 
         let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
         if mm_movemask_epi8(eqZero) != 0xffff:
           # If the coverages are not all zero
-          var
-            coverageEven = mm_slli_epi16(mm_andnot_si128(oddMask, coverage), 8)
-            coverageOdd = mm_and_si128(coverage, oddMask)
-
-          let
-            evenK = mm_sub_epi16(v255high, coverageEven)
-            oddK = mm_sub_epi16(v255high, coverageOdd)
-
-          var
-            backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
-            backdropEven = mm_slli_epi16(mm_andnot_si128(oddMask, backdrop), 8)
-            backdropOdd = mm_and_si128(backdrop, oddMask)
-
-          # backdrop * k
-          backdropEven = mm_mulhi_epu16(backdropEven, evenK)
-          backdropOdd = mm_mulhi_epu16(backdropOdd, oddK)
-
-          # div 255
-          backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
-          backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
-
-          # Shift from high to low bits
-          coverageEven = mm_srli_epi16(coverageEven, 8)
-          coverageOdd = mm_srli_epi16(coverageOdd, 8)
-
-          var
-            blendedEven = mm_add_epi16(coverageEven, backdropEven)
-            blendedOdd = mm_add_epi16(coverageOdd, backdropOdd)
-
-          blendedOdd = mm_slli_epi16(blendedOdd, 8)
+          let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
 
           mm_storeu_si128(
             mask.data[mask.dataIndex(x, y)].addr,
-            mm_or_si128(blendedEven, blendedOdd)
+            maskNormalSimd(backdrop, coverage)
           )
         x += 16
 
diff --git a/tests/benchmark_images.nim b/tests/benchmark_images.nim
index ed15cfd..7ace876 100644
--- a/tests/benchmark_images.nim
+++ b/tests/benchmark_images.nim
@@ -45,11 +45,6 @@ timeIt "applyOpacity":
 
 reset()
 
-timeIt "sharpOpacity":
-  image.sharpOpacity()
-
-reset()
-
 timeIt "toPremultipliedAlpha":
   image.toPremultipliedAlpha()
 
diff --git a/tests/benchmark_masks.nim b/tests/benchmark_masks.nim
index 71c2e65..db5347c 100644
--- a/tests/benchmark_masks.nim
+++ b/tests/benchmark_masks.nim
@@ -25,3 +25,8 @@ reset()
 
 timeIt "blur":
   mask.blur(40)
+
+reset()
+
+timeIt "sharpen":
+  mask.sharpen()