diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 33119db..7e47209 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -357,64 +357,33 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
         result.width * 4
       )
 
-proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
+proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
   ## Multiplies alpha of the image by opacity.
   let opacity = round(255 * opacity).uint16
   if opacity == 255:
     return
 
   if opacity == 0:
-    when type(target) is Image:
-      target.fill(rgbx(0, 0, 0, 0))
-    else:
-      target.fill(0)
+    image.fill(rgbx(0, 0, 0, 0))
     return
 
-  var i: int
-  when defined(amd64) and allowSimd:
-    when type(target) is Image:
-      let byteLen = target.data.len * 4
-    else:
-      let byteLen = target.data.len
+  when allowSimd and compiles(applyOpacitySimd):
+    # data[0] on an empty seq raises IndexDefect when bounds checks are on.
+    if image.data.len > 0:
+      applyOpacitySimd(
+        cast[ptr UncheckedArray[uint8]](image.data[0].addr),
+        image.data.len * 4,
+        opacity
+      )
+    return
 
-    let
-      oddMask = mm_set1_epi16(cast[int16](0xff00))
-      div255 = mm_set1_epi16(cast[int16](0x8081))
-      zeroVec = mm_setzero_si128()
-      opacityVec = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
-    for _ in 0 ..< byteLen div 16:
-      when type(target) is Image:
-        let index = i div 4
-      else:
-        let index = i
-
-      let values = mm_loadu_si128(target.data[index].addr)
-      if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
-        var
-          valuesEven = mm_slli_epi16(values, 8)
-          valuesOdd = mm_and_si128(values, oddMask)
-        valuesEven = mm_mulhi_epu16(valuesEven, opacityVec)
-        valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
-        valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
-        valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
-        mm_storeu_si128(
-          target.data[index].addr,
-          mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
-        )
-
-      i += 16
-
-  when type(target) is Image:
-    for j in i div 4 ..< target.data.len:
-      var rgbx = target.data[j]
-      rgbx.r = ((rgbx.r * opacity) div 255).uint8
-      rgbx.g = ((rgbx.g * opacity) div 255).uint8
-      rgbx.b = ((rgbx.b * opacity) div 255).uint8
-      rgbx.a = ((rgbx.a * opacity) div 255).uint8
-      target.data[j] = rgbx
-  else:
-    for j in i ..< target.data.len:
-      target.data[j] = ((target.data[j] * opacity) div 255).uint8
+  for i in 0 ..< image.data.len:
+    var rgbx = image.data[i]
+    rgbx.r = ((rgbx.r * opacity) div 255).uint8
+    rgbx.g = ((rgbx.g * opacity) div 255).uint8
+    rgbx.b = ((rgbx.b * opacity) div 255).uint8
+    rgbx.a = ((rgbx.a * opacity) div 255).uint8
+    image.data[i] = rgbx
 
 proc invert*(image: Image) {.raises: [].} =
   ## Inverts all of the colors and alpha.
diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim
index 7b29e09..40b2ea4 100644
--- a/src/pixie/masks.nim
+++ b/src/pixie/masks.nim
@@ -75,6 +75,10 @@ proc setValue*(mask: Mask, x, y: int, value: uint8) {.inline, raises: [].} =
   ## Sets a value at (x, y) or does nothing if outside of bounds.
   mask[x, y] = value
 
+proc fill*(mask: Mask, value: uint8) {.inline, raises: [].} =
+  ## Fills the mask with the value.
+  fillUnsafe(mask.data, value, 0, mask.data.len)
+
 proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
   ## Scales the mask down by an integer scale.
   if power < 0:
@@ -179,9 +183,28 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
         result.width * 4
       )
 
-proc fill*(mask: Mask, value: uint8) {.inline, raises: [].} =
-  ## Fills the mask with the value.
-  fillUnsafe(mask.data, value, 0, mask.data.len)
+proc applyOpacity*(mask: Mask, opacity: float32) {.raises: [].} =
+  ## Multiplies the values of the mask by opacity.
+  let opacity = round(255 * opacity).uint16
+  if opacity == 255:
+    return
+
+  if opacity == 0:
+    mask.fill(0)
+    return
+
+  when allowSimd and compiles(applyOpacitySimd):
+    # data[0] on an empty seq raises IndexDefect when bounds checks are on.
+    if mask.data.len > 0:
+      applyOpacitySimd(
+        cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
+        mask.data.len,
+        opacity
+      )
+    return
+
+  for i in 0 ..< mask.data.len:
+    mask.data[i] = ((mask.data[i] * opacity) div 255).uint8
 
 proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
   ## Gets a interpolated value with float point coordinates.
diff --git a/src/pixie/simd.nim b/src/pixie/simd.nim
index 3f43755..40a17fa 100644
--- a/src/pixie/simd.nim
+++ b/src/pixie/simd.nim
@@ -1,4 +1,4 @@
-import chroma
+import chroma, vmath
 
 when defined(release):
   {.push checks: off.}
@@ -287,5 +287,39 @@ when defined(amd64):
       if data[i] != 0:
         data[i] = 255
 
+  proc applyOpacitySimd*(
+    data: ptr UncheckedArray[uint8],
+    len: int,
+    opacity: uint16
+  ) =
+    ## Replaces each of the `len` bytes at `data` with
+    ## (byte * opacity) div 255, in place, 16 bytes at a time.
+    var i: int
+    let
+      oddMask = mm_set1_epi16(cast[int16](0xff00))
+      div255 = mm_set1_epi16(cast[int16](0x8081))
+      zeroVec = mm_setzero_si128()
+      opacityVec = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
+    for _ in 0 ..< len div 16:
+      let values = mm_loadu_si128(data[i].addr)
+      # Skip blocks where all 16 bytes are already zero.
+      if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
+        var
+          valuesEven = mm_slli_epi16(values, 8)
+          valuesOdd = mm_and_si128(values, oddMask)
+        valuesEven = mm_mulhi_epu16(valuesEven, opacityVec)
+        valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
+        valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
+        valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
+        mm_storeu_si128(
+          data[i].addr,
+          mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
+        )
+      i += 16
+
+    # Scalar tail for the bytes left over after the 16-byte blocks.
+    for i in i ..< len:
+      data[i] = ((data[i] * opacity) div 255).uint8
+
 when defined(release):
   {.pop.}