Merge pull request #96 from guzba/master
applyOpacity simd for image too, 10x faster as well
This commit is contained in:
commit
875b2caff3
3 changed files with 71 additions and 47 deletions
|
@ -263,14 +263,71 @@ proc toStraightAlpha*(image: Image) =
|
||||||
c.g = ((c.g.uint32 * multiplier) div 255).uint8
|
c.g = ((c.g.uint32 * multiplier) div 255).uint8
|
||||||
c.b = ((c.b.uint32 * multiplier) div 255).uint8
|
c.b = ((c.b.uint32 * multiplier) div 255).uint8
|
||||||
|
|
||||||
proc applyOpacity*(image: Image, opacity: float32) =
|
proc applyOpacity*(target: Image | Mask, opacity: float32) =
|
||||||
## Multiplies alpha of the image by opacity.
|
## Multiplies alpha of the image by opacity.
|
||||||
let opacity = round(255 * opacity).uint32
|
let opacity = round(255 * opacity).uint16
|
||||||
for rgba in image.data.mitems:
|
|
||||||
rgba.r = ((rgba.r * opacity) div 255).uint8
|
if opacity == 0:
|
||||||
rgba.g = ((rgba.g * opacity) div 255).uint8
|
when type(target) is Image:
|
||||||
rgba.b = ((rgba.b * opacity) div 255).uint8
|
target.fill(rgba(0, 0, 0, 0))
|
||||||
rgba.a = ((rgba.a * opacity) div 255).uint8
|
else:
|
||||||
|
target.fill(0)
|
||||||
|
return
|
||||||
|
|
||||||
|
var i: int
|
||||||
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
when type(target) is Image:
|
||||||
|
let byteLen = target.data.len * 4
|
||||||
|
else:
|
||||||
|
let byteLen = target.data.len
|
||||||
|
|
||||||
|
let
|
||||||
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
|
||||||
|
|
||||||
|
for _ in countup(0, byteLen - 16, 16):
|
||||||
|
when type(target) is Image:
|
||||||
|
let index = i div 4
|
||||||
|
else:
|
||||||
|
let index = i
|
||||||
|
|
||||||
|
var values = mm_loadu_si128(target.data[index].addr)
|
||||||
|
|
||||||
|
let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128())
|
||||||
|
if mm_movemask_epi8(eqZero) != 0xffff:
|
||||||
|
var
|
||||||
|
valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8)
|
||||||
|
valuesOdd = mm_and_si128(values, oddMask)
|
||||||
|
|
||||||
|
# values * opacity
|
||||||
|
valuesEven = mm_mulhi_epu16(valuesEven, vOpacity)
|
||||||
|
valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity)
|
||||||
|
|
||||||
|
# div 255
|
||||||
|
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
||||||
|
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
||||||
|
|
||||||
|
valuesOdd = mm_slli_epi16(valuesOdd, 8)
|
||||||
|
|
||||||
|
mm_storeu_si128(
|
||||||
|
target.data[index].addr,
|
||||||
|
mm_or_si128(valuesEven, valuesOdd)
|
||||||
|
)
|
||||||
|
|
||||||
|
i += 16
|
||||||
|
|
||||||
|
when type(target) is Image:
|
||||||
|
for j in i div 4 ..< target.data.len:
|
||||||
|
var rgba = target.data[j]
|
||||||
|
rgba.r = ((rgba.r * opacity) div 255).uint8
|
||||||
|
rgba.g = ((rgba.g * opacity) div 255).uint8
|
||||||
|
rgba.b = ((rgba.b * opacity) div 255).uint8
|
||||||
|
rgba.a = ((rgba.a * opacity) div 255).uint8
|
||||||
|
target.data[j] = rgba
|
||||||
|
else:
|
||||||
|
for j in i ..< target.data.len:
|
||||||
|
target.data[j] = ((target.data[j] * opacity) div 255).uint8
|
||||||
|
|
||||||
proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA =
|
proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA =
|
||||||
let
|
let
|
||||||
|
|
|
@ -112,45 +112,5 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 =
|
||||||
|
|
||||||
lerp(bottomMix, topMix, diffY)
|
lerp(bottomMix, topMix, diffY)
|
||||||
|
|
||||||
proc applyOpacity*(mask: Mask, opacity: float32) =
|
|
||||||
## Multiplies the values of the mask by opacity.
|
|
||||||
let opacity = round(255 * opacity).uint16
|
|
||||||
|
|
||||||
var i: int
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
|
||||||
let
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
|
||||||
vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
|
|
||||||
|
|
||||||
for _ in countup(i, mask.data.len - 16, 16):
|
|
||||||
var values = mm_loadu_si128(mask.data[i].addr)
|
|
||||||
|
|
||||||
let eqZero = mm_cmpeq_epi16(values, mm_setzero_si128())
|
|
||||||
if mm_movemask_epi8(eqZero) != 0xffff:
|
|
||||||
var
|
|
||||||
valuesEven = mm_slli_epi16(mm_andnot_si128(oddMask, values), 8)
|
|
||||||
valuesOdd = mm_and_si128(values, oddMask)
|
|
||||||
|
|
||||||
# values * opacity
|
|
||||||
valuesEven = mm_mulhi_epu16(valuesEven, vOpacity)
|
|
||||||
valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity)
|
|
||||||
|
|
||||||
# div 255
|
|
||||||
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
|
||||||
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
|
||||||
|
|
||||||
valuesOdd = mm_slli_epi16(valuesOdd, 8)
|
|
||||||
|
|
||||||
mm_storeu_si128(
|
|
||||||
mask.data[i].addr,
|
|
||||||
mm_or_si128(valuesEven, valuesOdd)
|
|
||||||
)
|
|
||||||
|
|
||||||
i += 16
|
|
||||||
|
|
||||||
for j in i ..< mask.data.len:
|
|
||||||
mask.data[j] = ((mask.data[j] * opacity) div 255).uint8
|
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -28,6 +28,13 @@ block:
|
||||||
image.toStraightAlpha()
|
image.toStraightAlpha()
|
||||||
doAssert image[9, 9] == rgba(254, 0, 0, 128)
|
doAssert image[9, 9] == rgba(254, 0, 0, 128)
|
||||||
|
|
||||||
|
block:
|
||||||
|
let image = newImage(100, 100)
|
||||||
|
image.fill(rgba(200, 200, 200, 200))
|
||||||
|
image.applyOpacity(0.5)
|
||||||
|
doAssert image[0, 0] == rgba(100, 100, 100, 100)
|
||||||
|
doAssert image[88, 88] == rgba(100, 100, 100, 100)
|
||||||
|
|
||||||
block:
|
block:
|
||||||
let
|
let
|
||||||
a = newImage(101, 101)
|
a = newImage(101, 101)
|
||||||
|
|
Loading…
Reference in a new issue