applyOpacitySimd
This commit is contained in:
parent
f5825daf10
commit
3a41ff8e64
3 changed files with 28 additions and 21 deletions
|
@ -368,11 +368,7 @@ proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
|
||||||
return
|
return
|
||||||
|
|
||||||
when allowSimd and compiles(applyOpacitySimd):
|
when allowSimd and compiles(applyOpacitySimd):
|
||||||
applyOpacitySimd(
|
applyOpacitySimd(image.data, opacity)
|
||||||
cast[ptr UncheckedArray[uint8]](image.data[0].addr),
|
|
||||||
image.data.len * 4,
|
|
||||||
opacity
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for i in 0 ..< image.data.len:
|
for i in 0 ..< image.data.len:
|
||||||
|
|
|
@ -197,11 +197,7 @@ proc applyOpacity*(mask: Mask, opacity: float32) {.raises: [].} =
|
||||||
return
|
return
|
||||||
|
|
||||||
when allowSimd and compiles(applyOpacitySimd):
|
when allowSimd and compiles(applyOpacitySimd):
|
||||||
applyOpacitySimd(
|
applyOpacitySimd(mask.data, opacity)
|
||||||
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
|
|
||||||
mask.data.len,
|
|
||||||
opacity
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for i in 0 ..< mask.data.len:
|
for i in 0 ..< mask.data.len:
|
||||||
|
|
|
@ -338,19 +338,24 @@ when defined(amd64):
|
||||||
if data[i] != 0:
|
if data[i] != 0:
|
||||||
data[i] = 255
|
data[i] = 255
|
||||||
|
|
||||||
proc applyOpacitySimd*(
|
proc applyOpacitySimd*(data: var seq[uint8 | ColorRGBX], opacity: uint16) =
|
||||||
data: ptr UncheckedArray[uint8],
|
var
|
||||||
len: int,
|
i: int
|
||||||
opacity: uint16
|
p = cast[uint](data[0].addr)
|
||||||
) =
|
len =
|
||||||
var i: int
|
when data is seq[ColorRGBX]:
|
||||||
|
data.len * 4
|
||||||
|
else:
|
||||||
|
data.len
|
||||||
|
|
||||||
let
|
let
|
||||||
oddMask = mm_set1_epi16(0xff00)
|
oddMask = mm_set1_epi16(0xff00)
|
||||||
div255 = mm_set1_epi16(0x8081)
|
div255 = mm_set1_epi16(0x8081)
|
||||||
zeroVec = mm_setzero_si128()
|
zeroVec = mm_setzero_si128()
|
||||||
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
|
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
|
||||||
|
iterations = len div 16
|
||||||
for _ in 0 ..< len div 16:
|
for _ in 0 ..< len div 16:
|
||||||
let values = mm_loadu_si128(data[i].addr)
|
let values = mm_loadu_si128(cast[pointer](p))
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
|
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
|
||||||
var
|
var
|
||||||
valuesEven = mm_slli_epi16(values, 8)
|
valuesEven = mm_slli_epi16(values, 8)
|
||||||
|
@ -360,12 +365,22 @@ when defined(amd64):
|
||||||
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
||||||
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
data[i].addr,
|
cast[pointer](p),
|
||||||
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
|
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
|
||||||
)
|
)
|
||||||
i += 16
|
p += 16
|
||||||
|
i += 16 * iterations
|
||||||
|
|
||||||
for i in i ..< len:
|
when data is seq[ColorRGBX]:
|
||||||
|
for i in i div 4 ..< data.len:
|
||||||
|
var rgbx = data[i]
|
||||||
|
rgbx.r = ((rgbx.r * opacity) div 255).uint8
|
||||||
|
rgbx.g = ((rgbx.g * opacity) div 255).uint8
|
||||||
|
rgbx.b = ((rgbx.b * opacity) div 255).uint8
|
||||||
|
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||||
|
data[i] = rgbx
|
||||||
|
else:
|
||||||
|
for i in i ..< data.len:
|
||||||
data[i] = ((data[i] * opacity) div 255).uint8
|
data[i] = ((data[i] * opacity) div 255).uint8
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
|
|
Loading…
Reference in a new issue