applyOpacitySimd

This commit is contained in:
Ryan Oldenburg 2022-06-30 10:35:54 -05:00
parent f5825daf10
commit 3a41ff8e64
3 changed files with 28 additions and 21 deletions

View file

@ -368,11 +368,7 @@ proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
return return
when allowSimd and compiles(applyOpacitySimd): when allowSimd and compiles(applyOpacitySimd):
applyOpacitySimd( applyOpacitySimd(image.data, opacity)
cast[ptr UncheckedArray[uint8]](image.data[0].addr),
image.data.len * 4,
opacity
)
return return
for i in 0 ..< image.data.len: for i in 0 ..< image.data.len:

View file

@ -197,11 +197,7 @@ proc applyOpacity*(mask: Mask, opacity: float32) {.raises: [].} =
return return
when allowSimd and compiles(applyOpacitySimd): when allowSimd and compiles(applyOpacitySimd):
applyOpacitySimd( applyOpacitySimd(mask.data, opacity)
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len,
opacity
)
return return
for i in 0 ..< mask.data.len: for i in 0 ..< mask.data.len:

View file

@ -338,19 +338,24 @@ when defined(amd64):
if data[i] != 0: if data[i] != 0:
data[i] = 255 data[i] = 255
proc applyOpacitySimd*( proc applyOpacitySimd*(data: var seq[uint8 | ColorRGBX], opacity: uint16) =
data: ptr UncheckedArray[uint8], var
len: int, i: int
opacity: uint16 p = cast[uint](data[0].addr)
) = len =
var i: int when data is seq[ColorRGBX]:
data.len * 4
else:
data.len
let let
oddMask = mm_set1_epi16(0xff00) oddMask = mm_set1_epi16(0xff00)
div255 = mm_set1_epi16(0x8081) div255 = mm_set1_epi16(0x8081)
zeroVec = mm_setzero_si128() zeroVec = mm_setzero_si128()
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8) opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
iterations = len div 16
for _ in 0 ..< len div 16: for _ in 0 ..< len div 16:
let values = mm_loadu_si128(data[i].addr) let values = mm_loadu_si128(cast[pointer](p))
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff: if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
var var
valuesEven = mm_slli_epi16(values, 8) valuesEven = mm_slli_epi16(values, 8)
@ -360,12 +365,22 @@ when defined(amd64):
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7) valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
mm_storeu_si128( mm_storeu_si128(
data[i].addr, cast[pointer](p),
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8)) mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
) )
i += 16 p += 16
i += 16 * iterations
for i in i ..< len: when data is seq[ColorRGBX]:
for i in i div 4 ..< data.len:
var rgbx = data[i]
rgbx.r = ((rgbx.r * opacity) div 255).uint8
rgbx.g = ((rgbx.g * opacity) div 255).uint8
rgbx.b = ((rgbx.b * opacity) div 255).uint8
rgbx.a = ((rgbx.a * opacity) div 255).uint8
data[i] = rgbx
else:
for i in i ..< data.len:
data[i] = ((data[i] * opacity) div 255).uint8 data[i] = ((data[i] * opacity) div 255).uint8
when defined(release): when defined(release):