toPremultipliedAlphaSimd
This commit is contained in:
parent
af5045ccb8
commit
d76550052e
3 changed files with 59 additions and 51 deletions
|
@ -113,10 +113,7 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
|||
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
||||
## Converts an image to premultiplied alpha from straight alpha.
|
||||
when allowSimd and compiles(toPremultipliedAlphaSimd):
|
||||
toPremultipliedAlphaSimd(
|
||||
cast[ptr UncheckedArray[uint32]](data[0].addr),
|
||||
data.len
|
||||
)
|
||||
toPremultipliedAlphaSimd(data)
|
||||
return
|
||||
|
||||
for i in 0 ..< data.len:
|
||||
|
|
|
@ -87,17 +87,17 @@ proc isOpaqueAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
|
|||
if data[i].a != 255:
|
||||
return false
|
||||
|
||||
proc toPremultipliedAlphaAvx2*(
|
||||
data: ptr UncheckedArray[uint32],
|
||||
len: int
|
||||
): int =
|
||||
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) =
|
||||
var i: int
|
||||
|
||||
let
|
||||
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
||||
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
||||
for _ in 0 ..< len div 8:
|
||||
oddMask = mm256_set1_epi16(0xff00)
|
||||
div255 = mm256_set1_epi16(0x8081)
|
||||
iterations = data.len div 8
|
||||
for _ in 0 ..< iterations:
|
||||
let
|
||||
values = mm256_loadu_si256(data[result].addr)
|
||||
values = mm256_loadu_si256(data[i].addr)
|
||||
alpha = mm256_and_si256(values, alphaMask)
|
||||
eq = mm256_cmpeq_epi8(values, alphaMask)
|
||||
if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888:
|
||||
|
@ -112,10 +112,18 @@ proc toPremultipliedAlphaAvx2*(
|
|||
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7)
|
||||
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7)
|
||||
mm256_storeu_si256(
|
||||
data[result].addr,
|
||||
data[i].addr,
|
||||
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8))
|
||||
)
|
||||
result += 8
|
||||
i += 8
|
||||
|
||||
for i in i ..< data.len:
|
||||
var c = data[i]
|
||||
if c.a != 255:
|
||||
c.r = ((c.r.uint32 * c.a) div 255).uint8
|
||||
c.g = ((c.g.uint32 * c.a) div 255).uint8
|
||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
||||
data[i] = c
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
|
@ -163,44 +163,47 @@ when defined(amd64):
|
|||
if data[i].a != 255:
|
||||
return false
|
||||
|
||||
proc toPremultipliedAlphaSimd*(data: ptr UncheckedArray[uint32], len: int) =
|
||||
var i: int
|
||||
proc toPremultipliedAlphaSimd*(data: var seq[ColorRGBA | ColorRGBX]) =
|
||||
if cpuHasAvx2:
|
||||
i = toPremultipliedAlphaAvx2(data, len)
|
||||
else:
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||
for _ in 0 ..< len div 4:
|
||||
let
|
||||
values = mm_loadu_si128(data[i].addr)
|
||||
alpha = mm_and_si128(values, alphaMask)
|
||||
eq = mm_cmpeq_epi8(values, alphaMask)
|
||||
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
|
||||
let
|
||||
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
|
||||
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
|
||||
var
|
||||
colorsEven = mm_slli_epi16(values, 8)
|
||||
colorsOdd = mm_and_si128(values, oddMask)
|
||||
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
|
||||
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
|
||||
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
|
||||
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
|
||||
mm_storeu_si128(
|
||||
data[i].addr,
|
||||
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
|
||||
)
|
||||
i += 4
|
||||
toPremultipliedAlphaAvx2(data)
|
||||
return
|
||||
|
||||
for i in i ..< len:
|
||||
var c: ColorRGBX
|
||||
copyMem(c.addr, data[i].addr, 4)
|
||||
c.r = ((c.r.uint32 * c.a) div 255).uint8
|
||||
c.g = ((c.g.uint32 * c.a) div 255).uint8
|
||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
||||
copyMem(data[i].addr, c.addr, 4)
|
||||
var i: int
|
||||
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm_set1_epi16(0xff00)
|
||||
div255 = mm_set1_epi16(0x8081)
|
||||
iterations = data.len div 4
|
||||
for _ in 0 ..< iterations:
|
||||
let
|
||||
values = mm_loadu_si128(data[i].addr)
|
||||
alpha = mm_and_si128(values, alphaMask)
|
||||
eq = mm_cmpeq_epi8(values, alphaMask)
|
||||
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
|
||||
let
|
||||
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
|
||||
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
|
||||
var
|
||||
colorsEven = mm_slli_epi16(values, 8)
|
||||
colorsOdd = mm_and_si128(values, oddMask)
|
||||
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
|
||||
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
|
||||
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
|
||||
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
|
||||
mm_storeu_si128(
|
||||
data[i].addr,
|
||||
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
|
||||
)
|
||||
i += 4
|
||||
|
||||
for i in i ..< data.len:
|
||||
var c = data[i]
|
||||
if c.a != 255:
|
||||
c.r = ((c.r.uint32 * c.a) div 255).uint8
|
||||
c.g = ((c.g.uint32 * c.a) div 255).uint8
|
||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
||||
data[i] = c
|
||||
|
||||
proc newImageFromMaskSimd*(
|
||||
dst: ptr UncheckedArray[ColorRGBX],
|
||||
|
@ -282,7 +285,7 @@ when defined(amd64):
|
|||
rgbx.a = 255 - rgbx.a
|
||||
data[i] = rgbx
|
||||
|
||||
toPremultipliedAlphaSimd(cast[ptr UncheckedArray[uint32]](data[0].addr), data.len)
|
||||
toPremultipliedAlphaSimd(data)
|
||||
|
||||
proc invertMaskSimd*(data: var seq[uint8]) =
|
||||
var
|
||||
|
|
Loading…
Reference in a new issue