use return as progress marker
This commit is contained in:
parent
e0ac7dc7a2
commit
fe488708cd
2 changed files with 30 additions and 43 deletions
|
@ -141,39 +141,36 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
||||||
|
|
||||||
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
||||||
## Converts an image to premultiplied alpha from straight alpha.
|
## Converts an image to premultiplied alpha from straight alpha.
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
if cpuHasAvx2:
|
|
||||||
toPremultipliedAlphaAvx2(data)
|
|
||||||
return
|
|
||||||
|
|
||||||
var i: int
|
var i: int
|
||||||
when defined(amd64) and allowSimd:
|
when defined(amd64) and allowSimd:
|
||||||
# When supported, SIMD convert as much as possible
|
if cpuHasAvx2:
|
||||||
let
|
i = toPremultipliedAlphaAvx2(data)
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
else:
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
|
||||||
for _ in 0 ..< data.len div 4:
|
|
||||||
let
|
let
|
||||||
values = mm_loadu_si128(data[i].addr)
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
alpha = mm_and_si128(values, alphaMask)
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
eq = mm_cmpeq_epi8(values, alphaMask)
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
|
for _ in 0 ..< data.len div 4:
|
||||||
let
|
let
|
||||||
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
|
values = mm_loadu_si128(data[i].addr)
|
||||||
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
|
alpha = mm_and_si128(values, alphaMask)
|
||||||
var
|
eq = mm_cmpeq_epi8(values, alphaMask)
|
||||||
colorsEven = mm_slli_epi16(values, 8)
|
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
|
||||||
colorsOdd = mm_and_si128(values, oddMask)
|
let
|
||||||
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
|
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
|
||||||
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
|
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
|
||||||
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
|
var
|
||||||
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
|
colorsEven = mm_slli_epi16(values, 8)
|
||||||
mm_storeu_si128(
|
colorsOdd = mm_and_si128(values, oddMask)
|
||||||
data[i].addr,
|
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
|
||||||
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
|
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
|
||||||
)
|
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
|
||||||
i += 4
|
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
|
||||||
|
mm_storeu_si128(
|
||||||
|
data[i].addr,
|
||||||
|
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
|
||||||
|
)
|
||||||
|
i += 4
|
||||||
|
|
||||||
# Convert whatever is left
|
# Convert whatever is left
|
||||||
for i in i ..< data.len:
|
for i in i ..< data.len:
|
||||||
|
|
|
@ -102,16 +102,14 @@ proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool =
|
||||||
if data[i].a != 255:
|
if data[i].a != 255:
|
||||||
return false
|
return false
|
||||||
|
|
||||||
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) =
|
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]): int =
|
||||||
var i: int
|
|
||||||
|
|
||||||
let
|
let
|
||||||
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||||
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
||||||
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
||||||
for _ in 0 ..< data.len div 8:
|
for _ in 0 ..< data.len div 8:
|
||||||
let
|
let
|
||||||
values = mm256_loadu_si256(data[i].addr)
|
values = mm256_loadu_si256(data[result].addr)
|
||||||
alpha = mm256_and_si256(values, alphaMask)
|
alpha = mm256_and_si256(values, alphaMask)
|
||||||
eq = mm256_cmpeq_epi8(values, alphaMask)
|
eq = mm256_cmpeq_epi8(values, alphaMask)
|
||||||
if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888:
|
if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888:
|
||||||
|
@ -126,18 +124,10 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) =
|
||||||
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7)
|
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7)
|
||||||
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7)
|
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7)
|
||||||
mm256_storeu_si256(
|
mm256_storeu_si256(
|
||||||
data[i].addr,
|
data[result].addr,
|
||||||
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8))
|
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8))
|
||||||
)
|
)
|
||||||
i += 8
|
result += 8
|
||||||
|
|
||||||
for i in i ..< data.len:
|
|
||||||
var c = data[i]
|
|
||||||
if c.a != 255:
|
|
||||||
c.r = ((c.r.uint32 * c.a) div 255).uint8
|
|
||||||
c.g = ((c.g.uint32 * c.a) div 255).uint8
|
|
||||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
|
||||||
data[i] = c
|
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
Loading…
Reference in a new issue