use return as progress marker

This commit is contained in:
Ryan Oldenburg 2022-06-28 18:42:06 -05:00
parent e0ac7dc7a2
commit fe488708cd
2 changed files with 30 additions and 43 deletions

View file

@ -141,39 +141,36 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} = proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
## Converts an image to premultiplied alpha from straight alpha. ## Converts an image to premultiplied alpha from straight alpha.
when defined(amd64) and allowSimd:
if cpuHasAvx2:
toPremultipliedAlphaAvx2(data)
return
var i: int var i: int
when defined(amd64) and allowSimd: when defined(amd64) and allowSimd:
# When supported, SIMD convert as much as possible if cpuHasAvx2:
let i = toPremultipliedAlphaAvx2(data)
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) else:
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
for _ in 0 ..< data.len div 4:
let let
values = mm_loadu_si128(data[i].addr) alphaMask = mm_set1_epi32(cast[int32](0xff000000))
alpha = mm_and_si128(values, alphaMask) oddMask = mm_set1_epi16(cast[int16](0xff00))
eq = mm_cmpeq_epi8(values, alphaMask) div255 = mm_set1_epi16(cast[int16](0x8081))
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888: for _ in 0 ..< data.len div 4:
let let
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16)) values = mm_loadu_si128(data[i].addr)
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask) alpha = mm_and_si128(values, alphaMask)
var eq = mm_cmpeq_epi8(values, alphaMask)
colorsEven = mm_slli_epi16(values, 8) if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
colorsOdd = mm_and_si128(values, oddMask) let
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier) evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier) oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7) var
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7) colorsEven = mm_slli_epi16(values, 8)
mm_storeu_si128( colorsOdd = mm_and_si128(values, oddMask)
data[i].addr, colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8)) colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
) colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
i += 4 colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
mm_storeu_si128(
data[i].addr,
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
)
i += 4
# Convert whatever is left # Convert whatever is left
for i in i ..< data.len: for i in i ..< data.len:

View file

@ -102,16 +102,14 @@ proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool =
if data[i].a != 255: if data[i].a != 255:
return false return false
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) = proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]): int =
var i: int
let let
alphaMask = mm256_set1_epi32(cast[int32](0xff000000)) alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
oddMask = mm256_set1_epi16(cast[int16](0xff00)) oddMask = mm256_set1_epi16(cast[int16](0xff00))
div255 = mm256_set1_epi16(cast[int16](0x8081)) div255 = mm256_set1_epi16(cast[int16](0x8081))
for _ in 0 ..< data.len div 8: for _ in 0 ..< data.len div 8:
let let
values = mm256_loadu_si256(data[i].addr) values = mm256_loadu_si256(data[result].addr)
alpha = mm256_and_si256(values, alphaMask) alpha = mm256_and_si256(values, alphaMask)
eq = mm256_cmpeq_epi8(values, alphaMask) eq = mm256_cmpeq_epi8(values, alphaMask)
if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888: if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888:
@ -126,18 +124,10 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) =
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7) colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7) colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7)
mm256_storeu_si256( mm256_storeu_si256(
data[i].addr, data[result].addr,
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8)) mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8))
) )
i += 8 result += 8
for i in i ..< data.len:
var c = data[i]
if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8
data[i] = c
when defined(release): when defined(release):
{.pop.} {.pop.}