toPremultipliedAlpha rounds, + neon version

This commit is contained in:
Ryan Oldenburg 2022-07-21 20:21:04 -05:00
parent 94cbbb8d3b
commit 1a660ab8ec
7 changed files with 105 additions and 28 deletions

View file

@ -7,10 +7,10 @@ srcDir = "src"
requires "nim >= 1.4.8" requires "nim >= 1.4.8"
requires "vmath >= 1.1.4" requires "vmath >= 1.1.4"
requires "chroma >= 0.2.5" requires "chroma >= 0.2.6"
requires "zippy >= 0.10.2" requires "zippy >= 0.10.3"
requires "flatty >= 0.3.4" requires "flatty >= 0.3.4"
requires "nimsimd >= 1.1.6" requires "nimsimd >= 1.1.7"
requires "bumpy >= 1.1.1" requires "bumpy >= 1.1.1"
task bindings, "Generate bindings": task bindings, "Generate bindings":

View file

@ -104,9 +104,9 @@ proc toPremultipliedAlpha*(
for i in 0 ..< data.len: for i in 0 ..< data.len:
var c = data[i] var c = data[i]
if c.a != 255: if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8 c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8 c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8 c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c data[i] = c
proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool {.hasSimd.} = proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool {.hasSimd.} =

View file

@ -96,7 +96,8 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
let let
alphaMask = mm256_set1_epi32(cast[int32](0xff000000)) alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
oddMask = mm256_set1_epi16(0xff00) oddMask = mm256_set1_epi16(0xff00)
div255 = mm256_set1_epi16(0x8081) vec128 = mm256_set1_epi16(128)
hiMask = mm256_set1_epi16(255 shl 8)
iterations = data.len div 8 iterations = data.len div 8
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
@ -112,20 +113,26 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
colorsOdd = mm256_and_si256(values, oddMask) colorsOdd = mm256_and_si256(values, oddMask)
colorsEven = mm256_mulhi_epu16(colorsEven, evenMultiplier) colorsEven = mm256_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm256_mulhi_epu16(colorsOdd, oddMultiplier) colorsOdd = mm256_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7) let
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7) tmpEven = mm256_add_epi16(colorsEven, vec128)
mm256_storeu_si256( tmpOdd = mm256_add_epi16(colorsOdd, vec128)
data[i].addr, colorsEven = tmpEven
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8)) colorsOdd = tmpOdd
) colorsEven = mm256_srli_epi16(colorsEven, 8)
colorsOdd = mm256_srli_epi16(colorsOdd, 8)
colorsEven = mm256_add_epi16(colorsEven, tmpEven)
colorsOdd = mm256_add_epi16(colorsOdd, tmpOdd)
colorsEven = mm256_srli_epi16(colorsEven, 8)
colorsOdd = mm256_and_si256(colorsOdd, hiMask)
mm256_storeu_si256(data[i].addr, mm256_or_si256(colorsEven, colorsOdd))
i += 8 i += 8
for i in i ..< data.len: for i in i ..< data.len:
var c = data[i] var c = data[i]
if c.a != 255: if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8 c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8 c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8 c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c data[i] = c
when defined(release): when defined(release):

View file

@ -123,6 +123,43 @@ proc isOpaqueNeon*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
if data[i].a != 255: if data[i].a != 255:
return false return false
proc toPremultipliedAlphaNeon*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
  ## Premultiplies the r, g and b channels of every pixel in `data` by that
  ## pixel's alpha, in place. The scalar path rounds to nearest using
  ## `(c * a + 127) div 255`; the NEON path is intended to produce the same
  ## values. The alpha channel itself is never modified.

  # Nothing to do for an empty seq. Returning early also avoids evaluating
  # `data[0]`, which is out of bounds when `data.len == 0`.
  if data.len == 0:
    return

  template premultiplyScalar(idx: int) =
    # One pixel at a time. Pixels with alpha 255 are skipped because
    # premultiplying them would not change any channel.
    var c = data[idx]
    if c.a != 255:
      c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
      c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
      c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
      data[idx] = c

  proc premultiply(c, a: uint8x8): uint8x8 {.inline.} =
    # Widening multiply to 16 bit lanes, then a rounding divide by 255:
    # (ca + ((ca + 128) shr 8) + 128) shr 8, narrowed back to 8 bit lanes.
    let ca = vmull_u8(c, a)
    vraddhn_u16(ca, vrshrq_n_u16(ca, 8))

  var
    i: int
    p = cast[uint](data[0].addr)

  # Align to 16 bytes, handling the leading pixels one by one
  # (the pointer advances 4 bytes per pixel).
  while i < data.len and (p and 15) != 0:
    premultiplyScalar(i)
    inc i
    p += 4

  # Main loop: 8 pixels (32 bytes) per iteration.
  let iterations = (data.len - i) div 8
  for _ in 0 ..< iterations:
    # vld4_u8 de-interleaves the 8 pixels into 4 per-channel vectors;
    # val[3] is used as the multiplier (presumably alpha - the channel
    # order is assumed from the color type names, confirm if in doubt).
    var channels = vld4_u8(cast[pointer](p))
    channels.val[0] = premultiply(channels.val[0], channels.val[3])
    channels.val[1] = premultiply(channels.val[1], channels.val[3])
    channels.val[2] = premultiply(channels.val[2], channels.val[3])
    vst4_u8(cast[pointer](p), channels)
    p += 32
    i += 8

  # Whatever did not fill a complete group of 8 pixels.
  for j in i ..< data.len:
    premultiplyScalar(j)
proc newImageNeon*(mask: Mask): Image {.simd.} = proc newImageNeon*(mask: Mask): Image {.simd.} =
result = newImage(mask.width, mask.height) result = newImage(mask.width, mask.height)

View file

@ -170,7 +170,8 @@ proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
let let
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) alphaMask = mm_set1_epi32(cast[int32](0xff000000))
oddMask = mm_set1_epi16(0xff00) oddMask = mm_set1_epi16(0xff00)
div255 = mm_set1_epi16(0x8081) vec128 = mm_set1_epi16(128)
hiMask = mm_set1_epi16(255 shl 8)
iterations = data.len div 4 iterations = data.len div 4
for _ in 0 ..< iterations: for _ in 0 ..< iterations:
let let
@ -186,20 +187,26 @@ proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
colorsOdd = mm_and_si128(values, oddMask) colorsOdd = mm_and_si128(values, oddMask)
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier) colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier) colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7) let
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7) tmpEven = mm_add_epi16(colorsEven, vec128)
mm_storeu_si128( tmpOdd = mm_add_epi16(colorsOdd, vec128)
data[i].addr, colorsEven = tmpEven
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8)) colorsOdd = tmpOdd
) colorsEven = mm_srli_epi16(colorsEven, 8)
colorsOdd = mm_srli_epi16(colorsOdd, 8)
colorsEven = mm_add_epi16(colorsEven, tmpEven)
colorsOdd = mm_add_epi16(colorsOdd, tmpOdd)
colorsEven = mm_srli_epi16(colorsEven, 8)
colorsOdd = mm_and_si128(colorsOdd, hiMask)
mm_storeu_si128(data[i].addr, mm_or_si128(colorsEven, colorsOdd))
i += 4 i += 4
for i in i ..< data.len: for i in i ..< data.len:
var c = data[i] var c = data[i]
if c.a != 255: if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8 c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8 c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8 c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c data[i] = c
proc newImageSse2*(mask: Mask): Image {.simd.} = proc newImageSse2*(mask: Mask): Image {.simd.} =

View file

@ -121,7 +121,7 @@ block:
let a = newImage(100, 100) let a = newImage(100, 100)
a.fill(rgbx(50, 100, 150, 200)) a.fill(rgbx(50, 100, 150, 200))
a.invert() a.invert()
doAssert a[0, 0] == rgbx(44, 33, 22, 55) doAssert a[0, 0] == rgbx(44, 33, 23, 55)
block: block:
let ctx = newContext(100, 100) let ctx = newContext(100, 100)
@ -226,3 +226,29 @@ block:
292.0, 45.0, 1.0 292.0, 45.0, 1.0
) )
) )
block:
  # Exhaustive check of toPremultipliedAlpha: for every (alpha, red) pair
  # the result must equal a floating point reference (red * alpha, rounded)
  # and must also equal the `.rgbx` conversion of the original color.
  # Green and blue are always 0 in this test.
  var
    colors: seq[ColorRGBA]
    premultiplied: seq[ColorRGBX]

  for a in 0.uint8 .. 255:
    for r in 0.uint8 .. 255:
      let
        straight = rgba(r, 0, 0, a)
        asFloats = straight.color()
        scaled = color(asFloats.r * asFloats.a, 0, 0, asFloats.a)
      colors.add(straight)
      # Reference value: scale back to 0..255 and round to nearest.
      premultiplied.add(rgbx(
        round(scaled.r * 255).uint8,
        0,
        0,
        round(scaled.a * 255).uint8
      ))

  # Reinterpret the straight-alpha pixels as ColorRGBX and convert in place.
  var converted = cast[seq[ColorRGBX]](colors)
  toPremultipliedAlpha(converted)

  for i in 0 ..< premultiplied.len:
    doAssert premultiplied[i] == converted[i]
    doAssert colors[i].rgbx == converted[i]

View file

@ -279,7 +279,7 @@ block:
image.draw(strokeImage) image.draw(strokeImage)
image.xray("tests/images/fillOptimization.png") image.xray("tests/images/fillOptimization.png")
doAssert image[10, 10] == rgbx(255, 127, 63, 255) doAssert image[10, 10] == rgbx(255, 128, 64, 255)
block: block:
let a = newImage(100, 100) let a = newImage(100, 100)