commit
2a54de7604
5 changed files with 71 additions and 62 deletions
|
@ -1,4 +1,4 @@
|
||||||
version = "3.0.2"
|
version = "3.0.3"
|
||||||
author = "Andre von Houck and Ryan Oldenburg"
|
author = "Andre von Houck and Ryan Oldenburg"
|
||||||
description = "Full-featured 2d graphics library for Nim."
|
description = "Full-featured 2d graphics library for Nim."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
|
@ -56,13 +56,11 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
|
||||||
for j in countup(i, data.len - 4, 4):
|
for _ in countup(i, data.len - 4, 4):
|
||||||
var
|
var
|
||||||
color = mm_loadu_si128(data[j].addr)
|
color = mm_loadu_si128(data[i].addr)
|
||||||
alpha = mm_and_si128(color, alphaMask)
|
alpha = mm_and_si128(color, alphaMask)
|
||||||
|
if mm_movemask_epi8(mm_cmpeq_epi16(alpha, alphaMask)) != 0xffff:
|
||||||
let eqOpaque = mm_cmpeq_epi16(alpha, alphaMask)
|
|
||||||
if mm_movemask_epi8(eqOpaque) != 0xffff:
|
|
||||||
# If not all of the alpha values are 255, premultiply
|
# If not all of the alpha values are 255, premultiply
|
||||||
var
|
var
|
||||||
colorEven = mm_slli_epi16(color, 8)
|
colorEven = mm_slli_epi16(color, 8)
|
||||||
|
@ -81,8 +79,10 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
|
||||||
mm_and_si128(alpha, alphaMask), mm_and_si128(color, notAlphaMask)
|
mm_and_si128(alpha, alphaMask), mm_and_si128(color, notAlphaMask)
|
||||||
)
|
)
|
||||||
|
|
||||||
mm_storeu_si128(data[j].addr, color)
|
mm_storeu_si128(data[i].addr, color)
|
||||||
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
# Convert whatever is left
|
# Convert whatever is left
|
||||||
for j in i ..< data.len:
|
for j in i ..< data.len:
|
||||||
var c = data[j]
|
var c = data[j]
|
||||||
|
|
|
@ -1164,10 +1164,10 @@ iterator walk(
|
||||||
windingRule: WindingRule,
|
windingRule: WindingRule,
|
||||||
y: int,
|
y: int,
|
||||||
width: float32
|
width: float32
|
||||||
): (float32, float32, int32) =
|
): (float32, float32, int) =
|
||||||
var
|
var
|
||||||
prevAt: float32
|
prevAt: float32
|
||||||
count: int32
|
count: int
|
||||||
for i in 0 ..< numHits:
|
for i in 0 ..< numHits:
|
||||||
let (at, winding) = hits[i]
|
let (at, winding) = hits[i]
|
||||||
if windingRule == wrNonZero and
|
if windingRule == wrNonZero and
|
||||||
|
@ -1257,10 +1257,10 @@ proc computeCoverages(
|
||||||
if fillLen > 0:
|
if fillLen > 0:
|
||||||
var i = fillStart
|
var i = fillStart
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
|
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
|
||||||
for j in countup(i, fillStart + fillLen - 16, 16):
|
for j in countup(i, fillStart + fillLen - 16, 16):
|
||||||
var coverage = mm_loadu_si128(coverages[j - startX].addr)
|
var coverage = mm_loadu_si128(coverages[j - startX].addr)
|
||||||
coverage = mm_add_epi8(coverage, vSampleCoverage)
|
coverage = mm_add_epi8(coverage, sampleCoverageVec)
|
||||||
mm_storeu_si128(coverages[j - startX].addr, coverage)
|
mm_storeu_si128(coverages[j - startX].addr, coverage)
|
||||||
i += 16
|
i += 16
|
||||||
for j in i ..< fillStart + fillLen:
|
for j in i ..< fillStart + fillLen:
|
||||||
|
@ -1291,56 +1291,64 @@ proc fillCoverage(
|
||||||
# When supported, SIMD blend as much as possible
|
# When supported, SIMD blend as much as possible
|
||||||
let
|
let
|
||||||
blenderSimd = blendMode.blenderSimd()
|
blenderSimd = blendMode.blenderSimd()
|
||||||
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
vColor = mm_set1_epi32(cast[int32](rgbx))
|
vec255 = mm_set1_epi32(cast[int32](uint32.high))
|
||||||
for _ in countup(x, startX + coverages.len - 16, 4):
|
zeroVec = mm_setzero_si128()
|
||||||
var coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||||
coverage = mm_and_si128(coverage, first32)
|
for _ in countup(x, startX + coverages.len - 16, 16):
|
||||||
|
|
||||||
let
|
let
|
||||||
index = image.dataIndex(x, y)
|
index = image.dataIndex(x, y)
|
||||||
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
|
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
||||||
if mm_movemask_epi8(eqZero) != 0xffff: # or blendMode == bmExcludeMask:
|
|
||||||
|
if mm_movemask_epi8(mm_cmpeq_epi16(coverage, zeroVec)) != 0xffff:
|
||||||
# If the coverages are not all zero
|
# If the coverages are not all zero
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) == 0xffff:
|
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, vec255)) == 0xffff:
|
||||||
# Coverages are all 255
|
# If the coverages are all 255
|
||||||
if blendMode == bmNormal and rgbx.a == 255:
|
if blendMode == bmNormal and rgbx.a == 255:
|
||||||
mm_storeu_si128(image.data[index].addr, vColor)
|
for i in 0 ..< 4:
|
||||||
|
mm_storeu_si128(image.data[index + i * 4].addr, colorVec)
|
||||||
else:
|
else:
|
||||||
let backdrop = mm_loadu_si128(image.data[index].addr)
|
for i in 0 ..< 4:
|
||||||
mm_storeu_si128(
|
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
||||||
image.data[index].addr,
|
mm_storeu_si128(
|
||||||
blenderSimd(backdrop, vColor)
|
image.data[index + i * 4].addr,
|
||||||
)
|
blenderSimd(backdrop, colorVec)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Coverages are not all 255
|
# Coverages are not all 255
|
||||||
coverage = unpackAlphaValues(coverage)
|
var coverage = coverage
|
||||||
# Shift the coverages from `a` to `g` and `a` for multiplying
|
for i in 0 ..< 4:
|
||||||
coverage = mm_or_si128(coverage, mm_srli_epi32(coverage, 16))
|
var unpacked = unpackAlphaValues(coverage)
|
||||||
|
# Shift the coverages from `a` to `g` and `a` for multiplying
|
||||||
|
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
||||||
|
|
||||||
var
|
var
|
||||||
source = vColor
|
source = colorVec
|
||||||
sourceEven = mm_slli_epi16(source, 8)
|
sourceEven = mm_slli_epi16(source, 8)
|
||||||
sourceOdd = mm_and_si128(source, oddMask)
|
sourceOdd = mm_and_si128(source, oddMask)
|
||||||
|
|
||||||
sourceEven = mm_mulhi_epu16(sourceEven, coverage)
|
sourceEven = mm_mulhi_epu16(sourceEven, unpacked)
|
||||||
sourceOdd = mm_mulhi_epu16(sourceOdd, coverage)
|
sourceOdd = mm_mulhi_epu16(sourceOdd, unpacked)
|
||||||
|
|
||||||
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
|
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
|
||||||
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
|
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
|
||||||
|
|
||||||
source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
|
source = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
|
||||||
|
|
||||||
|
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
||||||
|
mm_storeu_si128(
|
||||||
|
image.data[index + i * 4].addr,
|
||||||
|
blenderSimd(backdrop, source)
|
||||||
|
)
|
||||||
|
|
||||||
|
coverage = mm_srli_si128(coverage, 4)
|
||||||
|
|
||||||
let backdrop = mm_loadu_si128(image.data[index].addr)
|
|
||||||
mm_storeu_si128(
|
|
||||||
image.data[index].addr,
|
|
||||||
blenderSimd(backdrop, source)
|
|
||||||
)
|
|
||||||
elif blendMode == bmMask:
|
elif blendMode == bmMask:
|
||||||
mm_storeu_si128(image.data[index].addr, mm_setzero_si128())
|
for i in 0 ..< 4:
|
||||||
x += 4
|
mm_storeu_si128(image.data[index + i * 4].addr, zeroVec)
|
||||||
|
|
||||||
|
x += 16
|
||||||
|
|
||||||
let blender = blendMode.blender()
|
let blender = blendMode.blender()
|
||||||
while x < startX + coverages.len:
|
while x < startX + coverages.len:
|
||||||
|
@ -1375,13 +1383,14 @@ proc fillCoverage(
|
||||||
var x = startX
|
var x = startX
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
if blendMode.hasSimdMasker():
|
if blendMode.hasSimdMasker():
|
||||||
let maskerSimd = blendMode.maskerSimd()
|
let
|
||||||
|
maskerSimd = blendMode.maskerSimd()
|
||||||
|
zeroVec = mm_setzero_si128()
|
||||||
for _ in countup(x, startX + coverages.len - 16, 16):
|
for _ in countup(x, startX + coverages.len - 16, 16):
|
||||||
let
|
let
|
||||||
index = mask.dataIndex(x, y)
|
index = mask.dataIndex(x, y)
|
||||||
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
||||||
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
|
if mm_movemask_epi8(mm_cmpeq_epi16(coverage, zeroVec)) != 0xffff:
|
||||||
if mm_movemask_epi8(eqZero) != 0xffff: # or blendMode == bmExcludeMask:
|
|
||||||
# If the coverages are not all zero
|
# If the coverages are not all zero
|
||||||
let backdrop = mm_loadu_si128(mask.data[index].addr)
|
let backdrop = mm_loadu_si128(mask.data[index].addr)
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
|
@ -1389,7 +1398,7 @@ proc fillCoverage(
|
||||||
maskerSimd(backdrop, coverage)
|
maskerSimd(backdrop, coverage)
|
||||||
)
|
)
|
||||||
elif blendMode == bmMask:
|
elif blendMode == bmMask:
|
||||||
mm_storeu_si128(mask.data[index].addr, mm_setzero_si128())
|
mm_storeu_si128(mask.data[index].addr, zeroVec)
|
||||||
x += 16
|
x += 16
|
||||||
|
|
||||||
let masker = blendMode.masker()
|
let masker = blendMode.masker()
|
||||||
|
@ -1438,16 +1447,16 @@ proc fillHits(
|
||||||
# When supported, SIMD blend as much as possible
|
# When supported, SIMD blend as much as possible
|
||||||
let
|
let
|
||||||
blenderSimd = blendMode.blenderSimd()
|
blenderSimd = blendMode.blenderSimd()
|
||||||
vColor = mm_set1_epi32(cast[int32](rgbx))
|
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||||
for _ in countup(fillStart, fillLen - 16, 4):
|
for _ in countup(fillStart, fillLen - 16, 16):
|
||||||
let
|
let index = image.dataIndex(x, y)
|
||||||
index = image.dataIndex(x, y)
|
for i in 0 ..< 4:
|
||||||
backdrop = mm_loadu_si128(image.data[index].addr)
|
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
image.data[index].addr,
|
image.data[index + i * 4].addr,
|
||||||
blenderSimd(backdrop, vColor)
|
blenderSimd(backdrop, colorVec)
|
||||||
)
|
)
|
||||||
x += 4
|
x += 16
|
||||||
|
|
||||||
for x in x ..< fillStart + fillLen:
|
for x in x ..< fillStart + fillLen:
|
||||||
let backdrop = image.getRgbaUnsafe(x, y)
|
let backdrop = image.getRgbaUnsafe(x, y)
|
||||||
|
@ -1487,12 +1496,12 @@ proc fillHits(
|
||||||
if blendMode.hasSimdMasker():
|
if blendMode.hasSimdMasker():
|
||||||
let
|
let
|
||||||
maskerSimd = blendMode.maskerSimd()
|
maskerSimd = blendMode.maskerSimd()
|
||||||
vValue = mm_set1_epi8(cast[int8](255))
|
valueVec = mm_set1_epi8(cast[int8](255))
|
||||||
for _ in countup(fillStart, fillLen - 16, 16):
|
for _ in countup(fillStart, fillLen - 16, 16):
|
||||||
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
mask.data[mask.dataIndex(x, y)].addr,
|
mask.data[mask.dataIndex(x, y)].addr,
|
||||||
maskerSimd(backdrop, vValue)
|
maskerSimd(backdrop, valueVec)
|
||||||
)
|
)
|
||||||
x += 16
|
x += 16
|
||||||
|
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
Binary file not shown.
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
Loading…
Reference in a new issue