tmp
This commit is contained in:
parent
e0cb5c2b11
commit
24b36b077e
4 changed files with 287 additions and 310 deletions
|
@ -76,6 +76,19 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
|
||||||
a = ((color.a * x + 127) div 255).uint8
|
a = ((color.a * x + 127) div 255).uint8
|
||||||
rgbx(r, g, b, a)
|
rgbx(r, g, b, a)
|
||||||
|
|
||||||
|
proc `*`*(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
  ## Scales every channel of `rgbx` by `coverage / 255`, rounding to the
  ## nearest integer (a bias of 127 is added before the integer division).
  ##
  ## A coverage of 0 returns the zero-initialized color and a coverage of
  ## 255 returns `rgbx` unchanged, both without doing any arithmetic.
  template scaled(channel: uint8): uint8 =
    ((channel.uint32 * coverage + 127) div 255).uint8

  case coverage
  of 0:
    discard # `result` keeps its zero-initialized value
  of 255:
    result = rgbx
  else:
    result = rgbx(
      scaled(rgbx.r),
      scaled(rgbx.g),
      scaled(rgbx.b),
      scaled(rgbx.a)
    )
|
||||||
|
|
||||||
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
||||||
let
|
let
|
||||||
xMin = rect.x
|
xMin = rect.x
|
||||||
|
|
|
@ -1429,6 +1429,47 @@ proc clearUnsafe(image: Image, startX, startY, toX, toY: int) =
|
||||||
len = image.dataIndex(toX, toY) - start
|
len = image.dataIndex(toX, toY) - start
|
||||||
fillUnsafe(image.data, rgbx(0, 0, 0, 0), start, len)
|
fillUnsafe(image.data, rgbx(0, 0, 0, 0), start, len)
|
||||||
|
|
||||||
|
proc blendLineCoverageOverwrite(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Overwrites the first `len` pixels of `line` with `rgbx` scaled by the
  ## matching entry of `coverages`. Pixels whose coverage is 0 are left
  ## untouched.
  for px in 0 ..< len:
    let amount = coverages[px]
    if amount == 0:
      continue
    line[px] = rgbx * amount
|
||||||
|
|
||||||
|
proc blendLineCoverageNormal(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Blends `rgbx`, scaled by the per-pixel `coverages`, onto the first
  ## `len` pixels of `line` using `blendNormal`.
  ##
  ## Pixels with coverage 0 are skipped. When the coverage is 255 and
  ## `rgbx` has an alpha of 255 the pixel is assigned directly instead of
  ## being blended.
  let opaqueSource = rgbx.a == 255
  for px in 0 ..< len:
    let amount = coverages[px]
    if amount == 0:
      continue
    line[px] =
      if opaqueSource and amount == 255:
        rgbx
      else:
        blendNormal(line[px], rgbx * amount)
|
||||||
|
|
||||||
|
proc blendLineCoverageMask(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Applies `rgbx`, scaled by the per-pixel `coverages`, as a mask over the
  ## first `len` pixels of `line` using `blendMask`.
  ##
  ## A coverage of 0 clears the pixel to `rgbx(0, 0, 0, 0)`, a coverage of
  ## 255 leaves the pixel untouched, and anything in between is blended.
  for px in 0 ..< len:
    let amount = coverages[px]
    case amount
    of 0:
      line[px] = rgbx(0, 0, 0, 0)
    of 255:
      discard
    else:
      line[px] = blendMask(line[px], rgbx * amount)
|
||||||
|
|
||||||
proc fillCoverage(
|
proc fillCoverage(
|
||||||
image: Image,
|
image: Image,
|
||||||
rgbx: ColorRGBX,
|
rgbx: ColorRGBX,
|
||||||
|
@ -1440,149 +1481,31 @@ proc fillCoverage(
|
||||||
x = startX
|
x = startX
|
||||||
dataIndex = image.dataIndex(x, y)
|
dataIndex = image.dataIndex(x, y)
|
||||||
|
|
||||||
when allowSimd:
|
|
||||||
when defined(amd64):
|
|
||||||
iterator simd(
|
|
||||||
coverages: seq[uint8], x: var int, startX: int
|
|
||||||
): (M128i, bool, bool) =
|
|
||||||
for _ in 0 ..< coverages.len div 16:
|
|
||||||
let
|
|
||||||
coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
|
||||||
eqZero = mm_cmpeq_epi8(coverageVec, mm_setzero_si128())
|
|
||||||
eq255 = mm_cmpeq_epi8(coverageVec, mm_set1_epi8(255))
|
|
||||||
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
|
|
||||||
all255 = mm_movemask_epi8(eq255) == 0xffff
|
|
||||||
yield (coverageVec, allZeroes, all255)
|
|
||||||
x += 16
|
|
||||||
|
|
||||||
proc source(colorVec, coverageVec: M128i): M128i {.inline.} =
|
|
||||||
let
|
|
||||||
oddMask = mm_set1_epi16(0xff00)
|
|
||||||
div255 = mm_set1_epi16(0x8081)
|
|
||||||
|
|
||||||
var unpacked = unpackAlphaValues(coverageVec)
|
|
||||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
|
||||||
|
|
||||||
var
|
|
||||||
sourceEven = mm_slli_epi16(colorVec, 8)
|
|
||||||
sourceOdd = mm_and_si128(colorVec, oddMask)
|
|
||||||
sourceEven = mm_mulhi_epu16(sourceEven, unpacked)
|
|
||||||
sourceOdd = mm_mulhi_epu16(sourceOdd, unpacked)
|
|
||||||
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
|
|
||||||
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
|
|
||||||
result = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
|
|
||||||
|
|
||||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
|
||||||
|
|
||||||
proc source(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
|
|
||||||
if coverage == 0:
|
|
||||||
discard
|
|
||||||
elif coverage == 255:
|
|
||||||
result = rgbx
|
|
||||||
else:
|
|
||||||
result = rgbx(
|
|
||||||
((rgbx.r.uint32 * coverage) div 255).uint8,
|
|
||||||
((rgbx.g.uint32 * coverage) div 255).uint8,
|
|
||||||
((rgbx.b.uint32 * coverage) div 255).uint8,
|
|
||||||
((rgbx.a.uint32 * coverage) div 255).uint8
|
|
||||||
)
|
|
||||||
|
|
||||||
case blendMode:
|
case blendMode:
|
||||||
of OverwriteBlend:
|
of OverwriteBlend:
|
||||||
when allowSimd:
|
blendLineCoverageOverwrite(
|
||||||
when defined(amd64):
|
image.getUncheckedArray(startX, y),
|
||||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||||
if allZeroes:
|
rgbx,
|
||||||
dataIndex += 16
|
coverages.len
|
||||||
else:
|
)
|
||||||
if all255:
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
|
|
||||||
dataIndex += 4
|
|
||||||
else:
|
|
||||||
var coverageVec = coverageVec
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
let source = source(colorVec, coverageVec)
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, source)
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
|
||||||
dataIndex += 4
|
|
||||||
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage != 0:
|
|
||||||
image.data[dataIndex] = source(rgbx, coverage)
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of NormalBlend:
|
of NormalBlend:
|
||||||
when allowSimd:
|
blendLineCoverageNormal(
|
||||||
when defined(amd64):
|
image.getUncheckedArray(startX, y),
|
||||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||||
if allZeroes:
|
rgbx,
|
||||||
dataIndex += 16
|
coverages.len
|
||||||
else:
|
)
|
||||||
if all255 and rgbx.a == 255:
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
|
|
||||||
dataIndex += 4
|
|
||||||
else:
|
|
||||||
var coverageVec = coverageVec
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
let
|
|
||||||
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
|
|
||||||
source = source(colorVec, coverageVec)
|
|
||||||
mm_storeu_si128(
|
|
||||||
image.data[dataIndex].addr,
|
|
||||||
blendNormalSimd(backdrop, source)
|
|
||||||
)
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
|
||||||
dataIndex += 4
|
|
||||||
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage == 255 and rgbx.a == 255:
|
|
||||||
image.data[dataIndex] = rgbx
|
|
||||||
elif coverage == 0:
|
|
||||||
discard
|
|
||||||
else:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendNormal(backdrop, source(rgbx, coverage))
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of MaskBlend:
|
of MaskBlend:
|
||||||
{.linearScanEnd.}
|
{.linearScanEnd.}
|
||||||
when allowSimd:
|
blendLineCoverageMask(
|
||||||
when defined(amd64):
|
image.getUncheckedArray(startX, y),
|
||||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||||
if not allZeroes:
|
rgbx,
|
||||||
if all255:
|
coverages.len
|
||||||
dataIndex += 16
|
)
|
||||||
else:
|
|
||||||
var coverageVec = coverageVec
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
let
|
|
||||||
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
|
|
||||||
source = source(colorVec, coverageVec)
|
|
||||||
mm_storeu_si128(
|
|
||||||
image.data[dataIndex].addr,
|
|
||||||
blendMaskSimd(backdrop, source)
|
|
||||||
)
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
|
||||||
dataIndex += 4
|
|
||||||
else:
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, mm_setzero_si128())
|
|
||||||
dataIndex += 4
|
|
||||||
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage == 0:
|
|
||||||
image.data[dataIndex] = rgbx(0, 0, 0, 0)
|
|
||||||
elif coverage == 255:
|
|
||||||
discard
|
|
||||||
else:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendMask(backdrop, source(rgbx, coverage))
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
image.clearUnsafe(0, y, startX, y)
|
image.clearUnsafe(0, y, startX, y)
|
||||||
image.clearUnsafe(startX + coverages.len, y, image.width, y)
|
image.clearUnsafe(startX + coverages.len, y, image.width, y)
|
||||||
|
@ -1593,7 +1516,7 @@ proc fillCoverage(
|
||||||
let coverage = coverages[x - startX]
|
let coverage = coverages[x - startX]
|
||||||
if coverage != 0:
|
if coverage != 0:
|
||||||
let backdrop = image.data[dataIndex]
|
let backdrop = image.data[dataIndex]
|
||||||
image.data[dataIndex] = blender(backdrop, source(rgbx, coverage))
|
image.data[dataIndex] = blender(backdrop, rgbx * coverage)
|
||||||
inc dataIndex
|
inc dataIndex
|
||||||
|
|
||||||
proc blendLineNormal(
|
proc blendLineNormal(
|
||||||
|
|
|
@ -6,6 +6,41 @@ when defined(gcc) or defined(clang):
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
|
template blendNormalSimd(backdrop, source: M256i): M256i =
  ## Computes `source + backdrop * multiplier`, where `multiplier` is
  ## `vecAlpha255` minus the alpha of `source` redistributed through
  ## `shuffleControl`, for the pixels packed in one 256-bit vector.
  ##
  ## The identifiers `alphaMask`, `oddMask`, `shuffleControl`,
  ## `vecAlpha255` and `div255` are not defined here; they must be in scope
  ## where the template is instantiated.
  let
    alphaLanes = mm256_shuffle_epi8(
      mm256_and_si256(source, alphaMask),
      shuffleControl
    )
    multiplier = mm256_sub_epi32(vecAlpha255, alphaLanes)
    # Even and odd bytes are processed separately as 16-bit words.
    evenScaled = mm256_mulhi_epu16(mm256_slli_epi16(backdrop, 8), multiplier)
    oddScaled = mm256_mulhi_epu16(
      mm256_and_si256(backdrop, oddMask),
      multiplier
    )
    evenOut = mm256_srli_epi16(mm256_mulhi_epu16(evenScaled, div255), 7)
    oddOut = mm256_srli_epi16(mm256_mulhi_epu16(oddScaled, div255), 7)

  mm256_add_epi8(
    source,
    mm256_or_si256(evenOut, mm256_slli_epi16(oddOut, 8))
  )
|
||||||
|
|
||||||
|
template blendMaskSimd(backdrop, source: M256i): M256i =
  ## Scales every channel of `backdrop` by the alpha of `source`
  ## (redistributed through `shuffleControl`) for the pixels packed in one
  ## 256-bit vector.
  ##
  ## The identifiers `alphaMask`, `oddMask`, `shuffleControl` and `div255`
  ## are not defined here; they must be in scope where the template is
  ## instantiated.
  let
    alphaLanes = mm256_shuffle_epi8(
      mm256_and_si256(source, alphaMask),
      shuffleControl
    )
    # Even and odd bytes are processed separately as 16-bit words.
    evenScaled = mm256_mulhi_epu16(mm256_slli_epi16(backdrop, 8), alphaLanes)
    oddScaled = mm256_mulhi_epu16(
      mm256_and_si256(backdrop, oddMask),
      alphaLanes
    )
    evenOut = mm256_srli_epi16(mm256_mulhi_epu16(evenScaled, div255), 7)
    oddOut = mm256_srli_epi16(mm256_mulhi_epu16(oddScaled, div255), 7)

  mm256_or_si256(evenOut, mm256_slli_epi16(oddOut, 8))
|
||||||
|
|
||||||
proc isOneColorAvx2*(image: Image): bool {.simd.} =
|
proc isOneColorAvx2*(image: Image): bool {.simd.} =
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
|
@ -400,26 +435,7 @@ proc blendLineNormalAvx2*(
|
||||||
)
|
)
|
||||||
while i < len - 8:
|
while i < len - 8:
|
||||||
let backdrop = mm256_load_si256(line[i].addr)
|
let backdrop = mm256_load_si256(line[i].addr)
|
||||||
var
|
mm256_store_si256(line[i].addr, blendNormalSimd(backdrop, source))
|
||||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
|
||||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
|
||||||
|
|
||||||
let multiplier = mm256_sub_epi32(vecAlpha255, sourceAlpha)
|
|
||||||
|
|
||||||
backdropEven = mm256_mulhi_epu16(backdropEven, multiplier)
|
|
||||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, multiplier)
|
|
||||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
let added = mm256_add_epi8(
|
|
||||||
source,
|
|
||||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
mm256_store_si256(line[i].addr, added)
|
|
||||||
i += 8
|
i += 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
@ -451,27 +467,7 @@ proc blendLineNormalAvx2*(
|
||||||
mm256_storeu_si256(a[i].addr, source)
|
mm256_storeu_si256(a[i].addr, source)
|
||||||
else:
|
else:
|
||||||
let backdrop = mm256_load_si256(a[i].addr)
|
let backdrop = mm256_load_si256(a[i].addr)
|
||||||
var
|
mm256_store_si256(a[i].addr, blendNormalSimd(backdrop, source))
|
||||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
|
||||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
|
||||||
|
|
||||||
let multiplier = mm256_sub_epi32(vecAlpha255, sourceAlpha)
|
|
||||||
|
|
||||||
backdropEven = mm256_mulhi_epu16(backdropEven, multiplier)
|
|
||||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, multiplier)
|
|
||||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
let added = mm256_add_epi8(
|
|
||||||
source,
|
|
||||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
mm256_store_si256(a[i].addr, added)
|
|
||||||
|
|
||||||
i += 8
|
i += 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
@ -496,22 +492,7 @@ proc blendLineMaskAvx2*(
|
||||||
)
|
)
|
||||||
while i < len - 8:
|
while i < len - 8:
|
||||||
let backdrop = mm256_load_si256(line[i].addr)
|
let backdrop = mm256_load_si256(line[i].addr)
|
||||||
var
|
mm256_store_si256(line[i].addr, blendMaskSimd(backdrop, source))
|
||||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
|
||||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
|
||||||
|
|
||||||
backdropEven = mm256_mulhi_epu16(backdropEven, sourceAlpha)
|
|
||||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, sourceAlpha)
|
|
||||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
mm256_store_si256(
|
|
||||||
line[i].addr,
|
|
||||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
i += 8
|
i += 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
@ -542,23 +523,7 @@ proc blendLineMaskAvx2*(
|
||||||
discard
|
discard
|
||||||
else:
|
else:
|
||||||
let backdrop = mm256_load_si256(a[i].addr)
|
let backdrop = mm256_load_si256(a[i].addr)
|
||||||
var
|
mm256_store_si256(a[i].addr, blendMaskSimd(backdrop, source))
|
||||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
|
||||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
|
||||||
|
|
||||||
backdropEven = mm256_mulhi_epu16(backdropEven, sourceAlpha)
|
|
||||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, sourceAlpha)
|
|
||||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
mm256_store_si256(
|
|
||||||
a[i].addr,
|
|
||||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
i += 8
|
i += 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
|
|
@ -10,17 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
||||||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||||
|
|
||||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
template blendNormalSimd*(backdrop, source: M128i): M128i =
|
||||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
|
||||||
|
|
||||||
proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
|
||||||
let
|
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
|
||||||
|
|
||||||
var
|
var
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||||
|
@ -28,14 +18,10 @@ proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||||
|
|
||||||
let k = mm_sub_epi32(
|
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
||||||
mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255])),
|
|
||||||
sourceAlpha
|
|
||||||
)
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, k)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, k)
|
|
||||||
|
|
||||||
|
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
||||||
|
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||||
|
|
||||||
|
@ -44,12 +30,7 @@ proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
)
|
)
|
||||||
|
|
||||||
proc blendMaskSimd*(backdrop, source: M128i): M128i {.inline.} =
|
template blendMaskSimd*(backdrop, source: M128i): M128i =
|
||||||
let
|
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
|
||||||
|
|
||||||
var
|
var
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||||
|
@ -59,7 +40,6 @@ proc blendMaskSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||||
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||||
|
|
||||||
|
@ -527,6 +507,67 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
|
proc applyCoverage*(rgbxVec, coverage: M128i): M128i {.inline.} =
  ## Multiplies the four packed pixels in `rgbxVec` by the first four bytes
  ## of `coverage` (one coverage byte per pixel) and divides by 255.
  ##
  ## NOTE(review): no rounding bias is added before the division, so the
  ## result is truncated. The scalar `*`(ColorRGBX, uint8) adds 127 first;
  ## confirm the difference is intended.
  let
    zero = mm_setzero_si128()
    oddMask = mm_set1_epi16(0xff00)
    div255 = mm_set1_epi16(0x8081)

  # Two byte-interleaves with zero place coverage byte k in the top byte of
  # 32-bit lane k, i.e. rgba(0, 0, 0, value).
  let spread = mm_unpacklo_epi8(zero, mm_unpacklo_epi8(zero, coverage))

  # Mirror that byte into the lower 16-bit half so that both halves of each
  # lane carry the same multiplier.
  let factor = mm_or_si128(spread, mm_srli_epi32(spread, 16))

  # Even and odd bytes are processed separately as 16-bit words.
  let
    evenProduct = mm_mulhi_epu16(mm_slli_epi16(rgbxVec, 8), factor)
    oddProduct = mm_mulhi_epu16(mm_and_si128(rgbxVec, oddMask), factor)
    evenOut = mm_srli_epi16(mm_mulhi_epu16(evenProduct, div255), 7)
    oddOut = mm_srli_epi16(mm_mulhi_epu16(oddProduct, div255), 7)

  mm_or_si128(evenOut, mm_slli_epi16(oddOut, 8))
|
||||||
|
|
||||||
|
proc blendLineCoverageOverwriteSse2*(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.simd.} =
  ## SSE2 version of the coverage overwrite: writes `rgbx` scaled by the
  ## per-pixel `coverages` into the first `len` pixels of `line`.
  ##
  ## `line` and `coverages` are unchecked arrays, so both must reference at
  ## least `len` elements. Pixels are handled one at a time until `line`
  ## reaches a 16-byte boundary, then 16 at a time, then one at a time for
  ## the remainder.
  var i: int

  # Scalar prologue up to the first 16-byte aligned pixel. The `i < len`
  # guard keeps a short line that ends before the alignment boundary from
  # being read and written past its end.
  while i < len and (cast[uint](line[i].addr) and 15) != 0:
    let coverage = coverages[i]
    if coverage != 0:
      line[i] = rgbx * coverage
    inc i

  let rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
  while i < len - 16:
    let
      coverage = mm_loadu_si128(coverages[i].addr)
      eqZero = mm_cmpeq_epi8(coverage, mm_setzero_si128())
      eq255 = mm_cmpeq_epi8(coverage, mm_set1_epi8(255))
    if mm_movemask_epi8(eqZero) == 0xffff:
      # All 16 coverages are 0: nothing to write.
      i += 16
    elif mm_movemask_epi8(eq255) == 0xffff:
      # All 16 coverages are 255: store the color as-is, 4 pixels per store.
      for _ in 0 ..< 4:
        mm_store_si128(line[i].addr, rgbxVec)
        i += 4
    else:
      # Mixed coverages: scale 4 pixels at a time, shifting the next four
      # coverage bytes into the low lanes after each store.
      var coverage = coverage
      for _ in 0 ..< 4:
        mm_storeu_si128(line[i].addr, rgbxVec.applyCoverage(coverage))
        coverage = mm_srli_si128(coverage, 4)
        i += 4

  # Scalar epilogue for whatever the vector loop left over.
  for i in i ..< len:
    let coverage = coverages[i]
    if coverage != 0:
      line[i] = rgbx * coverage
|
||||||
|
|
||||||
proc blendLineNormalSse2*(
|
proc blendLineNormalSse2*(
|
||||||
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
|
@ -543,26 +584,7 @@ proc blendLineNormalSse2*(
|
||||||
vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
||||||
while i < len - 4:
|
while i < len - 4:
|
||||||
let backdrop = mm_load_si128(line[i].addr)
|
let backdrop = mm_load_si128(line[i].addr)
|
||||||
var
|
mm_store_si128(line[i].addr, blendNormalSimd(backdrop, source))
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
|
||||||
|
|
||||||
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
let added = mm_add_epi8(
|
|
||||||
source,
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
mm_store_si128(line[i].addr, added)
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
@ -590,32 +612,65 @@ proc blendLineNormalSse2*(
|
||||||
mm_storeu_si128(a[i].addr, source)
|
mm_storeu_si128(a[i].addr, source)
|
||||||
else:
|
else:
|
||||||
let backdrop = mm_load_si128(a[i].addr)
|
let backdrop = mm_load_si128(a[i].addr)
|
||||||
var
|
mm_store_si128(a[i].addr, blendNormalSimd(backdrop, source))
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
|
||||||
|
|
||||||
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
let added = mm_add_epi8(
|
|
||||||
source,
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
mm_store_si128(a[i].addr, added)
|
|
||||||
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageNormalSse2*(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.simd.} =
  ## SSE2 version of the coverage normal blend: blends `rgbx`, scaled by the
  ## per-pixel `coverages`, onto the first `len` pixels of `line`.
  ##
  ## `line` and `coverages` are unchecked arrays, so both must reference at
  ## least `len` elements. Pixels are handled one at a time until `line`
  ## reaches a 16-byte boundary, then 16 at a time, then one at a time for
  ## the remainder.
  var i: int

  # Scalar prologue up to the first 16-byte aligned pixel. The `i < len`
  # guard keeps a short line that ends before the alignment boundary from
  # being read and written past its end.
  while i < len and (cast[uint](line[i].addr) and 15) != 0:
    let coverage = coverages[i]
    if coverage == 255 and rgbx.a == 255:
      line[i] = rgbx
    elif coverage == 0:
      discard
    else:
      line[i] = blendNormal(line[i], rgbx * coverage)
    inc i

  # `alphaMask`, `oddMask`, `div255` and `vecAlpha255` are not used directly
  # below; they appear to be picked up by name when the `blendNormalSimd`
  # template expands in this scope, so they must keep these names.
  let
    rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))
    vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
  while i < len - 16:
    let
      coverage = mm_loadu_si128(coverages[i].addr)
      eqZero = mm_cmpeq_epi8(coverage, mm_setzero_si128())
      eq255 = mm_cmpeq_epi8(coverage, mm_set1_epi8(255))
    if mm_movemask_epi8(eqZero) == 0xffff:
      # All 16 coverages are 0: the backdrop is left unchanged.
      i += 16
    elif mm_movemask_epi8(eq255) == 0xffff and rgbx.a == 255:
      # Full coverage with an alpha of 255: store the color directly.
      for _ in 0 ..< 4:
        mm_store_si128(line[i].addr, rgbxVec)
        i += 4
    else:
      # Blend 4 pixels at a time, shifting the next four coverage bytes
      # into the low lanes after each store.
      var coverage = coverage
      for _ in 0 ..< 4:
        let
          backdrop = mm_loadu_si128(line[i].addr)
          source = rgbxVec.applyCoverage(coverage)
        mm_storeu_si128(line[i].addr, blendNormalSimd(backdrop, source))
        coverage = mm_srli_si128(coverage, 4)
        i += 4

  # Scalar epilogue for whatever the vector loop left over.
  for i in i ..< len:
    let coverage = coverages[i]
    if coverage == 255 and rgbx.a == 255:
      line[i] = rgbx
    elif coverage == 0:
      discard
    else:
      line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
|
||||||
proc blendLineMaskSse2*(
|
proc blendLineMaskSse2*(
|
||||||
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
|
@ -631,22 +686,7 @@ proc blendLineMaskSse2*(
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
while i < len - 4:
|
while i < len - 4:
|
||||||
let backdrop = mm_load_si128(line[i].addr)
|
let backdrop = mm_load_si128(line[i].addr)
|
||||||
var
|
mm_store_si128(line[i].addr, blendMaskSimd(backdrop, source))
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
mm_store_si128(
|
|
||||||
line[i].addr,
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
@ -673,27 +713,63 @@ proc blendLineMaskSse2*(
|
||||||
discard
|
discard
|
||||||
else:
|
else:
|
||||||
let backdrop = mm_load_si128(a[i].addr)
|
let backdrop = mm_load_si128(a[i].addr)
|
||||||
var
|
mm_store_si128(a[i].addr, blendMaskSimd(backdrop, source))
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
mm_store_si128(
|
|
||||||
a[i].addr,
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageMaskSse2*(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.simd.} =
  ## SSE2 version of the coverage mask blend: masks the first `len` pixels
  ## of `line` with `rgbx` scaled by the per-pixel `coverages`.
  ##
  ## `line` and `coverages` are unchecked arrays, so both must reference at
  ## least `len` elements. Pixels are handled one at a time until `line`
  ## reaches a 16-byte boundary, then 16 at a time, then one at a time for
  ## the remainder.
  var i: int

  # Scalar prologue up to the first 16-byte aligned pixel. The `i < len`
  # guard matters here in particular: a coverage of 0 clears the pixel, so
  # running past a short line would zero memory beyond its end.
  while i < len and (cast[uint](line[i].addr) and 15) != 0:
    let coverage = coverages[i]
    if coverage == 0:
      line[i] = rgbx(0, 0, 0, 0)
    elif coverage == 255:
      discard
    else:
      line[i] = blendMask(line[i], rgbx * coverage)
    inc i

  # `alphaMask`, `oddMask` and `div255` are not used directly below; they
  # appear to be picked up by name when the `blendMaskSimd` template expands
  # in this scope, so they must keep these names.
  let
    rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))
  while i < len - 16:
    let
      coverage = mm_loadu_si128(coverages[i].addr)
      eqZero = mm_cmpeq_epi8(coverage, mm_setzero_si128())
      eq255 = mm_cmpeq_epi8(coverage, mm_set1_epi8(255))
    if mm_movemask_epi8(eqZero) == 0xffff:
      # All 16 coverages are 0: clear the pixels, 4 per store.
      for _ in 0 ..< 4:
        mm_store_si128(line[i].addr, mm_setzero_si128())
        i += 4
    elif mm_movemask_epi8(eq255) == 0xffff and rgbx.a == 255:
      # Full coverage with an alpha of 255: the backdrop is left unchanged.
      i += 16
    else:
      # Mask 4 pixels at a time, shifting the next four coverage bytes into
      # the low lanes after each store.
      var coverage = coverage
      for _ in 0 ..< 4:
        let
          backdrop = mm_loadu_si128(line[i].addr)
          source = rgbxVec.applyCoverage(coverage)
        mm_storeu_si128(line[i].addr, blendMaskSimd(backdrop, source))
        coverage = mm_srli_si128(coverage, 4)
        i += 4

  # Scalar epilogue for whatever the vector loop left over.
  for i in i ..< len:
    let coverage = coverages[i]
    if coverage == 0:
      line[i] = rgbx(0, 0, 0, 0)
    elif coverage == 255:
      discard
    else:
      line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
Loading…
Reference in a new issue