tmp
This commit is contained in:
parent
e0cb5c2b11
commit
24b36b077e
|
@ -76,6 +76,19 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
|
|||
a = ((color.a * x + 127) div 255).uint8
|
||||
rgbx(r, g, b, a)
|
||||
|
||||
proc `*`*(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
  ## Scales each channel of `rgbx` by `coverage / 255`.
  ##
  ## * `coverage == 0` leaves `result` at its zero-initialized value.
  ## * `coverage == 255` returns `rgbx` unchanged.
  ## * Otherwise every channel is multiplied by `coverage`, 127 is added and
  ##   the sum is integer-divided by 255, so the quotient is rounded rather
  ##   than truncated.
  if coverage == 255:
    result = rgbx
  elif coverage != 0:
    let weight = coverage.uint32

    template scaled(channel: untyped): uint8 =
      ((channel.uint32 * weight + 127) div 255).uint8

    result = rgbx(
      scaled(rgbx.r),
      scaled(rgbx.g),
      scaled(rgbx.b),
      scaled(rgbx.a)
    )
|
||||
|
||||
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
||||
let
|
||||
xMin = rect.x
|
||||
|
|
|
@ -1429,6 +1429,47 @@ proc clearUnsafe(image: Image, startX, startY, toX, toY: int) =
|
|||
len = image.dataIndex(toX, toY) - start
|
||||
fillUnsafe(image.data, rgbx(0, 0, 0, 0), start, len)
|
||||
|
||||
proc blendLineCoverageOverwrite(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Scalar coverage fill in overwrite mode. For each of the first `len`
  ## entries, a non-zero coverage replaces `line[i]` with `rgbx * coverage`;
  ## a zero coverage leaves the pixel as it was.
  var idx = 0
  while idx < len:
    let cov = coverages[idx]
    if cov > 0:
      line[idx] = rgbx * cov
    inc idx
|
||||
|
||||
proc blendLineCoverageNormal(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Scalar coverage fill using `blendNormal`. Per pixel:
  ##
  ## * coverage 0 leaves the pixel untouched,
  ## * coverage 255 with a fully opaque `rgbx` stores `rgbx` directly,
  ## * anything else stores `blendNormal(line[i], rgbx * coverage)`.
  let opaque = rgbx.a == 255
  for i in 0 ..< len:
    let cov = coverages[i]
    if cov == 0:
      continue
    line[i] =
      if opaque and cov == 255:
        rgbx
      else:
        blendNormal(line[i], rgbx * cov)
|
||||
|
||||
proc blendLineCoverageMask(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Scalar coverage fill using `blendMask`. Per pixel:
  ##
  ## * coverage 0 clears the pixel to `rgbx(0, 0, 0, 0)`,
  ## * coverage 255 leaves the pixel untouched,
  ## * anything else stores `blendMask(line[i], rgbx * coverage)`.
  for i in 0 ..< len:
    let cov = coverages[i]
    case cov
    of 0:
      line[i] = rgbx(0, 0, 0, 0)
    of 255:
      discard
    else:
      line[i] = blendMask(line[i], rgbx * cov)
|
||||
|
||||
proc fillCoverage(
|
||||
image: Image,
|
||||
rgbx: ColorRGBX,
|
||||
|
@ -1440,149 +1481,31 @@ proc fillCoverage(
|
|||
x = startX
|
||||
dataIndex = image.dataIndex(x, y)
|
||||
|
||||
when allowSimd:
|
||||
when defined(amd64):
|
||||
iterator simd(
|
||||
coverages: seq[uint8], x: var int, startX: int
|
||||
): (M128i, bool, bool) =
|
||||
for _ in 0 ..< coverages.len div 16:
|
||||
let
|
||||
coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
||||
eqZero = mm_cmpeq_epi8(coverageVec, mm_setzero_si128())
|
||||
eq255 = mm_cmpeq_epi8(coverageVec, mm_set1_epi8(255))
|
||||
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
|
||||
all255 = mm_movemask_epi8(eq255) == 0xffff
|
||||
yield (coverageVec, allZeroes, all255)
|
||||
x += 16
|
||||
|
||||
proc source(colorVec, coverageVec: M128i): M128i {.inline.} =
|
||||
let
|
||||
oddMask = mm_set1_epi16(0xff00)
|
||||
div255 = mm_set1_epi16(0x8081)
|
||||
|
||||
var unpacked = unpackAlphaValues(coverageVec)
|
||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
||||
|
||||
var
|
||||
sourceEven = mm_slli_epi16(colorVec, 8)
|
||||
sourceOdd = mm_and_si128(colorVec, oddMask)
|
||||
sourceEven = mm_mulhi_epu16(sourceEven, unpacked)
|
||||
sourceOdd = mm_mulhi_epu16(sourceOdd, unpacked)
|
||||
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
|
||||
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
|
||||
result = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
|
||||
|
||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
|
||||
proc source(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
|
||||
if coverage == 0:
|
||||
discard
|
||||
elif coverage == 255:
|
||||
result = rgbx
|
||||
else:
|
||||
result = rgbx(
|
||||
((rgbx.r.uint32 * coverage) div 255).uint8,
|
||||
((rgbx.g.uint32 * coverage) div 255).uint8,
|
||||
((rgbx.b.uint32 * coverage) div 255).uint8,
|
||||
((rgbx.a.uint32 * coverage) div 255).uint8
|
||||
)
|
||||
|
||||
case blendMode:
|
||||
of OverwriteBlend:
|
||||
when allowSimd:
|
||||
when defined(amd64):
|
||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
||||
if allZeroes:
|
||||
dataIndex += 16
|
||||
else:
|
||||
if all255:
|
||||
for i in 0 ..< 4:
|
||||
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
|
||||
dataIndex += 4
|
||||
else:
|
||||
var coverageVec = coverageVec
|
||||
for i in 0 ..< 4:
|
||||
let source = source(colorVec, coverageVec)
|
||||
mm_storeu_si128(image.data[dataIndex].addr, source)
|
||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
||||
dataIndex += 4
|
||||
|
||||
for x in x ..< startX + coverages.len:
|
||||
let coverage = coverages[x - startX]
|
||||
if coverage != 0:
|
||||
image.data[dataIndex] = source(rgbx, coverage)
|
||||
inc dataIndex
|
||||
blendLineCoverageOverwrite(
|
||||
image.getUncheckedArray(startX, y),
|
||||
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||
rgbx,
|
||||
coverages.len
|
||||
)
|
||||
|
||||
of NormalBlend:
|
||||
when allowSimd:
|
||||
when defined(amd64):
|
||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
||||
if allZeroes:
|
||||
dataIndex += 16
|
||||
else:
|
||||
if all255 and rgbx.a == 255:
|
||||
for i in 0 ..< 4:
|
||||
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
|
||||
dataIndex += 4
|
||||
else:
|
||||
var coverageVec = coverageVec
|
||||
for i in 0 ..< 4:
|
||||
let
|
||||
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
|
||||
source = source(colorVec, coverageVec)
|
||||
mm_storeu_si128(
|
||||
image.data[dataIndex].addr,
|
||||
blendNormalSimd(backdrop, source)
|
||||
)
|
||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
||||
dataIndex += 4
|
||||
|
||||
for x in x ..< startX + coverages.len:
|
||||
let coverage = coverages[x - startX]
|
||||
if coverage == 255 and rgbx.a == 255:
|
||||
image.data[dataIndex] = rgbx
|
||||
elif coverage == 0:
|
||||
discard
|
||||
else:
|
||||
let backdrop = image.data[dataIndex]
|
||||
image.data[dataIndex] = blendNormal(backdrop, source(rgbx, coverage))
|
||||
inc dataIndex
|
||||
blendLineCoverageNormal(
|
||||
image.getUncheckedArray(startX, y),
|
||||
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||
rgbx,
|
||||
coverages.len
|
||||
)
|
||||
|
||||
of MaskBlend:
|
||||
{.linearScanEnd.}
|
||||
when allowSimd:
|
||||
when defined(amd64):
|
||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
||||
if not allZeroes:
|
||||
if all255:
|
||||
dataIndex += 16
|
||||
else:
|
||||
var coverageVec = coverageVec
|
||||
for i in 0 ..< 4:
|
||||
let
|
||||
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
|
||||
source = source(colorVec, coverageVec)
|
||||
mm_storeu_si128(
|
||||
image.data[dataIndex].addr,
|
||||
blendMaskSimd(backdrop, source)
|
||||
)
|
||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
||||
dataIndex += 4
|
||||
else:
|
||||
for i in 0 ..< 4:
|
||||
mm_storeu_si128(image.data[dataIndex].addr, mm_setzero_si128())
|
||||
dataIndex += 4
|
||||
|
||||
for x in x ..< startX + coverages.len:
|
||||
let coverage = coverages[x - startX]
|
||||
if coverage == 0:
|
||||
image.data[dataIndex] = rgbx(0, 0, 0, 0)
|
||||
elif coverage == 255:
|
||||
discard
|
||||
else:
|
||||
let backdrop = image.data[dataIndex]
|
||||
image.data[dataIndex] = blendMask(backdrop, source(rgbx, coverage))
|
||||
inc dataIndex
|
||||
blendLineCoverageMask(
|
||||
image.getUncheckedArray(startX, y),
|
||||
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||
rgbx,
|
||||
coverages.len
|
||||
)
|
||||
|
||||
image.clearUnsafe(0, y, startX, y)
|
||||
image.clearUnsafe(startX + coverages.len, y, image.width, y)
|
||||
|
@ -1593,7 +1516,7 @@ proc fillCoverage(
|
|||
let coverage = coverages[x - startX]
|
||||
if coverage != 0:
|
||||
let backdrop = image.data[dataIndex]
|
||||
image.data[dataIndex] = blender(backdrop, source(rgbx, coverage))
|
||||
image.data[dataIndex] = blender(backdrop, rgbx * coverage)
|
||||
inc dataIndex
|
||||
|
||||
proc blendLineNormal(
|
||||
|
|
|
@ -6,6 +6,41 @@ when defined(gcc) or defined(clang):
|
|||
when defined(release):
|
||||
{.push checks: off.}
|
||||
|
||||
template blendNormalSimd(backdrop, source: M256i): M256i =
  ## Vector normal blend of eight pixels: `source` plus `backdrop` scaled by
  ## `vecAlpha255 - sourceAlpha`, with the divide by 255 done through the
  ## `div255` multiply followed by a shift right of 7.
  ##
  ## Expects `alphaMask`, `oddMask`, `div255`, `shuffleControl` and
  ## `vecAlpha255` to be visible where the template is expanded.
  let
    # Isolate the source alpha and spread it with the caller's shuffle control.
    spreadAlpha = mm256_shuffle_epi8(
      mm256_and_si256(source, alphaMask),
      shuffleControl
    )
    weight = mm256_sub_epi32(vecAlpha255, spreadAlpha)
    # Even bytes are shifted into the high half of each 16-bit lane and odd
    # bytes are masked in place, so both feed the same 16-bit multiply.
    evenScaled = mm256_srli_epi16(
      mm256_mulhi_epu16(
        mm256_mulhi_epu16(mm256_slli_epi16(backdrop, 8), weight),
        div255
      ),
      7
    )
    oddScaled = mm256_srli_epi16(
      mm256_mulhi_epu16(
        mm256_mulhi_epu16(mm256_and_si256(backdrop, oddMask), weight),
        div255
      ),
      7
    )

  mm256_add_epi8(
    source,
    mm256_or_si256(evenScaled, mm256_slli_epi16(oddScaled, 8))
  )
|
||||
|
||||
template blendMaskSimd(backdrop, source: M256i): M256i =
  ## Vector mask blend of eight pixels: `backdrop` scaled by the source
  ## alpha, with the divide by 255 done through the `div255` multiply
  ## followed by a shift right of 7.
  ##
  ## Expects `alphaMask`, `oddMask`, `div255` and `shuffleControl` to be
  ## visible where the template is expanded.
  let
    # Isolate the source alpha and spread it with the caller's shuffle control.
    spreadAlpha = mm256_shuffle_epi8(
      mm256_and_si256(source, alphaMask),
      shuffleControl
    )
    evenScaled = mm256_srli_epi16(
      mm256_mulhi_epu16(
        mm256_mulhi_epu16(mm256_slli_epi16(backdrop, 8), spreadAlpha),
        div255
      ),
      7
    )
    oddScaled = mm256_srli_epi16(
      mm256_mulhi_epu16(
        mm256_mulhi_epu16(mm256_and_si256(backdrop, oddMask), spreadAlpha),
        div255
      ),
      7
    )

  mm256_or_si256(evenScaled, mm256_slli_epi16(oddScaled, 8))
|
||||
|
||||
proc isOneColorAvx2*(image: Image): bool {.simd.} =
|
||||
result = true
|
||||
|
||||
|
@ -400,26 +435,7 @@ proc blendLineNormalAvx2*(
|
|||
)
|
||||
while i < len - 8:
|
||||
let backdrop = mm256_load_si256(line[i].addr)
|
||||
var
|
||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
||||
|
||||
let multiplier = mm256_sub_epi32(vecAlpha255, sourceAlpha)
|
||||
|
||||
backdropEven = mm256_mulhi_epu16(backdropEven, multiplier)
|
||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, multiplier)
|
||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
let added = mm256_add_epi8(
|
||||
source,
|
||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm256_store_si256(line[i].addr, added)
|
||||
mm256_store_si256(line[i].addr, blendNormalSimd(backdrop, source))
|
||||
i += 8
|
||||
|
||||
for i in i ..< len:
|
||||
|
@ -451,27 +467,7 @@ proc blendLineNormalAvx2*(
|
|||
mm256_storeu_si256(a[i].addr, source)
|
||||
else:
|
||||
let backdrop = mm256_load_si256(a[i].addr)
|
||||
var
|
||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
||||
|
||||
let multiplier = mm256_sub_epi32(vecAlpha255, sourceAlpha)
|
||||
|
||||
backdropEven = mm256_mulhi_epu16(backdropEven, multiplier)
|
||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, multiplier)
|
||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
let added = mm256_add_epi8(
|
||||
source,
|
||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm256_store_si256(a[i].addr, added)
|
||||
|
||||
mm256_store_si256(a[i].addr, blendNormalSimd(backdrop, source))
|
||||
i += 8
|
||||
|
||||
for i in i ..< len:
|
||||
|
@ -496,22 +492,7 @@ proc blendLineMaskAvx2*(
|
|||
)
|
||||
while i < len - 8:
|
||||
let backdrop = mm256_load_si256(line[i].addr)
|
||||
var
|
||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
||||
|
||||
backdropEven = mm256_mulhi_epu16(backdropEven, sourceAlpha)
|
||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
mm256_store_si256(
|
||||
line[i].addr,
|
||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
mm256_store_si256(line[i].addr, blendMaskSimd(backdrop, source))
|
||||
i += 8
|
||||
|
||||
for i in i ..< len:
|
||||
|
@ -542,23 +523,7 @@ proc blendLineMaskAvx2*(
|
|||
discard
|
||||
else:
|
||||
let backdrop = mm256_load_si256(a[i].addr)
|
||||
var
|
||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
||||
|
||||
backdropEven = mm256_mulhi_epu16(backdropEven, sourceAlpha)
|
||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
mm256_store_si256(
|
||||
a[i].addr,
|
||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm256_store_si256(a[i].addr, blendMaskSimd(backdrop, source))
|
||||
i += 8
|
||||
|
||||
for i in i ..< len:
|
||||
|
|
|
@ -10,17 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
|||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||
|
||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
|
||||
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
|
||||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
||||
|
||||
proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||
|
||||
template blendNormalSimd*(backdrop, source: M128i): M128i =
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
|
@ -28,14 +18,10 @@ proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
|||
|
||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
let k = mm_sub_epi32(
|
||||
mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255])),
|
||||
sourceAlpha
|
||||
)
|
||||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, k)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, k)
|
||||
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
||||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
|
@ -44,12 +30,7 @@ proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
|||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
proc blendMaskSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||
|
||||
template blendMaskSimd*(backdrop, source: M128i): M128i =
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
|
@ -59,7 +40,6 @@ proc blendMaskSimd*(backdrop, source: M128i): M128i {.inline.} =
|
|||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||
|
||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
|
@ -527,6 +507,67 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
|||
result.width * 4
|
||||
)
|
||||
|
||||
proc applyCoverage*(rgbxVec, coverage: M128i): M128i {.inline.} =
  ## Multiplies the four pixels in `rgbxVec` by the first four coverage bytes
  ## of `coverage`, one byte per pixel, dividing by 255 through the `0x8081`
  ## multiply followed by a shift right of 7.
  let
    zero = mm_setzero_si128()
    oddMask = mm_set1_epi16(0xff00)
    div255 = mm_set1_epi16(0x8081)
    # Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
    widened = mm_unpacklo_epi8(zero, mm_unpacklo_epi8(zero, coverage))
    # Copy the value into the other 16-bit half of each 32-bit lane too.
    factor = mm_or_si128(widened, mm_srli_epi32(widened, 16))
    evenProduct = mm_mulhi_epu16(mm_slli_epi16(rgbxVec, 8), factor)
    oddProduct = mm_mulhi_epu16(mm_and_si128(rgbxVec, oddMask), factor)
    evenScaled = mm_srli_epi16(mm_mulhi_epu16(evenProduct, div255), 7)
    oddScaled = mm_srli_epi16(mm_mulhi_epu16(oddProduct, div255), 7)

  mm_or_si128(evenScaled, mm_slli_epi16(oddScaled, 8))
|
||||
|
||||
proc blendLineCoverageOverwriteSse2*(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.simd.} =
  ## SSE2 coverage fill in overwrite mode over the first `len` pixels of
  ## `line`, using one coverage byte per pixel from `coverages`.
  ##
  ## Runs in three phases: a scalar prologue up to the first 16-byte aligned
  ## pixel, a vector loop that handles 16 coverage bytes per iteration, and a
  ## scalar tail for whatever remains.
  var i: int
  # Scalar prologue. The `i < len` bound matters: without it a run shorter
  # than the distance to the next aligned pixel would read `coverages` and
  # write `line` past index `len - 1`.
  while i < len and (cast[uint](line[i].addr) and 15) != 0:
    let coverage = coverages[i]
    if coverage != 0:
      line[i] = rgbx * coverage
    inc i

  let rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
  while i < len - 16:
    let
      coverage = mm_loadu_si128(coverages[i].addr)
      eqZero = mm_cmpeq_epi8(coverage, mm_setzero_si128())
      eq255 = mm_cmpeq_epi8(coverage, mm_set1_epi8(255))
    if mm_movemask_epi8(eqZero) == 0xffff:
      # All 16 coverages are zero: nothing to write.
      i += 16
    elif mm_movemask_epi8(eq255) == 0xffff:
      # All 16 coverages are 255: store the plain color. `i` only advances
      # in steps of 4 pixels (16 bytes) after the prologue, so the aligned
      # store is valid here.
      for _ in 0 ..< 4:
        mm_store_si128(line[i].addr, rgbxVec)
        i += 4
    else:
      # Mixed coverages: scale the color 4 pixels at a time, shifting the
      # next 4 coverage bytes into the low lanes after each store.
      # NOTE(review): this stores all four pixels, so a zero coverage inside
      # a mixed group writes a scaled (zero) pixel, whereas the scalar loops
      # leave such a pixel untouched. Confirm this is intended.
      var coverage = coverage
      for _ in 0 ..< 4:
        mm_storeu_si128(line[i].addr, rgbxVec.applyCoverage(coverage))
        coverage = mm_srli_si128(coverage, 4)
        i += 4

  # Scalar tail.
  for i in i ..< len:
    let coverage = coverages[i]
    if coverage != 0:
      line[i] = rgbx * coverage
|
||||
|
||||
proc blendLineNormalSse2*(
|
||||
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||
) {.simd.} =
|
||||
|
@ -543,26 +584,7 @@ proc blendLineNormalSse2*(
|
|||
vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
||||
while i < len - 4:
|
||||
let backdrop = mm_load_si128(line[i].addr)
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
||||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
let added = mm_add_epi8(
|
||||
source,
|
||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm_store_si128(line[i].addr, added)
|
||||
mm_store_si128(line[i].addr, blendNormalSimd(backdrop, source))
|
||||
i += 4
|
||||
|
||||
for i in i ..< len:
|
||||
|
@ -590,32 +612,65 @@ proc blendLineNormalSse2*(
|
|||
mm_storeu_si128(a[i].addr, source)
|
||||
else:
|
||||
let backdrop = mm_load_si128(a[i].addr)
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
||||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
let added = mm_add_epi8(
|
||||
source,
|
||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm_store_si128(a[i].addr, added)
|
||||
|
||||
mm_store_si128(a[i].addr, blendNormalSimd(backdrop, source))
|
||||
i += 4
|
||||
|
||||
for i in i ..< len:
|
||||
a[i] = blendNormal(a[i], b[i])
|
||||
|
||||
proc blendLineCoverageNormalSse2*(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.simd.} =
  ## SSE2 coverage fill using normal blending over the first `len` pixels of
  ## `line`, using one coverage byte per pixel from `coverages`.
  ##
  ## Runs in three phases: a scalar prologue up to the first 16-byte aligned
  ## pixel, a vector loop that handles 16 coverage bytes per iteration, and a
  ## scalar tail for whatever remains.
  var i: int
  # Scalar prologue. The `i < len` bound matters: without it a run shorter
  # than the distance to the next aligned pixel would read `coverages` and
  # write `line` past index `len - 1`.
  while i < len and (cast[uint](line[i].addr) and 15) != 0:
    let coverage = coverages[i]
    if coverage == 255 and rgbx.a == 255:
      line[i] = rgbx
    elif coverage == 0:
      discard
    else:
      line[i] = blendNormal(line[i], rgbx * coverage)
    inc i

  # `alphaMask`, `oddMask`, `div255` and `vecAlpha255` are not read directly
  # below; they are the constants the `blendNormalSimd` template picks up
  # from the scope it is expanded in.
  let
    rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))
    vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
  while i < len - 16:
    let
      coverage = mm_loadu_si128(coverages[i].addr)
      eqZero = mm_cmpeq_epi8(coverage, mm_setzero_si128())
      eq255 = mm_cmpeq_epi8(coverage, mm_set1_epi8(255))
    if mm_movemask_epi8(eqZero) == 0xffff:
      # All 16 coverages are zero: the backdrop is kept as is.
      i += 16
    elif mm_movemask_epi8(eq255) == 0xffff and rgbx.a == 255:
      # Full coverage with an opaque color: store the color directly. `i`
      # only advances in steps of 4 pixels (16 bytes) after the prologue, so
      # the aligned store is valid here.
      for _ in 0 ..< 4:
        mm_store_si128(line[i].addr, rgbxVec)
        i += 4
    else:
      # Mixed coverages (or a translucent color): blend 4 pixels at a time,
      # shifting the next 4 coverage bytes into the low lanes each round.
      var coverage = coverage
      for _ in 0 ..< 4:
        let
          backdrop = mm_loadu_si128(line[i].addr)
          source = rgbxVec.applyCoverage(coverage)
        mm_storeu_si128(line[i].addr, blendNormalSimd(backdrop, source))
        coverage = mm_srli_si128(coverage, 4)
        i += 4

  # Scalar tail.
  for i in i ..< len:
    let coverage = coverages[i]
    if coverage == 255 and rgbx.a == 255:
      line[i] = rgbx
    elif coverage == 0:
      discard
    else:
      line[i] = blendNormal(line[i], rgbx * coverage)
|
||||
|
||||
proc blendLineMaskSse2*(
|
||||
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||
) {.simd.} =
|
||||
|
@ -631,22 +686,7 @@ proc blendLineMaskSse2*(
|
|||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||
while i < len - 4:
|
||||
let backdrop = mm_load_si128(line[i].addr)
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
mm_store_si128(
|
||||
line[i].addr,
|
||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
mm_store_si128(line[i].addr, blendMaskSimd(backdrop, source))
|
||||
i += 4
|
||||
|
||||
for i in i ..< len:
|
||||
|
@ -673,27 +713,63 @@ proc blendLineMaskSse2*(
|
|||
discard
|
||||
else:
|
||||
let backdrop = mm_load_si128(a[i].addr)
|
||||
var
|
||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
mm_store_si128(
|
||||
a[i].addr,
|
||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm_store_si128(a[i].addr, blendMaskSimd(backdrop, source))
|
||||
i += 4
|
||||
|
||||
for i in i ..< len:
|
||||
a[i] = blendMask(a[i], b[i])
|
||||
|
||||
proc blendLineCoverageMaskSse2*(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.simd.} =
  ## SSE2 coverage fill using mask blending over the first `len` pixels of
  ## `line`, using one coverage byte per pixel from `coverages`.
  ##
  ## Runs in three phases: a scalar prologue up to the first 16-byte aligned
  ## pixel, a vector loop that handles 16 coverage bytes per iteration, and a
  ## scalar tail for whatever remains. Per pixel, coverage 0 clears the
  ## pixel, coverage 255 keeps it, and anything else is mask-blended.
  var i: int
  # Scalar prologue. The `i < len` bound matters: without it a run shorter
  # than the distance to the next aligned pixel would read `coverages` and
  # write `line` past index `len - 1`.
  while i < len and (cast[uint](line[i].addr) and 15) != 0:
    let coverage = coverages[i]
    if coverage == 0:
      line[i] = rgbx(0, 0, 0, 0)
    elif coverage == 255:
      discard
    else:
      line[i] = blendMask(line[i], rgbx * coverage)
    inc i

  # `alphaMask`, `oddMask` and `div255` are not read directly below; they
  # are the constants the `blendMaskSimd` template picks up from the scope
  # it is expanded in.
  let
    rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))
  while i < len - 16:
    let
      coverage = mm_loadu_si128(coverages[i].addr)
      eqZero = mm_cmpeq_epi8(coverage, mm_setzero_si128())
      eq255 = mm_cmpeq_epi8(coverage, mm_set1_epi8(255))
    if mm_movemask_epi8(eqZero) == 0xffff:
      # All 16 coverages are zero: clear the pixels. `i` only advances in
      # steps of 4 pixels (16 bytes) after the prologue, so the aligned
      # store is valid here.
      for _ in 0 ..< 4:
        mm_store_si128(line[i].addr, mm_setzero_si128())
        i += 4
    elif mm_movemask_epi8(eq255) == 0xffff:
      # All 16 coverages are 255: keep the backdrop. This is deliberately
      # not gated on `rgbx.a == 255`, because the scalar prologue and tail
      # skip a 255-coverage pixel whatever the color alpha is; an extra
      # alpha check here would treat pixels inside and outside the vector
      # chunks differently.
      i += 16
    else:
      # Mixed coverages: blend 4 pixels at a time, shifting the next 4
      # coverage bytes into the low lanes each round.
      var coverage = coverage
      for _ in 0 ..< 4:
        let
          backdrop = mm_loadu_si128(line[i].addr)
          source = rgbxVec.applyCoverage(coverage)
        mm_storeu_si128(line[i].addr, blendMaskSimd(backdrop, source))
        coverage = mm_srli_si128(coverage, 4)
        i += 4

  # Scalar tail.
  for i in i ..< len:
    let coverage = coverages[i]
    if coverage == 0:
      line[i] = rgbx(0, 0, 0, 0)
    elif coverage == 255:
      discard
    else:
      line[i] = blendMask(line[i], rgbx * coverage)
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
Loading…
Reference in a new issue