commit
f41d53b66c
10 changed files with 448 additions and 371 deletions
|
@ -76,6 +76,19 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
|
||||||
a = ((color.a * x + 127) div 255).uint8
|
a = ((color.a * x + 127) div 255).uint8
|
||||||
rgbx(r, g, b, a)
|
rgbx(r, g, b, a)
|
||||||
|
|
||||||
|
proc `*`*(rgbx: ColorRGBX, opacity: uint8): ColorRGBX {.inline.} =
  ## Scales all four channels of `rgbx` by `opacity / 255`, rounding to the
  ## nearest integer.
  ##
  ## An opacity of 0 returns the zero-initialized color and an opacity of 255
  ## returns `rgbx` unchanged; both are shortcuts around the arithmetic.
  template scaled(channel: uint8): uint8 =
    # Adding 127 before the integer division rounds instead of truncating.
    ((channel.uint32 * opacity + 127) div 255).uint8

  case opacity
  of 0:
    discard # `result` is already zero-initialized
  of 255:
    result = rgbx
  else:
    result = rgbx(
      scaled(rgbx.r),
      scaled(rgbx.g),
      scaled(rgbx.b),
      scaled(rgbx.a)
    )
|
||||||
|
|
||||||
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
proc snapToPixels*(rect: Rect): Rect {.raises: [].} =
|
||||||
let
|
let
|
||||||
xMin = rect.x
|
xMin = rect.x
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import blends, bumpy, chroma, common, internal, simd, vmath
|
import blends, bumpy, chroma, common, internal, simd, vmath
|
||||||
|
|
||||||
export Image, newImage, copy, dataIndex
|
export Image, copy, dataIndex, newImage
|
||||||
|
|
||||||
const h = 0.5.float32
|
const h = 0.5.float32
|
||||||
|
|
||||||
|
@ -436,27 +436,26 @@ proc drawCorrect(
|
||||||
blended = blender(backdrop, sample)
|
blended = blender(backdrop, sample)
|
||||||
a.unsafe[x, y] = blended
|
a.unsafe[x, y] = blended
|
||||||
|
|
||||||
proc blendLine(
  a, b: ptr UncheckedArray[ColorRGBX], len: int, blender: Blender
) {.inline.} =
  ## Blends the first `len` pixels of `b` onto `a` in place.
  ##
  ## For every index, `blender` receives the pixel from `a` as its first
  ## argument and the pixel from `b` as its second; the result is stored back
  ## into `a`.
  var i = 0
  while i < len:
    a[i] = blender(a[i], b[i])
    inc i
|
||||||
|
|
||||||
proc blendLineOverwrite(
  a, b: ptr UncheckedArray[ColorRGBX], len: int
) {.inline.} =
  ## Overwrites the first `len` pixels of `a` with the first `len` pixels of
  ## `b` using a single raw memory copy.
  const bytesPerPixel = 4 # byte count used per pixel by the copy
  copyMem(addr a[0], addr b[0], len * bytesPerPixel)
|
||||||
|
|
||||||
proc blendLineNormal(
  a, b: ptr UncheckedArray[ColorRGBX], len: int
) {.hasSimd.} =
  ## Normal-blends the first `len` pixels of `b` over `a`, writing the result
  ## back into `a`.
  ##
  ## NOTE(review): `hasSimd` presumably lets an architecture-specific variant
  ## stand in for this scalar body - confirm against the pragma definition.
  var i = 0
  while i < len:
    a[i] = blendNormal(a[i], b[i])
    inc i
|
||||||
|
|
||||||
proc blendLineMask(
  a, b: ptr UncheckedArray[ColorRGBX], len: int
) {.hasSimd.} =
  ## Mask-blends the first `len` pixels of `a` with the matching pixels of
  ## `b`, writing the result back into `a`.
  ##
  ## NOTE(review): `hasSimd` presumably lets an architecture-specific variant
  ## stand in for this scalar body - confirm against the pragma definition.
  var i = 0
  while i < len:
    a[i] = blendMask(a[i], b[i])
    inc i
|
||||||
|
|
||||||
proc blitRect(a, b: Image, pos: Ivec2, blendMode: BlendMode) =
|
proc blendRect(a, b: Image, pos: Ivec2, blendMode: BlendMode) =
|
||||||
let
|
let
|
||||||
px = pos.x.int
|
px = pos.x.int
|
||||||
py = pos.y.int
|
py = pos.y.int
|
||||||
|
@ -475,14 +474,14 @@ proc blitRect(a, b: Image, pos: Ivec2, blendMode: BlendMode) =
|
||||||
case blendMode:
|
case blendMode:
|
||||||
of NormalBlend:
|
of NormalBlend:
|
||||||
for y in yStart ..< yEnd:
|
for y in yStart ..< yEnd:
|
||||||
blitLineNormal(
|
blendLineNormal(
|
||||||
a.getUncheckedArray(xStart + px, y + py),
|
a.getUncheckedArray(xStart + px, y + py),
|
||||||
b.getUncheckedArray(xStart, y),
|
b.getUncheckedArray(xStart, y),
|
||||||
xEnd - xStart
|
xEnd - xStart
|
||||||
)
|
)
|
||||||
of OverwriteBlend:
|
of OverwriteBlend:
|
||||||
for y in yStart ..< yEnd:
|
for y in yStart ..< yEnd:
|
||||||
blitLineOverwrite(
|
blendLineOverwrite(
|
||||||
a.getUncheckedArray(xStart + px, y + py),
|
a.getUncheckedArray(xStart + px, y + py),
|
||||||
b.getUncheckedArray(xStart, y),
|
b.getUncheckedArray(xStart, y),
|
||||||
xEnd - xStart
|
xEnd - xStart
|
||||||
|
@ -494,7 +493,7 @@ proc blitRect(a, b: Image, pos: Ivec2, blendMode: BlendMode) =
|
||||||
for y in yStart ..< yEnd:
|
for y in yStart ..< yEnd:
|
||||||
if xStart + px > 0:
|
if xStart + px > 0:
|
||||||
zeroMem(a.data[a.dataIndex(0, y + py)].addr, (xStart + px) * 4)
|
zeroMem(a.data[a.dataIndex(0, y + py)].addr, (xStart + px) * 4)
|
||||||
blitLineMask(
|
blendLineMask(
|
||||||
a.getUncheckedArray(xStart + px, y + py),
|
a.getUncheckedArray(xStart + px, y + py),
|
||||||
b.getUncheckedArray(xStart, y),
|
b.getUncheckedArray(xStart, y),
|
||||||
xEnd - xStart
|
xEnd - xStart
|
||||||
|
@ -512,7 +511,7 @@ proc blitRect(a, b: Image, pos: Ivec2, blendMode: BlendMode) =
|
||||||
else:
|
else:
|
||||||
let blender = blendMode.blender()
|
let blender = blendMode.blender()
|
||||||
for y in yStart ..< yEnd:
|
for y in yStart ..< yEnd:
|
||||||
blitLine(
|
blendLine(
|
||||||
a.getUncheckedArray(xStart + px, y + py),
|
a.getUncheckedArray(xStart + px, y + py),
|
||||||
b.getUncheckedArray(xStart, y),
|
b.getUncheckedArray(xStart, y),
|
||||||
xEnd - xStart,
|
xEnd - xStart,
|
||||||
|
@ -560,7 +559,7 @@ proc draw*(
|
||||||
if hasRotationOrScaling or smooth:
|
if hasRotationOrScaling or smooth:
|
||||||
a.drawCorrect(b, inverseTransform.inverse(), blendMode, false)
|
a.drawCorrect(b, inverseTransform.inverse(), blendMode, false)
|
||||||
else:
|
else:
|
||||||
a.blitRect(b, ivec2(transform[2, 0].int32, transform[2, 1].int32), blendMode)
|
a.blendRect(b, ivec2(transform[2, 0].int32, transform[2, 1].int32), blendMode)
|
||||||
|
|
||||||
proc drawTiled*(
|
proc drawTiled*(
|
||||||
dst, src: Image, mat: Mat3, blendMode = NormalBlend
|
dst, src: Image, mat: Mat3, blendMode = NormalBlend
|
||||||
|
|
|
@ -47,6 +47,11 @@ proc intersectsInside*(a, b: Segment, at: var Vec2): bool {.inline.} =
|
||||||
at = a.at + (t * s1)
|
at = a.at + (t * s1)
|
||||||
return true
|
return true
|
||||||
|
|
||||||
|
template getUncheckedArray*(
  image: Image, x, y: int
): ptr UncheckedArray[ColorRGBX] =
  ## Returns the address of the pixel at (`x`, `y`) in `image.data`, typed as
  ## an unchecked array so that the pixels following it can be indexed
  ## directly from that position.
  cast[ptr UncheckedArray[ColorRGBX]](addr image.data[image.dataIndex(x, y)])
|
||||||
|
|
||||||
proc fillUnsafe*(
|
proc fillUnsafe*(
|
||||||
data: var seq[ColorRGBX], color: SomeColor, start, len: int
|
data: var seq[ColorRGBX], color: SomeColor, start, len: int
|
||||||
) {.hasSimd, raises: [].} =
|
) {.hasSimd, raises: [].} =
|
||||||
|
|
|
@ -1429,6 +1429,43 @@ proc clearUnsafe(image: Image, startX, startY, toX, toY: int) =
|
||||||
len = image.dataIndex(toX, toY) - start
|
len = image.dataIndex(toX, toY) - start
|
||||||
fillUnsafe(image.data, rgbx(0, 0, 0, 0), start, len)
|
fillUnsafe(image.data, rgbx(0, 0, 0, 0), start, len)
|
||||||
|
|
||||||
|
proc blendLineCoverageOverwrite(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Writes `rgbx` scaled by the per-pixel coverage into the first `len`
  ## pixels of `line`.
  ##
  ## Pixels whose coverage is 0 are left untouched rather than cleared.
  for i in 0 ..< len:
    let amount = coverages[i]
    if amount == 0:
      continue
    line[i] = rgbx * amount
|
||||||
|
|
||||||
|
proc blendLineCoverageNormal(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Normal-blends `rgbx`, scaled by the per-pixel coverage, over the first
  ## `len` pixels of `line`.
  ##
  ## Pixels whose coverage is 0 are skipped entirely.
  for i in 0 ..< len:
    let amount = coverages[i]
    if amount != 0:
      line[i] = blendNormal(line[i], rgbx * amount)
|
||||||
|
|
||||||
|
proc blendLineCoverageMask(
  line: ptr UncheckedArray[ColorRGBX],
  coverages: ptr UncheckedArray[uint8],
  rgbx: ColorRGBX,
  len: int
) {.hasSimd.} =
  ## Mask-blends the first `len` pixels of `line` with `rgbx` scaled by the
  ## per-pixel coverage.
  ##
  ## Pixels whose coverage is 255 are skipped; every other pixel, including
  ## those with coverage 0, goes through `blendMask`.
  ##
  ## NOTE(review): full coverage skips the blend even when `rgbx.a` is below
  ## 255 - confirm that callers rely on this.
  for i in 0 ..< len:
    let amount = coverages[i]
    if amount != 255:
      line[i] = blendMask(line[i], rgbx * amount)
|
||||||
|
|
||||||
proc fillCoverage(
|
proc fillCoverage(
|
||||||
image: Image,
|
image: Image,
|
||||||
rgbx: ColorRGBX,
|
rgbx: ColorRGBX,
|
||||||
|
@ -1440,181 +1477,56 @@ proc fillCoverage(
|
||||||
x = startX
|
x = startX
|
||||||
dataIndex = image.dataIndex(x, y)
|
dataIndex = image.dataIndex(x, y)
|
||||||
|
|
||||||
when allowSimd:
|
|
||||||
when defined(amd64):
|
|
||||||
iterator simd(
|
|
||||||
coverages: seq[uint8], x: var int, startX: int
|
|
||||||
): (M128i, bool, bool) =
|
|
||||||
for _ in 0 ..< coverages.len div 16:
|
|
||||||
let
|
|
||||||
coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
|
|
||||||
eqZero = mm_cmpeq_epi8(coverageVec, mm_setzero_si128())
|
|
||||||
eq255 = mm_cmpeq_epi8(coverageVec, mm_set1_epi8(255))
|
|
||||||
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
|
|
||||||
all255 = mm_movemask_epi8(eq255) == 0xffff
|
|
||||||
yield (coverageVec, allZeroes, all255)
|
|
||||||
x += 16
|
|
||||||
|
|
||||||
proc source(colorVec, coverageVec: M128i): M128i {.inline.} =
|
|
||||||
let
|
|
||||||
oddMask = mm_set1_epi16(0xff00)
|
|
||||||
div255 = mm_set1_epi16(0x8081)
|
|
||||||
|
|
||||||
var unpacked = unpackAlphaValues(coverageVec)
|
|
||||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
|
||||||
|
|
||||||
var
|
|
||||||
sourceEven = mm_slli_epi16(colorVec, 8)
|
|
||||||
sourceOdd = mm_and_si128(colorVec, oddMask)
|
|
||||||
sourceEven = mm_mulhi_epu16(sourceEven, unpacked)
|
|
||||||
sourceOdd = mm_mulhi_epu16(sourceOdd, unpacked)
|
|
||||||
sourceEven = mm_srli_epi16(mm_mulhi_epu16(sourceEven, div255), 7)
|
|
||||||
sourceOdd = mm_srli_epi16(mm_mulhi_epu16(sourceOdd, div255), 7)
|
|
||||||
result = mm_or_si128(sourceEven, mm_slli_epi16(sourceOdd, 8))
|
|
||||||
|
|
||||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
|
||||||
|
|
||||||
proc source(rgbx: ColorRGBX, coverage: uint8): ColorRGBX {.inline.} =
|
|
||||||
if coverage == 0:
|
|
||||||
discard
|
|
||||||
elif coverage == 255:
|
|
||||||
result = rgbx
|
|
||||||
else:
|
|
||||||
result = rgbx(
|
|
||||||
((rgbx.r.uint32 * coverage) div 255).uint8,
|
|
||||||
((rgbx.g.uint32 * coverage) div 255).uint8,
|
|
||||||
((rgbx.b.uint32 * coverage) div 255).uint8,
|
|
||||||
((rgbx.a.uint32 * coverage) div 255).uint8
|
|
||||||
)
|
|
||||||
|
|
||||||
case blendMode:
|
case blendMode:
|
||||||
of OverwriteBlend:
|
of OverwriteBlend:
|
||||||
when allowSimd:
|
blendLineCoverageOverwrite(
|
||||||
when defined(amd64):
|
image.getUncheckedArray(startX, y),
|
||||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||||
if allZeroes:
|
rgbx,
|
||||||
dataIndex += 16
|
coverages.len
|
||||||
else:
|
)
|
||||||
if all255:
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
|
|
||||||
dataIndex += 4
|
|
||||||
else:
|
|
||||||
var coverageVec = coverageVec
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
let source = source(colorVec, coverageVec)
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, source)
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
|
||||||
dataIndex += 4
|
|
||||||
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage != 0:
|
|
||||||
image.data[dataIndex] = source(rgbx, coverage)
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of NormalBlend:
|
of NormalBlend:
|
||||||
when allowSimd:
|
blendLineCoverageNormal(
|
||||||
when defined(amd64):
|
image.getUncheckedArray(startX, y),
|
||||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||||
if allZeroes:
|
rgbx,
|
||||||
dataIndex += 16
|
coverages.len
|
||||||
else:
|
)
|
||||||
if all255 and rgbx.a == 255:
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, colorVec)
|
|
||||||
dataIndex += 4
|
|
||||||
else:
|
|
||||||
var coverageVec = coverageVec
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
let
|
|
||||||
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
|
|
||||||
source = source(colorVec, coverageVec)
|
|
||||||
mm_storeu_si128(
|
|
||||||
image.data[dataIndex].addr,
|
|
||||||
blendNormalSimd(backdrop, source)
|
|
||||||
)
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
|
||||||
dataIndex += 4
|
|
||||||
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage == 255 and rgbx.a == 255:
|
|
||||||
image.data[dataIndex] = rgbx
|
|
||||||
elif coverage == 0:
|
|
||||||
discard
|
|
||||||
else:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendNormal(backdrop, source(rgbx, coverage))
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of MaskBlend:
|
of MaskBlend:
|
||||||
{.linearScanEnd.}
|
{.linearScanEnd.}
|
||||||
|
blendLineCoverageMask(
|
||||||
when allowSimd:
|
image.getUncheckedArray(startX, y),
|
||||||
when defined(amd64):
|
cast[ptr UncheckedArray[uint8]](coverages[0].unsafeAddr),
|
||||||
for (coverageVec, allZeroes, all255) in simd(coverages, x, startX):
|
rgbx,
|
||||||
if not allZeroes:
|
coverages.len
|
||||||
if all255:
|
)
|
||||||
dataIndex += 16
|
|
||||||
else:
|
|
||||||
var coverageVec = coverageVec
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
let
|
|
||||||
backdrop = mm_loadu_si128(image.data[dataIndex].addr)
|
|
||||||
source = source(colorVec, coverageVec)
|
|
||||||
mm_storeu_si128(
|
|
||||||
image.data[dataIndex].addr,
|
|
||||||
blendMaskSimd(backdrop, source)
|
|
||||||
)
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
|
||||||
dataIndex += 4
|
|
||||||
else:
|
|
||||||
for i in 0 ..< 4:
|
|
||||||
mm_storeu_si128(image.data[dataIndex].addr, mm_setzero_si128())
|
|
||||||
dataIndex += 4
|
|
||||||
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage == 0:
|
|
||||||
image.data[dataIndex] = rgbx(0, 0, 0, 0)
|
|
||||||
elif coverage == 255:
|
|
||||||
discard
|
|
||||||
else:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendMask(backdrop, source(rgbx, coverage))
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
image.clearUnsafe(0, y, startX, y)
|
image.clearUnsafe(0, y, startX, y)
|
||||||
image.clearUnsafe(startX + coverages.len, y, image.width, y)
|
image.clearUnsafe(startX + coverages.len, y, image.width, y)
|
||||||
|
|
||||||
of SubtractMaskBlend:
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let coverage = coverages[x - startX]
|
|
||||||
if coverage == 255 and rgbx.a == 255:
|
|
||||||
image.data[dataIndex] = rgbx(0, 0, 0, 0)
|
|
||||||
elif coverage != 0:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendSubtractMask(backdrop, source(rgbx, coverage))
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of ExcludeMaskBlend:
|
|
||||||
for x in x ..< startX + coverages.len:
|
|
||||||
let
|
|
||||||
coverage = coverages[x - startX]
|
|
||||||
backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendExcludeMask(backdrop, source(rgbx, coverage))
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
let blender = blendMode.blender()
|
let blender = blendMode.blender()
|
||||||
for x in x ..< startX + coverages.len:
|
for x in x ..< startX + coverages.len:
|
||||||
let coverage = coverages[x - startX]
|
let coverage = coverages[x - startX]
|
||||||
if coverage != 0:
|
if coverage != 0:
|
||||||
let backdrop = image.data[dataIndex]
|
let backdrop = image.data[dataIndex]
|
||||||
image.data[dataIndex] = blender(backdrop, source(rgbx, coverage))
|
image.data[dataIndex] = blender(backdrop, rgbx * coverage)
|
||||||
inc dataIndex
|
inc dataIndex
|
||||||
|
|
||||||
|
proc blendLineNormal(
  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
) {.hasSimd.} =
  ## Normal-blends the single color `rgbx` over each of the first `len`
  ## pixels of `line`, in place.
  var i = 0
  while i < len:
    line[i] = blendNormal(line[i], rgbx)
    inc i
|
||||||
|
|
||||||
|
proc blendLineMask(
  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
) {.hasSimd.} =
  ## Mask-blends each of the first `len` pixels of `line` with the single
  ## color `rgbx`, in place.
  var i = 0
  while i < len:
    line[i] = blendMask(line[i], rgbx)
    inc i
|
||||||
|
|
||||||
proc fillHits(
|
proc fillHits(
|
||||||
image: Image,
|
image: Image,
|
||||||
rgbx: ColorRGBX,
|
rgbx: ColorRGBX,
|
||||||
|
@ -1625,19 +1537,6 @@ proc fillHits(
|
||||||
blendMode: BlendMode,
|
blendMode: BlendMode,
|
||||||
maskClears = true
|
maskClears = true
|
||||||
) =
|
) =
|
||||||
template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) =
|
|
||||||
when allowSimd:
|
|
||||||
when defined(amd64):
|
|
||||||
var p = cast[uint](image.data[image.dataIndex(x, y)].addr)
|
|
||||||
let
|
|
||||||
iterations = len div 4
|
|
||||||
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
|
||||||
for _ in 0 ..< iterations:
|
|
||||||
let backdrop = mm_loadu_si128(cast[pointer](p))
|
|
||||||
mm_storeu_si128(cast[pointer](p), blendProc(backdrop, colorVec))
|
|
||||||
p += 16
|
|
||||||
x += iterations * 4
|
|
||||||
|
|
||||||
case blendMode:
|
case blendMode:
|
||||||
of OverwriteBlend:
|
of OverwriteBlend:
|
||||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||||
|
@ -1648,17 +1547,10 @@ proc fillHits(
|
||||||
if rgbx.a == 255:
|
if rgbx.a == 255:
|
||||||
fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
|
fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
|
||||||
else:
|
else:
|
||||||
var x = start
|
blendLineNormal(image.getUncheckedArray(start, y), rgbx, len)
|
||||||
simdBlob(image, x, len, blendNormalSimd)
|
|
||||||
var dataIndex = image.dataIndex(x, y)
|
|
||||||
for _ in x ..< start + len:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendNormal(backdrop, rgbx)
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of MaskBlend:
|
of MaskBlend:
|
||||||
{.linearScanEnd.}
|
{.linearScanEnd.}
|
||||||
|
|
||||||
var filledTo = startX
|
var filledTo = startX
|
||||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||||
if maskClears: # Clear any gap between this fill and the previous fill
|
if maskClears: # Clear any gap between this fill and the previous fill
|
||||||
|
@ -1672,37 +1564,13 @@ proc fillHits(
|
||||||
)
|
)
|
||||||
block: # Handle this fill
|
block: # Handle this fill
|
||||||
if rgbx.a != 255:
|
if rgbx.a != 255:
|
||||||
var x = start
|
blendLineMask(image.getUncheckedArray(start, y), rgbx, len)
|
||||||
simdBlob(image, x, len, blendMaskSimd)
|
|
||||||
var dataIndex = image.dataIndex(x, y)
|
|
||||||
for _ in x ..< start + len:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendMask(backdrop, rgbx)
|
|
||||||
filledTo = start + len
|
filledTo = start + len
|
||||||
|
|
||||||
if maskClears:
|
if maskClears:
|
||||||
image.clearUnsafe(0, y, startX, y)
|
image.clearUnsafe(0, y, startX, y)
|
||||||
image.clearUnsafe(filledTo, y, image.width, y)
|
image.clearUnsafe(filledTo, y, image.width, y)
|
||||||
|
|
||||||
of SubtractMaskBlend:
|
|
||||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
|
||||||
var dataIndex = image.dataIndex(start, y)
|
|
||||||
for _ in 0 ..< len:
|
|
||||||
if rgbx.a == 255:
|
|
||||||
image.data[dataIndex] = rgbx(0, 0, 0, 0)
|
|
||||||
else:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendSubtractMask(backdrop, rgbx)
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
of ExcludeMaskBlend:
|
|
||||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
|
||||||
var dataIndex = image.dataIndex(start, y)
|
|
||||||
for _ in 0 ..< len:
|
|
||||||
let backdrop = image.data[dataIndex]
|
|
||||||
image.data[dataIndex] = blendExcludeMask(backdrop, rgbx)
|
|
||||||
inc dataIndex
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
let blender = blendMode.blender()
|
let blender = blendMode.blender()
|
||||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||||
|
|
|
@ -6,6 +6,41 @@ when defined(gcc) or defined(clang):
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
|
template blendNormalSimd(backdrop, source: M256i): M256i =
  ## Normal blend of one 256-bit vector of pixels: the backdrop is scaled by
  ## `vecAlpha255 - sourceAlpha`, divided by 255, and added to `source`.
  ##
  ## Expects `alphaMask`, `oddMask`, `div255`, `vecAlpha255` and
  ## `shuffleControl` to be visible where the template is expanded.
  let
    # Isolate each pixel's alpha byte and spread it across that pixel's
    # 16-bit lanes.
    srcAlpha = mm256_shuffle_epi8(
      mm256_and_si256(source, alphaMask),
      shuffleControl
    )
    keep = mm256_sub_epi32(vecAlpha255, srcAlpha)
    # Even and odd bytes are processed separately, each in the high half of
    # a 16-bit lane.
    evenScaled = mm256_mulhi_epu16(mm256_slli_epi16(backdrop, 8), keep)
    oddScaled = mm256_mulhi_epu16(mm256_and_si256(backdrop, oddMask), keep)
    # mulhi by `div255` followed by a shift of 7 is the divide-by-255 step.
    evenOut = mm256_srli_epi16(mm256_mulhi_epu16(evenScaled, div255), 7)
    oddOut = mm256_srli_epi16(mm256_mulhi_epu16(oddScaled, div255), 7)

  mm256_add_epi8(
    source,
    mm256_or_si256(evenOut, mm256_slli_epi16(oddOut, 8))
  )
|
||||||
|
|
||||||
|
template blendMaskSimd(backdrop, source: M256i): M256i =
  ## Mask blend of one 256-bit vector of pixels: every backdrop channel is
  ## scaled by the source pixel's alpha and divided by 255.
  ##
  ## Expects `alphaMask`, `oddMask`, `div255` and `shuffleControl` to be
  ## visible where the template is expanded.
  let
    # Isolate each pixel's alpha byte and spread it across that pixel's
    # 16-bit lanes.
    srcAlpha = mm256_shuffle_epi8(
      mm256_and_si256(source, alphaMask),
      shuffleControl
    )
    # Even and odd bytes are processed separately, each in the high half of
    # a 16-bit lane.
    evenScaled = mm256_mulhi_epu16(mm256_slli_epi16(backdrop, 8), srcAlpha)
    oddScaled = mm256_mulhi_epu16(mm256_and_si256(backdrop, oddMask), srcAlpha)
    # mulhi by `div255` followed by a shift of 7 is the divide-by-255 step.
    evenOut = mm256_srli_epi16(mm256_mulhi_epu16(evenScaled, div255), 7)
    oddOut = mm256_srli_epi16(mm256_mulhi_epu16(oddScaled, div255), 7)

  mm256_or_si256(evenOut, mm256_slli_epi16(oddOut, 8))
|
||||||
|
|
||||||
proc isOneColorAvx2*(image: Image): bool {.simd.} =
|
proc isOneColorAvx2*(image: Image): bool {.simd.} =
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
|
@ -380,11 +415,37 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
||||||
# Set src as this result for if we do another power
|
# Set src as this result for if we do another power
|
||||||
src = result
|
src = result
|
||||||
|
|
||||||
proc blendLineNormalAvx2*(
  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
) {.simd.} =
  ## AVX2 version of normal-blending the single color `rgbx` over the first
  ## `len` pixels of `line`, in place.
  ##
  ## Runs in three phases: a scalar head until `line[i]` sits on a 32-byte
  ## boundary, an aligned loop handling 8 pixels per step, and a scalar tail
  ## for whatever is left.
  var i: int

  # Scalar head: the vector loop below uses aligned loads and stores.
  while i < len and cast[uint](addr line[i]) mod 32 != 0:
    line[i] = blendNormal(line[i], rgbx)
    inc i

  # These names are picked up by the `blendNormalSimd` template when it is
  # expanded in the loop below, so they must keep these exact identifiers.
  let
    alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
    oddMask = mm256_set1_epi16(cast[int16](0xff00))
    div255 = mm256_set1_epi16(cast[int16](0x8081))
    vecAlpha255 = mm256_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
    shuffleControl = mm256_set_epi8(
      15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1,
      15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1
    )
    colorVec = mm256_set1_epi32(cast[uint32](rgbx)) # `rgbx` in every lane

  # NOTE(review): with `<`, a final run of exactly 8 pixels is left to the
  # scalar tail instead of being handled here.
  while i < len - 8:
    let backdrop = mm256_load_si256(addr line[i])
    mm256_store_si256(addr line[i], blendNormalSimd(backdrop, colorVec))
    i += 8

  # Scalar tail.
  while i < len:
    line[i] = blendNormal(line[i], rgbx)
    inc i
|
||||||
|
|
||||||
|
proc blendLineNormalAvx2*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while (cast[uint](a[i].addr) and 31) != 0:
|
while i < len and (cast[uint](a[i].addr) and 31) != 0:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -403,41 +464,45 @@ proc blitLineNormalAvx2*(
|
||||||
source = mm256_loadu_si256(b[i].addr)
|
source = mm256_loadu_si256(b[i].addr)
|
||||||
eq255 = mm256_cmpeq_epi8(source, vec255)
|
eq255 = mm256_cmpeq_epi8(source, vec255)
|
||||||
if (mm256_movemask_epi8(eq255) and 0x88888888) == 0x88888888: # Opaque source
|
if (mm256_movemask_epi8(eq255) and 0x88888888) == 0x88888888: # Opaque source
|
||||||
mm256_storeu_si256(a[i].addr, source)
|
mm256_store_si256(a[i].addr, source)
|
||||||
else:
|
else:
|
||||||
let backdrop = mm256_load_si256(a[i].addr)
|
let backdrop = mm256_load_si256(a[i].addr)
|
||||||
|
mm256_store_si256(a[i].addr, blendNormalSimd(backdrop, source))
|
||||||
var
|
|
||||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
|
||||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
|
||||||
|
|
||||||
let multiplier = mm256_sub_epi32(vecAlpha255, sourceAlpha)
|
|
||||||
|
|
||||||
backdropEven = mm256_mulhi_epu16(backdropEven, multiplier)
|
|
||||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, multiplier)
|
|
||||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
let added = mm256_add_epi8(
|
|
||||||
source,
|
|
||||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
mm256_store_si256(a[i].addr, added)
|
|
||||||
|
|
||||||
i += 8
|
i += 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
proc blendLineMaskAvx2*(
  line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
) {.simd.} =
  ## AVX2 version of mask-blending the first `len` pixels of `line` with the
  ## single color `rgbx`, in place.
  ##
  ## Runs in three phases: a scalar head until `line[i]` sits on a 32-byte
  ## boundary, an aligned loop handling 8 pixels per step, and a scalar tail
  ## for whatever is left.
  var i: int

  # Scalar head: the vector loop below uses aligned loads and stores.
  while i < len and cast[uint](addr line[i]) mod 32 != 0:
    line[i] = blendMask(line[i], rgbx)
    inc i

  # These names are picked up by the `blendMaskSimd` template when it is
  # expanded in the loop below, so they must keep these exact identifiers.
  let
    alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
    oddMask = mm256_set1_epi16(cast[int16](0xff00))
    div255 = mm256_set1_epi16(cast[int16](0x8081))
    shuffleControl = mm256_set_epi8(
      15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1,
      15, -1, 15, -1, 11, -1, 11, -1, 7, -1, 7, -1, 3, -1, 3, -1
    )
    colorVec = mm256_set1_epi32(cast[uint32](rgbx)) # `rgbx` in every lane

  # NOTE(review): with `<`, a final run of exactly 8 pixels is left to the
  # scalar tail instead of being handled here.
  while i < len - 8:
    let backdrop = mm256_load_si256(addr line[i])
    mm256_store_si256(addr line[i], blendMaskSimd(backdrop, colorVec))
    i += 8

  # Scalar tail.
  while i < len:
    line[i] = blendMask(line[i], rgbx)
    inc i
|
||||||
|
|
||||||
|
proc blendLineMaskAvx2*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while (cast[uint](a[i].addr) and 31) != 0:
|
while i < len and (cast[uint](a[i].addr) and 31) != 0:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -458,24 +523,7 @@ proc blitLineMaskAvx2*(
|
||||||
discard
|
discard
|
||||||
else:
|
else:
|
||||||
let backdrop = mm256_load_si256(a[i].addr)
|
let backdrop = mm256_load_si256(a[i].addr)
|
||||||
|
mm256_store_si256(a[i].addr, blendMaskSimd(backdrop, source))
|
||||||
var
|
|
||||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
|
||||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm256_shuffle_epi8(sourceAlpha, shuffleControl)
|
|
||||||
|
|
||||||
backdropEven = mm256_mulhi_epu16(backdropEven, sourceAlpha)
|
|
||||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, sourceAlpha)
|
|
||||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
mm256_store_si256(
|
|
||||||
a[i].addr,
|
|
||||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
i += 8
|
i += 8
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
|
|
|
@ -414,7 +414,7 @@ proc magnifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
proc blitLineNormalNeon*(
|
proc blendLineNormalNeon*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
|
@ -463,7 +463,7 @@ proc blitLineNormalNeon*(
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
proc blitLineMaskNeon*(
|
proc blendLineMaskNeon*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
|
|
|
@ -10,17 +10,7 @@ proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
||||||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||||
|
|
||||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
template blendNormalSimd*(backdrop, source: M128i): M128i =
|
||||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
|
||||||
|
|
||||||
proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
|
||||||
let
|
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
|
||||||
|
|
||||||
var
|
var
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||||
|
@ -28,14 +18,10 @@ proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
||||||
|
|
||||||
let k = mm_sub_epi32(
|
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
||||||
mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255])),
|
|
||||||
sourceAlpha
|
|
||||||
)
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, k)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, k)
|
|
||||||
|
|
||||||
|
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
||||||
|
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||||
|
|
||||||
|
@ -44,12 +30,7 @@ proc blendNormalSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
)
|
)
|
||||||
|
|
||||||
proc blendMaskSimd*(backdrop, source: M128i): M128i {.inline.} =
|
template blendMaskSimd*(backdrop, source: M128i): M128i =
|
||||||
let
|
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
|
||||||
|
|
||||||
var
|
var
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
sourceAlpha = mm_and_si128(source, alphaMask)
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
backdropEven = mm_slli_epi16(backdrop, 8)
|
||||||
|
@ -59,7 +40,6 @@ proc blendMaskSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||||
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||||
|
|
||||||
|
@ -325,7 +305,7 @@ proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
|
||||||
valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
|
valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
|
||||||
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
|
||||||
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
|
||||||
mm_storeu_si128(
|
mm_store_si128(
|
||||||
cast[pointer](p),
|
cast[pointer](p),
|
||||||
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
|
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
|
||||||
)
|
)
|
||||||
|
@ -367,8 +347,8 @@ proc ceilSse2*(image: Image) {.simd.} =
|
||||||
values1 = mm_cmpeq_epi8(values1, vecZero)
|
values1 = mm_cmpeq_epi8(values1, vecZero)
|
||||||
values0 = mm_andnot_si128(values0, vec255)
|
values0 = mm_andnot_si128(values0, vec255)
|
||||||
values1 = mm_andnot_si128(values1, vec255)
|
values1 = mm_andnot_si128(values1, vec255)
|
||||||
mm_storeu_si128(cast[pointer](p), values0)
|
mm_store_si128(cast[pointer](p), values0)
|
||||||
mm_storeu_si128(cast[pointer](p + 16), values1)
|
mm_store_si128(cast[pointer](p + 16), values1)
|
||||||
p += 32
|
p += 32
|
||||||
i += 8 * iterations
|
i += 8 * iterations
|
||||||
|
|
||||||
|
@ -527,11 +507,91 @@ proc magnifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
proc blitLineNormalSse2*(
|
template applyCoverage*(rgbxVec, coverage: M128i): M128i =
|
||||||
|
## Unpack the first 4 coverage bytes.
|
||||||
|
var unpacked = mm_unpacklo_epi8(mm_setzero_si128(), coverage)
|
||||||
|
unpacked = mm_unpacklo_epi8(mm_setzero_si128(), unpacked)
|
||||||
|
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
||||||
|
|
||||||
|
var
|
||||||
|
rgbxEven = mm_slli_epi16(rgbxVec, 8)
|
||||||
|
rgbxOdd = mm_and_si128(rgbxVec, oddMask)
|
||||||
|
rgbxEven = mm_mulhi_epu16(rgbxEven, unpacked)
|
||||||
|
rgbxOdd = mm_mulhi_epu16(rgbxOdd, unpacked)
|
||||||
|
rgbxEven = mm_srli_epi16(mm_mulhi_epu16(rgbxEven, div255), 7)
|
||||||
|
rgbxOdd = mm_srli_epi16(mm_mulhi_epu16(rgbxOdd, div255), 7)
|
||||||
|
|
||||||
|
mm_or_si128(rgbxEven, mm_slli_epi16(rgbxOdd, 8))
|
||||||
|
|
||||||
|
proc blendLineCoverageOverwriteSse2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage != 0:
|
||||||
|
line[i] = rgbx * coverage
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
|
||||||
|
vecZero = mm_setzero_si128()
|
||||||
|
vec255 = mm_set1_epi8(255)
|
||||||
|
oddMask = mm_set1_epi16(0xff00)
|
||||||
|
div255 = mm_set1_epi16(0x8081)
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = mm_loadu_si128(coverages[i].addr)
|
||||||
|
eqZero = mm_cmpeq_epi8(coverage, vecZero)
|
||||||
|
eq255 = mm_cmpeq_epi8(coverage, vec255)
|
||||||
|
if mm_movemask_epi8(eqZero) == 0xffff:
|
||||||
|
i += 16
|
||||||
|
elif mm_movemask_epi8(eq255) == 0xffff:
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm_store_si128(line[i].addr, rgbxVec)
|
||||||
|
i += 4
|
||||||
|
else:
|
||||||
|
var coverage = coverage
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm_store_si128(line[i].addr, rgbxVec.applyCoverage(coverage))
|
||||||
|
coverage = mm_srli_si128(coverage, 4)
|
||||||
|
i += 4
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage != 0:
|
||||||
|
line[i] = rgbx * coverage
|
||||||
|
|
||||||
|
proc blendLineNormalSse2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
line[i] = blendNormal(line[i], rgbx)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
source = mm_set1_epi32(cast[uint32](rgbx))
|
||||||
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
||||||
|
while i < len - 4:
|
||||||
|
let backdrop = mm_load_si128(line[i].addr)
|
||||||
|
mm_store_si128(line[i].addr, blendNormalSimd(backdrop, source))
|
||||||
|
i += 4
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
line[i] = blendNormal(line[i], rgbx)
|
||||||
|
|
||||||
|
proc blendLineNormalSse2*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while (cast[uint](a[i].addr) and 15) != 0:
|
while i < len and (cast[uint](a[i].addr) and 15) != 0:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -546,41 +606,92 @@ proc blitLineNormalSse2*(
|
||||||
source = mm_loadu_si128(b[i].addr)
|
source = mm_loadu_si128(b[i].addr)
|
||||||
eq255 = mm_cmpeq_epi8(source, vec255)
|
eq255 = mm_cmpeq_epi8(source, vec255)
|
||||||
if (mm_movemask_epi8(eq255) and 0x00008888) == 0x00008888: # Opaque source
|
if (mm_movemask_epi8(eq255) and 0x00008888) == 0x00008888: # Opaque source
|
||||||
mm_storeu_si128(a[i].addr, source)
|
mm_store_si128(a[i].addr, source)
|
||||||
else:
|
else:
|
||||||
let backdrop = mm_load_si128(a[i].addr)
|
let backdrop = mm_load_si128(a[i].addr)
|
||||||
|
mm_store_si128(a[i].addr, blendNormalSimd(backdrop, source))
|
||||||
var
|
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
|
||||||
|
|
||||||
let multiplier = mm_sub_epi32(vecAlpha255, sourceAlpha)
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, multiplier)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, multiplier)
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
let added = mm_add_epi8(
|
|
||||||
source,
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
mm_store_si128(a[i].addr, added)
|
|
||||||
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendNormal(a[i], b[i])
|
a[i] = blendNormal(a[i], b[i])
|
||||||
|
|
||||||
proc blitLineMaskSse2*(
|
proc blendLineCoverageNormalSse2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
|
||||||
|
vecZero = mm_setzero_si128()
|
||||||
|
vec255 = mm_set1_epi8(255)
|
||||||
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
vecAlpha255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = mm_loadu_si128(coverages[i].addr)
|
||||||
|
eqZero = mm_cmpeq_epi8(coverage, vecZero)
|
||||||
|
eq255 = mm_cmpeq_epi8(coverage, vec255)
|
||||||
|
if mm_movemask_epi8(eqZero) == 0xffff:
|
||||||
|
i += 16
|
||||||
|
elif mm_movemask_epi8(eq255) == 0xffff and rgbx.a == 255:
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm_store_si128(line[i].addr, rgbxVec)
|
||||||
|
i += 4
|
||||||
|
else:
|
||||||
|
var coverage = coverage
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
let
|
||||||
|
backdrop = mm_loadu_si128(line[i].addr)
|
||||||
|
source = rgbxVec.applyCoverage(coverage)
|
||||||
|
mm_store_si128(line[i].addr, blendNormalSimd(backdrop, source))
|
||||||
|
coverage = mm_srli_si128(coverage, 4)
|
||||||
|
i += 4
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendNormal(line[i], rgbx * coverage)
|
||||||
|
|
||||||
|
proc blendLineMaskSse2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX], rgbx: ColorRGBX, len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
line[i] = blendMask(line[i], rgbx)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
source = mm_set1_epi32(cast[uint32](rgbx))
|
||||||
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
while i < len - 4:
|
||||||
|
let backdrop = mm_load_si128(line[i].addr)
|
||||||
|
mm_store_si128(line[i].addr, blendMaskSimd(backdrop, source))
|
||||||
|
i += 4
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
line[i] = blendMask(line[i], rgbx)
|
||||||
|
|
||||||
|
proc blendLineMaskSse2*(
|
||||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||||
) {.simd.} =
|
) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
while (cast[uint](a[i].addr) and 15) != 0:
|
while i < len and (cast[uint](a[i].addr) and 15) != 0:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
|
@ -597,28 +708,65 @@ proc blitLineMaskSse2*(
|
||||||
discard
|
discard
|
||||||
else:
|
else:
|
||||||
let backdrop = mm_load_si128(a[i].addr)
|
let backdrop = mm_load_si128(a[i].addr)
|
||||||
|
mm_store_si128(a[i].addr, blendMaskSimd(backdrop, source))
|
||||||
var
|
|
||||||
sourceAlpha = mm_and_si128(source, alphaMask)
|
|
||||||
backdropEven = mm_slli_epi16(backdrop, 8)
|
|
||||||
backdropOdd = mm_and_si128(backdrop, oddMask)
|
|
||||||
|
|
||||||
sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
|
|
||||||
|
|
||||||
backdropEven = mm_mulhi_epu16(backdropEven, sourceAlpha)
|
|
||||||
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceAlpha)
|
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
|
||||||
|
|
||||||
mm_store_si128(
|
|
||||||
a[i].addr,
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
|
||||||
)
|
|
||||||
|
|
||||||
i += 4
|
i += 4
|
||||||
|
|
||||||
for i in i ..< len:
|
for i in i ..< len:
|
||||||
a[i] = blendMask(a[i], b[i])
|
a[i] = blendMask(a[i], b[i])
|
||||||
|
|
||||||
|
proc blendLineCoverageMaskSse2*(
|
||||||
|
line: ptr UncheckedArray[ColorRGBX],
|
||||||
|
coverages: ptr UncheckedArray[uint8],
|
||||||
|
rgbx: ColorRGBX,
|
||||||
|
len: int
|
||||||
|
) {.simd.} =
|
||||||
|
var i: int
|
||||||
|
while i < len and (cast[uint](line[i].addr) and 15) != 0:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
elif coverage == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
inc i
|
||||||
|
|
||||||
|
let
|
||||||
|
rgbxVec = mm_set1_epi32(cast[uint32](rgbx))
|
||||||
|
vecZero = mm_setzero_si128()
|
||||||
|
vec255 = mm_set1_epi8(255)
|
||||||
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
while i < len - 16:
|
||||||
|
let
|
||||||
|
coverage = mm_loadu_si128(coverages[i].addr)
|
||||||
|
eqZero = mm_cmpeq_epi8(coverage, vecZero)
|
||||||
|
eq255 = mm_cmpeq_epi8(coverage, vec255)
|
||||||
|
if mm_movemask_epi8(eqZero) == 0xffff:
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
mm_store_si128(line[i].addr, vecZero)
|
||||||
|
i += 4
|
||||||
|
elif mm_movemask_epi8(eq255) == 0xffff and rgbx.a == 255:
|
||||||
|
i += 16
|
||||||
|
else:
|
||||||
|
var coverage = coverage
|
||||||
|
for _ in 0 ..< 4:
|
||||||
|
let
|
||||||
|
backdrop = mm_loadu_si128(line[i].addr)
|
||||||
|
source = rgbxVec.applyCoverage(coverage)
|
||||||
|
mm_store_si128(line[i].addr, blendMaskSimd(backdrop, source))
|
||||||
|
coverage = mm_srli_si128(coverage, 4)
|
||||||
|
i += 4
|
||||||
|
|
||||||
|
for i in i ..< len:
|
||||||
|
let coverage = coverages[i]
|
||||||
|
if coverage == 0:
|
||||||
|
line[i] = rgbx(0, 0, 0, 0)
|
||||||
|
elif coverage == 255:
|
||||||
|
discard
|
||||||
|
else:
|
||||||
|
line[i] = blendMask(line[i], rgbx * coverage)
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -5,8 +5,7 @@ const text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis in q
|
||||||
var font = readFont("tests/fonts/Roboto-Regular_1.ttf")
|
var font = readFont("tests/fonts/Roboto-Regular_1.ttf")
|
||||||
font.size = 16
|
font.size = 16
|
||||||
|
|
||||||
let
|
let image = newImage(500, 300)
|
||||||
image = newImage(500, 300)
|
|
||||||
|
|
||||||
timeIt "typeset":
|
timeIt "typeset":
|
||||||
discard font.typeset(text, bounds = vec2(image.width.float32, 0))
|
discard font.typeset(text, bounds = vec2(image.width.float32, 0))
|
||||||
|
|
|
@ -25,3 +25,31 @@ for i in 0 ..< 250:
|
||||||
|
|
||||||
a.draw(b, translate(vec2(translation.x.trunc, translation.y.trunc)))
|
a.draw(b, translate(vec2(translation.x.trunc, translation.y.trunc)))
|
||||||
a.draw(b, translate(translation))
|
a.draw(b, translate(translation))
|
||||||
|
|
||||||
|
for i in 0 ..< 25:
|
||||||
|
let a = newImage(rand(1 .. 20), rand(1 .. 20))
|
||||||
|
for j in 0 ..< 25:
|
||||||
|
let b = newImage(rand(1 .. 20), rand(1 .. 20))
|
||||||
|
|
||||||
|
let
|
||||||
|
translation = vec2(rand(25.0), rand(25.0)) - vec2(5, 5)
|
||||||
|
rotation = rand(2 * PI).float32
|
||||||
|
|
||||||
|
echo a, " ", b, " ", translation, " ", rotation
|
||||||
|
|
||||||
|
a.draw(b, translate(vec2(translation.x, translation.y)))
|
||||||
|
a.draw(b, translate(translation) * rotate(rotation))
|
||||||
|
|
||||||
|
for i in 0 ..< 25:
|
||||||
|
let a = newImage(rand(1 .. 2000), rand(1 .. 2000))
|
||||||
|
for j in 0 ..< 25:
|
||||||
|
let b = newImage(rand(1 .. 1000), rand(1 .. 1000))
|
||||||
|
|
||||||
|
let
|
||||||
|
translation = vec2(rand(2500.0), rand(2500.0)) - vec2(500, 500)
|
||||||
|
rotation = rand(2 * PI).float32
|
||||||
|
|
||||||
|
echo a, " ", b, " ", translation, " ", rotation
|
||||||
|
|
||||||
|
a.draw(b, translate(vec2(translation.x, translation.y)))
|
||||||
|
a.draw(b, translate(translation) * rotate(rotation))
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
import pixie, random
|
|
||||||
|
|
||||||
randomize()
|
|
||||||
|
|
||||||
for i in 0 ..< 25:
|
|
||||||
let a = newImage(rand(1 .. 20), rand(1 .. 20))
|
|
||||||
for j in 0 ..< 25:
|
|
||||||
let b = newImage(rand(1 .. 20), rand(1 .. 20))
|
|
||||||
|
|
||||||
let
|
|
||||||
translation = vec2(rand(25.0), rand(25.0)) - vec2(5, 5)
|
|
||||||
rotation = rand(2 * PI).float32
|
|
||||||
|
|
||||||
echo a, " ", b, " ", translation, " ", rotation
|
|
||||||
|
|
||||||
a.draw(b, translate(vec2(translation.x.trunc, translation.y.trunc)))
|
|
||||||
a.draw(b, translate(translation) * rotate(rotation))
|
|
||||||
|
|
||||||
for i in 0 ..< 25:
|
|
||||||
let a = newImage(rand(1 .. 2000), rand(1 .. 2000))
|
|
||||||
for j in 0 ..< 25:
|
|
||||||
let b = newImage(rand(1 .. 1000), rand(1 .. 1000))
|
|
||||||
|
|
||||||
let
|
|
||||||
translation = vec2(rand(2500.0), rand(2500.0)) - vec2(500, 500)
|
|
||||||
rotation = rand(2 * PI).float32
|
|
||||||
|
|
||||||
echo a, " ", b, " ", translation, " ", rotation
|
|
||||||
|
|
||||||
a.draw(b, translate(vec2(translation.x.trunc, translation.y.trunc)))
|
|
||||||
a.draw(b, translate(translation) * rotate(rotation))
|
|
Loading…
Reference in a new issue