Merge pull request #446 from guzba/master

implement the last 2 mask:mask blends simd
This commit is contained in:
Andre von Houck 2022-06-18 09:46:56 -07:00 committed by GitHub
commit 460d97c520
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 123 additions and 75 deletions

View file

@ -11,8 +11,8 @@ when defined(amd64) and allowSimd:
type
Blender* = proc(backdrop, source: ColorRGBX): ColorRGBX {.gcsafe, raises: [].}
## Function signature returned by blender.
Masker* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].}
## Function signature returned by masker.
MaskBlender* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].}
## Function signature returned by maskBlender.
when defined(release):
{.push checks: off.}
@ -484,29 +484,29 @@ proc maskBlendExclude*(backdrop, source: uint8): uint8 {.inline.} =
## Exclude blend masks
max(backdrop, source) - min(backdrop, source)
proc maskBlendNormalMasker(backdrop, source: uint8): uint8 =
proc maskBlendNormalMaskBlender(backdrop, source: uint8): uint8 =
maskBlendNormal(backdrop, source)
proc maskBlendMaskMasker(backdrop, source: uint8): uint8 =
proc maskBlendMaskMaskBlender(backdrop, source: uint8): uint8 =
maskBlendMask(backdrop, source)
proc maskBlendSubtractMasker(backdrop, source: uint8): uint8 =
proc maskBlendSubtractMaskBlender(backdrop, source: uint8): uint8 =
maskBlendSubtract(backdrop, source)
proc maskBlendExcludeMasker(backdrop, source: uint8): uint8 =
proc maskBlendExcludeMaskBlender(backdrop, source: uint8): uint8 =
maskBlendExclude(backdrop, source)
proc maskBlendOverwriteMasker(backdrop, source: uint8): uint8 =
proc maskBlendOverwriteMaskBlender(backdrop, source: uint8): uint8 =
source
proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} =
proc maskBlender*(blendMode: BlendMode): MaskBlender {.raises: [PixieError].} =
## Returns a blend masking function for a given blend masking mode.
case blendMode:
of NormalBlend: maskBlendNormalMasker
of MaskBlend: maskBlendMaskMasker
of OverwriteBlend: maskBlendOverwriteMasker
of SubtractMaskBlend: maskBlendSubtractMasker
of ExcludeMaskBlend: maskBlendExcludeMasker
of NormalBlend: maskBlendNormalMaskBlender
of MaskBlend: maskBlendMaskMaskBlender
of OverwriteBlend: maskBlendOverwriteMaskBlender
of SubtractMaskBlend: maskBlendSubtractMaskBlender
of ExcludeMaskBlend: maskBlendExcludeMaskBlender
else:
raise newException(PixieError, "No masker for " & $blendMode)
@ -647,24 +647,63 @@ when defined(amd64) and allowSimd:
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
proc maskBlendNormalSimdMasker(backdrop, source: M128i): M128i =
proc maskBlendSubtractSimd*(backdrop, source: M128i): M128i {.inline.} =
  ## Subtract blend applied to every packed uint8 mask value at once.
  ## Each output byte is backdrop * (255 - source) div 255.
  let
    highByteMask = mm_set1_epi16(cast[int16](0xff00))
    allOnes = mm_set1_epi8(255)
    recip255 = mm_set1_epi16(cast[int16](0x8081))
    # Per-byte 255 - source, used as the multiplier.
    inverted = mm_sub_epi8(allOnes, source)
    # Both operands are placed in the high byte of their 16-bit lane, so the
    # high half of the 32-bit product is the plain byte product.
    evenProduct = mm_mulhi_epu16(
      mm_slli_epi16(backdrop, 8),
      mm_slli_epi16(inverted, 8)
    )
    oddProduct = mm_mulhi_epu16(
      mm_and_si128(backdrop, highByteMask),
      mm_and_si128(inverted, highByteMask)
    )
    # Divide by 255 using a multiply-high by 0x8081 followed by a shift of 7.
    evenQuotient = mm_srli_epi16(mm_mulhi_epu16(evenProduct, recip255), 7)
    oddQuotient = mm_srli_epi16(mm_mulhi_epu16(oddProduct, recip255), 7)
  # Move the odd results back to the high byte of each lane and merge.
  mm_or_si128(evenQuotient, mm_slli_epi16(oddQuotient, 8))
proc maskBlendExcludeSimd*(backdrop, source: M128i): M128i {.inline.} =
  ## Exclude blend applied to every packed uint8 mask value at once: the
  ## per-byte unsigned maximum minus the per-byte unsigned minimum.
  let
    larger = mm_max_epu8(backdrop, source)
    smaller = mm_min_epu8(backdrop, source)
  mm_sub_epi8(larger, smaller)
proc maskBlendNormalSimdMaskBlender(backdrop, source: M128i): M128i =
maskBlendNormalSimd(backdrop, source)
proc maskBlendMaskSimdMasker(backdrop, source: M128i): M128i =
proc maskBlendMaskSimdMaskBlender(backdrop, source: M128i): M128i =
maskBlendMaskSimd(backdrop, source)
proc maskerSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
proc maskBlendExcludeSimdMaskBlender(backdrop, source: M128i): M128i =
  ## Non-inline wrapper that forwards to maskBlendExcludeSimd.
  result = maskBlendExcludeSimd(backdrop, source)
proc maskBlendSubtractSimdMaskBlender(backdrop, source: M128i): M128i =
  ## Non-inline wrapper that forwards to maskBlendSubtractSimd.
  result = maskBlendSubtractSimd(backdrop, source)
proc maskBlenderSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
## Returns a blend masking function with SIMD support.
case blendMode:
of NormalBlend: maskBlendNormalSimdMasker
of MaskBlend: maskBlendMaskSimdMasker
of NormalBlend: maskBlendNormalSimdMaskBlender
of MaskBlend: maskBlendMaskSimdMaskBlender
of OverwriteBlend: overwriteSimdBlender
of SubtractMaskBlend: maskBlendSubtractSimdMaskBlender
of ExcludeMaskBlend: maskBlendExcludeSimdMaskBlender
else:
raise newException(PixieError, "No SIMD masker for " & $blendMode)
proc hasSimdMasker*(blendMode: BlendMode): bool {.inline, raises: [].} =
proc hasSimdMaskBlender*(blendMode: BlendMode): bool {.inline, raises: [].} =
## Is there a blend masking function with SIMD support?
blendMode in {NormalBlend, MaskBlend, OverwriteBlend}
blendMode in {
NormalBlend,
MaskBlend,
OverwriteBlend,
SubtractMaskBlend,
ExcludeMaskBlend
}
when defined(release):
{.pop.}

View file

@ -714,7 +714,7 @@ proc drawUber(
when type(a) is Image:
let blender = blendMode.blender()
else: # a is a Mask
let masker = blendMode.masker()
let maskBlender = blendMode.maskBlender()
if blendMode == MaskBlend:
if yMin > 0:
@ -777,7 +777,7 @@ proc drawUber(
let sample = b.getRgbaSmooth(srcPos.x, srcPos.y).a
else: # b is a Mask
let sample = b.getValueSmooth(srcPos.x, srcPos.y)
a.unsafe[x, y] = masker(backdrop, sample)
a.unsafe[x, y] = maskBlender(backdrop, sample)
srcPos += dx
@ -972,8 +972,8 @@ proc drawUber(
x += 16
sx += 16
else: # is a Mask
if blendMode.hasSimdMasker():
let maskerSimd = blendMode.maskerSimd()
if blendMode.hasSimdMaskBlender():
let maskerSimd = blendMode.maskBlenderSimd()
for _ in 0 ..< (xStop - xStart) div 16:
let backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
when type(b) is Image:
@ -1089,7 +1089,7 @@ proc drawUber(
let sample = b.unsafe[samplePos.x, samplePos.y].a
else: # b is a Mask
let sample = b.unsafe[samplePos.x, samplePos.y]
a.unsafe[x, y] = masker(backdrop, sample)
a.unsafe[x, y] = maskBlender(backdrop, sample)
srcPos += dx
if blendMode == MaskBlend:

View file

@ -1223,6 +1223,20 @@ iterator walk(
if prevAt != width.float32.fixed32 and count != 0:
echo "Leak detected: ", count, " @ (", prevAt, ", ", y, ")"
iterator walkInteger(
  hits: seq[(int32, int16)],
  numHits: int,
  windingRule: WindingRule,
  y, width: int
): (int, int) =
  ## Iterates the spans produced by `walk` and yields each one as a
  ## (start, length) pair, with both ends converted through `.integer`.
  ## Spans whose resulting length is zero or negative are not yielded.
  for (spanFrom, spanTo, count) in hits.walk(numHits, windingRule, y, width):
    let
      first = spanFrom.integer
      extent = spanTo.integer - first
    if extent > 0:
      yield (first, extent)
proc computeCoverage(
coverages: ptr UncheckedArray[uint8],
hits: var seq[(Fixed32, int16)],
@ -1443,9 +1457,9 @@ proc fillCoverage(
) =
var x = startX
when defined(amd64) and allowSimd:
if blendMode.hasSimdMasker():
if blendMode.hasSimdMaskBlender():
let
maskerSimd = blendMode.maskerSimd()
maskerSimd = blendMode.maskBlenderSimd()
vecZero = mm_setzero_si128()
for _ in 0 ..< coverages.len div 16:
let
@ -1465,7 +1479,7 @@ proc fillCoverage(
mm_storeu_si128(mask.data[index].addr, vecZero)
x += 16
let masker = blendMode.masker()
let maskBlender = blendMode.maskBlender()
for x in x ..< startX + coverages.len:
let coverage = coverages[x - startX]
if coverage != 0 or blendMode == ExcludeMaskBlend:
@ -1473,7 +1487,7 @@ proc fillCoverage(
mask.unsafe[x, y] = coverage
else:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = masker(backdrop, coverage)
mask.unsafe[x, y] = maskBlender(backdrop, coverage)
elif blendMode == MaskBlend:
mask.unsafe[x, y] = 0
@ -1481,22 +1495,6 @@ proc fillCoverage(
mask.clearUnsafe(0, y, startX, y)
mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
template walkHits(
hits: seq[(int32, int16)],
numHits: int,
windingRule: WindingRule,
y, width: int,
inner: untyped
) =
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
let
fillStart {.inject.} = prevAt.integer
fillLen {.inject.} = at.integer - fillStart
if fillLen <= 0:
continue
inner
proc fillHits(
image: Image,
rgbx: ColorRGBX,
@ -1506,38 +1504,36 @@ proc fillHits(
windingRule: WindingRule,
blendMode: BlendMode
) =
template simdBlob(image: Image, x: var int, blendProc: untyped) =
template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) =
when allowSimd:
when defined(amd64):
let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< fillLen div 4:
let
index = image.dataIndex(x, y)
backdrop = mm_loadu_si128(image.data[index].addr)
mm_storeu_si128(image.data[index].addr, blendProc(backdrop, colorVec))
for _ in 0 ..< len div 4:
let backdrop = mm_loadu_si128(image.unsafe[x, y].addr)
mm_storeu_si128(image.unsafe[x, y].addr, blendProc(backdrop, colorVec))
x += 4
case blendMode:
of OverwriteBlend:
walkHits hits, numHits, windingRule, y, image.width:
fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
of NormalBlend:
walkHits hits, numHits, windingRule, y, image.width:
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
if rgbx.a == 255:
fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
else:
var x = fillStart
simdBlob(image, x, blendNormalSimd)
for x in x ..< fillStart + fillLen:
var x = start
simdBlob(image, x, len, blendNormalSimd)
for x in x ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendNormal(backdrop, rgbx)
of MaskBlend:
var filledTo = startX
walkHits hits, numHits, windingRule, y, image.width:
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
block: # Clear any gap between this fill and the previous fill
let gapBetween = fillStart - filledTo
let gapBetween = start - filledTo
if gapBetween > 0:
fillUnsafe(
image.data,
@ -1545,12 +1541,12 @@ proc fillHits(
image.dataIndex(filledTo, y),
gapBetween
)
filledTo = fillStart + fillLen
filledTo = start + len
block: # Handle this fill
if rgbx.a != 255:
var x = fillStart
simdBlob(image, x, blendMaskSimd)
for x in x ..< fillStart + fillLen:
var x = start
simdBlob(image, x, len, blendMaskSimd)
for x in x ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blendMask(backdrop, rgbx)
@ -1559,8 +1555,8 @@ proc fillHits(
else:
let blender = blendMode.blender()
walkHits hits, numHits, windingRule, y, image.width:
for x in fillStart ..< fillStart + fillLen:
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
for x in start ..< start + len:
let backdrop = image.unsafe[x, y]
image.unsafe[x, y] = blender(backdrop, rgbx)
@ -1572,31 +1568,44 @@ proc fillHits(
windingRule: WindingRule,
blendMode: BlendMode
) =
template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) =
when allowSimd:
when defined(amd64):
let vec255 = mm_set1_epi8(255)
for _ in 0 ..< len div 16:
let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
mm_storeu_si128(mask.unsafe[x, y].addr, blendProc(backdrop, vec255))
x += 16
case blendMode:
of NormalBlend, OverwriteBlend:
walkHits hits, numHits, windingRule, y, mask.width:
fillUnsafe(mask.data, 255, mask.dataIndex(fillStart, y), fillLen)
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
fillUnsafe(mask.data, 255, mask.dataIndex(start, y), len)
of MaskBlend:
var filledTo = startX
walkHits hits, numHits, windingRule,y, mask.width:
let gapBetween = fillStart - filledTo
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
let gapBetween = start - filledTo
if gapBetween > 0:
fillUnsafe(mask.data, 0, mask.dataIndex(filledTo, y), gapBetween)
filledTo = fillStart + fillLen
filledTo = start + len
mask.clearUnsafe(0, y, startX, y)
mask.clearUnsafe(filledTo, y, mask.width, y)
of SubtractMaskBlend:
walkHits hits, numHits, windingRule, y, mask.width:
for x in fillStart ..< fillStart + fillLen:
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
var x = start
simdBlob(mask, x, len, maskBlendSubtractSimd)
for x in x ..< start + len:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlendSubtract(backdrop, 255)
of ExcludeMaskBlend:
walkHits hits, numHits, windingRule, y, mask.width:
for x in fillStart ..< fillStart + fillLen:
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
var x = start
simdBlob(mask, x, len, maskBlendExcludeSimd)
for x in x ..< start + len:
let backdrop = mask.unsafe[x, y]
mask.unsafe[x, y] = maskBlendExclude(backdrop, 255)