Merge pull request #446 from guzba/master
implement the last 2 mask:mask blends simd
This commit is contained in:
commit
460d97c520
3 changed files with 123 additions and 75 deletions
|
@ -11,8 +11,8 @@ when defined(amd64) and allowSimd:
|
|||
type
|
||||
Blender* = proc(backdrop, source: ColorRGBX): ColorRGBX {.gcsafe, raises: [].}
|
||||
## Function signature returned by blender.
|
||||
Masker* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].}
|
||||
## Function signature returned by masker.
|
||||
MaskBlender* = proc(backdrop, source: uint8): uint8 {.gcsafe, raises: [].}
|
||||
## Function signature returned by maskBlender.
|
||||
|
||||
when defined(release):
|
||||
{.push checks: off.}
|
||||
|
@ -484,29 +484,29 @@ proc maskBlendExclude*(backdrop, source: uint8): uint8 {.inline.} =
|
|||
## Exclude blend masks
|
||||
max(backdrop, source) - min(backdrop, source)
|
||||
|
||||
proc maskBlendNormalMasker(backdrop, source: uint8): uint8 =
|
||||
proc maskBlendNormalMaskBlender(backdrop, source: uint8): uint8 =
|
||||
maskBlendNormal(backdrop, source)
|
||||
|
||||
proc maskBlendMaskMasker(backdrop, source: uint8): uint8 =
|
||||
proc maskBlendMaskMaskBlender(backdrop, source: uint8): uint8 =
|
||||
maskBlendMask(backdrop, source)
|
||||
|
||||
proc maskBlendSubtractMasker(backdrop, source: uint8): uint8 =
|
||||
proc maskBlendSubtractMaskBlender(backdrop, source: uint8): uint8 =
|
||||
maskBlendSubtract(backdrop, source)
|
||||
|
||||
proc maskBlendExcludeMasker(backdrop, source: uint8): uint8 =
|
||||
proc maskBlendExcludeMaskBlender(backdrop, source: uint8): uint8 =
|
||||
maskBlendExclude(backdrop, source)
|
||||
|
||||
proc maskBlendOverwriteMasker(backdrop, source: uint8): uint8 =
|
||||
proc maskBlendOverwriteMaskBlender(backdrop, source: uint8): uint8 =
|
||||
source
|
||||
|
||||
proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} =
|
||||
proc maskBlender*(blendMode: BlendMode): MaskBlender {.raises: [PixieError].} =
|
||||
## Returns a blend masking function for a given blend masking mode.
|
||||
case blendMode:
|
||||
of NormalBlend: maskBlendNormalMasker
|
||||
of MaskBlend: maskBlendMaskMasker
|
||||
of OverwriteBlend: maskBlendOverwriteMasker
|
||||
of SubtractMaskBlend: maskBlendSubtractMasker
|
||||
of ExcludeMaskBlend: maskBlendExcludeMasker
|
||||
of NormalBlend: maskBlendNormalMaskBlender
|
||||
of MaskBlend: maskBlendMaskMaskBlender
|
||||
of OverwriteBlend: maskBlendOverwriteMaskBlender
|
||||
of SubtractMaskBlend: maskBlendSubtractMaskBlender
|
||||
of ExcludeMaskBlend: maskBlendExcludeMaskBlender
|
||||
else:
|
||||
raise newException(PixieError, "No masker for " & $blendMode)
|
||||
|
||||
|
@ -647,24 +647,63 @@ when defined(amd64) and allowSimd:
|
|||
|
||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
|
||||
proc maskBlendNormalSimdMasker(backdrop, source: M128i): M128i =
|
||||
proc maskBlendSubtractSimd*(backdrop, source: M128i): M128i {.inline.} =
  ## Subtract blend masks, 16 mask bytes at a time.
  ## Every byte of the result is backdrop * (255 - source) / 255.
  let
    highByteMask = mm_set1_epi16(cast[int16](0xff00))
    allOnes = mm_set1_epi8(255)
    div255 = mm_set1_epi16(cast[int16](0x8081))
    inverseSource = mm_sub_epi8(allOnes, source)
    # Place each byte in the high half of its own 16-bit lane so that the
    # high word of the 16x16 multiply is the plain 8-bit x 8-bit product.
    evenProduct = mm_mulhi_epu16(
      mm_slli_epi16(backdrop, 8),
      mm_slli_epi16(inverseSource, 8)
    )
    oddProduct = mm_mulhi_epu16(
      mm_and_si128(backdrop, highByteMask),
      mm_and_si128(inverseSource, highByteMask)
    )
    # High word of (x * 0x8081) shifted right by 7 is the divide-by-255 step.
    evenResult = mm_srli_epi16(mm_mulhi_epu16(evenProduct, div255), 7)
    oddResult = mm_srli_epi16(mm_mulhi_epu16(oddProduct, div255), 7)

  # Re-interleave: even bytes stay low, odd bytes go back to the high byte.
  mm_or_si128(evenResult, mm_slli_epi16(oddResult, 8))
|
||||
|
||||
proc maskBlendExcludeSimd*(backdrop, source: M128i): M128i {.inline.} =
  ## Exclude blend masks, 16 mask bytes at a time: for every byte pair the
  ## smaller value is subtracted from the larger one.
  let
    larger = mm_max_epu8(backdrop, source)
    smaller = mm_min_epu8(backdrop, source)
  mm_sub_epi8(larger, smaller)
|
||||
|
||||
proc maskBlendNormalSimdMaskBlender(backdrop, source: M128i): M128i =
|
||||
maskBlendNormalSimd(backdrop, source)
|
||||
|
||||
proc maskBlendMaskSimdMasker(backdrop, source: M128i): M128i =
|
||||
proc maskBlendMaskSimdMaskBlender(backdrop, source: M128i): M128i =
|
||||
maskBlendMaskSimd(backdrop, source)
|
||||
|
||||
proc maskerSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
|
||||
proc maskBlendExcludeSimdMaskBlender(backdrop, source: M128i): M128i =
  ## Plain wrapper that forwards to maskBlendExcludeSimd; this is the proc
  ## maskBlenderSimd returns for ExcludeMaskBlend.
  result = maskBlendExcludeSimd(backdrop, source)
|
||||
|
||||
proc maskBlendSubtractSimdMaskBlender(backdrop, source: M128i): M128i =
  ## Plain wrapper that forwards to maskBlendSubtractSimd; this is the proc
  ## maskBlenderSimd returns for SubtractMaskBlend.
  result = maskBlendSubtractSimd(backdrop, source)
|
||||
|
||||
proc maskBlenderSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
|
||||
## Returns a blend masking function with SIMD support.
|
||||
case blendMode:
|
||||
of NormalBlend: maskBlendNormalSimdMasker
|
||||
of MaskBlend: maskBlendMaskSimdMasker
|
||||
of NormalBlend: maskBlendNormalSimdMaskBlender
|
||||
of MaskBlend: maskBlendMaskSimdMaskBlender
|
||||
of OverwriteBlend: overwriteSimdBlender
|
||||
of SubtractMaskBlend: maskBlendSubtractSimdMaskBlender
|
||||
of ExcludeMaskBlend: maskBlendExcludeSimdMaskBlender
|
||||
else:
|
||||
raise newException(PixieError, "No SIMD masker for " & $blendMode)
|
||||
|
||||
proc hasSimdMasker*(blendMode: BlendMode): bool {.inline, raises: [].} =
|
||||
proc hasSimdMaskBlender*(blendMode: BlendMode): bool {.inline, raises: [].} =
|
||||
## Is there a blend masking function with SIMD support?
|
||||
blendMode in {NormalBlend, MaskBlend, OverwriteBlend}
|
||||
blendMode in {
|
||||
NormalBlend,
|
||||
MaskBlend,
|
||||
OverwriteBlend,
|
||||
SubtractMaskBlend,
|
||||
ExcludeMaskBlend
|
||||
}
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
|
@ -714,7 +714,7 @@ proc drawUber(
|
|||
when type(a) is Image:
|
||||
let blender = blendMode.blender()
|
||||
else: # a is a Mask
|
||||
let masker = blendMode.masker()
|
||||
let maskBlender = blendMode.maskBlender()
|
||||
|
||||
if blendMode == MaskBlend:
|
||||
if yMin > 0:
|
||||
|
@ -777,7 +777,7 @@ proc drawUber(
|
|||
let sample = b.getRgbaSmooth(srcPos.x, srcPos.y).a
|
||||
else: # b is a Mask
|
||||
let sample = b.getValueSmooth(srcPos.x, srcPos.y)
|
||||
a.unsafe[x, y] = masker(backdrop, sample)
|
||||
a.unsafe[x, y] = maskBlender(backdrop, sample)
|
||||
|
||||
srcPos += dx
|
||||
|
||||
|
@ -972,8 +972,8 @@ proc drawUber(
|
|||
x += 16
|
||||
sx += 16
|
||||
else: # is a Mask
|
||||
if blendMode.hasSimdMasker():
|
||||
let maskerSimd = blendMode.maskerSimd()
|
||||
if blendMode.hasSimdMaskBlender():
|
||||
let maskerSimd = blendMode.maskBlenderSimd()
|
||||
for _ in 0 ..< (xStop - xStart) div 16:
|
||||
let backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
|
||||
when type(b) is Image:
|
||||
|
@ -1089,7 +1089,7 @@ proc drawUber(
|
|||
let sample = b.unsafe[samplePos.x, samplePos.y].a
|
||||
else: # b is a Mask
|
||||
let sample = b.unsafe[samplePos.x, samplePos.y]
|
||||
a.unsafe[x, y] = masker(backdrop, sample)
|
||||
a.unsafe[x, y] = maskBlender(backdrop, sample)
|
||||
srcPos += dx
|
||||
|
||||
if blendMode == MaskBlend:
|
||||
|
|
|
@ -1223,6 +1223,20 @@ iterator walk(
|
|||
if prevAt != width.float32.fixed32 and count != 0:
|
||||
echo "Leak detected: ", count, " @ (", prevAt, ", ", y, ")"
|
||||
|
||||
iterator walkInteger(
  hits: seq[(int32, int16)],
  numHits: int,
  windingRule: WindingRule,
  y, width: int
): (int, int) =
  ## Wraps `walk` and yields (start, length) pairs, where both bounds of each
  ## walked span have been converted with `.integer`. Spans whose length is
  ## zero or negative after conversion are not yielded.
  for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
    let spanStart = prevAt.integer
    let spanLen = at.integer - spanStart
    if spanLen > 0:
      yield (spanStart, spanLen)
|
||||
|
||||
proc computeCoverage(
|
||||
coverages: ptr UncheckedArray[uint8],
|
||||
hits: var seq[(Fixed32, int16)],
|
||||
|
@ -1443,9 +1457,9 @@ proc fillCoverage(
|
|||
) =
|
||||
var x = startX
|
||||
when defined(amd64) and allowSimd:
|
||||
if blendMode.hasSimdMasker():
|
||||
if blendMode.hasSimdMaskBlender():
|
||||
let
|
||||
maskerSimd = blendMode.maskerSimd()
|
||||
maskerSimd = blendMode.maskBlenderSimd()
|
||||
vecZero = mm_setzero_si128()
|
||||
for _ in 0 ..< coverages.len div 16:
|
||||
let
|
||||
|
@ -1465,7 +1479,7 @@ proc fillCoverage(
|
|||
mm_storeu_si128(mask.data[index].addr, vecZero)
|
||||
x += 16
|
||||
|
||||
let masker = blendMode.masker()
|
||||
let maskBlender = blendMode.maskBlender()
|
||||
for x in x ..< startX + coverages.len:
|
||||
let coverage = coverages[x - startX]
|
||||
if coverage != 0 or blendMode == ExcludeMaskBlend:
|
||||
|
@ -1473,7 +1487,7 @@ proc fillCoverage(
|
|||
mask.unsafe[x, y] = coverage
|
||||
else:
|
||||
let backdrop = mask.unsafe[x, y]
|
||||
mask.unsafe[x, y] = masker(backdrop, coverage)
|
||||
mask.unsafe[x, y] = maskBlender(backdrop, coverage)
|
||||
elif blendMode == MaskBlend:
|
||||
mask.unsafe[x, y] = 0
|
||||
|
||||
|
@ -1481,22 +1495,6 @@ proc fillCoverage(
|
|||
mask.clearUnsafe(0, y, startX, y)
|
||||
mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
|
||||
|
||||
template walkHits(
|
||||
hits: seq[(int32, int16)],
|
||||
numHits: int,
|
||||
windingRule: WindingRule,
|
||||
y, width: int,
|
||||
inner: untyped
|
||||
) =
|
||||
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
|
||||
let
|
||||
fillStart {.inject.} = prevAt.integer
|
||||
fillLen {.inject.} = at.integer - fillStart
|
||||
if fillLen <= 0:
|
||||
continue
|
||||
|
||||
inner
|
||||
|
||||
proc fillHits(
|
||||
image: Image,
|
||||
rgbx: ColorRGBX,
|
||||
|
@ -1506,38 +1504,36 @@ proc fillHits(
|
|||
windingRule: WindingRule,
|
||||
blendMode: BlendMode
|
||||
) =
|
||||
template simdBlob(image: Image, x: var int, blendProc: untyped) =
|
||||
template simdBlob(image: Image, x: var int, len: int, blendProc: untyped) =
|
||||
when allowSimd:
|
||||
when defined(amd64):
|
||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
for _ in 0 ..< fillLen div 4:
|
||||
let
|
||||
index = image.dataIndex(x, y)
|
||||
backdrop = mm_loadu_si128(image.data[index].addr)
|
||||
mm_storeu_si128(image.data[index].addr, blendProc(backdrop, colorVec))
|
||||
for _ in 0 ..< len div 4:
|
||||
let backdrop = mm_loadu_si128(image.unsafe[x, y].addr)
|
||||
mm_storeu_si128(image.unsafe[x, y].addr, blendProc(backdrop, colorVec))
|
||||
x += 4
|
||||
|
||||
case blendMode:
|
||||
of OverwriteBlend:
|
||||
walkHits hits, numHits, windingRule, y, image.width:
|
||||
fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||
fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
|
||||
|
||||
of NormalBlend:
|
||||
walkHits hits, numHits, windingRule, y, image.width:
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||
if rgbx.a == 255:
|
||||
fillUnsafe(image.data, rgbx, image.dataIndex(fillStart, y), fillLen)
|
||||
fillUnsafe(image.data, rgbx, image.dataIndex(start, y), len)
|
||||
else:
|
||||
var x = fillStart
|
||||
simdBlob(image, x, blendNormalSimd)
|
||||
for x in x ..< fillStart + fillLen:
|
||||
var x = start
|
||||
simdBlob(image, x, len, blendNormalSimd)
|
||||
for x in x ..< start + len:
|
||||
let backdrop = image.unsafe[x, y]
|
||||
image.unsafe[x, y] = blendNormal(backdrop, rgbx)
|
||||
|
||||
of MaskBlend:
|
||||
var filledTo = startX
|
||||
walkHits hits, numHits, windingRule, y, image.width:
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||
block: # Clear any gap between this fill and the previous fill
|
||||
let gapBetween = fillStart - filledTo
|
||||
let gapBetween = start - filledTo
|
||||
if gapBetween > 0:
|
||||
fillUnsafe(
|
||||
image.data,
|
||||
|
@ -1545,12 +1541,12 @@ proc fillHits(
|
|||
image.dataIndex(filledTo, y),
|
||||
gapBetween
|
||||
)
|
||||
filledTo = fillStart + fillLen
|
||||
filledTo = start + len
|
||||
block: # Handle this fill
|
||||
if rgbx.a != 255:
|
||||
var x = fillStart
|
||||
simdBlob(image, x, blendMaskSimd)
|
||||
for x in x ..< fillStart + fillLen:
|
||||
var x = start
|
||||
simdBlob(image, x, len, blendMaskSimd)
|
||||
for x in x ..< start + len:
|
||||
let backdrop = image.unsafe[x, y]
|
||||
image.unsafe[x, y] = blendMask(backdrop, rgbx)
|
||||
|
||||
|
@ -1559,8 +1555,8 @@ proc fillHits(
|
|||
|
||||
else:
|
||||
let blender = blendMode.blender()
|
||||
walkHits hits, numHits, windingRule, y, image.width:
|
||||
for x in fillStart ..< fillStart + fillLen:
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, image.width):
|
||||
for x in start ..< start + len:
|
||||
let backdrop = image.unsafe[x, y]
|
||||
image.unsafe[x, y] = blender(backdrop, rgbx)
|
||||
|
||||
|
@ -1572,31 +1568,44 @@ proc fillHits(
|
|||
windingRule: WindingRule,
|
||||
blendMode: BlendMode
|
||||
) =
|
||||
template simdBlob(mask: Mask, x: var int, len: int, blendProc: untyped) =
|
||||
when allowSimd:
|
||||
when defined(amd64):
|
||||
let vec255 = mm_set1_epi8(255)
|
||||
for _ in 0 ..< len div 16:
|
||||
let backdrop = mm_loadu_si128(mask.unsafe[x, y].addr)
|
||||
mm_storeu_si128(mask.unsafe[x, y].addr, blendProc(backdrop, vec255))
|
||||
x += 16
|
||||
|
||||
case blendMode:
|
||||
of NormalBlend, OverwriteBlend:
|
||||
walkHits hits, numHits, windingRule, y, mask.width:
|
||||
fillUnsafe(mask.data, 255, mask.dataIndex(fillStart, y), fillLen)
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
|
||||
fillUnsafe(mask.data, 255, mask.dataIndex(start, y), len)
|
||||
|
||||
of MaskBlend:
|
||||
var filledTo = startX
|
||||
walkHits hits, numHits, windingRule,y, mask.width:
|
||||
let gapBetween = fillStart - filledTo
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
|
||||
let gapBetween = start - filledTo
|
||||
if gapBetween > 0:
|
||||
fillUnsafe(mask.data, 0, mask.dataIndex(filledTo, y), gapBetween)
|
||||
filledTo = fillStart + fillLen
|
||||
filledTo = start + len
|
||||
|
||||
mask.clearUnsafe(0, y, startX, y)
|
||||
mask.clearUnsafe(filledTo, y, mask.width, y)
|
||||
|
||||
of SubtractMaskBlend:
|
||||
walkHits hits, numHits, windingRule, y, mask.width:
|
||||
for x in fillStart ..< fillStart + fillLen:
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
|
||||
var x = start
|
||||
simdBlob(mask, x, len, maskBlendSubtractSimd)
|
||||
for x in x ..< start + len:
|
||||
let backdrop = mask.unsafe[x, y]
|
||||
mask.unsafe[x, y] = maskBlendSubtract(backdrop, 255)
|
||||
|
||||
of ExcludeMaskBlend:
|
||||
walkHits hits, numHits, windingRule, y, mask.width:
|
||||
for x in fillStart ..< fillStart + fillLen:
|
||||
for (start, len) in hits.walkInteger(numHits, windingRule, y, mask.width):
|
||||
var x = start
|
||||
simdBlob(mask, x, len, maskBlendExcludeSimd)
|
||||
for x in x ..< start + len:
|
||||
let backdrop = mask.unsafe[x, y]
|
||||
mask.unsafe[x, y] = maskBlendExclude(backdrop, 255)
|
||||
|
||||
|
|
Loading…
Reference in a new issue