commit 3cfab49185
2 changed files with 183 additions and 43 deletions
@@ -518,7 +518,7 @@ when defined(amd64) and not defined(pixieNoSimd):
   proc blendNormalSimd(backdrop, source: M128i): M128i =
     blendNormalInlineSimd(backdrop, source)

-  proc blendMaskSimd(backdrop, source: M128i): M128i =
+  proc blendMaskInlineSimd*(backdrop, source: M128i): M128i {.inline.} =
     let
       alphaMask = mm_set1_epi32(cast[int32](0xff000000))
       oddMask = mm_set1_epi16(cast[int16](0xff00))
@@ -539,6 +539,9 @@ when defined(amd64) and not defined(pixieNoSimd):
     mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))

+  proc blendMaskSimd(backdrop, source: M128i): M128i =
+    blendMaskInlineSimd(backdrop, source)
+
   proc blendOverwriteSimd(backdrop, source: M128i): M128i =
     source

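Note (not part of the diff): the exported *InlineSimd procs carry {.inline.} so call sites such as drawUber can inline the blend per group of pixels, while the thin blendMaskSimd wrapper keeps the existing blenderSimd proc-pointer API unchanged. A minimal sketch of the difference, assuming an amd64 build of pixie without -d:pixieNoSimd and that nimsimd/sse2 and pixie's blends module are importable:

import nimsimd/sse2, pixie/blends

let
  backdrop = mm_set1_epi32(cast[int32](0xff000000)) # four pixels, alpha = 255
  source = mm_setzero_si128()                       # four fully transparent pixels

# Through the proc-pointer API: resolved at runtime, never inlined.
let blender = bmNormal.blenderSimd()
discard blender(backdrop, source)

# Direct call: the compiler is free to inline blendNormalInlineSimd here.
discard blendNormalInlineSimd(backdrop, source)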
@@ -555,7 +558,7 @@ when defined(amd64) and not defined(pixieNoSimd):
     ## Is there a blend function for a given blend mode with SIMD support?
     blendMode in {bmNormal, bmMask, bmOverwrite}

-  proc maskNormalSimd(backdrop, source: M128i): M128i =
+  proc maskNormalInlineSimd*(backdrop, source: M128i): M128i {.inline.} =
     ## Blending masks
     let
       oddMask = mm_set1_epi16(cast[int16](0xff00))
@@ -592,7 +595,10 @@ when defined(amd64) and not defined(pixieNoSimd):
     mm_or_si128(blendedEven, mm_slli_epi16(blendedOdd, 8))

-  proc maskMaskSimd(backdrop, source: M128i): M128i =
+  proc maskNormalSimd(backdrop, source: M128i): M128i =
+    maskNormalInlineSimd(backdrop, source)
+
+  proc maskMaskInlineSimd*(backdrop, source: M128i): M128i =
     let
       oddMask = mm_set1_epi16(cast[int16](0xff00))
       div255 = mm_set1_epi16(cast[int16](0x8081))
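Note (not part of the diff): the 0x8081 constant above is the classic multiply-high reciprocal of 255. For every product of two 8-bit values, multiplying by 0x8081 and shifting right by 23 (a 16-bit high multiply plus a further shift of 7) gives exactly the same result as integer division by 255. A quick self-contained check of that identity, using only the Nim standard library:

# 65025 = 255 * 255 is the largest product the 8-bit blend math produces.
for x in 0 .. 65025:
  doAssert (x.uint32 * 0x8081'u32) shr 23 == x.uint32 div 255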
@@ -613,6 +619,9 @@ when defined(amd64) and not defined(pixieNoSimd):
     mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))

+  proc maskMaskSimd(backdrop, source: M128i): M128i =
+    maskMaskInlineSimd(backdrop, source)
+
   proc maskerSimd*(blendMode: BlendMode): MaskerSimd {.raises: [PixieError].} =
     ## Returns a blend masking function with SIMD support.
     case blendMode:
@@ -798,7 +798,138 @@ proc drawUber(
           continue

     when defined(amd64) and not defined(pixieNoSimd):
-      # Check we are not rotated
-      when type(a) is Image:
-        if blendMode.hasSimdBlender():
-          let blenderSimd = blendMode.blenderSimd()
+      case blendMode:
+      of bmOverwrite:
+        for _ in 0 ..< (xStop - xStart) div 16:
+          let
+            srcPos = p + dx * x.float32 + dy * y.float32
+            sx = srcPos.x.int
+            sy = srcPos.y.int
+          when type(a) is Image:
+            when type(b) is Image:
+              for q in [0, 4, 8, 12]:
+                let sourceVec = mm_loadu_si128(b.data[b.dataIndex(sx + q, sy)].addr)
+                mm_storeu_si128(a.data[a.dataIndex(x + q, y)].addr, sourceVec)
+            else: # b is a Mask
+              var values = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
+              for q in [0, 4, 8, 12]:
+                let sourceVec = unpackAlphaValues(values)
+                mm_storeu_si128(a.data[a.dataIndex(x + q, y)].addr, sourceVec)
+                # Shuffle 32 bits off for the next iteration
+                values = mm_srli_si128(values, 4)
+          else: # a is a Mask
+            when type(b) is Image:
+              var
+                i = mm_loadu_si128(b.data[b.dataIndex(sx + 0, sy)].addr)
+                j = mm_loadu_si128(b.data[b.dataIndex(sx + 4, sy)].addr)
+                k = mm_loadu_si128(b.data[b.dataIndex(sx + 8, sy)].addr)
+                l = mm_loadu_si128(b.data[b.dataIndex(sx + 12, sy)].addr)
+              let sourceVec = pack4xAlphaValues(i, j, k, l)
+            else: # b is a Mask
+              let sourceVec = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
+            mm_storeu_si128(a.data[a.dataIndex(x, y)].addr, sourceVec)
+          x += 16
+      of bmNormal:
+        let vec255 = mm_set1_epi32(cast[int32](uint32.high))
+        for _ in 0 ..< (xStop - xStart) div 16:
+          let
+            srcPos = p + dx * x.float32 + dy * y.float32
+            sx = srcPos.x.int
+            sy = srcPos.y.int
+          when type(a) is Image:
+            when type(b) is Image:
+              for q in [0, 4, 8, 12]:
+                let sourceVec = mm_loadu_si128(b.data[b.dataIndex(sx + q, sy)].addr)
+                if mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, mm_setzero_si128())) != 0xffff:
+                  if (mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, vec255)) and 0x8888) == 0x8888:
+                    mm_storeu_si128(a.data[a.dataIndex(x + q, y)].addr, sourceVec)
+                  else:
+                    let backdropVec = mm_loadu_si128(a.data[a.dataIndex(x + q, y)].addr)
+                    mm_storeu_si128(
+                      a.data[a.dataIndex(x + q, y)].addr,
+                      blendNormalInlineSimd(backdropVec, sourceVec)
+                    )
+            else: # b is a Mask
+              var values = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
+              for q in [0, 4, 8, 12]:
+                let sourceVec = unpackAlphaValues(values)
+                if mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, mm_setzero_si128())) != 0xffff:
+                  if (mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, vec255)) and 0x8888) == 0x8888:
+                    discard
+                  else:
+                    let backdropVec = mm_loadu_si128(a.data[a.dataIndex(x + q, y)].addr)
+                    mm_storeu_si128(
+                      a.data[a.dataIndex(x + q, y)].addr,
+                      blendNormalInlineSimd(backdropVec, sourceVec)
+                    )
+                # Shuffle 32 bits off for the next iteration
+                values = mm_srli_si128(values, 4)
+          else: # a is a Mask
+            let backdropVec = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
+            when type(b) is Image:
+              var
+                i = mm_loadu_si128(b.data[b.dataIndex(sx + 0, sy)].addr)
+                j = mm_loadu_si128(b.data[b.dataIndex(sx + 4, sy)].addr)
+                k = mm_loadu_si128(b.data[b.dataIndex(sx + 8, sy)].addr)
+                l = mm_loadu_si128(b.data[b.dataIndex(sx + 12, sy)].addr)
+              let sourceVec = pack4xAlphaValues(i, j, k, l)
+            else: # b is a Mask
+              let sourceVec = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
+            mm_storeu_si128(
+              a.data[a.dataIndex(x, y)].addr,
+              maskNormalInlineSimd(backdropVec, sourceVec)
+            )
+          x += 16
+      of bmMask:
+        let vec255 = mm_set1_epi32(cast[int32](uint32.high))
+        for _ in 0 ..< (xStop - xStart) div 16:
+          let
+            srcPos = p + dx * x.float32 + dy * y.float32
+            sx = srcPos.x.int
+            sy = srcPos.y.int
+          when type(a) is Image:
+            when type(b) is Image:
+              for q in [0, 4, 8, 12]:
+                let sourceVec = mm_loadu_si128(b.data[b.dataIndex(sx + q, sy)].addr)
+                if mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, mm_setzero_si128())) == 0xffff:
+                  mm_storeu_si128(a.data[a.dataIndex(x + q, y)].addr, mm_setzero_si128())
+                elif mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, vec255)) != 0xffff:
+                  let backdropVec = mm_loadu_si128(a.data[a.dataIndex(x + q, y)].addr)
+                  mm_storeu_si128(
+                    a.data[a.dataIndex(x + q, y)].addr,
+                    blendMaskInlineSimd(backdropVec, sourceVec)
+                  )
+            else: # b is a Mask
+              var values = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
+              for q in [0, 4, 8, 12]:
+                let sourceVec = unpackAlphaValues(values)
+                if mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, mm_setzero_si128())) == 0xffff:
+                  mm_storeu_si128(a.data[a.dataIndex(x + q, y)].addr, mm_setzero_si128())
+                elif (mm_movemask_epi8(mm_cmpeq_epi8(sourceVec, vec255)) and 0x8888) != 0x8888:
+                  let backdropVec = mm_loadu_si128(a.data[a.dataIndex(x + q, y)].addr)
+                  mm_storeu_si128(
+                    a.data[a.dataIndex(x + q, y)].addr,
+                    blendMaskInlineSimd(backdropVec, sourceVec)
+                  )
+                # Shuffle 32 bits off for the next iteration
+                values = mm_srli_si128(values, 4)
+          else: # a is a Mask
+            let backdropVec = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
+            when type(b) is Image:
+              var
+                i = mm_loadu_si128(b.data[b.dataIndex(sx + 0, sy)].addr)
+                j = mm_loadu_si128(b.data[b.dataIndex(sx + 4, sy)].addr)
+                k = mm_loadu_si128(b.data[b.dataIndex(sx + 8, sy)].addr)
+                l = mm_loadu_si128(b.data[b.dataIndex(sx + 12, sy)].addr)
+              let sourceVec = pack4xAlphaValues(i, j, k, l)
+            else: # b is a Mask
+              let sourceVec = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
+            mm_storeu_si128(
+              a.data[a.dataIndex(x, y)].addr,
+              maskMaskInlineSimd(backdropVec, sourceVec)
+            )
+          x += 16
+      else:
+        when type(a) is Image:
+          if blendMode.hasSimdBlender():
+            let blenderSimd = blendMode.blenderSimd()
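Note (not part of the diff): each iteration of the new loops advances x by 16 because an M128i holds either 16 one-byte mask values or 4 packed 32-bit RGBX pixels (hence the four loads per step when both a and b are images). The movemask tests are the fast paths: comparing against zero and checking for 0xffff detects a fully transparent source block, and the 0x8888 mask picks out the alpha byte of each of the four pixels, since alpha is the high byte of every 32-bit lane. A minimal sketch of that alpha test, assuming an amd64 build with nimsimd/sse2 available:

import nimsimd/sse2

let
  pixels = mm_set1_epi32(cast[int32](0xff000000)) # four lanes: alpha = 255, RGB = 0
  vec255 = mm_set1_epi32(cast[int32](uint32.high))
  eq = mm_cmpeq_epi8(pixels, vec255) # 0xff in every byte that equals 255

# mm_movemask_epi8 collects the top bit of all 16 bytes; bits 3, 7, 11 and 15
# (mask 0x8888) are the alpha bytes of the four packed pixels, so this test
# reads "all four source pixels are fully opaque".
doAssert (mm_movemask_epi8(eq) and 0x8888) == 0x8888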