simd image blend when possible
This commit is contained in:
parent
133bb0aa11
commit
01f6248522
2 changed files with 98 additions and 14 deletions
|
@ -534,16 +534,41 @@ when defined(amd64) and not defined(pixieNoSimd):
|
|||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
proc blendMaskSimd*(backdrop, source: M128i): M128i =
  ## Mask blend on packed 32-bit pixels: every byte of a backdrop pixel is
  ## scaled by the top byte (alpha) of the matching source pixel, i.e.
  ## backdrop * sourceAlpha / 255.
  let
    hiByteOf16 = mm_set1_epi16(cast[int16](0xff00))
    topByteOf32 = mm_set1_epi32(cast[int32](0xff000000))
    recip255 = mm_set1_epi16(cast[int16](0x8081))

    # Keep only the alpha byte, then mirror it into the upper byte of the
    # lower 16-bit half so both halves of each pixel hold alpha shl 8.
    alphaOnly = mm_and_si128(source, topByteOf32)
    alphaPerLane = mm_or_si128(alphaOnly, mm_srli_epi32(alphaOnly, 16))

    # Move every backdrop byte into the upper byte of its own 16-bit lane.
    evenHigh = mm_slli_epi16(backdrop, 8)
    oddHigh = mm_and_si128(backdrop, hiByteOf16)

    # (byte shl 8) mulhi (alpha shl 8) leaves byte * alpha in the lane.
    evenProduct = mm_mulhi_epu16(evenHigh, alphaPerLane)
    oddProduct = mm_mulhi_epu16(oddHigh, alphaPerLane)

    # Divide by 255 using a multiply-high and a shift.
    evenScaled = mm_srli_epi16(mm_mulhi_epu16(evenProduct, recip255), 7)
    oddScaled = mm_srli_epi16(mm_mulhi_epu16(oddProduct, recip255), 7)

  # Put the odd bytes back in the upper half and merge with the even bytes.
  mm_or_si128(evenScaled, mm_slli_epi16(oddScaled, 8))
|
||||
|
||||
proc blendOverwriteSimd*(backdrop, source: M128i): M128i =
  ## Overwrite blend: the backdrop is ignored and the source is returned
  ## unchanged.
  result = source
|
||||
|
||||
proc blenderSimd*(blendMode: BlendMode): BlenderSimd =
  ## Looks up the SIMD blend proc implementing `blendMode`.
  ## Raises PixieError for every mode other than bmNormal, bmMask and
  ## bmOverwrite.
  if blendMode == bmNormal:
    result = blendNormalSimd
  elif blendMode == bmMask:
    result = blendMaskSimd
  elif blendMode == bmOverwrite:
    result = blendOverwriteSimd
  else:
    raise newException(PixieError, "No SIMD blender for " & $blendMode)
|
||||
|
||||
proc hasSimdBlender*(blendMode: BlendMode): bool =
  ## Reports whether `blendMode` is one of the modes that has a SIMD blend
  ## proc (bmNormal, bmMask or bmOverwrite).
  case blendMode
  of bmNormal, bmMask, bmOverwrite: true
  else: false
|
||||
|
||||
proc maskNormalSimd*(backdrop, source: M128i): M128i =
|
||||
## Blending masks
|
||||
let
|
||||
|
@ -583,12 +608,46 @@ when defined(amd64) and not defined(pixieNoSimd):
|
|||
|
||||
mm_or_si128(blendedEven, blendedOdd)
|
||||
|
||||
proc maskMaskSimd*(backdrop, source: M128i): M128i =
  ## Mask blend on packed 8-bit mask values: every result byte is
  ## backdrop * source / 255, so a source of 255 keeps the backdrop and a
  ## source of 0 clears it.
  ##
  ## The previous version multiplied by (255 - source), which is the
  ## coefficient used by the normal blend and inverted the mask.
  let
    oddMask = mm_set1_epi16(cast[int16](0xff00))
    div255 = mm_set1_epi16(cast[int16](0x8081))

    # Move each source byte into the upper byte of its own 16-bit lane.
    # The left shift already discards the odd byte, no extra masking needed.
    sourceEven = mm_slli_epi16(source, 8)
    sourceOdd = mm_and_si128(source, oddMask)

  var
    backdropEven = mm_slli_epi16(backdrop, 8)
    backdropOdd = mm_and_si128(backdrop, oddMask)

  # backdrop * source
  # (b shl 8) mulhi (s shl 8) leaves the 16-bit product b * s in the lane.
  backdropEven = mm_mulhi_epu16(backdropEven, sourceEven)
  backdropOdd = mm_mulhi_epu16(backdropOdd, sourceOdd)

  # div 255
  backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
  backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)

  # Put the odd bytes back into the upper half of their lanes and merge.
  backdropOdd = mm_slli_epi16(backdropOdd, 8)

  mm_or_si128(backdropEven, backdropOdd)
|
||||
|
||||
proc maskerSimd*(blendMode: BlendMode): MaskerSimd =
  ## Looks up the SIMD mask blend proc implementing `blendMode`.
  ## Raises PixieError for every mode other than bmNormal, bmMask and
  ## bmOverwrite.
  if blendMode == bmNormal:
    result = maskNormalSimd
  elif blendMode == bmMask:
    result = maskMaskSimd
  elif blendMode == bmOverwrite:
    # Overwrite just returns the source, so the pixel version is reused.
    result = blendOverwriteSimd
  else:
    raise newException(PixieError, "No SIMD masker for " & $blendMode)
|
||||
|
||||
proc hasSimdMasker*(blendMode: BlendMode): bool =
  ## Reports whether `blendMode` is one of the modes that has a SIMD mask
  ## blend proc (bmNormal, bmMask or bmOverwrite).
  case blendMode
  of bmNormal, bmMask, bmOverwrite: true
  else: false
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
|
@ -630,8 +630,8 @@ proc drawUber(
|
|||
yMin = a.height
|
||||
yMax = 0
|
||||
for segment in perimeter:
|
||||
yMin = min(yMin, segment.at.y.floor.int)
|
||||
yMax = max(yMax, segment.at.y.ceil.int)
|
||||
yMin = min(yMin, segment.at.y.floor.int)
|
||||
yMax = max(yMax, segment.at.y.ceil.int)
|
||||
|
||||
yMin = yMin.clamp(0, a.height)
|
||||
yMax = yMax.clamp(0, a.height)
|
||||
|
@ -659,18 +659,43 @@ proc drawUber(
|
|||
if xMin > 0:
|
||||
zeroMem(a.data[a.dataIndex(0, y)].addr, 4 * xMin)
|
||||
|
||||
for x in xMin ..< xMax:
|
||||
let
|
||||
srcPos = p + dx * float32(x) + dy * float32(y)
|
||||
xFloat = srcPos.x - h
|
||||
yFloat = srcPos.y - h
|
||||
rgba = a.getRgbaUnsafe(x, y)
|
||||
rgba2 =
|
||||
if smooth:
|
||||
b.getRgbaSmooth(xFloat, yFloat)
|
||||
else:
|
||||
b.getRgbaUnsafe(xFloat.int, yFloat.int)
|
||||
a.setRgbaUnsafe(x, y, blender(rgba, rgba2))
|
||||
if smooth:
|
||||
for x in xMin ..< xMax:
|
||||
let
|
||||
srcPos = p + dx * x.float32 + dy * y.float32
|
||||
xFloat = srcPos.x - h
|
||||
yFloat = srcPos.y - h
|
||||
backdrop = a.getRgbaUnsafe(x, y)
|
||||
source = b.getRgbaSmooth(xFloat, yFloat)
|
||||
a.setRgbaUnsafe(x, y, blender(backdrop, source))
|
||||
else:
|
||||
var x = xMin
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
if dx.y == 0 and dy.x == 0 and blendMode.hasSimdBlender():
|
||||
# Check we are not rotated before using SIMD blends
|
||||
let blenderSimd = blendMode.blenderSimd()
|
||||
for _ in countup(x, xMax - 4, 4):
|
||||
let
|
||||
srcPos = p + dx * x.float32 + dy * y.float32
|
||||
sx = srcPos.x.int
|
||||
sy = srcPos.y.int
|
||||
backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
|
||||
source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
|
||||
mm_storeu_si128(
|
||||
a.data[a.dataIndex(x, y)].addr,
|
||||
blenderSimd(backdrop, source)
|
||||
)
|
||||
x += 4
|
||||
|
||||
for _ in x ..< xMax:
|
||||
let
|
||||
srcPos = p + dx * x.float32 + dy * y.float32
|
||||
xFloat = srcPos.x - h
|
||||
yFloat = srcPos.y - h
|
||||
backdrop = a.getRgbaUnsafe(x, y)
|
||||
source = b.getRgbaUnsafe(xFloat.int, yFloat.int)
|
||||
a.setRgbaUnsafe(x, y, blender(backdrop, source))
|
||||
inc x
|
||||
|
||||
if blendMode == bmIntersectMask:
|
||||
if a.width - xMax > 0:
|
||||
|
|
Loading…
Reference in a new issue