inline simd faster
This commit is contained in:
parent
8ddd22d761
commit
ee5074c628
3 changed files with 78 additions and 36 deletions
|
@ -184,3 +184,15 @@ block:
|
||||||
# a.writeFile("pixie4.png")
|
# a.writeFile("pixie4.png")
|
||||||
|
|
||||||
# doDiff(readImage("cairo4.png"), a, "4")
|
# doDiff(readImage("cairo4.png"), a, "4")
|
||||||
|
|
||||||
|
let mask = newMask(1000, 1000)
|
||||||
|
|
||||||
|
timeIt "pixie4 mask":
|
||||||
|
mask.fill(63)
|
||||||
|
|
||||||
|
let p = newPath()
|
||||||
|
p.moveTo(shapes[0][0])
|
||||||
|
for shape in shapes:
|
||||||
|
for v in shape:
|
||||||
|
p.lineTo(v)
|
||||||
|
mask.fillPath(p)
|
||||||
|
|
|
@ -510,7 +510,7 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
MaskerSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].}
|
MaskerSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].}
|
||||||
## Function signature returned by maskerSimd.
|
## Function signature returned by maskerSimd.
|
||||||
|
|
||||||
proc blendNormalSimd(backdrop, source: M128i): M128i =
|
proc blendNormalInlineSimd*(backdrop, source: M128i): M128i {.inline.} =
|
||||||
let
|
let
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
@ -539,6 +539,9 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
proc blendNormalSimd(backdrop, source: M128i): M128i =
|
||||||
|
blendNormalInlineSimd(backdrop, source)
|
||||||
|
|
||||||
proc blendMaskSimd(backdrop, source: M128i): M128i =
|
proc blendMaskSimd(backdrop, source: M128i): M128i =
|
||||||
let
|
let
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
|
|
|
@ -1338,9 +1338,17 @@ proc fillCoverage(
|
||||||
# If the coverages are not all zero
|
# If the coverages are not all zero
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
|
if mm_movemask_epi8(mm_cmpeq_epi32(coverageVec, vec255)) == 0xffff:
|
||||||
# If the coverages are all 255
|
# If the coverages are all 255
|
||||||
if blendMode == bmNormal and rgbx.a == 255:
|
if blendMode == bmNormal:
|
||||||
|
if rgbx.a == 255:
|
||||||
for i in 0 ..< 4:
|
for i in 0 ..< 4:
|
||||||
mm_storeu_si128(image.data[index + i * 4].addr, colorVec)
|
mm_storeu_si128(image.data[index + i * 4].addr, colorVec)
|
||||||
|
else:
|
||||||
|
for i in 0 ..< 4:
|
||||||
|
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
||||||
|
mm_storeu_si128(
|
||||||
|
image.data[index + i * 4].addr,
|
||||||
|
blendNormalInlineSimd(backdrop, colorVec)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
for i in 0 ..< 4:
|
for i in 0 ..< 4:
|
||||||
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
||||||
|
@ -1350,6 +1358,7 @@ proc fillCoverage(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Coverages are not all 255
|
# Coverages are not all 255
|
||||||
|
template useCoverage(blendProc: untyped) =
|
||||||
var coverageVec = coverageVec
|
var coverageVec = coverageVec
|
||||||
for i in 0 ..< 4:
|
for i in 0 ..< 4:
|
||||||
var unpacked = unpackAlphaValues(coverageVec)
|
var unpacked = unpackAlphaValues(coverageVec)
|
||||||
|
@ -1372,11 +1381,16 @@ proc fillCoverage(
|
||||||
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
image.data[index + i * 4].addr,
|
image.data[index + i * 4].addr,
|
||||||
blenderSimd(backdrop, source)
|
blendProc(backdrop, source)
|
||||||
)
|
)
|
||||||
|
|
||||||
coverageVec = mm_srli_si128(coverageVec, 4)
|
coverageVec = mm_srli_si128(coverageVec, 4)
|
||||||
|
|
||||||
|
if blendMode == bmNormal:
|
||||||
|
useCoverage(blendNormalInlineSimd)
|
||||||
|
else:
|
||||||
|
useCoverage(blenderSimd)
|
||||||
|
|
||||||
elif blendMode == bmMask:
|
elif blendMode == bmMask:
|
||||||
for i in 0 ..< 4:
|
for i in 0 ..< 4:
|
||||||
mm_storeu_si128(image.data[index + i * 4].addr, zeroVec)
|
mm_storeu_si128(image.data[index + i * 4].addr, zeroVec)
|
||||||
|
@ -1460,6 +1474,7 @@ proc fillHits(
|
||||||
let
|
let
|
||||||
blender = blendMode.blender()
|
blender = blendMode.blender()
|
||||||
width = image.width.float32
|
width = image.width.float32
|
||||||
|
|
||||||
var filledTo: int
|
var filledTo: int
|
||||||
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
|
for (prevAt, at, count) in hits.walk(numHits, windingRule, y, width):
|
||||||
let
|
let
|
||||||
|
@ -1478,9 +1493,21 @@ proc fillHits(
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
if blendMode.hasSimdBlender():
|
if blendMode.hasSimdBlender():
|
||||||
# When supported, SIMD blend as much as possible
|
# When supported, SIMD blend as much as possible
|
||||||
|
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||||
|
if blendMode == bmNormal:
|
||||||
|
# For path filling, bmNormal is almost always used.
|
||||||
|
# Inline SIMD is faster here.
|
||||||
|
for _ in 0 ..< fillLen div 4:
|
||||||
let
|
let
|
||||||
blenderSimd = blendMode.blenderSimd()
|
index = image.dataIndex(x, y)
|
||||||
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
backdrop = mm_loadu_si128(image.data[index].addr)
|
||||||
|
mm_storeu_si128(
|
||||||
|
image.data[index].addr,
|
||||||
|
blendNormalInlineSimd(backdrop, colorVec)
|
||||||
|
)
|
||||||
|
x += 4
|
||||||
|
else:
|
||||||
|
let blenderSimd = blendMode.blenderSimd()
|
||||||
for _ in 0 ..< fillLen div 4:
|
for _ in 0 ..< fillLen div 4:
|
||||||
let
|
let
|
||||||
index = image.dataIndex(x, y)
|
index = image.dataIndex(x, y)
|
||||||
|
|
Loading…
Reference in a new issue