commit
926d52cc6b
4 changed files with 101 additions and 115 deletions
|
@ -1,4 +1,4 @@
|
||||||
import cairo, math, benchy, pixie, pixie/paths, chroma
|
import cairo, math, benchy, pixie, chroma
|
||||||
|
|
||||||
var
|
var
|
||||||
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
|
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
|
||||||
|
@ -18,7 +18,7 @@ timeIt "cairo":
|
||||||
ctx.fill()
|
ctx.fill()
|
||||||
surface.flush()
|
surface.flush()
|
||||||
|
|
||||||
discard surface.writeToPng("cairo.png")
|
# discard surface.writeToPng("cairo.png")
|
||||||
|
|
||||||
var a = newImage(1000, 1000)
|
var a = newImage(1000, 1000)
|
||||||
a.fill(rgba(0, 0, 0, 255))
|
a.fill(rgba(0, 0, 0, 255))
|
||||||
|
@ -32,4 +32,4 @@ timeIt "pixie":
|
||||||
p.closePath()
|
p.closePath()
|
||||||
a.fillPath(p, rgba(0, 0, 255, 255))
|
a.fillPath(p, rgba(0, 0, 255, 255))
|
||||||
|
|
||||||
discard surface.writeToPng("pixie.png")
|
# a.writeFile("pixie.png")
|
||||||
|
|
|
@ -516,7 +516,32 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
result = mm_or_si128(mm_or_si128(result, i), mm_or_si128(j, k))
|
result = mm_or_si128(mm_or_si128(result, i), mm_or_si128(j, k))
|
||||||
result = mm_and_si128(result, first32)
|
result = mm_and_si128(result, first32)
|
||||||
|
|
||||||
proc blendNormalSimd*(backdrop, source: M128i): M128i =
|
proc unpackAlphaValues*(v: M128i): M128i {.inline.} =
|
||||||
|
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value)
|
||||||
|
let
|
||||||
|
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
|
||||||
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) # Only `a`
|
||||||
|
|
||||||
|
result = mm_shuffle_epi32(v, MM_SHUFFLE(0, 0, 0, 0))
|
||||||
|
|
||||||
|
var
|
||||||
|
i = mm_and_si128(result, first32)
|
||||||
|
j = mm_and_si128(result, mm_slli_si128(first32, 4))
|
||||||
|
k = mm_and_si128(result, mm_slli_si128(first32, 8))
|
||||||
|
l = mm_and_si128(result, mm_slli_si128(first32, 12))
|
||||||
|
|
||||||
|
# Shift the values to `a`
|
||||||
|
i = mm_slli_si128(i, 3)
|
||||||
|
j = mm_slli_si128(j, 2)
|
||||||
|
k = mm_slli_si128(k, 1)
|
||||||
|
# l = mm_slli_si128(l, 0)
|
||||||
|
|
||||||
|
result = mm_and_si128(
|
||||||
|
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l)),
|
||||||
|
alphaMask
|
||||||
|
)
|
||||||
|
|
||||||
|
proc blendNormalSimd(backdrop, source: M128i): M128i =
|
||||||
let
|
let
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
@ -545,7 +570,7 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
)
|
)
|
||||||
|
|
||||||
proc blendMaskSimd*(backdrop, source: M128i): M128i =
|
proc blendMaskSimd(backdrop, source: M128i): M128i =
|
||||||
let
|
let
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
@ -566,7 +591,7 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
|
||||||
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
|
|
||||||
proc blendOverwriteSimd*(backdrop, source: M128i): M128i =
|
proc blendOverwriteSimd(backdrop, source: M128i): M128i =
|
||||||
source
|
source
|
||||||
|
|
||||||
proc blenderSimd*(blendMode: BlendMode): BlenderSimd =
|
proc blenderSimd*(blendMode: BlendMode): BlenderSimd =
|
||||||
|
@ -580,7 +605,7 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
proc hasSimdBlender*(blendMode: BlendMode): bool =
|
proc hasSimdBlender*(blendMode: BlendMode): bool =
|
||||||
blendMode in {bmNormal, bmMask, bmOverwrite}
|
blendMode in {bmNormal, bmMask, bmOverwrite}
|
||||||
|
|
||||||
proc maskNormalSimd*(backdrop, source: M128i): M128i =
|
proc maskNormalSimd(backdrop, source: M128i): M128i =
|
||||||
## Blending masks
|
## Blending masks
|
||||||
let
|
let
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
|
@ -615,11 +640,9 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
blendedEven = mm_add_epi16(sourceEven, backdropEven)
|
blendedEven = mm_add_epi16(sourceEven, backdropEven)
|
||||||
blendedOdd = mm_add_epi16(sourceOdd, backdropOdd)
|
blendedOdd = mm_add_epi16(sourceOdd, backdropOdd)
|
||||||
|
|
||||||
blendedOdd = mm_slli_epi16(blendedOdd, 8)
|
mm_or_si128(blendedEven, mm_slli_epi16(blendedOdd, 8))
|
||||||
|
|
||||||
mm_or_si128(blendedEven, blendedOdd)
|
proc maskMaskSimd(backdrop, source: M128i): M128i =
|
||||||
|
|
||||||
proc maskMaskSimd*(backdrop, source: M128i): M128i =
|
|
||||||
let
|
let
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
|
@ -638,9 +661,7 @@ when defined(amd64) and not defined(pixieNoSimd):
|
||||||
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
|
||||||
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
|
||||||
|
|
||||||
backdropOdd = mm_slli_epi16(backdropOdd, 8)
|
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
|
||||||
|
|
||||||
mm_or_si128(backdropEven, backdropOdd)
|
|
||||||
|
|
||||||
proc maskerSimd*(blendMode: BlendMode): MaskerSimd =
|
proc maskerSimd*(blendMode: BlendMode): MaskerSimd =
|
||||||
case blendMode:
|
case blendMode:
|
||||||
|
|
|
@ -686,10 +686,7 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
|
||||||
# Check we are not rotated before using SIMD blends
|
# Check we are not rotated before using SIMD blends
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
if blendMode.hasSimdBlender():
|
if blendMode.hasSimdBlender():
|
||||||
let
|
let blenderSimd = blendMode.blenderSimd()
|
||||||
blenderSimd = blendMode.blenderSimd()
|
|
||||||
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
|
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) # Only `a`
|
|
||||||
for _ in countup(x, xMax - 4, 4):
|
for _ in countup(x, xMax - 4, 4):
|
||||||
let
|
let
|
||||||
srcPos = p + dx * x.float32 + dy * y.float32
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
|
@ -701,24 +698,7 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
|
||||||
else: # b is a Mask
|
else: # b is a Mask
|
||||||
# Need to move 4 mask values into the alpha slots
|
# Need to move 4 mask values into the alpha slots
|
||||||
var source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
|
var source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
|
||||||
source = mm_slli_si128(source, 2)
|
source = unpackAlphaValues(source)
|
||||||
source = mm_shuffle_epi32(source, MM_SHUFFLE(1, 1, 0, 0))
|
|
||||||
|
|
||||||
var
|
|
||||||
i = mm_and_si128(source, first32)
|
|
||||||
j = mm_and_si128(source, mm_slli_si128(first32, 4))
|
|
||||||
k = mm_and_si128(source, mm_slli_si128(first32, 8))
|
|
||||||
l = mm_and_si128(source, mm_slli_si128(first32, 12))
|
|
||||||
|
|
||||||
# Shift the values to `a`
|
|
||||||
i = mm_slli_si128(i, 1)
|
|
||||||
k = mm_slli_si128(k, 3)
|
|
||||||
l = mm_slli_si128(l, 2)
|
|
||||||
|
|
||||||
source = mm_and_si128(
|
|
||||||
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l)),
|
|
||||||
alphaMask
|
|
||||||
)
|
|
||||||
|
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
a.data[a.dataIndex(x, y)].addr,
|
a.data[a.dataIndex(x, y)].addr,
|
||||||
|
|
|
@ -783,14 +783,14 @@ proc quickSort(a: var seq[(float32, int16)], inl, inr: int) =
|
||||||
quickSort(a, inl, r)
|
quickSort(a, inl, r)
|
||||||
quickSort(a, l, inr)
|
quickSort(a, l, inr)
|
||||||
|
|
||||||
proc computeBounds(seqs: varargs[seq[(Segment, int16)]]): Rect =
|
proc computeBounds(partitions: seq[seq[(Segment, int16)]]): Rect =
|
||||||
var
|
var
|
||||||
xMin = float32.high
|
xMin = float32.high
|
||||||
xMax = float32.low
|
xMax = float32.low
|
||||||
yMin = float32.high
|
yMin = float32.high
|
||||||
yMax = float32.low
|
yMax = float32.low
|
||||||
for s in seqs:
|
for partition in partitions:
|
||||||
for (segment, _) in s:
|
for (segment, _) in partition:
|
||||||
xMin = min(xMin, min(segment.at.x, segment.to.x))
|
xMin = min(xMin, min(segment.at.x, segment.to.x))
|
||||||
xMax = max(xMax, max(segment.at.x, segment.to.x))
|
xMax = max(xMax, max(segment.at.x, segment.to.x))
|
||||||
yMin = min(yMin, min(segment.at.y, segment.to.y))
|
yMin = min(yMin, min(segment.at.y, segment.to.y))
|
||||||
|
@ -813,11 +813,23 @@ proc shouldFill(windingRule: WindingRule, count: int): bool {.inline.} =
|
||||||
of wrEvenOdd:
|
of wrEvenOdd:
|
||||||
count mod 2 != 0
|
count mod 2 != 0
|
||||||
|
|
||||||
proc partitionSegments(shapes: seq[seq[Vec2]], middle: int): tuple[
|
proc partitionSegments(
|
||||||
topHalf: seq[(Segment, int16)],
|
shapes: seq[seq[Vec2]], height: int
|
||||||
bottomHalf: seq[(Segment, int16)],
|
): seq[seq[(Segment, int16)]] =
|
||||||
fullHeight: seq[(Segment, int16)]
|
## Puts segments into the height partitions they intersect with.
|
||||||
] =
|
|
||||||
|
var segmentCount: int
|
||||||
|
for shape in shapes:
|
||||||
|
segmentCount += shape.len - 1
|
||||||
|
|
||||||
|
let
|
||||||
|
maxPartitions = max(1, height div 10)
|
||||||
|
numPartitions = min(maxPartitions, max(1, segmentCount div 10))
|
||||||
|
|
||||||
|
result.setLen(numPartitions)
|
||||||
|
|
||||||
|
let partitionHeight = height div numPartitions
|
||||||
|
|
||||||
for shape in shapes:
|
for shape in shapes:
|
||||||
for segment in shape.segments:
|
for segment in shape.segments:
|
||||||
if segment.at.y == segment.to.y: # Skip horizontal
|
if segment.at.y == segment.to.y: # Skip horizontal
|
||||||
|
@ -828,19 +840,22 @@ proc partitionSegments(shapes: seq[seq[Vec2]], middle: int): tuple[
|
||||||
if segment.at.y > segment.to.y:
|
if segment.at.y > segment.to.y:
|
||||||
swap(segment.at, segment.to)
|
swap(segment.at, segment.to)
|
||||||
winding = -1
|
winding = -1
|
||||||
if ceil(segment.to.y).int < middle:
|
|
||||||
result.topHalf.add((segment, winding))
|
if partitionHeight == 0:
|
||||||
elif segment.at.y.int >= middle:
|
result[0].add((segment, winding))
|
||||||
result.bottomHalf.add((segment, winding))
|
|
||||||
else:
|
else:
|
||||||
result.fullHeight.add((segment, winding))
|
let
|
||||||
|
atPartition = max(0, segment.at.y).int div partitionHeight
|
||||||
|
toPartition = max(0, ceil(segment.to.y)).int div partitionHeight
|
||||||
|
for i in min(atPartition, result.high) .. min(toPartition, result.high):
|
||||||
|
result[i].add((segment, winding))
|
||||||
|
|
||||||
proc computeCoverages(
|
proc computeCoverages(
|
||||||
coverages: var seq[uint8],
|
coverages: var seq[uint8],
|
||||||
hits: var seq[(float32, int16)],
|
hits: var seq[(float32, int16)],
|
||||||
size: Vec2,
|
size: Vec2,
|
||||||
y: int,
|
y: int,
|
||||||
topHalf, bottomHalf, fullHeight: seq[(Segment, int16)],
|
partitions: seq[seq[(Segment, int16)]],
|
||||||
windingRule: WindingRule
|
windingRule: WindingRule
|
||||||
) =
|
) =
|
||||||
const
|
const
|
||||||
|
@ -850,37 +865,30 @@ proc computeCoverages(
|
||||||
offset = 1 / quality.float32
|
offset = 1 / quality.float32
|
||||||
initialOffset = offset / 2
|
initialOffset = offset / 2
|
||||||
|
|
||||||
proc intersects(
|
|
||||||
scanline: Line,
|
|
||||||
segment: Segment,
|
|
||||||
winding: int16,
|
|
||||||
hits: var seq[(float32, int16)],
|
|
||||||
numHits: var int
|
|
||||||
) {.inline.} =
|
|
||||||
if segment.at.y <= scanline.a.y and segment.to.y >= scanline.a.y:
|
|
||||||
var at: Vec2
|
|
||||||
if scanline.intersects(segment, at):# and segment.to != at:
|
|
||||||
if numHits == hits.len:
|
|
||||||
hits.setLen(hits.len * 2)
|
|
||||||
hits[numHits] = (at.x.clamp(0, scanline.b.x), winding)
|
|
||||||
inc numHits
|
|
||||||
|
|
||||||
var numHits: int
|
var numHits: int
|
||||||
|
|
||||||
|
let
|
||||||
|
partitionHeight = size.y.int div partitions.len
|
||||||
|
partition =
|
||||||
|
if partitionHeight == 0:
|
||||||
|
0
|
||||||
|
else:
|
||||||
|
min(y div partitionHeight, partitions.high)
|
||||||
|
|
||||||
# Do scanlines for this row
|
# Do scanlines for this row
|
||||||
for m in 0 ..< quality:
|
for m in 0 ..< quality:
|
||||||
let
|
let
|
||||||
yLine = y.float32 + initialOffset + offset * m.float32 + ep
|
yLine = y.float32 + initialOffset + offset * m.float32 + ep
|
||||||
scanline = Line(a: vec2(0, yLine), b: vec2(size.x, yLine))
|
scanline = Line(a: vec2(0, yLine), b: vec2(size.x, yLine))
|
||||||
numHits = 0
|
numHits = 0
|
||||||
if y < size.y.int div 2:
|
for (segment, winding) in partitions[partition]:
|
||||||
for (segment, winding) in topHalf:
|
if segment.at.y <= scanline.a.y and segment.to.y >= scanline.a.y:
|
||||||
scanline.intersects(segment, winding, hits, numHits)
|
var at: Vec2
|
||||||
else:
|
if scanline.intersects(segment, at):# and segment.to != at:
|
||||||
for (segment, winding) in bottomHalf:
|
if numHits == hits.len:
|
||||||
scanline.intersects(segment, winding, hits, numHits)
|
hits.setLen(hits.len * 2)
|
||||||
for (segment, winding) in fullHeight:
|
hits[numHits] = (at.x.clamp(0, scanline.b.x), winding)
|
||||||
scanline.intersects(segment, winding, hits, numHits)
|
inc numHits
|
||||||
|
|
||||||
quickSort(hits, 0, numHits - 1)
|
quickSort(hits, 0, numHits - 1)
|
||||||
|
|
||||||
|
@ -928,13 +936,12 @@ proc fillShapes(
|
||||||
windingRule: WindingRule,
|
windingRule: WindingRule,
|
||||||
blendMode: BlendMode
|
blendMode: BlendMode
|
||||||
) =
|
) =
|
||||||
let (topHalf, bottomHalf, fullHeight) =
|
let partitions = partitionSegments(shapes, image.height)
|
||||||
partitionSegments(shapes, image.height div 2)
|
|
||||||
|
|
||||||
# Figure out the total bounds of all the shapes,
|
# Figure out the total bounds of all the shapes,
|
||||||
# rasterize only within the total bounds
|
# rasterize only within the total bounds
|
||||||
let
|
let
|
||||||
bounds = computeBounds(topHalf, bottomHalf, fullHeight)
|
bounds = computeBounds(partitions)
|
||||||
startX = max(0, bounds.x.int)
|
startX = max(0, bounds.x.int)
|
||||||
startY = max(0, bounds.y.int)
|
startY = max(0, bounds.y.int)
|
||||||
stopY = min(image.height, (bounds.y + bounds.h).int)
|
stopY = min(image.height, (bounds.y + bounds.h).int)
|
||||||
|
@ -956,7 +963,7 @@ proc fillShapes(
|
||||||
hits,
|
hits,
|
||||||
image.wh,
|
image.wh,
|
||||||
y,
|
y,
|
||||||
topHalf, bottomHalf, fullHeight,
|
partitions,
|
||||||
windingRule
|
windingRule
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -966,13 +973,11 @@ proc fillShapes(
|
||||||
# When supported, SIMD blend as much as possible
|
# When supported, SIMD blend as much as possible
|
||||||
let
|
let
|
||||||
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
|
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
|
||||||
redMask = mm_set1_epi32(cast[int32](0x000000ff)) # Only `r`
|
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
div255 = mm_set1_epi16(cast[int16](0x8081))
|
div255 = mm_set1_epi16(cast[int16](0x8081))
|
||||||
v255 = mm_set1_epi32(255)
|
|
||||||
vColor = mm_set1_epi32(cast[int32](color))
|
vColor = mm_set1_epi32(cast[int32](color))
|
||||||
|
|
||||||
for _ in countup(x, coverages.len - 16, 16):
|
for _ in countup(x, image.width - 16, 4):
|
||||||
var coverage = mm_loadu_si128(coverages[x].addr)
|
var coverage = mm_loadu_si128(coverages[x].addr)
|
||||||
coverage = mm_and_si128(coverage, first32)
|
coverage = mm_and_si128(coverage, first32)
|
||||||
|
|
||||||
|
@ -981,32 +986,11 @@ proc fillShapes(
|
||||||
# If the coverages are not all zero
|
# If the coverages are not all zero
|
||||||
var source = vColor
|
var source = vColor
|
||||||
|
|
||||||
coverage = mm_slli_si128(coverage, 2)
|
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) != 0xffff:
|
||||||
coverage = mm_shuffle_epi32(coverage, MM_SHUFFLE(1, 1, 0, 0))
|
|
||||||
|
|
||||||
var
|
|
||||||
a = mm_and_si128(coverage, first32)
|
|
||||||
b = mm_and_si128(coverage, mm_slli_si128(first32, 4))
|
|
||||||
c = mm_and_si128(coverage, mm_slli_si128(first32, 8))
|
|
||||||
d = mm_and_si128(coverage, mm_slli_si128(first32, 12))
|
|
||||||
|
|
||||||
# Shift the coverages to `r`
|
|
||||||
a = mm_srli_si128(a, 2)
|
|
||||||
b = mm_srli_si128(b, 3)
|
|
||||||
d = mm_srli_si128(d, 1)
|
|
||||||
|
|
||||||
coverage = mm_and_si128(
|
|
||||||
mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d)),
|
|
||||||
redMask
|
|
||||||
)
|
|
||||||
|
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, v255)) != 0xffff:
|
|
||||||
# If the coverages are not all 255
|
# If the coverages are not all 255
|
||||||
|
coverage = unpackAlphaValues(coverage)
|
||||||
# Shift the coverages from `r` to `g` and `a` for multiplying later
|
# Shift the coverages from `a` to `g` and `a` for multiplying
|
||||||
coverage = mm_or_si128(
|
coverage = mm_or_si128(coverage, mm_srli_epi32(coverage, 16))
|
||||||
mm_slli_epi32(coverage, 8), mm_slli_epi32(coverage, 24)
|
|
||||||
)
|
|
||||||
|
|
||||||
var
|
var
|
||||||
colorEven = mm_slli_epi16(source, 8)
|
colorEven = mm_slli_epi16(source, 8)
|
||||||
|
@ -1052,13 +1036,12 @@ proc fillShapes(
|
||||||
shapes: seq[seq[Vec2]],
|
shapes: seq[seq[Vec2]],
|
||||||
windingRule: WindingRule
|
windingRule: WindingRule
|
||||||
) =
|
) =
|
||||||
let (topHalf, bottomHalf, fullHeight) =
|
let partitions = partitionSegments(shapes, mask.height)
|
||||||
partitionSegments(shapes, mask.height div 2)
|
|
||||||
|
|
||||||
# Figure out the total bounds of all the shapes,
|
# Figure out the total bounds of all the shapes,
|
||||||
# rasterize only within the total bounds
|
# rasterize only within the total bounds
|
||||||
let
|
let
|
||||||
bounds = computeBounds(topHalf, bottomHalf, fullHeight)
|
bounds = computeBounds(partitions)
|
||||||
startX = max(0, bounds.x.int)
|
startX = max(0, bounds.x.int)
|
||||||
startY = max(0, bounds.y.int)
|
startY = max(0, bounds.y.int)
|
||||||
stopY = min(mask.height, (bounds.y + bounds.h).int)
|
stopY = min(mask.height, (bounds.y + bounds.h).int)
|
||||||
|
@ -1067,6 +1050,10 @@ proc fillShapes(
|
||||||
coverages = newSeq[uint8](mask.width)
|
coverages = newSeq[uint8](mask.width)
|
||||||
hits = newSeq[(float32, int16)](4)
|
hits = newSeq[(float32, int16)](4)
|
||||||
|
|
||||||
|
|
||||||
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
let maskerSimd = bmNormal.maskerSimd()
|
||||||
|
|
||||||
for y in startY ..< stopY:
|
for y in startY ..< stopY:
|
||||||
# Reset buffer for this row
|
# Reset buffer for this row
|
||||||
zeroMem(coverages[0].addr, coverages.len)
|
zeroMem(coverages[0].addr, coverages.len)
|
||||||
|
@ -1076,7 +1063,7 @@ proc fillShapes(
|
||||||
hits,
|
hits,
|
||||||
mask.wh,
|
mask.wh,
|
||||||
y,
|
y,
|
||||||
topHalf, bottomHalf, fullHeight,
|
partitions,
|
||||||
windingRule
|
windingRule
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1085,18 +1072,16 @@ proc fillShapes(
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
# When supported, SIMD blend as much as possible
|
# When supported, SIMD blend as much as possible
|
||||||
for _ in countup(x, coverages.len - 16, 16):
|
for _ in countup(x, coverages.len - 16, 16):
|
||||||
var coverage = mm_loadu_si128(coverages[x].addr)
|
let
|
||||||
|
coverage = mm_loadu_si128(coverages[x].addr)
|
||||||
let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
|
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
|
||||||
if mm_movemask_epi8(eqZero) != 0xffff:
|
if mm_movemask_epi8(eqZero) != 0xffff:
|
||||||
# If the coverages are not all zero
|
# If the coverages are not all zero
|
||||||
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
||||||
|
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
mask.data[mask.dataIndex(x, y)].addr,
|
mask.data[mask.dataIndex(x, y)].addr,
|
||||||
maskNormalSimd(backdrop, coverage)
|
maskerSimd(backdrop, coverage)
|
||||||
)
|
)
|
||||||
|
|
||||||
x += 16
|
x += 16
|
||||||
|
|
||||||
while x < mask.width:
|
while x < mask.width:
|
||||||
|
|
Loading…
Reference in a new issue