Merge pull request #108 from guzba/master

faster paths.nim
This commit is contained in:
treeform 2021-02-13 19:26:24 -08:00 committed by GitHub
commit 926d52cc6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 101 additions and 115 deletions

View file

@ -1,4 +1,4 @@
import cairo, math, benchy, pixie, pixie/paths, chroma import cairo, math, benchy, pixie, chroma
var var
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
@ -18,7 +18,7 @@ timeIt "cairo":
ctx.fill() ctx.fill()
surface.flush() surface.flush()
discard surface.writeToPng("cairo.png") # discard surface.writeToPng("cairo.png")
var a = newImage(1000, 1000) var a = newImage(1000, 1000)
a.fill(rgba(0, 0, 0, 255)) a.fill(rgba(0, 0, 0, 255))
@ -32,4 +32,4 @@ timeIt "pixie":
p.closePath() p.closePath()
a.fillPath(p, rgba(0, 0, 255, 255)) a.fillPath(p, rgba(0, 0, 255, 255))
discard surface.writeToPng("pixie.png") # a.writeFile("pixie.png")

View file

@ -516,7 +516,32 @@ when defined(amd64) and not defined(pixieNoSimd):
result = mm_or_si128(mm_or_si128(result, i), mm_or_si128(j, k)) result = mm_or_si128(mm_or_si128(result, i), mm_or_si128(j, k))
result = mm_and_si128(result, first32) result = mm_and_si128(result, first32)
proc blendNormalSimd*(backdrop, source: M128i): M128i = proc unpackAlphaValues*(v: M128i): M128i {.inline.} =
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value)
let
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) # Only `a`
result = mm_shuffle_epi32(v, MM_SHUFFLE(0, 0, 0, 0))
var
i = mm_and_si128(result, first32)
j = mm_and_si128(result, mm_slli_si128(first32, 4))
k = mm_and_si128(result, mm_slli_si128(first32, 8))
l = mm_and_si128(result, mm_slli_si128(first32, 12))
# Shift the values to `a`
i = mm_slli_si128(i, 3)
j = mm_slli_si128(j, 2)
k = mm_slli_si128(k, 1)
# l = mm_slli_si128(l, 0)
result = mm_and_si128(
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l)),
alphaMask
)
proc blendNormalSimd(backdrop, source: M128i): M128i =
let let
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) alphaMask = mm_set1_epi32(cast[int32](0xff000000))
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
@ -545,7 +570,7 @@ when defined(amd64) and not defined(pixieNoSimd):
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
) )
proc blendMaskSimd*(backdrop, source: M128i): M128i = proc blendMaskSimd(backdrop, source: M128i): M128i =
let let
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) alphaMask = mm_set1_epi32(cast[int32](0xff000000))
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
@ -566,7 +591,7 @@ when defined(amd64) and not defined(pixieNoSimd):
mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8)) mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
proc blendOverwriteSimd*(backdrop, source: M128i): M128i = proc blendOverwriteSimd(backdrop, source: M128i): M128i =
source source
proc blenderSimd*(blendMode: BlendMode): BlenderSimd = proc blenderSimd*(blendMode: BlendMode): BlenderSimd =
@ -580,7 +605,7 @@ when defined(amd64) and not defined(pixieNoSimd):
proc hasSimdBlender*(blendMode: BlendMode): bool = proc hasSimdBlender*(blendMode: BlendMode): bool =
blendMode in {bmNormal, bmMask, bmOverwrite} blendMode in {bmNormal, bmMask, bmOverwrite}
proc maskNormalSimd*(backdrop, source: M128i): M128i = proc maskNormalSimd(backdrop, source: M128i): M128i =
## Blending masks ## Blending masks
let let
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
@ -615,11 +640,9 @@ when defined(amd64) and not defined(pixieNoSimd):
blendedEven = mm_add_epi16(sourceEven, backdropEven) blendedEven = mm_add_epi16(sourceEven, backdropEven)
blendedOdd = mm_add_epi16(sourceOdd, backdropOdd) blendedOdd = mm_add_epi16(sourceOdd, backdropOdd)
blendedOdd = mm_slli_epi16(blendedOdd, 8) mm_or_si128(blendedEven, mm_slli_epi16(blendedOdd, 8))
mm_or_si128(blendedEven, blendedOdd) proc maskMaskSimd(backdrop, source: M128i): M128i =
proc maskMaskSimd*(backdrop, source: M128i): M128i =
let let
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081)) div255 = mm_set1_epi16(cast[int16](0x8081))
@ -638,9 +661,7 @@ when defined(amd64) and not defined(pixieNoSimd):
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
backdropOdd = mm_slli_epi16(backdropOdd, 8) mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
mm_or_si128(backdropEven, backdropOdd)
proc maskerSimd*(blendMode: BlendMode): MaskerSimd = proc maskerSimd*(blendMode: BlendMode): MaskerSimd =
case blendMode: case blendMode:

View file

@ -686,10 +686,7 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
# Check we are not rotated before using SIMD blends # Check we are not rotated before using SIMD blends
when type(a) is Image: when type(a) is Image:
if blendMode.hasSimdBlender(): if blendMode.hasSimdBlender():
let let blenderSimd = blendMode.blenderSimd()
blenderSimd = blendMode.blenderSimd()
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) # Only `a`
for _ in countup(x, xMax - 4, 4): for _ in countup(x, xMax - 4, 4):
let let
srcPos = p + dx * x.float32 + dy * y.float32 srcPos = p + dx * x.float32 + dy * y.float32
@ -701,24 +698,7 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
else: # b is a Mask else: # b is a Mask
# Need to move 4 mask values into the alpha slots # Need to move 4 mask values into the alpha slots
var source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr) var source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
source = mm_slli_si128(source, 2) source = unpackAlphaValues(source)
source = mm_shuffle_epi32(source, MM_SHUFFLE(1, 1, 0, 0))
var
i = mm_and_si128(source, first32)
j = mm_and_si128(source, mm_slli_si128(first32, 4))
k = mm_and_si128(source, mm_slli_si128(first32, 8))
l = mm_and_si128(source, mm_slli_si128(first32, 12))
# Shift the values to `a`
i = mm_slli_si128(i, 1)
k = mm_slli_si128(k, 3)
l = mm_slli_si128(l, 2)
source = mm_and_si128(
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l)),
alphaMask
)
mm_storeu_si128( mm_storeu_si128(
a.data[a.dataIndex(x, y)].addr, a.data[a.dataIndex(x, y)].addr,

View file

@ -783,14 +783,14 @@ proc quickSort(a: var seq[(float32, int16)], inl, inr: int) =
quickSort(a, inl, r) quickSort(a, inl, r)
quickSort(a, l, inr) quickSort(a, l, inr)
proc computeBounds(seqs: varargs[seq[(Segment, int16)]]): Rect = proc computeBounds(partitions: seq[seq[(Segment, int16)]]): Rect =
var var
xMin = float32.high xMin = float32.high
xMax = float32.low xMax = float32.low
yMin = float32.high yMin = float32.high
yMax = float32.low yMax = float32.low
for s in seqs: for partition in partitions:
for (segment, _) in s: for (segment, _) in partition:
xMin = min(xMin, min(segment.at.x, segment.to.x)) xMin = min(xMin, min(segment.at.x, segment.to.x))
xMax = max(xMax, max(segment.at.x, segment.to.x)) xMax = max(xMax, max(segment.at.x, segment.to.x))
yMin = min(yMin, min(segment.at.y, segment.to.y)) yMin = min(yMin, min(segment.at.y, segment.to.y))
@ -813,11 +813,23 @@ proc shouldFill(windingRule: WindingRule, count: int): bool {.inline.} =
of wrEvenOdd: of wrEvenOdd:
count mod 2 != 0 count mod 2 != 0
proc partitionSegments(shapes: seq[seq[Vec2]], middle: int): tuple[ proc partitionSegments(
topHalf: seq[(Segment, int16)], shapes: seq[seq[Vec2]], height: int
bottomHalf: seq[(Segment, int16)], ): seq[seq[(Segment, int16)]] =
fullHeight: seq[(Segment, int16)] ## Puts segments into the height partitions they intersect with.
] =
var segmentCount: int
for shape in shapes:
segmentCount += shape.len - 1
let
maxPartitions = max(1, height div 10)
numPartitions = min(maxPartitions, max(1, segmentCount div 10))
result.setLen(numPartitions)
let partitionHeight = height div numPartitions
for shape in shapes: for shape in shapes:
for segment in shape.segments: for segment in shape.segments:
if segment.at.y == segment.to.y: # Skip horizontal if segment.at.y == segment.to.y: # Skip horizontal
@ -828,19 +840,22 @@ proc partitionSegments(shapes: seq[seq[Vec2]], middle: int): tuple[
if segment.at.y > segment.to.y: if segment.at.y > segment.to.y:
swap(segment.at, segment.to) swap(segment.at, segment.to)
winding = -1 winding = -1
if ceil(segment.to.y).int < middle:
result.topHalf.add((segment, winding)) if partitionHeight == 0:
elif segment.at.y.int >= middle: result[0].add((segment, winding))
result.bottomHalf.add((segment, winding))
else: else:
result.fullHeight.add((segment, winding)) let
atPartition = max(0, segment.at.y).int div partitionHeight
toPartition = max(0, ceil(segment.to.y)).int div partitionHeight
for i in min(atPartition, result.high) .. min(toPartition, result.high):
result[i].add((segment, winding))
proc computeCoverages( proc computeCoverages(
coverages: var seq[uint8], coverages: var seq[uint8],
hits: var seq[(float32, int16)], hits: var seq[(float32, int16)],
size: Vec2, size: Vec2,
y: int, y: int,
topHalf, bottomHalf, fullHeight: seq[(Segment, int16)], partitions: seq[seq[(Segment, int16)]],
windingRule: WindingRule windingRule: WindingRule
) = ) =
const const
@ -850,37 +865,30 @@ proc computeCoverages(
offset = 1 / quality.float32 offset = 1 / quality.float32
initialOffset = offset / 2 initialOffset = offset / 2
proc intersects(
scanline: Line,
segment: Segment,
winding: int16,
hits: var seq[(float32, int16)],
numHits: var int
) {.inline.} =
if segment.at.y <= scanline.a.y and segment.to.y >= scanline.a.y:
var at: Vec2
if scanline.intersects(segment, at):# and segment.to != at:
if numHits == hits.len:
hits.setLen(hits.len * 2)
hits[numHits] = (at.x.clamp(0, scanline.b.x), winding)
inc numHits
var numHits: int var numHits: int
let
partitionHeight = size.y.int div partitions.len
partition =
if partitionHeight == 0:
0
else:
min(y div partitionHeight, partitions.high)
# Do scanlines for this row # Do scanlines for this row
for m in 0 ..< quality: for m in 0 ..< quality:
let let
yLine = y.float32 + initialOffset + offset * m.float32 + ep yLine = y.float32 + initialOffset + offset * m.float32 + ep
scanline = Line(a: vec2(0, yLine), b: vec2(size.x, yLine)) scanline = Line(a: vec2(0, yLine), b: vec2(size.x, yLine))
numHits = 0 numHits = 0
if y < size.y.int div 2: for (segment, winding) in partitions[partition]:
for (segment, winding) in topHalf: if segment.at.y <= scanline.a.y and segment.to.y >= scanline.a.y:
scanline.intersects(segment, winding, hits, numHits) var at: Vec2
else: if scanline.intersects(segment, at):# and segment.to != at:
for (segment, winding) in bottomHalf: if numHits == hits.len:
scanline.intersects(segment, winding, hits, numHits) hits.setLen(hits.len * 2)
for (segment, winding) in fullHeight: hits[numHits] = (at.x.clamp(0, scanline.b.x), winding)
scanline.intersects(segment, winding, hits, numHits) inc numHits
quickSort(hits, 0, numHits - 1) quickSort(hits, 0, numHits - 1)
@ -928,13 +936,12 @@ proc fillShapes(
windingRule: WindingRule, windingRule: WindingRule,
blendMode: BlendMode blendMode: BlendMode
) = ) =
let (topHalf, bottomHalf, fullHeight) = let partitions = partitionSegments(shapes, image.height)
partitionSegments(shapes, image.height div 2)
# Figure out the total bounds of all the shapes, # Figure out the total bounds of all the shapes,
# rasterize only within the total bounds # rasterize only within the total bounds
let let
bounds = computeBounds(topHalf, bottomHalf, fullHeight) bounds = computeBounds(partitions)
startX = max(0, bounds.x.int) startX = max(0, bounds.x.int)
startY = max(0, bounds.y.int) startY = max(0, bounds.y.int)
stopY = min(image.height, (bounds.y + bounds.h).int) stopY = min(image.height, (bounds.y + bounds.h).int)
@ -956,7 +963,7 @@ proc fillShapes(
hits, hits,
image.wh, image.wh,
y, y,
topHalf, bottomHalf, fullHeight, partitions,
windingRule windingRule
) )
@ -966,13 +973,11 @@ proc fillShapes(
# When supported, SIMD blend as much as possible # When supported, SIMD blend as much as possible
let let
first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits first32 = cast[M128i]([uint32.high, 0, 0, 0]) # First 32 bits
redMask = mm_set1_epi32(cast[int32](0x000000ff)) # Only `r`
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081)) div255 = mm_set1_epi16(cast[int16](0x8081))
v255 = mm_set1_epi32(255)
vColor = mm_set1_epi32(cast[int32](color)) vColor = mm_set1_epi32(cast[int32](color))
for _ in countup(x, coverages.len - 16, 16): for _ in countup(x, image.width - 16, 4):
var coverage = mm_loadu_si128(coverages[x].addr) var coverage = mm_loadu_si128(coverages[x].addr)
coverage = mm_and_si128(coverage, first32) coverage = mm_and_si128(coverage, first32)
@ -981,32 +986,11 @@ proc fillShapes(
# If the coverages are not all zero # If the coverages are not all zero
var source = vColor var source = vColor
coverage = mm_slli_si128(coverage, 2) if mm_movemask_epi8(mm_cmpeq_epi32(coverage, first32)) != 0xffff:
coverage = mm_shuffle_epi32(coverage, MM_SHUFFLE(1, 1, 0, 0))
var
a = mm_and_si128(coverage, first32)
b = mm_and_si128(coverage, mm_slli_si128(first32, 4))
c = mm_and_si128(coverage, mm_slli_si128(first32, 8))
d = mm_and_si128(coverage, mm_slli_si128(first32, 12))
# Shift the coverages to `r`
a = mm_srli_si128(a, 2)
b = mm_srli_si128(b, 3)
d = mm_srli_si128(d, 1)
coverage = mm_and_si128(
mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d)),
redMask
)
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, v255)) != 0xffff:
# If the coverages are not all 255 # If the coverages are not all 255
coverage = unpackAlphaValues(coverage)
# Shift the coverages from `r` to `g` and `a` for multiplying later # Shift the coverages from `a` to `g` and `a` for multiplying
coverage = mm_or_si128( coverage = mm_or_si128(coverage, mm_srli_epi32(coverage, 16))
mm_slli_epi32(coverage, 8), mm_slli_epi32(coverage, 24)
)
var var
colorEven = mm_slli_epi16(source, 8) colorEven = mm_slli_epi16(source, 8)
@ -1052,13 +1036,12 @@ proc fillShapes(
shapes: seq[seq[Vec2]], shapes: seq[seq[Vec2]],
windingRule: WindingRule windingRule: WindingRule
) = ) =
let (topHalf, bottomHalf, fullHeight) = let partitions = partitionSegments(shapes, mask.height)
partitionSegments(shapes, mask.height div 2)
# Figure out the total bounds of all the shapes, # Figure out the total bounds of all the shapes,
# rasterize only within the total bounds # rasterize only within the total bounds
let let
bounds = computeBounds(topHalf, bottomHalf, fullHeight) bounds = computeBounds(partitions)
startX = max(0, bounds.x.int) startX = max(0, bounds.x.int)
startY = max(0, bounds.y.int) startY = max(0, bounds.y.int)
stopY = min(mask.height, (bounds.y + bounds.h).int) stopY = min(mask.height, (bounds.y + bounds.h).int)
@ -1067,6 +1050,10 @@ proc fillShapes(
coverages = newSeq[uint8](mask.width) coverages = newSeq[uint8](mask.width)
hits = newSeq[(float32, int16)](4) hits = newSeq[(float32, int16)](4)
when defined(amd64) and not defined(pixieNoSimd):
let maskerSimd = bmNormal.maskerSimd()
for y in startY ..< stopY: for y in startY ..< stopY:
# Reset buffer for this row # Reset buffer for this row
zeroMem(coverages[0].addr, coverages.len) zeroMem(coverages[0].addr, coverages.len)
@ -1076,7 +1063,7 @@ proc fillShapes(
hits, hits,
mask.wh, mask.wh,
y, y,
topHalf, bottomHalf, fullHeight, partitions,
windingRule windingRule
) )
@ -1085,18 +1072,16 @@ proc fillShapes(
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
# When supported, SIMD blend as much as possible # When supported, SIMD blend as much as possible
for _ in countup(x, coverages.len - 16, 16): for _ in countup(x, coverages.len - 16, 16):
var coverage = mm_loadu_si128(coverages[x].addr) let
coverage = mm_loadu_si128(coverages[x].addr)
let eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128()) eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
if mm_movemask_epi8(eqZero) != 0xffff: if mm_movemask_epi8(eqZero) != 0xffff:
# If the coverages are not all zero # If the coverages are not all zero
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr) let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
mm_storeu_si128( mm_storeu_si128(
mask.data[mask.dataIndex(x, y)].addr, mask.data[mask.dataIndex(x, y)].addr,
maskNormalSimd(backdrop, coverage) maskerSimd(backdrop, coverage)
) )
x += 16 x += 16
while x < mask.width: while x < mask.width: