Merge pull request #237 from guzba/master

fill paths faster, random stuff
This commit is contained in:
treeform 2021-06-26 22:00:53 -07:00 committed by GitHub
commit 56b7361f65
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 113 additions and 68 deletions

View file

@ -1,7 +1,7 @@
import benchy, cairo, chroma, math, pixie import benchy, cairo, chroma, math, pixie
block: block:
var let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080) surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
ctx = surface.create() ctx = surface.create()
@ -19,7 +19,7 @@ block:
# discard surface.writeToPng("cairo1.png") # discard surface.writeToPng("cairo1.png")
var a = newImage(1920, 1080) let a = newImage(1920, 1080)
a.fill(rgba(255, 255, 255, 255)) a.fill(rgba(255, 255, 255, 255))
timeIt "pixie1": timeIt "pixie1":
@ -34,7 +34,7 @@ block:
# a.writeFile("pixie1.png") # a.writeFile("pixie1.png")
block: block:
var let
surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080) surface = imageSurfaceCreate(FORMAT_ARGB32, 1920, 1080)
ctx = surface.create() ctx = surface.create()
@ -52,7 +52,7 @@ block:
# discard surface.writeToPng("cairo2.png") # discard surface.writeToPng("cairo2.png")
var a = newImage(1920, 1080) let a = newImage(1920, 1080)
a.fill(rgba(255, 255, 255, 255)) a.fill(rgba(255, 255, 255, 255))
timeIt "pixie2": timeIt "pixie2":
@ -65,3 +65,32 @@ block:
a.fillPath(p, rgba(0, 0, 255, 255)) a.fillPath(p, rgba(0, 0, 255, 255))
# a.writeFile("pixie2.png") # a.writeFile("pixie2.png")
block:
let
a = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)
b = imageSurfaceCreate(FORMAT_ARGB32, 500, 500)
ac = a.create()
bc = b.create()
ac.setSourceRgba(1, 0, 0, 1)
ac.newPath()
ac.rectangle(0, 0, 1000, 1000)
ac.fill()
bc.setSourceRgba(0, 1, 0, 1)
bc.newPath()
bc.rectangle(0, 0, 500, 500)
bc.fill()
let pattern = patternCreateForSurface(b)
timeIt "a":
ac.setSource(pattern)
ac.save()
ac.translate(25.2, 25.2)
ac.rectangle(0, 0, 500, 500)
ac.fill()
ac.restore()
discard a.writeToPng("a.png")

View file

@ -158,7 +158,7 @@ proc stroke(ctx: Context, image: Image, path: Path) =
ctx.layer.applyOpacity(ctx.globalAlpha) ctx.layer.applyOpacity(ctx.globalAlpha)
ctx.restore() ctx.restore()
proc fillText(ctx: Context, image: Image, text: string, at: Vec2) {.inline.} = proc fillText(ctx: Context, image: Image, text: string, at: Vec2) =
if ctx.font.typeface == nil: if ctx.font.typeface == nil:
raise newException(PixieError, "No font has been set on this Context") raise newException(PixieError, "No font has been set on this Context")
@ -185,7 +185,7 @@ proc fillText(ctx: Context, image: Image, text: string, at: Vec2) {.inline.} =
ctx.layer.applyOpacity(ctx.globalAlpha) ctx.layer.applyOpacity(ctx.globalAlpha)
ctx.restore() ctx.restore()
proc strokeText(ctx: Context, image: Image, text: string, at: Vec2) {.inline.} = proc strokeText(ctx: Context, image: Image, text: string, at: Vec2) =
if ctx.font.typeface == nil: if ctx.font.typeface == nil:
raise newException(PixieError, "No font has been set on this Context") raise newException(PixieError, "No font has been set on this Context")
@ -293,7 +293,7 @@ proc ellipse*(ctx: Context, x, y, rx, ry: float32) {.inline.} =
## Adds an ellipse to the current sub-path. ## Adds an ellipse to the current sub-path.
ctx.path.ellipse(x, y, rx, ry) ctx.path.ellipse(x, y, rx, ry)
proc fill*(ctx: Context, path: Path, windingRule = wrNonZero) {.inline.} = proc fill*(ctx: Context, path: Path, windingRule = wrNonZero) =
## Fills the path with the current fillStyle. ## Fills the path with the current fillStyle.
if ctx.mask != nil and ctx.layer == nil: if ctx.mask != nil and ctx.layer == nil:
ctx.saveLayer() ctx.saveLayer()
@ -308,7 +308,7 @@ proc fill*(ctx: Context, windingRule = wrNonZero) {.inline.} =
## Fills the current path with the current fillStyle. ## Fills the current path with the current fillStyle.
ctx.fill(ctx.path, windingRule) ctx.fill(ctx.path, windingRule)
proc clip*(ctx: Context, path: Path, windingRule = wrNonZero) {.inline.} = proc clip*(ctx: Context, path: Path, windingRule = wrNonZero) =
## Turns the path into the current clipping region. The previous clipping ## Turns the path into the current clipping region. The previous clipping
## region, if any, is intersected with the current or given path to create ## region, if any, is intersected with the current or given path to create
## the new clipping region. ## the new clipping region.
@ -326,7 +326,7 @@ proc clip*(ctx: Context, windingRule = wrNonZero) {.inline.} =
## to create the new clipping region. ## to create the new clipping region.
ctx.clip(ctx.path, windingRule) ctx.clip(ctx.path, windingRule)
proc stroke*(ctx: Context, path: Path) {.inline.} = proc stroke*(ctx: Context, path: Path) =
## Strokes (outlines) the current or given path with the current strokeStyle. ## Strokes (outlines) the current or given path with the current strokeStyle.
if ctx.mask != nil and ctx.layer == nil: if ctx.mask != nil and ctx.layer == nil:
ctx.saveLayer() ctx.saveLayer()

View file

@ -29,8 +29,7 @@ proc newImage*(mask: Mask): Image =
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
for _ in countup(0, mask.data.len - 16, 4): for _ in countup(0, mask.data.len - 16, 4):
let values = mm_loadu_si128(mask.data[i].addr) var alphas = unpackAlphaValues(mm_loadu_si128(mask.data[i].addr))
var alphas = unpackAlphaValues(values)
alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 8)) alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 8))
alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 16)) alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 16))
mm_storeu_si128(result.data[i].addr, alphas) mm_storeu_si128(result.data[i].addr, alphas)
@ -517,16 +516,12 @@ proc getRgbaSmooth*(image: Image, x, y: float32, wrapped = false): ColorRGBX =
## Gets an interpolated color with floating-point coordinates. ## Gets an interpolated color with floating-point coordinates.
## Pixels outside the image are transparent. ## Pixels outside the image are transparent.
let let
minX = floor(x) x0 = x.int
minY = floor(y) y0 = y.int
diffX = x - minX x1 = x0 + 1
diffY = y - minY y1 = y0 + 1
x = minX.int xFractional = x.fractional
y = minY.int yFractional = y.fractional
x0 = (x + 0)
y0 = (y + 0)
x1 = (x + 1)
y1 = (y + 1)
var x0y0, x1y0, x0y1, x1y1: ColorRGBX var x0y0, x1y0, x0y1, x1y1: ColorRGBX
if wrapped: if wrapped:
@ -541,10 +536,10 @@ proc getRgbaSmooth*(image: Image, x, y: float32, wrapped = false): ColorRGBX =
x1y1 = image[x1, y1] x1y1 = image[x1, y1]
let let
bottomMix = lerp(x0y0, x1y0, diffX) topMix = lerp(x0y0, x1y0, xFractional)
topMix = lerp(x0y1, x1y1, diffX) bottomMix = lerp(x0y1, x1y1, xFractional)
lerp(bottomMix, topMix, diffY) lerp(topMix, bottomMix, yFractional)
proc drawCorrect( proc drawCorrect(
a, b: Image | Mask, mat = mat3(), tiled = false, blendMode = bmNormal a, b: Image | Mask, mat = mat3(), tiled = false, blendMode = bmNormal
@ -709,7 +704,7 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
else: else:
var x = xMin var x = xMin
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
if dx.x == 1 and dx.y == 0 and dy.x == 0 and dy.y == 1: if dx == vec2(1, 0) and dy == vec2(0, 1):
# Check we are not rotated before using SIMD blends # Check we are not rotated before using SIMD blends
when type(a) is Image: when type(a) is Image:
if blendMode.hasSimdBlender(): if blendMode.hasSimdBlender():
@ -732,7 +727,6 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) =
blenderSimd(backdrop, source) blenderSimd(backdrop, source)
) )
x += 4 x += 4
else: # is a Mask else: # is a Mask
if blendMode.hasSimdMasker(): if blendMode.hasSimdMasker():
let maskerSimd = blendMode.maskerSimd() let maskerSimd = blendMode.maskerSimd()

View file

@ -81,7 +81,9 @@ proc minifyBy2*(mask: Mask, power = 1): Mask =
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
let let
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
first8 = cast[M128i]([uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) firstByte = cast[M128i](
[uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
)
for _ in countup(0, result.width - 16, 8): for _ in countup(0, result.width - 16, 8):
let let
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr) top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
@ -114,14 +116,14 @@ proc minifyBy2*(mask: Mask, power = 1): Mask =
# merged has the correct values in the even indices # merged has the correct values in the even indices
a = mm_and_si128(merged, first8) a = mm_and_si128(merged, firstByte)
b = mm_and_si128(mm_srli_si128(merged, 2), first8) b = mm_and_si128(mm_srli_si128(merged, 2), firstByte)
c = mm_and_si128(mm_srli_si128(merged, 4), first8) c = mm_and_si128(mm_srli_si128(merged, 4), firstByte)
d = mm_and_si128(mm_srli_si128(merged, 6), first8) d = mm_and_si128(mm_srli_si128(merged, 6), firstByte)
e = mm_and_si128(mm_srli_si128(merged, 8), first8) e = mm_and_si128(mm_srli_si128(merged, 8), firstByte)
f = mm_and_si128(mm_srli_si128(merged, 10), first8) f = mm_and_si128(mm_srli_si128(merged, 10), firstByte)
g = mm_and_si128(mm_srli_si128(merged, 12), first8) g = mm_and_si128(mm_srli_si128(merged, 12), firstByte)
h = mm_and_si128(mm_srli_si128(merged, 14), first8) h = mm_and_si128(mm_srli_si128(merged, 14), firstByte)
ab = mm_or_si128(a, mm_slli_si128(b, 1)) ab = mm_or_si128(a, mm_slli_si128(b, 1))
cd = mm_or_si128(c, mm_slli_si128(d, 1)) cd = mm_or_si128(c, mm_slli_si128(d, 1))
@ -159,22 +161,22 @@ proc fill*(mask: Mask, value: uint8) {.inline.} =
proc getValueSmooth*(mask: Mask, x, y: float32): uint8 = proc getValueSmooth*(mask: Mask, x, y: float32): uint8 =
## Gets an interpolated value with floating-point coordinates. ## Gets an interpolated value with floating-point coordinates.
let let
minX = floor(x) x0 = x.int
minY = floor(y) y0 = y.int
diffX = x - minX x1 = x0 + 1
diffY = y - minY y1 = y0 + 1
x = minX.int xFractional = x.fractional
y = minY.int yFractional = y.fractional
x0y0 = mask[x + 0, y + 0] x0y0 = mask[x0, y0]
x1y0 = mask[x + 1, y + 0] x1y0 = mask[x1, y0]
x0y1 = mask[x + 0, y + 1] x0y1 = mask[x0, y1]
x1y1 = mask[x + 1, y + 1] x1y1 = mask[x1, y1]
bottomMix = lerp(x0y0, x1y0, diffX) topMix = lerp(x0y0, x1y0, xFractional)
topMix = lerp(x0y1, x1y1, diffX) bottomMix = lerp(x0y1, x1y1, xFractional)
lerp(bottomMix, topMix, diffY) lerp(topMix, bottomMix, yFractional)
proc spread*(mask: Mask, spread: float32) = proc spread*(mask: Mask, spread: float32) =
## Grows the mask by spread. ## Grows the mask by spread.

View file

@ -26,8 +26,8 @@ type
PathCommand* = object PathCommand* = object
## Binary version of an SVG command. ## Binary version of an SVG command.
kind*: PathCommandKind kind: PathCommandKind
numbers*: seq[float32] numbers: seq[float32]
Path* = object Path* = object
## Used to hold paths and create paths. ## Used to hold paths and create paths.
@ -1152,7 +1152,7 @@ proc computeCoverages(
hits: var seq[(float32, int16)], hits: var seq[(float32, int16)],
numHits: var int, numHits: var int,
size: Vec2, size: Vec2,
y: int, y, startX: int,
aa: bool, aa: bool,
partitioning: Partitioning, partitioning: Partitioning,
windingRule: WindingRule windingRule: WindingRule
@ -1206,12 +1206,14 @@ proc computeCoverages(
at - prevAt at - prevAt
if leftCover != 0: if leftCover != 0:
inc fillStart inc fillStart
coverages[prevAt.int] += (leftCover * sampleCoverage.float32).uint8 coverages[prevAt.int - startX] +=
(leftCover * sampleCoverage.float32).uint8
if pixelCrossed: if pixelCrossed:
let rightCover = at - trunc(at) let rightCover = at - trunc(at)
if rightCover > 0: if rightCover > 0:
coverages[at.int] += (rightCover * sampleCoverage.float32).uint8 coverages[at.int - startX] +=
(rightCover * sampleCoverage.float32).uint8
let fillLen = at.int - fillStart let fillLen = at.int - fillStart
if fillLen > 0: if fillLen > 0:
@ -1219,12 +1221,12 @@ proc computeCoverages(
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage)) let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
for j in countup(i, fillStart + fillLen - 16, 16): for j in countup(i, fillStart + fillLen - 16, 16):
var coverage = mm_loadu_si128(coverages[j].addr) var coverage = mm_loadu_si128(coverages[j - startX].addr)
coverage = mm_add_epi8(coverage, vSampleCoverage) coverage = mm_add_epi8(coverage, vSampleCoverage)
mm_storeu_si128(coverages[j].addr, coverage) mm_storeu_si128(coverages[j - startX].addr, coverage)
i += 16 i += 16
for j in i ..< fillStart + fillLen: for j in i ..< fillStart + fillLen:
coverages[j] += sampleCoverage coverages[j - startX] += sampleCoverage
proc clearUnsafe(target: Image | Mask, startX, startY, toX, toY: int) = proc clearUnsafe(target: Image | Mask, startX, startY, toX, toY: int) =
## Clears data from [start, to). ## Clears data from [start, to).
@ -1253,8 +1255,8 @@ proc fillCoverage(
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081)) div255 = mm_set1_epi16(cast[int16](0x8081))
vColor = mm_set1_epi32(cast[int32](rgbx)) vColor = mm_set1_epi32(cast[int32](rgbx))
for _ in countup(x, image.width - 16, 4): for _ in countup(x, startX + coverages.len - 16, 4):
var coverage = mm_loadu_si128(coverages[x].unsafeAddr) var coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
coverage = mm_and_si128(coverage, first32) coverage = mm_and_si128(coverage, first32)
let let
@ -1301,8 +1303,8 @@ proc fillCoverage(
x += 4 x += 4
let blender = blendMode.blender() let blender = blendMode.blender()
while x < image.width: while x < startX + coverages.len:
let coverage = coverages[x] let coverage = coverages[x - startX]
if coverage != 0 or blendMode == bmExcludeMask: if coverage != 0 or blendMode == bmExcludeMask:
if blendMode == bmNormal and coverage == 255 and rgbx.a == 255: if blendMode == bmNormal and coverage == 255 and rgbx.a == 255:
# Skip blending # Skip blending
@ -1322,6 +1324,7 @@ proc fillCoverage(
if blendMode == bmMask: if blendMode == bmMask:
image.clearUnsafe(0, y, startX, y) image.clearUnsafe(0, y, startX, y)
image.clearUnsafe(startX + coverages.len, y, image.width, y)
proc fillCoverage( proc fillCoverage(
mask: Mask, mask: Mask,
@ -1333,10 +1336,10 @@ proc fillCoverage(
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
if blendMode.hasSimdMasker(): if blendMode.hasSimdMasker():
let maskerSimd = blendMode.maskerSimd() let maskerSimd = blendMode.maskerSimd()
for _ in countup(x, coverages.len - 16, 16): for _ in countup(x, startX + coverages.len - 16, 16):
let let
index = mask.dataIndex(x, y) index = mask.dataIndex(x, y)
coverage = mm_loadu_si128(coverages[x].unsafeAddr) coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128()) eqZero = mm_cmpeq_epi16(coverage, mm_setzero_si128())
if mm_movemask_epi8(eqZero) != 0xffff: # or blendMode == bmExcludeMask: if mm_movemask_epi8(eqZero) != 0xffff: # or blendMode == bmExcludeMask:
# If the coverages are not all zero # If the coverages are not all zero
@ -1350,8 +1353,8 @@ proc fillCoverage(
x += 16 x += 16
let masker = blendMode.masker() let masker = blendMode.masker()
while x < mask.width: while x < startX + coverages.len:
let coverage = coverages[x] let coverage = coverages[x - startX]
if coverage != 0 or blendMode == bmExcludeMask: if coverage != 0 or blendMode == bmExcludeMask:
let backdrop = mask.getValueUnsafe(x, y) let backdrop = mask.getValueUnsafe(x, y)
mask.setValueUnsafe(x, y, masker(backdrop, coverage)) mask.setValueUnsafe(x, y, masker(backdrop, coverage))
@ -1361,6 +1364,7 @@ proc fillCoverage(
if blendMode == bmMask: if blendMode == bmMask:
mask.clearUnsafe(0, y, startX, y) mask.clearUnsafe(0, y, startX, y)
mask.clearUnsafe(startX + coverages.len, y, mask.width, y)
proc fillHits( proc fillHits(
image: Image, image: Image,
@ -1476,7 +1480,7 @@ proc fillShapes(
partitioning = partitionSegments(segments, startY, pathHeight - startY) partitioning = partitionSegments(segments, startY, pathHeight - startY)
var var
coverages = newSeq[uint8](image.width) coverages = newSeq[uint8](bounds.w.int)
hits = newSeq[(float32, int16)](4) hits = newSeq[(float32, int16)](4)
numHits: int numHits: int
@ -1487,6 +1491,7 @@ proc fillShapes(
numHits, numHits,
image.wh, image.wh,
y, y,
startX,
aa, aa,
partitioning, partitioning,
windingRule windingRule
@ -1532,7 +1537,7 @@ proc fillShapes(
partitioning = partitionSegments(segments, startY, pathHeight) partitioning = partitionSegments(segments, startY, pathHeight)
var var
coverages = newSeq[uint8](mask.width) coverages = newSeq[uint8](bounds.w.int)
hits = newSeq[(float32, int16)](4) hits = newSeq[(float32, int16)](4)
numHits: int numHits: int
@ -1543,6 +1548,7 @@ proc fillShapes(
numHits, numHits,
mask.wh, mask.wh,
y, y,
startX,
aa, aa,
partitioning, partitioning,
windingRule windingRule

View file

@ -1,7 +1,9 @@
import benchy, nimPNG, pixie/fileformats/png, stb_image/read as stbi, import benchy, cairo, nimPNG, pixie/fileformats/png, stb_image/read as stbi,
stb_image/write as stbr stb_image/write as stbr
let data = readFile("tests/images/png/lenna.png") let
filePath = "tests/images/png/lenna.png"
data = readFile(filePath)
timeIt "pixie decode": timeIt "pixie decode":
keep decodePng(cast[seq[uint8]](data)) keep decodePng(cast[seq[uint8]](data))
@ -37,3 +39,15 @@ timeIt "stb_image encode":
stbi.RGBA stbi.RGBA
) )
keep writePNG(width, height, channels, decoded).len keep writePNG(width, height, channels, decoded).len
timeIt "cairo decode":
keep imageSurfaceCreateFromPng(filePath)
timeIt "cairo encode":
let decoded = imageSurfaceCreateFromPng(filePath)
var write: WriteFunc =
proc(closure: pointer, data: cstring, len: int32): Status {.cdecl.} =
StatusSuccess
discard decoded.writeToPng(write, nil)