a few little things + uint8 coverage

This commit is contained in:
Ryan Oldenburg 2021-01-26 16:31:07 -06:00
parent e1c98b0822
commit be6bebf553
5 changed files with 83 additions and 55 deletions

View file

@ -1,4 +1,4 @@
import cairo, math, benchy, pixie, chroma import cairo, math, benchy, pixie, pixie/paths, chroma
var var
surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000)

View file

@ -10,5 +10,5 @@ requires "vmath >= 0.4.0"
requires "chroma >= 0.2.1" requires "chroma >= 0.2.1"
requires "zippy >= 0.3.5" requires "zippy >= 0.3.5"
requires "flatty >= 0.1.3" requires "flatty >= 0.1.3"
requires "nimsimd >= 0.4.6" requires "nimsimd >= 0.4.8"
requires "bumpy >= 1.0.1" requires "bumpy >= 1.0.1"

View file

@ -299,16 +299,16 @@ when defined(amd64) and not defined(pixieNoSimd):
else: else:
proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} = proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} =
let let
sa = source.a.int32 sa = source.a.uint32
ba = backdrop.a.int32 ba = backdrop.a.uint32
t0 = sa * (255 - ba) t0 = sa * (255 - ba)
t1 = sa * ba t1 = sa * ba
t2 = (255 - sa) * ba t2 = (255 - sa) * ba
let let
r = t0 * source.r.int32 + t1 * mixed.r.int32 + t2 * backdrop.r.int32 r = t0 * source.r.uint32 + t1 * mixed.r.uint32 + t2 * backdrop.r.uint32
g = t0 * source.g.int32 + t1 * mixed.g.int32 + t2 * backdrop.g.int32 g = t0 * source.g.uint32 + t1 * mixed.g.uint32 + t2 * backdrop.g.uint32
b = t0 * source.b.int32 + t1 * mixed.b.int32 + t2 * backdrop.b.int32 b = t0 * source.b.uint32 + t1 * mixed.b.uint32 + t2 * backdrop.b.uint32
a = sa + ba * (255 - sa) div 255 a = sa + ba * (255 - sa) div 255
if a == 0: if a == 0:

View file

@ -66,7 +66,7 @@ proc `[]=`*(image: Image, x, y: int, rgba: ColorRGBA) {.inline.} =
image.setRgbaUnsafe(x, y, rgba) image.setRgbaUnsafe(x, y, rgba)
proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) = proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
## Fills the image data with a solid color starting at index start and ## Fills the image data with the parameter color starting at index start and
## continuing for len indices. ## continuing for len indices.
# Use memset when every byte has the same value # Use memset when every byte has the same value
@ -95,7 +95,7 @@ proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) =
data[j] = rgba data[j] = rgba
proc fill*(image: Image, rgba: ColorRgba) {.inline.} = proc fill*(image: Image, rgba: ColorRgba) {.inline.} =
## Fills the image with a solid color. ## Fills the image with the parameter color.
fillUnsafe(image.data, rgba, 0, image.data.len) fillUnsafe(image.data, rgba, 0, image.data.len)
proc flipHorizontal*(image: Image) = proc flipHorizontal*(image: Image) =
@ -234,7 +234,7 @@ proc invert*(image: Image) =
## Inverts all of the colors and alpha. ## Inverts all of the colors and alpha.
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
let vec255 = mm_set1_epi8(255) let vec255 = mm_set1_epi8(cast[int8](255))
while i < image.data.len - 4: while i < image.data.len - 4:
var m = mm_loadu_si128(image.data[i].addr) var m = mm_loadu_si128(image.data[i].addr)
m = mm_sub_epi8(vec255, m) m = mm_sub_epi8(vec255, m)
@ -251,18 +251,18 @@ proc invert*(image: Image) =
proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA {.inline.} = proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA {.inline.} =
let let
minX = x.floor.int minX = x.floor.int
difX = x - x.floor diffX = x - x.floor
minY = y.floor.int minY = y.floor.int
difY = y - y.floor diffY = y - y.floor
vX0Y0 = image[minX, minY].toPremultipliedAlpha() x0y0 = image[minX, minY].toPremultipliedAlpha()
vX1Y0 = image[minX + 1, minY].toPremultipliedAlpha() x1y0 = image[minX + 1, minY].toPremultipliedAlpha()
vX0Y1 = image[minX, minY + 1].toPremultipliedAlpha() x0y1 = image[minX, minY + 1].toPremultipliedAlpha()
vX1Y1 = image[minX + 1, minY + 1].toPremultipliedAlpha() x1y1 = image[minX + 1, minY + 1].toPremultipliedAlpha()
bottomMix = lerp(vX0Y0, vX1Y0, difX) bottomMix = lerp(x0y0, x1y0, diffX)
topMix = lerp(vX0Y1, vX1Y1, difX) topMix = lerp(x0y1, x1y1, diffX)
finalMix = lerp(bottomMix, topMix, difY) finalMix = lerp(bottomMix, topMix, diffY)
finalMix.toStraightAlpha() finalMix.toStraightAlpha()
@ -376,6 +376,7 @@ proc blurAlpha*(image: Image, radius: float32) =
proc shift*(image: Image, offset: Vec2) = proc shift*(image: Image, offset: Vec2) =
## Shifts the image by offset. ## Shifts the image by offset.
if offset != vec2(0, 0):
let copy = image.copy() # Copy to read from. let copy = image.copy() # Copy to read from.
image.fill(rgba(0, 0, 0, 0)) # Reset this for being drawn to. image.fill(rgba(0, 0, 0, 0)) # Reset this for being drawn to.
image.draw(copy, offset) # Draw copy into image. image.draw(copy, offset) # Draw copy into image.
@ -465,7 +466,7 @@ proc drawCorrect*(a, b: Image, mat: Mat3, blendMode: BlendMode) =
proc drawUber( proc drawUber(
a, b: Image, a, b: Image,
p, dx, dy: Vec2, p, dx, dy: Vec2,
lines: array[0..3, Segment], segments: array[0..3, Segment],
blendMode: BlendMode, blendMode: BlendMode,
smooth: bool smooth: bool
) = ) =
@ -475,13 +476,13 @@ proc drawUber(
xMin = a.width xMin = a.width
xMax = 0 xMax = 0
for yOffset in [0.float32, 1]: for yOffset in [0.float32, 1]:
var scanLine = segment( var scanLine = Line(
vec2(-100000, y.float32 + yOffset), a: vec2(-1000, y.float32 + yOffset),
vec2(10000, y.float32 + yOffset) b: vec2(1000, y.float32 + yOffset)
) )
for l in lines: for segment in segments:
var at: Vec2 var at: Vec2
if intersects(l, scanLine, at) and l.to != at: if scanline.intersects(segment, at) and segment.to != at:
xMin = min(xMin, at.x.floor.int) xMin = min(xMin, at.x.floor.int)
xMax = max(xMax, at.x.ceil.int) xMax = max(xMax, at.x.ceil.int)
@ -519,7 +520,7 @@ proc draw*(a, b: Image, mat: Mat3, blendMode: BlendMode) =
mat * vec2(b.width.float32, b.height.float32), mat * vec2(b.width.float32, b.height.float32),
mat * vec2(0, b.height.float32) mat * vec2(0, b.height.float32)
] ]
lines = [ segments = [
segment(corners[0], corners[1]), segment(corners[0], corners[1]),
segment(corners[1], corners[2]), segment(corners[1], corners[2]),
segment(corners[2], corners[3]), segment(corners[2], corners[3]),
@ -543,10 +544,14 @@ proc draw*(a, b: Image, mat: Mat3, blendMode: BlendMode) =
minFilterBy2 /= 2 minFilterBy2 /= 2
matInv = matInv * scale(vec2(0.5, 0.5)) matInv = matInv * scale(vec2(0.5, 0.5))
let smooth = not(dx.length == 1.0 and dy.length == 1.0 and let smooth = not(
mat[2, 0].fractional == 0.0 and mat[2, 1].fractional == 0.0) dx.length == 1.0 and
dy.length == 1.0 and
mat[2, 0].fractional == 0.0 and
mat[2, 1].fractional == 0.0
)
a.drawUber(b, p, dx, dy, lines, blendMode, smooth) a.drawUber(b, p, dx, dy, segments, blendMode, smooth)
proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.} = proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.} =
a.draw(b, translate(pos), blendMode) a.draw(b, translate(pos), blendMode)

View file

@ -772,23 +772,21 @@ proc fillShapes(
color: ColorRGBA, color: ColorRGBA,
windingRule: WindingRule windingRule: WindingRule
) = ) =
var sortedShapes = newSeq[seq[(Segment, bool)]](shapes.len)
for i, sorted in sortedShapes.mpairs:
for segment in shapes[i].segments:
if segment.at.y == segment.to.y:
# Skip horizontal and zero-length
continue
var var
segment = segment sortedShapes = newSeq[seq[(Segment, bool)]](shapes.len)
winding = segment.at.y > segment.to.y bounds = newSeq[Rect](shapes.len)
for i, sorted in sortedShapes.mpairs:
bounds[i] = computeBounds(shapes[i])
for segment in shapes[i].segments:
if segment.at.y == segment.to.y: # Skip horizontal
continue
let winding = segment.at.y > segment.to.y
if winding: if winding:
var segment = segment
swap(segment.at, segment.to) swap(segment.at, segment.to)
sorted.add((segment, winding)) sorted.add((segment, winding))
else:
# Compute the bounds of each shape sorted.add((segment, winding))
var bounds = newSeq[Rect](shapes.len)
for i, shape in shapes:
bounds[i] = computeBounds(shape)
# Figure out the total bounds of all the shapes # Figure out the total bounds of all the shapes
var var
@ -815,12 +813,12 @@ proc fillShapes(
var var
hits = newSeq[(float32, bool)](4) hits = newSeq[(float32, bool)](4)
coverages = newSeq[uint32](image.width) coverages = newSeq[uint8](image.width)
numHits: int numHits: int
for y in startY ..< stopY: for y in startY ..< stopY:
# Reset buffer for this row # Reset buffer for this row
zeroMem(coverages[0].addr, coverages.len * 4) zeroMem(coverages[0].addr, coverages.len)
# Do scanlines for this row # Do scanlines for this row
for m in 0 ..< quality: for m in 0 ..< quality:
@ -872,11 +870,14 @@ proc fillShapes(
if fillLen > 0 and shouldFill(windingRule, count): if fillLen > 0 and shouldFill(windingRule, count):
var i = fillStart var i = fillStart
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and not defined(pixieNoSimd):
let m = mm_set1_epi32(sampleCoverage.int32) let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage))
for j in countup(i, fillStart + fillLen - 4, 4): for j in countup(i, fillStart + fillLen - 16, 16):
let current = mm_loadu_si128(coverages[j].addr) let current = mm_loadu_si128(coverages[j].addr)
mm_storeu_si128(coverages[j].addr, mm_add_epi32(m, current)) mm_storeu_si128(
i += 4 coverages[j].addr,
mm_add_epi8(current, vSampleCoverage)
)
i += 16
for j in i ..< fillStart + fillLen: for j in i ..< fillStart + fillLen:
coverages[j] += sampleCoverage coverages[j] += sampleCoverage
@ -889,17 +890,39 @@ proc fillShapes(
# When supported, SIMD blend as much as possible # When supported, SIMD blend as much as possible
let let
coverageMask1 = cast[M128i]([0xffffffff, 0, 0, 0]) # First 32 bits
coverageMask3 = mm_set1_epi32(cast[int32](0x000000ff)) # Only `r`
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081)) div255 = mm_set1_epi16(cast[int16](0x8081))
zero = mm_set1_epi32(0) zero = mm_set1_epi32(0)
v255 = mm_set1_epi32(255) v255 = mm_set1_epi32(255)
vColor = mm_set1_epi32(cast[int32](color))
for _ in countup(x, coverages.len - 4, 4): for _ in countup(x, coverages.len - 8, 8):
var coverage = mm_loadu_si128(coverages[x].addr) var coverage = mm_loadu_si128(coverages[x].addr)
coverage = mm_and_si128(coverage, coverageMask1)
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, zero)) != 0xffff: if mm_movemask_epi8(mm_cmpeq_epi16(coverage, zero)) != 0xffff:
# If the coverages are not all zero # If the coverages are not all zero
var source = mm_set1_epi32(cast[int32](color)) var source = vColor
coverage = mm_slli_si128(coverage, 2)
coverage = mm_shuffle_epi32(coverage, MM_SHUFFLE(1, 1, 0, 0))
var
a = mm_and_si128(coverage, coverageMask1)
b = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 4))
c = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 8))
d = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 12))
# Shift the coverages to `r`
a = mm_srli_si128(a, 2)
b = mm_srli_si128(b, 3)
d = mm_srli_si128(d, 1)
coverage = mm_and_si128(
mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d)),
coverageMask3
)
if mm_movemask_epi8(mm_cmpeq_epi32(coverage, v255)) != 0xffff: if mm_movemask_epi8(mm_cmpeq_epi32(coverage, v255)) != 0xffff:
# If the coverages are not all 255 # If the coverages are not all 255
@ -932,10 +955,10 @@ proc fillShapes(
x += 4 x += 4
while x < image.width: while x < image.width:
if x + 2 <= coverages.len: if x + 8 <= coverages.len:
let peeked = cast[ptr uint64](coverages[x].addr)[] let peeked = cast[ptr uint64](coverages[x].addr)[]
if peeked == 0: if peeked == 0:
x += 2 x += 8
continue continue
let coverage = coverages[x] let coverage = coverages[x]