diff --git a/experiments/benchmark_cairo.nim b/experiments/benchmark_cairo.nim index f34ea93..975c3a1 100644 --- a/experiments/benchmark_cairo.nim +++ b/experiments/benchmark_cairo.nim @@ -1,4 +1,4 @@ -import cairo, math, benchy, pixie, chroma +import cairo, math, benchy, pixie, pixie/paths, chroma var surface = imageSurfaceCreate(FORMAT_ARGB32, 1000, 1000) diff --git a/pixie.nimble b/pixie.nimble index 84c6e4f..6d31984 100644 --- a/pixie.nimble +++ b/pixie.nimble @@ -10,5 +10,5 @@ requires "vmath >= 0.4.0" requires "chroma >= 0.2.1" requires "zippy >= 0.3.5" requires "flatty >= 0.1.3" -requires "nimsimd >= 0.4.6" +requires "nimsimd >= 0.4.8" requires "bumpy >= 1.0.1" diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 705ac99..cd65455 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -299,16 +299,16 @@ when defined(amd64) and not defined(pixieNoSimd): else: proc alphaFix(backdrop, source, mixed: ColorRGBA): ColorRGBA {.inline.} = let - sa = source.a.int32 - ba = backdrop.a.int32 + sa = source.a.uint32 + ba = backdrop.a.uint32 t0 = sa * (255 - ba) t1 = sa * ba t2 = (255 - sa) * ba let - r = t0 * source.r.int32 + t1 * mixed.r.int32 + t2 * backdrop.r.int32 - g = t0 * source.g.int32 + t1 * mixed.g.int32 + t2 * backdrop.g.int32 - b = t0 * source.b.int32 + t1 * mixed.b.int32 + t2 * backdrop.b.int32 + r = t0 * source.r.uint32 + t1 * mixed.r.uint32 + t2 * backdrop.r.uint32 + g = t0 * source.g.uint32 + t1 * mixed.g.uint32 + t2 * backdrop.g.uint32 + b = t0 * source.b.uint32 + t1 * mixed.b.uint32 + t2 * backdrop.b.uint32 a = sa + ba * (255 - sa) div 255 if a == 0: diff --git a/src/pixie/common.nim b/src/pixie/common.nim index 214460f..894dede 100644 --- a/src/pixie/common.nim +++ b/src/pixie/common.nim @@ -47,9 +47,10 @@ proc toPremultipliedAlpha*(c: Color): Color {.inline.} = proc toStraightAlpha*(c: Color): Color {.inline.} = ## Converts a color to from premultiplied alpha to straight. - if c.a == 0: - return - result.r = c.r / c.a - result.g = c.g / c.a - result.b = c.b / c.a - result.a = c.a + if c.a != 0 and c.a != 1: + result = c + else: + result.r = c.r / c.a + result.g = c.g / c.a + result.b = c.b / c.a + result.a = c.a diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 2db254f..16793a3 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -437,10 +437,9 @@ proc encodePng*( raise newException(PixieError, "Invalid PNG number of channels") let data = cast[ptr UncheckedArray[uint8]](data) - const signature = [137.uint8, 80, 78, 71, 13, 10, 26, 10] # Add the PNG file signature - result.add(signature) + result.add(pngSignature) # Add IHDR result.addUint32(13.uint32.swap()) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index d9f3b3d..6435fe2 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -66,7 +66,7 @@ proc `[]=`*(image: Image, x, y: int, rgba: ColorRGBA) {.inline.} = image.setRgbaUnsafe(x, y, rgba) proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) = - ## Fills the image data with a solid color starting at index start and + ## Fills the image data with the parameter color starting at index start and ## continuing for len indices. # Use memset when every byte has the same value @@ -95,7 +95,7 @@ proc fillUnsafe(data: var seq[ColorRGBA], rgba: ColorRGBA, start, len: int) = data[j] = rgba proc fill*(image: Image, rgba: ColorRgba) {.inline.} = - ## Fills the image with a solid color. + ## Fills the image with the parameter color. fillUnsafe(image.data, rgba, 0, image.data.len) proc flipHorizontal*(image: Image) = @@ -234,7 +234,7 @@ proc invert*(image: Image) = ## Inverts all of the colors and alpha. var i: int when defined(amd64) and not defined(pixieNoSimd): - let vec255 = mm_set1_epi8(255) + let vec255 = mm_set1_epi8(cast[int8](255)) while i < image.data.len - 4: var m = mm_loadu_si128(image.data[i].addr) m = mm_sub_epi8(vec255, m) @@ -251,18 +251,18 @@ proc invert*(image: Image) = proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA {.inline.} = let minX = x.floor.int - difX = x - x.floor + diffX = x - x.floor minY = y.floor.int - difY = y - y.floor + diffY = y - y.floor - vX0Y0 = image[minX, minY].toPremultipliedAlpha() - vX1Y0 = image[minX + 1, minY].toPremultipliedAlpha() - vX0Y1 = image[minX, minY + 1].toPremultipliedAlpha() - vX1Y1 = image[minX + 1, minY + 1].toPremultipliedAlpha() + x0y0 = image[minX, minY].toPremultipliedAlpha() + x1y0 = image[minX + 1, minY].toPremultipliedAlpha() + x0y1 = image[minX, minY + 1].toPremultipliedAlpha() + x1y1 = image[minX + 1, minY + 1].toPremultipliedAlpha() - bottomMix = lerp(vX0Y0, vX1Y0, difX) - topMix = lerp(vX0Y1, vX1Y1, difX) - finalMix = lerp(bottomMix, topMix, difY) + bottomMix = lerp(x0y0, x1y0, diffX) + topMix = lerp(x0y1, x1y1, diffX) + finalMix = lerp(bottomMix, topMix, diffY) finalMix.toStraightAlpha() @@ -376,9 +376,10 @@ proc blurAlpha*(image: Image, radius: float32) = proc shift*(image: Image, offset: Vec2) = ## Shifts the image by offset. - let copy = image.copy() # Copy to read from. - image.fill(rgba(0, 0, 0, 0)) # Reset this for being drawn to. - image.draw(copy, offset) # Draw copy into image. + if offset != vec2(0, 0): + let copy = image.copy() # Copy to read from. + image.fill(rgba(0, 0, 0, 0)) # Reset this for being drawn to. + image.draw(copy, offset) # Draw copy into image. proc spread*(image: Image, spread: float32) = ## Grows the image as a mask by spread. @@ -465,7 +466,7 @@ proc drawCorrect*(a, b: Image, mat: Mat3, blendMode: BlendMode) = proc drawUber( a, b: Image, p, dx, dy: Vec2, - lines: array[0..3, Segment], + segments: array[0..3, Segment], blendMode: BlendMode, smooth: bool ) = @@ -475,13 +476,13 @@ proc drawUber( xMin = a.width xMax = 0 for yOffset in [0.float32, 1]: - var scanLine = segment( - vec2(-100000, y.float32 + yOffset), - vec2(10000, y.float32 + yOffset) + var scanLine = Line( + a: vec2(-1000, y.float32 + yOffset), + b: vec2(1000, y.float32 + yOffset) ) - for l in lines: + for segment in segments: var at: Vec2 - if intersects(l, scanLine, at) and l.to != at: + if scanline.intersects(segment, at) and segment.to != at: xMin = min(xMin, at.x.floor.int) xMax = max(xMax, at.x.ceil.int) @@ -519,7 +520,7 @@ proc draw*(a, b: Image, mat: Mat3, blendMode: BlendMode) = mat * vec2(b.width.float32, b.height.float32), mat * vec2(0, b.height.float32) ] - lines = [ + segments = [ segment(corners[0], corners[1]), segment(corners[1], corners[2]), segment(corners[2], corners[3]), @@ -543,10 +544,14 @@ proc draw*(a, b: Image, mat: Mat3, blendMode: BlendMode) = minFilterBy2 /= 2 matInv = matInv * scale(vec2(0.5, 0.5)) - let smooth = not(dx.length == 1.0 and dy.length == 1.0 and - mat[2, 0].fractional == 0.0 and mat[2, 1].fractional == 0.0) + let smooth = not( + dx.length == 1.0 and + dy.length == 1.0 and + mat[2, 0].fractional == 0.0 and + mat[2, 1].fractional == 0.0 + ) - a.drawUber(b, p, dx, dy, lines, blendMode, smooth) + a.drawUber(b, p, dx, dy, segments, blendMode, smooth) proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.} = a.draw(b, translate(pos), blendMode) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 190e363..f02290c 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -744,17 +744,18 @@ proc quickSort(a: var seq[(float32, bool)], inl, inr: int) = quickSort(a, inl, r) quickSort(a, l, inr) -proc computeBounds(shape: seq[Vec2]): Rect = +proc computeBounds(shapes: seq[seq[(Segment, bool)]]): Rect = var xMin = float32.high xMax = float32.low yMin = float32.high yMax = float32.low - for segment in shape.segments: - xMin = min(xMin, min(segment.at.x, segment.to.x)) - xMax = max(xMax, max(segment.at.x, segment.to.x)) - yMin = min(yMin, min(segment.at.y, segment.to.y)) - yMax = max(yMax, max(segment.at.y, segment.to.y)) + for shape in shapes: + for (segment, _) in shape: + xMin = min(xMin, min(segment.at.x, segment.to.x)) + xMax = max(xMax, max(segment.at.x, segment.to.x)) + yMin = min(yMin, min(segment.at.y, segment.to.y)) + yMax = max(yMax, max(segment.at.y, segment.to.y)) xMin = floor(xMin) xMax = ceil(xMax) @@ -775,36 +776,23 @@ proc fillShapes( var sortedShapes = newSeq[seq[(Segment, bool)]](shapes.len) for i, sorted in sortedShapes.mpairs: for segment in shapes[i].segments: - if segment.at.y == segment.to.y: - # Skip horizontal and zero-length + if segment.at.y == segment.to.y: # Skip horizontal continue - var - segment = segment - winding = segment.at.y > segment.to.y + let winding = segment.at.y > segment.to.y if winding: + var segment = segment swap(segment.at, segment.to) - sorted.add((segment, winding)) + sorted.add((segment, winding)) + else: + sorted.add((segment, winding)) - # Compute the bounds of each shape - var bounds = newSeq[Rect](shapes.len) - for i, shape in shapes: - bounds[i] = computeBounds(shape) - - # Figure out the total bounds of all the shapes - var - minX = float32.high - minY = float32.high - maxY = float32.low - for bounds in bounds: - minX = min(minX, bounds.x) - minY = min(minY, bounds.y) - maxY = max(maxY, bounds.y + bounds.h) - - # Rasterize only within the total bounds + # Figure out the total bounds of all the shapes, + # rasterize only within the total bounds let - startX = max(0, minX.int) - startY = max(0, miny.int) - stopY = min(image.height, maxY.int) + bounds = computeBounds(sortedShapes) + startX = max(0, bounds.x.int) + startY = max(0, bounds.y.int) + stopY = min(image.height, (bounds.y + bounds.h).int) const quality = 5 # Must divide 255 cleanly @@ -815,12 +803,12 @@ proc fillShapes( var hits = newSeq[(float32, bool)](4) - coverages = newSeq[uint32](image.width) + coverages = newSeq[uint8](image.width) numHits: int for y in startY ..< stopY: # Reset buffer for this row - zeroMem(coverages[0].addr, coverages.len * 4) + zeroMem(coverages[0].addr, coverages.len) # Do scanlines for this row for m in 0 ..< quality: @@ -829,10 +817,9 @@ proc fillShapes( scanline = Line(a: vec2(0, yLine), b: vec2(1000, yLine)) numHits = 0 for i, shape in sortedShapes: - let bounds = bounds[i] - if bounds.y > y.float32 or bounds.y + bounds.h < y.float32: - continue for (segment, winding) in shape: + if segment.at.y > yLine or segment.to.y < y.float32: + continue var at: Vec2 if scanline.intersects(segment, at):# and segment.to != at: if numHits == hits.len: @@ -872,11 +859,14 @@ proc fillShapes( if fillLen > 0 and shouldFill(windingRule, count): var i = fillStart when defined(amd64) and not defined(pixieNoSimd): - let m = mm_set1_epi32(sampleCoverage.int32) - for j in countup(i, fillStart + fillLen - 4, 4): + let vSampleCoverage = mm_set1_epi8(cast[int8](sampleCoverage)) + for j in countup(i, fillStart + fillLen - 16, 16): let current = mm_loadu_si128(coverages[j].addr) - mm_storeu_si128(coverages[j].addr, mm_add_epi32(m, current)) - i += 4 + mm_storeu_si128( + coverages[j].addr, + mm_add_epi8(current, vSampleCoverage) + ) + i += 16 for j in i ..< fillStart + fillLen: coverages[j] += sampleCoverage @@ -889,17 +879,39 @@ proc fillShapes( # When supported, SIMD blend as much as possible let + coverageMask1 = cast[M128i]([0xffffffff, 0, 0, 0]) # First 32 bits + coverageMask3 = mm_set1_epi32(cast[int32](0x000000ff)) # Only `r` oddMask = mm_set1_epi16(cast[int16](0xff00)) div255 = mm_set1_epi16(cast[int16](0x8081)) zero = mm_set1_epi32(0) v255 = mm_set1_epi32(255) + vColor = mm_set1_epi32(cast[int32](color)) - for _ in countup(x, coverages.len - 4, 4): + for _ in countup(x, coverages.len - 16, 16): var coverage = mm_loadu_si128(coverages[x].addr) + coverage = mm_and_si128(coverage, coverageMask1) - if mm_movemask_epi8(mm_cmpeq_epi32(coverage, zero)) != 0xffff: + if mm_movemask_epi8(mm_cmpeq_epi16(coverage, zero)) != 0xffff: # If the coverages are not all zero - var source = mm_set1_epi32(cast[int32](color)) + var source = vColor + coverage = mm_slli_si128(coverage, 2) + coverage = mm_shuffle_epi32(coverage, MM_SHUFFLE(1, 1, 0, 0)) + + var + a = mm_and_si128(coverage, coverageMask1) + b = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 4)) + c = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 8)) + d = mm_and_si128(coverage, mm_slli_si128(coverageMask1, 12)) + + # Shift the coverages to `r` + a = mm_srli_si128(a, 2) + b = mm_srli_si128(b, 3) + d = mm_srli_si128(d, 1) + + coverage = mm_and_si128( + mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d)), + coverageMask3 + ) if mm_movemask_epi8(mm_cmpeq_epi32(coverage, v255)) != 0xffff: # If the coverages are not all 255 @@ -932,10 +944,10 @@ proc fillShapes( x += 4 while x < image.width: - if x + 2 <= coverages.len: + if x + 8 <= coverages.len: let peeked = cast[ptr uint64](coverages[x].addr)[] if peeked == 0: - x += 2 + x += 8 continue let coverage = coverages[x]