Merge pull request #346 from guzba/master
opaque draw fastpath, simd mask magnifyBy2, float issue fix for 360deg rotation
This commit is contained in:
commit
39417e36df
9 changed files with 188 additions and 144 deletions
|
@ -1,4 +1,4 @@
|
||||||
import genny, pixie, unicode
|
import genny, pixie, pixie/internal, unicode
|
||||||
|
|
||||||
var lastError: ref PixieError
|
var lastError: ref PixieError
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import benchy, cairo, pixie, pixie/blends
|
import benchy, cairo, pixie, pixie/blends, pixie/internal
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
import nimsimd/sse2
|
import nimsimd/sse2
|
||||||
|
@ -7,10 +7,8 @@ when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
proc drawBasic(backdrop, source: Image) =
|
proc drawBasic(backdrop, source: Image) =
|
||||||
let sourceIsOpaque = source.isOpaque()
|
|
||||||
|
|
||||||
for y in 0 ..< min(backdrop.height, source.height):
|
for y in 0 ..< min(backdrop.height, source.height):
|
||||||
if sourceIsOpaque:
|
if isOpaque(source.data, source.dataIndex(0, y), source.width):
|
||||||
copyMem(
|
copyMem(
|
||||||
backdrop.data[backdrop.dataIndex(0, y)].addr,
|
backdrop.data[backdrop.dataIndex(0, y)].addr,
|
||||||
source.data[source.dataIndex(0, y)].addr,
|
source.data[source.dataIndex(0, y)].addr,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import blends, bumpy, chroma, common, masks, pixie/internal, system/memory, vmath
|
import blends, bumpy, chroma, common, masks, pixie/internal, vmath
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
import nimsimd/sse2
|
import nimsimd/sse2
|
||||||
|
@ -96,39 +96,6 @@ proc setColor*(image: Image, x, y: int, color: Color) {.inline, raises: [].} =
|
||||||
## Sets a color at (x, y) or does nothing if outside of bounds.
|
## Sets a color at (x, y) or does nothing if outside of bounds.
|
||||||
image[x, y] = color.rgbx()
|
image[x, y] = color.rgbx()
|
||||||
|
|
||||||
proc fillUnsafe*(
|
|
||||||
data: var seq[ColorRGBX], color: SomeColor, start, len: int
|
|
||||||
) {.raises: [].} =
|
|
||||||
## Fills the image data with the color starting at index start and
|
|
||||||
## continuing for len indices.
|
|
||||||
|
|
||||||
let rgbx = color.asRgbx()
|
|
||||||
|
|
||||||
# Use memset when every byte has the same value
|
|
||||||
if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a:
|
|
||||||
nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
|
|
||||||
else:
|
|
||||||
var i = start
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
|
||||||
# When supported, SIMD fill until we run out of room
|
|
||||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
|
||||||
for _ in 0 ..< len div 8:
|
|
||||||
mm_storeu_si128(data[i + 0].addr, colorVec)
|
|
||||||
mm_storeu_si128(data[i + 4].addr, colorVec)
|
|
||||||
i += 8
|
|
||||||
else:
|
|
||||||
when sizeof(int) == 8:
|
|
||||||
# Fill 8 bytes at a time when possible
|
|
||||||
let
|
|
||||||
u32 = cast[uint32](rgbx)
|
|
||||||
u64 = cast[uint64]([u32, u32])
|
|
||||||
for _ in 0 ..< len div 2:
|
|
||||||
cast[ptr uint64](data[i].addr)[] = u64
|
|
||||||
i += 2
|
|
||||||
# Fill whatever is left the slow way
|
|
||||||
for j in i ..< start + len:
|
|
||||||
data[j] = rgbx
|
|
||||||
|
|
||||||
proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =
|
proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =
|
||||||
## Fills the image with the color.
|
## Fills the image with the color.
|
||||||
fillUnsafe(image.data, color, 0, image.data.len)
|
fillUnsafe(image.data, color, 0, image.data.len)
|
||||||
|
@ -181,29 +148,8 @@ proc isTransparent*(image: Image): bool {.raises: [].} =
|
||||||
return false
|
return false
|
||||||
|
|
||||||
proc isOpaque*(image: Image): bool {.raises: [].} =
|
proc isOpaque*(image: Image): bool {.raises: [].} =
|
||||||
result = true
|
## Checks if the entire image is opaque (alpha values are all 255).
|
||||||
|
isOpaque(image.data, 0, image.data.len)
|
||||||
var i: int
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
|
||||||
let
|
|
||||||
vec255 = mm_set1_epi32(cast[int32](uint32.high))
|
|
||||||
colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0]))
|
|
||||||
for _ in 0 ..< image.data.len div 16:
|
|
||||||
let
|
|
||||||
values0 = mm_loadu_si128(image.data[i + 0].addr)
|
|
||||||
values1 = mm_loadu_si128(image.data[i + 4].addr)
|
|
||||||
values2 = mm_loadu_si128(image.data[i + 8].addr)
|
|
||||||
values3 = mm_loadu_si128(image.data[i + 12].addr)
|
|
||||||
values01 = mm_and_si128(values0, values1)
|
|
||||||
values23 = mm_and_si128(values2, values3)
|
|
||||||
values = mm_or_si128(mm_and_si128(values01, values23), colorMask)
|
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi8(values, vec255)) != 0xffff:
|
|
||||||
return false
|
|
||||||
i += 16
|
|
||||||
|
|
||||||
for j in i ..< image.data.len:
|
|
||||||
if image.data[j].a != 255:
|
|
||||||
return false
|
|
||||||
|
|
||||||
proc flipHorizontal*(image: Image) {.raises: [].} =
|
proc flipHorizontal*(image: Image) {.raises: [].} =
|
||||||
## Flips the image around the Y axis.
|
## Flips the image around the Y axis.
|
||||||
|
@ -394,26 +340,26 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
||||||
var x: int
|
var x: int
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
if scale == 2:
|
if scale == 2:
|
||||||
let mask = cast[M128i]([uint32.high, 0, 0, 0])
|
while x <= image.width - 4:
|
||||||
for _ in countup(0, image.width - 4, 2):
|
|
||||||
let
|
let
|
||||||
values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
|
values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
|
||||||
first = mm_and_si128(values, mask)
|
lo = mm_unpacklo_epi32(values, mm_setzero_si128())
|
||||||
second = mm_and_si128(mm_srli_si128(values, 4), mask)
|
hi = mm_unpackhi_epi32(values, mm_setzero_si128())
|
||||||
combined = mm_or_si128(first, mm_slli_si128(second, 8))
|
|
||||||
doubled = mm_or_si128(combined, mm_slli_si128(combined, 4))
|
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
result.data[result.dataIndex(x * scale, y * scale)].addr,
|
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
|
||||||
doubled
|
mm_or_si128(lo, mm_slli_si128(lo, 4))
|
||||||
)
|
)
|
||||||
x += 2
|
mm_storeu_si128(
|
||||||
for _ in x ..< image.width:
|
result.data[result.dataIndex(x * scale + 4, y * scale)].addr,
|
||||||
|
mm_or_si128(hi, mm_slli_si128(hi, 4))
|
||||||
|
)
|
||||||
|
x += 4
|
||||||
|
for x in x ..< image.width:
|
||||||
let
|
let
|
||||||
rgbx = image.unsafe[x, y]
|
rgbx = image.unsafe[x, y]
|
||||||
resultIdx = result.dataIndex(x * scale, y * scale)
|
resultIdx = result.dataIndex(x * scale, y * scale)
|
||||||
for i in 0 ..< scale:
|
for i in 0 ..< scale:
|
||||||
result.data[resultIdx + i] = rgbx
|
result.data[resultIdx + i] = rgbx
|
||||||
inc x
|
|
||||||
# Copy that row of pixels into (scale - 1) more rows
|
# Copy that row of pixels into (scale - 1) more rows
|
||||||
let rowStart = result.dataIndex(0, y * scale)
|
let rowStart = result.dataIndex(0, y * scale)
|
||||||
for i in 1 ..< scale:
|
for i in 1 ..< scale:
|
||||||
|
@ -596,26 +542,15 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
|
||||||
var i: int
|
var i: int
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
for _ in 0 ..< image.data.len div 16:
|
for _ in 0 ..< image.data.len div 16:
|
||||||
var
|
let
|
||||||
a = mm_loadu_si128(image.data[i + 0].addr)
|
a = mm_loadu_si128(image.data[i + 0].addr)
|
||||||
b = mm_loadu_si128(image.data[i + 4].addr)
|
b = mm_loadu_si128(image.data[i + 4].addr)
|
||||||
c = mm_loadu_si128(image.data[i + 8].addr)
|
c = mm_loadu_si128(image.data[i + 8].addr)
|
||||||
d = mm_loadu_si128(image.data[i + 12].addr)
|
d = mm_loadu_si128(image.data[i + 12].addr)
|
||||||
|
|
||||||
a = packAlphaValues(a)
|
|
||||||
b = packAlphaValues(b)
|
|
||||||
c = packAlphaValues(c)
|
|
||||||
d = packAlphaValues(d)
|
|
||||||
|
|
||||||
b = mm_slli_si128(b, 4)
|
|
||||||
c = mm_slli_si128(c, 8)
|
|
||||||
d = mm_slli_si128(d, 12)
|
|
||||||
|
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
result.data[i].addr,
|
result.data[i].addr,
|
||||||
mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d))
|
pack4xAlphaValues(a, b, c, d)
|
||||||
)
|
)
|
||||||
|
|
||||||
i += 16
|
i += 16
|
||||||
|
|
||||||
for j in i ..< image.data.len:
|
for j in i ..< image.data.len:
|
||||||
|
@ -760,12 +695,14 @@ proc drawUber(
|
||||||
dy *= 2
|
dy *= 2
|
||||||
filterBy2 *= 2
|
filterBy2 *= 2
|
||||||
|
|
||||||
let smooth = not(
|
let
|
||||||
dx.length == 1.0 and
|
hasRotation = not(dx == vec2(1, 0) and dy == vec2(0, 1))
|
||||||
dy.length == 1.0 and
|
smooth = not(
|
||||||
transform[2, 0].fractional == 0.0 and
|
dx.length == 1.0 and
|
||||||
transform[2, 1].fractional == 0.0
|
dy.length == 1.0 and
|
||||||
)
|
transform[2, 0].fractional == 0.0 and
|
||||||
|
transform[2, 1].fractional == 0.0
|
||||||
|
)
|
||||||
|
|
||||||
# Determine where we should start and stop drawing in the y dimension
|
# Determine where we should start and stop drawing in the y dimension
|
||||||
var
|
var
|
||||||
|
@ -789,8 +726,8 @@ proc drawUber(
|
||||||
for y in yMin ..< yMax:
|
for y in yMin ..< yMax:
|
||||||
# Determine where we should start and stop drawing in the x dimension
|
# Determine where we should start and stop drawing in the x dimension
|
||||||
var
|
var
|
||||||
xMin = a.width
|
xMin = a.width.float32
|
||||||
xMax = 0
|
xMax = 0.float32
|
||||||
for yOffset in [0.float32, 1]:
|
for yOffset in [0.float32, 1]:
|
||||||
let scanLine = Line(
|
let scanLine = Line(
|
||||||
a: vec2(-1000, y.float32 + yOffset),
|
a: vec2(-1000, y.float32 + yOffset),
|
||||||
|
@ -799,21 +736,29 @@ proc drawUber(
|
||||||
for segment in perimeter:
|
for segment in perimeter:
|
||||||
var at: Vec2
|
var at: Vec2
|
||||||
if scanline.intersects(segment, at) and segment.to != at:
|
if scanline.intersects(segment, at) and segment.to != at:
|
||||||
xMin = min(xMin, at.x.floor.int)
|
xMin = min(xMin, at.x)
|
||||||
xMax = max(xMax, at.x.ceil.int)
|
xMax = max(xMax, at.x)
|
||||||
|
|
||||||
xMin = xMin.clamp(0, a.width)
|
var xStart, xStop: int
|
||||||
xMax = xMax.clamp(0, a.width)
|
if hasRotation or smooth:
|
||||||
|
xStart = xMin.floor.int
|
||||||
|
xStop = xMax.ceil.int
|
||||||
|
else:
|
||||||
|
# Rotation of 360 degrees can cause knife-edge issues with floor and ceil
|
||||||
|
xStart = xMin.round().int
|
||||||
|
xStop = xMax.round().int
|
||||||
|
xStart = xStart.clamp(0, a.width)
|
||||||
|
xStop = xStop.clamp(0, a.width)
|
||||||
|
|
||||||
if blendMode == bmMask:
|
if blendMode == bmMask:
|
||||||
if xMin > 0:
|
if xStart > 0:
|
||||||
zeroMem(a.data[a.dataIndex(0, y)].addr, 4 * xMin)
|
zeroMem(a.data[a.dataIndex(0, y)].addr, 4 * xStart)
|
||||||
|
|
||||||
if smooth:
|
if smooth:
|
||||||
var srcPos = p + dx * xMin.float32 + dy * y.float32
|
var srcPos = p + dx * xStart.float32 + dy * y.float32
|
||||||
srcPos = vec2(srcPos.x - h, srcPos.y - h)
|
srcPos = vec2(srcPos.x - h, srcPos.y - h)
|
||||||
|
|
||||||
for x in xMin ..< xMax:
|
for x in xStart ..< xStop:
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
let backdrop = a.unsafe[x, y]
|
let backdrop = a.unsafe[x, y]
|
||||||
when type(b) is Image:
|
when type(b) is Image:
|
||||||
|
@ -836,14 +781,28 @@ proc drawUber(
|
||||||
srcPos += dx
|
srcPos += dx
|
||||||
|
|
||||||
else:
|
else:
|
||||||
var x = xMin
|
var x = xStart
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
if not hasRotation:
|
||||||
if dx == vec2(1, 0) and dy == vec2(0, 1):
|
when type(a) is Image and type(b) is Image:
|
||||||
# Check we are not rotated before using SIMD blends
|
if blendMode in {bmNormal, bmOverwrite} and
|
||||||
|
isOpaque(b.data, b.dataIndex(xStart, y), xStop - xStart):
|
||||||
|
let
|
||||||
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
|
sx = srcPos.x.int
|
||||||
|
sy = srcPos.y.int
|
||||||
|
copyMem(
|
||||||
|
a.data[a.dataIndex(x, y)].addr,
|
||||||
|
b.data[b.dataIndex(sx, sy)].addr,
|
||||||
|
(xStop - xStart) * 4
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
# Check we are not rotated
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
if blendMode.hasSimdBlender():
|
if blendMode.hasSimdBlender():
|
||||||
let blenderSimd = blendMode.blenderSimd()
|
let blenderSimd = blendMode.blenderSimd()
|
||||||
for _ in 0 ..< (xMax - xMin) div 16:
|
for _ in 0 ..< (xStop - xStart) div 16:
|
||||||
let
|
let
|
||||||
srcPos = p + dx * x.float32 + dy * y.float32
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
sx = srcPos.x.int
|
sx = srcPos.x.int
|
||||||
|
@ -873,7 +832,7 @@ proc drawUber(
|
||||||
else: # is a Mask
|
else: # is a Mask
|
||||||
if blendMode.hasSimdMasker():
|
if blendMode.hasSimdMasker():
|
||||||
let maskerSimd = blendMode.maskerSimd()
|
let maskerSimd = blendMode.maskerSimd()
|
||||||
for _ in 0 ..< (xMax - xMin) div 16:
|
for _ in 0 ..< (xStop - xStart) div 16:
|
||||||
let
|
let
|
||||||
srcPos = p + dx * x.float32 + dy * y.float32
|
srcPos = p + dx * x.float32 + dy * y.float32
|
||||||
sx = srcPos.x.int
|
sx = srcPos.x.int
|
||||||
|
@ -881,22 +840,12 @@ proc drawUber(
|
||||||
backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
|
backdrop = mm_loadu_si128(a.data[a.dataIndex(x, y)].addr)
|
||||||
when type(b) is Image:
|
when type(b) is Image:
|
||||||
# Need to read 16 colors and pack their alpha values
|
# Need to read 16 colors and pack their alpha values
|
||||||
var
|
let
|
||||||
i = mm_loadu_si128(b.data[b.dataIndex(sx + 0, sy)].addr)
|
i = mm_loadu_si128(b.data[b.dataIndex(sx + 0, sy)].addr)
|
||||||
j = mm_loadu_si128(b.data[b.dataIndex(sx + 4, sy)].addr)
|
j = mm_loadu_si128(b.data[b.dataIndex(sx + 4, sy)].addr)
|
||||||
k = mm_loadu_si128(b.data[b.dataIndex(sx + 8, sy)].addr)
|
k = mm_loadu_si128(b.data[b.dataIndex(sx + 8, sy)].addr)
|
||||||
l = mm_loadu_si128(b.data[b.dataIndex(sx + 12, sy)].addr)
|
l = mm_loadu_si128(b.data[b.dataIndex(sx + 12, sy)].addr)
|
||||||
|
source = pack4xAlphaValues(i, j, k, l)
|
||||||
i = packAlphaValues(i)
|
|
||||||
j = packAlphaValues(j)
|
|
||||||
k = packAlphaValues(k)
|
|
||||||
l = packAlphaValues(l)
|
|
||||||
|
|
||||||
j = mm_slli_si128(j, 4)
|
|
||||||
k = mm_slli_si128(k, 8)
|
|
||||||
l = mm_slli_si128(l, 12)
|
|
||||||
|
|
||||||
let source = mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l))
|
|
||||||
else: # b is a Mask
|
else: # b is a Mask
|
||||||
let source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
|
let source = mm_loadu_si128(b.data[b.dataIndex(sx, sy)].addr)
|
||||||
|
|
||||||
|
@ -914,7 +863,7 @@ proc drawUber(
|
||||||
|
|
||||||
case blendMode:
|
case blendMode:
|
||||||
of bmOverwrite:
|
of bmOverwrite:
|
||||||
for x in x ..< xMax:
|
for x in x ..< xStop:
|
||||||
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
when type(b) is Image:
|
when type(b) is Image:
|
||||||
|
@ -932,7 +881,7 @@ proc drawUber(
|
||||||
a.unsafe[x, y] = source
|
a.unsafe[x, y] = source
|
||||||
srcPos += dx
|
srcPos += dx
|
||||||
of bmNormal:
|
of bmNormal:
|
||||||
for x in x ..< xMax:
|
for x in x ..< xStop:
|
||||||
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
when type(b) is Image:
|
when type(b) is Image:
|
||||||
|
@ -958,7 +907,7 @@ proc drawUber(
|
||||||
a.unsafe[x, y] = blendAlpha(backdrop, source)
|
a.unsafe[x, y] = blendAlpha(backdrop, source)
|
||||||
srcPos += dx
|
srcPos += dx
|
||||||
of bmMask:
|
of bmMask:
|
||||||
for x in x ..< xMax:
|
for x in x ..< xStop:
|
||||||
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
when type(b) is Image:
|
when type(b) is Image:
|
||||||
|
@ -982,7 +931,7 @@ proc drawUber(
|
||||||
a.unsafe[x, y] = maskMaskInline(backdrop, source)
|
a.unsafe[x, y] = maskMaskInline(backdrop, source)
|
||||||
srcPos += dx
|
srcPos += dx
|
||||||
else:
|
else:
|
||||||
for x in x ..< xMax:
|
for x in x ..< xStop:
|
||||||
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32)
|
||||||
when type(a) is Image:
|
when type(a) is Image:
|
||||||
let backdrop = a.unsafe[x, y]
|
let backdrop = a.unsafe[x, y]
|
||||||
|
@ -1005,8 +954,8 @@ proc drawUber(
|
||||||
srcPos += dx
|
srcPos += dx
|
||||||
|
|
||||||
if blendMode == bmMask:
|
if blendMode == bmMask:
|
||||||
if a.width - xMax > 0:
|
if a.width - xStop > 0:
|
||||||
zeroMem(a.data[a.dataIndex(xMax, y)].addr, 4 * (a.width - xMax))
|
zeroMem(a.data[a.dataIndex(xStop, y)].addr, 4 * (a.width - xStop))
|
||||||
|
|
||||||
if blendMode == bmMask:
|
if blendMode == bmMask:
|
||||||
if a.height - yMax > 0:
|
if a.height - yMax > 0:
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import chroma, vmath
|
import chroma, system/memory, vmath
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
import nimsimd/sse2
|
import nimsimd/sse2
|
||||||
|
@ -39,6 +39,46 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} =
|
||||||
a = ((color.a * x) div 255).uint8
|
a = ((color.a * x) div 255).uint8
|
||||||
rgbx(r, g, b, a)
|
rgbx(r, g, b, a)
|
||||||
|
|
||||||
|
proc fillUnsafe*(
  data: var seq[uint8], value: uint8, start, len: int
) {.raises: [].} =
  ## Sets `len` consecutive bytes of `data`, beginning at index `start`,
  ## to `value` using a single memset call.
  ## This proc does not validate `start` / `len` itself.
  let
    firstByte = data[start].addr
    fillByte = value.cint
  nimSetMem(firstByte, fillByte, len)
|
||||||
|
|
||||||
|
proc fillUnsafe*(
  data: var seq[ColorRGBX], color: SomeColor, start, len: int
) {.raises: [].} =
  ## Overwrites `len` consecutive entries of `data`, beginning at index
  ## `start`, with `color` converted through `asRgbx`.
  ## This proc does not validate `start` / `len` itself.
  let pixel = color.asRgbx()

  if pixel.r == pixel.g and pixel.g == pixel.b and pixel.b == pixel.a:
    # All four channel bytes are identical, so one memset over the
    # `len * 4` bytes of the range gives the same result.
    nimSetMem(data[start].addr, pixel.r.cint, len * 4)
    return

  var cursor = start
  when defined(amd64) and not defined(pixieNoSimd):
    # Two unaligned 128-bit stores write 8 pixels per iteration.
    let splat = mm_set1_epi32(cast[int32](pixel))
    for _ in 1 .. len div 8:
      mm_storeu_si128(data[cursor].addr, splat)
      mm_storeu_si128(data[cursor + 4].addr, splat)
      cursor += 8
  elif sizeof(int) == 8:
    # Without SIMD, write two pixels per 8-byte store when int is 64-bit.
    let
      single = cast[uint32](pixel)
      pair = cast[uint64]([single, single])
    for _ in 1 .. len div 2:
      cast[ptr uint64](data[cursor].addr)[] = pair
      cursor += 2

  # Whatever the wide stores did not cover is written one pixel at a time.
  while cursor < start + len:
    data[cursor] = pixel
    inc cursor
|
||||||
|
|
||||||
proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
||||||
## Converts an image from premultiplied alpha to straight alpha.
|
## Converts an image from premultiplied alpha to straight alpha.
|
||||||
## This is expensive for large images.
|
## This is expensive for large images.
|
||||||
|
@ -96,17 +136,48 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
|
||||||
c.b = ((c.b.uint32 * c.a.uint32) div 255).uint8
|
c.b = ((c.b.uint32 * c.a.uint32) div 255).uint8
|
||||||
data[j] = c
|
data[j] = c
|
||||||
|
|
||||||
|
proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
  ## Checks whether the `len` pixels of `data` beginning at index `start`
  ## all have an alpha value of 255.
  ## Returns true when `len` is 0 (nothing in the range is non-opaque).
  result = true

  # Scan from `start`, not from the beginning of the buffer, so callers
  # can test a sub-range such as a single row.
  var i = start
  when defined(amd64) and not defined(pixieNoSimd):
    let
      vec255 = mm_set1_epi32(cast[int32](uint32.high))
      # Forces the r, g and b bytes to 255 so only alpha decides the compare.
      colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0]))
    for _ in 0 ..< len div 16:
      let
        values0 = mm_loadu_si128(data[i + 0].addr)
        values1 = mm_loadu_si128(data[i + 4].addr)
        values2 = mm_loadu_si128(data[i + 8].addr)
        values3 = mm_loadu_si128(data[i + 12].addr)
        # AND-ing keeps an alpha byte at 255 only if it is 255 in all inputs.
        values01 = mm_and_si128(values0, values1)
        values23 = mm_and_si128(values2, values3)
        values = mm_or_si128(mm_and_si128(values01, values23), colorMask)
      if mm_movemask_epi8(mm_cmpeq_epi8(values, vec255)) != 0xffff:
        return false
      i += 16

  # Check the pixels the 16-wide loop did not cover, up to the end of the
  # requested range (start + len), one at a time.
  for j in i ..< start + len:
    if data[j].a != 255:
      return false
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
||||||
## Shuffle the alpha values for these 4 colors to the first 4 bytes
|
## Shuffle the alpha values for these 4 colors to the first 4 bytes
|
||||||
result = mm_srli_epi32(v, 24)
|
let mask = mm_set1_epi32(cast[int32](0xff000000))
|
||||||
|
result = mm_and_si128(v, mask)
|
||||||
|
result = mm_srli_epi32(result, 24)
|
||||||
|
result = mm_packus_epi16(result, result)
|
||||||
|
result = mm_packus_epi16(result, result)
|
||||||
|
result = mm_srli_si128(result, 12)
|
||||||
|
|
||||||
|
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
|
||||||
let
|
let
|
||||||
i = mm_srli_si128(result, 3)
|
i = packAlphaValues(i)
|
||||||
j = mm_srli_si128(result, 6)
|
j = mm_slli_si128(packAlphaValues(j), 4)
|
||||||
k = mm_srli_si128(result, 9)
|
k = mm_slli_si128(packAlphaValues(k), 8)
|
||||||
first32 = cast[M128i]([uint32.high, 0, 0, 0])
|
l = mm_slli_si128(packAlphaValues(l), 12)
|
||||||
result = mm_or_si128(mm_or_si128(result, i), mm_or_si128(j, k))
|
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l))
|
||||||
result = mm_and_si128(result, first32)
|
|
||||||
|
|
||||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
||||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value)
|
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import common, internal, system/memory, vmath
|
import common, internal, vmath
|
||||||
|
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
import nimsimd/sse2
|
import nimsimd/sse2
|
||||||
|
@ -166,21 +166,41 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
||||||
|
|
||||||
let scale = 2 ^ power
|
let scale = 2 ^ power
|
||||||
result = newMask(mask.width * scale, mask.height * scale)
|
result = newMask(mask.width * scale, mask.height * scale)
|
||||||
for y in 0 ..< result.height:
|
|
||||||
for x in 0 ..< mask.width:
|
for y in 0 ..< mask.height:
|
||||||
|
# Write one row of values duplicated by scale
|
||||||
|
var x: int
|
||||||
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
if scale == 2:
|
||||||
|
while x <= mask.width - 16:
|
||||||
|
let
|
||||||
|
values = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
||||||
|
lo = mm_unpacklo_epi8(values, mm_setzero_si128())
|
||||||
|
hi = mm_unpacklo_epi8(values, mm_setzero_si128())
|
||||||
|
mm_storeu_si128(
|
||||||
|
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
|
||||||
|
mm_or_si128(lo, mm_slli_si128(lo, 1))
|
||||||
|
)
|
||||||
|
mm_storeu_si128(
|
||||||
|
result.data[result.dataIndex(x * scale + 16, y * scale)].addr,
|
||||||
|
mm_or_si128(hi, mm_slli_si128(hi, 1))
|
||||||
|
)
|
||||||
|
x += 16
|
||||||
|
for x in x ..< mask.width:
|
||||||
let
|
let
|
||||||
value = mask.unsafe[x, y div scale]
|
value = mask.unsafe[x, y div scale]
|
||||||
scaledX = x * scale
|
scaledX = x * scale
|
||||||
idx = result.dataIndex(scaledX, y)
|
idx = result.dataIndex(scaledX, y)
|
||||||
for i in 0 ..< scale:
|
for i in 0 ..< scale:
|
||||||
result.data[idx + i] = value
|
result.data[idx + i] = value
|
||||||
|
# Copy that row of values into (scale - 1) more rows
|
||||||
proc fillUnsafe*(
|
let rowStart = result.dataIndex(0, y * scale)
|
||||||
data: var seq[uint8], value: uint8, start, len: int
|
for i in 1 ..< scale:
|
||||||
) {.raises: [].} =
|
copyMem(
|
||||||
## Fills the mask data with the value starting at index start and
|
result.data[rowStart + result.width * i].addr,
|
||||||
## continuing for len indices.
|
result.data[rowStart].addr,
|
||||||
nimSetMem(data[start].addr, value.cint, len)
|
result.width * 4
|
||||||
|
)
|
||||||
|
|
||||||
proc fill*(mask: Mask, value: uint8) {.inline, raises: [].} =
|
proc fill*(mask: Mask, value: uint8) {.inline, raises: [].} =
|
||||||
## Fills the mask with the value.
|
## Fills the mask with the value.
|
||||||
|
|
|
@ -13,6 +13,12 @@ timeIt "minifyBy2":
|
||||||
|
|
||||||
reset()
|
reset()
|
||||||
|
|
||||||
|
timeIt "magnifyBy2":
|
||||||
|
let magnified = mask.magnifyBy2()
|
||||||
|
doAssert magnified[0, 0] == 63
|
||||||
|
|
||||||
|
reset()
|
||||||
|
|
||||||
timeIt "invert":
|
timeIt "invert":
|
||||||
mask.invert()
|
mask.invert()
|
||||||
|
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 783 KiB After Width: | Height: | Size: 783 KiB |
Binary file not shown.
Before Width: | Height: | Size: 3.3 MiB After Width: | Height: | Size: 3.3 MiB |
Binary file not shown.
Before Width: | Height: | Size: 9.7 KiB After Width: | Height: | Size: 9.8 KiB |
Loading…
Reference in a new issue