simd, hasSimd pragmas
This commit is contained in:
parent
fd52dfecb4
commit
8bb6957fe9
15 changed files with 560 additions and 557 deletions
|
@ -1,9 +1,6 @@
|
||||||
## Blending modes.
|
## Blending modes.
|
||||||
|
|
||||||
import chroma, common, internal, std/math
|
import chroma, common, simd, std/math
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
# See https://www.w3.org/TR/compositing-1/
|
# See https://www.w3.org/TR/compositing-1/
|
||||||
# See https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_blend_equation_advanced.txt
|
# See https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_blend_equation_advanced.txt
|
||||||
|
|
|
@ -41,6 +41,26 @@ type
|
||||||
width*, height*: int
|
width*, height*: int
|
||||||
data*: seq[uint8]
|
data*: seq[uint8]
|
||||||
|
|
||||||
|
proc newImage*(width, height: int): Image {.raises: [PixieError].} =
|
||||||
|
## Creates a new image with the parameter dimensions.
|
||||||
|
if width <= 0 or height <= 0:
|
||||||
|
raise newException(PixieError, "Image width and height must be > 0")
|
||||||
|
|
||||||
|
result = Image()
|
||||||
|
result.width = width
|
||||||
|
result.height = height
|
||||||
|
result.data = newSeq[ColorRGBX](width * height)
|
||||||
|
|
||||||
|
proc newMask*(width, height: int): Mask {.raises: [PixieError].} =
|
||||||
|
## Creates a new mask with the parameter dimensions.
|
||||||
|
if width <= 0 or height <= 0:
|
||||||
|
raise newException(PixieError, "Mask width and height must be > 0")
|
||||||
|
|
||||||
|
result = Mask()
|
||||||
|
result.width = width
|
||||||
|
result.height = height
|
||||||
|
result.data = newSeq[uint8](width * height)
|
||||||
|
|
||||||
proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} =
|
proc mix*(a, b: uint8, t: float32): uint8 {.inline, raises: [].} =
|
||||||
## Linearly interpolate between a and b using t.
|
## Linearly interpolate between a and b using t.
|
||||||
let t = round(t * 255).uint32
|
let t = round(t * 255).uint32
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal,
|
import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal,
|
||||||
pixie/masks, std/decls, std/sequtils, std/strutils
|
pixie/masks, pixie/simd, std/decls, std/sequtils, std/strutils
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
# This JPEG decoder is loosely based on stb_image which is public domain.
|
# This JPEG decoder is loosely based on stb_image which is public domain.
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
import chroma, flatty/binny, math, pixie/common, pixie/images, pixie/internal,
|
import chroma, flatty/binny, math, pixie/common, pixie/images, pixie/internal,
|
||||||
zippy, zippy/crc
|
pixie/simd, zippy, zippy/crc
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
# See http://www.libpng.org/pub/png/spec/1.2/PNG-Contents.html
|
# See http://www.libpng.org/pub/png/spec/1.2/PNG-Contents.html
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,4 @@
|
||||||
import blends, bumpy, chroma, common, internal, masks, vmath
|
import blends, bumpy, chroma, common, internal, masks, simd, vmath
|
||||||
|
|
||||||
when allowSimd:
|
|
||||||
import simd
|
|
||||||
|
|
||||||
when defined(amd64):
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
const h = 0.5.float32
|
const h = 0.5.float32
|
||||||
|
|
||||||
|
@ -13,27 +7,18 @@ type UnsafeImage = distinct Image
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
proc newImage*(width, height: int): Image {.raises: [PixieError].} =
|
proc newImage*(mask: Mask): Image {.hasSimd, raises: [PixieError].} =
|
||||||
## Creates a new image with the parameter dimensions.
|
|
||||||
if width <= 0 or height <= 0:
|
|
||||||
raise newException(PixieError, "Image width and height must be > 0")
|
|
||||||
|
|
||||||
result = Image()
|
|
||||||
result.width = width
|
|
||||||
result.height = height
|
|
||||||
result.data = newSeq[ColorRGBX](width * height)
|
|
||||||
|
|
||||||
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
|
|
||||||
result = newImage(mask.width, mask.height)
|
result = newImage(mask.width, mask.height)
|
||||||
|
|
||||||
when allowSimd and compiles(newImageFromMaskSimd):
|
|
||||||
newImageFromMaskSimd(result.data, mask.data)
|
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< mask.data.len:
|
for i in 0 ..< mask.data.len:
|
||||||
let v = mask.data[i]
|
let v = mask.data[i]
|
||||||
result.data[i] = rgbx(v, v, v, v)
|
result.data[i] = rgbx(v, v, v, v)
|
||||||
|
|
||||||
|
proc newMask*(image: Image): Mask {.hasSimd, raises: [PixieError].} =
|
||||||
|
## Returns a new mask using the alpha values of the image.
|
||||||
|
result = newMask(image.width, image.height)
|
||||||
|
for i in 0 ..< image.data.len:
|
||||||
|
result.data[i] = image.data[i].a
|
||||||
|
|
||||||
proc copy*(image: Image): Image {.raises: [PixieError].} =
|
proc copy*(image: Image): Image {.raises: [PixieError].} =
|
||||||
## Copies the image data into a new image.
|
## Copies the image data into a new image.
|
||||||
result = newImage(image.width, image.height)
|
result = newImage(image.width, image.height)
|
||||||
|
@ -89,25 +74,17 @@ proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =
|
||||||
## Fills the image with the color.
|
## Fills the image with the color.
|
||||||
fillUnsafe(image.data, color, 0, image.data.len)
|
fillUnsafe(image.data, color, 0, image.data.len)
|
||||||
|
|
||||||
proc isOneColor*(image: Image): bool {.raises: [].} =
|
proc isOneColor*(image: Image): bool {.hasSimd, raises: [].} =
|
||||||
## Checks if the entire image is the same color.
|
## Checks if the entire image is the same color.
|
||||||
when allowSimd and compiles(isOneColorSimd):
|
|
||||||
return isOneColorSimd(image.data)
|
|
||||||
|
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
let color = cast[uint32](image.data[0])
|
let color = cast[uint32](image.data[0])
|
||||||
for i in 0 ..< image.data.len:
|
for i in 0 ..< image.data.len:
|
||||||
if cast[uint32](image.data[i]) != color:
|
if cast[uint32](image.data[i]) != color:
|
||||||
return false
|
return false
|
||||||
|
|
||||||
proc isTransparent*(image: Image): bool {.raises: [].} =
|
proc isTransparent*(image: Image): bool {.hasSimd, raises: [].} =
|
||||||
## Checks if this image is fully transparent or not.
|
## Checks if this image is fully transparent or not.
|
||||||
when allowSimd and compiles(isTransparentSimd):
|
|
||||||
return isTransparentSimd(image.data)
|
|
||||||
|
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
for i in 0 ..< image.data.len:
|
for i in 0 ..< image.data.len:
|
||||||
if image.data[i].a != 0:
|
if image.data[i].a != 0:
|
||||||
return false
|
return false
|
||||||
|
@ -341,46 +318,38 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
|
proc applyOpacity*(target: Image, opacity: float32) {.hasSimd, raises: [].} =
|
||||||
## Multiplies alpha of the image by opacity.
|
## Multiplies alpha of the image by opacity.
|
||||||
let opacity = round(255 * opacity).uint16
|
let opacity = round(255 * opacity).uint16
|
||||||
if opacity == 255:
|
if opacity == 255:
|
||||||
return
|
return
|
||||||
|
|
||||||
if opacity == 0:
|
if opacity == 0:
|
||||||
image.fill(rgbx(0, 0, 0, 0))
|
target.fill(rgbx(0, 0, 0, 0))
|
||||||
return
|
return
|
||||||
|
|
||||||
when allowSimd and compiles(applyOpacitySimd):
|
for i in 0 ..< target.data.len:
|
||||||
applyOpacitySimd(image.data, opacity)
|
var rgbx = target.data[i]
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< image.data.len:
|
|
||||||
var rgbx = image.data[i]
|
|
||||||
rgbx.r = ((rgbx.r * opacity) div 255).uint8
|
rgbx.r = ((rgbx.r * opacity) div 255).uint8
|
||||||
rgbx.g = ((rgbx.g * opacity) div 255).uint8
|
rgbx.g = ((rgbx.g * opacity) div 255).uint8
|
||||||
rgbx.b = ((rgbx.b * opacity) div 255).uint8
|
rgbx.b = ((rgbx.b * opacity) div 255).uint8
|
||||||
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||||
image.data[i] = rgbx
|
target.data[i] = rgbx
|
||||||
|
|
||||||
proc invert*(image: Image) {.raises: [].} =
|
proc invert*(target: Image) {.hasSimd, raises: [].} =
|
||||||
## Inverts all of the colors and alpha.
|
## Inverts all of the colors and alpha.
|
||||||
when allowSimd and compiles(invertImageSimd):
|
for i in 0 ..< target.data.len:
|
||||||
invertImageSimd(image.data)
|
var rgbx = target.data[i]
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< image.data.len:
|
|
||||||
var rgbx = image.data[i]
|
|
||||||
rgbx.r = 255 - rgbx.r
|
rgbx.r = 255 - rgbx.r
|
||||||
rgbx.g = 255 - rgbx.g
|
rgbx.g = 255 - rgbx.g
|
||||||
rgbx.b = 255 - rgbx.b
|
rgbx.b = 255 - rgbx.b
|
||||||
rgbx.a = 255 - rgbx.a
|
rgbx.a = 255 - rgbx.a
|
||||||
image.data[i] = rgbx
|
target.data[i] = rgbx
|
||||||
|
|
||||||
# Inverting rgbx(50, 100, 150, 200) becomes rgbx(205, 155, 105, 55). This
|
# Inverting rgbx(50, 100, 150, 200) becomes rgbx(205, 155, 105, 55). This
|
||||||
# is not a valid premultiplied alpha color.
|
# is not a valid premultiplied alpha color.
|
||||||
# We need to convert back to premultiplied alpha after inverting.
|
# We need to convert back to premultiplied alpha after inverting.
|
||||||
image.data.toPremultipliedAlpha()
|
target.data.toPremultipliedAlpha()
|
||||||
|
|
||||||
proc blur*(
|
proc blur*(
|
||||||
image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0)
|
image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0)
|
||||||
|
@ -443,17 +412,6 @@ proc blur*(
|
||||||
values += outOfBounds * kernel[yy - y + radius]
|
values += outOfBounds * kernel[yy - y + radius]
|
||||||
image.unsafe[x, y] = rgbx(values)
|
image.unsafe[x, y] = rgbx(values)
|
||||||
|
|
||||||
proc newMask*(image: Image): Mask {.raises: [PixieError].} =
|
|
||||||
## Returns a new mask using the alpha values of the image.
|
|
||||||
result = newMask(image.width, image.height)
|
|
||||||
|
|
||||||
when allowSimd and compiles(newMaskFromImageSimd):
|
|
||||||
newMaskFromImageSimd(result.data, image.data)
|
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< image.data.len:
|
|
||||||
result.data[i] = image.data[i].a
|
|
||||||
|
|
||||||
proc getRgbaSmooth*(
|
proc getRgbaSmooth*(
|
||||||
image: Image, x, y: float32, wrapped = false
|
image: Image, x, y: float32, wrapped = false
|
||||||
): ColorRGBX {.raises: [].} =
|
): ColorRGBX {.raises: [].} =
|
||||||
|
|
|
@ -1,12 +1,4 @@
|
||||||
import bumpy, chroma, common, system/memory, vmath
|
import bumpy, chroma, common, simd, system/memory, vmath
|
||||||
|
|
||||||
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)
|
|
||||||
|
|
||||||
when allowSimd:
|
|
||||||
import simd
|
|
||||||
|
|
||||||
when defined(amd64):
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
template currentExceptionAsPixieError*(): untyped =
|
template currentExceptionAsPixieError*(): untyped =
|
||||||
## Gets the current exception and returns it as a PixieError with stack trace.
|
## Gets the current exception and returns it as a PixieError with stack trace.
|
||||||
|
@ -76,7 +68,7 @@ proc fillUnsafe*(
|
||||||
|
|
||||||
proc fillUnsafe*(
|
proc fillUnsafe*(
|
||||||
data: var seq[ColorRGBX], color: SomeColor, start, len: int
|
data: var seq[ColorRGBX], color: SomeColor, start, len: int
|
||||||
) {.raises: [].} =
|
) {.hasSimd, raises: [].} =
|
||||||
## Fills the image data with the color starting at index start and
|
## Fills the image data with the color starting at index start and
|
||||||
## continuing for len indices.
|
## continuing for len indices.
|
||||||
when allowSimd and compiles(fillUnsafeSimd):
|
when allowSimd and compiles(fillUnsafeSimd):
|
||||||
|
@ -110,12 +102,10 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
||||||
c.b = straightAlphaTable[c.a][c.b]
|
c.b = straightAlphaTable[c.a][c.b]
|
||||||
data[i] = c
|
data[i] = c
|
||||||
|
|
||||||
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
proc toPremultipliedAlpha*(
|
||||||
|
data: var seq[ColorRGBA | ColorRGBX]
|
||||||
|
) {.hasSimd, raises: [].} =
|
||||||
## Converts an image to premultiplied alpha from straight alpha.
|
## Converts an image to premultiplied alpha from straight alpha.
|
||||||
when allowSimd and compiles(toPremultipliedAlphaSimd):
|
|
||||||
toPremultipliedAlphaSimd(data)
|
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< data.len:
|
for i in 0 ..< data.len:
|
||||||
var c = data[i]
|
var c = data[i]
|
||||||
if c.a != 255:
|
if c.a != 255:
|
||||||
|
@ -124,25 +114,15 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
|
||||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
||||||
data[i] = c
|
data[i] = c
|
||||||
|
|
||||||
proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
|
proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool {.hasSimd.} =
|
||||||
when allowSimd and compiles(isOpaqueSimd):
|
|
||||||
return isOpaqueSimd(data, start, len)
|
|
||||||
|
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
for i in start ..< start + len:
|
for i in start ..< start + len:
|
||||||
if data[i].a != 255:
|
if data[i].a != 255:
|
||||||
return false
|
return false
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
when defined(amd64) and allowSimd:
|
||||||
proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
|
import simd/todo
|
||||||
let opacityVec = mm_set1_ps(opacity)
|
export todo
|
||||||
var finalColor = mm_cvtps_epi32(mm_mul_ps(color, opacityVec))
|
|
||||||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
|
||||||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
|
||||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
|
||||||
|
|
||||||
export pack4xAlphaValues, unpackAlphaValues
|
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -1,26 +1,10 @@
|
||||||
import common, internal, vmath
|
import common, internal, simd, vmath
|
||||||
|
|
||||||
when allowSimd:
|
|
||||||
import simd
|
|
||||||
|
|
||||||
when defined(amd64):
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
type UnsafeMask = distinct Mask
|
type UnsafeMask = distinct Mask
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
proc newMask*(width, height: int): Mask {.raises: [PixieError].} =
|
|
||||||
## Creates a new mask with the parameter dimensions.
|
|
||||||
if width <= 0 or height <= 0:
|
|
||||||
raise newException(PixieError, "Mask width and height must be > 0")
|
|
||||||
|
|
||||||
result = Mask()
|
|
||||||
result.width = width
|
|
||||||
result.height = height
|
|
||||||
result.data = newSeq[uint8](width * height)
|
|
||||||
|
|
||||||
proc copy*(mask: Mask): Mask {.raises: [PixieError].} =
|
proc copy*(mask: Mask): Mask {.raises: [PixieError].} =
|
||||||
## Copies the mask data into a new mask.
|
## Copies the mask data into a new mask.
|
||||||
result = newMask(mask.width, mask.height)
|
result = newMask(mask.width, mask.height)
|
||||||
|
@ -180,22 +164,18 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
||||||
result.width * 4
|
result.width * 4
|
||||||
)
|
)
|
||||||
|
|
||||||
proc applyOpacity*(mask: Mask, opacity: float32) {.raises: [].} =
|
proc applyOpacity*(target: Mask, opacity: float32) {.hasSimd, raises: [].} =
|
||||||
## Multiplies the values of the mask by opacity.
|
## Multiplies the values of the mask by opacity.
|
||||||
let opacity = round(255 * opacity).uint16
|
let opacity = round(255 * opacity).uint16
|
||||||
if opacity == 255:
|
if opacity == 255:
|
||||||
return
|
return
|
||||||
|
|
||||||
if opacity == 0:
|
if opacity == 0:
|
||||||
mask.fill(0)
|
target.fill(0)
|
||||||
return
|
return
|
||||||
|
|
||||||
when allowSimd and compiles(applyOpacitySimd):
|
for i in 0 ..< target.data.len:
|
||||||
applyOpacitySimd(mask.data, opacity)
|
target.data[i] = ((target.data[i] * opacity) div 255).uint8
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< mask.data.len:
|
|
||||||
mask.data[i] = ((mask.data[i] * opacity) div 255).uint8
|
|
||||||
|
|
||||||
proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
||||||
## Gets an interpolated value with floating point coordinates.
|
## Gets an interpolated value with floating point coordinates.
|
||||||
|
@ -225,14 +205,10 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
||||||
else:
|
else:
|
||||||
topMix
|
topMix
|
||||||
|
|
||||||
proc invert*(mask: Mask) {.raises: [].} =
|
proc invert*(target: Mask) {.hasSimd, raises: [].} =
|
||||||
## Inverts all of the values - creates a negative of the mask.
|
## Inverts all of the values - creates a negative of the mask.
|
||||||
when allowSimd and compiles(invertMaskSimd):
|
for i in 0 ..< target.data.len:
|
||||||
invertMaskSimd(mask.data)
|
target.data[i] = 255 - target.data[i]
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< mask.data.len:
|
|
||||||
mask.data[i] = 255 - mask.data[i]
|
|
||||||
|
|
||||||
proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
|
proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
|
||||||
## Grows the mask by spread.
|
## Grows the mask by spread.
|
||||||
|
@ -295,12 +271,8 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
|
||||||
break
|
break
|
||||||
mask.unsafe[x, y] = maxValue
|
mask.unsafe[x, y] = maxValue
|
||||||
|
|
||||||
proc ceil*(mask: Mask) {.raises: [].} =
|
proc ceil*(mask: Mask) {.hasSimd, raises: [].} =
|
||||||
## A value of 0 stays 0. Anything else turns into 255.
|
## A value of 0 stays 0. Anything else turns into 255.
|
||||||
when allowSimd and compiles(invertImageSimd):
|
|
||||||
ceilMaskSimd(mask.data)
|
|
||||||
return
|
|
||||||
|
|
||||||
for i in 0 ..< mask.data.len:
|
for i in 0 ..< mask.data.len:
|
||||||
if mask.data[i] != 0:
|
if mask.data[i] != 0:
|
||||||
mask.data[i] = 255
|
mask.data[i] = 255
|
||||||
|
|
|
@ -1,7 +1,4 @@
|
||||||
import chroma, common, images, internal, vmath
|
import chroma, common, images, simd, vmath
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
type
|
type
|
||||||
PaintKind* = enum
|
PaintKind* = enum
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
import blends, bumpy, chroma, common, images, internal, masks, paints, std/fenv,
|
import blends, bumpy, chroma, common, images, internal, masks, paints, simd,
|
||||||
std/strutils, vmath
|
std/fenv, std/strutils, vmath
|
||||||
|
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
import nimsimd/sse2
|
|
||||||
|
|
||||||
type
|
type
|
||||||
WindingRule* = enum
|
WindingRule* = enum
|
||||||
|
|
|
@ -1,392 +1,57 @@
|
||||||
import chroma
|
import simd/internal, std/macros, std/tables
|
||||||
|
|
||||||
when defined(release):
|
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)
|
||||||
{.push checks: off.}
|
|
||||||
|
|
||||||
when defined(amd64):
|
macro hasSimd*(procedure: untyped) =
|
||||||
import nimsimd/runtimecheck, nimsimd/sse2, simd/avx, simd/avx2
|
let
|
||||||
|
name = procedure.procName()
|
||||||
|
args = procedure.procArguments()
|
||||||
|
originalBody = procedure[6]
|
||||||
|
nameSse2 = name & "Sse2"
|
||||||
|
nameAvx = name & "Avx"
|
||||||
|
nameAvx2 = name & "Avx2"
|
||||||
|
callAvx = call(ident(nameAvx), args)
|
||||||
|
callAvx2 = call(ident(nameAvx2), args)
|
||||||
|
|
||||||
|
var body = newStmtList()
|
||||||
|
|
||||||
|
when not defined(pixieNoAvx):
|
||||||
|
if nameAvx2 in simdProcs:
|
||||||
|
body.add quote do:
|
||||||
|
if cpuHasAvx2:
|
||||||
|
forceReturn `callAvx2`
|
||||||
|
|
||||||
|
if nameAvx in simdProcs:
|
||||||
|
body.add quote do:
|
||||||
|
if cpuHasAvx:
|
||||||
|
forceReturn `callAvx`
|
||||||
|
|
||||||
|
if nameSse2 in simdProcs:
|
||||||
|
let bodySse2 = simdProcs[nameSse2][6]
|
||||||
|
body.add quote do:
|
||||||
|
`bodySse2`
|
||||||
|
else:
|
||||||
|
body.add quote do:
|
||||||
|
echo "using ", `name`, " scalar"
|
||||||
|
`originalBody`
|
||||||
|
|
||||||
|
procedure[6] = body
|
||||||
|
|
||||||
|
return procedure
|
||||||
|
|
||||||
|
when allowSimd and defined(amd64):
|
||||||
|
import simd/sse2, simd/avx, simd/avx2
|
||||||
|
export sse2, avx, avx2
|
||||||
|
|
||||||
|
when defined(pixieNoAvx):
|
||||||
|
const
|
||||||
|
cpuHasAvx* = false
|
||||||
|
cpuHasAvx2* = false
|
||||||
|
else:
|
||||||
|
import nimsimd/runtimecheck
|
||||||
let
|
let
|
||||||
cpuHasAvx* = checkInstructionSets({AVX})
|
cpuHasAvx* = checkInstructionSets({AVX})
|
||||||
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})
|
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})
|
||||||
|
|
||||||
proc packAlphaValues(v: M128i): M128i {.inline.} =
|
import nimsimd/sse2 as nimsimdsse2
|
||||||
## Shuffle the alpha values for these 4 colors to the first 4 bytes.
|
export nimsimdsse2
|
||||||
result = mm_srli_epi32(v, 24)
|
|
||||||
result = mm_packus_epi16(result, mm_setzero_si128())
|
|
||||||
result = mm_packus_epi16(result, mm_setzero_si128())
|
|
||||||
|
|
||||||
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline.} =
|
|
||||||
let
|
|
||||||
i = packAlphaValues(i)
|
|
||||||
j = mm_slli_si128(packAlphaValues(j), 4)
|
|
||||||
k = mm_slli_si128(packAlphaValues(k), 8)
|
|
||||||
l = mm_slli_si128(packAlphaValues(l), 12)
|
|
||||||
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l))
|
|
||||||
|
|
||||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
|
||||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
|
||||||
|
|
||||||
proc fillUnsafeSimd*(
|
|
||||||
data: var seq[ColorRGBX],
|
|
||||||
start, len: int,
|
|
||||||
color: SomeColor
|
|
||||||
) =
|
|
||||||
if cpuHasAvx:
|
|
||||||
fillUnsafeAvx(data, start, len, color)
|
|
||||||
return
|
|
||||||
|
|
||||||
let rgbx = color.asRgbx()
|
|
||||||
|
|
||||||
var
|
|
||||||
i = start
|
|
||||||
p = cast[uint](data[i].addr)
|
|
||||||
# Align to 16 bytes
|
|
||||||
while i < (start + len) and (p and 15) != 0:
|
|
||||||
data[i] = rgbx
|
|
||||||
inc i
|
|
||||||
p += 4
|
|
||||||
|
|
||||||
let
|
|
||||||
colorVec = mm_set1_epi32(cast[int32](rgbx))
|
|
||||||
iterations = (start + len - i) div 8
|
|
||||||
for _ in 0 ..< iterations:
|
|
||||||
mm_store_si128(cast[pointer](p), colorVec)
|
|
||||||
mm_store_si128(cast[pointer](p + 16), colorVec)
|
|
||||||
p += 32
|
|
||||||
i += iterations * 8
|
|
||||||
|
|
||||||
for i in i ..< start + len:
|
|
||||||
data[i] = rgbx
|
|
||||||
|
|
||||||
proc isOneColorSimd*(data: var seq[ColorRGBX]): bool =
|
|
||||||
if cpuHasAvx2:
|
|
||||||
return isOneColorAvx2(data)
|
|
||||||
|
|
||||||
result = true
|
|
||||||
|
|
||||||
let color = data[0]
|
|
||||||
|
|
||||||
var
|
|
||||||
i: int
|
|
||||||
p = cast[uint](data[0].addr)
|
|
||||||
# Align to 16 bytes
|
|
||||||
while i < data.len and (p and 15) != 0:
|
|
||||||
if data[i] != color:
|
|
||||||
return false
|
|
||||||
inc i
|
|
||||||
p += 4
|
|
||||||
|
|
||||||
let
|
|
||||||
colorVec = mm_set1_epi32(cast[int32](color))
|
|
||||||
iterations = (data.len - i) div 16
|
|
||||||
for _ in 0 ..< iterations:
|
|
||||||
let
|
|
||||||
values0 = mm_load_si128(cast[pointer](p))
|
|
||||||
values1 = mm_load_si128(cast[pointer](p + 16))
|
|
||||||
values2 = mm_load_si128(cast[pointer](p + 32))
|
|
||||||
values3 = mm_load_si128(cast[pointer](p + 48))
|
|
||||||
eq0 = mm_cmpeq_epi8(values0, colorVec)
|
|
||||||
eq1 = mm_cmpeq_epi8(values1, colorVec)
|
|
||||||
eq2 = mm_cmpeq_epi8(values2, colorVec)
|
|
||||||
eq3 = mm_cmpeq_epi8(values3, colorVec)
|
|
||||||
eq0123 = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3))
|
|
||||||
if mm_movemask_epi8(eq0123) != 0xffff:
|
|
||||||
return false
|
|
||||||
p += 64
|
|
||||||
i += 16 * iterations
|
|
||||||
|
|
||||||
for i in i ..< data.len:
|
|
||||||
if data[i] != color:
|
|
||||||
return false
|
|
||||||
|
|
||||||
proc isTransparentSimd*(data: var seq[ColorRGBX]): bool =
|
|
||||||
if cpuHasAvx2:
|
|
||||||
return isTransparentAvx2(data)
|
|
||||||
|
|
||||||
var
|
|
||||||
i: int
|
|
||||||
p = cast[uint](data[0].addr)
|
|
||||||
# Align to 16 bytes
|
|
||||||
while i < data.len and (p and 15) != 0:
|
|
||||||
if data[i].a != 0:
|
|
||||||
return false
|
|
||||||
inc i
|
|
||||||
p += 4
|
|
||||||
|
|
||||||
result = true
|
|
||||||
|
|
||||||
let
|
|
||||||
vecZero = mm_setzero_si128()
|
|
||||||
iterations = (data.len - i) div 16
|
|
||||||
for _ in 0 ..< iterations:
|
|
||||||
let
|
|
||||||
values0 = mm_load_si128(cast[pointer](p))
|
|
||||||
values1 = mm_load_si128(cast[pointer](p + 16))
|
|
||||||
values2 = mm_load_si128(cast[pointer](p + 32))
|
|
||||||
values3 = mm_load_si128(cast[pointer](p + 48))
|
|
||||||
values01 = mm_or_si128(values0, values1)
|
|
||||||
values23 = mm_or_si128(values2, values3)
|
|
||||||
values0123 = mm_or_si128(values01, values23)
|
|
||||||
if mm_movemask_epi8(mm_cmpeq_epi8(values0123, vecZero)) != 0xffff:
|
|
||||||
return false
|
|
||||||
p += 64
|
|
||||||
i += 16 * iterations
|
|
||||||
|
|
||||||
for i in i ..< data.len:
|
|
||||||
if data[i].a != 0:
|
|
||||||
return false
|
|
||||||
|
|
||||||
proc isOpaqueSimd*(data: var seq[ColorRGBX], start, len: int): bool =
|
|
||||||
if cpuHasAvx2:
|
|
||||||
return isOpaqueAvx2(data, start, len)
|
|
||||||
|
|
||||||
result = true
|
|
||||||
|
|
||||||
var
|
|
||||||
i = start
|
|
||||||
p = cast[uint](data[0].addr)
|
|
||||||
# Align to 16 bytes
|
|
||||||
while i < (start + len) and (p and 15) != 0:
|
|
||||||
if data[i].a != 255:
|
|
||||||
return false
|
|
||||||
inc i
|
|
||||||
p += 4
|
|
||||||
|
|
||||||
let
|
|
||||||
vec255 = mm_set1_epi8(255)
|
|
||||||
iterations = (start + len - i) div 16
|
|
||||||
for _ in 0 ..< iterations:
|
|
||||||
let
|
|
||||||
values0 = mm_load_si128(cast[pointer](p))
|
|
||||||
values1 = mm_load_si128(cast[pointer](p + 16))
|
|
||||||
values2 = mm_load_si128(cast[pointer](p + 32))
|
|
||||||
values3 = mm_load_si128(cast[pointer](p + 48))
|
|
||||||
values01 = mm_and_si128(values0, values1)
|
|
||||||
values23 = mm_and_si128(values2, values3)
|
|
||||||
values0123 = mm_and_si128(values01, values23)
|
|
||||||
eq = mm_cmpeq_epi8(values0123, vec255)
|
|
||||||
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
|
|
||||||
return false
|
|
||||||
p += 64
|
|
||||||
i += 16 * iterations
|
|
||||||
|
|
||||||
for i in i ..< start + len:
|
|
||||||
if data[i].a != 255:
|
|
||||||
return false
|
|
||||||
|
|
||||||
proc toPremultipliedAlphaSimd*(data: var seq[ColorRGBA | ColorRGBX]) =
|
|
||||||
if cpuHasAvx2:
|
|
||||||
toPremultipliedAlphaAvx2(data)
|
|
||||||
return
|
|
||||||
|
|
||||||
var i: int
|
|
||||||
|
|
||||||
let
|
|
||||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
|
||||||
oddMask = mm_set1_epi16(0xff00)
|
|
||||||
div255 = mm_set1_epi16(0x8081)
|
|
||||||
iterations = data.len div 4
|
|
||||||
for _ in 0 ..< iterations:
|
|
||||||
let
|
|
||||||
values = mm_loadu_si128(data[i].addr)
|
|
||||||
alpha = mm_and_si128(values, alphaMask)
|
|
||||||
eq = mm_cmpeq_epi8(values, alphaMask)
|
|
||||||
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
|
|
||||||
let
|
|
||||||
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
|
|
||||||
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
|
|
||||||
var
|
|
||||||
colorsEven = mm_slli_epi16(values, 8)
|
|
||||||
colorsOdd = mm_and_si128(values, oddMask)
|
|
||||||
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
|
|
||||||
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
|
|
||||||
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
|
|
||||||
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
|
|
||||||
mm_storeu_si128(
|
|
||||||
data[i].addr,
|
|
||||||
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
|
|
||||||
)
|
|
||||||
i += 4
|
|
||||||
|
|
||||||
for i in i ..< data.len:
|
|
||||||
var c = data[i]
|
|
||||||
if c.a != 255:
|
|
||||||
c.r = ((c.r.uint32 * c.a) div 255).uint8
|
|
||||||
c.g = ((c.g.uint32 * c.a) div 255).uint8
|
|
||||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
|
||||||
data[i] = c
|
|
||||||
|
|
||||||
proc newImageFromMaskSimd*(dst: var seq[ColorRGBX], src: var seq[uint8]) =
|
|
||||||
var i: int
|
|
||||||
for _ in 0 ..< src.len div 16:
|
|
||||||
var alphas = mm_loadu_si128(src[i].addr)
|
|
||||||
for j in 0 ..< 4:
|
|
||||||
var unpacked = unpackAlphaValues(alphas)
|
|
||||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 8))
|
|
||||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
|
||||||
mm_storeu_si128(dst[i + j * 4].addr, unpacked)
|
|
||||||
alphas = mm_srli_si128(alphas, 4)
|
|
||||||
i += 16
|
|
||||||
|
|
||||||
for i in i ..< src.len:
|
|
||||||
let v = src[i]
|
|
||||||
dst[i] = rgbx(v, v, v, v)
|
|
||||||
|
|
||||||
proc newMaskFromImageSimd*(dst: var seq[uint8], src: var seq[ColorRGBX]) =
|
|
||||||
var i: int
|
|
||||||
for _ in 0 ..< src.len div 16:
|
|
||||||
let
|
|
||||||
a = mm_loadu_si128(src[i + 0].addr)
|
|
||||||
b = mm_loadu_si128(src[i + 4].addr)
|
|
||||||
c = mm_loadu_si128(src[i + 8].addr)
|
|
||||||
d = mm_loadu_si128(src[i + 12].addr)
|
|
||||||
mm_storeu_si128(
|
|
||||||
dst[i].addr,
|
|
||||||
pack4xAlphaValues(a, b, c, d)
|
|
||||||
)
|
|
||||||
i += 16
|
|
||||||
|
|
||||||
for i in i ..< src.len:
|
|
||||||
dst[i] = src[i].a
|
|
||||||
|
|
||||||
proc invertImageSimd*(data: var seq[ColorRGBX]) =
  ## Inverts all four channels of every pixel in `data` in place, then
  ## runs `toPremultipliedAlphaSimd` over the result.
  ##
  ## Pixels are inverted one at a time until the write pointer is 16-byte
  ## aligned, then 16 pixels (64 bytes) per iteration with aligned SSE2
  ## loads and stores, then one at a time for whatever is left.
  template invertPixel(idx: int) =
    var rgbx = data[idx]
    rgbx.r = 255 - rgbx.r
    rgbx.g = 255 - rgbx.g
    rgbx.b = 255 - rgbx.b
    rgbx.a = 255 - rgbx.a
    data[idx] = rgbx

  var
    i: int
    p = cast[uint](data[0].addr)
  # Align to 16 bytes
  while i < data.len and (p and 15) != 0:
    invertPixel(i)
    inc i
    p += 4

  let
    vec255 = mm_set1_epi8(255)
    # Count only the pixels that remain after the alignment prologue.
    # Using `data.len div 16` here would let the aligned loop run past
    # the end of the buffer whenever the prologue consumed pixels.
    iterations = (data.len - i) div 16
  for _ in 0 ..< iterations:
    let
      a = mm_load_si128(cast[pointer](p))
      b = mm_load_si128(cast[pointer](p + 16))
      c = mm_load_si128(cast[pointer](p + 32))
      d = mm_load_si128(cast[pointer](p + 48))
    mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
    mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
    mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
    mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
    p += 64
  i += 16 * iterations

  # Scalar tail
  for i in i ..< data.len:
    invertPixel(i)

  toPremultipliedAlphaSimd(data)
|
|
||||||
|
|
||||||
proc invertMaskSimd*(data: var seq[uint8]) =
  ## Replaces every value `v` in `data` with `255 - v`, in place.
  ##
  ## Bytes are processed one at a time until the write pointer is 16-byte
  ## aligned, then 64 bytes per iteration with aligned SSE2 loads and
  ## stores, then one at a time for the remainder.
  var
    i: int
    p = cast[uint](data[0].addr)
  # Align to 16 bytes
  while i < data.len and (p and 15) != 0:
    data[i] = 255 - data[i]
    inc i
    inc p

  let
    vec255 = mm_set1_epi8(255)
    # Count only the bytes that remain after the alignment prologue.
    # Using `data.len div 64` here would let the aligned loop run past
    # the end of the buffer whenever the prologue consumed bytes.
    iterations = (data.len - i) div 64
  for _ in 0 ..< iterations:
    let
      a = mm_load_si128(cast[pointer](p))
      b = mm_load_si128(cast[pointer](p + 16))
      c = mm_load_si128(cast[pointer](p + 32))
      d = mm_load_si128(cast[pointer](p + 48))
    mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
    mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
    mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
    mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
    p += 64
  i += 64 * iterations

  # Scalar tail
  for i in i ..< data.len:
    data[i] = 255 - data[i]
|
|
||||||
|
|
||||||
proc ceilMaskSimd*(data: var seq[uint8]) =
  ## Sets every non-zero value in `data` to 255 and leaves zeros
  ## untouched, in place.
  let
    allZero = mm_setzero_si128()
    allOnes = mm_set1_epi8(255)
    chunks = data.len div 16
  var cursor = cast[uint](data[0].addr)
  # 16 bytes per step using unaligned loads and stores.
  for _ in 0 ..< chunks:
    # 0xff where the input byte is zero, 0x00 elsewhere.
    let isZero = mm_cmpeq_epi8(mm_loadu_si128(cast[pointer](cursor)), allZero)
    # (not isZero) and 0xff: non-zero bytes become 255, zero bytes stay 0.
    mm_storeu_si128(cast[pointer](cursor), mm_andnot_si128(isZero, allOnes))
    cursor += 16

  # Scalar tail
  for idx in 16 * chunks ..< data.len:
    if data[idx] != 0:
      data[idx] = 255
|
|
||||||
|
|
||||||
proc applyOpacitySimd*(data: var seq[uint8 | ColorRGBX], opacity: uint16) =
  ## Scales every byte of `data` by `opacity`, in place. The scalar tail
  ## computes `(value * opacity) div 255` per channel.
  ##
  ## The buffer is handled as raw bytes, so a `ColorRGBX` sequence
  ## contributes four bytes per element.
  let
    byteLen =
      when data is seq[ColorRGBX]:
        data.len * 4
      else:
        data.len
    chunks = byteLen div 16
    highByteMask = mm_set1_epi16(0xff00)
    div255 = mm_set1_epi16(0x8081)
    allZero = mm_setzero_si128()
    opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
  var cursor = cast[uint](data[0].addr)
  for _ in 0 ..< chunks:
    let values = mm_loadu_si128(cast[pointer](cursor))
    # A chunk that is entirely zero stays zero, so skip the arithmetic.
    if mm_movemask_epi8(mm_cmpeq_epi16(values, allZero)) != 0xffff:
      let
        # Even and odd bytes are moved to the high half of each 16-bit
        # lane and multiplied by the opacity separately.
        evenScaled = mm_mulhi_epu16(mm_slli_epi16(values, 8), opacityVec)
        oddScaled = mm_mulhi_epu16(
          mm_and_si128(values, highByteMask), opacityVec
        )
        # Multiply-high by 0x8081 followed by a right shift of 7 is the
        # vector form of the `div 255` used in the scalar tail.
        evenOut = mm_srli_epi16(mm_mulhi_epu16(evenScaled, div255), 7)
        oddOut = mm_srli_epi16(mm_mulhi_epu16(oddScaled, div255), 7)
      mm_storeu_si128(
        cast[pointer](cursor),
        mm_or_si128(evenOut, mm_slli_epi16(oddOut, 8))
      )
    cursor += 16

  # Scalar tail; `firstTailByte` is a byte offset.
  let firstTailByte = 16 * chunks
  when data is seq[ColorRGBX]:
    for idx in firstTailByte div 4 ..< data.len:
      var px = data[idx]
      px.r = ((px.r * opacity) div 255).uint8
      px.g = ((px.g * opacity) div 255).uint8
      px.b = ((px.b * opacity) div 255).uint8
      px.a = ((px.a * opacity) div 255).uint8
      data[idx] = px
  else:
    for idx in firstTailByte ..< data.len:
      data[idx] = ((data[idx] * opacity) div 255).uint8
|
|
||||||
|
|
||||||
when defined(release):
|
|
||||||
{.pop.}
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import chroma, nimsimd/avx
|
import chroma, internal, nimsimd/avx
|
||||||
|
|
||||||
when defined(gcc) or defined(clang):
|
when defined(gcc) or defined(clang):
|
||||||
{.localPassc: "-mavx".}
|
{.localPassc: "-mavx".}
|
||||||
|
@ -8,9 +8,9 @@ when defined(release):
|
||||||
|
|
||||||
proc fillUnsafeAvx*(
|
proc fillUnsafeAvx*(
|
||||||
data: var seq[ColorRGBX],
|
data: var seq[ColorRGBX],
|
||||||
start, len: int,
|
color: SomeColor,
|
||||||
color: SomeColor
|
start, len: int
|
||||||
) =
|
) {.simd.} =
|
||||||
let rgbx = color.asRgbx()
|
let rgbx = color.asRgbx()
|
||||||
|
|
||||||
var
|
var
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import chroma, nimsimd/avx2
|
import chroma, internal, nimsimd/avx2, pixie/common
|
||||||
|
|
||||||
when defined(gcc) or defined(clang):
|
when defined(gcc) or defined(clang):
|
||||||
{.localPassc: "-mavx2".}
|
{.localPassc: "-mavx2".}
|
||||||
|
@ -6,25 +6,25 @@ when defined(gcc) or defined(clang):
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
proc isOneColorAvx2*(data: var seq[ColorRGBX]): bool =
|
proc isOneColorAvx2*(image: Image): bool {.simd.} =
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
let color = data[0]
|
let color = image.data[0]
|
||||||
|
|
||||||
var i: int
|
var i: int
|
||||||
# Align to 32 bytes
|
# Align to 32 bytes
|
||||||
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
|
while i < image.data.len and (cast[uint](image.data[i].addr) and 31) != 0:
|
||||||
if data[i] != color:
|
if image.data[i] != color:
|
||||||
return false
|
return false
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
let
|
let
|
||||||
colorVec = mm256_set1_epi32(cast[int32](color))
|
colorVec = mm256_set1_epi32(cast[int32](color))
|
||||||
iterations = (data.len - i) div 16
|
iterations = (image.data.len - i) div 16
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
let
|
let
|
||||||
values0 = mm256_load_si256(data[i].addr)
|
values0 = mm256_load_si256(image.data[i].addr)
|
||||||
values1 = mm256_load_si256(data[i + 8].addr)
|
values1 = mm256_load_si256(image.data[i + 8].addr)
|
||||||
eq0 = mm256_cmpeq_epi8(values0, colorVec)
|
eq0 = mm256_cmpeq_epi8(values0, colorVec)
|
||||||
eq1 = mm256_cmpeq_epi8(values1, colorVec)
|
eq1 = mm256_cmpeq_epi8(values1, colorVec)
|
||||||
eq01 = mm256_and_si256(eq0, eq1)
|
eq01 = mm256_and_si256(eq0, eq1)
|
||||||
|
@ -32,38 +32,38 @@ proc isOneColorAvx2*(data: var seq[ColorRGBX]): bool =
|
||||||
return false
|
return false
|
||||||
i += 16
|
i += 16
|
||||||
|
|
||||||
for i in i ..< data.len:
|
for i in i ..< image.data.len:
|
||||||
if data[i] != color:
|
if image.data[i] != color:
|
||||||
return false
|
return false
|
||||||
|
|
||||||
proc isTransparentAvx2*(data: var seq[ColorRGBX]): bool =
|
proc isTransparentAvx2*(image: Image): bool {.simd.} =
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
var i: int
|
var i: int
|
||||||
# Align to 32 bytes
|
# Align to 32 bytes
|
||||||
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
|
while i < image.data.len and (cast[uint](image.data[i].addr) and 31) != 0:
|
||||||
if data[i].a != 0:
|
if image.data[i].a != 0:
|
||||||
return false
|
return false
|
||||||
inc i
|
inc i
|
||||||
|
|
||||||
let
|
let
|
||||||
vecZero = mm256_setzero_si256()
|
vecZero = mm256_setzero_si256()
|
||||||
iterations = (data.len - i) div 16
|
iterations = (image.data.len - i) div 16
|
||||||
for _ in 0 ..< iterations:
|
for _ in 0 ..< iterations:
|
||||||
let
|
let
|
||||||
values0 = mm256_load_si256(data[i].addr)
|
values0 = mm256_load_si256(image.data[i].addr)
|
||||||
values1 = mm256_load_si256(data[i + 8].addr)
|
values1 = mm256_load_si256(image.data[i + 8].addr)
|
||||||
values01 = mm256_or_si256(values0, values1)
|
values01 = mm256_or_si256(values0, values1)
|
||||||
eq = mm256_cmpeq_epi8(values01, vecZero)
|
eq = mm256_cmpeq_epi8(values01, vecZero)
|
||||||
if mm256_movemask_epi8(eq) != cast[int32](0xffffffff):
|
if mm256_movemask_epi8(eq) != cast[int32](0xffffffff):
|
||||||
return false
|
return false
|
||||||
i += 16
|
i += 16
|
||||||
|
|
||||||
for i in i ..< data.len:
|
for i in i ..< image.data.len:
|
||||||
if data[i].a != 0:
|
if image.data[i].a != 0:
|
||||||
return false
|
return false
|
||||||
|
|
||||||
proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool =
|
proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
var i = start
|
var i = start
|
||||||
|
@ -90,7 +90,7 @@ proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool =
|
||||||
if data[i].a != 255:
|
if data[i].a != 255:
|
||||||
return false
|
return false
|
||||||
|
|
||||||
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) =
|
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
|
||||||
var i: int
|
var i: int
|
||||||
|
|
||||||
let
|
let
|
||||||
|
|
39
src/pixie/simd/internal.nim
Normal file
39
src/pixie/simd/internal.nim
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
import std/macros, std/tables
|
||||||
|
|
||||||
|
# Compile-time table filled by the `simd` macro in this module: maps a
# procedure's name to a copy of that procedure's AST.
var simdProcs* {.compiletime.}: Table[string, NimNode]
|
||||||
|
|
||||||
|
template forceReturn*(procedure: untyped) =
  ## Expands to `return procedure` when that statement compiles at the
  ## call site (the expression yields a value the enclosing routine can
  ## return). Otherwise expands to the bare expression followed by a
  ## plain `return`.
  when not compiles(block: return procedure):
    procedure
    return
  else:
    return procedure
|
||||||
|
|
||||||
|
proc procName*(procedure: NimNode): string =
  ## Returns the name of a procedure definition node as a string.
  ## When the name node is a postfix node (as for an exported `name*`),
  ## the identifier inside it is used.
  case procedure[0].kind
  of nnkPostfix:
    result = procedure[0][1].strVal
  else:
    result = procedure[0].strVal
|
||||||
|
|
||||||
|
proc procArguments*(procedure: NimNode): seq[NimNode] =
  ## Collects the argument name nodes of a procedure definition, in
  ## declaration order.
  let params = procedure[3]
  # Child 0 of the parameter list is skipped.
  for position in 1 ..< params.len:
    let identDefs = params[position]
    # The last two children of each group are not names, so only the
    # leading children are collected.
    for nameIdx in 0 ..< identDefs.len - 2:
      result.add(identDefs[nameIdx])
|
||||||
|
|
||||||
|
proc call*(name: NimNode, args: seq[NimNode]): NimNode =
  ## Builds an `nnkCall` node whose first child is `name`, followed by
  ## every node of `args` in order.
  result = nnkCall.newTree(name)
  result.add(args)
|
||||||
|
|
||||||
|
macro simd*(procedure: untyped) =
  ## Pragma macro: stores a copy of the annotated procedure's AST in
  ## `simdProcs` under the procedure's name, then emits the procedure
  ## unchanged.
  simdProcs[procedure.procName()] = procedure.copy()
  return procedure
|
351
src/pixie/simd/sse2.nim
Normal file
351
src/pixie/simd/sse2.nim
Normal file
|
@ -0,0 +1,351 @@
|
||||||
|
import chroma, internal, nimsimd/sse2, pixie/common, todo, vmath
|
||||||
|
|
||||||
|
when defined(release):
|
||||||
|
{.push checks: off.}
|
||||||
|
|
||||||
|
proc fillUnsafeSse2*(
  data: var seq[ColorRGBX],
  color: SomeColor,
  start, len: int
) {.simd.} =
  ## Sets `data[start ..< start + len]` to `color` converted with
  ## `asRgbx`.
  let
    fillValue = color.asRgbx()
    stop = start + len
  var
    idx = start
    cursor = cast[uint](data[idx].addr)
  # Scalar stores until the write address is 16-byte aligned.
  while idx < stop and (cursor and 15) != 0:
    data[idx] = fillValue
    inc idx
    cursor += 4

  let
    fillVec = mm_set1_epi32(cast[int32](fillValue))
    steps = (stop - idx) div 8
  # Two aligned 16-byte stores (8 pixels) per step.
  for _ in 0 ..< steps:
    mm_store_si128(cast[pointer](cursor), fillVec)
    mm_store_si128(cast[pointer](cursor + 16), fillVec)
    cursor += 32
  idx += steps * 8

  # Scalar tail
  while idx < stop:
    data[idx] = fillValue
    inc idx
|
||||||
|
|
||||||
|
proc isOneColorSse2*(image: Image): bool {.simd.} =
  ## Returns true when every pixel of `image` equals its first pixel.
  let
    reference = image.data[0]
    total = image.data.len
  var
    idx = 0
    cursor = cast[uint](image.data[0].addr)
  # Scalar compares until the read address is 16-byte aligned.
  while idx < total and (cursor and 15) != 0:
    if image.data[idx] != reference:
      return false
    inc idx
    cursor += 4

  let
    referenceVec = mm_set1_epi32(cast[int32](reference))
    steps = (total - idx) div 16
  # 16 pixels (four aligned loads) per step.
  for _ in 0 ..< steps:
    let
      eqLow = mm_and_si128(
        mm_cmpeq_epi8(mm_load_si128(cast[pointer](cursor)), referenceVec),
        mm_cmpeq_epi8(mm_load_si128(cast[pointer](cursor + 16)), referenceVec)
      )
      eqHigh = mm_and_si128(
        mm_cmpeq_epi8(mm_load_si128(cast[pointer](cursor + 32)), referenceVec),
        mm_cmpeq_epi8(mm_load_si128(cast[pointer](cursor + 48)), referenceVec)
      )
    # Every byte of all four vectors must have matched.
    if mm_movemask_epi8(mm_and_si128(eqLow, eqHigh)) != 0xffff:
      return false
    cursor += 64
  idx += 16 * steps

  # Scalar tail
  while idx < total:
    if image.data[idx] != reference:
      return false
    inc idx

  result = true
|
||||||
|
|
||||||
|
proc isTransparentSse2*(image: Image): bool {.simd.} =
  ## Returns false as soon as a non-zero pixel is found, true otherwise.
  ##
  ## The scalar paths test only the alpha channel. The vector path
  ## requires all 64 bytes of a step to be zero.
  let total = image.data.len
  var
    idx = 0
    cursor = cast[uint](image.data[0].addr)
  # Scalar checks until the read address is 16-byte aligned.
  while idx < total and (cursor and 15) != 0:
    if image.data[idx].a != 0:
      return false
    inc idx
    cursor += 4

  let
    allZero = mm_setzero_si128()
    steps = (total - idx) div 16
  # 16 pixels per step: OR the four vectors together and compare once.
  for _ in 0 ..< steps:
    let
      lowHalf = mm_or_si128(
        mm_load_si128(cast[pointer](cursor)),
        mm_load_si128(cast[pointer](cursor + 16))
      )
      highHalf = mm_or_si128(
        mm_load_si128(cast[pointer](cursor + 32)),
        mm_load_si128(cast[pointer](cursor + 48))
      )
      merged = mm_or_si128(lowHalf, highHalf)
    if mm_movemask_epi8(mm_cmpeq_epi8(merged, allZero)) != 0xffff:
      return false
    cursor += 64
  idx += 16 * steps

  # Scalar tail
  while idx < total:
    if image.data[idx].a != 0:
      return false
    inc idx

  result = true
|
||||||
|
|
||||||
|
proc isOpaqueSse2*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
  ## Returns true when every pixel in `data[start ..< start + len]` has
  ## an alpha of 255.
  ##
  ## Pixels are checked one at a time until the read pointer is 16-byte
  ## aligned, then 16 pixels per iteration with aligned SSE2 loads, then
  ## one at a time for the remainder.
  result = true

  var
    i = start
    # The pointer must track `data[i]`, not `data[0]`. Otherwise the
    # alignment test and the vector loads below inspect pixels from the
    # front of the buffer whenever `start > 0`.
    p = cast[uint](data[i].addr)
  # Align to 16 bytes
  while i < (start + len) and (p and 15) != 0:
    if data[i].a != 255:
      return false
    inc i
    p += 4

  let
    vec255 = mm_set1_epi8(255)
    iterations = (start + len - i) div 16
  for _ in 0 ..< iterations:
    let
      values0 = mm_load_si128(cast[pointer](p))
      values1 = mm_load_si128(cast[pointer](p + 16))
      values2 = mm_load_si128(cast[pointer](p + 32))
      values3 = mm_load_si128(cast[pointer](p + 48))
      # A byte of the AND is 0xff only if it is 0xff in all four vectors.
      values01 = mm_and_si128(values0, values1)
      values23 = mm_and_si128(values2, values3)
      values0123 = mm_and_si128(values01, values23)
      eq = mm_cmpeq_epi8(values0123, vec255)
    # 0x8888 selects the fourth byte (alpha) of each of the four pixels.
    if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
      return false
    p += 64
  i += 16 * iterations

  # Scalar tail
  for i in i ..< start + len:
    if data[i].a != 255:
      return false
|
||||||
|
|
||||||
|
proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
  ## Multiplies the r, g and b channels of every pixel in `data` by that
  ## pixel's alpha (`(channel * alpha) div 255`), in place. Alpha itself
  ## is left unchanged.
  let
    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
    highByteMask = mm_set1_epi16(0xff00)
    div255 = mm_set1_epi16(0x8081)
    steps = data.len div 4
  var idx = 0
  # Four pixels per step using unaligned loads and stores.
  for _ in 0 ..< steps:
    let
      pixels = mm_loadu_si128(data[idx].addr)
      alpha = mm_and_si128(pixels, alphaMask)
      alphaIsFull = mm_cmpeq_epi8(pixels, alphaMask)
    # 0x8888 selects the alpha byte of each pixel. When all four are 255
    # the step is skipped.
    if (mm_movemask_epi8(alphaIsFull) and 0x00008888) != 0x00008888:
      let
        evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
        # The lane holding alpha itself is multiplied by 0xff00 so that
        # alpha comes out unchanged.
        oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
        evenScaled = mm_mulhi_epu16(mm_slli_epi16(pixels, 8), evenMultiplier)
        oddScaled = mm_mulhi_epu16(
          mm_and_si128(pixels, highByteMask), oddMultiplier
        )
        # Multiply-high by 0x8081 then shift right by 7 divides by 255.
        evenOut = mm_srli_epi16(mm_mulhi_epu16(evenScaled, div255), 7)
        oddOut = mm_srli_epi16(mm_mulhi_epu16(oddScaled, div255), 7)
      mm_storeu_si128(
        data[idx].addr,
        mm_or_si128(evenOut, mm_slli_epi16(oddOut, 8))
      )
    idx += 4

  # Scalar tail
  while idx < data.len:
    var px = data[idx]
    if px.a != 255:
      px.r = ((px.r.uint32 * px.a) div 255).uint8
      px.g = ((px.g.uint32 * px.a) div 255).uint8
      px.b = ((px.b.uint32 * px.a) div 255).uint8
      data[idx] = px
    inc idx
|
||||||
|
|
||||||
|
proc newImageSse2*(mask: Mask): Image {.simd.} =
  ## Creates an image with the dimensions of `mask` in which every pixel
  ## is `rgbx(v, v, v, v)` for the mask value `v` at the same index.
  result = newImage(mask.width, mask.height)

  let vectorEnd = (mask.data.len div 16) * 16
  var idx = 0
  # 16 mask values per step, expanded four at a time.
  while idx < vectorEnd:
    var pending = mm_loadu_si128(mask.data[idx].addr)
    for group in 0 ..< 4:
      # Expand the first four values to one 32-bit lane each, then copy
      # each value into the remaining three bytes of its lane.
      let
        spread = unpackAlphaValues(pending)
        doubled = mm_or_si128(spread, mm_srli_epi32(spread, 8))
        filled = mm_or_si128(doubled, mm_srli_epi32(doubled, 16))
      mm_storeu_si128(result.data[idx + group * 4].addr, filled)
      # Drop the four values just written.
      pending = mm_srli_si128(pending, 4)
    idx += 16

  # Scalar tail
  while idx < mask.data.len:
    let value = mask.data[idx]
    result.data[idx] = rgbx(value, value, value, value)
    inc idx
|
||||||
|
|
||||||
|
proc newMaskSse2*(image: Image): Mask {.simd.} =
  ## Creates a mask with the dimensions of `image` whose values are the
  ## alpha channel of the pixels at the same index.
  result = newMask(image.width, image.height)

  let vectorEnd = (image.data.len div 16) * 16
  var idx = 0
  # 16 pixels per step: four unaligned loads are reduced to 16 alpha
  # bytes and written with a single unaligned store.
  while idx < vectorEnd:
    let packed = pack4xAlphaValues(
      mm_loadu_si128(image.data[idx + 0].addr),
      mm_loadu_si128(image.data[idx + 4].addr),
      mm_loadu_si128(image.data[idx + 8].addr),
      mm_loadu_si128(image.data[idx + 12].addr)
    )
    mm_storeu_si128(result.data[idx].addr, packed)
    idx += 16

  # Scalar tail
  while idx < image.data.len:
    result.data[idx] = image.data[idx].a
    inc idx
|
||||||
|
|
||||||
|
proc invertSse2*(target: Image | Mask) {.simd.} =
  ## Inverts `target` in place: every channel (image) or value (mask)
  ## `v` becomes `255 - v`. For images, `toPremultipliedAlphaSse2` is
  ## then run over the result.
  ##
  ## Elements are processed one at a time until the write pointer is
  ## 16-byte aligned, then 64 bytes per iteration with aligned SSE2 loads
  ## and stores, then one at a time for the remainder.
  var
    i: int
    p = cast[uint](target.data[0].addr)
  # Align to 16 bytes
  while i < target.data.len and (p and 15) != 0:
    when target is Image:
      var rgbx = target.data[i]
      rgbx.r = 255 - rgbx.r
      rgbx.g = 255 - rgbx.g
      rgbx.b = 255 - rgbx.b
      rgbx.a = 255 - rgbx.a
      target.data[i] = rgbx
      inc i
      p += 4
    else:
      target.data[i] = 255 - target.data[i]
      inc i
      inc p

  let vec255 = mm_set1_epi8(255)

  # Count only the elements that remain after the alignment prologue.
  # Dividing the full length here would let the aligned loop run past
  # the end of the buffer whenever the prologue consumed elements.
  # One iteration covers 64 bytes: 16 pixels or 64 mask values.
  when target is Image:
    let iterations = (target.data.len - i) div 16
  else:
    let iterations = (target.data.len - i) div 64

  for _ in 0 ..< iterations:
    let
      a = mm_load_si128(cast[pointer](p))
      b = mm_load_si128(cast[pointer](p + 16))
      c = mm_load_si128(cast[pointer](p + 32))
      d = mm_load_si128(cast[pointer](p + 48))
    mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
    mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
    mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
    mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
    p += 64

  when target is Image:
    i += 16 * iterations

    # Scalar tail
    for i in i ..< target.data.len:
      var rgbx = target.data[i]
      rgbx.r = 255 - rgbx.r
      rgbx.g = 255 - rgbx.g
      rgbx.b = 255 - rgbx.b
      rgbx.a = 255 - rgbx.a
      target.data[i] = rgbx

    toPremultipliedAlphaSse2(target.data)
  else:
    i += 64 * iterations

    # Scalar tail
    for i in i ..< target.data.len:
      target.data[i] = 255 - target.data[i]
|
||||||
|
|
||||||
|
proc ceilSse2*(mask: Mask) {.simd.} =
  ## Sets every non-zero value of `mask` to 255 and leaves zeros
  ## untouched, in place.
  let
    allZero = mm_setzero_si128()
    allOnes = mm_set1_epi8(255)
    chunks = mask.data.len div 16
  var cursor = cast[uint](mask.data[0].addr)
  # 16 values per step using unaligned loads and stores.
  for _ in 0 ..< chunks:
    # 0xff where the input byte is zero, 0x00 elsewhere.
    let isZero = mm_cmpeq_epi8(mm_loadu_si128(cast[pointer](cursor)), allZero)
    # (not isZero) and 0xff: non-zero bytes become 255, zero bytes stay 0.
    mm_storeu_si128(cast[pointer](cursor), mm_andnot_si128(isZero, allOnes))
    cursor += 16

  # Scalar tail
  for idx in 16 * chunks ..< mask.data.len:
    if mask.data[idx] != 0:
      mask.data[idx] = 255
|
||||||
|
|
||||||
|
proc applyOpacitySse2*(target: Image | Mask, opacity: float32) {.simd.} =
  ## Scales every byte of `target.data` by `opacity`, in place.
  ##
  ## `opacity` is first converted to `round(255 * opacity)`. A result of
  ## 255 leaves the target untouched, and a result of 0 fills it with
  ## zeros. Otherwise the scalar tail computes `(value * scaled) div 255`
  ## per channel.
  let scaled = round(255 * opacity).uint16
  if scaled == 255:
    return

  if scaled == 0:
    when target is Image:
      target.fill(rgbx(0, 0, 0, 0))
    else:
      target.fill(0)
    return

  let
    # The buffer is handled as raw bytes; an image has four per pixel.
    byteLen =
      when target is Image:
        target.data.len * 4
      else:
        target.data.len
    chunks = byteLen div 16
    highByteMask = mm_set1_epi16(0xff00)
    div255 = mm_set1_epi16(0x8081)
    allZero = mm_setzero_si128()
    opacityVec = mm_slli_epi16(mm_set1_epi16(scaled), 8)
  var cursor = cast[uint](target.data[0].addr)
  for _ in 0 ..< chunks:
    let values = mm_loadu_si128(cast[pointer](cursor))
    # A chunk that is entirely zero stays zero, so skip the arithmetic.
    if mm_movemask_epi8(mm_cmpeq_epi16(values, allZero)) != 0xffff:
      let
        # Even and odd bytes are moved to the high half of each 16-bit
        # lane and multiplied by the opacity separately.
        evenScaled = mm_mulhi_epu16(mm_slli_epi16(values, 8), opacityVec)
        oddScaled = mm_mulhi_epu16(
          mm_and_si128(values, highByteMask), opacityVec
        )
        # Multiply-high by 0x8081 then shift right by 7 divides by 255.
        evenOut = mm_srli_epi16(mm_mulhi_epu16(evenScaled, div255), 7)
        oddOut = mm_srli_epi16(mm_mulhi_epu16(oddScaled, div255), 7)
      mm_storeu_si128(
        cast[pointer](cursor),
        mm_or_si128(evenOut, mm_slli_epi16(oddOut, 8))
      )
    cursor += 16

  # Scalar tail; `firstTailByte` is a byte offset.
  let firstTailByte = 16 * chunks
  when target is Image:
    for idx in firstTailByte div 4 ..< target.data.len:
      var px = target.data[idx]
      px.r = ((px.r * scaled) div 255).uint8
      px.g = ((px.g * scaled) div 255).uint8
      px.b = ((px.b * scaled) div 255).uint8
      px.a = ((px.a * scaled) div 255).uint8
      target.data[idx] = px
  else:
    for idx in firstTailByte ..< target.data.len:
      target.data[idx] = ((target.data[idx] * scaled) div 255).uint8
|
||||||
|
|
||||||
|
when defined(release):
|
||||||
|
{.pop.}
|
33
src/pixie/simd/todo.nim
Normal file
33
src/pixie/simd/todo.nim
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
import chroma, nimsimd/sse2
|
||||||
|
|
||||||
|
when defined(release):
|
||||||
|
{.push checks: off.}
|
||||||
|
|
||||||
|
proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
  ## Multiplies the four float lanes of `color` by `opacity`, converts
  ## them to 32-bit integers, and packs them with unsigned saturation
  ## into the low four bytes, which are returned as a `ColorRGBX`.
  let
    zero = mm_setzero_si128()
    scaled = mm_cvtps_epi32(mm_mul_ps(color, mm_set1_ps(opacity)))
    packedOnce = mm_packus_epi16(scaled, zero)
    packedTwice = mm_packus_epi16(packedOnce, zero)
  result = cast[ColorRGBX](mm_cvtsi128_si32(packedTwice))
|
||||||
|
|
||||||
|
proc packAlphaValues(v: M128i): M128i {.inline.} =
  ## Moves the top byte (alpha) of each of the four 32-bit pixels in `v`
  ## into the first four bytes of the result.
  let
    zero = mm_setzero_si128()
    # Shift each pixel right so that only its top byte remains.
    alphaOnly = mm_srli_epi32(v, 24)
  # Two saturating packs squeeze the four lanes into four bytes.
  result = mm_packus_epi16(mm_packus_epi16(alphaOnly, zero), zero)
|
||||||
|
|
||||||
|
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline.} =
  ## Packs the alpha bytes of 16 pixels (four vectors of four pixels)
  ## into one vector: bytes 0-3 come from `i`, 4-7 from `j`, 8-11 from
  ## `k` and 12-15 from `l`.
  let
    firstPair = mm_or_si128(
      packAlphaValues(i),
      mm_slli_si128(packAlphaValues(j), 4)
    )
    secondPair = mm_or_si128(
      mm_slli_si128(packAlphaValues(k), 8),
      mm_slli_si128(packAlphaValues(l), 12)
    )
  result = mm_or_si128(firstPair, secondPair)
|
||||||
|
|
||||||
|
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
  ## Expands the first four bytes of `v` into four 32-bit lanes, each
  ## holding its byte in the top position with the lower three bytes
  ## zero, i.e. rgba(0, 0, 0, value).
  let
    zero = mm_setzero_si128()
    # First interleave: each value becomes the high byte of a 16-bit lane.
    widenedOnce = mm_unpacklo_epi8(zero, v)
  # Second interleave: each value becomes the high byte of a 32-bit lane.
  result = mm_unpacklo_epi8(zero, widenedOnce)
|
||||||
|
|
||||||
|
when defined(release):
|
||||||
|
{.pop.}
|
Loading…
Reference in a new issue