move newImage(mask) and newMask(image) simd out
This commit is contained in:
parent
e2a966288d
commit
e56ad9e403
3 changed files with 82 additions and 48 deletions
|
@ -31,21 +31,18 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
|
||||||
|
|
||||||
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
|
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
|
||||||
result = newImage(mask.width, mask.height)
|
result = newImage(mask.width, mask.height)
|
||||||
var i: int
|
|
||||||
when defined(amd64) and allowSimd:
|
|
||||||
for _ in 0 ..< mask.data.len div 16:
|
|
||||||
var alphas = mm_loadu_si128(mask.data[i].addr)
|
|
||||||
for j in 0 ..< 4:
|
|
||||||
var unpacked = unpackAlphaValues(alphas)
|
|
||||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 8))
|
|
||||||
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
|
|
||||||
mm_storeu_si128(result.data[i + j * 4].addr, unpacked)
|
|
||||||
alphas = mm_srli_si128(alphas, 4)
|
|
||||||
i += 16
|
|
||||||
|
|
||||||
for j in i ..< mask.data.len:
|
when allowSimd and compiles(newImageFromMaskSimd):
|
||||||
let v = mask.data[j]
|
newImageFromMaskSimd(
|
||||||
result.data[j] = rgbx(v, v, v, v)
|
cast[ptr UncheckedArray[ColorRGBX]](result.data[0].addr),
|
||||||
|
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
|
||||||
|
mask.data.len
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
for i in 0 ..< mask.data.len:
|
||||||
|
let v = mask.data[i]
|
||||||
|
result.data[i] = rgbx(v, v, v, v)
|
||||||
|
|
||||||
proc copy*(image: Image): Image {.raises: [PixieError].} =
|
proc copy*(image: Image): Image {.raises: [PixieError].} =
|
||||||
## Copies the image data into a new image.
|
## Copies the image data into a new image.
|
||||||
|
@ -421,7 +418,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
|
||||||
|
|
||||||
proc invert*(image: Image) {.raises: [].} =
|
proc invert*(image: Image) {.raises: [].} =
|
||||||
## Inverts all of the colors and alpha.
|
## Inverts all of the colors and alpha.
|
||||||
if allowSimd and compiles(invertSimd):
|
when allowSimd and compiles(invertSimd):
|
||||||
invertSimd(
|
invertSimd(
|
||||||
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
|
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
|
||||||
image.data.len
|
image.data.len
|
||||||
|
@ -506,22 +503,16 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
|
||||||
## Returns a new mask using the alpha values of the image.
|
## Returns a new mask using the alpha values of the image.
|
||||||
result = newMask(image.width, image.height)
|
result = newMask(image.width, image.height)
|
||||||
|
|
||||||
var i: int
|
when allowSimd and compiles(newMaskFromImageSimd):
|
||||||
when defined(amd64) and allowSimd:
|
newMaskFromImageSimd(
|
||||||
for _ in 0 ..< image.data.len div 16:
|
cast[ptr UncheckedArray[uint8]](result.data[0].addr),
|
||||||
let
|
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
|
||||||
a = mm_loadu_si128(image.data[i + 0].addr)
|
image.data.len
|
||||||
b = mm_loadu_si128(image.data[i + 4].addr)
|
)
|
||||||
c = mm_loadu_si128(image.data[i + 8].addr)
|
return
|
||||||
d = mm_loadu_si128(image.data[i + 12].addr)
|
|
||||||
mm_storeu_si128(
|
|
||||||
result.data[i].addr,
|
|
||||||
pack4xAlphaValues(a, b, c, d)
|
|
||||||
)
|
|
||||||
i += 16
|
|
||||||
|
|
||||||
for j in i ..< image.data.len:
|
for i in 0 ..< image.data.len:
|
||||||
result.data[j] = image.data[j].a
|
result.data[i] = image.data[i].a
|
||||||
|
|
||||||
proc getRgbaSmooth*(
|
proc getRgbaSmooth*(
|
||||||
image: Image, x, y: float32, wrapped = false
|
image: Image, x, y: float32, wrapped = false
|
||||||
|
|
|
@ -152,24 +152,7 @@ when defined(amd64) and allowSimd:
|
||||||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||||
|
|
||||||
proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} =
|
export pack4xAlphaValues, unpackAlphaValues
|
||||||
## Shuffle the alpha values for these 4 colors to the first 4 bytes
|
|
||||||
result = mm_srli_epi32(v, 24)
|
|
||||||
result = mm_packus_epi16(result, mm_setzero_si128())
|
|
||||||
result = mm_packus_epi16(result, mm_setzero_si128())
|
|
||||||
|
|
||||||
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
|
|
||||||
let
|
|
||||||
i = packAlphaValues(i)
|
|
||||||
j = mm_slli_si128(packAlphaValues(j), 4)
|
|
||||||
k = mm_slli_si128(packAlphaValues(k), 8)
|
|
||||||
l = mm_slli_si128(packAlphaValues(l), 12)
|
|
||||||
mm_or_si128(mm_or_si128(i, j), mm_or_si128(k, l))
|
|
||||||
|
|
||||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
|
||||||
## Unpack the first 32 bits into 4 rgba(0, 0, 0, value)
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), v)
|
|
||||||
result = mm_unpacklo_epi8(mm_setzero_si128(), result)
|
|
||||||
|
|
||||||
when defined(release):
|
when defined(release):
|
||||||
{.pop.}
|
{.pop.}
|
||||||
|
|
|
@ -7,6 +7,25 @@ when defined(amd64):
|
||||||
cpuHasAvx* = checkInstructionSets({AVX})
|
cpuHasAvx* = checkInstructionSets({AVX})
|
||||||
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})
|
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})
|
||||||
|
|
||||||
|
proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} =
  ## Shuffle the alpha values for these 4 colors to the first 4 bytes.
  ##
  ## `v` is read as four 32-bit lanes. Only the top byte of each lane is
  ## kept; the four kept bytes end up in bytes 0..3 of the result and the
  ## remaining 12 bytes are zero.
  # Move the top byte of every 32-bit lane down to the lane's low byte.
  let perLane = mm_srli_epi32(v, 24)
  # Each pack halves the element width: 32-bit lanes -> 16-bit -> 8-bit.
  # Every value is already <= 255, so the unsigned saturation never clamps.
  let perWord = mm_packus_epi16(perLane, mm_setzero_si128())
  mm_packus_epi16(perWord, mm_setzero_si128())
|
||||||
|
|
||||||
|
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
  ## Packs the alpha values of 16 colors (four vectors of 4 colors each)
  ## into one vector of 16 bytes.
  ##
  ## Alphas from `i` land in bytes 0..3, from `j` in bytes 4..7, from `k`
  ## in bytes 8..11 and from `l` in bytes 12..15.
  let
    # Each packAlphaValues result only occupies its low 4 bytes, so a
    # byte-wise left shift moves it into its own 4-byte slot.
    slot0 = packAlphaValues(i)
    slot1 = mm_slli_si128(packAlphaValues(j), 4)
    slot2 = mm_slli_si128(packAlphaValues(k), 8)
    slot3 = mm_slli_si128(packAlphaValues(l), 12)
  # The slots do not overlap, so OR-ing them simply concatenates them.
  mm_or_si128(mm_or_si128(slot0, slot1), mm_or_si128(slot2, slot3))
|
||||||
|
|
||||||
|
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
  ## Unpack the first 32 bits into 4 rgba(0, 0, 0, value).
  ##
  ## Bytes 0..3 of `v` each become the top byte of one 32-bit lane of the
  ## result; the lower three bytes of every lane are zero.
  let zero = mm_setzero_si128()
  # Interleaving with zero twice widens each byte 8 -> 16 -> 32 bits,
  # leaving the value in the most significant byte of its lane.
  mm_unpacklo_epi8(zero, mm_unpacklo_epi8(zero, v))
|
||||||
|
|
||||||
proc fillUnsafeSimd*(
|
proc fillUnsafeSimd*(
|
||||||
data: ptr UncheckedArray[ColorRGBX],
|
data: ptr UncheckedArray[ColorRGBX],
|
||||||
len: int,
|
len: int,
|
||||||
|
@ -171,6 +190,47 @@ when defined(amd64):
|
||||||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
||||||
copyMem(data[i].addr, c.addr, 4)
|
copyMem(data[i].addr, c.addr, 4)
|
||||||
|
|
||||||
|
proc newImageFromMaskSimd*(
  dst: ptr UncheckedArray[ColorRGBX],
  src: ptr UncheckedArray[uint8],
  len: int
) =
  ## Expands `len` mask values from `src` into `len` colors in `dst`,
  ## writing each mask value `v` as rgbx(v, v, v, v).
  ##
  ## Both buffers must hold at least `len` elements; no bounds checks are
  ## possible on unchecked arrays.
  let simdEnd = (len div 16) * 16
  var pos = 0

  # Vector path: 16 mask bytes per iteration, emitted as 4 stores of
  # 4 colors each.
  while pos < simdEnd:
    var alphas = mm_loadu_si128(src[pos].addr)
    for quad in 0 ..< 4:
      # Start with the value in the top byte of each lane, then smear it
      # down into the other three bytes of the lane.
      let alphaOnly = unpackAlphaValues(alphas)
      let spread2 = mm_or_si128(alphaOnly, mm_srli_epi32(alphaOnly, 8))
      let spread4 = mm_or_si128(spread2, mm_srli_epi32(spread2, 16))
      mm_storeu_si128(dst[pos + quad * 4].addr, spread4)
      # Bring the next 4 mask bytes into the low 32 bits.
      alphas = mm_srli_si128(alphas, 4)
    pos += 16

  # Scalar tail for the remaining (len mod 16) values.
  while pos < len:
    let v = src[pos]
    dst[pos] = rgbx(v, v, v, v)
    inc pos
|
||||||
|
|
||||||
|
proc newMaskFromImageSimd*(
  dst: ptr UncheckedArray[uint8],
  src: ptr UncheckedArray[ColorRGBX],
  len: int
) =
  ## Writes the alpha value of each of the `len` colors in `src` to the
  ## matching position in `dst`.
  ##
  ## Both buffers must hold at least `len` elements; no bounds checks are
  ## possible on unchecked arrays.
  let simdEnd = (len div 16) * 16
  var pos = 0

  # Vector path: load 16 colors as four 4-color vectors and store their
  # 16 alpha bytes with a single write.
  while pos < simdEnd:
    let packed = pack4xAlphaValues(
      mm_loadu_si128(src[pos + 0].addr),
      mm_loadu_si128(src[pos + 4].addr),
      mm_loadu_si128(src[pos + 8].addr),
      mm_loadu_si128(src[pos + 12].addr)
    )
    mm_storeu_si128(dst[pos].addr, packed)
    pos += 16

  # Scalar tail for the remaining (len mod 16) colors.
  while pos < len:
    dst[pos] = src[pos].a
    inc pos
|
||||||
|
|
||||||
proc invertSimd*(data: ptr UncheckedArray[ColorRGBX], len: int) =
|
proc invertSimd*(data: ptr UncheckedArray[ColorRGBX], len: int) =
|
||||||
var i: int
|
var i: int
|
||||||
let vec255 = mm_set1_epi8(cast[int8](255))
|
let vec255 = mm_set1_epi8(cast[int8](255))
|
||||||
|
|
Loading…
Reference in a new issue