move newImage(mask) and newMask(image) simd out

This commit is contained in:
Ryan Oldenburg 2022-06-29 00:29:26 -05:00
parent e2a966288d
commit e56ad9e403
3 changed files with 82 additions and 48 deletions

View file

@ -31,21 +31,18 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
## Creates an image with the mask's width and height in which every pixel is
## rgbx(v, v, v, v), where v is the mask value at the same index.
result = newImage(mask.width, mask.height)
var i: int
# NOTE(review): this view looks like a diff whose +/- markers were stripped,
# so the inline SIMD loop below and the newImageFromMaskSimd call further down
# are presumably the old and new versions of the same fast path - confirm.
when defined(amd64) and allowSimd:
# Each iteration loads 16 mask values and stores 16 pixels, 4 at a time.
for _ in 0 ..< mask.data.len div 16:
var alphas = mm_loadu_si128(mask.data[i].addr)
for j in 0 ..< 4:
# Expand 4 mask values to one per 32-bit lane, then OR shifted copies so the
# value fills all four bytes of the lane.
var unpacked = unpackAlphaValues(alphas)
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 8))
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
mm_storeu_si128(result.data[i + j * 4].addr, unpacked)
# Shift the next 4 mask values down into the low bytes.
alphas = mm_srli_si128(alphas, 4)
i += 16
# Scalar loop for whatever the SIMD loop above did not process.
for j in i ..< mask.data.len:
let v = mask.data[j]
result.data[j] = rgbx(v, v, v, v)
# Hand the whole conversion to the SIMD proc when one compiles for this
# target, and return before the scalar loop below.
when allowSimd and compiles(newImageFromMaskSimd):
newImageFromMaskSimd(
cast[ptr UncheckedArray[ColorRGBX]](result.data[0].addr),
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len
)
return
# Scalar path used when no SIMD implementation is available.
for i in 0 ..< mask.data.len:
let v = mask.data[i]
result.data[i] = rgbx(v, v, v, v)
proc copy*(image: Image): Image {.raises: [PixieError].} =
## Copies the image data into a new image.
@ -421,7 +418,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
proc invert*(image: Image) {.raises: [].} =
## Inverts all of the colors and alpha.
if allowSimd and compiles(invertSimd):
when allowSimd and compiles(invertSimd):
invertSimd(
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
@ -506,22 +503,16 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
## Returns a new mask using the alpha values of the image.
result = newMask(image.width, image.height)
var i: int
# NOTE(review): this view looks like a diff whose +/- markers were stripped,
# so the inline SIMD loop below and the newMaskFromImageSimd call further down
# are presumably the old and new versions of the same fast path - confirm.
when defined(amd64) and allowSimd:
# Each iteration loads 16 pixels (4 per register) and stores their 16 alphas.
for _ in 0 ..< image.data.len div 16:
let
a = mm_loadu_si128(image.data[i + 0].addr)
b = mm_loadu_si128(image.data[i + 4].addr)
c = mm_loadu_si128(image.data[i + 8].addr)
d = mm_loadu_si128(image.data[i + 12].addr)
mm_storeu_si128(
result.data[i].addr,
pack4xAlphaValues(a, b, c, d)
)
i += 16
# Hand the whole conversion to the SIMD proc when one compiles for this
# target, and return before the scalar loops below.
when allowSimd and compiles(newMaskFromImageSimd):
newMaskFromImageSimd(
cast[ptr UncheckedArray[uint8]](result.data[0].addr),
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
return
# Scalar loop for whatever the inline SIMD loop above did not process.
for j in i ..< image.data.len:
result.data[j] = image.data[j].a
# Scalar path used when no SIMD implementation is available.
for i in 0 ..< image.data.len:
result.data[i] = image.data[i].a
proc getRgbaSmooth*(
image: Image, x, y: float32, wrapped = false

View file

@ -152,24 +152,7 @@ when defined(amd64) and allowSimd:
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} =
  ## Gathers the alpha byte of each of the 4 colors in `v` into the lowest
  ## 4 bytes of the result; the remaining 12 bytes are zero.
  let
    zero = mm_setzero_si128()
    # Move each alpha down to the low byte of its 32-bit lane.
    perLane = mm_srli_epi32(v, 24)
    # Narrow twice so the four alphas end up in adjacent bytes.
    perWord = mm_packus_epi16(perLane, zero)
  mm_packus_epi16(perWord, zero)
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
  ## Packs the alpha values of 16 colors (4 per argument) into one register:
  ## the alphas of `i` land in bytes 0..3, `j` in 4..7, `k` in 8..11 and `l`
  ## in 12..15.
  let
    lowerHalf = mm_or_si128(
      packAlphaValues(i),
      mm_slli_si128(packAlphaValues(j), 4)
    )
    upperHalf = mm_or_si128(
      mm_slli_si128(packAlphaValues(k), 8),
      mm_slli_si128(packAlphaValues(l), 12)
    )
  mm_or_si128(lowerHalf, upperHalf)
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
  ## Expands the lowest 4 bytes of `v` into 4 colors of the form
  ## rgba(0, 0, 0, value).
  let
    zero = mm_setzero_si128()
    # First interleave puts each value in the high byte of a 16-bit word.
    widened = mm_unpacklo_epi8(zero, v)
  # Second interleave moves it to the high byte of a 32-bit lane.
  mm_unpacklo_epi8(zero, widened)
# Forward the alpha pack/unpack helpers to modules that import this one.
export pack4xAlphaValues, unpackAlphaValues
# NOTE(review): presumably closes a matching {.push.} made for release builds
# earlier in the file, outside this view - confirm.
when defined(release):
{.pop.}

View file

@ -7,6 +7,25 @@ when defined(amd64):
cpuHasAvx* = checkInstructionSets({AVX})
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})
proc packAlphaValues(v: M128i): M128i {.inline.} =
  ## Moves the alpha byte of each of the 4 colors in `v` into the first
  ## 4 bytes of the result, leaving the other 12 bytes zero.
  let
    zero = mm_setzero_si128()
    # Alpha is the top byte of each 32-bit lane; shift it to the bottom.
    alphaPerLane = mm_srli_epi32(v, 24)
    # Two narrowing packs bring the four alphas next to each other.
    alphaPerWord = mm_packus_epi16(alphaPerLane, zero)
  mm_packus_epi16(alphaPerWord, zero)
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline.} =
  ## Combines the alpha values of 16 colors (4 in each argument) into a
  ## single register: `i` fills bytes 0..3, `j` bytes 4..7, `k` bytes 8..11
  ## and `l` bytes 12..15.
  let
    firstPair = mm_or_si128(
      packAlphaValues(i),
      mm_slli_si128(packAlphaValues(j), 4)
    )
    secondPair = mm_or_si128(
      mm_slli_si128(packAlphaValues(k), 8),
      mm_slli_si128(packAlphaValues(l), 12)
    )
  mm_or_si128(firstPair, secondPair)
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
  ## Spreads the first 4 bytes of `v` into 4 colors of the form
  ## rgba(0, 0, 0, value).
  let
    zero = mm_setzero_si128()
    # After one interleave each value is the high byte of a 16-bit word.
    asWords = mm_unpacklo_epi8(zero, v)
  # After the second it is the high byte of a 32-bit lane.
  mm_unpacklo_epi8(zero, asWords)
proc fillUnsafeSimd*(
data: ptr UncheckedArray[ColorRGBX],
len: int,
@ -171,6 +190,47 @@ when defined(amd64):
c.b = ((c.b.uint32 * c.a) div 255).uint8
copyMem(data[i].addr, c.addr, 4)
proc newImageFromMaskSimd*(
  dst: ptr UncheckedArray[ColorRGBX],
  src: ptr UncheckedArray[uint8],
  len: int
) =
  ## Writes `len` pixels to `dst`, each one rgbx(v, v, v, v) where v is the
  ## value at the same index in `src`. Whole groups of 16 values go through
  ## SSE2; the remainder is converted one value at a time.
  let simdEnd = (len div 16) * 16
  var offset = 0
  while offset < simdEnd:
    var packed = mm_loadu_si128(src[offset].addr)
    for group in 0 ..< 4:
      let
        # One value per 32-bit lane, sitting in the top byte.
        topByte = unpackAlphaValues(packed)
        # Copy it into the byte below, then copy that pair into the low half.
        topHalf = mm_or_si128(topByte, mm_srli_epi32(topByte, 8))
        allBytes = mm_or_si128(topHalf, mm_srli_epi32(topHalf, 16))
      mm_storeu_si128(dst[offset + group * 4].addr, allBytes)
      # Bring the next 4 source values down to the low bytes.
      packed = mm_srli_si128(packed, 4)
    offset += 16

  for idx in offset ..< len:
    let v = src[idx]
    dst[idx] = rgbx(v, v, v, v)
proc newMaskFromImageSimd*(
  dst: ptr UncheckedArray[uint8],
  src: ptr UncheckedArray[ColorRGBX],
  len: int
) =
  ## Writes the alpha channel of the `len` pixels in `src` to `dst`. Whole
  ## groups of 16 pixels go through SSE2; the remainder is copied one pixel
  ## at a time.
  let simdEnd = (len div 16) * 16
  var offset = 0
  while offset < simdEnd:
    # Four loads of 4 pixels each are squeezed into 16 alpha bytes.
    let alphas = pack4xAlphaValues(
      mm_loadu_si128(src[offset + 0].addr),
      mm_loadu_si128(src[offset + 4].addr),
      mm_loadu_si128(src[offset + 8].addr),
      mm_loadu_si128(src[offset + 12].addr)
    )
    mm_storeu_si128(dst[offset].addr, alphas)
    offset += 16

  for idx in offset ..< len:
    dst[idx] = src[idx].a
proc invertSimd*(data: ptr UncheckedArray[ColorRGBX], len: int) =
var i: int
let vec255 = mm_set1_epi8(cast[int8](255))