move newImage(mask) and newMask(image) simd out
This commit is contained in:
parent
e2a966288d
commit
e56ad9e403
3 changed files with 82 additions and 48 deletions
|
@ -31,21 +31,18 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
|
|||
|
||||
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
  ## Returns a new image with the same width and height as the mask. Every
  ## channel (r, g, b and a) of each pixel is set to the mask value at the
  ## same position.
  result = newImage(mask.width, mask.height)

  # Nothing to convert, and taking the address of `data[0]` below would be
  # out of bounds should an empty pixel buffer ever reach this point.
  if mask.data.len == 0:
    return

  when allowSimd and compiles(newImageFromMaskSimd):
    # A SIMD implementation is available for this build: hand it raw
    # pointers to both buffers and let it perform the whole conversion.
    newImageFromMaskSimd(
      cast[ptr UncheckedArray[ColorRGBX]](result.data[0].addr),
      cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
      mask.data.len
    )
    return

  # Scalar fallback used when no SIMD implementation was compiled in.
  for i in 0 ..< mask.data.len:
    let v = mask.data[i]
    result.data[i] = rgbx(v, v, v, v)
|
||||
|
||||
proc copy*(image: Image): Image {.raises: [PixieError].} =
|
||||
## Copies the image data into a new image.
|
||||
|
@ -421,7 +418,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
|
|||
|
||||
proc invert*(image: Image) {.raises: [].} =
|
||||
## Inverts all of the colors and alpha.
|
||||
if allowSimd and compiles(invertSimd):
|
||||
when allowSimd and compiles(invertSimd):
|
||||
invertSimd(
|
||||
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
|
||||
image.data.len
|
||||
|
@ -506,22 +503,16 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
|
|||
## Returns a new mask using the alpha values of the image.
|
||||
result = newMask(image.width, image.height)
|
||||
|
||||
var i: int
|
||||
when defined(amd64) and allowSimd:
|
||||
for _ in 0 ..< image.data.len div 16:
|
||||
let
|
||||
a = mm_loadu_si128(image.data[i + 0].addr)
|
||||
b = mm_loadu_si128(image.data[i + 4].addr)
|
||||
c = mm_loadu_si128(image.data[i + 8].addr)
|
||||
d = mm_loadu_si128(image.data[i + 12].addr)
|
||||
mm_storeu_si128(
|
||||
result.data[i].addr,
|
||||
pack4xAlphaValues(a, b, c, d)
|
||||
)
|
||||
i += 16
|
||||
when allowSimd and compiles(newMaskFromImageSimd):
|
||||
newMaskFromImageSimd(
|
||||
cast[ptr UncheckedArray[uint8]](result.data[0].addr),
|
||||
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
|
||||
image.data.len
|
||||
)
|
||||
return
|
||||
|
||||
for j in i ..< image.data.len:
|
||||
result.data[j] = image.data[j].a
|
||||
for i in 0 ..< image.data.len:
|
||||
result.data[i] = image.data[i].a
|
||||
|
||||
proc getRgbaSmooth*(
|
||||
image: Image, x, y: float32, wrapped = false
|
||||
|
|
|
@ -152,24 +152,7 @@ when defined(amd64) and allowSimd:
|
|||
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
|
||||
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
|
||||
|
||||
proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} =
  ## Moves the alpha values of the 4 colors held in `v` into the first
  ## 4 bytes of the returned vector.
  let
    zero = mm_setzero_si128()
    # Shift each 32-bit lane right so only its top byte remains.
    alphaLanes = mm_srli_epi32(v, 24)
    # Two saturating narrowing steps squeeze the four lanes together.
    narrowedOnce = mm_packus_epi16(alphaLanes, zero)
  mm_packus_epi16(narrowedOnce, zero)
|
||||
|
||||
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
  ## Packs the alpha values of four vectors of colors into one vector:
  ## `i` supplies bytes 0..3, `j` bytes 4..7, `k` bytes 8..11 and `l`
  ## bytes 12..15.
  let
    lowerEight = mm_or_si128(
      packAlphaValues(i),
      mm_slli_si128(packAlphaValues(j), 4)
    )
    upperEight = mm_or_si128(
      mm_slli_si128(packAlphaValues(k), 8),
      mm_slli_si128(packAlphaValues(l), 12)
    )
  mm_or_si128(lowerEight, upperEight)
|
||||
|
||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
  ## Expands the first 32 bits of `v` (4 alpha values) into 4 colors of the
  ## form rgba(0, 0, 0, value).
  let
    zero = mm_setzero_si128()
    # First interleave with zero: each source byte is preceded by a 0 byte.
    spreadOnce = mm_unpacklo_epi8(zero, v)
  # Second interleave with zero widens every value to its own 32-bit lane.
  mm_unpacklo_epi8(zero, spreadOnce)
|
||||
export pack4xAlphaValues, unpackAlphaValues
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
|
@ -7,6 +7,25 @@ when defined(amd64):
|
|||
cpuHasAvx* = checkInstructionSets({AVX})
|
||||
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})
|
||||
|
||||
proc packAlphaValues(v: M128i): M128i {.inline, raises: [].} =
  ## Shuffle the alpha values for these 4 colors to the first 4 bytes.
  ##
  ## Declared `raises: []` like `unpackAlphaValues`, so the SIMD helpers in
  ## this group all carry the same explicit effect annotation.
  # Keep only the top byte of every 32-bit lane, now in the lane's low byte.
  result = mm_srli_epi32(v, 24)
  # Narrow twice with unsigned saturation so the four values end up
  # adjacent at the start of the vector.
  result = mm_packus_epi16(result, mm_setzero_si128())
  result = mm_packus_epi16(result, mm_setzero_si128())
|
||||
|
||||
proc pack4xAlphaValues*(i, j, k, l: M128i): M128i {.inline, raises: [].} =
  ## Packs the alpha values of four vectors of colors into a single vector.
  ## `i` fills bytes 0..3, `j` bytes 4..7, `k` bytes 8..11 and `l`
  ## bytes 12..15.
  ##
  ## Declared `raises: []` like `unpackAlphaValues`, so the SIMD helpers in
  ## this group all carry the same explicit effect annotation.
  let
    # Each packed group sits in the first 4 bytes; byte-shift it left to
    # its destination slot before merging.
    first = packAlphaValues(i)
    second = mm_slli_si128(packAlphaValues(j), 4)
    third = mm_slli_si128(packAlphaValues(k), 8)
    fourth = mm_slli_si128(packAlphaValues(l), 12)
  mm_or_si128(mm_or_si128(first, second), mm_or_si128(third, fourth))
|
||||
|
||||
proc unpackAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
  ## Takes the first 32 bits of `v` and spreads them into
  ## 4 rgba(0, 0, 0, value) colors.
  let zeroes = mm_setzero_si128()
  # Interleaving with zero bytes twice places every value in the top byte
  # of its own 32-bit lane.
  let halfSpread = mm_unpacklo_epi8(zeroes, v)
  mm_unpacklo_epi8(zeroes, halfSpread)
|
||||
|
||||
proc fillUnsafeSimd*(
|
||||
data: ptr UncheckedArray[ColorRGBX],
|
||||
len: int,
|
||||
|
@ -171,6 +190,47 @@ when defined(amd64):
|
|||
c.b = ((c.b.uint32 * c.a) div 255).uint8
|
||||
copyMem(data[i].addr, c.addr, 4)
|
||||
|
||||
proc newImageFromMaskSimd*(
  dst: ptr UncheckedArray[ColorRGBX],
  src: ptr UncheckedArray[uint8],
  len: int
) =
  ## Expands `len` mask values read from `src` into `len` pixels written to
  ## `dst`. Each pixel has all four channels set to the mask value.
  var done = 0

  # Vector part: consume 16 mask bytes per iteration.
  while len - done >= 16:
    var pending = mm_loadu_si128(src[done].addr)
    for quad in 0 ..< 4:
      let
        # The first 4 mask bytes become 4 pixels of rgba(0, 0, 0, value)...
        alphaOnly = unpackAlphaValues(pending)
        # ...then the value is smeared down into the remaining channels.
        topTwo = mm_or_si128(alphaOnly, mm_srli_epi32(alphaOnly, 8))
        allFour = mm_or_si128(topTwo, mm_srli_epi32(topTwo, 16))
      mm_storeu_si128(dst[done + quad * 4].addr, allFour)
      # Drop the 4 bytes just handled so the next 4 move to the front.
      pending = mm_srli_si128(pending, 4)
    done += 16

  # Scalar part for the fewer-than-16 values left over.
  for idx in done ..< len:
    let v = src[idx]
    dst[idx] = rgbx(v, v, v, v)
|
||||
|
||||
proc newMaskFromImageSimd*(
  dst: ptr UncheckedArray[uint8],
  src: ptr UncheckedArray[ColorRGBX],
  len: int
) =
  ## Writes the alpha channel of each of the `len` pixels in `src` to the
  ## matching position in `dst`.
  var done = 0

  # Vector part: four loads, 4 pixels apart, cover 16 pixels whose alpha
  # values are packed into a single 16-byte store.
  while len - done >= 16:
    let packed = pack4xAlphaValues(
      mm_loadu_si128(src[done + 0].addr),
      mm_loadu_si128(src[done + 4].addr),
      mm_loadu_si128(src[done + 8].addr),
      mm_loadu_si128(src[done + 12].addr)
    )
    mm_storeu_si128(dst[done].addr, packed)
    done += 16

  # Scalar part for the fewer-than-16 pixels left over.
  for idx in done ..< len:
    dst[idx] = src[idx].a
|
||||
|
||||
proc invertSimd*(data: ptr UncheckedArray[ColorRGBX], len: int) =
|
||||
var i: int
|
||||
let vec255 = mm_set1_epi8(cast[int8](255))
|
||||
|
|
Loading…
Reference in a new issue