avx2 versions
This commit is contained in:
parent
d9be882fe2
commit
0df68b6b65
|
@ -1,4 +1,4 @@
|
|||
import avx, chroma, internal, nimsimd/avx2, pixie/common, vmath
|
||||
import avx, chroma, internal, nimsimd/avx2, pixie/blends, pixie/common, vmath
|
||||
|
||||
when defined(gcc) or defined(clang):
|
||||
{.localPassc: "-mavx2".}
|
||||
|
@ -376,5 +376,92 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
|
|||
# Set src as this result for if we do another power
|
||||
src = result
|
||||
|
||||
proc blitLineNormalAvx2*(
|
||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||
) {.simd.} =
|
||||
let
|
||||
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
||||
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
||||
vec255 = mm256_set1_epi8(255)
|
||||
vecAlpha255 = mm256_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
|
||||
|
||||
var i: int
|
||||
while i < len - 8:
|
||||
let
|
||||
source = mm256_loadu_si256(b[i].addr)
|
||||
eq255 = mm256_cmpeq_epi8(source, vec255)
|
||||
if (mm256_movemask_epi8(eq255) and 0x88888888) == 0x88888888: # Opaque source
|
||||
mm256_storeu_si256(a[i].addr, source)
|
||||
else:
|
||||
let backdrop = mm256_loadu_si256(a[i].addr)
|
||||
|
||||
var
|
||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm256_or_si256(sourceAlpha, mm256_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
let multiplier = mm256_sub_epi32(vecAlpha255, sourceAlpha)
|
||||
|
||||
backdropEven = mm256_mulhi_epu16(backdropEven, multiplier)
|
||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, multiplier)
|
||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
let added = mm256_add_epi8(
|
||||
source,
|
||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
mm256_storeu_si256(a[i].addr, added)
|
||||
|
||||
i += 8
|
||||
|
||||
for i in i ..< len:
|
||||
a[i] = blendNormal(a[i], b[i])
|
||||
|
||||
proc blitLineMaskAvx2*(
|
||||
a, b: ptr UncheckedArray[ColorRGBX], len: int
|
||||
) {.simd.} =
|
||||
let
|
||||
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
|
||||
oddMask = mm256_set1_epi16(cast[int16](0xff00))
|
||||
div255 = mm256_set1_epi16(cast[int16](0x8081))
|
||||
vec255 = mm256_set1_epi8(255)
|
||||
|
||||
var i: int
|
||||
while i < len - 8:
|
||||
let
|
||||
source = mm256_loadu_si256(b[i].addr)
|
||||
eq255 = mm256_cmpeq_epi8(source, vec255)
|
||||
if (mm256_movemask_epi8(eq255) and 0x88888888) == 0x88888888: # Opaque source
|
||||
discard
|
||||
else:
|
||||
let backdrop = mm256_loadu_si256(a[i].addr)
|
||||
|
||||
var
|
||||
sourceAlpha = mm256_and_si256(source, alphaMask)
|
||||
backdropEven = mm256_slli_epi16(backdrop, 8)
|
||||
backdropOdd = mm256_and_si256(backdrop, oddMask)
|
||||
|
||||
sourceAlpha = mm256_or_si256(sourceAlpha, mm256_srli_epi32(sourceAlpha, 16))
|
||||
|
||||
backdropEven = mm256_mulhi_epu16(backdropEven, sourceAlpha)
|
||||
backdropOdd = mm256_mulhi_epu16(backdropOdd, sourceAlpha)
|
||||
backdropEven = mm256_srli_epi16(mm256_mulhi_epu16(backdropEven, div255), 7)
|
||||
backdropOdd = mm256_srli_epi16(mm256_mulhi_epu16(backdropOdd, div255), 7)
|
||||
|
||||
mm256_storeu_si256(
|
||||
a[i].addr,
|
||||
mm256_or_si256(backdropEven, mm256_slli_epi16(backdropOdd, 8))
|
||||
)
|
||||
|
||||
i += 8
|
||||
|
||||
for i in i ..< len:
|
||||
a[i] = blendMask(a[i], b[i])
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
Loading…
Reference in a new issue