diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index e6a38be..2694c73 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -553,16 +553,11 @@ when defined(amd64) and allowSimd: var backdropEven = mm_slli_epi16(backdrop, 8) backdropOdd = mm_and_si128(backdrop, oddMask) - - # backdrop * k backdropEven = mm_mulhi_epu16(backdropEven, evenK) backdropOdd = mm_mulhi_epu16(backdropOdd, oddK) - - # div 255 backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) - # Shift from high to low bits sourceEven = mm_srli_epi16(sourceEven, 8) sourceOdd = mm_srli_epi16(sourceOdd, 8) @@ -582,12 +577,8 @@ when defined(amd64) and allowSimd: var backdropEven = mm_slli_epi16(backdrop, 8) backdropOdd = mm_and_si128(backdrop, oddMask) - - # backdrop * source backdropEven = mm_mulhi_epu16(backdropEven, sourceEven) backdropOdd = mm_mulhi_epu16(backdropOdd, sourceOdd) - - # div 255 backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7) backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7) diff --git a/src/pixie/simd/avx2.nim b/src/pixie/simd/avx2.nim index 89f74ef..be900bd 100644 --- a/src/pixie/simd/avx2.nim +++ b/src/pixie/simd/avx2.nim @@ -116,10 +116,8 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} = let tmpEven = mm256_add_epi16(colorsEven, vec128) tmpOdd = mm256_add_epi16(colorsOdd, vec128) - colorsEven = tmpEven - colorsOdd = tmpOdd - colorsEven = mm256_srli_epi16(colorsEven, 8) - colorsOdd = mm256_srli_epi16(colorsOdd, 8) + colorsEven = mm256_srli_epi16(tmpEven, 8) + colorsOdd = mm256_srli_epi16(tmpOdd, 8) colorsEven = mm256_add_epi16(colorsEven, tmpEven) colorsOdd = mm256_add_epi16(colorsOdd, tmpOdd) colorsEven = mm256_srli_epi16(colorsEven, 8) diff --git a/src/pixie/simd/sse2.nim b/src/pixie/simd/sse2.nim index c5b1db4..b5021fc 100644 --- a/src/pixie/simd/sse2.nim +++ b/src/pixie/simd/sse2.nim @@ -190,10 +190,8 @@ proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} = let tmpEven = mm_add_epi16(colorsEven, vec128) tmpOdd = mm_add_epi16(colorsOdd, vec128) - colorsEven = tmpEven - colorsOdd = tmpOdd - colorsEven = mm_srli_epi16(colorsEven, 8) - colorsOdd = mm_srli_epi16(colorsOdd, 8) + colorsEven = mm_srli_epi16(tmpEven, 8) + colorsOdd = mm_srli_epi16(tmpOdd, 8) colorsEven = mm_add_epi16(colorsEven, tmpEven) colorsOdd = mm_add_epi16(colorsOdd, tmpOdd) colorsEven = mm_srli_epi16(colorsEven, 8)