This commit is contained in:
Ryan Oldenburg 2022-07-27 00:25:23 -05:00
parent bf15e44b4f
commit 2d39091c44
2 changed files with 4 additions and 7 deletions

View file

@ -330,9 +330,8 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
addedOddDiv4 = mm256_srli_epi16(addedOdd, 2)
merged = mm256_or_si256(addedEvenDiv4, mm256_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0, 2, 4, 6 so mask the others out and permute into position
masked = mm256_and_si256(merged, mergedMask)
permuted = mm_256_permutevar8x32_epi32(masked, permuteControl)
# index 0, 2, 4, 6 so permute into position and store
permuted = mm_256_permutevar8x32_epi32(merged, permuteControl)
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm256_castsi256_si128(permuted)

View file

@ -383,10 +383,8 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0 and 2 so mask the others out and shift 0 and 2 into
# position and store
masked = mm_and_si128(merged, mergedMask)
shuffled = mm_shuffle_epi32(masked, MM_SHUFFLE(3, 3, 2, 0))
# index 0 and 2 so shift 0 and 2 into position and store
shuffled = mm_shuffle_epi32(merged, MM_SHUFFLE(3, 3, 2, 0))
lower = mm_cvtsi128_si64(shuffled)
copyMem(result.data[result.dataIndex(x, y)].addr, lower.unsafeAddr, 8)
x += 2