use aligned store (bugfix)

This commit is contained in:
Ryan Oldenburg 2022-07-31 15:20:18 -05:00
parent a92e289e36
commit 58887e8eb6
2 changed files with 8 additions and 8 deletions

View file

@@ -464,7 +464,7 @@ proc blendLineNormalAvx2*(
source = mm256_loadu_si256(b[i].addr)
eq255 = mm256_cmpeq_epi8(source, vec255)
if (mm256_movemask_epi8(eq255) and 0x88888888) == 0x88888888: # Opaque source
-mm256_storeu_si256(a[i].addr, source)
+mm256_store_si256(a[i].addr, source)
else:
let backdrop = mm256_load_si256(a[i].addr)
mm256_store_si256(a[i].addr, blendNormalSimd(backdrop, source))

View file

@@ -305,7 +305,7 @@ proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
-mm_storeu_si128(
+mm_store_si128(
cast[pointer](p),
mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
)
@@ -347,8 +347,8 @@ proc ceilSse2*(image: Image) {.simd.} =
values1 = mm_cmpeq_epi8(values1, vecZero)
values0 = mm_andnot_si128(values0, vec255)
values1 = mm_andnot_si128(values1, vec255)
-mm_storeu_si128(cast[pointer](p), values0)
-mm_storeu_si128(cast[pointer](p + 16), values1)
+mm_store_si128(cast[pointer](p), values0)
+mm_store_si128(cast[pointer](p + 16), values1)
p += 32
i += 8 * iterations
@@ -562,7 +562,7 @@ proc blendLineCoverageOverwriteSse2*(
else:
var coverage = coverage
for _ in 0 ..< 4:
-mm_storeu_si128(line[i].addr, rgbxVec.applyCoverage(coverage))
+mm_store_si128(line[i].addr, rgbxVec.applyCoverage(coverage))
coverage = mm_srli_si128(coverage, 4)
i += 4
@@ -612,7 +612,7 @@ proc blendLineNormalSse2*(
source = mm_loadu_si128(b[i].addr)
eq255 = mm_cmpeq_epi8(source, vec255)
if (mm_movemask_epi8(eq255) and 0x00008888) == 0x00008888: # Opaque source
-mm_storeu_si128(a[i].addr, source)
+mm_store_si128(a[i].addr, source)
else:
let backdrop = mm_load_si128(a[i].addr)
mm_store_si128(a[i].addr, blendNormalSimd(backdrop, source))
@@ -663,7 +663,7 @@ proc blendLineCoverageNormalSse2*(
let
backdrop = mm_loadu_si128(line[i].addr)
source = rgbxVec.applyCoverage(coverage)
-mm_storeu_si128(line[i].addr, blendNormalSimd(backdrop, source))
+mm_store_si128(line[i].addr, blendNormalSimd(backdrop, source))
coverage = mm_srli_si128(coverage, 4)
i += 4
@@ -765,7 +765,7 @@ proc blendLineCoverageMaskSse2*(
let
backdrop = mm_loadu_si128(line[i].addr)
source = rgbxVec.applyCoverage(coverage)
-mm_storeu_si128(line[i].addr, blendMaskSimd(backdrop, source))
+mm_store_si128(line[i].addr, blendMaskSimd(backdrop, source))
coverage = mm_srli_si128(coverage, 4)
i += 4