From 0e1df4b0c8e08815f214489b372000f35c7fb7f7 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 27 Jul 2022 00:30:29 -0500 Subject: [PATCH] bugfix --- src/pixie/simd/avx2.nim | 8 ++++---- src/pixie/simd/sse2.nim | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pixie/simd/avx2.nim b/src/pixie/simd/avx2.nim index ff11e83..b79f695 100644 --- a/src/pixie/simd/avx2.nim +++ b/src/pixie/simd/avx2.nim @@ -110,7 +110,7 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} = oddMask = mm256_set1_epi16(0xff00) vec128 = mm256_set1_epi16(128) hiMask = mm256_set1_epi16(255 shl 8) - iterations = data.len div 8 + iterations = (data.len - i) div 8 for _ in 0 ..< iterations: let values = mm256_load_si256(cast[pointer](p)) @@ -163,7 +163,7 @@ proc invertAvx2*(image: Image) {.simd.} = let vec255 = mm256_set1_epi8(255) - iterations = image.data.len div 16 + iterations = (image.data.len - i) div 16 for _ in 0 ..< iterations: let a = mm256_load_si256(cast[pointer](p)) @@ -211,7 +211,7 @@ proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} = div255 = mm256_set1_epi16(0x8081) zeroVec = mm256_setzero_si256() opacityVec = mm256_slli_epi16(mm256_set1_epi16(opacity), 8) - iterations = image.data.len div 8 + iterations = (image.data.len - i) div 8 for _ in 0 ..< iterations: let values = mm256_load_si256(cast[pointer](p)) @@ -257,7 +257,7 @@ proc ceilAvx2*(image: Image) {.simd.} = let vecZero = mm256_setzero_si256() vec255 = mm256_set1_epi8(255) - iterations = image.data.len div 8 + iterations = (image.data.len - i) div 8 for _ in 0 ..< iterations: var values = mm256_load_si256(cast[pointer](p)) values = mm256_cmpeq_epi8(values, vecZero) diff --git a/src/pixie/simd/sse2.nim b/src/pixie/simd/sse2.nim index 890bd35..84140de 100644 --- a/src/pixie/simd/sse2.nim +++ b/src/pixie/simd/sse2.nim @@ -212,7 +212,7 @@ proc invertSse2*(image: Image) {.simd.} = let vec255 = mm_set1_epi8(255) - iterations = image.data.len div 16 + iterations = (image.data.len - i) div 16 for _ in 0 ..< iterations: let a = mm_load_si128(cast[pointer](p)) @@ -264,7 +264,7 @@ proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} = div255 = mm_set1_epi16(0x8081) zeroVec = mm_setzero_si128() opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8) - iterations = image.data.len div 4 + iterations = (image.data.len - i) div 4 for _ in 0 ..< iterations: let values = mm_loadu_si128(cast[pointer](p)) if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff: @@ -308,7 +308,7 @@ proc ceilSse2*(image: Image) {.simd.} = let vecZero = mm_setzero_si128() vec255 = mm_set1_epi8(255) - iterations = image.data.len div 8 + iterations = (image.data.len - i) div 8 for _ in 0 ..< iterations: var values0 = mm_loadu_si128(cast[pointer](p))