much simpler
This commit is contained in:
parent
432dce4902
commit
14576fcd27
1 changed files with 8 additions and 27 deletions
|
@ -88,11 +88,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
||||||
for y in 0 ..< result.height:
|
for y in 0 ..< result.height:
|
||||||
var x: int
|
var x: int
|
||||||
when defined(amd64) and allowSimd:
|
when defined(amd64) and allowSimd:
|
||||||
let
|
let oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
|
||||||
firstByte = cast[M128i](
|
|
||||||
[uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
|
||||||
)
|
|
||||||
while x <= result.width - 16:
|
while x <= result.width - 16:
|
||||||
let
|
let
|
||||||
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
|
top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr)
|
||||||
|
@ -122,29 +118,14 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
||||||
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
|
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
|
||||||
|
|
||||||
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
|
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
|
||||||
|
# Merged has the correct values in the even indices
|
||||||
|
# Mask out the odd values for packing
|
||||||
|
masked = mm_andnot_si128(oddMask, merged)
|
||||||
|
|
||||||
# merged has the correct values in the even indices
|
mm_storeu_si128(
|
||||||
|
result.data[result.dataIndex(x, y)].addr,
|
||||||
a = mm_and_si128(merged, firstByte)
|
mm_packus_epi16(masked, mm_setzero_si128())
|
||||||
b = mm_and_si128(mm_srli_si128(merged, 2), firstByte)
|
)
|
||||||
c = mm_and_si128(mm_srli_si128(merged, 4), firstByte)
|
|
||||||
d = mm_and_si128(mm_srli_si128(merged, 6), firstByte)
|
|
||||||
e = mm_and_si128(mm_srli_si128(merged, 8), firstByte)
|
|
||||||
f = mm_and_si128(mm_srli_si128(merged, 10), firstByte)
|
|
||||||
g = mm_and_si128(mm_srli_si128(merged, 12), firstByte)
|
|
||||||
h = mm_and_si128(mm_srli_si128(merged, 14), firstByte)
|
|
||||||
|
|
||||||
ab = mm_or_si128(a, mm_slli_si128(b, 1))
|
|
||||||
cd = mm_or_si128(c, mm_slli_si128(d, 1))
|
|
||||||
ef = mm_or_si128(e, mm_slli_si128(f, 1))
|
|
||||||
gh = mm_or_si128(g, mm_slli_si128(h, 1))
|
|
||||||
|
|
||||||
abcd = mm_or_si128(ab, mm_slli_si128(cd, 2))
|
|
||||||
efgh = mm_or_si128(ef, mm_slli_si128(gh, 2))
|
|
||||||
|
|
||||||
abcdefgh = mm_or_si128(abcd, mm_slli_si128(efgh, 4))
|
|
||||||
|
|
||||||
mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, abcdefgh)
|
|
||||||
x += 8
|
x += 8
|
||||||
|
|
||||||
for x in x ..< result.width:
|
for x in x ..< result.width:
|
||||||
|
|
Loading…
Reference in a new issue