image.newMask 35% faster with simd

This commit is contained in:
Ryan Oldenburg 2021-02-09 16:08:08 -06:00
parent c95df08cbe
commit 264b30b118
2 changed files with 45 additions and 2 deletions

View file

@ -373,8 +373,45 @@ proc newMask*(image: Image): Mask =
## Returns a new mask using the alpha values of the parameter image.
result = newMask(image.width, image.height)
for i, rgba in image.data:
result.data[i] = rgba.a
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let mask32 = cast[M128i]([uint32.high, 0, 0, 0])
for _ in countup(0, image.data.len - 16, 16):
var
a = mm_loadu_si128(image.data[i + 0].addr)
b = mm_loadu_si128(image.data[i + 4].addr)
c = mm_loadu_si128(image.data[i + 8].addr)
d = mm_loadu_si128(image.data[i + 12].addr)
template pack(v: var M128i) =
# Shuffle the alpha values for these 4 colors to the first 4 bytes
v = mm_srli_epi32(v, 24)
let
i = mm_srli_si128(v, 3)
j = mm_srli_si128(v, 6)
k = mm_srli_si128(v, 9)
v = mm_or_si128(mm_or_si128(v, i), mm_or_si128(j, k))
v = mm_and_si128(v, mask32)
pack(a)
pack(b)
pack(c)
pack(d)
b = mm_slli_si128(b, 4)
c = mm_slli_si128(c, 8)
d = mm_slli_si128(d, 12)
mm_storeu_si128(
result.data[i].addr,
mm_or_si128(mm_or_si128(a, b), mm_or_si128(c, d))
)
i += 16
for j in i ..< image.data.len:
result.data[i] = image.data[j].a
proc getRgbaSmooth*(image: Image, x, y: float32): ColorRGBA =
let

View file

@ -72,6 +72,12 @@ block:
reset()
timeIt "newMask":
let mask = image.newMask()
doAssert mask[0, 0] == image[0, 0].a
reset()
timeIt "lerp integers":
for i in 0 ..< 100000:
let c = image[0, 0]