From 8e346d1477c1c1e8b7f20217a4c277bf0411fc4a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Sun, 17 Oct 2021 22:33:53 -0500 Subject: [PATCH] simd magnifyBy2 faster still --- src/pixie/images.nim | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 6b60b7e..d9c5a86 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -331,12 +331,29 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} = for y in 0 ..< image.height: # Write one row of pixels duplicated by scale - for x in 0 ..< image.width: + var x: int + when defined(amd64) and not defined(pixieNoSimd): + if scale == 2: + let mask = cast[M128i]([uint32.high, 0, 0, 0]) + for _ in countup(0, image.width - 4, 2): + let + values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr) + first = mm_and_si128(values, mask) + second = mm_and_si128(mm_srli_si128(values, 4), mask) + combined = mm_or_si128(first, mm_slli_si128(second, 8)) + doubled = mm_or_si128(combined, mm_slli_si128(combined, 4)) + mm_storeu_si128( + result.data[result.dataIndex(x * scale, y * scale)].addr, + doubled + ) + x += 2 + for _ in x ..< image.width: let rgbx = image.getRgbaUnsafe(x, y) - idx = result.dataIndex(x * scale, y * scale) + resultIdx = result.dataIndex(x * scale, y * scale) for i in 0 ..< scale: - result.data[idx + i] = rgbx + result.data[resultIdx + i] = rgbx + inc x # Copy that row of pixels into (scale - 1) more rows let rowStart = result.dataIndex(0, y * scale) for i in 1 ..< scale: