Merge pull request #307 from guzba/master

simd magnifyBy2 faster still
This commit is contained in:
treeform 2021-10-17 21:30:31 -07:00 committed by GitHub
commit 42f98d8a1c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -331,12 +331,29 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
for y in 0 ..< image.height:
# Write one row of pixels duplicated by scale
for x in 0 ..< image.width:
var x: int
when defined(amd64) and not defined(pixieNoSimd):
if scale == 2:
let mask = cast[M128i]([uint32.high, 0, 0, 0])
for _ in countup(0, image.width - 4, 2):
let
values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
first = mm_and_si128(values, mask)
second = mm_and_si128(mm_srli_si128(values, 4), mask)
combined = mm_or_si128(first, mm_slli_si128(second, 8))
doubled = mm_or_si128(combined, mm_slli_si128(combined, 4))
mm_storeu_si128(
result.data[result.dataIndex(x * scale, y * scale)].addr,
doubled
)
x += 2
for _ in x ..< image.width:
let
rgbx = image.getRgbaUnsafe(x, y)
idx = result.dataIndex(x * scale, y * scale)
resultIdx = result.dataIndex(x * scale, y * scale)
for i in 0 ..< scale:
result.data[idx + i] = rgbx
result.data[resultIdx + i] = rgbx
inc x
# Copy that row of pixels into (scale - 1) more rows
let rowStart = result.dataIndex(0, y * scale)
for i in 1 ..< scale: