From 4dc81592f67f9d1f38fedee3ca38266611e8ed31 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 25 May 2022 20:56:54 -0500 Subject: [PATCH] some simd --- src/pixie/fileformats/jpeg.nim | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/pixie/fileformats/jpeg.nim b/src/pixie/fileformats/jpeg.nim index e7c79b4..fa32e67 100644 --- a/src/pixie/fileformats/jpeg.nim +++ b/src/pixie/fileformats/jpeg.nim @@ -1,6 +1,9 @@ import pixie/common, pixie/images, pixie/masks, sequtils, strutils, chroma, std/decls, flatty/binny +when defined(amd64) and not defined(pixieNoSimd): + import nimsimd/sse2 + # This JPEG decoder is loosely based on stb_image which is public domain. # JPEG is a complex format, this decoder only supports the most common features: @@ -887,8 +890,19 @@ proc quantizationAndIDCTPass(state: var DecoderState) = for column in 0 ..< h: for row in 0 ..< w: var data {.byaddr.} = state.components[comp].blocks[row][column] - for i in 0 ..< 64: - data[i] = cast[int16](data[i] * state.quantizationTables[qTableId][i].int32) + + when defined(amd64) and not defined(pixieNoSimd): + for i in 0 ..< 8: # 8 per pass + var q = mm_loadu_si128(state.quantizationTables[qTableId][i * 8].addr) + q = mm_unpacklo_epi8(q, mm_setzero_si128()) + var v = mm_loadu_si128(data[i * 8].addr) + mm_storeu_si128(data[i * 8].addr, mm_mullo_epi16(v, q)) + else: + for i in 0 ..< 64: + data[i] = cast[int16]( + data[i] * state.quantizationTables[qTableId][i].int32 + ) + state.components[comp].idctBlock( state.components[comp].widthStride * column * 8 + row * 8, data