Merge pull request #444 from guzba/pngupsimd
encode filter 2 pngs, simd unfilter filter 2 pngs
This commit is contained in:
commit
7718063d00
1 changed file with 20 additions and 8 deletions
|
@@ -1,6 +1,9 @@
|
||||||
import chroma, flatty/binny, math, pixie/common, pixie/images, pixie/internal,
|
import chroma, flatty/binny, math, pixie/common, pixie/images, pixie/internal,
|
||||||
pixie/masks, zippy, zippy/crc
|
pixie/masks, zippy, zippy/crc
|
||||||
|
|
||||||
|
when defined(amd64) and allowSimd:
|
||||||
|
import nimsimd/sse2
|
||||||
|
|
||||||
# See http://www.libpng.org/pub/png/spec/1.2/PNG-Contents.html
|
# See http://www.libpng.org/pub/png/spec/1.2/PNG-Contents.html
|
||||||
|
|
||||||
const
|
const
|
||||||
|
@@ -128,7 +131,19 @@ proc unfilter(
|
||||||
let
|
let
|
||||||
uncompressedStartIdx = uncompressedIdx(1, y)
|
uncompressedStartIdx = uncompressedIdx(1, y)
|
||||||
unfilteredStartIx = unfiteredIdx(0, y)
|
unfilteredStartIx = unfiteredIdx(0, y)
|
||||||
for x in 0 ..< rowBytes:
|
var x: int
|
||||||
|
when allowSimd and defined(amd64):
|
||||||
|
if y - 1 >= 0:
|
||||||
|
for _ in 0 ..< rowBytes div 16:
|
||||||
|
let
|
||||||
|
bytes = mm_loadu_si128(uncompressed[uncompressedStartIdx + x].addr)
|
||||||
|
up = mm_loadu_si128(result[unfilteredStartIx + x - rowBytes].addr)
|
||||||
|
mm_storeu_si128(
|
||||||
|
result[unfilteredStartIx + x].addr,
|
||||||
|
mm_add_epi8(bytes, up)
|
||||||
|
)
|
||||||
|
x += 16
|
||||||
|
for x in x ..< rowBytes:
|
||||||
var value = uncompressed[uncompressedStartIdx + x]
|
var value = uncompressed[uncompressedStartIdx + x]
|
||||||
if y - 1 >= 0:
|
if y - 1 >= 0:
|
||||||
value += result[unfilteredStartIx + x - rowBytes]
|
value += result[unfilteredStartIx + x - rowBytes]
|
||||||
|
@@ -556,19 +571,16 @@ proc encodePng*(
|
||||||
# Add room for 1 byte before each row for the filter type.
|
# Add room for 1 byte before each row for the filter type.
|
||||||
var filtered = newString(width * height * channels + height)
|
var filtered = newString(width * height * channels + height)
|
||||||
for y in 0 ..< height:
|
for y in 0 ..< height:
|
||||||
filtered[y * width * channels + y] = 3.char # Average
|
filtered[y * width * channels + y] = 2.char # Up filter type
|
||||||
for x in 0 ..< width * channels:
|
for x in 0 ..< width * channels:
|
||||||
# Move through the image data byte-by-byte
|
# Move through the image data byte-by-byte
|
||||||
let
|
let
|
||||||
dataPos = y * width * channels + x
|
dataPos = y * width * channels + x
|
||||||
filteredPos = y * width * channels + y + 1 + x
|
filteredPos = y * width * channels + y + 1 + x
|
||||||
var left, up: int
|
var up: uint8
|
||||||
if x - channels >= 0:
|
|
||||||
left = data[dataPos - channels].int
|
|
||||||
if y - 1 >= 0:
|
if y - 1 >= 0:
|
||||||
up = data[(y - 1) * width * channels + x].int
|
up = data[(y - 1) * width * channels + x]
|
||||||
let avg = ((left + up) div 2).uint8
|
filtered[filteredPos] = (data[dataPos] - up).char
|
||||||
filtered[filteredPos] = (data[dataPos] - avg).char
|
|
||||||
|
|
||||||
let compressed =
|
let compressed =
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in a new issue