Merge pull request #442 from guzba/master

faster png decode
This commit is contained in:
Andre von Houck 2022-06-15 19:46:42 -07:00 committed by GitHub
commit f96f7a5d49
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 84 additions and 215 deletions

View file

@ -89,9 +89,9 @@ proc decodeDib*(
var rgba: ColorRGBA
if offset + 3 > len - 2:
failInvalid()
rgba.r = data.readUint8(offset + 2)
rgba.g = data.readUint8(offset + 1)
rgba.b = data.readUint8(offset + 0)
rgba.r = data[offset + 2]
rgba.g = data[offset + 1]
rgba.b = data[offset + 0]
rgba.a = 255
offset += 4
colorPalette[i] = rgba
@ -122,7 +122,7 @@ proc decodeDib*(
if haveBits == 0:
if offset >= len:
failInvalid()
colorBits = data.readUint8(offset)
colorBits = data[offset]
haveBits = 8
offset += 1
if (colorBits and 0b1000_0000) == 0:
@ -147,7 +147,7 @@ proc decodeDib*(
if haveBits == 0:
if offset >= len:
failInvalid()
colorBits = data.readUint8(offset)
colorBits = data[offset]
haveBits = 8
offset += 1
let index = (colorBits and 0b1111_0000) shr 4
@ -167,7 +167,7 @@ proc decodeDib*(
if offset >= len:
failInvalid()
var rgba: ColorRGBA
let index = data.readUint8(offset)
let index = data[offset]
offset += 1
if index.int >= colorPaletteSize:
failInvalid()
@ -183,9 +183,9 @@ proc decodeDib*(
if offset + 2 >= len:
failInvalid()
var rgba: ColorRGBA
rgba.r = data.readUint8(offset + 2)
rgba.g = data.readUint8(offset + 1)
rgba.b = data.readUint8(offset + 0)
rgba.r = data[offset + 2]
rgba.g = data[offset + 1]
rgba.b = data[offset + 0]
rgba.a = 255
offset += 3
result.unsafe[x, result.height - y - 1] = rgba.rgbx()

View file

@ -90,14 +90,12 @@ proc decodePalette(data: pointer, len: int): seq[ColorRGB] =
result.setLen(len div 3)
let data = cast[ptr UncheckedArray[uint8]](data)
for i in 0 ..< len div 3:
copyMem(result[i].addr, data[i * 3].unsafeAddr, 3)
copyMem(result[0].addr, data, len)
proc unfilter(
uncompressed: string, height, rowBytes, bpp: int
): string =
result.setLen(uncompressed.len - height)
uncompressed: pointer, len, height, rowBytes, bpp: int
): seq[uint8] =
result.setLen(len - height)
template uncompressedIdx(x, y: int): int =
x + y * (rowBytes + 1)
@ -105,9 +103,11 @@ proc unfilter(
template unfiteredIdx(x, y: int): int =
x + y * rowBytes
let uncompressed = cast[ptr UncheckedArray[uint8]](uncompressed)
# Unfilter the image data
for y in 0 ..< height:
let filterType = uncompressed.readUint8(uncompressedIdx(0, y))
let filterType = uncompressed[uncompressedIdx(0, y)]
case filterType:
of 0: # None
copyMem(
@ -116,39 +116,51 @@ proc unfilter(
rowBytes
)
of 1: # Sub
let
uncompressedStartIdx = uncompressedIdx(1, y)
unfilteredStartIx = unfiteredIdx(0, y)
for x in 0 ..< rowBytes:
var value = uncompressed.readUint8(uncompressedIdx(x + 1, y))
var value = uncompressed[uncompressedStartIdx + x]
if x - bpp >= 0:
value += result.readUint8(unfiteredIdx(x - bpp, y))
result[unfiteredIdx(x, y)] = value.char
value += result[unfilteredStartIx + x - bpp]
result[unfilteredStartIx + x] = value
of 2: # Up
let
uncompressedStartIdx = uncompressedIdx(1, y)
unfilteredStartIx = unfiteredIdx(0, y)
for x in 0 ..< rowBytes:
var value = uncompressed.readUint8(uncompressedIdx(x + 1, y))
var value = uncompressed[uncompressedStartIdx + x]
if y - 1 >= 0:
value += result.readUint8(unfiteredIdx(x, y - 1))
result[unfiteredIdx(x, y)] = value.char
value += result[unfilteredStartIx + x - rowBytes]
result[unfilteredStartIx + x] = value
of 3: # Average
let
uncompressedStartIdx = uncompressedIdx(1, y)
unfilteredStartIx = unfiteredIdx(0, y)
for x in 0 ..< rowBytes:
var
value = uncompressed.readUint8(uncompressedIdx(x + 1, y))
left, up: int
value = uncompressed[uncompressedStartIdx + x]
left, up: uint32
if x - bpp >= 0:
left = result[unfiteredIdx(x - bpp, y)].int
left = result[unfilteredStartIx + x - bpp]
if y - 1 >= 0:
up = result[unfiteredIdx(x, y - 1)].int
up = result[unfilteredStartIx + x - rowBytes]
value += ((left + up) div 2).uint8
result[unfiteredIdx(x, y)] = value.char
result[unfilteredStartIx + x] = value
of 4: # Paeth
let
uncompressedStartIdx = uncompressedIdx(1, y)
unfilteredStartIx = unfiteredIdx(0, y)
for x in 0 ..< rowBytes:
var
value = uncompressed.readUint8(uncompressedIdx(x + 1, y))
value = uncompressed[uncompressedStartIdx + x]
left, up, upLeft: int
if x - bpp >= 0:
left = result[unfiteredIdx(x - bpp, y)].int
left = result[unfilteredStartIx + x - bpp].int
if y - 1 >= 0:
up = result[unfiteredIdx(x, y - 1)].int
up = result[unfilteredStartIx + x - rowBytes].int
if x - bpp >= 0 and y - 1 >= 0:
upLeft = result[unfiteredIdx(x - bpp, y - 1)].int
upLeft = result[unfilteredStartIx + x - rowBytes - bpp].int
template paethPredictor(a, b, c: int): int =
let
p = a + b - c
@ -162,7 +174,7 @@ proc unfilter(
else:
c
value += paethPredictor(up, left, upLeft).uint8
result[unfiteredIdx(x, y)] = value.char
result[unfilteredStartIx + x] = value
else:
discard # Not possible, parseHeader validates
@ -210,7 +222,8 @@ proc decodeImageData(
failInvalid()
let unfiltered = unfilter(
uncompressed,
uncompressed.cstring,
uncompressed.len,
header.height,
rowBytes,
max(valuesPerPixel div valuesPerByte, 1)
@ -222,7 +235,7 @@ proc decodeImageData(
var bytePos, bitPos: int
for y in 0 ..< header.height:
for x in 0 ..< header.width:
var value = unfiltered.readUint8(bytePos)
var value = unfiltered[bytePos]
case header.bitDepth:
of 1:
value = (value shr (7 - bitPos)) and 1
@ -246,9 +259,7 @@ proc decodeImageData(
bitPos = 0
let alpha = if value.int == special: 0 else: 255
result[x + y * header.width] = ColorRGBA(
r: value, g: value, b: value, a: alpha.uint8
)
result[x + y * header.width] = rgba(value, value, value, alpha.uint8)
# If we move to a new row, skip to the next full byte
if bitPos > 0:
@ -275,17 +286,20 @@ proc decodeImageData(
result[i].a = 255
let lastOffset = header.height * header.width - 1
var rgb: array[3, uint8]
copyMem(rgb.addr, unfiltered[lastOffset * 3].unsafeAddr, 3)
var rgba = ColorRGBA(r: rgb[0], g: rgb[1], b: rgb[2], a: 255)
var rgba = rgba(
unfiltered[lastOffset * 3 + 0].uint8,
unfiltered[lastOffset * 3 + 1].uint8,
unfiltered[lastOffset * 3 + 2].uint8,
255
)
if rgba == special:
rgba.a = 0
result[header.height * header.width - 1] = cast[ColorRGBA](rgba)
result[header.height * header.width - 1] = rgba
of 3:
var bytePos, bitPos: int
for y in 0 ..< header.height:
for x in 0 ..< header.width:
var value = unfiltered.readUint8(bytePos)
var value = unfiltered[bytePos]
case header.bitDepth:
of 1:
value = (value shr (7 - bitPos)) and 1
@ -314,9 +328,7 @@ proc decodeImageData(
transparency.readUint8(value.int)
else:
255
result[x + y * header.width] = ColorRGBA(
r: rgb.r, g: rgb.g, b: rgb.b, a: transparency
)
result[x + y * header.width] = rgba(rgb.r, rgb.g, rgb.b, transparency)
# If we move to a new row, skip to the next full byte
if bitPos > 0:
@ -325,15 +337,14 @@ proc decodeImageData(
of 4:
for i in 0 ..< header.height * header.width:
let bytePos = i * 2
result[i] = ColorRGBA(
r: unfiltered.readUint8(bytePos),
g: unfiltered.readUint8(bytePos),
b: unfiltered.readUint8(bytePos),
a: unfiltered.readUint8(bytePos + 1)
result[i] = rgba(
unfiltered[bytePos],
unfiltered[bytePos],
unfiltered[bytePos],
unfiltered[bytePos + 1]
)
of 6:
for i in 0 ..< header.height * header.width:
result[i] = cast[ColorRGBA](unfiltered.readUint32(i * 4))
copyMem(result[0].addr, unfiltered[0].unsafeAddr, unfiltered.len)
else:
discard # Not possible, parseHeader validates

View file

@ -1,47 +0,0 @@
import benchy, chroma, vmath
include pixie/images
# Each case below builds a fresh 1000x1000 red destination and a 50x50 green
# source, then times a single draw of the source onto the destination.

# drawCorrect with a whole-pixel translation.
block:
  let canvas = newImage(1000, 1000)
  let stamp = newImage(50, 50)
  fill(canvas, rgba(255, 0, 0, 255))
  fill(stamp, rgba(0, 255, 0, 255))

  timeIt "drawCorrect small-on-big":
    canvas.drawCorrect(
      stamp,
      translate(vec2(25, 25)),
      blendMode = NormalBlend
    )
    keep stamp

# drawUber with the same whole-pixel translation.
block:
  let canvas = newImage(1000, 1000)
  let stamp = newImage(50, 50)
  fill(canvas, rgba(255, 0, 0, 255))
  fill(stamp, rgba(0, 255, 0, 255))

  timeIt "drawUber small-on-big":
    canvas.drawUber(
      stamp,
      translate(vec2(25, 25)),
      blendMode = NormalBlend
    )
    keep stamp

# drawCorrect with a fractional translation (25.1, 25.1).
block:
  let canvas = newImage(1000, 1000)
  let stamp = newImage(50, 50)
  fill(canvas, rgba(255, 0, 0, 255))
  fill(stamp, rgba(0, 255, 0, 255))

  timeIt "drawCorrect small-on-big smooth":
    canvas.drawCorrect(
      stamp,
      translate(vec2(25.1, 25.1)),
      blendMode = NormalBlend
    )
    keep stamp

# drawUber with the same fractional translation.
block:
  let canvas = newImage(1000, 1000)
  let stamp = newImage(50, 50)
  fill(canvas, rgba(255, 0, 0, 255))
  fill(stamp, rgba(0, 255, 0, 255))

  timeIt "drawUber small-on-big smooth":
    canvas.drawUber(
      stamp,
      translate(vec2(25.1, 25.1)),
      blendMode = NormalBlend
    )
    keep stamp

View file

@ -1,77 +0,0 @@
import benchy, pixie, pixie/internal
proc blurSlower*(
  image: Image, radius: float32, outOfBounds: SomeColor = ColorRGBX()
) =
  ## Blurs `image` in place using the weights from `gaussianKernel`.
  ##
  ## `radius` is rounded to an integer; a rounded radius of 0 leaves the
  ## image untouched. Samples that fall outside the image use `outOfBounds`
  ## (converted with `asRgbx`). The blur runs as two passes: a horizontal
  ## pass into a temporary image, then a vertical pass written back to
  ## `image`.
  let reach = round(radius).int
  if reach == 0:
    return

  let kernel = gaussianKernel(reach)
  let edge = outOfBounds.asRgbx()

  proc weighted(
    px: ColorRGBX, weight: uint32
  ): array[4, uint32] {.inline.} =
    ## Scales every channel of `px` by `weight`.
    result[0] = px.r * weight
    result[1] = px.g * weight
    result[2] = px.b * weight
    result[3] = px.a * weight

  proc accumulate(
    total: var array[4, uint32], part: array[4, uint32]
  ) {.inline.} =
    ## Adds `part` to `total` channel by channel.
    for channel in 0 .. 3:
      total[channel] += part[channel]

  proc normalized(total: array[4, uint32]): ColorRGBX {.inline.} =
    ## Scales the accumulated channel sums back down to 8 bits.
    # NOTE(review): the 1024 and 255 divisors presumably match the scale of
    # the weights returned by gaussianKernel - confirm against that proc.
    rgbx(
      (total[0] div 1024 div 255).uint8,
      (total[1] div 1024 div 255).uint8,
      (total[2] div 1024 div 255).uint8,
      (total[3] div 1024 div 255).uint8
    )

  # Horizontal pass: image -> horizontal.
  let horizontal = newImage(image.width, image.height)
  for y in 0 ..< image.height:
    for x in 0 ..< image.width:
      var total: array[4, uint32]
      # Taps left of the image.
      for sx in x - reach ..< min(x + reach, 0):
        let tap = kernel[sx - x + reach]
        total.accumulate(weighted(edge, tap))
      # Taps inside the image.
      for sx in max(x - reach, 0) .. min(x + reach, image.width - 1):
        let tap = kernel[sx - x + reach]
        total.accumulate(weighted(image.unsafe[sx, y], tap))
      # Taps right of the image.
      for sx in max(x - reach, image.width) .. x + reach:
        let tap = kernel[sx - x + reach]
        total.accumulate(weighted(edge, tap))
      horizontal.unsafe[x, y] = normalized(total)

  # Vertical pass: horizontal -> image.
  for y in 0 ..< image.height:
    for x in 0 ..< image.width:
      var total: array[4, uint32]
      # Taps above the image.
      for sy in y - reach ..< min(y + reach, 0):
        let tap = kernel[sy - y + reach]
        total.accumulate(weighted(edge, tap))
      # Taps inside the image.
      for sy in max(y - reach, 0) .. min(y + reach, image.height - 1):
        let tap = kernel[sy - y + reach]
        total.accumulate(weighted(horizontal.unsafe[x, sy], tap))
      # Taps below the image.
      for sy in max(y - reach, image.height) .. y + reach:
        let tap = kernel[sy - y + reach]
        total.accumulate(weighted(edge, tap))
      image.unsafe[x, y] = normalized(total)
# Shared 1920x1080 image that both timed calls below operate on.
let image = newImage(1920, 1080)
proc reset() =
# Builds a path holding the rectangle (100, 100, 1720, 880) and fills it
# with opaque white on the shared `image`.
let path = newPath()
path.rect(100, 100, 1720, 880)
image.fillPath(path, rgba(255, 255, 255, 255))
reset()
# Time the `blurSlower` implementation with radius 40.
timeIt "blurSlower":
image.blurSlower(40)
reset()
# Time `image.blur` with the same radius for comparison.
timeIt "blur":
image.blur(40)

View file

@ -1,24 +0,0 @@
import benchy, pixie
# 2560x1440 image filled with a single constant color; both timed sections
# below read every pixel of it.
let image = newImage(2560, 1440)
image.fill(rgba(50, 100, 150, 200))
# Variant 1: x is the outer loop and y the inner loop.
timeIt "x then y":
var sum: uint64
for x in 0 ..< image.width:
for y in 0 ..< image.height:
let pixel = image.unsafe[x, y]
# NOTE(review): the four channels are added together before being added to
# the uint64 `sum`; if the channel fields are 8-bit the per-pixel total may
# wrap - confirm the field types.
sum += pixel.r + pixel.g + pixel.b + pixel.a
# `sum` is compared and handed to `keep` so the result is consumed -
# presumably to stop the loops being optimized away; confirm against benchy.
if sum == 0:
echo "0"
keep sum
# Variant 2: the same work with y as the outer loop and x the inner loop.
timeIt "y then x":
var sum: uint64
for y in 0 ..< image.height:
for x in 0 ..< image.width:
let pixel = image.unsafe[x, y]
# NOTE(review): same narrow-typed channel addition as in the first variant.
sum += pixel.r + pixel.g + pixel.b + pixel.a
if sum == 0:
echo "0"
keep sum

View file

@ -1,6 +1,19 @@
import benchy, jpegsuite, pixie/fileformats/jpeg, strformat
import benchy, jpegsuite, pixie/fileformats/jpeg, stb_image/read as stbi, strformat
for file in jpegSuiteFiles:
let data = readFile(file)
timeIt &"jpeg {(data.len div 1024)}k decode":
timeIt &"pixie jpeg {(data.len div 1024)}k decode":
discard decodeJpeg(data)
block:
for file in jpegSuiteFiles:
let data = readFile(file)
timeIt &"stb_image jpeg {(data.len div 1024)}k decode":
var width, height, channels: int
discard loadFromMemory(
cast[seq[byte]](data),
width,
height,
channels,
stbi.RGBA
)

View file

@ -1,17 +1,10 @@
import pixie, pixie/fileformats/png, pngsuite, strformat
# for file in pngSuiteFiles:
# let
# original = cast[seq[uint8]](
# readFile(&"tests/images/png/pngsuite/{file}.png")
# )
# decoded = decodePng(original)
# encoded = encodePng(decoded)
# decoded2 = decodePng(cast[seq[uint8]](encoded))
# doAssert decoded.height == decoded2.height
# doAssert decoded.width == decoded2.width
# doAssert decoded.data == decoded2.data
for file in pngSuiteFiles:
let
original = readFile(&"tests/fileformats/png/pngsuite/{file}.png")
decoded = decodePng(original)
encoded = encodePng(decoded)
block:
for channels in 1 .. 4:

View file

@ -3,7 +3,7 @@ import chroma, pixie/fileformats/png, pngsuite, stb_image/read as stbi, strforma
for file in pngSuiteFiles:
let
data = readFile(&"tests/fileformats/png/pngsuite/{file}.png")
pixieLoaded = decodePngRaw(data)
pixieLoaded = decodePng(data)
var
width, height, channels: int