From 0600a5914830458d4acc9db255b68fd4824a639a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 03:22:45 -0500 Subject: [PATCH 01/15] simpler --- src/pixie/fileformats/png.nim | 15 ++++++++------- tests/test_png.nim | 17 +++++------------ 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 7cc98e3..7cc09be 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -90,9 +90,7 @@ proc decodePalette(data: pointer, len: int): seq[ColorRGB] = result.setLen(len div 3) - let data = cast[ptr UncheckedArray[uint8]](data) - for i in 0 ..< len div 3: - copyMem(result[i].addr, data[i * 3].unsafeAddr, 3) + copyMem(result[0].addr, data, len) proc unfilter( uncompressed: string, height, rowBytes, bpp: int @@ -275,12 +273,15 @@ proc decodeImageData( result[i].a = 255 let lastOffset = header.height * header.width - 1 - var rgb: array[3, uint8] - copyMem(rgb.addr, unfiltered[lastOffset * 3].unsafeAddr, 3) - var rgba = ColorRGBA(r: rgb[0], g: rgb[1], b: rgb[2], a: 255) + var rgba = rgba( + unfiltered[lastOffset * 3 + 0].uint8, + unfiltered[lastOffset * 3 + 1].uint8, + unfiltered[lastOffset * 3 + 2].uint8, + 255 + ) if rgba == special: rgba.a = 0 - result[header.height * header.width - 1] = cast[ColorRGBA](rgba) + result[header.height * header.width - 1] = rgba of 3: var bytePos, bitPos: int for y in 0 ..< header.height: diff --git a/tests/test_png.nim b/tests/test_png.nim index 9e158f9..28989af 100644 --- a/tests/test_png.nim +++ b/tests/test_png.nim @@ -1,17 +1,10 @@ import pixie, pixie/fileformats/png, pngsuite, strformat -# for file in pngSuiteFiles: -# let -# original = cast[seq[uint8]]( -# readFile(&"tests/images/png/pngsuite/{file}.png") -# ) -# decoded = decodePng(original) -# encoded = encodePng(decoded) -# decoded2 = decodePng(cast[seq[uint8]](encoded)) - -# doAssert decoded.height == decoded2.height -# doAssert decoded.width == decoded2.width -# doAssert decoded.data == decoded2.data +for file in pngSuiteFiles: + let + original = readFile(&"tests/fileformats/png/pngsuite/{file}.png") + decoded = decodePng(original) + encoded = encodePng(decoded) block: for channels in 1 .. 4: From 35ea447b3b61e547dcc9e3e4e621b947f187e617 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 12:20:32 -0500 Subject: [PATCH 02/15] cleaner --- src/pixie/fileformats/png.nim | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 7cc09be..78ad7d5 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -244,9 +244,7 @@ proc decodeImageData( bitPos = 0 let alpha = if value.int == special: 0 else: 255 - result[x + y * header.width] = ColorRGBA( - r: value, g: value, b: value, a: alpha.uint8 - ) + result[x + y * header.width] = rgba(value, value, value, alpha.uint8) # If we move to a new row, skip to the next full byte if bitPos > 0: @@ -315,9 +313,7 @@ proc decodeImageData( transparency.readUint8(value.int) else: 255 - result[x + y * header.width] = ColorRGBA( - r: rgb.r, g: rgb.g, b: rgb.b, a: transparency - ) + result[x + y * header.width] = rgba(rgb.r, rgb.g, rgb.b, transparency) # If we move to a new row, skip to the next full byte if bitPos > 0: @@ -326,11 +322,11 @@ proc decodeImageData( of 4: for i in 0 ..< header.height * header.width: let bytePos = i * 2 - result[i] = ColorRGBA( - r: unfiltered.readUint8(bytePos), - g: unfiltered.readUint8(bytePos), - b: unfiltered.readUint8(bytePos), - a: unfiltered.readUint8(bytePos + 1) + result[i] = rgba( + unfiltered.readUint8(bytePos), + unfiltered.readUint8(bytePos), + unfiltered.readUint8(bytePos), + unfiltered.readUint8(bytePos + 1) ) of 6: for i in 0 ..< header.height * header.width: From ad303de223d7d2e1207802faf499c7a847ae5114 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 12:20:48 -0500 Subject: [PATCH 03/15] fixed --- tests/validate_png.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validate_png.nim b/tests/validate_png.nim index b39b5f6..e104be7 100644 --- a/tests/validate_png.nim +++ b/tests/validate_png.nim @@ -3,7 +3,7 @@ import chroma, pixie/fileformats/png, pngsuite, stb_image/read as stbi, strforma for file in pngSuiteFiles: let data = readFile(&"tests/fileformats/png/pngsuite/{file}.png") - pixieLoaded = decodePngRaw(data) + pixieLoaded = decodePng(data) var width, height, channels: int From ff45edf131816b63e6f76b17f4bc758d66236a6c Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 12:22:01 -0500 Subject: [PATCH 04/15] faster --- src/pixie/fileformats/png.nim | 40 +++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 78ad7d5..ae6acda 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -114,39 +114,51 @@ proc unfilter( rowBytes ) of 1: # Sub + let + uncompressedStartIdx = uncompressedIdx(1, y) + unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: - var value = uncompressed.readUint8(uncompressedIdx(x + 1, y)) + var value = uncompressed.readUint8(uncompressedStartIdx + x) if x - bpp >= 0: - value += result.readUint8(unfiteredIdx(x - bpp, y)) - result[unfiteredIdx(x, y)] = value.char + value += result.readUint8(unfilteredStartIx + x - bpp) + result[unfilteredStartIx + x] = value.char of 2: # Up + let + uncompressedStartIdx = uncompressedIdx(1, y) + unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: - var value = uncompressed.readUint8(uncompressedIdx(x + 1, y)) + var value = uncompressed.readUint8(uncompressedStartIdx + x) if y - 1 >= 0: value += result.readUint8(unfiteredIdx(x, y - 1)) - result[unfiteredIdx(x, y)] = value.char + result[unfilteredStartIx + x] = value.char of 3: # Average + let + uncompressedStartIdx = uncompressedIdx(1, y) + unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: var - value = uncompressed.readUint8(uncompressedIdx(x + 1, y)) + value = uncompressed.readUint8(uncompressedStartIdx + x) left, up: int if x - bpp >= 0: - left = result[unfiteredIdx(x - bpp, y)].int + left = result[unfilteredStartIx + x - bpp].int if y - 1 >= 0: - up = result[unfiteredIdx(x, y - 1)].int + up = result[unfilteredStartIx + x - rowBytes].int value += ((left + up) div 2).uint8 - result[unfiteredIdx(x, y)] = value.char + result[unfilteredStartIx + x] = value.char of 4: # Paeth + let + uncompressedStartIdx = uncompressedIdx(1, y) + unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: var - value = uncompressed.readUint8(uncompressedIdx(x + 1, y)) + value = uncompressed.readUint8(uncompressedStartIdx + x) left, up, upLeft: int if x - bpp >= 0: - left = result[unfiteredIdx(x - bpp, y)].int + left = result[unfilteredStartIx + x - bpp].int if y - 1 >= 0: - up = result[unfiteredIdx(x, y - 1)].int + up = result[unfilteredStartIx + x - rowBytes].int if x - bpp >= 0 and y - 1 >= 0: - upLeft = result[unfiteredIdx(x - bpp, y - 1)].int + upLeft = result[unfilteredStartIx + x - rowBytes - bpp].int template paethPredictor(a, b, c: int): int = let p = a + b - c @@ -160,7 +172,7 @@ proc unfilter( else: c value += paethPredictor(up, left, upLeft).uint8 - result[unfiteredIdx(x, y)] = value.char + result[unfilteredStartIx + x] = value.char else: discard # Not possible, parseHeader validates From 5afa5720dbdd7050e5e013664e9bf0caa8239ba4 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 14:03:28 -0500 Subject: [PATCH 05/15] faster on arc, arc string []= issue 18339 --- src/pixie/fileformats/png.nim | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index ae6acda..d6986de 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -94,7 +94,7 @@ proc decodePalette(data: pointer, len: int): seq[ColorRGB] = proc unfilter( uncompressed: string, height, rowBytes, bpp: int -): string = +): seq[uint8] = result.setLen(uncompressed.len - height) template uncompressedIdx(x, y: int): int = @@ -121,7 +121,7 @@ proc unfilter( var value = uncompressed.readUint8(uncompressedStartIdx + x) if x - bpp >= 0: value += result.readUint8(unfilteredStartIx + x - bpp) - result[unfilteredStartIx + x] = value.char + result[unfilteredStartIx + x] = value of 2: # Up let uncompressedStartIdx = uncompressedIdx(1, y) @@ -129,8 +129,8 @@ proc unfilter( for x in 0 ..< rowBytes: var value = uncompressed.readUint8(uncompressedStartIdx + x) if y - 1 >= 0: - value += result.readUint8(unfiteredIdx(x, y - 1)) - result[unfilteredStartIx + x] = value.char + value += result.readUint8(unfilteredStartIx + x - rowBytes) + result[unfilteredStartIx + x] = value of 3: # Average let uncompressedStartIdx = uncompressedIdx(1, y) @@ -144,7 +144,7 @@ proc unfilter( if y - 1 >= 0: up = result[unfilteredStartIx + x - rowBytes].int value += ((left + up) div 2).uint8 - result[unfilteredStartIx + x] = value.char + result[unfilteredStartIx + x] = value of 4: # Paeth let uncompressedStartIdx = uncompressedIdx(1, y) @@ -172,7 +172,7 @@ proc unfilter( else: c value += paethPredictor(up, left, upLeft).uint8 - result[unfilteredStartIx + x] = value.char + result[unfilteredStartIx + x] = value else: discard # Not possible, parseHeader validates From 4c100a255470d8ebdd3d90b012a1d6581912af2c Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 14:27:25 -0500 Subject: [PATCH 06/15] f --- src/pixie/fileformats/png.nim | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index d6986de..1850318 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -138,11 +138,11 @@ proc unfilter( for x in 0 ..< rowBytes: var value = uncompressed.readUint8(uncompressedStartIdx + x) - left, up: int + left, up: uint32 if x - bpp >= 0: - left = result[unfilteredStartIx + x - bpp].int + left = result[unfilteredStartIx + x - bpp] if y - 1 >= 0: - up = result[unfilteredStartIx + x - rowBytes].int + up = result[unfilteredStartIx + x - rowBytes] value += ((left + up) div 2).uint8 result[unfilteredStartIx + x] = value of 4: # Paeth From 1493634920005070de5611ce90c868efde5c6071 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 14:26:47 -0500 Subject: [PATCH 07/15] faster --- src/pixie/internal.nim | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 9fda619..095147d 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -65,11 +65,15 @@ proc fillUnsafe*( else: var i = start when defined(amd64) and allowSimd: + # Align to 16 bytes + while (cast[uint](data[i].addr) and 15) != 0: + data[i] = rgbx + inc i # When supported, SIMD fill until we run out of room let colorVec = mm_set1_epi32(cast[int32](rgbx)) for _ in 0 ..< len div 8: - mm_storeu_si128(data[i + 0].addr, colorVec) - mm_storeu_si128(data[i + 4].addr, colorVec) + mm_store_si128(data[i + 0].addr, colorVec) + mm_store_si128(data[i + 4].addr, colorVec) i += 8 else: when sizeof(int) == 8: From 03c6ee14110ee27e73adac0e28b6031c812bcaba Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 16:00:26 -0500 Subject: [PATCH 08/15] simpler --- src/pixie/fileformats/png.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 1850318..ec0b0d1 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -120,7 +120,7 @@ proc unfilter( for x in 0 ..< rowBytes: var value = uncompressed.readUint8(uncompressedStartIdx + x) if x - bpp >= 0: - value += result.readUint8(unfilteredStartIx + x - bpp) + value += result[unfilteredStartIx + x - bpp] result[unfilteredStartIx + x] = value of 2: # Up let @@ -129,7 +129,7 @@ proc unfilter( for x in 0 ..< rowBytes: var value = uncompressed.readUint8(uncompressedStartIdx + x) if y - 1 >= 0: - value += result.readUint8(unfilteredStartIx + x - rowBytes) + value += result[unfilteredStartIx + x - rowBytes] result[unfilteredStartIx + x] = value of 3: # Average let From 6d2c681a64397123aa58f234d81dcf79672b2f5a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 16:02:43 -0500 Subject: [PATCH 09/15] faster --- src/pixie/fileformats/png.nim | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index ec0b0d1..7020d4f 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -93,9 +93,9 @@ proc decodePalette(data: pointer, len: int): seq[ColorRGB] = copyMem(result[0].addr, data, len) proc unfilter( - uncompressed: string, height, rowBytes, bpp: int + uncompressed: pointer, len, height, rowBytes, bpp: int ): seq[uint8] = - result.setLen(uncompressed.len - height) + result.setLen(len - height) template uncompressedIdx(x, y: int): int = x + y * (rowBytes + 1) @@ -103,9 +103,11 @@ proc unfilter( template unfiteredIdx(x, y: int): int = x + y * rowBytes + let uncompressed = cast[ptr UncheckedArray[uint8]](uncompressed) + # Unfilter the image data for y in 0 ..< height: - let filterType = uncompressed.readUint8(uncompressedIdx(0, y)) + let filterType = uncompressed[uncompressedIdx(0, y)] case filterType: of 0: # None copyMem( @@ -118,7 +120,7 @@ proc unfilter( uncompressedStartIdx = uncompressedIdx(1, y) unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: - var value = uncompressed.readUint8(uncompressedStartIdx + x) + var value = uncompressed[uncompressedStartIdx + x] if x - bpp >= 0: value += result[unfilteredStartIx + x - bpp] result[unfilteredStartIx + x] = value @@ -127,7 +129,7 @@ proc unfilter( uncompressedStartIdx = uncompressedIdx(1, y) unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: - var value = uncompressed.readUint8(uncompressedStartIdx + x) + var value = uncompressed[uncompressedStartIdx + x] if y - 1 >= 0: value += result[unfilteredStartIx + x - rowBytes] result[unfilteredStartIx + x] = value @@ -137,7 +139,7 @@ proc unfilter( unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: var - value = uncompressed.readUint8(uncompressedStartIdx + x) + value = uncompressed[uncompressedStartIdx + x] left, up: uint32 if x - bpp >= 0: left = result[unfilteredStartIx + x - bpp] @@ -151,7 +153,7 @@ proc unfilter( unfilteredStartIx = unfiteredIdx(0, y) for x in 0 ..< rowBytes: var - value = uncompressed.readUint8(uncompressedStartIdx + x) + value = uncompressed[uncompressedStartIdx + x] left, up, upLeft: int if x - bpp >= 0: left = result[unfilteredStartIx + x - bpp].int @@ -220,7 +222,8 @@ proc decodeImageData( failInvalid() let unfiltered = unfilter( - uncompressed, + uncompressed.cstring, + uncompressed.len, header.height, rowBytes, max(valuesPerPixel div valuesPerByte, 1) From 6734f5eef94026cae8c1f76cf6fb157cefec1ebe Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 16:11:29 -0500 Subject: [PATCH 10/15] simpler, faster --- src/pixie/fileformats/png.nim | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 7020d4f..77ef310 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -344,8 +344,7 @@ proc decodeImageData( unfiltered.readUint8(bytePos + 1) ) of 6: - for i in 0 ..< header.height * header.width: - result[i] = cast[ColorRGBA](unfiltered.readUint32(i * 4)) + copyMem(result[0].addr, unfiltered[0].unsafeAddr, unfiltered.len) else: discard # Not possible, parseHeader validates From 8797658b4729a74195e1f81b586996683119847a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 16:20:59 -0500 Subject: [PATCH 11/15] simpler --- src/pixie/fileformats/png.nim | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pixie/fileformats/png.nim b/src/pixie/fileformats/png.nim index 77ef310..30c93a5 100644 --- a/src/pixie/fileformats/png.nim +++ b/src/pixie/fileformats/png.nim @@ -235,7 +235,7 @@ proc decodeImageData( var bytePos, bitPos: int for y in 0 ..< header.height: for x in 0 ..< header.width: - var value = unfiltered.readUint8(bytePos) + var value = unfiltered[bytePos] case header.bitDepth: of 1: value = (value shr (7 - bitPos)) and 1 @@ -299,7 +299,7 @@ proc decodeImageData( var bytePos, bitPos: int for y in 0 ..< header.height: for x in 0 ..< header.width: - var value = unfiltered.readUint8(bytePos) + var value = unfiltered[bytePos] case header.bitDepth: of 1: value = (value shr (7 - bitPos)) and 1 @@ -338,10 +338,10 @@ proc decodeImageData( for i in 0 ..< header.height * header.width: let bytePos = i * 2 result[i] = rgba( - unfiltered.readUint8(bytePos), - unfiltered.readUint8(bytePos), - unfiltered.readUint8(bytePos), - unfiltered.readUint8(bytePos + 1) + unfiltered[bytePos], + unfiltered[bytePos], + unfiltered[bytePos], + unfiltered[bytePos + 1] ) of 6: copyMem(result[0].addr, unfiltered[0].unsafeAddr, unfiltered.len) From c9bd7fc5590387b574367a496d6f5b65c591c49d Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 16:29:26 -0500 Subject: [PATCH 12/15] simpler --- src/pixie/fileformats/bmp.nim | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pixie/fileformats/bmp.nim b/src/pixie/fileformats/bmp.nim index 44e7f7c..02294cf 100644 --- a/src/pixie/fileformats/bmp.nim +++ b/src/pixie/fileformats/bmp.nim @@ -89,9 +89,9 @@ proc decodeDib*( var rgba: ColorRGBA if offset + 3 > len - 2: failInvalid() - rgba.r = data.readUint8(offset + 2) - rgba.g = data.readUint8(offset + 1) - rgba.b = data.readUint8(offset + 0) + rgba.r = data[offset + 2] + rgba.g = data[offset + 1] + rgba.b = data[offset + 0] rgba.a = 255 offset += 4 colorPalette[i] = rgba @@ -122,7 +122,7 @@ proc decodeDib*( if haveBits == 0: if offset >= len: failInvalid() - colorBits = data.readUint8(offset) + colorBits = data[offset] haveBits = 8 offset += 1 if (colorBits and 0b1000_0000) == 0: @@ -147,7 +147,7 @@ proc decodeDib*( if haveBits == 0: if offset >= len: failInvalid() - colorBits = data.readUint8(offset) + colorBits = data[offset] haveBits = 8 offset += 1 let index = (colorBits and 0b1111_0000) shr 4 @@ -167,7 +167,7 @@ proc decodeDib*( if offset >= len: failInvalid() var rgba: ColorRGBA - let index = data.readUint8(offset) + let index = data[offset] offset += 1 if index.int >= colorPaletteSize: failInvalid() @@ -183,9 +183,9 @@ proc decodeDib*( if offset + 2 >= len: failInvalid() var rgba: ColorRGBA - rgba.r = data.readUint8(offset + 2) - rgba.g = data.readUint8(offset + 1) - rgba.b = data.readUint8(offset + 0) + rgba.r = data[offset + 2] + rgba.g = data[offset + 1] + rgba.b = data[offset + 0] rgba.a = 255 offset += 3 result.unsafe[x, result.height - y - 1] = rgba.rgbx() From 1a0f2c0c3df1e4caba94cb57920a1ef54f49bd3f Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 16:37:06 -0500 Subject: [PATCH 13/15] add comparison --- tests/benchmark_jpeg.nim | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/benchmark_jpeg.nim b/tests/benchmark_jpeg.nim index 436134b..1d9d22d 100644 --- a/tests/benchmark_jpeg.nim +++ b/tests/benchmark_jpeg.nim @@ -1,6 +1,19 @@ -import benchy, jpegsuite, pixie/fileformats/jpeg, strformat +import benchy, jpegsuite, pixie/fileformats/jpeg, stb_image/read as stbi, strformat for file in jpegSuiteFiles: let data = readFile(file) - timeIt &"jpeg {(data.len div 1024)}k decode": + timeIt &"pixie jpeg {(data.len div 1024)}k decode": discard decodeJpeg(data) + +block: + for file in jpegSuiteFiles: + let data = readFile(file) + timeIt &"stb_image jpeg {(data.len div 1024)}k decode": + var width, height, channels: int + discard loadFromMemory( + cast[seq[byte]](data), + width, + height, + channels, + stbi.RGBA + ) From 1c48875d42ebd3f398dbab04a293eeaaed4ddde1 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 19:44:28 -0500 Subject: [PATCH 14/15] rm --- tests/benchmark_drawCorrect_vs_drawUber.nim | 47 ------------- tests/benchmark_images_blur.nim | 77 --------------------- tests/benchmark_images_loop.nim | 24 ------- 3 files changed, 148 deletions(-) delete mode 100644 tests/benchmark_drawCorrect_vs_drawUber.nim delete mode 100644 tests/benchmark_images_blur.nim delete mode 100644 tests/benchmark_images_loop.nim diff --git a/tests/benchmark_drawCorrect_vs_drawUber.nim b/tests/benchmark_drawCorrect_vs_drawUber.nim deleted file mode 100644 index 12d4229..0000000 --- a/tests/benchmark_drawCorrect_vs_drawUber.nim +++ /dev/null @@ -1,47 +0,0 @@ -import benchy, chroma, vmath - -include pixie/images - -block: - let - a = newImage(1000, 1000) - b = newImage(50, 50) - a.fill(rgba(255, 0, 0, 255)) - b.fill(rgba(0, 255, 0, 255)) - - timeIt "drawCorrect small-on-big": - a.drawCorrect(b, translate(vec2(25, 25)), blendMode = NormalBlend) - keep(b) - -block: - let - a = newImage(1000, 1000) - b = newImage(50, 50) - a.fill(rgba(255, 0, 0, 255)) - b.fill(rgba(0, 255, 0, 255)) - - timeIt "drawUber small-on-big": - a.drawUber(b, translate(vec2(25, 25)), blendMode = NormalBlend) - keep(b) - -block: - let - a = newImage(1000, 1000) - b = newImage(50, 50) - a.fill(rgba(255, 0, 0, 255)) - b.fill(rgba(0, 255, 0, 255)) - - timeIt "drawCorrect small-on-big smooth": - a.drawCorrect(b, translate(vec2(25.1, 25.1)), blendMode = NormalBlend) - keep(b) - -block: - let - a = newImage(1000, 1000) - b = newImage(50, 50) - a.fill(rgba(255, 0, 0, 255)) - b.fill(rgba(0, 255, 0, 255)) - - timeIt "drawUber small-on-big smooth": - a.drawUber(b, translate(vec2(25.1, 25.1)), blendMode = NormalBlend) - keep(b) diff --git a/tests/benchmark_images_blur.nim b/tests/benchmark_images_blur.nim deleted file mode 100644 index 747ceba..0000000 --- a/tests/benchmark_images_blur.nim +++ /dev/null @@ -1,77 +0,0 @@ -import benchy, pixie, pixie/internal - -proc blurSlower*( - image: Image, radius: float32, outOfBounds: SomeColor = ColorRGBX() -) = - ## Applies Gaussian blur to the image given a radius. - let radius = round(radius).int - if radius == 0: - return - - let - kernel = gaussianKernel(radius) - outOfBounds = outOfBounds.asRgbx() - - proc `*`(sample: ColorRGBX, a: uint32): array[4, uint32] {.inline.} = - [ - sample.r * a, - sample.g * a, - sample.b * a, - sample.a * a - ] - - template `+=`(values: var array[4, uint32], sample: array[4, uint32]) = - values[0] += sample[0] - values[1] += sample[1] - values[2] += sample[2] - values[3] += sample[3] - - template rgbx(values: array[4, uint32]): ColorRGBX = - rgbx( - (values[0] div 1024 div 255).uint8, - (values[1] div 1024 div 255).uint8, - (values[2] div 1024 div 255).uint8, - (values[3] div 1024 div 255).uint8 - ) - - # Blur in the X direction. - let blurX = newImage(image.width, image.height) - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var values: array[4, uint32] - for xx in x - radius ..< min(x + radius, 0): - values += outOfBounds * kernel[xx - x + radius] - for xx in max(x - radius, 0) .. min(x + radius, image.width - 1): - values += image.unsafe[xx, y] * kernel[xx - x + radius] - for xx in max(x - radius, image.width) .. x + radius: - values += outOfBounds * kernel[xx - x + radius] - blurX.unsafe[x, y] = rgbx(values) - - # Blur in the Y direction. - for y in 0 ..< image.height: - for x in 0 ..< image.width: - var values: array[4, uint32] - for yy in y - radius ..< min(y + radius, 0): - values += outOfBounds * kernel[yy - y + radius] - for yy in max(y - radius, 0) .. min(y + radius, image.height - 1): - values += blurX.unsafe[x, yy] * kernel[yy - y + radius] - for yy in max(y - radius, image.height) .. y + radius: - values += outOfBounds * kernel[yy - y + radius] - image.unsafe[x, y] = rgbx(values) - -let image = newImage(1920, 1080) - -proc reset() = - let path = newPath() - path.rect(100, 100, 1720, 880) - image.fillPath(path, rgba(255, 255, 255, 255)) - -reset() - -timeIt "blurSlower": - image.blurSlower(40) - -reset() - -timeIt "blur": - image.blur(40) diff --git a/tests/benchmark_images_loop.nim b/tests/benchmark_images_loop.nim deleted file mode 100644 index 34b66ee..0000000 --- a/tests/benchmark_images_loop.nim +++ /dev/null @@ -1,24 +0,0 @@ -import benchy, pixie - -let image = newImage(2560, 1440) -image.fill(rgba(50, 100, 150, 200)) - -timeIt "x then y": - var sum: uint64 - for x in 0 ..< image.width: - for y in 0 ..< image.height: - let pixel = image.unsafe[x, y] - sum += pixel.r + pixel.g + pixel.b + pixel.a - if sum == 0: - echo "0" - keep sum - -timeIt "y then x": - var sum: uint64 - for y in 0 ..< image.height: - for x in 0 ..< image.width: - let pixel = image.unsafe[x, y] - sum += pixel.r + pixel.g + pixel.b + pixel.a - if sum == 0: - echo "0" - keep sum From f0090c020e06df331be267cbec5e8588e0cfe94d Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 15 Jun 2022 19:52:37 -0500 Subject: [PATCH 15/15] Revert "faster" This reverts commit 1493634920005070de5611ce90c868efde5c6071. --- src/pixie/internal.nim | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 095147d..9fda619 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -65,15 +65,11 @@ proc fillUnsafe*( else: var i = start when defined(amd64) and allowSimd: - # Align to 16 bytes - while (cast[uint](data[i].addr) and 15) != 0: - data[i] = rgbx - inc i # When supported, SIMD fill until we run out of room let colorVec = mm_set1_epi32(cast[int32](rgbx)) for _ in 0 ..< len div 8: - mm_store_si128(data[i + 0].addr, colorVec) - mm_store_si128(data[i + 4].addr, colorVec) + mm_storeu_si128(data[i + 0].addr, colorVec) + mm_storeu_si128(data[i + 4].addr, colorVec) i += 8 else: when sizeof(int) == 8: