From ce7507cd6cd6acc84e6af636492a25dae27da7df Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Wed, 6 Jan 2021 14:18:47 -0600 Subject: [PATCH] fast fill, remove benchmark weirdness --- src/pixie/images.nim | 34 +++++++++++---- tests/benchmark_images.nim | 87 +------------------------------------- 2 files changed, 26 insertions(+), 95 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index ba472a0..af6c8ee 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -1,4 +1,4 @@ -import chroma, blends, bumpy, vmath, common, nimsimd/sse2 +import chroma, blends, bumpy, vmath, common, nimsimd/sse2, system/memory const h = 0.5.float32 @@ -72,14 +72,30 @@ proc `[]=`*(image: Image, x, y: int, rgba: ColorRGBA) {.inline.} = proc fill*(image: Image, rgba: ColorRgba) = ## Fills the image with a solid color. - # SIMD fill until we run out of room. - let m = mm_set1_epi32(cast[int32](rgba)) - var i: int - while i < image.data.len - 4: - mm_store_si128(image.data[i].addr, m) - i += 4 - for j in i ..< image.data.len: - image.data[j] = rgba + + # Use memset when every byte has the same value + if rgba.r == rgba.g and rgba.r == rgba.b and rgba.r == rgba.a: + nimSetMem(image.data[0].addr, rgba.r.cint, image.data.len * 4) + else: + var i: int + when defined(amd64): + # When supported, SIMD fill until we run out of room. + let m = mm_set1_epi32(cast[int32](rgba)) + while i < image.data.len - 4: + mm_store_si128(image.data[i].addr, m) + i += 4 + else: + when sizeof(int) == 8: + # Fill 8 bytes at a time when possible + let + u32 = cast[uint32](rgba) + u64 = cast[uint64]([u32, u32]) + while i < image.data.len - 2: + cast[ptr uint64](image.data[i].addr)[] = u64 + i += 2 + # Fill whatever is left the slow way + for j in i ..< image.data.len: + image.data[j] = rgba proc invert*(image: Image) = ## Inverts all of the colors and alpha. 
diff --git a/tests/benchmark_images.nim b/tests/benchmark_images.nim index 606b4f3..c9e3d43 100644 --- a/tests/benchmark_images.nim +++ b/tests/benchmark_images.nim @@ -1,110 +1,25 @@ import chroma, pixie, benchy, system/memory -proc fill1(a: Image, rgba: ColorRGBA) = - for y in 0 ..< a.height: - for x in 0 ..< a.width: - a.setRgbaUnsafe(x, y, rgba) - -proc fill2*(image: Image, rgba: ColorRgba) = - ## Fills the image with a solid color. - if rgba.r == rgba.g and rgba.r == rgba.b and rgba.r == rgba.a: - nimSetMem(image.data[0].addr, rgba.r.cint, image.data.len * 4) - else: - for c in image.data.mitems: - c = rgba - -proc invert1(a: Image) = - for y in 0 ..< a.height: - for x in 0 ..< a.width: - var rgba = a.getRgbaUnsafe(x, y) - rgba.r = 255 - rgba.r - rgba.g = 255 - rgba.g - rgba.b = 255 - rgba.b - rgba.a = 255 - rgba.a - a.setRgbaUnsafe(x, y, rgba) - -proc invert2*(image: Image) = - for rgba in image.data.mitems: - rgba.r = 255 - rgba.r - rgba.g = 255 - rgba.g - rgba.b = 255 - rgba.b - rgba.a = 255 - rgba.a - -proc applyOpacity1(a: Image, opacity: float32): Image = - result = newImage(a.width, a.height) - let op = (255 * opacity).uint32 - for y in 0 ..< a.height: - for x in 0 ..< a.width: - var rgba = a.getRgbaUnsafe(x, y) - rgba.a = ((rgba.a.uint32 * op) div 255).clamp(0, 255).uint8 - result.setRgbaUnsafe(x, y, rgba) - -proc sharpOpacity1(a: Image): Image = - result = newImage(a.width, a.height) - for y in 0 ..< a.height: - for x in 0 ..< a.width: - var rgba = a.getRgbaUnsafe(x, y) - if rgba.a == 0: - result.setRgbaUnsafe(x, y, rgba(0, 0, 0, 0)) - else: - result.setRgbaUnsafe(x, y, rgba(255, 255, 255, 255)) - -var a = newImage(2560, 1440) - -timeIt "fill1": - a.fill1(rgba(255, 255, 255, 255)) - doAssert a[0, 0] == rgba(255, 255, 255, 255) - keep(a) - -timeIt "fill2": - a.fill2(rgba(255, 255, 255, 255)) - doAssert a[0, 0] == rgba(255, 255, 255, 255) - keep(a) +let a = newImage(2560, 1440) timeIt "fill": a.fill(rgba(255, 255, 255, 255)) doAssert a[0, 0] == 
rgba(255, 255, 255, 255) keep(a) -timeIt "fill1_rgba": - a.fill1(rgba(63, 127, 191, 255)) - doAssert a[0, 0] == rgba(63, 127, 191, 255) - keep(a) - -timeIt "fill2_rgba": - a.fill2(rgba(63, 127, 191, 255)) - doAssert a[0, 0] == rgba(63, 127, 191, 255) - keep(a) - timeIt "fill_rgba": a.fill(rgba(63, 127, 191, 255)) doAssert a[0, 0] == rgba(63, 127, 191, 255) keep(a) -timeIt "invert1": - a.invert1() - keep(a) - -timeIt "invert2": - a.invert2() - keep(a) - timeIt "invert": a.invert() keep(a) -timeIt "applyOpacity1": - a = a.applyOpacity1(0.5) - keep(a) - timeIt "applyOpacity": a.applyOpacity(0.5) keep(a) -timeIt "sharpOpacity1": - a = a.sharpOpacity1() - keep(a) - timeIt "sharpOpacity": a.sharpOpacity() keep(a)