From ec5bc9ba1eea55b5190d4bfefa3810c932a60351 Mon Sep 17 00:00:00 2001
From: Ryan Oldenburg
Date: Tue, 8 Dec 2020 18:10:20 -0600
Subject: [PATCH] simd helping

---
Review notes (this section is outside the commit message):
 * Line breaks were restored; the patch had been collapsed onto three
   lines. Hunk line counts and the diffstat are unchanged.
 * fill/invert: mm_store_si128 requires a 16-byte-aligned address and
   nothing in this patch establishes that for image.data (invert already
   loads from the same address with mm_loadu_si128). Both stores now use
   mm_storeu_si128.
 * fill/invert: the SIMD loop bound was `i < len - 4`, which left the
   last complete group of four pixels to the scalar tail loop. It is now
   `i <= len - 4`; the tail loop still covers the remaining 0..3 pixels.
 * The index line of src/pixie/images.nim still carries the blob ids of
   the original commit.

 pixie.nimble               |  1 +
 src/pixie/images.nim       | 23 +++++++++++---
 tests/benchmark_images.nim | 65 ++++++++++++++++++++++++++++++--------
 3 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/pixie.nimble b/pixie.nimble
index c759ac4..8c24e54 100644
--- a/pixie.nimble
+++ b/pixie.nimble
@@ -10,3 +10,4 @@ requires "vmath >= 0.3.3"
 requires "chroma >= 0.1.5"
 requires "zippy >= 0.3.5"
 requires "flatty >= 0.1.2"
+requires "nimsimd >= 0.4.5"
diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index aee7451..8e12fa8 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -1,4 +1,4 @@
-import chroma, blends, vmath, common, system/memory
+import chroma, blends, vmath, common, system/memory, nimsimd/sse2
 
 type
   Image* = ref object
@@ -73,16 +73,31 @@ proc fill*(image: Image, rgba: ColorRgba) =
   if rgba.r == rgba.g and rgba.r == rgba.b and rgba.r == rgba.a:
     nimSetMem(image.data[0].addr, rgba.r.cint, image.data.len * 4)
   else:
-    for c in image.data.mitems:
-      c = rgba
+    # SIMD fill, 4 pixels per store; unaligned store, alignment unknown.
+    let m = mm_set1_epi32(cast[int32](rgba))
+    var i: int
+    while i <= image.data.len - 4:
+      mm_storeu_si128(image.data[i].addr, m)
+      i += 4
+    for j in i ..< image.data.len:
+      image.data[j] = rgba
 
 proc invert*(image: Image) =
   ## Inverts all of the colors and alpha.
-  for rgba in image.data.mitems:
+  let vec255 = mm_set1_epi8(255)
+  var i: int
+  while i <= image.data.len - 4:
+    var m = mm_loadu_si128(image.data[i].addr)
+    m = mm_sub_epi8(vec255, m)
+    mm_storeu_si128(image.data[i].addr, m)
+    i += 4
+  for j in i ..< image.data.len:
+    var rgba = image.data[j]
     rgba.r = 255 - rgba.r
     rgba.g = 255 - rgba.g
     rgba.b = 255 - rgba.b
     rgba.a = 255 - rgba.a
+    image.data[j] = rgba
 
 proc subImage*(image: Image, x, y, w, h: int): Image =
   ## Gets a sub image of the main image.
diff --git a/tests/benchmark_images.nim b/tests/benchmark_images.nim
index 0e9b9d9..606b4f3 100644
--- a/tests/benchmark_images.nim
+++ b/tests/benchmark_images.nim
@@ -1,11 +1,19 @@
-import chroma, pixie, benchy
+import chroma, pixie, benchy, system/memory
 
-proc fillOriginal(a: Image, rgba: ColorRGBA) =
+proc fill1(a: Image, rgba: ColorRGBA) =
   for y in 0 ..< a.height:
     for x in 0 ..< a.width:
       a.setRgbaUnsafe(x, y, rgba)
 
-proc invertOriginal(a: Image) =
+proc fill2*(image: Image, rgba: ColorRgba) =
+  ## Fills the image with a solid color.
+  if rgba.r == rgba.g and rgba.r == rgba.b and rgba.r == rgba.a:
+    nimSetMem(image.data[0].addr, rgba.r.cint, image.data.len * 4)
+  else:
+    for c in image.data.mitems:
+      c = rgba
+
+proc invert1(a: Image) =
   for y in 0 ..< a.height:
     for x in 0 ..< a.width:
       var rgba = a.getRgbaUnsafe(x, y)
@@ -15,7 +23,14 @@ proc invertOriginal(a: Image) =
       rgba.a = 255 - rgba.a
       a.setRgbaUnsafe(x, y, rgba)
 
-proc applyOpacityOriginal(a: Image, opacity: float32): Image =
+proc invert2*(image: Image) =
+  for rgba in image.data.mitems:
+    rgba.r = 255 - rgba.r
+    rgba.g = 255 - rgba.g
+    rgba.b = 255 - rgba.b
+    rgba.a = 255 - rgba.a
+
+proc applyOpacity1(a: Image, opacity: float32): Image =
   result = newImage(a.width, a.height)
   let op = (255 * opacity).uint32
   for y in 0 ..< a.height:
@@ -24,7 +39,7 @@ proc applyOpacityOriginal(a: Image, opacity: float32): Image =
       rgba.a = ((rgba.a.uint32 * op) div 255).clamp(0, 255).uint8
       result.setRgbaUnsafe(x, y, rgba)
 
-proc sharpOpacityOriginal(a: Image): Image =
+proc sharpOpacity1(a: Image): Image =
   result = newImage(a.width, a.height)
   for y in 0 ..< a.height:
     for x in 0 ..< a.width:
@@ -36,8 +51,13 @@ proc sharpOpacityOriginal(a: Image): Image =
 
 var a = newImage(2560, 1440)
 
-timeIt "fillOriginal":
-  a.fillOriginal(rgba(255, 255, 255, 255))
+timeIt "fill1":
+  a.fill1(rgba(255, 255, 255, 255))
+  doAssert a[0, 0] == rgba(255, 255, 255, 255)
+  keep(a)
+
+timeIt "fill2":
+  a.fill2(rgba(255, 255, 255, 255))
   doAssert a[0, 0] == rgba(255, 255, 255, 255)
   keep(a)
 
@@ -46,24 +66,43 @@ timeIt "fill":
   doAssert a[0, 0] == rgba(255, 255, 255, 255)
   keep(a)
 
-timeIt "invertOriginal":
-  a.invertOriginal()
+timeIt "fill1_rgba":
+  a.fill1(rgba(63, 127, 191, 255))
+  doAssert a[0, 0] == rgba(63, 127, 191, 255)
+  keep(a)
+
+timeIt "fill2_rgba":
+  a.fill2(rgba(63, 127, 191, 255))
+  doAssert a[0, 0] == rgba(63, 127, 191, 255)
+  keep(a)
+
+timeIt "fill_rgba":
+  a.fill(rgba(63, 127, 191, 255))
+  doAssert a[0, 0] == rgba(63, 127, 191, 255)
+  keep(a)
+
+timeIt "invert1":
+  a.invert1()
+  keep(a)
+
+timeIt "invert2":
+  a.invert2()
   keep(a)
 
 timeIt "invert":
   a.invert()
   keep(a)
 
-timeIt "applyOpacityOriginal":
-  a = a.applyOpacityOriginal(0.5)
+timeIt "applyOpacity1":
+  a = a.applyOpacity1(0.5)
   keep(a)
 
 timeIt "applyOpacity":
   a.applyOpacity(0.5)
   keep(a)
 
-timeIt "sharpOpacityOriginal":
-  a = a.sharpOpacityOriginal()
+timeIt "sharpOpacity1":
+  a = a.sharpOpacity1()
   keep(a)
 
 timeIt "sharpOpacity":