From 54ee511a47e707d7c13cd2fc618428af7959d8a3 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Fri, 18 Jun 2021 15:10:00 -0500 Subject: [PATCH 1/5] fix pixieNoSimd bug + add to github actions --- .github/workflows/build.yml | 1 + src/pixie/images.nim | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 064e468..9977fff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,5 +13,6 @@ jobs: - uses: actions/checkout@v2 - uses: jiro4989/setup-nim-action@v1 - run: nimble test -d:release -y + - run: nimble test -d:release -d:pixieNoSimd -y - run: nimble test --gc:orc -d:release -y - run: nim cpp -d:release -r tests/all.nim diff --git a/src/pixie/images.nim b/src/pixie/images.nim index f5614a7..b4185b0 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -753,31 +753,32 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) = ) x += 16 - for _ in x ..< xMax: - let - srcPos = p + dx * x.float32 + dy * y.float32 - xFloat = srcPos.x - h - yFloat = srcPos.y - h + var srcPos = p + dx * x.float32 + dy * y.float32 + srcPos = vec2(max(0, srcPos.x), max(0, srcPos.y)) + + for x in x ..< xMax: + let samplePos = ivec2((srcPos.x - h).int32, (srcPos.y - h).int32) when type(a) is Image: let backdrop = a.getRgbaUnsafe(x, y) when type(b) is Image: let - sample = b.getRgbaUnsafe(xFloat.int, yFloat.int) + sample = b.getRgbaUnsafe(samplePos.x, samplePos.y) blended = blender(backdrop, sample) else: # b is a Mask let - sample = b.getValueUnsafe(xFloat.int, yFloat.int) + sample = b.getValueUnsafe(samplePos.x, samplePos.y) blended = blender(backdrop, rgbx(0, 0, 0, sample)) a.setRgbaUnsafe(x, y, blended) else: # a is a Mask let backdrop = a.getValueUnsafe(x, y) when type(b) is Image: - let sample = b.getRgbaUnsafe(xFloat.int, yFloat.int).a + let sample = b.getRgbaUnsafe(samplePos.x, samplePos.y).a else: # b is a Mask - let sample = b.getValueUnsafe(xFloat.int, yFloat.int) + let sample = b.getValueUnsafe(samplePos.x, samplePos.y) a.setValueUnsafe(x, y, masker(backdrop, sample)) - inc x + + srcPos += dx if blendMode == bmIntersectMask: if a.width - xMax > 0: From f72fff86512f7fbb38f180d16a8ccf08ca5e06e5 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Fri, 18 Jun 2021 15:30:02 -0500 Subject: [PATCH 2/5] draw proc cleanup --- src/pixie/images.nim | 54 ++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index b4185b0..2ecbc0f 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -784,41 +784,41 @@ proc drawUber(a, b: Image | Mask, mat = mat3(), blendMode = bmNormal) = if a.width - xMax > 0: zeroMem(a.data[a.dataIndex(xMax, y)].addr, 4 * (a.width - xMax)) -proc draw*(a, b: Image, mat: Mat3, blendMode = bmNormal) {.inline.} = +proc draw*( + a, b: Image, transform: Vec2 | Mat3 = vec2(), blendMode = bmNormal +) {.inline.} = ## Draws one image onto another using matrix with color blending. - a.drawUber(b, mat, blendMode) - -proc draw*(a, b: Image, pos = vec2(0, 0), blendMode = bmNormal) {.inline.} = - ## Draws one image onto another using a position offset with color blending. - a.draw(b, translate(pos), blendMode) - -proc draw*(image: Image, mask: Mask, mat: Mat3, blendMode = bmMask) {.inline.} = - ## Draws a mask onto an image using a matrix with color blending. - image.drawUber(mask, mat, blendMode) + when type(transform) is Vec2: + a.drawUber(b, translate(transform), blendMode) + else: + a.drawUber(b, transform, blendMode) proc draw*( - image: Image, mask: Mask, pos = vec2(0, 0), blendMode = bmMask + a, b: Mask, transform: Vec2 | Mat3 = vec2(), blendMode = bmMask ) {.inline.} = - ## Draws a mask onto an image using a position offset with color blending. - image.drawUber(mask, translate(pos), blendMode) - -proc draw*(a, b: Mask, mat: Mat3, blendMode = bmMask) {.inline.} = ## Draws a mask onto a mask using a matrix with color blending. - a.drawUber(b, mat, blendMode) - -proc draw*(a, b: Mask, pos = vec2(0, 0), blendMode = bmMask) {.inline.} = - ## Draws a mask onto a mask using a position offset with color blending. - a.draw(b, translate(pos), blendMode) - -proc draw*(mask: Mask, image: Image, mat: Mat3, blendMode = bmMask) {.inline.} = - ## Draws a image onto a mask using a matrix with color blending. - mask.drawUber(image, mat, blendMode) + when type(transform) is Vec2: + a.drawUber(b, translate(transform), blendMode) + else: + a.drawUber(b, transform, blendMode) proc draw*( - mask: Mask, image: Image, pos = vec2(0, 0), blendMode = bmMask + image: Image, mask: Mask, transform: Vec2 | Mat3 = vec2(), blendMode = bmMask ) {.inline.} = - ## Draws a image onto a mask using a position offset with color blending. - mask.draw(image, translate(pos), blendMode) + ## Draws a mask onto an image using a matrix with color blending. + when type(transform) is Vec2: + image.drawUber(mask, translate(transform), blendMode) + else: + image.drawUber(mask, transform, blendMode) + +proc draw*( + mask: Mask, image: Image, transform: Vec2 | Mat3 = vec2(), blendMode = bmMask +) {.inline.} = + ## Draws a image onto a mask using a matrix with color blending. + when type(transform) is Vec2: + mask.drawUber(image, translate(transform), blendMode) + else: + mask.drawUber(image, transform, blendMode) proc drawTiled*(dest, src: Image, mat: Mat3, blendMode = bmNormal) = dest.drawCorrect(src, mat, true, blendMode) From 280089b1b35bf878aec053ca557d67ee84fef336 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Fri, 18 Jun 2021 15:36:00 -0500 Subject: [PATCH 3/5] restore test --- tests/images/masks/maskMinified.png | Bin 269 -> 270 bytes tests/test_masks.nim | 29 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/images/masks/maskMinified.png b/tests/images/masks/maskMinified.png index 7db6efef050ebe1aa280bac434bec77c15f0b7dc..1c10946b480423e18b05b09621ac2747d3bf39f9 100644 GIT binary patch delta 242 zcmV6E3$XjcYcMeI9`FphIT%>1yUrR& zd<_=lH5mE`xER}G6TT^LNig$vvl+H%J&z?pwkqTA(|o7g3tq?6Ly-=Qmguwr4#fiotQ^?1D$?d;A+D$F8}}l07*qoM6N<$g2-NP;Q#;t delta 241 zcmV8W+N{;0OyaN8j5I-=J z@b|^v7d0sJ+?Q-xSae!g@L287Mx#ZNOqs160EVp=e6r_uXE@wZ9DlIT)~nDQ$*(3O rym6Z0r Date: Fri, 18 Jun 2021 16:03:25 -0500 Subject: [PATCH 4/5] 2x faster mask minifyBy2 --- src/pixie/masks.nim | 73 ++++++++++++++++++++++++++-- tests/images/masks/minifiedBlur.png | Bin 0 -> 1371 bytes tests/test_masks.nim | 8 +++ 3 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 tests/images/masks/minifiedBlur.png diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index d21b0be..763718c 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -73,17 +73,80 @@ proc minifyBy2*(mask: Mask, power = 1): Mask = if power == 0: return mask.copy() + var src = mask for i in 1 .. power: result = newMask(mask.width div 2, mask.height div 2) for y in 0 ..< result.height: - for x in 0 ..< result.width: + var x: int + when defined(amd64) and not defined(pixieNoSimd): + let + oddMask = mm_set1_epi16(cast[int16](0xff00)) + first8 = cast[M128i]([uint8.high, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + for _ in countup(0, result.width - 16, 8): + let + top = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 0)].addr) + btm = mm_loadu_si128(src.data[src.dataIndex(x * 2, y * 2 + 1)].addr) + topShifted = mm_srli_si128(top, 1) + btmShifted = mm_srli_si128(btm, 1) + + topEven = mm_andnot_si128(oddMask, top) + topOdd = mm_srli_epi16(mm_and_si128(top, oddMask), 8) + btmEven = mm_andnot_si128(oddMask, btm) + btmOdd = mm_srli_epi16(mm_and_si128(btm, oddMask), 8) + + topShiftedEven = mm_andnot_si128(oddMask, topShifted) + topShiftedOdd = mm_srli_epi16(mm_and_si128(topShifted, oddMask), 8) + btmShiftedEven = mm_andnot_si128(oddMask, btmShifted) + btmShiftedOdd = mm_srli_epi16(mm_and_si128(btmShifted, oddMask), 8) + + topAddedEven = mm_add_epi16(topEven, topShiftedEven) + btmAddedEven = mm_add_epi16(btmEven, btmShiftedEven) + topAddedOdd = mm_add_epi16(topOdd, topShiftedOdd) + bottomAddedOdd = mm_add_epi16(btmOdd, btmShiftedOdd) + + addedEven = mm_add_epi16(topAddedEven, btmAddedEven) + addedOdd = mm_add_epi16(topAddedOdd, bottomAddedOdd) + + addedEvenDiv4 = mm_srli_epi16(addedEven, 2) + addedOddDiv4 = mm_srli_epi16(addedOdd, 2) + + merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8)) + + # merged has the correct values in the even indices + + a = mm_and_si128(merged, first8) + b = mm_and_si128(mm_srli_si128(merged, 2), first8) + c = mm_and_si128(mm_srli_si128(merged, 4), first8) + d = mm_and_si128(mm_srli_si128(merged, 6), first8) + e = mm_and_si128(mm_srli_si128(merged, 8), first8) + f = mm_and_si128(mm_srli_si128(merged, 10), first8) + g = mm_and_si128(mm_srli_si128(merged, 12), first8) + h = mm_and_si128(mm_srli_si128(merged, 14), first8) + + ab = mm_or_si128(a, mm_slli_si128(b, 1)) + cd = mm_or_si128(c, mm_slli_si128(d, 1)) + ef = mm_or_si128(e, mm_slli_si128(f, 1)) + gh = mm_or_si128(g, mm_slli_si128(h, 1)) + + abcd = mm_or_si128(ab, mm_slli_si128(cd, 2)) + efgh = mm_or_si128(ef, mm_slli_si128(gh, 2)) + + abcdefgh = mm_or_si128(abcd, mm_slli_si128(efgh, 4)) + + mm_storeu_si128(result.data[result.dataIndex(x, y)].addr, abcdefgh) + x += 8 + + for x in x ..< result.width: let value = - mask.getValueUnsafe(x * 2 + 0, y * 2 + 0).uint32 + - mask.getValueUnsafe(x * 2 + 1, y * 2 + 0) + - mask.getValueUnsafe(x * 2 + 1, y * 2 + 1) + - mask.getValueUnsafe(x * 2 + 0, y * 2 + 1) + src.getValueUnsafe(x * 2 + 0, y * 2 + 0).uint32 + + src.getValueUnsafe(x * 2 + 1, y * 2 + 0) + + src.getValueUnsafe(x * 2 + 1, y * 2 + 1) + + src.getValueUnsafe(x * 2 + 0, y * 2 + 1) result.setValueUnsafe(x, y, (value div 4).uint8) + # Set src as this result for if we do another power + src = result + proc fillUnsafe*(data: var seq[uint8], value: uint8, start, len: int) = ## Fills the mask data with the parameter value starting at index start and ## continuing for len indices. diff --git a/tests/images/masks/minifiedBlur.png b/tests/images/masks/minifiedBlur.png new file mode 100644 index 0000000000000000000000000000000000000000..047ddca9620741c8a768e2851e54df0b7e17f5b2 GIT binary patch literal 1371 zcmV-h1*H0kP)Ghr;@BwlzK`W-tzpnk=@1wUKMYRf9e1_B z$iZD!;2?omT<{g|ydSyCMl0vN92=ku)2#rphDZ${Mo}0?d1&QFKSFZO5-wv+6O7{} z4ClQc!x%yclRVh!)DO&i?}qL~WQ{;1SOh9eVVb6Cp4sXp$Auj?IGK0T1k)jd8@%@% z|2!}AvdDJUYZ8thox_O%(%?8_$ZY4wF--HaEX&H~Db0i+gpqiGFs;F8KuBJUO!G3Y z>$djCQ>=NOLl{SlnAT2#G$1gX8$*~uZ0m;BXGEN*DM++TGiN)&80N%U>s$}kx~(tU z%eHM|=82F^^Mny+ZENRwo{gJ0v7~PZ^E;zco{85y5v>nKJ!|*kBwT(B)3mG`K04!> z1enB#?>_2r)QTB#!h+@fH6t%C!XQ%5oGhf48?0%y9^8(~thDZA9H)tC2l28k>oOBw zt->^pesDu;_&CaO9H&r#I0=_PdS($^{i=eE?&GXSDJ7R4F>&GsoXqpGuG{v)jFX7u zyHc`hS#zFeDLLm{N~s6=_2ER$k{7afAxFZ+jT^tn1Rx zTJQD0Spy3^jGR!;MKY{I`ngE7guz)mvRc*rB<~bQWM6AL@VV&d!yHd>6-<*e{%@lJ3I6_)CxpNIqvbX zLjiKoLZOUqsiQ-ocS+G128X3oq1BM%zQ+fQt1jHciKQ}3xt>k~Emg|YQhQY^F(Gk( zpnZ>5a$QcC6h!}HFy~p1TI9e~N|}nVFkT;z4<{W$WtdJQjfTOt*2s1W?pj5Ji`NcG z9~EvR@k0LUtHH>*w{f|b^tK=SBt#-0X7_Tr=!O7YC zjCQzg^dRXPl$1zzy&n61F&;aujpLxv*qy)_>|p3~lI=`jq^PeKGxB&q6C*Hyn0^Ha z*B6jiv_I&7JjGym9ulmak6LSQW->Y#Vz81(`vW5~iCTU5jxos!EYa$%^6rHeJ Date: Fri, 18 Jun 2021 16:04:31 -0500 Subject: [PATCH 5/5] 2.0.5 --- pixie.nimble | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixie.nimble b/pixie.nimble index 57c4d10..973ada1 100644 --- a/pixie.nimble +++ b/pixie.nimble @@ -1,4 +1,4 @@ -version = "2.0.4" +version = "2.0.5" author = "Andre von Houck and Ryan Oldenburg" description = "Full-featured 2d graphics library for Nim." license = "MIT"