From f1b04ca441e949f4890fb9913f6c562df6693c5a Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Fri, 19 Nov 2021 12:31:09 -0600 Subject: [PATCH] remove most countups --- experiments/svg_cairo.nim | 4 +- src/pixie/fileformats/svg.nim | 4 +- src/pixie/images.nim | 74 ++++++++++++++++++----------------- src/pixie/internal.nim | 3 +- src/pixie/masks.nim | 10 ++--- src/pixie/paths.nim | 14 +++---- tests/benchmark_blends.nim | 4 +- 7 files changed, 58 insertions(+), 55 deletions(-) diff --git a/experiments/svg_cairo.nim b/experiments/svg_cairo.nim index 70c5b31..9d6bb27 100644 --- a/experiments/svg_cairo.nim +++ b/experiments/svg_cairo.nim @@ -408,8 +408,8 @@ proc draw(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) = let points = points.split(" ") if points.len mod 2 != 0: failInvalid() - for i in countup(0, points.len - 2, 2): - vecs.add(vec2(parseFloat(points[i]), parseFloat(points[i + 1]))) + for i in 0 ..< points.len div 2: + vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1]))) if vecs.len == 0: failInvalid() diff --git a/src/pixie/fileformats/svg.nim b/src/pixie/fileformats/svg.nim index 8b8fa7f..df60f73 100644 --- a/src/pixie/fileformats/svg.nim +++ b/src/pixie/fileformats/svg.nim @@ -410,8 +410,8 @@ proc drawInternal(img: Image, node: XmlNode, ctxStack: var seq[Ctx]) = let points = points.split(" ") if points.len mod 2 != 0: failInvalid() - for i in countup(0, points.len - 2, 2): - vecs.add(vec2(parseFloat(points[i]), parseFloat(points[i + 1]))) + for i in 0 ..< points.len div 2: + vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1]))) if vecs.len == 0: failInvalid() diff --git a/src/pixie/images.nim b/src/pixie/images.nim index 5d374cc..5bd07b1 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -28,16 +28,19 @@ proc newImage*(mask: Mask): Image {.raises: [PixieError].} = result = newImage(mask.width, mask.height) var i: int when defined(amd64) and not defined(pixieNoSimd): - for _ in countup(0, 
mask.data.len - 16, 4): - var alphas = unpackAlphaValues(mm_loadu_si128(mask.data[i].addr)) - alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 8)) - alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 16)) - mm_storeu_si128(result.data[i].addr, alphas) - i += 4 + for _ in 0 ..< mask.data.len div 16: + var alphas = mm_loadu_si128(mask.data[i].addr) + for j in 0 ..< 4: + var unpacked = unpackAlphaValues(alphas) + unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 8)) + unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16)) + mm_storeu_si128(result.data[i + j * 4].addr, unpacked) + alphas = mm_srli_si128(alphas, 4) + i += 16 - for i in i ..< mask.data.len: - let v = mask.data[i] - result.data[i] = rgbx(v, v, v, v) + for j in i ..< mask.data.len: + let v = mask.data[j] + result.data[j] = rgbx(v, v, v, v) proc copy*(image: Image): Image {.raises: [PixieError].} = ## Copies the image data into a new image. @@ -104,10 +107,10 @@ proc fillUnsafe*( var i = start when defined(amd64) and not defined(pixieNoSimd): # When supported, SIMD fill until we run out of room - let m = mm_set1_epi32(cast[int32](rgbx)) - for j in countup(i, start + len - 8, 8): - mm_storeu_si128(data[j].addr, m) - mm_storeu_si128(data[j + 4].addr, m) + let colorVec = mm_set1_epi32(cast[int32](rgbx)) + for _ in 0 ..< len div 8: + mm_storeu_si128(data[i + 0].addr, colorVec) + mm_storeu_si128(data[i + 4].addr, colorVec) i += 8 else: when sizeof(int) == 8: @@ -115,8 +118,8 @@ proc fillUnsafe*( let u32 = cast[uint32](rgbx) u64 = cast[uint64]([u32, u32]) - for j in countup(i, start + len - 2, 2): - cast[ptr uint64](data[j].addr)[] = u64 + for _ in 0 ..< len div 2: + cast[ptr uint64](data[i].addr)[] = u64 i += 2 # Fill whatever is left the slow way for j in i ..< start + len: @@ -135,10 +138,10 @@ proc isOneColor*(image: Image): bool {.raises: [].} = var i: int when defined(amd64) and not defined(pixieNoSimd): let colorVec = mm_set1_epi32(cast[int32](color)) - for j in countup(0, image.data.len - 
8, 8): + for _ in 0 ..< image.data.len div 8: let - values0 = mm_loadu_si128(image.data[j].addr) - values1 = mm_loadu_si128(image.data[j + 4].addr) + values0 = mm_loadu_si128(image.data[i + 0].addr) + values1 = mm_loadu_si128(image.data[i + 4].addr) mask0 = mm_movemask_epi8(mm_cmpeq_epi8(values0, colorVec)) mask1 = mm_movemask_epi8(mm_cmpeq_epi8(values1, colorVec)) if mask0 != uint16.high.int or mask1 != uint16.high.int: @@ -155,17 +158,17 @@ proc isTransparent*(image: Image): bool {.raises: [].} = var i: int when defined(amd64) and not defined(pixieNoSimd): - let transparent = mm_setzero_si128() - for j in countup(0, image.data.len - 16, 16): + let zeroVec = mm_setzero_si128() + for _ in 0 ..< image.data.len div 16: let - values0 = mm_loadu_si128(image.data[j].addr) - values1 = mm_loadu_si128(image.data[j + 4].addr) - values2 = mm_loadu_si128(image.data[j + 8].addr) - values3 = mm_loadu_si128(image.data[j + 12].addr) + values0 = mm_loadu_si128(image.data[i + 0].addr) + values1 = mm_loadu_si128(image.data[i + 4].addr) + values2 = mm_loadu_si128(image.data[i + 8].addr) + values3 = mm_loadu_si128(image.data[i + 12].addr) values01 = mm_or_si128(values0, values1) values23 = mm_or_si128(values2, values3) values = mm_or_si128(values01, values23) - mask = mm_movemask_epi8(mm_cmpeq_epi8(values, transparent)) + mask = mm_movemask_epi8(mm_cmpeq_epi8(values, zeroVec)) if mask != uint16.high.int: return false i += 16 @@ -416,9 +419,8 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = let oddMask = mm_set1_epi16(cast[int16](0xff00)) div255 = mm_set1_epi16(cast[int16](0x8081)) - vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8) - - for _ in countup(0, byteLen - 16, 16): + opacityVec = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8) + for _ in 0 ..< byteLen div 16: when type(target) is Image: let index = i div 4 else: @@ -433,8 +435,8 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = valuesOdd = 
mm_and_si128(values, oddMask) # values * opacity - valuesEven = mm_mulhi_epu16(valuesEven, vOpacity) - valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity) + valuesEven = mm_mulhi_epu16(valuesEven, opacityVec) + valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec) # div 255 valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7) @@ -465,21 +467,21 @@ proc invert*(target: Image | Mask) {.raises: [].} = ## Inverts all of the colors and alpha. var i: int when defined(amd64) and not defined(pixieNoSimd): - let v255 = mm_set1_epi8(cast[int8](255)) + let vec255 = mm_set1_epi8(cast[int8](255)) when type(target) is Image: let byteLen = target.data.len * 4 else: let byteLen = target.data.len - for _ in countup(0, byteLen - 16, 16): + for _ in 0 ..< byteLen div 16: when type(target) is Image: let index = i div 4 else: let index = i var values = mm_loadu_si128(target.data[index].addr) - values = mm_sub_epi8(v255, values) + values = mm_sub_epi8(vec255, values) mm_storeu_si128(target.data[index].addr, values) i += 16 @@ -568,7 +570,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} = var i: int when defined(amd64) and not defined(pixieNoSimd): - for _ in countup(0, image.data.len - 16, 16): + for _ in 0 ..< image.data.len div 16: var a = mm_loadu_si128(image.data[i + 0].addr) b = mm_loadu_si128(image.data[i + 4].addr) @@ -817,7 +819,7 @@ proc drawUber( when type(a) is Image: if blendMode.hasSimdBlender(): let blenderSimd = blendMode.blenderSimd() - for _ in countup(x, xMax - 16, 16): + for _ in 0 ..< (xMax - xMin) div 16: let srcPos = p + dx * x.float32 + dy * y.float32 sx = srcPos.x.int @@ -847,7 +849,7 @@ proc drawUber( else: # is a Mask if blendMode.hasSimdMasker(): let maskerSimd = blendMode.maskerSimd() - for _ in countup(x, xMax - 16, 16): + for _ in 0 ..< (xMax - xMin) div 16: let srcPos = p + dx * x.float32 + dy * y.float32 sx = srcPos.x.int diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index 1932819..c46af28 100644 --- 
a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -55,8 +55,7 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} notAlphaMask = mm_set1_epi32(0x00ffffff) oddMask = mm_set1_epi16(cast[int16](0xff00)) div255 = mm_set1_epi16(cast[int16](0x8081)) - - for _ in countup(i, data.len - 4, 4): + for _ in 0 ..< data.len div 4: var color = mm_loadu_si128(data[i].addr) alpha = mm_and_si128(color, alphaMask) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 9bc39e8..ac8a7a6 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -246,12 +246,12 @@ proc ceil*(mask: Mask) {.raises: [].} = var i: int when defined(amd64) and not defined(pixieNoSimd): let - vZero = mm_setzero_si128() - vMax = mm_set1_epi32(cast[int32](uint32.high)) - for _ in countup(0, mask.data.len - 16, 16): + zeroVec = mm_setzero_si128() + vec255 = mm_set1_epi32(cast[int32](uint32.high)) + for _ in 0 ..< mask.data.len div 16: var values = mm_loadu_si128(mask.data[i].addr) - values = mm_cmpeq_epi8(values, vZero) - values = mm_andnot_si128(values, vMax) + values = mm_cmpeq_epi8(values, zeroVec) + values = mm_andnot_si128(values, vec255) mm_storeu_si128(mask.data[i].addr, values) i += 16 diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index ad34dfa..ab5df36 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1258,10 +1258,10 @@ proc computeCoverages( var i = fillStart when defined(amd64) and not defined(pixieNoSimd): let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage)) - for j in countup(i, fillStart + fillLen - 16, 16): - var coverage = mm_loadu_si128(coverages[j - startX].addr) + for _ in 0 ..< fillLen div 16: + var coverage = mm_loadu_si128(coverages[i - startX].addr) coverage = mm_add_epi8(coverage, sampleCoverageVec) - mm_storeu_si128(coverages[j - startX].addr, coverage) + mm_storeu_si128(coverages[i - startX].addr, coverage) i += 16 for j in i ..< fillStart + fillLen: coverages[j - startX] += sampleCoverage @@ -1296,7 
+1296,7 @@ proc fillCoverage( vec255 = mm_set1_epi32(cast[int32](uint32.high)) zeroVec = mm_setzero_si128() colorVec = mm_set1_epi32(cast[int32](rgbx)) - for _ in countup(x, startX + coverages.len - 16, 16): + for _ in 0 ..< coverages.len div 16: let index = image.dataIndex(x, y) coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr) @@ -1386,7 +1386,7 @@ proc fillCoverage( let maskerSimd = blendMode.maskerSimd() zeroVec = mm_setzero_si128() - for _ in countup(x, startX + coverages.len - 16, 16): + for _ in 0 ..< coverages.len div 16: let index = mask.dataIndex(x, y) coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr) @@ -1448,7 +1448,7 @@ proc fillHits( let blenderSimd = blendMode.blenderSimd() colorVec = mm_set1_epi32(cast[int32](rgbx)) - for _ in countup(fillStart, fillLen - 16, 16): + for _ in 0 ..< fillLen div 16: let index = image.dataIndex(x, y) for i in 0 ..< 4: let backdrop = mm_loadu_si128(image.data[index + i * 4].addr) @@ -1497,7 +1497,7 @@ proc fillHits( let maskerSimd = blendMode.maskerSimd() valueVec = mm_set1_epi8(cast[int8](255)) - for _ in countup(fillStart, fillLen - 16, 16): + for _ in 0 ..< fillLen div 16: let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr) mm_storeu_si128( mask.data[mask.dataIndex(x, y)].addr, diff --git a/tests/benchmark_blends.nim b/tests/benchmark_blends.nim index 2052f85..d5f1f97 100644 --- a/tests/benchmark_blends.nim +++ b/tests/benchmark_blends.nim @@ -142,8 +142,10 @@ when defined(amd64) and not defined(pixieNoSimd): reset() timeIt "blendNormal [simd]": - for i in countup(0, backdrop.data.len - 4, 4): + var i: int + while i <= backdrop.data.len - 4: let b = mm_loadu_si128(backdrop.data[i].addr) s = mm_loadu_si128(source.data[i].addr) mm_storeu_si128(backdrop.data[i].addr, blendNormalSimd(b, s)) + i += 4