remove most countups

This commit is contained in:
Ryan Oldenburg 2021-11-19 12:31:09 -06:00
parent 2a54de7604
commit f1b04ca441
7 changed files with 58 additions and 55 deletions

View file

@ -408,8 +408,8 @@ proc draw(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) =
let points = points.split(" ")
if points.len mod 2 != 0:
failInvalid()
for i in countup(0, points.len - 2, 2):
vecs.add(vec2(parseFloat(points[i]), parseFloat(points[i + 1])))
for i in 0 ..< points.len div 2:
vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1])))
if vecs.len == 0:
failInvalid()

View file

@ -410,8 +410,8 @@ proc drawInternal(img: Image, node: XmlNode, ctxStack: var seq[Ctx]) =
let points = points.split(" ")
if points.len mod 2 != 0:
failInvalid()
for i in countup(0, points.len - 2, 2):
vecs.add(vec2(parseFloat(points[i]), parseFloat(points[i + 1])))
for i in 0 ..< points.len div 2:
vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1])))
if vecs.len == 0:
failInvalid()

View file

@ -28,16 +28,19 @@ proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
result = newImage(mask.width, mask.height)
var i: int
when defined(amd64) and not defined(pixieNoSimd):
for _ in countup(0, mask.data.len - 16, 4):
var alphas = unpackAlphaValues(mm_loadu_si128(mask.data[i].addr))
alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 8))
alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 16))
mm_storeu_si128(result.data[i].addr, alphas)
i += 4
for _ in 0 ..< mask.data.len div 16:
var alphas = mm_loadu_si128(mask.data[i].addr)
for j in 0 ..< 4:
var unpacked = unpackAlphaValues(alphas)
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 8))
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
mm_storeu_si128(result.data[i + j * 4].addr, unpacked)
alphas = mm_srli_si128(alphas, 4)
i += 16
for i in i ..< mask.data.len:
let v = mask.data[i]
result.data[i] = rgbx(v, v, v, v)
for j in i ..< mask.data.len:
let v = mask.data[j]
result.data[j] = rgbx(v, v, v, v)
proc copy*(image: Image): Image {.raises: [PixieError].} =
## Copies the image data into a new image.
@ -104,10 +107,10 @@ proc fillUnsafe*(
var i = start
when defined(amd64) and not defined(pixieNoSimd):
# When supported, SIMD fill until we run out of room
let m = mm_set1_epi32(cast[int32](rgbx))
for j in countup(i, start + len - 8, 8):
mm_storeu_si128(data[j].addr, m)
mm_storeu_si128(data[j + 4].addr, m)
let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< len div 8:
mm_storeu_si128(data[i + 0].addr, colorVec)
mm_storeu_si128(data[i + 4].addr, colorVec)
i += 8
else:
when sizeof(int) == 8:
@ -115,8 +118,8 @@ proc fillUnsafe*(
let
u32 = cast[uint32](rgbx)
u64 = cast[uint64]([u32, u32])
for j in countup(i, start + len - 2, 2):
cast[ptr uint64](data[j].addr)[] = u64
for _ in 0 ..< len div 2:
cast[ptr uint64](data[i].addr)[] = u64
i += 2
# Fill whatever is left the slow way
for j in i ..< start + len:
@ -135,10 +138,10 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let colorVec = mm_set1_epi32(cast[int32](color))
for j in countup(0, image.data.len - 8, 8):
for _ in 0 ..< image.data.len div 8:
let
values0 = mm_loadu_si128(image.data[j].addr)
values1 = mm_loadu_si128(image.data[j + 4].addr)
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
mask0 = mm_movemask_epi8(mm_cmpeq_epi8(values0, colorVec))
mask1 = mm_movemask_epi8(mm_cmpeq_epi8(values1, colorVec))
if mask0 != uint16.high.int or mask1 != uint16.high.int:
@ -155,17 +158,17 @@ proc isTransparent*(image: Image): bool {.raises: [].} =
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let transparent = mm_setzero_si128()
for j in countup(0, image.data.len - 16, 16):
let zeroVec = mm_setzero_si128()
for _ in 0 ..< image.data.len div 16:
let
values0 = mm_loadu_si128(image.data[j].addr)
values1 = mm_loadu_si128(image.data[j + 4].addr)
values2 = mm_loadu_si128(image.data[j + 8].addr)
values3 = mm_loadu_si128(image.data[j + 12].addr)
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
values2 = mm_loadu_si128(image.data[i + 8].addr)
values3 = mm_loadu_si128(image.data[i + 12].addr)
values01 = mm_or_si128(values0, values1)
values23 = mm_or_si128(values2, values3)
values = mm_or_si128(values01, values23)
mask = mm_movemask_epi8(mm_cmpeq_epi8(values, transparent))
mask = mm_movemask_epi8(mm_cmpeq_epi8(values, zeroVec))
if mask != uint16.high.int:
return false
i += 16
@ -416,9 +419,8 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
for _ in countup(0, byteLen - 16, 16):
opacityVec = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
for _ in 0 ..< byteLen div 16:
when type(target) is Image:
let index = i div 4
else:
@ -433,8 +435,8 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
valuesOdd = mm_and_si128(values, oddMask)
# values * opacity
valuesEven = mm_mulhi_epu16(valuesEven, vOpacity)
valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity)
valuesEven = mm_mulhi_epu16(valuesEven, opacityVec)
valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
# div 255
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
@ -465,21 +467,21 @@ proc invert*(target: Image | Mask) {.raises: [].} =
## Inverts all of the colors and alpha.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let v255 = mm_set1_epi8(cast[int8](255))
let vec255 = mm_set1_epi8(cast[int8](255))
when type(target) is Image:
let byteLen = target.data.len * 4
else:
let byteLen = target.data.len
for _ in countup(0, byteLen - 16, 16):
for _ in 0 ..< byteLen div 16:
when type(target) is Image:
let index = i div 4
else:
let index = i
var values = mm_loadu_si128(target.data[index].addr)
values = mm_sub_epi8(v255, values)
values = mm_sub_epi8(vec255, values)
mm_storeu_si128(target.data[index].addr, values)
i += 16
@ -568,7 +570,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
var i: int
when defined(amd64) and not defined(pixieNoSimd):
for _ in countup(0, image.data.len - 16, 16):
for _ in 0 ..< image.data.len div 16:
var
a = mm_loadu_si128(image.data[i + 0].addr)
b = mm_loadu_si128(image.data[i + 4].addr)
@ -817,7 +819,7 @@ proc drawUber(
when type(a) is Image:
if blendMode.hasSimdBlender():
let blenderSimd = blendMode.blenderSimd()
for _ in countup(x, xMax - 16, 16):
for _ in 0 ..< (xMax - xMin) div 16:
let
srcPos = p + dx * x.float32 + dy * y.float32
sx = srcPos.x.int
@ -847,7 +849,7 @@ proc drawUber(
else: # is a Mask
if blendMode.hasSimdMasker():
let maskerSimd = blendMode.maskerSimd()
for _ in countup(x, xMax - 16, 16):
for _ in 0 ..< (xMax - xMin) div 16:
let
srcPos = p + dx * x.float32 + dy * y.float32
sx = srcPos.x.int

View file

@ -55,8 +55,7 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
notAlphaMask = mm_set1_epi32(0x00ffffff)
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
for _ in countup(i, data.len - 4, 4):
for _ in 0 ..< data.len div 4:
var
color = mm_loadu_si128(data[i].addr)
alpha = mm_and_si128(color, alphaMask)

View file

@ -246,12 +246,12 @@ proc ceil*(mask: Mask) {.raises: [].} =
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
vZero = mm_setzero_si128()
vMax = mm_set1_epi32(cast[int32](uint32.high))
for _ in countup(0, mask.data.len - 16, 16):
zeroVec = mm_setzero_si128()
vec255 = mm_set1_epi32(cast[int32](uint32.high))
for _ in 0 ..< mask.data.len div 16:
var values = mm_loadu_si128(mask.data[i].addr)
values = mm_cmpeq_epi8(values, vZero)
values = mm_andnot_si128(values, vMax)
values = mm_cmpeq_epi8(values, zeroVec)
values = mm_andnot_si128(values, vec255)
mm_storeu_si128(mask.data[i].addr, values)
i += 16

View file

@ -1258,10 +1258,10 @@ proc computeCoverages(
var i = fillStart
when defined(amd64) and not defined(pixieNoSimd):
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
for j in countup(i, fillStart + fillLen - 16, 16):
var coverage = mm_loadu_si128(coverages[j - startX].addr)
for _ in 0 ..< fillLen div 16:
var coverage = mm_loadu_si128(coverages[i - startX].addr)
coverage = mm_add_epi8(coverage, sampleCoverageVec)
mm_storeu_si128(coverages[j - startX].addr, coverage)
mm_storeu_si128(coverages[i - startX].addr, coverage)
i += 16
for j in i ..< fillStart + fillLen:
coverages[j - startX] += sampleCoverage
@ -1296,7 +1296,7 @@ proc fillCoverage(
vec255 = mm_set1_epi32(cast[int32](uint32.high))
zeroVec = mm_setzero_si128()
colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in countup(x, startX + coverages.len - 16, 16):
for _ in 0 ..< coverages.len div 16:
let
index = image.dataIndex(x, y)
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
@ -1386,7 +1386,7 @@ proc fillCoverage(
let
maskerSimd = blendMode.maskerSimd()
zeroVec = mm_setzero_si128()
for _ in countup(x, startX + coverages.len - 16, 16):
for _ in 0 ..< coverages.len div 16:
let
index = mask.dataIndex(x, y)
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
@ -1448,7 +1448,7 @@ proc fillHits(
let
blenderSimd = blendMode.blenderSimd()
colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in countup(fillStart, fillLen - 16, 16):
for _ in 0 ..< fillLen div 16:
let index = image.dataIndex(x, y)
for i in 0 ..< 4:
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
@ -1497,7 +1497,7 @@ proc fillHits(
let
maskerSimd = blendMode.maskerSimd()
valueVec = mm_set1_epi8(cast[int8](255))
for _ in countup(fillStart, fillLen - 16, 16):
for _ in 0 ..< fillLen div 16:
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
mm_storeu_si128(
mask.data[mask.dataIndex(x, y)].addr,

View file

@ -142,8 +142,10 @@ when defined(amd64) and not defined(pixieNoSimd):
reset()
timeIt "blendNormal [simd]":
  # Benchmark body: walk `backdrop` and `source` four elements per iteration,
  # loading one 128-bit vector from each and storing the result of
  # `blendNormalSimd` back into `backdrop`.
  #
  # The loop bound is inclusive (`<=`) so that the last full group, the one
  # starting at `len - 4`, is processed as well. A strict `<` would stop one
  # group short of what an inclusive `countup(0, len - 4, 4)` covers.
  var i: int
  while i <= backdrop.data.len - 4:
    let
      b = mm_loadu_si128(backdrop.data[i].addr)
      s = mm_loadu_si128(source.data[i].addr)
    # Overwrite the backdrop elements that were just read with the blend.
    mm_storeu_si128(backdrop.data[i].addr, blendNormalSimd(b, s))
    i += 4