mask magnifyBy2 simd + copyMem
This commit is contained in:
parent
a6ed55592c
commit
39e37c357a
3 changed files with 46 additions and 13 deletions
|
@ -394,26 +394,26 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
||||||
var x: int
|
var x: int
|
||||||
when defined(amd64) and not defined(pixieNoSimd):
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
if scale == 2:
|
if scale == 2:
|
||||||
let mask = cast[M128i]([uint32.high, 0, 0, 0])
|
while x <= image.width - 4:
|
||||||
for _ in countup(0, image.width - 4, 2):
|
|
||||||
let
|
let
|
||||||
values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
|
values = mm_loadu_si128(image.data[image.dataIndex(x, y)].addr)
|
||||||
first = mm_and_si128(values, mask)
|
lo = mm_unpacklo_epi32(values, mm_setzero_si128())
|
||||||
second = mm_and_si128(mm_srli_si128(values, 4), mask)
|
hi = mm_unpackhi_epi32(values, mm_setzero_si128())
|
||||||
combined = mm_or_si128(first, mm_slli_si128(second, 8))
|
|
||||||
doubled = mm_or_si128(combined, mm_slli_si128(combined, 4))
|
|
||||||
mm_storeu_si128(
|
mm_storeu_si128(
|
||||||
result.data[result.dataIndex(x * scale, y * scale)].addr,
|
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
|
||||||
doubled
|
mm_or_si128(lo, mm_slli_si128(lo, 4))
|
||||||
)
|
)
|
||||||
x += 2
|
mm_storeu_si128(
|
||||||
for _ in x ..< image.width:
|
result.data[result.dataIndex(x * scale + 4, y * scale)].addr,
|
||||||
|
mm_or_si128(hi, mm_slli_si128(hi, 4))
|
||||||
|
)
|
||||||
|
x += 4
|
||||||
|
for x in x ..< image.width:
|
||||||
let
|
let
|
||||||
rgbx = image.unsafe[x, y]
|
rgbx = image.unsafe[x, y]
|
||||||
resultIdx = result.dataIndex(x * scale, y * scale)
|
resultIdx = result.dataIndex(x * scale, y * scale)
|
||||||
for i in 0 ..< scale:
|
for i in 0 ..< scale:
|
||||||
result.data[resultIdx + i] = rgbx
|
result.data[resultIdx + i] = rgbx
|
||||||
inc x
|
|
||||||
# Copy that row of pixels into (scale - 1) more rows
|
# Copy that row of pixels into (scale - 1) more rows
|
||||||
let rowStart = result.dataIndex(0, y * scale)
|
let rowStart = result.dataIndex(0, y * scale)
|
||||||
for i in 1 ..< scale:
|
for i in 1 ..< scale:
|
||||||
|
|
|
@ -166,14 +166,41 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
||||||
|
|
||||||
let scale = 2 ^ power
|
let scale = 2 ^ power
|
||||||
result = newMask(mask.width * scale, mask.height * scale)
|
result = newMask(mask.width * scale, mask.height * scale)
|
||||||
for y in 0 ..< result.height:
|
|
||||||
for x in 0 ..< mask.width:
|
for y in 0 ..< mask.height:
|
||||||
|
# Write one row of values duplicated by scale
|
||||||
|
var x: int
|
||||||
|
when defined(amd64) and not defined(pixieNoSimd):
|
||||||
|
if scale == 2:
|
||||||
|
while x <= mask.width - 16:
|
||||||
|
let
|
||||||
|
values = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
|
||||||
|
lo = mm_unpacklo_epi8(values, mm_setzero_si128())
|
||||||
|
hi = mm_unpacklo_epi8(values, mm_setzero_si128())
|
||||||
|
mm_storeu_si128(
|
||||||
|
result.data[result.dataIndex(x * scale + 0, y * scale)].addr,
|
||||||
|
mm_or_si128(lo, mm_slli_si128(lo, 1))
|
||||||
|
)
|
||||||
|
mm_storeu_si128(
|
||||||
|
result.data[result.dataIndex(x * scale + 16, y * scale)].addr,
|
||||||
|
mm_or_si128(hi, mm_slli_si128(hi, 1))
|
||||||
|
)
|
||||||
|
x += 16
|
||||||
|
for x in x ..< mask.width:
|
||||||
let
|
let
|
||||||
value = mask.unsafe[x, y div scale]
|
value = mask.unsafe[x, y div scale]
|
||||||
scaledX = x * scale
|
scaledX = x * scale
|
||||||
idx = result.dataIndex(scaledX, y)
|
idx = result.dataIndex(scaledX, y)
|
||||||
for i in 0 ..< scale:
|
for i in 0 ..< scale:
|
||||||
result.data[idx + i] = value
|
result.data[idx + i] = value
|
||||||
|
# Copy that row of values into (scale - 1) more rows
|
||||||
|
let rowStart = result.dataIndex(0, y * scale)
|
||||||
|
for i in 1 ..< scale:
|
||||||
|
copyMem(
|
||||||
|
result.data[rowStart + result.width * i].addr,
|
||||||
|
result.data[rowStart].addr,
|
||||||
|
result.width * 4
|
||||||
|
)
|
||||||
|
|
||||||
proc fillUnsafe*(
|
proc fillUnsafe*(
|
||||||
data: var seq[uint8], value: uint8, start, len: int
|
data: var seq[uint8], value: uint8, start, len: int
|
||||||
|
|
|
@ -13,6 +13,12 @@ timeIt "minifyBy2":
|
||||||
|
|
||||||
reset()
|
reset()
|
||||||
|
|
||||||
|
timeIt "magnifyBy2":
|
||||||
|
let magnified = mask.magnifyBy2()
|
||||||
|
doAssert magnified[0, 0] == 63
|
||||||
|
|
||||||
|
reset()
|
||||||
|
|
||||||
timeIt "invert":
|
timeIt "invert":
|
||||||
mask.invert()
|
mask.invert()
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue