simd macro works on signature not just name, split applyOpacity + invert

This commit is contained in:
Ryan Oldenburg 2022-07-22 22:27:18 -05:00
parent 044ebdca78
commit 2ace8e5e9f
4 changed files with 155 additions and 91 deletions

View file

@ -320,38 +320,38 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
result.width * 4
)
proc applyOpacity*(image: Image, opacity: float32) {.hasSimd, raises: [].} =
  ## Scales every channel (r, g, b and a) of each pixel by opacity, modifying
  ## the image in place. The opacity is first converted to an integer via
  ## `round(255 * opacity)`.
  let opacity = round(255 * opacity).uint16
  if opacity == 255:
    # Scaling by 255 / 255 leaves every pixel as-is.
    return

  if opacity == 0:
    # Everything would become zero, so clear the image in one call.
    image.fill(rgbx(0, 0, 0, 0))
    return

  for i in 0 ..< image.data.len:
    var rgbx = image.data[i]
    rgbx.r = ((rgbx.r * opacity) div 255).uint8
    rgbx.g = ((rgbx.g * opacity) div 255).uint8
    rgbx.b = ((rgbx.b * opacity) div 255).uint8
    rgbx.a = ((rgbx.a * opacity) div 255).uint8
    image.data[i] = rgbx
proc invert*(image: Image) {.hasSimd, raises: [].} =
  ## Inverts all of the colors and alpha, modifying the image in place.
  for i in 0 ..< image.data.len:
    var rgbx = image.data[i]
    rgbx.r = 255 - rgbx.r
    rgbx.g = 255 - rgbx.g
    rgbx.b = 255 - rgbx.b
    rgbx.a = 255 - rgbx.a
    image.data[i] = rgbx

  # Inverting rgbx(50, 100, 150, 200) becomes rgbx(205, 155, 105, 55). This
  # is not a valid premultiplied alpha color.
  # We need to convert back to premultiplied alpha after inverting.
  image.data.toPremultipliedAlpha()
proc blur*(
image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0)

View file

@ -1,4 +1,4 @@
import common, internal, simd, vmath
import common, internal, simd, system/memory, vmath
export Mask, newMask
@ -165,18 +165,18 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
result.width
)
proc applyOpacity*(mask: Mask, opacity: float32) {.hasSimd, raises: [].} =
  ## Multiplies every value of the mask by opacity, modifying the mask in
  ## place. The opacity is first converted to an integer via
  ## `round(255 * opacity)`.
  let opacity = round(255 * opacity).uint16
  if opacity == 255:
    # Scaling by 255 / 255 leaves every value as-is.
    return

  if opacity == 0:
    # Everything would become zero, so clear the mask in one call.
    mask.fill(0)
    return

  for i in 0 ..< mask.data.len:
    mask.data[i] = ((mask.data[i] * opacity) div 255).uint8
proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
## Gets an interpolated value at floating-point coordinates.
@ -206,10 +206,10 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
else:
topMix
proc invert*(mask: Mask) {.hasSimd, raises: [].} =
  ## Inverts all of the values - creates a negative of the mask.
  ## The mask is modified in place: every value v becomes 255 - v.
  for i in 0 ..< mask.data.len:
    mask.data[i] = 255 - mask.data[i]
proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
## Grows the mask by spread.

View file

@ -3,7 +3,7 @@ import std/macros, std/tables
var simdProcs* {.compiletime.}: Table[string, NimNode]
proc procName(procedure: NimNode): string =
## Given a procedure signature returns only name string.
## Given a procedure this returns the name as a string.
let nameNode = procedure[0]
if nameNode.kind == nnkPostfix:
nameNode[1].strVal
@ -11,16 +11,30 @@ proc procName(procedure: NimNode): string =
nameNode.strVal
proc procArguments(procedure: NimNode): seq[NimNode] =
  ## Collects the parameter name nodes of a procedure, in declaration order.
  ## A grouped declaration such as `a, b: int` contributes both `a` and `b`.
  let formalParams = procedure[3]
  # Child 0 of the formal parameters is the return type, so start at 1.
  for paramIdx in 1 ..< formalParams.len:
    let identDefs = formalParams[paramIdx]
    # The trailing two children are not names (the type sits at `[^2]`),
    # everything before them is a parameter name.
    var nameIdx = 0
    while nameIdx < identDefs.len - 2:
      result.add(identDefs[nameIdx])
      inc nameIdx
proc procReturnType(procedure: NimNode): NimNode =
## Given a procedure this gets the return type.
# Child 3 of the procedure node holds its parameters; the first entry of
# that list is the return type node.
procedure[3][0]
proc procSignature(procName: string, procedure: NimNode): string =
  ## Builds a key of the form `name(Type1, Type2)` from the given name and the
  ## parameter types of the procedure. Every parameter contributes one entry,
  ## so `a, b: int` yields `int, int`. A procedure without parameters yields
  ## `name()`.
  let formalParams = procedure[3]

  # Gather one type string per declared parameter name.
  var paramTypes: seq[string]
  for paramIdx in 1 ..< formalParams.len:
    let
      identDefs = formalParams[paramIdx]
      typeRepr = identDefs[^2].repr
    for _ in 0 ..< identDefs.len - 2:
      paramTypes.add(typeRepr)

  # Join the types with ", " between the parentheses.
  result = procName & "("
  for idx, typeRepr in paramTypes:
    if idx > 0:
      result.add(", ")
    result.add(typeRepr)
  result.add(")")
proc callAndReturn(name: NimNode, procedure: NimNode): NimNode =
## Produces a procedure call with arguments.
let
@ -38,8 +52,8 @@ proc callAndReturn(name: NimNode, procedure: NimNode): NimNode =
return `call`
macro simd*(procedure: untyped) =
  ## Registers a copy of the annotated procedure in `simdProcs` and returns
  ## the procedure unchanged.
  ## The table key is the full signature (name plus parameter types) rather
  ## than the bare name, so overloads that share a name are stored as separate
  ## entries instead of overwriting each other.
  let signature = procSignature(procedure.procName(), procedure)
  simdProcs[signature] = procedure.copy()
  return procedure
macro hasSimd*(procedure: untyped) =
@ -53,25 +67,31 @@ macro hasSimd*(procedure: untyped) =
callAvx = callAndReturn(ident(nameAvx), procedure)
callAvx2 = callAndReturn(ident(nameAvx2), procedure)
var body = newStmtList()
var
foundSimd: bool
body = newStmtList()
when defined(amd64) and not defined(pixieNoAvx):
if nameAvx2 in simdProcs:
if procSignature(nameAvx2, procedure) in simdProcs:
foundSimd = true
body.add quote do:
if cpuHasAvx2:
`callAvx2`
if nameAvx in simdProcs:
if procSignature(nameAvx, procedure) in simdProcs:
foundSimd = true
body.add quote do:
if cpuHasAvx2:
`callAvx`
if nameSse2 in simdProcs:
let bodySse2 = simdProcs[nameSse2][6]
if procSignature(nameSse2, procedure) in simdProcs:
foundSimd = true
let bodySse2 = simdProcs[procSignature(nameSse2, procedure)][6]
body.add quote do:
`bodySse2`
elif nameNeon in simdProcs:
let bodyNeon = simdProcs[nameNeon][6]
elif procSignature(nameNeon, procedure) in simdProcs:
foundSimd = true
let bodyNeon = simdProcs[procSignature(nameNeon, procedure)][6]
body.add quote do:
`bodyNeon`
else:
@ -80,4 +100,7 @@ macro hasSimd*(procedure: untyped) =
procedure[6] = body
if not foundSimd:
echo "No SIMD found for " & procSignature(name, procedure)
return procedure

View file

@ -1,4 +1,4 @@
import chroma, internal, nimsimd/sse2, pixie/common, vmath
import chroma, internal, nimsimd/sse2, pixie/common, system/memory, vmath
when defined(release):
{.push checks: off.}
@ -244,32 +244,24 @@ proc newMaskSse2*(image: Image): Mask {.simd.} =
for i in i ..< image.data.len:
result.data[i] = image.data[i].a
proc invertSse2*(target: Image | Mask) {.simd.} =
proc invertSse2*(image: Image) {.simd.} =
var
i: int
p = cast[uint](target.data[0].addr)
p = cast[uint](image.data[0].addr)
# Align to 16 bytes
while i < target.data.len and (p and 15) != 0:
when target is Image:
var rgbx = target.data[i]
rgbx.r = 255 - rgbx.r
rgbx.g = 255 - rgbx.g
rgbx.b = 255 - rgbx.b
rgbx.a = 255 - rgbx.a
target.data[i] = rgbx
inc i
p += 4
else:
target.data[i] = 255 - target.data[i]
inc i
inc p
while i < image.data.len and (p and 15) != 0:
var rgbx = image.data[i]
rgbx.r = 255 - rgbx.r
rgbx.g = 255 - rgbx.g
rgbx.b = 255 - rgbx.b
rgbx.a = 255 - rgbx.a
image.data[i] = rgbx
inc i
p += 4
let vec255 = mm_set1_epi8(255)
when target is Image:
let iterations = target.data.len div 16
else:
let iterations = target.data.len div 64
let
vec255 = mm_set1_epi8(255)
iterations = image.data.len div 16
for _ in 0 ..< iterations:
let
@ -282,24 +274,47 @@ proc invertSse2*(target: Image | Mask) {.simd.} =
mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
p += 64
i += 16 * iterations
when target is Image:
i += 16 * iterations
for i in i ..< image.data.len:
var rgbx = image.data[i]
rgbx.r = 255 - rgbx.r
rgbx.g = 255 - rgbx.g
rgbx.b = 255 - rgbx.b
rgbx.a = 255 - rgbx.a
image.data[i] = rgbx
for i in i ..< target.data.len:
var rgbx = target.data[i]
rgbx.r = 255 - rgbx.r
rgbx.g = 255 - rgbx.g
rgbx.b = 255 - rgbx.b
rgbx.a = 255 - rgbx.a
target.data[i] = rgbx
toPremultipliedAlphaSse2(image.data)
toPremultipliedAlphaSse2(target.data)
else:
i += 64 * iterations
proc invertSse2*(mask: Mask) {.simd.} =
  ## SSE2 implementation of inverting a mask in place: every value v becomes
  ## 255 - v.
  if mask.data.len == 0:
    # Nothing to invert, and `mask.data[0]` below would be out of bounds.
    return

  var
    i: int
    p = cast[uint](mask.data[0].addr)
  # Align to 16 bytes
  while i < mask.data.len and (p and 15) != 0:
    mask.data[i] = 255 - mask.data[i]
    inc i
    inc p

  let
    vec255 = mm_set1_epi8(255)
    # Count only the bytes left after the alignment prefix. Using the full
    # length here would let the vector loop run past the end of the data
    # whenever the prefix consumed some bytes.
    iterations = (mask.data.len - i) div 64
  for _ in 0 ..< iterations:
    # Process 64 bytes (4 x 16-byte vectors) per iteration.
    let
      a = mm_load_si128(cast[pointer](p))
      b = mm_load_si128(cast[pointer](p + 16))
      c = mm_load_si128(cast[pointer](p + 32))
      d = mm_load_si128(cast[pointer](p + 48))
    mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
    mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
    mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
    mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
    p += 64
  i += 64 * iterations

  # Handle whatever did not fill a whole 64-byte chunk.
  for i in i ..< mask.data.len:
    mask.data[i] = 255 - mask.data[i]
proc ceilSse2*(mask: Mask) {.simd.} =
var
@ -322,34 +337,69 @@ proc ceilSse2*(mask: Mask) {.simd.} =
if mask.data[i] != 0:
mask.data[i] = 255
proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
  ## SSE2 implementation of scaling every channel of each pixel by opacity,
  ## in place. The opacity is first converted to an integer via
  ## `round(255 * opacity)`.
  let opacity = round(255 * opacity).uint16
  if opacity == 255:
    return

  if opacity == 0:
    fillUnsafeSse2(image.data, rgbx(0, 0, 0, 0), 0, image.data.len)
    return

  if image.data.len == 0:
    # Nothing to scale, and `image.data[0]` below would be out of bounds.
    return

  var
    i: int
    p = cast[uint](image.data[0].addr)

  let
    oddMask = mm_set1_epi16(0xff00)
    div255 = mm_set1_epi16(0x8081)
    zeroVec = mm_setzero_si128()
    opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
    # Each 16-byte vector holds 4 pixels of 4 bytes.
    iterations = image.data.len div 4
  for _ in 0 ..< iterations:
    let values = mm_loadu_si128(cast[pointer](p))
    # Skip the math when all 16 bytes are zero, scaling cannot change them.
    if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
      # Split each 16-bit lane into its low ("even") and high ("odd") byte,
      # both placed in the high half of the lane.
      var
        valuesEven = mm_slli_epi16(values, 8)
        valuesOdd = mm_and_si128(values, oddMask)
      # High 16 bits of (byte << 8) * (opacity << 8) is byte * opacity.
      valuesEven = mm_mulhi_epu16(valuesEven, opacityVec)
      valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)
      # Divide by 255: multiply by 0x8081 and shift right by 16 + 7 bits.
      valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
      valuesOdd = mm_srli_epi16(mm_mulhi_epu16(valuesOdd, div255), 7)
      # Recombine the two byte streams and write them back.
      mm_storeu_si128(
        cast[pointer](p),
        mm_or_si128(valuesEven, mm_slli_epi16(valuesOdd, 8))
      )
    p += 16
  i += 4 * iterations

  # Handle the pixels that did not fill a whole vector.
  for i in i ..< image.data.len:
    var rgbx = image.data[i]
    rgbx.r = ((rgbx.r * opacity) div 255).uint8
    rgbx.g = ((rgbx.g * opacity) div 255).uint8
    rgbx.b = ((rgbx.b * opacity) div 255).uint8
    rgbx.a = ((rgbx.a * opacity) div 255).uint8
    image.data[i] = rgbx
proc applyOpacitySse2*(mask: Mask, opacity: float32) {.simd.} =
let opacity = round(255 * opacity).uint16
if opacity == 255:
return
if opacity == 0:
nimSetMem(mask.data[0].addr, 0.cint, mask.data.len)
var
i: int
p = cast[uint](mask.data[0].addr)
let
oddMask = mm_set1_epi16(0xff00)
div255 = mm_set1_epi16(0x8081)
zeroVec = mm_setzero_si128()
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
iterations = mask.data.len div 16
for _ in 0 ..< iterations:
let values = mm_loadu_si128(cast[pointer](p))
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
var
@ -366,17 +416,8 @@ proc applyOpacitySse2*(target: Image | Mask, opacity: float32) {.simd.} =
p += 16
i += 16 * iterations
when target is Image:
for i in i div 4 ..< target.data.len:
var rgbx = target.data[i]
rgbx.r = ((rgbx.r * opacity) div 255).uint8
rgbx.g = ((rgbx.g * opacity) div 255).uint8
rgbx.b = ((rgbx.b * opacity) div 255).uint8
rgbx.a = ((rgbx.a * opacity) div 255).uint8
target.data[i] = rgbx
else:
for i in i ..< target.data.len:
target.data[i] = ((target.data[i] * opacity) div 255).uint8
for i in i ..< mask.data.len:
mask.data[i] = ((mask.data[i] * opacity) div 255).uint8
when defined(release):
{.pop.}