simd macro works on signature not just name, split applyOpacity + invert
This commit is contained in:
parent
044ebdca78
commit
2ace8e5e9f
|
@ -320,38 +320,38 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
|||
result.width * 4
|
||||
)
|
||||
|
||||
proc applyOpacity*(image: Image, opacity: float32) {.hasSimd, raises: [].} =
  ## Scales the red, green, blue and alpha channels of every pixel by opacity.
  ## The opacity is rounded to an integer factor out of 255 before use.
  let factor = round(255 * opacity).uint16

  if factor == 255:
    # Scaling by 255/255 leaves every pixel unchanged.
    return

  if factor == 0:
    # Every channel would become zero, so clear the whole image instead.
    image.fill(rgbx(0, 0, 0, 0))
    return

  template scaled(channel: uint8): uint8 =
    ((channel * factor) div 255).uint8

  for idx in 0 .. image.data.high:
    var pixel = image.data[idx]
    pixel.r = scaled(pixel.r)
    pixel.g = scaled(pixel.g)
    pixel.b = scaled(pixel.b)
    pixel.a = scaled(pixel.a)
    image.data[idx] = pixel
|
||||
|
||||
proc invert*(image: Image) {.hasSimd, raises: [].} =
  ## Replaces every channel (including alpha) of every pixel with 255 minus
  ## its value, then converts the data back to premultiplied alpha.
  for idx in 0 .. image.data.high:
    let source = image.data[idx]
    image.data[idx] = rgbx(
      255 - source.r,
      255 - source.g,
      255 - source.b,
      255 - source.a
    )

  # A plain per-channel inversion does not keep colors premultiplied, for
  # example rgbx(50, 100, 150, 200) turns into rgbx(205, 155, 105, 55), which
  # is not a valid premultiplied alpha color. Convert the data back afterwards.
  toPremultipliedAlpha(image.data)
|
||||
|
||||
proc blur*(
|
||||
image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import common, internal, simd, vmath
|
||||
import common, internal, simd, system/memory, vmath
|
||||
|
||||
export Mask, newMask
|
||||
|
||||
|
@ -165,18 +165,18 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
|||
result.width
|
||||
)
|
||||
|
||||
proc applyOpacity*(mask: Mask, opacity: float32) {.hasSimd, raises: [].} =
  ## Scales every value of the mask by opacity. The opacity is rounded to an
  ## integer factor out of 255 before use.
  let factor = round(255 * opacity).uint16

  if factor == 255:
    # Scaling by 255/255 leaves every value unchanged.
    return

  if factor == 0:
    # Every value would become zero, so clear the whole mask instead.
    mask.fill(0)
    return

  for idx in 0 .. mask.data.high:
    let product = mask.data[idx] * factor
    mask.data[idx] = (product div 255).uint8
|
||||
|
||||
proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
||||
## Gets an interpolated value at floating point coordinates.
|
||||
|
@ -206,10 +206,10 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
|||
else:
|
||||
topMix
|
||||
|
||||
proc invert*(mask: Mask) {.hasSimd, raises: [].} =
  ## Turns the mask into its negative: each value becomes 255 minus itself.
  for value in mask.data.mitems:
    value = 255 - value
|
||||
|
||||
proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
|
||||
## Grows the mask by spread.
|
||||
|
|
|
@ -3,7 +3,7 @@ import std/macros, std/tables
|
|||
var simdProcs* {.compiletime.}: Table[string, NimNode]
|
||||
|
||||
proc procName(procedure: NimNode): string =
|
||||
## Given a procedure signature returns only name string.
|
||||
## Given a procedure this returns the name as a string.
|
||||
let nameNode = procedure[0]
|
||||
if nameNode.kind == nnkPostfix:
|
||||
nameNode[1].strVal
|
||||
|
@ -11,16 +11,30 @@ proc procName(procedure: NimNode): string =
|
|||
nameNode.strVal
|
||||
|
||||
proc procArguments(procedure: NimNode): seq[NimNode] =
  ## Collects the parameter name nodes of a procedure, in declaration order.
  let params = procedure[3]
  # Slot 0 of the formal parameters is the return type, so start at 1.
  for paramIdx in 1 ..< params.len:
    let identDefs = params[paramIdx]
    # The last two children are the type and the default value; everything
    # before them is a parameter name.
    for nameIdx in 0 ..< identDefs.len - 2:
      result.add(identDefs[nameIdx])
|
||||
|
||||
proc procReturnType(procedure: NimNode): NimNode =
  ## Returns the return type node of a procedure, which is the first child
  ## of its formal parameters.
  let params = procedure[3]
  result = params[0]
|
||||
|
||||
proc procSignature(procName: string, procedure: NimNode): string =
  ## Builds a signature string from the given name and the parameter types
  ## of the procedure, for example `foo(int, int, float)`. A type shared by
  ## several parameters is listed once per parameter.
  let params = procedure[3]

  var typeNames: seq[string]
  # Slot 0 of the formal parameters is the return type, so start at 1.
  for paramIdx in 1 ..< params.len:
    let
      identDefs = params[paramIdx]
      typeName = identDefs[^2].repr
    for _ in 0 ..< identDefs.len - 2:
      typeNames.add(typeName)

  result = procName & "("
  for idx, typeName in typeNames:
    if idx > 0:
      result.add(", ")
    result.add(typeName)
  result.add(")")
|
||||
|
||||
proc callAndReturn(name: NimNode, procedure: NimNode): NimNode =
|
||||
## Produces a procedure call with arguments.
|
||||
let
|
||||
|
@ -38,8 +52,8 @@ proc callAndReturn(name: NimNode, procedure: NimNode): NimNode =
|
|||
return `call`
|
||||
|
||||
macro simd*(procedure: untyped) =
  ## Stores a copy of the procedure in simdProcs, keyed by its signature
  ## (name plus parameter types), and returns the procedure unchanged.
  let key = procSignature(procName(procedure), procedure)
  simdProcs[key] = copy(procedure)
  result = procedure
|
||||
|
||||
macro hasSimd*(procedure: untyped) =
|
||||
|
@ -53,25 +67,31 @@ macro hasSimd*(procedure: untyped) =
|
|||
callAvx = callAndReturn(ident(nameAvx), procedure)
|
||||
callAvx2 = callAndReturn(ident(nameAvx2), procedure)
|
||||
|
||||
var body = newStmtList()
|
||||
var
|
||||
foundSimd: bool
|
||||
body = newStmtList()
|
||||
|
||||
when defined(amd64) and not defined(pixieNoAvx):
|
||||
if nameAvx2 in simdProcs:
|
||||
if procSignature(nameAvx2, procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
body.add quote do:
|
||||
if cpuHasAvx2:
|
||||
`callAvx2`
|
||||
|
||||
if nameAvx in simdProcs:
|
||||
if procSignature(nameAvx, procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
body.add quote do:
|
||||
if cpuHasAvx2:
|
||||
`callAvx`
|
||||
|
||||
if nameSse2 in simdProcs:
|
||||
let bodySse2 = simdProcs[nameSse2][6]
|
||||
if procSignature(nameSse2, procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
let bodySse2 = simdProcs[procSignature(nameSse2, procedure)][6]
|
||||
body.add quote do:
|
||||
`bodySse2`
|
||||
elif nameNeon in simdProcs:
|
||||
let bodyNeon = simdProcs[nameNeon][6]
|
||||
elif procSignature(nameNeon, procedure) in simdProcs:
|
||||
foundSimd = true
|
||||
let bodyNeon = simdProcs[procSignature(nameNeon, procedure)][6]
|
||||
body.add quote do:
|
||||
`bodyNeon`
|
||||
else:
|
||||
|
@ -80,4 +100,7 @@ macro hasSimd*(procedure: untyped) =
|
|||
|
||||
procedure[6] = body
|
||||
|
||||
if not foundSimd:
|
||||
echo "No SIMD found for " & procSignature(name, procedure)
|
||||
|
||||
return procedure
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import chroma, internal, nimsimd/sse2, pixie/common, vmath
|
||||
import chroma, internal, nimsimd/sse2, pixie/common, system/memory, vmath
|
||||
|
||||
when defined(release):
|
||||
{.push checks: off.}
|
||||
|
@ -244,32 +244,24 @@ proc newMaskSse2*(image: Image): Mask {.simd.} =
|
|||
for i in i ..< image.data.len:
|
||||
result.data[i] = image.data[i].a
|
||||
|
||||
proc invertSse2*(target: Image | Mask) {.simd.} =
|
||||
proc invertSse2*(image: Image) {.simd.} =
|
||||
var
|
||||
i: int
|
||||
p = cast[uint](target.data[0].addr)
|
||||
p = cast[uint](image.data[0].addr)
|
||||
# Align to 16 bytes
|
||||
while i < target.data.len and (p and 15) != 0:
|
||||
when target is Image:
|
||||
var rgbx = target.data[i]
|
||||
rgbx.r = 255 - rgbx.r
|
||||
rgbx.g = 255 - rgbx.g
|
||||
rgbx.b = 255 - rgbx.b
|
||||
rgbx.a = 255 - rgbx.a
|
||||
target.data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
else:
|
||||
target.data[i] = 255 - target.data[i]
|
||||
inc i
|
||||
inc p
|
||||
while i < image.data.len and (p and 15) != 0:
|
||||
var rgbx = image.data[i]
|
||||
rgbx.r = 255 - rgbx.r
|
||||
rgbx.g = 255 - rgbx.g
|
||||
rgbx.b = 255 - rgbx.b
|
||||
rgbx.a = 255 - rgbx.a
|
||||
image.data[i] = rgbx
|
||||
inc i
|
||||
p += 4
|
||||
|
||||
let vec255 = mm_set1_epi8(255)
|
||||
|
||||
when target is Image:
|
||||
let iterations = target.data.len div 16
|
||||
else:
|
||||
let iterations = target.data.len div 64
|
||||
let
|
||||
vec255 = mm_set1_epi8(255)
|
||||
iterations = image.data.len div 16
|
||||
|
||||
for _ in 0 ..< iterations:
|
||||
let
|
||||
|
@ -282,24 +274,47 @@ proc invertSse2*(target: Image | Mask) {.simd.} =
|
|||
mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
|
||||
mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
|
||||
p += 64
|
||||
i += 16 * iterations
|
||||
|
||||
when target is Image:
|
||||
i += 16 * iterations
|
||||
for i in i ..< image.data.len:
|
||||
var rgbx = image.data[i]
|
||||
rgbx.r = 255 - rgbx.r
|
||||
rgbx.g = 255 - rgbx.g
|
||||
rgbx.b = 255 - rgbx.b
|
||||
rgbx.a = 255 - rgbx.a
|
||||
image.data[i] = rgbx
|
||||
|
||||
for i in i ..< target.data.len:
|
||||
var rgbx = target.data[i]
|
||||
rgbx.r = 255 - rgbx.r
|
||||
rgbx.g = 255 - rgbx.g
|
||||
rgbx.b = 255 - rgbx.b
|
||||
rgbx.a = 255 - rgbx.a
|
||||
target.data[i] = rgbx
|
||||
toPremultipliedAlphaSse2(image.data)
|
||||
|
||||
toPremultipliedAlphaSse2(target.data)
|
||||
else:
|
||||
i += 64 * iterations
|
||||
proc invertSse2*(mask: Mask) {.simd.} =
  ## SSE2 version of invert for masks: every value becomes 255 minus itself.
  ## Works in three phases: a scalar prologue up to the first 16-byte aligned
  ## address, an aligned loop handling 64 bytes per iteration, and a scalar
  ## loop for whatever is left.
  var
    i: int
    p = cast[uint](mask.data[0].addr)
  # Align to 16 bytes
  while i < mask.data.len and (p and 15) != 0:
    mask.data[i] = 255 - mask.data[i]
    inc i
    inc p

  let
    vec255 = mm_set1_epi8(255)
    # Count only the bytes that remain after the alignment prologue. Using
    # the full length here would let the 64-byte loop run past the end of
    # the data whenever the prologue consumed any bytes.
    iterations = (mask.data.len - i) div 64

  for _ in 0 ..< iterations:
    let
      a = mm_load_si128(cast[pointer](p))
      b = mm_load_si128(cast[pointer](p + 16))
      c = mm_load_si128(cast[pointer](p + 32))
      d = mm_load_si128(cast[pointer](p + 48))
    mm_store_si128(cast[pointer](p), mm_sub_epi8(vec255, a))
    mm_store_si128(cast[pointer](p + 16), mm_sub_epi8(vec255, b))
    mm_store_si128(cast[pointer](p + 32), mm_sub_epi8(vec255, c))
    mm_store_si128(cast[pointer](p + 48), mm_sub_epi8(vec255, d))
    p += 64
  i += 64 * iterations

  # Scalar tail for the bytes that did not fill a whole 64-byte iteration.
  for i in i ..< mask.data.len:
    mask.data[i] = 255 - mask.data[i]
|
||||
|
||||
proc ceilSse2*(mask: Mask) {.simd.} =
|
||||
var
|
||||
|
@ -322,34 +337,69 @@ proc ceilSse2*(mask: Mask) {.simd.} =
|
|||
if mask.data[i] != 0:
|
||||
mask.data[i] = 255
|
||||
|
||||
proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
  ## SSE2 version of applyOpacity for images. Every channel byte is multiplied
  ## by the opacity (rounded to an integer factor out of 255) and divided by
  ## 255, four pixels per SIMD iteration, with a scalar loop for the rest.
  let factor = round(255 * opacity).uint16

  if factor == 255:
    return

  if factor == 0:
    fillUnsafeSse2(image.data, rgbx(0, 0, 0, 0), 0, image.data.len)
    return

  let
    highByteMask = mm_set1_epi16(0xff00)
    reciprocal255 = mm_set1_epi16(0x8081)
    allZero = mm_setzero_si128()
    factorVec = mm_slli_epi16(mm_set1_epi16(factor), 8)
    chunks = image.data.len div 4

  var address = cast[uint](image.data[0].addr)
  for _ in 0 ..< chunks:
    let pixels = mm_loadu_si128(cast[pointer](address))
    # Skip the arithmetic and the store when all 16 bytes are already zero.
    if mm_movemask_epi8(mm_cmpeq_epi16(pixels, allZero)) != 0xffff:
      # Each 16-bit lane holds two channel bytes. Both are moved into the
      # high byte of their own lane so that the high half of the 16-bit
      # multiply yields channel * factor.
      let
        evenProduct = mm_mulhi_epu16(mm_slli_epi16(pixels, 8), factorVec)
        oddProduct = mm_mulhi_epu16(
          mm_and_si128(pixels, highByteMask),
          factorVec
        )
        # Multiplying by 0x8081 and shifting right by 16 + 7 bits divides
        # the product by 255.
        evenResult = mm_srli_epi16(
          mm_mulhi_epu16(evenProduct, reciprocal255),
          7
        )
        oddResult = mm_srli_epi16(
          mm_mulhi_epu16(oddProduct, reciprocal255),
          7
        )
      mm_storeu_si128(
        cast[pointer](address),
        mm_or_si128(evenResult, mm_slli_epi16(oddResult, 8))
      )
    address += 16

  # Scalar tail for the pixels that did not fill a whole group of four.
  for idx in 4 * chunks ..< image.data.len:
    var pixel = image.data[idx]
    pixel.r = ((pixel.r * factor) div 255).uint8
    pixel.g = ((pixel.g * factor) div 255).uint8
    pixel.b = ((pixel.b * factor) div 255).uint8
    pixel.a = ((pixel.a * factor) div 255).uint8
    image.data[idx] = pixel
|
||||
|
||||
proc applyOpacitySse2*(mask: Mask, opacity: float32) {.simd.} =
|
||||
let opacity = round(255 * opacity).uint16
|
||||
if opacity == 255:
|
||||
return
|
||||
|
||||
if opacity == 0:
|
||||
nimSetMem(mask.data[0].addr, 0.cint, mask.data.len)
|
||||
|
||||
var
|
||||
i: int
|
||||
p = cast[uint](mask.data[0].addr)
|
||||
|
||||
let
|
||||
oddMask = mm_set1_epi16(0xff00)
|
||||
div255 = mm_set1_epi16(0x8081)
|
||||
zeroVec = mm_setzero_si128()
|
||||
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
|
||||
iterations = mask.data.len div 16
|
||||
for _ in 0 ..< iterations:
|
||||
let values = mm_loadu_si128(cast[pointer](p))
|
||||
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
|
||||
var
|
||||
|
@ -366,17 +416,8 @@ proc applyOpacitySse2*(target: Image | Mask, opacity: float32) {.simd.} =
|
|||
p += 16
|
||||
i += 16 * iterations
|
||||
|
||||
when target is Image:
|
||||
for i in i div 4 ..< target.data.len:
|
||||
var rgbx = target.data[i]
|
||||
rgbx.r = ((rgbx.r * opacity) div 255).uint8
|
||||
rgbx.g = ((rgbx.g * opacity) div 255).uint8
|
||||
rgbx.b = ((rgbx.b * opacity) div 255).uint8
|
||||
rgbx.a = ((rgbx.a * opacity) div 255).uint8
|
||||
target.data[i] = rgbx
|
||||
else:
|
||||
for i in i ..< target.data.len:
|
||||
target.data[i] = ((target.data[i] * opacity) div 255).uint8
|
||||
for i in i ..< mask.data.len:
|
||||
mask.data[i] = ((mask.data[i] * opacity) div 255).uint8
|
||||
|
||||
when defined(release):
|
||||
{.pop.}
|
||||
|
|
Loading…
Reference in a new issue