no simd for tcc
This commit is contained in:
parent
f16c8226ef
commit
18b814ec65
7 changed files with 40 additions and 38 deletions
|
@ -1,8 +1,8 @@
|
|||
## Blending modes.
|
||||
|
||||
import chroma, common, math
|
||||
import chroma, common, internal, std/math
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
# See https://www.w3.org/TR/compositing-1/
|
||||
|
@ -274,7 +274,7 @@ proc blendSoftLight(backdrop, source: ColorRGBX): ColorRGBX =
|
|||
source = source.rgba()
|
||||
|
||||
var rgba: ColorRGBA
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let
|
||||
vb = mm_setr_ps(
|
||||
backdrop.r.float32,
|
||||
|
@ -479,7 +479,7 @@ proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} =
|
|||
else:
|
||||
raise newException(PixieError, "No masker for " & $blendMode)
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
type
|
||||
# NOTE(review): the first parameter is spelled `blackdrop`; presumably
# `backdrop` was intended (blendSoftLight names its parameters
# `backdrop, source`) - confirm no caller depends on the name before renaming.
BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].}
|
||||
## Function signature returned by blenderSimd.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import chroma, flatty/binny, pixie/common, pixie/images, pixie/masks, sequtils,
|
||||
std/decls, strutils
|
||||
import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal,
|
||||
pixie/masks, sequtils, std/decls, strutils
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
# This JPEG decoder is loosely based on stb_image which is public domain.
|
||||
|
@ -881,7 +881,7 @@ proc quantizationAndIDCTPass(state: var DecoderState) =
|
|||
for row in 0 ..< w:
|
||||
var data {.byaddr.} = state.components[comp].blocks[row][column]
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
for i in 0 ..< 8: # 8 per pass
|
||||
var q = mm_loadu_si128(state.quantizationTables[qTableId][i * 8].addr)
|
||||
q = mm_unpacklo_epi8(q, mm_setzero_si128())
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import blends, bumpy, chroma, common, masks, pixie/internal, vmath
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
const h = 0.5.float32
|
||||
|
@ -29,7 +29,7 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
|
|||
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
|
||||
result = newImage(mask.width, mask.height)
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
for _ in 0 ..< mask.data.len div 16:
|
||||
var alphas = mm_loadu_si128(mask.data[i].addr)
|
||||
for j in 0 ..< 4:
|
||||
|
@ -106,7 +106,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
|
|||
let color = image.data[0]
|
||||
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let colorVec = mm_set1_epi32(cast[int32](color))
|
||||
for _ in 0 ..< image.data.len div 8:
|
||||
let
|
||||
|
@ -127,7 +127,7 @@ proc isTransparent*(image: Image): bool {.raises: [].} =
|
|||
result = true
|
||||
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let vecZero = mm_setzero_si128()
|
||||
for _ in 0 ..< image.data.len div 16:
|
||||
let
|
||||
|
@ -254,7 +254,7 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
|||
)
|
||||
for y in 0 ..< resultEvenHeight:
|
||||
var x: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
first32 = cast[M128i]([uint32.high, 0, 0, 0])
|
||||
|
@ -348,7 +348,7 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
|
|||
for y in 0 ..< image.height:
|
||||
# Write one row of pixels duplicated by scale
|
||||
var x: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if scale == 2:
|
||||
while x <= image.width - 4:
|
||||
let
|
||||
|
@ -391,7 +391,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
|
|||
return
|
||||
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
when type(target) is Image:
|
||||
let byteLen = target.data.len * 4
|
||||
else:
|
||||
|
@ -447,7 +447,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
|
|||
proc invert*(target: Image) {.raises: [].} =
|
||||
## Inverts all of the colors and alpha.
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let vec255 = mm_set1_epi8(cast[int8](255))
|
||||
let byteLen = target.data.len * 4
|
||||
for _ in 0 ..< byteLen div 16:
|
||||
|
@ -536,7 +536,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
|
|||
result = newMask(image.width, image.height)
|
||||
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
for _ in 0 ..< image.data.len div 16:
|
||||
let
|
||||
a = mm_loadu_si128(image.data[i + 0].addr)
|
||||
|
@ -798,7 +798,7 @@ proc drawUber(
|
|||
)
|
||||
continue
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
case blendMode:
|
||||
of OverwriteBlend:
|
||||
for _ in 0 ..< (xStop - xStart) div 16:
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import chroma, system/memory, vmath
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
# Compile-time switch gating the SIMD code paths: true unless `pixieNoSimd`
# or `tcc` is defined. Exported so importing modules can write
# `when defined(amd64) and allowSimd:` instead of repeating the condition.
# NOTE(review): `tcc` is presumably the symbol set when building with the
# tcc C compiler backend - confirm.
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)
|
||||
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
template currentExceptionAsPixieError*(): untyped =
|
||||
|
@ -59,7 +61,7 @@ proc fillUnsafe*(
|
|||
nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
|
||||
else:
|
||||
var i = start
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
# When supported, SIMD fill until we run out of room
|
||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
for _ in 0 ..< len div 8:
|
||||
|
@ -93,7 +95,7 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
|||
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
|
||||
## Converts an image to premultiplied alpha from straight alpha.
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
# When supported, SIMD convert as much as possible
|
||||
let
|
||||
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
|
@ -140,7 +142,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
|
|||
result = true
|
||||
|
||||
var i = start
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let
|
||||
vec255 = mm_set1_epi32(cast[int32](uint32.high))
|
||||
colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0]))
|
||||
|
@ -161,7 +163,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
|
|||
if data[j].a != 255:
|
||||
return false
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
|
||||
## Shuffle the alpha values for these 4 colors to the first 4 bytes
|
||||
let mask = mm_set1_epi32(cast[int32](0xff000000))
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import common, internal, vmath
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
type
|
||||
|
@ -87,7 +87,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
|||
result = newMask(src.width div 2, src.height div 2)
|
||||
for y in 0 ..< result.height:
|
||||
var x: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let
|
||||
oddMask = mm_set1_epi16(cast[int16](0xff00))
|
||||
firstByte = cast[M128i](
|
||||
|
@ -169,7 +169,7 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
|
|||
for y in 0 ..< mask.height:
|
||||
# Write one row of values duplicated by scale
|
||||
var x: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if scale == 2:
|
||||
while x <= mask.width - 16:
|
||||
let
|
||||
|
@ -236,7 +236,7 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
|
|||
proc invert*(mask: Mask) {.raises: [].} =
|
||||
## Inverts all of the values - creates a negative of the mask.
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let vec255 = mm_set1_epi8(cast[int8](255))
|
||||
let byteLen = mask.data.len
|
||||
for _ in 0 ..< byteLen div 16:
|
||||
|
@ -312,7 +312,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
|
|||
proc ceil*(mask: Mask) {.raises: [].} =
|
||||
## A value of 0 stays 0. Anything else turns into 255.
|
||||
var i: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let
|
||||
zeroVec = mm_setzero_si128()
|
||||
vec255 = mm_set1_epi32(cast[int32](uint32.high))
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import chroma, common, images, vmath
|
||||
import chroma, common, images, internal, vmath
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
type
|
||||
|
@ -122,7 +122,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
|
|||
if at.y == to.y: # Horizontal gradient
|
||||
var x: int
|
||||
while x < image.width:
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if x + 4 <= image.width:
|
||||
var colors: array[4, ColorRGBX]
|
||||
for i in 0 ..< 4:
|
||||
|
@ -153,7 +153,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
|
|||
t = toLineSpace(at, to, xy)
|
||||
rgbx = paint.gradientColor(t)
|
||||
var x: int
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
for _ in 0 ..< image.width div 4:
|
||||
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import blends, bumpy, chroma, common, fenv, images, internal, masks, paints,
|
||||
strutils, vmath
|
||||
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
import nimsimd/sse2
|
||||
|
||||
type
|
||||
|
@ -1296,7 +1296,7 @@ proc computeCoverage(
|
|||
let fillLen = at.int - fillStart
|
||||
if fillLen > 0:
|
||||
var i = fillStart
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
|
||||
for _ in 0 ..< fillLen div 16:
|
||||
var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
|
||||
|
@ -1326,7 +1326,7 @@ proc fillCoverage(
|
|||
blendMode: BlendMode
|
||||
) =
|
||||
var x = startX
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if blendMode.hasSimdBlender():
|
||||
# When supported, SIMD blend as much as possible
|
||||
let
|
||||
|
@ -1445,7 +1445,7 @@ proc fillCoverage(
|
|||
blendMode: BlendMode
|
||||
) =
|
||||
var x = startX
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if blendMode.hasSimdMasker():
|
||||
let
|
||||
maskerSimd = blendMode.maskerSimd()
|
||||
|
@ -1511,7 +1511,7 @@ proc fillHits(
|
|||
continue
|
||||
|
||||
var x = fillStart
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if blendMode.hasSimdBlender():
|
||||
# When supported, SIMD blend as much as possible
|
||||
let colorVec = mm_set1_epi32(cast[int32](rgbx))
|
||||
|
@ -1573,7 +1573,7 @@ proc fillHits(
|
|||
continue
|
||||
|
||||
var x = fillStart
|
||||
when defined(amd64) and not defined(pixieNoSimd):
|
||||
when defined(amd64) and allowSimd:
|
||||
if blendMode.hasSimdMasker():
|
||||
let
|
||||
maskerSimd = blendMode.maskerSimd()
|
||||
|
|
Loading…
Reference in a new issue