no simd for tcc

This commit is contained in:
Ryan Oldenburg 2022-06-02 01:22:15 -05:00
parent f16c8226ef
commit 18b814ec65
7 changed files with 40 additions and 38 deletions

View file

@ -1,8 +1,8 @@
## Blending modes.
import chroma, common, math
import chroma, common, internal, std/math
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
import nimsimd/sse2
# See https://www.w3.org/TR/compositing-1/
@ -274,7 +274,7 @@ proc blendSoftLight(backdrop, source: ColorRGBX): ColorRGBX =
source = source.rgba()
var rgba: ColorRGBA
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let
vb = mm_setr_ps(
backdrop.r.float32,
@ -479,7 +479,7 @@ proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} =
else:
raise newException(PixieError, "No masker for " & $blendMode)
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
type
BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].}
## Function signature returned by blenderSimd.

View file

@ -1,7 +1,7 @@
import chroma, flatty/binny, pixie/common, pixie/images, pixie/masks, sequtils,
std/decls, strutils
import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal,
pixie/masks, sequtils, std/decls, strutils
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
import nimsimd/sse2
# This JPEG decoder is loosely based on stb_image which is public domain.
@ -881,7 +881,7 @@ proc quantizationAndIDCTPass(state: var DecoderState) =
for row in 0 ..< w:
var data {.byaddr.} = state.components[comp].blocks[row][column]
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
for i in 0 ..< 8: # 8 per pass
var q = mm_loadu_si128(state.quantizationTables[qTableId][i * 8].addr)
q = mm_unpacklo_epi8(q, mm_setzero_si128())

View file

@ -1,6 +1,6 @@
import blends, bumpy, chroma, common, masks, pixie/internal, vmath
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
import nimsimd/sse2
const h = 0.5.float32
@ -29,7 +29,7 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
result = newImage(mask.width, mask.height)
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
for _ in 0 ..< mask.data.len div 16:
var alphas = mm_loadu_si128(mask.data[i].addr)
for j in 0 ..< 4:
@ -106,7 +106,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
let color = image.data[0]
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let colorVec = mm_set1_epi32(cast[int32](color))
for _ in 0 ..< image.data.len div 8:
let
@ -127,7 +127,7 @@ proc isTransparent*(image: Image): bool {.raises: [].} =
result = true
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let vecZero = mm_setzero_si128()
for _ in 0 ..< image.data.len div 16:
let
@ -254,7 +254,7 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
)
for y in 0 ..< resultEvenHeight:
var x: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
first32 = cast[M128i]([uint32.high, 0, 0, 0])
@ -348,7 +348,7 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
for y in 0 ..< image.height:
# Write one row of pixels duplicated by scale
var x: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if scale == 2:
while x <= image.width - 4:
let
@ -391,7 +391,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
return
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
when type(target) is Image:
let byteLen = target.data.len * 4
else:
@ -447,7 +447,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
proc invert*(target: Image) {.raises: [].} =
## Inverts all of the colors and alpha.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi8(cast[int8](255))
let byteLen = target.data.len * 4
for _ in 0 ..< byteLen div 16:
@ -536,7 +536,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
result = newMask(image.width, image.height)
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
for _ in 0 ..< image.data.len div 16:
let
a = mm_loadu_si128(image.data[i + 0].addr)
@ -798,7 +798,7 @@ proc drawUber(
)
continue
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
case blendMode:
of OverwriteBlend:
for _ in 0 ..< (xStop - xStart) div 16:

View file

@ -1,6 +1,8 @@
import chroma, system/memory, vmath
when defined(amd64) and not defined(pixieNoSimd):
# Compile-time switch for the SIMD (SSE2) code paths. It is true unless the
# build defines `pixieNoSimd` or `tcc`; modules test it together with
# `defined(amd64)` before importing nimsimd/sse2.
# NOTE(review): presumably `tcc` is the symbol set when the Tiny C Compiler is
# the C backend -- confirm against the Nim compiler docs.
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)
when defined(amd64) and allowSimd:
import nimsimd/sse2
template currentExceptionAsPixieError*(): untyped =
@ -59,7 +61,7 @@ proc fillUnsafe*(
nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
else:
var i = start
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
# When supported, SIMD fill until we run out of room
let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< len div 8:
@ -93,7 +95,7 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
## Converts an image to premultiplied alpha from straight alpha.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
# When supported, SIMD convert as much as possible
let
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
@ -140,7 +142,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
result = true
var i = start
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let
vec255 = mm_set1_epi32(cast[int32](uint32.high))
colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0]))
@ -161,7 +163,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
if data[j].a != 255:
return false
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
## Shuffle the alpha values for these 4 colors to the first 4 bytes
let mask = mm_set1_epi32(cast[int32](0xff000000))

View file

@ -1,6 +1,6 @@
import common, internal, vmath
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
import nimsimd/sse2
type
@ -87,7 +87,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
result = newMask(src.width div 2, src.height div 2)
for y in 0 ..< result.height:
var x: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
firstByte = cast[M128i](
@ -169,7 +169,7 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
for y in 0 ..< mask.height:
# Write one row of values duplicated by scale
var x: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if scale == 2:
while x <= mask.width - 16:
let
@ -236,7 +236,7 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
proc invert*(mask: Mask) {.raises: [].} =
## Inverts all of the values - creates a negative of the mask.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi8(cast[int8](255))
let byteLen = mask.data.len
for _ in 0 ..< byteLen div 16:
@ -312,7 +312,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
proc ceil*(mask: Mask) {.raises: [].} =
## A value of 0 stays 0. Anything else turns into 255.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let
zeroVec = mm_setzero_si128()
vec255 = mm_set1_epi32(cast[int32](uint32.high))

View file

@ -1,6 +1,6 @@
import chroma, common, images, vmath
import chroma, common, images, internal, vmath
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
import nimsimd/sse2
type
@ -122,7 +122,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
if at.y == to.y: # Horizontal gradient
var x: int
while x < image.width:
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if x + 4 <= image.width:
var colors: array[4, ColorRGBX]
for i in 0 ..< 4:
@ -153,7 +153,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
t = toLineSpace(at, to, xy)
rgbx = paint.gradientColor(t)
var x: int
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< image.width div 4:
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)

View file

@ -1,7 +1,7 @@
import blends, bumpy, chroma, common, fenv, images, internal, masks, paints,
strutils, vmath
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
import nimsimd/sse2
type
@ -1296,7 +1296,7 @@ proc computeCoverage(
let fillLen = at.int - fillStart
if fillLen > 0:
var i = fillStart
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
for _ in 0 ..< fillLen div 16:
var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
@ -1326,7 +1326,7 @@ proc fillCoverage(
blendMode: BlendMode
) =
var x = startX
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if blendMode.hasSimdBlender():
# When supported, SIMD blend as much as possible
let
@ -1445,7 +1445,7 @@ proc fillCoverage(
blendMode: BlendMode
) =
var x = startX
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if blendMode.hasSimdMasker():
let
maskerSimd = blendMode.maskerSimd()
@ -1511,7 +1511,7 @@ proc fillHits(
continue
var x = fillStart
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if blendMode.hasSimdBlender():
# When supported, SIMD blend as much as possible
let colorVec = mm_set1_epi32(cast[int32](rgbx))
@ -1573,7 +1573,7 @@ proc fillHits(
continue
var x = fillStart
when defined(amd64) and not defined(pixieNoSimd):
when defined(amd64) and allowSimd:
if blendMode.hasSimdMasker():
let
maskerSimd = blendMode.maskerSimd()