Disable SIMD code paths when compiling with tcc

This commit is contained in:
Ryan Oldenburg 2022-06-02 01:22:15 -05:00
parent f16c8226ef
commit 18b814ec65
7 changed files with 40 additions and 38 deletions

View file

@ -1,8 +1,8 @@
## Blending modes. ## Blending modes.
import chroma, common, math import chroma, common, internal, std/math
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
# See https://www.w3.org/TR/compositing-1/ # See https://www.w3.org/TR/compositing-1/
@ -274,7 +274,7 @@ proc blendSoftLight(backdrop, source: ColorRGBX): ColorRGBX =
source = source.rgba() source = source.rgba()
var rgba: ColorRGBA var rgba: ColorRGBA
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let let
vb = mm_setr_ps( vb = mm_setr_ps(
backdrop.r.float32, backdrop.r.float32,
@ -479,7 +479,7 @@ proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} =
else: else:
raise newException(PixieError, "No masker for " & $blendMode) raise newException(PixieError, "No masker for " & $blendMode)
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
type type
BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].} BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].}
## Function signature returned by blenderSimd. ## Function signature returned by blenderSimd.

View file

@ -1,7 +1,7 @@
import chroma, flatty/binny, pixie/common, pixie/images, pixie/masks, sequtils, import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal,
std/decls, strutils pixie/masks, sequtils, std/decls, strutils
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
# This JPEG decoder is loosely based on stb_image which is public domain. # This JPEG decoder is loosely based on stb_image which is public domain.
@ -881,7 +881,7 @@ proc quantizationAndIDCTPass(state: var DecoderState) =
for row in 0 ..< w: for row in 0 ..< w:
var data {.byaddr.} = state.components[comp].blocks[row][column] var data {.byaddr.} = state.components[comp].blocks[row][column]
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
for i in 0 ..< 8: # 8 per pass for i in 0 ..< 8: # 8 per pass
var q = mm_loadu_si128(state.quantizationTables[qTableId][i * 8].addr) var q = mm_loadu_si128(state.quantizationTables[qTableId][i * 8].addr)
q = mm_unpacklo_epi8(q, mm_setzero_si128()) q = mm_unpacklo_epi8(q, mm_setzero_si128())

View file

@ -1,6 +1,6 @@
import blends, bumpy, chroma, common, masks, pixie/internal, vmath import blends, bumpy, chroma, common, masks, pixie/internal, vmath
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
const h = 0.5.float32 const h = 0.5.float32
@ -29,7 +29,7 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} =
proc newImage*(mask: Mask): Image {.raises: [PixieError].} = proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
result = newImage(mask.width, mask.height) result = newImage(mask.width, mask.height)
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
for _ in 0 ..< mask.data.len div 16: for _ in 0 ..< mask.data.len div 16:
var alphas = mm_loadu_si128(mask.data[i].addr) var alphas = mm_loadu_si128(mask.data[i].addr)
for j in 0 ..< 4: for j in 0 ..< 4:
@ -106,7 +106,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
let color = image.data[0] let color = image.data[0]
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let colorVec = mm_set1_epi32(cast[int32](color)) let colorVec = mm_set1_epi32(cast[int32](color))
for _ in 0 ..< image.data.len div 8: for _ in 0 ..< image.data.len div 8:
let let
@ -127,7 +127,7 @@ proc isTransparent*(image: Image): bool {.raises: [].} =
result = true result = true
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let vecZero = mm_setzero_si128() let vecZero = mm_setzero_si128()
for _ in 0 ..< image.data.len div 16: for _ in 0 ..< image.data.len div 16:
let let
@ -254,7 +254,7 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
) )
for y in 0 ..< resultEvenHeight: for y in 0 ..< resultEvenHeight:
var x: int var x: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let let
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
first32 = cast[M128i]([uint32.high, 0, 0, 0]) first32 = cast[M128i]([uint32.high, 0, 0, 0])
@ -348,7 +348,7 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
for y in 0 ..< image.height: for y in 0 ..< image.height:
# Write one row of pixels duplicated by scale # Write one row of pixels duplicated by scale
var x: int var x: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if scale == 2: if scale == 2:
while x <= image.width - 4: while x <= image.width - 4:
let let
@ -391,7 +391,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
return return
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
when type(target) is Image: when type(target) is Image:
let byteLen = target.data.len * 4 let byteLen = target.data.len * 4
else: else:
@ -447,7 +447,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
proc invert*(target: Image) {.raises: [].} = proc invert*(target: Image) {.raises: [].} =
## Inverts all of the colors and alpha. ## Inverts all of the colors and alpha.
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi8(cast[int8](255)) let vec255 = mm_set1_epi8(cast[int8](255))
let byteLen = target.data.len * 4 let byteLen = target.data.len * 4
for _ in 0 ..< byteLen div 16: for _ in 0 ..< byteLen div 16:
@ -536,7 +536,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
result = newMask(image.width, image.height) result = newMask(image.width, image.height)
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
for _ in 0 ..< image.data.len div 16: for _ in 0 ..< image.data.len div 16:
let let
a = mm_loadu_si128(image.data[i + 0].addr) a = mm_loadu_si128(image.data[i + 0].addr)
@ -798,7 +798,7 @@ proc drawUber(
) )
continue continue
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
case blendMode: case blendMode:
of OverwriteBlend: of OverwriteBlend:
for _ in 0 ..< (xStop - xStart) div 16: for _ in 0 ..< (xStop - xStart) div 16:

View file

@ -1,6 +1,8 @@
import chroma, system/memory, vmath import chroma, system/memory, vmath
when defined(amd64) and not defined(pixieNoSimd): const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)
when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
template currentExceptionAsPixieError*(): untyped = template currentExceptionAsPixieError*(): untyped =
@ -59,7 +61,7 @@ proc fillUnsafe*(
nimSetMem(data[start].addr, rgbx.r.cint, len * 4) nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
else: else:
var i = start var i = start
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
# When supported, SIMD fill until we run out of room # When supported, SIMD fill until we run out of room
let colorVec = mm_set1_epi32(cast[int32](rgbx)) let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< len div 8: for _ in 0 ..< len div 8:
@ -93,7 +95,7 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} = proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
## Converts an image to premultiplied alpha from straight alpha. ## Converts an image to premultiplied alpha from straight alpha.
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
# When supported, SIMD convert as much as possible # When supported, SIMD convert as much as possible
let let
alphaMask = mm_set1_epi32(cast[int32](0xff000000)) alphaMask = mm_set1_epi32(cast[int32](0xff000000))
@ -140,7 +142,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
result = true result = true
var i = start var i = start
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let let
vec255 = mm_set1_epi32(cast[int32](uint32.high)) vec255 = mm_set1_epi32(cast[int32](uint32.high))
colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0])) colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0]))
@ -161,7 +163,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
if data[j].a != 255: if data[j].a != 255:
return false return false
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} = proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} =
## Shuffle the alpha values for these 4 colors to the first 4 bytes ## Shuffle the alpha values for these 4 colors to the first 4 bytes
let mask = mm_set1_epi32(cast[int32](0xff000000)) let mask = mm_set1_epi32(cast[int32](0xff000000))

View file

@ -1,6 +1,6 @@
import common, internal, vmath import common, internal, vmath
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
type type
@ -87,7 +87,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
result = newMask(src.width div 2, src.height div 2) result = newMask(src.width div 2, src.height div 2)
for y in 0 ..< result.height: for y in 0 ..< result.height:
var x: int var x: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let let
oddMask = mm_set1_epi16(cast[int16](0xff00)) oddMask = mm_set1_epi16(cast[int16](0xff00))
firstByte = cast[M128i]( firstByte = cast[M128i](
@ -169,7 +169,7 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
for y in 0 ..< mask.height: for y in 0 ..< mask.height:
# Write one row of values duplicated by scale # Write one row of values duplicated by scale
var x: int var x: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if scale == 2: if scale == 2:
while x <= mask.width - 16: while x <= mask.width - 16:
let let
@ -236,7 +236,7 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
proc invert*(mask: Mask) {.raises: [].} = proc invert*(mask: Mask) {.raises: [].} =
## Inverts all of the values - creates a negative of the mask. ## Inverts all of the values - creates a negative of the mask.
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi8(cast[int8](255)) let vec255 = mm_set1_epi8(cast[int8](255))
let byteLen = mask.data.len let byteLen = mask.data.len
for _ in 0 ..< byteLen div 16: for _ in 0 ..< byteLen div 16:
@ -312,7 +312,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
proc ceil*(mask: Mask) {.raises: [].} = proc ceil*(mask: Mask) {.raises: [].} =
## A value of 0 stays 0. Anything else turns into 255. ## A value of 0 stays 0. Anything else turns into 255.
var i: int var i: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let let
zeroVec = mm_setzero_si128() zeroVec = mm_setzero_si128()
vec255 = mm_set1_epi32(cast[int32](uint32.high)) vec255 = mm_set1_epi32(cast[int32](uint32.high))

View file

@ -1,6 +1,6 @@
import chroma, common, images, vmath import chroma, common, images, internal, vmath
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
type type
@ -122,7 +122,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
if at.y == to.y: # Horizontal gradient if at.y == to.y: # Horizontal gradient
var x: int var x: int
while x < image.width: while x < image.width:
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if x + 4 <= image.width: if x + 4 <= image.width:
var colors: array[4, ColorRGBX] var colors: array[4, ColorRGBX]
for i in 0 ..< 4: for i in 0 ..< 4:
@ -153,7 +153,7 @@ proc fillGradientLinear(image: Image, paint: Paint) =
t = toLineSpace(at, to, xy) t = toLineSpace(at, to, xy)
rgbx = paint.gradientColor(t) rgbx = paint.gradientColor(t)
var x: int var x: int
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let colorVec = mm_set1_epi32(cast[int32](rgbx)) let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< image.width div 4: for _ in 0 ..< image.width div 4:
mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec) mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec)

View file

@ -1,7 +1,7 @@
import blends, bumpy, chroma, common, fenv, images, internal, masks, paints, import blends, bumpy, chroma, common, fenv, images, internal, masks, paints,
strutils, vmath strutils, vmath
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
import nimsimd/sse2 import nimsimd/sse2
type type
@ -1296,7 +1296,7 @@ proc computeCoverage(
let fillLen = at.int - fillStart let fillLen = at.int - fillStart
if fillLen > 0: if fillLen > 0:
var i = fillStart var i = fillStart
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage)) let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
for _ in 0 ..< fillLen div 16: for _ in 0 ..< fillLen div 16:
var coverageVec = mm_loadu_si128(coverages[i - startX].addr) var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
@ -1326,7 +1326,7 @@ proc fillCoverage(
blendMode: BlendMode blendMode: BlendMode
) = ) =
var x = startX var x = startX
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if blendMode.hasSimdBlender(): if blendMode.hasSimdBlender():
# When supported, SIMD blend as much as possible # When supported, SIMD blend as much as possible
let let
@ -1445,7 +1445,7 @@ proc fillCoverage(
blendMode: BlendMode blendMode: BlendMode
) = ) =
var x = startX var x = startX
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if blendMode.hasSimdMasker(): if blendMode.hasSimdMasker():
let let
maskerSimd = blendMode.maskerSimd() maskerSimd = blendMode.maskerSimd()
@ -1511,7 +1511,7 @@ proc fillHits(
continue continue
var x = fillStart var x = fillStart
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if blendMode.hasSimdBlender(): if blendMode.hasSimdBlender():
# When supported, SIMD blend as much as possible # When supported, SIMD blend as much as possible
let colorVec = mm_set1_epi32(cast[int32](rgbx)) let colorVec = mm_set1_epi32(cast[int32](rgbx))
@ -1573,7 +1573,7 @@ proc fillHits(
continue continue
var x = fillStart var x = fillStart
when defined(amd64) and not defined(pixieNoSimd): when defined(amd64) and allowSimd:
if blendMode.hasSimdMasker(): if blendMode.hasSimdMasker():
let let
maskerSimd = blendMode.maskerSimd() maskerSimd = blendMode.maskerSimd()