From 18b814ec65eb297b6e4083a7143cfe5fbb244c92 Mon Sep 17 00:00:00 2001 From: Ryan Oldenburg Date: Thu, 2 Jun 2022 01:22:15 -0500 Subject: [PATCH] no simd for tcc --- src/pixie/blends.nim | 8 ++++---- src/pixie/fileformats/jpeg.nim | 8 ++++---- src/pixie/images.nim | 20 ++++++++++---------- src/pixie/internal.nim | 12 +++++++----- src/pixie/masks.nim | 10 +++++----- src/pixie/paints.nim | 8 ++++---- src/pixie/paths.nim | 12 ++++++------ 7 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/pixie/blends.nim b/src/pixie/blends.nim index 01a5c39..1ef0900 100644 --- a/src/pixie/blends.nim +++ b/src/pixie/blends.nim @@ -1,8 +1,8 @@ ## Blending modes. -import chroma, common, math +import chroma, common, internal, std/math -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: import nimsimd/sse2 # See https://www.w3.org/TR/compositing-1/ @@ -274,7 +274,7 @@ proc blendSoftLight(backdrop, source: ColorRGBX): ColorRGBX = source = source.rgba() var rgba: ColorRGBA - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let vb = mm_setr_ps( backdrop.r.float32, @@ -479,7 +479,7 @@ proc masker*(blendMode: BlendMode): Masker {.raises: [PixieError].} = else: raise newException(PixieError, "No masker for " & $blendMode) -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: type BlenderSimd* = proc(blackdrop, source: M128i): M128i {.gcsafe, raises: [].} ## Function signature returned by blenderSimd. diff --git a/src/pixie/fileformats/jpeg.nim b/src/pixie/fileformats/jpeg.nim index 8f0b91a..22bf277 100644 --- a/src/pixie/fileformats/jpeg.nim +++ b/src/pixie/fileformats/jpeg.nim @@ -1,7 +1,7 @@ -import chroma, flatty/binny, pixie/common, pixie/images, pixie/masks, sequtils, - std/decls, strutils +import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal, + pixie/masks, sequtils, std/decls, strutils -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: import nimsimd/sse2 # This JPEG decoder is loosely based on stb_image which is public domain. @@ -881,7 +881,7 @@ proc quantizationAndIDCTPass(state: var DecoderState) = for row in 0 ..< w: var data {.byaddr.} = state.components[comp].blocks[row][column] - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: for i in 0 ..< 8: # 8 per pass var q = mm_loadu_si128(state.quantizationTables[qTableId][i * 8].addr) q = mm_unpacklo_epi8(q, mm_setzero_si128()) diff --git a/src/pixie/images.nim b/src/pixie/images.nim index ce3b415..c3e82fb 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -1,6 +1,6 @@ import blends, bumpy, chroma, common, masks, pixie/internal, vmath -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: import nimsimd/sse2 const h = 0.5.float32 @@ -29,7 +29,7 @@ proc newImage*(width, height: int): Image {.raises: [PixieError].} = proc newImage*(mask: Mask): Image {.raises: [PixieError].} = result = newImage(mask.width, mask.height) var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: for _ in 0 ..< mask.data.len div 16: var alphas = mm_loadu_si128(mask.data[i].addr) for j in 0 ..< 4: @@ -106,7 +106,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} = let color = image.data[0] var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let colorVec = mm_set1_epi32(cast[int32](color)) for _ in 0 ..< image.data.len div 8: let @@ -127,7 +127,7 @@ proc isTransparent*(image: Image): bool {.raises: [].} = result = true var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let vecZero = mm_setzero_si128() for _ in 0 ..< image.data.len div 16: let @@ -254,7 +254,7 @@ proc minifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} = ) for y in 0 ..< resultEvenHeight: var x: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let oddMask = mm_set1_epi16(cast[int16](0xff00)) first32 = cast[M128i]([uint32.high, 0, 0, 0]) @@ -348,7 +348,7 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} = for y in 0 ..< image.height: # Write one row of pixels duplicated by scale var x: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if scale == 2: while x <= image.width - 4: let @@ -391,7 +391,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = return var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: when type(target) is Image: let byteLen = target.data.len * 4 else: @@ -447,7 +447,7 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} = proc invert*(target: Image) {.raises: [].} = ## Inverts all of the colors and alpha. var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let vec255 = mm_set1_epi8(cast[int8](255)) let byteLen = target.data.len * 4 for _ in 0 ..< byteLen div 16: @@ -536,7 +536,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} = result = newMask(image.width, image.height) var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: for _ in 0 ..< image.data.len div 16: let a = mm_loadu_si128(image.data[i + 0].addr) @@ -798,7 +798,7 @@ proc drawUber( ) continue - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: case blendMode: of OverwriteBlend: for _ in 0 ..< (xStop - xStart) div 16: diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index eea791f..ead70e4 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -1,6 +1,8 @@ import chroma, system/memory, vmath -when defined(amd64) and not defined(pixieNoSimd): +const allowSimd* = not defined(pixieNoSimd) and not defined(tcc) + +when defined(amd64) and allowSimd: import nimsimd/sse2 template currentExceptionAsPixieError*(): untyped = @@ -59,7 +61,7 @@ proc fillUnsafe*( nimSetMem(data[start].addr, rgbx.r.cint, len * 4) else: var i = start - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: # When supported, SIMD fill until we run out of room let colorVec = mm_set1_epi32(cast[int32](rgbx)) for _ in 0 ..< len div 8: @@ -93,7 +95,7 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} = proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} = ## Converts an image to premultiplied alpha from straight alpha. var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: # When supported, SIMD convert as much as possible let alphaMask = mm_set1_epi32(cast[int32](0xff000000)) @@ -140,7 +142,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool = result = true var i = start - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let vec255 = mm_set1_epi32(cast[int32](uint32.high)) colorMask = mm_set1_epi32(cast[int32]([255.uint8, 255, 255, 0])) @@ -161,7 +163,7 @@ proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool = if data[j].a != 255: return false -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: proc packAlphaValues*(v: M128i): M128i {.inline, raises: [].} = ## Shuffle the alpha values for these 4 colors to the first 4 bytes let mask = mm_set1_epi32(cast[int32](0xff000000)) diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 4ff81ac..3af7cf7 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -1,6 +1,6 @@ import common, internal, vmath -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: import nimsimd/sse2 type @@ -87,7 +87,7 @@ proc minifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = result = newMask(src.width div 2, src.height div 2) for y in 0 ..< result.height: var x: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let oddMask = mm_set1_epi16(cast[int16](0xff00)) firstByte = cast[M128i]( @@ -169,7 +169,7 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = for y in 0 ..< mask.height: # Write one row of values duplicated by scale var x: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if scale == 2: while x <= mask.width - 16: let @@ -236,7 +236,7 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} = proc invert*(mask: Mask) {.raises: [].} = ## Inverts all of the values - creates a negative of the mask. var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let vec255 = mm_set1_epi8(cast[int8](255)) let byteLen = mask.data.len for _ in 0 ..< byteLen div 16: @@ -312,7 +312,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} = proc ceil*(mask: Mask) {.raises: [].} = ## A value of 0 stays 0. Anything else turns into 255. var i: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let zeroVec = mm_setzero_si128() vec255 = mm_set1_epi32(cast[int32](uint32.high)) diff --git a/src/pixie/paints.nim b/src/pixie/paints.nim index 1b6aa7d..133a367 100644 --- a/src/pixie/paints.nim +++ b/src/pixie/paints.nim @@ -1,6 +1,6 @@ -import chroma, common, images, vmath +import chroma, common, images, internal, vmath -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: import nimsimd/sse2 type @@ -122,7 +122,7 @@ proc fillGradientLinear(image: Image, paint: Paint) = if at.y == to.y: # Horizontal gradient var x: int while x < image.width: - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if x + 4 <= image.width: var colors: array[4, ColorRGBX] for i in 0 ..< 4: @@ -153,7 +153,7 @@ proc fillGradientLinear(image: Image, paint: Paint) = t = toLineSpace(at, to, xy) rgbx = paint.gradientColor(t) var x: int - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let colorVec = mm_set1_epi32(cast[int32](rgbx)) for _ in 0 ..< image.width div 4: mm_storeu_si128(image.data[image.dataIndex(x, y)].addr, colorVec) diff --git a/src/pixie/paths.nim b/src/pixie/paths.nim index 414a619..50f6054 100644 --- a/src/pixie/paths.nim +++ b/src/pixie/paths.nim @@ -1,7 +1,7 @@ import blends, bumpy, chroma, common, fenv, images, internal, masks, paints, strutils, vmath -when defined(amd64) and not defined(pixieNoSimd): +when defined(amd64) and allowSimd: import nimsimd/sse2 type @@ -1296,7 +1296,7 @@ proc computeCoverage( let fillLen = at.int - fillStart if fillLen > 0: var i = fillStart - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage)) for _ in 0 ..< fillLen div 16: var coverageVec = mm_loadu_si128(coverages[i - startX].addr) @@ -1326,7 +1326,7 @@ proc fillCoverage( blendMode: BlendMode ) = var x = startX - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if blendMode.hasSimdBlender(): # When supported, SIMD blend as much as possible let @@ -1445,7 +1445,7 @@ proc fillCoverage( blendMode: BlendMode ) = var x = startX - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if blendMode.hasSimdMasker(): let maskerSimd = blendMode.maskerSimd() @@ -1511,7 +1511,7 @@ proc fillHits( continue var x = fillStart - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if blendMode.hasSimdBlender(): # When supported, SIMD blend as much as possible let colorVec = mm_set1_epi32(cast[int32](rgbx)) @@ -1573,7 +1573,7 @@ proc fillHits( continue var x = fillStart - when defined(amd64) and not defined(pixieNoSimd): + when defined(amd64) and allowSimd: if blendMode.hasSimdMasker(): let maskerSimd = blendMode.maskerSimd()