# Patch summary: relocates both `fillUnsafe` overloads into src/pixie/internal.nim.
#
# src/pixie/images.nim
#   - drops `system/memory` from the import list.
#   - removes `fillUnsafe*(data: var seq[ColorRGBX], color: SomeColor, start, len: int)`.
#   - `fill*(image, color)` is unchanged context; it still calls
#     `fillUnsafe(image.data, color, 0, image.data.len)`.
#
# src/pixie/internal.nim
#   - adds `system/memory` to the import list (the added code calls `nimSetMem`).
#   - adds `fillUnsafe*(data: var seq[uint8], value: uint8, start, len: int)`:
#     a single `nimSetMem` of `len` bytes starting at `data[start]`.
#   - adds `fillUnsafe*(data: var seq[ColorRGBX], color: SomeColor, start, len: int)`,
#     text-identical to the proc removed from images.nim:
#       * converts the color with `asRgbx()`;
#       * if r, g, b and a are all equal, one `nimSetMem` over `len * 4` bytes
#         (presumably 4 bytes per ColorRGBX -- confirm against chroma);
#       * otherwise, on amd64 without `pixieNoSimd`, `len div 8` iterations of two
#         `mm_storeu_si128` stores each, advancing the index by 8;
#       * otherwise, when `sizeof(int) == 8`, `len div 2` iterations of one 64-bit
#         store each, advancing the index by 2;
#       * any remaining indices up to `start + len` are assigned one element at a time.
#     Neither overload contains an explicit check of `start`/`len` in the shown code.
#
# src/pixie/masks.nim
#   - drops `system/memory` from the import list.
#   - removes the `seq[uint8]` `fillUnsafe` overload.
#   - `fill*(mask, value)` is unchanged context; it still calls
#     `fillUnsafe(mask.data, value, 0, mask.data.len)`.
#
# images.nim imports `pixie/internal` and masks.nim imports `internal` (visible in the
# import context lines), so the relocated procs are presumably still resolved there --
# TODO confirm that nothing else in images.nim / masks.nim needed `system/memory`.
#
# NOTE(review): the hunks below appear whitespace-collapsed -- each `@@` header announces
# several records but everything sits on three physical lines. The original line breaks
# and indentation must be restored before this patch can be applied; the text is kept
# byte-for-byte as found rather than guessing at the lost indentation.
diff --git a/src/pixie/images.nim b/src/pixie/images.nim index b6b1072..6ad979c 100644 --- a/src/pixie/images.nim +++ b/src/pixie/images.nim @@ -1,4 +1,4 @@ -import blends, bumpy, chroma, common, masks, pixie/internal, system/memory, vmath +import blends, bumpy, chroma, common, masks, pixie/internal, vmath when defined(amd64) and not defined(pixieNoSimd): import nimsimd/sse2 @@ -96,39 +96,6 @@ proc setColor*(image: Image, x, y: int, color: Color) {.inline, raises: [].} = ## Sets a color at (x, y) or does nothing if outside of bounds. image[x, y] = color.rgbx() -proc fillUnsafe*( - data: var seq[ColorRGBX], color: SomeColor, start, len: int -) {.raises: [].} = - ## Fills the image data with the color starting at index start and - ## continuing for len indices. - - let rgbx = color.asRgbx() - - # Use memset when every byte has the same value - if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a: - nimSetMem(data[start].addr, rgbx.r.cint, len * 4) - else: - var i = start - when defined(amd64) and not defined(pixieNoSimd): - # When supported, SIMD fill until we run out of room - let colorVec = mm_set1_epi32(cast[int32](rgbx)) - for _ in 0 ..< len div 8: - mm_storeu_si128(data[i + 0].addr, colorVec) - mm_storeu_si128(data[i + 4].addr, colorVec) - i += 8 - else: - when sizeof(int) == 8: - # Fill 8 bytes at a time when possible - let - u32 = cast[uint32](rgbx) - u64 = cast[uint64]([u32, u32]) - for _ in 0 ..< len div 2: - cast[ptr uint64](data[i].addr)[] = u64 - i += 2 - # Fill whatever is left the slow way - for j in i ..< start + len: - data[j] = rgbx - proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} = ## Fills the image with the color. 
fillUnsafe(image.data, color, 0, image.data.len) diff --git a/src/pixie/internal.nim b/src/pixie/internal.nim index bff9dfb..669899e 100644 --- a/src/pixie/internal.nim +++ b/src/pixie/internal.nim @@ -1,4 +1,4 @@ -import chroma, vmath +import chroma, system/memory, vmath when defined(amd64) and not defined(pixieNoSimd): import nimsimd/sse2 @@ -39,6 +39,46 @@ proc `*`*(color: ColorRGBX, opacity: float32): ColorRGBX {.raises: [].} = a = ((color.a * x) div 255).uint8 rgbx(r, g, b, a) +proc fillUnsafe*( + data: var seq[uint8], value: uint8, start, len: int +) {.raises: [].} = + ## Fills the mask data with the value starting at index start and + ## continuing for len indices. + nimSetMem(data[start].addr, value.cint, len) + +proc fillUnsafe*( + data: var seq[ColorRGBX], color: SomeColor, start, len: int +) {.raises: [].} = + ## Fills the image data with the color starting at index start and + ## continuing for len indices. + + let rgbx = color.asRgbx() + + # Use memset when every byte has the same value + if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a: + nimSetMem(data[start].addr, rgbx.r.cint, len * 4) + else: + var i = start + when defined(amd64) and not defined(pixieNoSimd): + # When supported, SIMD fill until we run out of room + let colorVec = mm_set1_epi32(cast[int32](rgbx)) + for _ in 0 ..< len div 8: + mm_storeu_si128(data[i + 0].addr, colorVec) + mm_storeu_si128(data[i + 4].addr, colorVec) + i += 8 + else: + when sizeof(int) == 8: + # Fill 8 bytes at a time when possible + let + u32 = cast[uint32](rgbx) + u64 = cast[uint64]([u32, u32]) + for _ in 0 ..< len div 2: + cast[ptr uint64](data[i].addr)[] = u64 + i += 2 + # Fill whatever is left the slow way + for j in i ..< start + len: + data[j] = rgbx + proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} = ## Converts an image from premultiplied alpha to straight alpha. ## This is expensive for large images. 
diff --git a/src/pixie/masks.nim b/src/pixie/masks.nim index 7643a21..ea7b718 100644 --- a/src/pixie/masks.nim +++ b/src/pixie/masks.nim @@ -1,4 +1,4 @@ -import common, internal, system/memory, vmath +import common, internal, vmath when defined(amd64) and not defined(pixieNoSimd): import nimsimd/sse2 @@ -202,13 +202,6 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} = result.width * 4 ) -proc fillUnsafe*( - data: var seq[uint8], value: uint8, start, len: int -) {.raises: [].} = - ## Fills the mask data with the value starting at index start and - ## continuing for len indices. - nimSetMem(data[start].addr, value.cint, len) - proc fill*(mask: Mask, value: uint8) {.inline, raises: [].} = ## Fills the mask with the value. fillUnsafe(mask.data, value, 0, mask.data.len)