diff --git a/src/pixie/images.nim b/src/pixie/images.nim
index 8e2cfd8..7e83979 100644
--- a/src/pixie/images.nim
+++ b/src/pixie/images.nim
@@ -454,16 +454,73 @@ proc getRgbaSmooth*(
   else:
     topMix
 
+proc blitLine(a, b: ptr UncheckedArray[ColorRGBX], len: int, blender: Blender) =
+  ## Blends `len` pixels of `b` onto `a` in place using `blender`.
+  for i in 0 ..< len:
+    a[i] = blender(a[i], b[i])
+
+proc blitLineNormal(a, b: ptr UncheckedArray[ColorRGBX], len: int) {.hasSimd.} =
+  ## Blends `len` pixels of `b` onto `a` in place using `blendNormal`.
+  for i in 0 ..< len:
+    a[i] = blendNormal(a[i], b[i])
+
+proc blitLineOverwrite(a, b: ptr UncheckedArray[ColorRGBX], len: int) =
+  ## Copies `len` pixels from `b` over `a` without blending.
+  copyMem(a[0].addr, b[0].addr, len * sizeof(ColorRGBX))
+
+template getUncheckedArray(a: Image, x, y: int): ptr UncheckedArray[ColorRGBX] =
+  ## Pointer to the pixel at (x, y), viewed as an unchecked pixel array.
+  cast[ptr UncheckedArray[ColorRGBX]](a.data[a.dataIndex(x, y)].addr)
+
+proc blitRect(
+  a, b: Image, pos = ivec2(0, 0), blendMode = NormalBlend
+) =
+  ## Blits one image onto another using integer position with color blending.
+  ## Only the part of `b` that overlaps `a` is drawn.
+  let
+    px = pos.x.int
+    py = pos.y.int
+    xStart = max(-px, 0)
+    yStart = max(-py, 0)
+    xEnd = min(b.width, a.width - px)
+    yEnd = min(b.height, a.height - py)
+
+  # No overlap: bail out before a negative length reaches the line blitters
+  # (copyMem in particular) or the address of an out-of-row pixel is taken.
+  if xEnd <= xStart or yEnd <= yStart:
+    return
+
+  case blendMode:
+  of NormalBlend:
+    for y in yStart ..< yEnd:
+      blitLineNormal(
+        a.getUncheckedArray(xStart + px, y + py),
+        b.getUncheckedArray(xStart, y),
+        xEnd - xStart
+      )
+  of OverwriteBlend:
+    {.linearScanEnd.}
+    for y in yStart ..< yEnd:
+      blitLineOverwrite(
+        a.getUncheckedArray(xStart + px, y + py),
+        b.getUncheckedArray(xStart, y),
+        xEnd - xStart
+      )
+  else:
+    let blender = blendMode.blender()
+    for y in yStart ..< yEnd:
+      blitLine(
+        a.getUncheckedArray(xStart + px, y + py),
+        b.getUncheckedArray(xStart, y),
+        xEnd - xStart,
+        blender
+      )
+
 proc drawCorrect(
   a, b: Image | Mask, transform = mat3(), blendMode = NormalBlend, tiled = false
 ) {.raises: [PixieError].} =
   ## Draws one image onto another using matrix with color blending.
 
-  when type(a) is Image:
-    let blender = blendMode.blender()
-  else: # a is a Mask
-    let masker = blendMode.masker()
-
   var inverseTransform = transform.inverse()
 
   # Compute movement vectors
@@ -489,6 +546,27 @@ proc drawCorrect(
     filterBy2 *= 2
     inverseTransform = scale(vec2(2, 2)) * inverseTransform
 
+  let
+    hasRotationOrScaling = not(dx == vec2(1, 0) and dy == vec2(0, 1))
+    smooth = not(
+      dx.length == 1.0 and
+      dy.length == 1.0 and
+      transform[2, 0].fractional == 0.0 and
+      transform[2, 1].fractional == 0.0
+    )
+
+  # Fast path: an untiled image-on-image draw that is a pure whole-pixel
+  # translation is handed to blitRect instead of the per-pixel loop below.
+  when type(a) is Image and type(b) is Image:
+    if not hasRotationOrScaling and not smooth and not tiled:
+      blitRect(a, b, ivec2(transform[2, 0].int32, transform[2, 1].int32), blendMode)
+      return
+
+  when type(a) is Image:
+    let blender = blendMode.blender()
+  else: # a is a Mask
+    let masker = blendMode.masker()
+
   for y in 0 ..< a.height:
     for x in 0 ..< a.width:
       let
diff --git a/src/pixie/simd/sse2.nim b/src/pixie/simd/sse2.nim
index 0f9c347..5724cfd 100644
--- a/src/pixie/simd/sse2.nim
+++ b/src/pixie/simd/sse2.nim
@@ -417,5 +417,62 @@ proc applyOpacitySse2*(mask: Mask, opacity: float32) {.simd.} =
   for i in i ..< mask.data.len:
     mask.data[i] = ((mask.data[i] * opacity) div 255).uint8
 
+proc blitLineNormalSse2*(a, b: ptr UncheckedArray[ColorRGBX], len: int) {.simd.} =
+  ## Blends `len` pixels of `b` onto `a` in place, four pixels per SSE2
+  ## iteration. Each byte becomes src + dst * (255 - srcAlpha) / 255.
+
+  # TODO align to 16
+
+  let
+    alphaMask = mm_set1_epi32(cast[int32](0xff000000))
+    oddMask = mm_set1_epi16(cast[int16](0xff00))
+    div255 = mm_set1_epi16(cast[int16](0x8081))
+    vec255 = mm_set1_epi32(cast[int32]([0.uint8, 255, 0, 255]))
+
+  var i = 0
+  # `<=` so that the final full group of four pixels is blended as well.
+  while i <= len - 4:
+    let
+      source = mm_loadu_si128(b[i].addr)
+      backdrop = mm_loadu_si128(a[i].addr)
+
+    var
+      sourceAlpha = mm_and_si128(source, alphaMask)
+      backdropEven = mm_slli_epi16(backdrop, 8)
+      backdropOdd = mm_and_si128(backdrop, oddMask)
+
+    # Replicate each pixel's alpha into the high byte of both 16-bit lanes.
+    sourceAlpha = mm_or_si128(sourceAlpha, mm_srli_epi32(sourceAlpha, 16))
+
+    # k holds (255 - alpha) in the high byte of every 16-bit lane.
+    let k = mm_sub_epi32(vec255, sourceAlpha)
+
+    backdropEven = mm_mulhi_epu16(backdropEven, k)
+    backdropOdd = mm_mulhi_epu16(backdropOdd, k)
+
+    # High half of a multiply by 0x8081, shifted right by 7, divides by 255.
+    backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
+    backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)
+
+    let done = mm_add_epi8(
+      source,
+      mm_or_si128(backdropEven, mm_slli_epi16(backdropOdd, 8))
+    )
+
+    mm_storeu_si128(a[i].addr, done)
+
+    i += 4
+
+  # Blend the remaining 0-3 pixels byte by byte with the same formula as the
+  # vector loop (alpha is byte 3 of each 4-byte pixel, matching alphaMask).
+  let
+    dst = cast[ptr UncheckedArray[uint8]](a)
+    src = cast[ptr UncheckedArray[uint8]](b)
+  for p in i ..< len:
+    let k = 255 - src[p * 4 + 3].uint32
+    for c in 0 ..< 4:
+      let j = p * 4 + c
+      dst[j] = src[j] + ((dst[j].uint32 * k) div 255).uint8
+
 when defined(release):
   {.pop.}
diff --git a/tests/bench_blends.nim b/tests/bench_blends.nim
index d149f20..59befb7 100644
--- a/tests/bench_blends.nim
+++ b/tests/bench_blends.nim
@@ -1,8 +1,8 @@
 import benchy, chroma, pixie/blends, pixie/images, vmath
 
 let
-  backdrop = newImage(512, 512)
-  source = newImage(512, 512)
+  backdrop = newImage(256, 256)
+  source = newImage(256, 256)
 source.fill(rgba(100, 100, 100, 100))
 
 template reset() =
diff --git a/tests/bench_images.nim b/tests/bench_images.nim
index 0185110..b99c3eb 100644
--- a/tests/bench_images.nim
+++ b/tests/bench_images.nim
@@ -134,9 +134,9 @@ timeIt "mix integers":
     doAssert z > 0
 
 timeIt "mix floats":
-  for i in 0 ..< 100000:
-    let c = image[0, 0]
+  for i in 0 ..< 1000:
+    let c = image[0, 0].color
     var z: int
     for t in 0 .. 100:
-      z += mix(c.color, c.color, t.float32 / 100).rgba().a.int
+      z += mix(c, c, t.float32 / 100).rgba().a.int
     doAssert z > 0
diff --git a/tests/bench_images_draw_correct.nim b/tests/bench_images_draw_correct.nim
new file mode 100644
index 0000000..bed3c6f
--- /dev/null
+++ b/tests/bench_images_draw_correct.nim
@@ -0,0 +1,37 @@
+import pixie, strformat, xrays
+import pixie/images {.all.}
+
+block:
+  let
+    a = newImage(1000, 1000)
+    b = newImage(500, 500)
+  a.fill(rgba(255, 0, 0, 255))
+  b.fill(rgba(0, 255, 0, 255))
+
+  a.drawCorrect(b, translate(vec2(250, 250)), blendMode = OverwriteBlend)
+  a.writeFile("tests/images/rotate0.png")
+
+import benchy
+
+block:
+  let
+    a = newImage(1000, 1000)
+    b = newImage(500, 500)
+
+  timeIt "drawCorrect":
+    a.fill(rgba(255, 0, 0, 255))
+    b.fill(rgba(0, 255, 0, 255))
+
+    a.drawCorrect(b, translate(vec2(250, 250)), blendMode = OverwriteBlend)
+
+
+block:
+  let
+    a = newImage(1000, 1000)
+    b = newImage(500, 500)
+
+  timeIt "draw":
+    a.fill(rgba(255, 0, 0, 255))
+    b.fill(rgba(0, 255, 0, 255))
+
+    a.draw(b, translate(vec2(250, 250)), blendMode = OverwriteBlend)