diff --git a/.gitmodules b/.gitmodules index fcda07a..45a38a8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,6 @@ [submodule "libs/astc/astc-encoder"] path = libs/astc/astc-encoder url = https://github.com/ARM-software/astc-encoder +[submodule "libs/bc7enc/bc7enc_rdo"] + path = libs/bc7enc/bc7enc_rdo + url = https://github.com/DiThi/bc7enc_rdo diff --git a/libs/bc7enc/bc7enc.cpp b/libs/bc7enc/bc7enc.cpp new file mode 100644 index 0000000..af12805 --- /dev/null +++ b/libs/bc7enc/bc7enc.cpp @@ -0,0 +1,160 @@ +#define BC7ENC_VERSION "1.08" +#define COMPUTE_SSIM (0) +#if _OPENMP +#include +#endif +#include +#include "bc7enc_rdo/rdo_bc_encoder.h" +#include "bc7enc_rdo/utils.h" + +// This is based on main() in test.cpp to make a C function + +// Valid formats: 1 3 4 5 7 +// BC1 RGB 4bpp fast, small +// BC3 RGBA 8bpp fast +// BC4 gray 4bpp best for grayscale +// BC5 X+Y 8bpp +// BC7 RGB(A) 8bpp slow, best quality + +typedef struct { + void *data; + int32_t len; + int32_t width; + int32_t height; + int8_t format; // BCn format to use. 
+ // TODO: encoding settings +} EncodeBcInput; + +typedef struct { + void *data; + int32_t len; + int32_t row_len; +} EncodeBcOutput; + +typedef enum { + NO_ERROR = 0, + ENCODER_INIT_ERROR = 1, + ENCODE_ERROR = 2, + INVALID_SIZE_ERROR = 3, + UNSUPPORTED_FORMAT_ERROR = 4, + INVALID_INPUT_LENGTH_ERROR = 5, + INVALID_OUTPUT_LENGTH_ERROR = 6, +} EncodeBcError; + +extern "C" EncodeBcError encode_bc(EncodeBcInput &input, EncodeBcOutput &output, bool verbose) // input.data: width*height*4 bytes; output.data: caller-allocated, output.len must equal the encoded size +{ + bool quiet_mode = !verbose; + + int max_threads = 1; +#if _OPENMP + max_threads = std::min(std::max(1, omp_get_max_threads()), 128); +#endif + + uint32_t pixel_format_bpp = 8; + + rdo_bc::rdo_bc_params rp; + rp.m_rdo_max_threads = max_threads; + rp.m_status_output = !quiet_mode; + + switch(input.format){ + case 1: + rp.m_dxgi_format = DXGI_FORMAT_BC1_UNORM; + pixel_format_bpp = 4; + break; + case 3: + rp.m_dxgi_format = DXGI_FORMAT_BC3_UNORM; + break; + case 4: + rp.m_dxgi_format = DXGI_FORMAT_BC4_UNORM; + pixel_format_bpp = 4; + break; + case 5: + rp.m_dxgi_format = DXGI_FORMAT_BC5_UNORM; + break; + case 7: + // it's already default + break; + default: + return UNSUPPORTED_FORMAT_ERROR; + } + int32_t width = input.width; + int32_t height = input.height; + if(width <= 0 || height <= 0){ // negative sizes are as invalid as zero + return INVALID_SIZE_ERROR; + } + + utils::image_u8 source_image; + + // Validate the caller's buffer length with 64-bit math (no int32 overflow) before allocating anything + const int64_t input_data_size = (int64_t)width * (int64_t)height * (int64_t)sizeof(uint32_t); + if(input_data_size != input.len){ + return INVALID_INPUT_LENGTH_ERROR; + } + source_image.init(width, height); // TODO: avoid a copy somehow + memcpy(source_image.get_pixels().data(), input.data, input.len); + + if (rp.m_status_output) + { + printf("Max threads: %d\n", max_threads); + printf("Supports bc7e.ispc: %u\n", SUPPORT_BC7E); + } + + clock_t overall_start_t = clock(); + + rdo_bc::rdo_bc_encoder encoder; + if (!encoder.init(source_image, rp)) + { + fprintf(stderr, "rdo_bc_encoder::init() failed!\n"); + return ENCODER_INIT_ERROR; + } + + if (rp.m_status_output) + { + if 
(encoder.get_has_alpha()) + printf("Source image has an alpha channel.\n"); + else + printf("Source image is opaque.\n"); + } + + if (!encoder.encode()) + { + fprintf(stderr, "rdo_bc_encoder::encode() failed!\n"); + return ENCODE_ERROR; + } + + clock_t overall_end_t = clock(); + + if (rp.m_status_output) + printf("Total processing time: %f secs\n", (double)(overall_end_t - overall_start_t) / CLOCKS_PER_SEC); + + // Size of the encoded blocks; the Deflate statistics inherited from test.cpp are disabled below + const uint32_t output_data_size = encoder.get_total_blocks_size_in_bytes(); + // const uint32_t pre_rdo_comp_size = get_deflate_size(encoder.get_prerdo_blocks(), output_data_size); + + // float pre_rdo_lz_bits_per_texel = (pre_rdo_comp_size * 8.0f) / encoder.get_total_texels(); + + // if (rp.m_status_output) + // { + // printf("Output data size: %u, LZ (Deflate) compressed file size: %u, %3.2f bits/texel\n", + // output_data_size, + // (uint32_t)pre_rdo_comp_size, + // pre_rdo_lz_bits_per_texel); + // } + + // const uint32_t comp_size = get_deflate_size(encoder.get_blocks(), output_data_size); + + // float lz_bits_per_texel = comp_size * 8.0f / encoder.get_total_texels(); + + // if (rp.m_status_output) + // printf("RDO output data size: %u, LZ (Deflate) compressed file size: %u, %3.2f bits/texel, savings: %3.2f%%\n", output_data_size, (uint32_t)comp_size, lz_bits_per_texel, + // (lz_bits_per_texel != pre_rdo_lz_bits_per_texel) ? 
100.0f - (lz_bits_per_texel * 100.0f) / pre_rdo_lz_bits_per_texel : 0.0f); + + if(output.len < 0 || (uint32_t)output.len != output_data_size){ // explicit signed/unsigned handling + fprintf(stderr, "Output length is %d, expected %u\n", output.len, output_data_size); + return INVALID_OUTPUT_LENGTH_ERROR; + } + // TODO: avoid a copy + memcpy(output.data, encoder.get_blocks(), output_data_size); + + return NO_ERROR; +} diff --git a/libs/bc7enc/bc7enc.nim b/libs/bc7enc/bc7enc.nim new file mode 100644 index 0000000..0293b70 --- /dev/null +++ b/libs/bc7enc/bc7enc.nim @@ -0,0 +1,88 @@ + +{.passC:"-DBC7ENC_USE_MINIZ=0".} +{.compile:"./bc7enc_rdo/bc7decomp.cpp", +compile:"./bc7enc_rdo/bc7decomp_ref.cpp", +compile:"./bc7enc_rdo/bc7enc.cpp", +compile:"./bc7enc_rdo/ert.cpp", +compile:"./bc7enc_rdo/lodepng.cpp", +compile:"./bc7enc_rdo/rdo_bc_encoder.cpp", +compile:"./bc7enc_rdo/rgbcx.cpp", +compile:"./bc7enc_rdo/utils.cpp", +compile:"./bc7enc.cpp".} + +## Valid formats: 1 3 4 5 7 +## BC1 RGB 4bpp fast, small +## BC3 RGBA 8bpp fast +## BC4 gray 4bpp best for grayscale +## BC5 X+Y 8bpp +## BC7 RGB(A) 8bpp slow, best quality + +type EncodeBcInput* {.bycopy.} = object + data*: pointer + len*: int32 + width*: int32 + height*: int32 + format*: int8 ## BCn format to use. 
+ ## TODO: encoding settings + +type EncodeBcOutput* {.bycopy.} = object + data*: pointer + len*: int32 + row_len*: int32 + +type EncodeBcError* {.size:4.} = enum + NO_ERROR = 0, ENCODER_INIT_ERROR = 1, ENCODE_ERROR = 2, + INVALID_SIZE_ERROR = 3, UNSUPPORTED_FORMAT_ERROR = 4, + INVALID_INPUT_LENGTH_ERROR = 5, INVALID_OUTPUT_LENGTH_ERROR = 6 + +proc encode_bc*(input: var EncodeBcInput, output: var EncodeBcOutput, verbose: bool): EncodeBcError {.importc, cdecl.} + +{.hint[XDeclaredButNotUsed]: off.} + +# S3TC (BC1-3) +const GL_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0'i32 +const GL_COMPRESSED_RGBA_S3TC_DXT1_EXT = 0x83F1'i32 +const GL_COMPRESSED_RGBA_S3TC_DXT3_EXT = 0x83F2'i32 +const GL_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3'i32 +# S3TC (BC1-3) sRGB +const GL_COMPRESSED_SRGB_S3TC_DXT1_EXT = 0x8C4C'i32 +const GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT = 0x8C4D'i32 +const GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT = 0x8C4E'i32 +const GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT = 0x8C4F'i32 +# RGTC (BC4-5) +const GL_COMPRESSED_RED_RGTC1_EXT = 0x8DBB'i32 +const GL_COMPRESSED_SIGNED_RED_RGTC1_EXT = 0x8DBC'i32 +const GL_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD'i32 +const GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT = 0x8DBE'i32 +# BPTC float (BC6H) +const GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB = 0x8E8E'i32 +const GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB = 0x8E8F'i32 +# BPTC (BC7) +const GL_COMPRESSED_RGBA_BPTC_UNORM_ARB = 0x8E8C'i32 +# BPTC (BC7) sRGB +const GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB = 0x8E8D'i32 + +func get_bc_bpp_internal_format*(bc_format: int8, is_sRGB: bool): (int8, int32) = + let (bpp, internal_format) = if is_sRGB: + case bc_format: + of 1: (4, GL_COMPRESSED_SRGB_S3TC_DXT1_EXT) + of 2: (8, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT) + of 3: (8, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT) + of 4: (4, GL_COMPRESSED_RED_RGTC1_EXT) # sRGB is ignored + of 5: (8, GL_COMPRESSED_RED_GREEN_RGTC2_EXT) # sRGB is ignored + of 6: (8, GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB) # sRGB is ignored 
+ of 7: (8, GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB) + else: (0, 0'i32) + else: + case bc_format: + of 1: (4, GL_COMPRESSED_RGB_S3TC_DXT1_EXT) + of 2: (8, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) + of 3: (8, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) + of 4: (4, GL_COMPRESSED_RED_RGTC1_EXT) + of 5: (8, GL_COMPRESSED_RED_GREEN_RGTC2_EXT) + of 6: (8, GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB) + of 7: (8, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB) + else: (0, 0'i32) + assert bpp != 0, "Invalid bc_format " & $bc_format + return (bpp.int8, internal_format) + diff --git a/libs/bc7enc/bc7enc_rdo b/libs/bc7enc/bc7enc_rdo new file mode 160000 index 0000000..f3941ed --- /dev/null +++ b/libs/bc7enc/bc7enc_rdo @@ -0,0 +1 @@ +Subproject commit f3941ed03e771c5006bb43ebff10e669156c858a diff --git a/libs/dds_ktx/dds_ktx.nim b/libs/dds_ktx/dds_ktx.nim index 086bf52..4c7532c 100644 --- a/libs/dds_ktx/dds_ktx.nim +++ b/libs/dds_ktx/dds_ktx.nim @@ -12,6 +12,7 @@ type KtxInfo* = object type KtxPart* = object width*, height*: int32 layer*, face*, mip_level*: int32 + slice*: int32 data*: pointer len*: int row_len*: int diff --git a/src/gpu_formats/texture_decode.nim b/src/gpu_formats/texture_decode.nim index ab40565..183aa53 100644 --- a/src/gpu_formats/texture_decode.nim +++ b/src/gpu_formats/texture_decode.nim @@ -94,6 +94,10 @@ func resize*(format: TextureFormat, channel_count: int): TextureFormat = let f = format.int TextureFormat(f - format.channel_count + channel_count) +func format_depth*(tex: Texture): int {.inline.} = + if tex.tex_type == TexCube: 1 + else: tex.depth + template toOpenArrayByte(p: pointer, a,b: untyped): untyped = cast[ptr UncheckedArray[byte]](p).toOpenArray(a,b) diff --git a/src/gpu_formats/texture_optimize.nim b/src/gpu_formats/texture_optimize.nim index 82021bd..5804894 100644 --- a/src/gpu_formats/texture_optimize.nim +++ b/src/gpu_formats/texture_optimize.nim @@ -2,94 +2,58 @@ import ../types import ./texture_decode from dds_ktx import KtxInfo, KtxPart, get_ASTC_internal_format 
-import astc -export astc import arr_ref import stb_image_resize import std/monotimes import std/bitops +# TODO: don't import it here +from ../platform/gl import nil + +when defined(myouUseBC7Encoder): + import bc7enc +when defined(myouUseAstcEncoder): + import astc +when defined(android) or defined(ios) or defined(emscripten): + template has_bptc_support: bool = gl.GLAD_GL_EXT_texture_compression_bptc +else: + template has_bptc_support: bool = gl.GLAD_GL_ARB_texture_compression_bptc +template has_astc_support: bool = gl.GLAD_GL_OES_texture_compression_astc or + gl.GLAD_GL_KHR_texture_compression_astc_ldr + const myouEngineNumTextureThreads {.intdefine.} = 4 const myouEngineCompressTextures {.booldefine.} = true - -const GL_COMPRESSED_RGBA_ASTC_12x12_KHR = 0x93BD'i32 -const GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR = 0x93DD'i32 +const myouBC7VerboseMode {.booldefine.} = true template u32(x: untyped): uint32 = cast[uint32](x) -proc astc_compress_layer*(context: ptr AstcencContext, config: AstcencConfig, - format: TextureFormat; width, height: int, slices: seq[pointer]): ArrRef[byte] = - let depth = slices.len - let data_type = case format: - of SRGB_u8, SRGB_Alpha_u8, R_u8, RG_u8, RGB_u8, RGBA_u8: ASTCENC_TYPE_U8 - of R_f16, RG_f16, RGB_f16, RGBA_f16: ASTCENC_TYPE_F16 - of R_f32, RG_f32, RGB_f32, RGBA_f32: ASTCENC_TYPE_F32 - else: raise ValueError.newException "Unsupported data type of " & $format - let swizzle = case format.channel_count: - # TODO: more optimal usage for 1/2 channels, normals, etc. 
- of 1: AstcencSwizzle(r: SWZ_R, g: SWZ_R, b: SWZ_R, a: SWZ_1) - of 2: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_0, a: SWZ_1) - of 3: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_1) - else: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_A) - let blocks_x = (width.u32 + config.block_x - 1) div config.block_x - let blocks_y = (height.u32 + config.block_y - 1) div config.block_y - let blocks_z = (depth.u32 + config.block_z - 1) div config.block_z - let buffer_size = blocks_x * blocks_y * blocks_z * 16 - result = newArrRef[byte](buffer_size) - let img = AstcencImage( - dim_x: width.u32, - dim_y: height.u32, - dim_z: depth.u32, - data_type: data_type, - data: slices[0].addr, - ) - const thread_index = 0 # TODO - let err = astcenc_compress_image(context, img.addr, swizzle.addr, - result[0].addr, buffer_size, thread_index) - assert err == ASTCENC_SUCCESS, "ASTC encoding error: " & $err +type CallbackUncompressed = proc(tex: Texture, data: SliceMem[byte]) {.gcsafe.} +type CallbackCompressed = proc(tex: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]]) {.gcsafe.} +type CompressMipmapResult = tuple[data: ArrRef[byte], row_len: int] +type CompressMipmap = proc(width, height: int32, p: pointer, len: int32): CompressMipmapResult {.closure.} -proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], - callback: proc(self: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]])) = +template block_file_size(width, height, depth, block_x, block_y, block_z, block_byte_size: untyped): int = + let blocks_x = (width.int + block_x.int - 1) div block_x.int + let blocks_y = (height.int + block_y.int - 1) div block_y.int + let blocks_z = (depth.int + block_z.int - 1) div block_z.int + blocks_x * blocks_y * blocks_z * block_byte_size.int + +proc make_mipmaps(tex: Texture, pixels: SliceMem[byte], compress: CompressMipmap): (seq[KtxPart], seq[ArrRef[byte]]) = var width = tex.width.int32 var height = tex.height.int32 - let depth = if 
tex.tex_type == TexCube: 1'i32 else: tex.depth.int32 - # TODO: when do we need ASTCENC_PRF_HDR? - # maybe when A is connected to displacement? - var profile: AstcencProfile - var stbir_data_type: StbirDatatype - case tex.format.component_type: + let depth = tex.format_depth + assert depth == 1, "Compressed 2D arrays and 3D textures not supported yet" + assert tex.tex_type != TexCube, "Compressed cube textures not supported yet" + let stbir_data_type = case tex.format.component_type: of UByte: - if tex.is_sRGB: - profile = ASTCENC_PRF_LDR_SRGB - stbir_data_type = STBIR_TYPE_UINT8_SRGB - else: - profile = ASTCENC_PRF_LDR - stbir_data_type = STBIR_TYPE_UINT8 - of HalfFloat: - profile = ASTCENC_PRF_HDR_RGB_LDR_A - stbir_data_type = STBIR_TYPE_HALF_FLOAT - of Float: - profile = ASTCENC_PRF_HDR_RGB_LDR_A - stbir_data_type = STBIR_TYPE_FLOAT - of UShort: - profile = ASTCENC_PRF_HDR_RGB_LDR_A - stbir_data_type = STBIR_TYPE_UINT16 - else: - assert false, "unreachable" + if tex.is_sRGB: STBIR_TYPE_UINT8_SRGB + else: STBIR_TYPE_UINT8 + of HalfFloat: STBIR_TYPE_HALF_FLOAT + of Float: STBIR_TYPE_FLOAT + of UShort: STBIR_TYPE_UINT16 + else: raise Defect.newException "unreachable" # 4 because we're forcing 4 channels for the encoder let pixel_stride = 4 * (tex.format.stride div tex.format.channel_count) - let quality = 10.0 - let flags = 0'u32 - var config: AstcencConfig - let blk_size = (6,6) - # TODO: 3D texture block size - astc_assert astcenc_config_init(profile, - blk_size[0].uint32, blk_size[1].uint32, 1'u32, - quality, flags, config.addr) - # config.progress_callback = proc(p: float32) {.cdecl.} = - # echo "progress ", p - var ctx: ptr AstcencContext - astc_assert astcenc_context_alloc(config.addr, 1, ctx) let time = getmonotime().ticks.float/1000000000 var parts: seq[KtxPart] var data_refs: seq[ArrRef[byte]] @@ -102,8 +66,9 @@ proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], let resize_level_0 = w != prev_w or h != prev_h var current_level, last_level: 
ArrRef[byte] var last_src: pointer = pixels.data - while (w > 4 and h > 4) or parts.len == 0: - last_src = if mip_level != 0 or resize_level_0: + var last_len: int + while w >= 4 or h >= 4 or parts.len == 0: + (last_src, last_len) = if mip_level != 0 or resize_level_0: current_level = newArrRef[byte](w*h*pixel_stride) stbir_resize( last_src, prev_w, prev_h, 0, @@ -113,13 +78,16 @@ proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], STBIR_FILTER_BOX, ) last_level = current_level - current_level.toPointer + (current_level.toPointer, current_level.byte_len.int) else: - pixels.data - let data = astc_compress_layer(ctx, config, tex.format, w, h, @[last_src]) + (pixels.toPointer, pixels.byte_len) + + let (data, row_len) = compress(w, h, last_src, last_len.int32) + parts.add KtxPart( width: w, height: h, data: data.toPointer, len: data.byte_len, mip_level: mip_level, + row_len: row_len, ) data_refs.add data prev_w = w; prev_h = h @@ -128,18 +96,101 @@ proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], mip_level.inc let time2 = getmonotime().ticks.float/1000000000 echo "time: ", time2-time, " ", tex.name - let info = KtxInfo( - width: width, height: height, depth: depth, - num_layers: 1, num_mipmaps: mip_level, has_alpha: tex.format.channel_count == 4, - is_sRGB: tex.is_sRGB, is_astc: true, - internal_format: get_ASTC_internal_format(blk_size, tex.is_sRGB) - ) - callback(tex, info, parts, data_refs) - astc_assert astcenc_compress_reset(ctx) - astcenc_context_free(ctx) + return (parts, data_refs) -type CallbackUncompressed = proc(tex: Texture, data: SliceMem[byte]) {.gcsafe.} -type CallbackCompressed = proc(tex: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]]) {.gcsafe.} +when defined(myouUseBC7Encoder): + proc bcn_compress*(tex: Texture, pixels: SliceMem[byte], callback: CallbackCompressed, bc_format: int8) = + when not defined(android): + let (bpp, internal_format) = get_bc_bpp_internal_format(bc_format, tex.is_sRGB) + let 
block_size_bytes = (4*4*bpp.int) div 8 + proc compress(w, h: int32, p: pointer, len: int32): CompressMipmapResult {.closure.} = + var input = EncodeBcInput( + data: p, len: len, + width: w, height: h, + format: bc_format, + ) + let out_len = block_file_size(w,h,1, 4,4,1, block_size_bytes) + let row_len = block_file_size(w,1,1, 4,4,1, block_size_bytes) + var data = newArrRef[byte](out_len) + var output = EncodeBcOutput( + data: data.toPointer, + len: out_len.int32, + ) + let err = encode_bc(input, output, myouBC7VerboseMode) + assert err == NO_ERROR, "bc7enc error: " & $err + return (data, row_len) + + let (parts, data_refs) = make_mipmaps(tex, pixels, compress) + + let info = KtxInfo( + width: parts[0].width, height: parts[0].height, depth: tex.format_depth.int32, + num_layers: 1, num_mipmaps: parts.len.int32, has_alpha: bc_format in [2,3,7], + is_sRGB: tex.is_sRGB, is_bc: true, + internal_format: internal_format, + ) + callback(tex, info, parts, data_refs) + +when defined(myouUseAstcEncoder): + proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], callback: CallbackCompressed, blk_size = (6,6), quality = 10.0) = + let profile = case tex.format.component_type: + of UByte: + if tex.is_sRGB: ASTCENC_PRF_LDR_SRGB + else: ASTCENC_PRF_LDR + of HalfFloat: ASTCENC_PRF_HDR_RGB_LDR_A + of Float: ASTCENC_PRF_HDR_RGB_LDR_A + of UShort: ASTCENC_PRF_HDR_RGB_LDR_A + else: raise Defect.newException "unreachable" + let data_type = case tex.format: + of SRGB_u8, SRGB_Alpha_u8, R_u8, RG_u8, RGB_u8, RGBA_u8: ASTCENC_TYPE_U8 + of R_f16, RG_f16, RGB_f16, RGBA_f16: ASTCENC_TYPE_F16 + of R_f32, RG_f32, RGB_f32, RGBA_f32: ASTCENC_TYPE_F32 + else: raise ValueError.newException "Unsupported data type of " & $tex.format + let swizzle = case tex.format.channel_count: + # TODO: more optimal usage for 1/2 channels, normals, etc. 
+ of 1: AstcencSwizzle(r: SWZ_R, g: SWZ_R, b: SWZ_R, a: SWZ_1) + of 2: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_0, a: SWZ_1) + of 3: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_1) + else: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_A) + let flags = 0'u32 + var config: AstcencConfig + # TODO: 3D texture block size + astc_assert astcenc_config_init(profile, + blk_size[0].uint32, blk_size[1].uint32, 1'u32, + quality, flags, config.addr) + # config.progress_callback = proc(p: float32) {.cdecl.} = + # echo "progress ", p + var ctx: ptr AstcencContext + astc_assert astcenc_context_alloc(config.addr, 1, ctx) + let time = getmonotime().ticks.float/1000000000 + proc compress(w, h: int32, p: pointer, len: int32): CompressMipmapResult {.closure.} = + let buffer_size = block_file_size(w, h, 1, + config.block_x, config.block_y, config.block_z, 16) + let row_len = block_file_size(w, 1, 1, + config.block_x, config.block_y, config.block_z, 16) + let data = newArrRef[byte](buffer_size) + let pointers = [p] + let img = AstcencImage( + dim_x: w.u32, dim_y: h.u32, dim_z: 1.u32, + data_type: data_type, + data: pointers[0].addr, + ) + const thread_index = 0 # TODO + let err = astcenc_compress_image(ctx, img.addr, swizzle.addr, + data[0].addr, buffer_size.csize_t, thread_index) + assert err == ASTCENC_SUCCESS, "ASTC encoding error: " & $err + return (data, row_len) + + let (parts, data_refs) = make_mipmaps(tex, pixels, compress) + + let info = KtxInfo( + width: parts[0].width, height: parts[0].height, depth: tex.format_depth.int32, + num_layers: 1, num_mipmaps: parts.len.int32, has_alpha: tex.format.channel_count == 4, + is_sRGB: tex.is_sRGB, is_astc: true, + internal_format: get_ASTC_internal_format(blk_size, tex.is_sRGB) + ) + callback(tex, info, parts, data_refs) + astc_assert astcenc_compress_reset(ctx) + astcenc_context_free(ctx) proc loadOptimized*(tex: Texture, slices: seq[SliceMem[byte]], callback_uncompressed: CallbackUncompressed = nil, @@ -149,20 +200,53 @@ 
proc loadOptimized*(tex: Texture, slices: seq[SliceMem[byte]], var min_channels = min_channels var will_compress = myouEngineCompressTextures var will_load_uncompressed_first = false + var will_encode_all = false will_compress = will_compress and callback_compressed != nil and tex.format.component_type != UShort + if has_bptc_support and defined(myouUseBC7Encoder): # GPU support only helps if the BCn encoder is compiled in + # TODO: BC6H or RGBM + will_compress = will_compress and + tex.format.component_type == UByte + elif has_astc_support and defined(myouUseAstcEncoder): # same for ASTC + # TODO: detect HDR support + discard + elif will_encode_all: + will_load_uncompressed_first = true + else: # no usable GPU format/encoder pair: fall back to the uncompressed callback + will_compress = false + if will_compress: min_channels = 4 - + tex.loadFileFromSlices(slices, proc(tex: Texture, pixels: SliceMem[byte]) {.gcsafe.} = if callback_uncompressed != nil and (will_load_uncompressed_first or not will_compress): callback_uncompressed(tex, pixels) if will_compress: - atsc_compress(tex, pixels, callback_compressed) + when defined(myouUseBC7Encoder): + let bc_format = if tex.format.channel_count == 1: + 4.int8 # BC4 is 0.5 bytes per pixel grayscale + else: + 7.int8 # BC7 is just the best at anything else + if has_bptc_support: + bcn_compress(tex, pixels, callback_compressed, bc_format) + if not will_encode_all: return + elif will_encode_all: + # TODO: callback to store result + discard + # bcn_compress(tex, pixels, ..., bc_format) + when defined(myouUseAstcEncoder): + if has_astc_support or will_encode_all: + atsc_compress(tex, pixels, callback_compressed) + if not will_encode_all: return + elif will_encode_all: + # TODO: callback to store result + discard + # atsc_compress(tex, pixels, ...) 
+ , flip = flip, min_channels = min_channels) @@ -191,7 +275,10 @@ proc loadOptimizedThreaded*(tex: Texture, slices: seq[SliceMem[byte]], callback_compressed: CallbackCompressed = nil, flip = true, min_channels = 0) = - decode_chan.send((tex: tex, slices: slices, + when false: + loadOptimized(tex, slices, callback_uncompressed, callback_compressed, flip, min_channels) + else: + decode_chan.send((tex: tex, slices: slices, callback_uncompressed: callback_uncompressed, callback_compressed: callback_compressed, flip: flip, min_channels: min_channels))