Texture cache settings + KtxInfoParts refactor (reviewed copy of the patch).

Reviewer changes relative to the patch as submitted (hunk line counts are
unchanged; the `index` blob hashes of the touched files are now stale):
  * get_ASTC_internal_format: the lookup key is built with `+` instead of
    `or`. On integers `or` is bitwise, so 5x4, 5x5, 10x8 and 10x10 produced
    500, 501, 1000 and 1002 and fell through to `else: 0'i32`.
  * CacheSettings.compress_textures doc comment: removed the duplicated word.
NOTE(review): leading indentation and the position of blank context lines
were reconstructed from the hunk headers -- confirm against the target files
before applying.

diff --git a/libs/dds_ktx/dds_ktx.nim b/libs/dds_ktx/dds_ktx.nim
index 4c7532c..0fdbe04 100644
--- a/libs/dds_ktx/dds_ktx.nim
+++ b/libs/dds_ktx/dds_ktx.nim
@@ -17,6 +17,10 @@ type KtxPart* = object
     len*: int
     row_len*: int
 
+type KtxInfoParts* = object
+    info*: KtxInfo
+    parts*: seq[KtxPart]
+
 {.compile:("impl.c","-I.").}
 
 type ddsktx_format = enum
@@ -252,7 +256,7 @@ proc ParseDdsKtx*(p: pointer, len: int): seq[KtxPart] =
 
 func get_ASTC_internal_format*(blk_size: (SomeInteger,SomeInteger), is_sRGB: bool): int32 =
     result = if is_sRGB:
-        case blk_size[0]*100 or blk_size[1]:
+        case (blk_size[0].int*100 + blk_size[1].int): # decimal key WWHH; must be `+`, bitwise `or` breaks 5x4, 5x5, 10x8, 10x10
         of 04_04: GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR
         of 05_04: GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR
         of 05_05: GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR
@@ -269,7 +273,7 @@ func get_ASTC_internal_format*(blk_size: (SomeInteger,SomeInteger), is_sRGB: boo
         of 12_12: GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR
         else: 0'i32
     else:
-        case blk_size[0]*100 or blk_size[1]:
+        case (blk_size[0].int*100 + blk_size[1].int): # decimal key WWHH; must be `+`, bitwise `or` breaks 5x4, 5x5, 10x8, 10x10
         of 04_04: GL_COMPRESSED_RGBA_ASTC_4x4_KHR
         of 05_04: GL_COMPRESSED_RGBA_ASTC_5x4_KHR
         of 05_05: GL_COMPRESSED_RGBA_ASTC_5x5_KHR
diff --git a/src/gpu_formats/texture_optimize.nim b/src/gpu_formats/texture_optimize.nim
index 5804894..27a1c13 100644
--- a/src/gpu_formats/texture_optimize.nim
+++ b/src/gpu_formats/texture_optimize.nim
@@ -1,11 +1,17 @@
 
 import ../types
 import ./texture_decode
-from dds_ktx import KtxInfo, KtxPart, get_ASTC_internal_format
+from dds_ktx import KtxInfo, KtxPart, KtxInfoParts, get_ASTC_internal_format
 import arr_ref
+import loadable
+import zstd/decompress
 import stb_image_resize
 import std/monotimes
+import std/marshal
 import std/bitops
+import std/json
+import std/uri
+import std/os
 
 # TODO: don't import it here
 from ../platform/gl import nil
@@ -14,6 +20,8 @@ when defined(myouUseBC7Encoder):
     import bc7enc
 when defined(myouUseAstcEncoder):
     import astc
+when defined(myouUseBC7Encoder) or defined(myouUseAstcEncoder):
+    import zstd/compress
 when defined(android) or defined(ios) or defined(emscripten):
     template has_bptc_support: bool = gl.GLAD_GL_EXT_texture_compression_bptc
 else:
@@ -22,14 +30,13 @@ template has_astc_support: bool =
     gl.GLAD_GL_OES_texture_compression_astc or gl.GLAD_GL_KHR_texture_compression_astc_ldr
 
 const myouEngineNumTextureThreads {.intdefine.} = 4
-const myouEngineCompressTextures {.booldefine.} = true
-const myouBC7VerboseMode {.booldefine.} = true
+const myouBC7VerboseMode {.booldefine.} = false
 
 template u32(x: untyped): uint32 = cast[uint32](x)
 
 type CallbackUncompressed = proc(tex: Texture, data: SliceMem[byte]) {.gcsafe.}
-type CallbackCompressed = proc(tex: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]]) {.gcsafe.}
-type CompressMipmapResult = tuple[data: ArrRef[byte], row_len: int]
+type CallbackCompressed = proc(tex: Texture, data: KtxInfoParts, refdata: seq[SliceMem[byte]]) {.gcsafe.}
+type CompressMipmapResult = tuple[data: SliceMem[byte], row_len: int]
 type CompressMipmap = proc(width, height: int32, p: pointer, len: int32): CompressMipmapResult {.closure.}
 
 template block_file_size(width, height, depth, block_x, block_y, block_z, block_byte_size: untyped): int =
@@ -38,7 +45,7 @@ template block_file_size(width, height, depth, block_x, block_y, block_z, block_
     let blocks_z = (depth.int + block_z.int - 1) div block_z.int
     blocks_x * blocks_y * blocks_z * block_byte_size.int
 
-proc make_mipmaps(tex: Texture, pixels: SliceMem[byte], compress: CompressMipmap): (seq[KtxPart], seq[ArrRef[byte]]) =
+proc make_mipmaps(tex: Texture, pixels: SliceMem[byte], compress: CompressMipmap): (seq[KtxPart], seq[SliceMem[byte]]) =
     var width = tex.width.int32
     var height = tex.height.int32
     let depth = tex.format_depth
@@ -56,7 +63,7 @@ proc make_mipmaps(tex: Texture, pixels: SliceMem[byte], compress: CompressMipmap
     let pixel_stride = 4 * (tex.format.stride div tex.format.channel_count)
     let time = getmonotime().ticks.float/1000000000
     var parts: seq[KtxPart]
-    var data_refs: seq[ArrRef[byte]]
+    var data_refs: seq[SliceMem[byte]]
     var mip_level = 0'i32
     var w, prev_w = width
     var h, prev_h = height
@@ -111,7 +118,7 @@ when defined(myouUseBC7Encoder):
             )
             let out_len = block_file_size(w,h,1, 4,4,1, block_size_bytes)
             let row_len = block_file_size(w,1,1, 4,4,1, block_size_bytes)
-            var data = newArrRef[byte](out_len)
+            var data = newSliceMem[byte](out_len)
             var output = EncodeBcOutput(
                 data: data.toPointer,
                 len: out_len.int32,
@@ -128,10 +135,16 @@ when defined(myouUseBC7Encoder):
             is_sRGB: tex.is_sRGB, is_bc: true,
             internal_format: internal_format,
         )
-        callback(tex, info, parts, data_refs)
+        callback(tex, KtxInfoParts(info: info, parts: parts), data_refs)
+
+template first(bs: BlockSize): uint8 =
+    bs.uint8 shr 4'u8
+
+template second(bs: BlockSize): uint8 =
+    bs.uint8 and 0xf'u8
 
 when defined(myouUseAstcEncoder):
-    proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], callback: CallbackCompressed, blk_size = (6,6), quality = 10.0) =
+    proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], callback: CallbackCompressed, blk_size: BlockSize, speed: EncodingSpeed) =
         let profile = case tex.format.component_type:
             of UByte:
                 if tex.is_sRGB: ASTCENC_PRF_LDR_SRGB
@@ -152,10 +165,18 @@ when defined(myouUseAstcEncoder):
             of 3: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_1)
             else: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_A)
         let flags = 0'u32
+        let quality = case speed:
+            of UltraFast: 0.0
+            of VeryFast: 10.0
+            of Fast: 30.0
+            of Basic: 60.0
+            of Slow: 98.0
+            of VerySlow: 99.0
+            of Slowest: 100.0
         var config: AstcencConfig
         # TODO: 3D texture block size
         astc_assert astcenc_config_init(profile,
-            blk_size[0].uint32, blk_size[1].uint32, 1'u32,
+            blk_size.first.uint32, blk_size.second.uint32, 1'u32,
             quality, flags, config.addr)
         # config.progress_callback = proc(p: float32) {.cdecl.} =
         #     echo "progress ", p
@@ -167,7 +188,7 @@ when defined(myouUseAstcEncoder):
                 config.block_x, config.block_y, config.block_z, 16)
             let row_len = block_file_size(w, 1, 1,
                 config.block_x, config.block_y, config.block_z, 16)
-            let data = newArrRef[byte](buffer_size)
+            let data = newSliceMem[byte](buffer_size)
             let pointers = [p]
             let img = AstcencImage(
                 dim_x: w.u32, dim_y: h.u32, dim_z: 1.u32,
@@ -182,41 +203,67 @@ when defined(myouUseAstcEncoder):
 
         let (parts, data_refs) = make_mipmaps(tex, pixels, compress)
 
+        let blk = (blk_size.first, blk_size.second)
         let info = KtxInfo(
             width: parts[0].width, height: parts[0].height, depth: tex.format_depth.int32,
             num_layers: 1, num_mipmaps: parts.len.int32, has_alpha: tex.format.channel_count == 4,
             is_sRGB: tex.is_sRGB, is_astc: true,
-            internal_format: get_ASTC_internal_format(blk_size, tex.is_sRGB)
+            internal_format: get_ASTC_internal_format(blk, tex.is_sRGB)
         )
-        callback(tex, info, parts, data_refs)
+        callback(tex, KtxInfoParts(info: info, parts: parts), data_refs)
         astc_assert astcenc_compress_reset(ctx)
         astcenc_context_free(ctx)
 
+func `%`(p: pointer): JsonNode = %0
+
 proc loadOptimized*(tex: Texture, slices: seq[SliceMem[byte]],
         callback_uncompressed: CallbackUncompressed = nil,
         callback_compressed: CallbackCompressed = nil,
         flip = true, min_channels = 0) {.gcsafe.} =
 
+    let settings = tex.engine.cache_settings
     var min_channels = min_channels
-    var will_compress = myouEngineCompressTextures
+    var will_compress = settings.compress_textures
     var will_load_uncompressed_first = false
-    var will_encode_all = false
+    var will_encode_all = settings.compress_all_formats
 
     will_compress = will_compress and
         callback_compressed != nil and
         tex.format.component_type != UShort
 
+    var native_bc, native_astc = false
     if has_bptc_support:
+        native_bc = true
         # TODO: BC6H or RGBM
         will_compress = will_compress and
             tex.format.component_type == UByte
     elif has_astc_support:
-        # TODO: detect HDR support
-        discard
-    elif will_encode_all:
-        will_load_uncompressed_first = true
-    else:
-        will_compress = false
+        native_astc = true
+        # TODO: detect HDR support, use a fallback if unavailable
+
+    # TODO: USE A BETTER KEY, INCLUDE SETTINGS
+    let cache_key = tex.name
+    let cache_file_name = cache_key.encodeUrl & ".zst"
+    if settings.use_cache and callback_compressed != nil:
+        let cache_file = settings.cache_dir & "/" & cache_file_name
+        # TODO: allow networked requests
+        if fileExists cache_file:
+            try:
+                let slices = readFile(cache_file).decompress.toSliceMem.to(byte).deserialize
+                var data = to[KtxInfoParts](slices[^1].toString)
+                for i,p in data.parts.mpairs:
+                    p.data = slices[i].toPointer
+                tex.callback_compressed(data, slices)
+                return
+            except:
+                # TODO: proper error handling and logging
+                echo "ERROR: could not load cache file for " & tex.name
+
+    if not (native_bc or native_astc):
+        if will_encode_all:
+            will_load_uncompressed_first = true
+        else:
+            will_compress = false
 
     if will_compress:
         min_channels = 4
@@ -226,26 +273,41 @@ proc loadOptimized*(tex: Texture, slices: seq[SliceMem[byte]],
                 (will_load_uncompressed_first or not will_compress):
             callback_uncompressed(tex, pixels)
         if will_compress:
+            when defined(myouUseBC7Encoder) or defined(myouUseAstcEncoder):
+                proc cb(tex: Texture, data: KtxInfoParts,
+                        refdata: seq[SliceMem[byte]]) {.gcsafe.} =
+                    let info = data.info
+                    if (info.is_bc and native_bc) or
+                       (info.is_astc and native_astc):
+                        callback_compressed(tex, data, refdata)
+                    if settings.save_cache:
+                        let dir = if info.is_bc: settings.cache_dir_bc
+                                  else: settings.cache_dir_astc
+                        let cache_file = dir & "/" & cache_file_name
+                        let outdata = refdata & @[($(%data)).toSliceMem.to(byte)]
+                        writeFile cache_file, outdata.serialize.toOpenArray.compress
+
+            let channels = tex.format.channel_count
+
             when defined(myouUseBC7Encoder):
-                let bc_format = if tex.format.channel_count == 1:
-                    4.int8 # BC4 is 0.5 bytes per pixel grayscale
-                else:
-                    7.int8 # BC7 is just the best at anything else
-                if has_bptc_support:
-                    bcn_compress(tex, pixels, callback_compressed, bc_format)
+                if has_bptc_support or will_encode_all:
+                    let bc_format = if channels == 1:
+                        4.int8 # BC4
+                    else:
+                        settings.bc_format_for_RGB.int8 # BC1 or BC7
+                    bcn_compress(tex, pixels, cb, bc_format)
                     if not will_encode_all: return
-                elif will_encode_all:
-                    # TODO: callback to store result
-                    discard
-                    # bcn_compress(tex, pixels, ..., bc_format)
             when defined(myouUseAstcEncoder):
                 if has_astc_support or will_encode_all:
-                    atsc_compress(tex, pixels, callback_compressed)
+                    let blk_size = if channels == 1:
+                        settings.astc_block_1ch
+                    elif channels == 2:
+                        settings.astc_block_2ch
+                    else:
+                        settings.astc_block_size
+                    atsc_compress(tex, pixels, cb,
+                        blk_size, settings.quality_speed)
                     if not will_encode_all: return
-                elif will_encode_all:
-                    # TODO: callback to store result
-                    discard
-                    # atsc_compress(tex, pixels, ...)
 
     , flip = flip, min_channels = min_channels)
 
@@ -266,7 +328,7 @@ type DecodeReturnChanMsg = tuple[
 var decode_return_chan: Channel[DecodeReturnChanMsg]
 type CompressedReturnChanMsg = tuple[
     callback: CallbackCompressed,
-    tex: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]],
+    tex: Texture, data: KtxInfoParts, refdata: seq[SliceMem[byte]],
 ]
 var compressed_return_chan: Channel[CompressedReturnChanMsg]
 
@@ -292,9 +354,8 @@ proc workerThreadProc() {.thread.} =
             break
         proc cb(tex: Texture, data: SliceMem[byte]) =
             decode_return_chan.send((callback: to_decode.callback_uncompressed, tex: tex, data: data))
-        proc cbc(tex: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]]) =
-            compressed_return_chan.send((callback: to_decode.callback_compressed, tex: tex,
-                info: info, data: data, refdata: refdata))
+        proc cbc(tex: Texture, data: KtxInfoParts, refdata: seq[SliceMem[byte]]) =
+            compressed_return_chan.send((callback: to_decode.callback_compressed, tex: tex, data: data, refdata: refdata))
         let cb_out = if to_decode.callback_uncompressed != nil: cb else: nil
         let cbc_out = if to_decode.callback_compressed != nil: cbc else: nil
         loadOptimized(to_decode.tex, to_decode.slices, cb_out, cbc_out,
@@ -318,8 +379,8 @@ proc updateTextureWorkerThreads*() =
         let tried = compressed_return_chan.tryRecv()
         if not tried.dataAvailable:
             break
-        let (cb, tex, info, data, refdata) = tried.msg
-        cb(tex, info, data, refdata)
+        let (cb, tex, data, refdata) = tried.msg
+        cb(tex, data, refdata)
 
 proc terminateTextureWorkerThreads*() =
     for worker in workers:
diff --git a/src/graphics/texture.nim b/src/graphics/texture.nim
index 561ee77..c4cedec 100644
--- a/src/graphics/texture.nim
+++ b/src/graphics/texture.nim
@@ -362,19 +362,19 @@ proc loadCubeSideFromPixels*(self: Texture, pixels: pointer, side: int32 = 0) =
         self.width.GLsizei, self.height.GLsizei,
         0, ts.format, ts.gltype, pixels)
 
-proc loadCompressedData*(self: Texture, info: KtxInfo, data: seq[KtxPart], refdata: seq[ArrRef[byte]]) {.gcsafe.} =
-    assert info.depth == 1 and info.num_layers == 1,
+proc loadCompressedData*(self: Texture, data: KtxInfoParts, refdata: seq[SliceMem[byte]]) {.gcsafe.} =
+    assert data.info.depth == 1 and data.info.num_layers == 1,
         "Compressed array and 3D textures not supported yet"
     let ts = self.storage
     self.loaded = true
     self.bind_it(needs_active_texture=true)
     let target = if self.tex_type == TexCube: GL_TEXTURE_CUBE_MAP_POSITIVE_X.GLuint.int32
                  else: ts.target.GLuint.int32
-    for part in data:
+    for part in data.parts:
         glCompressedTexImage2D(cast[GLenum](target+part.face), part.mip_level,
-            info.internal_format.GLenum, part.width.GLsizei, part.height.GLsizei,
+            data.info.internal_format.GLenum, part.width.GLsizei, part.height.GLsizei,
             0, part.len.GLsizei, part.data)
-    self.setMipmapRange(0, info.num_mipmaps - 1)
+    self.setMipmapRange(0, data.info.num_mipmaps - 1)
 
 proc setFilter*(self: Texture, filter: TextureFilter) =
     self.filter = filter
@@ -485,7 +485,10 @@ proc newTexture*(engine: MyouEngine, name: string, file_name: string, is_sRGB: b
             engine.renderer.enqueue proc()=
                 try:
                     self.ensure_storage()
-                    self.loadCompressedData(ktx_info, ParseDdsKtx(data.data, data.byte_len), @[])
+                    let info_parts = KtxInfoParts(
+                        info: ktx_info,
+                        parts: ParseDdsKtx(data.data, data.byte_len))
+                    self.loadCompressedData(info_parts, @[data])
                     self.loaded = true
                 # except Exception as e:
                 except:
diff --git a/src/types.nim b/src/types.nim
index 802bb1f..10ac33a 100644
--- a/src/types.nim
+++ b/src/types.nim
@@ -84,6 +84,7 @@ type
         use_glsl_tone_mapping*: bool
         loaders_by_ext*: Table[string, seq[proc(e: MyouEngine): Loader]]
         glsl_version*: string
+        cache_settings*: CacheSettings
 
         all_framebuffers*: seq[Framebuffer] ## private
         new_scenes*: Table[string, Scene] ## private
@@ -762,6 +763,61 @@ type
         # on_destroy*: OnDestroy
         path_handler*: proc(path: string): string
         override_textures_sampler_type*: Table[string, string]
+
+    # texture_optimize.nim
+
+    EncodingSpeed* = enum
+        UltraFast
+        VeryFast
+        Fast
+        Basic
+        Slow
+        VerySlow
+        Slowest
+
+    RgbBcFmt* = enum
+        BC1 = 1 ## 4 BPP (0.5 bytes per pixel), very small but low quality.
+        BC7 = 7 ## 8 BPP (1 byte per pixel), larger but very good quality.
+
+    BlockSize* = enum
+        Bs4x4 = 0x4_4   ## 8.00 BPP
+        Bs5x4 = 0x5_4   ## 6.40 BPP
+        Bs5x5 = 0x5_5   ## 5.12 BPP
+        Bs6x5 = 0x6_5   ## 4.27 BPP
+        Bs6x6 = 0x6_6   ## 3.56 BPP
+        Bs8x5 = 0x8_5   ## 3.20 BPP
+        Bs8x6 = 0x8_6   ## 2.67 BPP
+        Bs8x8 = 0x8_8   ## 2.00 BPP (below 10x6)
+        Bs10x5 = 0xA_5  ## 2.56 BPP
+        Bs10x6 = 0xA_6  ## 2.13 BPP
+        Bs10x8 = 0xA_8  ## 1.60 BPP
+        Bs10x10 = 0xA_A ## 1.28 BPP
+        Bs12x10 = 0xC_A ## 1.07 BPP
+        Bs12x12 = 0xC_C ## 0.89 BPP
+        # TODO: 3D block sizes
+
+    CacheSettings* = object
+        compress_textures*: bool        ## Whether to compress textures on load
+                                        ## when not loaded from cache
+        use_cache*: bool                ## Whether to try to load from cache
+        save_cache*: bool               ## Whether to save compressed textures
+        cache_dir*: string              ## Cache directory
+        quality_speed*: EncodingSpeed   ## Whether you need them fast or good
+        bc_format_for_RGB*: RgbBcFmt    ## Which BCn to use for RGB images.
+                                        ## BC7 is the best but BC1 is half the
+                                        ## size and encodes very fast
+        astc_block_size*: BlockSize     ## Defines quality and size of ASTC.
+                                        ## 4x4 is the best but biggest,
+                                        ## 6x6 is a good balance,
+                                        ## 8x8 is bad but very small.
+        astc_block_1ch*: BlockSize      ## Block size for 1 channel textures,
+                                        ## since less data has to be encoded
+        astc_block_2ch*: BlockSize      ## Block size for 2 channel textures
+
+        compress_all_formats*: bool     ## Encode textures for all platforms
+        cache_dir_bc*: string           ## Cache directory for writing bc
+        cache_dir_astc*: string         ## Cache directory for writing astc
+
 
 # INCOMPLETE
 