# myou-engine/src/gpu_formats/texture_optimize.nim
# (447 lines, 19 KiB, Nim)

# The contents of this file are subject to the Common Public Attribution License
# Version 1.0 (the “License”); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# https://myou.dev/licenses/LICENSE-CPAL. The License is based on the Mozilla
# Public License Version 1.1 but Sections 14 and 15 have been added to cover use
# of software over a computer network and provide for limited attribution for
# the Original Developer. In addition, Exhibit A has been modified to be
# consistent with Exhibit B.
#
# Software distributed under the License is distributed on an “AS IS” basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is Myou Engine.
#
# the Original Developer is the Initial Developer.
#
# The Initial Developer of the Original Code is the Myou Engine developers.
# All portions of the code written by the Myou Engine developers are Copyright
# (c) 2024. All Rights Reserved.
#
# Alternatively, the contents of this file may be used under the terms of the
# GNU Affero General Public License version 3 (the [AGPL-3] License), in which
# case the provisions of [AGPL-3] License are applicable instead of those above.
#
# If you wish to allow use of your version of this file only under the terms of
# the [AGPL-3] License and not to allow others to use your version of this file
# under the CPAL, indicate your decision by deleting the provisions above and
# replace them with the notice and other provisions required by the [AGPL-3]
# License. If you do not delete the provisions above, a recipient may use your
# version of this file under either the CPAL or the [AGPL-3] License.
## This module takes textures decoded from JPEG, PNG, etc. and encodes them in
## compressed GPU formats like BCn and ASTC. In order to use it:
##
## * Configure `Engine.CacheSettings`
## * Add one or both of these defines: `myouUseBC7Encoder`, `myouUseAstcEncoder`
##
## The resulting cache can be used in builds without encoders.
import ../types
import ./texture_decode
from dds_ktx import KtxInfo, KtxPart, KtxInfoParts, get_ASTC_internal_format
import arr_ref
import zstd/decompress
import stb_image_resize
import std/monotimes
import std/marshal
import std/bitops
import std/json
import std/uri
import std/os
# TODO: don't import it here
from ../platform/gl import nil
when defined(myouUseBC7Encoder):
import bc7enc
when defined(myouUseAstcEncoder):
import astc
when defined(myouUseBC7Encoder) or defined(myouUseAstcEncoder):
import zstd/compress
when defined(nimdoc):
  # Only declared while generating documentation.
  # NOTE(review): presumably exists so the generated docs reference these
  # settings types -- confirm.
  type TYPES* = CacheSettings | EncodingSpeed | RgbBcFmt | BlockSize

# True when the GL loader reports the BPTC (BC6H/BC7) texture compression
# extension. Android, iOS and Emscripten builds query the EXT variant of the
# extension flag, every other target queries the ARB one.
template has_bptc_support: bool =
  when defined(android) or defined(ios) or defined(emscripten):
    gl.GLAD_GL_EXT_texture_compression_bptc
  else:
    gl.GLAD_GL_ARB_texture_compression_bptc

# True when the GL loader reports either of the ASTC extension flags
# (OES or KHR LDR).
template has_astc_support: bool =
  gl.GLAD_GL_OES_texture_compression_astc or
    gl.GLAD_GL_KHR_texture_compression_astc_ldr
# Number of texture worker threads created when threads are enabled.
# Override at build time with -d:myouEngineNumTextureThreads=N.
const myouEngineNumTextureThreads {.intdefine.} = 4
# Forwarded as the last argument of `encode_bc`.
# Override at build time with -d:myouBC7VerboseMode=true.
const myouBC7VerboseMode {.booldefine.} = false
# Reinterprets `x` as an uint32 with `cast` (bit pattern, not value conversion).
template u32(x: untyped): uint32 = cast[uint32](x)
# Receives a texture together with its decoded (uncompressed) pixel buffer.
type CallbackUncompressed = proc(tex: Texture, data: SliceMem[byte]) {.gcsafe.}
# Receives a texture, the description of its compressed mip levels, and the
# buffers the `KtxPart.data` pointers of those levels point into.
type CallbackCompressed = proc(tex: Texture, data: KtxInfoParts, refdata: seq[SliceMem[byte]]) {.gcsafe.}
# Encoded data of one mip level plus the byte length of one row of blocks.
type CompressMipmapResult = tuple[data: SliceMem[byte], row_len: int]
# Closure that encodes one mip level: receives its width and height, a pointer
# to the source pixels and the byte length of that source buffer.
type CompressMipmap = proc(width, height: int32, p: pointer, len: int32): CompressMipmapResult {.closure.}
# Byte size of a block-compressed image: the number of blocks needed to cover
# each dimension (rounded up) multiplied together and by the byte size of one
# block. Every argument is converted with `.int`, so mixed integer types are
# accepted.
template block_file_size(width, height, depth, block_x, block_y, block_z, block_byte_size: untyped): int =
  let
    columns = (width.int + block_x.int - 1) div block_x.int
    rows = (height.int + block_y.int - 1) div block_y.int
    layers = (depth.int + block_z.int - 1) div block_z.int
  columns * rows * layers * block_byte_size.int
proc make_mipmaps(tex: Texture, pixels: SliceMem[byte], compress: CompressMipmap): (seq[KtxPart], seq[SliceMem[byte]]) =
  ## Builds the mip chain of `tex` from `pixels` and encodes every level with
  ## `compress`.
  ##
  ## Level 0 is rounded down to power-of-two dimensions (it is resized only if
  ## the texture is not already POT). Each following level is produced by
  ## box-filtering the previous one to half its size. Levels are generated
  ## while either dimension is at least 4; at least one level is always made.
  ##
  ## Returns the `KtxPart` description of each level and, in the same order,
  ## the buffers that own the encoded data the parts point into.
  ##
  ## Only 2D, non-cube textures with positive dimensions are accepted.
  let width = tex.width.int32
  let height = tex.height.int32
  # fastLog2 is undefined for 0, so reject empty textures before using it.
  assert width > 0 and height > 0, "Cannot make mipmaps of an empty texture"
  let depth = tex.format_depth
  assert depth == 1, "Compressed 2D arrays and 3D textures not supported yet"
  assert tex.tex_type != TexCube, "Compressed cube textures not supported yet"
  let stbir_data_type = case tex.format.component_type:
    of UByte:
      if tex.is_sRGB: STBIR_TYPE_UINT8_SRGB
      else: STBIR_TYPE_UINT8
    of HalfFloat: STBIR_TYPE_HALF_FLOAT
    of Float: STBIR_TYPE_FLOAT
    of UShort: STBIR_TYPE_UINT16
    else: raise Defect.newException "unreachable"
  # 4 because we're forcing 4 channels for the encoder
  let pixel_stride = 4 * (tex.format.stride div tex.format.channel_count)
  let time = getmonotime().ticks.float/1000000000
  var parts: seq[KtxPart]
  var data_refs: seq[SliceMem[byte]]
  var mip_level = 0'i32
  var w, prev_w = width
  var h, prev_h = height
  # round down to POT (TODO: make configurable)
  w = 1'i32 shl fastLog2(prev_w)
  h = 1'i32 shl fastLog2(prev_h)
  let resize_level_0 = w != prev_w or h != prev_h
  # `last_level` is never read, but it keeps the previous level's buffer
  # referenced while `last_src` (a raw pointer into it) is the resize source
  # and `current_level` has already been replaced by the new buffer.
  var current_level, last_level: ArrRef[byte]
  var last_src: pointer = pixels.data
  var last_len: int
  while w >= 4 or h >= 4 or parts.len == 0:
    if mip_level != 0 or resize_level_0:
      current_level = newArrRef[byte](w*h*pixel_stride)
      stbir_resize(
        last_src, prev_w, prev_h, 0,
        current_level.toPointer, w, h, 0,
        StbirRgba, stbir_data_type,
        STBIR_EDGE_CLAMP, # TODO: handle other cases
        STBIR_FILTER_BOX,
      )
      last_level = current_level
      last_src = current_level.toPointer
      last_len = current_level.byte_len.int
    else:
      # Level 0 already has the right size: encode the input directly.
      last_src = pixels.toPointer
      last_len = pixels.byte_len
    let (data, row_len) = compress(w, h, last_src, last_len.int32)
    parts.add KtxPart(
      width: w, height: h, data: data.toPointer,
      len: data.byte_len, mip_level: mip_level,
      row_len: row_len,
    )
    # Keep the encoded buffer alive; the part only stores a raw pointer.
    data_refs.add data
    prev_w = w; prev_h = h
    w = max(1, w shr 1)
    h = max(1, h shr 1)
    mip_level.inc
  let time2 = getmonotime().ticks.float/1000000000
  echo "time: ", time2-time, " ", tex.name
  return (parts, data_refs)
when defined(myouUseBC7Encoder):
  proc bcn_compress*(tex: Texture, pixels: SliceMem[byte], callback: CallbackCompressed, bc_format: int8, quality_speed: EncodingSpeed) =
    ## Encodes `pixels` and its mip chain in the BCn format numbered
    ## `bc_format`, then hands the result to `callback`.
    ##
    ## `quality_speed` selects the BC1 and BC7 quality levels given to the
    ## encoder. The whole body is compiled out on Android.
    when not defined(android):
      let (bpp, internal_format) = get_bc_bpp_internal_format(bc_format, tex.is_sRGB)
      # BCn blocks always cover 4x4 pixels.
      let block_size_bytes = (4*4*bpp.int) div 8
      let (bc1_level, bc7_level) = case quality_speed:
        of UltraFast: (0, 0)
        of VeryFast: (5, 1)
        of Fast: (10, 2)
        of Basic: (15, 3)
        of Slow: (16, 4)
        of VerySlow: (17, 5)
        of Slowest: (18, 6)

      proc encode_level(w, h: int32, p: pointer, len: int32): CompressMipmapResult {.closure.} =
        # Encodes a single mip level into a freshly allocated buffer.
        let encoded_len = block_file_size(w, h, 1, 4, 4, 1, block_size_bytes)
        let bytes_per_block_row = block_file_size(w, 1, 1, 4, 4, 1, block_size_bytes)
        var encoded = newSliceMem[byte](encoded_len)
        var enc_in = EncodeBcInput(
          data: p, len: len,
          width: w, height: h,
          format: bc_format,
          bc1_quality: bc1_level.int8,
          bc7_quality: bc7_level.int8,
        )
        var enc_out = EncodeBcOutput(
          data: encoded.toPointer,
          len: encoded_len.int32,
        )
        let err = encode_bc(enc_in, enc_out, myouBC7VerboseMode)
        assert err == NO_ERROR, "bc7enc error: " & $err
        return (encoded, bytes_per_block_row)

      let (parts, data_refs) = make_mipmaps(tex, pixels, encode_level)
      let info = KtxInfo(
        width: parts[0].width,
        height: parts[0].height,
        depth: tex.format_depth.int32,
        num_layers: 1,
        num_mipmaps: parts.len.int32,
        has_alpha: bc_format in [2, 3, 7],
        is_sRGB: tex.is_sRGB,
        is_bc: true,
        internal_format: internal_format,
      )
      callback(tex, KtxInfoParts(info: info, parts: parts), data_refs)
# High nibble of a `BlockSize` value; used as the first block dimension
# given to the ASTC encoder configuration.
template first(bs: BlockSize): uint8 =
  uint8(bs) shr 4'u8

# Low nibble of a `BlockSize` value; used as the second block dimension
# given to the ASTC encoder configuration.
template second(bs: BlockSize): uint8 =
  uint8(bs) and 0xf'u8
when defined(myouUseAstcEncoder):
  proc atsc_compress*(tex: Texture, pixels: SliceMem[byte], callback: CallbackCompressed, blk_size: BlockSize, speed: EncodingSpeed) =
    ## Encodes `pixels` and its mip chain as ASTC with the 2D block size
    ## `blk_size`, then hands the result to `callback`.
    ##
    ## The encoder profile and input data type are derived from the texture
    ## format; `speed` is mapped to the astcenc quality value. Raises
    ## `ValueError` for formats whose data type the encoder cannot take.
    ##
    ## The encoder context is released even when encoding or `callback`
    ## raises.
    ##
    ## (The name is a historical misspelling of "astc", kept for
    ## compatibility with existing callers.)
    let profile = case tex.format.component_type:
      of UByte:
        if tex.is_sRGB: ASTCENC_PRF_LDR_SRGB
        else: ASTCENC_PRF_LDR
      of HalfFloat: ASTCENC_PRF_HDR_RGB_LDR_A
      of Float: ASTCENC_PRF_HDR_RGB_LDR_A
      of UShort: ASTCENC_PRF_HDR_RGB_LDR_A
      else: raise Defect.newException "unreachable"
    let data_type = case tex.format:
      of SRGB_u8, SRGB_Alpha_u8, R_u8, RG_u8, RGB_u8, RGBA_u8: ASTCENC_TYPE_U8
      of R_f16, RG_f16, RGB_f16, RGBA_f16: ASTCENC_TYPE_F16
      of R_f32, RG_f32, RGB_f32, RGBA_f32: ASTCENC_TYPE_F32
      else: raise ValueError.newException "Unsupported data type of " & $tex.format
    let swizzle = case tex.format.channel_count:
      # TODO: more optimal usage for 1/2 channels, normals, etc.
      of 1: AstcencSwizzle(r: SWZ_R, g: SWZ_R, b: SWZ_R, a: SWZ_1)
      of 2: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_0, a: SWZ_1)
      of 3: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_1)
      else: AstcencSwizzle(r: SWZ_R, g: SWZ_G, b: SWZ_B, a: SWZ_A)
    let flags = 0'u32
    let quality = case speed:
      of UltraFast: 0.0
      of VeryFast: 10.0
      of Fast: 30.0
      of Basic: 60.0
      of Slow: 98.0
      of VerySlow: 99.0
      of Slowest: 100.0
    var config: AstcencConfig
    # TODO: 3D texture block size
    astc_assert astcenc_config_init(profile,
      blk_size.first.uint32, blk_size.second.uint32, 1'u32,
      quality, flags, config.addr)
    # config.progress_callback = proc(p: float32) {.cdecl.} =
    #   echo "progress ", p
    var ctx: ptr AstcencContext
    astc_assert astcenc_context_alloc(config.addr, 1, ctx)

    proc compress(w, h: int32, p: pointer, len: int32): CompressMipmapResult {.closure.} =
      # Encodes a single mip level; every ASTC block takes 16 bytes.
      let buffer_size = block_file_size(w, h, 1,
        config.block_x, config.block_y, config.block_z, 16)
      let row_len = block_file_size(w, 1, 1,
        config.block_x, config.block_y, config.block_z, 16)
      let data = newSliceMem[byte](buffer_size)
      # The image description takes an array of slice pointers; there is
      # a single 2D slice here.
      let pointers = [p]
      let img = AstcencImage(
        dim_x: w.u32, dim_y: h.u32, dim_z: 1.u32,
        data_type: data_type,
        data: pointers[0].addr,
      )
      const thread_index = 0 # TODO
      let err = astcenc_compress_image(ctx, img.addr, swizzle.addr,
        data[0].addr, buffer_size.csize_t, thread_index)
      assert err == ASTCENC_SUCCESS, "ASTC encoding error: " & $err
      return (data, row_len)

    try:
      let (parts, data_refs) = make_mipmaps(tex, pixels, compress)
      let blk = (blk_size.first, blk_size.second)
      let info = KtxInfo(
        width: parts[0].width, height: parts[0].height, depth: tex.format_depth.int32,
        num_layers: 1, num_mipmaps: parts.len.int32, has_alpha: tex.format.channel_count == 4,
        is_sRGB: tex.is_sRGB, is_astc: true,
        internal_format: get_ASTC_internal_format(blk, tex.is_sRGB)
      )
      callback(tex, KtxInfoParts(info: info, parts: parts), data_refs)
      astc_assert astcenc_compress_reset(ctx)
    finally:
      # Previously skipped when anything above raised, leaking the context.
      astcenc_context_free(ctx)
func `%`(p: pointer): JsonNode =
  ## JSON conversion for raw pointers: the address is never written, every
  ## pointer becomes the integer 0.
  # NOTE(review): presumably needed so `%` works on `KtxInfoParts`, whose
  # parts carry a `data` pointer -- confirm.
  newJInt(0)
proc loadOptimized*(tex: Texture, slices: seq[SliceMem[byte]],
    callback_uncompressed: CallbackUncompressed = nil,
    callback_compressed: CallbackCompressed = nil,
    flip = true, min_channels = 0) {.gcsafe.} =
  ## Loads `tex` from the encoded image `slices`, delivering either a cached
  ## or freshly encoded GPU-compressed version through `callback_compressed`,
  ## or the decoded pixels through `callback_uncompressed`.
  ##
  ## Order of operations:
  ## 1. If the cache is enabled and holds a file for this texture, it is
  ##    loaded and passed to `callback_compressed`; nothing else happens.
  ## 2. Otherwise the image is decoded. The pixels go to
  ##    `callback_uncompressed` when no compressed version will be delivered
  ##    (or when all formats are encoded only to fill the cache).
  ## 3. When compression applies, the compiled-in encoders run; the result in
  ##    the natively supported format is passed to `callback_compressed` and,
  ##    if `save_cache` is set, every result is written to the cache.
  ##
  ## A compressed version is only expected when the encoder matching the
  ## natively supported format is part of the build. Otherwise the texture
  ## falls back to the uncompressed path instead of never being delivered.
  const can_encode_bc = defined(myouUseBC7Encoder)
  const can_encode_astc = defined(myouUseAstcEncoder)
  let settings = tex.engine.cache_settings
  let will_encode_all = settings.compress_all_formats
  var min_channels = min_channels
  var will_load_uncompressed_first = false
  var will_compress = settings.compress_textures and
    callback_compressed != nil and
    tex.format.component_type != UShort

  var native_bc, native_astc = false
  if has_bptc_support:
    native_bc = true
    # TODO: BC6H or RGBM
    will_compress = will_compress and
      tex.format.component_type == UByte
  elif has_astc_support:
    native_astc = true
    # TODO: detect HDR support, use a fallback if unavailable

  # TODO: USE A BETTER KEY, INCLUDE SETTINGS
  let cache_key = tex.name
  let cache_file_name = cache_key.encodeUrl & ".zst"
  if settings.use_cache and callback_compressed != nil:
    let cache_file = settings.cache_dir & "/" & cache_file_name
    # TODO: allow networked requests
    if fileExists cache_file:
      try:
        # The last slice holds the serialized KtxInfoParts; the ones before
        # it hold the data of each part, in order.
        let cached = readFile(cache_file).decompress.toSliceMem.to(byte).deserialize
        var data = to[KtxInfoParts](cached[^1].toString)
        for i, p in data.parts.mpairs:
          p.data = cached[i].toPointer
        tex.callback_compressed(data, cached)
        return
      except:
        # Deliberately broad: a truncated or corrupt cache file must fall
        # back to decoding the texture instead of aborting the load.
        # TODO: proper error handling and logging
        echo "ERROR: could not load cache file for " & tex.name

  # The compressed callback only fires for the natively supported format, so
  # it can only be relied on when that format's encoder is compiled in.
  let can_deliver_compressed =
    (native_bc and can_encode_bc) or (native_astc and can_encode_astc)
  if not can_deliver_compressed:
    if will_encode_all and (can_encode_bc or can_encode_astc):
      # Encoding happens only for the cache; show the texture right away.
      will_load_uncompressed_first = true
    else:
      will_compress = false
  if will_compress:
    # The encoders are always fed 4 channels.
    min_channels = 4

  proc on_decoded(tex: Texture, pixels: SliceMem[byte]) {.gcsafe.} =
    if callback_uncompressed != nil and
        (will_load_uncompressed_first or not will_compress):
      callback_uncompressed(tex, pixels)
    if will_compress:
      when defined(myouUseBC7Encoder) or defined(myouUseAstcEncoder):
        proc cb(tex: Texture, data: KtxInfoParts,
            refdata: seq[SliceMem[byte]]) {.gcsafe.} =
          let info = data.info
          # Only the natively supported format is handed to the caller.
          if (info.is_bc and native_bc) or
              (info.is_astc and native_astc):
            callback_compressed(tex, data, refdata)
          if settings.save_cache:
            let dir = if info.is_bc: settings.cache_dir_bc
                      else: settings.cache_dir_astc
            let cache_file = dir & "/" & cache_file_name
            # Part buffers first, serialized description last.
            let outdata = refdata & @[($(%data)).toSliceMem.to(byte)]
            writeFile cache_file, outdata.serialize.toOpenArray.compress

        let channels = tex.format.channel_count

        when defined(myouUseBC7Encoder):
          if has_bptc_support or will_encode_all:
            let bc_format = if channels == 1:
                4.int8 # BC4
              else:
                settings.bc_format_for_RGB.int8 # BC1 or BC7
            bcn_compress(tex, pixels, cb, bc_format, settings.quality_speed)
            if not will_encode_all: return

        when defined(myouUseAstcEncoder):
          if has_astc_support or will_encode_all:
            let blk_size = if channels == 1:
                settings.astc_block_1ch
              elif channels == 2:
                settings.astc_block_2ch
              else:
                settings.astc_block_size
            atsc_compress(tex, pixels, cb,
              blk_size, settings.quality_speed)

  tex.loadFileFromSlices(slices, on_decoded,
    flip = flip, min_channels = min_channels)
when compileOption("threads"):
  type
    # main -> thread: one texture loading request, carrying the same
    # arguments as `loadOptimized`.
    DecodeChanMsg = tuple[
      tex: Texture,
      slices: seq[SliceMem[byte]],
      callback_uncompressed: CallbackUncompressed,
      callback_compressed: CallbackCompressed,
      flip: bool,
      min_channels: int,
    ]
    # main <- thread: decoded pixels and the callback to give them to.
    DecodeReturnChanMsg = tuple[
      callback: CallbackUncompressed,
      tex: Texture,
      data: SliceMem[byte],
    ]
    # main <- thread: compressed texture and the callback to give it to.
    CompressedReturnChanMsg = tuple[
      callback: CallbackCompressed,
      tex: Texture,
      data: KtxInfoParts,
      refdata: seq[SliceMem[byte]],
    ]

  var
    decode_chan: Channel[DecodeChanMsg]
    decode_return_chan: Channel[DecodeReturnChanMsg]
    compressed_return_chan: Channel[CompressedReturnChanMsg]
proc loadOptimizedThreaded*(tex: Texture, slices: seq[SliceMem[byte]],
    callback_uncompressed: CallbackUncompressed = nil,
    callback_compressed: CallbackCompressed = nil,
    flip = true, min_channels = 0) =
  ## Same arguments as `loadOptimized`. With threads enabled the request is
  ## queued for the texture worker threads; without threads it runs
  ## `loadOptimized` immediately on the calling thread.
  when compileOption("threads"):
    let request: DecodeChanMsg = (
      tex: tex,
      slices: slices,
      callback_uncompressed: callback_uncompressed,
      callback_compressed: callback_compressed,
      flip: flip,
      min_channels: min_channels,
    )
    decode_chan.send(request)
  else:
    loadOptimized(tex, slices, callback_uncompressed, callback_compressed, flip, min_channels)
when compileOption("threads"):
  var workers = newSeq[Thread[void]](myouEngineNumTextureThreads)

  proc workerThreadProc() {.thread.} =
    ## Worker loop: takes requests from `decode_chan` and runs
    ## `loadOptimized` on them until a request with a nil texture arrives.
    ## The user callbacks are not called here; results are sent back through
    ## the return channels together with the callback to invoke.
    # TODO: handle errors
    while true:
      let job = decode_chan.recv()
      if job.tex == nil:
        # Termination request.
        break
      proc forward_uncompressed(tex: Texture, data: SliceMem[byte]) =
        decode_return_chan.send((callback: job.callback_uncompressed, tex: tex, data: data))
      proc forward_compressed(tex: Texture, data: KtxInfoParts, refdata: seq[SliceMem[byte]]) =
        compressed_return_chan.send((callback: job.callback_compressed, tex: tex, data: data, refdata: refdata))
      # Keep "no callback given" visible to loadOptimized as nil.
      let uncompressed_out =
        if job.callback_uncompressed != nil: forward_uncompressed else: nil
      let compressed_out =
        if job.callback_compressed != nil: forward_compressed else: nil
      loadOptimized(job.tex, job.slices, uncompressed_out, compressed_out,
        job.flip, job.min_channels)

  decode_chan.open()
  decode_return_chan.open()
  compressed_return_chan.open()
  for i in 0 ..< workers.len:
    createThread(workers[i], workerThreadProc)

  proc updateTextureWorkerThreads*() =
    ## Drains both return channels without blocking and invokes the callback
    ## stored in each message: first all uncompressed results, then all
    ## compressed ones.
    # TODO: handle errors
    var decoded = decode_return_chan.tryRecv()
    while decoded.dataAvailable:
      let (cb, tex, data) = decoded.msg
      cb(tex, data)
      decoded = decode_return_chan.tryRecv()
    var encoded = compressed_return_chan.tryRecv()
    while encoded.dataAvailable:
      let (cb, tex, data, refdata) = encoded.msg
      cb(tex, data, refdata)
      encoded = compressed_return_chan.tryRecv()

  proc terminateTextureWorkerThreads*() =
    ## Sends one termination request (a default message, whose texture is
    ## nil) per worker, then waits for every worker thread to finish.
    for i in 0 ..< workers.len:
      decode_chan.send(DecodeChanMsg.default)
    for i in 0 ..< workers.len:
      joinThread(workers[i])