pub fn model_quantize(
fname_inp: &str,
fname_out: &str,
params: &QuantizeParams,
) -> Result<(), u32>
Quantize a model file using typed [QuantizeParams].
Returns Ok(()) on success, or Err(code) with the non-zero error code
returned by llama_model_quantize.
§Panics
Panics if either path contains an interior null byte.
§Example
use llama_cpp_4::quantize::{LlamaFtype, QuantizeParams};
let params = QuantizeParams::new(LlamaFtype::MostlyQ4KM)
.with_nthread(8)
.with_quantize_output_tensor(true);
llama_cpp_4::model_quantize("model-f16.gguf", "model-q4km.gguf", &params).unwrap();