pub struct WhisperQuantize;

Expand description
Model quantizer for converting Whisper models to different quantization formats
Implementations§
Source§impl WhisperQuantize
impl WhisperQuantize
pub fn quantize_model_file<P: AsRef<Path>>(
input_path: P,
output_path: P,
qtype: QuantizationType,
) -> Result<(), QuantizeError>
pub fn quantize_model_file<P: AsRef<Path>>( input_path: P, output_path: P, qtype: QuantizationType, ) -> Result<(), QuantizeError>
Quantize a model file to a specified quantization type
§Arguments
- `input_path` - Path to the input model file (must be in GGML format)
- `output_path` - Path where the quantized model will be saved
- `qtype` - The quantization type to use
§Example
use whisper_cpp_plus::{WhisperQuantize, QuantizationType};
WhisperQuantize::quantize_model_file(
"models/ggml-base.bin",
"models/ggml-base-q5_0.bin",
QuantizationType::Q5_0
).expect("Failed to quantize model");

pub fn quantize_model_file_with_progress<P, F>(
input_path: P,
output_path: P,
qtype: QuantizationType,
callback: F,
) -> Result<(), QuantizeError>
pub fn quantize_model_file_with_progress<P, F>( input_path: P, output_path: P, qtype: QuantizationType, callback: F, ) -> Result<(), QuantizeError>
Quantize a model file with progress callback
§Arguments
- `input_path` - Path to the input model file
- `output_path` - Path where the quantized model will be saved
- `qtype` - The quantization type to use
- `callback` - Progress callback function (receives values from 0.0 to 1.0)
§Example
use whisper_cpp_plus::{WhisperQuantize, QuantizationType};
WhisperQuantize::quantize_model_file_with_progress(
"models/ggml-base.bin",
"models/ggml-base-q4_0.bin",
QuantizationType::Q4_0,
|progress| {
println!("Progress: {:.1}%", progress * 100.0);
}
).expect("Failed to quantize model");

pub fn get_model_quantization_type<P: AsRef<Path>>(
model_path: P,
) -> Result<Option<QuantizationType>, QuantizeError>
pub fn get_model_quantization_type<P: AsRef<Path>>( model_path: P, ) -> Result<Option<QuantizationType>, QuantizeError>
Get the quantization type of an existing model file
§Returns
- `Ok(Some(qtype))` - The quantization type if the model is quantized
- `Ok(None)` - If the model is in full precision (F32 or F16)
- `Err(_)` - If the file cannot be read or is not a valid model
§Example
use whisper_cpp_plus::WhisperQuantize;
match WhisperQuantize::get_model_quantization_type("models/ggml-base-q5_0.bin") {
Ok(Some(qtype)) => println!("Model is quantized as: {}", qtype),
Ok(None) => println!("Model is not quantized"),
Err(e) => println!("Error reading model: {}", e),
}

pub fn estimate_quantized_size<P: AsRef<Path>>(
model_path: P,
qtype: QuantizationType,
) -> Result<u64, QuantizeError>
pub fn estimate_quantized_size<P: AsRef<Path>>( model_path: P, qtype: QuantizationType, ) -> Result<u64, QuantizeError>
Estimate the size of a quantized model given the original model path and target quantization type
§Returns
Estimated size in bytes of the quantized model
§Example
use whisper_cpp_plus::{WhisperQuantize, QuantizationType};
let estimated_size = WhisperQuantize::estimate_quantized_size(
"models/ggml-base.bin",
QuantizationType::Q5_0
).unwrap_or(0);
println!("Estimated after Q5_0: {} MB", estimated_size / 1024 / 1024);

Auto Trait Implementations§
impl Freeze for WhisperQuantize
impl RefUnwindSafe for WhisperQuantize
impl Send for WhisperQuantize
impl Sync for WhisperQuantize
impl Unpin for WhisperQuantize
impl UnsafeUnpin for WhisperQuantize
impl UnwindSafe for WhisperQuantize
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more