use crate::error::{CliError, Result};
use crate::output;
use aprender::format::{apr_convert, ConvertOptions, QuantizationType};
use humansize::{format_size, BINARY};
use std::path::Path;
/// Quantization scheme selected by the user.
///
/// Values are parsed from text by the `FromStr` impl in this file and are
/// converted into `aprender`'s `QuantizationType` before a conversion runs.
/// The `Debug` representation of each variant is what this file prints in
/// the "Scheme" / `"scheme"` output fields, so renaming a variant changes
/// user-visible output.
#[derive(Debug, Clone, Copy)]
pub enum QuantScheme {
/// Accepted spellings: `int8`, `i8`, `q8_0`. Size estimates use 8 bits per weight.
Int8,
/// Accepted spellings: `int4`, `i4`, `q4_0`. Size estimates use 4 bits per weight.
Int4,
/// Accepted spellings: `fp16`, `f16`, `half`. Size estimates use 16 bits per weight.
Fp16,
/// Accepted spellings: `q4k`, `q4_k`, `q4_k_m`. Size estimates use 4.5 bits per weight.
Q4K,
}
/// Parses a scheme name, ignoring letter case.
///
/// Each scheme accepts several aliases (for example `int8`, `i8` and `q8_0`
/// all select [`QuantScheme::Int8`]). Unrecognised input yields an error
/// message that echoes the input exactly as it was given.
impl std::str::FromStr for QuantScheme {
    type Err = String;

    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let parsed = match normalized.as_str() {
            "q4k" | "q4_k" | "q4_k_m" => Self::Q4K,
            "fp16" | "f16" | "half" => Self::Fp16,
            "int4" | "i4" | "q4_0" => Self::Int4,
            "int8" | "i8" | "q8_0" => Self::Int8,
            _ => {
                // Report the caller's original spelling, not the lowercased one.
                return Err(format!(
                    "Unknown quantization scheme: {s}. Supported: int8, int4, fp16, q4k"
                ));
            }
        };
        Ok(parsed)
    }
}
/// Maps the CLI-level scheme onto the library's quantization type.
///
/// The mapping is one-to-one by variant name. The match is deliberately
/// exhaustive so that adding a `QuantScheme` variant fails to compile until
/// it is mapped here.
impl From<QuantScheme> for QuantizationType {
    fn from(scheme: QuantScheme) -> Self {
        match scheme {
            QuantScheme::Fp16 => Self::Fp16,
            QuantScheme::Int8 => Self::Int8,
            QuantScheme::Q4K => Self::Q4K,
            QuantScheme::Int4 => Self::Int4,
        }
    }
}
/// Estimates the size of a quantized model from the input file size alone.
///
/// The input size is scaled by `bits_per_weight / 32`, where the bits per
/// weight depend on `scheme` (16 for `Fp16`, 8 for `Int8`, 4.5 for `Q4K`,
/// 4 for `Int4`). NOTE(review): dividing by 32 presumably assumes the input
/// stores 32-bit weights and ignores headers/metadata — confirm.
///
/// Returns `(input_size, estimated_output_size, reduction_ratio)`. The ratio
/// is `input / output`, or `1.0` when the estimated output rounds down to
/// zero bytes.
fn estimate_memory(file_size: u64, scheme: QuantScheme) -> (u64, u64, f64) {
    const BASELINE_BITS: f64 = 32.0;

    let target_bits: f64 = match scheme {
        QuantScheme::Fp16 => 16.0,
        QuantScheme::Int8 => 8.0,
        QuantScheme::Q4K => 4.5,
        QuantScheme::Int4 => 4.0,
    };

    // The float-to-integer cast truncates toward zero.
    let estimated = (file_size as f64 * target_bits / BASELINE_BITS) as u64;

    let ratio = match estimated {
        0 => 1.0,
        bytes => file_size as f64 / bytes as f64,
    };

    (file_size, estimated, ratio)
}
/// Runs the quantize workflow for `file`.
///
/// The flags select one of four modes:
/// - `batch` and `plan_only`: print size estimates for every listed scheme;
/// - `batch` only: quantize once per listed scheme, treating `output_path`
///   as the output directory;
/// - `plan_only` only: print a size estimate for `scheme`;
/// - neither: quantize `file` into `output_path`, using the GGUF exporter
///   when the resolved output format is `gguf` (compared without regard to
///   ASCII case) and the APR converter otherwise.
///
/// `scheme` is parsed and validated in every mode, including batch mode
/// where the schemes actually used come from `batch`.
///
/// # Errors
///
/// Returns `CliError::FileNotFound` when `file` does not exist, and
/// `CliError::ValidationFailed` when `scheme` is unknown, when `output_path`
/// is missing in a mode that writes files, when the output already exists
/// and `force` is not set, or when the selected mode itself fails.
#[allow(clippy::too_many_arguments)]
#[allow(clippy::disallowed_methods)]
pub(crate) fn run(
    file: &Path,
    scheme: &str,
    output_path: Option<&Path>,
    format: Option<&str>,
    batch: Option<&str>,
    plan_only: bool,
    force: bool,
    json_output: bool,
) -> Result<()> {
    // Contract macros are defined outside this section of the file.
    contract_pre_quantize!();
    contract_pre_quantize_precision_bound!();

    if !file.exists() {
        return Err(CliError::FileNotFound(file.to_path_buf()));
    }
    let quant_scheme: QuantScheme = scheme.parse().map_err(CliError::ValidationFailed)?;

    if let Some(schemes) = batch {
        if plan_only {
            return run_batch_plan(file, schemes, format, json_output);
        }
        let output_path = output_path.ok_or_else(|| {
            CliError::ValidationFailed("--output is required for batch quantization".to_string())
        })?;
        return run_batch(file, schemes, output_path, force, json_output);
    }

    if plan_only {
        return run_plan(file, quant_scheme, format, json_output);
    }

    let output_path = output_path.ok_or_else(|| {
        CliError::ValidationFailed("--output is required (unless --plan is used)".to_string())
    })?;
    check_overwrite_protection(output_path, force)?;
    print_quantize_header(file, quant_scheme, output_path, json_output);

    // Compare case-insensitively so that `--format GGUF` or an output named
    // `model.GGUF` is routed to the GGUF exporter instead of silently being
    // written in APR format.
    let output_ext = resolve_output_format(format, output_path);
    if output_ext.eq_ignore_ascii_case("gguf") {
        return quantize_to_gguf(file, quant_scheme, output_path, json_output);
    }
    run_apr_quantize(file, quant_scheme, output_path, json_output)
}
/// Refuses to clobber an existing output file unless `force` is set.
///
/// Returns `Ok(())` when `output_path` does not exist or `force` is `true`;
/// otherwise returns `CliError::ValidationFailed` naming the file.
fn check_overwrite_protection(output_path: &Path, force: bool) -> Result<()> {
    let may_write = !output_path.exists() || force;
    if may_write {
        return Ok(());
    }

    let message = format!(
        "Output file '{}' already exists. Use --force to overwrite.",
        output_path.display()
    );
    Err(CliError::ValidationFailed(message))
}
/// Prints the human-readable banner for a single quantization run.
///
/// Shows the input path, the scheme (in its `Debug` form) and the output
/// path, followed by a blank line. Prints nothing when `json` is set.
fn print_quantize_header(file: &Path, scheme: QuantScheme, output_path: &Path, json: bool) {
    if json {
        return;
    }

    output::header("APR Quantize");
    let rows = [
        ("Input", file.display().to_string()),
        ("Scheme", format!("{scheme:?}")),
        ("Output", output_path.display().to_string()),
    ];
    println!("{}", output::kv_table(&rows));
    println!();
}
/// Picks the output format name.
///
/// An explicit `format` always wins. Otherwise the extension of
/// `output_path` is used, and `"apr"` is the fallback when the path has no
/// extension or the extension is not valid UTF-8. The result is returned
/// verbatim, with no case normalization.
fn resolve_output_format<'a>(format: Option<&'a str>, output_path: &'a Path) -> &'a str {
    if let Some(explicit) = format {
        return explicit;
    }

    match output_path.extension().and_then(std::ffi::OsStr::to_str) {
        Some(extension) => extension,
        None => "apr",
    }
}
/// Quantizes `file` into `output_path` through `apr_convert`.
///
/// Conversion runs with validation enabled and no compression. On success
/// the report is printed by `print_apr_quantize_result`. On failure a
/// failure badge is printed (text mode only) and the underlying error
/// message is returned as `CliError::ValidationFailed`.
#[allow(clippy::disallowed_methods)]
fn run_apr_quantize(
    file: &Path,
    scheme: QuantScheme,
    output_path: &Path,
    json_output: bool,
) -> Result<()> {
    let text_mode = !json_output;
    if text_mode {
        output::pipeline_stage("Quantizing", output::StageStatus::Running);
    }

    let options = ConvertOptions {
        validate: true,
        compress: None,
        quantize: Some(scheme.into()),
    };

    let report = match apr_convert(file, output_path, options) {
        Ok(report) => report,
        Err(e) => {
            if text_mode {
                println!();
                println!(" {}", output::badge_fail("Quantization failed"));
            }
            return Err(CliError::ValidationFailed(e.to_string()));
        }
    };

    print_apr_quantize_result(file, output_path, scheme, &report, json_output);
    Ok(())
}
/// Prints the outcome of a successful APR quantization.
///
/// In text mode this is a "Quantization Report" table (sizes, tensor count,
/// reduction) followed by a success badge. In JSON mode it is a single
/// pretty-printed object with the paths, the scheme and the report figures.
/// If JSON serialization fails, an empty line is printed instead.
#[allow(clippy::disallowed_methods)]
fn print_apr_quantize_result(
    file: &Path,
    output_path: &Path,
    scheme: QuantScheme,
    report: &aprender::format::ConvertReport,
    json_output: bool,
) {
    if !json_output {
        println!();
        output::subheader("Quantization Report");
        let rows = [
            ("Original size", format_size(report.original_size, BINARY)),
            ("Quantized size", format_size(report.converted_size, BINARY)),
            ("Tensors", output::count_fmt(report.tensor_count)),
            (
                "Reduction",
                format!(
                    "{} ({:.2}x)",
                    report.reduction_percent(),
                    report.reduction_ratio
                ),
            ),
        ];
        println!("{}", output::kv_table(&rows));
        println!();
        println!(" {}", output::badge_pass("Quantization successful"));
        return;
    }

    let payload = serde_json::json!({
        "status": "success",
        "input": file.display().to_string(),
        "output": output_path.display().to_string(),
        "scheme": format!("{scheme:?}"),
        "original_size": report.original_size,
        "quantized_size": report.converted_size,
        "reduction_ratio": report.reduction_ratio,
        "tensor_count": report.tensor_count,
    });
    let rendered = serde_json::to_string_pretty(&payload).unwrap_or_default();
    println!("{rendered}");
}
/// Prints a size estimate for quantizing `file` with `scheme`, without
/// converting anything.
///
/// The estimate comes from `estimate_memory` applied to the file's size on
/// disk. The reported peak memory is the input size plus the estimated
/// output size. `format` is only echoed back (defaulting to `"apr"`).
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` when the file's metadata cannot be
/// read.
#[allow(clippy::disallowed_methods)]
fn run_plan(
    file: &Path,
    scheme: QuantScheme,
    format: Option<&str>,
    json_output: bool,
) -> Result<()> {
    let file_size = match std::fs::metadata(file) {
        Ok(metadata) => metadata.len(),
        Err(e) => {
            return Err(CliError::ValidationFailed(format!(
                "Cannot read model file: {e}"
            )));
        }
    };
    let (input_size, output_size, reduction) = estimate_memory(file_size, scheme);
    let output_format = format.unwrap_or("apr");

    if !json_output {
        output::header("APR Quantize — Plan");
        let peak_memory = input_size + output_size;
        let rows = [
            ("Input", file.display().to_string()),
            ("Input size", format_size(input_size, BINARY)),
            ("Scheme", format!("{scheme:?}")),
            ("Output format", output_format.to_string()),
            ("Estimated output", format_size(output_size, BINARY)),
            ("Reduction", format!("{reduction:.2}x")),
            ("Peak memory", format_size(peak_memory, BINARY)),
        ];
        println!("{}", output::kv_table(&rows));
        println!();
        println!(
            " {} Run without --plan to execute quantization.",
            output::badge_info("INFO")
        );
        return Ok(());
    }

    let peak_memory = input_size + output_size;
    let payload = serde_json::json!({
        "plan": true,
        "input": file.display().to_string(),
        "input_size": input_size,
        "estimated_output_size": output_size,
        "reduction_ratio": reduction,
        "scheme": format!("{scheme:?}"),
        "output_format": output_format,
        "peak_memory_estimate": peak_memory,
    });
    let rendered = serde_json::to_string_pretty(&payload).unwrap_or_default();
    println!("{rendered}");
    Ok(())
}
/// Prints size estimates for every scheme in a comma-separated list, without
/// converting anything.
///
/// `schemes` is split on `,`. Each entry is trimmed and blank entries (an
/// empty string, a trailing comma, `a,,b`) are dropped, so an effectively
/// empty list is reported as "no schemes specified" rather than as an
/// unknown scheme with an empty name. Estimates come from `estimate_memory`
/// applied to the file's size on disk. `format` is only echoed back
/// (defaulting to `"apr"`). Scheme names are reported as the user typed them.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` when no scheme is listed, when any
/// listed scheme is unknown, or when the file's metadata cannot be read.
#[allow(clippy::disallowed_methods)]
fn run_batch_plan(
    file: &Path,
    schemes: &str,
    format: Option<&str>,
    json_output: bool,
) -> Result<()> {
    // `split` always yields at least one item, so blanks must be filtered
    // out for the emptiness check below to be meaningful.
    let scheme_list: Vec<&str> = schemes
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty())
        .collect();
    if scheme_list.is_empty() {
        return Err(CliError::ValidationFailed(
            "No quantization schemes specified for batch mode".to_string(),
        ));
    }
    let parsed: Vec<QuantScheme> = scheme_list
        .iter()
        .map(|s| s.parse::<QuantScheme>().map_err(CliError::ValidationFailed))
        .collect::<Result<Vec<_>>>()?;

    let file_size = std::fs::metadata(file)
        .map_err(|e| CliError::ValidationFailed(format!("Cannot read model file: {e}")))?
        .len();
    let output_format = format.unwrap_or("apr");

    if json_output {
        let plans: Vec<serde_json::Value> = scheme_list
            .iter()
            .zip(parsed.iter())
            .map(|(name, scheme)| {
                let (input_size, output_size, reduction) = estimate_memory(file_size, *scheme);
                serde_json::json!({
                    "scheme": name,
                    "input_size": input_size,
                    "estimated_output_size": output_size,
                    "reduction_ratio": reduction,
                    "peak_memory_estimate": input_size + output_size,
                })
            })
            .collect();
        let json = serde_json::json!({
            "plan": true,
            "batch": true,
            "input": file.display().to_string(),
            "output_format": output_format,
            "schemes": plans,
        });
        println!(
            "{}",
            serde_json::to_string_pretty(&json).unwrap_or_default()
        );
    } else {
        output::header("APR Quantize — Batch Plan");
        println!(" Input: {}", file.display());
        println!(" Input size: {}", format_size(file_size, BINARY));
        println!(" Output format: {output_format}");
        println!();
        for (name, scheme) in scheme_list.iter().zip(parsed.iter()) {
            let (_input_size, output_size, reduction) = estimate_memory(file_size, *scheme);
            println!(
                " {name:<8} → {} ({reduction:.2}x reduction, peak memory: {})",
                format_size(output_size, BINARY),
                format_size(file_size + output_size, BINARY),
            );
        }
        println!();
        println!(
            " {} Run without --plan to execute batch quantization.",
            output::badge_info("INFO")
        );
    }
    Ok(())
}
/// Quantizes `file` once per scheme in a comma-separated list, writing each
/// result into `output_dir`.
///
/// `schemes` is split on `,`. Each entry is trimmed and blank entries (an
/// empty string, a trailing comma, `a,,b`) are dropped, so an effectively
/// empty list is reported as "no schemes specified" rather than as an
/// unknown scheme with an empty name. Output files are named
/// `{stem}-{scheme}.{ext}`, where the stem and extension are taken from the
/// input file (falling back to `model` and `apr`) and `{scheme}` is the name
/// as the user typed it.
///
/// All schemes are parsed and all overwrite checks are done before any
/// conversion starts, so invalid input does not leave a partial batch.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` when no scheme is listed, when a
/// scheme is unknown, when the output directory cannot be created, when an
/// output file already exists and `force` is not set, or when a conversion
/// fails and `force` is not set.
fn run_batch(
    file: &Path,
    schemes: &str,
    output_dir: &Path,
    force: bool,
    json_output: bool,
) -> Result<()> {
    // `split` always yields at least one item, so blanks must be filtered
    // out for the emptiness check below to be meaningful.
    let scheme_list: Vec<&str> = schemes
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty())
        .collect();
    if scheme_list.is_empty() {
        return Err(CliError::ValidationFailed(
            "No quantization schemes specified for batch mode".to_string(),
        ));
    }
    let parsed: Vec<QuantScheme> = scheme_list
        .iter()
        .map(|s| s.parse::<QuantScheme>().map_err(CliError::ValidationFailed))
        .collect::<Result<Vec<_>>>()?;

    ensure_output_dir(output_dir)?;

    let stem = file.file_stem().and_then(|s| s.to_str()).unwrap_or("model");
    let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("apr");

    // Build each output path once so the overwrite check and the conversion
    // are guaranteed to refer to the same file.
    let jobs: Vec<_> = scheme_list
        .iter()
        .zip(parsed.iter())
        .map(|(name, scheme)| {
            let output_file = output_dir.join(format!("{stem}-{name}.{ext}"));
            (*name, *scheme, output_file)
        })
        .collect();

    for (_, _, output_file) in &jobs {
        check_overwrite_protection(output_file, force)?;
    }

    print_batch_header(file, &scheme_list, output_dir, json_output);

    let mut results = Vec::with_capacity(jobs.len());
    for (scheme_name, scheme, output_file) in &jobs {
        let ok = run_batch_single(file, scheme_name, *scheme, output_file, force, json_output)?;
        results.push(ok);
    }

    print_batch_summary(&results, json_output);
    Ok(())
}
/// Makes sure `output_dir` exists as a directory, creating it and any
/// missing parents when needed.
///
/// `create_dir_all` is called unconditionally rather than behind an
/// `exists()` check. It succeeds when the directory is already there, and it
/// fails when the path exists but is not a directory, so that mistake is
/// reported here instead of surfacing later as a write failure for each
/// output file.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` with the underlying I/O error when
/// the directory cannot be created.
fn ensure_output_dir(output_dir: &Path) -> Result<()> {
    std::fs::create_dir_all(output_dir).map_err(|e| {
        CliError::ValidationFailed(format!(
            "Cannot create output directory '{}': {e}",
            output_dir.display()
        ))
    })
}
/// Prints the human-readable banner for a batch run.
///
/// Shows the input path, the scheme names joined with `", "` and the output
/// directory, followed by a blank line. Prints nothing when `json` is set.
fn print_batch_header(file: &Path, schemes: &[&str], output_dir: &Path, json: bool) {
    if json {
        return;
    }

    output::header("APR Quantize — Batch");
    let scheme_names = schemes.join(", ");
    println!(" Input: {}", file.display());
    println!(" Schemes: {scheme_names}");
    println!(" Output: {}/", output_dir.display());
    println!();
}
/// Runs one conversion of a batch and reports whether it succeeded.
///
/// Conversion goes through `apr_convert` with validation enabled and no
/// compression. Progress and result lines are printed in text mode only.
///
/// Returns `Ok(true)` on success. On failure, `force` decides the outcome:
/// with `force` the failure is recorded as `Ok(false)` so the batch can
/// continue; without it a `CliError::ValidationFailed` naming the scheme is
/// returned.
fn run_batch_single(
    file: &Path,
    scheme_name: &str,
    scheme: QuantScheme,
    output_file: &Path,
    force: bool,
    json_output: bool,
) -> Result<bool> {
    let text_mode = !json_output;
    if text_mode {
        let stage = format!("Quantizing {scheme_name}");
        output::pipeline_stage(&stage, output::StageStatus::Running);
    }

    let options = ConvertOptions {
        validate: true,
        compress: None,
        quantize: Some(scheme.into()),
    };

    let e = match apr_convert(file, output_file, options) {
        Ok(report) => {
            if text_mode {
                println!(
                    " {} {} → {} ({:.2}x)",
                    output::badge_pass("OK"),
                    format_size(report.original_size, BINARY),
                    format_size(report.converted_size, BINARY),
                    report.reduction_ratio,
                );
            }
            return Ok(true);
        }
        Err(e) => e,
    };

    if text_mode {
        println!(" {} {e}", output::badge_fail("FAIL"));
    }
    if force {
        // Best-effort mode: note the failure and let the caller carry on.
        Ok(false)
    } else {
        Err(CliError::ValidationFailed(format!(
            "Batch quantization failed at scheme {scheme_name}: {e}"
        )))
    }
}
/// Prints the closing line of a batch run: how many conversions succeeded
/// out of how many were attempted.
///
/// The badge reads "DONE" when every entry in `results` is `true` and
/// "PARTIAL" otherwise. Prints nothing when `json_output` is set.
fn print_batch_summary(results: &[bool], json_output: bool) {
    if json_output {
        return;
    }

    println!();
    let total = results.len();
    let succeeded = results.iter().filter(|&&ok| ok).count();
    let badge = if succeeded == total {
        output::badge_pass("DONE")
    } else {
        output::badge_warn("PARTIAL")
    };
    println!(" {badge} {succeeded}/{total} quantizations completed");
}
fn quantize_to_gguf(
file: &Path,
scheme: QuantScheme,
output_path: &Path,
json_output: bool,
) -> Result<()> {
use aprender::format::{apr_export, ExportFormat, ExportOptions};
if !json_output {
output::pipeline_stage("Quantizing to GGUF", output::StageStatus::Running);
}
let options = ExportOptions {
format: ExportFormat::Gguf,
quantize: Some(scheme.into()),
include_tokenizer: false,
include_config: false,
..Default::default()
};
match apr_export(file, output_path, options) {
Ok(report) => {
if !json_output {
println!();
output::subheader("GGUF Quantization Report");
println!(
"{}",
output::kv_table(&[
("Original size", format_size(report.original_size, BINARY)),
("GGUF size", format_size(report.exported_size, BINARY)),
("Tensors", output::count_fmt(report.tensor_count)),
("Format", "GGUF".to_string()),
])
);
println!();
println!(" {}", output::badge_pass("GGUF quantization successful"));
}
Ok(())
}
Err(e) => {
if !json_output {
println!();
println!(" {}", output::badge_fail("GGUF quantization failed"));
}
Err(CliError::ValidationFailed(e.to_string()))
}
}
}
include!("quantize_quant_scheme.rs");