use super::read_file;
use crate::error::CliError;
use hedl_core::{parse, Document};
use hedl_json::{to_json_value, ToJsonConfig};
use hedl_xml::{to_xml as hedl_to_xml, ToXmlConfig};
use hedl_yaml::{to_yaml as hedl_to_yaml, ToYamlConfig};
use rayon::prelude::*;
use std::sync::Arc;
/// Divisor applied to the non-whitespace portion of the text when estimating tokens.
const CHARS_PER_CONTENT_TOKEN: usize = 4;
/// Divisor applied to the whitespace character count when estimating tokens.
const WHITESPACE_PER_TOKEN: usize = 3;

/// Returns a rough token estimate for `text`.
///
/// The estimate is the sum of two integer divisions (each rounds down):
/// the non-whitespace length over [`CHARS_PER_CONTENT_TOKEN`] and the
/// whitespace count over [`WHITESPACE_PER_TOKEN`].
///
/// Note that the total length comes from `str::len`, i.e. UTF-8 bytes, while
/// whitespace is counted per `char`; for ASCII input the two units coincide.
fn estimate_tokens(text: &str) -> usize {
    let whitespace_count = text.chars().filter(|ch| ch.is_whitespace()).count();
    // Cannot underflow: a string never has more chars than bytes.
    let content_len = text.len() - whitespace_count;
    content_len / CHARS_PER_CONTENT_TOKEN + whitespace_count / WHITESPACE_PER_TOKEN
}
/// Holds one rendered text per comparison format for a single document.
///
/// Only the strings are stored; sizes and token estimates are derived from
/// them by the caller.
#[derive(Debug, Clone)]
struct FormatStats {
/// JSON rendered with `serde_json::to_string`.
json_compact: String,
/// JSON rendered with `serde_json::to_string_pretty`.
json_pretty: String,
/// YAML rendered with the default `ToYamlConfig`.
yaml: String,
/// XML rendered with `ToXmlConfig { pretty: false, .. }`.
xml_compact: String,
/// XML rendered with `ToXmlConfig { pretty: true, .. }`.
xml_pretty: String,
}
impl FormatStats {
fn compute_parallel(doc: &Document) -> Result<Self, CliError> {
let doc = Arc::new(doc.clone());
let tasks: Vec<Box<dyn Fn() -> Result<String, CliError> + Send + Sync>> = vec![
Box::new({
let doc = Arc::clone(&doc);
move || {
let config = ToJsonConfig::default();
let value = to_json_value(&doc, &config).map_err(|e| {
CliError::json_conversion(format!("JSON conversion error: {e}"))
})?;
serde_json::to_string(&value).map_err(|e| {
CliError::json_conversion(format!("JSON serialization error: {e}"))
})
}
}),
Box::new({
let doc = Arc::clone(&doc);
move || {
let config = ToJsonConfig::default();
let value = to_json_value(&doc, &config).map_err(|e| {
CliError::json_conversion(format!("JSON conversion error: {e}"))
})?;
serde_json::to_string_pretty(&value).map_err(|e| {
CliError::json_conversion(format!("JSON pretty serialization error: {e}"))
})
}
}),
Box::new({
let doc = Arc::clone(&doc);
move || {
let config = ToYamlConfig::default();
hedl_to_yaml(&doc, &config).map_err(|e| {
CliError::yaml_conversion(format!("YAML conversion error: {e}"))
})
}
}),
Box::new({
let doc = Arc::clone(&doc);
move || {
let config = ToXmlConfig {
pretty: false,
..Default::default()
};
hedl_to_xml(&doc, &config)
.map_err(|e| CliError::xml_conversion(format!("XML conversion error: {e}")))
}
}),
Box::new({
let doc = Arc::clone(&doc);
move || {
let config = ToXmlConfig {
pretty: true,
..Default::default()
};
hedl_to_xml(&doc, &config).map_err(|e| {
CliError::xml_conversion(format!("XML pretty conversion error: {e}"))
})
}
}),
];
let results: Result<Vec<String>, CliError> = tasks.par_iter().map(|task| task()).collect();
let outputs = results?;
if outputs.len() != 5 {
return Err(CliError::parse(format!(
"Internal error: expected 5 format conversions, got {}",
outputs.len()
)));
}
let mut iter = outputs.into_iter();
Ok(FormatStats {
json_compact: iter.next().expect("length verified"),
json_pretty: iter.next().expect("length verified"),
yaml: iter.next().expect("length verified"),
xml_compact: iter.next().expect("length verified"),
xml_pretty: iter.next().expect("length verified"),
})
}
}
/// Prints a size comparison between a HEDL file and its JSON, YAML and XML
/// renderings, optionally followed by a table of estimated token counts.
///
/// For each converted format the "Savings" column is `other - hedl` and the
/// "%" column is that difference as a percentage of `other` (0 when `other`
/// is 0).
///
/// # Errors
///
/// Propagates the error from `read_file`, a `CliError::parse` when the
/// content does not parse, and any error from
/// `FormatStats::compute_parallel`. Nothing is printed in those cases.
pub fn stats(file: &str, show_tokens: bool) -> Result<(), CliError> {
    let content = read_file(file)?;
    let doc =
        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
    let formats = FormatStats::compute_parallel(&doc)?;

    // Converted formats in display order; the HEDL baseline row is printed separately.
    let converted: [(&str, &String); 5] = [
        ("JSON (minified)", &formats.json_compact),
        ("JSON (pretty)", &formats.json_pretty),
        ("YAML", &formats.yaml),
        ("XML (minified)", &formats.xml_compact),
        ("XML (pretty)", &formats.xml_pretty),
    ];

    // Signed difference against the baseline and its share of `other` in percent.
    let savings = |baseline: usize, other: usize| -> (i64, f64) {
        let delta = other as i64 - baseline as i64;
        let percent = if other > 0 {
            (delta as f64 / other as f64) * 100.0
        } else {
            0.0
        };
        (delta, percent)
    };
    let print_header = |value_column: &str| {
        println!(
            " {:<20} {:>10} {:>12} {:>10}",
            "Format", value_column, "Savings", "%"
        );
        println!(" {:-<20} {:-^10} {:-^12} {:-^10}", "", "", "", "");
    };
    let print_baseline = |value: String| {
        println!(" {:<20} {:>10}", "HEDL", value);
    };
    let print_row = |label: &str, value: String, delta: i64, percent: f64| {
        println!(
            " {:<20} {:>10} {:>12} {:>9.1}%",
            label,
            value,
            format_diff(delta),
            percent
        );
    };

    println!("HEDL Size Comparison");
    println!("====================");
    println!();
    println!("Input: {file}");
    println!();
    println!("Bytes:");
    print_header("Size");
    let hedl_bytes = content.len();
    print_baseline(format_bytes(hedl_bytes));
    for &(label, text) in &converted {
        let size = text.len();
        let (delta, percent) = savings(hedl_bytes, size);
        print_row(label, format_bytes(size), delta, percent);
    }

    if !show_tokens {
        return Ok(());
    }

    println!();
    println!("Estimated Tokens (LLM context):");
    // Index 0 is the HEDL input; the rest follow `converted` order.
    let texts: Vec<&String> = std::iter::once(&content)
        .chain(converted.iter().map(|&(_, text)| text))
        .collect();
    let token_counts: Vec<usize> = texts.par_iter().map(|text| estimate_tokens(text)).collect();
    let hedl_tokens = token_counts[0];
    print_header("Tokens");
    print_baseline(format_number(hedl_tokens));
    for (&(label, _), &tokens) in converted.iter().zip(&token_counts[1..]) {
        let (delta, percent) = savings(hedl_tokens, tokens);
        print_row(label, format_number(tokens), delta, percent);
    }
    println!();
    println!("Note: Token estimates use ~4 chars/token heuristic for structured data.");
    Ok(())
}
/// Formats a byte count for display using decimal (powers of 1000) units.
///
/// Values below 1000 are shown as whole bytes (`"999 B"`); larger values are
/// shown with one decimal place in `KB` or `MB`.
fn format_bytes(bytes: usize) -> String {
    match bytes {
        b if b >= 1_000_000 => format!("{:.1} MB", b as f64 / 1_000_000.0),
        b if b >= 1_000 => format!("{:.1} KB", b as f64 / 1_000.0),
        b => format!("{b} B"),
    }
}
/// Formats a count compactly: plain digits below 1000, otherwise one decimal
/// place with a `K` (thousands) or `M` (millions) suffix.
fn format_number(n: usize) -> String {
    match n {
        m if m >= 1_000_000 => format!("{:.1}M", m as f64 / 1_000_000.0),
        k if k >= 1_000 => format!("{:.1}K", k as f64 / 1_000.0),
        small => format!("{small}"),
    }
}

/// Formats a signed difference with an explicit `+` or `-` sign, using
/// [`format_number`] for the magnitude. Zero is rendered as `"0"` with no sign.
fn format_diff(diff: i64) -> String {
    if diff == 0 {
        return "0".to_string();
    }
    let sign = if diff > 0 { '+' } else { '-' };
    // `unsigned_abs` is defined for every `i64`, including `i64::MIN`, where
    // plain negation overflows. The cast only narrows on targets whose `usize`
    // is smaller than 64 bits, matching the original `as usize` behaviour.
    let magnitude = diff.unsigned_abs() as usize;
    format!("{sign}{}", format_number(magnitude))
}