1use super::read_file;
35use crate::error::CliError;
36use hedl_core::{parse, Document};
37use hedl_json::{to_json_value, ToJsonConfig};
38use hedl_xml::{to_xml as hedl_to_xml, ToXmlConfig};
39use hedl_yaml::{to_yaml as hedl_to_yaml, ToYamlConfig};
40use rayon::prelude::*;
41use std::sync::Arc;
42
/// Approximate amount of non-whitespace content (measured in UTF-8 bytes) per token.
const CHARS_PER_CONTENT_TOKEN: usize = 4;
/// Approximate number of whitespace characters that collapse into one token.
const WHITESPACE_PER_TOKEN: usize = 3;

/// Returns a rough token-count estimate for `text`.
///
/// Whitespace is counted per `char`, while the total length comes from
/// `str::len`, which is the UTF-8 byte length. Multi-byte characters therefore
/// contribute every one of their bytes to the content share of the estimate.
/// Both shares are divided with integer (truncating) division and then summed.
fn estimate_tokens(text: &str) -> usize {
    let whitespace_count: usize = text
        .chars()
        .map(|c| usize::from(c.is_whitespace()))
        .sum();

    // Never underflows: each whitespace char occupies at least one byte.
    let content_len = text.len() - whitespace_count;

    content_len / CHARS_PER_CONTENT_TOKEN + whitespace_count / WHITESPACE_PER_TOKEN
}
67
68#[derive(Debug, Clone)]
70struct FormatStats {
71 json_compact: String,
72 json_pretty: String,
73 yaml: String,
74 xml_compact: String,
75 xml_pretty: String,
76}
77
78impl FormatStats {
79 fn compute_parallel(doc: &Document) -> Result<Self, CliError> {
101 let doc = Arc::new(doc.clone());
103
104 let tasks: Vec<Box<dyn Fn() -> Result<String, CliError> + Send + Sync>> = vec![
106 Box::new({
108 let doc = Arc::clone(&doc);
109 move || {
110 let config = ToJsonConfig::default();
111 let value = to_json_value(&doc, &config).map_err(|e| {
112 CliError::json_conversion(format!("JSON conversion error: {e}"))
113 })?;
114 serde_json::to_string(&value).map_err(|e| {
115 CliError::json_conversion(format!("JSON serialization error: {e}"))
116 })
117 }
118 }),
119 Box::new({
121 let doc = Arc::clone(&doc);
122 move || {
123 let config = ToJsonConfig::default();
124 let value = to_json_value(&doc, &config).map_err(|e| {
125 CliError::json_conversion(format!("JSON conversion error: {e}"))
126 })?;
127 serde_json::to_string_pretty(&value).map_err(|e| {
128 CliError::json_conversion(format!("JSON pretty serialization error: {e}"))
129 })
130 }
131 }),
132 Box::new({
134 let doc = Arc::clone(&doc);
135 move || {
136 let config = ToYamlConfig::default();
137 hedl_to_yaml(&doc, &config).map_err(|e| {
138 CliError::yaml_conversion(format!("YAML conversion error: {e}"))
139 })
140 }
141 }),
142 Box::new({
144 let doc = Arc::clone(&doc);
145 move || {
146 let config = ToXmlConfig {
147 pretty: false,
148 ..Default::default()
149 };
150 hedl_to_xml(&doc, &config)
151 .map_err(|e| CliError::xml_conversion(format!("XML conversion error: {e}")))
152 }
153 }),
154 Box::new({
156 let doc = Arc::clone(&doc);
157 move || {
158 let config = ToXmlConfig {
159 pretty: true,
160 ..Default::default()
161 };
162 hedl_to_xml(&doc, &config).map_err(|e| {
163 CliError::xml_conversion(format!("XML pretty conversion error: {e}"))
164 })
165 }
166 }),
167 ];
168
169 let results: Result<Vec<String>, CliError> = tasks.par_iter().map(|task| task()).collect();
171
172 let outputs = results?;
173
174 if outputs.len() != 5 {
177 return Err(CliError::parse(format!(
178 "Internal error: expected 5 format conversions, got {}",
179 outputs.len()
180 )));
181 }
182
183 let mut iter = outputs.into_iter();
185 Ok(FormatStats {
186 json_compact: iter.next().expect("length verified"),
188 json_pretty: iter.next().expect("length verified"),
189 yaml: iter.next().expect("length verified"),
190 xml_compact: iter.next().expect("length verified"),
191 xml_pretty: iter.next().expect("length verified"),
192 })
193 }
194}
195
196pub fn stats(file: &str, show_tokens: bool) -> Result<(), CliError> {
253 let content = read_file(file)?;
254 let hedl_bytes = content.len();
255
256 let doc =
258 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
259
260 let formats = FormatStats::compute_parallel(&doc)?;
262
263 let json_bytes = formats.json_compact.len();
265 let json_pretty_bytes = formats.json_pretty.len();
266 let yaml_bytes = formats.yaml.len();
267 let xml_bytes = formats.xml_compact.len();
268 let xml_pretty_bytes = formats.xml_pretty.len();
269
270 let calc_savings = |other: usize| -> (i64, f64) {
272 let diff = other as i64 - hedl_bytes as i64;
273 let pct = if other > 0 {
274 (diff as f64 / other as f64) * 100.0
275 } else {
276 0.0
277 };
278 (diff, pct)
279 };
280
281 println!("HEDL Size Comparison");
282 println!("====================");
283 println!();
284 println!("Input: {file}");
285 println!();
286
287 println!("Bytes:");
289 println!(
290 " {:<20} {:>10} {:>12} {:>10}",
291 "Format", "Size", "Savings", "%"
292 );
293 println!(" {:-<20} {:-^10} {:-^12} {:-^10}", "", "", "", "");
294
295 println!(" {:<20} {:>10}", "HEDL", format_bytes(hedl_bytes));
296
297 let (json_diff, json_pct) = calc_savings(json_bytes);
298 println!(
299 " {:<20} {:>10} {:>12} {:>9.1}%",
300 "JSON (minified)",
301 format_bytes(json_bytes),
302 format_diff(json_diff),
303 json_pct
304 );
305
306 let (json_pretty_diff, json_pretty_pct) = calc_savings(json_pretty_bytes);
307 println!(
308 " {:<20} {:>10} {:>12} {:>9.1}%",
309 "JSON (pretty)",
310 format_bytes(json_pretty_bytes),
311 format_diff(json_pretty_diff),
312 json_pretty_pct
313 );
314
315 let (yaml_diff, yaml_pct) = calc_savings(yaml_bytes);
316 println!(
317 " {:<20} {:>10} {:>12} {:>9.1}%",
318 "YAML",
319 format_bytes(yaml_bytes),
320 format_diff(yaml_diff),
321 yaml_pct
322 );
323
324 let (xml_diff, xml_pct) = calc_savings(xml_bytes);
325 println!(
326 " {:<20} {:>10} {:>12} {:>9.1}%",
327 "XML (minified)",
328 format_bytes(xml_bytes),
329 format_diff(xml_diff),
330 xml_pct
331 );
332
333 let (xml_pretty_diff, xml_pretty_pct) = calc_savings(xml_pretty_bytes);
334 println!(
335 " {:<20} {:>10} {:>12} {:>9.1}%",
336 "XML (pretty)",
337 format_bytes(xml_pretty_bytes),
338 format_diff(xml_pretty_diff),
339 xml_pretty_pct
340 );
341
342 if show_tokens {
344 println!();
345 println!("Estimated Tokens (LLM context):");
346
347 let texts = vec![
349 &content,
350 &formats.json_compact,
351 &formats.json_pretty,
352 &formats.yaml,
353 &formats.xml_compact,
354 &formats.xml_pretty,
355 ];
356
357 let token_counts: Vec<usize> = texts.par_iter().map(|text| estimate_tokens(text)).collect();
358
359 let hedl_tokens = token_counts[0];
360 let json_tokens = token_counts[1];
361 let json_pretty_tokens = token_counts[2];
362 let yaml_tokens = token_counts[3];
363 let xml_tokens = token_counts[4];
364 let xml_pretty_tokens = token_counts[5];
365
366 let calc_token_savings = |other: usize| -> (i64, f64) {
367 let diff = other as i64 - hedl_tokens as i64;
368 let pct = if other > 0 {
369 (diff as f64 / other as f64) * 100.0
370 } else {
371 0.0
372 };
373 (diff, pct)
374 };
375
376 println!(
377 " {:<20} {:>10} {:>12} {:>10}",
378 "Format", "Tokens", "Savings", "%"
379 );
380 println!(" {:-<20} {:-^10} {:-^12} {:-^10}", "", "", "", "");
381
382 println!(" {:<20} {:>10}", "HEDL", format_number(hedl_tokens));
383
384 let (json_tok_diff, json_tok_pct) = calc_token_savings(json_tokens);
385 println!(
386 " {:<20} {:>10} {:>12} {:>9.1}%",
387 "JSON (minified)",
388 format_number(json_tokens),
389 format_diff(json_tok_diff),
390 json_tok_pct
391 );
392
393 let (json_pretty_tok_diff, json_pretty_tok_pct) = calc_token_savings(json_pretty_tokens);
394 println!(
395 " {:<20} {:>10} {:>12} {:>9.1}%",
396 "JSON (pretty)",
397 format_number(json_pretty_tokens),
398 format_diff(json_pretty_tok_diff),
399 json_pretty_tok_pct
400 );
401
402 let (yaml_tok_diff, yaml_tok_pct) = calc_token_savings(yaml_tokens);
403 println!(
404 " {:<20} {:>10} {:>12} {:>9.1}%",
405 "YAML",
406 format_number(yaml_tokens),
407 format_diff(yaml_tok_diff),
408 yaml_tok_pct
409 );
410
411 let (xml_tok_diff, xml_tok_pct) = calc_token_savings(xml_tokens);
412 println!(
413 " {:<20} {:>10} {:>12} {:>9.1}%",
414 "XML (minified)",
415 format_number(xml_tokens),
416 format_diff(xml_tok_diff),
417 xml_tok_pct
418 );
419
420 let (xml_pretty_tok_diff, xml_pretty_tok_pct) = calc_token_savings(xml_pretty_tokens);
421 println!(
422 " {:<20} {:>10} {:>12} {:>9.1}%",
423 "XML (pretty)",
424 format_number(xml_pretty_tokens),
425 format_diff(xml_pretty_tok_diff),
426 xml_pretty_tok_pct
427 );
428
429 println!();
430 println!("Note: Token estimates use ~4 chars/token heuristic for structured data.");
431 }
432
433 Ok(())
434}
435
/// Renders a byte count using decimal (SI) units with one fractional digit.
///
/// Values below 1,000 are printed as whole bytes (`"999 B"`), values below
/// 1,000,000 as kilobytes (`"1.5 KB"`), and everything else as megabytes
/// (`"2.5 MB"`).
fn format_bytes(bytes: usize) -> String {
    match bytes {
        0..=999 => format!("{bytes} B"),
        1_000..=999_999 => format!("{:.1} KB", bytes as f64 / 1_000.0),
        _ => format!("{:.1} MB", bytes as f64 / 1_000_000.0),
    }
}
445
/// Renders a count compactly with one fractional digit.
///
/// Values below 1,000 are printed verbatim, values below 1,000,000 with a `K`
/// suffix (`"1.2K"`), and everything else with an `M` suffix (`"3.4M"`).
fn format_number(n: usize) -> String {
    if n >= 1_000_000 {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}K", n as f64 / 1_000.0)
    } else {
        format!("{n}")
    }
}

/// Renders a signed difference with an explicit sign and compact magnitude.
///
/// Positive values get a leading `+`, negative values a leading `-`, and zero
/// is printed as a bare `"0"`. The magnitude is formatted by `format_number`.
fn format_diff(diff: i64) -> String {
    if diff == 0 {
        return "0".to_string();
    }

    // `unsigned_abs` cannot overflow, unlike negating `i64::MIN`, which panics
    // in builds with overflow checks enabled.
    let magnitude = format_number(diff.unsigned_abs() as usize);
    let sign = if diff > 0 { '+' } else { '-' };
    format!("{sign}{magnitude}")
}