Skip to main content

wolfxl_core/
format.rs

1//! Number-format detection and cell-value rendering.
2//!
3//! Mirrors the format-category logic that the PyO3 backend in `wolfxl` carries
4//! internally. Kept duplicated for now so wolfxl-core stays free of PyO3; the
5//! plan is to converge the two once the CLI is shipping.
6
7use crate::cell::{Cell, CellValue};
8
/// Broad bucket that an Excel number format falls into — just enough detail
/// for an agent to decide how a value should be displayed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FormatCategory {
    /// No specific format (e.g. an empty code or Excel's `General`).
    General,
    /// Whole-number display.
    Integer,
    /// Display with a decimal point.
    Float,
    /// Percentage display.
    Percentage,
    /// Monetary display.
    Currency,
    /// Calendar date without a time component.
    Date,
    /// Time of day without a date component.
    Time,
    /// Combined date and time.
    DateTime,
    /// Exponent (scientific) notation.
    Scientific,
    /// Text display (Excel's `@`).
    Text,
}

impl FormatCategory {
    /// Returns the stable lowercase name of the category, e.g. `"datetime"`
    /// for [`FormatCategory::DateTime`].
    pub fn as_str(self) -> &'static str {
        match self {
            Self::General => "general",
            Self::Integer => "integer",
            Self::Float => "float",
            Self::Percentage => "percentage",
            Self::Currency => "currency",
            Self::Date => "date",
            Self::Time => "time",
            Self::DateTime => "datetime",
            Self::Scientific => "scientific",
            Self::Text => "text",
        }
    }
}
41
42/// Classify an Excel number-format string. Best-effort heuristic; matches the
43/// categories the agent-facing tools care about (`peek`, `schema`).
44pub fn classify_format(fmt: &str) -> FormatCategory {
45    if fmt.is_empty() || fmt.eq_ignore_ascii_case("general") {
46        return FormatCategory::General;
47    }
48    if fmt == "@" {
49        return FormatCategory::Text;
50    }
51    // Currency markers (check raw fmt — `[$-409]` carries the locale tag).
52    if fmt.contains('$')
53        || fmt.contains('€')
54        || fmt.contains('£')
55        || fmt.contains('¥')
56        || fmt.contains("[$")
57    {
58        return FormatCategory::Currency;
59    }
60    // Strip `[...]` segments before the date/time substring scan: tags like
61    // `[Red]` or `[h]` contain `d` and `h` which would otherwise trigger the
62    // date / time heuristics on a plain numeric format such as
63    // `#,##0_);[Red](#,##0)`.
64    let stripped = strip_bracketed_tags(fmt);
65    if stripped.contains('%') {
66        return FormatCategory::Percentage;
67    }
68    if stripped.contains('E') && (stripped.contains("E+") || stripped.contains("E-")) {
69        return FormatCategory::Scientific;
70    }
71    let lower = stripped.to_ascii_lowercase();
72    let has_date = lower.contains('y') || lower.contains('d') || lower.contains("mmm");
73    let has_time = lower.contains('h') || lower.contains(":mm") || lower.contains(':');
74    match (has_date, has_time) {
75        (true, true) => FormatCategory::DateTime,
76        (true, false) => FormatCategory::Date,
77        (false, true) => FormatCategory::Time,
78        _ => {
79            if stripped.contains('.') {
80                FormatCategory::Float
81            } else if stripped.chars().any(|c| c == '0' || c == '#') {
82                FormatCategory::Integer
83            } else {
84                FormatCategory::General
85            }
86        }
87    }
88}
89
/// Drop every `[...]` segment (brackets included) from an Excel format code,
/// so later substring scans never see letters that belong to colour or
/// locale tags.
///
/// Nested brackets are tracked by depth, an unterminated `[` swallows the
/// rest of the code, and a stray `]` with no opening bracket is kept as-is.
fn strip_bracketed_tags(fmt: &str) -> String {
    let mut nesting = 0usize;
    fmt.chars()
        .filter(|&c| match c {
            '[' => {
                nesting += 1;
                false
            }
            ']' if nesting > 0 => {
                nesting -= 1;
                false
            }
            // Everything else survives only while outside all brackets.
            _ => nesting == 0,
        })
        .collect()
}
105
106/// Render a [`Cell`] for human/agent display, respecting its number format.
107///
108/// This is intentionally lossy in places where Excel's full format string is
109/// richer than what an agent needs. The goal: a sensible default that beats
110/// raw `Display` of the underlying value.
111pub fn format_cell(cell: &Cell) -> String {
112    let number_format = cell.number_format.as_deref();
113    let category = cell
114        .number_format
115        .as_deref()
116        .map(classify_format)
117        .unwrap_or(FormatCategory::General);
118
119    match (&cell.value, category) {
120        (CellValue::Empty, _) => String::new(),
121        (CellValue::String(s), _) => s.clone(),
122        (CellValue::Bool(b), _) => if *b { "TRUE" } else { "FALSE" }.to_string(),
123        (CellValue::Error(e), _) => e.clone(),
124        (CellValue::Date(d), _) => d.format("%Y-%m-%d").to_string(),
125        (CellValue::DateTime(dt), _) => dt.format("%Y-%m-%d %H:%M:%S").to_string(),
126        (CellValue::Time(t), _) => t.format("%H:%M:%S").to_string(),
127
128        (CellValue::Int(n), FormatCategory::Currency) => {
129            format_currency(*n as f64, 2, currency_symbol(number_format))
130        }
131        (CellValue::Float(n), FormatCategory::Currency) => {
132            format_currency(*n, 2, currency_symbol(number_format))
133        }
134
135        (CellValue::Int(n), FormatCategory::Percentage) => format_percentage(*n as f64, 1),
136        (CellValue::Float(n), FormatCategory::Percentage) => format_percentage(*n, 1),
137
138        (CellValue::Int(n), _) => format_with_grouping(*n),
139        (CellValue::Float(n), FormatCategory::Integer) => format_with_grouping(n.round() as i64),
140        (CellValue::Float(n), FormatCategory::Scientific) => format!("{:.4E}", n),
141        (CellValue::Float(n), _) => trim_float(*n),
142    }
143}
144
/// Pick the currency symbol to print for a number format.
///
/// The first of `€`, `£`, `¥` (checked in that order) found anywhere in the
/// format wins; a missing format or one without any of them yields `$`.
fn currency_symbol(fmt: Option<&str>) -> &'static str {
    // Ordered: earlier entries take precedence when several symbols appear.
    const KNOWN: [(char, &str); 3] = [('€', "€"), ('£', "£"), ('¥', "¥")];

    fmt.and_then(|code| {
        KNOWN
            .iter()
            .find(|(marker, _)| code.contains(*marker))
            .map(|(_, symbol)| *symbol)
    })
    .unwrap_or("$")
}
159
160fn format_currency(value: f64, decimals: usize, symbol: &str) -> String {
161    // Round once on a single scaled integer so 1.995 carries to 2.00, not 1.100.
162    // Splitting `trunc()` and `fract()` separately drops the carry.
163    let sign = if value < 0.0 { "-" } else { "" };
164    let scale = 10u64.pow(decimals as u32);
165    let scaled = (value.abs() * scale as f64).round() as u64;
166    let whole = scaled / scale;
167    let frac = scaled % scale;
168    format!(
169        "{}{}{}.{:0width$}",
170        sign,
171        symbol,
172        group_thousands(whole),
173        frac,
174        width = decimals
175    )
176}
177
/// Render a fraction as a percentage: `value` is multiplied by 100 and
/// printed with `decimals` fraction digits followed by `%`.
fn format_percentage(value: f64, decimals: usize) -> String {
    let percent = value * 100.0;
    format!("{:.precision$}%", percent, precision = decimals)
}
181
182fn format_with_grouping(value: i64) -> String {
183    if value < 0 {
184        format!("-{}", group_thousands(value.unsigned_abs()))
185    } else {
186        group_thousands(value as u64)
187    }
188}
189
/// Render an unsigned integer in decimal with a `,` between every group of
/// three digits, counted from the right (`1234567` → `1,234,567`).
fn group_thousands(n: u64) -> String {
    let digits = n.to_string();
    let total = digits.len();
    let mut grouped = String::with_capacity(total + total / 3);
    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes before any digit that has a multiple of three
        // digits remaining (itself included), except at the very start.
        if idx > 0 && (total - idx) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }
    grouped
}
207
/// Render a float compactly.
///
/// Whole numbers below `1e15` in magnitude keep a single `.0` so they still
/// read as floats; everything else is printed with six fraction digits and
/// then stripped of trailing zeros and a dangling decimal point.
fn trim_float(n: f64) -> String {
    let is_whole = n.fract() == 0.0;
    if is_whole && n.abs() < 1e15 {
        return format!("{:.1}", n);
    }

    let fixed = format!("{:.6}", n);
    let without_zeros = fixed.trim_end_matches('0');
    String::from(without_zeros.trim_end_matches('.'))
}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a cell holding `value` with an optional number format.
    fn cell(value: CellValue, number_format: Option<&str>) -> Cell {
        Cell {
            value,
            number_format: number_format.map(|code| code.into()),
        }
    }

    /// Render `value` as it would display under `number_format`.
    fn render(value: CellValue, number_format: &str) -> String {
        format_cell(&cell(value, Some(number_format)))
    }

    #[test]
    fn classify_known_formats() {
        let expectations = [
            ("", FormatCategory::General),
            ("General", FormatCategory::General),
            ("@", FormatCategory::Text),
            ("0%", FormatCategory::Percentage),
            ("0.00%", FormatCategory::Percentage),
            ("$#,##0.00", FormatCategory::Currency),
            ("[$-409]#,##0.00", FormatCategory::Currency),
            ("yyyy-mm-dd", FormatCategory::Date),
            ("h:mm:ss", FormatCategory::Time),
            ("yyyy-mm-dd h:mm", FormatCategory::DateTime),
            ("0.00E+00", FormatCategory::Scientific),
            ("0.00", FormatCategory::Float),
            ("#,##0", FormatCategory::Integer),
        ];
        for (fmt, expected) in expectations {
            assert_eq!(classify_format(fmt), expected, "format {:?}", fmt);
        }
    }

    #[test]
    fn bracketed_tags_dont_trigger_date_or_time_heuristic() {
        // The `d` in `[Red]` must not make a numeric format look like a date.
        // `[h]:mm:ss` is a genuine elapsed-time format and has to stay Time
        // even though its bracketed part is ignored by the scan.
        let expectations = [
            ("#,##0_);[Red](#,##0)", FormatCategory::Integer),
            ("0.00;[Red]-0.00", FormatCategory::Float),
            ("[h]:mm:ss", FormatCategory::Time),
        ];
        for (fmt, expected) in expectations {
            assert_eq!(classify_format(fmt), expected, "format {:?}", fmt);
        }
    }

    #[test]
    fn currency_render() {
        assert_eq!(
            render(CellValue::Float(1234567.5), "$#,##0.00"),
            "$1,234,567.50"
        );
        assert_eq!(render(CellValue::Float(-42.0), "$#,##0.00"), "-$42.00");
    }

    #[test]
    fn currency_render_preserves_common_symbols() {
        assert_eq!(render(CellValue::Float(1234.5), "€#,##0.00"), "€1,234.50");
        assert_eq!(
            render(CellValue::Float(-42.0), "[$£-809]#,##0.00"),
            "-£42.00"
        );
        assert_eq!(render(CellValue::Int(5000), "¥#,##0"), "¥5,000.00");
    }

    #[test]
    fn currency_handles_carry_on_rounding() {
        // Rounding the fraction on its own once produced "$1.100" for 1.995;
        // the carry has to reach the whole part.
        assert_eq!(render(CellValue::Float(1.995), "$#,##0.00"), "$2.00");

        // The carry must also cross the thousands boundary.
        assert_eq!(render(CellValue::Float(999.999), "$#,##0.00"), "$1,000.00");
    }

    #[test]
    fn percentage_render() {
        assert_eq!(render(CellValue::Float(0.234), "0.0%"), "23.4%");
    }

    #[test]
    fn integer_grouping() {
        let unformatted = cell(CellValue::Int(1234567), None);
        assert_eq!(format_cell(&unformatted), "1,234,567");
    }

    #[test]
    fn empty_cell_renders_blank() {
        assert_eq!(format_cell(&Cell::empty()), "");
    }
}