office_oxide 0.1.2

The fastest Office document processing library — DOCX, XLSX, PPTX, DOC, XLS, PPT
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
//! Excel number format rendering.
//!
//! Applies a numeric format string (or built-in format ID) to an f64 value
//! and returns the display string. Covers the cases that matter in practice:
//! integers, fixed decimals, thousands separators, percentages, currency,
//! and scientific notation. Complex conditions/colors are stripped gracefully.

/// Render `n` the way a cell carrying the given number format would show it.
///
/// Non-finite values short-circuit to the labels `"NaN"`, `"Infinity"` and
/// `"-Infinity"`. Otherwise a recognised built-in `fmt_id` decides the
/// rendering on its own. For any other ID, `fmt_str` is trimmed and
/// interpreted as a custom format unless it is empty, `"General"` or `"@"`;
/// everything that remains falls back to General formatting.
pub fn apply_format(n: f64, fmt_id: u32, fmt_str: Option<&str>) -> String {
    if n.is_nan() {
        return String::from("NaN");
    }
    if n.is_infinite() {
        let label = if n < 0.0 { "-Infinity" } else { "Infinity" };
        return label.to_owned();
    }

    // Built-in format IDs per OOXML spec §18.8.30.
    let builtin = match fmt_id {
        // General / @, plus the fraction formats (# ?/? and # ??/??), which
        // are only approximated by General.
        0 | 12 | 13 | 49 => Some(format_general(n)),
        1 => Some(format_integer(n)),              // 0
        2 => Some(format_fixed(n, 2)),             // 0.00
        3 | 37 | 38 => Some(format_commas(n, 0)),  // #,##0 and its accounting variants
        // #,##0.00, its accounting variants, and the _(* ...) accounting IDs.
        // NOTE(review): the spec patterns for 41 and 42 appear to carry no
        // decimal places (and 42/44 a currency symbol) — confirm that
        // rendering all of 41..=44 as #,##0.00 is intended.
        4 | 39..=44 => Some(format_commas(n, 2)),
        5 | 6 => Some(format_currency(n, "$", 0)), // $#,##0
        7 | 8 => Some(format_currency(n, "$", 2)), // $#,##0.00
        9 => Some(format_percent(n, 0)),           // 0%
        10 => Some(format_percent(n, 2)),          // 0.00%
        11 => Some(format_scientific(n)),          // 0.00E+00
        _ => None,
    };
    if let Some(rendered) = builtin {
        return rendered;
    }

    // Custom format string (IDs 164+).
    match fmt_str.map(str::trim) {
        Some(custom) if !custom.is_empty() && custom != "General" && custom != "@" => {
            apply_custom(n, custom)
        },
        _ => format_general(n),
    }
}

// ── Simple format primitives ───────────────────────────────────────────────

/// Render a number in the style of Excel's General format.
///
/// Whole values with a magnitude below 1e15 are printed as plain integers
/// (no `.0`); every other value goes through Rust's default `f64` display.
pub fn format_general(n: f64) -> String {
    let is_whole = n.trunc() == n;
    if is_whole && n.abs() < 1e15 {
        // The integer cast drops the ".0" the float formatter would not print
        // anyway, and also turns `-0.0` into a plain "0".
        return (n as i64).to_string();
    }
    n.to_string()
}

/// Format a number per the `0` format: rounded to the nearest integer
/// (halves away from zero) with no decimal places.
///
/// Magnitudes that do not fit in an `i64` are printed through the float
/// formatter instead of being cast, because the cast would silently saturate
/// at `i64::MAX` / `i64::MIN`. Non-finite inputs take the same path and come
/// out as Rust's `NaN` / `inf` / `-inf`.
fn format_integer(n: f64) -> String {
    // 2^63: the smallest magnitude that is out of range for a positive `i64`.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    let rounded = n.round();
    if rounded.abs() < I64_LIMIT {
        // The cast also normalises `-0.0` to `0`, so "-0" is never printed.
        (rounded as i64).to_string()
    } else {
        // Already integral, so zero decimals loses nothing.
        format!("{rounded:.0}")
    }
}

/// Format a number with exactly `decimals` digits after the decimal point,
/// without any thousands separators.
fn format_fixed(n: f64, decimals: u8) -> String {
    let precision = usize::from(decimals);
    format!("{n:.precision$}")
}

/// Render a number with comma thousands separators and exactly `decimals`
/// digits after the decimal point.
///
/// The magnitude is scaled by `10^decimals`, rounded, and split into integer
/// and fractional parts using integer arithmetic. A `-` is prepended for
/// negative inputs.
pub fn format_commas(n: f64, decimals: u8) -> String {
    let precision = usize::from(decimals);
    let sign = if n < 0.0 { "-" } else { "" };
    let magnitude = n.abs();

    let scale = 10f64.powi(i32::from(decimals));
    let scaled = (magnitude * scale).round();

    // Values whose scaled form cannot be held in a u64 are printed by the
    // plain float formatter: dropping the separators beats emitting a
    // wrapped or saturated integer.
    let fits_u64 = scaled.is_finite() && scaled < u64::MAX as f64;
    if !fits_u64 {
        return format!("{sign}{magnitude:.precision$}");
    }

    let units = scaled as u64;
    if decimals == 0 {
        return format!("{sign}{}", insert_commas(units));
    }

    // Split the scaled integer back into whole and fractional digits; the
    // fraction is left-padded with zeros up to the requested width.
    let divisor = scale as u64;
    let whole = insert_commas(units / divisor);
    let fraction = units % divisor;
    format!("{sign}{whole}.{fraction:0>precision$}")
}

fn format_currency(n: f64, symbol: &str, decimals: u8) -> String {
    // Put any minus sign before the currency symbol so callers see
    // "-$99.50" rather than "$-99.50".
    if n < 0.0 {
        format!("-{}{}", symbol, format_commas(n.abs(), decimals))
    } else {
        format!("{}{}", symbol, format_commas(n, decimals))
    }
}

/// Format a number as a percentage: the value is multiplied by 100 and
/// followed by `%`, with `decimals` digits after the decimal point.
///
/// With zero decimals the percentage is rounded to the nearest integer
/// (halves away from zero). Percentages too large for an `i64` are printed
/// through the float formatter rather than cast, because the cast would
/// silently saturate at `i64::MAX` / `i64::MIN`. Non-finite products take
/// the same path and render as Rust's `NaN` / `inf` / `-inf`.
pub fn format_percent(n: f64, decimals: u8) -> String {
    // 2^63: the smallest magnitude that is out of range for a positive `i64`.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    let pct = n * 100.0;
    if decimals > 0 {
        return format!("{:.prec$}%", pct, prec = usize::from(decimals));
    }

    let rounded = pct.round();
    if rounded.abs() < I64_LIMIT {
        // The cast also normalises `-0.0` to `0`, so "-0%" is never printed.
        format!("{}%", rounded as i64)
    } else {
        // Already integral, so zero decimals loses nothing.
        format!("{rounded:.0}%")
    }
}

/// Format a number per the `0.00E+00` format: two mantissa decimals and an
/// explicitly signed exponent of at least two digits (e.g. `1.23E+04`).
///
/// Rust's own `{:.2E}` output omits the `+` and the zero padding (`1.23E4`),
/// so the exponent is rewritten here. Values that Rust prints without an
/// exponent (NaN and the infinities) are returned unchanged.
fn format_scientific(n: f64) -> String {
    let raw = format!("{:.2E}", n);
    let Some((mantissa, exponent)) = raw.split_once('E') else {
        return raw;
    };

    let (sign, digits) = match exponent.strip_prefix('-') {
        Some(rest) => ('-', rest),
        None => ('+', exponent),
    };
    // Pad to two digits; longer exponents (e.g. 100) are kept in full.
    format!("{mantissa}E{sign}{digits:0>2}")
}

/// Render an unsigned integer in decimal with a comma between every group of
/// three digits, counted from the right (e.g. `1234567` → `1,234,567`).
fn insert_commas(n: u64) -> String {
    let digits = n.to_string();

    // The leading group holds 1–3 digits; everything after it splits evenly
    // into triples.
    let head_len = match digits.len() % 3 {
        0 => 3,
        partial => partial,
    };
    let (head, tail) = digits.split_at(head_len);

    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);
    grouped.push_str(head);
    for start in (0..tail.len()).step_by(3) {
        grouped.push(',');
        grouped.push_str(&tail[start..start + 3]);
    }
    grouped
}

// ── Custom format string interpreter ──────────────────────────────────────

/// Simplified interpreter for a custom Excel format string.
///
/// Only the first `;`-separated section is used, whatever the sign of `n`.
/// The section is scanned once to collect:
///
/// * a prefix — `$`, the symbol from a `[$symbol-locale]` bracket, or any
///   other non-whitespace literal seen before the number placeholders;
/// * a suffix — the contents of quoted literals, plus a bare `E`/`e` that
///   follows the number placeholders;
/// * flags for `%`, a thousands-separator comma, and scientific notation
///   (`E+` / `E-`);
/// * the number of `0` placeholders after the decimal point, which becomes
///   the precision. `#` placeholders after the point are counted but do not
///   affect the output.
///
/// Colour brackets, escaped characters (`\x`), padding (`_x`) and fill (`*x`)
/// directives are dropped. The value is multiplied by 100 when `%` is
/// present, rendered by the matching primitive, and assembled as
/// sign + prefix + digits + suffix + `%`.
fn apply_custom(n: f64, fmt: &str) -> String {
    // Multi-section: take the first section (positive numbers).
    // Second section = negatives, third = zero, fourth = text.
    let section = fmt.split(';').next().unwrap_or(fmt);

    // ── Parse the section ────────────────────────────────────────────────
    let mut currency_prefix = String::new();
    let mut suffix = String::new(); // literal text after the number
    let mut has_percent = false;
    let mut has_comma_in_num = false;
    let mut decimal_zeros = 0u8; // '0' chars after '.'
    let mut _decimal_hashes = 0u8; // '#' chars after '.'  (optional digits)
    let mut has_scientific = false;
    let mut in_decimal = false;
    let mut in_num_part = false;

    let mut chars = section.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            // Bracketed: colour like [Red] or locale/currency like [$€-407]
            '[' => {
                let mut inner = String::new();
                for ch in chars.by_ref() {
                    if ch == ']' {
                        break;
                    }
                    inner.push(ch);
                }
                if let Some(rest) = inner.strip_prefix('$') {
                    // [$symbol-locale] — extract symbol
                    let sym: String = rest.chars().take_while(|&ch| ch != '-').collect();
                    if !sym.is_empty() {
                        currency_prefix = sym;
                    }
                }
                // Colour directives ignored.
            },
            // Quoted literal text — collect as suffix
            '"' => {
                for ch in chars.by_ref() {
                    if ch == '"' {
                        break;
                    }
                    suffix.push(ch);
                }
            },
            // Escape: next char is literal (and is dropped here)
            '\\' => {
                chars.next();
            },
            // _X = pad with X (alignment) — skip X
            '_' => {
                chars.next();
            },
            // *X = repeat X (fill) — skip X
            '*' => {
                chars.next();
            },

            '%' => {
                has_percent = true;
                in_num_part = true;
            },
            '.' => {
                in_decimal = true;
                in_num_part = true;
            },
            '0' => {
                in_num_part = true;
                if in_decimal {
                    // Saturate: the format string comes from the file, and a
                    // run of more than 255 placeholders must not overflow
                    // the u8 counter (panic in debug, wrap-around in release).
                    decimal_zeros = decimal_zeros.saturating_add(1);
                }
            },
            '#' => {
                in_num_part = true;
                if in_decimal {
                    _decimal_hashes = _decimal_hashes.saturating_add(1);
                }
            },
            ',' => {
                // Comma between '#'/'0' chars = thousands separator.
                // Comma at end of number part = scale-by-1000 (rare, skip for now).
                if in_num_part {
                    has_comma_in_num = true;
                }
            },
            'E' | 'e' => {
                // Only treat this as scientific notation when followed by
                // `+` or `-` (per ECMA-376 §18.8.31). Bare `E` is just
                // a literal in formats like "000E" and must not consume
                // the next character.
                if matches!(chars.peek(), Some('+') | Some('-')) {
                    has_scientific = true;
                    chars.next(); // consume the sign
                    while chars.peek().is_some_and(|d| d.is_ascii_digit()) {
                        chars.next();
                    }
                } else if !in_num_part {
                    currency_prefix.push(c);
                } else {
                    suffix.push(c);
                }
            },
            '$' => {
                currency_prefix = "$".to_string();
                in_num_part = true;
            },
            // Other literal characters before the number part = currency prefix
            c if !in_num_part && !c.is_ascii_whitespace() => {
                currency_prefix.push(c);
            },
            _ => {},
        }
    }

    let decimals = decimal_zeros; // treat '0' decimals as the required precision

    // ── Format the value ─────────────────────────────────────────────────
    let value = if has_percent { n * 100.0 } else { n };

    let body = if has_scientific {
        format_scientific(value)
    } else if has_comma_in_num {
        format_commas(value, decimals)
    } else if in_decimal && decimals > 0 {
        format_fixed(value, decimals)
    } else if in_num_part {
        format_integer(value)
    } else {
        format_general(value)
    };

    let pct_suffix = if has_percent { "%" } else { "" };

    // Hoist a leading minus in front of the prefix so negative amounts read
    // "-$99.90" rather than "$-99.90", matching `format_currency`. With an
    // empty prefix the result is unchanged.
    let (sign, digits) = match body.strip_prefix('-') {
        Some(rest) => ("-", rest),
        None => ("", body.as_str()),
    };

    format!("{}{}{}{}{}", sign, currency_prefix, digits, suffix, pct_suffix)
}

// ── Tests ──────────────────────────────────────────────────────────────────

/// Unit tests for the built-in format IDs, the custom format interpreter,
/// and the public formatting primitives.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn builtin_general() {
        assert_eq!(apply_format(42.0, 0, None), "42");
        assert_eq!(apply_format(4.25, 0, None), "4.25");
    }

    #[test]
    fn builtin_integer() {
        assert_eq!(apply_format(42.7, 1, None), "43");
    }

    #[test]
    fn builtin_fixed_two() {
        assert_eq!(apply_format(4.25678, 2, None), "4.26");
    }

    #[test]
    fn builtin_commas_zero() {
        assert_eq!(apply_format(1234567.0, 3, None), "1,234,567");
    }

    #[test]
    fn builtin_commas_two() {
        assert_eq!(apply_format(1234567.891, 4, None), "1,234,567.89");
    }

    #[test]
    fn builtin_percent_zero() {
        assert_eq!(apply_format(0.75, 9, None), "75%");
    }

    #[test]
    fn builtin_percent_two() {
        assert_eq!(apply_format(0.1234, 10, None), "12.34%");
    }

    #[test]
    fn builtin_currency_usd() {
        assert_eq!(apply_format(1234.5, 7, None), "$1,234.50");
    }

    #[test]
    fn custom_thousands() {
        assert_eq!(apply_format(1234567.0, 164, Some("#,##0")), "1,234,567");
    }

    #[test]
    fn custom_thousands_two_decimals() {
        assert_eq!(apply_format(1234.5, 164, Some("#,##0.00")), "1,234.50");
    }

    #[test]
    fn custom_percent() {
        assert_eq!(apply_format(0.5, 164, Some("0%")), "50%");
    }

    #[test]
    fn custom_percent_decimals() {
        assert_eq!(apply_format(0.1256, 164, Some("0.00%")), "12.56%");
    }

    #[test]
    fn custom_euro() {
        let result = apply_format(1234.5, 164, Some("[$€-407]#,##0.00"));
        // Look for the actual symbol: `contains("")` is true for every string
        // and would let a dropped currency symbol go unnoticed.
        assert!(result.contains('€'), "expected euro symbol, got: {result}");
        assert!(result.contains("1,234.50"), "expected formatted number, got: {result}");
    }

    #[test]
    fn custom_dollar_prefix() {
        assert_eq!(apply_format(99.9, 164, Some("$#,##0.00")), "$99.90");
    }

    #[test]
    fn negative_commas() {
        assert_eq!(apply_format(-1234.5, 4, None), "-1,234.50");
    }

    #[test]
    fn zero_percent() {
        assert_eq!(apply_format(0.0, 9, None), "0%");
    }

    #[test]
    fn large_commas() {
        assert_eq!(apply_format(1_000_000_000.0, 3, None), "1,000,000,000");
    }

    // ── Edge cases ──────────────────────────────────────────────────────

    #[test]
    fn nan_renders_as_label() {
        // Returning the literal "NaN" rather than an empty string keeps
        // anomalous cells visible in extracted text so they're not
        // mistaken for empty data.
        assert_eq!(apply_format(f64::NAN, 0, None), "NaN");
    }

    #[test]
    fn infinity_renders_as_label() {
        assert_eq!(apply_format(f64::INFINITY, 0, None), "Infinity");
        assert_eq!(apply_format(f64::NEG_INFINITY, 0, None), "-Infinity");
    }

    #[test]
    fn zero_renders_uniformly() {
        assert_eq!(apply_format(0.0, 0, None), "0");
        assert_eq!(apply_format(0.0, 2, None), "0.00");
        assert_eq!(apply_format(0.0, 4, None), "0.00");
    }

    #[test]
    fn negative_percent() {
        assert_eq!(apply_format(-0.25, 9, None), "-25%");
        assert_eq!(apply_format(-0.1234, 10, None), "-12.34%");
    }

    #[test]
    fn negative_currency() {
        assert_eq!(apply_format(-99.5, 7, None), "-$99.50");
    }

    #[test]
    fn scientific_builtin() {
        // Format id 11 = 0.00E+00 → uses Rust's "{:.2E}" wrapper.
        let s = apply_format(12345.6789, 11, None);
        assert!(s.contains('E'), "scientific got: {s}");
    }

    #[test]
    fn accounting_alias() {
        // 37–40 map to comma formats matching #,##0 family.
        assert_eq!(apply_format(1234.0, 37, None), "1,234");
        assert_eq!(apply_format(1234.5, 39, None), "1,234.50");
    }

    #[test]
    fn accounting_paren_range() {
        // 41..=44 are accounting variants → commas with 2 decimals.
        for id in 41u32..=44 {
            assert_eq!(apply_format(1234.5, id, None), "1,234.50", "fmt id {id}");
        }
    }

    #[test]
    fn fraction_falls_back_to_general() {
        // Fraction formats (12,13) currently render as general.
        assert_eq!(apply_format(1.5, 12, None), "1.5");
        assert_eq!(apply_format(2.0, 13, None), "2");
    }

    #[test]
    fn custom_general_falls_through_to_default() {
        // "General" and "@" should fall back to General formatting.
        assert_eq!(apply_format(42.5, 164, Some("General")), "42.5");
        assert_eq!(apply_format(42.0, 164, Some("@")), "42");
    }

    #[test]
    fn custom_blank_falls_back_to_general() {
        assert_eq!(apply_format(4.25, 164, Some("")), "4.25");
        assert_eq!(apply_format(4.25, 164, Some("   ")), "4.25");
    }

    #[test]
    fn custom_multi_section_uses_first() {
        // Multi-section format: positives use first section only.
        assert_eq!(apply_format(1234.5, 164, Some("#,##0.00;-#,##0.00")), "1,234.50");
    }

    #[test]
    fn custom_with_quoted_literal_suffix() {
        let result = apply_format(42.0, 164, Some(r#"0" units""#));
        assert!(result.contains("42"), "got: {result}");
        assert!(result.contains("units"), "got: {result}");
    }

    #[test]
    fn custom_color_directive_is_stripped() {
        // [Red] is a color directive — should be ignored, not emitted.
        let result = apply_format(123.0, 164, Some("[Red]#,##0"));
        assert!(!result.contains("Red"));
        assert!(result.contains("123"));
    }

    #[test]
    fn format_general_keeps_integers_unsuffixed() {
        // Whole-number floats render without ".0".
        assert_eq!(format_general(42.0), "42");
        assert_eq!(format_general(-7.0), "-7");
        assert_eq!(format_general(0.0), "0");
    }

    #[test]
    fn format_general_keeps_decimal_for_fraction() {
        assert_eq!(format_general(4.25), "4.25");
        assert_eq!(format_general(-2.5), "-2.5");
    }

    #[test]
    fn format_commas_negative_with_decimals() {
        assert_eq!(format_commas(-1234.5, 2), "-1,234.50");
    }

    #[test]
    fn format_commas_zero() {
        assert_eq!(format_commas(0.0, 0), "0");
        assert_eq!(format_commas(0.0, 2), "0.00");
    }

    #[test]
    fn format_percent_negative() {
        assert_eq!(format_percent(-0.5, 0), "-50%");
    }

    #[test]
    fn format_percent_zero_decimals() {
        // 50% with 0 decimals.
        assert_eq!(format_percent(0.5, 0), "50%");
    }
}