Skip to main content

wolfxl_core/
map.rs

1//! Workbook map: one-page summary of every sheet (dimensions, headers,
2//! classification, anchored tables) plus workbook-level named ranges.
3//!
4//! The map exists for agents that need to *orient* before fetching cell
5//! ranges. Loading every sheet's full grid just to ask "which sheet has
6//! the data I want?" is the cost the map prevents.
7//!
8//! Build via [`Workbook::map`](crate::Workbook::map). Render to JSON or
9//! plain text in the consuming binary — `wolfxl-core` stays serde-free.
10
11use crate::cell::CellValue;
12use crate::sheet::Sheet;
13
/// High-level guess at what a sheet is for, inferred purely from the
/// shape and fill of its value grid (merged cells and formulas are not
/// consulted).
///
/// Consumers use the class to choose a follow-up strategy: a `Data`
/// sheet is worth a `peek`, a `Readme` sheet usually wants a
/// single-column dump, and a `Summary` sheet is a small panel with low
/// fill density.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SheetClass {
    /// The sheet has zero rows or zero columns.
    Empty,
    /// The sheet is exactly one column wide (notes-column convention).
    Readme,
    /// The sheet is small and sparsely filled (dashboards, KPI panels).
    Summary,
    /// Anything else: dense or large grids.
    Data,
}
27
28impl SheetClass {
29    /// Lowercase tag suitable for serialization or grep-friendly text.
30    pub fn as_str(&self) -> &'static str {
31        match self {
32            SheetClass::Empty => "empty",
33            SheetClass::Readme => "readme",
34            SheetClass::Summary => "summary",
35            SheetClass::Data => "data",
36        }
37    }
38}
39
40#[derive(Debug, Clone)]
41pub struct SheetMap {
42    pub name: String,
43    pub rows: usize,
44    pub cols: usize,
45    pub class: SheetClass,
46    /// First-row contents, with empty cells preserved as `""` so column
47    /// position is meaningful for downstream consumers.
48    pub headers: Vec<String>,
49    /// Workbook tables (calamine `table_names_in_sheet`) anchored on this
50    /// sheet. Empty when the workbook defines no tables, which is the
51    /// common case for hand-authored sheets.
52    pub tables: Vec<String>,
53}
54
55#[derive(Debug, Clone)]
56pub struct WorkbookMap {
57    pub path: String,
58    pub sheets: Vec<SheetMap>,
59    /// Workbook-level defined names as `(name, formula)` pairs, exactly
60    /// as calamine surfaces them. The formula string is a sheet+range
61    /// reference like `'P&L'!$A$1:$D$25` for typical named ranges.
62    pub named_ranges: Vec<(String, String)>,
63}
64
65/// Classify a sheet by shape and density. Pure value-grid heuristic — does
66/// not look at merged cells, formulas, or formatting.
67///
68/// Rules in priority order:
69/// 1. Zero rows or cols → `Empty`.
70/// 2. Exactly one column wide → `Readme` (notes-column convention).
71/// 3. Small (≤20 rows × ≤10 cols) AND fill density <40% → `Summary`
72///    (sparse formula sheets, dashboards, KPI panels).
73/// 4. Otherwise → `Data` (default for anything dense or large).
74pub fn classify_sheet(sheet: &Sheet) -> SheetClass {
75    let (rows, cols) = sheet.dimensions();
76    if rows == 0 || cols == 0 {
77        return SheetClass::Empty;
78    }
79    if cols == 1 {
80        return SheetClass::Readme;
81    }
82    let total = rows * cols;
83    let non_empty: usize = sheet
84        .rows()
85        .iter()
86        .map(|row| {
87            row.iter()
88                .filter(|c| !matches!(c.value, CellValue::Empty))
89                .count()
90        })
91        .sum();
92    let density = non_empty as f64 / total as f64;
93    if rows <= 20 && cols <= 10 && density < 0.4 {
94        return SheetClass::Summary;
95    }
96    SheetClass::Data
97}
98
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cell::Cell;

    /// Build a string-valued cell with no number format.
    fn cell(s: &str) -> Cell {
        Cell {
            value: CellValue::String(s.to_string()),
            number_format: None,
        }
    }

    /// Build an empty cell.
    fn empty() -> Cell {
        Cell::empty()
    }

    #[test]
    fn empty_sheet_is_classified_empty() {
        let sheet = Sheet::from_rows_for_test("blank", vec![]);
        assert_eq!(classify_sheet(&sheet), SheetClass::Empty);
    }

    #[test]
    fn single_column_sheet_is_classified_readme() {
        // A notes column — multiple rows but one column wide.
        let rows = (0..15)
            .map(|i| vec![cell(&format!("note line {i}"))])
            .collect();
        let sheet = Sheet::from_rows_for_test("Notes", rows);
        assert_eq!(classify_sheet(&sheet), SheetClass::Readme);
    }

    #[test]
    fn small_sparse_sheet_is_classified_summary() {
        // 5×5 with only a title and one KPI populated → density 2/25 = 8%,
        // well under the 40% threshold; small enough on both axes.
        let mut rows = vec![vec![empty(); 5]; 5];
        rows[0][0] = cell("Q1 2026 Summary");
        rows[2][1] = cell("$1.2M");
        let sheet = Sheet::from_rows_for_test("Summary", rows);
        assert_eq!(classify_sheet(&sheet), SheetClass::Summary);
    }

    #[test]
    fn dense_rectangular_sheet_is_classified_data() {
        // 10×5 fully populated grid → density 100%, defaults to Data.
        let rows = (0..10)
            .map(|r| (0..5).map(|c| cell(&format!("r{r}c{c}"))).collect())
            .collect();
        let sheet = Sheet::from_rows_for_test("Ledger", rows);
        assert_eq!(classify_sheet(&sheet), SheetClass::Data);
    }

    #[test]
    fn large_sheet_skips_summary_branch_even_if_sparse() {
        // 50×15 mostly empty (only first cell filled) → density well under
        // 40% but dimensions exceed the small-sheet gate, so falls
        // through to Data. Without this rule, a giant mostly-empty data
        // grid would mis-classify as Summary.
        let mut rows = vec![vec![empty(); 15]; 50];
        rows[0][0] = cell("seed");
        let sheet = Sheet::from_rows_for_test("Big", rows);
        assert_eq!(classify_sheet(&sheet), SheetClass::Data);
    }

    #[test]
    fn density_at_exactly_forty_percent_is_classified_data() {
        // 5×5 with the first two rows filled → 10/25 = 40%. The Summary
        // rule requires density strictly below 40%, so this is Data.
        let mut rows = vec![vec![empty(); 5]; 5];
        for row in rows.iter_mut().take(2) {
            for slot in row.iter_mut() {
                *slot = cell("x");
            }
        }
        let sheet = Sheet::from_rows_for_test("Threshold", rows);
        assert_eq!(classify_sheet(&sheet), SheetClass::Data);
    }

    #[test]
    fn size_gate_is_inclusive_at_twenty_by_ten() {
        // Exactly 20×10 with a single populated cell still counts as
        // "small", so the sparse grid is a Summary.
        let mut rows = vec![vec![empty(); 10]; 20];
        rows[0][0] = cell("seed");
        let sheet = Sheet::from_rows_for_test("Edge", rows);
        assert_eq!(classify_sheet(&sheet), SheetClass::Summary);
    }

    #[test]
    fn one_past_size_gate_is_classified_data() {
        // One row or one column beyond the gate disqualifies Summary even
        // though the grid is almost empty.
        let mut tall = vec![vec![empty(); 10]; 21];
        tall[0][0] = cell("seed");
        let sheet = Sheet::from_rows_for_test("Tall", tall);
        assert_eq!(classify_sheet(&sheet), SheetClass::Data);

        let mut wide = vec![vec![empty(); 11]; 20];
        wide[0][0] = cell("seed");
        let sheet = Sheet::from_rows_for_test("Wide", wide);
        assert_eq!(classify_sheet(&sheet), SheetClass::Data);
    }
}