Skip to main content

pmcp_workbook_runtime/
resolve.rs

1//! Range/reference resolution PRIMITIVES (CMP-02, D-06/D-07) — the umya-free,
2//! SAFE FALLIBLE subset RELOCATED into `workbook-runtime` (Phase 11, Plan 05).
3//!
4//! The runtime executor's `run()` needs to expand an [`Expr::Range`](crate::Expr)
5//! into its member `cell_key`s + 2-D [`RangeShape`] at SERVE time, and to parse
6//! / split A1 addresses for the loop-instance keying — all WITHOUT pulling
7//! `crate::dialect` (findings) or `crate::ingest` (`DefinedNameRecord`, umya). So
8//! the pure functions over `RangeRef` + `cell_key` live here:
9//!
10//! - [`expand_range`] / [`RangeShape`] / [`ResolveError`] / [`MAX_RANGE_CELLS`]
11//! - [`parse_a1`] / [`split_ref`]
12//!
13//! The FINDING-PUSHING DAG-build path (`collect_refs` / `expand_range_into_report`
14//! / `resolve_name`, which need `LintReport` + `DefinedNameRecord`) STAYS in
15//! `workbook-compiler`; it imports these primitives from here.
16
17use serde::Serialize;
18
19use crate::range_ref::{cell_key, RangeRef};
20
/// Upper bound on how many member cells one range reference is allowed to
/// expand into (finding #6, threat T-09-08).
///
/// A range above this bound — for instance the whole-sheet `A1:XFD1048576`,
/// roughly 17 billion cells — is never expanded. Expansion fails with an `Err`
/// instead (which the compiler turns into ONE located `dag/range-too-large`
/// finding), so neither a hostile nor a careless whole-sheet range can force
/// the allocation of millions of edges.
pub const MAX_RANGE_CELLS: usize = 10_000;
27
/// Remove every `$` anchor marker from an A1 address, so that `$C$16`, `$C16`,
/// `C$16` and `C16` all collapse to the same canonical `COLROW` text `C16`
/// (D-07, RESEARCH Pitfall 2).
///
/// The input is expected to be a bare address: any sheet qualifier must have
/// been split off by the caller beforehand.
fn strip_anchors(addr: &str) -> String {
    addr.chars().filter(|&ch| ch != '$').collect()
}
34
35/// Split a possibly sheet-qualified, possibly `$`-anchored reference string into
36/// `(sheet, canonical_addr)`, defaulting the sheet to `current_sheet` when the
37/// reference is unqualified. The sheet name keeps any surrounding `'…'` quoting
38/// stripped.
39///
40/// Public so per-room row-offset rebasing can reuse the SAME
41/// cross-sheet/anchor-stripping split (no second A1 parser). Total + fallible:
42/// it never panics.
43pub fn split_ref(reference: &str, current_sheet: &str) -> (String, String) {
44    match reference.rsplit_once('!') {
45        Some((sheet, addr)) => (sheet.trim_matches('\'').to_string(), strip_anchors(addr)),
46        None => (current_sheet.to_string(), strip_anchors(reference)),
47    }
48}
49
/// Split a canonical (anchor-stripped) A1 cell address such as `C16` into
/// `(column_letters, row_number)`.
///
/// The column letters are returned upper-cased; the row is 1-based. `None` is
/// returned — never a panic — when the address is malformed: no digits at all,
/// no leading letters, a non-ASCII-alphabetic byte in the column part, a row
/// part that does not parse as a `u32`, or a row of `0`.
pub fn parse_a1(addr: &str) -> Option<(String, u32)> {
    // The row starts at the first ASCII digit; everything before it is the column.
    let digits_at = addr.find(|ch: char| ch.is_ascii_digit())?;
    let (letters, digits) = addr.split_at(digits_at);

    if letters.is_empty() || letters.bytes().any(|b| !b.is_ascii_alphabetic()) {
        return None;
    }

    match digits.parse::<u32>() {
        Ok(row) if row != 0 => Some((letters.to_ascii_uppercase(), row)),
        _ => None,
    }
}
68
/// Translate a run of column letters (`A`, `Z`, `AA`, `XFD`) into its 1-based
/// column index, treating the run as a bijective base-26 number.
///
/// Letters are matched case-insensitively. Returns `None` for an empty run,
/// for any byte that is not an ASCII letter, or when the index would not fit
/// in a `u32`.
fn col_to_index(col: &str) -> Option<u32> {
    if col.is_empty() {
        return None;
    }
    col.bytes().try_fold(0u32, |acc, byte| {
        if !byte.is_ascii_alphabetic() {
            return None;
        }
        // `A` counts as 1 (not 0), so there is no zero digit in this numbering.
        let digit = u32::from(byte.to_ascii_uppercase() - b'A') + 1;
        acc.checked_mul(26)?.checked_add(digit)
    })
}
84
85/// Convert a canonical A1 cell address (`C16`) to the ZERO-indexed
86/// `(row, col)` coordinate the `rust_xlsxwriter` writer expects (`C16` →
87/// `(15, 2)`) — review item 8, RESEARCH Pitfall 3.
88///
89/// This is the SINGLE shared A1→`(row, col)` conversion: the Plan-02 writer
90/// reuses it rather than duplicating column-letter math. Built on the existing
91/// [`parse_a1`] (which canonicalizes + rejects malformed) + [`col_to_index`]
92/// (1-based column index). Returns `None` for any malformed address — never a
93/// panic. The returned column is `u16` and the row is `u32`, matching the
94/// writer's `ColNum`/`RowNum` (so the writer needs no second cast layer).
95///
96/// The caller MUST strip any sheet qualifier + `$`-anchors first (e.g. via
97/// [`split_ref`]); this takes the canonical `COLROW` form `parse_a1` accepts.
98pub fn a1_to_zero_indexed_row_col(addr: &str) -> Option<(u32, u16)> {
99    let (col_letters, row) = parse_a1(addr)?;
100    let col_1based = col_to_index(&col_letters)?;
101    // parse_a1 guarantees row >= 1; col_to_index guarantees col >= 1. Convert to
102    // zero-indexed for the writer. col_1based fits a u16 only up to 65_536; the
103    // sheet column cap (XFD = 16_384) keeps it well inside u16, but guard anyway.
104    let col_zero = u16::try_from(col_1based.checked_sub(1)?).ok()?;
105    Some((row - 1, col_zero))
106}
107
/// Render a 1-based column index as its letter run (`1` → `A`, `27` → `AA`).
/// An index of `0` produces the empty string.
///
/// WR-07: the run is assembled from `char`s directly, so no fallible UTF-8
/// decode is involved.
fn index_to_col(mut idx: u32) -> String {
    // Letters come out least-significant first; the run is reversed at the end.
    let mut reversed = String::new();
    while idx > 0 {
        // Step down by one before dividing: the numbering has no zero digit.
        idx -= 1;
        let offset = (idx % 26) as u8;
        reversed.push(char::from(b'A' + offset));
        idx /= 26;
    }
    reversed.chars().rev().collect()
}
121
122/// The 2-D shape (`rows` × `cols`) a [`RangeRef`] expands to — published
123/// alongside the member `cell_key`s so the executor can rebuild a shape-correct
124/// `Vec<Vec<CellValue>>` for `VLOOKUP`/`INDEX`/`MATCH`.
125///
126/// `rows = row_hi - row_lo + 1`, `cols = col_hi - col_lo + 1` — both ≥ 1 for any
127/// valid range (a single cell is `{rows: 1, cols: 1}`).
128#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, schemars::JsonSchema)]
129pub struct RangeShape {
130    /// The number of rows the range spans (inclusive).
131    pub rows: u32,
132    /// The number of columns the range spans (inclusive).
133    pub cols: u32,
134}
135
136/// A fallible range-expansion failure (D-06, T-09-08). [`expand_range`] returns
137/// this as an `Err` — NEVER a panic and NEVER a silent empty `Vec` that looks
138/// like a 0-cell range. The compiler's finding-pushing path translates each
139/// variant back into the located `dag/malformed-range` / `dag/range-too-large`
140/// `LintFinding`.
141#[derive(Debug, Clone, PartialEq, Eq, Serialize, schemars::JsonSchema)]
142pub enum ResolveError {
143    /// A range endpoint did not parse as a valid A1 address (or column).
144    MalformedRange {
145        /// The (anchor-stripped) start endpoint as authored.
146        start: String,
147        /// The (anchor-stripped) end endpoint as authored.
148        end: String,
149    },
150    /// The range expands to more than [`MAX_RANGE_CELLS`] member cells.
151    RangeTooLarge {
152        /// The member-cell count the range would expand to.
153        cells: u64,
154        /// The cap that was exceeded ([`MAX_RANGE_CELLS`]).
155        cap: usize,
156    },
157}
158
159/// Expand a [`RangeRef`] into its member `cell_key`s (column-major, D-06) AND
160/// the 2-D [`RangeShape`] it spans, bounded by [`MAX_RANGE_CELLS`]. This is the
161/// SAFE, FALLIBLE public API: an over-cap or malformed range is an `Err`, never
162/// a panic and never a silent empty.
163pub fn expand_range(
164    range: &RangeRef,
165    current_sheet: &str,
166) -> Result<(Vec<String>, RangeShape), ResolveError> {
167    let sheet = if range.sheet.is_empty() {
168        current_sheet.to_string()
169    } else {
170        range.sheet.trim_matches('\'').to_string()
171    };
172    let start = strip_anchors(&range.start);
173    let end = strip_anchors(&range.end);
174
175    let malformed = || ResolveError::MalformedRange {
176        start: start.clone(),
177        end: end.clone(),
178    };
179
180    let (Some((sc, sr)), Some((ec, er))) = (parse_a1(&start), parse_a1(&end)) else {
181        return Err(malformed());
182    };
183    let (Some(sci), Some(eci)) = (col_to_index(&sc), col_to_index(&ec)) else {
184        return Err(malformed());
185    };
186
187    let (col_lo, col_hi) = (sci.min(eci), sci.max(eci));
188    let (row_lo, row_hi) = (sr.min(er), sr.max(er));
189    let cols = col_hi - col_lo + 1;
190    let rows = row_hi - row_lo + 1;
191    // u64 product avoids overflow on a whole-sheet range before the cap check.
192    let n_cells = u64::from(cols) * u64::from(rows);
193
194    if n_cells > MAX_RANGE_CELLS as u64 {
195        return Err(ResolveError::RangeTooLarge {
196            cells: n_cells,
197            cap: MAX_RANGE_CELLS,
198        });
199    }
200
201    let mut keys = Vec::with_capacity(n_cells as usize);
202    for col in col_lo..=col_hi {
203        let col_letters = index_to_col(col);
204        for row in row_lo..=row_hi {
205            keys.push(cell_key(&sheet, &format!("{col_letters}{row}")));
206        }
207    }
208    Ok((keys, RangeShape { rows, cols }))
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `RangeRef` from borrowed parts.
    fn rr(sheet: &str, start: &str, end: &str) -> RangeRef {
        RangeRef {
            sheet: sheet.into(),
            start: start.into(),
            end: end.into(),
        }
    }

    #[test]
    fn public_expand_range_single_column_returns_keys_and_shape() {
        let range = rr("S", "B2", "B4");
        let (keys, shape) = expand_range(&range, "S").expect("valid range");
        assert_eq!(keys, ["S!B2", "S!B3", "S!B4"]);
        assert_eq!(shape, RangeShape { rows: 3, cols: 1 });
    }

    #[test]
    fn public_expand_range_2x2_is_column_major_with_2x2_shape() {
        let range = rr("S", "A1", "B2");
        let (keys, shape) = expand_range(&range, "S").expect("valid range");
        // Column A is emitted in full before column B.
        assert_eq!(keys, ["S!A1", "S!A2", "S!B1", "S!B2"]);
        assert_eq!(shape, RangeShape { rows: 2, cols: 2 });
    }

    #[test]
    fn public_expand_range_defaults_empty_sheet_to_current() {
        let range = rr("", "C1", "C2");
        let (keys, _shape) = expand_range(&range, "5_Quantities").expect("valid");
        assert_eq!(keys, ["5_Quantities!C1", "5_Quantities!C2"]);
    }

    #[test]
    fn public_expand_range_over_cap_is_err() {
        let range = rr("S", "A1", "XFD1048576");
        let err = expand_range(&range, "S").expect_err("an over-cap range must be Err");
        let reports_cap_and_count = match err {
            ResolveError::RangeTooLarge { cells, cap } => {
                cap == MAX_RANGE_CELLS && cells > MAX_RANGE_CELLS as u64
            }
            ResolveError::MalformedRange { .. } => false,
        };
        assert!(reports_cap_and_count);
    }

    #[test]
    fn public_expand_range_malformed_endpoint_is_err() {
        let range = rr("S", "1A", "B2");
        let err = expand_range(&range, "S").expect_err("a malformed endpoint must be Err");
        assert!(matches!(err, ResolveError::MalformedRange { .. }));
    }

    #[test]
    fn public_parse_a1_parses_and_rejects() {
        assert_eq!(parse_a1("C16"), Some((String::from("C"), 16)));

        let rejected = [
            "$C$16", // anchors must be stripped by the caller
            "16",    // no column letters
            "C0",    // row 0 is invalid
            "CC",    // no row digits
        ];
        for addr in rejected {
            assert_eq!(parse_a1(addr), None);
        }
    }

    #[test]
    fn public_split_ref_strips_anchors_and_defaults_sheet() {
        let cases = [
            ("2_Constants!$C$17", ("2_Constants", "C17")),
            ("$C$16", ("5_Quantities", "C16")),
        ];
        for (reference, (sheet, addr)) in cases {
            assert_eq!(
                split_ref(reference, "5_Quantities"),
                (sheet.to_string(), addr.to_string())
            );
        }
    }

    #[test]
    fn a1_to_zero_indexed_row_col_converts_and_rejects() {
        let accepted = [
            ("C16", (15, 2)), // review item 8 example
            ("A1", (0, 0)),   // top-left
            ("AA1", (0, 26)), // col 27 1-based -> 26 zero-indexed
        ];
        for (addr, row_col) in accepted {
            assert_eq!(a1_to_zero_indexed_row_col(addr), Some(row_col));
        }

        // Malformed -> None, never a panic.
        let rejected = [
            "1A",
            "$C$16", // anchors not stripped
            "",
        ];
        for addr in rejected {
            assert_eq!(a1_to_zero_indexed_row_col(addr), None);
        }
    }

    #[test]
    fn col_index_round_trips() {
        let pairs = [("A", 1u32), ("Z", 26), ("AA", 27), ("XFD", 16384)];
        for (letters, index) in pairs {
            assert_eq!(col_to_index(letters), Some(index));
            assert_eq!(index_to_col(index), letters);
        }
    }
}