pmcp_workbook_runtime/resolve.rs
1//! Range/reference resolution PRIMITIVES (CMP-02, D-06/D-07) — the umya-free,
2//! SAFE FALLIBLE subset RELOCATED into `workbook-runtime` (Phase 11, Plan 05).
3//!
4//! The runtime executor's `run()` needs to expand an [`Expr::Range`](crate::Expr)
5//! into its member `cell_key`s + 2-D [`RangeShape`] at SERVE time, and to parse
6//! / split A1 addresses for the loop-instance keying — all WITHOUT pulling
7//! `crate::dialect` (findings) or `crate::ingest` (`DefinedNameRecord`, umya). So
8//! the pure functions over `RangeRef` + `cell_key` live here:
9//!
10//! - [`expand_range`] / [`RangeShape`] / [`ResolveError`] / [`MAX_RANGE_CELLS`]
//! - [`parse_a1`] / [`split_ref`] / [`a1_to_zero_indexed_row_col`]
12//!
13//! The FINDING-PUSHING DAG-build path (`collect_refs` / `expand_range_into_report`
14//! / `resolve_name`, which need `LintReport` + `DefinedNameRecord`) STAYS in
15//! `workbook-compiler`; it imports these primitives from here.
16
17use serde::Serialize;
18
19use crate::range_ref::{cell_key, RangeRef};
20
/// Upper bound on how many member cells one range reference may expand into
/// (finding #6, threat T-09-08).
///
/// [`expand_range`] refuses to expand a range larger than this — for example
/// the whole-sheet `A1:XFD1048576`, roughly 17 billion cells — and returns
/// [`ResolveError::RangeTooLarge`] instead. The compiler turns that error into
/// ONE located `dag/range-too-large` finding, so a hostile or careless
/// whole-sheet range can never allocate millions of edges.
pub const MAX_RANGE_CELLS: usize = 10_000;
27
/// Remove every `$` anchor marker from an A1 address, so `$C$16`, `$C16`,
/// `C$16` and `C16` all normalize to the canonical `COLROW` form `C16`
/// (D-07, RESEARCH Pitfall 2).
///
/// The input must NOT carry a sheet qualifier; callers split that off first.
/// Nothing is validated here: a `$` is dropped wherever it appears.
fn strip_anchors(addr: &str) -> String {
    addr.chars().filter(|&ch| ch != '$').collect()
}
34
35/// Split a possibly sheet-qualified, possibly `$`-anchored reference string into
36/// `(sheet, canonical_addr)`, defaulting the sheet to `current_sheet` when the
37/// reference is unqualified. The sheet name keeps any surrounding `'…'` quoting
38/// stripped.
39///
40/// Public so per-room row-offset rebasing can reuse the SAME
41/// cross-sheet/anchor-stripping split (no second A1 parser). Total + fallible:
42/// it never panics.
43pub fn split_ref(reference: &str, current_sheet: &str) -> (String, String) {
44 match reference.rsplit_once('!') {
45 Some((sheet, addr)) => (sheet.trim_matches('\'').to_string(), strip_anchors(addr)),
46 None => (current_sheet.to_string(), strip_anchors(reference)),
47 }
48}
49
/// Split a canonical (anchor-stripped) A1 cell address such as `C16` into
/// `(column_letters, row_number)`.
///
/// The column run is everything before the first ASCII digit and is returned
/// uppercased; the remainder must parse as a `u32` row of at least 1.
///
/// Returns `None` (never panics, so the value path stays fallible) when:
///
/// - there is no digit at all, or no letters before the first digit;
/// - the column run contains anything other than ASCII letters (for example a
///   `$` anchor the caller forgot to strip);
/// - the row part is not a valid `u32`, or is `0`.
pub fn parse_a1(addr: &str) -> Option<(String, u32)> {
    let digit_at = addr.find(|ch: char| ch.is_ascii_digit())?;
    let (letters, digits) = addr.split_at(digit_at);
    if letters.is_empty() || !letters.chars().all(|ch| ch.is_ascii_alphabetic()) {
        return None;
    }
    match digits.parse::<u32>() {
        // Rows are 1-based, so row 0 is rejected.
        Ok(row) if row > 0 => Some((letters.to_ascii_uppercase(), row)),
        _ => None,
    }
}
68
/// Map a column-letter run (`A`, `Z`, `AA`, `XFD`) to its 1-based column index.
///
/// Letters are treated case-insensitively as bijective base-26 digits
/// (`A` = 1 … `Z` = 26). Returns `None` for an empty run, for any byte that is
/// not an ASCII letter, or if the index would overflow a `u32`.
fn col_to_index(col: &str) -> Option<u32> {
    if col.is_empty() {
        return None;
    }
    col.bytes().try_fold(0u32, |acc, byte| {
        if !byte.is_ascii_alphabetic() {
            return None;
        }
        let digit = u32::from(byte.to_ascii_uppercase() - b'A') + 1;
        // Checked arithmetic: an absurdly long run yields `None`, not a wrap.
        acc.checked_mul(26)?.checked_add(digit)
    })
}
84
85/// Convert a canonical A1 cell address (`C16`) to the ZERO-indexed
86/// `(row, col)` coordinate the `rust_xlsxwriter` writer expects (`C16` →
87/// `(15, 2)`) — review item 8, RESEARCH Pitfall 3.
88///
89/// This is the SINGLE shared A1→`(row, col)` conversion: the Plan-02 writer
90/// reuses it rather than duplicating column-letter math. Built on the existing
91/// [`parse_a1`] (which canonicalizes + rejects malformed) + [`col_to_index`]
92/// (1-based column index). Returns `None` for any malformed address — never a
93/// panic. The returned column is `u16` and the row is `u32`, matching the
94/// writer's `ColNum`/`RowNum` (so the writer needs no second cast layer).
95///
96/// The caller MUST strip any sheet qualifier + `$`-anchors first (e.g. via
97/// [`split_ref`]); this takes the canonical `COLROW` form `parse_a1` accepts.
98pub fn a1_to_zero_indexed_row_col(addr: &str) -> Option<(u32, u16)> {
99 let (col_letters, row) = parse_a1(addr)?;
100 let col_1based = col_to_index(&col_letters)?;
101 // parse_a1 guarantees row >= 1; col_to_index guarantees col >= 1. Convert to
102 // zero-indexed for the writer. col_1based fits a u16 only up to 65_536; the
103 // sheet column cap (XFD = 16_384) keeps it well inside u16, but guard anyway.
104 let col_zero = u16::try_from(col_1based.checked_sub(1)?).ok()?;
105 Some((row - 1, col_zero))
106}
107
/// Map a 1-based column index back to its letter run (`1` → `A`, `27` → `AA`).
/// An index of `0` yields the empty string.
///
/// WR-07: the run is assembled from `char`s directly, so there is NO fallible
/// UTF-8 decode.
fn index_to_col(mut idx: u32) -> String {
    // Letters come out least-significant first; reverse them at the end.
    let mut reversed = String::new();
    while idx != 0 {
        let zero_based = idx - 1;
        // `zero_based % 26` is always < 26, so the narrowing cast is lossless.
        let offset = (zero_based % 26) as u8;
        reversed.push(char::from(b'A' + offset));
        idx = zero_based / 26;
    }
    reversed.chars().rev().collect()
}
121
// NOTE(review): this type derives `Serialize` and `schemars::JsonSchema`. The
// field names are therefore part of the serialized form, and (per the schemars
// documentation) the `///` text below is presumably copied into the generated
// schema's descriptions — confirm before rewording any doc comment here.
/// The 2-D shape (`rows` × `cols`) a [`RangeRef`] expands to — published
/// alongside the member `cell_key`s so the executor can rebuild a shape-correct
/// `Vec<Vec<CellValue>>` for `VLOOKUP`/`INDEX`/`MATCH`.
///
/// `rows = row_hi - row_lo + 1`, `cols = col_hi - col_lo + 1` — both ≥ 1 for any
/// valid range (a single cell is `{rows: 1, cols: 1}`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, schemars::JsonSchema)]
pub struct RangeShape {
    /// The number of rows the range spans (inclusive).
    pub rows: u32,
    /// The number of columns the range spans (inclusive).
    pub cols: u32,
}
135
136/// A fallible range-expansion failure (D-06, T-09-08). [`expand_range`] returns
137/// this as an `Err` — NEVER a panic and NEVER a silent empty `Vec` that looks
138/// like a 0-cell range. The compiler's finding-pushing path translates each
139/// variant back into the located `dag/malformed-range` / `dag/range-too-large`
140/// `LintFinding`.
141#[derive(Debug, Clone, PartialEq, Eq, Serialize, schemars::JsonSchema)]
142pub enum ResolveError {
143 /// A range endpoint did not parse as a valid A1 address (or column).
144 MalformedRange {
145 /// The (anchor-stripped) start endpoint as authored.
146 start: String,
147 /// The (anchor-stripped) end endpoint as authored.
148 end: String,
149 },
150 /// The range expands to more than [`MAX_RANGE_CELLS`] member cells.
151 RangeTooLarge {
152 /// The member-cell count the range would expand to.
153 cells: u64,
154 /// The cap that was exceeded ([`MAX_RANGE_CELLS`]).
155 cap: usize,
156 },
157}
158
159/// Expand a [`RangeRef`] into its member `cell_key`s (column-major, D-06) AND
160/// the 2-D [`RangeShape`] it spans, bounded by [`MAX_RANGE_CELLS`]. This is the
161/// SAFE, FALLIBLE public API: an over-cap or malformed range is an `Err`, never
162/// a panic and never a silent empty.
163pub fn expand_range(
164 range: &RangeRef,
165 current_sheet: &str,
166) -> Result<(Vec<String>, RangeShape), ResolveError> {
167 let sheet = if range.sheet.is_empty() {
168 current_sheet.to_string()
169 } else {
170 range.sheet.trim_matches('\'').to_string()
171 };
172 let start = strip_anchors(&range.start);
173 let end = strip_anchors(&range.end);
174
175 let malformed = || ResolveError::MalformedRange {
176 start: start.clone(),
177 end: end.clone(),
178 };
179
180 let (Some((sc, sr)), Some((ec, er))) = (parse_a1(&start), parse_a1(&end)) else {
181 return Err(malformed());
182 };
183 let (Some(sci), Some(eci)) = (col_to_index(&sc), col_to_index(&ec)) else {
184 return Err(malformed());
185 };
186
187 let (col_lo, col_hi) = (sci.min(eci), sci.max(eci));
188 let (row_lo, row_hi) = (sr.min(er), sr.max(er));
189 let cols = col_hi - col_lo + 1;
190 let rows = row_hi - row_lo + 1;
191 // u64 product avoids overflow on a whole-sheet range before the cap check.
192 let n_cells = u64::from(cols) * u64::from(rows);
193
194 if n_cells > MAX_RANGE_CELLS as u64 {
195 return Err(ResolveError::RangeTooLarge {
196 cells: n_cells,
197 cap: MAX_RANGE_CELLS,
198 });
199 }
200
201 let mut keys = Vec::with_capacity(n_cells as usize);
202 for col in col_lo..=col_hi {
203 let col_letters = index_to_col(col);
204 for row in row_lo..=row_hi {
205 keys.push(cell_key(&sheet, &format!("{col_letters}{row}")));
206 }
207 }
208 Ok((keys, RangeShape { rows, cols }))
209}
210
/// Unit tests for the range/reference resolution primitives.
///
/// Besides the happy paths, these pin the edge cases the primitives promise:
/// reversed and `$`-anchored endpoints, the inclusive cap boundary, the
/// endpoints reported by `MalformedRange`, quoted sheet qualifiers, and
/// columns too wide for the writer's `u16`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `RangeRef` from string slices.
    fn rr(sheet: &str, start: &str, end: &str) -> RangeRef {
        RangeRef {
            sheet: sheet.to_string(),
            start: start.to_string(),
            end: end.to_string(),
        }
    }

    #[test]
    fn public_expand_range_single_column_returns_keys_and_shape() {
        let (keys, shape) = expand_range(&rr("S", "B2", "B4"), "S").expect("valid range");
        assert_eq!(
            keys,
            vec!["S!B2".to_string(), "S!B3".to_string(), "S!B4".to_string()]
        );
        assert_eq!(shape, RangeShape { rows: 3, cols: 1 });
    }

    #[test]
    fn public_expand_range_2x2_is_column_major_with_2x2_shape() {
        let (keys, shape) = expand_range(&rr("S", "A1", "B2"), "S").expect("valid range");
        assert_eq!(
            keys,
            vec![
                "S!A1".to_string(),
                "S!A2".to_string(),
                "S!B1".to_string(),
                "S!B2".to_string(),
            ]
        );
        assert_eq!(shape, RangeShape { rows: 2, cols: 2 });
    }

    #[test]
    fn public_expand_range_defaults_empty_sheet_to_current() {
        let (keys, _shape) = expand_range(&rr("", "C1", "C2"), "5_Quantities").expect("valid");
        assert_eq!(
            keys,
            vec!["5_Quantities!C1".to_string(), "5_Quantities!C2".to_string()]
        );
    }

    #[test]
    fn public_expand_range_normalizes_reversed_and_anchored_endpoints() {
        // `$B$2:$A$1` must expand exactly like `A1:B2` (same keys, order, shape).
        let forward = expand_range(&rr("S", "A1", "B2"), "S").expect("valid range");
        let reversed = expand_range(&rr("S", "$B$2", "$A$1"), "S").expect("valid range");
        assert_eq!(forward, reversed);
    }

    #[test]
    fn public_expand_range_over_cap_is_err() {
        let err = expand_range(&rr("S", "A1", "XFD1048576"), "S")
            .expect_err("an over-cap range must be Err");
        assert!(matches!(
            err,
            ResolveError::RangeTooLarge { cap, cells } if cap == MAX_RANGE_CELLS && cells > MAX_RANGE_CELLS as u64
        ));
    }

    #[test]
    fn public_expand_range_cap_boundary_is_inclusive() {
        // Exactly MAX_RANGE_CELLS cells is still allowed...
        let at_cap = format!("A{MAX_RANGE_CELLS}");
        let (keys, shape) = expand_range(&rr("S", "A1", &at_cap), "S").expect("exactly at the cap");
        assert_eq!(keys.len(), MAX_RANGE_CELLS);
        assert_eq!(
            shape,
            RangeShape {
                rows: u32::try_from(MAX_RANGE_CELLS).expect("cap fits in u32"),
                cols: 1,
            }
        );

        // ...one more cell is rejected, and the error reports the real count.
        let over_cap = format!("A{}", MAX_RANGE_CELLS + 1);
        let err = expand_range(&rr("S", "A1", &over_cap), "S").expect_err("one cell over the cap");
        assert_eq!(
            err,
            ResolveError::RangeTooLarge {
                cells: MAX_RANGE_CELLS as u64 + 1,
                cap: MAX_RANGE_CELLS,
            }
        );
    }

    #[test]
    fn public_expand_range_malformed_endpoint_is_err() {
        let err =
            expand_range(&rr("S", "1A", "B2"), "S").expect_err("a malformed endpoint must be Err");
        assert!(matches!(err, ResolveError::MalformedRange { .. }));
    }

    #[test]
    fn public_expand_range_malformed_error_carries_stripped_endpoints() {
        let err = expand_range(&rr("S", "$1A", "$B$2"), "S").expect_err("malformed start");
        assert_eq!(
            err,
            ResolveError::MalformedRange {
                start: "1A".to_string(),
                end: "B2".to_string(),
            }
        );
    }

    #[test]
    fn public_parse_a1_parses_and_rejects() {
        assert_eq!(parse_a1("C16"), Some(("C".to_string(), 16)));
        assert_eq!(parse_a1("$C$16"), None); // anchors must be stripped by the caller
        assert_eq!(parse_a1("16"), None); // no column letters
        assert_eq!(parse_a1("C0"), None); // row 0 is invalid
        assert_eq!(parse_a1("CC"), None); // no row digits
    }

    #[test]
    fn public_parse_a1_uppercases_column_letters() {
        assert_eq!(parse_a1("ab12"), Some(("AB".to_string(), 12)));
    }

    #[test]
    fn public_split_ref_strips_anchors_and_defaults_sheet() {
        assert_eq!(
            split_ref("2_Constants!$C$17", "5_Quantities"),
            ("2_Constants".to_string(), "C17".to_string())
        );
        assert_eq!(
            split_ref("$C$16", "5_Quantities"),
            ("5_Quantities".to_string(), "C16".to_string())
        );
    }

    #[test]
    fn public_split_ref_strips_sheet_quotes() {
        assert_eq!(
            split_ref("'My Sheet'!$A$1", "5_Quantities"),
            ("My Sheet".to_string(), "A1".to_string())
        );
    }

    #[test]
    fn a1_to_zero_indexed_row_col_converts_and_rejects() {
        // C16 -> (15, 2) — review item 8 example.
        assert_eq!(a1_to_zero_indexed_row_col("C16"), Some((15, 2)));
        // A1 -> (0, 0) (top-left).
        assert_eq!(a1_to_zero_indexed_row_col("A1"), Some((0, 0)));
        // AA1 -> (0, 26) (col 27 1-based -> 26 zero-indexed).
        assert_eq!(a1_to_zero_indexed_row_col("AA1"), Some((0, 26)));
        // Malformed -> None, never a panic.
        assert_eq!(a1_to_zero_indexed_row_col("1A"), None);
        assert_eq!(a1_to_zero_indexed_row_col("$C$16"), None); // anchors not stripped
        assert_eq!(a1_to_zero_indexed_row_col(""), None);
    }

    #[test]
    fn a1_to_zero_indexed_row_col_handles_grid_corner_and_wide_columns() {
        // Bottom-right cell of the sheet grid.
        assert_eq!(
            a1_to_zero_indexed_row_col("XFD1048576"),
            Some((1_048_575, 16_383))
        );
        // ZZZZ is column 475_254, which does not fit the u16 column type.
        assert_eq!(a1_to_zero_indexed_row_col("ZZZZ1"), None);
    }

    #[test]
    fn col_index_round_trips() {
        for (col, idx) in [("A", 1u32), ("Z", 26), ("AA", 27), ("XFD", 16384)] {
            assert_eq!(col_to_index(col), Some(idx));
            assert_eq!(index_to_col(idx), col);
        }
    }

    #[test]
    fn col_helpers_handle_degenerate_inputs() {
        assert_eq!(col_to_index(""), None);
        assert_eq!(col_to_index("A1"), None);
        assert_eq!(col_to_index("xfd"), Some(16384)); // case-insensitive
        assert_eq!(index_to_col(0), "");
    }
}