Skip to main content

formualizer_eval/formula_plane/
span_counters.rs

1//! Passive FormulaPlane span and row-block partition counters.
2//!
3//! These counters are diagnostic only. They describe candidate dependent formula
4//! placements and estimated row-block fanout; they do not route dirty
5//! propagation, evaluation, or dependency graph construction.
6
7use std::collections::{BTreeMap, BTreeSet};
8
9/// Default diagnostic row block size used by FP2.A candidate partition counts.
10pub const DEFAULT_CANDIDATE_ROW_BLOCK_SIZE: u32 = 4096;
11
12/// Options for passive span/partition counter construction.
13#[derive(Clone, Copy, Debug, PartialEq, Eq)]
14pub struct SpanPartitionCounterOptions {
15    /// Fixed row-block height used for diagnostic candidate partitioning.
16    pub row_block_size: u32,
17}
18
19impl Default for SpanPartitionCounterOptions {
20    fn default() -> Self {
21        Self {
22            row_block_size: DEFAULT_CANDIDATE_ROW_BLOCK_SIZE,
23        }
24    }
25}
26
27impl SpanPartitionCounterOptions {
28    fn normalized(self) -> Self {
29        Self {
30            row_block_size: self.row_block_size.max(1),
31        }
32    }
33}
34
35/// One parser-backed formula cell observed by a passive scanner.
36#[derive(Clone, Debug, PartialEq, Eq)]
37pub struct FormulaPlaneCandidateCell {
38    pub sheet: String,
39    pub row: u32,
40    pub col: u32,
41    pub template_id: String,
42    pub parse_ok: bool,
43    pub volatile: bool,
44    pub dynamic: bool,
45    pub unsupported: bool,
46}
47
48/// Direction of a contiguous candidate dependent formula placement.
49#[derive(Clone, Copy, Debug, PartialEq, Eq)]
50pub enum CandidateRunOrientation {
51    Row,
52    Column,
53}
54
55/// Passive candidate run summary for a dependent formula placement.
56#[derive(Clone, Debug, PartialEq, Eq)]
57pub struct CandidateFormulaRun {
58    pub template_id: String,
59    pub sheet: String,
60    pub orientation: CandidateRunOrientation,
61    /// Row for row runs, column for column runs.
62    pub fixed_index: u32,
63    /// Column start for row runs, row start for column runs.
64    pub start_index: u32,
65    /// Column end for row runs, row end for column runs.
66    pub end_index: u32,
67    pub len: u64,
68    pub partitions_touched: u64,
69}
70
71/// Per-template passive FormulaPlane candidate counters.
72#[derive(Clone, Debug, Default, PartialEq)]
73pub struct TemplateSpanPartitionCounters {
74    pub template_id: String,
75    pub formula_cells: u64,
76    pub row_run_count: u64,
77    pub column_run_count: u64,
78    pub max_run_length: u64,
79    pub formula_cells_represented_by_runs: u64,
80    pub singleton_formula_count: u64,
81    pub hole_count: u64,
82    pub exception_count: u64,
83    pub candidate_partition_count: u64,
84    pub candidate_formula_run_to_partition_edge_estimate: u64,
85    pub max_partitions_touched_by_run: u64,
86    pub dense_run_coverage_percent: f64,
87}
88
89/// Workbook-level passive FormulaPlane candidate counters.
90#[derive(Clone, Debug, Default, PartialEq)]
91pub struct SpanPartitionCounters {
92    pub row_block_size: u32,
93    pub template_count: u64,
94    pub repeated_template_count: u64,
95    pub formula_cell_count: u64,
96    pub parse_error_formula_count: u64,
97    pub volatile_formula_count: u64,
98    pub dynamic_formula_count: u64,
99    pub unsupported_formula_count: u64,
100    pub row_run_count: u64,
101    pub column_run_count: u64,
102    pub candidate_formula_run_count: u64,
103    pub max_run_length: u64,
104    pub formula_cells_represented_by_runs: u64,
105    pub singleton_formula_count: u64,
106    pub hole_count: u64,
107    pub exception_count: u64,
108    /// Estimate: formula cells in repeated candidate runs.
109    pub estimated_materialization_avoidable_cell_count: u64,
110    pub candidate_row_block_partition_count: u64,
111    pub candidate_formula_run_to_partition_edge_estimate: u64,
112    pub max_partitions_touched_by_run: u64,
113    pub dense_run_coverage_percent: f64,
114    pub template_counters: Vec<TemplateSpanPartitionCounters>,
115    pub candidate_runs: Vec<CandidateFormulaRun>,
116}
117
118/// Build passive span and row-block partition counters from scanner cells.
119///
120/// The result is diagnostic only. Formula runs are candidate dependent formula
121/// placements; candidate partitions are fixed row blocks of those placements.
122/// No precedent region or result region authority is inferred here.
123pub fn compute_span_partition_counters(
124    cells: &[FormulaPlaneCandidateCell],
125    options: SpanPartitionCounterOptions,
126) -> SpanPartitionCounters {
127    let options = options.normalized();
128    let mut by_template: BTreeMap<String, Vec<&FormulaPlaneCandidateCell>> = BTreeMap::new();
129    let mut cell_to_template: BTreeMap<(String, u32, u32), String> = BTreeMap::new();
130
131    for cell in cells {
132        cell_to_template.insert(
133            (cell.sheet.clone(), cell.row, cell.col),
134            cell.template_id.clone(),
135        );
136        by_template
137            .entry(cell.template_id.clone())
138            .or_default()
139            .push(cell);
140    }
141
142    let mut out = SpanPartitionCounters {
143        row_block_size: options.row_block_size,
144        template_count: by_template.len() as u64,
145        formula_cell_count: cells.len() as u64,
146        parse_error_formula_count: cells.iter().filter(|cell| !cell.parse_ok).count() as u64,
147        volatile_formula_count: cells.iter().filter(|cell| cell.volatile).count() as u64,
148        dynamic_formula_count: cells.iter().filter(|cell| cell.dynamic).count() as u64,
149        unsupported_formula_count: cells.iter().filter(|cell| cell.unsupported).count() as u64,
150        ..SpanPartitionCounters::default()
151    };
152
153    let mut represented_cells: BTreeSet<(String, u32, u32)> = BTreeSet::new();
154    let mut candidate_partitions: BTreeSet<(String, u32)> = BTreeSet::new();
155
156    for (template_id, template_cells) in by_template {
157        if template_cells.len() > 1 {
158            out.repeated_template_count += 1;
159        }
160
161        let mut template_counter = TemplateSpanPartitionCounters {
162            template_id: template_id.clone(),
163            formula_cells: template_cells.len() as u64,
164            ..TemplateSpanPartitionCounters::default()
165        };
166        let mut template_represented_cells: BTreeSet<(String, u32, u32)> = BTreeSet::new();
167        let mut template_partitions: BTreeSet<(String, u32)> = BTreeSet::new();
168
169        let row_runs = build_runs(
170            &template_id,
171            &template_cells,
172            CandidateRunOrientation::Row,
173            options.row_block_size,
174        );
175        let column_runs = build_runs(
176            &template_id,
177            &template_cells,
178            CandidateRunOrientation::Column,
179            options.row_block_size,
180        );
181
182        let (row_holes, row_exceptions) = count_axis_gaps(
183            &template_id,
184            &template_cells,
185            &cell_to_template,
186            CandidateRunOrientation::Row,
187        );
188        let (column_holes, column_exceptions) = count_axis_gaps(
189            &template_id,
190            &template_cells,
191            &cell_to_template,
192            CandidateRunOrientation::Column,
193        );
194
195        for run in row_runs.iter().chain(column_runs.iter()) {
196            template_counter.max_run_length = template_counter.max_run_length.max(run.len);
197            template_counter.candidate_formula_run_to_partition_edge_estimate +=
198                run.partitions_touched;
199            template_counter.max_partitions_touched_by_run = template_counter
200                .max_partitions_touched_by_run
201                .max(run.partitions_touched);
202
203            for cell_key in cells_for_run(run) {
204                represented_cells.insert(cell_key.clone());
205                template_represented_cells.insert(cell_key);
206            }
207            for partition in partitions_for_run(run, options.row_block_size) {
208                candidate_partitions.insert(partition.clone());
209                template_partitions.insert(partition);
210            }
211        }
212
213        template_counter.row_run_count = row_runs.len() as u64;
214        template_counter.column_run_count = column_runs.len() as u64;
215        template_counter.formula_cells_represented_by_runs =
216            template_represented_cells.len() as u64;
217        template_counter.singleton_formula_count = template_counter
218            .formula_cells
219            .saturating_sub(template_counter.formula_cells_represented_by_runs);
220        template_counter.hole_count = row_holes + column_holes;
221        template_counter.exception_count = row_exceptions + column_exceptions;
222        template_counter.candidate_partition_count = template_partitions.len() as u64;
223        template_counter.dense_run_coverage_percent = percent(
224            template_counter.formula_cells_represented_by_runs,
225            template_counter.formula_cells,
226        );
227
228        out.row_run_count += template_counter.row_run_count;
229        out.column_run_count += template_counter.column_run_count;
230        out.max_run_length = out.max_run_length.max(template_counter.max_run_length);
231        out.hole_count += template_counter.hole_count;
232        out.exception_count += template_counter.exception_count;
233        out.candidate_formula_run_to_partition_edge_estimate +=
234            template_counter.candidate_formula_run_to_partition_edge_estimate;
235        out.max_partitions_touched_by_run = out
236            .max_partitions_touched_by_run
237            .max(template_counter.max_partitions_touched_by_run);
238        out.candidate_runs.extend(row_runs);
239        out.candidate_runs.extend(column_runs);
240        out.template_counters.push(template_counter);
241    }
242
243    out.candidate_formula_run_count = out.row_run_count + out.column_run_count;
244    out.formula_cells_represented_by_runs = represented_cells.len() as u64;
245    out.singleton_formula_count = out
246        .formula_cell_count
247        .saturating_sub(out.formula_cells_represented_by_runs);
248    out.estimated_materialization_avoidable_cell_count = out.formula_cells_represented_by_runs;
249    out.candidate_row_block_partition_count = candidate_partitions.len() as u64;
250    out.dense_run_coverage_percent = percent(
251        out.formula_cells_represented_by_runs,
252        out.formula_cell_count,
253    );
254    out.template_counters
255        .sort_by(|a, b| a.template_id.cmp(&b.template_id));
256    out.candidate_runs.sort_by(|a, b| {
257        (
258            &a.template_id,
259            &a.sheet,
260            orientation_key(a.orientation),
261            a.fixed_index,
262            a.start_index,
263        )
264            .cmp(&(
265                &b.template_id,
266                &b.sheet,
267                orientation_key(b.orientation),
268                b.fixed_index,
269                b.start_index,
270            ))
271    });
272    out
273}
274
275fn build_runs(
276    template_id: &str,
277    cells: &[&FormulaPlaneCandidateCell],
278    orientation: CandidateRunOrientation,
279    row_block_size: u32,
280) -> Vec<CandidateFormulaRun> {
281    let mut groups: BTreeMap<(String, u32), Vec<u32>> = BTreeMap::new();
282    for cell in cells {
283        let key = match orientation {
284            CandidateRunOrientation::Row => (cell.sheet.clone(), cell.row),
285            CandidateRunOrientation::Column => (cell.sheet.clone(), cell.col),
286        };
287        let value = match orientation {
288            CandidateRunOrientation::Row => cell.col,
289            CandidateRunOrientation::Column => cell.row,
290        };
291        groups.entry(key).or_default().push(value);
292    }
293
294    let mut out = Vec::new();
295    for ((sheet, fixed_index), mut values) in groups {
296        values.sort_unstable();
297        values.dedup();
298        let mut start = None::<u32>;
299        let mut prev = None::<u32>;
300        for value in values {
301            match (start, prev) {
302                (None, _) => {
303                    start = Some(value);
304                    prev = Some(value);
305                }
306                (Some(_), Some(previous)) if value == previous + 1 => {
307                    prev = Some(value);
308                }
309                (Some(run_start), Some(run_end)) => {
310                    push_run(
311                        &mut out,
312                        template_id,
313                        &sheet,
314                        orientation,
315                        fixed_index,
316                        run_start,
317                        run_end,
318                        row_block_size,
319                    );
320                    start = Some(value);
321                    prev = Some(value);
322                }
323                (Some(_), None) => unreachable!(),
324            }
325        }
326        if let (Some(run_start), Some(run_end)) = (start, prev) {
327            push_run(
328                &mut out,
329                template_id,
330                &sheet,
331                orientation,
332                fixed_index,
333                run_start,
334                run_end,
335                row_block_size,
336            );
337        }
338    }
339    out
340}
341
342fn push_run(
343    out: &mut Vec<CandidateFormulaRun>,
344    template_id: &str,
345    sheet: &str,
346    orientation: CandidateRunOrientation,
347    fixed_index: u32,
348    start_index: u32,
349    end_index: u32,
350    row_block_size: u32,
351) {
352    if end_index <= start_index {
353        return;
354    }
355    let len = u64::from(end_index - start_index + 1);
356    let partitions_touched = match orientation {
357        CandidateRunOrientation::Row => 1,
358        CandidateRunOrientation::Column => {
359            let start_block = row_block_index(start_index, row_block_size);
360            let end_block = row_block_index(end_index, row_block_size);
361            u64::from(end_block - start_block + 1)
362        }
363    };
364    out.push(CandidateFormulaRun {
365        template_id: template_id.to_string(),
366        sheet: sheet.to_string(),
367        orientation,
368        fixed_index,
369        start_index,
370        end_index,
371        len,
372        partitions_touched,
373    });
374}
375
376fn count_axis_gaps(
377    template_id: &str,
378    cells: &[&FormulaPlaneCandidateCell],
379    cell_to_template: &BTreeMap<(String, u32, u32), String>,
380    orientation: CandidateRunOrientation,
381) -> (u64, u64) {
382    let mut groups: BTreeMap<(String, u32), Vec<u32>> = BTreeMap::new();
383    for cell in cells {
384        let key = match orientation {
385            CandidateRunOrientation::Row => (cell.sheet.clone(), cell.row),
386            CandidateRunOrientation::Column => (cell.sheet.clone(), cell.col),
387        };
388        let value = match orientation {
389            CandidateRunOrientation::Row => cell.col,
390            CandidateRunOrientation::Column => cell.row,
391        };
392        groups.entry(key).or_default().push(value);
393    }
394
395    let mut holes = 0u64;
396    let mut exceptions = 0u64;
397    for ((sheet, fixed), mut values) in groups {
398        values.sort_unstable();
399        values.dedup();
400        let Some(min) = values.first().copied() else {
401            continue;
402        };
403        let Some(max) = values.last().copied() else {
404            continue;
405        };
406        let present = values.into_iter().collect::<BTreeSet<_>>();
407        for value in min..=max {
408            if present.contains(&value) {
409                continue;
410            }
411            let key = match orientation {
412                CandidateRunOrientation::Row => (sheet.clone(), fixed, value),
413                CandidateRunOrientation::Column => (sheet.clone(), value, fixed),
414            };
415            match cell_to_template.get(&key) {
416                Some(other) if other != template_id => exceptions += 1,
417                Some(_) => {}
418                None => holes += 1,
419            }
420        }
421    }
422    (holes, exceptions)
423}
424
425fn cells_for_run(run: &CandidateFormulaRun) -> Vec<(String, u32, u32)> {
426    (run.start_index..=run.end_index)
427        .map(|index| match run.orientation {
428            CandidateRunOrientation::Row => (run.sheet.clone(), run.fixed_index, index),
429            CandidateRunOrientation::Column => (run.sheet.clone(), index, run.fixed_index),
430        })
431        .collect()
432}
433
434fn partitions_for_run(run: &CandidateFormulaRun, row_block_size: u32) -> Vec<(String, u32)> {
435    match run.orientation {
436        CandidateRunOrientation::Row => {
437            vec![(
438                run.sheet.clone(),
439                row_block_index(run.fixed_index, row_block_size),
440            )]
441        }
442        CandidateRunOrientation::Column => {
443            let start_block = row_block_index(run.start_index, row_block_size);
444            let end_block = row_block_index(run.end_index, row_block_size);
445            (start_block..=end_block)
446                .map(|block| (run.sheet.clone(), block))
447                .collect()
448        }
449    }
450}
451
452fn row_block_index(row: u32, row_block_size: u32) -> u32 {
453    row.saturating_sub(1) / row_block_size.max(1)
454}
455
456fn percent(numerator: u64, denominator: u64) -> f64 {
457    if denominator == 0 {
458        0.0
459    } else {
460        (numerator as f64) * 100.0 / (denominator as f64)
461    }
462}
463
464fn orientation_key(orientation: CandidateRunOrientation) -> u8 {
465    match orientation {
466        CandidateRunOrientation::Row => 0,
467        CandidateRunOrientation::Column => 1,
468    }
469}
470
471#[cfg(test)]
472mod tests {
473    use super::*;
474
475    fn cell(row: u32, col: u32, template_id: &str) -> FormulaPlaneCandidateCell {
476        FormulaPlaneCandidateCell {
477            sheet: "Sheet1".to_string(),
478            row,
479            col,
480            template_id: template_id.to_string(),
481            parse_ok: true,
482            volatile: false,
483            dynamic: false,
484            unsupported: false,
485        }
486    }
487
488    #[test]
489    fn counts_vertical_runs_and_row_block_partitions() {
490        let cells = (1..=10)
491            .map(|row| cell(row, 2, "tpl_a"))
492            .collect::<Vec<_>>();
493        let counters = compute_span_partition_counters(
494            &cells,
495            SpanPartitionCounterOptions { row_block_size: 4 },
496        );
497
498        assert_eq!(counters.template_count, 1);
499        assert_eq!(counters.repeated_template_count, 1);
500        assert_eq!(counters.column_run_count, 1);
501        assert_eq!(counters.row_run_count, 0);
502        assert_eq!(counters.max_run_length, 10);
503        assert_eq!(counters.formula_cells_represented_by_runs, 10);
504        assert_eq!(counters.singleton_formula_count, 0);
505        assert_eq!(counters.candidate_row_block_partition_count, 3);
506        assert_eq!(counters.candidate_formula_run_to_partition_edge_estimate, 3);
507        assert_eq!(counters.max_partitions_touched_by_run, 3);
508        assert_eq!(counters.estimated_materialization_avoidable_cell_count, 10);
509        assert_eq!(counters.dense_run_coverage_percent, 100.0);
510    }
511
512    #[test]
513    fn counts_holes_exceptions_and_singletons() {
514        let cells = vec![
515            cell(1, 1, "tpl_a"),
516            cell(4, 1, "tpl_a"),
517            cell(3, 1, "tpl_b"),
518            FormulaPlaneCandidateCell {
519                parse_ok: false,
520                unsupported: true,
521                ..cell(10, 1, "tpl_parse")
522            },
523        ];
524        let counters = compute_span_partition_counters(
525            &cells,
526            SpanPartitionCounterOptions { row_block_size: 16 },
527        );
528
529        assert_eq!(counters.formula_cell_count, 4);
530        assert_eq!(counters.parse_error_formula_count, 1);
531        assert_eq!(counters.unsupported_formula_count, 1);
532        assert_eq!(counters.column_run_count, 0);
533        assert_eq!(counters.formula_cells_represented_by_runs, 0);
534        assert_eq!(counters.singleton_formula_count, 4);
535        assert_eq!(counters.hole_count, 1);
536        assert_eq!(counters.exception_count, 1);
537    }
538}