1use std::collections::{BTreeMap, BTreeSet};
8
9pub const DEFAULT_CANDIDATE_ROW_BLOCK_SIZE: u32 = 4096;
11
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
14pub struct SpanPartitionCounterOptions {
15 pub row_block_size: u32,
17}
18
19impl Default for SpanPartitionCounterOptions {
20 fn default() -> Self {
21 Self {
22 row_block_size: DEFAULT_CANDIDATE_ROW_BLOCK_SIZE,
23 }
24 }
25}
26
27impl SpanPartitionCounterOptions {
28 fn normalized(self) -> Self {
29 Self {
30 row_block_size: self.row_block_size.max(1),
31 }
32 }
33}
34
35#[derive(Clone, Debug, PartialEq, Eq)]
37pub struct FormulaPlaneCandidateCell {
38 pub sheet: String,
39 pub row: u32,
40 pub col: u32,
41 pub template_id: String,
42 pub parse_ok: bool,
43 pub volatile: bool,
44 pub dynamic: bool,
45 pub unsupported: bool,
46}
47
48#[derive(Clone, Copy, Debug, PartialEq, Eq)]
50pub enum CandidateRunOrientation {
51 Row,
52 Column,
53}
54
55#[derive(Clone, Debug, PartialEq, Eq)]
57pub struct CandidateFormulaRun {
58 pub template_id: String,
59 pub sheet: String,
60 pub orientation: CandidateRunOrientation,
61 pub fixed_index: u32,
63 pub start_index: u32,
65 pub end_index: u32,
67 pub len: u64,
68 pub partitions_touched: u64,
69}
70
71#[derive(Clone, Debug, Default, PartialEq)]
73pub struct TemplateSpanPartitionCounters {
74 pub template_id: String,
75 pub formula_cells: u64,
76 pub row_run_count: u64,
77 pub column_run_count: u64,
78 pub max_run_length: u64,
79 pub formula_cells_represented_by_runs: u64,
80 pub singleton_formula_count: u64,
81 pub hole_count: u64,
82 pub exception_count: u64,
83 pub candidate_partition_count: u64,
84 pub candidate_formula_run_to_partition_edge_estimate: u64,
85 pub max_partitions_touched_by_run: u64,
86 pub dense_run_coverage_percent: f64,
87}
88
89#[derive(Clone, Debug, Default, PartialEq)]
91pub struct SpanPartitionCounters {
92 pub row_block_size: u32,
93 pub template_count: u64,
94 pub repeated_template_count: u64,
95 pub formula_cell_count: u64,
96 pub parse_error_formula_count: u64,
97 pub volatile_formula_count: u64,
98 pub dynamic_formula_count: u64,
99 pub unsupported_formula_count: u64,
100 pub row_run_count: u64,
101 pub column_run_count: u64,
102 pub candidate_formula_run_count: u64,
103 pub max_run_length: u64,
104 pub formula_cells_represented_by_runs: u64,
105 pub singleton_formula_count: u64,
106 pub hole_count: u64,
107 pub exception_count: u64,
108 pub estimated_materialization_avoidable_cell_count: u64,
110 pub candidate_row_block_partition_count: u64,
111 pub candidate_formula_run_to_partition_edge_estimate: u64,
112 pub max_partitions_touched_by_run: u64,
113 pub dense_run_coverage_percent: f64,
114 pub template_counters: Vec<TemplateSpanPartitionCounters>,
115 pub candidate_runs: Vec<CandidateFormulaRun>,
116}
117
118pub fn compute_span_partition_counters(
124 cells: &[FormulaPlaneCandidateCell],
125 options: SpanPartitionCounterOptions,
126) -> SpanPartitionCounters {
127 let options = options.normalized();
128 let mut by_template: BTreeMap<String, Vec<&FormulaPlaneCandidateCell>> = BTreeMap::new();
129 let mut cell_to_template: BTreeMap<(String, u32, u32), String> = BTreeMap::new();
130
131 for cell in cells {
132 cell_to_template.insert(
133 (cell.sheet.clone(), cell.row, cell.col),
134 cell.template_id.clone(),
135 );
136 by_template
137 .entry(cell.template_id.clone())
138 .or_default()
139 .push(cell);
140 }
141
142 let mut out = SpanPartitionCounters {
143 row_block_size: options.row_block_size,
144 template_count: by_template.len() as u64,
145 formula_cell_count: cells.len() as u64,
146 parse_error_formula_count: cells.iter().filter(|cell| !cell.parse_ok).count() as u64,
147 volatile_formula_count: cells.iter().filter(|cell| cell.volatile).count() as u64,
148 dynamic_formula_count: cells.iter().filter(|cell| cell.dynamic).count() as u64,
149 unsupported_formula_count: cells.iter().filter(|cell| cell.unsupported).count() as u64,
150 ..SpanPartitionCounters::default()
151 };
152
153 let mut represented_cells: BTreeSet<(String, u32, u32)> = BTreeSet::new();
154 let mut candidate_partitions: BTreeSet<(String, u32)> = BTreeSet::new();
155
156 for (template_id, template_cells) in by_template {
157 if template_cells.len() > 1 {
158 out.repeated_template_count += 1;
159 }
160
161 let mut template_counter = TemplateSpanPartitionCounters {
162 template_id: template_id.clone(),
163 formula_cells: template_cells.len() as u64,
164 ..TemplateSpanPartitionCounters::default()
165 };
166 let mut template_represented_cells: BTreeSet<(String, u32, u32)> = BTreeSet::new();
167 let mut template_partitions: BTreeSet<(String, u32)> = BTreeSet::new();
168
169 let row_runs = build_runs(
170 &template_id,
171 &template_cells,
172 CandidateRunOrientation::Row,
173 options.row_block_size,
174 );
175 let column_runs = build_runs(
176 &template_id,
177 &template_cells,
178 CandidateRunOrientation::Column,
179 options.row_block_size,
180 );
181
182 let (row_holes, row_exceptions) = count_axis_gaps(
183 &template_id,
184 &template_cells,
185 &cell_to_template,
186 CandidateRunOrientation::Row,
187 );
188 let (column_holes, column_exceptions) = count_axis_gaps(
189 &template_id,
190 &template_cells,
191 &cell_to_template,
192 CandidateRunOrientation::Column,
193 );
194
195 for run in row_runs.iter().chain(column_runs.iter()) {
196 template_counter.max_run_length = template_counter.max_run_length.max(run.len);
197 template_counter.candidate_formula_run_to_partition_edge_estimate +=
198 run.partitions_touched;
199 template_counter.max_partitions_touched_by_run = template_counter
200 .max_partitions_touched_by_run
201 .max(run.partitions_touched);
202
203 for cell_key in cells_for_run(run) {
204 represented_cells.insert(cell_key.clone());
205 template_represented_cells.insert(cell_key);
206 }
207 for partition in partitions_for_run(run, options.row_block_size) {
208 candidate_partitions.insert(partition.clone());
209 template_partitions.insert(partition);
210 }
211 }
212
213 template_counter.row_run_count = row_runs.len() as u64;
214 template_counter.column_run_count = column_runs.len() as u64;
215 template_counter.formula_cells_represented_by_runs =
216 template_represented_cells.len() as u64;
217 template_counter.singleton_formula_count = template_counter
218 .formula_cells
219 .saturating_sub(template_counter.formula_cells_represented_by_runs);
220 template_counter.hole_count = row_holes + column_holes;
221 template_counter.exception_count = row_exceptions + column_exceptions;
222 template_counter.candidate_partition_count = template_partitions.len() as u64;
223 template_counter.dense_run_coverage_percent = percent(
224 template_counter.formula_cells_represented_by_runs,
225 template_counter.formula_cells,
226 );
227
228 out.row_run_count += template_counter.row_run_count;
229 out.column_run_count += template_counter.column_run_count;
230 out.max_run_length = out.max_run_length.max(template_counter.max_run_length);
231 out.hole_count += template_counter.hole_count;
232 out.exception_count += template_counter.exception_count;
233 out.candidate_formula_run_to_partition_edge_estimate +=
234 template_counter.candidate_formula_run_to_partition_edge_estimate;
235 out.max_partitions_touched_by_run = out
236 .max_partitions_touched_by_run
237 .max(template_counter.max_partitions_touched_by_run);
238 out.candidate_runs.extend(row_runs);
239 out.candidate_runs.extend(column_runs);
240 out.template_counters.push(template_counter);
241 }
242
243 out.candidate_formula_run_count = out.row_run_count + out.column_run_count;
244 out.formula_cells_represented_by_runs = represented_cells.len() as u64;
245 out.singleton_formula_count = out
246 .formula_cell_count
247 .saturating_sub(out.formula_cells_represented_by_runs);
248 out.estimated_materialization_avoidable_cell_count = out.formula_cells_represented_by_runs;
249 out.candidate_row_block_partition_count = candidate_partitions.len() as u64;
250 out.dense_run_coverage_percent = percent(
251 out.formula_cells_represented_by_runs,
252 out.formula_cell_count,
253 );
254 out.template_counters
255 .sort_by(|a, b| a.template_id.cmp(&b.template_id));
256 out.candidate_runs.sort_by(|a, b| {
257 (
258 &a.template_id,
259 &a.sheet,
260 orientation_key(a.orientation),
261 a.fixed_index,
262 a.start_index,
263 )
264 .cmp(&(
265 &b.template_id,
266 &b.sheet,
267 orientation_key(b.orientation),
268 b.fixed_index,
269 b.start_index,
270 ))
271 });
272 out
273}
274
275fn build_runs(
276 template_id: &str,
277 cells: &[&FormulaPlaneCandidateCell],
278 orientation: CandidateRunOrientation,
279 row_block_size: u32,
280) -> Vec<CandidateFormulaRun> {
281 let mut groups: BTreeMap<(String, u32), Vec<u32>> = BTreeMap::new();
282 for cell in cells {
283 let key = match orientation {
284 CandidateRunOrientation::Row => (cell.sheet.clone(), cell.row),
285 CandidateRunOrientation::Column => (cell.sheet.clone(), cell.col),
286 };
287 let value = match orientation {
288 CandidateRunOrientation::Row => cell.col,
289 CandidateRunOrientation::Column => cell.row,
290 };
291 groups.entry(key).or_default().push(value);
292 }
293
294 let mut out = Vec::new();
295 for ((sheet, fixed_index), mut values) in groups {
296 values.sort_unstable();
297 values.dedup();
298 let mut start = None::<u32>;
299 let mut prev = None::<u32>;
300 for value in values {
301 match (start, prev) {
302 (None, _) => {
303 start = Some(value);
304 prev = Some(value);
305 }
306 (Some(_), Some(previous)) if value == previous + 1 => {
307 prev = Some(value);
308 }
309 (Some(run_start), Some(run_end)) => {
310 push_run(
311 &mut out,
312 template_id,
313 &sheet,
314 orientation,
315 fixed_index,
316 run_start,
317 run_end,
318 row_block_size,
319 );
320 start = Some(value);
321 prev = Some(value);
322 }
323 (Some(_), None) => unreachable!(),
324 }
325 }
326 if let (Some(run_start), Some(run_end)) = (start, prev) {
327 push_run(
328 &mut out,
329 template_id,
330 &sheet,
331 orientation,
332 fixed_index,
333 run_start,
334 run_end,
335 row_block_size,
336 );
337 }
338 }
339 out
340}
341
342fn push_run(
343 out: &mut Vec<CandidateFormulaRun>,
344 template_id: &str,
345 sheet: &str,
346 orientation: CandidateRunOrientation,
347 fixed_index: u32,
348 start_index: u32,
349 end_index: u32,
350 row_block_size: u32,
351) {
352 if end_index <= start_index {
353 return;
354 }
355 let len = u64::from(end_index - start_index + 1);
356 let partitions_touched = match orientation {
357 CandidateRunOrientation::Row => 1,
358 CandidateRunOrientation::Column => {
359 let start_block = row_block_index(start_index, row_block_size);
360 let end_block = row_block_index(end_index, row_block_size);
361 u64::from(end_block - start_block + 1)
362 }
363 };
364 out.push(CandidateFormulaRun {
365 template_id: template_id.to_string(),
366 sheet: sheet.to_string(),
367 orientation,
368 fixed_index,
369 start_index,
370 end_index,
371 len,
372 partitions_touched,
373 });
374}
375
376fn count_axis_gaps(
377 template_id: &str,
378 cells: &[&FormulaPlaneCandidateCell],
379 cell_to_template: &BTreeMap<(String, u32, u32), String>,
380 orientation: CandidateRunOrientation,
381) -> (u64, u64) {
382 let mut groups: BTreeMap<(String, u32), Vec<u32>> = BTreeMap::new();
383 for cell in cells {
384 let key = match orientation {
385 CandidateRunOrientation::Row => (cell.sheet.clone(), cell.row),
386 CandidateRunOrientation::Column => (cell.sheet.clone(), cell.col),
387 };
388 let value = match orientation {
389 CandidateRunOrientation::Row => cell.col,
390 CandidateRunOrientation::Column => cell.row,
391 };
392 groups.entry(key).or_default().push(value);
393 }
394
395 let mut holes = 0u64;
396 let mut exceptions = 0u64;
397 for ((sheet, fixed), mut values) in groups {
398 values.sort_unstable();
399 values.dedup();
400 let Some(min) = values.first().copied() else {
401 continue;
402 };
403 let Some(max) = values.last().copied() else {
404 continue;
405 };
406 let present = values.into_iter().collect::<BTreeSet<_>>();
407 for value in min..=max {
408 if present.contains(&value) {
409 continue;
410 }
411 let key = match orientation {
412 CandidateRunOrientation::Row => (sheet.clone(), fixed, value),
413 CandidateRunOrientation::Column => (sheet.clone(), value, fixed),
414 };
415 match cell_to_template.get(&key) {
416 Some(other) if other != template_id => exceptions += 1,
417 Some(_) => {}
418 None => holes += 1,
419 }
420 }
421 }
422 (holes, exceptions)
423}
424
425fn cells_for_run(run: &CandidateFormulaRun) -> Vec<(String, u32, u32)> {
426 (run.start_index..=run.end_index)
427 .map(|index| match run.orientation {
428 CandidateRunOrientation::Row => (run.sheet.clone(), run.fixed_index, index),
429 CandidateRunOrientation::Column => (run.sheet.clone(), index, run.fixed_index),
430 })
431 .collect()
432}
433
434fn partitions_for_run(run: &CandidateFormulaRun, row_block_size: u32) -> Vec<(String, u32)> {
435 match run.orientation {
436 CandidateRunOrientation::Row => {
437 vec![(
438 run.sheet.clone(),
439 row_block_index(run.fixed_index, row_block_size),
440 )]
441 }
442 CandidateRunOrientation::Column => {
443 let start_block = row_block_index(run.start_index, row_block_size);
444 let end_block = row_block_index(run.end_index, row_block_size);
445 (start_block..=end_block)
446 .map(|block| (run.sheet.clone(), block))
447 .collect()
448 }
449 }
450}
451
452fn row_block_index(row: u32, row_block_size: u32) -> u32 {
453 row.saturating_sub(1) / row_block_size.max(1)
454}
455
456fn percent(numerator: u64, denominator: u64) -> f64 {
457 if denominator == 0 {
458 0.0
459 } else {
460 (numerator as f64) * 100.0 / (denominator as f64)
461 }
462}
463
464fn orientation_key(orientation: CandidateRunOrientation) -> u8 {
465 match orientation {
466 CandidateRunOrientation::Row => 0,
467 CandidateRunOrientation::Column => 1,
468 }
469}
470
471#[cfg(test)]
472mod tests {
473 use super::*;
474
475 fn cell(row: u32, col: u32, template_id: &str) -> FormulaPlaneCandidateCell {
476 FormulaPlaneCandidateCell {
477 sheet: "Sheet1".to_string(),
478 row,
479 col,
480 template_id: template_id.to_string(),
481 parse_ok: true,
482 volatile: false,
483 dynamic: false,
484 unsupported: false,
485 }
486 }
487
488 #[test]
489 fn counts_vertical_runs_and_row_block_partitions() {
490 let cells = (1..=10)
491 .map(|row| cell(row, 2, "tpl_a"))
492 .collect::<Vec<_>>();
493 let counters = compute_span_partition_counters(
494 &cells,
495 SpanPartitionCounterOptions { row_block_size: 4 },
496 );
497
498 assert_eq!(counters.template_count, 1);
499 assert_eq!(counters.repeated_template_count, 1);
500 assert_eq!(counters.column_run_count, 1);
501 assert_eq!(counters.row_run_count, 0);
502 assert_eq!(counters.max_run_length, 10);
503 assert_eq!(counters.formula_cells_represented_by_runs, 10);
504 assert_eq!(counters.singleton_formula_count, 0);
505 assert_eq!(counters.candidate_row_block_partition_count, 3);
506 assert_eq!(counters.candidate_formula_run_to_partition_edge_estimate, 3);
507 assert_eq!(counters.max_partitions_touched_by_run, 3);
508 assert_eq!(counters.estimated_materialization_avoidable_cell_count, 10);
509 assert_eq!(counters.dense_run_coverage_percent, 100.0);
510 }
511
512 #[test]
513 fn counts_holes_exceptions_and_singletons() {
514 let cells = vec![
515 cell(1, 1, "tpl_a"),
516 cell(4, 1, "tpl_a"),
517 cell(3, 1, "tpl_b"),
518 FormulaPlaneCandidateCell {
519 parse_ok: false,
520 unsupported: true,
521 ..cell(10, 1, "tpl_parse")
522 },
523 ];
524 let counters = compute_span_partition_counters(
525 &cells,
526 SpanPartitionCounterOptions { row_block_size: 16 },
527 );
528
529 assert_eq!(counters.formula_cell_count, 4);
530 assert_eq!(counters.parse_error_formula_count, 1);
531 assert_eq!(counters.unsupported_formula_count, 1);
532 assert_eq!(counters.column_run_count, 0);
533 assert_eq!(counters.formula_cells_represented_by_runs, 0);
534 assert_eq!(counters.singleton_formula_count, 4);
535 assert_eq!(counters.hole_count, 1);
536 assert_eq!(counters.exception_count, 1);
537 }
538}