1pub mod filters;
2
3use crate::analysis::{formula::FormulaGraph, stats};
4use crate::model::*;
5use crate::state::AppState;
6use crate::workbook::{WorkbookContext, cell_to_value};
7use anyhow::{Result, anyhow};
8use regex::Regex;
9use schemars::JsonSchema;
10use serde::Deserialize;
11use std::cmp::Ordering;
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::sync::Arc;
14
15const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
16const TRACE_PAGE_MIN: usize = 5;
17const TRACE_PAGE_MAX: usize = 200;
18const TRACE_RANGE_THRESHOLD: usize = 4;
19const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
20const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
21const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
22const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
23const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
24const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
25const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
26
27pub async fn list_workbooks(
28 state: Arc<AppState>,
29 params: ListWorkbooksParams,
30) -> Result<WorkbookListResponse> {
31 let filter = params.into_filter()?;
32 state.list_workbooks(filter)
33}
34
35pub async fn describe_workbook(
36 state: Arc<AppState>,
37 params: DescribeWorkbookParams,
38) -> Result<WorkbookDescription> {
39 let workbook = state.open_workbook(¶ms.workbook_id).await?;
40 let desc = workbook.describe();
41 Ok(desc)
42}
43
44#[derive(Debug, Deserialize, JsonSchema)]
45pub struct ListWorkbooksParams {
46 pub slug_prefix: Option<String>,
47 pub folder: Option<String>,
48 pub path_glob: Option<String>,
49}
50
51impl ListWorkbooksParams {
52 fn into_filter(self) -> Result<filters::WorkbookFilter> {
53 filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
54 }
55}
56
57#[derive(Debug, Deserialize, JsonSchema)]
58pub struct DescribeWorkbookParams {
59 pub workbook_id: WorkbookId,
60}
61
62#[derive(Debug, Deserialize, JsonSchema)]
63pub struct ListSheetsParams {
64 pub workbook_id: WorkbookId,
65}
66
67pub async fn list_sheets(
68 state: Arc<AppState>,
69 params: ListSheetsParams,
70) -> Result<SheetListResponse> {
71 let workbook = state.open_workbook(¶ms.workbook_id).await?;
72 let summaries = workbook.list_summaries()?;
73 let response = SheetListResponse {
74 workbook_id: workbook.id.clone(),
75 workbook_short_id: workbook.short_id.clone(),
76 sheets: summaries,
77 };
78 Ok(response)
79}
80
81#[derive(Debug, Deserialize, JsonSchema)]
82pub struct SheetOverviewParams {
83 pub workbook_id: WorkbookId,
84 pub sheet_name: String,
85}
86
87#[derive(Debug, Deserialize, JsonSchema)]
88pub struct WorkbookSummaryParams {
89 pub workbook_id: WorkbookId,
90}
91
92pub async fn workbook_summary(
93 state: Arc<AppState>,
94 params: WorkbookSummaryParams,
95) -> Result<WorkbookSummaryResponse> {
96 let workbook = state.open_workbook(¶ms.workbook_id).await?;
97 let sheet_names = workbook.sheet_names();
98
99 let mut total_cells: u64 = 0;
100 let mut total_formulas: u64 = 0;
101 let mut breakdown = WorkbookBreakdown::default();
102 let mut region_counts = RegionCountSummary::default();
103 let mut entry_points: Vec<EntryPoint> = Vec::new();
104 let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
105
106 for sheet_name in &sheet_names {
107 let entry = workbook.get_sheet_metrics(sheet_name)?;
108 total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
109 total_formulas += entry.metrics.formula_cells as u64;
110 match entry.metrics.classification {
111 SheetClassification::Calculator => breakdown.calculator_sheets += 1,
112 SheetClassification::Metadata => breakdown.metadata_sheets += 1,
113 SheetClassification::Empty => {}
114 _ => breakdown.data_sheets += 1,
115 }
116
117 for region in &entry.detected_regions {
118 match region
119 .region_kind
120 .clone()
121 .unwrap_or(region.classification.clone())
122 {
123 RegionKind::Calculator => region_counts.calculator += 1,
124 RegionKind::Metadata => region_counts.metadata += 1,
125 RegionKind::Parameters => region_counts.parameters += 1,
126 RegionKind::Outputs => region_counts.outputs += 1,
127 RegionKind::Data | RegionKind::Table => region_counts.data += 1,
128 _ => region_counts.other += 1,
129 }
130 if region.confidence >= 0.3 {
131 let kind = region
132 .region_kind
133 .as_ref()
134 .unwrap_or(®ion.classification);
135 let priority = match kind {
136 RegionKind::Parameters => 0,
137 RegionKind::Data | RegionKind::Table => 1,
138 RegionKind::Outputs => 2,
139 RegionKind::Calculator => 3,
140 RegionKind::Metadata => 4,
141 _ => 5,
142 };
143 entry_points.push(EntryPoint {
144 sheet_name: sheet_name.clone(),
145 region_id: Some(region.id),
146 bounds: Some(region.bounds.clone()),
147 rationale: format!(
148 "{:?} region ({} rows, {:.0}% conf, p{})",
149 kind,
150 region.row_count,
151 region.confidence * 100.0,
152 priority
153 ),
154 });
155 }
156 }
157
158 if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
159 entry_points.push(EntryPoint {
160 sheet_name: sheet_name.clone(),
161 region_id: None,
162 bounds: None,
163 rationale: "Whole sheet is non-empty; start at top-left".to_string(),
164 });
165 }
166 }
167
168 entry_points.sort_by(|a, b| {
169 let pa = priority_from_rationale(&a.rationale);
170 let pb = priority_from_rationale(&b.rationale);
171 pa.cmp(&pb)
172 .then_with(|| {
173 a.bounds
174 .as_ref()
175 .map(|_| 1)
176 .cmp(&b.bounds.as_ref().map(|_| 1))
177 })
178 .then_with(|| a.sheet_name.cmp(&b.sheet_name))
179 });
180 entry_points.truncate(5);
181
182 let mut seen_ranges = std::collections::HashSet::new();
183 for item in workbook.named_items()? {
184 if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
185 continue;
186 }
187 if !seen_ranges.insert(item.refers_to.clone()) {
188 continue;
189 }
190 key_named_ranges.push(item);
191 if key_named_ranges.len() >= 10 {
192 break;
193 }
194 }
195
196 Ok(WorkbookSummaryResponse {
197 workbook_id: workbook.id.clone(),
198 workbook_short_id: workbook.short_id.clone(),
199 slug: workbook.slug.clone(),
200 sheet_count: sheet_names.len(),
201 total_cells,
202 total_formulas,
203 breakdown,
204 region_counts,
205 key_named_ranges,
206 suggested_entry_points: entry_points,
207 })
208}
209
210fn priority_from_rationale(rationale: &str) -> u32 {
211 if rationale.contains("p0") {
212 0
213 } else if rationale.contains("p1") {
214 1
215 } else if rationale.contains("p2") {
216 2
217 } else if rationale.contains("p3") {
218 3
219 } else if rationale.contains("p4") {
220 4
221 } else {
222 5
223 }
224}
225
226pub async fn sheet_overview(
227 state: Arc<AppState>,
228 params: SheetOverviewParams,
229) -> Result<SheetOverviewResponse> {
230 let workbook = state.open_workbook(¶ms.workbook_id).await?;
231 let overview = workbook.sheet_overview(¶ms.sheet_name)?;
232 Ok(overview)
233}
234
235fn default_start_row() -> u32 {
236 1
237}
238
239fn default_page_size() -> u32 {
240 50
241}
242
243fn default_include_formulas() -> bool {
244 true
245}
246
247fn default_include_header() -> bool {
248 true
249}
250
251#[derive(Debug, Deserialize, JsonSchema)]
252pub struct SheetPageParams {
253 pub workbook_id: WorkbookId,
254 pub sheet_name: String,
255 #[serde(default = "default_start_row")]
256 pub start_row: u32,
257 #[serde(default = "default_page_size")]
258 pub page_size: u32,
259 #[serde(default)]
260 pub columns: Option<Vec<String>>,
261 #[serde(default)]
262 pub columns_by_header: Option<Vec<String>>,
263 #[serde(default = "default_include_formulas")]
264 pub include_formulas: bool,
265 #[serde(default)]
266 pub include_styles: bool,
267 #[serde(default = "default_include_header")]
268 pub include_header: bool,
269 #[serde(default)]
270 pub format: Option<SheetPageFormat>,
271}
272
273impl Default for SheetPageParams {
274 fn default() -> Self {
275 SheetPageParams {
276 workbook_id: WorkbookId(String::new()),
277 sheet_name: String::new(),
278 start_row: default_start_row(),
279 page_size: default_page_size(),
280 columns: None,
281 columns_by_header: None,
282 include_formulas: default_include_formulas(),
283 include_styles: false,
284 include_header: default_include_header(),
285 format: None,
286 }
287 }
288}
289
290fn default_find_limit() -> u32 {
291 50
292}
293
294#[derive(Debug, Deserialize, JsonSchema)]
295pub struct FindValueParams {
296 pub workbook_id: WorkbookId,
297 pub query: String,
298 #[serde(default)]
299 pub label: Option<String>,
300 #[serde(default)]
301 pub mode: Option<FindMode>,
302 #[serde(default)]
303 pub match_mode: Option<String>,
304 #[serde(default)]
305 pub case_sensitive: bool,
306 #[serde(default)]
307 pub sheet_name: Option<String>,
308 #[serde(default)]
309 pub region_id: Option<u32>,
310 #[serde(default)]
311 pub table_name: Option<String>,
312 #[serde(default)]
313 pub value_types: Option<Vec<String>>,
314 #[serde(default)]
315 pub search_headers_only: bool,
316 #[serde(default)]
317 pub direction: Option<LabelDirection>,
318 #[serde(default = "default_find_limit")]
319 pub limit: u32,
320}
321
322impl Default for FindValueParams {
323 fn default() -> Self {
324 Self {
325 workbook_id: WorkbookId(String::new()),
326 query: String::new(),
327 label: None,
328 mode: None,
329 match_mode: None,
330 case_sensitive: false,
331 sheet_name: None,
332 region_id: None,
333 table_name: None,
334 value_types: None,
335 search_headers_only: false,
336 direction: None,
337 limit: default_find_limit(),
338 }
339 }
340}
341
342#[derive(Debug, Deserialize, JsonSchema, Default)]
343pub struct ReadTableParams {
344 pub workbook_id: WorkbookId,
345 #[serde(default)]
346 pub sheet_name: Option<String>,
347 #[serde(default)]
348 pub table_name: Option<String>,
349 #[serde(default)]
350 pub region_id: Option<u32>,
351 #[serde(default)]
352 pub range: Option<String>,
353 #[serde(default)]
354 pub header_row: Option<u32>,
355 #[serde(default)]
356 pub header_rows: Option<u32>,
357 #[serde(default)]
358 pub columns: Option<Vec<String>>,
359 #[serde(default)]
360 pub filters: Option<Vec<TableFilter>>,
361 #[serde(default)]
362 pub sample_mode: Option<String>,
363 #[serde(default)]
364 pub limit: Option<u32>,
365 #[serde(default)]
366 pub offset: Option<u32>,
367}
368
369#[derive(Debug, Deserialize, JsonSchema, Clone)]
370pub struct TableFilter {
371 pub column: String,
372 pub op: String,
373 pub value: serde_json::Value,
374}
375
376#[derive(Debug, Deserialize, JsonSchema, Default)]
377pub struct TableProfileParams {
378 pub workbook_id: WorkbookId,
379 #[serde(default)]
380 pub sheet_name: Option<String>,
381 #[serde(default)]
382 pub region_id: Option<u32>,
383 #[serde(default)]
384 pub table_name: Option<String>,
385 #[serde(default)]
386 pub sample_mode: Option<String>,
387 #[serde(default)]
388 pub sample_size: Option<u32>,
389}
390
391#[derive(Debug, Deserialize, JsonSchema)]
392pub struct RangeValuesParams {
393 pub workbook_id: WorkbookId,
394 pub sheet_name: String,
395 pub ranges: Vec<String>,
396 #[serde(default)]
397 pub include_headers: Option<bool>,
398}
399
400pub async fn sheet_page(
401 state: Arc<AppState>,
402 params: SheetPageParams,
403) -> Result<SheetPageResponse> {
404 if params.page_size == 0 {
405 return Err(anyhow!("page_size must be greater than zero"));
406 }
407
408 let workbook = state.open_workbook(¶ms.workbook_id).await?;
409 let metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
410 let format = params.format.unwrap_or_default();
411
412 let start_row = params.start_row.max(1);
413 let page_size = params.page_size.min(500);
414 let include_formulas = params.include_formulas;
415 let include_styles = params.include_styles;
416 let columns = params.columns.clone();
417 let columns_by_header = params.columns_by_header.clone();
418 let include_header = params.include_header;
419
420 let page = workbook.with_sheet(¶ms.sheet_name, |sheet| {
421 build_page(
422 sheet,
423 start_row,
424 page_size,
425 columns.clone(),
426 columns_by_header.clone(),
427 include_formulas,
428 include_styles,
429 include_header,
430 )
431 })?;
432
433 let has_more = page.end_row < metrics.metrics.row_count;
434 let next_start_row = if has_more {
435 Some(page.end_row + 1)
436 } else {
437 None
438 };
439
440 let compact_payload = if matches!(format, SheetPageFormat::Compact) {
441 Some(build_compact_payload(
442 &page.header,
443 &page.rows,
444 include_header,
445 ))
446 } else {
447 None
448 };
449
450 let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
451 Some(build_values_only_payload(
452 &page.header,
453 &page.rows,
454 include_header,
455 ))
456 } else {
457 None
458 };
459
460 let response = SheetPageResponse {
461 workbook_id: workbook.id.clone(),
462 workbook_short_id: workbook.short_id.clone(),
463 sheet_name: params.sheet_name,
464 rows: if matches!(format, SheetPageFormat::Full) {
465 page.rows
466 } else {
467 Vec::new()
468 },
469 has_more,
470 next_start_row,
471 header_row: if include_header { page.header } else { None },
472 compact: compact_payload,
473 values_only: values_only_payload,
474 format,
475 };
476 Ok(response)
477}
478
479#[derive(Debug, Deserialize, JsonSchema)]
480pub struct SheetFormulaMapParams {
481 pub workbook_id: WorkbookId,
482 pub sheet_name: String,
483 pub range: Option<String>,
484 #[serde(default)]
485 pub expand: bool,
486 #[serde(default)]
487 pub limit: Option<u32>,
488 #[serde(default)]
489 pub sort_by: Option<FormulaSortBy>,
490}
491
492#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
493#[serde(rename_all = "snake_case")]
494pub enum FormulaSortBy {
495 #[default]
496 Address,
497 Complexity,
498 Count,
499}
500
501pub async fn sheet_formula_map(
502 state: Arc<AppState>,
503 params: SheetFormulaMapParams,
504) -> Result<SheetFormulaMapResponse> {
505 let workbook = state.open_workbook(¶ms.workbook_id).await?;
506 let graph = workbook.formula_graph(¶ms.sheet_name)?;
507 let mut groups = Vec::new();
508 let mut truncated = false;
509
510 for mut group in graph.groups() {
511 if let Some(range) = ¶ms.range {
512 group.addresses.retain(|addr| address_in_range(addr, range));
513 if group.addresses.is_empty() {
514 continue;
515 }
516 }
517 if !params.expand && group.addresses.len() > 15 {
518 group.addresses.truncate(15);
519 truncated = true;
520 }
521 groups.push(group);
522 }
523
524 let sort_by = params.sort_by.unwrap_or_default();
525 match sort_by {
526 FormulaSortBy::Address => {
527 groups.sort_by(|a, b| a.addresses.first().cmp(&b.addresses.first()));
528 }
529 FormulaSortBy::Complexity => {
530 groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
531 }
532 FormulaSortBy::Count => {
533 groups.sort_by(|a, b| b.addresses.len().cmp(&a.addresses.len()));
534 }
535 }
536
537 if let Some(limit) = params.limit
538 && groups.len() > limit as usize
539 {
540 groups.truncate(limit as usize);
541 truncated = true;
542 }
543
544 let response = SheetFormulaMapResponse {
545 workbook_id: workbook.id.clone(),
546 workbook_short_id: workbook.short_id.clone(),
547 sheet_name: params.sheet_name.clone(),
548 groups,
549 truncated,
550 };
551 Ok(response)
552}
553
554#[derive(Debug, Deserialize, JsonSchema)]
555pub struct FormulaTraceParams {
556 pub workbook_id: WorkbookId,
557 pub sheet_name: String,
558 pub cell_address: String,
559 pub direction: TraceDirection,
560 pub depth: Option<u32>,
561 pub limit: Option<u32>,
562 #[serde(default)]
563 pub page_size: Option<usize>,
564 #[serde(default)]
565 pub cursor: Option<TraceCursor>,
566}
567
568pub async fn formula_trace(
569 state: Arc<AppState>,
570 params: FormulaTraceParams,
571) -> Result<FormulaTraceResponse> {
572 let workbook = state.open_workbook(¶ms.workbook_id).await?;
573 let graph = workbook.formula_graph(¶ms.sheet_name)?;
574 let formula_lookup = build_formula_lookup(&graph);
575 let depth = params.depth.unwrap_or(3).clamp(1, 5);
576 let page_size = params
577 .page_size
578 .or_else(|| params.limit.map(|v| v as usize))
579 .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
580 .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
581
582 let origin = params.cell_address.to_uppercase();
583 let config = TraceConfig {
584 direction: ¶ms.direction,
585 origin: &origin,
586 sheet_name: ¶ms.sheet_name,
587 depth_limit: depth,
588 page_size,
589 };
590 let (layers, next_cursor, notes) = build_trace_layers(
591 &workbook,
592 &graph,
593 &formula_lookup,
594 &config,
595 params.cursor.clone(),
596 )?;
597
598 let response = FormulaTraceResponse {
599 workbook_id: workbook.id.clone(),
600 workbook_short_id: workbook.short_id.clone(),
601 sheet_name: params.sheet_name.clone(),
602 origin,
603 direction: params.direction.clone(),
604 layers,
605 next_cursor,
606 notes,
607 };
608 Ok(response)
609}
610
611#[derive(Debug, Deserialize, JsonSchema)]
612pub struct NamedRangesParams {
613 pub workbook_id: WorkbookId,
614 pub sheet_name: Option<String>,
615 pub name_prefix: Option<String>,
616}
617
618pub async fn named_ranges(
619 state: Arc<AppState>,
620 params: NamedRangesParams,
621) -> Result<NamedRangesResponse> {
622 let workbook = state.open_workbook(¶ms.workbook_id).await?;
623 let mut items = workbook.named_items()?;
624
625 if let Some(sheet_filter) = ¶ms.sheet_name {
626 items.retain(|item| {
627 item.sheet_name
628 .as_ref()
629 .map(|name| name.eq_ignore_ascii_case(sheet_filter))
630 .unwrap_or(false)
631 });
632 }
633 if let Some(prefix) = ¶ms.name_prefix {
634 let prefix_lower = prefix.to_ascii_lowercase();
635 items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
636 }
637
638 let response = NamedRangesResponse {
639 workbook_id: workbook.id.clone(),
640 workbook_short_id: workbook.short_id.clone(),
641 items,
642 };
643 Ok(response)
644}
645
646struct PageBuildResult {
647 rows: Vec<RowSnapshot>,
648 header: Option<RowSnapshot>,
649 end_row: u32,
650}
651
652#[allow(clippy::too_many_arguments)]
653fn build_page(
654 sheet: &umya_spreadsheet::Worksheet,
655 start_row: u32,
656 page_size: u32,
657 columns: Option<Vec<String>>,
658 columns_by_header: Option<Vec<String>>,
659 include_formulas: bool,
660 include_styles: bool,
661 include_header: bool,
662) -> PageBuildResult {
663 let max_col = sheet.get_highest_column();
664 let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
665 let column_indices =
666 resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
667
668 let header = if include_header {
669 Some(build_row_snapshot(
670 sheet,
671 1,
672 &column_indices,
673 include_formulas,
674 include_styles,
675 ))
676 } else {
677 None
678 };
679
680 let mut rows = Vec::new();
681 for row_idx in start_row..=end_row {
682 rows.push(build_row_snapshot(
683 sheet,
684 row_idx,
685 &column_indices,
686 include_formulas,
687 include_styles,
688 ));
689 }
690
691 PageBuildResult {
692 rows,
693 header,
694 end_row,
695 }
696}
697
698fn build_row_snapshot(
699 sheet: &umya_spreadsheet::Worksheet,
700 row_index: u32,
701 columns: &[u32],
702 include_formulas: bool,
703 include_styles: bool,
704) -> RowSnapshot {
705 let mut cells = Vec::new();
706 for &col in columns {
707 if let Some(cell) = sheet.get_cell((col, row_index)) {
708 cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
709 } else {
710 let address = crate::utils::cell_address(col, row_index);
711 cells.push(CellSnapshot {
712 address,
713 value: None,
714 formula: None,
715 cached_value: None,
716 number_format: None,
717 style_tags: Vec::new(),
718 notes: Vec::new(),
719 });
720 }
721 }
722
723 RowSnapshot { row_index, cells }
724}
725
726fn build_cell_snapshot(
727 cell: &umya_spreadsheet::Cell,
728 include_formulas: bool,
729 include_styles: bool,
730) -> CellSnapshot {
731 let address = cell.get_coordinate().get_coordinate();
732 let value = crate::workbook::cell_to_value(cell);
733 let formula = if include_formulas && cell.is_formula() {
734 Some(cell.get_formula().to_string())
735 } else {
736 None
737 };
738 let cached_value = if cell.is_formula() {
739 value.clone()
740 } else {
741 None
742 };
743 let number_format = if include_styles {
744 cell.get_style()
745 .get_number_format()
746 .map(|fmt| fmt.get_format_code().to_string())
747 } else {
748 None
749 };
750 let style_tags = if include_styles {
751 crate::analysis::style::tag_cell(cell)
752 .map(|(_, tagging)| tagging.tags)
753 .unwrap_or_default()
754 } else {
755 Vec::new()
756 };
757
758 CellSnapshot {
759 address,
760 value,
761 formula,
762 cached_value,
763 number_format,
764 style_tags,
765 notes: Vec::new(),
766 }
767}
768
769fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
770 use std::collections::BTreeSet;
771 use umya_spreadsheet::helper::coordinate::column_index_from_string;
772
773 let mut indices = BTreeSet::new();
774 if let Some(specs) = columns {
775 for spec in specs {
776 if let Some((start, end)) = spec.split_once(':') {
777 let start_idx = column_index_from_string(start);
778 let end_idx = column_index_from_string(end);
779 let (min_idx, max_idx) = if start_idx <= end_idx {
780 (start_idx, end_idx)
781 } else {
782 (end_idx, start_idx)
783 };
784 for idx in min_idx..=max_idx {
785 indices.insert(idx);
786 }
787 } else {
788 indices.insert(column_index_from_string(spec));
789 }
790 }
791 } else {
792 for idx in 1..=max_column.max(1) {
793 indices.insert(idx);
794 }
795 }
796
797 indices.into_iter().collect()
798}
799
800fn resolve_columns_with_headers(
801 sheet: &umya_spreadsheet::Worksheet,
802 columns: Option<&Vec<String>>,
803 columns_by_header: Option<&Vec<String>>,
804 max_column: u32,
805) -> Vec<u32> {
806 if columns_by_header.is_none() {
807 return resolve_columns(columns, max_column);
808 }
809
810 let mut selected = Vec::new();
811 let header_targets: Vec<String> = columns_by_header
812 .unwrap()
813 .iter()
814 .map(|h| h.trim().to_ascii_lowercase())
815 .collect();
816
817 for col_idx in 1..=max_column.max(1) {
818 let header_cell = sheet.get_cell((col_idx, 1u32));
819 let header_value = header_cell
820 .and_then(cell_to_value)
821 .map(cell_value_to_string_lower);
822 if let Some(hval) = header_value
823 && header_targets.iter().any(|target| target == &hval)
824 {
825 selected.push(col_idx);
826 }
827 }
828
829 if selected.is_empty() {
830 resolve_columns(columns, max_column)
831 } else {
832 selected
833 }
834}
835
836fn cell_value_to_string_lower(value: CellValue) -> String {
837 match value {
838 CellValue::Text(s) => s.to_ascii_lowercase(),
839 CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
840 CellValue::Bool(b) => b.to_string(),
841 CellValue::Error(e) => e.to_ascii_lowercase(),
842 CellValue::Date(d) => d.to_ascii_lowercase(),
843 }
844}
845
846fn build_compact_payload(
847 header: &Option<RowSnapshot>,
848 rows: &[RowSnapshot],
849 include_header: bool,
850) -> SheetPageCompact {
851 let headers = derive_headers(header, rows);
852 let header_row = if include_header {
853 header
854 .as_ref()
855 .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
856 .unwrap_or_default()
857 } else {
858 Vec::new()
859 };
860 let data_rows = rows
861 .iter()
862 .map(|row| {
863 let mut vals: Vec<Option<CellValue>> = Vec::new();
864 vals.push(Some(CellValue::Number(row.row_index as f64)));
865 vals.extend(row.cells.iter().map(|c| c.value.clone()));
866 vals
867 })
868 .collect();
869
870 SheetPageCompact {
871 headers,
872 header_row,
873 rows: data_rows,
874 }
875}
876
877fn build_values_only_payload(
878 header: &Option<RowSnapshot>,
879 rows: &[RowSnapshot],
880 include_header: bool,
881) -> SheetPageValues {
882 let mut data = Vec::new();
883 if include_header && let Some(h) = header {
884 data.push(h.cells.iter().map(|c| c.value.clone()).collect());
885 }
886 for row in rows {
887 data.push(row.cells.iter().map(|c| c.value.clone()).collect());
888 }
889
890 SheetPageValues { rows: data }
891}
892
893fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
894 if let Some(h) = header {
895 let mut headers: Vec<String> = h
896 .cells
897 .iter()
898 .map(|c| match &c.value {
899 Some(CellValue::Text(t)) => t.clone(),
900 Some(CellValue::Number(n)) => n.to_string(),
901 Some(CellValue::Bool(b)) => b.to_string(),
902 Some(CellValue::Date(d)) => d.clone(),
903 Some(CellValue::Error(e)) => e.clone(),
904 None => c.address.clone(),
905 })
906 .collect();
907 headers.insert(0, "Row".to_string());
908 headers
909 } else if let Some(first) = rows.first() {
910 let mut headers = Vec::new();
911 headers.push("Row".to_string());
912 for cell in &first.cells {
913 headers.push(cell.address.clone());
914 }
915 headers
916 } else {
917 vec![]
918 }
919}
920fn default_stats_sample() -> usize {
921 500
922}
923
924#[derive(Debug, Deserialize, JsonSchema)]
925pub struct SheetStatisticsParams {
926 pub workbook_id: WorkbookId,
927 pub sheet_name: String,
928 #[serde(default)]
929 pub sample_rows: Option<usize>,
930}
931
932pub async fn sheet_statistics(
933 state: Arc<AppState>,
934 params: SheetStatisticsParams,
935) -> Result<SheetStatisticsResponse> {
936 let workbook = state.open_workbook(¶ms.workbook_id).await?;
937 let sheet_metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
938 let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
939 let stats = workbook.with_sheet(¶ms.sheet_name, |sheet| {
940 stats::compute_sheet_statistics(sheet, sample_rows)
941 })?;
942 let response = SheetStatisticsResponse {
943 workbook_id: workbook.id.clone(),
944 workbook_short_id: workbook.short_id.clone(),
945 sheet_name: params.sheet_name,
946 row_count: sheet_metrics.metrics.row_count,
947 column_count: sheet_metrics.metrics.column_count,
948 density: stats.density,
949 numeric_columns: stats.numeric_columns,
950 text_columns: stats.text_columns,
951 null_counts: stats.null_counts,
952 duplicate_warnings: stats.duplicate_warnings,
953 };
954 Ok(response)
955}
956
957fn address_in_range(address: &str, range: &str) -> bool {
958 parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
959 if let Some((col, row)) = parse_address(address) {
960 col >= start_col && col <= end_col && row >= start_row && row <= end_row
961 } else {
962 false
963 }
964 })
965}
966
967fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
968 let mut parts = range.split(':');
969 let start = parts.next()?;
970 let end = parts.next().unwrap_or(start);
971 let start_idx = parse_address(start)?;
972 let end_idx = parse_address(end)?;
973 Some((
974 (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
975 (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
976 ))
977}
978
979fn parse_address(address: &str) -> Option<(u32, u32)> {
980 use umya_spreadsheet::helper::coordinate::index_from_coordinate;
981 let (col, row, _, _) = index_from_coordinate(address);
982 match (col, row) {
983 (Some(c), Some(r)) => Some((c, r)),
984 _ => None,
985 }
986}
987
988#[derive(Clone)]
989struct TableTarget {
990 sheet_name: String,
991 table_name: Option<String>,
992 range: ((u32, u32), (u32, u32)),
993 header_hint: Option<u32>,
994}
995
996fn resolve_table_target(
997 workbook: &WorkbookContext,
998 params: &ReadTableParams,
999) -> Result<TableTarget> {
1000 if let Some(region_id) = params.region_id
1001 && let Some(sheet) = ¶ms.sheet_name
1002 && let Ok(region) = workbook.detected_region(sheet, region_id)
1003 {
1004 return Ok(TableTarget {
1005 sheet_name: sheet.clone(),
1006 table_name: None,
1007 range: parse_range(®ion.bounds).unwrap_or(((1, 1), (1, 1))),
1008 header_hint: region.header_row,
1009 });
1010 }
1011
1012 if let Some(table_name) = ¶ms.table_name {
1013 let items = workbook.named_items()?;
1014 for item in items {
1015 if item.name.eq_ignore_ascii_case(table_name)
1016 || item
1017 .name
1018 .to_ascii_lowercase()
1019 .contains(&table_name.to_ascii_lowercase())
1020 {
1021 let mut sheet_name = item
1022 .sheet_name
1023 .clone()
1024 .or_else(|| params.sheet_name.clone())
1025 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1026 let refers_to = item.refers_to.trim_start_matches('=');
1027 let mut range_part = refers_to;
1028 if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1029 sheet_name = sheet_part.trim_matches('\'').to_string();
1030 range_part = rest;
1031 }
1032 if let Some(range) = parse_range(range_part) {
1033 return Ok(TableTarget {
1034 sheet_name,
1035 table_name: Some(item.name.clone()),
1036 range,
1037 header_hint: if item.kind == NamedItemKind::Table {
1038 Some(range.0.1)
1039 } else {
1040 None
1041 },
1042 });
1043 }
1044 }
1045 }
1046 }
1047
1048 let sheet_name = params
1049 .sheet_name
1050 .clone()
1051 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1052
1053 if let Some(rng) = ¶ms.range
1054 && let Some(range) = parse_range(rng)
1055 {
1056 return Ok(TableTarget {
1057 sheet_name,
1058 table_name: None,
1059 range,
1060 header_hint: None,
1061 });
1062 }
1063
1064 let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1065 let end_col = metrics.metrics.column_count.max(1);
1066 let end_row = metrics.metrics.row_count.max(1);
1067 Ok(TableTarget {
1068 sheet_name,
1069 table_name: None,
1070 range: ((1, 1), (end_col, end_row)),
1071 header_hint: None,
1072 })
1073}
1074
1075#[allow(clippy::too_many_arguments)]
1076fn extract_table_rows(
1077 sheet: &umya_spreadsheet::Worksheet,
1078 target: &TableTarget,
1079 header_row: Option<u32>,
1080 header_rows: Option<u32>,
1081 columns: Option<Vec<String>>,
1082 filters: Option<Vec<TableFilter>>,
1083 limit: usize,
1084 offset: usize,
1085 sample_mode: &str,
1086) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1087 let ((start_col, start_row), (end_col, end_row)) = target.range;
1088 let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1089 if header_start < start_row {
1090 header_start = start_row;
1091 }
1092 if header_start > end_row {
1093 header_start = start_row;
1094 }
1095 let header_rows_count = header_rows.unwrap_or(1).max(1);
1096 let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1097 let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1098 resolve_columns(Some(cols), end_col).into_iter().collect()
1099 } else {
1100 (start_col..=end_col).collect()
1101 };
1102
1103 let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1104 let mut all_rows: Vec<TableRow> = Vec::new();
1105 let mut total_rows: u32 = 0;
1106
1107 for row_idx in data_start_row..=end_row {
1108 let mut row = BTreeMap::new();
1109 for (i, col_idx) in column_indices.iter().enumerate() {
1110 let header = headers
1111 .get(i)
1112 .cloned()
1113 .unwrap_or_else(|| format!("Col{col_idx}"));
1114 let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1115 row.insert(header, value);
1116 }
1117 if !row_passes_filters(&row, filters.as_ref()) {
1118 continue;
1119 }
1120 total_rows += 1;
1121 if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1122 continue;
1123 }
1124 all_rows.push(row);
1125 }
1126
1127 let rows = sample_rows(all_rows, limit, offset, sample_mode);
1128
1129 Ok((headers, rows, total_rows))
1130}
1131
1132fn build_headers(
1133 sheet: &umya_spreadsheet::Worksheet,
1134 columns: &[u32],
1135 header_start: u32,
1136 header_rows: u32,
1137) -> Vec<String> {
1138 let mut headers = Vec::new();
1139 for col_idx in columns {
1140 let mut parts = Vec::new();
1141 for h in header_start..(header_start + header_rows) {
1142 let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1143 if let Some(value) = sheet
1144 .get_cell((origin_col, origin_row))
1145 .and_then(cell_to_value)
1146 {
1147 match value {
1148 CellValue::Text(ref s) if s.trim().is_empty() => {}
1149 CellValue::Text(s) => parts.push(s),
1150 CellValue::Number(n) => parts.push(n.to_string()),
1151 CellValue::Bool(b) => parts.push(b.to_string()),
1152 CellValue::Error(e) => parts.push(e),
1153 CellValue::Date(d) => parts.push(d),
1154 }
1155 }
1156 }
1157 if parts.is_empty() {
1158 headers.push(crate::utils::column_number_to_name(*col_idx));
1159 } else {
1160 headers.push(parts.join(" / "));
1161 }
1162 }
1163
1164 if headers.iter().all(|h| h.trim().is_empty()) {
1165 return columns
1166 .iter()
1167 .map(|c| crate::utils::column_number_to_name(*c))
1168 .collect();
1169 }
1170
1171 dedupe_headers(headers)
1172}
1173
1174fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1175 let mut seen: HashMap<String, u32> = HashMap::new();
1176 for h in headers.iter_mut() {
1177 let key = h.clone();
1178 if key.trim().is_empty() {
1179 continue;
1180 }
1181 let count = seen.entry(key.clone()).or_insert(0);
1182 if *count > 0 {
1183 h.push_str(&format!("_{}", *count + 1));
1184 }
1185 *count += 1;
1186 }
1187 headers
1188}
1189
1190fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1191 if let Some(filters) = filters {
1192 for filter in filters {
1193 if let Some(value) = row.get(&filter.column) {
1194 match filter.op.as_str() {
1195 "eq" => {
1196 if !value_eq(value, &filter.value) {
1197 return false;
1198 }
1199 }
1200 "neq" => {
1201 if value_eq(value, &filter.value) {
1202 return false;
1203 }
1204 }
1205 "contains" => {
1206 if !value_contains(value, &filter.value) {
1207 return false;
1208 }
1209 }
1210 "gt" => {
1211 if !value_gt(value, &filter.value) {
1212 return false;
1213 }
1214 }
1215 "lt" => {
1216 if !value_lt(value, &filter.value) {
1217 return false;
1218 }
1219 }
1220 "in" => {
1221 let list = filter
1222 .value
1223 .as_array()
1224 .cloned()
1225 .unwrap_or_else(|| vec![filter.value.clone()]);
1226 if !list.iter().any(|cmp| value_eq(value, cmp)) {
1227 return false;
1228 }
1229 }
1230 _ => {}
1231 }
1232 }
1233 }
1234 }
1235 true
1236}
1237
1238fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1239 match (cell, cmp) {
1240 (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1241 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1242 v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1243 }
1244 (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1245 .parse::<f64>()
1246 .map(|f| (*n - f).abs() < f64::EPSILON)
1247 .unwrap_or(false),
1248 (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1249 (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1250 t.eq_ignore_ascii_case("true") == *b
1251 }
1252 (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1253 (None, serde_json::Value::Null) => true,
1254 _ => false,
1255 }
1256}
1257
1258fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1259 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1260 return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1261 }
1262 false
1263}
1264
1265fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1266 match (cell, cmp) {
1267 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1268 v.as_f64().is_some_and(|f| *n > f)
1269 }
1270 _ => false,
1271 }
1272}
1273
1274fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1275 match (cell, cmp) {
1276 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1277 v.as_f64().is_some_and(|f| *n < f)
1278 }
1279 _ => false,
1280 }
1281}
1282
1283fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1284 if rows.is_empty() {
1285 return rows;
1286 }
1287
1288 match mode {
1289 "distributed" => {
1290 if limit == 0 {
1291 return Vec::new();
1292 }
1293 let mut indices = Vec::new();
1294 let span = rows.len().saturating_sub(1);
1295 let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1296 let mut idx = offset;
1297 while idx < rows.len() && indices.len() < limit {
1298 indices.push(idx);
1299 idx = idx.saturating_add(step);
1300 if idx == indices.last().copied().unwrap_or(0) {
1301 idx += 1;
1302 }
1303 }
1304 if indices.len() < limit {
1305 let last_idx = rows.len().saturating_sub(1);
1306 if !indices.contains(&last_idx) {
1307 indices.push(last_idx);
1308 }
1309 }
1310 indices
1311 .into_iter()
1312 .filter_map(|i| rows.get(i).cloned())
1313 .collect()
1314 }
1315 "last" => {
1316 let start = rows.len().saturating_sub(limit + offset);
1317 rows.into_iter().skip(start + offset).take(limit).collect()
1318 }
1319 _ => rows.into_iter().skip(offset).take(limit).collect(),
1320 }
1321}
1322
1323fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1324 let mut summaries = Vec::new();
1325 for header in headers {
1326 let mut nulls = 0u32;
1327 let mut distinct_set: HashSet<String> = HashSet::new();
1328 let mut values: Vec<f64> = Vec::new();
1329 let mut top_counts: HashMap<String, u32> = HashMap::new();
1330
1331 for row in rows {
1332 match row.get(header) {
1333 Some(Some(CellValue::Number(n))) => {
1334 values.push(*n);
1335 let key = n.to_string();
1336 *top_counts.entry(key).or_default() += 1;
1337 }
1338 Some(Some(CellValue::Text(s))) => {
1339 distinct_set.insert(s.clone());
1340 *top_counts.entry(s.clone()).or_default() += 1;
1341 }
1342 Some(Some(CellValue::Bool(b))) => {
1343 let key = b.to_string();
1344 distinct_set.insert(key.clone());
1345 *top_counts.entry(key).or_default() += 1;
1346 }
1347 Some(Some(CellValue::Date(d))) => {
1348 distinct_set.insert(d.clone());
1349 *top_counts.entry(d.clone()).or_default() += 1;
1350 }
1351 Some(Some(CellValue::Error(e))) => {
1352 distinct_set.insert(e.clone());
1353 *top_counts.entry(e.clone()).or_default() += 1;
1354 }
1355 _ => {
1356 nulls += 1;
1357 }
1358 }
1359 }
1360
1361 let inferred_type = if !values.is_empty() {
1362 "number"
1363 } else if !distinct_set.is_empty() {
1364 "text"
1365 } else {
1366 "unknown"
1367 }
1368 .to_string();
1369
1370 let min = values.iter().cloned().reduce(f64::min);
1371 let max = values.iter().cloned().reduce(f64::max);
1372 let mean = if values.is_empty() {
1373 None
1374 } else {
1375 Some(values.iter().sum::<f64>() / values.len() as f64)
1376 };
1377
1378 let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1379 top_values.sort_by(|a, b| b.1.cmp(&a.1));
1380 let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1381
1382 summaries.push(ColumnTypeSummary {
1383 name: header.clone(),
1384 inferred_type,
1385 nulls,
1386 distinct: distinct_set.len() as u32,
1387 top_values,
1388 min,
1389 max,
1390 mean,
1391 });
1392 }
1393 summaries
1394}
1395
1396#[allow(clippy::too_many_arguments)]
1397fn collect_value_matches(
1398 sheet: &umya_spreadsheet::Worksheet,
1399 sheet_name: &str,
1400 mode: &FindMode,
1401 match_mode: &str,
1402 direction: &LabelDirection,
1403 params: &FindValueParams,
1404 region: Option<&DetectedRegion>,
1405 default_bounds: ((u32, u32), (u32, u32)),
1406) -> Result<Vec<FindValueMatch>> {
1407 let mut results = Vec::new();
1408 let regex = if match_mode == "regex" {
1409 Regex::new(¶ms.query).ok()
1410 } else {
1411 None
1412 };
1413 let bounds = region
1414 .as_ref()
1415 .and_then(|r| parse_range(&r.bounds))
1416 .unwrap_or(default_bounds);
1417
1418 let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1419
1420 for cell in sheet.get_cell_collection() {
1421 let coord = cell.get_coordinate();
1422 let col = *coord.get_col_num();
1423 let row = *coord.get_row_num();
1424 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1425 continue;
1426 }
1427 if params.search_headers_only && row != header_row {
1428 continue;
1429 }
1430
1431 let value = cell_to_value(cell);
1432 if let Some(ref allowed) = params.value_types
1433 && !value_type_matches(&value, allowed)
1434 {
1435 continue;
1436 }
1437 if matches!(mode, FindMode::Value) {
1438 if !value_matches(
1439 &value,
1440 ¶ms.query,
1441 match_mode,
1442 params.case_sensitive,
1443 ®ex,
1444 ) {
1445 continue;
1446 }
1447 } else if let Some(label) = ¶ms.label {
1448 if !label_matches(cell, label, match_mode, params.case_sensitive, ®ex) {
1449 continue;
1450 }
1451 } else {
1452 continue;
1453 }
1454
1455 let neighbors = collect_neighbors(sheet, row, col);
1456 let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1457 let target_value = match direction {
1458 LabelDirection::Right => sheet.get_cell((col + 1, row)),
1459 LabelDirection::Below => sheet.get_cell((col, row + 1)),
1460 LabelDirection::Any => sheet
1461 .get_cell((col + 1, row))
1462 .or_else(|| sheet.get_cell((col, row + 1))),
1463 }
1464 .and_then(cell_to_value);
1465 if target_value.is_none() {
1466 continue;
1467 }
1468 (
1469 Some(LabelHit {
1470 label_address: coord.get_coordinate(),
1471 label: label_from_cell(cell),
1472 }),
1473 target_value,
1474 )
1475 } else {
1476 (None, value.clone())
1477 };
1478
1479 let row_context = build_row_context(sheet, row, col);
1480
1481 results.push(FindValueMatch {
1482 address: coord.get_coordinate(),
1483 sheet_name: sheet_name.to_string(),
1484 value: match_value,
1485 row_context,
1486 neighbors,
1487 label_hit,
1488 });
1489 }
1490
1491 Ok(results)
1492}
1493
1494fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1495 cell_to_value(cell)
1496 .map(|v| match v {
1497 CellValue::Text(s) => s,
1498 CellValue::Number(n) => n.to_string(),
1499 CellValue::Bool(b) => b.to_string(),
1500 CellValue::Date(d) => d,
1501 CellValue::Error(e) => e,
1502 })
1503 .unwrap_or_else(|| cell.get_value().to_string())
1504}
1505
1506fn value_matches(
1507 value: &Option<CellValue>,
1508 query: &str,
1509 mode: &str,
1510 case_sensitive: bool,
1511 regex: &Option<Regex>,
1512) -> bool {
1513 if value.is_none() {
1514 return false;
1515 }
1516 let haystack = cell_value_to_string_lower(value.clone().unwrap());
1517 let needle = if case_sensitive {
1518 query.to_string()
1519 } else {
1520 query.to_ascii_lowercase()
1521 };
1522
1523 match mode {
1524 "exact" => haystack == needle,
1525 "regex" => regex
1526 .as_ref()
1527 .map(|re| re.is_match(&haystack))
1528 .unwrap_or(false),
1529 _ => haystack.contains(&needle),
1530 }
1531}
1532
1533fn label_matches(
1534 cell: &umya_spreadsheet::Cell,
1535 label: &str,
1536 mode: &str,
1537 case_sensitive: bool,
1538 regex: &Option<Regex>,
1539) -> bool {
1540 let value = cell_to_value(cell);
1541 if value.is_none() {
1542 return false;
1543 }
1544 let haystack = cell_value_to_string_lower(value.unwrap());
1545 let needle = if case_sensitive {
1546 label.to_string()
1547 } else {
1548 label.to_ascii_lowercase()
1549 };
1550 match mode {
1551 "exact" => haystack == needle,
1552 "regex" => regex
1553 .as_ref()
1554 .map(|re| re.is_match(&haystack))
1555 .unwrap_or(false),
1556 _ => haystack.contains(&needle),
1557 }
1558}
1559
1560fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1561 if value.is_none() {
1562 return allowed.iter().any(|v| v == "null");
1563 }
1564 match value.as_ref().unwrap() {
1565 CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1566 CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1567 CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1568 CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1569 CellValue::Error(_) => true,
1570 }
1571}
1572
1573fn collect_neighbors(
1574 sheet: &umya_spreadsheet::Worksheet,
1575 row: u32,
1576 col: u32,
1577) -> Option<NeighborValues> {
1578 Some(NeighborValues {
1579 left: if col > 1 {
1580 sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1581 } else {
1582 None
1583 },
1584 right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1585 up: if row > 1 {
1586 sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1587 } else {
1588 None
1589 },
1590 down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1591 })
1592}
1593
1594fn build_row_context(
1595 sheet: &umya_spreadsheet::Worksheet,
1596 row: u32,
1597 col: u32,
1598) -> Option<RowContext> {
1599 let header_value = sheet
1600 .get_cell((col, 1u32))
1601 .and_then(cell_to_value)
1602 .map(|v| match v {
1603 CellValue::Text(s) => s,
1604 CellValue::Number(n) => n.to_string(),
1605 CellValue::Bool(b) => b.to_string(),
1606 CellValue::Date(d) => d,
1607 CellValue::Error(e) => e,
1608 })
1609 .unwrap_or_else(|| format!("Col{}", col));
1610 let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1611 Some(RowContext {
1612 headers: vec![header_value],
1613 values: vec![value],
1614 })
1615}
1616
1617#[derive(Debug, Deserialize, JsonSchema)]
1618pub struct FindFormulaParams {
1619 pub workbook_id: WorkbookId,
1620 pub query: String,
1621 pub sheet_name: Option<String>,
1622 #[serde(default)]
1623 pub case_sensitive: bool,
1624}
1625
1626pub async fn find_formula(
1627 state: Arc<AppState>,
1628 params: FindFormulaParams,
1629) -> Result<FindFormulaResponse> {
1630 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1631 let query = if params.case_sensitive {
1632 params.query.clone()
1633 } else {
1634 params.query.to_ascii_lowercase()
1635 };
1636 let mut matches = Vec::new();
1637
1638 let sheet_names: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1639 vec![sheet.clone()]
1640 } else {
1641 workbook.sheet_names()
1642 };
1643
1644 for sheet_name in sheet_names {
1645 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1646 collect_formula_matches(sheet, &sheet_name, &query, params.case_sensitive)
1647 })?;
1648 matches.extend(sheet_matches);
1649 }
1650
1651 let response = FindFormulaResponse {
1652 workbook_id: workbook.id.clone(),
1653 workbook_short_id: workbook.short_id.clone(),
1654 matches,
1655 };
1656 Ok(response)
1657}
1658
1659#[derive(Debug, Deserialize, JsonSchema)]
1660pub struct ScanVolatilesParams {
1661 pub workbook_id: WorkbookId,
1662 pub sheet_name: Option<String>,
1663}
1664
1665pub async fn scan_volatiles(
1666 state: Arc<AppState>,
1667 params: ScanVolatilesParams,
1668) -> Result<VolatileScanResponse> {
1669 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1670 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1671 vec![sheet.clone()]
1672 } else {
1673 workbook.sheet_names()
1674 };
1675
1676 let mut items = Vec::new();
1677 let mut truncated = false;
1678
1679 for sheet_name in target_sheets {
1680 let graph = workbook.formula_graph(&sheet_name)?;
1681 for group in graph.groups() {
1682 if !group.is_volatile {
1683 continue;
1684 }
1685 for address in group.addresses.iter().take(50) {
1686 items.push(VolatileScanEntry {
1687 address: address.clone(),
1688 sheet_name: sheet_name.clone(),
1689 function: "volatile".to_string(),
1690 note: Some(group.formula.clone()),
1691 });
1692 }
1693 if group.addresses.len() > 50 {
1694 truncated = true;
1695 }
1696 }
1697 }
1698
1699 let response = VolatileScanResponse {
1700 workbook_id: workbook.id.clone(),
1701 workbook_short_id: workbook.short_id.clone(),
1702 items,
1703 truncated,
1704 };
1705 Ok(response)
1706}
1707
1708#[derive(Debug, Deserialize, JsonSchema)]
1709pub struct SheetStylesParams {
1710 pub workbook_id: WorkbookId,
1711 pub sheet_name: String,
1712}
1713
1714pub async fn sheet_styles(
1715 state: Arc<AppState>,
1716 params: SheetStylesParams,
1717) -> Result<SheetStylesResponse> {
1718 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1719 let entry = workbook.get_sheet_metrics(¶ms.sheet_name)?;
1720
1721 let styles = entry
1722 .metrics
1723 .style_map
1724 .iter()
1725 .map(|(style_id, usage)| StyleSummary {
1726 style_id: style_id.clone(),
1727 occurrences: usage.occurrences,
1728 tags: usage.tags.clone(),
1729 example_cells: usage.example_cells.clone(),
1730 })
1731 .collect();
1732
1733 let response = SheetStylesResponse {
1734 workbook_id: workbook.id.clone(),
1735 workbook_short_id: workbook.short_id.clone(),
1736 sheet_name: params.sheet_name.clone(),
1737 styles,
1738 conditional_rules: Vec::new(),
1739 };
1740 Ok(response)
1741}
1742
1743pub async fn range_values(
1744 state: Arc<AppState>,
1745 params: RangeValuesParams,
1746) -> Result<RangeValuesResponse> {
1747 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1748 let include_headers = params.include_headers.unwrap_or(false);
1749 let values = workbook.with_sheet(¶ms.sheet_name, |sheet| {
1750 params
1751 .ranges
1752 .iter()
1753 .filter_map(|range| {
1754 parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
1755 let mut rows = Vec::new();
1756 for r in start_row..=end_row {
1757 let mut row_vals = Vec::new();
1758 for c in start_col..=end_col {
1759 if include_headers && r == start_row && start_row == 1 {
1760 row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
1761 } else {
1762 row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
1763 }
1764 }
1765 rows.push(row_vals);
1766 }
1767 RangeValuesEntry {
1768 range: range.clone(),
1769 rows,
1770 }
1771 })
1772 })
1773 .collect()
1774 })?;
1775
1776 Ok(RangeValuesResponse {
1777 workbook_id: workbook.id.clone(),
1778 workbook_short_id: workbook.short_id.clone(),
1779 sheet_name: params.sheet_name,
1780 values,
1781 })
1782}
1783
1784pub async fn find_value(
1785 state: Arc<AppState>,
1786 params: FindValueParams,
1787) -> Result<FindValueResponse> {
1788 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1789 let mut matches = Vec::new();
1790 let mut truncated = false;
1791 let mode = params.mode.clone().unwrap_or_else(|| {
1792 if params.label.is_some() {
1793 FindMode::Label
1794 } else {
1795 FindMode::Value
1796 }
1797 });
1798 let match_mode = params
1799 .match_mode
1800 .as_deref()
1801 .unwrap_or("contains")
1802 .to_ascii_lowercase();
1803 let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
1804
1805 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1806 vec![sheet.clone()]
1807 } else {
1808 workbook.sheet_names()
1809 };
1810
1811 for sheet_name in target_sheets {
1812 let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
1813 let default_bounds = (
1814 (1, 1),
1815 (
1816 metrics_entry.metrics.column_count.max(1),
1817 metrics_entry.metrics.row_count.max(1),
1818 ),
1819 );
1820 let region_bounds = params
1821 .region_id
1822 .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
1823 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1824 collect_value_matches(
1825 sheet,
1826 &sheet_name,
1827 &mode,
1828 &match_mode,
1829 &direction,
1830 ¶ms,
1831 region_bounds.as_ref(),
1832 default_bounds,
1833 )
1834 })??;
1835 matches.extend(sheet_matches);
1836 if matches.len() as u32 >= params.limit {
1837 truncated = true;
1838 matches.truncate(params.limit as usize);
1839 break;
1840 }
1841 }
1842
1843 Ok(FindValueResponse {
1844 workbook_id: workbook.id.clone(),
1845 workbook_short_id: workbook.short_id.clone(),
1846 matches,
1847 truncated,
1848 })
1849}
1850
1851pub async fn read_table(
1852 state: Arc<AppState>,
1853 params: ReadTableParams,
1854) -> Result<ReadTableResponse> {
1855 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1856 let resolved = resolve_table_target(&workbook, ¶ms)?;
1857 let limit = params.limit.unwrap_or(100) as usize;
1858 let offset = params.offset.unwrap_or(0) as usize;
1859 let sample_mode = params
1860 .sample_mode
1861 .clone()
1862 .unwrap_or_else(|| "first".to_string());
1863
1864 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1865 extract_table_rows(
1866 sheet,
1867 &resolved,
1868 params.header_row,
1869 params.header_rows,
1870 params.columns.clone(),
1871 params.filters.clone(),
1872 limit,
1873 offset,
1874 &sample_mode,
1875 )
1876 })??;
1877
1878 let has_more = offset + rows.len() < total_rows as usize;
1879
1880 Ok(ReadTableResponse {
1881 workbook_id: workbook.id.clone(),
1882 workbook_short_id: workbook.short_id.clone(),
1883 sheet_name: resolved.sheet_name,
1884 table_name: resolved.table_name,
1885 headers,
1886 rows,
1887 total_rows,
1888 has_more,
1889 })
1890}
1891
1892pub async fn table_profile(
1893 state: Arc<AppState>,
1894 params: TableProfileParams,
1895) -> Result<TableProfileResponse> {
1896 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1897 let resolved = resolve_table_target(
1898 &workbook,
1899 &ReadTableParams {
1900 workbook_id: params.workbook_id.clone(),
1901 sheet_name: params.sheet_name.clone(),
1902 table_name: params.table_name.clone(),
1903 region_id: params.region_id,
1904 range: None,
1905 header_row: None,
1906 header_rows: None,
1907 columns: None,
1908 filters: None,
1909 sample_mode: params.sample_mode.clone(),
1910 limit: params.sample_size,
1911 offset: Some(0),
1912 },
1913 )?;
1914
1915 let sample_size = params.sample_size.unwrap_or(10) as usize;
1916 let sample_mode = params
1917 .sample_mode
1918 .clone()
1919 .unwrap_or_else(|| "distributed".to_string());
1920
1921 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1922 extract_table_rows(
1923 sheet,
1924 &resolved,
1925 None,
1926 None,
1927 None,
1928 None,
1929 sample_size,
1930 0,
1931 &sample_mode,
1932 )
1933 })??;
1934
1935 let column_types = summarize_columns(&headers, &rows);
1936
1937 Ok(TableProfileResponse {
1938 workbook_id: workbook.id.clone(),
1939 workbook_short_id: workbook.short_id.clone(),
1940 sheet_name: resolved.sheet_name,
1941 table_name: resolved.table_name,
1942 headers,
1943 column_types,
1944 row_count: total_rows,
1945 samples: rows,
1946 notes: Vec::new(),
1947 })
1948}
1949
1950#[derive(Debug, Deserialize, JsonSchema)]
1951pub struct ManifestStubParams {
1952 pub workbook_id: WorkbookId,
1953 pub sheet_filter: Option<String>,
1954}
1955
1956pub async fn get_manifest_stub(
1957 state: Arc<AppState>,
1958 params: ManifestStubParams,
1959) -> Result<ManifestStubResponse> {
1960 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1961 let mut summaries = workbook.list_summaries()?;
1962
1963 if let Some(filter) = ¶ms.sheet_filter {
1964 summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
1965 }
1966
1967 let sheets = summaries
1968 .into_iter()
1969 .map(|summary| ManifestSheetStub {
1970 sheet_name: summary.name.clone(),
1971 classification: summary.classification.clone(),
1972 candidate_expectations: vec![format!(
1973 "Review {} sheet for expectation candidates",
1974 format!("{:?}", summary.classification).to_ascii_lowercase()
1975 )],
1976 notes: summary.style_tags,
1977 })
1978 .collect();
1979
1980 let response = ManifestStubResponse {
1981 workbook_id: workbook.id.clone(),
1982 workbook_short_id: workbook.short_id.clone(),
1983 slug: workbook.slug.clone(),
1984 sheets,
1985 };
1986 Ok(response)
1987}
1988
1989#[derive(Debug, Deserialize, JsonSchema)]
1990pub struct CloseWorkbookParams {
1991 pub workbook_id: WorkbookId,
1992}
1993
1994pub async fn close_workbook(
1995 state: Arc<AppState>,
1996 params: CloseWorkbookParams,
1997) -> Result<CloseWorkbookResponse> {
1998 state.close_workbook(¶ms.workbook_id)?;
1999 Ok(CloseWorkbookResponse {
2000 workbook_id: params.workbook_id.clone(),
2001 message: format!("workbook {} evicted", params.workbook_id.as_str()),
2002 })
2003}
2004fn collect_formula_matches(
2005 sheet: &umya_spreadsheet::Worksheet,
2006 sheet_name: &str,
2007 query: &str,
2008 case_sensitive: bool,
2009) -> Vec<FindFormulaMatch> {
2010 use crate::workbook::cell_to_value;
2011
2012 let mut results = Vec::new();
2013 for cell in sheet.get_cell_collection() {
2014 if !cell.is_formula() {
2015 continue;
2016 }
2017 let formula = cell.get_formula();
2018 let haystack = if case_sensitive {
2019 formula.to_string()
2020 } else {
2021 formula.to_ascii_lowercase()
2022 };
2023 if !haystack.contains(query) {
2024 continue;
2025 }
2026 let coord = cell.get_coordinate();
2027 let column = *coord.get_col_num();
2028 let row = *coord.get_row_num();
2029 let columns = vec![column];
2030 let context_row = build_row_snapshot(sheet, row, &columns, true, false);
2031 let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
2032
2033 results.push(FindFormulaMatch {
2034 address: coord.get_coordinate(),
2035 sheet_name: sheet_name.to_string(),
2036 formula: formula.to_string(),
2037 cached_value: if cell.is_formula() {
2038 cell_to_value(cell)
2039 } else {
2040 None
2041 },
2042 context: vec![header_row, context_row],
2043 });
2044 }
2045 results
2046}
2047
2048#[derive(Clone)]
2049struct TraceFormulaInfo {
2050 fingerprint: String,
2051 formula: String,
2052}
2053
2054#[derive(Clone)]
2055struct TraceEdgeRaw {
2056 from: String,
2057 to: String,
2058 neighbor: String,
2059}
2060
2061#[derive(Clone)]
2062struct LayerLinks {
2063 depth: u32,
2064 edges: Vec<TraceEdgeRaw>,
2065 truncated_cells: Vec<String>,
2066}
2067
2068#[derive(Clone)]
2069struct NeighborDetail {
2070 address: String,
2071 column: Option<u32>,
2072 row: Option<u32>,
2073 kind: TraceCellKind,
2074 value: Option<CellValue>,
2075 formula: Option<String>,
2076 fingerprint: Option<String>,
2077 external: bool,
2078}
2079
2080fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2081 let mut map = HashMap::new();
2082 for group in graph.groups() {
2083 for address in group.addresses.clone() {
2084 map.insert(
2085 address.to_ascii_uppercase(),
2086 TraceFormulaInfo {
2087 fingerprint: group.fingerprint.clone(),
2088 formula: group.formula.clone(),
2089 },
2090 );
2091 }
2092 }
2093 map
2094}
2095
2096struct TraceConfig<'a> {
2097 direction: &'a TraceDirection,
2098 origin: &'a str,
2099 sheet_name: &'a str,
2100 depth_limit: u32,
2101 page_size: usize,
2102}
2103
2104fn build_trace_layers(
2105 workbook: &WorkbookContext,
2106 graph: &FormulaGraph,
2107 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2108 config: &TraceConfig<'_>,
2109 cursor: Option<TraceCursor>,
2110) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2111 let layer_links =
2112 collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2113 let mut layers = Vec::new();
2114 let mut next_cursor = None;
2115 let mut notes = Vec::new();
2116 let focus_depth = cursor.as_ref().map(|c| c.depth);
2117
2118 for layer in layer_links {
2119 let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2120 let offset = cursor
2121 .as_ref()
2122 .filter(|c| c.depth == layer.depth)
2123 .map(|c| c.offset)
2124 .unwrap_or(0);
2125
2126 let mut node_set: HashSet<String> = HashSet::new();
2127 for edge in &layer.edges {
2128 node_set.insert(edge.neighbor.clone());
2129 }
2130 let mut nodes: Vec<String> = node_set.into_iter().collect();
2131 nodes.sort_by(|a, b| compare_addresses(a, b));
2132
2133 let details = workbook.with_sheet(config.sheet_name, |sheet| {
2134 collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2135 })?;
2136 let total_nodes = details.len();
2137 let start = offset.min(total_nodes);
2138 let end = if produce_edges {
2139 (start + config.page_size).min(total_nodes)
2140 } else {
2141 start
2142 };
2143 let selected_slice = if produce_edges {
2144 &details[start..end]
2145 } else {
2146 &details[0..0]
2147 };
2148 let selected_addresses: HashSet<String> = selected_slice
2149 .iter()
2150 .map(|detail| detail.address.clone())
2151 .collect();
2152
2153 let summary = build_layer_summary(&details);
2154 let range_highlights = build_range_highlights(&details);
2155 let group_highlights = build_formula_group_highlights(&details);
2156 let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2157
2158 let highlights = TraceLayerHighlights {
2159 top_ranges: range_highlights.clone(),
2160 top_formula_groups: group_highlights.clone(),
2161 notable_cells,
2162 };
2163
2164 let edges = if produce_edges {
2165 build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2166 } else {
2167 Vec::new()
2168 };
2169
2170 let has_more = produce_edges && end < total_nodes;
2171 if has_more && next_cursor.is_none() {
2172 next_cursor = Some(TraceCursor {
2173 depth: layer.depth,
2174 offset: end,
2175 });
2176 }
2177 if has_more {
2178 notes.push(format!(
2179 "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2180 layer.depth, end, total_nodes, layer.depth, end
2181 ));
2182 }
2183
2184 if !layer.truncated_cells.is_empty() {
2185 let cell_list = if layer.truncated_cells.len() <= 3 {
2186 layer.truncated_cells.join(", ")
2187 } else {
2188 format!(
2189 "{}, ... ({} more)",
2190 layer.truncated_cells[..3].join(", "),
2191 layer.truncated_cells.len() - 3
2192 )
2193 };
2194 notes.push(format!(
2195 "Layer {}: dependents truncated at {} per cell for: {}",
2196 layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
2197 ));
2198 }
2199
2200 layers.push(TraceLayer {
2201 depth: layer.depth,
2202 summary,
2203 highlights,
2204 edges,
2205 has_more,
2206 });
2207 }
2208
2209 Ok((layers, next_cursor, notes))
2210}
2211
2212fn collect_layer_links(
2213 graph: &FormulaGraph,
2214 direction: &TraceDirection,
2215 origin: &str,
2216 depth_limit: u32,
2217) -> Vec<LayerLinks> {
2218 let mut visited: HashSet<String> = HashSet::new();
2219 visited.insert(origin.to_string());
2220 let mut frontier = vec![origin.to_string()];
2221 let mut layers = Vec::new();
2222
2223 for depth in 1..=depth_limit {
2224 let mut next_frontier_set: HashSet<String> = HashSet::new();
2225 let mut edges = Vec::new();
2226 let mut truncated_cells = Vec::new();
2227
2228 for cell in &frontier {
2229 let (neighbors, was_truncated) = match direction {
2230 TraceDirection::Precedents => (graph.precedents(cell), false),
2231 TraceDirection::Dependents => {
2232 graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
2233 }
2234 };
2235
2236 if was_truncated {
2237 truncated_cells.push(cell.clone());
2238 }
2239
2240 for neighbor in neighbors {
2241 let neighbor_upper = neighbor.to_ascii_uppercase();
2242 let edge = match direction {
2243 TraceDirection::Precedents => TraceEdgeRaw {
2244 from: cell.clone(),
2245 to: neighbor_upper.clone(),
2246 neighbor: neighbor_upper.clone(),
2247 },
2248 TraceDirection::Dependents => TraceEdgeRaw {
2249 from: neighbor_upper.clone(),
2250 to: cell.clone(),
2251 neighbor: neighbor_upper.clone(),
2252 },
2253 };
2254 edges.push(edge);
2255 if visited.insert(neighbor_upper.clone()) {
2256 next_frontier_set.insert(neighbor_upper);
2257 }
2258 }
2259 }
2260
2261 if edges.is_empty() {
2262 break;
2263 }
2264
2265 layers.push(LayerLinks {
2266 depth,
2267 edges,
2268 truncated_cells,
2269 });
2270 if next_frontier_set.is_empty() {
2271 break;
2272 }
2273 let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2274 next_frontier.sort();
2275 frontier = next_frontier;
2276 }
2277
2278 layers
2279}
2280
2281fn collect_neighbor_details(
2282 sheet: &umya_spreadsheet::Worksheet,
2283 current_sheet: &str,
2284 addresses: &[String],
2285 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2286) -> Vec<NeighborDetail> {
2287 let mut details = Vec::new();
2288 for address in addresses {
2289 let (sheet_part, cell_part) = split_sheet_and_cell(address);
2290 let normalized_sheet = sheet_part
2291 .as_ref()
2292 .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2293 let is_external = normalized_sheet
2294 .as_ref()
2295 .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2296 .unwrap_or(false);
2297
2298 let Some(cell_ref) = cell_part else {
2299 details.push(NeighborDetail {
2300 address: address.clone(),
2301 column: None,
2302 row: None,
2303 kind: TraceCellKind::External,
2304 value: None,
2305 formula: None,
2306 fingerprint: None,
2307 external: true,
2308 });
2309 continue;
2310 };
2311
2312 let cell_ref_upper = cell_ref.to_ascii_uppercase();
2313
2314 if is_external {
2315 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2316 details.push(NeighborDetail {
2317 address: address.clone(),
2318 column: None,
2319 row: None,
2320 kind: TraceCellKind::External,
2321 value: None,
2322 formula: formula_info.map(|info| info.formula.clone()),
2323 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2324 external: true,
2325 });
2326 continue;
2327 }
2328
2329 let Some((col, row)) = parse_address(&cell_ref_upper) else {
2330 details.push(NeighborDetail {
2331 address: address.clone(),
2332 column: None,
2333 row: None,
2334 kind: TraceCellKind::External,
2335 value: None,
2336 formula: None,
2337 fingerprint: None,
2338 external: true,
2339 });
2340 continue;
2341 };
2342
2343 let cell_opt = sheet.get_cell((&col, &row));
2344 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2345 if let Some(cell) = cell_opt {
2346 let value = cell_to_value(cell);
2347 let kind = if cell.is_formula() {
2348 TraceCellKind::Formula
2349 } else if value.is_some() {
2350 TraceCellKind::Literal
2351 } else {
2352 TraceCellKind::Blank
2353 };
2354 details.push(NeighborDetail {
2355 address: address.clone(),
2356 column: Some(col),
2357 row: Some(row),
2358 kind,
2359 value,
2360 formula: formula_info.map(|info| info.formula.clone()),
2361 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2362 external: false,
2363 });
2364 } else {
2365 details.push(NeighborDetail {
2366 address: address.clone(),
2367 column: Some(col),
2368 row: Some(row),
2369 kind: TraceCellKind::Blank,
2370 value: None,
2371 formula: formula_info.map(|info| info.formula.clone()),
2372 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2373 external: false,
2374 });
2375 }
2376 }
2377 details
2378}
2379
2380fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2381 let mut summary = TraceLayerSummary {
2382 total_nodes: details.len(),
2383 formula_nodes: 0,
2384 value_nodes: 0,
2385 blank_nodes: 0,
2386 external_nodes: 0,
2387 unique_formula_groups: 0,
2388 };
2389
2390 let mut fingerprints: HashSet<String> = HashSet::new();
2391
2392 for detail in details {
2393 match detail.kind {
2394 TraceCellKind::Formula => {
2395 summary.formula_nodes += 1;
2396 if let Some(fp) = &detail.fingerprint {
2397 fingerprints.insert(fp.clone());
2398 }
2399 }
2400 TraceCellKind::Literal => summary.value_nodes += 1,
2401 TraceCellKind::Blank => summary.blank_nodes += 1,
2402 TraceCellKind::External => summary.external_nodes += 1,
2403 }
2404 }
2405
2406 summary.unique_formula_groups = fingerprints.len();
2407 summary
2408}
2409
2410fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2411 let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2412 for detail in details {
2413 if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2414 let entry = aggregates
2415 .entry(fingerprint.clone())
2416 .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2417 entry.1 += 1;
2418 if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2419 entry.2.push(detail.address.clone());
2420 }
2421 }
2422 }
2423
2424 let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2425 .into_iter()
2426 .map(
2427 |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2428 fingerprint,
2429 formula,
2430 count,
2431 sample_addresses,
2432 },
2433 )
2434 .collect();
2435
2436 highlights.sort_by(|a, b| b.count.cmp(&a.count));
2437 highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2438 highlights
2439}
2440
2441fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2442 let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2443 for detail in details {
2444 if let (Some(col), Some(_row)) = (detail.column, detail.row)
2445 && !detail.external
2446 {
2447 by_column.entry(col).or_default().push(detail);
2448 }
2449 }
2450
2451 for column_entries in by_column.values_mut() {
2452 column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2453 }
2454
2455 let mut ranges = Vec::new();
2456 for entries in by_column.values() {
2457 let mut current: Vec<&NeighborDetail> = Vec::new();
2458 for detail in entries {
2459 if current.is_empty() {
2460 current.push(detail);
2461 continue;
2462 }
2463 let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2464 if detail.row.unwrap_or(0) == prev_row + 1 {
2465 current.push(detail);
2466 } else {
2467 if current.len() >= TRACE_RANGE_THRESHOLD {
2468 ranges.push(make_range_highlight(¤t));
2469 }
2470 current.clear();
2471 current.push(detail);
2472 }
2473 }
2474 if current.len() >= TRACE_RANGE_THRESHOLD {
2475 ranges.push(make_range_highlight(¤t));
2476 }
2477 }
2478
2479 ranges.sort_by(|a, b| b.count.cmp(&a.count));
2480 ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2481 ranges
2482}
2483
2484fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2485 let mut literals = 0usize;
2486 let mut formulas = 0usize;
2487 let mut blanks = 0usize;
2488 let mut sample_values = Vec::new();
2489 let mut sample_formulas = Vec::new();
2490 let mut sample_addresses = Vec::new();
2491
2492 for detail in details {
2493 match detail.kind {
2494 TraceCellKind::Formula => {
2495 formulas += 1;
2496 if let Some(formula) = &detail.formula
2497 && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2498 && !sample_formulas.contains(formula)
2499 {
2500 sample_formulas.push(formula.clone());
2501 }
2502 }
2503 TraceCellKind::Literal => {
2504 literals += 1;
2505 if let Some(value) = &detail.value
2506 && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2507 {
2508 sample_values.push(value.clone());
2509 }
2510 }
2511 TraceCellKind::Blank => blanks += 1,
2512 TraceCellKind::External => {}
2513 }
2514 if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2515 sample_addresses.push(detail.address.clone());
2516 }
2517 }
2518
2519 TraceRangeHighlight {
2520 start: details
2521 .first()
2522 .map(|d| d.address.clone())
2523 .unwrap_or_default(),
2524 end: details
2525 .last()
2526 .map(|d| d.address.clone())
2527 .unwrap_or_default(),
2528 count: details.len(),
2529 literals,
2530 formulas,
2531 blanks,
2532 sample_values,
2533 sample_formulas,
2534 sample_addresses,
2535 }
2536}
2537
2538fn build_notable_cells(
2539 details: &[NeighborDetail],
2540 ranges: &[TraceRangeHighlight],
2541 groups: &[TraceFormulaGroupHighlight],
2542) -> Vec<TraceCellHighlight> {
2543 let mut exclude: HashSet<String> = HashSet::new();
2544 for range in ranges {
2545 exclude.insert(range.start.clone());
2546 exclude.insert(range.end.clone());
2547 for addr in &range.sample_addresses {
2548 exclude.insert(addr.clone());
2549 }
2550 }
2551 for group in groups {
2552 for addr in &group.sample_addresses {
2553 exclude.insert(addr.clone());
2554 }
2555 }
2556
2557 let mut highlights = Vec::new();
2558 let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
2559
2560 for detail in details {
2561 if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
2562 break;
2563 }
2564 if exclude.contains(&detail.address) {
2565 continue;
2566 }
2567 let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
2568 if *counter >= 2 && detail.kind != TraceCellKind::External {
2569 continue;
2570 }
2571 highlights.push(TraceCellHighlight {
2572 address: detail.address.clone(),
2573 kind: detail.kind.clone(),
2574 value: detail.value.clone(),
2575 formula: detail.formula.clone(),
2576 });
2577 *counter += 1;
2578 }
2579
2580 highlights
2581}
2582
2583fn build_edges_for_layer(
2584 raw_edges: &[TraceEdgeRaw],
2585 selected: &HashSet<String>,
2586 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2587) -> Vec<FormulaTraceEdge> {
2588 let mut edges = Vec::new();
2589 for edge in raw_edges {
2590 if selected.contains(&edge.neighbor) {
2591 let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
2592 .map(|info| info.formula.clone());
2593 edges.push(FormulaTraceEdge {
2594 from: edge.from.clone(),
2595 to: edge.to.clone(),
2596 formula,
2597 note: None,
2598 });
2599 }
2600 }
2601 edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
2602 edges
2603}
2604
2605fn lookup_formula_info<'a>(
2606 lookup: &'a HashMap<String, TraceFormulaInfo>,
2607 cell_ref: &str,
2608 original: &str,
2609) -> Option<&'a TraceFormulaInfo> {
2610 if let Some(info) = lookup.get(cell_ref) {
2611 return Some(info);
2612 }
2613 if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
2614 let upper = cell.to_ascii_uppercase();
2615 return lookup.get(&upper);
2616 }
2617 None
2618}
2619
2620fn compare_addresses(left: &str, right: &str) -> Ordering {
2621 let (sheet_left, cell_left) = split_sheet_and_cell(left);
2622 let (sheet_right, cell_right) = split_sheet_and_cell(right);
2623
2624 let sheet_left_key = sheet_left
2625 .as_ref()
2626 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2627 .unwrap_or_default();
2628 let sheet_right_key = sheet_right
2629 .as_ref()
2630 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2631 .unwrap_or_default();
2632
2633 match sheet_left_key.cmp(&sheet_right_key) {
2634 Ordering::Equal => {
2635 let left_core = cell_left.unwrap_or_else(|| left.to_string());
2636 let right_core = cell_right.unwrap_or_else(|| right.to_string());
2637 let left_coords = parse_address(&left_core.to_ascii_uppercase());
2638 let right_coords = parse_address(&right_core.to_ascii_uppercase());
2639 match (left_coords, right_coords) {
2640 (Some((lc, lr)), Some((rc, rr))) => lc
2641 .cmp(&rc)
2642 .then_with(|| lr.cmp(&rr))
2643 .then_with(|| left_core.cmp(&right_core)),
2644 _ => left_core.cmp(&right_core),
2645 }
2646 }
2647 other => other,
2648 }
2649}
2650
2651fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
2652 if let Some(idx) = address.rfind('!') {
2653 let sheet = address[..idx].to_string();
2654 let cell = address[idx + 1..].to_string();
2655 (Some(sheet), Some(cell))
2656 } else {
2657 (None, Some(address.to_string()))
2658 }
2659}
2660
2661fn clean_sheet_name(sheet: &str) -> String {
2662 let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
2663 let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
2664 after_bracket
2665 .trim_matches(|c| c == '\'' || c == '"')
2666 .to_string()
2667}