1pub mod filters;
2#[cfg(feature = "recalc")]
3pub mod fork;
4
5use crate::analysis::{formula::FormulaGraph, stats};
6use crate::model::*;
7use crate::state::AppState;
8use crate::workbook::{WorkbookContext, cell_to_value};
9use anyhow::{Result, anyhow};
10use regex::Regex;
11use schemars::JsonSchema;
12use serde::Deserialize;
13use std::cmp::Ordering;
14use std::collections::{BTreeMap, HashMap, HashSet};
15use std::sync::Arc;
16
17const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
18const TRACE_PAGE_MIN: usize = 5;
19const TRACE_PAGE_MAX: usize = 200;
20const TRACE_RANGE_THRESHOLD: usize = 4;
21const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
22const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
23const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
24const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
25const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
26const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
27const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
28
29pub async fn list_workbooks(
30 state: Arc<AppState>,
31 params: ListWorkbooksParams,
32) -> Result<WorkbookListResponse> {
33 let filter = params.into_filter()?;
34 state.list_workbooks(filter)
35}
36
37pub async fn describe_workbook(
38 state: Arc<AppState>,
39 params: DescribeWorkbookParams,
40) -> Result<WorkbookDescription> {
41 let workbook = state.open_workbook(¶ms.workbook_id).await?;
42 let desc = workbook.describe();
43 Ok(desc)
44}
45
46#[derive(Debug, Deserialize, JsonSchema)]
47pub struct ListWorkbooksParams {
48 pub slug_prefix: Option<String>,
49 pub folder: Option<String>,
50 pub path_glob: Option<String>,
51}
52
53impl ListWorkbooksParams {
54 fn into_filter(self) -> Result<filters::WorkbookFilter> {
55 filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
56 }
57}
58
59#[derive(Debug, Deserialize, JsonSchema)]
60pub struct DescribeWorkbookParams {
61 pub workbook_id: WorkbookId,
62}
63
64#[derive(Debug, Deserialize, JsonSchema)]
65pub struct ListSheetsParams {
66 pub workbook_id: WorkbookId,
67}
68
69pub async fn list_sheets(
70 state: Arc<AppState>,
71 params: ListSheetsParams,
72) -> Result<SheetListResponse> {
73 let workbook = state.open_workbook(¶ms.workbook_id).await?;
74 let summaries = workbook.list_summaries()?;
75 let response = SheetListResponse {
76 workbook_id: workbook.id.clone(),
77 workbook_short_id: workbook.short_id.clone(),
78 sheets: summaries,
79 };
80 Ok(response)
81}
82
83#[derive(Debug, Deserialize, JsonSchema)]
84pub struct SheetOverviewParams {
85 pub workbook_id: WorkbookId,
86 pub sheet_name: String,
87}
88
89#[derive(Debug, Deserialize, JsonSchema)]
90pub struct WorkbookSummaryParams {
91 pub workbook_id: WorkbookId,
92}
93
94pub async fn workbook_summary(
95 state: Arc<AppState>,
96 params: WorkbookSummaryParams,
97) -> Result<WorkbookSummaryResponse> {
98 let workbook = state.open_workbook(¶ms.workbook_id).await?;
99 let sheet_names = workbook.sheet_names();
100
101 let mut total_cells: u64 = 0;
102 let mut total_formulas: u64 = 0;
103 let mut breakdown = WorkbookBreakdown::default();
104 let mut region_counts = RegionCountSummary::default();
105 let mut entry_points: Vec<EntryPoint> = Vec::new();
106 let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
107
108 for sheet_name in &sheet_names {
109 let entry = workbook.get_sheet_metrics(sheet_name)?;
110 total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
111 total_formulas += entry.metrics.formula_cells as u64;
112 match entry.metrics.classification {
113 SheetClassification::Calculator => breakdown.calculator_sheets += 1,
114 SheetClassification::Metadata => breakdown.metadata_sheets += 1,
115 SheetClassification::Empty => {}
116 _ => breakdown.data_sheets += 1,
117 }
118
119 for region in &entry.detected_regions {
120 match region
121 .region_kind
122 .clone()
123 .unwrap_or(region.classification.clone())
124 {
125 RegionKind::Calculator => region_counts.calculator += 1,
126 RegionKind::Metadata => region_counts.metadata += 1,
127 RegionKind::Parameters => region_counts.parameters += 1,
128 RegionKind::Outputs => region_counts.outputs += 1,
129 RegionKind::Data | RegionKind::Table => region_counts.data += 1,
130 _ => region_counts.other += 1,
131 }
132 if region.confidence >= 0.3 {
133 let kind = region
134 .region_kind
135 .as_ref()
136 .unwrap_or(®ion.classification);
137 let priority = match kind {
138 RegionKind::Parameters => 0,
139 RegionKind::Data | RegionKind::Table => 1,
140 RegionKind::Outputs => 2,
141 RegionKind::Calculator => 3,
142 RegionKind::Metadata => 4,
143 _ => 5,
144 };
145 entry_points.push(EntryPoint {
146 sheet_name: sheet_name.clone(),
147 region_id: Some(region.id),
148 bounds: Some(region.bounds.clone()),
149 rationale: format!(
150 "{:?} region ({} rows, {:.0}% conf, p{})",
151 kind,
152 region.row_count,
153 region.confidence * 100.0,
154 priority
155 ),
156 });
157 }
158 }
159
160 if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
161 entry_points.push(EntryPoint {
162 sheet_name: sheet_name.clone(),
163 region_id: None,
164 bounds: None,
165 rationale: "Whole sheet is non-empty; start at top-left".to_string(),
166 });
167 }
168 }
169
170 entry_points.sort_by(|a, b| {
171 let pa = priority_from_rationale(&a.rationale);
172 let pb = priority_from_rationale(&b.rationale);
173 pa.cmp(&pb)
174 .then_with(|| {
175 a.bounds
176 .as_ref()
177 .map(|_| 1)
178 .cmp(&b.bounds.as_ref().map(|_| 1))
179 })
180 .then_with(|| a.sheet_name.cmp(&b.sheet_name))
181 });
182 entry_points.truncate(5);
183
184 let mut seen_ranges = std::collections::HashSet::new();
185 for item in workbook.named_items()? {
186 if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
187 continue;
188 }
189 if !seen_ranges.insert(item.refers_to.clone()) {
190 continue;
191 }
192 key_named_ranges.push(item);
193 if key_named_ranges.len() >= 10 {
194 break;
195 }
196 }
197
198 Ok(WorkbookSummaryResponse {
199 workbook_id: workbook.id.clone(),
200 workbook_short_id: workbook.short_id.clone(),
201 slug: workbook.slug.clone(),
202 sheet_count: sheet_names.len(),
203 total_cells,
204 total_formulas,
205 breakdown,
206 region_counts,
207 key_named_ranges,
208 suggested_entry_points: entry_points,
209 })
210}
211
212fn priority_from_rationale(rationale: &str) -> u32 {
213 if rationale.contains("p0") {
214 0
215 } else if rationale.contains("p1") {
216 1
217 } else if rationale.contains("p2") {
218 2
219 } else if rationale.contains("p3") {
220 3
221 } else if rationale.contains("p4") {
222 4
223 } else {
224 5
225 }
226}
227
228pub async fn sheet_overview(
229 state: Arc<AppState>,
230 params: SheetOverviewParams,
231) -> Result<SheetOverviewResponse> {
232 let workbook = state.open_workbook(¶ms.workbook_id).await?;
233 let overview = workbook.sheet_overview(¶ms.sheet_name)?;
234 Ok(overview)
235}
236
237fn default_start_row() -> u32 {
238 1
239}
240
241fn default_page_size() -> u32 {
242 50
243}
244
245fn default_include_formulas() -> bool {
246 true
247}
248
249fn default_include_header() -> bool {
250 true
251}
252
253#[derive(Debug, Deserialize, JsonSchema)]
254pub struct SheetPageParams {
255 pub workbook_id: WorkbookId,
256 pub sheet_name: String,
257 #[serde(default = "default_start_row")]
258 pub start_row: u32,
259 #[serde(default = "default_page_size")]
260 pub page_size: u32,
261 #[serde(default)]
262 pub columns: Option<Vec<String>>,
263 #[serde(default)]
264 pub columns_by_header: Option<Vec<String>>,
265 #[serde(default = "default_include_formulas")]
266 pub include_formulas: bool,
267 #[serde(default)]
268 pub include_styles: bool,
269 #[serde(default = "default_include_header")]
270 pub include_header: bool,
271 #[serde(default)]
272 pub format: Option<SheetPageFormat>,
273}
274
275impl Default for SheetPageParams {
276 fn default() -> Self {
277 SheetPageParams {
278 workbook_id: WorkbookId(String::new()),
279 sheet_name: String::new(),
280 start_row: default_start_row(),
281 page_size: default_page_size(),
282 columns: None,
283 columns_by_header: None,
284 include_formulas: default_include_formulas(),
285 include_styles: false,
286 include_header: default_include_header(),
287 format: None,
288 }
289 }
290}
291
292fn default_find_limit() -> u32 {
293 50
294}
295
296#[derive(Debug, Deserialize, JsonSchema)]
297pub struct FindValueParams {
298 pub workbook_id: WorkbookId,
299 pub query: String,
300 #[serde(default)]
301 pub label: Option<String>,
302 #[serde(default)]
303 pub mode: Option<FindMode>,
304 #[serde(default)]
305 pub match_mode: Option<String>,
306 #[serde(default)]
307 pub case_sensitive: bool,
308 #[serde(default)]
309 pub sheet_name: Option<String>,
310 #[serde(default)]
311 pub region_id: Option<u32>,
312 #[serde(default)]
313 pub table_name: Option<String>,
314 #[serde(default)]
315 pub value_types: Option<Vec<String>>,
316 #[serde(default)]
317 pub search_headers_only: bool,
318 #[serde(default)]
319 pub direction: Option<LabelDirection>,
320 #[serde(default = "default_find_limit")]
321 pub limit: u32,
322}
323
324impl Default for FindValueParams {
325 fn default() -> Self {
326 Self {
327 workbook_id: WorkbookId(String::new()),
328 query: String::new(),
329 label: None,
330 mode: None,
331 match_mode: None,
332 case_sensitive: false,
333 sheet_name: None,
334 region_id: None,
335 table_name: None,
336 value_types: None,
337 search_headers_only: false,
338 direction: None,
339 limit: default_find_limit(),
340 }
341 }
342}
343
344#[derive(Debug, Deserialize, JsonSchema, Default)]
345pub struct ReadTableParams {
346 pub workbook_id: WorkbookId,
347 #[serde(default)]
348 pub sheet_name: Option<String>,
349 #[serde(default)]
350 pub table_name: Option<String>,
351 #[serde(default)]
352 pub region_id: Option<u32>,
353 #[serde(default)]
354 pub range: Option<String>,
355 #[serde(default)]
356 pub header_row: Option<u32>,
357 #[serde(default)]
358 pub header_rows: Option<u32>,
359 #[serde(default)]
360 pub columns: Option<Vec<String>>,
361 #[serde(default)]
362 pub filters: Option<Vec<TableFilter>>,
363 #[serde(default)]
364 pub sample_mode: Option<String>,
365 #[serde(default)]
366 pub limit: Option<u32>,
367 #[serde(default)]
368 pub offset: Option<u32>,
369}
370
371#[derive(Debug, Deserialize, JsonSchema, Clone)]
372pub struct TableFilter {
373 pub column: String,
374 pub op: String,
375 pub value: serde_json::Value,
376}
377
378#[derive(Debug, Deserialize, JsonSchema, Default)]
379pub struct TableProfileParams {
380 pub workbook_id: WorkbookId,
381 #[serde(default)]
382 pub sheet_name: Option<String>,
383 #[serde(default)]
384 pub region_id: Option<u32>,
385 #[serde(default)]
386 pub table_name: Option<String>,
387 #[serde(default)]
388 pub sample_mode: Option<String>,
389 #[serde(default)]
390 pub sample_size: Option<u32>,
391}
392
393#[derive(Debug, Deserialize, JsonSchema)]
394pub struct RangeValuesParams {
395 pub workbook_id: WorkbookId,
396 pub sheet_name: String,
397 pub ranges: Vec<String>,
398 #[serde(default)]
399 pub include_headers: Option<bool>,
400}
401
402pub async fn sheet_page(
403 state: Arc<AppState>,
404 params: SheetPageParams,
405) -> Result<SheetPageResponse> {
406 if params.page_size == 0 {
407 return Err(anyhow!("page_size must be greater than zero"));
408 }
409
410 let workbook = state.open_workbook(¶ms.workbook_id).await?;
411 let metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
412 let format = params.format.unwrap_or_default();
413
414 let start_row = params.start_row.max(1);
415 let page_size = params.page_size.min(500);
416 let include_formulas = params.include_formulas;
417 let include_styles = params.include_styles;
418 let columns = params.columns.clone();
419 let columns_by_header = params.columns_by_header.clone();
420 let include_header = params.include_header;
421
422 let page = workbook.with_sheet(¶ms.sheet_name, |sheet| {
423 build_page(
424 sheet,
425 start_row,
426 page_size,
427 columns.clone(),
428 columns_by_header.clone(),
429 include_formulas,
430 include_styles,
431 include_header,
432 )
433 })?;
434
435 let has_more = page.end_row < metrics.metrics.row_count;
436 let next_start_row = if has_more {
437 Some(page.end_row + 1)
438 } else {
439 None
440 };
441
442 let compact_payload = if matches!(format, SheetPageFormat::Compact) {
443 Some(build_compact_payload(
444 &page.header,
445 &page.rows,
446 include_header,
447 ))
448 } else {
449 None
450 };
451
452 let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
453 Some(build_values_only_payload(
454 &page.header,
455 &page.rows,
456 include_header,
457 ))
458 } else {
459 None
460 };
461
462 let response = SheetPageResponse {
463 workbook_id: workbook.id.clone(),
464 workbook_short_id: workbook.short_id.clone(),
465 sheet_name: params.sheet_name,
466 rows: if matches!(format, SheetPageFormat::Full) {
467 page.rows
468 } else {
469 Vec::new()
470 },
471 has_more,
472 next_start_row,
473 header_row: if include_header { page.header } else { None },
474 compact: compact_payload,
475 values_only: values_only_payload,
476 format,
477 };
478 Ok(response)
479}
480
481#[derive(Debug, Deserialize, JsonSchema)]
482pub struct SheetFormulaMapParams {
483 pub workbook_id: WorkbookId,
484 pub sheet_name: String,
485 pub range: Option<String>,
486 #[serde(default)]
487 pub expand: bool,
488 #[serde(default)]
489 pub limit: Option<u32>,
490 #[serde(default)]
491 pub sort_by: Option<FormulaSortBy>,
492}
493
494#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
495#[serde(rename_all = "snake_case")]
496pub enum FormulaSortBy {
497 #[default]
498 Address,
499 Complexity,
500 Count,
501}
502
503pub async fn sheet_formula_map(
504 state: Arc<AppState>,
505 params: SheetFormulaMapParams,
506) -> Result<SheetFormulaMapResponse> {
507 let workbook = state.open_workbook(¶ms.workbook_id).await?;
508 let graph = workbook.formula_graph(¶ms.sheet_name)?;
509 let mut groups = Vec::new();
510 let mut truncated = false;
511
512 for mut group in graph.groups() {
513 if let Some(range) = ¶ms.range {
514 group.addresses.retain(|addr| address_in_range(addr, range));
515 if group.addresses.is_empty() {
516 continue;
517 }
518 }
519 if !params.expand && group.addresses.len() > 15 {
520 group.addresses.truncate(15);
521 truncated = true;
522 }
523 groups.push(group);
524 }
525
526 let sort_by = params.sort_by.unwrap_or_default();
527 match sort_by {
528 FormulaSortBy::Address => {
529 groups.sort_by(|a, b| a.addresses.first().cmp(&b.addresses.first()));
530 }
531 FormulaSortBy::Complexity => {
532 groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
533 }
534 FormulaSortBy::Count => {
535 groups.sort_by(|a, b| b.addresses.len().cmp(&a.addresses.len()));
536 }
537 }
538
539 if let Some(limit) = params.limit
540 && groups.len() > limit as usize
541 {
542 groups.truncate(limit as usize);
543 truncated = true;
544 }
545
546 let response = SheetFormulaMapResponse {
547 workbook_id: workbook.id.clone(),
548 workbook_short_id: workbook.short_id.clone(),
549 sheet_name: params.sheet_name.clone(),
550 groups,
551 truncated,
552 };
553 Ok(response)
554}
555
556#[derive(Debug, Deserialize, JsonSchema)]
557pub struct FormulaTraceParams {
558 pub workbook_id: WorkbookId,
559 pub sheet_name: String,
560 pub cell_address: String,
561 pub direction: TraceDirection,
562 pub depth: Option<u32>,
563 pub limit: Option<u32>,
564 #[serde(default)]
565 pub page_size: Option<usize>,
566 #[serde(default)]
567 pub cursor: Option<TraceCursor>,
568}
569
570pub async fn formula_trace(
571 state: Arc<AppState>,
572 params: FormulaTraceParams,
573) -> Result<FormulaTraceResponse> {
574 let workbook = state.open_workbook(¶ms.workbook_id).await?;
575 let graph = workbook.formula_graph(¶ms.sheet_name)?;
576 let formula_lookup = build_formula_lookup(&graph);
577 let depth = params.depth.unwrap_or(3).clamp(1, 5);
578 let page_size = params
579 .page_size
580 .or_else(|| params.limit.map(|v| v as usize))
581 .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
582 .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
583
584 let origin = params.cell_address.to_uppercase();
585 let config = TraceConfig {
586 direction: ¶ms.direction,
587 origin: &origin,
588 sheet_name: ¶ms.sheet_name,
589 depth_limit: depth,
590 page_size,
591 };
592 let (layers, next_cursor, notes) = build_trace_layers(
593 &workbook,
594 &graph,
595 &formula_lookup,
596 &config,
597 params.cursor.clone(),
598 )?;
599
600 let response = FormulaTraceResponse {
601 workbook_id: workbook.id.clone(),
602 workbook_short_id: workbook.short_id.clone(),
603 sheet_name: params.sheet_name.clone(),
604 origin,
605 direction: params.direction.clone(),
606 layers,
607 next_cursor,
608 notes,
609 };
610 Ok(response)
611}
612
613#[derive(Debug, Deserialize, JsonSchema)]
614pub struct NamedRangesParams {
615 pub workbook_id: WorkbookId,
616 pub sheet_name: Option<String>,
617 pub name_prefix: Option<String>,
618}
619
620pub async fn named_ranges(
621 state: Arc<AppState>,
622 params: NamedRangesParams,
623) -> Result<NamedRangesResponse> {
624 let workbook = state.open_workbook(¶ms.workbook_id).await?;
625 let mut items = workbook.named_items()?;
626
627 if let Some(sheet_filter) = ¶ms.sheet_name {
628 items.retain(|item| {
629 item.sheet_name
630 .as_ref()
631 .map(|name| name.eq_ignore_ascii_case(sheet_filter))
632 .unwrap_or(false)
633 });
634 }
635 if let Some(prefix) = ¶ms.name_prefix {
636 let prefix_lower = prefix.to_ascii_lowercase();
637 items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
638 }
639
640 let response = NamedRangesResponse {
641 workbook_id: workbook.id.clone(),
642 workbook_short_id: workbook.short_id.clone(),
643 items,
644 };
645 Ok(response)
646}
647
648struct PageBuildResult {
649 rows: Vec<RowSnapshot>,
650 header: Option<RowSnapshot>,
651 end_row: u32,
652}
653
654#[allow(clippy::too_many_arguments)]
655fn build_page(
656 sheet: &umya_spreadsheet::Worksheet,
657 start_row: u32,
658 page_size: u32,
659 columns: Option<Vec<String>>,
660 columns_by_header: Option<Vec<String>>,
661 include_formulas: bool,
662 include_styles: bool,
663 include_header: bool,
664) -> PageBuildResult {
665 let max_col = sheet.get_highest_column();
666 let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
667 let column_indices =
668 resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
669
670 let header = if include_header {
671 Some(build_row_snapshot(
672 sheet,
673 1,
674 &column_indices,
675 include_formulas,
676 include_styles,
677 ))
678 } else {
679 None
680 };
681
682 let mut rows = Vec::new();
683 for row_idx in start_row..=end_row {
684 rows.push(build_row_snapshot(
685 sheet,
686 row_idx,
687 &column_indices,
688 include_formulas,
689 include_styles,
690 ));
691 }
692
693 PageBuildResult {
694 rows,
695 header,
696 end_row,
697 }
698}
699
700fn build_row_snapshot(
701 sheet: &umya_spreadsheet::Worksheet,
702 row_index: u32,
703 columns: &[u32],
704 include_formulas: bool,
705 include_styles: bool,
706) -> RowSnapshot {
707 let mut cells = Vec::new();
708 for &col in columns {
709 if let Some(cell) = sheet.get_cell((col, row_index)) {
710 cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
711 } else {
712 let address = crate::utils::cell_address(col, row_index);
713 cells.push(CellSnapshot {
714 address,
715 value: None,
716 formula: None,
717 cached_value: None,
718 number_format: None,
719 style_tags: Vec::new(),
720 notes: Vec::new(),
721 });
722 }
723 }
724
725 RowSnapshot { row_index, cells }
726}
727
728fn build_cell_snapshot(
729 cell: &umya_spreadsheet::Cell,
730 include_formulas: bool,
731 include_styles: bool,
732) -> CellSnapshot {
733 let address = cell.get_coordinate().get_coordinate();
734 let value = crate::workbook::cell_to_value(cell);
735 let formula = if include_formulas && cell.is_formula() {
736 Some(cell.get_formula().to_string())
737 } else {
738 None
739 };
740 let cached_value = if cell.is_formula() {
741 value.clone()
742 } else {
743 None
744 };
745 let number_format = if include_styles {
746 cell.get_style()
747 .get_number_format()
748 .map(|fmt| fmt.get_format_code().to_string())
749 } else {
750 None
751 };
752 let style_tags = if include_styles {
753 crate::analysis::style::tag_cell(cell)
754 .map(|(_, tagging)| tagging.tags)
755 .unwrap_or_default()
756 } else {
757 Vec::new()
758 };
759
760 CellSnapshot {
761 address,
762 value,
763 formula,
764 cached_value,
765 number_format,
766 style_tags,
767 notes: Vec::new(),
768 }
769}
770
771fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
772 use std::collections::BTreeSet;
773 use umya_spreadsheet::helper::coordinate::column_index_from_string;
774
775 let mut indices = BTreeSet::new();
776 if let Some(specs) = columns {
777 for spec in specs {
778 if let Some((start, end)) = spec.split_once(':') {
779 let start_idx = column_index_from_string(start);
780 let end_idx = column_index_from_string(end);
781 let (min_idx, max_idx) = if start_idx <= end_idx {
782 (start_idx, end_idx)
783 } else {
784 (end_idx, start_idx)
785 };
786 for idx in min_idx..=max_idx {
787 indices.insert(idx);
788 }
789 } else {
790 indices.insert(column_index_from_string(spec));
791 }
792 }
793 } else {
794 for idx in 1..=max_column.max(1) {
795 indices.insert(idx);
796 }
797 }
798
799 indices.into_iter().collect()
800}
801
802fn resolve_columns_with_headers(
803 sheet: &umya_spreadsheet::Worksheet,
804 columns: Option<&Vec<String>>,
805 columns_by_header: Option<&Vec<String>>,
806 max_column: u32,
807) -> Vec<u32> {
808 if columns_by_header.is_none() {
809 return resolve_columns(columns, max_column);
810 }
811
812 let mut selected = Vec::new();
813 let header_targets: Vec<String> = columns_by_header
814 .unwrap()
815 .iter()
816 .map(|h| h.trim().to_ascii_lowercase())
817 .collect();
818
819 for col_idx in 1..=max_column.max(1) {
820 let header_cell = sheet.get_cell((col_idx, 1u32));
821 let header_value = header_cell
822 .and_then(cell_to_value)
823 .map(cell_value_to_string_lower);
824 if let Some(hval) = header_value
825 && header_targets.iter().any(|target| target == &hval)
826 {
827 selected.push(col_idx);
828 }
829 }
830
831 if selected.is_empty() {
832 resolve_columns(columns, max_column)
833 } else {
834 selected
835 }
836}
837
838fn cell_value_to_string_lower(value: CellValue) -> String {
839 match value {
840 CellValue::Text(s) => s.to_ascii_lowercase(),
841 CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
842 CellValue::Bool(b) => b.to_string(),
843 CellValue::Error(e) => e.to_ascii_lowercase(),
844 CellValue::Date(d) => d.to_ascii_lowercase(),
845 }
846}
847
848fn build_compact_payload(
849 header: &Option<RowSnapshot>,
850 rows: &[RowSnapshot],
851 include_header: bool,
852) -> SheetPageCompact {
853 let headers = derive_headers(header, rows);
854 let header_row = if include_header {
855 header
856 .as_ref()
857 .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
858 .unwrap_or_default()
859 } else {
860 Vec::new()
861 };
862 let data_rows = rows
863 .iter()
864 .map(|row| {
865 let mut vals: Vec<Option<CellValue>> = Vec::new();
866 vals.push(Some(CellValue::Number(row.row_index as f64)));
867 vals.extend(row.cells.iter().map(|c| c.value.clone()));
868 vals
869 })
870 .collect();
871
872 SheetPageCompact {
873 headers,
874 header_row,
875 rows: data_rows,
876 }
877}
878
879fn build_values_only_payload(
880 header: &Option<RowSnapshot>,
881 rows: &[RowSnapshot],
882 include_header: bool,
883) -> SheetPageValues {
884 let mut data = Vec::new();
885 if include_header && let Some(h) = header {
886 data.push(h.cells.iter().map(|c| c.value.clone()).collect());
887 }
888 for row in rows {
889 data.push(row.cells.iter().map(|c| c.value.clone()).collect());
890 }
891
892 SheetPageValues { rows: data }
893}
894
895fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
896 if let Some(h) = header {
897 let mut headers: Vec<String> = h
898 .cells
899 .iter()
900 .map(|c| match &c.value {
901 Some(CellValue::Text(t)) => t.clone(),
902 Some(CellValue::Number(n)) => n.to_string(),
903 Some(CellValue::Bool(b)) => b.to_string(),
904 Some(CellValue::Date(d)) => d.clone(),
905 Some(CellValue::Error(e)) => e.clone(),
906 None => c.address.clone(),
907 })
908 .collect();
909 headers.insert(0, "Row".to_string());
910 headers
911 } else if let Some(first) = rows.first() {
912 let mut headers = Vec::new();
913 headers.push("Row".to_string());
914 for cell in &first.cells {
915 headers.push(cell.address.clone());
916 }
917 headers
918 } else {
919 vec![]
920 }
921}
922fn default_stats_sample() -> usize {
923 500
924}
925
926#[derive(Debug, Deserialize, JsonSchema)]
927pub struct SheetStatisticsParams {
928 pub workbook_id: WorkbookId,
929 pub sheet_name: String,
930 #[serde(default)]
931 pub sample_rows: Option<usize>,
932}
933
934pub async fn sheet_statistics(
935 state: Arc<AppState>,
936 params: SheetStatisticsParams,
937) -> Result<SheetStatisticsResponse> {
938 let workbook = state.open_workbook(¶ms.workbook_id).await?;
939 let sheet_metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
940 let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
941 let stats = workbook.with_sheet(¶ms.sheet_name, |sheet| {
942 stats::compute_sheet_statistics(sheet, sample_rows)
943 })?;
944 let response = SheetStatisticsResponse {
945 workbook_id: workbook.id.clone(),
946 workbook_short_id: workbook.short_id.clone(),
947 sheet_name: params.sheet_name,
948 row_count: sheet_metrics.metrics.row_count,
949 column_count: sheet_metrics.metrics.column_count,
950 density: stats.density,
951 numeric_columns: stats.numeric_columns,
952 text_columns: stats.text_columns,
953 null_counts: stats.null_counts,
954 duplicate_warnings: stats.duplicate_warnings,
955 };
956 Ok(response)
957}
958
959fn address_in_range(address: &str, range: &str) -> bool {
960 parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
961 if let Some((col, row)) = parse_address(address) {
962 col >= start_col && col <= end_col && row >= start_row && row <= end_row
963 } else {
964 false
965 }
966 })
967}
968
969fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
970 let mut parts = range.split(':');
971 let start = parts.next()?;
972 let end = parts.next().unwrap_or(start);
973 let start_idx = parse_address(start)?;
974 let end_idx = parse_address(end)?;
975 Some((
976 (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
977 (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
978 ))
979}
980
981fn parse_address(address: &str) -> Option<(u32, u32)> {
982 use umya_spreadsheet::helper::coordinate::index_from_coordinate;
983 let (col, row, _, _) = index_from_coordinate(address);
984 match (col, row) {
985 (Some(c), Some(r)) => Some((c, r)),
986 _ => None,
987 }
988}
989
990#[derive(Clone)]
991struct TableTarget {
992 sheet_name: String,
993 table_name: Option<String>,
994 range: ((u32, u32), (u32, u32)),
995 header_hint: Option<u32>,
996}
997
998fn resolve_table_target(
999 workbook: &WorkbookContext,
1000 params: &ReadTableParams,
1001) -> Result<TableTarget> {
1002 if let Some(region_id) = params.region_id
1003 && let Some(sheet) = ¶ms.sheet_name
1004 && let Ok(region) = workbook.detected_region(sheet, region_id)
1005 {
1006 return Ok(TableTarget {
1007 sheet_name: sheet.clone(),
1008 table_name: None,
1009 range: parse_range(®ion.bounds).unwrap_or(((1, 1), (1, 1))),
1010 header_hint: region.header_row,
1011 });
1012 }
1013
1014 if let Some(table_name) = ¶ms.table_name {
1015 let items = workbook.named_items()?;
1016 for item in items {
1017 if item.name.eq_ignore_ascii_case(table_name)
1018 || item
1019 .name
1020 .to_ascii_lowercase()
1021 .contains(&table_name.to_ascii_lowercase())
1022 {
1023 let mut sheet_name = item
1024 .sheet_name
1025 .clone()
1026 .or_else(|| params.sheet_name.clone())
1027 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1028 let refers_to = item.refers_to.trim_start_matches('=');
1029 let mut range_part = refers_to;
1030 if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1031 sheet_name = sheet_part.trim_matches('\'').to_string();
1032 range_part = rest;
1033 }
1034 if let Some(range) = parse_range(range_part) {
1035 return Ok(TableTarget {
1036 sheet_name,
1037 table_name: Some(item.name.clone()),
1038 range,
1039 header_hint: if item.kind == NamedItemKind::Table {
1040 Some(range.0.1)
1041 } else {
1042 None
1043 },
1044 });
1045 }
1046 }
1047 }
1048 }
1049
1050 let sheet_name = params
1051 .sheet_name
1052 .clone()
1053 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1054
1055 if let Some(rng) = ¶ms.range
1056 && let Some(range) = parse_range(rng)
1057 {
1058 return Ok(TableTarget {
1059 sheet_name,
1060 table_name: None,
1061 range,
1062 header_hint: None,
1063 });
1064 }
1065
1066 let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1067 let end_col = metrics.metrics.column_count.max(1);
1068 let end_row = metrics.metrics.row_count.max(1);
1069 Ok(TableTarget {
1070 sheet_name,
1071 table_name: None,
1072 range: ((1, 1), (end_col, end_row)),
1073 header_hint: None,
1074 })
1075}
1076
1077#[allow(clippy::too_many_arguments)]
1078fn extract_table_rows(
1079 sheet: &umya_spreadsheet::Worksheet,
1080 target: &TableTarget,
1081 header_row: Option<u32>,
1082 header_rows: Option<u32>,
1083 columns: Option<Vec<String>>,
1084 filters: Option<Vec<TableFilter>>,
1085 limit: usize,
1086 offset: usize,
1087 sample_mode: &str,
1088) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1089 let ((start_col, start_row), (end_col, end_row)) = target.range;
1090 let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1091 if header_start < start_row {
1092 header_start = start_row;
1093 }
1094 if header_start > end_row {
1095 header_start = start_row;
1096 }
1097 let header_rows_count = header_rows.unwrap_or(1).max(1);
1098 let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1099 let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1100 resolve_columns(Some(cols), end_col).into_iter().collect()
1101 } else {
1102 (start_col..=end_col).collect()
1103 };
1104
1105 let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1106 let mut all_rows: Vec<TableRow> = Vec::new();
1107 let mut total_rows: u32 = 0;
1108
1109 for row_idx in data_start_row..=end_row {
1110 let mut row = BTreeMap::new();
1111 for (i, col_idx) in column_indices.iter().enumerate() {
1112 let header = headers
1113 .get(i)
1114 .cloned()
1115 .unwrap_or_else(|| format!("Col{col_idx}"));
1116 let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1117 row.insert(header, value);
1118 }
1119 if !row_passes_filters(&row, filters.as_ref()) {
1120 continue;
1121 }
1122 total_rows += 1;
1123 if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1124 continue;
1125 }
1126 all_rows.push(row);
1127 }
1128
1129 let rows = sample_rows(all_rows, limit, offset, sample_mode);
1130
1131 Ok((headers, rows, total_rows))
1132}
1133
1134fn build_headers(
1135 sheet: &umya_spreadsheet::Worksheet,
1136 columns: &[u32],
1137 header_start: u32,
1138 header_rows: u32,
1139) -> Vec<String> {
1140 let mut headers = Vec::new();
1141 for col_idx in columns {
1142 let mut parts = Vec::new();
1143 for h in header_start..(header_start + header_rows) {
1144 let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1145 if let Some(value) = sheet
1146 .get_cell((origin_col, origin_row))
1147 .and_then(cell_to_value)
1148 {
1149 match value {
1150 CellValue::Text(ref s) if s.trim().is_empty() => {}
1151 CellValue::Text(s) => parts.push(s),
1152 CellValue::Number(n) => parts.push(n.to_string()),
1153 CellValue::Bool(b) => parts.push(b.to_string()),
1154 CellValue::Error(e) => parts.push(e),
1155 CellValue::Date(d) => parts.push(d),
1156 }
1157 }
1158 }
1159 if parts.is_empty() {
1160 headers.push(crate::utils::column_number_to_name(*col_idx));
1161 } else {
1162 headers.push(parts.join(" / "));
1163 }
1164 }
1165
1166 if headers.iter().all(|h| h.trim().is_empty()) {
1167 return columns
1168 .iter()
1169 .map(|c| crate::utils::column_number_to_name(*c))
1170 .collect();
1171 }
1172
1173 dedupe_headers(headers)
1174}
1175
1176fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1177 let mut seen: HashMap<String, u32> = HashMap::new();
1178 for h in headers.iter_mut() {
1179 let key = h.clone();
1180 if key.trim().is_empty() {
1181 continue;
1182 }
1183 let count = seen.entry(key.clone()).or_insert(0);
1184 if *count > 0 {
1185 h.push_str(&format!("_{}", *count + 1));
1186 }
1187 *count += 1;
1188 }
1189 headers
1190}
1191
1192fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1193 if let Some(filters) = filters {
1194 for filter in filters {
1195 if let Some(value) = row.get(&filter.column) {
1196 match filter.op.as_str() {
1197 "eq" => {
1198 if !value_eq(value, &filter.value) {
1199 return false;
1200 }
1201 }
1202 "neq" => {
1203 if value_eq(value, &filter.value) {
1204 return false;
1205 }
1206 }
1207 "contains" => {
1208 if !value_contains(value, &filter.value) {
1209 return false;
1210 }
1211 }
1212 "gt" => {
1213 if !value_gt(value, &filter.value) {
1214 return false;
1215 }
1216 }
1217 "lt" => {
1218 if !value_lt(value, &filter.value) {
1219 return false;
1220 }
1221 }
1222 "in" => {
1223 let list = filter
1224 .value
1225 .as_array()
1226 .cloned()
1227 .unwrap_or_else(|| vec![filter.value.clone()]);
1228 if !list.iter().any(|cmp| value_eq(value, cmp)) {
1229 return false;
1230 }
1231 }
1232 _ => {}
1233 }
1234 }
1235 }
1236 }
1237 true
1238}
1239
1240fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1241 match (cell, cmp) {
1242 (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1243 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1244 v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1245 }
1246 (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1247 .parse::<f64>()
1248 .map(|f| (*n - f).abs() < f64::EPSILON)
1249 .unwrap_or(false),
1250 (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1251 (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1252 t.eq_ignore_ascii_case("true") == *b
1253 }
1254 (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1255 (None, serde_json::Value::Null) => true,
1256 _ => false,
1257 }
1258}
1259
1260fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1261 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1262 return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1263 }
1264 false
1265}
1266
1267fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1268 match (cell, cmp) {
1269 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1270 v.as_f64().is_some_and(|f| *n > f)
1271 }
1272 _ => false,
1273 }
1274}
1275
1276fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1277 match (cell, cmp) {
1278 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1279 v.as_f64().is_some_and(|f| *n < f)
1280 }
1281 _ => false,
1282 }
1283}
1284
1285fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1286 if rows.is_empty() {
1287 return rows;
1288 }
1289
1290 match mode {
1291 "distributed" => {
1292 if limit == 0 {
1293 return Vec::new();
1294 }
1295 let mut indices = Vec::new();
1296 let span = rows.len().saturating_sub(1);
1297 let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1298 let mut idx = offset;
1299 while idx < rows.len() && indices.len() < limit {
1300 indices.push(idx);
1301 idx = idx.saturating_add(step);
1302 if idx == indices.last().copied().unwrap_or(0) {
1303 idx += 1;
1304 }
1305 }
1306 if indices.len() < limit {
1307 let last_idx = rows.len().saturating_sub(1);
1308 if !indices.contains(&last_idx) {
1309 indices.push(last_idx);
1310 }
1311 }
1312 indices
1313 .into_iter()
1314 .filter_map(|i| rows.get(i).cloned())
1315 .collect()
1316 }
1317 "last" => {
1318 let start = rows.len().saturating_sub(limit + offset);
1319 rows.into_iter().skip(start + offset).take(limit).collect()
1320 }
1321 _ => rows.into_iter().skip(offset).take(limit).collect(),
1322 }
1323}
1324
1325fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1326 let mut summaries = Vec::new();
1327 for header in headers {
1328 let mut nulls = 0u32;
1329 let mut distinct_set: HashSet<String> = HashSet::new();
1330 let mut values: Vec<f64> = Vec::new();
1331 let mut top_counts: HashMap<String, u32> = HashMap::new();
1332
1333 for row in rows {
1334 match row.get(header) {
1335 Some(Some(CellValue::Number(n))) => {
1336 values.push(*n);
1337 let key = n.to_string();
1338 *top_counts.entry(key).or_default() += 1;
1339 }
1340 Some(Some(CellValue::Text(s))) => {
1341 distinct_set.insert(s.clone());
1342 *top_counts.entry(s.clone()).or_default() += 1;
1343 }
1344 Some(Some(CellValue::Bool(b))) => {
1345 let key = b.to_string();
1346 distinct_set.insert(key.clone());
1347 *top_counts.entry(key).or_default() += 1;
1348 }
1349 Some(Some(CellValue::Date(d))) => {
1350 distinct_set.insert(d.clone());
1351 *top_counts.entry(d.clone()).or_default() += 1;
1352 }
1353 Some(Some(CellValue::Error(e))) => {
1354 distinct_set.insert(e.clone());
1355 *top_counts.entry(e.clone()).or_default() += 1;
1356 }
1357 _ => {
1358 nulls += 1;
1359 }
1360 }
1361 }
1362
1363 let inferred_type = if !values.is_empty() {
1364 "number"
1365 } else if !distinct_set.is_empty() {
1366 "text"
1367 } else {
1368 "unknown"
1369 }
1370 .to_string();
1371
1372 let min = values.iter().cloned().reduce(f64::min);
1373 let max = values.iter().cloned().reduce(f64::max);
1374 let mean = if values.is_empty() {
1375 None
1376 } else {
1377 Some(values.iter().sum::<f64>() / values.len() as f64)
1378 };
1379
1380 let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1381 top_values.sort_by(|a, b| b.1.cmp(&a.1));
1382 let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1383
1384 summaries.push(ColumnTypeSummary {
1385 name: header.clone(),
1386 inferred_type,
1387 nulls,
1388 distinct: distinct_set.len() as u32,
1389 top_values,
1390 min,
1391 max,
1392 mean,
1393 });
1394 }
1395 summaries
1396}
1397
1398#[allow(clippy::too_many_arguments)]
1399fn collect_value_matches(
1400 sheet: &umya_spreadsheet::Worksheet,
1401 sheet_name: &str,
1402 mode: &FindMode,
1403 match_mode: &str,
1404 direction: &LabelDirection,
1405 params: &FindValueParams,
1406 region: Option<&DetectedRegion>,
1407 default_bounds: ((u32, u32), (u32, u32)),
1408) -> Result<Vec<FindValueMatch>> {
1409 let mut results = Vec::new();
1410 let regex = if match_mode == "regex" {
1411 Regex::new(¶ms.query).ok()
1412 } else {
1413 None
1414 };
1415 let bounds = region
1416 .as_ref()
1417 .and_then(|r| parse_range(&r.bounds))
1418 .unwrap_or(default_bounds);
1419
1420 let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1421
1422 for cell in sheet.get_cell_collection() {
1423 let coord = cell.get_coordinate();
1424 let col = *coord.get_col_num();
1425 let row = *coord.get_row_num();
1426 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1427 continue;
1428 }
1429 if params.search_headers_only && row != header_row {
1430 continue;
1431 }
1432
1433 let value = cell_to_value(cell);
1434 if let Some(ref allowed) = params.value_types
1435 && !value_type_matches(&value, allowed)
1436 {
1437 continue;
1438 }
1439 if matches!(mode, FindMode::Value) {
1440 if !value_matches(
1441 &value,
1442 ¶ms.query,
1443 match_mode,
1444 params.case_sensitive,
1445 ®ex,
1446 ) {
1447 continue;
1448 }
1449 } else if let Some(label) = ¶ms.label {
1450 if !label_matches(cell, label, match_mode, params.case_sensitive, ®ex) {
1451 continue;
1452 }
1453 } else {
1454 continue;
1455 }
1456
1457 let neighbors = collect_neighbors(sheet, row, col);
1458 let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1459 let target_value = match direction {
1460 LabelDirection::Right => sheet.get_cell((col + 1, row)),
1461 LabelDirection::Below => sheet.get_cell((col, row + 1)),
1462 LabelDirection::Any => sheet
1463 .get_cell((col + 1, row))
1464 .or_else(|| sheet.get_cell((col, row + 1))),
1465 }
1466 .and_then(cell_to_value);
1467 if target_value.is_none() {
1468 continue;
1469 }
1470 (
1471 Some(LabelHit {
1472 label_address: coord.get_coordinate(),
1473 label: label_from_cell(cell),
1474 }),
1475 target_value,
1476 )
1477 } else {
1478 (None, value.clone())
1479 };
1480
1481 let row_context = build_row_context(sheet, row, col);
1482
1483 results.push(FindValueMatch {
1484 address: coord.get_coordinate(),
1485 sheet_name: sheet_name.to_string(),
1486 value: match_value,
1487 row_context,
1488 neighbors,
1489 label_hit,
1490 });
1491 }
1492
1493 Ok(results)
1494}
1495
1496fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1497 cell_to_value(cell)
1498 .map(|v| match v {
1499 CellValue::Text(s) => s,
1500 CellValue::Number(n) => n.to_string(),
1501 CellValue::Bool(b) => b.to_string(),
1502 CellValue::Date(d) => d,
1503 CellValue::Error(e) => e,
1504 })
1505 .unwrap_or_else(|| cell.get_value().to_string())
1506}
1507
1508fn value_matches(
1509 value: &Option<CellValue>,
1510 query: &str,
1511 mode: &str,
1512 case_sensitive: bool,
1513 regex: &Option<Regex>,
1514) -> bool {
1515 if value.is_none() {
1516 return false;
1517 }
1518 let haystack = cell_value_to_string_lower(value.clone().unwrap());
1519 let needle = if case_sensitive {
1520 query.to_string()
1521 } else {
1522 query.to_ascii_lowercase()
1523 };
1524
1525 match mode {
1526 "exact" => haystack == needle,
1527 "regex" => regex
1528 .as_ref()
1529 .map(|re| re.is_match(&haystack))
1530 .unwrap_or(false),
1531 _ => haystack.contains(&needle),
1532 }
1533}
1534
1535fn label_matches(
1536 cell: &umya_spreadsheet::Cell,
1537 label: &str,
1538 mode: &str,
1539 case_sensitive: bool,
1540 regex: &Option<Regex>,
1541) -> bool {
1542 let value = cell_to_value(cell);
1543 if value.is_none() {
1544 return false;
1545 }
1546 let haystack = cell_value_to_string_lower(value.unwrap());
1547 let needle = if case_sensitive {
1548 label.to_string()
1549 } else {
1550 label.to_ascii_lowercase()
1551 };
1552 match mode {
1553 "exact" => haystack == needle,
1554 "regex" => regex
1555 .as_ref()
1556 .map(|re| re.is_match(&haystack))
1557 .unwrap_or(false),
1558 _ => haystack.contains(&needle),
1559 }
1560}
1561
1562fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1563 if value.is_none() {
1564 return allowed.iter().any(|v| v == "null");
1565 }
1566 match value.as_ref().unwrap() {
1567 CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1568 CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1569 CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1570 CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1571 CellValue::Error(_) => true,
1572 }
1573}
1574
1575fn collect_neighbors(
1576 sheet: &umya_spreadsheet::Worksheet,
1577 row: u32,
1578 col: u32,
1579) -> Option<NeighborValues> {
1580 Some(NeighborValues {
1581 left: if col > 1 {
1582 sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1583 } else {
1584 None
1585 },
1586 right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1587 up: if row > 1 {
1588 sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1589 } else {
1590 None
1591 },
1592 down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1593 })
1594}
1595
1596fn build_row_context(
1597 sheet: &umya_spreadsheet::Worksheet,
1598 row: u32,
1599 col: u32,
1600) -> Option<RowContext> {
1601 let header_value = sheet
1602 .get_cell((col, 1u32))
1603 .and_then(cell_to_value)
1604 .map(|v| match v {
1605 CellValue::Text(s) => s,
1606 CellValue::Number(n) => n.to_string(),
1607 CellValue::Bool(b) => b.to_string(),
1608 CellValue::Date(d) => d,
1609 CellValue::Error(e) => e,
1610 })
1611 .unwrap_or_else(|| format!("Col{}", col));
1612 let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1613 Some(RowContext {
1614 headers: vec![header_value],
1615 values: vec![value],
1616 })
1617}
1618
1619#[derive(Debug, Deserialize, JsonSchema)]
1620pub struct FindFormulaParams {
1621 pub workbook_id: WorkbookId,
1622 pub query: String,
1623 pub sheet_name: Option<String>,
1624 #[serde(default)]
1625 pub case_sensitive: bool,
1626}
1627
1628pub async fn find_formula(
1629 state: Arc<AppState>,
1630 params: FindFormulaParams,
1631) -> Result<FindFormulaResponse> {
1632 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1633 let query = if params.case_sensitive {
1634 params.query.clone()
1635 } else {
1636 params.query.to_ascii_lowercase()
1637 };
1638 let mut matches = Vec::new();
1639
1640 let sheet_names: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1641 vec![sheet.clone()]
1642 } else {
1643 workbook.sheet_names()
1644 };
1645
1646 for sheet_name in sheet_names {
1647 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1648 collect_formula_matches(sheet, &sheet_name, &query, params.case_sensitive)
1649 })?;
1650 matches.extend(sheet_matches);
1651 }
1652
1653 let response = FindFormulaResponse {
1654 workbook_id: workbook.id.clone(),
1655 workbook_short_id: workbook.short_id.clone(),
1656 matches,
1657 };
1658 Ok(response)
1659}
1660
1661#[derive(Debug, Deserialize, JsonSchema)]
1662pub struct ScanVolatilesParams {
1663 pub workbook_id: WorkbookId,
1664 pub sheet_name: Option<String>,
1665}
1666
1667pub async fn scan_volatiles(
1668 state: Arc<AppState>,
1669 params: ScanVolatilesParams,
1670) -> Result<VolatileScanResponse> {
1671 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1672 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1673 vec![sheet.clone()]
1674 } else {
1675 workbook.sheet_names()
1676 };
1677
1678 let mut items = Vec::new();
1679 let mut truncated = false;
1680
1681 for sheet_name in target_sheets {
1682 let graph = workbook.formula_graph(&sheet_name)?;
1683 for group in graph.groups() {
1684 if !group.is_volatile {
1685 continue;
1686 }
1687 for address in group.addresses.iter().take(50) {
1688 items.push(VolatileScanEntry {
1689 address: address.clone(),
1690 sheet_name: sheet_name.clone(),
1691 function: "volatile".to_string(),
1692 note: Some(group.formula.clone()),
1693 });
1694 }
1695 if group.addresses.len() > 50 {
1696 truncated = true;
1697 }
1698 }
1699 }
1700
1701 let response = VolatileScanResponse {
1702 workbook_id: workbook.id.clone(),
1703 workbook_short_id: workbook.short_id.clone(),
1704 items,
1705 truncated,
1706 };
1707 Ok(response)
1708}
1709
1710#[derive(Debug, Deserialize, JsonSchema)]
1711pub struct SheetStylesParams {
1712 pub workbook_id: WorkbookId,
1713 pub sheet_name: String,
1714}
1715
1716pub async fn sheet_styles(
1717 state: Arc<AppState>,
1718 params: SheetStylesParams,
1719) -> Result<SheetStylesResponse> {
1720 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1721 let entry = workbook.get_sheet_metrics(¶ms.sheet_name)?;
1722
1723 let styles = entry
1724 .metrics
1725 .style_map
1726 .iter()
1727 .map(|(style_id, usage)| StyleSummary {
1728 style_id: style_id.clone(),
1729 occurrences: usage.occurrences,
1730 tags: usage.tags.clone(),
1731 example_cells: usage.example_cells.clone(),
1732 })
1733 .collect();
1734
1735 let response = SheetStylesResponse {
1736 workbook_id: workbook.id.clone(),
1737 workbook_short_id: workbook.short_id.clone(),
1738 sheet_name: params.sheet_name.clone(),
1739 styles,
1740 conditional_rules: Vec::new(),
1741 };
1742 Ok(response)
1743}
1744
1745pub async fn range_values(
1746 state: Arc<AppState>,
1747 params: RangeValuesParams,
1748) -> Result<RangeValuesResponse> {
1749 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1750 let include_headers = params.include_headers.unwrap_or(false);
1751 let values = workbook.with_sheet(¶ms.sheet_name, |sheet| {
1752 params
1753 .ranges
1754 .iter()
1755 .filter_map(|range| {
1756 parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
1757 let mut rows = Vec::new();
1758 for r in start_row..=end_row {
1759 let mut row_vals = Vec::new();
1760 for c in start_col..=end_col {
1761 if include_headers && r == start_row && start_row == 1 {
1762 row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
1763 } else {
1764 row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
1765 }
1766 }
1767 rows.push(row_vals);
1768 }
1769 RangeValuesEntry {
1770 range: range.clone(),
1771 rows,
1772 }
1773 })
1774 })
1775 .collect()
1776 })?;
1777
1778 Ok(RangeValuesResponse {
1779 workbook_id: workbook.id.clone(),
1780 workbook_short_id: workbook.short_id.clone(),
1781 sheet_name: params.sheet_name,
1782 values,
1783 })
1784}
1785
1786pub async fn find_value(
1787 state: Arc<AppState>,
1788 params: FindValueParams,
1789) -> Result<FindValueResponse> {
1790 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1791 let mut matches = Vec::new();
1792 let mut truncated = false;
1793 let mode = params.mode.clone().unwrap_or_else(|| {
1794 if params.label.is_some() {
1795 FindMode::Label
1796 } else {
1797 FindMode::Value
1798 }
1799 });
1800 let match_mode = params
1801 .match_mode
1802 .as_deref()
1803 .unwrap_or("contains")
1804 .to_ascii_lowercase();
1805 let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
1806
1807 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1808 vec![sheet.clone()]
1809 } else {
1810 workbook.sheet_names()
1811 };
1812
1813 for sheet_name in target_sheets {
1814 let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
1815 let default_bounds = (
1816 (1, 1),
1817 (
1818 metrics_entry.metrics.column_count.max(1),
1819 metrics_entry.metrics.row_count.max(1),
1820 ),
1821 );
1822 let region_bounds = params
1823 .region_id
1824 .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
1825 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1826 collect_value_matches(
1827 sheet,
1828 &sheet_name,
1829 &mode,
1830 &match_mode,
1831 &direction,
1832 ¶ms,
1833 region_bounds.as_ref(),
1834 default_bounds,
1835 )
1836 })??;
1837 matches.extend(sheet_matches);
1838 if matches.len() as u32 >= params.limit {
1839 truncated = true;
1840 matches.truncate(params.limit as usize);
1841 break;
1842 }
1843 }
1844
1845 Ok(FindValueResponse {
1846 workbook_id: workbook.id.clone(),
1847 workbook_short_id: workbook.short_id.clone(),
1848 matches,
1849 truncated,
1850 })
1851}
1852
1853pub async fn read_table(
1854 state: Arc<AppState>,
1855 params: ReadTableParams,
1856) -> Result<ReadTableResponse> {
1857 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1858 let resolved = resolve_table_target(&workbook, ¶ms)?;
1859 let limit = params.limit.unwrap_or(100) as usize;
1860 let offset = params.offset.unwrap_or(0) as usize;
1861 let sample_mode = params
1862 .sample_mode
1863 .clone()
1864 .unwrap_or_else(|| "first".to_string());
1865
1866 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1867 extract_table_rows(
1868 sheet,
1869 &resolved,
1870 params.header_row,
1871 params.header_rows,
1872 params.columns.clone(),
1873 params.filters.clone(),
1874 limit,
1875 offset,
1876 &sample_mode,
1877 )
1878 })??;
1879
1880 let has_more = offset + rows.len() < total_rows as usize;
1881
1882 Ok(ReadTableResponse {
1883 workbook_id: workbook.id.clone(),
1884 workbook_short_id: workbook.short_id.clone(),
1885 sheet_name: resolved.sheet_name,
1886 table_name: resolved.table_name,
1887 headers,
1888 rows,
1889 total_rows,
1890 has_more,
1891 })
1892}
1893
1894pub async fn table_profile(
1895 state: Arc<AppState>,
1896 params: TableProfileParams,
1897) -> Result<TableProfileResponse> {
1898 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1899 let resolved = resolve_table_target(
1900 &workbook,
1901 &ReadTableParams {
1902 workbook_id: params.workbook_id.clone(),
1903 sheet_name: params.sheet_name.clone(),
1904 table_name: params.table_name.clone(),
1905 region_id: params.region_id,
1906 range: None,
1907 header_row: None,
1908 header_rows: None,
1909 columns: None,
1910 filters: None,
1911 sample_mode: params.sample_mode.clone(),
1912 limit: params.sample_size,
1913 offset: Some(0),
1914 },
1915 )?;
1916
1917 let sample_size = params.sample_size.unwrap_or(10) as usize;
1918 let sample_mode = params
1919 .sample_mode
1920 .clone()
1921 .unwrap_or_else(|| "distributed".to_string());
1922
1923 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1924 extract_table_rows(
1925 sheet,
1926 &resolved,
1927 None,
1928 None,
1929 None,
1930 None,
1931 sample_size,
1932 0,
1933 &sample_mode,
1934 )
1935 })??;
1936
1937 let column_types = summarize_columns(&headers, &rows);
1938
1939 Ok(TableProfileResponse {
1940 workbook_id: workbook.id.clone(),
1941 workbook_short_id: workbook.short_id.clone(),
1942 sheet_name: resolved.sheet_name,
1943 table_name: resolved.table_name,
1944 headers,
1945 column_types,
1946 row_count: total_rows,
1947 samples: rows,
1948 notes: Vec::new(),
1949 })
1950}
1951
1952#[derive(Debug, Deserialize, JsonSchema)]
1953pub struct ManifestStubParams {
1954 pub workbook_id: WorkbookId,
1955 pub sheet_filter: Option<String>,
1956}
1957
1958pub async fn get_manifest_stub(
1959 state: Arc<AppState>,
1960 params: ManifestStubParams,
1961) -> Result<ManifestStubResponse> {
1962 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1963 let mut summaries = workbook.list_summaries()?;
1964
1965 if let Some(filter) = ¶ms.sheet_filter {
1966 summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
1967 }
1968
1969 let sheets = summaries
1970 .into_iter()
1971 .map(|summary| ManifestSheetStub {
1972 sheet_name: summary.name.clone(),
1973 classification: summary.classification.clone(),
1974 candidate_expectations: vec![format!(
1975 "Review {} sheet for expectation candidates",
1976 format!("{:?}", summary.classification).to_ascii_lowercase()
1977 )],
1978 notes: summary.style_tags,
1979 })
1980 .collect();
1981
1982 let response = ManifestStubResponse {
1983 workbook_id: workbook.id.clone(),
1984 workbook_short_id: workbook.short_id.clone(),
1985 slug: workbook.slug.clone(),
1986 sheets,
1987 };
1988 Ok(response)
1989}
1990
1991#[derive(Debug, Deserialize, JsonSchema)]
1992pub struct CloseWorkbookParams {
1993 pub workbook_id: WorkbookId,
1994}
1995
1996pub async fn close_workbook(
1997 state: Arc<AppState>,
1998 params: CloseWorkbookParams,
1999) -> Result<CloseWorkbookResponse> {
2000 state.close_workbook(¶ms.workbook_id)?;
2001 Ok(CloseWorkbookResponse {
2002 workbook_id: params.workbook_id.clone(),
2003 message: format!("workbook {} evicted", params.workbook_id.as_str()),
2004 })
2005}
2006fn collect_formula_matches(
2007 sheet: &umya_spreadsheet::Worksheet,
2008 sheet_name: &str,
2009 query: &str,
2010 case_sensitive: bool,
2011) -> Vec<FindFormulaMatch> {
2012 use crate::workbook::cell_to_value;
2013
2014 let mut results = Vec::new();
2015 for cell in sheet.get_cell_collection() {
2016 if !cell.is_formula() {
2017 continue;
2018 }
2019 let formula = cell.get_formula();
2020 let haystack = if case_sensitive {
2021 formula.to_string()
2022 } else {
2023 formula.to_ascii_lowercase()
2024 };
2025 if !haystack.contains(query) {
2026 continue;
2027 }
2028 let coord = cell.get_coordinate();
2029 let column = *coord.get_col_num();
2030 let row = *coord.get_row_num();
2031 let columns = vec![column];
2032 let context_row = build_row_snapshot(sheet, row, &columns, true, false);
2033 let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
2034
2035 results.push(FindFormulaMatch {
2036 address: coord.get_coordinate(),
2037 sheet_name: sheet_name.to_string(),
2038 formula: formula.to_string(),
2039 cached_value: if cell.is_formula() {
2040 cell_to_value(cell)
2041 } else {
2042 None
2043 },
2044 context: vec![header_row, context_row],
2045 });
2046 }
2047 results
2048}
2049
2050#[derive(Clone)]
2051struct TraceFormulaInfo {
2052 fingerprint: String,
2053 formula: String,
2054}
2055
2056#[derive(Clone)]
2057struct TraceEdgeRaw {
2058 from: String,
2059 to: String,
2060 neighbor: String,
2061}
2062
2063#[derive(Clone)]
2064struct LayerLinks {
2065 depth: u32,
2066 edges: Vec<TraceEdgeRaw>,
2067 truncated_cells: Vec<String>,
2068}
2069
2070#[derive(Clone)]
2071struct NeighborDetail {
2072 address: String,
2073 column: Option<u32>,
2074 row: Option<u32>,
2075 kind: TraceCellKind,
2076 value: Option<CellValue>,
2077 formula: Option<String>,
2078 fingerprint: Option<String>,
2079 external: bool,
2080}
2081
2082fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2083 let mut map = HashMap::new();
2084 for group in graph.groups() {
2085 for address in group.addresses.clone() {
2086 map.insert(
2087 address.to_ascii_uppercase(),
2088 TraceFormulaInfo {
2089 fingerprint: group.fingerprint.clone(),
2090 formula: group.formula.clone(),
2091 },
2092 );
2093 }
2094 }
2095 map
2096}
2097
2098struct TraceConfig<'a> {
2099 direction: &'a TraceDirection,
2100 origin: &'a str,
2101 sheet_name: &'a str,
2102 depth_limit: u32,
2103 page_size: usize,
2104}
2105
2106fn build_trace_layers(
2107 workbook: &WorkbookContext,
2108 graph: &FormulaGraph,
2109 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2110 config: &TraceConfig<'_>,
2111 cursor: Option<TraceCursor>,
2112) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2113 let layer_links =
2114 collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2115 let mut layers = Vec::new();
2116 let mut next_cursor = None;
2117 let mut notes = Vec::new();
2118 let focus_depth = cursor.as_ref().map(|c| c.depth);
2119
2120 for layer in layer_links {
2121 let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2122 let offset = cursor
2123 .as_ref()
2124 .filter(|c| c.depth == layer.depth)
2125 .map(|c| c.offset)
2126 .unwrap_or(0);
2127
2128 let mut node_set: HashSet<String> = HashSet::new();
2129 for edge in &layer.edges {
2130 node_set.insert(edge.neighbor.clone());
2131 }
2132 let mut nodes: Vec<String> = node_set.into_iter().collect();
2133 nodes.sort_by(|a, b| compare_addresses(a, b));
2134
2135 let details = workbook.with_sheet(config.sheet_name, |sheet| {
2136 collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2137 })?;
2138 let total_nodes = details.len();
2139 let start = offset.min(total_nodes);
2140 let end = if produce_edges {
2141 (start + config.page_size).min(total_nodes)
2142 } else {
2143 start
2144 };
2145 let selected_slice = if produce_edges {
2146 &details[start..end]
2147 } else {
2148 &details[0..0]
2149 };
2150 let selected_addresses: HashSet<String> = selected_slice
2151 .iter()
2152 .map(|detail| detail.address.clone())
2153 .collect();
2154
2155 let summary = build_layer_summary(&details);
2156 let range_highlights = build_range_highlights(&details);
2157 let group_highlights = build_formula_group_highlights(&details);
2158 let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2159
2160 let highlights = TraceLayerHighlights {
2161 top_ranges: range_highlights.clone(),
2162 top_formula_groups: group_highlights.clone(),
2163 notable_cells,
2164 };
2165
2166 let edges = if produce_edges {
2167 build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2168 } else {
2169 Vec::new()
2170 };
2171
2172 let has_more = produce_edges && end < total_nodes;
2173 if has_more && next_cursor.is_none() {
2174 next_cursor = Some(TraceCursor {
2175 depth: layer.depth,
2176 offset: end,
2177 });
2178 }
2179 if has_more {
2180 notes.push(format!(
2181 "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2182 layer.depth, end, total_nodes, layer.depth, end
2183 ));
2184 }
2185
2186 if !layer.truncated_cells.is_empty() {
2187 let cell_list = if layer.truncated_cells.len() <= 3 {
2188 layer.truncated_cells.join(", ")
2189 } else {
2190 format!(
2191 "{}, ... ({} more)",
2192 layer.truncated_cells[..3].join(", "),
2193 layer.truncated_cells.len() - 3
2194 )
2195 };
2196 notes.push(format!(
2197 "Layer {}: dependents truncated at {} per cell for: {}",
2198 layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
2199 ));
2200 }
2201
2202 layers.push(TraceLayer {
2203 depth: layer.depth,
2204 summary,
2205 highlights,
2206 edges,
2207 has_more,
2208 });
2209 }
2210
2211 Ok((layers, next_cursor, notes))
2212}
2213
2214fn collect_layer_links(
2215 graph: &FormulaGraph,
2216 direction: &TraceDirection,
2217 origin: &str,
2218 depth_limit: u32,
2219) -> Vec<LayerLinks> {
2220 let mut visited: HashSet<String> = HashSet::new();
2221 visited.insert(origin.to_string());
2222 let mut frontier = vec![origin.to_string()];
2223 let mut layers = Vec::new();
2224
2225 for depth in 1..=depth_limit {
2226 let mut next_frontier_set: HashSet<String> = HashSet::new();
2227 let mut edges = Vec::new();
2228 let mut truncated_cells = Vec::new();
2229
2230 for cell in &frontier {
2231 let (neighbors, was_truncated) = match direction {
2232 TraceDirection::Precedents => (graph.precedents(cell), false),
2233 TraceDirection::Dependents => {
2234 graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
2235 }
2236 };
2237
2238 if was_truncated {
2239 truncated_cells.push(cell.clone());
2240 }
2241
2242 for neighbor in neighbors {
2243 let neighbor_upper = neighbor.to_ascii_uppercase();
2244 let edge = match direction {
2245 TraceDirection::Precedents => TraceEdgeRaw {
2246 from: cell.clone(),
2247 to: neighbor_upper.clone(),
2248 neighbor: neighbor_upper.clone(),
2249 },
2250 TraceDirection::Dependents => TraceEdgeRaw {
2251 from: neighbor_upper.clone(),
2252 to: cell.clone(),
2253 neighbor: neighbor_upper.clone(),
2254 },
2255 };
2256 edges.push(edge);
2257 if visited.insert(neighbor_upper.clone()) {
2258 next_frontier_set.insert(neighbor_upper);
2259 }
2260 }
2261 }
2262
2263 if edges.is_empty() {
2264 break;
2265 }
2266
2267 layers.push(LayerLinks {
2268 depth,
2269 edges,
2270 truncated_cells,
2271 });
2272 if next_frontier_set.is_empty() {
2273 break;
2274 }
2275 let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2276 next_frontier.sort();
2277 frontier = next_frontier;
2278 }
2279
2280 layers
2281}
2282
2283fn collect_neighbor_details(
2284 sheet: &umya_spreadsheet::Worksheet,
2285 current_sheet: &str,
2286 addresses: &[String],
2287 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2288) -> Vec<NeighborDetail> {
2289 let mut details = Vec::new();
2290 for address in addresses {
2291 let (sheet_part, cell_part) = split_sheet_and_cell(address);
2292 let normalized_sheet = sheet_part
2293 .as_ref()
2294 .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2295 let is_external = normalized_sheet
2296 .as_ref()
2297 .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2298 .unwrap_or(false);
2299
2300 let Some(cell_ref) = cell_part else {
2301 details.push(NeighborDetail {
2302 address: address.clone(),
2303 column: None,
2304 row: None,
2305 kind: TraceCellKind::External,
2306 value: None,
2307 formula: None,
2308 fingerprint: None,
2309 external: true,
2310 });
2311 continue;
2312 };
2313
2314 let cell_ref_upper = cell_ref.to_ascii_uppercase();
2315
2316 if is_external {
2317 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2318 details.push(NeighborDetail {
2319 address: address.clone(),
2320 column: None,
2321 row: None,
2322 kind: TraceCellKind::External,
2323 value: None,
2324 formula: formula_info.map(|info| info.formula.clone()),
2325 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2326 external: true,
2327 });
2328 continue;
2329 }
2330
2331 let Some((col, row)) = parse_address(&cell_ref_upper) else {
2332 details.push(NeighborDetail {
2333 address: address.clone(),
2334 column: None,
2335 row: None,
2336 kind: TraceCellKind::External,
2337 value: None,
2338 formula: None,
2339 fingerprint: None,
2340 external: true,
2341 });
2342 continue;
2343 };
2344
2345 let cell_opt = sheet.get_cell((&col, &row));
2346 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2347 if let Some(cell) = cell_opt {
2348 let value = cell_to_value(cell);
2349 let kind = if cell.is_formula() {
2350 TraceCellKind::Formula
2351 } else if value.is_some() {
2352 TraceCellKind::Literal
2353 } else {
2354 TraceCellKind::Blank
2355 };
2356 details.push(NeighborDetail {
2357 address: address.clone(),
2358 column: Some(col),
2359 row: Some(row),
2360 kind,
2361 value,
2362 formula: formula_info.map(|info| info.formula.clone()),
2363 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2364 external: false,
2365 });
2366 } else {
2367 details.push(NeighborDetail {
2368 address: address.clone(),
2369 column: Some(col),
2370 row: Some(row),
2371 kind: TraceCellKind::Blank,
2372 value: None,
2373 formula: formula_info.map(|info| info.formula.clone()),
2374 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2375 external: false,
2376 });
2377 }
2378 }
2379 details
2380}
2381
2382fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2383 let mut summary = TraceLayerSummary {
2384 total_nodes: details.len(),
2385 formula_nodes: 0,
2386 value_nodes: 0,
2387 blank_nodes: 0,
2388 external_nodes: 0,
2389 unique_formula_groups: 0,
2390 };
2391
2392 let mut fingerprints: HashSet<String> = HashSet::new();
2393
2394 for detail in details {
2395 match detail.kind {
2396 TraceCellKind::Formula => {
2397 summary.formula_nodes += 1;
2398 if let Some(fp) = &detail.fingerprint {
2399 fingerprints.insert(fp.clone());
2400 }
2401 }
2402 TraceCellKind::Literal => summary.value_nodes += 1,
2403 TraceCellKind::Blank => summary.blank_nodes += 1,
2404 TraceCellKind::External => summary.external_nodes += 1,
2405 }
2406 }
2407
2408 summary.unique_formula_groups = fingerprints.len();
2409 summary
2410}
2411
2412fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2413 let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2414 for detail in details {
2415 if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2416 let entry = aggregates
2417 .entry(fingerprint.clone())
2418 .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2419 entry.1 += 1;
2420 if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2421 entry.2.push(detail.address.clone());
2422 }
2423 }
2424 }
2425
2426 let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2427 .into_iter()
2428 .map(
2429 |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2430 fingerprint,
2431 formula,
2432 count,
2433 sample_addresses,
2434 },
2435 )
2436 .collect();
2437
2438 highlights.sort_by(|a, b| b.count.cmp(&a.count));
2439 highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2440 highlights
2441}
2442
2443fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2444 let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2445 for detail in details {
2446 if let (Some(col), Some(_row)) = (detail.column, detail.row)
2447 && !detail.external
2448 {
2449 by_column.entry(col).or_default().push(detail);
2450 }
2451 }
2452
2453 for column_entries in by_column.values_mut() {
2454 column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2455 }
2456
2457 let mut ranges = Vec::new();
2458 for entries in by_column.values() {
2459 let mut current: Vec<&NeighborDetail> = Vec::new();
2460 for detail in entries {
2461 if current.is_empty() {
2462 current.push(detail);
2463 continue;
2464 }
2465 let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2466 if detail.row.unwrap_or(0) == prev_row + 1 {
2467 current.push(detail);
2468 } else {
2469 if current.len() >= TRACE_RANGE_THRESHOLD {
2470 ranges.push(make_range_highlight(¤t));
2471 }
2472 current.clear();
2473 current.push(detail);
2474 }
2475 }
2476 if current.len() >= TRACE_RANGE_THRESHOLD {
2477 ranges.push(make_range_highlight(¤t));
2478 }
2479 }
2480
2481 ranges.sort_by(|a, b| b.count.cmp(&a.count));
2482 ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2483 ranges
2484}
2485
2486fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2487 let mut literals = 0usize;
2488 let mut formulas = 0usize;
2489 let mut blanks = 0usize;
2490 let mut sample_values = Vec::new();
2491 let mut sample_formulas = Vec::new();
2492 let mut sample_addresses = Vec::new();
2493
2494 for detail in details {
2495 match detail.kind {
2496 TraceCellKind::Formula => {
2497 formulas += 1;
2498 if let Some(formula) = &detail.formula
2499 && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2500 && !sample_formulas.contains(formula)
2501 {
2502 sample_formulas.push(formula.clone());
2503 }
2504 }
2505 TraceCellKind::Literal => {
2506 literals += 1;
2507 if let Some(value) = &detail.value
2508 && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2509 {
2510 sample_values.push(value.clone());
2511 }
2512 }
2513 TraceCellKind::Blank => blanks += 1,
2514 TraceCellKind::External => {}
2515 }
2516 if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2517 sample_addresses.push(detail.address.clone());
2518 }
2519 }
2520
2521 TraceRangeHighlight {
2522 start: details
2523 .first()
2524 .map(|d| d.address.clone())
2525 .unwrap_or_default(),
2526 end: details
2527 .last()
2528 .map(|d| d.address.clone())
2529 .unwrap_or_default(),
2530 count: details.len(),
2531 literals,
2532 formulas,
2533 blanks,
2534 sample_values,
2535 sample_formulas,
2536 sample_addresses,
2537 }
2538}
2539
2540fn build_notable_cells(
2541 details: &[NeighborDetail],
2542 ranges: &[TraceRangeHighlight],
2543 groups: &[TraceFormulaGroupHighlight],
2544) -> Vec<TraceCellHighlight> {
2545 let mut exclude: HashSet<String> = HashSet::new();
2546 for range in ranges {
2547 exclude.insert(range.start.clone());
2548 exclude.insert(range.end.clone());
2549 for addr in &range.sample_addresses {
2550 exclude.insert(addr.clone());
2551 }
2552 }
2553 for group in groups {
2554 for addr in &group.sample_addresses {
2555 exclude.insert(addr.clone());
2556 }
2557 }
2558
2559 let mut highlights = Vec::new();
2560 let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
2561
2562 for detail in details {
2563 if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
2564 break;
2565 }
2566 if exclude.contains(&detail.address) {
2567 continue;
2568 }
2569 let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
2570 if *counter >= 2 && detail.kind != TraceCellKind::External {
2571 continue;
2572 }
2573 highlights.push(TraceCellHighlight {
2574 address: detail.address.clone(),
2575 kind: detail.kind.clone(),
2576 value: detail.value.clone(),
2577 formula: detail.formula.clone(),
2578 });
2579 *counter += 1;
2580 }
2581
2582 highlights
2583}
2584
2585fn build_edges_for_layer(
2586 raw_edges: &[TraceEdgeRaw],
2587 selected: &HashSet<String>,
2588 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2589) -> Vec<FormulaTraceEdge> {
2590 let mut edges = Vec::new();
2591 for edge in raw_edges {
2592 if selected.contains(&edge.neighbor) {
2593 let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
2594 .map(|info| info.formula.clone());
2595 edges.push(FormulaTraceEdge {
2596 from: edge.from.clone(),
2597 to: edge.to.clone(),
2598 formula,
2599 note: None,
2600 });
2601 }
2602 }
2603 edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
2604 edges
2605}
2606
2607fn lookup_formula_info<'a>(
2608 lookup: &'a HashMap<String, TraceFormulaInfo>,
2609 cell_ref: &str,
2610 original: &str,
2611) -> Option<&'a TraceFormulaInfo> {
2612 if let Some(info) = lookup.get(cell_ref) {
2613 return Some(info);
2614 }
2615 if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
2616 let upper = cell.to_ascii_uppercase();
2617 return lookup.get(&upper);
2618 }
2619 None
2620}
2621
2622fn compare_addresses(left: &str, right: &str) -> Ordering {
2623 let (sheet_left, cell_left) = split_sheet_and_cell(left);
2624 let (sheet_right, cell_right) = split_sheet_and_cell(right);
2625
2626 let sheet_left_key = sheet_left
2627 .as_ref()
2628 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2629 .unwrap_or_default();
2630 let sheet_right_key = sheet_right
2631 .as_ref()
2632 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2633 .unwrap_or_default();
2634
2635 match sheet_left_key.cmp(&sheet_right_key) {
2636 Ordering::Equal => {
2637 let left_core = cell_left.unwrap_or_else(|| left.to_string());
2638 let right_core = cell_right.unwrap_or_else(|| right.to_string());
2639 let left_coords = parse_address(&left_core.to_ascii_uppercase());
2640 let right_coords = parse_address(&right_core.to_ascii_uppercase());
2641 match (left_coords, right_coords) {
2642 (Some((lc, lr)), Some((rc, rr))) => lc
2643 .cmp(&rc)
2644 .then_with(|| lr.cmp(&rr))
2645 .then_with(|| left_core.cmp(&right_core)),
2646 _ => left_core.cmp(&right_core),
2647 }
2648 }
2649 other => other,
2650 }
2651}
2652
2653fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
2654 if let Some(idx) = address.rfind('!') {
2655 let sheet = address[..idx].to_string();
2656 let cell = address[idx + 1..].to_string();
2657 (Some(sheet), Some(cell))
2658 } else {
2659 (None, Some(address.to_string()))
2660 }
2661}
2662
2663fn clean_sheet_name(sheet: &str) -> String {
2664 let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
2665 let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
2666 after_bracket
2667 .trim_matches(|c| c == '\'' || c == '"')
2668 .to_string()
2669}