1pub mod filters;
2
3use crate::analysis::{formula::FormulaGraph, stats};
4use crate::model::*;
5use crate::state::AppState;
6use crate::workbook::{WorkbookContext, cell_to_value};
7use anyhow::{Result, anyhow};
8use regex::Regex;
9use schemars::JsonSchema;
10use serde::Deserialize;
11use std::cmp::Ordering;
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::sync::Arc;
14
15const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
16const TRACE_PAGE_MIN: usize = 5;
17const TRACE_PAGE_MAX: usize = 200;
18const TRACE_RANGE_THRESHOLD: usize = 4;
19const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
20const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
21const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
22const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
23const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
24const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
25
26pub async fn list_workbooks(
27 state: Arc<AppState>,
28 params: ListWorkbooksParams,
29) -> Result<WorkbookListResponse> {
30 let filter = params.into_filter()?;
31 state.list_workbooks(filter)
32}
33
34pub async fn describe_workbook(
35 state: Arc<AppState>,
36 params: DescribeWorkbookParams,
37) -> Result<WorkbookDescription> {
38 let workbook = state.open_workbook(¶ms.workbook_id).await?;
39 let desc = workbook.describe();
40 Ok(desc)
41}
42
43#[derive(Debug, Deserialize, JsonSchema)]
44pub struct ListWorkbooksParams {
45 pub slug_prefix: Option<String>,
46 pub folder: Option<String>,
47 pub path_glob: Option<String>,
48}
49
50impl ListWorkbooksParams {
51 fn into_filter(self) -> Result<filters::WorkbookFilter> {
52 filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
53 }
54}
55
56#[derive(Debug, Deserialize, JsonSchema)]
57pub struct DescribeWorkbookParams {
58 pub workbook_id: WorkbookId,
59}
60
61#[derive(Debug, Deserialize, JsonSchema)]
62pub struct ListSheetsParams {
63 pub workbook_id: WorkbookId,
64}
65
66pub async fn list_sheets(
67 state: Arc<AppState>,
68 params: ListSheetsParams,
69) -> Result<SheetListResponse> {
70 let workbook = state.open_workbook(¶ms.workbook_id).await?;
71 let summaries = workbook.list_summaries()?;
72 let response = SheetListResponse {
73 workbook_id: workbook.id.clone(),
74 workbook_short_id: workbook.short_id.clone(),
75 sheets: summaries,
76 };
77 Ok(response)
78}
79
80#[derive(Debug, Deserialize, JsonSchema)]
81pub struct SheetOverviewParams {
82 pub workbook_id: WorkbookId,
83 pub sheet_name: String,
84}
85
86#[derive(Debug, Deserialize, JsonSchema)]
87pub struct WorkbookSummaryParams {
88 pub workbook_id: WorkbookId,
89}
90
91pub async fn workbook_summary(
92 state: Arc<AppState>,
93 params: WorkbookSummaryParams,
94) -> Result<WorkbookSummaryResponse> {
95 let workbook = state.open_workbook(¶ms.workbook_id).await?;
96 let sheet_names = workbook.sheet_names();
97
98 let mut total_cells: u64 = 0;
99 let mut total_formulas: u64 = 0;
100 let mut breakdown = WorkbookBreakdown::default();
101 let mut region_counts = RegionCountSummary::default();
102 let mut entry_points: Vec<EntryPoint> = Vec::new();
103 let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
104
105 for sheet_name in &sheet_names {
106 let entry = workbook.get_sheet_metrics(sheet_name)?;
107 total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
108 total_formulas += entry.metrics.formula_cells as u64;
109 match entry.metrics.classification {
110 SheetClassification::Calculator => breakdown.calculator_sheets += 1,
111 SheetClassification::Metadata => breakdown.metadata_sheets += 1,
112 SheetClassification::Empty => {}
113 _ => breakdown.data_sheets += 1,
114 }
115
116 for region in &entry.detected_regions {
117 match region
118 .region_kind
119 .clone()
120 .unwrap_or(region.classification.clone())
121 {
122 RegionKind::Calculator => region_counts.calculator += 1,
123 RegionKind::Metadata => region_counts.metadata += 1,
124 RegionKind::Parameters => region_counts.parameters += 1,
125 RegionKind::Outputs => region_counts.outputs += 1,
126 RegionKind::Data | RegionKind::Table => region_counts.data += 1,
127 _ => region_counts.other += 1,
128 }
129 if region.confidence >= 0.3 {
130 let kind = region
131 .region_kind
132 .as_ref()
133 .unwrap_or(®ion.classification);
134 let priority = match kind {
135 RegionKind::Parameters => 0,
136 RegionKind::Data | RegionKind::Table => 1,
137 RegionKind::Outputs => 2,
138 RegionKind::Calculator => 3,
139 RegionKind::Metadata => 4,
140 _ => 5,
141 };
142 entry_points.push(EntryPoint {
143 sheet_name: sheet_name.clone(),
144 region_id: Some(region.id),
145 bounds: Some(region.bounds.clone()),
146 rationale: format!(
147 "{:?} region ({} rows, {:.0}% conf, p{})",
148 kind,
149 region.row_count,
150 region.confidence * 100.0,
151 priority
152 ),
153 });
154 }
155 }
156
157 if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
158 entry_points.push(EntryPoint {
159 sheet_name: sheet_name.clone(),
160 region_id: None,
161 bounds: None,
162 rationale: "Whole sheet is non-empty; start at top-left".to_string(),
163 });
164 }
165 }
166
167 entry_points.sort_by(|a, b| {
168 let pa = priority_from_rationale(&a.rationale);
169 let pb = priority_from_rationale(&b.rationale);
170 pa.cmp(&pb)
171 .then_with(|| {
172 a.bounds
173 .as_ref()
174 .map(|_| 1)
175 .cmp(&b.bounds.as_ref().map(|_| 1))
176 })
177 .then_with(|| a.sheet_name.cmp(&b.sheet_name))
178 });
179 entry_points.truncate(5);
180
181 let mut seen_ranges = std::collections::HashSet::new();
182 for item in workbook.named_items()? {
183 if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
184 continue;
185 }
186 if !seen_ranges.insert(item.refers_to.clone()) {
187 continue;
188 }
189 key_named_ranges.push(item);
190 if key_named_ranges.len() >= 10 {
191 break;
192 }
193 }
194
195 Ok(WorkbookSummaryResponse {
196 workbook_id: workbook.id.clone(),
197 workbook_short_id: workbook.short_id.clone(),
198 slug: workbook.slug.clone(),
199 sheet_count: sheet_names.len(),
200 total_cells,
201 total_formulas,
202 breakdown,
203 region_counts,
204 key_named_ranges,
205 suggested_entry_points: entry_points,
206 })
207}
208
209fn priority_from_rationale(rationale: &str) -> u32 {
210 if rationale.contains("p0") {
211 0
212 } else if rationale.contains("p1") {
213 1
214 } else if rationale.contains("p2") {
215 2
216 } else if rationale.contains("p3") {
217 3
218 } else if rationale.contains("p4") {
219 4
220 } else {
221 5
222 }
223}
224
225pub async fn sheet_overview(
226 state: Arc<AppState>,
227 params: SheetOverviewParams,
228) -> Result<SheetOverviewResponse> {
229 let workbook = state.open_workbook(¶ms.workbook_id).await?;
230 let overview = workbook.sheet_overview(¶ms.sheet_name)?;
231 Ok(overview)
232}
233
234fn default_start_row() -> u32 {
235 1
236}
237
238fn default_page_size() -> u32 {
239 50
240}
241
242fn default_include_formulas() -> bool {
243 true
244}
245
246fn default_include_header() -> bool {
247 true
248}
249
250#[derive(Debug, Deserialize, JsonSchema)]
251pub struct SheetPageParams {
252 pub workbook_id: WorkbookId,
253 pub sheet_name: String,
254 #[serde(default = "default_start_row")]
255 pub start_row: u32,
256 #[serde(default = "default_page_size")]
257 pub page_size: u32,
258 #[serde(default)]
259 pub columns: Option<Vec<String>>,
260 #[serde(default)]
261 pub columns_by_header: Option<Vec<String>>,
262 #[serde(default = "default_include_formulas")]
263 pub include_formulas: bool,
264 #[serde(default)]
265 pub include_styles: bool,
266 #[serde(default = "default_include_header")]
267 pub include_header: bool,
268 #[serde(default)]
269 pub format: Option<SheetPageFormat>,
270}
271
272impl Default for SheetPageParams {
273 fn default() -> Self {
274 SheetPageParams {
275 workbook_id: WorkbookId(String::new()),
276 sheet_name: String::new(),
277 start_row: default_start_row(),
278 page_size: default_page_size(),
279 columns: None,
280 columns_by_header: None,
281 include_formulas: default_include_formulas(),
282 include_styles: false,
283 include_header: default_include_header(),
284 format: None,
285 }
286 }
287}
288
289fn default_find_limit() -> u32 {
290 50
291}
292
293#[derive(Debug, Deserialize, JsonSchema)]
294pub struct FindValueParams {
295 pub workbook_id: WorkbookId,
296 pub query: String,
297 #[serde(default)]
298 pub label: Option<String>,
299 #[serde(default)]
300 pub mode: Option<FindMode>,
301 #[serde(default)]
302 pub match_mode: Option<String>,
303 #[serde(default)]
304 pub case_sensitive: bool,
305 #[serde(default)]
306 pub sheet_name: Option<String>,
307 #[serde(default)]
308 pub region_id: Option<u32>,
309 #[serde(default)]
310 pub table_name: Option<String>,
311 #[serde(default)]
312 pub value_types: Option<Vec<String>>,
313 #[serde(default)]
314 pub search_headers_only: bool,
315 #[serde(default)]
316 pub direction: Option<LabelDirection>,
317 #[serde(default = "default_find_limit")]
318 pub limit: u32,
319}
320
321impl Default for FindValueParams {
322 fn default() -> Self {
323 Self {
324 workbook_id: WorkbookId(String::new()),
325 query: String::new(),
326 label: None,
327 mode: None,
328 match_mode: None,
329 case_sensitive: false,
330 sheet_name: None,
331 region_id: None,
332 table_name: None,
333 value_types: None,
334 search_headers_only: false,
335 direction: None,
336 limit: default_find_limit(),
337 }
338 }
339}
340
341#[derive(Debug, Deserialize, JsonSchema, Default)]
342pub struct ReadTableParams {
343 pub workbook_id: WorkbookId,
344 #[serde(default)]
345 pub sheet_name: Option<String>,
346 #[serde(default)]
347 pub table_name: Option<String>,
348 #[serde(default)]
349 pub region_id: Option<u32>,
350 #[serde(default)]
351 pub range: Option<String>,
352 #[serde(default)]
353 pub header_row: Option<u32>,
354 #[serde(default)]
355 pub header_rows: Option<u32>,
356 #[serde(default)]
357 pub columns: Option<Vec<String>>,
358 #[serde(default)]
359 pub filters: Option<Vec<TableFilter>>,
360 #[serde(default)]
361 pub sample_mode: Option<String>,
362 #[serde(default)]
363 pub limit: Option<u32>,
364 #[serde(default)]
365 pub offset: Option<u32>,
366}
367
368#[derive(Debug, Deserialize, JsonSchema, Clone)]
369pub struct TableFilter {
370 pub column: String,
371 pub op: String,
372 pub value: serde_json::Value,
373}
374
375#[derive(Debug, Deserialize, JsonSchema, Default)]
376pub struct TableProfileParams {
377 pub workbook_id: WorkbookId,
378 #[serde(default)]
379 pub sheet_name: Option<String>,
380 #[serde(default)]
381 pub region_id: Option<u32>,
382 #[serde(default)]
383 pub table_name: Option<String>,
384 #[serde(default)]
385 pub sample_mode: Option<String>,
386 #[serde(default)]
387 pub sample_size: Option<u32>,
388}
389
390#[derive(Debug, Deserialize, JsonSchema)]
391pub struct RangeValuesParams {
392 pub workbook_id: WorkbookId,
393 pub sheet_name: String,
394 pub ranges: Vec<String>,
395 #[serde(default)]
396 pub include_headers: Option<bool>,
397}
398
399pub async fn sheet_page(
400 state: Arc<AppState>,
401 params: SheetPageParams,
402) -> Result<SheetPageResponse> {
403 if params.page_size == 0 {
404 return Err(anyhow!("page_size must be greater than zero"));
405 }
406
407 let workbook = state.open_workbook(¶ms.workbook_id).await?;
408 let metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
409 let format = params.format.unwrap_or_default();
410
411 let start_row = params.start_row.max(1);
412 let page_size = params.page_size.min(500);
413 let include_formulas = params.include_formulas;
414 let include_styles = params.include_styles;
415 let columns = params.columns.clone();
416 let columns_by_header = params.columns_by_header.clone();
417 let include_header = params.include_header;
418
419 let page = workbook.with_sheet(¶ms.sheet_name, |sheet| {
420 build_page(
421 sheet,
422 start_row,
423 page_size,
424 columns.clone(),
425 columns_by_header.clone(),
426 include_formulas,
427 include_styles,
428 include_header,
429 )
430 })?;
431
432 let has_more = page.end_row < metrics.metrics.row_count;
433 let next_start_row = if has_more {
434 Some(page.end_row + 1)
435 } else {
436 None
437 };
438
439 let compact_payload = if matches!(format, SheetPageFormat::Compact) {
440 Some(build_compact_payload(
441 &page.header,
442 &page.rows,
443 include_header,
444 ))
445 } else {
446 None
447 };
448
449 let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
450 Some(build_values_only_payload(
451 &page.header,
452 &page.rows,
453 include_header,
454 ))
455 } else {
456 None
457 };
458
459 let response = SheetPageResponse {
460 workbook_id: workbook.id.clone(),
461 workbook_short_id: workbook.short_id.clone(),
462 sheet_name: params.sheet_name,
463 rows: if matches!(format, SheetPageFormat::Full) {
464 page.rows
465 } else {
466 Vec::new()
467 },
468 has_more,
469 next_start_row,
470 header_row: if include_header { page.header } else { None },
471 compact: compact_payload,
472 values_only: values_only_payload,
473 format,
474 };
475 Ok(response)
476}
477
478#[derive(Debug, Deserialize, JsonSchema)]
479pub struct SheetFormulaMapParams {
480 pub workbook_id: WorkbookId,
481 pub sheet_name: String,
482 pub range: Option<String>,
483 #[serde(default)]
484 pub expand: bool,
485}
486
487pub async fn sheet_formula_map(
488 state: Arc<AppState>,
489 params: SheetFormulaMapParams,
490) -> Result<SheetFormulaMapResponse> {
491 let workbook = state.open_workbook(¶ms.workbook_id).await?;
492 let graph = workbook.formula_graph(¶ms.sheet_name)?;
493 let mut groups = Vec::new();
494 let mut truncated = false;
495
496 for mut group in graph.groups() {
497 if let Some(range) = ¶ms.range {
498 group.addresses.retain(|addr| address_in_range(addr, range));
499 if group.addresses.is_empty() {
500 continue;
501 }
502 }
503 if !params.expand && group.addresses.len() > 15 {
504 group.addresses.truncate(15);
505 truncated = true;
506 }
507 groups.push(group);
508 }
509
510 let response = SheetFormulaMapResponse {
511 workbook_id: workbook.id.clone(),
512 workbook_short_id: workbook.short_id.clone(),
513 sheet_name: params.sheet_name.clone(),
514 groups,
515 truncated,
516 };
517 Ok(response)
518}
519
520#[derive(Debug, Deserialize, JsonSchema)]
521pub struct FormulaTraceParams {
522 pub workbook_id: WorkbookId,
523 pub sheet_name: String,
524 pub cell_address: String,
525 pub direction: TraceDirection,
526 pub depth: Option<u32>,
527 pub limit: Option<u32>,
528 #[serde(default)]
529 pub page_size: Option<usize>,
530 #[serde(default)]
531 pub cursor: Option<TraceCursor>,
532}
533
534pub async fn formula_trace(
535 state: Arc<AppState>,
536 params: FormulaTraceParams,
537) -> Result<FormulaTraceResponse> {
538 let workbook = state.open_workbook(¶ms.workbook_id).await?;
539 let graph = workbook.formula_graph(¶ms.sheet_name)?;
540 let formula_lookup = build_formula_lookup(&graph);
541 let depth = params.depth.unwrap_or(3).clamp(1, 5);
542 let page_size = params
543 .page_size
544 .or_else(|| params.limit.map(|v| v as usize))
545 .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
546 .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
547
548 let origin = params.cell_address.to_uppercase();
549 let config = TraceConfig {
550 direction: ¶ms.direction,
551 origin: &origin,
552 sheet_name: ¶ms.sheet_name,
553 depth_limit: depth,
554 page_size,
555 };
556 let (layers, next_cursor, notes) = build_trace_layers(
557 &workbook,
558 &graph,
559 &formula_lookup,
560 &config,
561 params.cursor.clone(),
562 )?;
563
564 let response = FormulaTraceResponse {
565 workbook_id: workbook.id.clone(),
566 workbook_short_id: workbook.short_id.clone(),
567 sheet_name: params.sheet_name.clone(),
568 origin,
569 direction: params.direction.clone(),
570 layers,
571 next_cursor,
572 notes,
573 };
574 Ok(response)
575}
576
577#[derive(Debug, Deserialize, JsonSchema)]
578pub struct NamedRangesParams {
579 pub workbook_id: WorkbookId,
580 pub sheet_name: Option<String>,
581 pub name_prefix: Option<String>,
582}
583
584pub async fn named_ranges(
585 state: Arc<AppState>,
586 params: NamedRangesParams,
587) -> Result<NamedRangesResponse> {
588 let workbook = state.open_workbook(¶ms.workbook_id).await?;
589 let mut items = workbook.named_items()?;
590
591 if let Some(sheet_filter) = ¶ms.sheet_name {
592 items.retain(|item| {
593 item.sheet_name
594 .as_ref()
595 .map(|name| name.eq_ignore_ascii_case(sheet_filter))
596 .unwrap_or(false)
597 });
598 }
599 if let Some(prefix) = ¶ms.name_prefix {
600 let prefix_lower = prefix.to_ascii_lowercase();
601 items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
602 }
603
604 let response = NamedRangesResponse {
605 workbook_id: workbook.id.clone(),
606 workbook_short_id: workbook.short_id.clone(),
607 items,
608 };
609 Ok(response)
610}
611
612struct PageBuildResult {
613 rows: Vec<RowSnapshot>,
614 header: Option<RowSnapshot>,
615 end_row: u32,
616}
617
618#[allow(clippy::too_many_arguments)]
619fn build_page(
620 sheet: &umya_spreadsheet::Worksheet,
621 start_row: u32,
622 page_size: u32,
623 columns: Option<Vec<String>>,
624 columns_by_header: Option<Vec<String>>,
625 include_formulas: bool,
626 include_styles: bool,
627 include_header: bool,
628) -> PageBuildResult {
629 let max_col = sheet.get_highest_column();
630 let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
631 let column_indices =
632 resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
633
634 let header = if include_header {
635 Some(build_row_snapshot(
636 sheet,
637 1,
638 &column_indices,
639 include_formulas,
640 include_styles,
641 ))
642 } else {
643 None
644 };
645
646 let mut rows = Vec::new();
647 for row_idx in start_row..=end_row {
648 rows.push(build_row_snapshot(
649 sheet,
650 row_idx,
651 &column_indices,
652 include_formulas,
653 include_styles,
654 ));
655 }
656
657 PageBuildResult {
658 rows,
659 header,
660 end_row,
661 }
662}
663
664fn build_row_snapshot(
665 sheet: &umya_spreadsheet::Worksheet,
666 row_index: u32,
667 columns: &[u32],
668 include_formulas: bool,
669 include_styles: bool,
670) -> RowSnapshot {
671 let mut cells = Vec::new();
672 for &col in columns {
673 if let Some(cell) = sheet.get_cell((col, row_index)) {
674 cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
675 } else {
676 let address = crate::utils::cell_address(col, row_index);
677 cells.push(CellSnapshot {
678 address,
679 value: None,
680 formula: None,
681 cached_value: None,
682 number_format: None,
683 style_tags: Vec::new(),
684 notes: Vec::new(),
685 });
686 }
687 }
688
689 RowSnapshot { row_index, cells }
690}
691
692fn build_cell_snapshot(
693 cell: &umya_spreadsheet::Cell,
694 include_formulas: bool,
695 include_styles: bool,
696) -> CellSnapshot {
697 let address = cell.get_coordinate().get_coordinate();
698 let value = crate::workbook::cell_to_value(cell);
699 let formula = if include_formulas && cell.is_formula() {
700 Some(cell.get_formula().to_string())
701 } else {
702 None
703 };
704 let cached_value = if cell.is_formula() {
705 value.clone()
706 } else {
707 None
708 };
709 let number_format = if include_styles {
710 cell.get_style()
711 .get_number_format()
712 .map(|fmt| fmt.get_format_code().to_string())
713 } else {
714 None
715 };
716 let style_tags = if include_styles {
717 crate::analysis::style::tag_cell(cell)
718 .map(|(_, tagging)| tagging.tags)
719 .unwrap_or_default()
720 } else {
721 Vec::new()
722 };
723
724 CellSnapshot {
725 address,
726 value,
727 formula,
728 cached_value,
729 number_format,
730 style_tags,
731 notes: Vec::new(),
732 }
733}
734
735fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
736 use std::collections::BTreeSet;
737 use umya_spreadsheet::helper::coordinate::column_index_from_string;
738
739 let mut indices = BTreeSet::new();
740 if let Some(specs) = columns {
741 for spec in specs {
742 if let Some((start, end)) = spec.split_once(':') {
743 let start_idx = column_index_from_string(start);
744 let end_idx = column_index_from_string(end);
745 let (min_idx, max_idx) = if start_idx <= end_idx {
746 (start_idx, end_idx)
747 } else {
748 (end_idx, start_idx)
749 };
750 for idx in min_idx..=max_idx {
751 indices.insert(idx);
752 }
753 } else {
754 indices.insert(column_index_from_string(spec));
755 }
756 }
757 } else {
758 for idx in 1..=max_column.max(1) {
759 indices.insert(idx);
760 }
761 }
762
763 indices.into_iter().collect()
764}
765
766fn resolve_columns_with_headers(
767 sheet: &umya_spreadsheet::Worksheet,
768 columns: Option<&Vec<String>>,
769 columns_by_header: Option<&Vec<String>>,
770 max_column: u32,
771) -> Vec<u32> {
772 if columns_by_header.is_none() {
773 return resolve_columns(columns, max_column);
774 }
775
776 let mut selected = Vec::new();
777 let header_targets: Vec<String> = columns_by_header
778 .unwrap()
779 .iter()
780 .map(|h| h.trim().to_ascii_lowercase())
781 .collect();
782
783 for col_idx in 1..=max_column.max(1) {
784 let header_cell = sheet.get_cell((col_idx, 1u32));
785 let header_value = header_cell
786 .and_then(cell_to_value)
787 .map(cell_value_to_string_lower);
788 if let Some(hval) = header_value
789 && header_targets.iter().any(|target| target == &hval)
790 {
791 selected.push(col_idx);
792 }
793 }
794
795 if selected.is_empty() {
796 resolve_columns(columns, max_column)
797 } else {
798 selected
799 }
800}
801
802fn cell_value_to_string_lower(value: CellValue) -> String {
803 match value {
804 CellValue::Text(s) => s.to_ascii_lowercase(),
805 CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
806 CellValue::Bool(b) => b.to_string(),
807 CellValue::Error(e) => e.to_ascii_lowercase(),
808 CellValue::Date(d) => d.to_ascii_lowercase(),
809 }
810}
811
812fn build_compact_payload(
813 header: &Option<RowSnapshot>,
814 rows: &[RowSnapshot],
815 include_header: bool,
816) -> SheetPageCompact {
817 let headers = derive_headers(header, rows);
818 let header_row = if include_header {
819 header
820 .as_ref()
821 .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
822 .unwrap_or_default()
823 } else {
824 Vec::new()
825 };
826 let data_rows = rows
827 .iter()
828 .map(|row| {
829 let mut vals: Vec<Option<CellValue>> = Vec::new();
830 vals.push(Some(CellValue::Number(row.row_index as f64)));
831 vals.extend(row.cells.iter().map(|c| c.value.clone()));
832 vals
833 })
834 .collect();
835
836 SheetPageCompact {
837 headers,
838 header_row,
839 rows: data_rows,
840 }
841}
842
843fn build_values_only_payload(
844 header: &Option<RowSnapshot>,
845 rows: &[RowSnapshot],
846 include_header: bool,
847) -> SheetPageValues {
848 let mut data = Vec::new();
849 if include_header && let Some(h) = header {
850 data.push(h.cells.iter().map(|c| c.value.clone()).collect());
851 }
852 for row in rows {
853 data.push(row.cells.iter().map(|c| c.value.clone()).collect());
854 }
855
856 SheetPageValues { rows: data }
857}
858
859fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
860 if let Some(h) = header {
861 let mut headers: Vec<String> = h
862 .cells
863 .iter()
864 .map(|c| match &c.value {
865 Some(CellValue::Text(t)) => t.clone(),
866 Some(CellValue::Number(n)) => n.to_string(),
867 Some(CellValue::Bool(b)) => b.to_string(),
868 Some(CellValue::Date(d)) => d.clone(),
869 Some(CellValue::Error(e)) => e.clone(),
870 None => c.address.clone(),
871 })
872 .collect();
873 headers.insert(0, "Row".to_string());
874 headers
875 } else if let Some(first) = rows.first() {
876 let mut headers = Vec::new();
877 headers.push("Row".to_string());
878 for cell in &first.cells {
879 headers.push(cell.address.clone());
880 }
881 headers
882 } else {
883 vec![]
884 }
885}
886fn default_stats_sample() -> usize {
887 500
888}
889
890#[derive(Debug, Deserialize, JsonSchema)]
891pub struct SheetStatisticsParams {
892 pub workbook_id: WorkbookId,
893 pub sheet_name: String,
894 #[serde(default)]
895 pub sample_rows: Option<usize>,
896}
897
898pub async fn sheet_statistics(
899 state: Arc<AppState>,
900 params: SheetStatisticsParams,
901) -> Result<SheetStatisticsResponse> {
902 let workbook = state.open_workbook(¶ms.workbook_id).await?;
903 let sheet_metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
904 let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
905 let stats = workbook.with_sheet(¶ms.sheet_name, |sheet| {
906 stats::compute_sheet_statistics(sheet, sample_rows)
907 })?;
908 let response = SheetStatisticsResponse {
909 workbook_id: workbook.id.clone(),
910 workbook_short_id: workbook.short_id.clone(),
911 sheet_name: params.sheet_name,
912 row_count: sheet_metrics.metrics.row_count,
913 column_count: sheet_metrics.metrics.column_count,
914 density: stats.density,
915 numeric_columns: stats.numeric_columns,
916 text_columns: stats.text_columns,
917 null_counts: stats.null_counts,
918 duplicate_warnings: stats.duplicate_warnings,
919 };
920 Ok(response)
921}
922
923fn address_in_range(address: &str, range: &str) -> bool {
924 parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
925 if let Some((col, row)) = parse_address(address) {
926 col >= start_col && col <= end_col && row >= start_row && row <= end_row
927 } else {
928 false
929 }
930 })
931}
932
933fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
934 let mut parts = range.split(':');
935 let start = parts.next()?;
936 let end = parts.next().unwrap_or(start);
937 let start_idx = parse_address(start)?;
938 let end_idx = parse_address(end)?;
939 Some((
940 (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
941 (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
942 ))
943}
944
945fn parse_address(address: &str) -> Option<(u32, u32)> {
946 use umya_spreadsheet::helper::coordinate::index_from_coordinate;
947 let (col, row, _, _) = index_from_coordinate(address);
948 match (col, row) {
949 (Some(c), Some(r)) => Some((c, r)),
950 _ => None,
951 }
952}
953
954#[derive(Clone)]
955struct TableTarget {
956 sheet_name: String,
957 table_name: Option<String>,
958 range: ((u32, u32), (u32, u32)),
959 header_hint: Option<u32>,
960}
961
962fn resolve_table_target(
963 workbook: &WorkbookContext,
964 params: &ReadTableParams,
965) -> Result<TableTarget> {
966 if let Some(region_id) = params.region_id
967 && let Some(sheet) = ¶ms.sheet_name
968 && let Ok(region) = workbook.detected_region(sheet, region_id)
969 {
970 return Ok(TableTarget {
971 sheet_name: sheet.clone(),
972 table_name: None,
973 range: parse_range(®ion.bounds).unwrap_or(((1, 1), (1, 1))),
974 header_hint: region.header_row,
975 });
976 }
977
978 if let Some(table_name) = ¶ms.table_name {
979 let items = workbook.named_items()?;
980 for item in items {
981 if item.name.eq_ignore_ascii_case(table_name)
982 || item
983 .name
984 .to_ascii_lowercase()
985 .contains(&table_name.to_ascii_lowercase())
986 {
987 let mut sheet_name = item
988 .sheet_name
989 .clone()
990 .or_else(|| params.sheet_name.clone())
991 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
992 let refers_to = item.refers_to.trim_start_matches('=');
993 let mut range_part = refers_to;
994 if let Some((sheet_part, rest)) = refers_to.split_once('!') {
995 sheet_name = sheet_part.trim_matches('\'').to_string();
996 range_part = rest;
997 }
998 if let Some(range) = parse_range(range_part) {
999 return Ok(TableTarget {
1000 sheet_name,
1001 table_name: Some(item.name.clone()),
1002 range,
1003 header_hint: if item.kind == NamedItemKind::Table {
1004 Some(range.0.1)
1005 } else {
1006 None
1007 },
1008 });
1009 }
1010 }
1011 }
1012 }
1013
1014 let sheet_name = params
1015 .sheet_name
1016 .clone()
1017 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1018
1019 if let Some(rng) = ¶ms.range
1020 && let Some(range) = parse_range(rng)
1021 {
1022 return Ok(TableTarget {
1023 sheet_name,
1024 table_name: None,
1025 range,
1026 header_hint: None,
1027 });
1028 }
1029
1030 let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1031 let end_col = metrics.metrics.column_count.max(1);
1032 let end_row = metrics.metrics.row_count.max(1);
1033 Ok(TableTarget {
1034 sheet_name,
1035 table_name: None,
1036 range: ((1, 1), (end_col, end_row)),
1037 header_hint: None,
1038 })
1039}
1040
1041#[allow(clippy::too_many_arguments)]
1042fn extract_table_rows(
1043 sheet: &umya_spreadsheet::Worksheet,
1044 target: &TableTarget,
1045 header_row: Option<u32>,
1046 header_rows: Option<u32>,
1047 columns: Option<Vec<String>>,
1048 filters: Option<Vec<TableFilter>>,
1049 limit: usize,
1050 offset: usize,
1051 sample_mode: &str,
1052) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1053 let ((start_col, start_row), (end_col, end_row)) = target.range;
1054 let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1055 if header_start < start_row {
1056 header_start = start_row;
1057 }
1058 if header_start > end_row {
1059 header_start = start_row;
1060 }
1061 let header_rows_count = header_rows.unwrap_or(1).max(1);
1062 let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1063 let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1064 resolve_columns(Some(cols), end_col).into_iter().collect()
1065 } else {
1066 (start_col..=end_col).collect()
1067 };
1068
1069 let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1070 let mut all_rows: Vec<TableRow> = Vec::new();
1071 let mut total_rows: u32 = 0;
1072
1073 for row_idx in data_start_row..=end_row {
1074 let mut row = BTreeMap::new();
1075 for (i, col_idx) in column_indices.iter().enumerate() {
1076 let header = headers
1077 .get(i)
1078 .cloned()
1079 .unwrap_or_else(|| format!("Col{col_idx}"));
1080 let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1081 row.insert(header, value);
1082 }
1083 if !row_passes_filters(&row, filters.as_ref()) {
1084 continue;
1085 }
1086 total_rows += 1;
1087 if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1088 continue;
1089 }
1090 all_rows.push(row);
1091 }
1092
1093 let rows = sample_rows(all_rows, limit, offset, sample_mode);
1094
1095 Ok((headers, rows, total_rows))
1096}
1097
1098fn build_headers(
1099 sheet: &umya_spreadsheet::Worksheet,
1100 columns: &[u32],
1101 header_start: u32,
1102 header_rows: u32,
1103) -> Vec<String> {
1104 let mut headers = Vec::new();
1105 for col_idx in columns {
1106 let mut parts = Vec::new();
1107 for h in header_start..(header_start + header_rows) {
1108 let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1109 if let Some(value) = sheet
1110 .get_cell((origin_col, origin_row))
1111 .and_then(cell_to_value)
1112 {
1113 match value {
1114 CellValue::Text(ref s) if s.trim().is_empty() => {}
1115 CellValue::Text(s) => parts.push(s),
1116 CellValue::Number(n) => parts.push(n.to_string()),
1117 CellValue::Bool(b) => parts.push(b.to_string()),
1118 CellValue::Error(e) => parts.push(e),
1119 CellValue::Date(d) => parts.push(d),
1120 }
1121 }
1122 }
1123 if parts.is_empty() {
1124 headers.push(crate::utils::column_number_to_name(*col_idx));
1125 } else {
1126 headers.push(parts.join(" / "));
1127 }
1128 }
1129
1130 if headers.iter().all(|h| h.trim().is_empty()) {
1131 return columns
1132 .iter()
1133 .map(|c| crate::utils::column_number_to_name(*c))
1134 .collect();
1135 }
1136
1137 dedupe_headers(headers)
1138}
1139
1140fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1141 let mut seen: HashMap<String, u32> = HashMap::new();
1142 for h in headers.iter_mut() {
1143 let key = h.clone();
1144 if key.trim().is_empty() {
1145 continue;
1146 }
1147 let count = seen.entry(key.clone()).or_insert(0);
1148 if *count > 0 {
1149 h.push_str(&format!("_{}", *count + 1));
1150 }
1151 *count += 1;
1152 }
1153 headers
1154}
1155
1156fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1157 if let Some(filters) = filters {
1158 for filter in filters {
1159 if let Some(value) = row.get(&filter.column) {
1160 match filter.op.as_str() {
1161 "eq" => {
1162 if !value_eq(value, &filter.value) {
1163 return false;
1164 }
1165 }
1166 "neq" => {
1167 if value_eq(value, &filter.value) {
1168 return false;
1169 }
1170 }
1171 "contains" => {
1172 if !value_contains(value, &filter.value) {
1173 return false;
1174 }
1175 }
1176 "gt" => {
1177 if !value_gt(value, &filter.value) {
1178 return false;
1179 }
1180 }
1181 "lt" => {
1182 if !value_lt(value, &filter.value) {
1183 return false;
1184 }
1185 }
1186 "in" => {
1187 let list = filter
1188 .value
1189 .as_array()
1190 .cloned()
1191 .unwrap_or_else(|| vec![filter.value.clone()]);
1192 if !list.iter().any(|cmp| value_eq(value, cmp)) {
1193 return false;
1194 }
1195 }
1196 _ => {}
1197 }
1198 }
1199 }
1200 }
1201 true
1202}
1203
1204fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1205 match (cell, cmp) {
1206 (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1207 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1208 v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1209 }
1210 (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1211 .parse::<f64>()
1212 .map(|f| (*n - f).abs() < f64::EPSILON)
1213 .unwrap_or(false),
1214 (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1215 (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1216 t.eq_ignore_ascii_case("true") == *b
1217 }
1218 (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1219 (None, serde_json::Value::Null) => true,
1220 _ => false,
1221 }
1222}
1223
1224fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1225 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1226 return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1227 }
1228 false
1229}
1230
1231fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1232 match (cell, cmp) {
1233 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1234 v.as_f64().is_some_and(|f| *n > f)
1235 }
1236 _ => false,
1237 }
1238}
1239
1240fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1241 match (cell, cmp) {
1242 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1243 v.as_f64().is_some_and(|f| *n < f)
1244 }
1245 _ => false,
1246 }
1247}
1248
1249fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1250 if rows.is_empty() {
1251 return rows;
1252 }
1253
1254 match mode {
1255 "distributed" => {
1256 if limit == 0 {
1257 return Vec::new();
1258 }
1259 let mut indices = Vec::new();
1260 let span = rows.len().saturating_sub(1);
1261 let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1262 let mut idx = offset;
1263 while idx < rows.len() && indices.len() < limit {
1264 indices.push(idx);
1265 idx = idx.saturating_add(step);
1266 if idx == indices.last().copied().unwrap_or(0) {
1267 idx += 1;
1268 }
1269 }
1270 if indices.len() < limit {
1271 let last_idx = rows.len().saturating_sub(1);
1272 if !indices.contains(&last_idx) {
1273 indices.push(last_idx);
1274 }
1275 }
1276 indices
1277 .into_iter()
1278 .filter_map(|i| rows.get(i).cloned())
1279 .collect()
1280 }
1281 "last" => {
1282 let start = rows.len().saturating_sub(limit + offset);
1283 rows.into_iter().skip(start + offset).take(limit).collect()
1284 }
1285 _ => rows.into_iter().skip(offset).take(limit).collect(),
1286 }
1287}
1288
1289fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1290 let mut summaries = Vec::new();
1291 for header in headers {
1292 let mut nulls = 0u32;
1293 let mut distinct_set: HashSet<String> = HashSet::new();
1294 let mut values: Vec<f64> = Vec::new();
1295 let mut top_counts: HashMap<String, u32> = HashMap::new();
1296
1297 for row in rows {
1298 match row.get(header) {
1299 Some(Some(CellValue::Number(n))) => {
1300 values.push(*n);
1301 let key = n.to_string();
1302 *top_counts.entry(key).or_default() += 1;
1303 }
1304 Some(Some(CellValue::Text(s))) => {
1305 distinct_set.insert(s.clone());
1306 *top_counts.entry(s.clone()).or_default() += 1;
1307 }
1308 Some(Some(CellValue::Bool(b))) => {
1309 let key = b.to_string();
1310 distinct_set.insert(key.clone());
1311 *top_counts.entry(key).or_default() += 1;
1312 }
1313 Some(Some(CellValue::Date(d))) => {
1314 distinct_set.insert(d.clone());
1315 *top_counts.entry(d.clone()).or_default() += 1;
1316 }
1317 Some(Some(CellValue::Error(e))) => {
1318 distinct_set.insert(e.clone());
1319 *top_counts.entry(e.clone()).or_default() += 1;
1320 }
1321 _ => {
1322 nulls += 1;
1323 }
1324 }
1325 }
1326
1327 let inferred_type = if !values.is_empty() {
1328 "number"
1329 } else if !distinct_set.is_empty() {
1330 "text"
1331 } else {
1332 "unknown"
1333 }
1334 .to_string();
1335
1336 let min = values.iter().cloned().reduce(f64::min);
1337 let max = values.iter().cloned().reduce(f64::max);
1338 let mean = if values.is_empty() {
1339 None
1340 } else {
1341 Some(values.iter().sum::<f64>() / values.len() as f64)
1342 };
1343
1344 let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1345 top_values.sort_by(|a, b| b.1.cmp(&a.1));
1346 let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1347
1348 summaries.push(ColumnTypeSummary {
1349 name: header.clone(),
1350 inferred_type,
1351 nulls,
1352 distinct: distinct_set.len() as u32,
1353 top_values,
1354 min,
1355 max,
1356 mean,
1357 });
1358 }
1359 summaries
1360}
1361
1362#[allow(clippy::too_many_arguments)]
1363fn collect_value_matches(
1364 sheet: &umya_spreadsheet::Worksheet,
1365 sheet_name: &str,
1366 mode: &FindMode,
1367 match_mode: &str,
1368 direction: &LabelDirection,
1369 params: &FindValueParams,
1370 region: Option<&DetectedRegion>,
1371 default_bounds: ((u32, u32), (u32, u32)),
1372) -> Result<Vec<FindValueMatch>> {
1373 let mut results = Vec::new();
1374 let regex = if match_mode == "regex" {
1375 Regex::new(¶ms.query).ok()
1376 } else {
1377 None
1378 };
1379 let bounds = region
1380 .as_ref()
1381 .and_then(|r| parse_range(&r.bounds))
1382 .unwrap_or(default_bounds);
1383
1384 let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1385
1386 for cell in sheet.get_cell_collection() {
1387 let coord = cell.get_coordinate();
1388 let col = *coord.get_col_num();
1389 let row = *coord.get_row_num();
1390 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1391 continue;
1392 }
1393 if params.search_headers_only && row != header_row {
1394 continue;
1395 }
1396
1397 let value = cell_to_value(cell);
1398 if let Some(ref allowed) = params.value_types
1399 && !value_type_matches(&value, allowed)
1400 {
1401 continue;
1402 }
1403 if matches!(mode, FindMode::Value) {
1404 if !value_matches(
1405 &value,
1406 ¶ms.query,
1407 match_mode,
1408 params.case_sensitive,
1409 ®ex,
1410 ) {
1411 continue;
1412 }
1413 } else if let Some(label) = ¶ms.label {
1414 if !label_matches(cell, label, match_mode, params.case_sensitive, ®ex) {
1415 continue;
1416 }
1417 } else {
1418 continue;
1419 }
1420
1421 let neighbors = collect_neighbors(sheet, row, col);
1422 let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1423 let target_value = match direction {
1424 LabelDirection::Right => sheet.get_cell((col + 1, row)),
1425 LabelDirection::Below => sheet.get_cell((col, row + 1)),
1426 LabelDirection::Any => sheet
1427 .get_cell((col + 1, row))
1428 .or_else(|| sheet.get_cell((col, row + 1))),
1429 }
1430 .and_then(cell_to_value);
1431 if target_value.is_none() {
1432 continue;
1433 }
1434 (
1435 Some(LabelHit {
1436 label_address: coord.get_coordinate(),
1437 label: label_from_cell(cell),
1438 }),
1439 target_value,
1440 )
1441 } else {
1442 (None, value.clone())
1443 };
1444
1445 let row_context = build_row_context(sheet, row, col);
1446
1447 results.push(FindValueMatch {
1448 address: coord.get_coordinate(),
1449 sheet_name: sheet_name.to_string(),
1450 value: match_value,
1451 row_context,
1452 neighbors,
1453 label_hit,
1454 });
1455 }
1456
1457 Ok(results)
1458}
1459
1460fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1461 cell_to_value(cell)
1462 .map(|v| match v {
1463 CellValue::Text(s) => s,
1464 CellValue::Number(n) => n.to_string(),
1465 CellValue::Bool(b) => b.to_string(),
1466 CellValue::Date(d) => d,
1467 CellValue::Error(e) => e,
1468 })
1469 .unwrap_or_else(|| cell.get_value().to_string())
1470}
1471
1472fn value_matches(
1473 value: &Option<CellValue>,
1474 query: &str,
1475 mode: &str,
1476 case_sensitive: bool,
1477 regex: &Option<Regex>,
1478) -> bool {
1479 if value.is_none() {
1480 return false;
1481 }
1482 let haystack = cell_value_to_string_lower(value.clone().unwrap());
1483 let needle = if case_sensitive {
1484 query.to_string()
1485 } else {
1486 query.to_ascii_lowercase()
1487 };
1488
1489 match mode {
1490 "exact" => haystack == needle,
1491 "regex" => regex
1492 .as_ref()
1493 .map(|re| re.is_match(&haystack))
1494 .unwrap_or(false),
1495 _ => haystack.contains(&needle),
1496 }
1497}
1498
1499fn label_matches(
1500 cell: &umya_spreadsheet::Cell,
1501 label: &str,
1502 mode: &str,
1503 case_sensitive: bool,
1504 regex: &Option<Regex>,
1505) -> bool {
1506 let value = cell_to_value(cell);
1507 if value.is_none() {
1508 return false;
1509 }
1510 let haystack = cell_value_to_string_lower(value.unwrap());
1511 let needle = if case_sensitive {
1512 label.to_string()
1513 } else {
1514 label.to_ascii_lowercase()
1515 };
1516 match mode {
1517 "exact" => haystack == needle,
1518 "regex" => regex
1519 .as_ref()
1520 .map(|re| re.is_match(&haystack))
1521 .unwrap_or(false),
1522 _ => haystack.contains(&needle),
1523 }
1524}
1525
1526fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1527 if value.is_none() {
1528 return allowed.iter().any(|v| v == "null");
1529 }
1530 match value.as_ref().unwrap() {
1531 CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1532 CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1533 CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1534 CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1535 CellValue::Error(_) => true,
1536 }
1537}
1538
1539fn collect_neighbors(
1540 sheet: &umya_spreadsheet::Worksheet,
1541 row: u32,
1542 col: u32,
1543) -> Option<NeighborValues> {
1544 Some(NeighborValues {
1545 left: if col > 1 {
1546 sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1547 } else {
1548 None
1549 },
1550 right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1551 up: if row > 1 {
1552 sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1553 } else {
1554 None
1555 },
1556 down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1557 })
1558}
1559
1560fn build_row_context(
1561 sheet: &umya_spreadsheet::Worksheet,
1562 row: u32,
1563 col: u32,
1564) -> Option<RowContext> {
1565 let header_value = sheet
1566 .get_cell((col, 1u32))
1567 .and_then(cell_to_value)
1568 .map(|v| match v {
1569 CellValue::Text(s) => s,
1570 CellValue::Number(n) => n.to_string(),
1571 CellValue::Bool(b) => b.to_string(),
1572 CellValue::Date(d) => d,
1573 CellValue::Error(e) => e,
1574 })
1575 .unwrap_or_else(|| format!("Col{}", col));
1576 let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1577 Some(RowContext {
1578 headers: vec![header_value],
1579 values: vec![value],
1580 })
1581}
1582
1583#[derive(Debug, Deserialize, JsonSchema)]
1584pub struct FindFormulaParams {
1585 pub workbook_id: WorkbookId,
1586 pub query: String,
1587 pub sheet_name: Option<String>,
1588 #[serde(default)]
1589 pub case_sensitive: bool,
1590}
1591
1592pub async fn find_formula(
1593 state: Arc<AppState>,
1594 params: FindFormulaParams,
1595) -> Result<FindFormulaResponse> {
1596 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1597 let query = if params.case_sensitive {
1598 params.query.clone()
1599 } else {
1600 params.query.to_ascii_lowercase()
1601 };
1602 let mut matches = Vec::new();
1603
1604 let sheet_names: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1605 vec![sheet.clone()]
1606 } else {
1607 workbook.sheet_names()
1608 };
1609
1610 for sheet_name in sheet_names {
1611 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1612 collect_formula_matches(sheet, &sheet_name, &query, params.case_sensitive)
1613 })?;
1614 matches.extend(sheet_matches);
1615 }
1616
1617 let response = FindFormulaResponse {
1618 workbook_id: workbook.id.clone(),
1619 workbook_short_id: workbook.short_id.clone(),
1620 matches,
1621 };
1622 Ok(response)
1623}
1624
1625#[derive(Debug, Deserialize, JsonSchema)]
1626pub struct ScanVolatilesParams {
1627 pub workbook_id: WorkbookId,
1628 pub sheet_name: Option<String>,
1629}
1630
1631pub async fn scan_volatiles(
1632 state: Arc<AppState>,
1633 params: ScanVolatilesParams,
1634) -> Result<VolatileScanResponse> {
1635 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1636 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1637 vec![sheet.clone()]
1638 } else {
1639 workbook.sheet_names()
1640 };
1641
1642 let mut items = Vec::new();
1643 let mut truncated = false;
1644
1645 for sheet_name in target_sheets {
1646 let graph = workbook.formula_graph(&sheet_name)?;
1647 for group in graph.groups() {
1648 if !group.is_volatile {
1649 continue;
1650 }
1651 for address in group.addresses.iter().take(50) {
1652 items.push(VolatileScanEntry {
1653 address: address.clone(),
1654 sheet_name: sheet_name.clone(),
1655 function: "volatile".to_string(),
1656 note: Some(group.formula.clone()),
1657 });
1658 }
1659 if group.addresses.len() > 50 {
1660 truncated = true;
1661 }
1662 }
1663 }
1664
1665 let response = VolatileScanResponse {
1666 workbook_id: workbook.id.clone(),
1667 workbook_short_id: workbook.short_id.clone(),
1668 items,
1669 truncated,
1670 };
1671 Ok(response)
1672}
1673
1674#[derive(Debug, Deserialize, JsonSchema)]
1675pub struct SheetStylesParams {
1676 pub workbook_id: WorkbookId,
1677 pub sheet_name: String,
1678}
1679
1680pub async fn sheet_styles(
1681 state: Arc<AppState>,
1682 params: SheetStylesParams,
1683) -> Result<SheetStylesResponse> {
1684 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1685 let entry = workbook.get_sheet_metrics(¶ms.sheet_name)?;
1686
1687 let styles = entry
1688 .metrics
1689 .style_map
1690 .iter()
1691 .map(|(style_id, usage)| StyleSummary {
1692 style_id: style_id.clone(),
1693 occurrences: usage.occurrences,
1694 tags: usage.tags.clone(),
1695 example_cells: usage.example_cells.clone(),
1696 })
1697 .collect();
1698
1699 let response = SheetStylesResponse {
1700 workbook_id: workbook.id.clone(),
1701 workbook_short_id: workbook.short_id.clone(),
1702 sheet_name: params.sheet_name.clone(),
1703 styles,
1704 conditional_rules: Vec::new(),
1705 };
1706 Ok(response)
1707}
1708
1709pub async fn range_values(
1710 state: Arc<AppState>,
1711 params: RangeValuesParams,
1712) -> Result<RangeValuesResponse> {
1713 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1714 let include_headers = params.include_headers.unwrap_or(false);
1715 let values = workbook.with_sheet(¶ms.sheet_name, |sheet| {
1716 params
1717 .ranges
1718 .iter()
1719 .filter_map(|range| {
1720 parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
1721 let mut rows = Vec::new();
1722 for r in start_row..=end_row {
1723 let mut row_vals = Vec::new();
1724 for c in start_col..=end_col {
1725 if include_headers && r == start_row && start_row == 1 {
1726 row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
1727 } else {
1728 row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
1729 }
1730 }
1731 rows.push(row_vals);
1732 }
1733 RangeValuesEntry {
1734 range: range.clone(),
1735 rows,
1736 }
1737 })
1738 })
1739 .collect()
1740 })?;
1741
1742 Ok(RangeValuesResponse {
1743 workbook_id: workbook.id.clone(),
1744 workbook_short_id: workbook.short_id.clone(),
1745 sheet_name: params.sheet_name,
1746 values,
1747 })
1748}
1749
1750pub async fn find_value(
1751 state: Arc<AppState>,
1752 params: FindValueParams,
1753) -> Result<FindValueResponse> {
1754 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1755 let mut matches = Vec::new();
1756 let mut truncated = false;
1757 let mode = params.mode.clone().unwrap_or_else(|| {
1758 if params.label.is_some() {
1759 FindMode::Label
1760 } else {
1761 FindMode::Value
1762 }
1763 });
1764 let match_mode = params
1765 .match_mode
1766 .as_deref()
1767 .unwrap_or("contains")
1768 .to_ascii_lowercase();
1769 let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
1770
1771 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1772 vec![sheet.clone()]
1773 } else {
1774 workbook.sheet_names()
1775 };
1776
1777 for sheet_name in target_sheets {
1778 let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
1779 let default_bounds = (
1780 (1, 1),
1781 (
1782 metrics_entry.metrics.column_count.max(1),
1783 metrics_entry.metrics.row_count.max(1),
1784 ),
1785 );
1786 let region_bounds = params
1787 .region_id
1788 .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
1789 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1790 collect_value_matches(
1791 sheet,
1792 &sheet_name,
1793 &mode,
1794 &match_mode,
1795 &direction,
1796 ¶ms,
1797 region_bounds.as_ref(),
1798 default_bounds,
1799 )
1800 })??;
1801 matches.extend(sheet_matches);
1802 if matches.len() as u32 >= params.limit {
1803 truncated = true;
1804 matches.truncate(params.limit as usize);
1805 break;
1806 }
1807 }
1808
1809 Ok(FindValueResponse {
1810 workbook_id: workbook.id.clone(),
1811 workbook_short_id: workbook.short_id.clone(),
1812 matches,
1813 truncated,
1814 })
1815}
1816
1817pub async fn read_table(
1818 state: Arc<AppState>,
1819 params: ReadTableParams,
1820) -> Result<ReadTableResponse> {
1821 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1822 let resolved = resolve_table_target(&workbook, ¶ms)?;
1823 let limit = params.limit.unwrap_or(100) as usize;
1824 let offset = params.offset.unwrap_or(0) as usize;
1825 let sample_mode = params
1826 .sample_mode
1827 .clone()
1828 .unwrap_or_else(|| "first".to_string());
1829
1830 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1831 extract_table_rows(
1832 sheet,
1833 &resolved,
1834 params.header_row,
1835 params.header_rows,
1836 params.columns.clone(),
1837 params.filters.clone(),
1838 limit,
1839 offset,
1840 &sample_mode,
1841 )
1842 })??;
1843
1844 let has_more = offset + rows.len() < total_rows as usize;
1845
1846 Ok(ReadTableResponse {
1847 workbook_id: workbook.id.clone(),
1848 workbook_short_id: workbook.short_id.clone(),
1849 sheet_name: resolved.sheet_name,
1850 table_name: resolved.table_name,
1851 headers,
1852 rows,
1853 total_rows,
1854 has_more,
1855 })
1856}
1857
1858pub async fn table_profile(
1859 state: Arc<AppState>,
1860 params: TableProfileParams,
1861) -> Result<TableProfileResponse> {
1862 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1863 let resolved = resolve_table_target(
1864 &workbook,
1865 &ReadTableParams {
1866 workbook_id: params.workbook_id.clone(),
1867 sheet_name: params.sheet_name.clone(),
1868 table_name: params.table_name.clone(),
1869 region_id: params.region_id,
1870 range: None,
1871 header_row: None,
1872 header_rows: None,
1873 columns: None,
1874 filters: None,
1875 sample_mode: params.sample_mode.clone(),
1876 limit: params.sample_size,
1877 offset: Some(0),
1878 },
1879 )?;
1880
1881 let sample_size = params.sample_size.unwrap_or(10) as usize;
1882 let sample_mode = params
1883 .sample_mode
1884 .clone()
1885 .unwrap_or_else(|| "distributed".to_string());
1886
1887 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1888 extract_table_rows(
1889 sheet,
1890 &resolved,
1891 None,
1892 None,
1893 None,
1894 None,
1895 sample_size,
1896 0,
1897 &sample_mode,
1898 )
1899 })??;
1900
1901 let column_types = summarize_columns(&headers, &rows);
1902
1903 Ok(TableProfileResponse {
1904 workbook_id: workbook.id.clone(),
1905 workbook_short_id: workbook.short_id.clone(),
1906 sheet_name: resolved.sheet_name,
1907 table_name: resolved.table_name,
1908 headers,
1909 column_types,
1910 row_count: total_rows,
1911 samples: rows,
1912 notes: Vec::new(),
1913 })
1914}
1915
1916#[derive(Debug, Deserialize, JsonSchema)]
1917pub struct ManifestStubParams {
1918 pub workbook_id: WorkbookId,
1919 pub sheet_filter: Option<String>,
1920}
1921
1922pub async fn get_manifest_stub(
1923 state: Arc<AppState>,
1924 params: ManifestStubParams,
1925) -> Result<ManifestStubResponse> {
1926 let workbook = state.open_workbook(¶ms.workbook_id).await?;
1927 let mut summaries = workbook.list_summaries()?;
1928
1929 if let Some(filter) = ¶ms.sheet_filter {
1930 summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
1931 }
1932
1933 let sheets = summaries
1934 .into_iter()
1935 .map(|summary| ManifestSheetStub {
1936 sheet_name: summary.name.clone(),
1937 classification: summary.classification.clone(),
1938 candidate_expectations: vec![format!(
1939 "Review {} sheet for expectation candidates",
1940 format!("{:?}", summary.classification).to_ascii_lowercase()
1941 )],
1942 notes: summary.style_tags,
1943 })
1944 .collect();
1945
1946 let response = ManifestStubResponse {
1947 workbook_id: workbook.id.clone(),
1948 workbook_short_id: workbook.short_id.clone(),
1949 slug: workbook.slug.clone(),
1950 sheets,
1951 };
1952 Ok(response)
1953}
1954
1955#[derive(Debug, Deserialize, JsonSchema)]
1956pub struct CloseWorkbookParams {
1957 pub workbook_id: WorkbookId,
1958}
1959
1960pub async fn close_workbook(
1961 state: Arc<AppState>,
1962 params: CloseWorkbookParams,
1963) -> Result<CloseWorkbookResponse> {
1964 state.close_workbook(¶ms.workbook_id)?;
1965 Ok(CloseWorkbookResponse {
1966 workbook_id: params.workbook_id.clone(),
1967 message: format!("workbook {} evicted", params.workbook_id.as_str()),
1968 })
1969}
1970fn collect_formula_matches(
1971 sheet: &umya_spreadsheet::Worksheet,
1972 sheet_name: &str,
1973 query: &str,
1974 case_sensitive: bool,
1975) -> Vec<FindFormulaMatch> {
1976 use crate::workbook::cell_to_value;
1977
1978 let mut results = Vec::new();
1979 for cell in sheet.get_cell_collection() {
1980 if !cell.is_formula() {
1981 continue;
1982 }
1983 let formula = cell.get_formula();
1984 let haystack = if case_sensitive {
1985 formula.to_string()
1986 } else {
1987 formula.to_ascii_lowercase()
1988 };
1989 if !haystack.contains(query) {
1990 continue;
1991 }
1992 let coord = cell.get_coordinate();
1993 let column = *coord.get_col_num();
1994 let row = *coord.get_row_num();
1995 let columns = vec![column];
1996 let context_row = build_row_snapshot(sheet, row, &columns, true, false);
1997 let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
1998
1999 results.push(FindFormulaMatch {
2000 address: coord.get_coordinate(),
2001 sheet_name: sheet_name.to_string(),
2002 formula: formula.to_string(),
2003 cached_value: if cell.is_formula() {
2004 cell_to_value(cell)
2005 } else {
2006 None
2007 },
2008 context: vec![header_row, context_row],
2009 });
2010 }
2011 results
2012}
2013
2014#[derive(Clone)]
2015struct TraceFormulaInfo {
2016 fingerprint: String,
2017 formula: String,
2018}
2019
2020#[derive(Clone)]
2021struct TraceEdgeRaw {
2022 from: String,
2023 to: String,
2024 neighbor: String,
2025}
2026
2027#[derive(Clone)]
2028struct LayerLinks {
2029 depth: u32,
2030 edges: Vec<TraceEdgeRaw>,
2031}
2032
2033#[derive(Clone)]
2034struct NeighborDetail {
2035 address: String,
2036 column: Option<u32>,
2037 row: Option<u32>,
2038 kind: TraceCellKind,
2039 value: Option<CellValue>,
2040 formula: Option<String>,
2041 fingerprint: Option<String>,
2042 external: bool,
2043}
2044
2045fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2046 let mut map = HashMap::new();
2047 for group in graph.groups() {
2048 for address in group.addresses.clone() {
2049 map.insert(
2050 address.to_ascii_uppercase(),
2051 TraceFormulaInfo {
2052 fingerprint: group.fingerprint.clone(),
2053 formula: group.formula.clone(),
2054 },
2055 );
2056 }
2057 }
2058 map
2059}
2060
2061struct TraceConfig<'a> {
2062 direction: &'a TraceDirection,
2063 origin: &'a str,
2064 sheet_name: &'a str,
2065 depth_limit: u32,
2066 page_size: usize,
2067}
2068
2069fn build_trace_layers(
2070 workbook: &WorkbookContext,
2071 graph: &FormulaGraph,
2072 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2073 config: &TraceConfig<'_>,
2074 cursor: Option<TraceCursor>,
2075) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2076 let layer_links =
2077 collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2078 let mut layers = Vec::new();
2079 let mut next_cursor = None;
2080 let mut notes = Vec::new();
2081 let focus_depth = cursor.as_ref().map(|c| c.depth);
2082
2083 for layer in layer_links {
2084 let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2085 let offset = cursor
2086 .as_ref()
2087 .filter(|c| c.depth == layer.depth)
2088 .map(|c| c.offset)
2089 .unwrap_or(0);
2090
2091 let mut node_set: HashSet<String> = HashSet::new();
2092 for edge in &layer.edges {
2093 node_set.insert(edge.neighbor.clone());
2094 }
2095 let mut nodes: Vec<String> = node_set.into_iter().collect();
2096 nodes.sort_by(|a, b| compare_addresses(a, b));
2097
2098 let details = workbook.with_sheet(config.sheet_name, |sheet| {
2099 collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2100 })?;
2101 let total_nodes = details.len();
2102 let start = offset.min(total_nodes);
2103 let end = if produce_edges {
2104 (start + config.page_size).min(total_nodes)
2105 } else {
2106 start
2107 };
2108 let selected_slice = if produce_edges {
2109 &details[start..end]
2110 } else {
2111 &details[0..0]
2112 };
2113 let selected_addresses: HashSet<String> = selected_slice
2114 .iter()
2115 .map(|detail| detail.address.clone())
2116 .collect();
2117
2118 let summary = build_layer_summary(&details);
2119 let range_highlights = build_range_highlights(&details);
2120 let group_highlights = build_formula_group_highlights(&details);
2121 let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2122
2123 let highlights = TraceLayerHighlights {
2124 top_ranges: range_highlights.clone(),
2125 top_formula_groups: group_highlights.clone(),
2126 notable_cells,
2127 };
2128
2129 let edges = if produce_edges {
2130 build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2131 } else {
2132 Vec::new()
2133 };
2134
2135 let has_more = produce_edges && end < total_nodes;
2136 if has_more && next_cursor.is_none() {
2137 next_cursor = Some(TraceCursor {
2138 depth: layer.depth,
2139 offset: end,
2140 });
2141 }
2142 if has_more {
2143 notes.push(format!(
2144 "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2145 layer.depth, end, total_nodes, layer.depth, end
2146 ));
2147 }
2148
2149 layers.push(TraceLayer {
2150 depth: layer.depth,
2151 summary,
2152 highlights,
2153 edges,
2154 has_more,
2155 });
2156 }
2157
2158 Ok((layers, next_cursor, notes))
2159}
2160
2161fn collect_layer_links(
2162 graph: &FormulaGraph,
2163 direction: &TraceDirection,
2164 origin: &str,
2165 depth_limit: u32,
2166) -> Vec<LayerLinks> {
2167 let mut visited: HashSet<String> = HashSet::new();
2168 visited.insert(origin.to_string());
2169 let mut frontier = vec![origin.to_string()];
2170 let mut layers = Vec::new();
2171
2172 for depth in 1..=depth_limit {
2173 let mut next_frontier_set: HashSet<String> = HashSet::new();
2174 let mut edges = Vec::new();
2175
2176 for cell in &frontier {
2177 let neighbors = match direction {
2178 TraceDirection::Precedents => graph.precedents(cell),
2179 TraceDirection::Dependents => graph.dependents(cell),
2180 };
2181
2182 for neighbor in neighbors {
2183 let neighbor_upper = neighbor.to_ascii_uppercase();
2184 let edge = match direction {
2185 TraceDirection::Precedents => TraceEdgeRaw {
2186 from: cell.clone(),
2187 to: neighbor_upper.clone(),
2188 neighbor: neighbor_upper.clone(),
2189 },
2190 TraceDirection::Dependents => TraceEdgeRaw {
2191 from: neighbor_upper.clone(),
2192 to: cell.clone(),
2193 neighbor: neighbor_upper.clone(),
2194 },
2195 };
2196 edges.push(edge);
2197 if visited.insert(neighbor_upper.clone()) {
2198 next_frontier_set.insert(neighbor_upper);
2199 }
2200 }
2201 }
2202
2203 if edges.is_empty() {
2204 break;
2205 }
2206
2207 layers.push(LayerLinks { depth, edges });
2208 if next_frontier_set.is_empty() {
2209 break;
2210 }
2211 let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2212 next_frontier.sort();
2213 frontier = next_frontier;
2214 }
2215
2216 layers
2217}
2218
2219fn collect_neighbor_details(
2220 sheet: &umya_spreadsheet::Worksheet,
2221 current_sheet: &str,
2222 addresses: &[String],
2223 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2224) -> Vec<NeighborDetail> {
2225 let mut details = Vec::new();
2226 for address in addresses {
2227 let (sheet_part, cell_part) = split_sheet_and_cell(address);
2228 let normalized_sheet = sheet_part
2229 .as_ref()
2230 .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2231 let is_external = normalized_sheet
2232 .as_ref()
2233 .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2234 .unwrap_or(false);
2235
2236 let Some(cell_ref) = cell_part else {
2237 details.push(NeighborDetail {
2238 address: address.clone(),
2239 column: None,
2240 row: None,
2241 kind: TraceCellKind::External,
2242 value: None,
2243 formula: None,
2244 fingerprint: None,
2245 external: true,
2246 });
2247 continue;
2248 };
2249
2250 let cell_ref_upper = cell_ref.to_ascii_uppercase();
2251
2252 if is_external {
2253 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2254 details.push(NeighborDetail {
2255 address: address.clone(),
2256 column: None,
2257 row: None,
2258 kind: TraceCellKind::External,
2259 value: None,
2260 formula: formula_info.map(|info| info.formula.clone()),
2261 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2262 external: true,
2263 });
2264 continue;
2265 }
2266
2267 let Some((col, row)) = parse_address(&cell_ref_upper) else {
2268 details.push(NeighborDetail {
2269 address: address.clone(),
2270 column: None,
2271 row: None,
2272 kind: TraceCellKind::External,
2273 value: None,
2274 formula: None,
2275 fingerprint: None,
2276 external: true,
2277 });
2278 continue;
2279 };
2280
2281 let cell_opt = sheet.get_cell((&col, &row));
2282 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2283 if let Some(cell) = cell_opt {
2284 let value = cell_to_value(cell);
2285 let kind = if cell.is_formula() {
2286 TraceCellKind::Formula
2287 } else if value.is_some() {
2288 TraceCellKind::Literal
2289 } else {
2290 TraceCellKind::Blank
2291 };
2292 details.push(NeighborDetail {
2293 address: address.clone(),
2294 column: Some(col),
2295 row: Some(row),
2296 kind,
2297 value,
2298 formula: formula_info.map(|info| info.formula.clone()),
2299 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2300 external: false,
2301 });
2302 } else {
2303 details.push(NeighborDetail {
2304 address: address.clone(),
2305 column: Some(col),
2306 row: Some(row),
2307 kind: TraceCellKind::Blank,
2308 value: None,
2309 formula: formula_info.map(|info| info.formula.clone()),
2310 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2311 external: false,
2312 });
2313 }
2314 }
2315 details
2316}
2317
2318fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2319 let mut summary = TraceLayerSummary {
2320 total_nodes: details.len(),
2321 formula_nodes: 0,
2322 value_nodes: 0,
2323 blank_nodes: 0,
2324 external_nodes: 0,
2325 unique_formula_groups: 0,
2326 };
2327
2328 let mut fingerprints: HashSet<String> = HashSet::new();
2329
2330 for detail in details {
2331 match detail.kind {
2332 TraceCellKind::Formula => {
2333 summary.formula_nodes += 1;
2334 if let Some(fp) = &detail.fingerprint {
2335 fingerprints.insert(fp.clone());
2336 }
2337 }
2338 TraceCellKind::Literal => summary.value_nodes += 1,
2339 TraceCellKind::Blank => summary.blank_nodes += 1,
2340 TraceCellKind::External => summary.external_nodes += 1,
2341 }
2342 }
2343
2344 summary.unique_formula_groups = fingerprints.len();
2345 summary
2346}
2347
2348fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2349 let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2350 for detail in details {
2351 if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2352 let entry = aggregates
2353 .entry(fingerprint.clone())
2354 .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2355 entry.1 += 1;
2356 if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2357 entry.2.push(detail.address.clone());
2358 }
2359 }
2360 }
2361
2362 let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2363 .into_iter()
2364 .map(
2365 |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2366 fingerprint,
2367 formula,
2368 count,
2369 sample_addresses,
2370 },
2371 )
2372 .collect();
2373
2374 highlights.sort_by(|a, b| b.count.cmp(&a.count));
2375 highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2376 highlights
2377}
2378
2379fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2380 let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2381 for detail in details {
2382 if let (Some(col), Some(_row)) = (detail.column, detail.row)
2383 && !detail.external
2384 {
2385 by_column.entry(col).or_default().push(detail);
2386 }
2387 }
2388
2389 for column_entries in by_column.values_mut() {
2390 column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2391 }
2392
2393 let mut ranges = Vec::new();
2394 for entries in by_column.values() {
2395 let mut current: Vec<&NeighborDetail> = Vec::new();
2396 for detail in entries {
2397 if current.is_empty() {
2398 current.push(detail);
2399 continue;
2400 }
2401 let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2402 if detail.row.unwrap_or(0) == prev_row + 1 {
2403 current.push(detail);
2404 } else {
2405 if current.len() >= TRACE_RANGE_THRESHOLD {
2406 ranges.push(make_range_highlight(¤t));
2407 }
2408 current.clear();
2409 current.push(detail);
2410 }
2411 }
2412 if current.len() >= TRACE_RANGE_THRESHOLD {
2413 ranges.push(make_range_highlight(¤t));
2414 }
2415 }
2416
2417 ranges.sort_by(|a, b| b.count.cmp(&a.count));
2418 ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2419 ranges
2420}
2421
2422fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2423 let mut literals = 0usize;
2424 let mut formulas = 0usize;
2425 let mut blanks = 0usize;
2426 let mut sample_values = Vec::new();
2427 let mut sample_formulas = Vec::new();
2428 let mut sample_addresses = Vec::new();
2429
2430 for detail in details {
2431 match detail.kind {
2432 TraceCellKind::Formula => {
2433 formulas += 1;
2434 if let Some(formula) = &detail.formula
2435 && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2436 && !sample_formulas.contains(formula)
2437 {
2438 sample_formulas.push(formula.clone());
2439 }
2440 }
2441 TraceCellKind::Literal => {
2442 literals += 1;
2443 if let Some(value) = &detail.value
2444 && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2445 {
2446 sample_values.push(value.clone());
2447 }
2448 }
2449 TraceCellKind::Blank => blanks += 1,
2450 TraceCellKind::External => {}
2451 }
2452 if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2453 sample_addresses.push(detail.address.clone());
2454 }
2455 }
2456
2457 TraceRangeHighlight {
2458 start: details
2459 .first()
2460 .map(|d| d.address.clone())
2461 .unwrap_or_default(),
2462 end: details
2463 .last()
2464 .map(|d| d.address.clone())
2465 .unwrap_or_default(),
2466 count: details.len(),
2467 literals,
2468 formulas,
2469 blanks,
2470 sample_values,
2471 sample_formulas,
2472 sample_addresses,
2473 }
2474}
2475
2476fn build_notable_cells(
2477 details: &[NeighborDetail],
2478 ranges: &[TraceRangeHighlight],
2479 groups: &[TraceFormulaGroupHighlight],
2480) -> Vec<TraceCellHighlight> {
2481 let mut exclude: HashSet<String> = HashSet::new();
2482 for range in ranges {
2483 exclude.insert(range.start.clone());
2484 exclude.insert(range.end.clone());
2485 for addr in &range.sample_addresses {
2486 exclude.insert(addr.clone());
2487 }
2488 }
2489 for group in groups {
2490 for addr in &group.sample_addresses {
2491 exclude.insert(addr.clone());
2492 }
2493 }
2494
2495 let mut highlights = Vec::new();
2496 let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
2497
2498 for detail in details {
2499 if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
2500 break;
2501 }
2502 if exclude.contains(&detail.address) {
2503 continue;
2504 }
2505 let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
2506 if *counter >= 2 && detail.kind != TraceCellKind::External {
2507 continue;
2508 }
2509 highlights.push(TraceCellHighlight {
2510 address: detail.address.clone(),
2511 kind: detail.kind.clone(),
2512 value: detail.value.clone(),
2513 formula: detail.formula.clone(),
2514 });
2515 *counter += 1;
2516 }
2517
2518 highlights
2519}
2520
2521fn build_edges_for_layer(
2522 raw_edges: &[TraceEdgeRaw],
2523 selected: &HashSet<String>,
2524 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2525) -> Vec<FormulaTraceEdge> {
2526 let mut edges = Vec::new();
2527 for edge in raw_edges {
2528 if selected.contains(&edge.neighbor) {
2529 let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
2530 .map(|info| info.formula.clone());
2531 edges.push(FormulaTraceEdge {
2532 from: edge.from.clone(),
2533 to: edge.to.clone(),
2534 formula,
2535 note: None,
2536 });
2537 }
2538 }
2539 edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
2540 edges
2541}
2542
2543fn lookup_formula_info<'a>(
2544 lookup: &'a HashMap<String, TraceFormulaInfo>,
2545 cell_ref: &str,
2546 original: &str,
2547) -> Option<&'a TraceFormulaInfo> {
2548 if let Some(info) = lookup.get(cell_ref) {
2549 return Some(info);
2550 }
2551 if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
2552 let upper = cell.to_ascii_uppercase();
2553 return lookup.get(&upper);
2554 }
2555 None
2556}
2557
2558fn compare_addresses(left: &str, right: &str) -> Ordering {
2559 let (sheet_left, cell_left) = split_sheet_and_cell(left);
2560 let (sheet_right, cell_right) = split_sheet_and_cell(right);
2561
2562 let sheet_left_key = sheet_left
2563 .as_ref()
2564 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2565 .unwrap_or_default();
2566 let sheet_right_key = sheet_right
2567 .as_ref()
2568 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2569 .unwrap_or_default();
2570
2571 match sheet_left_key.cmp(&sheet_right_key) {
2572 Ordering::Equal => {
2573 let left_core = cell_left.unwrap_or_else(|| left.to_string());
2574 let right_core = cell_right.unwrap_or_else(|| right.to_string());
2575 let left_coords = parse_address(&left_core.to_ascii_uppercase());
2576 let right_coords = parse_address(&right_core.to_ascii_uppercase());
2577 match (left_coords, right_coords) {
2578 (Some((lc, lr)), Some((rc, rr))) => lc
2579 .cmp(&rc)
2580 .then_with(|| lr.cmp(&rr))
2581 .then_with(|| left_core.cmp(&right_core)),
2582 _ => left_core.cmp(&right_core),
2583 }
2584 }
2585 other => other,
2586 }
2587}
2588
2589fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
2590 if let Some(idx) = address.rfind('!') {
2591 let sheet = address[..idx].to_string();
2592 let cell = address[idx + 1..].to_string();
2593 (Some(sheet), Some(cell))
2594 } else {
2595 (None, Some(address.to_string()))
2596 }
2597}
2598
2599fn clean_sheet_name(sheet: &str) -> String {
2600 let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
2601 let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
2602 after_bracket
2603 .trim_matches(|c| c == '\'' || c == '"')
2604 .to_string()
2605}