1pub mod filters;
2#[cfg(feature = "recalc")]
3pub mod fork;
4
5use crate::analysis::{formula::FormulaGraph, stats};
6use crate::model::*;
7use crate::state::AppState;
8use crate::workbook::{WorkbookContext, cell_to_value};
9use anyhow::{Result, anyhow};
10use regex::Regex;
11use schemars::JsonSchema;
12use serde::Deserialize;
13use std::cmp::Ordering;
14use std::collections::{BTreeMap, HashMap, HashSet};
15use std::sync::Arc;
16
17const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
18const TRACE_PAGE_MIN: usize = 5;
19const TRACE_PAGE_MAX: usize = 200;
20const TRACE_RANGE_THRESHOLD: usize = 4;
21const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
22const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
23const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
24const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
25const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
26const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
27const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
28
29pub async fn list_workbooks(
30 state: Arc<AppState>,
31 params: ListWorkbooksParams,
32) -> Result<WorkbookListResponse> {
33 let filter = params.into_filter()?;
34 state.list_workbooks(filter)
35}
36
37pub async fn describe_workbook(
38 state: Arc<AppState>,
39 params: DescribeWorkbookParams,
40) -> Result<WorkbookDescription> {
41 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
42 let desc = workbook.describe();
43 Ok(desc)
44}
45
46#[derive(Debug, Deserialize, JsonSchema)]
47pub struct ListWorkbooksParams {
48 pub slug_prefix: Option<String>,
49 pub folder: Option<String>,
50 pub path_glob: Option<String>,
51}
52
53impl ListWorkbooksParams {
54 fn into_filter(self) -> Result<filters::WorkbookFilter> {
55 filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
56 }
57}
58
59#[derive(Debug, Deserialize, JsonSchema)]
60pub struct DescribeWorkbookParams {
61 #[serde(alias = "workbook_id")]
62 pub workbook_or_fork_id: WorkbookId,
63}
64
65#[derive(Debug, Deserialize, JsonSchema)]
66pub struct ListSheetsParams {
67 #[serde(alias = "workbook_id")]
68 pub workbook_or_fork_id: WorkbookId,
69}
70
71pub async fn list_sheets(
72 state: Arc<AppState>,
73 params: ListSheetsParams,
74) -> Result<SheetListResponse> {
75 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
76 let summaries = workbook.list_summaries()?;
77 let response = SheetListResponse {
78 workbook_id: workbook.id.clone(),
79 workbook_short_id: workbook.short_id.clone(),
80 sheets: summaries,
81 };
82 Ok(response)
83}
84
85#[derive(Debug, Deserialize, JsonSchema)]
86pub struct SheetOverviewParams {
87 #[serde(alias = "workbook_id")]
88 pub workbook_or_fork_id: WorkbookId,
89 pub sheet_name: String,
90}
91
92#[derive(Debug, Deserialize, JsonSchema)]
93pub struct WorkbookSummaryParams {
94 #[serde(alias = "workbook_id")]
95 pub workbook_or_fork_id: WorkbookId,
96}
97
98pub async fn workbook_summary(
99 state: Arc<AppState>,
100 params: WorkbookSummaryParams,
101) -> Result<WorkbookSummaryResponse> {
102 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
103 let sheet_names = workbook.sheet_names();
104
105 let mut total_cells: u64 = 0;
106 let mut total_formulas: u64 = 0;
107 let mut breakdown = WorkbookBreakdown::default();
108 let mut region_counts = RegionCountSummary::default();
109 let mut entry_points: Vec<EntryPoint> = Vec::new();
110 let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
111
112 for sheet_name in &sheet_names {
113 let entry = workbook.get_sheet_metrics(sheet_name)?;
114 total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
115 total_formulas += entry.metrics.formula_cells as u64;
116 match entry.metrics.classification {
117 SheetClassification::Calculator => breakdown.calculator_sheets += 1,
118 SheetClassification::Metadata => breakdown.metadata_sheets += 1,
119 SheetClassification::Empty => {}
120 _ => breakdown.data_sheets += 1,
121 }
122
123 for region in &entry.detected_regions {
124 match region
125 .region_kind
126 .clone()
127 .unwrap_or(region.classification.clone())
128 {
129 RegionKind::Calculator => region_counts.calculator += 1,
130 RegionKind::Metadata => region_counts.metadata += 1,
131 RegionKind::Parameters => region_counts.parameters += 1,
132 RegionKind::Outputs => region_counts.outputs += 1,
133 RegionKind::Data | RegionKind::Table => region_counts.data += 1,
134 _ => region_counts.other += 1,
135 }
136 if region.confidence >= 0.3 {
137 let kind = region
138 .region_kind
139 .as_ref()
140 .unwrap_or(®ion.classification);
141 let priority = match kind {
142 RegionKind::Parameters => 0,
143 RegionKind::Data | RegionKind::Table => 1,
144 RegionKind::Outputs => 2,
145 RegionKind::Calculator => 3,
146 RegionKind::Metadata => 4,
147 _ => 5,
148 };
149 entry_points.push(EntryPoint {
150 sheet_name: sheet_name.clone(),
151 region_id: Some(region.id),
152 bounds: Some(region.bounds.clone()),
153 rationale: format!(
154 "{:?} region ({} rows, {:.0}% conf, p{})",
155 kind,
156 region.row_count,
157 region.confidence * 100.0,
158 priority
159 ),
160 });
161 }
162 }
163
164 if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
165 entry_points.push(EntryPoint {
166 sheet_name: sheet_name.clone(),
167 region_id: None,
168 bounds: None,
169 rationale: "Whole sheet is non-empty; start at top-left".to_string(),
170 });
171 }
172 }
173
174 entry_points.sort_by(|a, b| {
175 let pa = priority_from_rationale(&a.rationale);
176 let pb = priority_from_rationale(&b.rationale);
177 pa.cmp(&pb)
178 .then_with(|| {
179 a.bounds
180 .as_ref()
181 .map(|_| 1)
182 .cmp(&b.bounds.as_ref().map(|_| 1))
183 })
184 .then_with(|| a.sheet_name.cmp(&b.sheet_name))
185 });
186 entry_points.truncate(5);
187
188 let mut seen_ranges = std::collections::HashSet::new();
189 for item in workbook.named_items()? {
190 if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
191 continue;
192 }
193 if !seen_ranges.insert(item.refers_to.clone()) {
194 continue;
195 }
196 key_named_ranges.push(item);
197 if key_named_ranges.len() >= 10 {
198 break;
199 }
200 }
201
202 Ok(WorkbookSummaryResponse {
203 workbook_id: workbook.id.clone(),
204 workbook_short_id: workbook.short_id.clone(),
205 slug: workbook.slug.clone(),
206 sheet_count: sheet_names.len(),
207 total_cells,
208 total_formulas,
209 breakdown,
210 region_counts,
211 key_named_ranges,
212 suggested_entry_points: entry_points,
213 })
214}
215
216fn priority_from_rationale(rationale: &str) -> u32 {
217 if rationale.contains("p0") {
218 0
219 } else if rationale.contains("p1") {
220 1
221 } else if rationale.contains("p2") {
222 2
223 } else if rationale.contains("p3") {
224 3
225 } else if rationale.contains("p4") {
226 4
227 } else {
228 5
229 }
230}
231
232pub async fn sheet_overview(
233 state: Arc<AppState>,
234 params: SheetOverviewParams,
235) -> Result<SheetOverviewResponse> {
236 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
237 let overview = workbook.sheet_overview(¶ms.sheet_name)?;
238 Ok(overview)
239}
240
241fn default_start_row() -> u32 {
242 1
243}
244
245fn default_page_size() -> u32 {
246 50
247}
248
249fn default_include_formulas() -> bool {
250 true
251}
252
253fn default_include_header() -> bool {
254 true
255}
256
257#[derive(Debug, Deserialize, JsonSchema)]
258pub struct SheetPageParams {
259 #[serde(alias = "workbook_id")]
260 pub workbook_or_fork_id: WorkbookId,
261 pub sheet_name: String,
262 #[serde(default = "default_start_row")]
263 pub start_row: u32,
264 #[serde(default = "default_page_size")]
265 pub page_size: u32,
266 #[serde(default)]
267 pub columns: Option<Vec<String>>,
268 #[serde(default)]
269 pub columns_by_header: Option<Vec<String>>,
270 #[serde(default = "default_include_formulas")]
271 pub include_formulas: bool,
272 #[serde(default)]
273 pub include_styles: bool,
274 #[serde(default = "default_include_header")]
275 pub include_header: bool,
276 #[serde(default)]
277 pub format: Option<SheetPageFormat>,
278}
279
280impl Default for SheetPageParams {
281 fn default() -> Self {
282 SheetPageParams {
283 workbook_or_fork_id: WorkbookId(String::new()),
284 sheet_name: String::new(),
285 start_row: default_start_row(),
286 page_size: default_page_size(),
287 columns: None,
288 columns_by_header: None,
289 include_formulas: default_include_formulas(),
290 include_styles: false,
291 include_header: default_include_header(),
292 format: None,
293 }
294 }
295}
296
297fn default_find_limit() -> u32 {
298 50
299}
300
301#[derive(Debug, Deserialize, JsonSchema)]
302pub struct FindValueParams {
303 #[serde(alias = "workbook_id")]
304 pub workbook_or_fork_id: WorkbookId,
305 pub query: String,
306 #[serde(default)]
307 pub label: Option<String>,
308 #[serde(default)]
309 pub mode: Option<FindMode>,
310 #[serde(default)]
311 pub match_mode: Option<String>,
312 #[serde(default)]
313 pub case_sensitive: bool,
314 #[serde(default)]
315 pub sheet_name: Option<String>,
316 #[serde(default)]
317 pub region_id: Option<u32>,
318 #[serde(default)]
319 pub table_name: Option<String>,
320 #[serde(default)]
321 pub value_types: Option<Vec<String>>,
322 #[serde(default)]
323 pub search_headers_only: bool,
324 #[serde(default)]
325 pub direction: Option<LabelDirection>,
326 #[serde(default = "default_find_limit")]
327 pub limit: u32,
328}
329
330impl Default for FindValueParams {
331 fn default() -> Self {
332 Self {
333 workbook_or_fork_id: WorkbookId(String::new()),
334 query: String::new(),
335 label: None,
336 mode: None,
337 match_mode: None,
338 case_sensitive: false,
339 sheet_name: None,
340 region_id: None,
341 table_name: None,
342 value_types: None,
343 search_headers_only: false,
344 direction: None,
345 limit: default_find_limit(),
346 }
347 }
348}
349
350#[derive(Debug, Deserialize, JsonSchema, Default)]
351pub struct ReadTableParams {
352 #[serde(alias = "workbook_id")]
353 pub workbook_or_fork_id: WorkbookId,
354 #[serde(default)]
355 pub sheet_name: Option<String>,
356 #[serde(default)]
357 pub table_name: Option<String>,
358 #[serde(default)]
359 pub region_id: Option<u32>,
360 #[serde(default)]
361 pub range: Option<String>,
362 #[serde(default)]
363 pub header_row: Option<u32>,
364 #[serde(default)]
365 pub header_rows: Option<u32>,
366 #[serde(default)]
367 pub columns: Option<Vec<String>>,
368 #[serde(default)]
369 pub filters: Option<Vec<TableFilter>>,
370 #[serde(default)]
371 pub sample_mode: Option<String>,
372 #[serde(default)]
373 pub limit: Option<u32>,
374 #[serde(default)]
375 pub offset: Option<u32>,
376}
377
378#[derive(Debug, Deserialize, JsonSchema, Clone)]
379pub struct TableFilter {
380 pub column: String,
381 pub op: String,
382 pub value: serde_json::Value,
383}
384
385#[derive(Debug, Deserialize, JsonSchema, Default)]
386pub struct TableProfileParams {
387 #[serde(alias = "workbook_id")]
388 pub workbook_or_fork_id: WorkbookId,
389 #[serde(default)]
390 pub sheet_name: Option<String>,
391 #[serde(default)]
392 pub region_id: Option<u32>,
393 #[serde(default)]
394 pub table_name: Option<String>,
395 #[serde(default)]
396 pub sample_mode: Option<String>,
397 #[serde(default)]
398 pub sample_size: Option<u32>,
399}
400
401#[derive(Debug, Deserialize, JsonSchema)]
402pub struct RangeValuesParams {
403 #[serde(alias = "workbook_id")]
404 pub workbook_or_fork_id: WorkbookId,
405 pub sheet_name: String,
406 pub ranges: Vec<String>,
407 #[serde(default)]
408 pub include_headers: Option<bool>,
409}
410
411pub async fn sheet_page(
412 state: Arc<AppState>,
413 params: SheetPageParams,
414) -> Result<SheetPageResponse> {
415 if params.page_size == 0 {
416 return Err(anyhow!("page_size must be greater than zero"));
417 }
418
419 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
420 let metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
421 let format = params.format.unwrap_or_default();
422
423 let start_row = params.start_row.max(1);
424 let page_size = params.page_size.min(500);
425 let include_formulas = params.include_formulas;
426 let include_styles = params.include_styles;
427 let columns = params.columns.clone();
428 let columns_by_header = params.columns_by_header.clone();
429 let include_header = params.include_header;
430
431 let page = workbook.with_sheet(¶ms.sheet_name, |sheet| {
432 build_page(
433 sheet,
434 start_row,
435 page_size,
436 columns.clone(),
437 columns_by_header.clone(),
438 include_formulas,
439 include_styles,
440 include_header,
441 )
442 })?;
443
444 let has_more = page.end_row < metrics.metrics.row_count;
445 let next_start_row = if has_more {
446 Some(page.end_row + 1)
447 } else {
448 None
449 };
450
451 let compact_payload = if matches!(format, SheetPageFormat::Compact) {
452 Some(build_compact_payload(
453 &page.header,
454 &page.rows,
455 include_header,
456 ))
457 } else {
458 None
459 };
460
461 let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
462 Some(build_values_only_payload(
463 &page.header,
464 &page.rows,
465 include_header,
466 ))
467 } else {
468 None
469 };
470
471 let response = SheetPageResponse {
472 workbook_id: workbook.id.clone(),
473 workbook_short_id: workbook.short_id.clone(),
474 sheet_name: params.sheet_name,
475 rows: if matches!(format, SheetPageFormat::Full) {
476 page.rows
477 } else {
478 Vec::new()
479 },
480 has_more,
481 next_start_row,
482 header_row: if include_header { page.header } else { None },
483 compact: compact_payload,
484 values_only: values_only_payload,
485 format,
486 };
487 Ok(response)
488}
489
490#[derive(Debug, Deserialize, JsonSchema)]
491pub struct SheetFormulaMapParams {
492 #[serde(alias = "workbook_id")]
493 pub workbook_or_fork_id: WorkbookId,
494 pub sheet_name: String,
495 pub range: Option<String>,
496 #[serde(default)]
497 pub expand: bool,
498 #[serde(default)]
499 pub limit: Option<u32>,
500 #[serde(default)]
501 pub sort_by: Option<FormulaSortBy>,
502}
503
504#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
505#[serde(rename_all = "snake_case")]
506pub enum FormulaSortBy {
507 #[default]
508 Address,
509 Complexity,
510 Count,
511}
512
513pub async fn sheet_formula_map(
514 state: Arc<AppState>,
515 params: SheetFormulaMapParams,
516) -> Result<SheetFormulaMapResponse> {
517 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
518 let graph = workbook.formula_graph(¶ms.sheet_name)?;
519 let mut groups = Vec::new();
520 let mut truncated = false;
521
522 for mut group in graph.groups() {
523 if let Some(range) = ¶ms.range {
524 group.addresses.retain(|addr| address_in_range(addr, range));
525 if group.addresses.is_empty() {
526 continue;
527 }
528 }
529 if !params.expand && group.addresses.len() > 15 {
530 group.addresses.truncate(15);
531 truncated = true;
532 }
533 groups.push(group);
534 }
535
536 let sort_by = params.sort_by.unwrap_or_default();
537 match sort_by {
538 FormulaSortBy::Address => {
539 groups.sort_by(|a, b| a.addresses.first().cmp(&b.addresses.first()));
540 }
541 FormulaSortBy::Complexity => {
542 groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
543 }
544 FormulaSortBy::Count => {
545 groups.sort_by(|a, b| b.addresses.len().cmp(&a.addresses.len()));
546 }
547 }
548
549 if let Some(limit) = params.limit
550 && groups.len() > limit as usize
551 {
552 groups.truncate(limit as usize);
553 truncated = true;
554 }
555
556 let response = SheetFormulaMapResponse {
557 workbook_id: workbook.id.clone(),
558 workbook_short_id: workbook.short_id.clone(),
559 sheet_name: params.sheet_name.clone(),
560 groups,
561 truncated,
562 };
563 Ok(response)
564}
565
566#[derive(Debug, Deserialize, JsonSchema)]
567pub struct FormulaTraceParams {
568 #[serde(alias = "workbook_id")]
569 pub workbook_or_fork_id: WorkbookId,
570 pub sheet_name: String,
571 pub cell_address: String,
572 pub direction: TraceDirection,
573 pub depth: Option<u32>,
574 pub limit: Option<u32>,
575 #[serde(default)]
576 pub page_size: Option<usize>,
577 #[serde(default)]
578 pub cursor: Option<TraceCursor>,
579}
580
581pub async fn formula_trace(
582 state: Arc<AppState>,
583 params: FormulaTraceParams,
584) -> Result<FormulaTraceResponse> {
585 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
586 let graph = workbook.formula_graph(¶ms.sheet_name)?;
587 let formula_lookup = build_formula_lookup(&graph);
588 let depth = params.depth.unwrap_or(3).clamp(1, 5);
589 let page_size = params
590 .page_size
591 .or_else(|| params.limit.map(|v| v as usize))
592 .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
593 .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
594
595 let origin = params.cell_address.to_uppercase();
596 let config = TraceConfig {
597 direction: ¶ms.direction,
598 origin: &origin,
599 sheet_name: ¶ms.sheet_name,
600 depth_limit: depth,
601 page_size,
602 };
603 let (layers, next_cursor, notes) = build_trace_layers(
604 &workbook,
605 &graph,
606 &formula_lookup,
607 &config,
608 params.cursor.clone(),
609 )?;
610
611 let response = FormulaTraceResponse {
612 workbook_id: workbook.id.clone(),
613 workbook_short_id: workbook.short_id.clone(),
614 sheet_name: params.sheet_name.clone(),
615 origin,
616 direction: params.direction.clone(),
617 layers,
618 next_cursor,
619 notes,
620 };
621 Ok(response)
622}
623
624#[derive(Debug, Deserialize, JsonSchema)]
625pub struct NamedRangesParams {
626 #[serde(alias = "workbook_id")]
627 pub workbook_or_fork_id: WorkbookId,
628 pub sheet_name: Option<String>,
629 pub name_prefix: Option<String>,
630}
631
632pub async fn named_ranges(
633 state: Arc<AppState>,
634 params: NamedRangesParams,
635) -> Result<NamedRangesResponse> {
636 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
637 let mut items = workbook.named_items()?;
638
639 if let Some(sheet_filter) = ¶ms.sheet_name {
640 items.retain(|item| {
641 item.sheet_name
642 .as_ref()
643 .map(|name| name.eq_ignore_ascii_case(sheet_filter))
644 .unwrap_or(false)
645 });
646 }
647 if let Some(prefix) = ¶ms.name_prefix {
648 let prefix_lower = prefix.to_ascii_lowercase();
649 items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
650 }
651
652 let response = NamedRangesResponse {
653 workbook_id: workbook.id.clone(),
654 workbook_short_id: workbook.short_id.clone(),
655 items,
656 };
657 Ok(response)
658}
659
660struct PageBuildResult {
661 rows: Vec<RowSnapshot>,
662 header: Option<RowSnapshot>,
663 end_row: u32,
664}
665
666#[allow(clippy::too_many_arguments)]
667fn build_page(
668 sheet: &umya_spreadsheet::Worksheet,
669 start_row: u32,
670 page_size: u32,
671 columns: Option<Vec<String>>,
672 columns_by_header: Option<Vec<String>>,
673 include_formulas: bool,
674 include_styles: bool,
675 include_header: bool,
676) -> PageBuildResult {
677 let max_col = sheet.get_highest_column();
678 let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
679 let column_indices =
680 resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
681
682 let header = if include_header {
683 Some(build_row_snapshot(
684 sheet,
685 1,
686 &column_indices,
687 include_formulas,
688 include_styles,
689 ))
690 } else {
691 None
692 };
693
694 let mut rows = Vec::new();
695 for row_idx in start_row..=end_row {
696 rows.push(build_row_snapshot(
697 sheet,
698 row_idx,
699 &column_indices,
700 include_formulas,
701 include_styles,
702 ));
703 }
704
705 PageBuildResult {
706 rows,
707 header,
708 end_row,
709 }
710}
711
712fn build_row_snapshot(
713 sheet: &umya_spreadsheet::Worksheet,
714 row_index: u32,
715 columns: &[u32],
716 include_formulas: bool,
717 include_styles: bool,
718) -> RowSnapshot {
719 let mut cells = Vec::new();
720 for &col in columns {
721 if let Some(cell) = sheet.get_cell((col, row_index)) {
722 cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
723 } else {
724 let address = crate::utils::cell_address(col, row_index);
725 cells.push(CellSnapshot {
726 address,
727 value: None,
728 formula: None,
729 cached_value: None,
730 number_format: None,
731 style_tags: Vec::new(),
732 notes: Vec::new(),
733 });
734 }
735 }
736
737 RowSnapshot { row_index, cells }
738}
739
740fn build_cell_snapshot(
741 cell: &umya_spreadsheet::Cell,
742 include_formulas: bool,
743 include_styles: bool,
744) -> CellSnapshot {
745 let address = cell.get_coordinate().get_coordinate();
746 let value = crate::workbook::cell_to_value(cell);
747 let formula = if include_formulas && cell.is_formula() {
748 Some(cell.get_formula().to_string())
749 } else {
750 None
751 };
752 let cached_value = if cell.is_formula() {
753 value.clone()
754 } else {
755 None
756 };
757 let number_format = if include_styles {
758 cell.get_style()
759 .get_number_format()
760 .map(|fmt| fmt.get_format_code().to_string())
761 } else {
762 None
763 };
764 let style_tags = if include_styles {
765 crate::analysis::style::tag_cell(cell)
766 .map(|(_, tagging)| tagging.tags)
767 .unwrap_or_default()
768 } else {
769 Vec::new()
770 };
771
772 CellSnapshot {
773 address,
774 value,
775 formula,
776 cached_value,
777 number_format,
778 style_tags,
779 notes: Vec::new(),
780 }
781}
782
783fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
784 use std::collections::BTreeSet;
785 use umya_spreadsheet::helper::coordinate::column_index_from_string;
786
787 let mut indices = BTreeSet::new();
788 if let Some(specs) = columns {
789 for spec in specs {
790 if let Some((start, end)) = spec.split_once(':') {
791 let start_idx = column_index_from_string(start);
792 let end_idx = column_index_from_string(end);
793 let (min_idx, max_idx) = if start_idx <= end_idx {
794 (start_idx, end_idx)
795 } else {
796 (end_idx, start_idx)
797 };
798 for idx in min_idx..=max_idx {
799 indices.insert(idx);
800 }
801 } else {
802 indices.insert(column_index_from_string(spec));
803 }
804 }
805 } else {
806 for idx in 1..=max_column.max(1) {
807 indices.insert(idx);
808 }
809 }
810
811 indices.into_iter().collect()
812}
813
814fn resolve_columns_with_headers(
815 sheet: &umya_spreadsheet::Worksheet,
816 columns: Option<&Vec<String>>,
817 columns_by_header: Option<&Vec<String>>,
818 max_column: u32,
819) -> Vec<u32> {
820 if columns_by_header.is_none() {
821 return resolve_columns(columns, max_column);
822 }
823
824 let mut selected = Vec::new();
825 let header_targets: Vec<String> = columns_by_header
826 .unwrap()
827 .iter()
828 .map(|h| h.trim().to_ascii_lowercase())
829 .collect();
830
831 for col_idx in 1..=max_column.max(1) {
832 let header_cell = sheet.get_cell((col_idx, 1u32));
833 let header_value = header_cell
834 .and_then(cell_to_value)
835 .map(cell_value_to_string_lower);
836 if let Some(hval) = header_value
837 && header_targets.iter().any(|target| target == &hval)
838 {
839 selected.push(col_idx);
840 }
841 }
842
843 if selected.is_empty() {
844 resolve_columns(columns, max_column)
845 } else {
846 selected
847 }
848}
849
850fn cell_value_to_string_lower(value: CellValue) -> String {
851 match value {
852 CellValue::Text(s) => s.to_ascii_lowercase(),
853 CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
854 CellValue::Bool(b) => b.to_string(),
855 CellValue::Error(e) => e.to_ascii_lowercase(),
856 CellValue::Date(d) => d.to_ascii_lowercase(),
857 }
858}
859
860fn build_compact_payload(
861 header: &Option<RowSnapshot>,
862 rows: &[RowSnapshot],
863 include_header: bool,
864) -> SheetPageCompact {
865 let headers = derive_headers(header, rows);
866 let header_row = if include_header {
867 header
868 .as_ref()
869 .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
870 .unwrap_or_default()
871 } else {
872 Vec::new()
873 };
874 let data_rows = rows
875 .iter()
876 .map(|row| {
877 let mut vals: Vec<Option<CellValue>> = Vec::new();
878 vals.push(Some(CellValue::Number(row.row_index as f64)));
879 vals.extend(row.cells.iter().map(|c| c.value.clone()));
880 vals
881 })
882 .collect();
883
884 SheetPageCompact {
885 headers,
886 header_row,
887 rows: data_rows,
888 }
889}
890
891fn build_values_only_payload(
892 header: &Option<RowSnapshot>,
893 rows: &[RowSnapshot],
894 include_header: bool,
895) -> SheetPageValues {
896 let mut data = Vec::new();
897 if include_header && let Some(h) = header {
898 data.push(h.cells.iter().map(|c| c.value.clone()).collect());
899 }
900 for row in rows {
901 data.push(row.cells.iter().map(|c| c.value.clone()).collect());
902 }
903
904 SheetPageValues { rows: data }
905}
906
907fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
908 if let Some(h) = header {
909 let mut headers: Vec<String> = h
910 .cells
911 .iter()
912 .map(|c| match &c.value {
913 Some(CellValue::Text(t)) => t.clone(),
914 Some(CellValue::Number(n)) => n.to_string(),
915 Some(CellValue::Bool(b)) => b.to_string(),
916 Some(CellValue::Date(d)) => d.clone(),
917 Some(CellValue::Error(e)) => e.clone(),
918 None => c.address.clone(),
919 })
920 .collect();
921 headers.insert(0, "Row".to_string());
922 headers
923 } else if let Some(first) = rows.first() {
924 let mut headers = Vec::new();
925 headers.push("Row".to_string());
926 for cell in &first.cells {
927 headers.push(cell.address.clone());
928 }
929 headers
930 } else {
931 vec![]
932 }
933}
934fn default_stats_sample() -> usize {
935 500
936}
937
938#[derive(Debug, Deserialize, JsonSchema)]
939pub struct SheetStatisticsParams {
940 #[serde(alias = "workbook_id")]
941 pub workbook_or_fork_id: WorkbookId,
942 pub sheet_name: String,
943 #[serde(default)]
944 pub sample_rows: Option<usize>,
945}
946
947pub async fn sheet_statistics(
948 state: Arc<AppState>,
949 params: SheetStatisticsParams,
950) -> Result<SheetStatisticsResponse> {
951 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
952 let sheet_metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
953 let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
954 let stats = workbook.with_sheet(¶ms.sheet_name, |sheet| {
955 stats::compute_sheet_statistics(sheet, sample_rows)
956 })?;
957 let response = SheetStatisticsResponse {
958 workbook_id: workbook.id.clone(),
959 workbook_short_id: workbook.short_id.clone(),
960 sheet_name: params.sheet_name,
961 row_count: sheet_metrics.metrics.row_count,
962 column_count: sheet_metrics.metrics.column_count,
963 density: stats.density,
964 numeric_columns: stats.numeric_columns,
965 text_columns: stats.text_columns,
966 null_counts: stats.null_counts,
967 duplicate_warnings: stats.duplicate_warnings,
968 };
969 Ok(response)
970}
971
972fn address_in_range(address: &str, range: &str) -> bool {
973 parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
974 if let Some((col, row)) = parse_address(address) {
975 col >= start_col && col <= end_col && row >= start_row && row <= end_row
976 } else {
977 false
978 }
979 })
980}
981
982fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
983 let mut parts = range.split(':');
984 let start = parts.next()?;
985 let end = parts.next().unwrap_or(start);
986 let start_idx = parse_address(start)?;
987 let end_idx = parse_address(end)?;
988 Some((
989 (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
990 (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
991 ))
992}
993
994fn parse_address(address: &str) -> Option<(u32, u32)> {
995 use umya_spreadsheet::helper::coordinate::index_from_coordinate;
996 let (col, row, _, _) = index_from_coordinate(address);
997 match (col, row) {
998 (Some(c), Some(r)) => Some((c, r)),
999 _ => None,
1000 }
1001}
1002
1003#[derive(Clone)]
1004struct TableTarget {
1005 sheet_name: String,
1006 table_name: Option<String>,
1007 range: ((u32, u32), (u32, u32)),
1008 header_hint: Option<u32>,
1009}
1010
1011fn resolve_table_target(
1012 workbook: &WorkbookContext,
1013 params: &ReadTableParams,
1014) -> Result<TableTarget> {
1015 if let Some(region_id) = params.region_id
1016 && let Some(sheet) = ¶ms.sheet_name
1017 && let Ok(region) = workbook.detected_region(sheet, region_id)
1018 {
1019 return Ok(TableTarget {
1020 sheet_name: sheet.clone(),
1021 table_name: None,
1022 range: parse_range(®ion.bounds).unwrap_or(((1, 1), (1, 1))),
1023 header_hint: region.header_row,
1024 });
1025 }
1026
1027 if let Some(table_name) = ¶ms.table_name {
1028 let items = workbook.named_items()?;
1029 for item in items {
1030 if item.name.eq_ignore_ascii_case(table_name)
1031 || item
1032 .name
1033 .to_ascii_lowercase()
1034 .contains(&table_name.to_ascii_lowercase())
1035 {
1036 let mut sheet_name = item
1037 .sheet_name
1038 .clone()
1039 .or_else(|| params.sheet_name.clone())
1040 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1041 let refers_to = item.refers_to.trim_start_matches('=');
1042 let mut range_part = refers_to;
1043 if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1044 sheet_name = sheet_part.trim_matches('\'').to_string();
1045 range_part = rest;
1046 }
1047 if let Some(range) = parse_range(range_part) {
1048 return Ok(TableTarget {
1049 sheet_name,
1050 table_name: Some(item.name.clone()),
1051 range,
1052 header_hint: if item.kind == NamedItemKind::Table {
1053 Some(range.0.1)
1054 } else {
1055 None
1056 },
1057 });
1058 }
1059 }
1060 }
1061 }
1062
1063 let sheet_name = params
1064 .sheet_name
1065 .clone()
1066 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1067
1068 if let Some(rng) = ¶ms.range
1069 && let Some(range) = parse_range(rng)
1070 {
1071 return Ok(TableTarget {
1072 sheet_name,
1073 table_name: None,
1074 range,
1075 header_hint: None,
1076 });
1077 }
1078
1079 let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1080 let end_col = metrics.metrics.column_count.max(1);
1081 let end_row = metrics.metrics.row_count.max(1);
1082 Ok(TableTarget {
1083 sheet_name,
1084 table_name: None,
1085 range: ((1, 1), (end_col, end_row)),
1086 header_hint: None,
1087 })
1088}
1089
1090#[allow(clippy::too_many_arguments)]
1091fn extract_table_rows(
1092 sheet: &umya_spreadsheet::Worksheet,
1093 target: &TableTarget,
1094 header_row: Option<u32>,
1095 header_rows: Option<u32>,
1096 columns: Option<Vec<String>>,
1097 filters: Option<Vec<TableFilter>>,
1098 limit: usize,
1099 offset: usize,
1100 sample_mode: &str,
1101) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1102 let ((start_col, start_row), (end_col, end_row)) = target.range;
1103 let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1104 if header_start < start_row {
1105 header_start = start_row;
1106 }
1107 if header_start > end_row {
1108 header_start = start_row;
1109 }
1110 let header_rows_count = header_rows.unwrap_or(1).max(1);
1111 let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1112 let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1113 resolve_columns(Some(cols), end_col).into_iter().collect()
1114 } else {
1115 (start_col..=end_col).collect()
1116 };
1117
1118 let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1119 let mut all_rows: Vec<TableRow> = Vec::new();
1120 let mut total_rows: u32 = 0;
1121
1122 for row_idx in data_start_row..=end_row {
1123 let mut row = BTreeMap::new();
1124 for (i, col_idx) in column_indices.iter().enumerate() {
1125 let header = headers
1126 .get(i)
1127 .cloned()
1128 .unwrap_or_else(|| format!("Col{col_idx}"));
1129 let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1130 row.insert(header, value);
1131 }
1132 if !row_passes_filters(&row, filters.as_ref()) {
1133 continue;
1134 }
1135 total_rows += 1;
1136 if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1137 continue;
1138 }
1139 all_rows.push(row);
1140 }
1141
1142 let rows = sample_rows(all_rows, limit, offset, sample_mode);
1143
1144 Ok((headers, rows, total_rows))
1145}
1146
1147fn build_headers(
1148 sheet: &umya_spreadsheet::Worksheet,
1149 columns: &[u32],
1150 header_start: u32,
1151 header_rows: u32,
1152) -> Vec<String> {
1153 let mut headers = Vec::new();
1154 for col_idx in columns {
1155 let mut parts = Vec::new();
1156 for h in header_start..(header_start + header_rows) {
1157 let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1158 if let Some(value) = sheet
1159 .get_cell((origin_col, origin_row))
1160 .and_then(cell_to_value)
1161 {
1162 match value {
1163 CellValue::Text(ref s) if s.trim().is_empty() => {}
1164 CellValue::Text(s) => parts.push(s),
1165 CellValue::Number(n) => parts.push(n.to_string()),
1166 CellValue::Bool(b) => parts.push(b.to_string()),
1167 CellValue::Error(e) => parts.push(e),
1168 CellValue::Date(d) => parts.push(d),
1169 }
1170 }
1171 }
1172 if parts.is_empty() {
1173 headers.push(crate::utils::column_number_to_name(*col_idx));
1174 } else {
1175 headers.push(parts.join(" / "));
1176 }
1177 }
1178
1179 if headers.iter().all(|h| h.trim().is_empty()) {
1180 return columns
1181 .iter()
1182 .map(|c| crate::utils::column_number_to_name(*c))
1183 .collect();
1184 }
1185
1186 dedupe_headers(headers)
1187}
1188
1189fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1190 let mut seen: HashMap<String, u32> = HashMap::new();
1191 for h in headers.iter_mut() {
1192 let key = h.clone();
1193 if key.trim().is_empty() {
1194 continue;
1195 }
1196 let count = seen.entry(key.clone()).or_insert(0);
1197 if *count > 0 {
1198 h.push_str(&format!("_{}", *count + 1));
1199 }
1200 *count += 1;
1201 }
1202 headers
1203}
1204
1205fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1206 if let Some(filters) = filters {
1207 for filter in filters {
1208 if let Some(value) = row.get(&filter.column) {
1209 match filter.op.as_str() {
1210 "eq" => {
1211 if !value_eq(value, &filter.value) {
1212 return false;
1213 }
1214 }
1215 "neq" => {
1216 if value_eq(value, &filter.value) {
1217 return false;
1218 }
1219 }
1220 "contains" => {
1221 if !value_contains(value, &filter.value) {
1222 return false;
1223 }
1224 }
1225 "gt" => {
1226 if !value_gt(value, &filter.value) {
1227 return false;
1228 }
1229 }
1230 "lt" => {
1231 if !value_lt(value, &filter.value) {
1232 return false;
1233 }
1234 }
1235 "in" => {
1236 let list = filter
1237 .value
1238 .as_array()
1239 .cloned()
1240 .unwrap_or_else(|| vec![filter.value.clone()]);
1241 if !list.iter().any(|cmp| value_eq(value, cmp)) {
1242 return false;
1243 }
1244 }
1245 _ => {}
1246 }
1247 }
1248 }
1249 }
1250 true
1251}
1252
1253fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1254 match (cell, cmp) {
1255 (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1256 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1257 v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1258 }
1259 (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1260 .parse::<f64>()
1261 .map(|f| (*n - f).abs() < f64::EPSILON)
1262 .unwrap_or(false),
1263 (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1264 (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1265 t.eq_ignore_ascii_case("true") == *b
1266 }
1267 (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1268 (None, serde_json::Value::Null) => true,
1269 _ => false,
1270 }
1271}
1272
1273fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1274 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1275 return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1276 }
1277 false
1278}
1279
1280fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1281 match (cell, cmp) {
1282 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1283 v.as_f64().is_some_and(|f| *n > f)
1284 }
1285 _ => false,
1286 }
1287}
1288
1289fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1290 match (cell, cmp) {
1291 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1292 v.as_f64().is_some_and(|f| *n < f)
1293 }
1294 _ => false,
1295 }
1296}
1297
1298fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1299 if rows.is_empty() {
1300 return rows;
1301 }
1302
1303 match mode {
1304 "distributed" => {
1305 if limit == 0 {
1306 return Vec::new();
1307 }
1308 let mut indices = Vec::new();
1309 let span = rows.len().saturating_sub(1);
1310 let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1311 let mut idx = offset;
1312 while idx < rows.len() && indices.len() < limit {
1313 indices.push(idx);
1314 idx = idx.saturating_add(step);
1315 if idx == indices.last().copied().unwrap_or(0) {
1316 idx += 1;
1317 }
1318 }
1319 if indices.len() < limit {
1320 let last_idx = rows.len().saturating_sub(1);
1321 if !indices.contains(&last_idx) {
1322 indices.push(last_idx);
1323 }
1324 }
1325 indices
1326 .into_iter()
1327 .filter_map(|i| rows.get(i).cloned())
1328 .collect()
1329 }
1330 "last" => {
1331 let start = rows.len().saturating_sub(limit + offset);
1332 rows.into_iter().skip(start + offset).take(limit).collect()
1333 }
1334 _ => rows.into_iter().skip(offset).take(limit).collect(),
1335 }
1336}
1337
1338fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1339 let mut summaries = Vec::new();
1340 for header in headers {
1341 let mut nulls = 0u32;
1342 let mut distinct_set: HashSet<String> = HashSet::new();
1343 let mut values: Vec<f64> = Vec::new();
1344 let mut top_counts: HashMap<String, u32> = HashMap::new();
1345
1346 for row in rows {
1347 match row.get(header) {
1348 Some(Some(CellValue::Number(n))) => {
1349 values.push(*n);
1350 let key = n.to_string();
1351 *top_counts.entry(key).or_default() += 1;
1352 }
1353 Some(Some(CellValue::Text(s))) => {
1354 distinct_set.insert(s.clone());
1355 *top_counts.entry(s.clone()).or_default() += 1;
1356 }
1357 Some(Some(CellValue::Bool(b))) => {
1358 let key = b.to_string();
1359 distinct_set.insert(key.clone());
1360 *top_counts.entry(key).or_default() += 1;
1361 }
1362 Some(Some(CellValue::Date(d))) => {
1363 distinct_set.insert(d.clone());
1364 *top_counts.entry(d.clone()).or_default() += 1;
1365 }
1366 Some(Some(CellValue::Error(e))) => {
1367 distinct_set.insert(e.clone());
1368 *top_counts.entry(e.clone()).or_default() += 1;
1369 }
1370 _ => {
1371 nulls += 1;
1372 }
1373 }
1374 }
1375
1376 let inferred_type = if !values.is_empty() {
1377 "number"
1378 } else if !distinct_set.is_empty() {
1379 "text"
1380 } else {
1381 "unknown"
1382 }
1383 .to_string();
1384
1385 let min = values.iter().cloned().reduce(f64::min);
1386 let max = values.iter().cloned().reduce(f64::max);
1387 let mean = if values.is_empty() {
1388 None
1389 } else {
1390 Some(values.iter().sum::<f64>() / values.len() as f64)
1391 };
1392
1393 let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1394 top_values.sort_by(|a, b| b.1.cmp(&a.1));
1395 let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1396
1397 summaries.push(ColumnTypeSummary {
1398 name: header.clone(),
1399 inferred_type,
1400 nulls,
1401 distinct: distinct_set.len() as u32,
1402 top_values,
1403 min,
1404 max,
1405 mean,
1406 });
1407 }
1408 summaries
1409}
1410
1411#[allow(clippy::too_many_arguments)]
1412fn collect_value_matches(
1413 sheet: &umya_spreadsheet::Worksheet,
1414 sheet_name: &str,
1415 mode: &FindMode,
1416 match_mode: &str,
1417 direction: &LabelDirection,
1418 params: &FindValueParams,
1419 region: Option<&DetectedRegion>,
1420 default_bounds: ((u32, u32), (u32, u32)),
1421) -> Result<Vec<FindValueMatch>> {
1422 let mut results = Vec::new();
1423 let regex = if match_mode == "regex" {
1424 Regex::new(¶ms.query).ok()
1425 } else {
1426 None
1427 };
1428 let bounds = region
1429 .as_ref()
1430 .and_then(|r| parse_range(&r.bounds))
1431 .unwrap_or(default_bounds);
1432
1433 let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1434
1435 for cell in sheet.get_cell_collection() {
1436 let coord = cell.get_coordinate();
1437 let col = *coord.get_col_num();
1438 let row = *coord.get_row_num();
1439 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1440 continue;
1441 }
1442 if params.search_headers_only && row != header_row {
1443 continue;
1444 }
1445
1446 let value = cell_to_value(cell);
1447 if let Some(ref allowed) = params.value_types
1448 && !value_type_matches(&value, allowed)
1449 {
1450 continue;
1451 }
1452 if matches!(mode, FindMode::Value) {
1453 if !value_matches(
1454 &value,
1455 ¶ms.query,
1456 match_mode,
1457 params.case_sensitive,
1458 ®ex,
1459 ) {
1460 continue;
1461 }
1462 } else if let Some(label) = ¶ms.label {
1463 if !label_matches(cell, label, match_mode, params.case_sensitive, ®ex) {
1464 continue;
1465 }
1466 } else {
1467 continue;
1468 }
1469
1470 let neighbors = collect_neighbors(sheet, row, col);
1471 let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1472 let target_value = match direction {
1473 LabelDirection::Right => sheet.get_cell((col + 1, row)),
1474 LabelDirection::Below => sheet.get_cell((col, row + 1)),
1475 LabelDirection::Any => sheet
1476 .get_cell((col + 1, row))
1477 .or_else(|| sheet.get_cell((col, row + 1))),
1478 }
1479 .and_then(cell_to_value);
1480 if target_value.is_none() {
1481 continue;
1482 }
1483 (
1484 Some(LabelHit {
1485 label_address: coord.get_coordinate(),
1486 label: label_from_cell(cell),
1487 }),
1488 target_value,
1489 )
1490 } else {
1491 (None, value.clone())
1492 };
1493
1494 let row_context = build_row_context(sheet, row, col);
1495
1496 results.push(FindValueMatch {
1497 address: coord.get_coordinate(),
1498 sheet_name: sheet_name.to_string(),
1499 value: match_value,
1500 row_context,
1501 neighbors,
1502 label_hit,
1503 });
1504 }
1505
1506 Ok(results)
1507}
1508
1509fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1510 cell_to_value(cell)
1511 .map(|v| match v {
1512 CellValue::Text(s) => s,
1513 CellValue::Number(n) => n.to_string(),
1514 CellValue::Bool(b) => b.to_string(),
1515 CellValue::Date(d) => d,
1516 CellValue::Error(e) => e,
1517 })
1518 .unwrap_or_else(|| cell.get_value().to_string())
1519}
1520
1521fn value_matches(
1522 value: &Option<CellValue>,
1523 query: &str,
1524 mode: &str,
1525 case_sensitive: bool,
1526 regex: &Option<Regex>,
1527) -> bool {
1528 if value.is_none() {
1529 return false;
1530 }
1531 let haystack = cell_value_to_string_lower(value.clone().unwrap());
1532 let needle = if case_sensitive {
1533 query.to_string()
1534 } else {
1535 query.to_ascii_lowercase()
1536 };
1537
1538 match mode {
1539 "exact" => haystack == needle,
1540 "regex" => regex
1541 .as_ref()
1542 .map(|re| re.is_match(&haystack))
1543 .unwrap_or(false),
1544 _ => haystack.contains(&needle),
1545 }
1546}
1547
1548fn label_matches(
1549 cell: &umya_spreadsheet::Cell,
1550 label: &str,
1551 mode: &str,
1552 case_sensitive: bool,
1553 regex: &Option<Regex>,
1554) -> bool {
1555 let value = cell_to_value(cell);
1556 if value.is_none() {
1557 return false;
1558 }
1559 let haystack = cell_value_to_string_lower(value.unwrap());
1560 let needle = if case_sensitive {
1561 label.to_string()
1562 } else {
1563 label.to_ascii_lowercase()
1564 };
1565 match mode {
1566 "exact" => haystack == needle,
1567 "regex" => regex
1568 .as_ref()
1569 .map(|re| re.is_match(&haystack))
1570 .unwrap_or(false),
1571 _ => haystack.contains(&needle),
1572 }
1573}
1574
1575fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1576 if value.is_none() {
1577 return allowed.iter().any(|v| v == "null");
1578 }
1579 match value.as_ref().unwrap() {
1580 CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1581 CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1582 CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1583 CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1584 CellValue::Error(_) => true,
1585 }
1586}
1587
1588fn collect_neighbors(
1589 sheet: &umya_spreadsheet::Worksheet,
1590 row: u32,
1591 col: u32,
1592) -> Option<NeighborValues> {
1593 Some(NeighborValues {
1594 left: if col > 1 {
1595 sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1596 } else {
1597 None
1598 },
1599 right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1600 up: if row > 1 {
1601 sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1602 } else {
1603 None
1604 },
1605 down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1606 })
1607}
1608
1609fn build_row_context(
1610 sheet: &umya_spreadsheet::Worksheet,
1611 row: u32,
1612 col: u32,
1613) -> Option<RowContext> {
1614 let header_value = sheet
1615 .get_cell((col, 1u32))
1616 .and_then(cell_to_value)
1617 .map(|v| match v {
1618 CellValue::Text(s) => s,
1619 CellValue::Number(n) => n.to_string(),
1620 CellValue::Bool(b) => b.to_string(),
1621 CellValue::Date(d) => d,
1622 CellValue::Error(e) => e,
1623 })
1624 .unwrap_or_else(|| format!("Col{}", col));
1625 let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1626 Some(RowContext {
1627 headers: vec![header_value],
1628 values: vec![value],
1629 })
1630}
1631
1632fn default_find_formula_limit() -> u32 {
1633 50
1634}
1635
1636#[derive(Debug, Deserialize, JsonSchema, Default)]
1637pub struct FindFormulaParams {
1638 #[serde(alias = "workbook_id")]
1639 pub workbook_or_fork_id: WorkbookId,
1640 pub query: String,
1641 pub sheet_name: Option<String>,
1642 #[serde(default)]
1643 pub case_sensitive: bool,
1644 #[serde(default)]
1645 pub include_context: bool,
1646 #[serde(default = "default_find_formula_limit")]
1647 pub limit: u32,
1648 #[serde(default)]
1649 pub offset: u32,
1650}
1651
1652pub async fn find_formula(
1653 state: Arc<AppState>,
1654 params: FindFormulaParams,
1655) -> Result<FindFormulaResponse> {
1656 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
1657 let query = if params.case_sensitive {
1658 params.query.clone()
1659 } else {
1660 params.query.to_ascii_lowercase()
1661 };
1662
1663 let sheet_names: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1664 vec![sheet.clone()]
1665 } else {
1666 workbook.sheet_names()
1667 };
1668
1669 let limit = params.limit.clamp(1, 500);
1670 let offset = params.offset;
1671
1672 let mut matches = Vec::new();
1673 let mut seen: u32 = 0;
1674 let mut truncated = false;
1675
1676 for sheet_name in sheet_names {
1677 let (sheet_matches, sheet_seen, sheet_truncated) =
1678 workbook.with_sheet(&sheet_name, |sheet| {
1679 collect_formula_matches(
1680 sheet,
1681 &sheet_name,
1682 &query,
1683 params.case_sensitive,
1684 params.include_context,
1685 offset,
1686 limit,
1687 seen,
1688 )
1689 })?;
1690
1691 seen = sheet_seen;
1692 truncated |= sheet_truncated;
1693 matches.extend(sheet_matches);
1694
1695 if truncated {
1696 break;
1697 }
1698 }
1699
1700 let response = FindFormulaResponse {
1701 workbook_id: workbook.id.clone(),
1702 workbook_short_id: workbook.short_id.clone(),
1703 matches,
1704 truncated,
1705 next_offset: truncated.then(|| offset.saturating_add(limit)),
1706 };
1707 Ok(response)
1708}
1709
1710#[derive(Debug, Deserialize, JsonSchema)]
1711pub struct ScanVolatilesParams {
1712 #[serde(alias = "workbook_id")]
1713 pub workbook_or_fork_id: WorkbookId,
1714 pub sheet_name: Option<String>,
1715}
1716
1717pub async fn scan_volatiles(
1718 state: Arc<AppState>,
1719 params: ScanVolatilesParams,
1720) -> Result<VolatileScanResponse> {
1721 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
1722 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
1723 vec![sheet.clone()]
1724 } else {
1725 workbook.sheet_names()
1726 };
1727
1728 let mut items = Vec::new();
1729 let mut truncated = false;
1730
1731 for sheet_name in target_sheets {
1732 let graph = workbook.formula_graph(&sheet_name)?;
1733 for group in graph.groups() {
1734 if !group.is_volatile {
1735 continue;
1736 }
1737 for address in group.addresses.iter().take(50) {
1738 items.push(VolatileScanEntry {
1739 address: address.clone(),
1740 sheet_name: sheet_name.clone(),
1741 function: "volatile".to_string(),
1742 note: Some(group.formula.clone()),
1743 });
1744 }
1745 if group.addresses.len() > 50 {
1746 truncated = true;
1747 }
1748 }
1749 }
1750
1751 let response = VolatileScanResponse {
1752 workbook_id: workbook.id.clone(),
1753 workbook_short_id: workbook.short_id.clone(),
1754 items,
1755 truncated,
1756 };
1757 Ok(response)
1758}
1759
1760#[derive(Debug, Deserialize, JsonSchema)]
1761pub struct WorkbookStyleSummaryParams {
1762 #[serde(alias = "workbook_id")]
1763 pub workbook_or_fork_id: WorkbookId,
1764 pub max_styles: Option<u32>,
1765 pub max_conditional_formats: Option<u32>,
1766 pub max_cells_scan: Option<u32>,
1767}
1768
1769#[derive(Debug)]
1770struct WorkbookStyleAccum {
1771 descriptor: StyleDescriptor,
1772 occurrences: u32,
1773 tags: HashSet<String>,
1774 example_cells: Vec<String>,
1775}
1776
1777impl WorkbookStyleAccum {
1778 fn new(descriptor: StyleDescriptor) -> Self {
1779 Self {
1780 descriptor,
1781 occurrences: 0,
1782 tags: HashSet::new(),
1783 example_cells: Vec::new(),
1784 }
1785 }
1786}
1787
1788pub async fn workbook_style_summary(
1789 state: Arc<AppState>,
1790 params: WorkbookStyleSummaryParams,
1791) -> Result<WorkbookStyleSummaryResponse> {
1792 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
1793 let sheet_names = workbook.sheet_names();
1794
1795 const STYLE_EXAMPLE_LIMIT: usize = 5;
1796 const STYLE_LIMIT_DEFAULT: usize = 200;
1797 const CF_LIMIT_DEFAULT: usize = 200;
1798 const CELL_SCAN_LIMIT_DEFAULT: usize = 500_000;
1799
1800 let style_limit = params
1801 .max_styles
1802 .map(|v| v as usize)
1803 .unwrap_or(STYLE_LIMIT_DEFAULT);
1804 let cf_limit = params
1805 .max_conditional_formats
1806 .map(|v| v as usize)
1807 .unwrap_or(CF_LIMIT_DEFAULT);
1808 let cell_scan_limit = params
1809 .max_cells_scan
1810 .map(|v| v as usize)
1811 .unwrap_or(CELL_SCAN_LIMIT_DEFAULT);
1812
1813 let mut acc: HashMap<String, WorkbookStyleAccum> = HashMap::new();
1814 let mut scanned_cells: usize = 0;
1815 let mut scan_truncated = false;
1816
1817 for sheet_name in &sheet_names {
1818 if scan_truncated {
1819 break;
1820 }
1821 workbook.with_sheet(sheet_name, |sheet| {
1822 for cell in sheet.get_cell_collection() {
1823 if scanned_cells >= cell_scan_limit {
1824 scan_truncated = true;
1825 break;
1826 }
1827 scanned_cells += 1;
1828
1829 let address = cell.get_coordinate().get_coordinate().to_string();
1830 let descriptor = crate::styles::descriptor_from_style(cell.get_style());
1831 let style_id = crate::styles::stable_style_id(&descriptor);
1832
1833 let entry = acc
1834 .entry(style_id.clone())
1835 .or_insert_with(|| WorkbookStyleAccum::new(descriptor.clone()));
1836 entry.occurrences += 1;
1837 if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
1838 entry.example_cells.push(format!("{sheet_name}!{address}"));
1839 }
1840
1841 if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
1842 for tag in tagging.tags {
1843 entry.tags.insert(tag);
1844 }
1845 }
1846 }
1847 })?;
1848 }
1849
1850 let total_styles = acc.len() as u32;
1851 let mut styles: Vec<WorkbookStyleUsage> = acc
1852 .into_iter()
1853 .map(|(style_id, entry)| {
1854 let mut tags: Vec<String> = entry.tags.into_iter().collect();
1855 tags.sort();
1856 WorkbookStyleUsage {
1857 style_id,
1858 occurrences: entry.occurrences,
1859 tags,
1860 example_cells: entry.example_cells,
1861 descriptor: Some(entry.descriptor),
1862 }
1863 })
1864 .collect();
1865
1866 styles.sort_by(|a, b| {
1867 b.occurrences
1868 .cmp(&a.occurrences)
1869 .then_with(|| a.style_id.cmp(&b.style_id))
1870 });
1871
1872 let inferred_default_style_id = styles.first().map(|s| s.style_id.clone());
1873 let mut inferred_default_font = styles
1874 .first()
1875 .and_then(|s| s.descriptor.as_ref().and_then(|d| d.font.clone()));
1876
1877 let styles_truncated = if styles.len() > style_limit {
1878 styles.truncate(style_limit);
1879 true
1880 } else {
1881 false
1882 };
1883
1884 let theme = workbook.with_spreadsheet(|book| {
1885 let theme = book.get_theme();
1886 let elements = theme.get_theme_elements();
1887 let scheme = elements.get_color_scheme();
1888 let mut colors = BTreeMap::new();
1889
1890 let mut insert_color = |name: &str, value: String| {
1891 if !value.trim().is_empty() {
1892 colors.insert(name.to_string(), value);
1893 }
1894 };
1895
1896 insert_color("dk1", scheme.get_dk1().get_val());
1897 insert_color("lt1", scheme.get_lt1().get_val());
1898 insert_color("dk2", scheme.get_dk2().get_val());
1899 insert_color("lt2", scheme.get_lt2().get_val());
1900 insert_color("accent1", scheme.get_accent1().get_val());
1901 insert_color("accent2", scheme.get_accent2().get_val());
1902 insert_color("accent3", scheme.get_accent3().get_val());
1903 insert_color("accent4", scheme.get_accent4().get_val());
1904 insert_color("accent5", scheme.get_accent5().get_val());
1905 insert_color("accent6", scheme.get_accent6().get_val());
1906 insert_color("hlink", scheme.get_hlink().get_val());
1907 insert_color("fol_hlink", scheme.get_fol_hlink().get_val());
1908
1909 let font_scheme = elements.get_font_scheme();
1910 let major = font_scheme.get_major_font();
1911 let minor = font_scheme.get_minor_font();
1912 let font_scheme_summary = ThemeFontSchemeSummary {
1913 major_latin: Some(major.get_latin_font().get_typeface().to_string())
1914 .filter(|s| !s.trim().is_empty()),
1915 major_east_asian: Some(major.get_east_asian_font().get_typeface().to_string())
1916 .filter(|s| !s.trim().is_empty()),
1917 major_complex_script: Some(major.get_complex_script_font().get_typeface().to_string())
1918 .filter(|s| !s.trim().is_empty()),
1919 minor_latin: Some(minor.get_latin_font().get_typeface().to_string())
1920 .filter(|s| !s.trim().is_empty()),
1921 minor_east_asian: Some(minor.get_east_asian_font().get_typeface().to_string())
1922 .filter(|s| !s.trim().is_empty()),
1923 minor_complex_script: Some(minor.get_complex_script_font().get_typeface().to_string())
1924 .filter(|s| !s.trim().is_empty()),
1925 };
1926
1927 ThemeSummary {
1928 name: Some(theme.get_name().to_string()).filter(|s| !s.trim().is_empty()),
1929 colors,
1930 font_scheme: font_scheme_summary,
1931 }
1932 })?;
1933
1934 if inferred_default_font.is_none()
1935 && let Some(name) = theme
1936 .font_scheme
1937 .minor_latin
1938 .clone()
1939 .or_else(|| theme.font_scheme.major_latin.clone())
1940 {
1941 inferred_default_font = Some(FontDescriptor {
1942 name: Some(name),
1943 size: None,
1944 bold: None,
1945 italic: None,
1946 underline: None,
1947 strikethrough: None,
1948 color: None,
1949 });
1950 }
1951
1952 let mut conditional_formats: Vec<ConditionalFormatSummary> = Vec::new();
1953 let mut conditional_formats_truncated = false;
1954 {
1955 use umya_spreadsheet::structs::EnumTrait;
1956 for sheet_name in &sheet_names {
1957 if conditional_formats_truncated {
1958 break;
1959 }
1960 workbook.with_sheet(sheet_name, |sheet| {
1961 for cf in sheet.get_conditional_formatting_collection() {
1962 if conditional_formats.len() >= cf_limit {
1963 conditional_formats_truncated = true;
1964 break;
1965 }
1966 let range = cf.get_sequence_of_references().get_sqref().to_string();
1967 let mut types: HashSet<String> = HashSet::new();
1968 for rule in cf.get_conditional_collection() {
1969 types.insert(rule.get_type().get_value_string().to_string());
1970 }
1971 let mut rule_types: Vec<String> = types.into_iter().collect();
1972 rule_types.sort();
1973 conditional_formats.push(ConditionalFormatSummary {
1974 sheet_name: sheet_name.clone(),
1975 range,
1976 rule_types,
1977 rule_count: cf.get_conditional_collection().len() as u32,
1978 });
1979 }
1980 })?;
1981 }
1982 }
1983
1984 let mut notes: Vec<String> = Vec::new();
1985 if scan_truncated {
1986 notes.push(format!(
1987 "Stopped scanning after {cell_scan_limit} cells; style counts may be incomplete."
1988 ));
1989 }
1990 notes.push(
1991 "Named styles are not directly exposed by umya-spreadsheet; styles here are inferred from cell formatting."
1992 .to_string(),
1993 );
1994
1995 Ok(WorkbookStyleSummaryResponse {
1996 workbook_id: workbook.id.clone(),
1997 workbook_short_id: workbook.short_id.clone(),
1998 theme: Some(theme),
1999 inferred_default_style_id,
2000 inferred_default_font,
2001 styles,
2002 total_styles,
2003 styles_truncated,
2004 conditional_formats,
2005 conditional_formats_truncated,
2006 scan_truncated,
2007 notes,
2008 })
2009}
2010
2011#[derive(Debug, Deserialize, JsonSchema)]
2012pub struct SheetStylesParams {
2013 #[serde(alias = "workbook_id")]
2014 pub workbook_or_fork_id: WorkbookId,
2015 pub sheet_name: String,
2016 #[serde(default)]
2017 pub scope: Option<SheetStylesScope>,
2018 #[serde(default)]
2019 pub granularity: Option<String>,
2020 #[serde(default)]
2021 pub max_items: Option<usize>,
2022}
2023
2024#[derive(Debug, Deserialize, JsonSchema)]
2025#[serde(tag = "kind", rename_all = "snake_case")]
2026pub enum SheetStylesScope {
2027 Range { range: String },
2028 Region { region_id: u32 },
2029}
2030
2031#[derive(Debug)]
2032struct StyleAccum {
2033 descriptor: StyleDescriptor,
2034 occurrences: u32,
2035 tags: HashSet<String>,
2036 example_cells: Vec<String>,
2037 positions: Vec<(u32, u32)>,
2038}
2039
2040impl StyleAccum {
2041 fn new(descriptor: StyleDescriptor) -> Self {
2042 Self {
2043 descriptor,
2044 occurrences: 0,
2045 tags: HashSet::new(),
2046 example_cells: Vec::new(),
2047 positions: Vec::new(),
2048 }
2049 }
2050}
2051
2052pub async fn sheet_styles(
2053 state: Arc<AppState>,
2054 params: SheetStylesParams,
2055) -> Result<SheetStylesResponse> {
2056 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2057 const STYLE_EXAMPLE_LIMIT: usize = 5;
2058 const STYLE_RANGE_LIMIT: usize = 50;
2059 const STYLE_LIMIT: usize = 200;
2060 const MAX_MAX_ITEMS: usize = 5000;
2061
2062 let metrics = workbook.get_sheet_metrics(¶ms.sheet_name)?;
2063 let full_bounds = (
2064 (1, 1),
2065 (
2066 metrics.metrics.column_count.max(1),
2067 metrics.metrics.row_count.max(1),
2068 ),
2069 );
2070
2071 let bounds = match ¶ms.scope {
2072 Some(SheetStylesScope::Range { range }) => {
2073 parse_range(range).ok_or_else(|| anyhow!("invalid range: {}", range))?
2074 }
2075 Some(SheetStylesScope::Region { region_id }) => {
2076 let region = workbook.detected_region(¶ms.sheet_name, *region_id)?;
2077 parse_range(®ion.bounds)
2078 .ok_or_else(|| anyhow!("invalid region bounds: {}", region.bounds))?
2079 }
2080 None => full_bounds,
2081 };
2082
2083 let granularity = params
2084 .granularity
2085 .as_deref()
2086 .unwrap_or("runs")
2087 .to_ascii_lowercase();
2088 if granularity != "runs" && granularity != "cells" {
2089 return Err(anyhow!(
2090 "invalid granularity: {} (expected runs|cells)",
2091 granularity
2092 ));
2093 }
2094
2095 let max_items = params
2096 .max_items
2097 .unwrap_or(STYLE_RANGE_LIMIT)
2098 .clamp(1, MAX_MAX_ITEMS);
2099
2100 let (styles, total_styles, styles_truncated) =
2101 workbook.with_sheet(¶ms.sheet_name, |sheet| {
2102 let mut acc: HashMap<String, StyleAccum> = HashMap::new();
2103
2104 for cell in sheet.get_cell_collection() {
2105 let address = cell.get_coordinate().get_coordinate().to_string();
2106 let Some((col, row)) = parse_address(&address) else {
2107 continue;
2108 };
2109 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
2110 continue;
2111 }
2112
2113 let descriptor = crate::styles::descriptor_from_style(cell.get_style());
2114 let style_id = crate::styles::stable_style_id(&descriptor);
2115
2116 let entry = acc
2117 .entry(style_id.clone())
2118 .or_insert_with(|| StyleAccum::new(descriptor.clone()));
2119 entry.occurrences += 1;
2120 if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
2121 entry.example_cells.push(address.clone());
2122 }
2123
2124 if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
2125 for tag in tagging.tags {
2126 entry.tags.insert(tag);
2127 }
2128 }
2129
2130 entry.positions.push((row, col));
2131 }
2132
2133 let mut summaries: Vec<StyleSummary> = acc
2134 .into_iter()
2135 .map(|(style_id, mut entry)| {
2136 entry.positions.sort_unstable();
2137 entry.positions.dedup();
2138
2139 let (cell_ranges, ranges_truncated) = if granularity == "cells" {
2140 let mut out = Vec::new();
2141 for (row, col) in entry.positions.iter().take(max_items) {
2142 out.push(crate::utils::cell_address(*col, *row));
2143 }
2144 (out, entry.positions.len() > max_items)
2145 } else {
2146 crate::styles::compress_positions_to_ranges(&entry.positions, max_items)
2147 };
2148
2149 StyleSummary {
2150 style_id,
2151 occurrences: entry.occurrences,
2152 tags: entry.tags.into_iter().collect(),
2153 example_cells: entry.example_cells,
2154 descriptor: Some(entry.descriptor),
2155 cell_ranges,
2156 ranges_truncated,
2157 }
2158 })
2159 .collect();
2160
2161 summaries.sort_by(|a, b| {
2162 b.occurrences
2163 .cmp(&a.occurrences)
2164 .then_with(|| a.style_id.cmp(&b.style_id))
2165 });
2166
2167 let total = summaries.len() as u32;
2168 let truncated = if summaries.len() > STYLE_LIMIT {
2169 summaries.truncate(STYLE_LIMIT);
2170 true
2171 } else {
2172 false
2173 };
2174
2175 Ok::<_, anyhow::Error>((summaries, total, truncated))
2176 })??;
2177
2178 Ok(SheetStylesResponse {
2179 workbook_id: workbook.id.clone(),
2180 workbook_short_id: workbook.short_id.clone(),
2181 sheet_name: params.sheet_name.clone(),
2182 styles,
2183 conditional_rules: Vec::new(),
2184 total_styles,
2185 styles_truncated,
2186 })
2187}
2188
2189pub async fn range_values(
2190 state: Arc<AppState>,
2191 params: RangeValuesParams,
2192) -> Result<RangeValuesResponse> {
2193 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2194 let include_headers = params.include_headers.unwrap_or(false);
2195 let values = workbook.with_sheet(¶ms.sheet_name, |sheet| {
2196 params
2197 .ranges
2198 .iter()
2199 .filter_map(|range| {
2200 parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
2201 let mut rows = Vec::new();
2202 for r in start_row..=end_row {
2203 let mut row_vals = Vec::new();
2204 for c in start_col..=end_col {
2205 if include_headers && r == start_row && start_row == 1 {
2206 row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
2207 } else {
2208 row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
2209 }
2210 }
2211 rows.push(row_vals);
2212 }
2213 RangeValuesEntry {
2214 range: range.clone(),
2215 rows,
2216 }
2217 })
2218 })
2219 .collect()
2220 })?;
2221
2222 Ok(RangeValuesResponse {
2223 workbook_id: workbook.id.clone(),
2224 workbook_short_id: workbook.short_id.clone(),
2225 sheet_name: params.sheet_name,
2226 values,
2227 })
2228}
2229
2230pub async fn find_value(
2231 state: Arc<AppState>,
2232 params: FindValueParams,
2233) -> Result<FindValueResponse> {
2234 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2235 let mut matches = Vec::new();
2236 let mut truncated = false;
2237 let mode = params.mode.clone().unwrap_or_else(|| {
2238 if params.label.is_some() {
2239 FindMode::Label
2240 } else {
2241 FindMode::Value
2242 }
2243 });
2244 let match_mode = params
2245 .match_mode
2246 .as_deref()
2247 .unwrap_or("contains")
2248 .to_ascii_lowercase();
2249 let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
2250
2251 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
2252 vec![sheet.clone()]
2253 } else {
2254 workbook.sheet_names()
2255 };
2256
2257 for sheet_name in target_sheets {
2258 let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
2259 let default_bounds = (
2260 (1, 1),
2261 (
2262 metrics_entry.metrics.column_count.max(1),
2263 metrics_entry.metrics.row_count.max(1),
2264 ),
2265 );
2266 let region_bounds = params
2267 .region_id
2268 .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
2269 let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
2270 collect_value_matches(
2271 sheet,
2272 &sheet_name,
2273 &mode,
2274 &match_mode,
2275 &direction,
2276 ¶ms,
2277 region_bounds.as_ref(),
2278 default_bounds,
2279 )
2280 })??;
2281 matches.extend(sheet_matches);
2282 if matches.len() as u32 >= params.limit {
2283 truncated = true;
2284 matches.truncate(params.limit as usize);
2285 break;
2286 }
2287 }
2288
2289 Ok(FindValueResponse {
2290 workbook_id: workbook.id.clone(),
2291 workbook_short_id: workbook.short_id.clone(),
2292 matches,
2293 truncated,
2294 })
2295}
2296
2297pub async fn read_table(
2298 state: Arc<AppState>,
2299 params: ReadTableParams,
2300) -> Result<ReadTableResponse> {
2301 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2302 let resolved = resolve_table_target(&workbook, ¶ms)?;
2303 let limit = params.limit.unwrap_or(100) as usize;
2304 let offset = params.offset.unwrap_or(0) as usize;
2305 let sample_mode = params
2306 .sample_mode
2307 .clone()
2308 .unwrap_or_else(|| "first".to_string());
2309
2310 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
2311 extract_table_rows(
2312 sheet,
2313 &resolved,
2314 params.header_row,
2315 params.header_rows,
2316 params.columns.clone(),
2317 params.filters.clone(),
2318 limit,
2319 offset,
2320 &sample_mode,
2321 )
2322 })??;
2323
2324 let has_more = offset + rows.len() < total_rows as usize;
2325
2326 Ok(ReadTableResponse {
2327 workbook_id: workbook.id.clone(),
2328 workbook_short_id: workbook.short_id.clone(),
2329 sheet_name: resolved.sheet_name,
2330 table_name: resolved.table_name,
2331 headers,
2332 rows,
2333 total_rows,
2334 has_more,
2335 })
2336}
2337
2338pub async fn table_profile(
2339 state: Arc<AppState>,
2340 params: TableProfileParams,
2341) -> Result<TableProfileResponse> {
2342 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2343 let resolved = resolve_table_target(
2344 &workbook,
2345 &ReadTableParams {
2346 workbook_or_fork_id: params.workbook_or_fork_id.clone(),
2347 sheet_name: params.sheet_name.clone(),
2348 table_name: params.table_name.clone(),
2349 region_id: params.region_id,
2350 range: None,
2351 header_row: None,
2352 header_rows: None,
2353 columns: None,
2354 filters: None,
2355 sample_mode: params.sample_mode.clone(),
2356 limit: params.sample_size,
2357 offset: Some(0),
2358 },
2359 )?;
2360
2361 let sample_size = params.sample_size.unwrap_or(10) as usize;
2362 let sample_mode = params
2363 .sample_mode
2364 .clone()
2365 .unwrap_or_else(|| "distributed".to_string());
2366
2367 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
2368 extract_table_rows(
2369 sheet,
2370 &resolved,
2371 None,
2372 None,
2373 None,
2374 None,
2375 sample_size,
2376 0,
2377 &sample_mode,
2378 )
2379 })??;
2380
2381 let column_types = summarize_columns(&headers, &rows);
2382
2383 Ok(TableProfileResponse {
2384 workbook_id: workbook.id.clone(),
2385 workbook_short_id: workbook.short_id.clone(),
2386 sheet_name: resolved.sheet_name,
2387 table_name: resolved.table_name,
2388 headers,
2389 column_types,
2390 row_count: total_rows,
2391 samples: rows,
2392 notes: Vec::new(),
2393 })
2394}
2395
2396#[derive(Debug, Deserialize, JsonSchema)]
2397pub struct ManifestStubParams {
2398 #[serde(alias = "workbook_id")]
2399 pub workbook_or_fork_id: WorkbookId,
2400 pub sheet_filter: Option<String>,
2401}
2402
2403pub async fn get_manifest_stub(
2404 state: Arc<AppState>,
2405 params: ManifestStubParams,
2406) -> Result<ManifestStubResponse> {
2407 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2408 let mut summaries = workbook.list_summaries()?;
2409
2410 if let Some(filter) = ¶ms.sheet_filter {
2411 summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
2412 }
2413
2414 let sheets = summaries
2415 .into_iter()
2416 .map(|summary| ManifestSheetStub {
2417 sheet_name: summary.name.clone(),
2418 classification: summary.classification.clone(),
2419 candidate_expectations: vec![format!(
2420 "Review {} sheet for expectation candidates",
2421 format!("{:?}", summary.classification).to_ascii_lowercase()
2422 )],
2423 notes: summary.style_tags,
2424 })
2425 .collect();
2426
2427 let response = ManifestStubResponse {
2428 workbook_id: workbook.id.clone(),
2429 workbook_short_id: workbook.short_id.clone(),
2430 slug: workbook.slug.clone(),
2431 sheets,
2432 };
2433 Ok(response)
2434}
2435
2436#[derive(Debug, Deserialize, JsonSchema)]
2437pub struct CloseWorkbookParams {
2438 #[serde(alias = "workbook_id")]
2439 pub workbook_or_fork_id: WorkbookId,
2440}
2441
2442pub async fn close_workbook(
2443 state: Arc<AppState>,
2444 params: CloseWorkbookParams,
2445) -> Result<CloseWorkbookResponse> {
2446 state.close_workbook(¶ms.workbook_or_fork_id)?;
2447 Ok(CloseWorkbookResponse {
2448 workbook_id: params.workbook_or_fork_id.clone(),
2449 message: format!("workbook {} evicted", params.workbook_or_fork_id.as_str()),
2450 })
2451}
2452#[allow(clippy::too_many_arguments)]
2453fn collect_formula_matches(
2454 sheet: &umya_spreadsheet::Worksheet,
2455 sheet_name: &str,
2456 query: &str,
2457 case_sensitive: bool,
2458 include_context: bool,
2459 offset: u32,
2460 limit: u32,
2461 seen_so_far: u32,
2462) -> (Vec<FindFormulaMatch>, u32, bool) {
2463 use crate::workbook::cell_to_value;
2464
2465 let mut results = Vec::new();
2466 let mut seen = seen_so_far;
2467
2468 for cell in sheet.get_cell_collection() {
2469 if !cell.is_formula() {
2470 continue;
2471 }
2472 let formula = cell.get_formula();
2473 let haystack = if case_sensitive {
2474 formula.to_string()
2475 } else {
2476 formula.to_ascii_lowercase()
2477 };
2478 if !haystack.contains(query) {
2479 continue;
2480 }
2481
2482 if seen < offset {
2483 seen += 1;
2484 continue;
2485 }
2486
2487 if results.len() as u32 >= limit {
2488 return (results, seen, true);
2489 }
2490
2491 let coord = cell.get_coordinate();
2492 let column = *coord.get_col_num();
2493 let row = *coord.get_row_num();
2494
2495 let context = if include_context {
2496 let columns = vec![column];
2497 let context_row = build_row_snapshot(sheet, row, &columns, true, false);
2498 let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
2499 vec![header_row, context_row]
2500 } else {
2501 Vec::new()
2502 };
2503
2504 results.push(FindFormulaMatch {
2505 address: coord.get_coordinate(),
2506 sheet_name: sheet_name.to_string(),
2507 formula: formula.to_string(),
2508 cached_value: cell_to_value(cell),
2509 context,
2510 });
2511
2512 seen += 1;
2513 }
2514
2515 (results, seen, false)
2516}
2517
2518#[derive(Clone)]
2519struct TraceFormulaInfo {
2520 fingerprint: String,
2521 formula: String,
2522}
2523
2524#[derive(Clone)]
2525struct TraceEdgeRaw {
2526 from: String,
2527 to: String,
2528 neighbor: String,
2529}
2530
2531#[derive(Clone)]
2532struct LayerLinks {
2533 depth: u32,
2534 edges: Vec<TraceEdgeRaw>,
2535 truncated_cells: Vec<String>,
2536}
2537
2538#[derive(Clone)]
2539struct NeighborDetail {
2540 address: String,
2541 column: Option<u32>,
2542 row: Option<u32>,
2543 kind: TraceCellKind,
2544 value: Option<CellValue>,
2545 formula: Option<String>,
2546 fingerprint: Option<String>,
2547 external: bool,
2548}
2549
2550fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2551 let mut map = HashMap::new();
2552 for group in graph.groups() {
2553 for address in group.addresses.clone() {
2554 map.insert(
2555 address.to_ascii_uppercase(),
2556 TraceFormulaInfo {
2557 fingerprint: group.fingerprint.clone(),
2558 formula: group.formula.clone(),
2559 },
2560 );
2561 }
2562 }
2563 map
2564}
2565
2566struct TraceConfig<'a> {
2567 direction: &'a TraceDirection,
2568 origin: &'a str,
2569 sheet_name: &'a str,
2570 depth_limit: u32,
2571 page_size: usize,
2572}
2573
2574fn build_trace_layers(
2575 workbook: &WorkbookContext,
2576 graph: &FormulaGraph,
2577 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2578 config: &TraceConfig<'_>,
2579 cursor: Option<TraceCursor>,
2580) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2581 let layer_links =
2582 collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2583 let mut layers = Vec::new();
2584 let mut next_cursor = None;
2585 let mut notes = Vec::new();
2586 let focus_depth = cursor.as_ref().map(|c| c.depth);
2587
2588 for layer in layer_links {
2589 let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2590 let offset = cursor
2591 .as_ref()
2592 .filter(|c| c.depth == layer.depth)
2593 .map(|c| c.offset)
2594 .unwrap_or(0);
2595
2596 let mut node_set: HashSet<String> = HashSet::new();
2597 for edge in &layer.edges {
2598 node_set.insert(edge.neighbor.clone());
2599 }
2600 let mut nodes: Vec<String> = node_set.into_iter().collect();
2601 nodes.sort_by(|a, b| compare_addresses(a, b));
2602
2603 let details = workbook.with_sheet(config.sheet_name, |sheet| {
2604 collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2605 })?;
2606 let total_nodes = details.len();
2607 let start = offset.min(total_nodes);
2608 let end = if produce_edges {
2609 (start + config.page_size).min(total_nodes)
2610 } else {
2611 start
2612 };
2613 let selected_slice = if produce_edges {
2614 &details[start..end]
2615 } else {
2616 &details[0..0]
2617 };
2618 let selected_addresses: HashSet<String> = selected_slice
2619 .iter()
2620 .map(|detail| detail.address.clone())
2621 .collect();
2622
2623 let summary = build_layer_summary(&details);
2624 let range_highlights = build_range_highlights(&details);
2625 let group_highlights = build_formula_group_highlights(&details);
2626 let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2627
2628 let highlights = TraceLayerHighlights {
2629 top_ranges: range_highlights.clone(),
2630 top_formula_groups: group_highlights.clone(),
2631 notable_cells,
2632 };
2633
2634 let edges = if produce_edges {
2635 build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2636 } else {
2637 Vec::new()
2638 };
2639
2640 let has_more = produce_edges && end < total_nodes;
2641 if has_more && next_cursor.is_none() {
2642 next_cursor = Some(TraceCursor {
2643 depth: layer.depth,
2644 offset: end,
2645 });
2646 }
2647 if has_more {
2648 notes.push(format!(
2649 "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2650 layer.depth, end, total_nodes, layer.depth, end
2651 ));
2652 }
2653
2654 if !layer.truncated_cells.is_empty() {
2655 let cell_list = if layer.truncated_cells.len() <= 3 {
2656 layer.truncated_cells.join(", ")
2657 } else {
2658 format!(
2659 "{}, ... ({} more)",
2660 layer.truncated_cells[..3].join(", "),
2661 layer.truncated_cells.len() - 3
2662 )
2663 };
2664 notes.push(format!(
2665 "Layer {}: dependents truncated at {} per cell for: {}",
2666 layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
2667 ));
2668 }
2669
2670 layers.push(TraceLayer {
2671 depth: layer.depth,
2672 summary,
2673 highlights,
2674 edges,
2675 has_more,
2676 });
2677 }
2678
2679 Ok((layers, next_cursor, notes))
2680}
2681
2682fn collect_layer_links(
2683 graph: &FormulaGraph,
2684 direction: &TraceDirection,
2685 origin: &str,
2686 depth_limit: u32,
2687) -> Vec<LayerLinks> {
2688 let mut visited: HashSet<String> = HashSet::new();
2689 visited.insert(origin.to_string());
2690 let mut frontier = vec![origin.to_string()];
2691 let mut layers = Vec::new();
2692
2693 for depth in 1..=depth_limit {
2694 let mut next_frontier_set: HashSet<String> = HashSet::new();
2695 let mut edges = Vec::new();
2696 let mut truncated_cells = Vec::new();
2697
2698 for cell in &frontier {
2699 let (neighbors, was_truncated) = match direction {
2700 TraceDirection::Precedents => (graph.precedents(cell), false),
2701 TraceDirection::Dependents => {
2702 graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
2703 }
2704 };
2705
2706 if was_truncated {
2707 truncated_cells.push(cell.clone());
2708 }
2709
2710 for neighbor in neighbors {
2711 let neighbor_upper = neighbor.to_ascii_uppercase();
2712 let edge = match direction {
2713 TraceDirection::Precedents => TraceEdgeRaw {
2714 from: cell.clone(),
2715 to: neighbor_upper.clone(),
2716 neighbor: neighbor_upper.clone(),
2717 },
2718 TraceDirection::Dependents => TraceEdgeRaw {
2719 from: neighbor_upper.clone(),
2720 to: cell.clone(),
2721 neighbor: neighbor_upper.clone(),
2722 },
2723 };
2724 edges.push(edge);
2725 if visited.insert(neighbor_upper.clone()) {
2726 next_frontier_set.insert(neighbor_upper);
2727 }
2728 }
2729 }
2730
2731 if edges.is_empty() {
2732 break;
2733 }
2734
2735 layers.push(LayerLinks {
2736 depth,
2737 edges,
2738 truncated_cells,
2739 });
2740 if next_frontier_set.is_empty() {
2741 break;
2742 }
2743 let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2744 next_frontier.sort();
2745 frontier = next_frontier;
2746 }
2747
2748 layers
2749}
2750
2751fn collect_neighbor_details(
2752 sheet: &umya_spreadsheet::Worksheet,
2753 current_sheet: &str,
2754 addresses: &[String],
2755 formula_lookup: &HashMap<String, TraceFormulaInfo>,
2756) -> Vec<NeighborDetail> {
2757 let mut details = Vec::new();
2758 for address in addresses {
2759 let (sheet_part, cell_part) = split_sheet_and_cell(address);
2760 let normalized_sheet = sheet_part
2761 .as_ref()
2762 .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2763 let is_external = normalized_sheet
2764 .as_ref()
2765 .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2766 .unwrap_or(false);
2767
2768 let Some(cell_ref) = cell_part else {
2769 details.push(NeighborDetail {
2770 address: address.clone(),
2771 column: None,
2772 row: None,
2773 kind: TraceCellKind::External,
2774 value: None,
2775 formula: None,
2776 fingerprint: None,
2777 external: true,
2778 });
2779 continue;
2780 };
2781
2782 let cell_ref_upper = cell_ref.to_ascii_uppercase();
2783
2784 if is_external {
2785 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2786 details.push(NeighborDetail {
2787 address: address.clone(),
2788 column: None,
2789 row: None,
2790 kind: TraceCellKind::External,
2791 value: None,
2792 formula: formula_info.map(|info| info.formula.clone()),
2793 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2794 external: true,
2795 });
2796 continue;
2797 }
2798
2799 let Some((col, row)) = parse_address(&cell_ref_upper) else {
2800 details.push(NeighborDetail {
2801 address: address.clone(),
2802 column: None,
2803 row: None,
2804 kind: TraceCellKind::External,
2805 value: None,
2806 formula: None,
2807 fingerprint: None,
2808 external: true,
2809 });
2810 continue;
2811 };
2812
2813 let cell_opt = sheet.get_cell((&col, &row));
2814 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2815 if let Some(cell) = cell_opt {
2816 let value = cell_to_value(cell);
2817 let kind = if cell.is_formula() {
2818 TraceCellKind::Formula
2819 } else if value.is_some() {
2820 TraceCellKind::Literal
2821 } else {
2822 TraceCellKind::Blank
2823 };
2824 details.push(NeighborDetail {
2825 address: address.clone(),
2826 column: Some(col),
2827 row: Some(row),
2828 kind,
2829 value,
2830 formula: formula_info.map(|info| info.formula.clone()),
2831 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2832 external: false,
2833 });
2834 } else {
2835 details.push(NeighborDetail {
2836 address: address.clone(),
2837 column: Some(col),
2838 row: Some(row),
2839 kind: TraceCellKind::Blank,
2840 value: None,
2841 formula: formula_info.map(|info| info.formula.clone()),
2842 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2843 external: false,
2844 });
2845 }
2846 }
2847 details
2848}
2849
2850fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2851 let mut summary = TraceLayerSummary {
2852 total_nodes: details.len(),
2853 formula_nodes: 0,
2854 value_nodes: 0,
2855 blank_nodes: 0,
2856 external_nodes: 0,
2857 unique_formula_groups: 0,
2858 };
2859
2860 let mut fingerprints: HashSet<String> = HashSet::new();
2861
2862 for detail in details {
2863 match detail.kind {
2864 TraceCellKind::Formula => {
2865 summary.formula_nodes += 1;
2866 if let Some(fp) = &detail.fingerprint {
2867 fingerprints.insert(fp.clone());
2868 }
2869 }
2870 TraceCellKind::Literal => summary.value_nodes += 1,
2871 TraceCellKind::Blank => summary.blank_nodes += 1,
2872 TraceCellKind::External => summary.external_nodes += 1,
2873 }
2874 }
2875
2876 summary.unique_formula_groups = fingerprints.len();
2877 summary
2878}
2879
2880fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2881 let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2882 for detail in details {
2883 if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2884 let entry = aggregates
2885 .entry(fingerprint.clone())
2886 .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2887 entry.1 += 1;
2888 if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2889 entry.2.push(detail.address.clone());
2890 }
2891 }
2892 }
2893
2894 let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2895 .into_iter()
2896 .map(
2897 |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2898 fingerprint,
2899 formula,
2900 count,
2901 sample_addresses,
2902 },
2903 )
2904 .collect();
2905
2906 highlights.sort_by(|a, b| b.count.cmp(&a.count));
2907 highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2908 highlights
2909}
2910
2911fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2912 let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2913 for detail in details {
2914 if let (Some(col), Some(_row)) = (detail.column, detail.row)
2915 && !detail.external
2916 {
2917 by_column.entry(col).or_default().push(detail);
2918 }
2919 }
2920
2921 for column_entries in by_column.values_mut() {
2922 column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2923 }
2924
2925 let mut ranges = Vec::new();
2926 for entries in by_column.values() {
2927 let mut current: Vec<&NeighborDetail> = Vec::new();
2928 for detail in entries {
2929 if current.is_empty() {
2930 current.push(detail);
2931 continue;
2932 }
2933 let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2934 if detail.row.unwrap_or(0) == prev_row + 1 {
2935 current.push(detail);
2936 } else {
2937 if current.len() >= TRACE_RANGE_THRESHOLD {
2938 ranges.push(make_range_highlight(¤t));
2939 }
2940 current.clear();
2941 current.push(detail);
2942 }
2943 }
2944 if current.len() >= TRACE_RANGE_THRESHOLD {
2945 ranges.push(make_range_highlight(¤t));
2946 }
2947 }
2948
2949 ranges.sort_by(|a, b| b.count.cmp(&a.count));
2950 ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2951 ranges
2952}
2953
2954fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2955 let mut literals = 0usize;
2956 let mut formulas = 0usize;
2957 let mut blanks = 0usize;
2958 let mut sample_values = Vec::new();
2959 let mut sample_formulas = Vec::new();
2960 let mut sample_addresses = Vec::new();
2961
2962 for detail in details {
2963 match detail.kind {
2964 TraceCellKind::Formula => {
2965 formulas += 1;
2966 if let Some(formula) = &detail.formula
2967 && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2968 && !sample_formulas.contains(formula)
2969 {
2970 sample_formulas.push(formula.clone());
2971 }
2972 }
2973 TraceCellKind::Literal => {
2974 literals += 1;
2975 if let Some(value) = &detail.value
2976 && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2977 {
2978 sample_values.push(value.clone());
2979 }
2980 }
2981 TraceCellKind::Blank => blanks += 1,
2982 TraceCellKind::External => {}
2983 }
2984 if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2985 sample_addresses.push(detail.address.clone());
2986 }
2987 }
2988
2989 TraceRangeHighlight {
2990 start: details
2991 .first()
2992 .map(|d| d.address.clone())
2993 .unwrap_or_default(),
2994 end: details
2995 .last()
2996 .map(|d| d.address.clone())
2997 .unwrap_or_default(),
2998 count: details.len(),
2999 literals,
3000 formulas,
3001 blanks,
3002 sample_values,
3003 sample_formulas,
3004 sample_addresses,
3005 }
3006}
3007
3008fn build_notable_cells(
3009 details: &[NeighborDetail],
3010 ranges: &[TraceRangeHighlight],
3011 groups: &[TraceFormulaGroupHighlight],
3012) -> Vec<TraceCellHighlight> {
3013 let mut exclude: HashSet<String> = HashSet::new();
3014 for range in ranges {
3015 exclude.insert(range.start.clone());
3016 exclude.insert(range.end.clone());
3017 for addr in &range.sample_addresses {
3018 exclude.insert(addr.clone());
3019 }
3020 }
3021 for group in groups {
3022 for addr in &group.sample_addresses {
3023 exclude.insert(addr.clone());
3024 }
3025 }
3026
3027 let mut highlights = Vec::new();
3028 let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
3029
3030 for detail in details {
3031 if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
3032 break;
3033 }
3034 if exclude.contains(&detail.address) {
3035 continue;
3036 }
3037 let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
3038 if *counter >= 2 && detail.kind != TraceCellKind::External {
3039 continue;
3040 }
3041 highlights.push(TraceCellHighlight {
3042 address: detail.address.clone(),
3043 kind: detail.kind.clone(),
3044 value: detail.value.clone(),
3045 formula: detail.formula.clone(),
3046 });
3047 *counter += 1;
3048 }
3049
3050 highlights
3051}
3052
3053fn build_edges_for_layer(
3054 raw_edges: &[TraceEdgeRaw],
3055 selected: &HashSet<String>,
3056 formula_lookup: &HashMap<String, TraceFormulaInfo>,
3057) -> Vec<FormulaTraceEdge> {
3058 let mut edges = Vec::new();
3059 for edge in raw_edges {
3060 if selected.contains(&edge.neighbor) {
3061 let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
3062 .map(|info| info.formula.clone());
3063 edges.push(FormulaTraceEdge {
3064 from: edge.from.clone(),
3065 to: edge.to.clone(),
3066 formula,
3067 note: None,
3068 });
3069 }
3070 }
3071 edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
3072 edges
3073}
3074
3075fn lookup_formula_info<'a>(
3076 lookup: &'a HashMap<String, TraceFormulaInfo>,
3077 cell_ref: &str,
3078 original: &str,
3079) -> Option<&'a TraceFormulaInfo> {
3080 if let Some(info) = lookup.get(cell_ref) {
3081 return Some(info);
3082 }
3083 if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
3084 let upper = cell.to_ascii_uppercase();
3085 return lookup.get(&upper);
3086 }
3087 None
3088}
3089
3090fn compare_addresses(left: &str, right: &str) -> Ordering {
3091 let (sheet_left, cell_left) = split_sheet_and_cell(left);
3092 let (sheet_right, cell_right) = split_sheet_and_cell(right);
3093
3094 let sheet_left_key = sheet_left
3095 .as_ref()
3096 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
3097 .unwrap_or_default();
3098 let sheet_right_key = sheet_right
3099 .as_ref()
3100 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
3101 .unwrap_or_default();
3102
3103 match sheet_left_key.cmp(&sheet_right_key) {
3104 Ordering::Equal => {
3105 let left_core = cell_left.unwrap_or_else(|| left.to_string());
3106 let right_core = cell_right.unwrap_or_else(|| right.to_string());
3107 let left_coords = parse_address(&left_core.to_ascii_uppercase());
3108 let right_coords = parse_address(&right_core.to_ascii_uppercase());
3109 match (left_coords, right_coords) {
3110 (Some((lc, lr)), Some((rc, rr))) => lc
3111 .cmp(&rc)
3112 .then_with(|| lr.cmp(&rr))
3113 .then_with(|| left_core.cmp(&right_core)),
3114 _ => left_core.cmp(&right_core),
3115 }
3116 }
3117 other => other,
3118 }
3119}
3120
3121fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
3122 if let Some(idx) = address.rfind('!') {
3123 let sheet = address[..idx].to_string();
3124 let cell = address[idx + 1..].to_string();
3125 (Some(sheet), Some(cell))
3126 } else {
3127 (None, Some(address.to_string()))
3128 }
3129}
3130
3131fn clean_sheet_name(sheet: &str) -> String {
3132 let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
3133 let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
3134 after_bracket
3135 .trim_matches(|c| c == '\'' || c == '"')
3136 .to_string()
3137}