1use serde_json::{json, Value};
2use std::collections::HashMap;
3
4use crate::cli::args::InspectCommands;
5use crate::cli::common::{file_format, format_range, sheet_by_index};
6use crate::cli::envelope;
7use crate::cli::error::AppError;
8use crate::cli::sheet_query::{
9 cell_at, load_target_sheet, resolve_bounds, resolve_header_row, resolve_optional_header_row,
10};
11use crate::excel::{open_workbook, Cell, CellType, Sheet};
12use crate::utils::index_to_col_name;
13
14pub fn handle(cmd: InspectCommands) -> Result<Value, AppError> {
15 match cmd {
16 InspectCommands::Workbook { file, format: _ } => inspect_workbook(file),
17 InspectCommands::Sheet {
18 file,
19 sheet,
20 sheet_index,
21 format: _,
22 } => inspect_sheet(file, sheet, sheet_index),
23 InspectCommands::Sample {
24 file,
25 sheet,
26 sheet_index,
27 range,
28 rows,
29 header_row,
30 format: _,
31 } => inspect_sample(file, sheet, sheet_index, range, rows, header_row),
32 InspectCommands::Columns {
33 file,
34 sheet,
35 header_row,
36 format: _,
37 } => inspect_columns(file, sheet, header_row),
38 InspectCommands::Tables {
39 file,
40 sheet,
41 format: _,
42 } => inspect_tables(file, sheet),
43 }
44}
45
46fn inspect_workbook(file: std::path::PathBuf) -> Result<Value, AppError> {
47 let format_str = file_format(&file);
48 let path_str = file.to_string_lossy().to_string();
49
50 let workbook = open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
51
52 let sheets: Vec<Value> = workbook
53 .get_sheet_names()
54 .iter()
55 .enumerate()
56 .map(|(index, name)| {
57 let is_empty = if let Some(sheet) = workbook.get_sheet_by_index(index) {
58 sheet.max_rows == 0 || sheet.max_cols == 0
59 } else {
60 true
61 };
62 json!({
63 "name": name,
64 "index": index,
65 "is_empty": is_empty,
66 "is_hidden_if_available": false,
67 })
68 })
69 .collect();
70
71 let data = json!({
72 "sheet_count": sheets.len(),
73 "sheets": sheets,
74 });
75
76 Ok(envelope::success_envelope(
77 "inspect.workbook",
78 &path_str,
79 &format_str,
80 envelope::target_workbook(),
81 json!({}),
82 data,
83 vec![],
84 ))
85}
86
87fn inspect_sheet(
88 file: std::path::PathBuf,
89 sheet: Option<String>,
90 sheet_index: Option<usize>,
91) -> Result<Value, AppError> {
92 let format_str = file_format(&file);
93 let path_str = file.to_string_lossy().to_string();
94
95 let mut workbook =
96 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
97
98 let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
99
100 workbook
101 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
102 .map_err(crate::cli::error::anyhow_to_app_error)?;
103
104 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
105
106 let used_range = workbook
107 .get_used_range(resolved_sheet.index)
108 .map_err(crate::cli::error::anyhow_to_app_error)?;
109
110 let non_empty_rows = workbook
111 .count_non_empty_rows(resolved_sheet.index)
112 .map_err(crate::cli::error::anyhow_to_app_error)?;
113 let non_empty_cols = workbook
114 .count_non_empty_cols(resolved_sheet.index)
115 .map_err(crate::cli::error::anyhow_to_app_error)?;
116
117 let (header_candidates, recommended_header_row) = workbook
118 .find_header_candidates(resolved_sheet.index)
119 .map_err(crate::cli::error::anyhow_to_app_error)?;
120
121 let data = json!({
122 "name": sheet_obj.name,
123 "index": resolved_sheet.index,
124 "used_range": used_range,
125 "max_rows": sheet_obj.max_rows,
126 "max_cols": sheet_obj.max_cols,
127 "non_empty_rows": non_empty_rows,
128 "non_empty_cols": non_empty_cols,
129 "recommended_header_row": recommended_header_row,
130 "header_candidates": header_candidates,
131 });
132
133 Ok(envelope::success_envelope(
134 "inspect.sheet",
135 &path_str,
136 &format_str,
137 envelope::target_sheet(&resolved_sheet.name, resolved_sheet.index),
138 json!({}),
139 data,
140 vec![],
141 ))
142}
143
144fn inspect_sample(
145 file: std::path::PathBuf,
146 sheet: Option<String>,
147 sheet_index: Option<usize>,
148 range: Option<String>,
149 rows: Option<usize>,
150 header_row: String,
151) -> Result<Value, AppError> {
152 let format_str = file_format(&file);
153 let path_str = file.to_string_lossy().to_string();
154
155 let mut workbook =
156 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
157
158 let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
159
160 workbook
161 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
162 .map_err(crate::cli::error::anyhow_to_app_error)?;
163
164 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
165 let bounds = resolve_bounds(&workbook, sheet_obj, resolved_sheet.index, range.as_deref())?;
166
167 let row_limit = rows.unwrap_or(10);
169 let sample_end_row = (bounds.start_row + row_limit.saturating_sub(1)).min(bounds.end_row);
170
171 let resolved_header =
172 resolve_optional_header_row(&workbook, sheet_obj, resolved_sheet.index, &header_row)?;
173
174 let sample_mode = if resolved_header.is_some() {
175 "records"
176 } else {
177 "rows"
178 };
179
180 let range_str = format_range(
181 bounds.start_row,
182 bounds.start_col,
183 sample_end_row,
184 bounds.end_col,
185 );
186
187 let data = if let Some(header_row_idx) = resolved_header {
188 let mut headers = Vec::new();
190 if header_row_idx < sheet_obj.data.len() {
191 for col in bounds.start_col..=bounds.end_col {
192 let val = if col < sheet_obj.data[header_row_idx].len() {
193 sheet_obj.data[header_row_idx][col].value.clone()
194 } else {
195 String::new()
196 };
197 headers.push(val);
198 }
199 }
200
201 let mut records = Vec::new();
202 for row in bounds.start_row..=sample_end_row {
203 if row == header_row_idx {
204 continue;
205 }
206 if row >= sheet_obj.data.len() {
207 break;
208 }
209 let mut record = serde_json::Map::new();
210 for (col_idx, col) in (bounds.start_col..=bounds.end_col).enumerate() {
211 let key = headers.get(col_idx).cloned().unwrap_or_default();
212 let key = if key.is_empty() {
213 format!("col_{}", col_idx + 1)
214 } else {
215 key
216 };
217 let value = if col < sheet_obj.data[row].len() {
218 crate::json_export::process_cell_value(&sheet_obj.data[row][col])
219 } else {
220 Value::Null
221 };
222 record.insert(key, value);
223 }
224 records.push(Value::Object(record));
225 }
226
227 json!({
228 "resolved_header_row": header_row_idx,
229 "sample_mode": sample_mode,
230 "records": records,
231 })
232 } else {
233 let mut row_values = Vec::new();
235 for row in bounds.start_row..=sample_end_row {
236 if row >= sheet_obj.data.len() {
237 break;
238 }
239 let mut cols = Vec::new();
240 for col in bounds.start_col..=bounds.end_col {
241 let value = if col < sheet_obj.data[row].len() {
242 crate::json_export::process_cell_value(&sheet_obj.data[row][col])
243 } else {
244 Value::Null
245 };
246 cols.push(value);
247 }
248 row_values.push(Value::Array(cols));
249 }
250
251 json!({
252 "resolved_header_row": Value::Null,
253 "sample_mode": sample_mode,
254 "rows": row_values,
255 })
256 };
257
258 Ok(envelope::success_envelope(
259 "inspect.sample",
260 &path_str,
261 &format_str,
262 envelope::target_range(&resolved_sheet.name, resolved_sheet.index, &range_str),
263 json!({}),
264 data,
265 vec![],
266 ))
267}
268
269fn inspect_columns(
270 file: std::path::PathBuf,
271 sheet: String,
272 header_row: String,
273) -> Result<Value, AppError> {
274 let format_str = file_format(&file);
275 let path_str = file.to_string_lossy().to_string();
276
277 let mut workbook =
278 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
279
280 let resolved_sheet = load_target_sheet(&workbook, &Some(sheet), &None)?;
281
282 workbook
283 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
284 .map_err(crate::cli::error::anyhow_to_app_error)?;
285
286 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
287 let resolved_header =
288 resolve_header_row(&workbook, sheet_obj, resolved_sheet.index, &header_row)?;
289 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
290
291 let header_names = column_header_names(sheet_obj, resolved_header);
292 let duplicate_flags = duplicate_header_flags(&header_names);
293 let safe_names = stable_safe_names(&header_names);
294 let data_start_row = resolved_header.map_or(1, |row| row.saturating_add(1));
295 let data_row_count = if sheet_obj.max_rows >= data_start_row {
296 sheet_obj.max_rows - data_start_row + 1
297 } else {
298 0
299 };
300
301 let columns: Vec<Value> = (1..=sheet_obj.max_cols)
302 .map(|col| {
303 let stats = analyze_column(sheet_obj, col, data_start_row, data_row_count);
304 json!({
305 "index": col,
306 "name": header_names.get(col - 1).cloned().unwrap_or_default(),
307 "safe_name": safe_names.get(col - 1).cloned().unwrap_or_else(|| {
308 format!("col_{}", index_to_col_name(col))
309 }),
310 "is_duplicate": duplicate_flags.get(col - 1).copied().unwrap_or(false),
311 "inferred_type": stats.inferred_type,
312 "non_null_ratio": ratio(stats.non_null_count, data_row_count),
313 "formula_ratio": ratio(stats.formula_count, data_row_count),
314 "sample_values": stats.sample_values,
315 })
316 })
317 .collect();
318
319 let mut warnings = Vec::new();
320 if header_row == "auto" && resolved_header.is_none() {
321 warnings.push(json!({
322 "code": "header_not_detected",
323 "message": "No header row was detected; column names are synthetic.",
324 }));
325 }
326
327 Ok(envelope::success_envelope(
328 "inspect.columns",
329 &path_str,
330 &format_str,
331 envelope::target_sheet(&resolved_sheet.name, resolved_sheet.index),
332 json!({
333 "header_row_mode": header_row,
334 "resolved_header_row": resolved_header,
335 "column_count": sheet_obj.max_cols,
336 "data_row_count": data_row_count,
337 }),
338 json!({
339 "columns": columns,
340 }),
341 warnings,
342 ))
343}
344
345fn inspect_tables(file: std::path::PathBuf, sheet: String) -> Result<Value, AppError> {
346 let format_str = file_format(&file);
347 let path_str = file.to_string_lossy().to_string();
348
349 let mut workbook =
350 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
351
352 let resolved_sheet = load_target_sheet(&workbook, &Some(sheet), &None)?;
353
354 workbook
355 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
356 .map_err(crate::cli::error::anyhow_to_app_error)?;
357
358 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
359
360 let candidates = detect_table_candidates(sheet_obj);
361 let candidate_count = candidates.len();
362
363 Ok(envelope::success_envelope(
364 "inspect.tables",
365 &path_str,
366 &format_str,
367 envelope::target_sheet(&resolved_sheet.name, resolved_sheet.index),
368 json!({
369 "candidate_count": candidate_count,
370 }),
371 json!({
372 "candidates": candidates,
373 }),
374 vec![],
375 ))
376}
377
378fn column_header_names(sheet: &Sheet, resolved_header: Option<usize>) -> Vec<String> {
379 (1..=sheet.max_cols)
380 .map(|col| {
381 resolved_header
382 .and_then(|row| cell_at(sheet, row, col))
383 .map(|cell| cell.value.clone())
384 .unwrap_or_default()
385 })
386 .collect()
387}
388
389fn duplicate_header_flags(headers: &[String]) -> Vec<bool> {
390 let mut counts = HashMap::new();
391 for header in headers {
392 let normalized = header.trim();
393 if !normalized.is_empty() {
394 *counts.entry(normalized.to_string()).or_insert(0usize) += 1;
395 }
396 }
397
398 headers
399 .iter()
400 .map(|header| {
401 let normalized = header.trim();
402 !normalized.is_empty() && counts.get(normalized).copied().unwrap_or(0) > 1
403 })
404 .collect()
405}
406
407fn stable_safe_names(headers: &[String]) -> Vec<String> {
408 let mut counts = HashMap::new();
409
410 headers
411 .iter()
412 .enumerate()
413 .map(|(offset, header)| {
414 let col = offset + 1;
415 let base = slugify_header(header, col);
416 let count = counts.entry(base.clone()).or_insert(0usize);
417 *count += 1;
418 if *count == 1 {
419 base
420 } else {
421 format!("{base}_{count}")
422 }
423 })
424 .collect()
425}
426
427fn slugify_header(header: &str, col: usize) -> String {
428 let mut slug = String::new();
429 let mut last_was_separator = false;
430
431 for ch in header.trim().chars() {
432 if ch.is_alphanumeric() {
433 for lower in ch.to_lowercase() {
434 slug.push(lower);
435 }
436 last_was_separator = false;
437 } else if !slug.is_empty() && !last_was_separator {
438 slug.push('_');
439 last_was_separator = true;
440 }
441 }
442
443 while slug.ends_with('_') {
444 slug.pop();
445 }
446
447 if slug.is_empty() {
448 format!("col_{}", index_to_col_name(col))
449 } else {
450 slug
451 }
452}
453
454struct ColumnStats {
455 inferred_type: &'static str,
456 non_null_count: usize,
457 formula_count: usize,
458 sample_values: Vec<Value>,
459}
460
461fn analyze_column(
462 sheet: &Sheet,
463 col: usize,
464 data_start_row: usize,
465 data_row_count: usize,
466) -> ColumnStats {
467 let mut inferred_type = None;
468 let mut is_mixed = false;
469 let mut non_null_count = 0usize;
470 let mut formula_count = 0usize;
471 let mut sample_values = Vec::new();
472
473 if data_row_count > 0 {
474 for row in data_start_row..data_start_row + data_row_count {
475 if let Some(cell) = cell_at(sheet, row, col) {
476 if cell.is_formula || cell.formula.is_some() {
477 formula_count += 1;
478 }
479
480 if is_non_null(cell) {
481 non_null_count += 1;
482
483 if sample_values.len() < 5 {
484 sample_values.push(crate::json_export::process_cell_value(cell));
485 }
486
487 if let Some(cell_type) = inferred_kind(cell) {
488 match inferred_type {
489 None => inferred_type = Some(cell_type),
490 Some(existing) if existing == cell_type => {}
491 Some(_) => is_mixed = true,
492 }
493 }
494 }
495 }
496 }
497 }
498
499 ColumnStats {
500 inferred_type: if is_mixed {
501 "mixed"
502 } else {
503 inferred_type.unwrap_or("string")
504 },
505 non_null_count,
506 formula_count,
507 sample_values,
508 }
509}
510
511fn is_non_null(cell: &Cell) -> bool {
512 !cell.value.is_empty()
513}
514
515fn inferred_kind(cell: &Cell) -> Option<&'static str> {
516 match cell.cell_type {
517 CellType::Text => Some("string"),
518 CellType::Number => Some("number"),
519 CellType::Date => Some("date"),
520 CellType::Boolean => Some("boolean"),
521 CellType::Empty => None,
522 }
523}
524
525fn ratio(numerator: usize, denominator: usize) -> f64 {
526 if denominator == 0 {
527 0.0
528 } else {
529 numerator as f64 / denominator as f64
530 }
531}
532
533#[derive(Clone)]
534struct TableCandidate {
535 start_row: usize,
536 start_col: usize,
537 end_row: usize,
538 end_col: usize,
539 confidence: f64,
540}
541
542fn detect_table_candidates(sheet: &Sheet) -> Vec<Value> {
543 if sheet.max_rows == 0 || sheet.max_cols == 0 {
544 return Vec::new();
545 }
546
547 let mut candidates = Vec::new();
548
549 for row in 1..=sheet.max_rows {
550 let spans = non_empty_spans(sheet, row);
551 for (start_col, end_col) in spans {
552 let column_count = end_col - start_col + 1;
553 if column_count < 2 {
554 continue;
555 }
556
557 if !is_header_like(sheet, row, start_col, end_col) {
558 continue;
559 }
560
561 if row > 1 && previous_row_blocks_header_candidate(sheet, row, start_col, end_col) {
562 continue;
563 }
564
565 let Some(end_row) = extend_candidate(sheet, row, start_col, end_col) else {
566 continue;
567 };
568
569 let confidence = score_candidate(sheet, row, start_col, end_row, end_col);
570 if confidence < 0.5 {
571 continue;
572 }
573
574 candidates.push(TableCandidate {
575 start_row: row,
576 start_col,
577 end_row,
578 end_col,
579 confidence,
580 });
581 }
582 }
583
584 candidates.sort_by_key(|candidate| (candidate.start_row, candidate.start_col));
585 let candidates = remove_duplicate_candidates(candidates);
586
587 candidates
588 .into_iter()
589 .map(|candidate| {
590 let range = format_range(
591 candidate.start_row,
592 candidate.start_col,
593 candidate.end_row,
594 candidate.end_col,
595 );
596 json!({
597 "range": range,
598 "header_row": candidate.start_row,
599 "column_count": candidate.end_col - candidate.start_col + 1,
600 "row_count": candidate.end_row - candidate.start_row + 1,
601 "confidence": candidate.confidence,
602 })
603 })
604 .collect()
605}
606
607fn non_empty_spans(sheet: &Sheet, row: usize) -> Vec<(usize, usize)> {
608 let mut spans = Vec::new();
609 let mut current_start = None;
610
611 for col in 1..=sheet.max_cols {
612 if cell_has_value(sheet, row, col) {
613 current_start.get_or_insert(col);
614 } else if let Some(start) = current_start.take() {
615 spans.push((start, col - 1));
616 }
617 }
618
619 if let Some(start) = current_start {
620 spans.push((start, sheet.max_cols));
621 }
622
623 spans
624}
625
626fn is_header_like(sheet: &Sheet, row: usize, start_col: usize, end_col: usize) -> bool {
627 let width = end_col - start_col + 1;
628 let non_empty = non_empty_count(sheet, row, start_col, end_col);
629 if non_empty < 2 {
630 return false;
631 }
632
633 let fill_ratio = non_empty as f64 / width as f64;
634 let text_ratio = text_count(sheet, row, start_col, end_col) as f64 / non_empty as f64;
635 fill_ratio >= 0.8 && text_ratio >= 0.5
636}
637
638fn extend_candidate(
639 sheet: &Sheet,
640 header_row: usize,
641 start_col: usize,
642 end_col: usize,
643) -> Option<usize> {
644 let mut end_row = header_row;
645 let mut has_data_row = false;
646
647 for row in (header_row + 1)..=sheet.max_rows {
648 if row_has_any_value(sheet, row, start_col, end_col) {
649 end_row = row;
650 has_data_row = true;
651 } else {
652 break;
653 }
654 }
655
656 has_data_row.then_some(end_row)
657}
658
659fn score_candidate(
660 sheet: &Sheet,
661 header_row: usize,
662 start_col: usize,
663 end_row: usize,
664 end_col: usize,
665) -> f64 {
666 let width = end_col - start_col + 1;
667 let header_non_empty = non_empty_count(sheet, header_row, start_col, end_col);
668 let header_fill = header_non_empty as f64 / width as f64;
669 let header_text =
670 text_count(sheet, header_row, start_col, end_col) as f64 / header_non_empty.max(1) as f64;
671
672 let data_rows = end_row.saturating_sub(header_row);
673 let data_fill = if data_rows == 0 {
674 0.0
675 } else {
676 let filled_cells: usize = ((header_row + 1)..=end_row)
677 .map(|row| non_empty_count(sheet, row, start_col, end_col))
678 .sum();
679 filled_cells as f64 / (data_rows * width) as f64
680 };
681
682 let height_score = (data_rows as f64 / 3.0).min(1.0);
683 let before_boundary =
684 header_row == 1 || !row_has_any_value(sheet, header_row - 1, start_col, end_col);
685 let after_boundary =
686 end_row == sheet.max_rows || !row_has_any_value(sheet, end_row + 1, start_col, end_col);
687 let boundary_score = match (before_boundary, after_boundary) {
688 (true, true) => 1.0,
689 (true, false) | (false, true) => 0.5,
690 (false, false) => 0.0,
691 };
692
693 let confidence = header_fill * 0.25
694 + header_text * 0.25
695 + data_fill * 0.25
696 + height_score * 0.15
697 + boundary_score * 0.10;
698
699 (confidence.clamp(0.0, 1.0) * 100.0).round() / 100.0
700}
701
702fn remove_duplicate_candidates(candidates: Vec<TableCandidate>) -> Vec<TableCandidate> {
703 let mut kept: Vec<TableCandidate> = Vec::new();
704
705 'candidate: for candidate in candidates {
706 for existing in &kept {
707 if same_table_region(existing, &candidate) {
708 continue 'candidate;
709 }
710 }
711 kept.push(candidate);
712 }
713
714 kept
715}
716
717fn same_table_region(left: &TableCandidate, right: &TableCandidate) -> bool {
718 left.start_row == right.start_row
719 && left.start_col == right.start_col
720 && left.end_row == right.end_row
721 && left.end_col == right.end_col
722}
723
724fn previous_row_blocks_header_candidate(
725 sheet: &Sheet,
726 row: usize,
727 start_col: usize,
728 end_col: usize,
729) -> bool {
730 let width = end_col - start_col + 1;
731 let previous_non_empty = non_empty_count(sheet, row - 1, start_col, end_col);
732 previous_non_empty >= 2 && previous_non_empty as f64 / width as f64 >= 0.5
733}
734
735fn row_has_any_value(sheet: &Sheet, row: usize, start_col: usize, end_col: usize) -> bool {
736 (start_col..=end_col).any(|col| cell_has_value(sheet, row, col))
737}
738
739fn non_empty_count(sheet: &Sheet, row: usize, start_col: usize, end_col: usize) -> usize {
740 (start_col..=end_col)
741 .filter(|&col| cell_has_value(sheet, row, col))
742 .count()
743}
744
745fn text_count(sheet: &Sheet, row: usize, start_col: usize, end_col: usize) -> usize {
746 (start_col..=end_col)
747 .filter(|&col| {
748 cell_has_value(sheet, row, col)
749 && matches!(table_cell_type(sheet, row, col), Some(CellType::Text))
750 })
751 .count()
752}
753
754fn cell_has_value(sheet: &Sheet, row: usize, col: usize) -> bool {
755 sheet
756 .data
757 .get(row)
758 .and_then(|row_data| row_data.get(col))
759 .map(|cell| !cell.value.trim().is_empty() || cell.formula.is_some())
760 .unwrap_or(false)
761}
762
763fn table_cell_type(sheet: &Sheet, row: usize, col: usize) -> Option<CellType> {
764 sheet
765 .data
766 .get(row)
767 .and_then(|row_data| row_data.get(col))
768 .map(|cell| cell.cell_type.clone())
769}