memvid_core/reader/
xls.rs1use std::io::Cursor;
2
3use calamine::{DataType, Reader as CalamineReader, Xls};
4
5use crate::{
6 DocumentFormat, DocumentReader, PassthroughReader, ReaderDiagnostics, ReaderHint, ReaderOutput,
7 Result,
8};
9
10pub struct XlsReader;
12
13impl XlsReader {
14 fn extract_text(bytes: &[u8]) -> Result<String> {
15 let cursor = Cursor::new(bytes);
16 let mut workbook =
17 Xls::new(cursor).map_err(|err| crate::MemvidError::ExtractionFailed {
18 reason: format!("failed to read xls workbook: {err}").into(),
19 })?;
20
21 let mut out = String::new();
22 for sheet_name in workbook.sheet_names().to_owned() {
23 if let Some(Ok(range)) = workbook.worksheet_range(&sheet_name) {
24 if !out.is_empty() {
25 out.push_str("\n");
26 }
27 out.push_str(&format!("Sheet: {}\n", sheet_name));
28 for row in range.rows() {
29 let mut first_cell = true;
30 for cell in row {
31 if !first_cell {
32 out.push('\t');
33 }
34 first_cell = false;
35 match cell {
36 DataType::String(s) => out.push_str(s.trim()),
37 DataType::Float(v) => out.push_str(&format!("{}", v)),
38 DataType::Int(v) => out.push_str(&format!("{}", v)),
39 DataType::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
40 DataType::Error(e) => out.push_str(&format!("#{:?}", e)),
41 DataType::Empty => {}
42 DataType::DateTime(v) => out.push_str(&format!("{}", v)),
43 DataType::DateTimeIso(s) => out.push_str(s),
44 DataType::Duration(v) => out.push_str(&format!("{}", v)),
45 DataType::DurationIso(s) => out.push_str(s),
46 }
47 }
48 out.push('\n');
49 }
50 }
51 }
52
53 Ok(out.trim().to_string())
54 }
55}
56
57impl DocumentReader for XlsReader {
58 fn name(&self) -> &'static str {
59 "xls"
60 }
61
62 fn supports(&self, hint: &ReaderHint<'_>) -> bool {
63 matches!(hint.format, Some(DocumentFormat::Xls))
64 || hint
65 .mime
66 .map(|mime| mime.eq_ignore_ascii_case("application/vnd.ms-excel"))
67 .unwrap_or(false)
68 }
69
70 fn extract(&self, bytes: &[u8], hint: &ReaderHint<'_>) -> Result<ReaderOutput> {
71 match Self::extract_text(bytes) {
72 Ok(text) => {
73 if text.trim().is_empty() {
74 let mut fallback = PassthroughReader.extract(bytes, hint)?;
76 fallback.reader_name = self.name().to_string();
77 fallback.diagnostics.mark_fallback();
78 fallback.diagnostics.record_warning(
79 "xls reader produced empty text; falling back to default extractor",
80 );
81 Ok(fallback)
82 } else {
83 let mut document = crate::ExtractedDocument::empty();
85 document.text = Some(text);
86 document.mime_type = Some("application/vnd.ms-excel".to_string());
87 Ok(ReaderOutput::new(document, self.name())
88 .with_diagnostics(ReaderDiagnostics::default()))
89 }
90 }
91 Err(err) => {
92 let mut fallback = PassthroughReader.extract(bytes, hint)?;
94 fallback.reader_name = self.name().to_string();
95 fallback.diagnostics.mark_fallback();
96 fallback
97 .diagnostics
98 .record_warning(format!("xls reader error: {err}"));
99 Ok(fallback)
100 }
101 }
102 }
103}