1use anyhow::{Context, Result};
2use calamine::{open_workbook, Ods, Reader, Xlsx};
3use std::collections::HashMap;
4use std::sync::{Arc, RwLock};
5
6use crate::csv_handler::CellRange;
7use crate::traits::DataReader;
8
9#[derive(Debug, Clone)]
11struct ExcelMetadata {
12 sheet_names: Vec<String>,
13 modified_time: Option<std::time::SystemTime>,
14}
15
16struct ExcelMetadataCache {
18 cache: Arc<RwLock<HashMap<String, ExcelMetadata>>>,
19}
20
21impl ExcelMetadataCache {
22 fn new() -> Self {
23 Self {
24 cache: Arc::new(RwLock::new(HashMap::new())),
25 }
26 }
27
28 fn get(&self, path: &str) -> Option<ExcelMetadata> {
29 let cache = self.cache.read().ok()?;
30 if let Some(metadata) = cache.get(path) {
31 if let Ok(current_modified) = std::fs::metadata(path).and_then(|m| m.modified()) {
33 if let Some(cached_modified) = metadata.modified_time {
34 if current_modified == cached_modified {
35 return Some(metadata.clone());
36 }
37 }
38 }
39 }
40 None
41 }
42
43 fn insert(&self, path: String, metadata: ExcelMetadata) {
44 if let Ok(mut cache) = self.cache.write() {
45 if cache.len() > 100 {
47 cache.clear();
48 }
49 cache.insert(path, metadata);
50 }
51 }
52
53 fn invalidate(&self, path: &str) {
54 if let Ok(mut cache) = self.cache.write() {
55 cache.remove(path);
56 }
57 }
58}
59
60pub struct ExcelHandler {
62 metadata_cache: ExcelMetadataCache,
63}
64
65impl ExcelHandler {
66 pub fn new() -> Self {
67 Self {
68 metadata_cache: ExcelMetadataCache::new(),
69 }
70 }
71
72 fn resolve_sheet_selection(requested: Option<&str>, available: &[String]) -> Result<String> {
75 match requested {
76 Some(name) => {
77 if available.iter().any(|s| s == name) {
78 Ok(name.to_string())
79 } else {
80 let list = if available.is_empty() {
81 "(none)".to_string()
82 } else {
83 available.join(", ")
84 };
85 anyhow::bail!(
86 "Sheet '{name}' not found in workbook. Available sheets: {list}"
87 );
88 }
89 }
90 None => available
91 .first()
92 .cloned()
93 .ok_or_else(|| anyhow::anyhow!("No sheets found in workbook")),
94 }
95 }
96
97 fn get_metadata(&self, path: &str) -> Result<ExcelMetadata> {
99 if let Some(metadata) = self.metadata_cache.get(path) {
101 return Ok(metadata);
102 }
103
104 let workbook: Xlsx<_> =
106 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
107
108 let modified_time = std::fs::metadata(path)
109 .and_then(|m| m.modified())
110 .ok();
111
112 let metadata = ExcelMetadata {
113 sheet_names: workbook.sheet_names().to_vec(),
114 modified_time,
115 };
116
117 self.metadata_cache.insert(path.to_string(), metadata.clone());
119
120 Ok(metadata)
121 }
122
123 pub fn read(&self, path: &str) -> Result<String> {
124 self.read_with_sheet(path, None)
125 }
126
127 pub fn read_with_sheet(&self, path: &str, sheet_name: Option<&str>) -> Result<String> {
128 let mut workbook: Xlsx<_> =
129 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
130
131 let metadata = self.get_metadata(path)?;
132 let sheet_name = Self::resolve_sheet_selection(sheet_name, &metadata.sheet_names)?;
133
134 let range = workbook
135 .worksheet_range(&sheet_name)
136 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
137
138 let mut output = String::with_capacity(range.height() * range.width() * 10);
140 for row in range.rows() {
141 let row_str: Vec<String> = row.iter().map(|cell| cell.to_string()).collect();
142 output.push_str(&row_str.join(","));
143 output.push('\n');
144 }
145
146 Ok(output)
147 }
148
149 pub fn parse_cell_reference(&self, cell: &str) -> Result<(u32, u16)> {
150 let mut col_str = String::new();
151 let mut row_str = String::new();
152
153 for ch in cell.chars() {
154 if ch.is_alphabetic() {
155 col_str.push(ch);
156 } else if ch.is_ascii_digit() {
157 row_str.push(ch);
158 }
159 }
160
161 let col = self.column_to_index(&col_str)?;
162 let row = row_str
163 .parse::<u32>()
164 .with_context(|| format!("Invalid row number in cell reference: {cell}"))?;
165
166 Ok((row - 1, col))
167 }
168
169 fn column_to_index(&self, col: &str) -> Result<u16> {
170 let mut index = 0u16;
171 for ch in col.chars() {
172 index = index * 26 + (ch.to_ascii_uppercase() as u16 - b'A' as u16 + 1);
173 }
174 Ok(index - 1)
175 }
176
177 pub fn read_range(
179 &self,
180 path: &str,
181 range: &CellRange,
182 sheet_name: Option<&str>,
183 ) -> Result<Vec<Vec<String>>> {
184 let mut workbook: Xlsx<_> =
185 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
186
187 let metadata = self.get_metadata(path)?;
188 let sheet_name = Self::resolve_sheet_selection(sheet_name, &metadata.sheet_names)?;
189
190 let ws_range = workbook
191 .worksheet_range(&sheet_name)
192 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
193
194 let estimated_rows = range.end_row.saturating_sub(range.start_row) + 1;
195 let estimated_cols = range.end_col.saturating_sub(range.start_col) + 1;
196 let mut result = Vec::with_capacity(estimated_rows.min(1024));
197
198 for (row_idx, row) in ws_range.rows().enumerate() {
199 if row_idx < range.start_row {
200 continue;
201 }
202 if row_idx > range.end_row {
203 break;
204 }
205
206 let mut row_data = Vec::with_capacity(estimated_cols);
207 for (col_idx, cell) in row.iter().enumerate() {
208 if col_idx >= range.start_col && col_idx <= range.end_col {
209 row_data.push(cell.to_string());
210 }
211 }
212 result.push(row_data);
213 }
214
215 Ok(result)
216 }
217
218 pub fn read_as_json(&self, path: &str, sheet_name: Option<&str>) -> Result<String> {
220 let mut workbook: Xlsx<_> =
221 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
222
223 let metadata = self.get_metadata(path)?;
224 let sheet_name = Self::resolve_sheet_selection(sheet_name, &metadata.sheet_names)?;
225
226 let range = workbook
227 .worksheet_range(&sheet_name)
228 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
229
230 let mut rows: Vec<Vec<String>> = Vec::with_capacity(range.height());
231 for row in range.rows() {
232 let mut row_data = Vec::with_capacity(range.width());
233 for cell in row.iter() {
234 row_data.push(cell.to_string());
235 }
236 rows.push(row_data);
237 }
238
239 serde_json::to_string_pretty(&rows).with_context(|| "Failed to serialize to JSON")
240 }
241
242 pub fn list_sheets(&self, path: &str) -> Result<Vec<String>> {
244 let metadata = self.get_metadata(path)?;
245 Ok(metadata.sheet_names)
246 }
247
248 pub fn read_all_sheets(
250 &self,
251 path: &str,
252 ) -> Result<std::collections::HashMap<String, Vec<Vec<String>>>> {
253 let mut workbook: Xlsx<_> =
254 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
255
256 let sheet_names = workbook.sheet_names().to_vec();
257 let mut result = std::collections::HashMap::new();
258
259 for sheet_name in sheet_names {
260 let range = workbook
261 .worksheet_range(&sheet_name)
262 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
263
264 let mut rows: Vec<Vec<String>> = Vec::new();
265 for row in range.rows() {
266 rows.push(row.iter().map(|cell| cell.to_string()).collect());
267 }
268
269 result.insert(sheet_name, rows);
270 }
271
272 Ok(result)
273 }
274
275 pub fn read_ods(&self, path: &str, sheet_name: Option<&str>) -> Result<String> {
277 let mut workbook: Ods<_> =
278 open_workbook(path).with_context(|| format!("Failed to open ODS file: {path}"))?;
279
280 let sheet_names: Vec<String> = workbook.sheet_names().to_vec();
281 let sheet_name = Self::resolve_sheet_selection(sheet_name, &sheet_names)?;
282
283 let range = workbook
284 .worksheet_range(&sheet_name)
285 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
286
287 let mut output = String::new();
288 for row in range.rows() {
289 let row_str: Vec<String> = row.iter().map(|cell| cell.to_string()).collect();
290 output.push_str(&row_str.join(","));
291 output.push('\n');
292 }
293
294 Ok(output)
295 }
296
297 pub fn read_ods_data(&self, path: &str, sheet_name: Option<&str>) -> Result<Vec<Vec<String>>> {
299 let mut workbook: Ods<_> =
300 open_workbook(path).with_context(|| format!("Failed to open ODS file: {path}"))?;
301
302 let sheet_names: Vec<String> = workbook.sheet_names().to_vec();
303 let sheet_name = Self::resolve_sheet_selection(sheet_name, &sheet_names)?;
304
305 let range = workbook
306 .worksheet_range(&sheet_name)
307 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
308
309 let mut rows: Vec<Vec<String>> = Vec::new();
310 for row in range.rows() {
311 rows.push(row.iter().map(|cell| cell.to_string()).collect());
312 }
313
314 Ok(rows)
315 }
316
317 pub fn list_ods_sheets(&self, path: &str) -> Result<Vec<String>> {
319 let workbook: Ods<_> =
320 open_workbook(path).with_context(|| format!("Failed to open ODS file: {path}"))?;
321 Ok(workbook.sheet_names().to_vec())
322 }
323
324 pub fn read_auto(&self, path: &str, sheet_or_range: Option<&str>) -> Result<Vec<Vec<String>>> {
326 let path_lower = path.to_lowercase();
327
328 if path_lower.ends_with(".ods") {
329 return self.read_ods_data(path, sheet_or_range);
330 }
331
332 if path_lower.ends_with(".xlsx") || path_lower.ends_with(".xls") {
333 if let Some(range_str) = sheet_or_range {
334 let cell_range = CellRange::parse(range_str)?;
335 return self.read_range(path, &cell_range, None);
336 } else {
337 let csv_str = self.read_with_sheet(path, None)?;
338 let data = csv_str
339 .lines()
340 .filter(|l| !l.is_empty())
341 .map(|l| l.split(',').map(|s| s.to_string()).collect())
342 .collect();
343 return Ok(data);
344 }
345 }
346
347 anyhow::bail!("Unsupported file format: {path}")
348 }
349}
350
351impl DataReader for ExcelHandler {
352 fn read(&self, path: &str) -> Result<Vec<Vec<String>>> {
353 let csv_str = self.read_with_sheet(path, None)?;
354 let result: Vec<Vec<String>> = csv_str
355 .lines()
356 .filter(|l| !l.is_empty())
357 .map(|l| l.split(',').map(|s| s.to_string()).collect())
358 .collect();
359 Ok(result)
360 }
361
362 fn read_with_headers(&self, path: &str) -> Result<Vec<Vec<String>>> {
363 let csv_str = self.read_with_sheet(path, None)?;
365 let result: Vec<Vec<String>> = csv_str
366 .lines()
367 .filter(|l| !l.is_empty())
368 .map(|l| l.split(',').map(|s| s.to_string()).collect())
369 .collect();
370 Ok(result)
371 }
372
373 fn read_range(&self, path: &str, range: &CellRange) -> Result<Vec<Vec<String>>> {
374 let mut workbook: Xlsx<_> =
376 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
377
378 let metadata = self.get_metadata(path)?;
379 let sheet_name = metadata
380 .sheet_names
381 .first()
382 .map(|s| s.as_str())
383 .ok_or_else(|| anyhow::anyhow!("No sheets found in workbook"))?;
384
385 let ws_range = workbook
386 .worksheet_range(sheet_name)
387 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
388
389 let estimated_rows = range.end_row.saturating_sub(range.start_row) + 1;
390 let estimated_cols = range.end_col.saturating_sub(range.start_col) + 1;
391 let mut result = Vec::with_capacity(estimated_rows.min(1024));
392
393 for (row_idx, row) in ws_range.rows().enumerate() {
394 if row_idx < range.start_row {
395 continue;
396 }
397 if row_idx > range.end_row {
398 break;
399 }
400
401 let mut row_data = Vec::with_capacity(estimated_cols);
402 for (col_idx, cell) in row.iter().enumerate() {
403 if col_idx >= range.start_col && col_idx <= range.end_col {
404 row_data.push(cell.to_string());
405 }
406 }
407 result.push(row_data);
408 }
409
410 Ok(result)
411 }
412
413 fn read_as_json(&self, path: &str) -> Result<String> {
414 let mut workbook: Xlsx<_> =
415 open_workbook(path).with_context(|| format!("Failed to open Excel file: {path}"))?;
416
417 let metadata = self.get_metadata(path)?;
418 let sheet_name = metadata
419 .sheet_names
420 .first()
421 .map(|s| s.as_str())
422 .ok_or_else(|| anyhow::anyhow!("No sheets found in workbook"))?;
423
424 let range = workbook
425 .worksheet_range(sheet_name)
426 .with_context(|| format!("Failed to read sheet: {sheet_name}"))?;
427
428 let mut rows: Vec<Vec<String>> = Vec::with_capacity(range.height());
429 for row in range.rows() {
430 let mut row_data = Vec::with_capacity(range.width());
431 for cell in row.iter() {
432 row_data.push(cell.to_string());
433 }
434 rows.push(row_data);
435 }
436
437 serde_json::to_string_pretty(&rows).with_context(|| "Failed to serialize to JSON")
438 }
439
440 fn supports_format(&self, path: &str) -> bool {
441 let path_lower = path.to_lowercase();
442 path_lower.ends_with(".xlsx") || path_lower.ends_with(".xls") || path_lower.ends_with(".ods")
443 }
444}