excel_cli/excel/
workbook.rs

1use anyhow::{Context, Result};
2use calamine::{open_workbook_auto, Data, Reader, Xls, Xlsx};
3use chrono::Local;
4use rust_xlsxwriter::{Format, Workbook as XlsxWorkbook};
5use std::collections::HashSet;
6use std::fs::File;
7use std::io::BufReader;
8use std::path::Path;
9
10use crate::excel::{Cell, CellType, DataTypeInfo, Sheet};
11
12pub enum CalamineWorkbook {
13    Xlsx(Box<Xlsx<BufReader<File>>>),
14    Xls(Xls<BufReader<File>>),
15    None,
16}
17
18impl Clone for CalamineWorkbook {
19    fn clone(&self) -> Self {
20        CalamineWorkbook::None
21    }
22}
23
24pub struct Workbook {
25    sheets: Vec<Sheet>,
26    current_sheet_index: usize,
27    file_path: String,
28    is_modified: bool,
29    calamine_workbook: CalamineWorkbook,
30    lazy_loading: bool,
31    loaded_sheets: HashSet<usize>, // Track which sheets have been loaded
32}
33
34impl Clone for Workbook {
35    fn clone(&self) -> Self {
36        Workbook {
37            sheets: self.sheets.clone(),
38            current_sheet_index: self.current_sheet_index,
39            file_path: self.file_path.clone(),
40            is_modified: self.is_modified,
41            calamine_workbook: CalamineWorkbook::None,
42            lazy_loading: false,
43            loaded_sheets: self.loaded_sheets.clone(),
44        }
45    }
46}
47
48pub fn open_workbook<P: AsRef<Path>>(path: P, enable_lazy_loading: bool) -> Result<Workbook> {
49    let path_str = path.as_ref().to_string_lossy().to_string();
50    let path_ref = path.as_ref();
51
52    // Determine if the file format supports lazy loading
53    let extension = path_ref
54        .extension()
55        .and_then(|ext| ext.to_str())
56        .map(|ext| ext.to_lowercase());
57
58    // Only enable lazy loading if both the flag is set AND the format supports it
59    let supports_lazy_loading =
60        enable_lazy_loading && matches!(extension.as_deref(), Some("xlsx" | "xlsm"));
61
62    // Open workbook directly from path
63    let mut workbook = open_workbook_auto(&path)
64        .with_context(|| format!("Unable to parse Excel file: {}", path_str))?;
65
66    let sheet_names = workbook.sheet_names().to_vec();
67
68    // Pre-allocate with the right capacity
69    let mut sheets = Vec::with_capacity(sheet_names.len());
70
71    // Store the original calamine workbook for lazy loading if enabled
72    let mut calamine_workbook = CalamineWorkbook::None;
73
74    if supports_lazy_loading {
75        // For formats that support lazy loading, keep the original workbook
76        // and only load sheet metadata
77        for name in &sheet_names {
78            // Create a minimal sheet with just the name
79            let sheet = Sheet {
80                name: name.to_string(),
81                data: vec![vec![Cell::empty(); 1]; 1],
82                max_rows: 0,
83                max_cols: 0,
84                is_loaded: false,
85            };
86
87            sheets.push(sheet);
88        }
89
90        // Try to reopen the file to get a fresh reader for lazy loading
91        if let Ok(file) = File::open(&path) {
92            let reader = BufReader::new(file);
93
94            // Try to open as XLSX first
95            if let Ok(xlsx_workbook) = Xlsx::new(reader) {
96                calamine_workbook = CalamineWorkbook::Xlsx(Box::new(xlsx_workbook));
97            } else {
98                // If not XLSX, try to open as XLS
99                if let Ok(file) = File::open(&path) {
100                    let reader = BufReader::new(file);
101                    if let Ok(xls_workbook) = Xls::new(reader) {
102                        calamine_workbook = CalamineWorkbook::Xls(xls_workbook);
103                    }
104                }
105            }
106        }
107    } else {
108        // For formats that don't support lazy loading or if lazy loading is disabled,
109        for name in &sheet_names {
110            let range = workbook
111                .worksheet_range(name)
112                .with_context(|| format!("Unable to read worksheet: {}", name))?;
113
114            let mut sheet = create_sheet_from_range(name, range);
115            sheet.is_loaded = true;
116            sheets.push(sheet);
117        }
118    }
119
120    if sheets.is_empty() {
121        anyhow::bail!("No worksheets found in file");
122    }
123
124    let mut loaded_sheets = HashSet::new();
125
126    if !supports_lazy_loading {
127        for i in 0..sheets.len() {
128            loaded_sheets.insert(i);
129        }
130    }
131
132    Ok(Workbook {
133        sheets,
134        current_sheet_index: 0,
135        file_path: path_str,
136        is_modified: false,
137        calamine_workbook,
138        lazy_loading: supports_lazy_loading,
139        loaded_sheets,
140    })
141}
142
143fn create_sheet_from_range(name: &str, range: calamine::Range<Data>) -> Sheet {
144    let (height, width) = range.get_size();
145
146    // Create a data grid with empty cells, adding 1 to dimensions for 1-based indexing
147    let mut data = vec![vec![Cell::empty(); width + 1]; height + 1];
148
149    // Process only non-empty cells
150    for (row_idx, col_idx, cell) in range.used_cells() {
151        // Extract value, cell_type, and original_type from the Data
152        let (value, cell_type, original_type) = match cell {
153            Data::Empty => (String::new(), CellType::Empty, Some(DataTypeInfo::Empty)),
154
155            Data::String(s) => {
156                let value = s.clone();
157                (value, CellType::Text, Some(DataTypeInfo::String))
158            }
159
160            Data::Float(f) => {
161                let value = if *f == (*f as i64) as f64 && f.abs() < 1e10 {
162                    (*f as i64).to_string()
163                } else {
164                    f.to_string()
165                };
166                (value, CellType::Number, Some(DataTypeInfo::Float(*f)))
167            }
168
169            Data::Int(i) => (i.to_string(), CellType::Number, Some(DataTypeInfo::Int(*i))),
170
171            Data::Bool(b) => (
172                if *b {
173                    "TRUE".to_string()
174                } else {
175                    "FALSE".to_string()
176                },
177                CellType::Boolean,
178                Some(DataTypeInfo::Bool(*b)),
179            ),
180
181            Data::Error(e) => {
182                let mut value = String::with_capacity(15);
183                value.push_str("Error: ");
184                value.push_str(&format!("{:?}", e));
185                (value, CellType::Text, Some(DataTypeInfo::Error))
186            }
187
188            Data::DateTime(dt) => (
189                dt.to_string(),
190                CellType::Date,
191                Some(DataTypeInfo::DateTime(dt.as_f64())),
192            ),
193
194            Data::DateTimeIso(s) => {
195                let value = s.clone();
196                (
197                    value.clone(),
198                    CellType::Date,
199                    Some(DataTypeInfo::DateTimeIso(value)),
200                )
201            }
202
203            Data::DurationIso(s) => {
204                let value = s.clone();
205                (
206                    value.clone(),
207                    CellType::Text,
208                    Some(DataTypeInfo::DurationIso(value)),
209                )
210            }
211        };
212
213        let is_formula = !value.is_empty() && value.starts_with('=');
214
215        // Store the cell in data grid (using 1-based indexing)
216        data[row_idx + 1][col_idx + 1] =
217            Cell::new_with_type(value, is_formula, cell_type, original_type);
218    }
219
220    Sheet {
221        name: name.to_string(),
222        data,
223        max_rows: height,
224        max_cols: width,
225        is_loaded: true,
226    }
227}
228
229impl Workbook {
230    pub fn get_current_sheet(&self) -> &Sheet {
231        &self.sheets[self.current_sheet_index]
232    }
233
234    pub fn get_current_sheet_mut(&mut self) -> &mut Sheet {
235        &mut self.sheets[self.current_sheet_index]
236    }
237
238    pub fn ensure_sheet_loaded(&mut self, sheet_index: usize, sheet_name: &str) -> Result<()> {
239        if !self.lazy_loading || self.sheets[sheet_index].is_loaded {
240            return Ok(());
241        }
242
243        // Load the sheet data from the calamine workbook
244        match &mut self.calamine_workbook {
245            CalamineWorkbook::Xlsx(xlsx) => {
246                if let Ok(range) = xlsx.worksheet_range(sheet_name) {
247                    // Replace the placeholder sheet with a fully loaded one
248                    let mut sheet = create_sheet_from_range(sheet_name, range);
249
250                    // Preserve the original name in case it was customized
251                    let original_name = self.sheets[sheet_index].name.clone();
252                    sheet.name = original_name;
253
254                    self.sheets[sheet_index] = sheet;
255
256                    // Mark the sheet as loaded
257                    self.loaded_sheets.insert(sheet_index);
258                }
259            }
260            CalamineWorkbook::Xls(xls) => {
261                if let Ok(range) = xls.worksheet_range(sheet_name) {
262                    // Replace the placeholder sheet with a fully loaded one
263                    let mut sheet = create_sheet_from_range(sheet_name, range);
264
265                    // Preserve the original name in case it was customized
266                    let original_name = self.sheets[sheet_index].name.clone();
267                    sheet.name = original_name;
268
269                    self.sheets[sheet_index] = sheet;
270
271                    // Mark the sheet as loaded
272                    self.loaded_sheets.insert(sheet_index);
273                }
274            }
275            CalamineWorkbook::None => {
276                return Err(anyhow::anyhow!("Cannot load sheet: no workbook available"));
277            }
278        }
279
280        Ok(())
281    }
282
283    pub fn get_sheet_by_index(&self, index: usize) -> Option<&Sheet> {
284        self.sheets.get(index)
285    }
286
287    pub fn ensure_cell_exists(&mut self, row: usize, col: usize) {
288        let sheet = &mut self.sheets[self.current_sheet_index];
289
290        // Expand rows if needed
291        if row >= sheet.data.len() {
292            let default_row_len = if sheet.data.is_empty() {
293                col + 1
294            } else {
295                sheet.data[0].len()
296            };
297            let rows_to_add = row + 1 - sheet.data.len();
298
299            sheet
300                .data
301                .extend(vec![vec![Cell::empty(); default_row_len]; rows_to_add]);
302            sheet.max_rows = sheet.max_rows.max(row);
303        }
304
305        // Expand columns if needed
306        if col >= sheet.data[0].len() {
307            for row_data in &mut sheet.data {
308                row_data.resize_with(col + 1, Cell::empty);
309            }
310
311            sheet.max_cols = sheet.max_cols.max(col);
312        }
313    }
314
315    pub fn set_cell_value(&mut self, row: usize, col: usize, value: String) -> Result<()> {
316        self.ensure_cell_exists(row, col);
317
318        let sheet = &mut self.sheets[self.current_sheet_index];
319        let current_value = &sheet.data[row][col].value;
320
321        // Only set modified flag if value actually changes
322        if current_value != &value {
323            let is_formula = value.starts_with('=');
324            sheet.data[row][col] = Cell::new(value, is_formula);
325
326            // Update max_cols if needed
327            if col > sheet.max_cols && !sheet.data[row][col].value.is_empty() {
328                sheet.max_cols = col;
329            }
330
331            self.is_modified = true;
332        }
333
334        Ok(())
335    }
336
337    pub fn get_sheet_names(&self) -> Vec<String> {
338        let mut names = Vec::with_capacity(self.sheets.len());
339        for sheet in &self.sheets {
340            names.push(sheet.name.clone());
341        }
342        names
343    }
344
345    pub fn get_current_sheet_name(&self) -> String {
346        self.sheets[self.current_sheet_index].name.clone()
347    }
348
349    pub fn get_current_sheet_index(&self) -> usize {
350        self.current_sheet_index
351    }
352
353    pub fn switch_sheet(&mut self, index: usize) -> Result<()> {
354        if index >= self.sheets.len() {
355            anyhow::bail!("Sheet index out of range");
356        }
357
358        self.current_sheet_index = index;
359        Ok(())
360    }
361
362    pub fn delete_current_sheet(&mut self) -> Result<()> {
363        // Prevent deleting the last sheet
364        if self.sheets.len() <= 1 {
365            anyhow::bail!("Cannot delete the last sheet");
366        }
367
368        self.sheets.remove(self.current_sheet_index);
369        self.is_modified = true;
370
371        // Adjust current_sheet_index
372        if self.current_sheet_index >= self.sheets.len() {
373            self.current_sheet_index = self.sheets.len() - 1;
374        }
375
376        Ok(())
377    }
378
379    pub fn delete_row(&mut self, row: usize) -> Result<()> {
380        let sheet = &mut self.sheets[self.current_sheet_index];
381
382        // If row is less than 1, return early with success
383        if row < 1 {
384            return Ok(());
385        }
386
387        // If row is outside the max range, return early with success
388        if row > sheet.max_rows {
389            return Ok(());
390        }
391
392        // Only remove the row if it exists in the data
393        if row < sheet.data.len() {
394            sheet.data.remove(row);
395            self.recalculate_max_cols();
396            self.is_modified = true;
397        }
398
399        Ok(())
400    }
401
402    // Delete a range of rows from the current sheet
403    pub fn delete_rows(&mut self, start_row: usize, end_row: usize) -> Result<()> {
404        let sheet = &mut self.sheets[self.current_sheet_index];
405
406        // If start_row is less than 1 or start_row > end_row, return early with success
407        if start_row < 1 || start_row > end_row {
408            return Ok(());
409        }
410
411        // If the entire range is outside max_rows, return early with success
412        if start_row > sheet.max_rows {
413            return Ok(());
414        }
415
416        // Adjust end_row to not exceed the data length
417        let adjusted_end_row = end_row.min(sheet.data.len() - 1);
418
419        // If start_row is valid but end_row exceeds max_rows, adjust end_row to max_rows
420        let effective_end_row = if end_row > sheet.max_rows {
421            sheet.max_rows
422        } else {
423            adjusted_end_row
424        };
425
426        // Only proceed if there are rows to delete
427        if start_row <= effective_end_row && start_row < sheet.data.len() {
428            // Remove rows in reverse order to avoid index shifting issues
429            for row in (start_row..=effective_end_row).rev() {
430                if row < sheet.data.len() {
431                    sheet.data.remove(row);
432                }
433            }
434
435            self.recalculate_max_cols();
436            self.is_modified = true;
437        }
438
439        Ok(())
440    }
441
442    pub fn delete_column(&mut self, col: usize) -> Result<()> {
443        let sheet = &mut self.sheets[self.current_sheet_index];
444
445        // If column is less than 1, return early with success
446        if col < 1 {
447            return Ok(());
448        }
449
450        // If column is outside the max range, return early with success
451        if col > sheet.max_cols {
452            return Ok(());
453        }
454
455        let mut has_data = false;
456        for row in &sheet.data {
457            if col < row.len() && !row[col].value.is_empty() {
458                has_data = true;
459                break;
460            }
461        }
462
463        for row in sheet.data.iter_mut() {
464            if col < row.len() {
465                row.remove(col);
466            }
467        }
468
469        self.recalculate_max_cols();
470        self.recalculate_max_rows();
471
472        if has_data {
473            self.is_modified = true;
474        }
475
476        Ok(())
477    }
478
479    // Delete a range of columns from the current sheet
480    pub fn delete_columns(&mut self, start_col: usize, end_col: usize) -> Result<()> {
481        let sheet = &mut self.sheets[self.current_sheet_index];
482
483        // If start_col is less than 1 or start_col > end_col, return early with success
484        if start_col < 1 || start_col > end_col {
485            return Ok(());
486        }
487
488        // If the entire range is outside max_cols, return early with success
489        if start_col > sheet.max_cols {
490            return Ok(());
491        }
492
493        // If start_col is valid but end_col exceeds max_cols, adjust end_col to max_cols
494        let effective_end_col = end_col.min(sheet.max_cols);
495
496        let mut has_data = false;
497        for row in &sheet.data {
498            for col in start_col..=effective_end_col {
499                if col < row.len() && !row[col].value.is_empty() {
500                    has_data = true;
501                    break;
502                }
503            }
504            if has_data {
505                break;
506            }
507        }
508
509        for row in sheet.data.iter_mut() {
510            for col in (start_col..=effective_end_col).rev() {
511                if col < row.len() {
512                    row.remove(col);
513                }
514            }
515        }
516
517        self.recalculate_max_cols();
518        self.recalculate_max_rows();
519
520        if has_data {
521            self.is_modified = true;
522        }
523
524        Ok(())
525    }
526
527    pub fn is_modified(&self) -> bool {
528        self.is_modified
529    }
530
531    pub fn set_modified(&mut self, modified: bool) {
532        self.is_modified = modified;
533    }
534
535    pub fn get_file_path(&self) -> &str {
536        &self.file_path
537    }
538
539    pub fn is_lazy_loading(&self) -> bool {
540        self.lazy_loading
541    }
542
543    pub fn is_sheet_loaded(&self, sheet_index: usize) -> bool {
544        if !self.lazy_loading || sheet_index >= self.sheets.len() {
545            return true;
546        }
547
548        self.sheets[sheet_index].is_loaded
549    }
550
551    pub fn save(&mut self) -> Result<()> {
552        if !self.is_modified {
553            println!("No changes to save.");
554            return Ok(());
555        }
556
557        // Create a new workbook with rust_xlsxwriter
558        let mut workbook = XlsxWorkbook::new();
559
560        let now = Local::now();
561        let timestamp = now.format("%Y%m%d_%H%M%S").to_string();
562        let path = Path::new(&self.file_path);
563        let file_stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("sheet");
564        let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("xlsx");
565        let parent_dir = path.parent().unwrap_or_else(|| Path::new(""));
566        let new_filename = format!("{}_{}.{}", file_stem, timestamp, extension);
567        let new_filepath = parent_dir.join(new_filename);
568
569        // Create formats
570        let number_format = Format::new().set_num_format("General");
571        let date_format = Format::new().set_num_format("yyyy-mm-dd");
572
573        // Process each sheet
574        for sheet in &self.sheets {
575            let worksheet = workbook.add_worksheet().set_name(&sheet.name)?;
576
577            // Set column widths
578            for col in 0..sheet.max_cols {
579                worksheet.set_column_width(col as u16, 15)?;
580            }
581
582            // Write cell data
583            for row in 1..sheet.data.len() {
584                if row <= sheet.max_rows {
585                    for col in 1..sheet.data[0].len() {
586                        if col <= sheet.max_cols {
587                            let cell = &sheet.data[row][col];
588
589                            // Skip empty cells
590                            if cell.value.is_empty() {
591                                continue;
592                            }
593
594                            let row_idx = (row - 1) as u32;
595                            let col_idx = (col - 1) as u16;
596
597                            // Write cell based on its type
598                            match cell.cell_type {
599                                CellType::Number => {
600                                    if let Ok(num) = cell.value.parse::<f64>() {
601                                        worksheet.write_number_with_format(
602                                            row_idx,
603                                            col_idx,
604                                            num,
605                                            &number_format,
606                                        )?;
607                                    } else {
608                                        worksheet.write_string(row_idx, col_idx, &cell.value)?;
609                                    }
610                                }
611                                CellType::Date => {
612                                    worksheet.write_string_with_format(
613                                        row_idx,
614                                        col_idx,
615                                        &cell.value,
616                                        &date_format,
617                                    )?;
618                                }
619                                CellType::Boolean => {
620                                    if let Ok(b) = cell.value.parse::<bool>() {
621                                        worksheet.write_boolean(row_idx, col_idx, b)?;
622                                    } else {
623                                        worksheet.write_string(row_idx, col_idx, &cell.value)?;
624                                    }
625                                }
626                                CellType::Text => {
627                                    if cell.is_formula {
628                                        let formula = rust_xlsxwriter::Formula::new(&cell.value);
629                                        worksheet.write_formula(row_idx, col_idx, formula)?;
630                                    } else {
631                                        worksheet.write_string(row_idx, col_idx, &cell.value)?;
632                                    }
633                                }
634                                CellType::Empty => {}
635                            }
636                        }
637                    }
638                }
639            }
640        }
641
642        workbook.save(&new_filepath)?;
643        self.is_modified = false;
644
645        Ok(())
646    }
647
648    pub fn insert_sheet_at_index(&mut self, sheet: Sheet, index: usize) -> Result<()> {
649        if index > self.sheets.len() {
650            anyhow::bail!(
651                "Cannot insert sheet at index {}: index out of bounds (max index: {})",
652                index,
653                self.sheets.len()
654            );
655        }
656        self.sheets.insert(index, sheet);
657        self.is_modified = true;
658        Ok(())
659    }
660
661    pub fn recalculate_max_cols(&mut self) {
662        let sheet = &mut self.sheets[self.current_sheet_index];
663
664        // Find maximum non-empty column across all rows
665        let actual_max_col = sheet
666            .data
667            .iter()
668            .map(|row| {
669                // Find last non-empty cell in this row
670                row.iter()
671                    .enumerate()
672                    .rev()
673                    .find(|(_, cell)| !cell.value.is_empty())
674                    .map(|(idx, _)| idx)
675                    .unwrap_or(0)
676            })
677            .max()
678            .unwrap_or(0);
679
680        sheet.max_cols = actual_max_col.max(1);
681    }
682
683    pub fn recalculate_max_rows(&mut self) {
684        let sheet = &mut self.sheets[self.current_sheet_index];
685
686        // Find last row with any non-empty cells
687        let actual_max_row = sheet
688            .data
689            .iter()
690            .enumerate()
691            .rev()
692            .find(|(_, row)| row.iter().any(|cell| !cell.value.is_empty()))
693            .map(|(idx, _)| idx)
694            .unwrap_or(0);
695
696        sheet.max_rows = actual_max_row.max(1);
697    }
698}