Skip to main content

rust_excel_core/
workbook.rs

1use std::io::Cursor;
2use std::path::Path;
3
4use calamine::{open_workbook_from_rs, Reader, Xlsx};
5use umya_spreadsheet::{reader, writer, Spreadsheet, Worksheet};
6
7use crate::cell::from_calamine_data;
8use crate::error::{ExcelError, ExcelResult};
9use crate::types::{CellValue, RangeData, SheetInfo, WorkbookInfo};
10
11/// A wrapper around an Excel workbook providing a clean API.
12pub struct Workbook {
13    inner: Spreadsheet,
14    /// Cached original bytes for fast calamine reads. Cleared on mutation.
15    cached_bytes: Option<Vec<u8>>,
16}
17
18impl Workbook {
19    /// Create a new empty workbook with one default sheet.
20    pub fn new() -> Self {
21        Self {
22            inner: umya_spreadsheet::new_file(),
23            cached_bytes: None,
24        }
25    }
26
27    /// Open a workbook from a file path.
28    pub fn open(path: impl AsRef<Path>) -> ExcelResult<Self> {
29        let spreadsheet =
30            reader::xlsx::read(path).map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
31        Ok(Self {
32            inner: spreadsheet,
33            cached_bytes: None,
34        })
35    }
36
37    /// Open a workbook from bytes in memory.
38    pub fn open_from_bytes(bytes: &[u8]) -> ExcelResult<Self> {
39        let cursor = Cursor::new(bytes);
40        let spreadsheet = reader::xlsx::read_reader(cursor, true)
41            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
42        Ok(Self {
43            inner: spreadsheet,
44            cached_bytes: None,
45        })
46    }
47
48    /// Open a workbook from bytes, caching the bytes for fast calamine reads.
49    ///
50    /// Use this on upload: the original bytes are kept so `read_sheet_data_fast()`
51    /// can use calamine directly without re-serializing through umya.
52    pub fn open_from_bytes_cached(bytes: Vec<u8>) -> ExcelResult<Self> {
53        let cursor = Cursor::new(&bytes);
54        let spreadsheet = reader::xlsx::read_reader(cursor, true)
55            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
56        Ok(Self {
57            inner: spreadsheet,
58            cached_bytes: Some(bytes),
59        })
60    }
61
62    /// Save the workbook to a file path.
63    pub fn save(&self, path: impl AsRef<Path>) -> ExcelResult<()> {
64        writer::xlsx::write(&self.inner, path)
65            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
66        Ok(())
67    }
68
69    /// Serialize the workbook to bytes.
70    pub fn save_to_bytes(&self) -> ExcelResult<Vec<u8>> {
71        let mut buf = Vec::new();
72        writer::xlsx::write_writer(&self.inner, Cursor::new(&mut buf))
73            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
74        Ok(buf)
75    }
76
77    /// Get summary info about the workbook.
78    pub fn info(&self) -> WorkbookInfo {
79        let sheets: Vec<SheetInfo> = self
80            .inner
81            .get_sheet_collection()
82            .iter()
83            .enumerate()
84            .map(|(i, s)| SheetInfo {
85                name: s.get_name().to_string(),
86                index: i,
87            })
88            .collect();
89
90        WorkbookInfo {
91            sheet_count: sheets.len(),
92            sheets,
93        }
94    }
95
96    /// Get the names of all sheets.
97    pub fn sheet_names(&self) -> Vec<String> {
98        self.inner
99            .get_sheet_collection()
100            .iter()
101            .map(|s| s.get_name().to_string())
102            .collect()
103    }
104
105    /// Add a new empty sheet with the given name.
106    pub fn add_sheet(&mut self, name: &str) -> ExcelResult<()> {
107        if self.inner.get_sheet_by_name(name).is_some() {
108            return Err(ExcelError::SheetAlreadyExists(name.to_string()));
109        }
110        self.inner
111            .new_sheet(name)
112            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
113        Ok(())
114    }
115
116    /// Remove a sheet by name.
117    pub fn remove_sheet(&mut self, name: &str) -> ExcelResult<()> {
118        self.inner
119            .remove_sheet_by_name(name)
120            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
121        Ok(())
122    }
123
124    /// Rename a sheet.
125    pub fn rename_sheet(&mut self, old_name: &str, new_name: &str) -> ExcelResult<()> {
126        let sheet = self
127            .inner
128            .get_sheet_by_name_mut(old_name)
129            .ok_or_else(|| ExcelError::SheetNotFound(old_name.to_string()))?;
130        sheet.set_name(new_name);
131        Ok(())
132    }
133
134    /// Get an immutable reference to a sheet by name.
135    pub fn get_sheet(&self, name: &str) -> ExcelResult<&Worksheet> {
136        self.inner
137            .get_sheet_by_name(name)
138            .ok_or_else(|| ExcelError::SheetNotFound(name.to_string()))
139    }
140
141    /// Get a mutable reference to a sheet by name.
142    pub fn get_sheet_mut(&mut self, name: &str) -> ExcelResult<&mut Worksheet> {
143        self.inner
144            .get_sheet_by_name_mut(name)
145            .ok_or_else(|| ExcelError::SheetNotFound(name.to_string()))
146    }
147
148    /// Get a reference to the inner spreadsheet.
149    pub fn inner(&self) -> &Spreadsheet {
150        &self.inner
151    }
152
153    /// Get a mutable reference to the inner spreadsheet.
154    pub fn inner_mut(&mut self) -> &mut Spreadsheet {
155        &mut self.inner
156    }
157
158    /// Mark the workbook as dirty, invalidating cached bytes.
159    /// Call this after any mutation (cell write, row/col insert, style change, etc.).
160    pub fn mark_dirty(&mut self) {
161        self.cached_bytes = None;
162    }
163
164    /// Whether cached bytes are available for fast reads.
165    pub fn has_cache(&self) -> bool {
166        self.cached_bytes.is_some()
167    }
168
169    /// Fast bulk read of an entire sheet's data using calamine.
170    ///
171    /// If cached bytes are available (from upload), reads directly from them
172    /// with calamine (~2.4x faster). Otherwise falls back to serializing
173    /// through umya first.
174    pub fn read_sheet_data(&self, name: &str) -> ExcelResult<RangeData> {
175        // Verify the sheet exists first
176        if self.inner.get_sheet_by_name(name).is_none() {
177            return Err(ExcelError::SheetNotFound(name.to_string()));
178        }
179
180        // Fast path: use cached bytes directly with calamine
181        if let Some(ref bytes) = self.cached_bytes {
182            return Self::read_data_from_bytes(bytes, name);
183        }
184
185        // Slow path: serialize via umya then parse with calamine
186        let bytes = self.save_to_bytes()?;
187        Self::read_data_from_bytes(&bytes, name)
188    }
189
190    /// Create a new .xlsx file from raw data using rust_xlsxwriter.
191    ///
192    /// This bypasses umya-spreadsheet entirely for maximum write performance.
193    /// Each tuple in the input is (sheet_name, data). Returns the .xlsx bytes.
194    pub fn create_from_data(sheets: Vec<(String, RangeData)>) -> ExcelResult<Vec<u8>> {
195        let mut wb = rust_xlsxwriter::Workbook::new();
196
197        for (name, data) in &sheets {
198            let ws = wb.add_worksheet();
199            ws.set_name(name)
200                .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
201
202            for (ri, row) in data.rows.iter().enumerate() {
203                for (ci, val) in row.iter().enumerate() {
204                    let r = ri as u32;
205                    let c = ci as u16;
206                    match val {
207                        CellValue::String(s) => {
208                            ws.write_string(r, c, s)
209                                .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
210                        }
211                        CellValue::Number(n) => {
212                            ws.write_number(r, c, *n)
213                                .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
214                        }
215                        CellValue::Boolean(b) => {
216                            ws.write_boolean(r, c, *b)
217                                .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
218                        }
219                        CellValue::Formula(f) => {
220                            ws.write_formula(r, c, f.as_str())
221                                .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
222                        }
223                        CellValue::Empty => {}
224                    }
225                }
226            }
227        }
228
229        wb.save_to_buffer()
230            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))
231    }
232
233    /// Serialize the workbook to bytes using rust_xlsxwriter for speed.
234    ///
235    /// Walks the umya in-memory model and writes cell data via rust_xlsxwriter
236    /// (~1.4x faster than umya's native writer). Falls back to umya on error.
237    /// Note: styles/merges/formatting may not be fully preserved — use
238    /// `save_to_bytes()` when full fidelity is needed.
239    pub fn save_to_bytes_fast(&self) -> ExcelResult<Vec<u8>> {
240        let sheets: Vec<(String, RangeData)> = self
241            .inner
242            .get_sheet_collection()
243            .iter()
244            .map(|ws| {
245                let name = ws.get_name().to_string();
246                let (min_row, max_row, min_col, max_col) = Self::sheet_used_range(ws);
247                let mut rows = Vec::new();
248                for r in min_row..=max_row {
249                    let mut row = Vec::new();
250                    for c in min_col..=max_col {
251                        let val = match ws.get_cell((c, r)) {
252                            Some(cell) => crate::cell::read_cell_value(cell),
253                            None => CellValue::Empty,
254                        };
255                        row.push(val);
256                    }
257                    rows.push(row);
258                }
259                (
260                    name,
261                    RangeData {
262                        start_row: min_row,
263                        start_col: min_col,
264                        end_row: max_row,
265                        end_col: max_col,
266                        rows,
267                    },
268                )
269            })
270            .collect();
271
272        match Self::create_from_data(sheets) {
273            Ok(bytes) => Ok(bytes),
274            Err(_) => self.save_to_bytes(), // fallback to umya
275        }
276    }
277
278    /// Get the used range of a worksheet (min_row, max_row, min_col, max_col).
279    fn sheet_used_range(ws: &Worksheet) -> (u32, u32, u32, u32) {
280        let cells = ws.get_cell_collection();
281        if cells.is_empty() {
282            return (1, 1, 1, 1);
283        }
284        let mut min_row = u32::MAX;
285        let mut max_row = 0u32;
286        let mut min_col = u32::MAX;
287        let mut max_col = 0u32;
288        for cell in cells {
289            let r = *cell.get_coordinate().get_row_num();
290            let c = *cell.get_coordinate().get_col_num();
291            min_row = min_row.min(r);
292            max_row = max_row.max(r);
293            min_col = min_col.min(c);
294            max_col = max_col.max(c);
295        }
296        (min_row, max_row, min_col, max_col)
297    }
298
299    /// Read sheet data directly from raw .xlsx bytes using calamine only.
300    ///
301    /// This is the true fast path — no umya-spreadsheet involved at all.
302    /// Use this when you already have raw bytes (e.g. from an upload) and
303    /// just need to extract cell data without loading into the mutable model.
304    pub fn read_data_from_bytes(bytes: &[u8], sheet_name: &str) -> ExcelResult<RangeData> {
305        let cursor = Cursor::new(bytes);
306        let mut cal_wb: Xlsx<_> = open_workbook_from_rs(cursor)
307            .map_err(|e: calamine::XlsxError| ExcelError::Spreadsheet(e.to_string()))?;
308
309        let range = cal_wb
310            .worksheet_range(sheet_name)
311            .map_err(|e| ExcelError::Spreadsheet(e.to_string()))?;
312
313        let (start_row, start_col) = range.start().unwrap_or((0, 0));
314        let (end_row, end_col) = range.end().unwrap_or((0, 0));
315
316        let rows: Vec<Vec<CellValue>> = range
317            .rows()
318            .map(|row| row.iter().map(from_calamine_data).collect())
319            .collect();
320
321        Ok(RangeData {
322            start_row: start_row + 1,
323            start_col: start_col + 1,
324            end_row: end_row + 1,
325            end_col: end_col + 1,
326            rows,
327        })
328    }
329}
330
331impl Default for Workbook {
332    fn default() -> Self {
333        Self::new()
334    }
335}