1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
//! Excel file reading with streaming support
use crate::error::{ExcelError, Result};
use crate::types::{CellValue, Row};
use calamine::{open_workbook_auto, Data, Range, Reader, Sheets};
use std::path::Path;
/// Reader for spreadsheet workbooks, backed by calamine.
///
/// Holds the workbook handle returned by `open_workbook_auto` and exposes
/// sheet listing, row iteration, single-cell lookup and sheet dimensions.
/// Every method that reads sheet contents takes `&mut self` because the
/// underlying `worksheet_range` call needs mutable access to the workbook.
pub struct ExcelReader {
/// calamine workbook handle reading from a buffered file.
workbook: Sheets<std::io::BufReader<std::fs::File>>,
}
impl ExcelReader {
/// Open an Excel file for reading
///
/// Supports XLSX, XLS, and ODS formats. Format is auto-detected from file extension.
///
/// # Examples
///
/// ```no_run
/// use excelstream::reader::ExcelReader;
///
/// let reader = ExcelReader::open("data.xlsx").unwrap();
/// ```
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let workbook =
open_workbook_auto(path).map_err(|e| ExcelError::ReadError(e.to_string()))?;
Ok(ExcelReader { workbook })
}
/// Get list of sheet names in the workbook
///
/// # Examples
///
/// ```no_run
/// use excelstream::reader::ExcelReader;
///
/// let reader = ExcelReader::open("data.xlsx").unwrap();
/// let sheets = reader.sheet_names();
/// println!("Available sheets: {:?}", sheets);
/// ```
pub fn sheet_names(&self) -> Vec<String> {
self.workbook.sheet_names().to_vec()
}
/// Get the number of sheets in the workbook
pub fn sheet_count(&self) -> usize {
self.workbook.sheet_names().len()
}
/// Read all rows from a specific sheet (streaming iterator)
///
/// Returns an iterator that yields rows one at a time, minimizing memory usage.
///
/// # Examples
///
/// ```no_run
/// use excelstream::reader::ExcelReader;
///
/// let mut reader = ExcelReader::open("data.xlsx").unwrap();
/// for row_result in reader.rows("Sheet1").unwrap() {
/// let row = row_result.unwrap();
/// println!("Row {}: {:?}", row.index, row.cells);
/// }
/// ```
pub fn rows(&mut self, sheet_name: &str) -> Result<RowIterator> {
let range = self.workbook.worksheet_range(sheet_name).map_err(|e| {
let error_str = e.to_string();
if error_str.contains("not found") {
let available = self.sheet_names().join(", ");
ExcelError::SheetNotFound {
sheet: sheet_name.to_string(),
available,
}
} else {
ExcelError::from(e)
}
})?;
Ok(RowIterator::new(range))
}
/// Read all rows from a sheet by index (0-based)
pub fn rows_by_index(&mut self, index: usize) -> Result<RowIterator> {
let sheet_names = self.sheet_names();
let sheet_name = sheet_names.get(index).ok_or_else(|| {
let available = sheet_names.join(", ");
ExcelError::SheetNotFound {
sheet: format!("index {}", index),
available,
}
})?;
self.rows(sheet_name)
}
/// Read a specific cell value
///
/// # Examples
///
/// ```no_run
/// use excelstream::reader::ExcelReader;
///
/// let mut reader = ExcelReader::open("data.xlsx").unwrap();
/// let value = reader.read_cell("Sheet1", 0, 0).unwrap();
/// println!("Cell A1: {}", value);
/// ```
pub fn read_cell(&mut self, sheet_name: &str, row: u32, col: u32) -> Result<CellValue> {
let range = self.workbook.worksheet_range(sheet_name).map_err(|e| {
let error_str = e.to_string();
if error_str.contains("not found") {
let available = self.sheet_names().join(", ");
ExcelError::SheetNotFound {
sheet: sheet_name.to_string(),
available,
}
} else {
ExcelError::from(e)
}
})?;
let cell = range
.get_value((row, col))
.map(datatype_to_cellvalue)
.unwrap_or(CellValue::Empty);
Ok(cell)
}
/// Get the dimensions of a sheet (rows, cols)
pub fn dimensions(&mut self, sheet_name: &str) -> Result<(u32, u32)> {
let range = self.workbook.worksheet_range(sheet_name).map_err(|e| {
let error_str = e.to_string();
if error_str.contains("not found") {
let available = self.sheet_names().join(", ");
ExcelError::SheetNotFound {
sheet: sheet_name.to_string(),
available,
}
} else {
ExcelError::from(e)
}
})?;
let (rows, cols) = range.get_size();
Ok((rows as u32, cols as u32))
}
}
/// Iterator over the rows of one sheet's cell range.
///
/// Owns the `Range<Data>` it walks and yields one `Result<Row>` per row,
/// from the range's first row up to (but not including) `max_row`.
pub struct RowIterator {
/// Cell range the rows are read from.
range: Range<Data>,
/// Row index that the next call to `next` will produce.
current_row: u32,
/// Exclusive upper bound: iteration stops once `current_row` reaches it.
max_row: u32,
}
impl RowIterator {
fn new(range: Range<Data>) -> Self {
let (rows, _) = range.get_size();
let start = range.start().map(|(r, _)| r).unwrap_or(0);
RowIterator {
range,
current_row: start,
max_row: start + rows as u32,
}
}
}
impl Iterator for RowIterator {
type Item = Result<Row>;
fn next(&mut self) -> Option<Self::Item> {
if self.current_row >= self.max_row {
return None;
}
let row_idx = self.current_row;
self.current_row += 1;
let (_, cols) = self.range.get_size();
let mut cells = Vec::with_capacity(cols);
// Optimize: iterate directly without intermediate conversions
let start_col = self.range.start().map(|(_, c)| c).unwrap_or(0);
for col in start_col..start_col + cols as u32 {
let cell_value = self
.range
.get_value((row_idx, col))
.map(datatype_to_cellvalue)
.unwrap_or(CellValue::Empty);
cells.push(cell_value);
}
Some(Ok(Row::new(row_idx, cells)))
}
}
/// Convert calamine Data to our CellValue
#[inline]
fn datatype_to_cellvalue(dt: &Data) -> CellValue {
match dt {
Data::Empty => CellValue::Empty,
Data::String(s) => CellValue::String(s.clone()),
Data::Float(f) => CellValue::Float(*f),
Data::Int(i) => CellValue::Int(*i),
Data::Bool(b) => CellValue::Bool(*b),
Data::DateTime(d) => CellValue::DateTime(d.as_f64()),
Data::Error(e) => CellValue::Error(format!("{:?}", e)),
Data::DateTimeIso(s) => CellValue::String(s.clone()),
Data::DurationIso(s) => CellValue::String(s.clone()),
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use calamine::Data;

    /// Spot-checks the string and integer arms of `datatype_to_cellvalue`.
    #[test]
    fn test_datatype_conversion() {
        let text = Data::String("test".to_string());
        assert_eq!(
            datatype_to_cellvalue(&text),
            CellValue::String("test".to_string())
        );

        let number = Data::Int(42);
        assert_eq!(datatype_to_cellvalue(&number), CellValue::Int(42));
    }
}