1use calamine::{Data, Range, Reader, Xlsx, open_workbook};
2use std::collections::{HashMap, HashSet};
3use std::path::Path;
4
5use super::DataSource;
6use super::error::DataError;
7use super::helpers;
8use crate::layout::value::{DataValue, ValueSource};
9
/// Configuration for loading an [`ExcelDataSource`] from a workbook.
///
/// Derives `PartialEq`/`Eq` so option sets can be compared directly (for
/// example in tests or when caching loaded sources).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExcelDataSourceOptions {
    /// Name of the worksheet that holds the `Name` column and the version
    /// columns.
    pub main_sheet: String,
    /// Header names of the version columns, in the order they are consulted
    /// when a value is looked up.
    pub versions: Vec<String>,
}
15
16impl ExcelDataSourceOptions {
17 pub fn new(versions: Vec<String>) -> Self {
18 Self {
19 main_sheet: "Main".to_owned(),
20 versions,
21 }
22 }
23}
24
25pub struct ExcelDataSource {
27 names: Vec<String>,
28 version_columns: Vec<Vec<Data>>,
29 sheets: HashMap<String, Range<Data>>,
30}
31
32impl ExcelDataSource {
33 pub fn from_path(
34 path: impl AsRef<Path>,
35 options: ExcelDataSourceOptions,
36 ) -> Result<Self, DataError> {
37 let path = path.as_ref();
38 let mut workbook: Xlsx<_> = open_workbook(path).map_err(|_| {
39 DataError::FileError(format!("failed to open file: {}", path.display()))
40 })?;
41
42 let main_sheet_name = options.main_sheet.as_str();
43 let main_sheet = workbook
44 .worksheet_range(main_sheet_name)
45 .map_err(|_| DataError::MiscError("Main sheet not found.".to_owned()))?;
46
47 let rows: Vec<_> = main_sheet.rows().collect();
48 let (headers, data_rows) = match rows.split_first() {
49 Some((hdr, tail)) => (hdr, tail.len()),
50 None => {
51 return Err(DataError::RetrievalError(
52 "invalid main sheet format.".to_owned(),
53 ));
54 }
55 };
56
57 let name_index = headers
58 .iter()
59 .position(|cell| Self::cell_eq_ascii(cell, "Name"))
60 .ok_or(DataError::ColumnNotFound("Name".to_owned()))?;
61
62 let mut names: Vec<String> = Vec::with_capacity(data_rows);
63 names.extend(rows.iter().skip(1).map(|row| {
64 row.get(name_index)
65 .map(|c| c.to_string().trim().to_owned())
66 .unwrap_or_default()
67 }));
68 helpers::warn_duplicate_names(&names);
69
70 let version_columns =
71 Self::collect_version_columns(headers, &rows, data_rows, &options.versions)?;
72
73 let mut sheets: HashMap<String, Range<Data>> =
74 HashMap::with_capacity(workbook.worksheets().len().saturating_sub(1));
75 for (name, sheet) in workbook.worksheets() {
76 if name != main_sheet_name {
77 sheets.insert(name.clone(), sheet);
78 }
79 }
80
81 Ok(Self {
82 names,
83 version_columns,
84 sheets,
85 })
86 }
87
88 fn retrieve_cell(&self, name: &str) -> Result<&Data, DataError> {
89 let index = self
90 .names
91 .iter()
92 .position(|n| n == name)
93 .ok_or(DataError::RetrievalError(
94 "index not found in data sheet".to_owned(),
95 ))?;
96
97 for column in &self.version_columns {
98 if let Some(value) = column.get(index).filter(|v| !Self::cell_is_empty(v)) {
99 return Ok(value);
100 }
101 }
102
103 Err(DataError::RetrievalError(
104 "data not found in any version column".to_owned(),
105 ))
106 }
107
108 fn cell_eq_ascii(cell: &Data, target: &str) -> bool {
109 match cell {
110 Data::String(s) => s.trim().eq_ignore_ascii_case(target),
111 _ => false,
112 }
113 }
114
115 fn cell_is_empty(cell: &Data) -> bool {
116 match cell {
117 Data::Empty => true,
118 Data::String(s) => s.trim().is_empty(),
119 _ => false,
120 }
121 }
122
123 fn collect_column(rows: &[&[Data]], index: usize, data_rows: usize) -> Vec<Data> {
124 let mut column = Vec::with_capacity(data_rows);
125 column.extend(
126 rows.iter()
127 .skip(1)
128 .map(|row| row.get(index).cloned().unwrap_or(Data::Empty)),
129 );
130 column
131 }
132
133 fn collect_version_columns(
134 headers: &[Data],
135 rows: &[&[Data]],
136 data_rows: usize,
137 versions: &[String],
138 ) -> Result<Vec<Vec<Data>>, DataError> {
139 let mut seen = HashSet::new();
140 let mut columns = Vec::new();
141
142 for v in versions {
143 if seen.insert(v.clone()) {
144 let index = headers
145 .iter()
146 .position(|cell| Self::cell_eq_ascii(cell, v))
147 .ok_or_else(|| DataError::ColumnNotFound(v.clone()))?;
148
149 columns.push(Self::collect_column(rows, index, data_rows));
150 }
151 }
152
153 Ok(columns)
154 }
155}
156
157impl DataSource for ExcelDataSource {
158 fn retrieve_single_value(&self, name: &str) -> Result<DataValue, DataError> {
159 let result = (|| match self.retrieve_cell(name)? {
160 Data::Int(i) => Ok(DataValue::I64(*i)),
161 Data::Float(f) => Ok(DataValue::F64(*f)),
162 Data::Bool(b) => Ok(DataValue::Bool(*b)),
163 _ => Err(DataError::RetrievalError(
164 "Found non-numeric single value".to_owned(),
165 )),
166 })();
167
168 result.map_err(|e| DataError::WhileRetrieving {
169 name: name.to_owned(),
170 source: Box::new(e),
171 })
172 }
173
174 fn retrieve_1d_array_or_string(&self, name: &str) -> Result<ValueSource, DataError> {
175 let result = (|| {
176 let Data::String(cell_string) = self.retrieve_cell(name)? else {
177 return Err(DataError::RetrievalError(
178 "Expected string value for 1D array or string".to_owned(),
179 ));
180 };
181
182 if let Some(sheet_name) = cell_string.strip_prefix('#') {
184 let sheet = self.sheets.get(sheet_name).ok_or_else(|| {
185 let available: Vec<_> = self.sheets.keys().map(|s| s.as_str()).collect();
186 DataError::RetrievalError(format!(
187 "Sheet not found: '{}'. Available sheets: {}",
188 sheet_name,
189 available.join(", ")
190 ))
191 })?;
192
193 let mut out = Vec::new();
194
195 for row in sheet.rows().skip(1) {
196 match row.first() {
197 Some(cell) if !Self::cell_is_empty(cell) => {
198 let v = match cell {
199 Data::Int(i) => DataValue::I64(*i),
200 Data::Float(f) => DataValue::F64(*f),
201 Data::Bool(b) => DataValue::Bool(*b),
202 Data::String(s) => DataValue::Str(s.to_owned()),
203 _ => {
204 return Err(DataError::RetrievalError(
205 "Unsupported data type in 1D array".to_owned(),
206 ));
207 }
208 };
209 out.push(v);
210 }
211 _ => break,
212 }
213 }
214 return Ok(ValueSource::Array(out));
215 }
216
217 Ok(ValueSource::Single(DataValue::Str(cell_string.to_owned())))
219 })();
220
221 result.map_err(|e| DataError::WhileRetrieving {
222 name: name.to_owned(),
223 source: Box::new(e),
224 })
225 }
226
227 fn retrieve_2d_array(&self, name: &str) -> Result<Vec<Vec<DataValue>>, DataError> {
228 let result = (|| {
229 let Data::String(cell_string) = self.retrieve_cell(name)? else {
230 return Err(DataError::RetrievalError(
231 "Expected string value for 2D array".to_owned(),
232 ));
233 };
234
235 let sheet_name = cell_string.strip_prefix('#').ok_or_else(|| {
236 DataError::RetrievalError(format!(
237 "2D array reference must start with '#' prefix, got: {}",
238 cell_string
239 ))
240 })?;
241
242 let sheet = self.sheets.get(sheet_name).ok_or_else(|| {
243 let available: Vec<_> = self.sheets.keys().map(|s| s.as_str()).collect();
244 DataError::RetrievalError(format!(
245 "Sheet not found: '{}'. Available sheets: {}",
246 sheet_name,
247 available.join(", ")
248 ))
249 })?;
250
251 let convert = |cell: &Data| -> Result<DataValue, DataError> {
252 match cell {
253 Data::Int(i) => Ok(DataValue::I64(*i)),
254 Data::Float(f) => Ok(DataValue::F64(*f)),
255 Data::Bool(b) => Ok(DataValue::Bool(*b)),
256 _ => Err(DataError::RetrievalError(
257 "Unsupported data type in 2D array".to_owned(),
258 )),
259 }
260 };
261
262 let mut rows = sheet.rows();
263 let hdrs = rows.next().ok_or_else(|| {
264 DataError::RetrievalError("No headers found in 2D array".to_owned())
265 })?;
266 let width = hdrs.iter().take_while(|c| !Self::cell_is_empty(c)).count();
267 if width == 0 {
268 return Err(DataError::RetrievalError(
269 "Detected zero width 2D array".to_owned(),
270 ));
271 }
272
273 let mut out = Vec::new();
274
275 'outer: for row in rows {
276 if row.first().is_none_or(Self::cell_is_empty) {
277 break;
278 }
279
280 let mut vals = Vec::with_capacity(width);
281 for col in 0..width {
282 let Some(cell) = row.get(col) else {
283 break 'outer;
284 };
285 if Self::cell_is_empty(cell) {
286 break 'outer;
287 };
288 vals.push(convert(cell)?);
289 }
290 out.push(vals);
291 }
292
293 Ok(out)
294 })();
295
296 result.map_err(|e| DataError::WhileRetrieving {
297 name: name.to_owned(),
298 source: Box::new(e),
299 })
300 }
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use calamine::Data;
    use std::collections::HashMap;

    /// Builds a source with one entry named `Flag` whose single version
    /// column holds `value`, and no extra worksheets.
    fn datasource_with_version(value: Data) -> ExcelDataSource {
        let names = vec![String::from("Flag")];
        let version_columns = vec![vec![value]];
        ExcelDataSource {
            names,
            version_columns,
            sheets: HashMap::new(),
        }
    }

    /// A boolean cell must come back as `DataValue::Bool` with the same value.
    #[test]
    fn retrieve_single_value_accepts_bool_cell() {
        let source = datasource_with_version(Data::Bool(true));
        let retrieved = source.retrieve_single_value("Flag").expect("bool cell");
        let DataValue::Bool(flag) = retrieved else {
            panic!("expected bool value");
        };
        assert!(flag);
    }
}