Skip to main content

mint_core/data/
excel.rs

1use calamine::{Data, Range, Reader, Xlsx, open_workbook};
2use std::collections::{HashMap, HashSet};
3use std::path::Path;
4
5use super::DataSource;
6use super::error::DataError;
7use super::helpers;
8use crate::layout::value::{DataValue, ValueSource};
9
/// Options controlling how an `ExcelDataSource` reads a workbook.
#[derive(Debug, Clone)]
pub struct ExcelDataSourceOptions {
    /// Name of the worksheet that is opened as the main sheet.
    pub main_sheet: String,
    /// Header names of the version columns to load from the main sheet.
    pub versions: Vec<String>,
}

impl ExcelDataSourceOptions {
    /// Builds options for the given version columns, with the main sheet
    /// name set to `"Main"`.
    pub fn new(versions: Vec<String>) -> Self {
        let main_sheet = String::from("Main");
        Self {
            main_sheet,
            versions,
        }
    }
}
24
/// Excel-backed data source for versions.
///
/// Populated by `ExcelDataSource::from_path` from the main sheet of a
/// workbook; the remaining sheets are kept so that cells can refer to them
/// by name.
pub struct ExcelDataSource {
    /// Trimmed text of the `Name` column, one entry per data row of the
    /// main sheet (the header row is excluded).
    names: Vec<String>,
    /// One column of cells per requested version, in the order the versions
    /// were given. Lookups find a row index in `names` and use it to index
    /// each column.
    version_columns: Vec<Vec<Data>>,
    /// Every worksheet other than the main sheet, keyed by sheet name.
    sheets: HashMap<String, Range<Data>>,
}
31
32impl ExcelDataSource {
33    pub fn from_path(
34        path: impl AsRef<Path>,
35        options: ExcelDataSourceOptions,
36    ) -> Result<Self, DataError> {
37        let path = path.as_ref();
38        let mut workbook: Xlsx<_> = open_workbook(path).map_err(|_| {
39            DataError::FileError(format!("failed to open file: {}", path.display()))
40        })?;
41
42        let main_sheet_name = options.main_sheet.as_str();
43        let main_sheet = workbook
44            .worksheet_range(main_sheet_name)
45            .map_err(|_| DataError::MiscError("Main sheet not found.".to_owned()))?;
46
47        let rows: Vec<_> = main_sheet.rows().collect();
48        let (headers, data_rows) = match rows.split_first() {
49            Some((hdr, tail)) => (hdr, tail.len()),
50            None => {
51                return Err(DataError::RetrievalError(
52                    "invalid main sheet format.".to_owned(),
53                ));
54            }
55        };
56
57        let name_index = headers
58            .iter()
59            .position(|cell| Self::cell_eq_ascii(cell, "Name"))
60            .ok_or(DataError::ColumnNotFound("Name".to_owned()))?;
61
62        let mut names: Vec<String> = Vec::with_capacity(data_rows);
63        names.extend(rows.iter().skip(1).map(|row| {
64            row.get(name_index)
65                .map(|c| c.to_string().trim().to_owned())
66                .unwrap_or_default()
67        }));
68        helpers::warn_duplicate_names(&names);
69
70        let version_columns =
71            Self::collect_version_columns(headers, &rows, data_rows, &options.versions)?;
72
73        let mut sheets: HashMap<String, Range<Data>> =
74            HashMap::with_capacity(workbook.worksheets().len().saturating_sub(1));
75        for (name, sheet) in workbook.worksheets() {
76            if name != main_sheet_name {
77                sheets.insert(name.clone(), sheet);
78            }
79        }
80
81        Ok(Self {
82            names,
83            version_columns,
84            sheets,
85        })
86    }
87
88    fn retrieve_cell(&self, name: &str) -> Result<&Data, DataError> {
89        let index = self
90            .names
91            .iter()
92            .position(|n| n == name)
93            .ok_or(DataError::RetrievalError(
94                "index not found in data sheet".to_owned(),
95            ))?;
96
97        for column in &self.version_columns {
98            if let Some(value) = column.get(index).filter(|v| !Self::cell_is_empty(v)) {
99                return Ok(value);
100            }
101        }
102
103        Err(DataError::RetrievalError(
104            "data not found in any version column".to_owned(),
105        ))
106    }
107
108    fn cell_eq_ascii(cell: &Data, target: &str) -> bool {
109        match cell {
110            Data::String(s) => s.trim().eq_ignore_ascii_case(target),
111            _ => false,
112        }
113    }
114
115    fn cell_is_empty(cell: &Data) -> bool {
116        match cell {
117            Data::Empty => true,
118            Data::String(s) => s.trim().is_empty(),
119            _ => false,
120        }
121    }
122
123    fn collect_column(rows: &[&[Data]], index: usize, data_rows: usize) -> Vec<Data> {
124        let mut column = Vec::with_capacity(data_rows);
125        column.extend(
126            rows.iter()
127                .skip(1)
128                .map(|row| row.get(index).cloned().unwrap_or(Data::Empty)),
129        );
130        column
131    }
132
133    fn collect_version_columns(
134        headers: &[Data],
135        rows: &[&[Data]],
136        data_rows: usize,
137        versions: &[String],
138    ) -> Result<Vec<Vec<Data>>, DataError> {
139        let mut seen = HashSet::new();
140        let mut columns = Vec::new();
141
142        for v in versions {
143            if seen.insert(v.clone()) {
144                let index = headers
145                    .iter()
146                    .position(|cell| Self::cell_eq_ascii(cell, v))
147                    .ok_or_else(|| DataError::ColumnNotFound(v.clone()))?;
148
149                columns.push(Self::collect_column(rows, index, data_rows));
150            }
151        }
152
153        Ok(columns)
154    }
155}
156
157impl DataSource for ExcelDataSource {
158    fn retrieve_single_value(&self, name: &str) -> Result<DataValue, DataError> {
159        let result = (|| match self.retrieve_cell(name)? {
160            Data::Int(i) => Ok(DataValue::I64(*i)),
161            Data::Float(f) => Ok(DataValue::F64(*f)),
162            Data::Bool(b) => Ok(DataValue::Bool(*b)),
163            _ => Err(DataError::RetrievalError(
164                "Found non-numeric single value".to_owned(),
165            )),
166        })();
167
168        result.map_err(|e| DataError::WhileRetrieving {
169            name: name.to_owned(),
170            source: Box::new(e),
171        })
172    }
173
174    fn retrieve_1d_array_or_string(&self, name: &str) -> Result<ValueSource, DataError> {
175        let result = (|| {
176            let Data::String(cell_string) = self.retrieve_cell(name)? else {
177                return Err(DataError::RetrievalError(
178                    "Expected string value for 1D array or string".to_owned(),
179                ));
180            };
181
182            // Check if the value starts with '#' to indicate a sheet reference
183            if let Some(sheet_name) = cell_string.strip_prefix('#') {
184                let sheet = self.sheets.get(sheet_name).ok_or_else(|| {
185                    let available: Vec<_> = self.sheets.keys().map(|s| s.as_str()).collect();
186                    DataError::RetrievalError(format!(
187                        "Sheet not found: '{}'. Available sheets: {}",
188                        sheet_name,
189                        available.join(", ")
190                    ))
191                })?;
192
193                let mut out = Vec::new();
194
195                for row in sheet.rows().skip(1) {
196                    match row.first() {
197                        Some(cell) if !Self::cell_is_empty(cell) => {
198                            let v = match cell {
199                                Data::Int(i) => DataValue::I64(*i),
200                                Data::Float(f) => DataValue::F64(*f),
201                                Data::Bool(b) => DataValue::Bool(*b),
202                                Data::String(s) => DataValue::Str(s.to_owned()),
203                                _ => {
204                                    return Err(DataError::RetrievalError(
205                                        "Unsupported data type in 1D array".to_owned(),
206                                    ));
207                                }
208                            };
209                            out.push(v);
210                        }
211                        _ => break,
212                    }
213                }
214                return Ok(ValueSource::Array(out));
215            }
216
217            // No '#' prefix, treat as a literal string
218            Ok(ValueSource::Single(DataValue::Str(cell_string.to_owned())))
219        })();
220
221        result.map_err(|e| DataError::WhileRetrieving {
222            name: name.to_owned(),
223            source: Box::new(e),
224        })
225    }
226
227    fn retrieve_2d_array(&self, name: &str) -> Result<Vec<Vec<DataValue>>, DataError> {
228        let result = (|| {
229            let Data::String(cell_string) = self.retrieve_cell(name)? else {
230                return Err(DataError::RetrievalError(
231                    "Expected string value for 2D array".to_owned(),
232                ));
233            };
234
235            let sheet_name = cell_string.strip_prefix('#').ok_or_else(|| {
236                DataError::RetrievalError(format!(
237                    "2D array reference must start with '#' prefix, got: {}",
238                    cell_string
239                ))
240            })?;
241
242            let sheet = self.sheets.get(sheet_name).ok_or_else(|| {
243                let available: Vec<_> = self.sheets.keys().map(|s| s.as_str()).collect();
244                DataError::RetrievalError(format!(
245                    "Sheet not found: '{}'. Available sheets: {}",
246                    sheet_name,
247                    available.join(", ")
248                ))
249            })?;
250
251            let convert = |cell: &Data| -> Result<DataValue, DataError> {
252                match cell {
253                    Data::Int(i) => Ok(DataValue::I64(*i)),
254                    Data::Float(f) => Ok(DataValue::F64(*f)),
255                    Data::Bool(b) => Ok(DataValue::Bool(*b)),
256                    _ => Err(DataError::RetrievalError(
257                        "Unsupported data type in 2D array".to_owned(),
258                    )),
259                }
260            };
261
262            let mut rows = sheet.rows();
263            let hdrs = rows.next().ok_or_else(|| {
264                DataError::RetrievalError("No headers found in 2D array".to_owned())
265            })?;
266            let width = hdrs.iter().take_while(|c| !Self::cell_is_empty(c)).count();
267            if width == 0 {
268                return Err(DataError::RetrievalError(
269                    "Detected zero width 2D array".to_owned(),
270                ));
271            }
272
273            let mut out = Vec::new();
274
275            'outer: for row in rows {
276                if row.first().is_none_or(Self::cell_is_empty) {
277                    break;
278                }
279
280                let mut vals = Vec::with_capacity(width);
281                for col in 0..width {
282                    let Some(cell) = row.get(col) else {
283                        break 'outer;
284                    };
285                    if Self::cell_is_empty(cell) {
286                        break 'outer;
287                    };
288                    vals.push(convert(cell)?);
289                }
290                out.push(vals);
291            }
292
293            Ok(out)
294        })();
295
296        result.map_err(|e| DataError::WhileRetrieving {
297            name: name.to_owned(),
298            source: Box::new(e),
299        })
300    }
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use calamine::Data;
    use std::collections::HashMap;

    /// Builds a data source with a single name, `"Flag"`, whose only version
    /// column holds `value`.
    fn datasource_with_version(value: Data) -> ExcelDataSource {
        let names = vec!["Flag".to_owned()];
        let version_columns = vec![vec![value]];
        ExcelDataSource {
            names,
            version_columns,
            sheets: HashMap::new(),
        }
    }

    /// A boolean cell must come back as `DataValue::Bool` with its value intact.
    #[test]
    fn retrieve_single_value_accepts_bool_cell() {
        let ds = datasource_with_version(Data::Bool(true));
        let value = ds.retrieve_single_value("Flag").expect("bool cell");
        let DataValue::Bool(flag) = value else {
            panic!("expected bool value");
        };
        assert!(flag);
    }
}