Skip to main content

mollendorff_forge/excel/
importer.rs

1//! Excel importer implementation - Excel (.xlsx) → YAML
2
3use crate::error::{ForgeError, ForgeResult};
4use crate::excel::reverse_formula_translator::ReverseFormulaTranslator;
5use crate::types::{Column, ColumnValue, ParsedModel, Table, Variable};
6use calamine::{open_workbook, Data, Range, Reader, Xlsx};
7use std::collections::HashMap;
8use std::path::Path;
9
10/// Excel importer for converting .xlsx files to v1.0.0 YAML models
11pub struct ExcelImporter {
12    path: std::path::PathBuf,
13}
14
15impl ExcelImporter {
16    /// Create a new Excel importer
17    pub fn new<P: AsRef<Path>>(path: P) -> Self {
18        Self {
19            path: path.as_ref().to_path_buf(),
20        }
21    }
22
23    /// Import Excel file to `ParsedModel`
24    ///
25    /// # Errors
26    ///
27    /// Returns an error if the Excel file cannot be opened or parsed.
28    pub fn import(&self) -> ForgeResult<ParsedModel> {
29        // Open Excel workbook
30        let mut workbook: Xlsx<_> = open_workbook(&self.path)
31            .map_err(|e| ForgeError::IO(format!("Failed to open Excel file: {e}")))?;
32
33        // Create model
34        let mut model = ParsedModel::new();
35
36        // Get all sheet names
37        let sheet_names = workbook.sheet_names();
38
39        // Process each sheet
40        for sheet_name in sheet_names {
41            if let Ok(range) = workbook.worksheet_range(&sheet_name) {
42                Self::process_sheet(&sheet_name, &range, &mut workbook, &mut model)?;
43            }
44        }
45
46        Ok(model)
47    }
48
49    /// Process a single worksheet
50    fn process_sheet(
51        sheet_name: &str,
52        range: &Range<Data>,
53        workbook: &mut Xlsx<std::io::BufReader<std::fs::File>>,
54        model: &mut ParsedModel,
55    ) -> ForgeResult<()> {
56        // Check if sheet is empty
57        if range.is_empty() {
58            return Ok(()); // Skip empty sheets
59        }
60
61        // Check if this is a "Scalars" sheet (special handling)
62        if sheet_name.to_lowercase() == "scalars" {
63            Self::process_scalars_sheet(range, model);
64            return Ok(());
65        }
66
67        // Get formula range for this sheet
68        let formula_range = workbook.worksheet_formula(sheet_name).ok();
69
70        // Process as regular table
71        Self::process_table_sheet(sheet_name, range, formula_range.as_ref(), model)
72    }
73
74    /// Process a regular table sheet
75    fn process_table_sheet(
76        sheet_name: &str,
77        range: &Range<Data>,
78        formula_range: Option<&Range<String>>,
79        model: &mut ParsedModel,
80    ) -> ForgeResult<()> {
81        let (height, width) = range.get_size();
82
83        if height < 2 {
84            // Need at least header + 1 data row
85            return Ok(()); // Skip sheets with insufficient data
86        }
87
88        // Read header row (row 0)
89        let mut column_names: Vec<String> = Vec::new();
90        for col in 0..width {
91            if let Some(cell) = range.get((0, col)) {
92                let name = match cell {
93                    Data::String(s) => s.clone(),
94                    Data::Int(i) => i.to_string(),
95                    Data::Float(f) => f.to_string(),
96                    _ => format!("col_{col}"),
97                };
98                column_names.push(name);
99            } else {
100                column_names.push(format!("col_{col}"));
101            }
102        }
103
104        // Read data rows and detect column types
105        let mut columns_data: HashMap<String, Vec<Data>> = HashMap::new();
106        for col_name in &column_names {
107            columns_data.insert(col_name.clone(), Vec::new());
108        }
109
110        // Collect all data (skip header row)
111        for row in 1..height {
112            for (col, col_name) in column_names.iter().enumerate().take(width) {
113                if let Some(cell) = range.get((row, col)) {
114                    columns_data.get_mut(col_name).unwrap().push(cell.clone());
115                } else {
116                    // Empty cell - use default based on column type
117                    columns_data.get_mut(col_name).unwrap().push(Data::Empty);
118                }
119            }
120        }
121
122        // Create table
123        let table_name = Self::sanitize_table_name(sheet_name);
124        let mut table = Table::new(table_name);
125
126        // Build column map for formula translation (A → revenue, B → cogs, etc.)
127        let mut column_map = HashMap::new();
128        for (idx, col_name) in column_names.iter().enumerate() {
129            let excel_col = Self::number_to_column_letter(idx);
130            column_map.insert(excel_col, col_name.clone());
131        }
132
133        // Create reverse formula translator
134        let translator = ReverseFormulaTranslator::new(column_map);
135
136        // Convert columns to YAML format
137        for (col_idx, col_name) in column_names.iter().enumerate() {
138            // Check if this column has formulas (check first data row in formula_range)
139            let has_formula = formula_range
140                .and_then(|formulas| formulas.get((1, col_idx)))
141                .is_some_and(|formula_cell| !formula_cell.is_empty());
142
143            if has_formula {
144                // This is a calculated column - extract formula from first data row
145                if let Some(formulas) = formula_range {
146                    if let Some(formula) = formulas.get((1, col_idx)) {
147                        if !formula.is_empty() {
148                            // Add leading = if not present (calamine strips it)
149                            let formula_with_equals = if formula.starts_with('=') {
150                                formula.clone()
151                            } else {
152                                format!("={formula}")
153                            };
154
155                            // Translate Excel formula to YAML syntax
156                            let yaml_formula = translator.translate(&formula_with_equals)?;
157                            table.add_row_formula(col_name.clone(), yaml_formula);
158                            // Skip this column - don't add as data
159                            continue;
160                        }
161                    }
162                }
163            }
164
165            // Regular data column - convert to ColumnValue
166            let data = &columns_data[col_name];
167            // Skip if all data is empty (formula columns may show as empty/zero values)
168            if data.iter().all(|cell| matches!(cell, Data::Empty)) {
169                continue;
170            }
171            let column_value = Self::convert_to_column_value(data)?;
172            table.add_column(Column::new(col_name.clone(), column_value));
173        }
174
175        model.add_table(table);
176        Ok(())
177    }
178
179    /// Process the "Scalars" sheet (if present)
180    fn process_scalars_sheet(range: &Range<Data>, model: &mut ParsedModel) {
181        let (height, _width) = range.get_size();
182
183        // Skip header row, process data rows
184        for row in 1..height {
185            // Column 0: Name
186            // Column 1: Value
187            // Column 2: Formula (optional)
188
189            let name = if let Some(cell) = range.get((row, 0)) {
190                cell.to_string()
191            } else {
192                continue; // Skip row without name
193            };
194
195            #[allow(clippy::cast_precision_loss)] // Excel integer values fit within f64 precision
196            let value = range.get((row, 1)).and_then(|cell| match cell {
197                Data::Float(f) => Some(*f),
198                Data::Int(i) => Some(*i as f64),
199                _ => None,
200            });
201
202            let formula = range.get((row, 2)).and_then(|cell| match cell {
203                Data::String(s) if !s.is_empty() => Some(s.clone()),
204                _ => None,
205            });
206
207            // Create variable
208            let variable = Variable::new(name.clone(), value, formula);
209            model.add_scalar(name, variable);
210        }
211    }
212
213    /// Convert Excel Data array to `ColumnValue`
214    fn convert_to_column_value(data: &[Data]) -> ForgeResult<ColumnValue> {
215        // Detect column type from first non-empty cell
216        let first_type = data
217            .iter()
218            .find(|cell| !matches!(cell, Data::Empty))
219            .ok_or_else(|| ForgeError::Import("Column has no data".to_string()))?;
220
221        match first_type {
222            Data::Float(_) | Data::Int(_) => {
223                // Number column
224                #[allow(clippy::cast_precision_loss)]
225                // Excel integer values fit within f64 precision
226                let numbers: Vec<f64> = data
227                    .iter()
228                    .map(|cell| match cell {
229                        Data::Float(f) => *f,
230                        Data::Int(i) => *i as f64,
231                        _ => 0.0, // Default for empty or other cells
232                    })
233                    .collect();
234                Ok(ColumnValue::Number(numbers))
235            },
236            Data::String(_) => {
237                // Text column
238                let texts: Vec<String> =
239                    data.iter().map(std::string::ToString::to_string).collect();
240                Ok(ColumnValue::Text(texts))
241            },
242            Data::Bool(_) => {
243                // Boolean column
244                let bools: Vec<bool> = data
245                    .iter()
246                    .map(|cell| matches!(cell, Data::Bool(true)))
247                    .collect();
248                Ok(ColumnValue::Boolean(bools))
249            },
250            _ => {
251                // Default to text
252                let texts: Vec<String> =
253                    data.iter().map(std::string::ToString::to_string).collect();
254                Ok(ColumnValue::Text(texts))
255            },
256        }
257    }
258
259    /// Sanitize sheet name to valid YAML key
260    fn sanitize_table_name(sheet_name: &str) -> String {
261        sheet_name
262            .to_lowercase()
263            .replace(' ', "_")
264            .replace('&', "and")
265            .replace('-', "_")
266            .chars()
267            .filter(|c| c.is_alphanumeric() || *c == '_')
268            .collect()
269    }
270
271    /// Convert column index to Excel column letter (0→A, 1→B, 25→Z, 26→AA, etc.)
272    fn number_to_column_letter(n: usize) -> String {
273        let mut result = String::new();
274        let mut num = n;
275
276        loop {
277            let remainder = num % 26;
278            #[allow(clippy::cast_possible_truncation)] // remainder is 0..25, always fits in u8
279            let ch = (b'A' + remainder as u8) as char;
280            result.insert(0, ch);
281            if num < 26 {
282                break;
283            }
284            num = num / 26 - 1;
285        }
286
287        result
288    }
289}
290
291#[cfg(test)]
292// Financial math: exact float comparison validated against Excel/Gnumeric/R
293#[allow(clippy::float_cmp)]
294mod tests {
295    use super::*;
296
297    #[test]
298    fn test_number_to_column_letter() {
299        assert_eq!(ExcelImporter::number_to_column_letter(0), "A");
300        assert_eq!(ExcelImporter::number_to_column_letter(1), "B");
301        assert_eq!(ExcelImporter::number_to_column_letter(25), "Z");
302        assert_eq!(ExcelImporter::number_to_column_letter(26), "AA");
303        assert_eq!(ExcelImporter::number_to_column_letter(27), "AB");
304        assert_eq!(ExcelImporter::number_to_column_letter(51), "AZ");
305        assert_eq!(ExcelImporter::number_to_column_letter(52), "BA");
306        assert_eq!(ExcelImporter::number_to_column_letter(702), "AAA");
307    }
308
309    #[test]
310    fn test_sanitize_table_name() {
311        assert_eq!(ExcelImporter::sanitize_table_name("Sheet1"), "sheet1");
312        assert_eq!(
313            ExcelImporter::sanitize_table_name("P&L Statement"),
314            "pandl_statement"
315        );
316        assert_eq!(
317            ExcelImporter::sanitize_table_name("Revenue-2025"),
318            "revenue_2025"
319        );
320        assert_eq!(
321            ExcelImporter::sanitize_table_name("Special@#$Chars"),
322            "specialchars"
323        );
324    }
325
326    #[test]
327    fn test_convert_to_column_value_numbers() {
328        let data = vec![
329            Data::Float(100.0),
330            Data::Float(200.0),
331            Data::Int(300),
332            Data::Empty,
333        ];
334        let result = ExcelImporter::convert_to_column_value(&data).unwrap();
335        match result {
336            ColumnValue::Number(nums) => {
337                assert_eq!(nums.len(), 4);
338                assert_eq!(nums[0], 100.0);
339                assert_eq!(nums[1], 200.0);
340                assert_eq!(nums[2], 300.0);
341                assert_eq!(nums[3], 0.0);
342            },
343            _ => panic!("Expected Number column"),
344        }
345    }
346
347    #[test]
348    fn test_convert_to_column_value_text() {
349        let data = vec![
350            Data::String("Apple".to_string()),
351            Data::String("Banana".to_string()),
352            Data::Empty,
353        ];
354        let result = ExcelImporter::convert_to_column_value(&data).unwrap();
355        match result {
356            ColumnValue::Text(texts) => {
357                assert_eq!(texts.len(), 3);
358                assert_eq!(texts[0], "Apple");
359                assert_eq!(texts[1], "Banana");
360                assert_eq!(texts[2], "");
361            },
362            _ => panic!("Expected Text column"),
363        }
364    }
365
366    #[test]
367    fn test_convert_to_column_value_boolean() {
368        let data = vec![Data::Bool(true), Data::Bool(false), Data::Empty];
369        let result = ExcelImporter::convert_to_column_value(&data).unwrap();
370        match result {
371            ColumnValue::Boolean(bools) => {
372                assert_eq!(bools.len(), 3);
373                assert!(bools[0]);
374                assert!(!bools[1]);
375                assert!(!bools[2]);
376            },
377            _ => panic!("Expected Boolean column"),
378        }
379    }
380
381    #[test]
382    fn test_convert_to_column_value_empty() {
383        let data = vec![Data::Empty, Data::Empty];
384        let result = ExcelImporter::convert_to_column_value(&data);
385        assert!(result.is_err());
386    }
387
388    #[test]
389    fn test_import_nonexistent_file_fails() {
390        let importer = ExcelImporter::new("/nonexistent/file.xlsx");
391        let result = importer.import();
392        assert!(result.is_err());
393    }
394
395    #[test]
396    fn test_import_simple_excel_file() {
397        use crate::excel::exporter::ExcelExporter;
398        use tempfile::TempDir;
399
400        let mut model = ParsedModel::new();
401        let mut table = Table::new("sales".to_string());
402        table.add_column(Column::new(
403            "revenue".to_string(),
404            ColumnValue::Number(vec![100.0, 200.0, 300.0]),
405        ));
406        model.add_table(table);
407
408        let dir = TempDir::new().unwrap();
409        let excel_path = dir.path().join("test.xlsx");
410        let exporter = ExcelExporter::new(model);
411        exporter.export(&excel_path).unwrap();
412
413        let importer = ExcelImporter::new(&excel_path);
414        let result = importer.import().unwrap();
415        assert!(result.tables.contains_key("sales"));
416        let table = result.tables.get("sales").unwrap();
417        assert!(table.columns.contains_key("revenue"));
418    }
419
420    #[test]
421    fn test_import_with_text_column() {
422        use crate::excel::exporter::ExcelExporter;
423        use tempfile::TempDir;
424
425        let mut model = ParsedModel::new();
426        let mut table = Table::new("products".to_string());
427        table.add_column(Column::new(
428            "name".to_string(),
429            ColumnValue::Text(vec![
430                "Apple".to_string(),
431                "Banana".to_string(),
432                "Cherry".to_string(),
433            ]),
434        ));
435        model.add_table(table);
436
437        let dir = TempDir::new().unwrap();
438        let excel_path = dir.path().join("test_text.xlsx");
439        let exporter = ExcelExporter::new(model);
440        exporter.export(&excel_path).unwrap();
441
442        let importer = ExcelImporter::new(&excel_path);
443        let result = importer.import().unwrap();
444        assert!(result.tables.contains_key("products"));
445        let table = result.tables.get("products").unwrap();
446        assert!(table.columns.contains_key("name"));
447    }
448
449    #[test]
450    fn test_import_multiple_tables() {
451        use crate::excel::exporter::ExcelExporter;
452        use tempfile::TempDir;
453
454        let mut model = ParsedModel::new();
455        let mut table1 = Table::new("revenue".to_string());
456        table1.add_column(Column::new(
457            "amount".to_string(),
458            ColumnValue::Number(vec![1000.0, 2000.0]),
459        ));
460        model.add_table(table1);
461        let mut table2 = Table::new("costs".to_string());
462        table2.add_column(Column::new(
463            "amount".to_string(),
464            ColumnValue::Number(vec![500.0, 750.0]),
465        ));
466        model.add_table(table2);
467
468        let dir = TempDir::new().unwrap();
469        let excel_path = dir.path().join("multi.xlsx");
470        let exporter = ExcelExporter::new(model);
471        exporter.export(&excel_path).unwrap();
472
473        let importer = ExcelImporter::new(&excel_path);
474        let result = importer.import().unwrap();
475        assert!(result.tables.contains_key("revenue"));
476        assert!(result.tables.contains_key("costs"));
477    }
478
479    #[test]
480    fn test_import_with_scalars() {
481        use crate::excel::exporter::ExcelExporter;
482        use tempfile::TempDir;
483
484        let mut model = ParsedModel::new();
485        model.add_scalar(
486            "tax_rate".to_string(),
487            Variable::new("tax_rate".to_string(), Some(0.15), None),
488        );
489
490        let dir = TempDir::new().unwrap();
491        let excel_path = dir.path().join("scalars.xlsx");
492        let exporter = ExcelExporter::new(model);
493        exporter.export(&excel_path).unwrap();
494
495        let importer = ExcelImporter::new(&excel_path);
496        let result = importer.import().unwrap();
497        let _ = &result;
498    }
499
500    #[test]
501    fn test_importer_new_stores_path() {
502        let path = std::path::Path::new("/some/path/file.xlsx");
503        let importer = ExcelImporter::new(path);
504        assert!(!importer.path.to_str().unwrap().is_empty());
505    }
506
507    #[test]
508    fn test_convert_to_column_value_mixed_numeric() {
509        let data = vec![
510            Data::Float(1.5),
511            Data::Int(2),
512            Data::Float(3.0),
513            Data::Int(4),
514        ];
515        let result = ExcelImporter::convert_to_column_value(&data).unwrap();
516        match result {
517            ColumnValue::Number(nums) => {
518                assert_eq!(nums.len(), 4);
519                assert_eq!(nums[0], 1.5);
520                assert_eq!(nums[1], 2.0);
521                assert_eq!(nums[2], 3.0);
522                assert_eq!(nums[3], 4.0);
523            },
524            _ => panic!("Expected Number column"),
525        }
526    }
527
528    #[test]
529    fn test_sanitize_table_name_spaces() {
530        assert_eq!(ExcelImporter::sanitize_table_name("My Sheet"), "my_sheet");
531        assert_eq!(
532            ExcelImporter::sanitize_table_name("Financial Data 2024"),
533            "financial_data_2024"
534        );
535        assert_eq!(
536            ExcelImporter::sanitize_table_name("   trimmed   "),
537            "___trimmed___"
538        );
539    }
540
541    #[test]
542    fn test_number_to_column_letter_extended() {
543        assert_eq!(ExcelImporter::number_to_column_letter(703), "AAB");
544        assert_eq!(ExcelImporter::number_to_column_letter(704), "AAC");
545        assert_eq!(ExcelImporter::number_to_column_letter(16383), "XFD");
546    }
547}