sheets_diff/
lib.rs

1use std::{fmt, fs::File, io::BufReader};
2
3use calamine::{open_workbook, Data, Reader, Xlsx};
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7#[derive(Clone)]
8#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
9enum CellDiffKind {
10    Value,
11    Formula,
12}
13
14impl fmt::Display for CellDiffKind {
15    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16        match self {
17            CellDiffKind::Formula => write!(f, "formula"),
18            CellDiffKind::Value => write!(f, "value"),
19        }
20    }
21}
22
23/// main struct
24#[derive(Clone)]
25#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
26pub struct Diff {
27    old_filepath: String,
28    new_filepath: String,
29    sheet_diff: Vec<SheetDiff>,
30    cell_diffs: Vec<SheetCellDiff>,
31}
32
33#[derive(Clone)]
34#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
35struct SheetDiff {
36    old: Option<String>,
37    new: Option<String>,
38}
39
40#[derive(Clone)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42struct SheetCellDiff {
43    kind: CellDiffKind,
44    sheet: String,
45    cells: Vec<CellDiff>,
46}
47
48#[derive(Clone)]
49#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
50struct CellDiff {
51    row: u32,
52    col: u32,
53    old: Option<String>,
54    new: Option<String>,
55}
56
57impl Diff {
58    /// init
59    pub fn new(old_filepath: &str, new_filepath: &str) -> Self {
60        Diff {
61            old_filepath: old_filepath.to_owned(),
62            new_filepath: new_filepath.to_owned(),
63            sheet_diff: vec![],
64            cell_diffs: vec![],
65        }
66    }
67
68    /// get serde-ready diff
69    /// #[cfg(feature = "serde")]
70    pub fn diff(&mut self) -> Diff {
71        self.collect_diff();
72        self.clone()
73    }
74
75    /// get unified diff str
76    pub fn unified_diff(&mut self) -> String {
77        self.collect_diff();
78
79        let mut ret: Vec<String> = vec![];
80
81        if !self.sheet_diff.is_empty() {
82            ret.push(format!("--- {} (sheet names)", self.old_filepath));
83            ret.push(format!("+++ {} (sheet names)", self.new_filepath));
84            self.sheet_diff.iter().for_each(|x| {
85                if let Some(sheet) = x.old.as_ref() {
86                    ret.push(format!("- {}", sheet));
87                }
88                if let Some(sheet) = x.new.as_ref() {
89                    ret.push(format!("+ {}", sheet));
90                }
91            });
92        }
93
94        self.cell_diffs.iter().for_each(|x| {
95            ret.push(format!(
96                "--- {} [{}] ({})",
97                self.old_filepath, x.sheet, x.kind
98            ));
99            ret.push(format!(
100                "+++ {} [{}] ({})",
101                self.new_filepath, x.sheet, x.kind
102            ));
103            x.cells.iter().for_each(|x| {
104                ret.push(format!("@@ ({}, {}) @@", x.row, x.col));
105                if let Some(sheet) = x.old.as_ref() {
106                    ret.push(format!("- {}", sheet));
107                }
108                if let Some(sheet) = x.new.as_ref() {
109                    ret.push(format!("+ {}", sheet));
110                }
111            });
112        });
113
114        ret.join("\n")
115    }
116
117    /// collect sheet diff and cell range diff
118    fn collect_diff(&mut self) {
119        let mut old_workbook: Xlsx<BufReader<File>> = open_workbook(self.old_filepath.as_str())
120            .expect(format!("Cannot open {}", self.old_filepath.as_str()).as_str());
121        let mut new_workbook: Xlsx<BufReader<File>> = open_workbook(self.new_filepath.as_str())
122            .expect(format!("Cannot open {}", self.new_filepath.as_str()).as_str());
123
124        let old_sheets = old_workbook.sheet_names().to_owned();
125        let new_sheets = new_workbook.sheet_names().to_owned();
126
127        self.collect_sheet_diff(&old_sheets, &new_sheets);
128
129        let same_name_sheets = filter_same_name_sheets(&old_sheets, &new_sheets);
130        self.collect_cell_value_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
131        self.collect_cell_formula_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
132    }
133
134    /// collect sheet diff by name
135    fn collect_sheet_diff(&mut self, old_sheets: &Vec<String>, new_sheets: &Vec<String>) {
136        if *old_sheets == *new_sheets {
137            return;
138        }
139
140        for sheet in old_sheets {
141            if !new_sheets.contains(sheet) {
142                self.sheet_diff.push(SheetDiff {
143                    old: Some(sheet.to_owned()),
144                    new: None,
145                });
146            }
147        }
148        for sheet in new_sheets {
149            if !old_sheets.contains(sheet) {
150                self.sheet_diff.push(SheetDiff {
151                    old: None,
152                    new: Some(sheet.to_owned()),
153                });
154            }
155        }
156    }
157
158    /// collect value diff in cell range
159    fn collect_cell_value_diff(
160        &mut self,
161        old_workbook: &mut Xlsx<BufReader<File>>,
162        new_workbook: &mut Xlsx<BufReader<File>>,
163        same_name_sheets: &Vec<String>,
164    ) {
165        for sheet in same_name_sheets {
166            if let (Ok(range1), Ok(range2)) = (
167                old_workbook.worksheet_range(sheet),
168                new_workbook.worksheet_range(sheet),
169            ) {
170                let mut cell_diffs: Vec<CellDiff> = vec![];
171
172                let max_rows = range1.height().max(range2.height());
173                let max_cols = range1.width().max(range2.width());
174
175                for row in 0..max_rows {
176                    for col in 0..max_cols {
177                        let cell1 = range1
178                            .get_value((row as u32, col as u32))
179                            .unwrap_or(&Data::Empty);
180                        let cell2 = range2
181                            .get_value((row as u32, col as u32))
182                            .unwrap_or(&Data::Empty);
183
184                        if cell1 != cell2 {
185                            cell_diffs.push(CellDiff {
186                                row: (row + 1) as u32,
187                                col: (col + 1) as u32,
188                                old: if cell1 != &Data::Empty {
189                                    Some(cell1.to_string())
190                                } else {
191                                    None
192                                },
193                                new: if cell2 != &Data::Empty {
194                                    Some(cell2.to_string())
195                                } else {
196                                    None
197                                },
198                            });
199                        }
200                    }
201                }
202
203                if !cell_diffs.is_empty() {
204                    let sheet_cell_diff = SheetCellDiff {
205                        sheet: sheet.to_owned(),
206                        kind: CellDiffKind::Value,
207                        cells: cell_diffs,
208                    };
209                    self.cell_diffs.push(sheet_cell_diff);
210                }
211            } else {
212                println!("Failed to read sheet: {}", sheet);
213            }
214        }
215    }
216
217    /// collect formula diff in cell range
218    fn collect_cell_formula_diff(
219        &mut self,
220        old_workbook: &mut Xlsx<BufReader<File>>,
221        new_workbook: &mut Xlsx<BufReader<File>>,
222        same_name_sheets: &Vec<String>,
223    ) {
224        for sheet in same_name_sheets {
225            if let (Ok(range1), Ok(range2)) = (
226                old_workbook.worksheet_formula(sheet),
227                new_workbook.worksheet_formula(sheet),
228            ) {
229                let mut cell_diffs: Vec<CellDiff> = vec![];
230
231                let (range1_start_row, range1_start_col) = match range1.start() {
232                    Some((row, col)) => (row, col),
233                    None => (u32::MAX, u32::MAX),
234                };
235                let (range2_start_row, range2_start_col) = match range2.start() {
236                    Some((row, col)) => (row, col),
237                    None => (u32::MAX, u32::MAX),
238                };
239                let (range1_end_row, range1_end_col) = match range1.end() {
240                    Some((row, col)) => (row, col),
241                    None => (u32::MIN, u32::MIN),
242                };
243                let (range2_end_row, range2_end_col) = match range2.end() {
244                    Some((row, col)) => (row, col),
245                    None => (u32::MIN, u32::MIN),
246                };
247                let start_row = range1_start_row.min(range2_start_row);
248                let start_col = range1_start_col.min(range2_start_col);
249                let end_row = range1_end_row.max(range2_end_row);
250                let end_col = range1_end_col.max(range2_end_col);
251
252                for row in start_row..(end_row + 1) {
253                    for col in start_col..(end_col + 1) {
254                        let cell1 = match range1.get_value((row as u32, col as u32)) {
255                            Some(x) => &Data::String(x.to_string()),
256                            None => &Data::Empty,
257                        };
258                        let cell2 = match range2.get_value((row as u32, col as u32)) {
259                            Some(x) => &Data::String(x.to_string()),
260                            None => &Data::Empty,
261                        };
262
263                        if cell1 != cell2 {
264                            cell_diffs.push(CellDiff {
265                                row: (row + 1) as u32,
266                                col: (col + 1) as u32,
267                                old: if cell1 != &Data::Empty {
268                                    Some(cell1.to_string())
269                                } else {
270                                    None
271                                },
272                                new: if cell2 != &Data::Empty {
273                                    Some(cell2.to_string())
274                                } else {
275                                    None
276                                },
277                            });
278                        }
279                    }
280                }
281
282                if !cell_diffs.is_empty() {
283                    let sheet_cell_diff = SheetCellDiff {
284                        sheet: sheet.to_owned(),
285                        kind: CellDiffKind::Formula,
286                        cells: cell_diffs,
287                    };
288                    self.cell_diffs.push(sheet_cell_diff);
289                }
290            } else {
291                println!("Failed to read sheet: {}", sheet);
292            }
293        }
294    }
295}
296
297/// filter sheets whose name is equal
298fn filter_same_name_sheets<'a>(
299    old_sheets: &'a Vec<String>,
300    new_sheets: &'a Vec<String>,
301) -> Vec<String> {
302    old_sheets
303        .iter()
304        .filter(|s| new_sheets.contains(s))
305        .map(|s| s.to_owned())
306        .collect()
307}