sheets_diff/
lib.rs

1use std::{fmt, fs::File, io::BufReader};
2
3use calamine::{open_workbook, Data, Reader, Xlsx};
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
8#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
9enum CellDiffKind {
10    Value,
11    Formula,
12}
13
14impl fmt::Display for CellDiffKind {
15    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16        match self {
17            CellDiffKind::Formula => write!(f, "formula"),
18            CellDiffKind::Value => write!(f, "value"),
19        }
20    }
21}
22
23/// main struct
24#[derive(Clone)]
25#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
26pub struct Diff {
27    old_filepath: String,
28    new_filepath: String,
29    sheet_diff: Vec<SheetDiff>,
30    cell_diffs: Vec<SheetCellDiff>,
31}
32
33#[derive(Clone)]
34#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
35struct SheetDiff {
36    old: Option<String>,
37    new: Option<String>,
38}
39
40#[derive(Clone)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42struct SheetCellDiff {
43    sheet: String,
44    cells: Vec<CellDiff>,
45}
46
47#[derive(Clone)]
48#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
49struct CellDiff {
50    row: usize,
51    col: usize,
52    addr: String,
53    kind: CellDiffKind,
54    old: Option<String>,
55    new: Option<String>,
56}
57
58impl Diff {
59    /// init
60    pub fn new(old_filepath: &str, new_filepath: &str) -> Self {
61        let mut ret = Diff {
62            old_filepath: old_filepath.to_owned(),
63            new_filepath: new_filepath.to_owned(),
64            sheet_diff: vec![],
65            cell_diffs: vec![],
66        };
67
68        ret.collect_diff();
69
70        ret.cell_diffs.sort_by(|a, b| a.sheet.cmp(&b.sheet));
71
72        let mut merged_cell_diffs: Vec<SheetCellDiff> = vec![];
73        ret.cell_diffs.iter().for_each(|a| {
74            let found = merged_cell_diffs.iter_mut().find(|b| b.sheet == a.sheet);
75            if let Some(found) = found {
76                found.cells.extend(a.cells.clone());
77            } else {
78                merged_cell_diffs.push(a.clone());
79            }
80        });
81        merged_cell_diffs.iter_mut().for_each(|x| {
82            x.cells
83                .sort_by(|a, b| a.addr.cmp(&b.addr).then_with(|| a.kind.cmp(&b.kind)));
84        });
85        ret.cell_diffs = merged_cell_diffs;
86
87        ret
88    }
89
90    /// get serde-ready diff
91    /// #[cfg(feature = "serde")]
92    pub fn diff(&mut self) -> Diff {
93        self.clone()
94    }
95
96    /// get unified diff str
97    pub fn unified_diff(&mut self) -> String {
98        let mut ret: Vec<String> = vec![];
99
100        if !self.sheet_diff.is_empty() {
101            ret.push(format!("--- {} (sheet names)", self.old_filepath));
102            ret.push(format!("+++ {} (sheet names)", self.new_filepath));
103            self.sheet_diff.iter().for_each(|x| {
104                if let Some(sheet) = x.old.as_ref() {
105                    ret.push(format!("- {}", sheet));
106                }
107                if let Some(sheet) = x.new.as_ref() {
108                    ret.push(format!("+ {}", sheet));
109                }
110            });
111        }
112
113        self.cell_diffs.iter().for_each(|x| {
114            ret.push(format!("--- {} [{}]", self.old_filepath, x.sheet));
115            ret.push(format!("+++ {} [{}]", self.new_filepath, x.sheet));
116            x.cells.iter().for_each(|x| {
117                ret.push(format!("@@ {}({},{}) {} @@", x.addr, x.row, x.col, x.kind));
118                if let Some(sheet) = x.old.as_ref() {
119                    ret.push(format!("- {}", sheet));
120                }
121                if let Some(sheet) = x.new.as_ref() {
122                    ret.push(format!("+ {}", sheet));
123                }
124            });
125        });
126
127        ret.join("\n")
128    }
129
130    /// collect sheet diff and cell range diff
131    fn collect_diff(&mut self) {
132        let mut old_workbook: Xlsx<BufReader<File>> = open_workbook(self.old_filepath.as_str())
133            .expect(format!("Cannot open {}", self.old_filepath.as_str()).as_str());
134        let mut new_workbook: Xlsx<BufReader<File>> = open_workbook(self.new_filepath.as_str())
135            .expect(format!("Cannot open {}", self.new_filepath.as_str()).as_str());
136
137        let old_sheets = old_workbook.sheet_names().to_owned();
138        let new_sheets = new_workbook.sheet_names().to_owned();
139
140        self.collect_sheet_diff(&old_sheets, &new_sheets);
141
142        let same_name_sheets = filter_same_name_sheets(&old_sheets, &new_sheets);
143        self.collect_cell_value_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
144        self.collect_cell_formula_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
145    }
146
147    /// collect sheet diff by name
148    fn collect_sheet_diff(&mut self, old_sheets: &Vec<String>, new_sheets: &Vec<String>) {
149        if *old_sheets == *new_sheets {
150            return;
151        }
152
153        for sheet in old_sheets {
154            if !new_sheets.contains(sheet) {
155                self.sheet_diff.push(SheetDiff {
156                    old: Some(sheet.to_owned()),
157                    new: None,
158                });
159            }
160        }
161        for sheet in new_sheets {
162            if !old_sheets.contains(sheet) {
163                self.sheet_diff.push(SheetDiff {
164                    old: None,
165                    new: Some(sheet.to_owned()),
166                });
167            }
168        }
169    }
170
171    /// collect value diff in cell range
172    fn collect_cell_value_diff(
173        &mut self,
174        old_workbook: &mut Xlsx<BufReader<File>>,
175        new_workbook: &mut Xlsx<BufReader<File>>,
176        same_name_sheets: &Vec<String>,
177    ) {
178        for sheet in same_name_sheets {
179            if let (Ok(range1), Ok(range2)) = (
180                old_workbook.worksheet_range(sheet),
181                new_workbook.worksheet_range(sheet),
182            ) {
183                let mut cell_diffs: Vec<CellDiff> = vec![];
184
185                let max_rows = range1.height().max(range2.height()) as u32;
186                let max_cols = range1.width().max(range2.width()) as u32;
187
188                for row in 0..max_rows {
189                    for col in 0..max_cols {
190                        let cell1 = range1.get_value((row, col)).unwrap_or(&Data::Empty);
191                        let cell2 = range2.get_value((row, col)).unwrap_or(&Data::Empty);
192
193                        if cell1 != cell2 {
194                            let row = (row + 1) as usize;
195                            let col = (col + 1) as usize;
196                            cell_diffs.push(CellDiff {
197                                row,
198                                col,
199                                addr: cell_pos_to_address(row, col),
200                                kind: CellDiffKind::Value,
201                                old: if cell1 != &Data::Empty {
202                                    Some(cell1.to_string())
203                                } else {
204                                    None
205                                },
206                                new: if cell2 != &Data::Empty {
207                                    Some(cell2.to_string())
208                                } else {
209                                    None
210                                },
211                            });
212                        }
213                    }
214                }
215
216                if !cell_diffs.is_empty() {
217                    let sheet_cell_diff = SheetCellDiff {
218                        sheet: sheet.to_owned(),
219                        cells: cell_diffs,
220                    };
221                    self.cell_diffs.push(sheet_cell_diff);
222                }
223            } else {
224                println!("Failed to read sheet: {}", sheet);
225            }
226        }
227    }
228
229    /// collect formula diff in cell range
230    fn collect_cell_formula_diff(
231        &mut self,
232        old_workbook: &mut Xlsx<BufReader<File>>,
233        new_workbook: &mut Xlsx<BufReader<File>>,
234        same_name_sheets: &Vec<String>,
235    ) {
236        for sheet in same_name_sheets {
237            if let (Ok(range1), Ok(range2)) = (
238                old_workbook.worksheet_formula(sheet),
239                new_workbook.worksheet_formula(sheet),
240            ) {
241                let mut cell_diffs: Vec<CellDiff> = vec![];
242
243                let (range1_start_row, range1_start_col) = match range1.start() {
244                    Some((row, col)) => (row, col),
245                    None => (u32::MAX, u32::MAX),
246                };
247                let (range2_start_row, range2_start_col) = match range2.start() {
248                    Some((row, col)) => (row, col),
249                    None => (u32::MAX, u32::MAX),
250                };
251                let (range1_end_row, range1_end_col) = match range1.end() {
252                    Some((row, col)) => (row, col),
253                    None => (u32::MIN, u32::MIN),
254                };
255                let (range2_end_row, range2_end_col) = match range2.end() {
256                    Some((row, col)) => (row, col),
257                    None => (u32::MIN, u32::MIN),
258                };
259                let start_row = range1_start_row.min(range2_start_row);
260                let start_col = range1_start_col.min(range2_start_col);
261                let end_row = range1_end_row.max(range2_end_row);
262                let end_col = range1_end_col.max(range2_end_col);
263
264                for row in start_row..(end_row + 1) {
265                    for col in start_col..(end_col + 1) {
266                        let cell1 = match range1.get_value((row, col)) {
267                            Some(x) => &Data::String(x.to_string()),
268                            None => &Data::Empty,
269                        };
270                        let cell2 = match range2.get_value((row, col)) {
271                            Some(x) => &Data::String(x.to_string()),
272                            None => &Data::Empty,
273                        };
274
275                        if cell1 != cell2 {
276                            let row = (row + 1) as usize;
277                            let col = (col + 1) as usize;
278                            cell_diffs.push(CellDiff {
279                                row,
280                                col,
281                                addr: cell_pos_to_address(row, col),
282                                kind: CellDiffKind::Formula,
283                                old: if cell1 != &Data::Empty {
284                                    Some(cell1.to_string())
285                                } else {
286                                    None
287                                },
288                                new: if cell2 != &Data::Empty {
289                                    Some(cell2.to_string())
290                                } else {
291                                    None
292                                },
293                            });
294                        }
295                    }
296                }
297
298                if !cell_diffs.is_empty() {
299                    let sheet_cell_diff = SheetCellDiff {
300                        sheet: sheet.to_owned(),
301                        cells: cell_diffs,
302                    };
303                    self.cell_diffs.push(sheet_cell_diff);
304                }
305            } else {
306                println!("Failed to read sheet: {}", sheet);
307            }
308        }
309    }
310}
311
312/// filter sheets whose name is equal
313fn filter_same_name_sheets<'a>(
314    old_sheets: &'a Vec<String>,
315    new_sheets: &'a Vec<String>,
316) -> Vec<String> {
317    old_sheets
318        .iter()
319        .filter(|s| new_sheets.contains(s))
320        .map(|s| s.to_owned())
321        .collect()
322}
323
324/// convert (row, col) to cell address str
325fn cell_pos_to_address(row: usize, col: usize) -> String {
326    let col_letter = (col as u8 - 1) / 26;
327    let col_index = (col as u8 - 1) % 26;
328
329    let col_char = if col_letter == 0 {
330        ((b'A' + col_index) as char).to_string()
331    } else {
332        let first_char = (b'A' + col_letter - 1) as char;
333        let second_char = (b'A' + col_index) as char;
334        format!("{}{}", first_char, second_char)
335    };
336
337    format!("{}{}", col_char, row)
338}