sheets_diff/core/
diff.rs

1use std::{fmt, fs::File, io::BufReader};
2
3use calamine::{open_workbook, Data, Reader, Xlsx};
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
8#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
9pub enum CellDiffKind {
10    Value,
11    Formula,
12}
13
14impl fmt::Display for CellDiffKind {
15    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16        match self {
17            CellDiffKind::Formula => write!(f, "formula"),
18            CellDiffKind::Value => write!(f, "value"),
19        }
20    }
21}
22
23/// main struct
24#[derive(Clone, Debug)]
25#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
26pub struct Diff {
27    pub old_filepath: String,
28    pub new_filepath: String,
29    pub sheet_diff: Vec<SheetDiff>,
30    pub cell_diffs: Vec<SheetCellDiff>,
31}
32
33#[derive(Clone, Debug)]
34#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
35pub struct SheetDiff {
36    pub old: Option<String>,
37    pub new: Option<String>,
38}
39
40#[derive(Clone, Debug)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42pub struct SheetCellDiff {
43    pub sheet: String,
44    pub cells: Vec<CellDiff>,
45}
46
47#[derive(Clone, Debug)]
48#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
49pub struct CellDiff {
50    pub row: usize,
51    pub col: usize,
52    pub addr: String,
53    pub kind: CellDiffKind,
54    pub old: Option<String>,
55    pub new: Option<String>,
56}
57
58impl Diff {
59    /// init
60    pub fn new(old_filepath: &str, new_filepath: &str) -> Self {
61        let mut ret = Diff {
62            old_filepath: old_filepath.to_owned(),
63            new_filepath: new_filepath.to_owned(),
64            sheet_diff: vec![],
65            cell_diffs: vec![],
66        };
67
68        ret.collect_diff();
69
70        ret.cell_diffs.sort_by(|a, b| a.sheet.cmp(&b.sheet));
71
72        let mut merged_cell_diffs: Vec<SheetCellDiff> = vec![];
73        ret.cell_diffs.iter().for_each(|a| {
74            let found = merged_cell_diffs.iter_mut().find(|b| b.sheet == a.sheet);
75            if let Some(found) = found {
76                found.cells.extend(a.cells.clone());
77            } else {
78                merged_cell_diffs.push(a.clone());
79            }
80        });
81        merged_cell_diffs.iter_mut().for_each(|x| {
82            x.cells
83                .sort_by(|a, b| a.addr.cmp(&b.addr).then_with(|| a.kind.cmp(&b.kind)));
84        });
85        ret.cell_diffs = merged_cell_diffs;
86
87        ret
88    }
89
90    /// get serde-ready diff
91    /// #[cfg(feature = "serde")]
92    pub fn diff(&mut self) -> Diff {
93        self.clone()
94    }
95
96    /// get unified diff str
97    pub fn unified_diff(&mut self) -> UnifiedDiff {
98        let mut ret: Vec<UnifiedDiffLine> = vec![];
99
100        if !self.sheet_diff.is_empty() {
101            ret.push(UnifiedDiffLine {
102                kind: UnifiedDiffKind::OldTitle,
103                text: format!("{} (sheet names)", self.old_filepath),
104            });
105            ret.push(UnifiedDiffLine {
106                kind: UnifiedDiffKind::NewTitle,
107                text: format!("{} (sheet names)", self.new_filepath),
108            });
109
110            self.sheet_diff.iter().for_each(|x| {
111                if let Some(sheet) = x.old.as_ref() {
112                    ret.push(UnifiedDiffLine {
113                        kind: UnifiedDiffKind::OldContent,
114                        text: sheet.to_owned(),
115                    });
116                }
117                if let Some(sheet) = x.new.as_ref() {
118                    ret.push(UnifiedDiffLine {
119                        kind: UnifiedDiffKind::NewContent,
120                        text: sheet.to_owned(),
121                    });
122                }
123            });
124        }
125
126        self.cell_diffs.iter().for_each(|x| {
127            ret.push(UnifiedDiffLine {
128                kind: UnifiedDiffKind::OldTitle,
129                text: format!("{} [{}]", self.old_filepath, x.sheet),
130            });
131            ret.push(UnifiedDiffLine {
132                kind: UnifiedDiffKind::NewTitle,
133                text: format!("{} [{}]", self.new_filepath, x.sheet),
134            });
135
136            x.cells.iter().for_each(|x| {
137                ret.push(UnifiedDiffLine {
138                    kind: UnifiedDiffKind::DiffPos,
139                    text: format!("{}({},{}) {}", x.addr, x.row, x.col, x.kind),
140                });
141
142                if let Some(sheet) = x.old.as_ref() {
143                    ret.push(UnifiedDiffLine {
144                        kind: UnifiedDiffKind::OldContent,
145                        text: sheet.to_owned(),
146                    });
147                }
148                if let Some(sheet) = x.new.as_ref() {
149                    ret.push(UnifiedDiffLine {
150                        kind: UnifiedDiffKind::NewContent,
151                        text: sheet.to_owned(),
152                    });
153                }
154            });
155        });
156
157        UnifiedDiff { lines: ret }
158    }
159
160    /// collect sheet diff and cell range diff
161    fn collect_diff(&mut self) {
162        let mut old_workbook: Xlsx<BufReader<File>> = open_workbook(self.old_filepath.as_str())
163            .expect(format!("Cannot open {}", self.old_filepath.as_str()).as_str());
164        let mut new_workbook: Xlsx<BufReader<File>> = open_workbook(self.new_filepath.as_str())
165            .expect(format!("Cannot open {}", self.new_filepath.as_str()).as_str());
166
167        let old_sheets = old_workbook.sheet_names().to_owned();
168        let new_sheets = new_workbook.sheet_names().to_owned();
169
170        self.collect_sheet_diff(&old_sheets, &new_sheets);
171
172        let same_name_sheets = filter_same_name_sheets(&old_sheets, &new_sheets);
173        self.collect_cell_value_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
174        self.collect_cell_formula_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
175    }
176
177    /// collect sheet diff by name
178    fn collect_sheet_diff(&mut self, old_sheets: &Vec<String>, new_sheets: &Vec<String>) {
179        if *old_sheets == *new_sheets {
180            return;
181        }
182
183        for sheet in old_sheets {
184            if !new_sheets.contains(sheet) {
185                self.sheet_diff.push(SheetDiff {
186                    old: Some(sheet.to_owned()),
187                    new: None,
188                });
189            }
190        }
191        for sheet in new_sheets {
192            if !old_sheets.contains(sheet) {
193                self.sheet_diff.push(SheetDiff {
194                    old: None,
195                    new: Some(sheet.to_owned()),
196                });
197            }
198        }
199    }
200
201    /// collect value diff in cell range
202    fn collect_cell_value_diff(
203        &mut self,
204        old_workbook: &mut Xlsx<BufReader<File>>,
205        new_workbook: &mut Xlsx<BufReader<File>>,
206        same_name_sheets: &Vec<String>,
207    ) {
208        for sheet in same_name_sheets {
209            if let (Ok(old_range), Ok(new_range)) = (
210                old_workbook.worksheet_range(sheet),
211                new_workbook.worksheet_range(sheet),
212            ) {
213                let mut cell_diffs: Vec<CellDiff> = vec![];
214
215                let (start_row, start_col, end_row, end_col) = diff_range(
216                    old_range.start(),
217                    new_range.start(),
218                    old_range.end(),
219                    new_range.end(),
220                );
221
222                for row in start_row..end_row {
223                    for col in start_col..end_col {
224                        let old_cell = old_range.get_value((row, col)).unwrap_or(&Data::Empty);
225                        let new_cell = new_range.get_value((row, col)).unwrap_or(&Data::Empty);
226
227                        if old_cell != new_cell {
228                            let row = (row + 1) as usize;
229                            let col = (col + 1) as usize;
230                            cell_diffs.push(CellDiff {
231                                row,
232                                col,
233                                addr: cell_pos_to_address(row, col),
234                                kind: CellDiffKind::Value,
235                                old: if old_cell != &Data::Empty {
236                                    Some(old_cell.to_string())
237                                } else {
238                                    None
239                                },
240                                new: if new_cell != &Data::Empty {
241                                    Some(new_cell.to_string())
242                                } else {
243                                    None
244                                },
245                            });
246                        }
247                    }
248                }
249
250                if !cell_diffs.is_empty() {
251                    let sheet_cell_diff = SheetCellDiff {
252                        sheet: sheet.to_owned(),
253                        cells: cell_diffs,
254                    };
255                    self.cell_diffs.push(sheet_cell_diff);
256                }
257            } else {
258                println!("Failed to read sheet: {}", sheet);
259            }
260        }
261    }
262
263    /// collect formula diff in cell range
264    fn collect_cell_formula_diff(
265        &mut self,
266        old_workbook: &mut Xlsx<BufReader<File>>,
267        new_workbook: &mut Xlsx<BufReader<File>>,
268        same_name_sheets: &Vec<String>,
269    ) {
270        for sheet in same_name_sheets {
271            if let (Ok(old_range), Ok(new_range)) = (
272                old_workbook.worksheet_formula(sheet),
273                new_workbook.worksheet_formula(sheet),
274            ) {
275                let mut cell_diffs: Vec<CellDiff> = vec![];
276
277                let (start_row, start_col, end_row, end_col) = diff_range(
278                    old_range.start(),
279                    new_range.start(),
280                    old_range.end(),
281                    new_range.end(),
282                );
283
284                for row in start_row..end_row {
285                    for col in start_col..end_col {
286                        let old_cell = match old_range.get_value((row, col)) {
287                            Some(x) => &Data::String(x.to_string()),
288                            None => &Data::Empty,
289                        };
290                        let new_cell = match new_range.get_value((row, col)) {
291                            Some(x) => &Data::String(x.to_string()),
292                            None => &Data::Empty,
293                        };
294
295                        if old_cell != new_cell {
296                            let row = (row + 1) as usize;
297                            let col = (col + 1) as usize;
298                            cell_diffs.push(CellDiff {
299                                row,
300                                col,
301                                addr: cell_pos_to_address(row, col),
302                                kind: CellDiffKind::Formula,
303                                old: if old_cell != &Data::Empty {
304                                    Some(old_cell.to_string())
305                                } else {
306                                    None
307                                },
308                                new: if new_cell != &Data::Empty {
309                                    Some(new_cell.to_string())
310                                } else {
311                                    None
312                                },
313                            });
314                        }
315                    }
316                }
317
318                if !cell_diffs.is_empty() {
319                    let sheet_cell_diff = SheetCellDiff {
320                        sheet: sheet.to_owned(),
321                        cells: cell_diffs,
322                    };
323                    self.cell_diffs.push(sheet_cell_diff);
324                }
325            } else {
326                println!("Failed to read sheet: {}", sheet);
327            }
328        }
329    }
330}
331
332/// filter sheets whose name is equal
333fn filter_same_name_sheets<'a>(
334    old_sheets: &'a Vec<String>,
335    new_sheets: &'a Vec<String>,
336) -> Vec<String> {
337    old_sheets
338        .iter()
339        .filter(|s| new_sheets.contains(s))
340        .map(|s| s.to_owned())
341        .collect()
342}
343
344/// get range to compare
345/// return: (start_row, start_col, end_row, end_col)
346fn diff_range<'a>(
347    old_start: Option<(u32, u32)>,
348    new_start: Option<(u32, u32)>,
349    old_end: Option<(u32, u32)>,
350    new_end: Option<(u32, u32)>,
351) -> (u32, u32, u32, u32) {
352    let (old_start_row, old_start_col) = match old_start {
353        Some((row, col)) => (row, col),
354        None => (u32::MAX, u32::MAX),
355    };
356    let (new_start_row, new_start_col) = match new_start {
357        Some((row, col)) => (row, col),
358        None => (u32::MAX, u32::MAX),
359    };
360    let (old_end_row, old_end_col) = match old_end {
361        Some((row, col)) => (row, col),
362        None => (u32::MIN, u32::MIN),
363    };
364    let (new_end_row, new_end_col) = match new_end {
365        Some((row, col)) => (row, col),
366        None => (u32::MIN, u32::MIN),
367    };
368    let start_row = old_start_row.min(new_start_row);
369    let start_col = old_start_col.min(new_start_col);
370    let end_row = old_end_row.max(new_end_row);
371    let end_col = old_end_col.max(new_end_col);
372
373    (start_row, start_col, end_row + 1, end_col + 1)
374}
375
376/// convert (row, col) to cell address str
377fn cell_pos_to_address(row: usize, col: usize) -> String {
378    let col_letter = (col as u8 - 1) / 26;
379    let col_index = (col as u8 - 1) % 26;
380
381    let col_char = if col_letter == 0 {
382        ((b'A' + col_index) as char).to_string()
383    } else {
384        let first_char = (b'A' + col_letter - 1) as char;
385        let second_char = (b'A' + col_index) as char;
386        format!("{}{}", first_char, second_char)
387    };
388
389    format!("{}{}", col_char, row)
390}
391
392/// unified diff line kind
393#[derive(Clone, Debug)]
394#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
395pub enum UnifiedDiffKind {
396    OldTitle,
397    NewTitle,
398    DiffPos,
399    OldContent,
400    NewContent,
401}
402
403/// unified diff lines
404#[derive(Clone, Debug)]
405#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
406pub struct UnifiedDiff {
407    pub lines: Vec<UnifiedDiffLine>,
408}
409
410impl fmt::Display for UnifiedDiff {
411    /// to_string() for unified diff lines
412    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
413        let str = self
414            .lines
415            .iter()
416            .map(|x| x.to_string())
417            .collect::<Vec<String>>();
418        write!(f, "{}", str.join("\n"))
419    }
420}
421
422/// unified diff line
423#[derive(Clone, Debug)]
424#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
425pub struct UnifiedDiffLine {
426    pub kind: UnifiedDiffKind,
427    pub text: String,
428}
429
430impl fmt::Display for UnifiedDiffLine {
431    /// to_string() for unified diff line
432    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
433        match self.kind {
434            UnifiedDiffKind::OldTitle => write!(f, "--- {}", self.text),
435            UnifiedDiffKind::NewTitle => write!(f, "+++ {}", self.text),
436            UnifiedDiffKind::DiffPos => write!(f, "@@ {} @@", self.text),
437            UnifiedDiffKind::OldContent => write!(f, "- {}", self.text),
438            UnifiedDiffKind::NewContent => write!(f, "+ {}", self.text),
439        }
440    }
441}