sheets_diff/core/
diff.rs

1use std::{fmt, fs::File, io::BufReader};
2
3use calamine::{open_workbook, Data, Reader, Xlsx};
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
8#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
9pub enum CellDiffKind {
10    Value,
11    Formula,
12}
13
14impl fmt::Display for CellDiffKind {
15    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16        match self {
17            CellDiffKind::Formula => write!(f, "formula"),
18            CellDiffKind::Value => write!(f, "value"),
19        }
20    }
21}
22
23/// main struct
24#[derive(Clone, Debug)]
25#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
26pub struct Diff {
27    pub old_filepath: String,
28    pub new_filepath: String,
29    pub sheet_diff: Vec<SheetDiff>,
30    pub cell_diffs: Vec<SheetCellDiff>,
31}
32
33#[derive(Clone, Debug)]
34#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
35pub struct SheetDiff {
36    pub old: Option<String>,
37    pub new: Option<String>,
38}
39
40#[derive(Clone, Debug)]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
42pub struct SheetCellDiff {
43    pub sheet: String,
44    pub cells: Vec<CellDiff>,
45}
46
47#[derive(Clone, Debug)]
48#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
49pub struct CellDiff {
50    pub row: usize,
51    pub col: usize,
52    pub addr: String,
53    pub kind: CellDiffKind,
54    pub old: Option<String>,
55    pub new: Option<String>,
56}
57
58impl Diff {
59    /// init
60    pub fn new(old_filepath: &str, new_filepath: &str) -> Self {
61        let mut ret = Diff {
62            old_filepath: old_filepath.to_owned(),
63            new_filepath: new_filepath.to_owned(),
64            sheet_diff: vec![],
65            cell_diffs: vec![],
66        };
67
68        ret.collect_diff();
69
70        ret.cell_diffs.sort_by(|a, b| a.sheet.cmp(&b.sheet));
71
72        let mut merged_cell_diffs: Vec<SheetCellDiff> = vec![];
73        ret.cell_diffs.iter().for_each(|a| {
74            let found = merged_cell_diffs.iter_mut().find(|b| b.sheet == a.sheet);
75            if let Some(found) = found {
76                found.cells.extend(a.cells.clone());
77            } else {
78                merged_cell_diffs.push(a.clone());
79            }
80        });
81        merged_cell_diffs.iter_mut().for_each(|x| {
82            x.cells
83                .sort_by(|a, b| a.addr.cmp(&b.addr).then_with(|| a.kind.cmp(&b.kind)));
84        });
85        ret.cell_diffs = merged_cell_diffs;
86
87        ret
88    }
89
90    /// get serde-ready diff
91    /// #[cfg(feature = "serde")]
92    pub fn diff(&mut self) -> Diff {
93        self.clone()
94    }
95
96    /// get unified diff str
97    pub fn unified_diff(&mut self) -> UnifiedDiff {
98        let mut ret: Vec<UnifiedDiffLine> = vec![];
99
100        if !self.sheet_diff.is_empty() {
101            ret.push(UnifiedDiffLine {
102                kind: UnifiedDiffKind::OldTitle,
103                text: format!("{} (sheet names)", self.old_filepath),
104            });
105            ret.push(UnifiedDiffLine {
106                kind: UnifiedDiffKind::NewTitle,
107                text: format!("{} (sheet names)", self.new_filepath),
108            });
109
110            self.sheet_diff.iter().for_each(|x| {
111                if let Some(sheet) = x.old.as_ref() {
112                    ret.push(UnifiedDiffLine {
113                        kind: UnifiedDiffKind::OldContent,
114                        text: sheet.to_owned(),
115                    });
116                }
117                if let Some(sheet) = x.new.as_ref() {
118                    ret.push(UnifiedDiffLine {
119                        kind: UnifiedDiffKind::NewContent,
120                        text: sheet.to_owned(),
121                    });
122                }
123            });
124        }
125
126        self.cell_diffs.iter().for_each(|x| {
127            ret.push(UnifiedDiffLine {
128                kind: UnifiedDiffKind::OldTitle,
129                text: format!("{} [{}]", self.old_filepath, x.sheet),
130            });
131            ret.push(UnifiedDiffLine {
132                kind: UnifiedDiffKind::NewTitle,
133                text: format!("{} [{}]", self.new_filepath, x.sheet),
134            });
135
136            x.cells.iter().for_each(|x| {
137                ret.push(UnifiedDiffLine {
138                    kind: UnifiedDiffKind::DiffPos,
139                    text: format!("{}({},{}) {}", x.addr, x.row, x.col, x.kind),
140                });
141
142                if let Some(sheet) = x.old.as_ref() {
143                    ret.push(UnifiedDiffLine {
144                        kind: UnifiedDiffKind::OldContent,
145                        text: sheet.to_owned(),
146                    });
147                }
148                if let Some(sheet) = x.new.as_ref() {
149                    ret.push(UnifiedDiffLine {
150                        kind: UnifiedDiffKind::NewContent,
151                        text: sheet.to_owned(),
152                    });
153                }
154            });
155        });
156
157        UnifiedDiff { lines: ret }
158    }
159
160    /// collect sheet diff and cell range diff
161    fn collect_diff(&mut self) {
162        let mut old_workbook: Xlsx<BufReader<File>> = open_workbook(self.old_filepath.as_str())
163            .expect(format!("Cannot open {}", self.old_filepath.as_str()).as_str());
164        let mut new_workbook: Xlsx<BufReader<File>> = open_workbook(self.new_filepath.as_str())
165            .expect(format!("Cannot open {}", self.new_filepath.as_str()).as_str());
166
167        let old_sheets = old_workbook.sheet_names().to_owned();
168        let new_sheets = new_workbook.sheet_names().to_owned();
169
170        self.collect_sheet_diff(&old_sheets, &new_sheets);
171
172        let same_name_sheets = filter_same_name_sheets(&old_sheets, &new_sheets);
173        self.collect_cell_value_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
174        self.collect_cell_formula_diff(&mut old_workbook, &mut new_workbook, &same_name_sheets);
175    }
176
177    /// collect sheet diff by name
178    fn collect_sheet_diff(&mut self, old_sheets: &Vec<String>, new_sheets: &Vec<String>) {
179        if *old_sheets == *new_sheets {
180            return;
181        }
182
183        for sheet in old_sheets {
184            if !new_sheets.contains(sheet) {
185                self.sheet_diff.push(SheetDiff {
186                    old: Some(sheet.to_owned()),
187                    new: None,
188                });
189            }
190        }
191        for sheet in new_sheets {
192            if !old_sheets.contains(sheet) {
193                self.sheet_diff.push(SheetDiff {
194                    old: None,
195                    new: Some(sheet.to_owned()),
196                });
197            }
198        }
199    }
200
201    /// collect value diff in cell range
202    fn collect_cell_value_diff(
203        &mut self,
204        old_workbook: &mut Xlsx<BufReader<File>>,
205        new_workbook: &mut Xlsx<BufReader<File>>,
206        same_name_sheets: &Vec<String>,
207    ) {
208        for sheet in same_name_sheets {
209            if let (Ok(old_range), Ok(new_range)) = (
210                old_workbook.worksheet_range(sheet),
211                new_workbook.worksheet_range(sheet),
212            ) {
213                let mut cell_diffs: Vec<CellDiff> = vec![];
214
215                let (old_rows, old_cols) = old_range.get_size();
216                let (new_rows, new_cols) = new_range.get_size();
217                let max_rows = old_rows.max(new_rows) as u32;
218                let max_cols = old_cols.max(new_cols) as u32;
219
220                for row in 0..max_rows {
221                    for col in 0..max_cols {
222                        let old_cell = old_range.get_value((row, col)).unwrap_or(&Data::Empty);
223                        let new_cell = new_range.get_value((row, col)).unwrap_or(&Data::Empty);
224
225                        if old_cell != new_cell {
226                            let row = (row + 1) as usize;
227                            let col = (col + 1) as usize;
228                            cell_diffs.push(CellDiff {
229                                row,
230                                col,
231                                addr: cell_pos_to_address(row, col),
232                                kind: CellDiffKind::Value,
233                                old: if old_cell != &Data::Empty {
234                                    Some(old_cell.to_string())
235                                } else {
236                                    None
237                                },
238                                new: if new_cell != &Data::Empty {
239                                    Some(new_cell.to_string())
240                                } else {
241                                    None
242                                },
243                            });
244                        }
245                    }
246                }
247
248                if !cell_diffs.is_empty() {
249                    let sheet_cell_diff = SheetCellDiff {
250                        sheet: sheet.to_owned(),
251                        cells: cell_diffs,
252                    };
253                    self.cell_diffs.push(sheet_cell_diff);
254                }
255            } else {
256                println!("Failed to read sheet: {}", sheet);
257            }
258        }
259    }
260
261    /// collect formula diff in cell range
262    fn collect_cell_formula_diff(
263        &mut self,
264        old_workbook: &mut Xlsx<BufReader<File>>,
265        new_workbook: &mut Xlsx<BufReader<File>>,
266        same_name_sheets: &Vec<String>,
267    ) {
268        for sheet in same_name_sheets {
269            if let (Ok(old_range), Ok(new_range)) = (
270                old_workbook.worksheet_formula(sheet),
271                new_workbook.worksheet_formula(sheet),
272            ) {
273                let mut cell_diffs: Vec<CellDiff> = vec![];
274
275                let (old_start_row, old_start_col) = match old_range.start() {
276                    Some((row, col)) => (row, col),
277                    None => (u32::MAX, u32::MAX),
278                };
279                let (new_start_row, new_start_col) = match new_range.start() {
280                    Some((row, col)) => (row, col),
281                    None => (u32::MAX, u32::MAX),
282                };
283                let (old_end_row, old_end_col) = match old_range.end() {
284                    Some((row, col)) => (row, col),
285                    None => (u32::MIN, u32::MIN),
286                };
287                let (new_end_row, new_end_col) = match new_range.end() {
288                    Some((row, col)) => (row, col),
289                    None => (u32::MIN, u32::MIN),
290                };
291                let start_row = old_start_row.min(new_start_row);
292                let start_col = old_start_col.min(new_start_col);
293                let end_row = old_end_row.max(new_end_row);
294                let end_col = old_end_col.max(new_end_col);
295
296                for row in start_row..(end_row + 1) {
297                    for col in start_col..(end_col + 1) {
298                        let old_cell = match old_range.get_value((row, col)) {
299                            Some(x) => &Data::String(x.to_string()),
300                            None => &Data::Empty,
301                        };
302                        let new_cell = match new_range.get_value((row, col)) {
303                            Some(x) => &Data::String(x.to_string()),
304                            None => &Data::Empty,
305                        };
306
307                        if old_cell != new_cell {
308                            let row = (row + 1) as usize;
309                            let col = (col + 1) as usize;
310                            cell_diffs.push(CellDiff {
311                                row,
312                                col,
313                                addr: cell_pos_to_address(row, col),
314                                kind: CellDiffKind::Formula,
315                                old: if old_cell != &Data::Empty {
316                                    Some(old_cell.to_string())
317                                } else {
318                                    None
319                                },
320                                new: if new_cell != &Data::Empty {
321                                    Some(new_cell.to_string())
322                                } else {
323                                    None
324                                },
325                            });
326                        }
327                    }
328                }
329
330                if !cell_diffs.is_empty() {
331                    let sheet_cell_diff = SheetCellDiff {
332                        sheet: sheet.to_owned(),
333                        cells: cell_diffs,
334                    };
335                    self.cell_diffs.push(sheet_cell_diff);
336                }
337            } else {
338                println!("Failed to read sheet: {}", sheet);
339            }
340        }
341    }
342}
343
344/// filter sheets whose name is equal
345fn filter_same_name_sheets<'a>(
346    old_sheets: &'a Vec<String>,
347    new_sheets: &'a Vec<String>,
348) -> Vec<String> {
349    old_sheets
350        .iter()
351        .filter(|s| new_sheets.contains(s))
352        .map(|s| s.to_owned())
353        .collect()
354}
355
356/// convert (row, col) to cell address str
357fn cell_pos_to_address(row: usize, col: usize) -> String {
358    let col_letter = (col as u8 - 1) / 26;
359    let col_index = (col as u8 - 1) % 26;
360
361    let col_char = if col_letter == 0 {
362        ((b'A' + col_index) as char).to_string()
363    } else {
364        let first_char = (b'A' + col_letter - 1) as char;
365        let second_char = (b'A' + col_index) as char;
366        format!("{}{}", first_char, second_char)
367    };
368
369    format!("{}{}", col_char, row)
370}
371
372/// unified diff line kind
373#[derive(Clone, Debug)]
374#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
375pub enum UnifiedDiffKind {
376    OldTitle,
377    NewTitle,
378    DiffPos,
379    OldContent,
380    NewContent,
381}
382
383/// unified diff lines
384#[derive(Clone, Debug)]
385#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
386pub struct UnifiedDiff {
387    pub lines: Vec<UnifiedDiffLine>,
388}
389
390impl fmt::Display for UnifiedDiff {
391    /// to_string() for unified diff lines
392    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
393        let str = self
394            .lines
395            .iter()
396            .map(|x| x.to_string())
397            .collect::<Vec<String>>();
398        write!(f, "{}", str.join("\n"))
399    }
400}
401
402/// unified diff line
403#[derive(Clone, Debug)]
404#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
405pub struct UnifiedDiffLine {
406    pub kind: UnifiedDiffKind,
407    pub text: String,
408}
409
410impl fmt::Display for UnifiedDiffLine {
411    /// to_string() for unified diff line
412    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
413        match self.kind {
414            UnifiedDiffKind::OldTitle => write!(f, "--- {}", self.text),
415            UnifiedDiffKind::NewTitle => write!(f, "+++ {}", self.text),
416            UnifiedDiffKind::DiffPos => write!(f, "@@ {} @@", self.text),
417            UnifiedDiffKind::OldContent => write!(f, "- {}", self.text),
418            UnifiedDiffKind::NewContent => write!(f, "+ {}", self.text),
419        }
420    }
421}