Skip to main content

spreadsheet_mcp/diff/
mod.rs

1pub mod address;
2pub mod cells;
3pub mod hash;
4pub mod merge;
5pub mod names;
6pub mod sst;
7pub mod tables;
8
9use anyhow::Result;
10use cells::CellIterator;
11use merge::{CellDiff, diff_streams};
12use names::{DefinedName, NameDiff, NameKey, diff_names, parse_defined_names};
13use quick_xml::events::Event;
14use quick_xml::reader::Reader;
15use schemars::JsonSchema;
16use serde::Serialize;
17use sst::Sst;
18use std::collections::HashMap;
19use std::fs::File;
20use std::io::BufReader;
21use std::path::Path;
22use tables::{TableDiff, TableInfo, diff_tables, parse_table_xml};
23use zip::ZipArchive;
24
25#[derive(Debug, Serialize, JsonSchema)]
26#[serde(untagged)]
27pub enum Change {
28    Cell(CellChange),
29    Table(TableDiff),
30    Name(NameDiff),
31}
32
33#[derive(Debug, Serialize, JsonSchema)]
34pub struct CellChange {
35    pub sheet: String,
36    #[serde(flatten)]
37    pub diff: CellDiff,
38}
39
40// Legacy alias for tests until updated
41pub type DiffResult = CellChange;
42
43pub fn calculate_changeset(
44    base_path: &Path,
45    fork_path: &Path,
46    sheet_filter: Option<&str>,
47) -> Result<Vec<Change>> {
48    let mut base_zip = ZipArchive::new(File::open(base_path)?)?;
49    let mut fork_zip = ZipArchive::new(File::open(fork_path)?)?;
50
51    // Load SSTs
52    let base_sst = load_sst(&mut base_zip).ok();
53    let fork_sst = load_sst(&mut fork_zip).ok();
54    let base_sst_hash = base_zip
55        .by_name("xl/sharedStrings.xml")
56        .ok()
57        .and_then(|f| hash::compute_hash(f).ok())
58        .unwrap_or(0);
59    let fork_sst_hash = fork_zip
60        .by_name("xl/sharedStrings.xml")
61        .ok()
62        .and_then(|f| hash::compute_hash(f).ok())
63        .unwrap_or(0);
64
65    // Load Workbook Meta (Sheets + Names)
66    let base_meta = load_workbook_meta(&mut base_zip)?;
67    let fork_meta = load_workbook_meta(&mut fork_zip)?;
68
69    let mut all_changes = Vec::new();
70
71    // 1. Diff Names
72    // Names are global (or scoped), not filtered by sheet_filter usually,
73    // unless scope matches? For now return all name changes.
74    // Ideally we filter scoped names by sheet_filter.
75    let name_diffs = diff_names(&base_meta.names, &fork_meta.names, &base_meta.sheet_id_map);
76    for d in name_diffs {
77        if let Some(filter) = sheet_filter {
78            match &d {
79                NameDiff::NameAdded {
80                    scope_sheet: Some(s),
81                    ..
82                } if s != filter => continue,
83                NameDiff::NameDeleted {
84                    scope_sheet: Some(s),
85                    ..
86                } if s != filter => continue,
87                NameDiff::NameModified {
88                    scope_sheet: Some(s),
89                    ..
90                } if s != filter => continue,
91                _ => {}
92            }
93        }
94        all_changes.push(Change::Name(d));
95    }
96
97    // 2. Diff Tables
98    let base_tables = load_tables(&mut base_zip, &base_meta.sheet_map)?;
99    let fork_tables = load_tables(&mut fork_zip, &fork_meta.sheet_map)?;
100    let table_diffs = diff_tables(&base_tables, &fork_tables);
101    for d in table_diffs {
102        if let Some(filter) = sheet_filter {
103            match &d {
104                TableDiff::TableAdded { sheet, .. } if sheet != filter => continue,
105                TableDiff::TableDeleted { sheet, .. } if sheet != filter => continue,
106                TableDiff::TableModified { sheet, .. } if sheet != filter => continue,
107                _ => {}
108            }
109        }
110        all_changes.push(Change::Table(d));
111    }
112
113    // 3. Diff Cells (per sheet)
114    // We iterate the UNION of sheets
115    let mut all_sheets: Vec<_> = base_meta
116        .sheet_map
117        .keys()
118        .chain(fork_meta.sheet_map.keys())
119        .collect();
120    all_sheets.sort();
121    all_sheets.dedup();
122
123    for name in all_sheets {
124        if let Some(filter) = sheet_filter
125            && name != filter
126        {
127            continue;
128        }
129
130        let base_path_str = base_meta.sheet_map.get(name);
131        let fork_path_str = fork_meta.sheet_map.get(name);
132
133        // Hash Check (optimization)
134        let base_hash = if let Some(p) = base_path_str {
135            if let Ok(f) = base_zip.by_name(p) {
136                hash::compute_hash(f)?
137            } else {
138                0
139            }
140        } else {
141            0
142        };
143
144        let fork_hash = if let Some(p) = fork_path_str {
145            if let Ok(f) = fork_zip.by_name(p) {
146                hash::compute_hash(f)?
147            } else {
148                0
149            }
150        } else {
151            0
152        };
153
154        if base_hash != 0 && base_hash == fork_hash && base_sst_hash == fork_sst_hash {
155            continue;
156        }
157
158        // Diff Streams
159        let base_iter = if let Some(p) = base_path_str {
160            if let Ok(f) = base_zip.by_name(p) {
161                Some(CellIterator::new(BufReader::new(f), base_sst.as_ref()))
162            } else {
163                None
164            }
165        } else {
166            None
167        };
168
169        let fork_iter = if let Some(p) = fork_path_str {
170            if let Ok(f) = fork_zip.by_name(p) {
171                Some(CellIterator::new(BufReader::new(f), fork_sst.as_ref()))
172            } else {
173                None
174            }
175        } else {
176            None
177        };
178
179        let diffs = match (base_iter, fork_iter) {
180            (Some(b), Some(f)) => diff_streams(b, f)?,
181            (Some(b), None) => diff_streams(b, std::iter::empty())?,
182            (None, Some(f)) => diff_streams(std::iter::empty(), f)?,
183            (None, None) => Vec::new(),
184        };
185
186        for d in diffs {
187            all_changes.push(Change::Cell(CellChange {
188                sheet: name.clone(),
189                diff: d,
190            }));
191        }
192    }
193
194    Ok(all_changes)
195}
196
197fn load_sst(zip: &mut ZipArchive<File>) -> Result<Sst> {
198    let f = zip.by_name("xl/sharedStrings.xml")?;
199    Sst::from_reader(BufReader::new(f))
200}
201
202struct WorkbookMeta {
203    sheet_map: HashMap<String, String>, // name -> path
204    sheet_id_map: HashMap<u32, String>, // index (0-based from sheetId or array?) -> name
205    // Spec says localSheetId is 0-based index of sheet in workbook
206    names: HashMap<NameKey, DefinedName>,
207}
208
209fn load_workbook_meta(zip: &mut ZipArchive<File>) -> Result<WorkbookMeta> {
210    // 1. Parse workbook.xml for name -> rId, sheetId, and definedNames
211    let mut name_to_rid = HashMap::new();
212    let mut sheet_id_map = HashMap::new();
213    let mut defined_names = HashMap::new();
214
215    // We need to know the *order* of sheets for localSheetId (0, 1, 2...)
216    // Iterate sheets in order of appearance?
217    let mut sheet_order = Vec::new();
218
219    {
220        let workbook_xml = zip.by_name("xl/workbook.xml")?;
221        let mut reader = Reader::from_reader(BufReader::new(workbook_xml));
222        let mut buf = Vec::new();
223
224        loop {
225            match reader.read_event_into(&mut buf) {
226                Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
227                    if e.name().as_ref() == b"sheet" {
228                        let mut name = String::new();
229                        let mut rid = String::new();
230                        for attr in e.attributes() {
231                            let attr = attr?;
232                            if attr.key.as_ref() == b"name" {
233                                name = String::from_utf8_lossy(&attr.value).to_string();
234                            } else if attr.key.as_ref() == b"r:id" {
235                                rid = String::from_utf8_lossy(&attr.value).to_string();
236                            }
237                        }
238                        if !name.is_empty() && !rid.is_empty() {
239                            name_to_rid.insert(rid, name.clone());
240                            sheet_order.push(name);
241                        }
242                    } else if e.name().as_ref() == b"definedNames" {
243                        // Switch to parsing defined names
244                        // We need to pass the reader to the names module?
245                        // But we are inside a loop borrowing the reader.
246                        // Since `parse_defined_names` takes `&mut Reader`, we can call it.
247                        // It will consume until </definedNames>.
248                        defined_names = parse_defined_names(&mut reader)?;
249                    }
250                }
251                Ok(Event::Eof) => break,
252                Err(e) => return Err(e.into()),
253                _ => {}
254            }
255            buf.clear();
256        }
257    }
258
259    for (idx, name) in sheet_order.into_iter().enumerate() {
260        sheet_id_map.insert(idx as u32, name);
261    }
262
263    // 2. Parse _rels/workbook.xml.rels for rId -> Target
264    let mut rid_to_target = HashMap::new();
265    if let Ok(rels_xml) = zip.by_name("xl/_rels/workbook.xml.rels") {
266        let mut reader = Reader::from_reader(BufReader::new(rels_xml));
267        let mut buf = Vec::new();
268
269        loop {
270            match reader.read_event_into(&mut buf) {
271                Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
272                    if e.name().as_ref() == b"Relationship" {
273                        let mut id = String::new();
274                        let mut target = String::new();
275                        for attr in e.attributes() {
276                            let attr = attr?;
277                            if attr.key.as_ref() == b"Id" {
278                                id = String::from_utf8_lossy(&attr.value).to_string();
279                            } else if attr.key.as_ref() == b"Target" {
280                                target = String::from_utf8_lossy(&attr.value).to_string();
281                            }
282                        }
283                        rid_to_target.insert(id, target);
284                    }
285                }
286                Ok(Event::Eof) => break,
287                Err(e) => return Err(e.into()),
288                _ => {}
289            }
290            buf.clear();
291        }
292    }
293
294    // 3. Join
295    let mut sheet_map = HashMap::new();
296    for (rid, name) in name_to_rid {
297        if let Some(target) = rid_to_target.get(&rid) {
298            let path = if target.starts_with('/') {
299                target.trim_start_matches('/').to_string()
300            } else {
301                format!("xl/{}", target)
302            };
303            sheet_map.insert(name, path);
304        }
305    }
306
307    Ok(WorkbookMeta {
308        sheet_map,
309        sheet_id_map,
310        names: defined_names,
311    })
312}
313
314fn load_tables(
315    zip: &mut ZipArchive<File>,
316    sheet_map: &HashMap<String, String>,
317) -> Result<HashMap<String, TableInfo>> {
318    let mut tables = HashMap::new();
319
320    for (sheet_name, sheet_path) in sheet_map {
321        let path = Path::new(sheet_path);
322        let parent = path.parent().unwrap();
323        let filename = path.file_name().unwrap().to_str().unwrap();
324        let rels_path = parent.join("_rels").join(format!("{}.rels", filename));
325        let rels_path_str = rels_path.to_str().unwrap();
326
327        // 1. Find table files in rels
328        let mut table_files = Vec::new();
329        if let Ok(f) = zip.by_name(rels_path_str) {
330            let mut reader = Reader::from_reader(BufReader::new(f));
331            let mut buf = Vec::new();
332            loop {
333                match reader.read_event_into(&mut buf) {
334                    Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
335                        if e.name().as_ref() == b"Relationship" {
336                            let mut target = String::new();
337                            let mut type_attr = String::new();
338                            for attr in e.attributes() {
339                                let attr = attr?;
340                                if attr.key.as_ref() == b"Target" {
341                                    target = String::from_utf8_lossy(&attr.value).to_string();
342                                } else if attr.key.as_ref() == b"Type" {
343                                    type_attr = String::from_utf8_lossy(&attr.value).to_string();
344                                }
345                            }
346                            if type_attr.ends_with("/table") {
347                                table_files.push(target);
348                            }
349                        }
350                    }
351                    Ok(Event::Eof) => break,
352                    _ => {}
353                }
354                buf.clear();
355            }
356        }
357
358        // 2. Parse table files
359        for target in table_files {
360            // Target is relative to sheet path parent (e.g. "../tables/table1.xml")
361            // parent is "xl/worksheets"
362            // We need to resolve this path.
363            // Simple normalization: if starts with "../", strip last component of parent.
364            // parent: "xl/worksheets" -> parent of that is "xl"
365            // target: "tables/table1.xml" -> "xl/tables/table1.xml"
366
367            // Or use PathBuf logic
368            let mut full_path = parent.to_path_buf();
369            for component in Path::new(&target).components() {
370                match component {
371                    std::path::Component::ParentDir => {
372                        full_path.pop();
373                    }
374                    std::path::Component::Normal(c) => {
375                        full_path.push(c);
376                    }
377                    _ => {}
378                }
379            }
380
381            let full_path_str = full_path.to_str().unwrap();
382
383            if let Ok(f) = zip.by_name(full_path_str) {
384                let mut reader = Reader::from_reader(BufReader::new(f));
385                if let Ok(info) = parse_table_xml(&mut reader, sheet_name.clone()) {
386                    tables.insert(info.display_name.clone(), info);
387                }
388            }
389        }
390    }
391
392    Ok(tables)
393}