spreadsheet_mcp/diff/
mod.rs1pub mod address;
2pub mod cells;
3pub mod hash;
4pub mod merge;
5pub mod names;
6pub mod sst;
7pub mod tables;
8
9use anyhow::Result;
10use cells::CellIterator;
11use merge::{CellDiff, diff_streams};
12use names::{DefinedName, NameDiff, NameKey, diff_names, parse_defined_names};
13use quick_xml::events::Event;
14use quick_xml::reader::Reader;
15use schemars::JsonSchema;
16use serde::Serialize;
17use sst::Sst;
18use std::collections::HashMap;
19use std::fs::File;
20use std::io::BufReader;
21use std::path::Path;
22use tables::{TableDiff, TableInfo, diff_tables, parse_table_xml};
23use zip::ZipArchive;
24
25#[derive(Debug, Serialize, JsonSchema)]
26#[serde(untagged)]
27pub enum Change {
28 Cell(CellChange),
29 Table(TableDiff),
30 Name(NameDiff),
31}
32
33#[derive(Debug, Serialize, JsonSchema)]
34pub struct CellChange {
35 pub sheet: String,
36 #[serde(flatten)]
37 pub diff: CellDiff,
38}
39
40pub type DiffResult = CellChange;
42
43pub fn calculate_changeset(
44 base_path: &Path,
45 fork_path: &Path,
46 sheet_filter: Option<&str>,
47) -> Result<Vec<Change>> {
48 let mut base_zip = ZipArchive::new(File::open(base_path)?)?;
49 let mut fork_zip = ZipArchive::new(File::open(fork_path)?)?;
50
51 let base_sst = load_sst(&mut base_zip).ok();
53 let fork_sst = load_sst(&mut fork_zip).ok();
54 let base_sst_hash = base_zip
55 .by_name("xl/sharedStrings.xml")
56 .ok()
57 .and_then(|f| hash::compute_hash(f).ok())
58 .unwrap_or(0);
59 let fork_sst_hash = fork_zip
60 .by_name("xl/sharedStrings.xml")
61 .ok()
62 .and_then(|f| hash::compute_hash(f).ok())
63 .unwrap_or(0);
64
65 let base_meta = load_workbook_meta(&mut base_zip)?;
67 let fork_meta = load_workbook_meta(&mut fork_zip)?;
68
69 let mut all_changes = Vec::new();
70
71 let name_diffs = diff_names(&base_meta.names, &fork_meta.names, &base_meta.sheet_id_map);
76 for d in name_diffs {
77 if let Some(filter) = sheet_filter {
78 match &d {
79 NameDiff::NameAdded {
80 scope_sheet: Some(s),
81 ..
82 } if s != filter => continue,
83 NameDiff::NameDeleted {
84 scope_sheet: Some(s),
85 ..
86 } if s != filter => continue,
87 NameDiff::NameModified {
88 scope_sheet: Some(s),
89 ..
90 } if s != filter => continue,
91 _ => {}
92 }
93 }
94 all_changes.push(Change::Name(d));
95 }
96
97 let base_tables = load_tables(&mut base_zip, &base_meta.sheet_map)?;
99 let fork_tables = load_tables(&mut fork_zip, &fork_meta.sheet_map)?;
100 let table_diffs = diff_tables(&base_tables, &fork_tables);
101 for d in table_diffs {
102 if let Some(filter) = sheet_filter {
103 match &d {
104 TableDiff::TableAdded { sheet, .. } if sheet != filter => continue,
105 TableDiff::TableDeleted { sheet, .. } if sheet != filter => continue,
106 TableDiff::TableModified { sheet, .. } if sheet != filter => continue,
107 _ => {}
108 }
109 }
110 all_changes.push(Change::Table(d));
111 }
112
113 let mut all_sheets: Vec<_> = base_meta
116 .sheet_map
117 .keys()
118 .chain(fork_meta.sheet_map.keys())
119 .collect();
120 all_sheets.sort();
121 all_sheets.dedup();
122
123 for name in all_sheets {
124 if let Some(filter) = sheet_filter
125 && name != filter
126 {
127 continue;
128 }
129
130 let base_path_str = base_meta.sheet_map.get(name);
131 let fork_path_str = fork_meta.sheet_map.get(name);
132
133 let base_hash = if let Some(p) = base_path_str {
135 if let Ok(f) = base_zip.by_name(p) {
136 hash::compute_hash(f)?
137 } else {
138 0
139 }
140 } else {
141 0
142 };
143
144 let fork_hash = if let Some(p) = fork_path_str {
145 if let Ok(f) = fork_zip.by_name(p) {
146 hash::compute_hash(f)?
147 } else {
148 0
149 }
150 } else {
151 0
152 };
153
154 if base_hash != 0 && base_hash == fork_hash && base_sst_hash == fork_sst_hash {
155 continue;
156 }
157
158 let base_iter = if let Some(p) = base_path_str {
160 if let Ok(f) = base_zip.by_name(p) {
161 Some(CellIterator::new(BufReader::new(f), base_sst.as_ref()))
162 } else {
163 None
164 }
165 } else {
166 None
167 };
168
169 let fork_iter = if let Some(p) = fork_path_str {
170 if let Ok(f) = fork_zip.by_name(p) {
171 Some(CellIterator::new(BufReader::new(f), fork_sst.as_ref()))
172 } else {
173 None
174 }
175 } else {
176 None
177 };
178
179 let diffs = match (base_iter, fork_iter) {
180 (Some(b), Some(f)) => diff_streams(b, f)?,
181 (Some(b), None) => diff_streams(b, std::iter::empty())?,
182 (None, Some(f)) => diff_streams(std::iter::empty(), f)?,
183 (None, None) => Vec::new(),
184 };
185
186 for d in diffs {
187 all_changes.push(Change::Cell(CellChange {
188 sheet: name.clone(),
189 diff: d,
190 }));
191 }
192 }
193
194 Ok(all_changes)
195}
196
197fn load_sst(zip: &mut ZipArchive<File>) -> Result<Sst> {
198 let f = zip.by_name("xl/sharedStrings.xml")?;
199 Sst::from_reader(BufReader::new(f))
200}
201
202struct WorkbookMeta {
203 sheet_map: HashMap<String, String>, sheet_id_map: HashMap<u32, String>, names: HashMap<NameKey, DefinedName>,
207}
208
209fn load_workbook_meta(zip: &mut ZipArchive<File>) -> Result<WorkbookMeta> {
210 let mut name_to_rid = HashMap::new();
212 let mut sheet_id_map = HashMap::new();
213 let mut defined_names = HashMap::new();
214
215 let mut sheet_order = Vec::new();
218
219 {
220 let workbook_xml = zip.by_name("xl/workbook.xml")?;
221 let mut reader = Reader::from_reader(BufReader::new(workbook_xml));
222 let mut buf = Vec::new();
223
224 loop {
225 match reader.read_event_into(&mut buf) {
226 Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
227 if e.name().as_ref() == b"sheet" {
228 let mut name = String::new();
229 let mut rid = String::new();
230 for attr in e.attributes() {
231 let attr = attr?;
232 if attr.key.as_ref() == b"name" {
233 name = String::from_utf8_lossy(&attr.value).to_string();
234 } else if attr.key.as_ref() == b"r:id" {
235 rid = String::from_utf8_lossy(&attr.value).to_string();
236 }
237 }
238 if !name.is_empty() && !rid.is_empty() {
239 name_to_rid.insert(rid, name.clone());
240 sheet_order.push(name);
241 }
242 } else if e.name().as_ref() == b"definedNames" {
243 defined_names = parse_defined_names(&mut reader)?;
249 }
250 }
251 Ok(Event::Eof) => break,
252 Err(e) => return Err(e.into()),
253 _ => {}
254 }
255 buf.clear();
256 }
257 }
258
259 for (idx, name) in sheet_order.into_iter().enumerate() {
260 sheet_id_map.insert(idx as u32, name);
261 }
262
263 let mut rid_to_target = HashMap::new();
265 if let Ok(rels_xml) = zip.by_name("xl/_rels/workbook.xml.rels") {
266 let mut reader = Reader::from_reader(BufReader::new(rels_xml));
267 let mut buf = Vec::new();
268
269 loop {
270 match reader.read_event_into(&mut buf) {
271 Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
272 if e.name().as_ref() == b"Relationship" {
273 let mut id = String::new();
274 let mut target = String::new();
275 for attr in e.attributes() {
276 let attr = attr?;
277 if attr.key.as_ref() == b"Id" {
278 id = String::from_utf8_lossy(&attr.value).to_string();
279 } else if attr.key.as_ref() == b"Target" {
280 target = String::from_utf8_lossy(&attr.value).to_string();
281 }
282 }
283 rid_to_target.insert(id, target);
284 }
285 }
286 Ok(Event::Eof) => break,
287 Err(e) => return Err(e.into()),
288 _ => {}
289 }
290 buf.clear();
291 }
292 }
293
294 let mut sheet_map = HashMap::new();
296 for (rid, name) in name_to_rid {
297 if let Some(target) = rid_to_target.get(&rid) {
298 let path = if target.starts_with('/') {
299 target.trim_start_matches('/').to_string()
300 } else {
301 format!("xl/{}", target)
302 };
303 sheet_map.insert(name, path);
304 }
305 }
306
307 Ok(WorkbookMeta {
308 sheet_map,
309 sheet_id_map,
310 names: defined_names,
311 })
312}
313
314fn load_tables(
315 zip: &mut ZipArchive<File>,
316 sheet_map: &HashMap<String, String>,
317) -> Result<HashMap<String, TableInfo>> {
318 let mut tables = HashMap::new();
319
320 for (sheet_name, sheet_path) in sheet_map {
321 let path = Path::new(sheet_path);
322 let parent = path.parent().unwrap();
323 let filename = path.file_name().unwrap().to_str().unwrap();
324 let rels_path = parent.join("_rels").join(format!("{}.rels", filename));
325 let rels_path_str = rels_path.to_str().unwrap();
326
327 let mut table_files = Vec::new();
329 if let Ok(f) = zip.by_name(rels_path_str) {
330 let mut reader = Reader::from_reader(BufReader::new(f));
331 let mut buf = Vec::new();
332 loop {
333 match reader.read_event_into(&mut buf) {
334 Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
335 if e.name().as_ref() == b"Relationship" {
336 let mut target = String::new();
337 let mut type_attr = String::new();
338 for attr in e.attributes() {
339 let attr = attr?;
340 if attr.key.as_ref() == b"Target" {
341 target = String::from_utf8_lossy(&attr.value).to_string();
342 } else if attr.key.as_ref() == b"Type" {
343 type_attr = String::from_utf8_lossy(&attr.value).to_string();
344 }
345 }
346 if type_attr.ends_with("/table") {
347 table_files.push(target);
348 }
349 }
350 }
351 Ok(Event::Eof) => break,
352 _ => {}
353 }
354 buf.clear();
355 }
356 }
357
358 for target in table_files {
360 let mut full_path = parent.to_path_buf();
369 for component in Path::new(&target).components() {
370 match component {
371 std::path::Component::ParentDir => {
372 full_path.pop();
373 }
374 std::path::Component::Normal(c) => {
375 full_path.push(c);
376 }
377 _ => {}
378 }
379 }
380
381 let full_path_str = full_path.to_str().unwrap();
382
383 if let Ok(f) = zip.by_name(full_path_str) {
384 let mut reader = Reader::from_reader(BufReader::new(f));
385 if let Ok(info) = parse_table_xml(&mut reader, sheet_name.clone()) {
386 tables.insert(info.display_name.clone(), info);
387 }
388 }
389 }
390 }
391
392 Ok(tables)
393}