1use std::collections::HashSet;
7
8use clayers_xml::ContentHash;
9
10use crate::error::{Error, Result};
11use crate::object::{Object, TreeObject};
12use crate::store::ObjectStore;
13
14#[derive(Debug, Clone, PartialEq, Eq)]
16pub struct TreeDiff {
17 pub changes: Vec<NodeChange>,
19}
20
21impl TreeDiff {
22 #[must_use]
24 pub fn is_empty(&self) -> bool {
25 self.changes.is_empty()
26 }
27}
28
29#[derive(Debug, Clone, PartialEq, Eq)]
31pub enum NodeChange {
32 Added {
34 parent: ContentHash,
36 position: usize,
38 node: ContentHash,
40 },
41 Removed {
43 parent: ContentHash,
45 position: usize,
47 node: ContentHash,
49 },
50 Modified {
52 hash_before: ContentHash,
54 hash_after: ContentHash,
56 inner: Box<TreeDiff>,
58 },
59 AttributeChanged {
61 element: ContentHash,
63 attr: String,
65 old: Option<String>,
67 new: Option<String>,
69 },
70 TextChanged {
72 old: ContentHash,
74 new: ContentHash,
76 },
77}
78
79pub async fn diff(
87 store: &dyn ObjectStore,
88 a: ContentHash,
89 b: ContentHash,
90) -> Result<TreeDiff> {
91 if a == b {
93 return Ok(TreeDiff {
94 changes: Vec::new(),
95 });
96 }
97
98 let obj_a = store.get(&a).await?.ok_or(Error::NotFound(a))?;
99 let obj_b = store.get(&b).await?.ok_or(Error::NotFound(b))?;
100
101 let mut changes = Vec::new();
102
103 match (&obj_a, &obj_b) {
104 (Object::Text(_), Object::Text(_)) => {
105 changes.push(NodeChange::TextChanged { old: a, new: b });
106 }
107 (Object::Element(el_a), Object::Element(el_b)) => {
108 diff_attributes(&mut changes, b, el_a, el_b);
110
111 diff_children(store, &mut changes, a, &el_a.children, &el_b.children).await?;
113 }
114 _ => {
115 changes.push(NodeChange::Removed {
117 parent: a,
118 position: 0,
119 node: a,
120 });
121 changes.push(NodeChange::Added {
122 parent: b,
123 position: 0,
124 node: b,
125 });
126 }
127 }
128
129 Ok(TreeDiff { changes })
130}
131
132fn diff_attributes(
134 changes: &mut Vec<NodeChange>,
135 element_hash: ContentHash,
136 el_a: &crate::object::ElementObject,
137 el_b: &crate::object::ElementObject,
138) {
139 for attr_a in &el_a.attributes {
141 let matching = el_b.attributes.iter().find(|ab| {
142 ab.local_name == attr_a.local_name && ab.namespace_uri == attr_a.namespace_uri
143 });
144 match matching {
145 Some(attr_b) if attr_b.value != attr_a.value => {
146 changes.push(NodeChange::AttributeChanged {
147 element: element_hash,
148 attr: attr_a.local_name.clone(),
149 old: Some(attr_a.value.clone()),
150 new: Some(attr_b.value.clone()),
151 });
152 }
153 None => {
154 changes.push(NodeChange::AttributeChanged {
155 element: element_hash,
156 attr: attr_a.local_name.clone(),
157 old: Some(attr_a.value.clone()),
158 new: None,
159 });
160 }
161 _ => {}
162 }
163 }
164
165 for attr_b in &el_b.attributes {
167 let exists_in_a = el_a.attributes.iter().any(|aa| {
168 aa.local_name == attr_b.local_name && aa.namespace_uri == attr_b.namespace_uri
169 });
170 if !exists_in_a {
171 changes.push(NodeChange::AttributeChanged {
172 element: element_hash,
173 attr: attr_b.local_name.clone(),
174 old: None,
175 new: Some(attr_b.value.clone()),
176 });
177 }
178 }
179}
180
181async fn diff_children(
183 store: &dyn ObjectStore,
184 changes: &mut Vec<NodeChange>,
185 parent: ContentHash,
186 children_a: &[ContentHash],
187 children_b: &[ContentHash],
188) -> Result<()> {
189 let len_a = children_a.len();
190 let len_b = children_b.len();
191 let min_len = len_a.min(len_b);
192
193 for i in 0..min_len {
195 if children_a[i] != children_b[i] {
196 let inner = Box::pin(diff(store, children_a[i], children_b[i])).await?;
198 changes.push(NodeChange::Modified {
199 hash_before: children_a[i],
200 hash_after: children_b[i],
201 inner: Box::new(inner),
202 });
203 }
204 }
205
206 for (i, child) in children_a.iter().enumerate().skip(min_len) {
208 changes.push(NodeChange::Removed {
209 parent,
210 position: i,
211 node: *child,
212 });
213 }
214
215 for (i, child) in children_b.iter().enumerate().skip(min_len) {
217 changes.push(NodeChange::Added {
218 parent,
219 position: i,
220 node: *child,
221 });
222 }
223
224 Ok(())
225}
226
227#[derive(Debug, Clone, PartialEq, Eq)]
233#[cfg_attr(feature = "serde", derive(serde::Serialize))]
234#[cfg_attr(feature = "serde", serde(tag = "type", rename_all = "snake_case"))]
235pub enum FileChange {
236 Added {
238 path: String,
240 document: ContentHash,
242 },
243 Removed {
245 path: String,
247 document: ContentHash,
249 },
250 Modified {
252 path: String,
254 old_doc: ContentHash,
256 new_doc: ContentHash,
258 },
259}
260
261#[must_use]
266pub fn diff_trees(tree_a: &TreeObject, tree_b: &TreeObject) -> Vec<FileChange> {
267 let mut changes = Vec::new();
268
269 let paths_a: HashSet<&str> = tree_a.entries.iter().map(|e| e.path.as_str()).collect();
270
271 for entry in &tree_a.entries {
273 if let Some(entry_b) = tree_b.entries.iter().find(|e| e.path == entry.path) {
274 if entry.document != entry_b.document {
275 changes.push(FileChange::Modified {
276 path: entry.path.clone(),
277 old_doc: entry.document,
278 new_doc: entry_b.document,
279 });
280 }
281 } else {
282 changes.push(FileChange::Removed {
283 path: entry.path.clone(),
284 document: entry.document,
285 });
286 }
287 }
288
289 for entry in &tree_b.entries {
291 if !paths_a.contains(entry.path.as_str()) {
292 changes.push(FileChange::Added {
293 path: entry.path.clone(),
294 document: entry.document,
295 });
296 }
297 }
298
299 changes
300}
301
302#[cfg(test)]
303mod tests {
304 use super::*;
305 use crate::object::{ElementObject, TextObject};
306 use crate::store::memory::MemoryStore;
307
308 async fn store_text(store: &MemoryStore, text: &str) -> ContentHash {
309 let hash = ContentHash::from_canonical(text.as_bytes());
310 let mut tx = store.transaction().await.unwrap();
311 tx.put(
312 hash,
313 Object::Text(TextObject {
314 content: text.into(),
315 }),
316 )
317 .await
318 .unwrap();
319 tx.commit().await.unwrap();
320 hash
321 }
322
323 #[tokio::test]
324 async fn identical_hashes_no_changes() {
325 let store = MemoryStore::new();
326 let h = store_text(&store, "same").await;
327 let d = diff(&store, h, h).await.unwrap();
328 assert!(d.is_empty());
329 }
330
331 #[tokio::test]
332 async fn text_content_change() {
333 let store = MemoryStore::new();
334 let h1 = store_text(&store, "old").await;
335 let h2 = store_text(&store, "new").await;
336 let d = diff(&store, h1, h2).await.unwrap();
337 assert_eq!(d.changes.len(), 1);
338 assert!(matches!(&d.changes[0], NodeChange::TextChanged { .. }));
339 }
340
341 #[tokio::test]
342 async fn attribute_change_detected() {
343 let store = MemoryStore::new();
344 let h1 = ContentHash::from_canonical(b"el1");
345 let h2 = ContentHash::from_canonical(b"el2");
346
347 let mut tx = store.transaction().await.unwrap();
348 tx.put(
349 h1,
350 Object::Element(ElementObject {
351 local_name: "div".into(),
352 namespace_uri: None,
353 namespace_prefix: None,
354 extra_namespaces: vec![],
355 attributes: vec![crate::object::Attribute {
356 local_name: "class".into(),
357 namespace_uri: None,
358 namespace_prefix: None,
359 value: "old".into(),
360 }],
361 children: vec![],
362 inclusive_hash: h1,
363 }),
364 )
365 .await
366 .unwrap();
367 tx.put(
368 h2,
369 Object::Element(ElementObject {
370 local_name: "div".into(),
371 namespace_uri: None,
372 namespace_prefix: None,
373 extra_namespaces: vec![],
374 attributes: vec![crate::object::Attribute {
375 local_name: "class".into(),
376 namespace_uri: None,
377 namespace_prefix: None,
378 value: "new".into(),
379 }],
380 children: vec![],
381 inclusive_hash: h2,
382 }),
383 )
384 .await
385 .unwrap();
386 tx.commit().await.unwrap();
387
388 let d = diff(&store, h1, h2).await.unwrap();
389 assert!(d.changes.iter().any(|c| matches!(
390 c,
391 NodeChange::AttributeChanged {
392 attr,
393 old: Some(_),
394 new: Some(_),
395 ..
396 } if attr == "class"
397 )));
398 }
399}