use std::collections::HashSet;
use clayers_xml::ContentHash;
use crate::error::{Error, Result};
use crate::object::{Object, TreeObject};
use crate::store::ObjectStore;
/// The outcome of comparing two stored nodes: a flat list of recorded changes.
///
/// Returned by [`diff`] and also nested inside [`NodeChange::Modified`] to
/// describe what changed within a pair of differing children.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeDiff {
/// The changes, in the order they were recorded.
pub changes: Vec<NodeChange>,
}
impl TreeDiff {
    /// Reports whether this diff recorded no changes.
    ///
    /// Only the top-level `changes` list is inspected: a diff holding a
    /// [`NodeChange::Modified`] entry is non-empty even if that entry's
    /// nested diff is itself empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let Self { changes } = self;
        changes.is_empty()
    }
}
/// A single change recorded while comparing two stored nodes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeChange {
/// A node that is present only on the "after" side.
///
/// Recorded for trailing children of the second child list, and for the
/// second object when the two compared objects are of different kinds.
Added {
/// Hash the addition is reported against.
parent: ContentHash,
/// Index of the node within the child list (0 for a kind mismatch).
position: usize,
/// Hash of the added node.
node: ContentHash,
},
/// A node that is present only on the "before" side.
///
/// Recorded for trailing children of the first child list, and for the
/// first object when the two compared objects are of different kinds.
Removed {
/// Hash the removal is reported against.
parent: ContentHash,
/// Index of the node within the child list (0 for a kind mismatch).
position: usize,
/// Hash of the removed node.
node: ContentHash,
},
/// Two children at the same index whose hashes differ.
Modified {
/// Hash of the child in the first list.
hash_before: ContentHash,
/// Hash of the child in the second list.
hash_after: ContentHash,
/// The recursive diff between the two children.
inner: Box<TreeDiff>,
},
/// An attribute that was added, removed, or had its value changed.
AttributeChanged {
/// Hash of the element the change is reported against.
element: ContentHash,
/// Local name of the attribute (the namespace is not recorded).
attr: String,
/// Previous value, or `None` if the attribute was added.
old: Option<String>,
/// New value, or `None` if the attribute was removed.
new: Option<String>,
},
/// Two text objects with different hashes.
TextChanged {
/// Hash of the text object before the change.
old: ContentHash,
/// Hash of the text object after the change.
new: ContentHash,
},
}
pub async fn diff(
store: &dyn ObjectStore,
a: ContentHash,
b: ContentHash,
) -> Result<TreeDiff> {
if a == b {
return Ok(TreeDiff {
changes: Vec::new(),
});
}
let obj_a = store.get(&a).await?.ok_or(Error::NotFound(a))?;
let obj_b = store.get(&b).await?.ok_or(Error::NotFound(b))?;
let mut changes = Vec::new();
match (&obj_a, &obj_b) {
(Object::Text(_), Object::Text(_)) => {
changes.push(NodeChange::TextChanged { old: a, new: b });
}
(Object::Element(el_a), Object::Element(el_b)) => {
diff_attributes(&mut changes, b, el_a, el_b);
diff_children(store, &mut changes, a, &el_a.children, &el_b.children).await?;
}
_ => {
changes.push(NodeChange::Removed {
parent: a,
position: 0,
node: a,
});
changes.push(NodeChange::Added {
parent: b,
position: 0,
node: b,
});
}
}
Ok(TreeDiff { changes })
}
/// Appends one [`NodeChange::AttributeChanged`] per attribute that differs
/// between `el_a` and `el_b`.
///
/// Attributes are paired by `(namespace_uri, local_name)`. Changed and removed
/// attributes are emitted first, in `el_a` order; added attributes follow, in
/// `el_b` order. Every change is reported against `element_hash`, and only the
/// attribute's local name is recorded.
fn diff_attributes(
    changes: &mut Vec<NodeChange>,
    element_hash: ContentHash,
    el_a: &crate::object::ElementObject,
    el_b: &crate::object::ElementObject,
) {
    // Pass 1: attributes of `el_a` that changed value or disappeared.
    for before in &el_a.attributes {
        let counterpart = el_b.attributes.iter().find(|candidate| {
            candidate.namespace_uri == before.namespace_uri
                && candidate.local_name == before.local_name
        });
        let new = match counterpart {
            // Same key, same value: nothing to report.
            Some(after) if after.value == before.value => continue,
            Some(after) => Some(after.value.clone()),
            None => None,
        };
        changes.push(NodeChange::AttributeChanged {
            element: element_hash,
            attr: before.local_name.clone(),
            old: Some(before.value.clone()),
            new,
        });
    }

    // Pass 2: attributes that exist only in `el_b`.
    let additions = el_b
        .attributes
        .iter()
        .filter(|after| {
            !el_a.attributes.iter().any(|before| {
                before.namespace_uri == after.namespace_uri
                    && before.local_name == after.local_name
            })
        })
        .map(|after| NodeChange::AttributeChanged {
            element: element_hash,
            attr: after.local_name.clone(),
            old: None,
            new: Some(after.value.clone()),
        });
    changes.extend(additions);
}
/// Compares two child lists position by position and appends the results to
/// `changes`.
///
/// Children at the same index with different hashes yield a
/// [`NodeChange::Modified`] wrapping their recursive diff. Surplus children of
/// `children_a` are then reported as removed, and surplus children of
/// `children_b` as added, both against `parent` and at their own index.
///
/// # Errors
///
/// Propagates the first error returned by a nested [`diff`]; changes pushed
/// before that point remain in `changes`.
async fn diff_children(
    store: &dyn ObjectStore,
    changes: &mut Vec<NodeChange>,
    parent: ContentHash,
    children_a: &[ContentHash],
    children_b: &[ContentHash],
) -> Result<()> {
    let shared = children_a.len().min(children_b.len());

    // `zip` stops at the shorter list, i.e. after exactly `shared` pairs.
    for (&before, &after) in children_a.iter().zip(children_b) {
        if before == after {
            continue;
        }
        // The recursive future is boxed so that its size stays finite.
        let nested = Box::pin(diff(store, before, after)).await?;
        changes.push(NodeChange::Modified {
            hash_before: before,
            hash_after: after,
            inner: Box::new(nested),
        });
    }

    // At most one of the two tails below is non-empty.
    changes.extend(
        children_a
            .iter()
            .enumerate()
            .skip(shared)
            .map(|(position, &node)| NodeChange::Removed {
                parent,
                position,
                node,
            }),
    );
    changes.extend(
        children_b
            .iter()
            .enumerate()
            .skip(shared)
            .map(|(position, &node)| NodeChange::Added {
                parent,
                position,
                node,
            }),
    );

    Ok(())
}
/// A file-level difference between two [`TreeObject`]s, as produced by
/// [`diff_trees`].
///
/// With the `serde` feature enabled this type derives `serde::Serialize`,
/// tagged by a `type` field with snake_case variant names.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type", rename_all = "snake_case"))]
pub enum FileChange {
/// A path that appears only in the second tree.
Added {
/// Path of the new entry.
path: String,
/// Document hash the new entry points at.
document: ContentHash,
},
/// A path that appears only in the first tree.
Removed {
/// Path of the entry that went away.
path: String,
/// Document hash the entry pointed at.
document: ContentHash,
},
/// A path present in both trees whose document hash differs.
Modified {
/// Path shared by both entries.
path: String,
/// Document hash in the first tree.
old_doc: ContentHash,
/// Document hash in the second tree.
new_doc: ContentHash,
},
}
/// Lists the file-level differences between two trees, matching entries by
/// path.
///
/// The result contains, in order:
///
/// 1. for every entry of `tree_a` (in its own order), a
///    [`FileChange::Modified`] if `tree_b` has the same path with a different
///    document, or a [`FileChange::Removed`] if `tree_b` lacks the path;
/// 2. for every entry of `tree_b` (in its own order) whose path is absent
///    from `tree_a`, a [`FileChange::Added`].
///
/// Entries whose path and document both match produce nothing. Both trees are
/// indexed by path up front, so the comparison does one hash lookup per entry
/// instead of a linear scan of the other tree.
#[must_use]
pub fn diff_trees(tree_a: &TreeObject, tree_b: &TreeObject) -> Vec<FileChange> {
    let paths_a: HashSet<&str> = tree_a.entries.iter().map(|e| e.path.as_str()).collect();

    // Index `tree_b` by path once. `or_insert` keeps the first entry for a
    // given path, matching what a front-to-back linear search would find.
    let mut by_path_b: std::collections::HashMap<&str, _> = std::collections::HashMap::new();
    for entry in &tree_b.entries {
        by_path_b.entry(entry.path.as_str()).or_insert(entry);
    }

    let mut changes = Vec::new();

    for entry in &tree_a.entries {
        match by_path_b.get(entry.path.as_str()) {
            Some(entry_b) if entry.document != entry_b.document => {
                changes.push(FileChange::Modified {
                    path: entry.path.clone(),
                    old_doc: entry.document,
                    new_doc: entry_b.document,
                });
            }
            // Same path, same document: unchanged.
            Some(_) => {}
            None => {
                changes.push(FileChange::Removed {
                    path: entry.path.clone(),
                    document: entry.document,
                });
            }
        }
    }

    for entry in &tree_b.entries {
        if !paths_a.contains(entry.path.as_str()) {
            changes.push(FileChange::Added {
                path: entry.path.clone(),
                document: entry.document,
            });
        }
    }

    changes
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object::{ElementObject, TextObject};
    use crate::store::memory::MemoryStore;

    /// Writes `text` into `store` as a text object and returns the hash it
    /// was stored under.
    async fn store_text(store: &MemoryStore, text: &str) -> ContentHash {
        let key = ContentHash::from_canonical(text.as_bytes());
        let node = Object::Text(TextObject {
            content: text.into(),
        });
        let mut tx = store.transaction().await.unwrap();
        tx.put(key, node).await.unwrap();
        tx.commit().await.unwrap();
        key
    }

    /// Builds a childless, un-namespaced `div` element whose only attribute
    /// is `class` with the given value.
    fn div_with_class(hash: ContentHash, class: &str) -> Object {
        Object::Element(ElementObject {
            local_name: "div".into(),
            namespace_uri: None,
            namespace_prefix: None,
            extra_namespaces: vec![],
            attributes: vec![crate::object::Attribute {
                local_name: "class".into(),
                namespace_uri: None,
                namespace_prefix: None,
                value: class.into(),
            }],
            children: vec![],
            inclusive_hash: hash,
        })
    }

    #[tokio::test]
    async fn identical_hashes_no_changes() {
        let store = MemoryStore::new();
        let h = store_text(&store, "same").await;

        let d = diff(&store, h, h).await.unwrap();

        assert!(d.is_empty());
    }

    #[tokio::test]
    async fn text_content_change() {
        let store = MemoryStore::new();
        let h1 = store_text(&store, "old").await;
        let h2 = store_text(&store, "new").await;

        let d = diff(&store, h1, h2).await.unwrap();

        // Exactly one change, and it must be a text change.
        assert!(matches!(
            d.changes.as_slice(),
            [NodeChange::TextChanged { .. }]
        ));
    }

    #[tokio::test]
    async fn attribute_change_detected() {
        let store = MemoryStore::new();
        let h1 = ContentHash::from_canonical(b"el1");
        let h2 = ContentHash::from_canonical(b"el2");

        // Both elements are written in a single transaction.
        let mut tx = store.transaction().await.unwrap();
        tx.put(h1, div_with_class(h1, "old")).await.unwrap();
        tx.put(h2, div_with_class(h2, "new")).await.unwrap();
        tx.commit().await.unwrap();

        let d = diff(&store, h1, h2).await.unwrap();

        let class_changed = d.changes.iter().any(|c| {
            matches!(
                c,
                NodeChange::AttributeChanged {
                    attr,
                    old: Some(_),
                    new: Some(_),
                    ..
                } if attr == "class"
            )
        });
        assert!(class_changed);
    }
}