use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec;
use alloc::vec::Vec;
use super::types::{ChangeType, DiffEntry, TimeError};
use super::walker::{HistoricalEntry, HistoricalTreeProvider, HistoricalTreeWalker, WalkOptions};
#[derive(Debug, Clone)]
pub struct DiffOptions {
pub include_content: bool,
pub include_metadata: bool,
pub detect_renames: bool,
pub max_depth: i32,
pub change_types: Option<Vec<ChangeType>>,
pub limit: Option<usize>,
}
impl Default for DiffOptions {
fn default() -> Self {
Self {
include_content: true,
include_metadata: true,
detect_renames: true,
max_depth: -1,
change_types: None,
limit: None,
}
}
}
pub struct DiffEngine<'a, P: HistoricalTreeProvider> {
provider: &'a P,
}
impl<'a, P: HistoricalTreeProvider> DiffEngine<'a, P> {
pub fn new(provider: &'a P) -> Self {
Self { provider }
}
pub fn diff(
&self,
path: &str,
from_txg: u64,
to_txg: u64,
options: &DiffOptions,
) -> Result<Vec<DiffEntry>, TimeError> {
let from_walker = HistoricalTreeWalker::new(self.provider, from_txg);
let to_walker = HistoricalTreeWalker::new(self.provider, to_txg);
let walk_options = WalkOptions {
max_depth: options.max_depth,
..Default::default()
};
let from_entries = self.collect_entries(&from_walker, path, &walk_options)?;
let to_entries = self.collect_entries(&to_walker, path, &walk_options)?;
let from_map: BTreeMap<String, HistoricalEntry> = from_entries
.into_iter()
.map(|e| (e.path.clone(), e))
.collect();
let to_map: BTreeMap<String, HistoricalEntry> = to_entries
.into_iter()
.map(|e| (e.path.clone(), e))
.collect();
let mut diffs = Vec::new();
let mut deleted_by_checksum: BTreeMap<[u64; 4], (String, HistoricalEntry)> =
BTreeMap::new();
for (path, from_entry) in &from_map {
if let Some(to_entry) = to_map.get(path) {
if let Some(change) = self.detect_change(from_entry, to_entry, options) {
diffs.push(change);
}
} else {
if options.detect_renames && from_entry.checksum != [0; 4] {
deleted_by_checksum
.insert(from_entry.checksum, (path.clone(), from_entry.clone()));
} else {
diffs.push(DiffEntry {
path: path.clone(),
change_type: ChangeType::Deleted,
old_size: Some(from_entry.size),
new_size: None,
old_checksum: Some(from_entry.checksum),
new_checksum: None,
old_mtime: Some(from_entry.mtime),
new_mtime: None,
txg: to_txg,
});
}
}
}
for (path, to_entry) in &to_map {
if !from_map.contains_key(path) {
if options.detect_renames && to_entry.checksum != [0; 4] {
if let Some((old_path, old_entry)) =
deleted_by_checksum.remove(&to_entry.checksum)
{
diffs.push(DiffEntry {
path: path.clone(),
change_type: ChangeType::Renamed { old_path },
old_size: Some(old_entry.size),
new_size: Some(to_entry.size),
old_checksum: Some(old_entry.checksum),
new_checksum: Some(to_entry.checksum),
old_mtime: Some(old_entry.mtime),
new_mtime: Some(to_entry.mtime),
txg: to_entry.txg,
});
continue;
}
}
diffs.push(DiffEntry {
path: path.clone(),
change_type: ChangeType::Created,
old_size: None,
new_size: Some(to_entry.size),
old_checksum: None,
new_checksum: Some(to_entry.checksum),
old_mtime: None,
new_mtime: Some(to_entry.mtime),
txg: to_entry.txg,
});
}
}
for (_, (path, entry)) in deleted_by_checksum {
diffs.push(DiffEntry {
path,
change_type: ChangeType::Deleted,
old_size: Some(entry.size),
new_size: None,
old_checksum: Some(entry.checksum),
new_checksum: None,
old_mtime: Some(entry.mtime),
new_mtime: None,
txg: to_txg,
});
}
if let Some(ref filter_types) = options.change_types {
diffs.retain(|d| {
filter_types.iter().any(|ct| {
matches!(
(&d.change_type, ct),
(ChangeType::Created, ChangeType::Created)
| (ChangeType::Modified, ChangeType::Modified)
| (ChangeType::Deleted, ChangeType::Deleted)
| (ChangeType::Renamed { .. }, ChangeType::Renamed { .. })
| (ChangeType::MetadataChanged, ChangeType::MetadataChanged)
)
})
});
}
diffs.sort_by(|a, b| a.path.cmp(&b.path));
if let Some(limit) = options.limit {
diffs.truncate(limit);
}
Ok(diffs)
}
fn collect_entries(
&self,
walker: &HistoricalTreeWalker<P>,
path: &str,
options: &WalkOptions,
) -> Result<Vec<HistoricalEntry>, TimeError> {
if !walker.exists(path) {
return Ok(Vec::new());
}
let entry = walker.lookup(path)?;
if entry.is_dir() {
let mut entries = walker.walk(path, options)?;
entries.insert(0, entry);
Ok(entries)
} else {
Ok(vec![entry])
}
}
fn detect_change(
&self,
from: &HistoricalEntry,
to: &HistoricalEntry,
options: &DiffOptions,
) -> Option<DiffEntry> {
if options.include_content && from.checksum != to.checksum {
return Some(DiffEntry {
path: to.path.clone(),
change_type: ChangeType::Modified,
old_size: Some(from.size),
new_size: Some(to.size),
old_checksum: Some(from.checksum),
new_checksum: Some(to.checksum),
old_mtime: Some(from.mtime),
new_mtime: Some(to.mtime),
txg: to.txg,
});
}
if options.include_metadata {
let metadata_changed = from.mode != to.mode
|| from.uid != to.uid
|| from.gid != to.gid
|| from.mtime != to.mtime;
if metadata_changed {
return Some(DiffEntry {
path: to.path.clone(),
change_type: ChangeType::MetadataChanged,
old_size: Some(from.size),
new_size: Some(to.size),
old_checksum: Some(from.checksum),
new_checksum: Some(to.checksum),
old_mtime: Some(from.mtime),
new_mtime: Some(to.mtime),
txg: to.txg,
});
}
}
None
}
pub fn quick_diff(
&self,
path: &str,
from_txg: u64,
to_txg: u64,
) -> Result<DiffSummary, TimeError> {
let from_walker = HistoricalTreeWalker::new(self.provider, from_txg);
let to_walker = HistoricalTreeWalker::new(self.provider, to_txg);
let walk_options = WalkOptions::default();
let from_entries = self.collect_entries(&from_walker, path, &walk_options)?;
let to_entries = self.collect_entries(&to_walker, path, &walk_options)?;
let from_paths: Vec<_> = from_entries.iter().map(|e| e.path.as_str()).collect();
let to_paths: Vec<_> = to_entries.iter().map(|e| e.path.as_str()).collect();
let mut created = 0;
let mut deleted = 0;
let mut modified = 0;
for path in &to_paths {
if !from_paths.contains(path) {
created += 1;
}
}
for path in &from_paths {
if !to_paths.contains(path) {
deleted += 1;
}
}
for from_entry in &from_entries {
if let Some(to_entry) = to_entries.iter().find(|e| e.path == from_entry.path) {
if from_entry.checksum != to_entry.checksum {
modified += 1;
}
}
}
Ok(DiffSummary {
created,
modified,
deleted,
renamed: 0,
metadata_changed: 0,
})
}
}
#[derive(Debug, Clone, Default)]
pub struct DiffSummary {
pub created: usize,
pub modified: usize,
pub deleted: usize,
pub renamed: usize,
pub metadata_changed: usize,
}
impl DiffSummary {
pub fn total(&self) -> usize {
self.created + self.modified + self.deleted + self.renamed + self.metadata_changed
}
pub fn is_empty(&self) -> bool {
self.total() == 0
}
pub fn from_entries(entries: &[DiffEntry]) -> Self {
let mut summary = DiffSummary::default();
for entry in entries {
match &entry.change_type {
ChangeType::Created => summary.created += 1,
ChangeType::Modified => summary.modified += 1,
ChangeType::Deleted => summary.deleted += 1,
ChangeType::Renamed { .. } => summary.renamed += 1,
ChangeType::MetadataChanged => summary.metadata_changed += 1,
}
}
summary
}
}
pub struct IncrementalDiffIterator<'a, P: HistoricalTreeProvider> {
engine: &'a DiffEngine<'a, P>,
from_txg: u64,
to_txg: u64,
path: String,
options: DiffOptions,
current_diffs: Vec<DiffEntry>,
position: usize,
exhausted: bool,
}
impl<'a, P: HistoricalTreeProvider> IncrementalDiffIterator<'a, P> {
pub fn new(
engine: &'a DiffEngine<'a, P>,
path: &str,
from_txg: u64,
to_txg: u64,
options: DiffOptions,
) -> Self {
Self {
engine,
from_txg,
to_txg,
path: path.into(),
options,
current_diffs: Vec::new(),
position: 0,
exhausted: false,
}
}
fn load_batch(&mut self) -> bool {
if self.exhausted {
return false;
}
match self
.engine
.diff(&self.path, self.from_txg, self.to_txg, &self.options)
{
Ok(diffs) => {
self.current_diffs = diffs;
self.position = 0;
self.exhausted = true;
!self.current_diffs.is_empty()
}
Err(_) => {
self.exhausted = true;
false
}
}
}
}
impl<'a, P: HistoricalTreeProvider> Iterator for IncrementalDiffIterator<'a, P> {
type Item = DiffEntry;
fn next(&mut self) -> Option<Self::Item> {
if self.position >= self.current_diffs.len() && !self.load_batch() {
return None;
}
if self.position < self.current_diffs.len() {
let entry = self.current_diffs[self.position].clone();
self.position += 1;
Some(entry)
} else {
None
}
}
}
#[cfg(test)]
mod tests {
use super::super::types::FileType;
use super::super::walker::InMemoryTreeProvider;
use super::*;
fn create_entry(
path: &str,
name: &str,
file_type: FileType,
size: u64,
txg: u64,
checksum: [u64; 4],
) -> HistoricalEntry {
HistoricalEntry {
name: name.into(),
path: path.into(),
object_id: path.len() as u64,
parent_id: 1,
file_type,
size,
mode: 0o644,
uid: 1000,
gid: 1000,
atime: txg * 1000,
mtime: txg * 1000,
ctime: txg * 1000,
txg,
checksum,
nlinks: 1,
blocks: size.div_ceil(512),
generation: txg,
}
}
fn create_test_provider() -> InMemoryTreeProvider {
let mut provider = InMemoryTreeProvider::new();
provider.add_entry(
100,
create_entry("/", "", FileType::Directory, 0, 100, [0; 4]),
);
provider.add_entry(
100,
create_entry("/data", "data", FileType::Directory, 0, 100, [0; 4]),
);
provider.add_entry(
100,
create_entry(
"/data/file1.txt",
"file1.txt",
FileType::Regular,
100,
100,
[1, 0, 0, 0],
),
);
provider.add_entry(
100,
create_entry(
"/data/file2.txt",
"file2.txt",
FileType::Regular,
200,
100,
[2, 0, 0, 0],
),
);
provider.add_entry(
200,
create_entry("/", "", FileType::Directory, 0, 200, [0; 4]),
);
provider.add_entry(
200,
create_entry("/data", "data", FileType::Directory, 0, 200, [0; 4]),
);
provider.add_entry(
200,
create_entry(
"/data/file1.txt",
"file1.txt",
FileType::Regular,
150,
200,
[10, 0, 0, 0], ),
);
provider.add_entry(
200,
create_entry(
"/data/file3.txt",
"file3.txt",
FileType::Regular,
300,
200,
[3, 0, 0, 0],
),
);
provider
}
#[test]
fn test_diff_basic() {
let provider = create_test_provider();
let engine = DiffEngine::new(&provider);
let diffs = engine
.diff("/data", 100, 200, &DiffOptions::default())
.unwrap();
let created: Vec<_> = diffs
.iter()
.filter(|d| matches!(d.change_type, ChangeType::Created))
.collect();
assert_eq!(created.len(), 1);
assert_eq!(created[0].path, "/data/file3.txt");
let modified: Vec<_> = diffs
.iter()
.filter(|d| matches!(d.change_type, ChangeType::Modified))
.collect();
assert_eq!(modified.len(), 1);
assert_eq!(modified[0].path, "/data/file1.txt");
}
#[test]
fn test_diff_rename_detection() {
let mut provider = InMemoryTreeProvider::new();
provider.add_entry(
100,
create_entry("/", "", FileType::Directory, 0, 100, [0; 4]),
);
provider.add_entry(
100,
create_entry("/data", "data", FileType::Directory, 0, 100, [0; 4]),
);
provider.add_entry(
100,
create_entry(
"/data/file.txt",
"file.txt",
FileType::Regular,
100,
100,
[42, 42, 42, 42],
),
);
provider.add_entry(
200,
create_entry("/", "", FileType::Directory, 0, 200, [0; 4]),
);
provider.add_entry(
200,
create_entry("/data", "data", FileType::Directory, 0, 200, [0; 4]),
);
provider.add_entry(
200,
create_entry(
"/data/copy.txt",
"copy.txt",
FileType::Regular,
100,
200,
[42, 42, 42, 42], ),
);
let engine = DiffEngine::new(&provider);
let options = DiffOptions {
detect_renames: true,
..Default::default()
};
let diffs = engine.diff("/data", 100, 200, &options).unwrap();
let created: Vec<_> = diffs
.iter()
.filter(|d| matches!(d.change_type, ChangeType::Created))
.collect();
assert_eq!(created.len(), 1);
assert_eq!(created[0].path, "/data/copy.txt");
}
#[test]
fn test_diff_filter_change_types() {
let provider = create_test_provider();
let engine = DiffEngine::new(&provider);
let options = DiffOptions {
change_types: Some(vec![ChangeType::Created]),
..Default::default()
};
let diffs = engine.diff("/data", 100, 200, &options).unwrap();
assert!(
diffs
.iter()
.all(|d| matches!(d.change_type, ChangeType::Created))
);
}
#[test]
fn test_diff_limit() {
let provider = create_test_provider();
let engine = DiffEngine::new(&provider);
let options = DiffOptions {
limit: Some(1),
..Default::default()
};
let diffs = engine.diff("/data", 100, 200, &options).unwrap();
assert_eq!(diffs.len(), 1);
}
#[test]
fn test_quick_diff() {
let provider = create_test_provider();
let engine = DiffEngine::new(&provider);
let summary = engine.quick_diff("/data", 100, 200).unwrap();
assert_eq!(summary.created, 1);
assert_eq!(summary.modified, 1);
}
#[test]
fn test_diff_summary() {
let entries = vec![
DiffEntry {
path: "/a".into(),
change_type: ChangeType::Created,
old_size: None,
new_size: Some(100),
old_checksum: None,
new_checksum: Some([1; 4]),
old_mtime: None,
new_mtime: Some(1000),
txg: 200,
},
DiffEntry {
path: "/b".into(),
change_type: ChangeType::Modified,
old_size: Some(50),
new_size: Some(100),
old_checksum: Some([1; 4]),
new_checksum: Some([2; 4]),
old_mtime: Some(500),
new_mtime: Some(1000),
txg: 200,
},
DiffEntry {
path: "/c".into(),
change_type: ChangeType::Deleted,
old_size: Some(100),
new_size: None,
old_checksum: Some([3; 4]),
new_checksum: None,
old_mtime: Some(500),
new_mtime: None,
txg: 200,
},
];
let summary = DiffSummary::from_entries(&entries);
assert_eq!(summary.created, 1);
assert_eq!(summary.modified, 1);
assert_eq!(summary.deleted, 1);
assert_eq!(summary.total(), 3);
}
#[test]
fn test_diff_no_changes() {
let mut provider = InMemoryTreeProvider::new();
provider.add_entry(
100,
create_entry("/", "", FileType::Directory, 0, 100, [0; 4]),
);
provider.add_entry(
100,
create_entry("/data", "data", FileType::Directory, 0, 100, [0; 4]),
);
provider.add_entry(
100,
create_entry(
"/data/file.txt",
"file.txt",
FileType::Regular,
100,
100,
[1; 4],
),
);
provider.add_entry(
200,
create_entry("/", "", FileType::Directory, 0, 200, [0; 4]),
);
provider.add_entry(
200,
create_entry("/data", "data", FileType::Directory, 0, 200, [0; 4]),
);
provider.add_entry(
200,
create_entry(
"/data/file.txt",
"file.txt",
FileType::Regular,
100,
200,
[1; 4], ),
);
let engine = DiffEngine::new(&provider);
let options = DiffOptions {
include_metadata: false,
..Default::default()
};
let diffs = engine.diff("/data", 100, 200, &options).unwrap();
assert!(diffs.is_empty());
}
#[test]
fn test_incremental_iterator() {
let provider = create_test_provider();
let engine = DiffEngine::new(&provider);
let iter = IncrementalDiffIterator::new(&engine, "/data", 100, 200, DiffOptions::default());
let diffs: Vec<_> = iter.collect();
assert_eq!(diffs.len(), 3);
}
}