use anyhow::{bail, Result};
use std::collections::{HashMap, HashSet};
use std::path::Path;
use crate::domain::path::canonicalize_scope;
use crate::domain::scope::ScopeMatch;
use crate::domain::source::Source;
use crate::expr::filter::{self, Filter};
use crate::repo::{self, Db};
/// Behaviour switches for the compare command.
///
/// The derived `Default` turns every switch off.
#[derive(Debug, Clone, Copy, Default)]
pub struct CompareOptions {
    /// When `true`, sources that report themselves as excluded are kept in
    /// the comparison instead of being dropped.
    pub include_excluded: bool,
    /// When `true`, the paths of files found on only one side are printed in
    /// addition to the per-side counts.
    pub verbose: bool,
}
/// Compare the content found under two paths and print a summary.
///
/// Files are matched by object id rather than by name: the report counts how
/// many distinct objects appear under both paths, only under `path_a`, and
/// only under `path_b`. Sources without an object id are left out of the
/// comparison and their total is reported on stderr.
///
/// # Arguments
///
/// * `db` - database handle; its mutable connection is used for all queries.
/// * `path_a`, `path_b` - the two locations to compare.
/// * `filter_strs` - filter expressions, each parsed with `Filter::parse`.
/// * `options` - see [`CompareOptions`].
///
/// # Returns
///
/// `Ok(true)` when neither side holds an object the other lacks, `Ok(false)`
/// otherwise.
///
/// # Errors
///
/// Fails when a filter does not parse, when canonicalizing either path
/// fails or yields no scope, or when loading sources fails.
pub fn run(
    db: &mut Db,
    path_a: &Path,
    path_b: &Path,
    filter_strs: &[String],
    options: &CompareOptions,
) -> Result<bool> {
    // Prints the paths recorded for `ids`, one per line, in sorted order.
    fn print_sorted_paths(ids: &[i64], sources: &HashMap<i64, String>) {
        let mut paths: Vec<&str> = ids
            .iter()
            .filter_map(|oid| sources.get(oid))
            .map(String::as_str)
            .collect();
        paths.sort();
        for path in paths {
            println!(" {path}");
        }
    }

    // Stop at the first filter expression that fails to parse.
    let mut filters: Vec<Filter> = Vec::with_capacity(filter_strs.len());
    for raw in filter_strs {
        filters.push(Filter::parse(raw)?);
    }

    // Canonicalize both paths before inspecting either result, so a hard
    // error on B is reported even when A turns out to be missing.
    let scope_a = canonicalize_scope(Some(path_a))?;
    let scope_b = canonicalize_scope(Some(path_b))?;
    let (prefix_a, prefix_b) = match (scope_a, scope_b) {
        (Some(a), Some(b)) => (a, b),
        (None, _) => bail!("Path A does not exist: {}", path_a.display()),
        (_, None) => bail!("Path B does not exist: {}", path_b.display()),
    };

    let conn = db.conn_mut();
    let (sources_a, unhashed_a) =
        get_sources_in_scope(conn, &prefix_a, &filters, options.include_excluded)?;
    let (sources_b, unhashed_b) =
        get_sources_in_scope(conn, &prefix_b, &filters, options.include_excluded)?;

    // Partition the object ids by which side(s) they appear on.
    let shared_count = sources_a
        .keys()
        .filter(|oid| sources_b.contains_key(*oid))
        .count();
    let only_in_a: Vec<i64> = sources_a
        .keys()
        .copied()
        .filter(|oid| !sources_b.contains_key(oid))
        .collect();
    let only_in_b: Vec<i64> = sources_b
        .keys()
        .copied()
        .filter(|oid| !sources_a.contains_key(oid))
        .collect();

    println!("Comparing:");
    println!(" A: {prefix_a}");
    println!(" B: {prefix_b}");
    println!();

    let total_unhashed = unhashed_a + unhashed_b;
    if total_unhashed > 0 {
        eprintln!(
            "Skipped {total_unhashed} unhashed files (use `canon worklist` to hash them)"
        );
        eprintln!();
    }

    println!("Files in both (by content): {}", shared_count);

    println!("Only in A: {}", only_in_a.len());
    if options.verbose {
        print_sorted_paths(&only_in_a, &sources_a);
    }

    println!("Only in B: {}", only_in_b.len());
    if options.verbose {
        print_sorted_paths(&only_in_b, &sources_b);
    }

    Ok(only_in_a.is_empty() && only_in_b.is_empty())
}
/// Load the sources under `scope_prefix` and index them by object id.
///
/// Every root id is read from the `roots` table and the sources of all
/// roots are fetched in one batch. A source is kept when it is active,
/// matches the scope, and is not excluded (exclusion is ignored when
/// `include_excluded` is set). When `filters` is non-empty, only sources
/// whose ids survive `filter::apply_filters` remain.
///
/// # Returns
///
/// A map from object id to the path of the first source seen with that
/// object id, together with the number of surviving sources that have no
/// object id.
///
/// # Errors
///
/// Propagates failures from the root query, the batch fetch, and filter
/// evaluation.
fn get_sources_in_scope(
    conn: &mut crate::repo::Connection,
    scope_prefix: &str,
    filters: &[Filter],
    include_excluded: bool,
) -> Result<(HashMap<i64, String>, usize)> {
    let scopes = ScopeMatch::classify_all(&[scope_prefix.to_string()]);

    let root_ids: Vec<i64> = conn
        .prepare("SELECT id FROM roots")?
        .query_map([], |row| row.get(0))?
        .collect::<Result<Vec<_>, _>>()?;

    // Single pass with the same check order as before: active, in scope,
    // then the exclusion rule.
    let mut candidates: Vec<Source> = repo::source::batch_fetch_by_roots(conn, &root_ids)?
        .into_iter()
        .filter(|src| {
            src.is_active()
                && src.matches_scope(&scopes)
                && (include_excluded || !src.is_excluded())
        })
        .collect();

    if !filters.is_empty() {
        let candidate_ids: Vec<i64> = candidates.iter().map(|src| src.id).collect();
        let surviving: HashSet<i64> = filter::apply_filters(conn, &candidate_ids, filters)?
            .into_iter()
            .collect();
        // `retain` keeps the original relative order of the survivors.
        candidates.retain(|src| surviving.contains(&src.id));
    }

    let mut by_object: HashMap<i64, String> = HashMap::new();
    let mut unhashed = 0;
    for src in candidates {
        let Some(oid) = src.object_id else {
            unhashed += 1;
            continue;
        };
        // The first path encountered for an object id is the one kept.
        by_object.entry(oid).or_insert_with(|| src.path());
    }

    Ok((by_object, unhashed))
}