use anyhow::{Context, Result};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use crate::domain::exclusion::find_excludable_duplicates;
use crate::domain::path::canonicalize_scopes;
use crate::domain::root::find_containing_root;
use crate::domain::scope::ScopeMatch;
use crate::expr::filter::{self, Filter};
use crate::repo::{self, Connection, Db};
/// Options shared by the commands that add exclusions.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SetOptions {
    /// When `true`, only report what would be excluded; nothing is written.
    pub dry_run: bool,
    /// When `true`, list every affected source instead of a truncated summary.
    pub verbose: bool,
}
/// Options shared by the commands that remove exclusions.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ClearOptions {
    /// When `true`, only report what would be cleared; nothing is written.
    pub dry_run: bool,
}
/// Marks every matching, not-yet-excluded source as excluded.
///
/// `filter_strs` are parsed into [`Filter`]s and `scope_paths` are passed
/// through `canonicalize_scopes` before the candidate sources are looked up.
/// With `options.dry_run` the affected paths are printed and nothing is
/// written.
///
/// # Errors
///
/// Propagates filter-parse, scope-canonicalization and repository errors.
pub fn set(
    db: &mut Db,
    scope_paths: &[PathBuf],
    filter_strs: &[String],
    options: &SetOptions,
) -> Result<()> {
    let conn = db.conn_mut();

    let mut filters: Vec<Filter> = Vec::with_capacity(filter_strs.len());
    for raw in filter_strs {
        filters.push(Filter::parse(raw)?);
    }

    let scope_prefixes = canonicalize_scopes(scope_paths)?;
    let candidate_ids = get_matching_sources(conn, &scope_prefixes, &filters, false)?;
    let by_id = repo::source::batch_fetch_by_ids(conn, &candidate_ids)?;

    // Keep only ids that resolve to a source which is not excluded yet.
    let mut to_exclude: Vec<i64> = Vec::new();
    for id in candidate_ids {
        match by_id.get(&id) {
            Some(source) if !source.is_excluded() => to_exclude.push(id),
            _ => {}
        }
    }

    let pending = to_exclude.len();
    if pending == 0 {
        println!("No sources to exclude (0 matching non-excluded sources)");
        return Ok(());
    }

    if options.dry_run {
        println!("Would exclude {} sources:", pending);
        for &id in &to_exclude {
            if let Some(path) = get_source_path(conn, id)? {
                println!(" {path}");
            }
        }
        return Ok(());
    }

    for &id in &to_exclude {
        repo::source::set_excluded(conn, id, true)?;
    }
    println!("Excluded {} sources", pending);
    Ok(())
}
/// Removes the source-level exclusion flag from every matching excluded source.
///
/// With `options.dry_run` the affected paths are printed and nothing is
/// written.
///
/// # Errors
///
/// Propagates filter-parse, scope-canonicalization and repository errors.
pub fn clear(
    db: &mut Db,
    scope_paths: &[PathBuf],
    filter_strs: &[String],
    options: &ClearOptions,
) -> Result<()> {
    let conn = db.conn_mut();

    let mut filters: Vec<Filter> = Vec::with_capacity(filter_strs.len());
    for raw in filter_strs {
        filters.push(Filter::parse(raw)?);
    }

    let scope_prefixes = canonicalize_scopes(scope_paths)?;
    let matches = get_excluded_sources(conn, &scope_prefixes, &filters)?;
    let total = matches.len();

    if total == 0 {
        println!("No excluded sources match the given filters");
        return Ok(());
    }

    if options.dry_run {
        println!("Would clear exclusions for {} sources:", total);
        for (_, path) in &matches {
            println!(" {path}");
        }
        return Ok(());
    }

    for &(source_id, _) in &matches {
        repo::source::set_excluded(conn, source_id, false)?;
    }
    println!("Cleared exclusions for {} sources", total);
    Ok(())
}
/// Prints the excluded sources in scope, grouped by how they were excluded.
///
/// Sources flagged directly are listed first, followed by sources whose
/// object is excluded; the two groups are separated by a blank line when both
/// are non-empty.
///
/// # Errors
///
/// Propagates filter-parse, scope-canonicalization and repository errors.
pub fn list(db: &mut Db, scope_paths: &[PathBuf], filter_strs: &[String]) -> Result<()> {
    let conn = db.conn_mut();

    let mut filters: Vec<Filter> = Vec::with_capacity(filter_strs.len());
    for raw in filter_strs {
        filters.push(Filter::parse(raw)?);
    }

    let scope_prefixes = canonicalize_scopes(scope_paths)?;
    let direct = get_excluded_sources(conn, &scope_prefixes, &filters)?;
    let via_object = get_object_excluded_sources(conn, &scope_prefixes, &filters)?;

    let has_direct = !direct.is_empty();
    let has_object = !via_object.is_empty();

    if !has_direct && !has_object {
        println!("No excluded sources match the given filters");
        return Ok(());
    }

    if has_direct {
        println!("Directly excluded ({}):", direct.len());
        for (id, path) in &direct {
            println!(" {path} (id: {id})");
        }
    }

    if has_object {
        if has_direct {
            println!();
        }
        println!("Excluded via object ({}):", via_object.len());
        for (id, path, hash_short) in &via_object {
            println!(" {path} (id: {id}, object: {hash_short}...)");
        }
    }

    Ok(())
}
/// Returns the ids of sources in active source roots that match the scope
/// prefixes and filters.
///
/// An empty scope list matches every source. Unless `include_excluded` is
/// set, sources reporting `is_excluded()` are dropped before the filters run.
fn get_matching_sources(
    conn: &mut Connection,
    scope_prefixes: &[String],
    filters: &[Filter],
    include_excluded: bool,
) -> Result<Vec<i64>> {
    let roots = repo::root::fetch_all(conn)?;

    let mut source_root_ids: Vec<i64> = Vec::new();
    for root in roots.iter() {
        if root.is_active() && root.is_source() {
            source_root_ids.push(root.id);
        }
    }
    if source_root_ids.is_empty() {
        return Ok(Vec::new());
    }

    let sources = repo::source::batch_fetch_by_roots(conn, &source_root_ids)?;
    let scopes = ScopeMatch::classify_all(scope_prefixes);

    let candidate_ids: Vec<i64> = sources
        .into_iter()
        .filter(|s| {
            let in_scope = scopes.is_empty() || s.matches_scope(&scopes);
            in_scope && (include_excluded || !s.is_excluded())
        })
        .map(|s| s.id)
        .collect();

    if filters.is_empty() {
        Ok(candidate_ids)
    } else {
        filter::apply_filters(conn, &candidate_ids, filters)
    }
}
fn get_excluded_sources(
conn: &mut Connection,
scope_prefixes: &[String],
filters: &[Filter],
) -> Result<Vec<(i64, String)>> {
let roots = repo::root::fetch_all(conn)?;
let source_root_ids: Vec<i64> = roots
.iter()
.filter(|r| r.is_active() && r.is_source())
.map(|r| r.id)
.collect();
if source_root_ids.is_empty() {
return Ok(Vec::new());
}
let sources = repo::source::batch_fetch_by_roots(conn, &source_root_ids)?;
let scopes = ScopeMatch::classify_all(scope_prefixes);
let filtered: Vec<(i64, String)> = sources
.into_iter()
.filter(|s| scopes.is_empty() || s.matches_scope(&scopes))
.filter(|s| s.excluded) .map(|s| (s.id, s.path()))
.collect();
if filters.is_empty() {
return Ok(filtered);
}
let ids: Vec<i64> = filtered.iter().map(|(id, _)| *id).collect();
let filtered_ids: std::collections::HashSet<i64> = filter::apply_filters(conn, &ids, filters)?
.into_iter()
.collect();
Ok(filtered
.into_iter()
.filter(|(id, _)| filtered_ids.contains(id))
.collect())
}
/// Looks up a single source by id and returns its path, or `None` when the
/// lookup yields no entry for that id.
fn get_source_path(conn: &Connection, source_id: i64) -> Result<Option<String>> {
    let fetched = repo::source::batch_fetch_by_ids(conn, &[source_id])?;
    let Some(source) = fetched.get(&source_id) else {
        return Ok(None);
    };
    Ok(Some(source.path()))
}
/// Excludes the single source identified by `source_id`.
///
/// Prints a notice and returns `Ok` if the source is already excluded; with
/// `options.dry_run` it only reports what would happen.
///
/// # Errors
///
/// Fails if no source is returned for `source_id` or a repository call fails.
pub fn set_by_id(db: &Db, source_id: i64, options: &SetOptions) -> Result<()> {
    let conn = db.conn();
    let fetched = repo::source::batch_fetch_by_ids(conn, &[source_id])?;

    let source = match fetched.get(&source_id) {
        Some(found) => found,
        None => anyhow::bail!("Source with id {source_id} not found or not present"),
    };
    let path = source.path();

    if source.is_excluded() {
        println!("Source already excluded: {path}");
        return Ok(());
    }

    if options.dry_run {
        println!("Would exclude source (id: {source_id}):");
        println!(" {path}");
    } else {
        repo::source::set_excluded(conn, source_id, true)?;
        println!("Excluded source (id: {source_id}): {path}");
    }
    Ok(())
}
/// Excludes the single source located at `file_path`.
///
/// The path is canonicalized on disk, matched to a containing root, and then
/// looked up by its root-relative path. Prints a notice and returns `Ok` if
/// the source is already excluded; with `options.dry_run` it only reports.
///
/// # Errors
///
/// Fails if the path cannot be resolved, is not valid UTF-8, or no root or
/// source matches it, or if a repository call fails.
pub fn set_by_path(db: &Db, file_path: &Path, options: &SetOptions) -> Result<()> {
    let conn = db.conn();

    let canonical = std::fs::canonicalize(file_path)
        .with_context(|| format!("Failed to resolve path: {}", file_path.display()))?;
    let Some(path_str) = canonical.to_str() else {
        return Err(anyhow::anyhow!("Path contains invalid UTF-8"));
    };

    // Both "no containing root" and "no such source" report the same error.
    let not_found = || anyhow::anyhow!("No source found for path: {}", file_path.display());

    let roots = repo::root::fetch_all(conn)?;
    let (root_id, _, _, rel_path) =
        find_containing_root(path_str, &roots).ok_or_else(&not_found)?;
    let source =
        repo::source::fetch_by_path(conn, root_id, &rel_path)?.ok_or_else(&not_found)?;

    let display_path = source.path();
    if source.is_excluded() {
        println!("Source already excluded: {display_path}");
        return Ok(());
    }

    if options.dry_run {
        println!("Would exclude:");
        println!(" {display_path}");
    } else {
        repo::source::set_excluded(conn, source.id, true)?;
        println!("Excluded: {display_path}");
    }
    Ok(())
}
/// Excludes in-scope sources that `find_excludable_duplicates` selects
/// relative to `prefer_path`.
///
/// `prefer_path` and `scope_path` are canonicalized when possible; if
/// canonicalization fails the path is used as given. A summary of the
/// decision counters is always printed; with `dry_run` the selected paths are
/// listed and nothing is written.
///
/// # Errors
///
/// Propagates filter-parse and repository errors.
pub fn exclude_duplicates(
    db: &mut Db,
    prefer_path: &Path,
    scope_path: Option<&Path>,
    filter_strs: &[String],
    dry_run: bool,
) -> Result<()> {
    let conn = db.conn_mut();

    let mut filters: Vec<Filter> = Vec::with_capacity(filter_strs.len());
    for raw in filter_strs {
        filters.push(Filter::parse(raw)?);
    }

    // Best-effort canonicalization: fall back to the path as given.
    let resolve = |p: &Path| -> String {
        match std::fs::canonicalize(p) {
            Ok(resolved) => resolved.to_string_lossy().to_string(),
            Err(_) => p.to_string_lossy().to_string(),
        }
    };
    let scope_prefixes: Vec<String> = match scope_path {
        Some(p) => vec![resolve(p)],
        None => Vec::new(),
    };
    let prefer_prefix = resolve(prefer_path);

    let source_ids = get_matching_sources(conn, &scope_prefixes, &filters, false)?;
    if source_ids.is_empty() {
        println!("No sources match the given filters.");
        return Ok(());
    }

    let scope_sources_map = repo::source::batch_fetch_by_ids(conn, &source_ids)?;
    let scope_sources = source_ids
        .iter()
        .filter_map(|id| scope_sources_map.get(id).cloned())
        .collect::<Vec<_>>();

    // De-duplicate object ids before the batch lookup.
    let unique_objects: HashSet<i64> = scope_sources.iter().filter_map(|s| s.object_id).collect();
    let object_ids: Vec<i64> = unique_objects.into_iter().collect();
    let sources_by_object = repo::source::fetch_sources_by_object_ids(conn, &object_ids)?;

    let result = find_excludable_duplicates(&scope_sources, &sources_by_object, &prefer_prefix);

    let mut to_exclude_with_paths: Vec<(i64, String)> = Vec::new();
    for id in result.to_exclude.iter() {
        if let Some(source) = scope_sources_map.get(id) {
            to_exclude_with_paths.push((*id, source.path()));
        }
    }

    println!(
        "Sources in scope: {} ({} unhashed skipped)",
        source_ids.len(),
        result.skipped_no_hash
    );
    println!(" Will exclude: {}", to_exclude_with_paths.len());
    println!(
        " Skipped (no copy in --prefer): {}",
        result.skipped_not_covered
    );
    println!(
        " Skipped (multiple copies in --prefer): {}",
        result.skipped_multiple
    );
    if result.skipped_in_prefer > 0 {
        println!(
            " Skipped (already in --prefer): {}",
            result.skipped_in_prefer
        );
    }
    println!();

    if to_exclude_with_paths.is_empty() {
        println!("Nothing to exclude.");
        return Ok(());
    }

    if dry_run {
        println!("Would exclude {} sources:", to_exclude_with_paths.len());
        for (_, path) in &to_exclude_with_paths {
            println!(" {path}");
        }
        println!();
        println!("Use `canon ls --duplicates` to see remaining duplicates.");
        return Ok(());
    }

    let mut excluded_count = 0;
    for (source_id, _) in &to_exclude_with_paths {
        // Skip anything that is already flagged so the count reflects real changes.
        let already_excluded = scope_sources_map
            .get(source_id)
            .map_or(false, |source| source.is_excluded());
        if already_excluded {
            continue;
        }
        repo::source::set_excluded(conn, *source_id, true)?;
        excluded_count += 1;
    }

    println!("Excluded {excluded_count} sources");
    println!();
    println!("Use `canon ls --duplicates` to see remaining duplicates.");
    Ok(())
}
/// Excludes the object identified by `hash`.
///
/// # Errors
///
/// Fails if no object with that hash is found or a repository call fails.
pub fn set_object_by_hash(db: &Db, hash: &str, options: &SetOptions) -> Result<()> {
    let conn = db.conn();
    match repo::object::fetch_by_hash(conn, hash)? {
        Some(object) => exclude_object_by_id(conn, object.id, &object.hash_value, options),
        None => anyhow::bail!("No object found with hash: {hash}"),
    }
}
/// Excludes the object that the file at `file_path` hashes to.
///
/// The path is canonicalized on disk, matched to a containing root, and the
/// source's object is then excluded. Zero-size sources are rejected with a
/// hint to use `--hash` instead.
///
/// # Errors
///
/// Fails if the path cannot be resolved, is not valid UTF-8, has no matching
/// root, source, object id or object record, refers to a zero-size source, or
/// if a repository call fails.
pub fn set_object_by_file(db: &Db, file_path: &Path, options: &SetOptions) -> Result<()> {
    let conn = db.conn();

    let canonical = std::fs::canonicalize(file_path)
        .with_context(|| format!("Failed to resolve path: {}", file_path.display()))?;
    let Some(path_str) = canonical.to_str() else {
        return Err(anyhow::anyhow!("Path contains invalid UTF-8"));
    };

    // Every "cannot find a hashed source" failure reports the same message.
    let unhashed = || {
        anyhow::anyhow!(
            "No hashed source found for path: {}\n (File must be scanned and hashed first)",
            file_path.display()
        )
    };

    let roots = repo::root::fetch_all(conn)?;
    let (root_id, _, _, rel_path) =
        find_containing_root(path_str, &roots).ok_or_else(&unhashed)?;
    let source = repo::source::fetch_by_path(conn, root_id, &rel_path)?.ok_or_else(&unhashed)?;
    let object_id = source.object_id.ok_or_else(&unhashed)?;

    let objects = repo::object::batch_fetch_by_ids(conn, &[object_id])?;
    let object = objects.get(&object_id).ok_or_else(&unhashed)?;

    if source.size == 0 {
        anyhow::bail!(
            "Cannot exclude empty file via path (all empty files share the same hash).\n \
             Use --hash {} to explicitly exclude all empty files.",
            object.hash_value
        );
    }

    exclude_object_by_id(conn, object_id, &object.hash_value, options)
}
/// Excludes the objects behind every source matching the scope and filters.
///
/// Matching sources are collapsed to their distinct object ids. Sources
/// without an object id and zero-size sources are counted and skipped, as are
/// objects that are already excluded. With `options.dry_run` a per-object
/// summary is printed (every path when `options.verbose`) and nothing is
/// written.
///
/// # Errors
///
/// Propagates filter-parse, scope-canonicalization and repository errors.
pub fn set_objects_by_filter(
    db: &mut Db,
    scope_paths: &[PathBuf],
    filter_strs: &[String],
    options: &SetOptions,
) -> Result<()> {
    let conn = db.conn_mut();
    let filters: Vec<Filter> = filter_strs
        .iter()
        .map(|f| Filter::parse(f))
        .collect::<Result<Vec<_>>>()?;
    let scope_prefixes = canonicalize_scopes(scope_paths)?;

    // Already-excluded sources are deliberately included in the candidate set.
    let source_ids = get_matching_sources(conn, &scope_prefixes, &filters, true)?;
    if source_ids.is_empty() {
        println!("No sources match the given filters.");
        return Ok(());
    }
    let sources_map = repo::source::batch_fetch_by_ids(conn, &source_ids)?;

    // Collapse sources to distinct object ids, counting what gets skipped.
    let mut object_ids_to_check: Vec<i64> = Vec::new();
    let mut seen_objects: HashSet<i64> = HashSet::new();
    let mut no_hash = 0;
    let mut empty_skipped = 0;
    for source_id in &source_ids {
        let Some(source) = sources_map.get(source_id) else {
            continue;
        };
        let Some(object_id) = source.object_id else {
            no_hash += 1;
            continue;
        };
        // `insert` returns false when the id was already present.
        if !seen_objects.insert(object_id) {
            continue;
        }
        if source.size == 0 {
            empty_skipped += 1;
            continue;
        }
        object_ids_to_check.push(object_id);
    }

    if object_ids_to_check.is_empty() {
        println!("No objects to exclude.");
        if no_hash > 0 {
            println!(" {no_hash} sources have no hash yet");
        }
        if empty_skipped > 0 {
            println!(
                " {empty_skipped} empty files skipped (use --hash to exclude explicitly)"
            );
        }
        return Ok(());
    }

    let objects_map = repo::object::batch_fetch_by_ids(conn, &object_ids_to_check)?;
    let mut objects_to_exclude: Vec<(i64, String)> = Vec::new();
    let mut already_excluded = 0;
    for object_id in &object_ids_to_check {
        let Some(object) = objects_map.get(object_id) else {
            continue;
        };
        if object.is_excluded() {
            already_excluded += 1;
            continue;
        }
        objects_to_exclude.push((*object_id, object.hash_value.clone()));
    }

    if objects_to_exclude.is_empty() {
        println!("No objects to exclude.");
        if no_hash > 0 {
            println!(" {no_hash} sources have no hash yet");
        }
        if empty_skipped > 0 {
            println!(
                " {empty_skipped} empty files skipped (use --hash to exclude explicitly)"
            );
        }
        if already_excluded > 0 {
            println!(" {already_excluded} objects already excluded");
        }
        return Ok(());
    }

    // Gather the sorted source list of each object once; it feeds both the
    // totals and the dry-run listing.
    let mut all_sources: Vec<(i64, String, Vec<SourceInfo>)> =
        Vec::with_capacity(objects_to_exclude.len());
    let mut total_source_count = 0;
    let mut total_archive_count = 0;
    for (object_id, hash) in objects_to_exclude {
        let sources = get_object_sources(conn, object_id)?;
        total_archive_count += sources.iter().filter(|s| s.is_archive).count();
        total_source_count += sources.len();
        all_sources.push((object_id, hash, sources));
    }
    let total_in_source_roots = total_source_count - total_archive_count;

    if options.dry_run {
        println!(
            "Would exclude {} objects affecting {} sources ({} in source roots, {} in archives):",
            all_sources.len(),
            total_source_count,
            total_in_source_roots,
            total_archive_count
        );
        for (_, hash, sources) in &all_sources {
            let archive_count = sources.iter().filter(|s| s.is_archive).count();
            let src_count = sources.len() - archive_count;
            println!(
                " {}... ({} source, {} archive)",
                &hash[..16.min(hash.len())],
                src_count,
                archive_count
            );
            if options.verbose {
                for source in sources {
                    let marker = if source.is_archive { " (archive)" } else { "" };
                    println!(" {}{}", source.path, marker);
                }
            }
        }
        if no_hash > 0 {
            println!("\n {no_hash} sources skipped (no hash)");
        }
        if empty_skipped > 0 {
            println!(
                " {empty_skipped} empty files skipped (use --hash to exclude explicitly)"
            );
        }
        if already_excluded > 0 {
            println!(" {already_excluded} objects already excluded");
        }
        println!("\nUse --yes to execute.");
        return Ok(());
    }

    for (object_id, _, _) in &all_sources {
        repo::object::set_excluded(conn, *object_id, true)?;
    }
    println!(
        "Excluded {} objects affecting {} sources ({} in source roots, {} in archives)",
        all_sources.len(),
        total_source_count,
        total_in_source_roots,
        total_archive_count
    );
    Ok(())
}
/// Display-oriented view of one source belonging to an object.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SourceInfo {
    /// Path of the source as printed to the user.
    path: String,
    /// `true` when the source belongs to a root with the `archive` role.
    is_archive: bool,
}
/// Returns the sources of `object_id` as [`SourceInfo`] values in display order.
///
/// Ordering is by root role descending, then root path and relative path
/// ascending. An object with no entry in the lookup yields an empty list.
fn get_object_sources(conn: &Connection, object_id: i64) -> Result<Vec<SourceInfo>> {
    let by_object = repo::source::fetch_sources_by_object_ids(conn, &[object_id])?;
    let mut ordered: Vec<_> = by_object.get(&object_id).cloned().unwrap_or_default();

    ordered.sort_by(|lhs, rhs| {
        // Operands are swapped for the role so that it sorts in descending order.
        let by_role = rhs.root_role.cmp(&lhs.root_role);
        by_role
            .then_with(|| lhs.root_path.cmp(&rhs.root_path))
            .then_with(|| lhs.rel_path.cmp(&rhs.rel_path))
    });

    let mut infos = Vec::with_capacity(ordered.len());
    for source in ordered {
        infos.push(SourceInfo {
            path: source.path(),
            is_archive: source.is_from_role("archive"),
        });
    }
    Ok(infos)
}
/// Prints a count summary followed by the paths of `sources`.
///
/// All paths are shown when `verbose`; otherwise only the first three are
/// printed, followed by a note with the number of remaining entries.
fn print_source_locations(sources: &[SourceInfo], verbose: bool) {
    const DEFAULT_LIMIT: usize = 3;

    let total = sources.len();
    let archive_count = sources.iter().filter(|s| s.is_archive).count();
    let source_count = total - archive_count;
    println!(
        " Sources: {source_count} in source roots, {archive_count} in archive roots"
    );

    let show_count = if verbose { total } else { DEFAULT_LIMIT };
    // Clamp the slice so short lists are not over-indexed.
    for source in &sources[..show_count.min(total)] {
        let marker = if source.is_archive { " (archive)" } else { "" };
        println!(" {}{}", source.path, marker);
    }

    if !verbose && total > show_count {
        println!(
            " ... and {} more (use --verbose to show all)",
            total - show_count
        );
    }
}
/// Excludes the object `object_id` and reports where its sources live.
///
/// `hash_value` is only used for display and is truncated to at most 16
/// bytes. Prints a notice and returns `Ok` if the object is already excluded;
/// with `options.dry_run` it only reports.
fn exclude_object_by_id(
    conn: &Connection,
    object_id: i64,
    hash_value: &str,
    options: &SetOptions,
) -> Result<()> {
    // Evaluated lazily at each print site.
    let short_hash = || &hash_value[..16.min(hash_value.len())];

    let objects = repo::object::batch_fetch_by_ids(conn, &[object_id])?;
    let already_excluded = objects
        .get(&object_id)
        .map_or(false, |object| object.is_excluded());
    if already_excluded {
        println!("Object already excluded: {}...", short_hash());
        return Ok(());
    }

    let sources = get_object_sources(conn, object_id)?;

    if options.dry_run {
        println!("Would exclude object: {}...", short_hash());
        print_source_locations(&sources, options.verbose);
        println!("\nUse --yes to execute.");
        return Ok(());
    }

    repo::object::set_excluded(conn, object_id, true)?;
    println!("Excluded object: {}...", short_hash());
    print_source_locations(&sources, options.verbose);
    Ok(())
}
/// Clears the exclusion flag of the object identified by `hash`.
///
/// Prints a notice and returns `Ok` if the object is not excluded; with
/// `options.dry_run` it only reports.
///
/// # Errors
///
/// Fails if no object with that hash is found or a repository call fails.
pub fn clear_object(db: &Db, hash: &str, options: &ClearOptions) -> Result<()> {
    let conn = db.conn();
    let object = match repo::object::fetch_by_hash(conn, hash)? {
        Some(found) => found,
        None => anyhow::bail!("No object found with hash: {hash}"),
    };

    // Display form of the hash: at most the first 16 bytes.
    let short_hash = || &object.hash_value[..16.min(object.hash_value.len())];

    if !object.is_excluded() {
        println!("Object is not excluded: {}...", short_hash());
        return Ok(());
    }

    if options.dry_run {
        println!("Would clear exclusion from object: {}...", short_hash());
        return Ok(());
    }

    repo::object::set_excluded(conn, object.id, false)?;
    println!("Cleared exclusion from object: {}...", short_hash());
    Ok(())
}
/// Prints every excluded object with its truncated hash, id and source count.
///
/// # Errors
///
/// Propagates repository errors.
pub fn list_objects(db: &Db) -> Result<()> {
    let conn = db.conn();
    let excluded = repo::object::fetch_excluded(conn)?;
    if excluded.is_empty() {
        println!("No excluded objects");
        return Ok(());
    }

    let mut object_ids: Vec<i64> = Vec::with_capacity(excluded.len());
    for object in excluded.iter() {
        object_ids.push(object.id);
    }
    let sources_by_object = repo::source::fetch_sources_by_object_ids(conn, &object_ids)?;

    println!("Excluded objects ({}):", excluded.len());
    for object in excluded.iter() {
        let shown = 16.min(object.hash_value.len());
        let hash_short = &object.hash_value[..shown];
        // Objects absent from the lookup are reported with zero sources.
        let source_count = match sources_by_object.get(&object.id) {
            Some(sources) => sources.len(),
            None => 0,
        };
        println!(
            " {}... (id: {}, {} sources)",
            hash_short, object.id, source_count
        );
    }
    Ok(())
}
fn get_object_excluded_sources(
conn: &mut Connection,
scope_prefixes: &[String],
filters: &[Filter],
) -> Result<Vec<(i64, String, String)>> {
let roots = repo::root::fetch_all(conn)?;
let source_root_ids: Vec<i64> = roots
.iter()
.filter(|r| r.is_active() && r.is_source())
.map(|r| r.id)
.collect();
if source_root_ids.is_empty() {
return Ok(Vec::new());
}
let sources = repo::source::batch_fetch_by_roots(conn, &source_root_ids)?;
let scopes = ScopeMatch::classify_all(scope_prefixes);
let candidates: Vec<_> = sources
.into_iter()
.filter(|s| scopes.is_empty() || s.matches_scope(&scopes))
.filter(|s| !s.excluded) .filter(|s| s.object_id.is_some()) .collect();
if candidates.is_empty() {
return Ok(Vec::new());
}
let object_ids: Vec<i64> = candidates.iter().filter_map(|s| s.object_id).collect();
let objects = repo::object::batch_fetch_by_ids(conn, &object_ids)?;
let filtered: Vec<(i64, String, String)> = candidates
.into_iter()
.filter_map(|s| {
let object_id = s.object_id?;
let obj = objects.get(&object_id)?;
if obj.excluded {
let hash_short = obj.hash_value[..16.min(obj.hash_value.len())].to_string();
Some((s.id, s.path(), hash_short))
} else {
None
}
})
.collect();
if filters.is_empty() {
return Ok(filtered);
}
let ids: Vec<i64> = filtered.iter().map(|(id, _, _)| *id).collect();
let filtered_ids: std::collections::HashSet<i64> = filter::apply_filters(conn, &ids, filters)?
.into_iter()
.collect();
Ok(filtered
.into_iter()
.filter(|(id, _, _)| filtered_ids.contains(id))
.collect())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::repo::open_in_memory_for_test;
use rusqlite::Connection as RusqliteConnection;
/// Opens the in-memory database used by the tests in this module.
fn setup_test_db() -> RusqliteConnection {
    let conn = open_in_memory_for_test();
    conn
}
/// Inserts a row into `roots` and returns its rowid.
fn insert_root(conn: &RusqliteConnection, path: &str, role: &str, suspended: bool) -> i64 {
    let suspended_flag = suspended as i64;
    conn.execute(
        "INSERT INTO roots (path, role, suspended) VALUES (?, ?, ?)",
        rusqlite::params![path, role, suspended_flag],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Inserts a `sha256` row into `objects` and returns its rowid.
fn insert_object(conn: &RusqliteConnection, hash: &str, excluded: bool) -> i64 {
    let excluded_flag = excluded as i64;
    conn.execute(
        "INSERT INTO objects (hash_type, hash_value, excluded) VALUES ('sha256', ?, ?)",
        rusqlite::params![hash, excluded_flag],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Inserts a row into `sources` with fixed size/mtime values and returns its rowid.
fn insert_source(
    conn: &RusqliteConnection,
    root_id: i64,
    rel_path: &str,
    object_id: Option<i64>,
    present: bool,
    excluded: bool,
) -> i64 {
    let present_flag = present as i64;
    let excluded_flag = excluded as i64;
    // The SQL literal is kept exactly as written, including its line break.
    conn.execute(
        "INSERT INTO sources (root_id, rel_path, object_id, size, mtime, partial_hash, scanned_at, last_seen_at, device, inode, present, excluded)
VALUES (?, ?, ?, 1000, 1704067200, '', 0, 0, 0, 0, ?, ?)",
        rusqlite::params![root_id, rel_path, object_id, present_flag, excluded_flag],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Sources under a suspended root must not be returned.
#[test]
fn test_get_matching_sources_excludes_suspended_roots() {
    let mut conn = setup_test_db();
    let live_root = insert_root(&conn, "/active", "source", false);
    let live_source = insert_source(&conn, live_root, "file.txt", None, true, false);
    let paused_root = insert_root(&conn, "/suspended", "source", true);
    insert_source(&conn, paused_root, "file.txt", None, true, false);

    let matched = get_matching_sources(&mut conn, &[], &[], false).unwrap();

    assert_eq!(matched, vec![live_source]);
}
/// Sources under an archive-role root must not be returned.
#[test]
fn test_get_matching_sources_excludes_archive_roots() {
    let mut conn = setup_test_db();
    let src_root = insert_root(&conn, "/source", "source", false);
    let src_file = insert_source(&conn, src_root, "file.txt", None, true, false);
    let arch_root = insert_root(&conn, "/archive", "archive", false);
    insert_source(&conn, arch_root, "file.txt", None, true, false);

    let matched = get_matching_sources(&mut conn, &[], &[], false).unwrap();

    assert_eq!(matched, vec![src_file]);
}
/// Only sources under the given scope prefix are returned.
#[test]
fn test_get_matching_sources_respects_scope() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let inside = insert_source(&conn, root, "2024/photo.jpg", None, true, false);
    insert_source(&conn, root, "2023/photo.jpg", None, true, false);

    let scopes = vec![String::from("/photos/2024")];
    let matched = get_matching_sources(&mut conn, &scopes, &[], false).unwrap();

    assert_eq!(matched, vec![inside]);
}
/// Sources flagged as excluded are dropped when `include_excluded` is false.
#[test]
fn test_get_matching_sources_excludes_source_level_excluded() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let kept = insert_source(&conn, root, "normal.jpg", None, true, false);
    insert_source(&conn, root, "excluded.jpg", None, true, true);

    let matched = get_matching_sources(&mut conn, &[], &[], false).unwrap();

    assert_eq!(matched, vec![kept]);
}
/// Sources whose object is excluded are dropped when `include_excluded` is false.
#[test]
fn test_get_matching_sources_excludes_object_level_excluded() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let kept = insert_source(&conn, root, "normal.jpg", None, true, false);
    let hidden_object = insert_object(&conn, "abc123excluded", true);
    insert_source(&conn, root, "obj_excluded.jpg", Some(hidden_object), true, false);

    let matched = get_matching_sources(&mut conn, &[], &[], false).unwrap();

    assert_eq!(matched, vec![kept]);
}
/// With `include_excluded` set, normal, source-excluded and object-excluded
/// sources are all returned.
#[test]
fn test_get_matching_sources_includes_excluded_when_flag_set() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let plain = insert_source(&conn, root, "normal.jpg", None, true, false);
    let flagged = insert_source(&conn, root, "source_excluded.jpg", None, true, true);
    let hidden_object = insert_object(&conn, "abc123excluded", true);
    let via_object =
        insert_source(&conn, root, "obj_excluded.jpg", Some(hidden_object), true, false);

    let matched = get_matching_sources(&mut conn, &[], &[], true).unwrap();

    assert_eq!(matched.len(), 3);
    for expected in [plain, flagged, via_object] {
        assert!(matched.contains(&expected));
    }
}
/// A source with its own `excluded` flag set is reported.
#[test]
fn test_get_excluded_sources_returns_source_level_only() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let flagged = insert_source(&conn, root, "excluded.jpg", None, true, true);

    let found = get_excluded_sources(&mut conn, &[], &[]).unwrap();
    let ids: Vec<i64> = found.iter().map(|(id, _)| *id).collect();

    assert_eq!(ids, vec![flagged]);
}
/// A source excluded only through its object is not reported here.
#[test]
fn test_get_excluded_sources_ignores_object_level_excluded() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let hidden_object = insert_object(&conn, "abc123excluded", true);
    insert_source(&conn, root, "obj_excluded.jpg", Some(hidden_object), true, false);

    let found = get_excluded_sources(&mut conn, &[], &[]).unwrap();

    assert!(
        found.is_empty(),
        "Object-level excluded sources should NOT appear in get_excluded_sources"
    );
}
/// Only excluded sources under the scope prefix are reported.
#[test]
fn test_get_excluded_sources_respects_scope() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let inside = insert_source(&conn, root, "2024/excluded.jpg", None, true, true);
    insert_source(&conn, root, "2023/excluded.jpg", None, true, true);

    let scopes = vec![String::from("/photos/2024")];
    let found = get_excluded_sources(&mut conn, &scopes, &[]).unwrap();
    let ids: Vec<i64> = found.iter().map(|(id, _)| *id).collect();

    assert_eq!(ids, vec![inside]);
}
/// The reported path is the root path joined with the relative path.
#[test]
fn test_get_excluded_sources_returns_correct_path() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let flagged = insert_source(&conn, root, "subdir/excluded.jpg", None, true, true);

    let found = get_excluded_sources(&mut conn, &[], &[]).unwrap();

    assert_eq!(
        found,
        vec![(flagged, String::from("/photos/subdir/excluded.jpg"))]
    );
}
/// A source whose object is excluded (but is not itself flagged) is reported.
#[test]
fn test_get_object_excluded_sources_returns_object_level_only() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let hidden_object = insert_object(&conn, "abc123excluded", true);
    let via_object =
        insert_source(&conn, root, "obj_excluded.jpg", Some(hidden_object), true, false);

    let found = get_object_excluded_sources(&mut conn, &[], &[]).unwrap();
    let ids: Vec<i64> = found.iter().map(|(id, _, _)| *id).collect();

    assert_eq!(ids, vec![via_object]);
}
/// A source that is flagged itself is not reported as object-excluded, even
/// when its object is excluded too.
#[test]
fn test_get_object_excluded_sources_ignores_source_level_excluded() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let hidden_object = insert_object(&conn, "abc123excluded", true);
    insert_source(&conn, root, "both_excluded.jpg", Some(hidden_object), true, true);

    let found = get_object_excluded_sources(&mut conn, &[], &[]).unwrap();

    assert!(
        found.is_empty(),
        "Sources with source-level exclusion should NOT appear in get_object_excluded_sources"
    );
}
/// The third tuple element is the first 16 characters of the object hash.
#[test]
fn test_get_object_excluded_sources_returns_hash_prefix() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let full_hash = "abcdef1234567890abcdef1234567890abcdef1234567890";
    let hidden_object = insert_object(&conn, full_hash, true);
    insert_source(&conn, root, "file.jpg", Some(hidden_object), true, false);

    let found = get_object_excluded_sources(&mut conn, &[], &[]).unwrap();

    assert_eq!(found.len(), 1);
    let (_, _, prefix) = &found[0];
    assert_eq!(prefix.as_str(), "abcdef1234567890");
}
/// Only object-excluded sources under the scope prefix are reported.
#[test]
fn test_get_object_excluded_sources_respects_scope() {
    let mut conn = setup_test_db();
    let root = insert_root(&conn, "/photos", "source", false);
    let hidden_object = insert_object(&conn, "abc123excluded", true);
    let inside = insert_source(&conn, root, "2024/file.jpg", Some(hidden_object), true, false);
    insert_source(&conn, root, "2023/file.jpg", Some(hidden_object), true, false);

    let scopes = vec![String::from("/photos/2024")];
    let found = get_object_excluded_sources(&mut conn, &scopes, &[]).unwrap();
    let ids: Vec<i64> = found.iter().map(|(id, _, _)| *id).collect();

    assert_eq!(ids, vec![inside]);
}
/// Wraps a fresh in-memory test connection in a `repo::Db`.
fn make_test_db() -> repo::Db {
    repo::Db::from_connection(setup_test_db())
}
/// Reads the `excluded` column of the given source row.
///
/// Panics if the row cannot be read. Silently returning `false` on a query
/// error would let "should NOT be excluded" assertions pass for a wrong id or
/// a broken schema.
fn is_source_excluded(conn: &RusqliteConnection, source_id: i64) -> bool {
    let flag: i64 = conn
        .query_row(
            "SELECT excluded FROM sources WHERE id = ?",
            [source_id],
            |row| row.get(0),
        )
        .expect("source row should exist and expose an `excluded` column");
    flag == 1
}
/// A source with exactly one duplicate under the prefer path gets excluded.
#[test]
fn test_exclude_duplicates_excludes_when_one_copy_in_prefer() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let src_root = insert_root(conn, "/source", "source", false);
    let arch_root = insert_root(conn, "/archive", "archive", false);
    let object = insert_object(conn, "same_content_hash", false);
    let candidate = insert_source(conn, src_root, "photo.jpg", Some(object), true, false);
    insert_source(conn, arch_root, "photo.jpg", Some(object), true, false);

    let prefer = Path::new("/archive");
    let scope = Path::new("/source");
    let outcome = exclude_duplicates(&mut db, prefer, Some(scope), &[], false);

    assert!(outcome.is_ok());
    assert!(
        is_source_excluded(db.conn(), candidate),
        "Source should be excluded when exactly one copy exists in prefer path"
    );
}
/// A source without any duplicate under the prefer path is left alone.
#[test]
fn test_exclude_duplicates_skips_when_no_copy_in_prefer() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let src_root = insert_root(conn, "/source", "source", false);
    insert_root(conn, "/archive", "archive", false);
    let object = insert_object(conn, "unique_content_hash", false);
    let candidate = insert_source(conn, src_root, "unique.jpg", Some(object), true, false);

    let prefer = Path::new("/archive");
    let scope = Path::new("/source");
    let outcome = exclude_duplicates(&mut db, prefer, Some(scope), &[], false);

    assert!(outcome.is_ok());
    assert!(
        !is_source_excluded(db.conn(), candidate),
        "Source should NOT be excluded when no copy exists in prefer path"
    );
}
/// A source with several duplicates under the prefer path is left alone.
#[test]
fn test_exclude_duplicates_skips_when_multiple_copies_in_prefer() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let src_root = insert_root(conn, "/source", "source", false);
    let arch_root = insert_root(conn, "/archive", "archive", false);
    let object = insert_object(conn, "duplicated_content", false);
    let candidate = insert_source(conn, src_root, "photo.jpg", Some(object), true, false);
    insert_source(conn, arch_root, "copy1.jpg", Some(object), true, false);
    insert_source(conn, arch_root, "copy2.jpg", Some(object), true, false);

    let prefer = Path::new("/archive");
    let scope = Path::new("/source");
    let outcome = exclude_duplicates(&mut db, prefer, Some(scope), &[], false);

    assert!(outcome.is_ok());
    assert!(
        !is_source_excluded(db.conn(), candidate),
        "Source should NOT be excluded when multiple copies exist in prefer path"
    );
}
/// Running with scope equal to the prefer path never excludes the file there.
#[test]
fn test_exclude_duplicates_skips_source_already_in_prefer() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let arch_root = insert_root(conn, "/archive", "archive", false);
    let object = insert_object(conn, "archive_file_hash", false);
    let keeper = insert_source(conn, arch_root, "keeper.jpg", Some(object), true, false);

    let prefer = Path::new("/archive");
    let scope = Path::new("/archive");
    let outcome = exclude_duplicates(&mut db, prefer, Some(scope), &[], false);

    assert!(outcome.is_ok());
    assert!(
        !is_source_excluded(db.conn(), keeper),
        "Source in prefer path should never be excluded"
    );
}
/// A sibling directory sharing a string prefix with the prefer path must not
/// count as being under it.
#[test]
fn test_exclude_duplicates_path_prefix_no_false_positive() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let src_root = insert_root(conn, "/source", "source", false);
    insert_root(conn, "/archive/photos", "archive", false);
    let sibling_root = insert_root(conn, "/archive/photos-old", "archive", false);
    let object = insert_object(conn, "test_content", false);
    let candidate = insert_source(conn, src_root, "file.jpg", Some(object), true, false);
    insert_source(conn, sibling_root, "file.jpg", Some(object), true, false);

    let prefer = Path::new("/archive/photos");
    let scope = Path::new("/source");
    let outcome = exclude_duplicates(&mut db, prefer, Some(scope), &[], false);

    assert!(outcome.is_ok());
    assert!(
        !is_source_excluded(db.conn(), candidate),
        "Path prefix matching should not have false positives: /archive/photos-old is NOT under /archive/photos"
    );
}
/// When the prefer path is itself a root whose single source has an empty
/// `rel_path`, the duplicate under `/source` must still be excluded.
#[test]
fn test_exclude_duplicates_empty_rel_path() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let source_root = insert_root(conn, "/source", "source", false);
    let obj = insert_object(conn, "duplicate_content", false);
    let source_id = insert_source(conn, source_root, "photo.jpg", Some(obj), true, false);
    // Root registered at a file path; its source row carries an empty rel_path.
    let archive_file_root = insert_root(conn, "/archive/photo.jpg", "archive", false);
    insert_source(conn, archive_file_root, "", Some(obj), true, false);

    // `expect` keeps the underlying error in the failure output.
    exclude_duplicates(
        &mut db,
        Path::new("/archive/photo.jpg"),
        Some(Path::new("/source")),
        &[],
        false,
    )
    .expect("exclude_duplicates should succeed");

    assert!(
        is_source_excluded(db.conn(), source_id),
        "Source should be excluded when duplicate exists at prefer path with empty rel_path"
    );
}
/// `set_by_id` on a present, non-excluded source marks it as excluded.
#[test]
fn test_set_by_id_excludes_source() {
    let db = make_test_db();
    let conn = db.conn();
    let root = insert_root(conn, "/photos", "source", false);
    let source_id = insert_source(conn, root, "photo.jpg", None, true, false);
    let options = SetOptions {
        dry_run: false,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_by_id(&db, source_id, &options).expect("set_by_id should succeed");

    assert!(
        is_source_excluded(conn, source_id),
        "Source should be excluded after set_by_id"
    );
}
/// `set_by_id` with an id that matches no row must return an error whose text
/// contains "not found".
#[test]
fn test_set_by_id_nonexistent_fails() {
    let db = make_test_db();
    let opts = SetOptions { dry_run: false, verbose: false };

    let outcome = set_by_id(&db, 99999, &opts);
    assert!(outcome.is_err());

    let failure = outcome.unwrap_err();
    let err_msg = failure.to_string();
    let mentions_not_found = err_msg.contains("not found");
    assert!(
        mentions_not_found,
        "Error should mention 'not found', got: {err_msg}"
    );
}
/// `set_by_id` on a source that is already excluded succeeds and leaves the
/// source excluded.
#[test]
fn test_set_by_id_already_excluded_skips() {
    let db = make_test_db();
    let conn = db.conn();
    let root = insert_root(conn, "/photos", "source", false);
    // Source row is created already excluded.
    let source_id = insert_source(conn, root, "photo.jpg", None, true, true);
    let options = SetOptions {
        dry_run: false,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_by_id(&db, source_id, &options)
        .expect("set_by_id should succeed for an already-excluded source");

    assert!(
        is_source_excluded(conn, source_id),
        "Source should remain excluded"
    );
}
/// `set_by_id` must reject a source row that exists but is not marked present;
/// the error text has to say "not found" or "not present".
#[test]
fn test_set_by_id_not_present_fails() {
    let db = make_test_db();
    let conn = db.conn();
    let root_id = insert_root(conn, "/photos", "source", false);
    // Row exists but is inserted as not present.
    let missing_id = insert_source(conn, root_id, "deleted.jpg", None, false, false);
    let opts = SetOptions { dry_run: false, verbose: false };

    let outcome = set_by_id(&db, missing_id, &opts);
    assert!(outcome.is_err());

    let err_msg = outcome.unwrap_err().to_string();
    let acceptable = ["not found", "not present"]
        .iter()
        .any(|needle| err_msg.contains(needle));
    assert!(
        acceptable,
        "Error should mention source not found/present, got: {err_msg}"
    );
}
/// `set_by_path` on a path that does not exist on disk must fail with a
/// "Failed to resolve path" error.
#[test]
fn test_set_by_path_nonexistent_file_fails() {
    let db = make_test_db();
    let opts = SetOptions { dry_run: false, verbose: false };
    let bogus = Path::new("/nonexistent/path/to/file.jpg");

    let outcome = set_by_path(&db, bogus, &opts);
    assert!(outcome.is_err());

    let err_msg = outcome.unwrap_err().to_string();
    let mentions_resolution = err_msg.contains("Failed to resolve path");
    assert!(
        mentions_resolution,
        "Error should mention path resolution failure, got: {err_msg}"
    );
}
/// `set_by_path` on `/tmp`, which has no row in the empty test database, must
/// fail with a "No source found" error.
#[test]
fn test_set_by_path_not_in_db_fails() {
    let db = make_test_db();
    let opts = SetOptions { dry_run: false, verbose: false };
    // NOTE(review): relies on `/tmp` existing on the machine running the tests.
    let untracked = Path::new("/tmp");

    let outcome = set_by_path(&db, untracked, &opts);
    assert!(outcome.is_err());

    let err_msg = outcome.unwrap_err().to_string();
    let mentions_missing = err_msg.contains("No source found");
    assert!(
        mentions_missing,
        "Error should mention no source found, got: {err_msg}"
    );
}
/// `get_object_sources` reports the full path built from the root path and the
/// source's `rel_path`.
#[test]
fn test_get_object_sources_returns_paths() {
    let connection = setup_test_db();
    let root_id = insert_root(&connection, "/photos", "source", false);
    let object_id = insert_object(&connection, "abc123hash", false);
    insert_source(&connection, root_id, "2024/photo.jpg", Some(object_id), true, false);

    let found = get_object_sources(&connection, object_id).unwrap();
    assert_eq!(found.len(), 1);

    let only = &found[0];
    assert_eq!(
        only.path, "/photos/2024/photo.jpg",
        "Path should be correctly constructed from root + rel_path"
    );
}
/// `get_object_sources` returns one entry per root and flags the one under the
/// "archive" root via `is_archive`.
#[test]
fn test_get_object_sources_includes_role() {
    let connection = setup_test_db();
    let plain_root = insert_root(&connection, "/source", "source", false);
    let archive_root = insert_root(&connection, "/archive", "archive", false);
    let object_id = insert_object(&connection, "abc123hash", false);
    insert_source(&connection, plain_root, "photo.jpg", Some(object_id), true, false);
    insert_source(&connection, archive_root, "photo.jpg", Some(object_id), true, false);

    let found = get_object_sources(&connection, object_id).unwrap();
    assert_eq!(found.len(), 2);

    // Split once by role instead of filtering the list twice.
    let (in_archive, in_source): (Vec<_>, Vec<_>) =
        found.iter().partition(|entry| entry.is_archive);
    assert_eq!(in_archive.len(), 1, "Should have one archive source");
    assert_eq!(in_source.len(), 1, "Should have one source source");
    assert_eq!(in_archive[0].path, "/archive/photo.jpg");
    assert_eq!(in_source[0].path, "/source/photo.jpg");
}
/// For a source with an empty `rel_path`, the reported path is exactly the root
/// path, with no trailing slash appended.
#[test]
fn test_get_object_sources_empty_rel_path() {
    let connection = setup_test_db();
    let file_root = insert_root(&connection, "/archive/photo.jpg", "archive", false);
    let object_id = insert_object(&connection, "abc123hash", false);
    insert_source(&connection, file_root, "", Some(object_id), true, false);

    let found = get_object_sources(&connection, object_id).unwrap();
    assert_eq!(found.len(), 1);

    let only = &found[0];
    assert_eq!(
        only.path, "/archive/photo.jpg",
        "Empty rel_path should NOT produce trailing slash"
    );
}
/// Sources inserted as not present are left out of `get_object_sources`.
#[test]
fn test_get_object_sources_excludes_not_present() {
    let connection = setup_test_db();
    let root_id = insert_root(&connection, "/photos", "source", false);
    let object_id = insert_object(&connection, "abc123hash", false);
    // One present and one not-present row for the same object.
    insert_source(&connection, root_id, "present.jpg", Some(object_id), true, false);
    insert_source(&connection, root_id, "deleted.jpg", Some(object_id), false, false);

    let found = get_object_sources(&connection, object_id).unwrap();
    assert_eq!(found.len(), 1);

    let survivor = &found[0];
    assert_eq!(survivor.path, "/photos/present.jpg");
}
/// Test helper: reads the `excluded` column of the `objects` row with the
/// given id and reports whether it is exactly `1`.
///
/// # Panics
///
/// Panics if the query fails (for example when no such row exists). Silently
/// mapping a failed lookup to `false` would let negative assertions such as
/// `assert!(!is_object_excluded_in_db(..))` pass without checking anything.
fn is_object_excluded_in_db(conn: &RusqliteConnection, object_id: i64) -> bool {
    let flag = conn
        .query_row(
            "SELECT excluded FROM objects WHERE id = ?",
            [object_id],
            |row| row.get::<_, i64>(0),
        )
        .unwrap_or_else(|err| {
            panic!("failed to read excluded flag for object {object_id}: {err}")
        });
    flag == 1
}
/// With no scope paths and no filters, `set_objects_by_filter` excludes the
/// object behind a present, hashed source.
#[test]
fn test_set_objects_by_filter_excludes_objects() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let root = insert_root(conn, "/photos", "source", false);
    let obj = insert_object(conn, "abc123hash", false);
    insert_source(conn, root, "photo.jpg", Some(obj), true, false);
    let options = SetOptions {
        dry_run: false,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_objects_by_filter(&mut db, &[], &[], &options)
        .expect("set_objects_by_filter should succeed");

    assert!(
        is_object_excluded_in_db(db.conn(), obj),
        "Object should be excluded after set_objects_by_filter"
    );
}
/// An object whose only source has `size = 0` must not be excluded by
/// `set_objects_by_filter`.
#[test]
fn test_set_objects_by_filter_skips_empty_files() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let root = insert_root(conn, "/photos", "source", false);
    let obj = insert_object(conn, "empty_file_hash", false);
    // Inserted with raw SQL so that `size` can be set to 0 explicitly.
    conn.execute(
        "INSERT INTO sources (root_id, rel_path, object_id, size, mtime, partial_hash, scanned_at, last_seen_at, device, inode, present, excluded)
VALUES (?, ?, ?, 0, 1704067200, '', 0, 0, 0, 0, 1, 0)",
        rusqlite::params![root, "empty.txt", obj],
    )
    .expect("inserting the zero-size source fixture should succeed");
    let options = SetOptions {
        dry_run: false,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_objects_by_filter(&mut db, &[], &[], &options)
        .expect("set_objects_by_filter should succeed");

    assert!(
        !is_object_excluded_in_db(db.conn(), obj),
        "Empty file objects should NOT be excluded"
    );
}
/// Running `set_objects_by_filter` over an object that is already excluded
/// succeeds and leaves it excluded.
#[test]
fn test_set_objects_by_filter_skips_already_excluded() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let root = insert_root(conn, "/photos", "source", false);
    // Object row is created already excluded.
    let obj = insert_object(conn, "already_excluded_hash", true);
    insert_source(conn, root, "photo.jpg", Some(obj), true, false);
    let options = SetOptions {
        dry_run: false,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_objects_by_filter(&mut db, &[], &[], &options)
        .expect("set_objects_by_filter should succeed");

    assert!(
        is_object_excluded_in_db(db.conn(), obj),
        "Already-excluded object should remain excluded"
    );
}
/// `set_objects_by_filter` must succeed when the only source has no object
/// attached (`object_id` is `None`). Only success of the call is checked.
#[test]
fn test_set_objects_by_filter_skips_unhashed() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let root = insert_root(conn, "/photos", "source", false);
    insert_source(conn, root, "unhashed.jpg", None, true, false);
    let options = SetOptions {
        dry_run: false,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_objects_by_filter(&mut db, &[], &[], &options)
        .expect("set_objects_by_filter should succeed when no source has an object");
}
/// With `dry_run: true`, `set_objects_by_filter` must leave the object's
/// excluded flag untouched.
#[test]
fn test_set_objects_by_filter_dry_run() {
    let mut db = make_test_db();
    let conn = db.conn_mut();
    let root = insert_root(conn, "/photos", "source", false);
    let obj = insert_object(conn, "dry_run_hash", false);
    insert_source(conn, root, "photo.jpg", Some(obj), true, false);
    let options = SetOptions {
        dry_run: true,
        verbose: false,
    };

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    set_objects_by_filter(&mut db, &[], &[], &options)
        .expect("set_objects_by_filter dry run should succeed");

    assert!(
        !is_object_excluded_in_db(db.conn(), obj),
        "Dry run should NOT actually exclude objects"
    );
}
/// `list_objects` succeeds when the database holds one excluded object with a
/// present source. Only success of the call is checked.
#[test]
fn test_list_objects_shows_excluded() {
    let db = make_test_db();
    let conn = db.conn();
    let root = insert_root(conn, "/photos", "source", false);
    let obj = insert_object(conn, "excluded_object_hash", true);
    insert_source(conn, root, "photo.jpg", Some(obj), true, false);

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    list_objects(&db).expect("list_objects should succeed with an excluded object");
}
/// `list_objects` succeeds for an excluded object that has two present sources
/// and one non-present source. Only success of the call is checked.
#[test]
fn test_list_objects_shows_source_count() {
    let db = make_test_db();
    let conn = db.conn();
    let root = insert_root(conn, "/photos", "source", false);
    let obj = insert_object(conn, "multi_source_hash", true);
    insert_source(conn, root, "photo1.jpg", Some(obj), true, false);
    insert_source(conn, root, "photo2.jpg", Some(obj), true, false);
    // Third row is inserted as not present.
    insert_source(conn, root, "deleted.jpg", Some(obj), false, false);

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    list_objects(&db).expect("list_objects should succeed with multiple sources");
}
/// `list_objects` succeeds when the database holds no excluded objects (the
/// single object here is inserted as not excluded).
#[test]
fn test_list_objects_empty() {
    let db = make_test_db();
    let conn = db.conn();
    let root = insert_root(conn, "/photos", "source", false);
    let obj = insert_object(conn, "not_excluded_hash", false);
    insert_source(conn, root, "photo.jpg", Some(obj), true, false);

    // `expect` reports the actual error on failure, unlike `assert!(result.is_ok())`.
    list_objects(&db).expect("list_objects should succeed when nothing is excluded");
}
}