use std::collections::{HashMap, HashSet};
use anyhow::Result;
use rusqlite::OptionalExtension;
use super::db::Connection;
use crate::domain::object::Object;
/// Maximum number of values bound into a single `IN (...)` list. The batch
/// helpers in this module split longer inputs into chunks of this size.
pub const BATCH_SIZE: usize = 1000;
/// Column list used by the `SELECT` statements on `objects` in this module.
/// The order must match the positional reads in `object_from_row`.
const OBJECT_COLUMNS: &str = "id, hash_type, hash_value, excluded";
/// Builds an [`Object`] from a row whose columns follow [`OBJECT_COLUMNS`].
///
/// Columns are read by position (`id`, `hash_type`, `hash_value`, `excluded`),
/// so the `SELECT` list feeding this function must keep that order.
///
/// # Errors
/// Returns the underlying `rusqlite` error if any column cannot be read or
/// converted to the field's type.
fn object_from_row(row: &rusqlite::Row) -> rusqlite::Result<Object> {
    let id = row.get(0)?;
    let hash_type = row.get(1)?;
    let hash_value = row.get(2)?;
    let excluded = row.get(3)?;

    Ok(Object {
        id,
        hash_type,
        hash_value,
        excluded,
    })
}
/// Loads the objects whose ids appear in `object_ids`, keyed by id.
///
/// The ids are queried in chunks of [`BATCH_SIZE`] so that each statement's
/// `IN` list stays bounded. Ids without a matching row are simply absent from
/// the returned map; an empty input yields an empty map.
///
/// # Errors
/// Returns an error if preparing or running a query fails, or if a row cannot
/// be converted into an [`Object`].
pub fn batch_fetch_by_ids(conn: &Connection, object_ids: &[i64]) -> Result<HashMap<i64, Object>> {
    let mut objects_by_id = HashMap::with_capacity(object_ids.len());

    // An empty slice produces no chunks, so no query is issued for it.
    for id_chunk in object_ids.chunks(BATCH_SIZE) {
        // One "?" per id in this chunk.
        let in_list = vec!["?"; id_chunk.len()].join(",");
        let sql = format!("SELECT {OBJECT_COLUMNS} FROM objects WHERE id IN ({in_list})");

        let mut stmt = conn.prepare(&sql)?;
        let fetched = stmt.query_map(rusqlite::params_from_iter(id_chunk.iter()), object_from_row)?;
        for object in fetched {
            let object = object?;
            objects_by_id.insert(object.id, object);
        }
    }

    Ok(objects_by_id)
}
pub fn fetch_by_hash(conn: &Connection, hash: &str) -> Result<Option<Object>> {
let sql = format!(
"SELECT {OBJECT_COLUMNS} FROM objects WHERE hash_value = ?"
);
let result = conn.query_row(&sql, [hash], object_from_row).optional()?;
Ok(result)
}
/// Returns the subset of `object_ids` that have at least one present source
/// in an archive.
///
/// * With `archive_root_id = Some(id)`, only present sources whose `root_id`
///   equals `id` count. The root's role is not checked in that case.
/// * With `archive_root_id = None`, present sources under any root whose role
///   is `'archive'` count.
///
/// Ids are processed in chunks of [`BATCH_SIZE`]; an empty input yields an
/// empty set.
///
/// # Errors
/// Returns an error if preparing or running a query fails.
pub fn batch_check_archived(
    conn: &Connection,
    object_ids: &[i64],
    archive_root_id: Option<i64>,
) -> Result<HashSet<i64>> {
    let mut archived = HashSet::new();

    // An empty slice produces no chunks, so no query is issued for it.
    for id_chunk in object_ids.chunks(BATCH_SIZE) {
        let in_list = vec!["?"; id_chunk.len()].join(",");
        let id_params = id_chunk
            .iter()
            .map(|&id| rusqlite::types::Value::from(id));

        let (sql, params): (String, Vec<rusqlite::types::Value>) = match archive_root_id {
            Some(root_id) => {
                let sql = format!(
                    "SELECT DISTINCT s.object_id
FROM sources s
WHERE s.root_id = ? AND s.present = 1
AND s.object_id IN ({})",
                    in_list
                );
                // The root id binds to the first placeholder, ahead of the ids.
                let params: Vec<rusqlite::types::Value> =
                    std::iter::once(rusqlite::types::Value::from(root_id))
                        .chain(id_params)
                        .collect();
                (sql, params)
            }
            None => {
                let sql = format!(
                    "SELECT DISTINCT s.object_id
FROM sources s
JOIN roots r ON s.root_id = r.id
WHERE r.role = 'archive' AND s.present = 1
AND s.object_id IN ({})",
                    in_list
                );
                let params: Vec<rusqlite::types::Value> = id_params.collect();
                (sql, params)
            }
        };

        let mut stmt = conn.prepare(&sql)?;
        let matched = stmt.query_map(rusqlite::params_from_iter(params), |row| {
            row.get::<_, i64>(0)
        })?;
        for object_id in matched {
            archived.insert(object_id?);
        }
    }

    Ok(archived)
}
/// Finds the full paths of every present archive copy of the given objects.
///
/// Returns a map from object id to its archive paths. Each path is the root's
/// `path` joined to the source's `rel_path` with `/`, or just the root path
/// when `rel_path` is empty. Only present sources under roots whose role is
/// `'archive'` are considered; objects with no such source are absent from
/// the map. Paths for one object are ordered by root path, then `rel_path`.
///
/// Ids are de-duplicated before being split into [`BATCH_SIZE`] chunks.
/// Without that, an id repeated in the input could land in two different
/// chunks and have its paths appended twice.
///
/// # Errors
/// Returns an error if preparing or running a query fails.
pub fn batch_find_archive_paths(
    conn: &Connection,
    object_ids: &[i64],
) -> Result<HashMap<i64, Vec<String>>> {
    if object_ids.is_empty() {
        return Ok(HashMap::new());
    }

    // Each distinct id must fall into exactly one chunk, otherwise its rows
    // would be pushed onto the same Vec once per chunk it appears in.
    let mut unique_ids = object_ids.to_vec();
    unique_ids.sort_unstable();
    unique_ids.dedup();

    let mut result: HashMap<i64, Vec<String>> = HashMap::new();
    for chunk in unique_ids.chunks(BATCH_SIZE) {
        let placeholders: Vec<&str> = chunk.iter().map(|_| "?").collect();
        let sql = format!(
            "SELECT s.object_id, r.path, s.rel_path
FROM sources s
JOIN roots r ON s.root_id = r.id
WHERE r.role = 'archive' AND s.present = 1
AND s.object_id IN ({})
ORDER BY s.object_id, r.path, s.rel_path",
            placeholders.join(",")
        );
        let params: Vec<rusqlite::types::Value> = chunk
            .iter()
            .map(|&id| rusqlite::types::Value::from(id))
            .collect();

        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map(rusqlite::params_from_iter(params), |row| {
            let object_id: i64 = row.get(0)?;
            let root_path: String = row.get(1)?;
            let rel_path: String = row.get(2)?;
            Ok((object_id, root_path, rel_path))
        })?;

        for row in rows {
            let (object_id, root_path, rel_path) = row?;
            // An empty rel_path means the source is the root itself.
            let full_path = if rel_path.is_empty() {
                root_path
            } else {
                format!("{root_path}/{rel_path}")
            };
            result.entry(object_id).or_default().push(full_path);
        }
    }

    Ok(result)
}
/// Finds the archive locations of objects identified by hash value.
///
/// Returns a map from hash value to `(archive_root_id, full_path)` pairs.
/// Each path is the root's `path` joined to the source's `rel_path` with `/`,
/// or just the root path when `rel_path` is empty. Only present sources under
/// roots whose role is `'archive'` are considered; hashes with no such source
/// are absent from the map. Entries for one hash are ordered by root id, then
/// `rel_path`.
///
/// Hash values are de-duplicated before being split into [`BATCH_SIZE`]
/// chunks. Without that, a hash repeated in the input could land in two
/// different chunks and have its entries appended twice.
///
/// # Errors
/// Returns an error if preparing or running a query fails.
pub fn batch_find_archive_info_by_hash(
    conn: &Connection,
    hash_values: &[&str],
) -> Result<HashMap<String, Vec<(i64, String)>>> {
    if hash_values.is_empty() {
        return Ok(HashMap::new());
    }

    // Each distinct hash must fall into exactly one chunk, otherwise its rows
    // would be pushed onto the same Vec once per chunk it appears in.
    let mut unique_hashes: Vec<&str> = hash_values.to_vec();
    unique_hashes.sort_unstable();
    unique_hashes.dedup();

    let mut result: HashMap<String, Vec<(i64, String)>> = HashMap::new();
    for chunk in unique_hashes.chunks(BATCH_SIZE) {
        let placeholders: Vec<&str> = chunk.iter().map(|_| "?").collect();
        let sql = format!(
            "SELECT o.hash_value, r.id, r.path, s.rel_path
FROM sources s
JOIN roots r ON s.root_id = r.id
JOIN objects o ON s.object_id = o.id
WHERE r.role = 'archive' AND s.present = 1
AND o.hash_value IN ({})
ORDER BY o.hash_value, r.id, s.rel_path",
            placeholders.join(",")
        );
        let params: Vec<rusqlite::types::Value> = chunk
            .iter()
            .map(|&h| rusqlite::types::Value::from(h.to_string()))
            .collect();

        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map(rusqlite::params_from_iter(params), |row| {
            let hash_value: String = row.get(0)?;
            let archive_root_id: i64 = row.get(1)?;
            let root_path: String = row.get(2)?;
            let rel_path: String = row.get(3)?;
            Ok((hash_value, archive_root_id, root_path, rel_path))
        })?;

        for row in rows {
            let (hash_value, archive_root_id, root_path, rel_path) = row?;
            // An empty rel_path means the source is the root itself.
            let full_path = if rel_path.is_empty() {
                root_path
            } else {
                format!("{root_path}/{rel_path}")
            };
            result
                .entry(hash_value)
                .or_default()
                .push((archive_root_id, full_path));
        }
    }

    Ok(result)
}
/// Sets or clears the `excluded` flag of the object with id `object_id`.
///
/// The number of affected rows is discarded, so updating an id that does not
/// exist is not reported as an error.
///
/// # Errors
/// Returns an error if the `UPDATE` statement fails.
pub fn set_excluded(conn: &Connection, object_id: i64, excluded: bool) -> Result<()> {
    // The column is bound as an integer: 1 when excluded, 0 otherwise.
    let flag = i64::from(excluded);
    conn.execute(
        "UPDATE objects SET excluded = ? WHERE id = ?",
        rusqlite::params![flag, object_id],
    )?;
    Ok(())
}
/// Returns every object whose `excluded` flag is set, ordered by id.
///
/// # Errors
/// Returns an error if preparing or running the query fails, or if a row
/// cannot be converted into an [`Object`].
pub fn fetch_excluded(conn: &Connection) -> Result<Vec<Object>> {
    let sql = format!("SELECT {OBJECT_COLUMNS} FROM objects WHERE excluded = 1 ORDER BY id");
    let mut stmt = conn.prepare(&sql)?;

    let mut excluded_objects = Vec::new();
    for object in stmt.query_map([], object_from_row)? {
        // Stop at the first row that fails to convert.
        excluded_objects.push(object?);
    }

    Ok(excluded_objects)
}
/// Row counts describing orphaned data: objects that have no source with
/// `present = 1`, together with the rows that hang off them.
#[derive(Debug, Clone, Default)]
pub struct OrphanedStats {
    /// Number of objects without any present source.
    pub object_count: i64,
    /// Number of non-present sources that reference those objects.
    pub source_count: i64,
    /// Number of `'source'` facts attached to those sources.
    pub source_fact_count: i64,
    /// Number of `'object'` facts attached to those objects.
    pub object_fact_count: i64,
}

impl OrphanedStats {
    /// Returns the combined number of source facts and object facts.
    pub fn total_fact_count(&self) -> i64 {
        let Self {
            source_fact_count,
            object_fact_count,
            ..
        } = self;
        source_fact_count + object_fact_count
    }
}
pub fn find_orphaned_stats(conn: &Connection) -> Result<OrphanedStats> {
let object_count: i64 = conn.query_row(
"SELECT COUNT(*) FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s
WHERE s.object_id = o.id AND s.present = 1
)",
[],
|row| row.get(0),
)?;
if object_count == 0 {
return Ok(OrphanedStats::default());
}
let source_count: i64 = conn.query_row(
"SELECT COUNT(*) FROM sources s
WHERE s.present = 0
AND s.object_id IN (
SELECT o.id FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s2
WHERE s2.object_id = o.id AND s2.present = 1
)
)",
[],
|row| row.get(0),
)?;
let source_fact_count: i64 = conn.query_row(
"SELECT COUNT(*) FROM facts f
WHERE f.entity_type = 'source'
AND f.entity_id IN (
SELECT s.id FROM sources s
WHERE s.present = 0
AND s.object_id IN (
SELECT o.id FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s2
WHERE s2.object_id = o.id AND s2.present = 1
)
)
)",
[],
|row| row.get(0),
)?;
let object_fact_count: i64 = conn.query_row(
"SELECT COUNT(*) FROM facts f
WHERE f.entity_type = 'object'
AND f.entity_id IN (
SELECT o.id FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s
WHERE s.object_id = o.id AND s.present = 1
)
)",
[],
|row| row.get(0),
)?;
Ok(OrphanedStats {
object_count,
source_count,
source_fact_count,
object_fact_count,
})
}
pub fn delete_orphaned(conn: &Connection) -> Result<OrphanedStats> {
let source_fact_count = conn.execute(
"DELETE FROM facts
WHERE entity_type = 'source'
AND entity_id IN (
SELECT s.id FROM sources s
WHERE s.present = 0
AND s.object_id IN (
SELECT o.id FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s2
WHERE s2.object_id = o.id AND s2.present = 1
)
)
)",
[],
)?;
let source_count = conn.execute(
"DELETE FROM sources
WHERE present = 0
AND object_id IN (
SELECT o.id FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s
WHERE s.object_id = o.id AND s.present = 1
)
)",
[],
)?;
let object_fact_count = conn.execute(
"DELETE FROM facts
WHERE entity_type = 'object'
AND entity_id IN (
SELECT o.id FROM objects o
WHERE NOT EXISTS (
SELECT 1 FROM sources s
WHERE s.object_id = o.id AND s.present = 1
)
)",
[],
)?;
let object_count = conn.execute(
"DELETE FROM objects
WHERE NOT EXISTS (
SELECT 1 FROM sources s
WHERE s.object_id = objects.id AND s.present = 1
)",
[],
)?;
Ok(OrphanedStats {
object_count: object_count as i64,
source_count: source_count as i64,
source_fact_count: source_fact_count as i64,
object_fact_count: object_fact_count as i64,
})
}
/// Returns the object identified by `(hash_type, hash_value)`, inserting it
/// first if it does not exist yet.
///
/// The insert ignores conflicts on `(hash_type, hash_value)`, so an existing
/// row is left untouched and returned as-is, including its `excluded` flag.
///
/// # Errors
/// Returns an error if the insert or the follow-up lookup fails.
pub fn get_or_create(conn: &Connection, hash_type: &str, hash_value: &str) -> Result<Object> {
    let select_sql =
        format!("SELECT {OBJECT_COLUMNS} FROM objects WHERE hash_type = ? AND hash_value = ?");

    // Insert first and ignore conflicts; afterwards the row exists either way.
    conn.execute(
        "INSERT INTO objects (hash_type, hash_value) VALUES (?, ?)
ON CONFLICT(hash_type, hash_value) DO NOTHING",
        rusqlite::params![hash_type, hash_value],
    )?;

    conn.query_row(
        &select_sql,
        rusqlite::params![hash_type, hash_value],
        object_from_row,
    )
    .map_err(Into::into)
}
// Tests for the object repository functions. Each test runs against a fresh
// database obtained from `open_in_memory_for_test`.
#[cfg(test)]
mod tests {
use super::*;
use crate::repo::open_in_memory_for_test;
use rusqlite::Connection as RusqliteConnection;
// Opens the database used by a single test.
fn setup_test_db() -> RusqliteConnection {
open_in_memory_for_test()
}
// Inserts a root with the given path and role and returns its row id.
fn insert_root(conn: &RusqliteConnection, path: &str, role: &str) -> i64 {
conn.execute(
"INSERT INTO roots (path, role) VALUES (?, ?)",
rusqlite::params![path, role],
)
.unwrap();
conn.last_insert_rowid()
}
// Inserts a 'sha256' object with the given hash value and excluded flag and
// returns its row id.
fn insert_object(conn: &RusqliteConnection, hash_value: &str, excluded: bool) -> i64 {
conn.execute(
"INSERT INTO objects (hash_type, hash_value, excluded) VALUES ('sha256', ?, ?)",
rusqlite::params![hash_value, excluded as i64],
)
.unwrap();
conn.last_insert_rowid()
}
// Inserts a source under `root_id`, optionally linked to an object, and
// returns its row id. All remaining columns get zero/empty filler values.
fn insert_source(
conn: &RusqliteConnection,
root_id: i64,
rel_path: &str,
object_id: Option<i64>,
present: bool,
) -> i64 {
conn.execute(
"INSERT INTO sources (root_id, rel_path, object_id, present, size, mtime, partial_hash, scanned_at, last_seen_at, device, inode)
VALUES (?, ?, ?, ?, 0, 0, '', 0, 0, 0, 0)",
rusqlite::params![root_id, rel_path, object_id, present as i64],
)
.unwrap();
conn.last_insert_rowid()
}
// ---- batch_fetch_by_ids ----
#[test]
fn batch_fetch_by_ids_empty_returns_empty() {
let conn = setup_test_db();
let result = batch_fetch_by_ids(&conn, &[]).unwrap();
assert!(result.is_empty());
}
#[test]
fn batch_fetch_by_ids_found() {
let conn = setup_test_db();
let obj_id = insert_object(&conn, "abc123", false);
let result = batch_fetch_by_ids(&conn, &[obj_id]).unwrap();
assert_eq!(result.len(), 1);
let obj = result.get(&obj_id).unwrap();
assert_eq!(obj.id, obj_id);
assert_eq!(obj.hash_type, "sha256");
assert_eq!(obj.hash_value, "abc123");
assert!(!obj.excluded);
}
// Ids with no matching row are left out of the result rather than failing.
#[test]
fn batch_fetch_by_ids_partial_missing_ids_ignored() {
let conn = setup_test_db();
let obj_id = insert_object(&conn, "abc123", false);
let result = batch_fetch_by_ids(&conn, &[obj_id, 999, 1000]).unwrap();
assert_eq!(result.len(), 1);
assert!(result.contains_key(&obj_id));
assert!(!result.contains_key(&999));
}
// The excluded flag does not filter objects out of the lookup.
#[test]
fn batch_fetch_by_ids_includes_excluded_objects() {
let conn = setup_test_db();
let obj_id = insert_object(&conn, "abc123", true);
let result = batch_fetch_by_ids(&conn, &[obj_id]).unwrap();
assert_eq!(result.len(), 1);
let obj = result.get(&obj_id).unwrap();
assert!(obj.is_excluded());
}
// ---- batch_check_archived ----
#[test]
fn batch_check_archived_empty_returns_empty() {
let conn = setup_test_db();
let result = batch_check_archived(&conn, &[], None).unwrap();
assert!(result.is_empty());
}
#[test]
fn batch_check_archived_finds_archived_objects() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "file.jpg", Some(obj_id), true);
let result = batch_check_archived(&conn, &[obj_id], None).unwrap();
assert_eq!(result.len(), 1);
assert!(result.contains(&obj_id));
}
// A present source under a root whose role is not 'archive' does not count.
#[test]
fn batch_check_archived_excludes_non_archive_roots() {
let conn = setup_test_db();
let source_root_id = insert_root(&conn, "/photos", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, source_root_id, "file.jpg", Some(obj_id), true);
let result = batch_check_archived(&conn, &[obj_id], None).unwrap();
assert!(result.is_empty());
}
// A source in an archive root that is not present does not count.
#[test]
fn batch_check_archived_requires_present_source() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "file.jpg", Some(obj_id), false);
let result = batch_check_archived(&conn, &[obj_id], None).unwrap();
assert!(result.is_empty());
}
// An object present in two archives is still reported once.
#[test]
fn batch_check_archived_deduplicates_multiple_archive_sources() {
let conn = setup_test_db();
let archive1_id = insert_root(&conn, "/archive1", "archive");
let archive2_id = insert_root(&conn, "/archive2", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive1_id, "file1.jpg", Some(obj_id), true);
insert_source(&conn, archive2_id, "file2.jpg", Some(obj_id), true);
let result = batch_check_archived(&conn, &[obj_id], None).unwrap();
assert_eq!(result.len(), 1);
assert!(result.contains(&obj_id));
}
// Restricting to the root that holds the object finds it.
#[test]
fn batch_check_archived_specific_root_filters_correctly() {
let conn = setup_test_db();
let archive1_id = insert_root(&conn, "/archive1", "archive");
let _archive2_id = insert_root(&conn, "/archive2", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive1_id, "file.jpg", Some(obj_id), true);
let result = batch_check_archived(&conn, &[obj_id], Some(archive1_id)).unwrap();
assert_eq!(result.len(), 1);
assert!(result.contains(&obj_id));
}
// Restricting to a different archive root does not find the object.
#[test]
fn batch_check_archived_specific_root_ignores_other_archives() {
let conn = setup_test_db();
let archive1_id = insert_root(&conn, "/archive1", "archive");
let archive2_id = insert_root(&conn, "/archive2", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive1_id, "file.jpg", Some(obj_id), true);
let result = batch_check_archived(&conn, &[obj_id], Some(archive2_id)).unwrap();
assert!(result.is_empty());
}
// 1050 ids exceed BATCH_SIZE (1000), so the lookup spans more than one chunk.
// Every tenth object (i = 0, 10, ..., 1040) is archived, giving 105 hits.
#[test]
fn batch_check_archived_handles_large_id_sets() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let mut object_ids = Vec::new();
for i in 0..1050 {
let obj_id = insert_object(&conn, &format!("hash_{i}"), false);
object_ids.push(obj_id);
if i % 10 == 0 {
insert_source(
&conn,
archive_id,
&format!("file_{i}.jpg"),
Some(obj_id),
true,
);
}
}
let result = batch_check_archived(&conn, &object_ids, None).unwrap();
assert_eq!(result.len(), 105);
}
// ---- batch_find_archive_paths ----
#[test]
fn batch_find_archive_paths_empty_returns_empty() {
let conn = setup_test_db();
let result = batch_find_archive_paths(&conn, &[]).unwrap();
assert!(result.is_empty());
}
// The full path is the root path and the relative path joined with '/'.
#[test]
fn batch_find_archive_paths_returns_correct_path_format() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "subdir/file.jpg", Some(obj_id), true);
let result = batch_find_archive_paths(&conn, &[obj_id]).unwrap();
assert_eq!(result.len(), 1);
let paths = result.get(&obj_id).unwrap();
assert_eq!(paths.len(), 1);
assert_eq!(paths[0], "/archive/subdir/file.jpg");
}
// An empty rel_path yields the root path alone, with no trailing '/'.
#[test]
fn batch_find_archive_paths_empty_rel_path() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "", Some(obj_id), true);
let result = batch_find_archive_paths(&conn, &[obj_id]).unwrap();
let paths = result.get(&obj_id).unwrap();
assert_eq!(paths[0], "/archive"); }
// Paths for one object come back ordered by root path.
#[test]
fn batch_find_archive_paths_multiple_paths_per_object() {
let conn = setup_test_db();
let archive1_id = insert_root(&conn, "/archive1", "archive");
let archive2_id = insert_root(&conn, "/archive2", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive1_id, "file.jpg", Some(obj_id), true);
insert_source(&conn, archive2_id, "copy.jpg", Some(obj_id), true);
let result = batch_find_archive_paths(&conn, &[obj_id]).unwrap();
assert_eq!(result.len(), 1);
let paths = result.get(&obj_id).unwrap();
assert_eq!(paths.len(), 2);
assert_eq!(paths[0], "/archive1/file.jpg");
assert_eq!(paths[1], "/archive2/copy.jpg");
}
#[test]
fn batch_find_archive_paths_excludes_non_archive_roots() {
let conn = setup_test_db();
let source_root_id = insert_root(&conn, "/photos", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, source_root_id, "file.jpg", Some(obj_id), true);
let result = batch_find_archive_paths(&conn, &[obj_id]).unwrap();
assert!(result.is_empty());
}
#[test]
fn batch_find_archive_paths_excludes_non_present() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "file.jpg", Some(obj_id), false);
let result = batch_find_archive_paths(&conn, &[obj_id]).unwrap();
assert!(result.is_empty());
}
// ---- batch_find_archive_info_by_hash ----
#[test]
fn batch_find_archive_info_by_hash_empty_returns_empty() {
let conn = setup_test_db();
let result = batch_find_archive_info_by_hash(&conn, &[]).unwrap();
assert!(result.is_empty());
}
// Each entry is (archive root id, full path).
#[test]
fn batch_find_archive_info_by_hash_single_hash_single_archive() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "subdir/file.jpg", Some(obj_id), true);
let result = batch_find_archive_info_by_hash(&conn, &["abc123"]).unwrap();
assert_eq!(result.len(), 1);
let info = result.get("abc123").unwrap();
assert_eq!(info.len(), 1);
assert_eq!(info[0].0, archive_id); assert_eq!(info[0].1, "/archive/subdir/file.jpg"); }
// Each hash is reported with the id of the archive root that holds it.
#[test]
fn batch_find_archive_info_by_hash_returns_archive_root_id() {
let conn = setup_test_db();
let archive1_id = insert_root(&conn, "/archive1", "archive");
let archive2_id = insert_root(&conn, "/archive2", "archive");
let obj1_id = insert_object(&conn, "hash1", false);
let obj2_id = insert_object(&conn, "hash2", false);
insert_source(&conn, archive1_id, "file1.jpg", Some(obj1_id), true);
insert_source(&conn, archive2_id, "file2.jpg", Some(obj2_id), true);
let result = batch_find_archive_info_by_hash(&conn, &["hash1", "hash2"]).unwrap();
let info1 = result.get("hash1").unwrap();
assert_eq!(info1[0].0, archive1_id);
let info2 = result.get("hash2").unwrap();
assert_eq!(info2[0].0, archive2_id);
}
// Entries for one hash come back ordered by archive root id.
#[test]
fn batch_find_archive_info_by_hash_multiple_archives_per_hash() {
let conn = setup_test_db();
let archive1_id = insert_root(&conn, "/archive1", "archive");
let archive2_id = insert_root(&conn, "/archive2", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive1_id, "file.jpg", Some(obj_id), true);
insert_source(&conn, archive2_id, "copy.jpg", Some(obj_id), true);
let result = batch_find_archive_info_by_hash(&conn, &["abc123"]).unwrap();
assert_eq!(result.len(), 1);
let info = result.get("abc123").unwrap();
assert_eq!(info.len(), 2);
assert_eq!(info[0].0, archive1_id);
assert_eq!(info[0].1, "/archive1/file.jpg");
assert_eq!(info[1].0, archive2_id);
assert_eq!(info[1].1, "/archive2/copy.jpg");
}
// An empty rel_path yields the root path alone, with no trailing '/'.
#[test]
fn batch_find_archive_info_by_hash_empty_rel_path() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "", Some(obj_id), true);
let result = batch_find_archive_info_by_hash(&conn, &["abc123"]).unwrap();
let info = result.get("abc123").unwrap();
assert_eq!(info[0].1, "/archive"); }
#[test]
fn batch_find_archive_info_by_hash_excludes_non_archive_roots() {
let conn = setup_test_db();
let source_root_id = insert_root(&conn, "/photos", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, source_root_id, "file.jpg", Some(obj_id), true);
let result = batch_find_archive_info_by_hash(&conn, &["abc123"]).unwrap();
assert!(result.is_empty());
}
#[test]
fn batch_find_archive_info_by_hash_excludes_non_present() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, archive_id, "file.jpg", Some(obj_id), false);
let result = batch_find_archive_info_by_hash(&conn, &["abc123"]).unwrap();
assert!(result.is_empty());
}
// Hashes with no archived object get no entry in the result map.
#[test]
fn batch_find_archive_info_by_hash_not_found_hashes_excluded() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let obj_id = insert_object(&conn, "exists", false);
insert_source(&conn, archive_id, "file.jpg", Some(obj_id), true);
let result = batch_find_archive_info_by_hash(&conn, &["exists", "missing"]).unwrap();
assert_eq!(result.len(), 1);
assert!(result.contains_key("exists"));
assert!(!result.contains_key("missing"));
}
// 1050 hashes exceed BATCH_SIZE (1000), so the lookup spans more than one
// chunk. Every tenth object is archived, giving 105 hits.
#[test]
fn batch_find_archive_info_by_hash_handles_large_hash_sets() {
let conn = setup_test_db();
let archive_id = insert_root(&conn, "/archive", "archive");
let mut hashes: Vec<String> = Vec::new();
for i in 0..1050 {
let hash = format!("hash_{i}");
let obj_id = insert_object(&conn, &hash, false);
hashes.push(hash);
if i % 10 == 0 {
insert_source(
&conn,
archive_id,
&format!("file_{i}.jpg"),
Some(obj_id),
true,
);
}
}
let hash_refs: Vec<&str> = hashes.iter().map(|s| s.as_str()).collect();
let result = batch_find_archive_info_by_hash(&conn, &hash_refs).unwrap();
assert_eq!(result.len(), 105);
}
// ---- set_excluded ----
// Checks the stored flag before and after setting it.
#[test]
fn set_excluded_marks_object() {
let conn = setup_test_db();
let obj_id = insert_object(&conn, "abc123", false);
let excluded: i64 = conn
.query_row(
"SELECT excluded FROM objects WHERE id = ?",
rusqlite::params![obj_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(excluded, 0);
set_excluded(&conn, obj_id, true).unwrap();
let excluded: i64 = conn
.query_row(
"SELECT excluded FROM objects WHERE id = ?",
rusqlite::params![obj_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(excluded, 1);
}
// Checks the stored flag before and after clearing it.
#[test]
fn set_excluded_clears_object() {
let conn = setup_test_db();
let obj_id = insert_object(&conn, "abc123", true);
let excluded: i64 = conn
.query_row(
"SELECT excluded FROM objects WHERE id = ?",
rusqlite::params![obj_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(excluded, 1);
set_excluded(&conn, obj_id, false).unwrap();
let excluded: i64 = conn
.query_row(
"SELECT excluded FROM objects WHERE id = ?",
rusqlite::params![obj_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(excluded, 0);
}
// Updating an id that matches no row still returns Ok.
#[test]
fn set_excluded_nonexistent_object() {
let conn = setup_test_db();
let result = set_excluded(&conn, 99999, true);
assert!(result.is_ok());
}
// ---- fetch_by_hash ----
#[test]
fn fetch_by_hash_returns_object() {
let conn = setup_test_db();
let obj_id = insert_object(&conn, "abc123def456", false);
let result = fetch_by_hash(&conn, "abc123def456").unwrap();
assert!(result.is_some());
let obj = result.unwrap();
assert_eq!(obj.id, obj_id);
assert_eq!(obj.hash_type, "sha256");
assert_eq!(obj.hash_value, "abc123def456");
assert!(!obj.excluded);
}
#[test]
fn fetch_by_hash_not_found() {
let conn = setup_test_db();
let result = fetch_by_hash(&conn, "nonexistent_hash").unwrap();
assert!(result.is_none());
}
#[test]
fn fetch_by_hash_returns_excluded_flag() {
let conn = setup_test_db();
insert_object(&conn, "excluded_hash", true);
let result = fetch_by_hash(&conn, "excluded_hash").unwrap();
assert!(result.is_some());
let obj = result.unwrap();
assert!(obj.excluded);
}
// ---- fetch_excluded ----
// Results are ordered by id, which here matches insertion order.
#[test]
fn fetch_excluded_returns_only_excluded() {
let conn = setup_test_db();
insert_object(&conn, "excluded1", true);
insert_object(&conn, "not_excluded", false);
insert_object(&conn, "excluded2", true);
let result = fetch_excluded(&conn).unwrap();
assert_eq!(result.len(), 2);
assert!(result.iter().all(|o| o.excluded));
assert_eq!(result[0].hash_value, "excluded1");
assert_eq!(result[1].hash_value, "excluded2");
}
#[test]
fn fetch_excluded_empty_when_none_excluded() {
let conn = setup_test_db();
insert_object(&conn, "not_excluded1", false);
insert_object(&conn, "not_excluded2", false);
let result = fetch_excluded(&conn).unwrap();
assert!(result.is_empty());
}
// ---- get_or_create ----
// A new object comes back fully populated and is stored exactly once.
#[test]
fn get_or_create_creates_new_returns_complete_object() {
let conn = setup_test_db();
let obj = get_or_create(&conn, "sha256", "abc123").unwrap();
assert!(obj.id > 0);
assert_eq!(obj.hash_type, "sha256");
assert_eq!(obj.hash_value, "abc123");
assert!(!obj.excluded);
let count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM objects WHERE hash_type = 'sha256' AND hash_value = 'abc123'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(count, 1);
}
// An existing row is returned unchanged, including its excluded flag.
#[test]
fn get_or_create_returns_existing_with_excluded_flag() {
let conn = setup_test_db();
let id1 = insert_object(&conn, "existing_hash", true);
let obj = get_or_create(&conn, "sha256", "existing_hash").unwrap();
assert_eq!(obj.id, id1);
assert_eq!(obj.hash_value, "existing_hash");
assert!(obj.excluded);
let count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM objects WHERE hash_value = 'existing_hash'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(count, 1);
}
// Repeated calls return the same id and never create a second row.
#[test]
fn get_or_create_is_idempotent() {
let conn = setup_test_db();
let obj1 = get_or_create(&conn, "sha256", "same_hash").unwrap();
let obj2 = get_or_create(&conn, "sha256", "same_hash").unwrap();
let obj3 = get_or_create(&conn, "sha256", "same_hash").unwrap();
assert_eq!(obj1.id, obj2.id);
assert_eq!(obj2.id, obj3.id);
assert_eq!(obj1.hash_value, "same_hash");
let count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM objects WHERE hash_value = 'same_hash'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(count, 1);
}
#[test]
fn get_or_create_different_hashes() {
let conn = setup_test_db();
let obj1 = get_or_create(&conn, "sha256", "hash1").unwrap();
let obj2 = get_or_create(&conn, "sha256", "hash2").unwrap();
assert_ne!(obj1.id, obj2.id);
assert_eq!(obj1.hash_value, "hash1");
assert_eq!(obj2.hash_value, "hash2");
let count: i64 = conn
.query_row("SELECT COUNT(*) FROM objects", [], |row| row.get(0))
.unwrap();
assert_eq!(count, 2);
}
// Inserts a text fact for the given entity. `observed_basis_rev` is set to 0
// for 'source' facts and NULL for every other entity type.
fn insert_fact(
conn: &RusqliteConnection,
entity_type: &str,
entity_id: i64,
key: &str,
value: &str,
) {
conn.execute(
"INSERT INTO facts (entity_type, entity_id, key, value_text, observed_at, observed_basis_rev)
VALUES (?, ?, ?, ?, 0, CASE WHEN ? = 'source' THEN 0 ELSE NULL END)",
rusqlite::params![entity_type, entity_id, key, value, entity_type],
)
.unwrap();
}
// ---- find_orphaned_stats ----
#[test]
fn find_orphaned_stats_no_orphans() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, root_id, "file.jpg", Some(obj_id), true);
let stats = find_orphaned_stats(&conn).unwrap();
assert_eq!(stats.object_count, 0);
assert_eq!(stats.source_count, 0);
assert_eq!(stats.source_fact_count, 0);
assert_eq!(stats.object_fact_count, 0);
}
// An object with no sources at all counts as orphaned.
#[test]
fn find_orphaned_stats_object_with_no_sources() {
let conn = setup_test_db();
insert_object(&conn, "abc123", false);
let stats = find_orphaned_stats(&conn).unwrap();
assert_eq!(stats.object_count, 1);
assert_eq!(stats.source_count, 0);
}
// An object whose sources are all non-present is orphaned, and each of those
// sources is counted.
#[test]
fn find_orphaned_stats_object_with_only_non_present_sources() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, root_id, "file1.jpg", Some(obj_id), false); insert_source(&conn, root_id, "file2.jpg", Some(obj_id), false);
let stats = find_orphaned_stats(&conn).unwrap();
assert_eq!(stats.object_count, 1);
assert_eq!(stats.source_count, 2);
}
// Source facts and object facts are counted separately and summed by
// total_fact_count.
#[test]
fn find_orphaned_stats_counts_facts() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let obj_id = insert_object(&conn, "abc123", false);
let source_id = insert_source(&conn, root_id, "file.jpg", Some(obj_id), false);
insert_fact(&conn, "source", source_id, "content.Make", "Canon");
insert_fact(&conn, "source", source_id, "content.Model", "EOS");
insert_fact(&conn, "object", obj_id, "content.hash.sha256", "abc123");
let stats = find_orphaned_stats(&conn).unwrap();
assert_eq!(stats.object_count, 1);
assert_eq!(stats.source_count, 1);
assert_eq!(stats.source_fact_count, 2);
assert_eq!(stats.object_fact_count, 1);
assert_eq!(stats.total_fact_count(), 3);
}
// Only the orphaned object and its rows are counted; the active one is not.
#[test]
fn find_orphaned_stats_mixed_orphaned_and_active() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let active_obj_id = insert_object(&conn, "active_hash", false);
insert_source(&conn, root_id, "active.jpg", Some(active_obj_id), true);
insert_fact(&conn, "object", active_obj_id, "content.Make", "Canon");
let orphaned_obj_id = insert_object(&conn, "orphaned_hash", false);
insert_source(&conn, root_id, "orphaned.jpg", Some(orphaned_obj_id), false);
insert_fact(&conn, "object", orphaned_obj_id, "content.Make", "Nikon");
let stats = find_orphaned_stats(&conn).unwrap();
assert_eq!(stats.object_count, 1);
assert_eq!(stats.source_count, 1);
assert_eq!(stats.object_fact_count, 1);
}
// ---- delete_orphaned ----
#[test]
fn delete_orphaned_no_orphans() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, root_id, "file.jpg", Some(obj_id), true);
let stats = delete_orphaned(&conn).unwrap();
assert_eq!(stats.object_count, 0);
let obj_count: i64 = conn
.query_row("SELECT COUNT(*) FROM objects", [], |row| row.get(0))
.unwrap();
assert_eq!(obj_count, 1);
}
#[test]
fn delete_orphaned_removes_orphaned_object() {
let conn = setup_test_db();
insert_object(&conn, "abc123", false);
let stats = delete_orphaned(&conn).unwrap();
assert_eq!(stats.object_count, 1);
let obj_count: i64 = conn
.query_row("SELECT COUNT(*) FROM objects", [], |row| row.get(0))
.unwrap();
assert_eq!(obj_count, 0);
}
// The object, its non-present source and the facts of both are all removed.
#[test]
fn delete_orphaned_cascade_deletes_all() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let obj_id = insert_object(&conn, "abc123", false);
let source_id = insert_source(&conn, root_id, "file.jpg", Some(obj_id), false);
insert_fact(&conn, "source", source_id, "content.Make", "Canon");
insert_fact(&conn, "object", obj_id, "content.hash.sha256", "abc123");
let stats = delete_orphaned(&conn).unwrap();
assert_eq!(stats.object_count, 1);
assert_eq!(stats.source_count, 1);
assert_eq!(stats.source_fact_count, 1);
assert_eq!(stats.object_fact_count, 1);
let obj_count: i64 = conn
.query_row("SELECT COUNT(*) FROM objects", [], |row| row.get(0))
.unwrap();
let src_count: i64 = conn
.query_row("SELECT COUNT(*) FROM sources", [], |row| row.get(0))
.unwrap();
let fact_count: i64 = conn
.query_row("SELECT COUNT(*) FROM facts", [], |row| row.get(0))
.unwrap();
assert_eq!(obj_count, 0);
assert_eq!(src_count, 0);
assert_eq!(fact_count, 0);
}
// The active object keeps its source and both of its facts; only the
// orphaned object's rows are removed.
#[test]
fn delete_orphaned_preserves_active_objects() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let active_obj_id = insert_object(&conn, "active_hash", false);
let active_source_id =
insert_source(&conn, root_id, "active.jpg", Some(active_obj_id), true);
insert_fact(&conn, "object", active_obj_id, "content.Make", "Canon");
insert_fact(&conn, "source", active_source_id, "source.policy", "keep");
let orphaned_obj_id = insert_object(&conn, "orphaned_hash", false);
let orphaned_source_id =
insert_source(&conn, root_id, "orphaned.jpg", Some(orphaned_obj_id), false);
insert_fact(&conn, "object", orphaned_obj_id, "content.Make", "Nikon");
insert_fact(
&conn,
"source",
orphaned_source_id,
"source.policy",
"delete",
);
let stats = delete_orphaned(&conn).unwrap();
assert_eq!(stats.object_count, 1);
assert_eq!(stats.source_count, 1);
assert_eq!(stats.source_fact_count, 1);
assert_eq!(stats.object_fact_count, 1);
let obj_count: i64 = conn
.query_row("SELECT COUNT(*) FROM objects", [], |row| row.get(0))
.unwrap();
let src_count: i64 = conn
.query_row("SELECT COUNT(*) FROM sources", [], |row| row.get(0))
.unwrap();
let fact_count: i64 = conn
.query_row("SELECT COUNT(*) FROM facts", [], |row| row.get(0))
.unwrap();
assert_eq!(obj_count, 1);
assert_eq!(src_count, 1);
assert_eq!(fact_count, 2); }
// An object with at least one present source is not orphaned, so its
// non-present source is kept as well.
#[test]
fn delete_orphaned_handles_object_with_mixed_present_sources() {
let conn = setup_test_db();
let root_id = insert_root(&conn, "/root", "source");
let obj_id = insert_object(&conn, "abc123", false);
insert_source(&conn, root_id, "present.jpg", Some(obj_id), true);
insert_source(&conn, root_id, "not_present.jpg", Some(obj_id), false);
let stats = delete_orphaned(&conn).unwrap();
assert_eq!(stats.object_count, 0);
let src_count: i64 = conn
.query_row("SELECT COUNT(*) FROM sources", [], |row| row.get(0))
.unwrap();
assert_eq!(src_count, 2);
}
}