pub mod asset_manifest;
pub mod audit_trail;
pub mod batch_archive;
pub mod catalog;
pub mod catalog_export;
pub mod checksum;
pub mod dedup_archive;
pub mod fixity;
pub mod format_registry;
pub mod indexing;
pub mod ingest_log;
pub mod integrity_scan;
pub mod migration;
pub mod preservation;
pub mod preservation_policy;
pub mod quarantine;
pub mod report;
pub mod restore_plan;
pub mod retention_schedule;
pub mod search_index;
pub mod tape;
pub mod validate;
pub mod version_history;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use thiserror::Error;
/// Error type shared by the archive verification API declared in this file.
///
/// `Display` text for every variant comes from its `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum ArchiveError {
/// An I/O failure; `#[from]` lets `?` convert a [`std::io::Error`] into this variant.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// A database failure; `#[from]` lets `?` convert a [`sqlx::Error`] into this variant.
#[error("Database error: {0}")]
Database(#[from] sqlx::Error),
/// Carries the expected and the actual checksum strings of a mismatch.
#[error("Checksum mismatch: expected {expected}, got {actual}")]
ChecksumMismatch { expected: String, actual: String },
/// Free-form message reported as a validation error.
#[error("Validation error: {0}")]
Validation(String),
/// Free-form message reported as detected corruption.
#[error("Corruption detected: {0}")]
Corruption(String),
/// Free-form configuration message; this file uses it for "Database not initialized".
#[error("Configuration error: {0}")]
Config(String),
/// Free-form message reported as a quarantine error.
#[error("Quarantine error: {0}")]
Quarantine(String),
/// Free-form message reported as a report generation error.
#[error("Report generation error: {0}")]
Report(String),
}
/// Convenience alias for a `Result` whose error type is [`ArchiveError`].
pub type ArchiveResult<T> = Result<T, ArchiveError>;
/// Settings consumed by [`ArchiveVerifier`].
///
/// Several fields are not read anywhere in this file; their descriptions below
/// are marked as assumptions and should be confirmed against the module that
/// consumes them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationConfig {
/// BLAKE3 flag. If any of the four checksum flags is set,
/// `ArchiveVerifier::verify_file` calls `checksum::compute_checksums` with this config.
pub enable_blake3: bool,
/// MD5 flag; see `enable_blake3` for how the checksum flags are used here.
pub enable_md5: bool,
/// SHA-256 flag; see `enable_blake3` for how the checksum flags are used here.
pub enable_sha256: bool,
/// CRC32 flag; see `enable_blake3` for how the checksum flags are used here.
pub enable_crc32: bool,
/// Not read in this file; presumably requests sidecar checksum files — TODO confirm.
pub generate_sidecars: bool,
/// When set, `ArchiveVerifier::verify_file` runs `validate::validate_file` on the file.
pub validate_containers: bool,
/// When set and a database pool exists, `ArchiveVerifier::verify_file` runs
/// `fixity::check_fixity`.
pub enable_fixity_checks: bool,
/// Not read in this file; presumably the number of days between scheduled
/// fixity checks — TODO confirm in `fixity`.
pub fixity_check_interval_days: u32,
/// Not read in this file; presumably moves failing files to quarantine
/// automatically — TODO confirm in `quarantine`.
pub auto_quarantine: bool,
/// Not read in this file; presumably the worker count for parallel work — TODO confirm.
pub parallel_threads: usize,
/// Path used by `ArchiveVerifier::initialize` to build the `sqlite:` connection URL.
pub database_path: PathBuf,
/// Not read in this file; presumably the directory that receives quarantined
/// files — TODO confirm in `quarantine`.
pub quarantine_dir: PathBuf,
/// Not read in this file; presumably enables writing PREMIS event records — TODO confirm.
pub enable_premis_logging: bool,
/// Not read in this file; presumably enables BagIt packaging support — TODO confirm.
pub enable_bagit: bool,
}
impl Default for VerificationConfig {
    /// Returns the stock configuration: BLAKE3, SHA-256 and CRC32 enabled, MD5
    /// disabled, sidecars/validation/fixity/PREMIS on, auto-quarantine and BagIt off.
    fn default() -> Self {
        // Values that need computing or allocation are bound first so the
        // literal below reads as a plain table of settings.
        let parallel_threads = num_cpus::get();
        let database_path = PathBuf::from("archive_verification.db");
        let quarantine_dir = PathBuf::from("quarantine");

        Self {
            // Checksum algorithms.
            enable_blake3: true,
            enable_sha256: true,
            enable_crc32: true,
            enable_md5: false,

            // Verification features.
            generate_sidecars: true,
            validate_containers: true,
            enable_fixity_checks: true,
            fixity_check_interval_days: 90,
            enable_premis_logging: true,
            enable_bagit: false,
            auto_quarantine: false,

            // Runtime resources and locations.
            parallel_threads,
            database_path,
            quarantine_dir,
        }
    }
}
/// Front end for file verification, fixity checking and report generation.
///
/// Holds the active [`VerificationConfig`] and, once `initialize` has
/// succeeded, the SQLite connection pool used by the database-backed
/// operations.
#[derive(Debug)]
pub struct ArchiveVerifier {
    /// Settings applied to every operation.
    config: VerificationConfig,
    /// `None` until `initialize` has connected and created the schema.
    db_pool: Option<sqlx::SqlitePool>,
}
impl ArchiveVerifier {
/// Creates a verifier with the default [`VerificationConfig`] and no
/// database connection.
pub fn new() -> Self {
    let config = VerificationConfig::default();
    Self {
        db_pool: None,
        config,
    }
}
/// Creates a verifier that uses the supplied `config`; the database pool
/// starts out unset.
pub fn with_config(config: VerificationConfig) -> Self {
    let db_pool = None;
    Self { config, db_pool }
}
/// Opens the SQLite database at `config.database_path`, creates the schema
/// if needed, and stores the pool on the verifier.
///
/// The database file is created when it does not exist yet, so a first run
/// against a fresh location succeeds.
///
/// # Errors
///
/// Returns [`ArchiveError::Database`] when the connection URL cannot be
/// parsed, the database cannot be opened, or a `CREATE TABLE` statement fails.
pub async fn initialize(&mut self) -> ArchiveResult<()> {
    let db_url = format!("sqlite:{}", self.config.database_path.display());
    // A bare `sqlite:<path>` URL opens read-write WITHOUT create, which fails
    // on a missing file. Parsing the same URL and then enabling
    // `create_if_missing` leaves the rest of the URL handling unchanged.
    let options = db_url
        .parse::<sqlx::sqlite::SqliteConnectOptions>()?
        .create_if_missing(true);
    let pool = sqlx::SqlitePool::connect_with(options).await?;
    self.create_tables(&pool).await?;
    // Only publish the pool once the schema is known to exist.
    self.db_pool = Some(pool);
    Ok(())
}
/// Creates the four tables used by this crate (`checksums`, `fixity_checks`,
/// `premis_events`, `quarantine_records`) when they do not exist yet.
///
/// Statements run one after another in that order; the first failure is
/// returned as [`ArchiveError::Database`] and later statements are skipped.
async fn create_tables(&self, pool: &sqlx::SqlitePool) -> ArchiveResult<()> {
    // The statement text is kept verbatim, so the raw strings are not indented.
    const SCHEMA: [&str; 4] = [
        r"
CREATE TABLE IF NOT EXISTS checksums (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
file_size INTEGER NOT NULL,
blake3 TEXT,
md5 TEXT,
sha256 TEXT,
crc32 TEXT,
created_at TEXT NOT NULL,
last_verified_at TEXT
)
",
        r"
CREATE TABLE IF NOT EXISTS fixity_checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
check_time TEXT NOT NULL,
status TEXT NOT NULL,
error_message TEXT,
blake3_match BOOLEAN,
md5_match BOOLEAN,
sha256_match BOOLEAN,
crc32_match BOOLEAN
)
",
        r"
CREATE TABLE IF NOT EXISTS premis_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
event_id TEXT NOT NULL UNIQUE,
event_type TEXT NOT NULL,
event_date_time TEXT NOT NULL,
event_detail TEXT,
event_outcome TEXT NOT NULL,
event_outcome_detail TEXT,
linking_object_id TEXT NOT NULL
)
",
        r"
CREATE TABLE IF NOT EXISTS quarantine_records (
id INTEGER PRIMARY KEY AUTOINCREMENT,
original_path TEXT NOT NULL,
quarantine_path TEXT NOT NULL,
quarantine_date TEXT NOT NULL,
reason TEXT NOT NULL,
checksum_before TEXT,
auto_quarantine BOOLEAN NOT NULL,
restored BOOLEAN DEFAULT 0,
restore_date TEXT
)
",
    ];

    for ddl in SCHEMA {
        sqlx::query(ddl).execute(pool).await?;
    }
    Ok(())
}
pub fn config(&self) -> &VerificationConfig {
&self.config
}
pub fn config_mut(&mut self) -> &mut VerificationConfig {
&mut self.config
}
pub fn db_pool(&self) -> Option<&sqlx::SqlitePool> {
self.db_pool.as_ref()
}
/// Verifies a single file according to the active configuration.
///
/// Steps, in order:
/// 1. compute checksums when at least one checksum flag is enabled;
/// 2. run container validation when `validate_containers` is set, recording a
///    failure in `validation_errors` and marking the result `ValidationFailed`;
/// 3. run a fixity check when `enable_fixity_checks` is set and a database
///    pool is available (without a pool the step is silently skipped).
///
/// # Errors
///
/// Propagates errors from `checksum::compute_checksums` and
/// `fixity::check_fixity`. A validation failure is reported inside the
/// returned [`VerificationResult`] rather than as an `Err`.
pub async fn verify_file(&self, path: &Path) -> ArchiveResult<VerificationResult> {
    let cfg = &self.config;
    // Captured before any work so the timestamp marks the start of verification.
    let verified_at = Utc::now();

    let wants_checksums =
        cfg.enable_blake3 || cfg.enable_md5 || cfg.enable_sha256 || cfg.enable_crc32;
    let checksums = if wants_checksums {
        checksum::compute_checksums(path, cfg).await?
    } else {
        ChecksumSet::default()
    };

    let mut status = VerificationStatus::Success;
    let mut validation_errors = Vec::new();
    if cfg.validate_containers {
        if let Err(e) = validate::validate_file(path).await {
            validation_errors.push(format!("Validation error: {e}"));
            status = VerificationStatus::ValidationFailed;
        }
    }

    let fixity_status = match self.db_pool.as_ref() {
        Some(pool) if cfg.enable_fixity_checks => {
            Some(fixity::check_fixity(path, &checksums, pool).await?)
        }
        _ => None,
    };

    Ok(VerificationResult {
        file_path: path.to_path_buf(),
        verified_at,
        status,
        checksums,
        validation_errors,
        fixity_status,
    })
}
/// Verifies `paths` one after another and returns the results in input order.
///
/// # Errors
///
/// Stops at the first file whose verification returns an error and
/// propagates that error; results gathered so far are discarded.
pub async fn verify_files(&self, paths: &[PathBuf]) -> ArchiveResult<Vec<VerificationResult>> {
    let mut results = Vec::with_capacity(paths.len());
    for path in paths.iter() {
        results.push(self.verify_file(path.as_path()).await?);
    }
    Ok(results)
}
/// Runs the scheduled fixity checks through `fixity::run_scheduled_checks`.
///
/// # Errors
///
/// Returns [`ArchiveError::Config`] when no database pool has been set up,
/// otherwise whatever `fixity::run_scheduled_checks` returns.
pub async fn run_fixity_checks(&self) -> ArchiveResult<fixity::FixityReport> {
    let Some(pool) = self.db_pool.as_ref() else {
        return Err(ArchiveError::Config("Database not initialized".to_string()));
    };
    fixity::run_scheduled_checks(pool, &self.config).await
}
/// Generates a report in `format` at `output_path` via `report::generate_report`.
///
/// # Errors
///
/// Returns [`ArchiveError::Config`] when no database pool has been set up,
/// otherwise whatever `report::generate_report` returns.
pub async fn generate_report(
    &self,
    format: report::ReportFormat,
    output_path: &Path,
) -> ArchiveResult<()> {
    match self.db_pool.as_ref() {
        None => Err(ArchiveError::Config("Database not initialized".to_string())),
        Some(pool) => report::generate_report(pool, format, output_path).await,
    }
}
}
impl Default for ArchiveVerifier {
fn default() -> Self {
Self::new()
}
}
/// Checksum values for one file, one optional slot per supported algorithm.
///
/// `Default` leaves every slot `None`. Presumably a slot stays `None` when its
/// algorithm is disabled in the config — confirm in `checksum::compute_checksums`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ChecksumSet {
/// BLAKE3 value, if present.
pub blake3: Option<String>,
/// MD5 value, if present.
pub md5: Option<String>,
/// SHA-256 value, if present.
pub sha256: Option<String>,
/// CRC32 value, if present.
pub crc32: Option<String>,
}
/// Outcome of verifying a single file, as produced by `ArchiveVerifier::verify_file`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationResult {
/// Path that was verified.
pub file_path: PathBuf,
/// UTC timestamp taken when `verify_file` started building this result.
pub verified_at: DateTime<Utc>,
/// Overall outcome; `verify_file` starts at `Success` and switches to
/// `ValidationFailed` when validation reports an error.
pub status: VerificationStatus,
/// Checksums computed for the file; left at the default (all `None`) when
/// every checksum flag is disabled.
pub checksums: ChecksumSet,
/// Messages of the form `"Validation error: ..."` collected during validation.
pub validation_errors: Vec<String>,
/// Fixity outcome; `None` when fixity checks are disabled or no database pool exists.
pub fixity_status: Option<fixity::FixityStatus>,
}
/// Overall outcome stored in `VerificationResult::status`.
///
/// Within this file only `Success` and `ValidationFailed` are ever assigned
/// (in `ArchiveVerifier::verify_file`); the remaining variants are presumably
/// set by other modules — confirm against their callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum VerificationStatus {
/// Initial status of every result; kept when no validation error is recorded.
Success,
/// Not assigned in this file; presumably a stored checksum did not match — TODO confirm.
ChecksumMismatch,
/// Set by `verify_file` when `validate::validate_file` returns an error.
ValidationFailed,
/// Not assigned in this file; presumably the file was found corrupted — TODO confirm.
Corrupted,
/// Not assigned in this file; presumably the file was moved to quarantine — TODO confirm.
Quarantined,
}
/// Local helper exposing a `num_cpus::get()` function built on the standard library.
///
/// A single definition serves every target: the previous MSVC and non-MSVC
/// copies behaved identically and differed only in closure vs. path syntax.
mod num_cpus {
    /// Value reported when the platform cannot tell us its parallelism.
    const FALLBACK_PARALLELISM: usize = 4;

    /// Returns the parallelism reported by [`std::thread::available_parallelism`],
    /// or `4` when that query fails.
    pub fn get() -> usize {
        std::thread::available_parallelism()
            .map_or(FALLBACK_PARALLELISM, std::num::NonZeroUsize::get)
    }
}