//! eclipse-sanitizer 0.1.1
//!
//! A fast Rust CLI for sanitizing documents and images by removing their metadata.
use std::path::{Path, PathBuf};

/// File formats that can be recognised from a path's extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileKind {
    /// Extension `pdf`.
    Pdf,
    /// Extensions `docx`, `docm`, `dotx`, `dotm`, `xlsx`, `xlsm`, `xltx`,
    /// `xltm`, `pptx`, `pptm`, `potx` and `potm`.
    OfficeOpenXml,
    /// Extension `png`.
    Png,
    /// Extensions `jpg` and `jpeg`.
    Jpeg,
}

impl FileKind {
    /// Classifies `path` by its extension, ignoring ASCII case.
    ///
    /// Returns `None` when the path has no extension or the extension is not
    /// one of those listed by [`FileKind::supported_extensions`]. Only the
    /// extension is inspected; the file itself is never opened.
    pub fn from_path(path: &Path) -> Option<Self> {
        const OOXML_EXTENSIONS: [&str; 12] = [
            "docx", "docm", "dotx", "dotm", "xlsx", "xlsm", "xltx", "xltm", "pptx", "pptm",
            "potx", "potm",
        ];

        // Every recognised extension is plain ASCII, so an extension that is
        // not valid UTF-8 can never match and is rejected right away.
        let raw = path.extension()?.to_str()?;
        let lowered = raw.to_ascii_lowercase();
        let ext = lowered.as_str();

        if ext == "pdf" {
            Some(Self::Pdf)
        } else if OOXML_EXTENSIONS.contains(&ext) {
            Some(Self::OfficeOpenXml)
        } else if ext == "png" {
            Some(Self::Png)
        } else if ext == "jpg" || ext == "jpeg" {
            Some(Self::Jpeg)
        } else {
            None
        }
    }

    /// Lists every lowercase extension (without the leading dot) that
    /// [`FileKind::from_path`] maps to a kind.
    pub fn supported_extensions() -> &'static [&'static str] {
        const EXTENSIONS: &[&str] = &[
            "pdf", "docx", "docm", "dotx", "dotm", "xlsx", "xlsm", "xltx", "xltm", "pptx", "pptm",
            "potx", "potm", "png", "jpg", "jpeg",
        ];
        EXTENSIONS
    }

    /// Short uppercase name of the format.
    pub fn label(self) -> &'static str {
        match self {
            FileKind::Jpeg => "JPEG",
            FileKind::Png => "PNG",
            FileKind::OfficeOpenXml => "OOXML",
            FileKind::Pdf => "PDF",
        }
    }
}

/// Groups a source path, a destination path and a [`FileKind`] for one file.
///
/// NOTE(review): presumably one unit of work for the sanitizer; the code that
/// builds and consumes these values is not visible here.
#[derive(Debug, Clone)]
pub struct FileTask {
    /// Source path (presumably the file to read; confirm against the caller).
    pub source: PathBuf,
    /// Destination path (presumably where output is written; confirm against the caller).
    pub destination: PathBuf,
    /// Format associated with this task.
    pub kind: FileKind,
}

/// A list of removed-item descriptions.
///
/// The `Default` value is a plan with an empty list.
#[derive(Debug, Clone, Default)]
pub struct SanitizationPlan {
    /// One string per item.
    /// NOTE(review): the exact wording of each entry is produced elsewhere.
    pub removed_items: Vec<String>,
}

impl SanitizationPlan {
    /// Returns `true` when `removed_items` contains no entries.
    pub fn is_empty(&self) -> bool {
        let Self { removed_items, .. } = self;
        removed_items.is_empty()
    }
}

/// Outcome recorded in a `FileReport` for a single file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// Set by `FileReport::dry_run`; such reports carry no sanitized hash.
    DryRun,
    /// Set by `FileReport::sanitized`; such reports carry both hashes.
    Sanitized,
    /// Set by `FileReport::failed`; such reports carry an error message.
    Failed,
    /// Set by `FileReport::interrupted`; the error text is "processing interrupted".
    Interrupted,
}

/// Per-file result record.
///
/// Build instances through the constructors below; each one fixes `status`
/// and fills the optional fields in the combination that matches that status.
#[derive(Debug, Clone)]
pub struct FileReport {
    /// Source path, as passed to the constructor.
    pub source: PathBuf,
    /// Destination path, as passed to the constructor.
    pub destination: PathBuf,
    /// Format of the file.
    pub kind: FileKind,
    /// Which outcome this report describes.
    pub status: FileStatus,
    /// Always `Some` for dry-run and sanitized reports; passed through as
    /// given for failed and interrupted reports.
    /// NOTE(review): presumably a digest of the source file; the hashing
    /// code is not visible here.
    pub original_hash: Option<String>,
    /// `Some` only for sanitized reports.
    pub sanitized_hash: Option<String>,
    /// Entries supplied by the caller for dry-run and sanitized reports;
    /// empty for failed and interrupted reports.
    pub removed_items: Vec<String>,
    /// `Some` for failed and interrupted reports, `None` otherwise.
    pub error: Option<String>,
}

impl FileReport {
    /// Creates a report with status [`FileStatus::DryRun`].
    ///
    /// `original_hash` is stored as `Some`; `sanitized_hash` and `error` are
    /// left as `None`.
    pub fn dry_run(
        source: PathBuf,
        destination: PathBuf,
        kind: FileKind,
        original_hash: String,
        removed_items: Vec<String>,
    ) -> Self {
        Self {
            status: FileStatus::DryRun,
            original_hash: Some(original_hash),
            sanitized_hash: None,
            error: None,
            source,
            destination,
            kind,
            removed_items,
        }
    }

    /// Creates a report with status [`FileStatus::Sanitized`].
    ///
    /// Both hashes are stored as `Some`; `error` is `None`.
    pub fn sanitized(
        source: PathBuf,
        destination: PathBuf,
        kind: FileKind,
        original_hash: String,
        sanitized_hash: String,
        removed_items: Vec<String>,
    ) -> Self {
        // Identical to a dry-run report apart from the status and the
        // presence of the sanitized hash.
        Self {
            status: FileStatus::Sanitized,
            sanitized_hash: Some(sanitized_hash),
            ..Self::dry_run(source, destination, kind, original_hash, removed_items)
        }
    }

    /// Creates a report with status [`FileStatus::Failed`].
    ///
    /// `error` is stored as `Some`, `original_hash` is kept as given, and the
    /// report has no sanitized hash and no removed items.
    pub fn failed(
        source: PathBuf,
        destination: PathBuf,
        kind: FileKind,
        original_hash: Option<String>,
        error: String,
    ) -> Self {
        Self {
            status: FileStatus::Failed,
            error: Some(error),
            sanitized_hash: None,
            removed_items: Vec::new(),
            source,
            destination,
            kind,
            original_hash,
        }
    }

    /// Creates a report with status [`FileStatus::Interrupted`].
    ///
    /// The error text is fixed to `"processing interrupted"`; otherwise the
    /// fields are filled exactly as for a failed report.
    pub fn interrupted(
        source: PathBuf,
        destination: PathBuf,
        kind: FileKind,
        original_hash: Option<String>,
    ) -> Self {
        let message = "processing interrupted".to_string();
        Self {
            status: FileStatus::Interrupted,
            ..Self::failed(source, destination, kind, original_hash, message)
        }
    }
}

/// Aggregate counts derived from a slice of `FileReport`s.
#[derive(Debug, Default, Clone)]
pub struct RunSummary {
    /// Number of reports the summary was built from.
    pub discovered: usize,
    /// Count supplied by the caller of [`RunSummary::from_reports`].
    pub skipped_unsupported: usize,
    /// Reports whose status is `FileStatus::DryRun`.
    pub dry_run: usize,
    /// Reports whose status is `FileStatus::Sanitized`.
    pub sanitized: usize,
    /// Reports whose status is `FileStatus::Failed`.
    pub failed: usize,
    /// `true` when at least one report has status `FileStatus::Interrupted`.
    /// This is a flag, not a count.
    pub interrupted: bool,
}

impl RunSummary {
    /// Tallies `reports` by status.
    ///
    /// `discovered` is set to `reports.len()` and `skipped_unsupported` is
    /// copied through unchanged. Interrupted reports only raise the
    /// `interrupted` flag; they are not added to any per-status counter.
    pub fn from_reports(reports: &[FileReport], skipped_unsupported: usize) -> Self {
        let initial = Self {
            discovered: reports.len(),
            skipped_unsupported,
            dry_run: 0,
            sanitized: 0,
            failed: 0,
            interrupted: false,
        };

        reports.iter().fold(initial, |mut tally, report| {
            // Exhaustive on purpose: a new status must be handled explicitly.
            match report.status {
                FileStatus::Interrupted => tally.interrupted = true,
                FileStatus::Failed => tally.failed += 1,
                FileStatus::Sanitized => tally.sanitized += 1,
                FileStatus::DryRun => tally.dry_run += 1,
            }
            tally
        })
    }
}

/// Bundles a list of `FileTask`s with a count of skipped, unsupported entries.
///
/// NOTE(review): presumably produced by a file-discovery step defined
/// elsewhere; confirm how `skipped_unsupported` is counted there.
#[derive(Debug, Clone)]
pub struct DiscoveryResult {
    /// Tasks carried by this result.
    pub tasks: Vec<FileTask>,
    /// Number of entries skipped as unsupported (counting rule not visible here).
    pub skipped_unsupported: usize,
}