dj 1.0.0

CLI-first backup solution with content-addressable storage
Documentation
use crate::{Result, BLAKE3_HASH_SIZE};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;

pub type Hash = [u8; BLAKE3_HASH_SIZE];

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct HashId(pub Hash);

impl HashId {
    pub fn new(data: &[u8]) -> Self {
        let hash = blake3::hash(data);
        Self(hash.into())
    }

    pub fn from_hex(hex: &str) -> Result<Self> {
        let bytes = hex::decode(hex).map_err(|_| crate::Error::InvalidHash {
            hash: hex.to_string(),
        })?;
        if bytes.len() != BLAKE3_HASH_SIZE {
            return Err(crate::Error::InvalidHash {
                hash: hex.to_string(),
            });
        }
        let mut hash = [0u8; BLAKE3_HASH_SIZE];
        hash.copy_from_slice(&bytes);
        Ok(Self(hash))
    }

    pub fn to_hex(&self) -> String {
        hex::encode(self.0)
    }

    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }
}

impl std::fmt::Display for HashId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.to_hex())
    }
}

impl std::str::FromStr for HashId {
    type Err = crate::Error;

    fn from_str(s: &str) -> Result<Self> {
        Self::from_hex(s)
    }
}

impl std::cmp::Ord for HashId {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

impl std::cmp::PartialOrd for HashId {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Blob {
    pub id: HashId,
    pub size: u64,
    pub data: Vec<u8>,
}

impl Blob {
    pub fn new(data: Vec<u8>) -> Self {
        let id = HashId::new(&data);
        let size = data.len() as u64;
        Self { id, size, data }
    }

    pub fn verify(&self) -> bool {
        self.id == HashId::new(&self.data)
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TreeEntry {
    File {
        name: String,
        size: u64,
        chunks: Vec<HashId>,
        mode: u32,
        mtime: DateTime<Utc>,
    },
    Directory {
        name: String,
        tree: HashId,
        mode: u32,
        mtime: DateTime<Utc>,
    },
    Symlink {
        name: String,
        target: String,
        mtime: DateTime<Utc>,
    },
}

impl TreeEntry {
    pub fn name(&self) -> &str {
        match self {
            TreeEntry::File { name, .. } => name,
            TreeEntry::Directory { name, .. } => name,
            TreeEntry::Symlink { name, .. } => name,
        }
    }

    pub fn mtime(&self) -> DateTime<Utc> {
        match self {
            TreeEntry::File { mtime, .. } => *mtime,
            TreeEntry::Directory { mtime, .. } => *mtime,
            TreeEntry::Symlink { mtime, .. } => *mtime,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tree {
    pub id: HashId,
    pub entries: Vec<TreeEntry>,
}

impl Tree {
    pub fn new(entries: Vec<TreeEntry>) -> Self {
        let data = serde_json::to_vec(&entries).unwrap();
        let id = HashId::new(&data);
        Self { id, entries }
    }

    pub fn find_entry(&self, name: &str) -> Option<&TreeEntry> {
        self.entries.iter().find(|entry| entry.name() == name)
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
    pub id: Uuid,
    pub time: DateTime<Utc>,
    pub tree: HashId,
    pub paths: Vec<String>,
    pub hostname: String,
    pub username: String,
    pub tags: Vec<String>,
    pub parent: Option<HashId>,
    pub summary: SnapshotSummary,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotSummary {
    pub files_new: u64,
    pub files_changed: u64,
    pub files_unmodified: u64,
    pub dirs_new: u64,
    pub dirs_changed: u64,
    pub dirs_unmodified: u64,
    pub data_blobs: u64,
    pub tree_blobs: u64,
    pub data_added: u64,
    pub total_files_processed: u64,
    pub total_bytes_processed: u64,
    pub total_duration: std::time::Duration,
}

impl Default for SnapshotSummary {
    fn default() -> Self {
        Self {
            files_new: 0,
            files_changed: 0,
            files_unmodified: 0,
            dirs_new: 0,
            dirs_changed: 0,
            dirs_unmodified: 0,
            data_blobs: 0,
            tree_blobs: 0,
            data_added: 0,
            total_files_processed: 0,
            total_bytes_processed: 0,
            total_duration: std::time::Duration::from_secs(0),
        }
    }
}

impl Snapshot {
    pub fn new(
        tree: HashId,
        paths: Vec<String>,
        hostname: String,
        username: String,
        tags: Vec<String>,
        parent: Option<HashId>,
    ) -> Self {
        Self {
            id: Uuid::new_v4(),
            time: Utc::now(),
            tree,
            paths,
            hostname,
            username,
            tags,
            parent,
            summary: SnapshotSummary::default(),
        }
    }

    pub fn matches_tags(&self, filter_tags: &[String]) -> bool {
        if filter_tags.is_empty() {
            return true;
        }
        filter_tags.iter().all(|tag| self.tags.contains(tag))
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackfileHeader {
    pub version: u32,
    pub entries: Vec<PackfileEntry>,
    pub total_size: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackfileEntry {
    pub hash: HashId,
    pub offset: u64,
    pub length: u64,
    pub uncompressed_length: u64,
    pub object_type: ObjectType,
}

#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ObjectType {
    Blob,
    Tree,
    Snapshot,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Packfile {
    pub id: HashId,
    pub header: PackfileHeader,
    pub data: Vec<u8>,
}

impl Packfile {
    pub fn new(entries: Vec<(HashId, Vec<u8>, ObjectType)>) -> Result<Self> {
        let mut packfile_entries = Vec::new();
        let mut data = Vec::new();
        let mut current_offset = 0;

        for (hash, object_data, object_type) in entries {
            let uncompressed_length = object_data.len() as u64;
            let compressed = zstd::encode_all(&object_data[..], 3)?;
            let length = compressed.len() as u64;

            packfile_entries.push(PackfileEntry {
                hash,
                offset: current_offset,
                length,
                uncompressed_length,
                object_type,
            });

            data.extend_from_slice(&compressed);
            current_offset += length;
        }

        let header = PackfileHeader {
            version: crate::INDEX_VERSION,
            entries: packfile_entries,
            total_size: current_offset,
        };

        let header_bytes = serde_json::to_vec(&header)?;
        let mut packfile_data = Vec::new();
        packfile_data.extend_from_slice(&(header_bytes.len() as u32).to_le_bytes());
        packfile_data.extend_from_slice(&header_bytes);
        packfile_data.extend_from_slice(&data);

        let id = HashId::new(&packfile_data);

        Ok(Self {
            id,
            header,
            data: packfile_data,
        })
    }

    pub fn extract_object(&self, entry: &PackfileEntry) -> Result<Vec<u8>> {
        let header_len_bytes = 4;
        let header_bytes_len =
            u32::from_le_bytes([self.data[0], self.data[1], self.data[2], self.data[3]]) as usize;
        let data_start = header_len_bytes + header_bytes_len;

        let start = data_start + entry.offset as usize;
        let end = start + entry.length as usize;

        if end > self.data.len() {
            return Err(crate::Error::corrupted_data(
                "Packfile entry extends beyond data",
            ));
        }

        let compressed_data = &self.data[start..end];
        let decompressed = zstd::decode_all(compressed_data)?;

        if decompressed.len() != entry.uncompressed_length as usize {
            return Err(crate::Error::corrupted_data("Decompressed length mismatch"));
        }

        Ok(decompressed)
    }
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IndexEntry {
    pub hash: HashId,
    pub packfile_id: HashId,
    pub offset: u64,
    pub length: u64,
    pub object_type: ObjectType,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Index {
    pub version: u32,
    pub entries: HashMap<String, IndexEntry>,
    pub packfiles: Vec<HashId>,
}

impl Default for Index {
    fn default() -> Self {
        Self::new()
    }
}

impl Index {
    pub fn new() -> Self {
        Self {
            version: crate::INDEX_VERSION,
            entries: HashMap::new(),
            packfiles: Vec::new(),
        }
    }

    pub fn add_packfile(&mut self, packfile: &Packfile) {
        self.packfiles.push(packfile.id);

        for entry in &packfile.header.entries {
            self.entries.insert(
                entry.hash.to_hex(),
                IndexEntry {
                    hash: entry.hash,
                    packfile_id: packfile.id,
                    offset: entry.offset,
                    length: entry.length,
                    object_type: entry.object_type,
                },
            );
        }
    }

    pub fn contains(&self, hash: &HashId) -> bool {
        self.entries.contains_key(&hash.to_hex())
    }

    pub fn get(&self, hash: &HashId) -> Option<&IndexEntry> {
        self.entries.get(&hash.to_hex())
    }
}