Skip to main content

embeddenator_fs/fs/versioned/
chunk.rs

1//! Versioned chunk data structure
2//!
3//! A chunk is a fixed-size block of file data encoded as a SparseVec. This module
4//! implements versioning for chunks to enable concurrent access with optimistic locking.
5
6use crate::SparseVec;
7use std::fmt;
8use std::sync::atomic::{AtomicU32, Ordering};
9use std::sync::Arc;
10use std::time::Instant;
11
12/// A versioned chunk with metadata
13///
14/// Chunks are immutable once created - the SparseVec is wrapped in Arc for
15/// zero-copy sharing across threads. Each update creates a new VersionedChunk
16/// with an incremented version number.
17#[derive(Clone)]
18pub struct VersionedChunk {
19    /// The actual VSA-encoded chunk data (immutable, shared)
20    pub vector: Arc<SparseVec>,
21
22    /// Version number of this chunk (local to the chunk)
23    pub version: u64,
24
25    /// When this chunk was first created
26    pub created_at: Instant,
27
28    /// When this chunk was last modified
29    pub modified_at: Instant,
30
31    /// Reference count - how many files reference this chunk
32    /// Used for garbage collection and deduplication tracking
33    pub ref_count: Arc<AtomicU32>,
34
35    /// Size of the original data in bytes (before VSA encoding)
36    pub original_size: usize,
37
38    /// Content hash for deduplication (first 8 bytes of SHA256)
39    pub content_hash: [u8; 8],
40}
41
42impl VersionedChunk {
43    /// Create a new versioned chunk
44    pub fn new(vector: SparseVec, original_size: usize, content_hash: [u8; 8]) -> Self {
45        let now = Instant::now();
46        Self {
47            vector: Arc::new(vector),
48            version: 0,
49            created_at: now,
50            modified_at: now,
51            ref_count: Arc::new(AtomicU32::new(1)),
52            original_size,
53            content_hash,
54        }
55    }
56
57    /// Create a new version of this chunk with updated data
58    pub fn update(&self, new_vector: SparseVec, new_hash: [u8; 8]) -> Self {
59        Self {
60            vector: Arc::new(new_vector),
61            version: self.version + 1,
62            created_at: self.created_at,
63            modified_at: Instant::now(),
64            ref_count: Arc::new(AtomicU32::new(1)),
65            original_size: self.original_size,
66            content_hash: new_hash,
67        }
68    }
69
70    /// Increment the reference count
71    pub fn inc_ref(&self) {
72        self.ref_count.fetch_add(1, Ordering::AcqRel);
73    }
74
75    /// Decrement the reference count and return the new value
76    pub fn dec_ref(&self) -> u32 {
77        self.ref_count
78            .fetch_sub(1, Ordering::AcqRel)
79            .saturating_sub(1)
80    }
81
82    /// Get the current reference count
83    pub fn ref_count(&self) -> u32 {
84        self.ref_count.load(Ordering::Acquire)
85    }
86
87    /// Check if this chunk can be garbage collected
88    pub fn is_unreferenced(&self) -> bool {
89        self.ref_count() == 0
90    }
91
92    /// Get the age of this chunk
93    pub fn age(&self) -> std::time::Duration {
94        Instant::now().duration_since(self.created_at)
95    }
96
97    /// Get time since last modification
98    pub fn time_since_modification(&self) -> std::time::Duration {
99        Instant::now().duration_since(self.modified_at)
100    }
101}
102
103impl fmt::Debug for VersionedChunk {
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        f.debug_struct("VersionedChunk")
106            .field("version", &self.version)
107            .field("original_size", &self.original_size)
108            .field("ref_count", &self.ref_count())
109            .field("content_hash", &format!("{:02x?}", &self.content_hash))
110            .field("age_ms", &self.age().as_millis())
111            .finish()
112    }
113}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built chunk is version 0, keeps the given size and is
    /// referenced exactly once.
    #[test]
    fn test_chunk_creation() {
        let fresh = VersionedChunk::new(SparseVec::new(), 4096, [1, 2, 3, 4, 5, 6, 7, 8]);

        assert_eq!(fresh.version, 0);
        assert_eq!(fresh.original_size, 4096);
        assert_eq!(fresh.ref_count(), 1);
        assert!(!fresh.is_unreferenced());
    }

    /// Updating bumps the version, keeps the creation time and never moves
    /// the modification time backwards.
    #[test]
    fn test_chunk_update() {
        let before = VersionedChunk::new(SparseVec::new(), 4096, [1, 2, 3, 4, 5, 6, 7, 8]);
        let after = before.update(SparseVec::new(), [9, 10, 11, 12, 13, 14, 15, 16]);

        assert_eq!(after.version, 1);
        assert_eq!(after.created_at, before.created_at);
        assert!(after.modified_at >= before.modified_at);
    }

    /// Counts go 1 -> 2 -> 3 on increments and back down to 0 on decrements.
    #[test]
    fn test_reference_counting() {
        let chunk = VersionedChunk::new(SparseVec::new(), 4096, [0; 8]);
        assert_eq!(chunk.ref_count(), 1);

        for expected_after_inc in [2, 3] {
            chunk.inc_ref();
            assert_eq!(chunk.ref_count(), expected_after_inc);
        }

        for expected_after_dec in [2, 1, 0] {
            let count = chunk.dec_ref();
            assert_eq!(count, expected_after_dec);
        }

        assert!(chunk.is_unreferenced());
    }
}
166}