// dx_forge/storage/blob.rs

//! Blob Storage System for Cloudflare R2
//!
//! This module provides efficient binary blob storage using a compact
//! length-prefixed binary format (JSON metadata header followed by raw,
//! optionally LZ4-compressed content). All file content and metadata are
//! stored as binary blobs in R2, making it faster and more cost-effective
//! than traditional Git storage.
6use anyhow::{Context, Result};
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9use std::path::{Path, PathBuf};
10use tokio::fs;
11
/// Metadata describing a stored blob.
///
/// Serialized as the JSON header of the binary blob format (see
/// `Blob::to_binary`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlobMetadata {
    /// Lowercase hex SHA-256 hash of the blob content (content address).
    pub hash: String,

    /// Original file path the blob was created from.
    pub path: String,

    /// Stored size in bytes. NOTE: `compress()` rewrites this to the
    /// compressed size, so this is the on-disk size, not necessarily the
    /// original content size.
    pub size: u64,

    /// MIME type guessed from the file extension.
    pub mime_type: String,

    /// Creation timestamp (UTC).
    pub created_at: chrono::DateTime<chrono::Utc>,

    /// Compression algorithm used ("lz4"), or `None` if uncompressed.
    pub compression: Option<String>,
}
33
/// Binary blob: metadata plus the raw payload bytes.
#[derive(Debug)]
pub struct Blob {
    pub metadata: BlobMetadata,
    // Compressed iff `metadata.compression` is `Some`.
    pub content: Vec<u8>,
}
40
41impl Blob {
42    /// Create a new blob from file content
43    pub async fn from_file(path: &Path) -> Result<Self> {
44        let content = fs::read(path).await.context("Failed to read file")?;
45
46        let hash = compute_hash(&content);
47        let size = content.len() as u64;
48        let mime_type = detect_mime_type(path);
49
50        let metadata = BlobMetadata {
51            hash: hash.clone(),
52            path: path.display().to_string(),
53            size,
54            mime_type,
55            created_at: chrono::Utc::now(),
56            compression: None,
57        };
58
59        Ok(Self { metadata, content })
60    }
61
62    /// Create blob from raw content
63    pub fn from_content(path: &str, content: Vec<u8>) -> Self {
64        let hash = compute_hash(&content);
65        let size = content.len() as u64;
66        let mime_type = detect_mime_type_from_path(path);
67
68        let metadata = BlobMetadata {
69            hash: hash.clone(),
70            path: path.to_string(),
71            size,
72            mime_type,
73            created_at: chrono::Utc::now(),
74            compression: None,
75        };
76
77        Self { metadata, content }
78    }
79
80    /// Serialize blob to binary format
81    pub fn to_binary(&self) -> Result<Vec<u8>> {
82        // Simple binary format:
83        // [metadata_len: u32][metadata_json][content]
84
85        let metadata_json = serde_json::to_vec(&self.metadata)?;
86        let metadata_len = metadata_json.len() as u32;
87
88        let mut binary = Vec::with_capacity(4 + metadata_json.len() + self.content.len());
89        binary.extend_from_slice(&metadata_len.to_le_bytes());
90        binary.extend_from_slice(&metadata_json);
91        binary.extend_from_slice(&self.content);
92
93        Ok(binary)
94    }
95
96    /// Deserialize blob from binary format
97    pub fn from_binary(binary: &[u8]) -> Result<Self> {
98        if binary.len() < 4 {
99            anyhow::bail!("Invalid blob: too short");
100        }
101
102        let metadata_len =
103            u32::from_le_bytes([binary[0], binary[1], binary[2], binary[3]]) as usize;
104
105        if binary.len() < 4 + metadata_len {
106            anyhow::bail!("Invalid blob: metadata truncated");
107        }
108
109        let metadata_json = &binary[4..4 + metadata_len];
110        let metadata: BlobMetadata = serde_json::from_slice(metadata_json)?;
111
112        let content = binary[4 + metadata_len..].to_vec();
113
114        Ok(Self { metadata, content })
115    }
116
117    /// Compress blob content using LZ4
118    pub fn compress(&mut self) -> Result<()> {
119        if self.metadata.compression.is_some() {
120            return Ok(()); // Already compressed
121        }
122
123        let compressed = lz4::block::compress(&self.content, None, false)?;
124
125        // Only use compression if it actually reduces size
126        if compressed.len() < self.content.len() {
127            self.content = compressed;
128            self.metadata.compression = Some("lz4".to_string());
129            self.metadata.size = self.content.len() as u64;
130        }
131
132        Ok(())
133    }
134
135    /// Decompress blob content
136    pub fn decompress(&mut self) -> Result<()> {
137        if self.metadata.compression.is_none() {
138            return Ok(()); // Not compressed
139        }
140
141        let decompressed = lz4::block::decompress(&self.content, None)?;
142        self.content = decompressed;
143        self.metadata.compression = None;
144        self.metadata.size = self.content.len() as u64;
145
146        Ok(())
147    }
148
149    /// Get blob hash (content-addressable)
150    pub fn hash(&self) -> &str {
151        &self.metadata.hash
152    }
153}
154
155/// Compute SHA-256 hash of content
156fn compute_hash(content: &[u8]) -> String {
157    let mut hasher = Sha256::new();
158    hasher.update(content);
159    format!("{:x}", hasher.finalize())
160}
161
162/// Detect MIME type from file path
163fn detect_mime_type(path: &Path) -> String {
164    detect_mime_type_from_path(&path.display().to_string())
165}
166
/// Detect MIME type from a path string by its (case-insensitive) suffix.
/// Unknown extensions map to `application/octet-stream`.
fn detect_mime_type_from_path(path: &str) -> String {
    // Suffix -> MIME type table; scanned in order, first match wins.
    const MIME_TABLE: &[(&str, &str)] = &[
        (".rs", "text/x-rust"),
        (".js", "text/javascript"),
        (".mjs", "text/javascript"),
        (".ts", "text/typescript"),
        (".tsx", "text/tsx"),
        (".json", "application/json"),
        (".md", "text/markdown"),
        (".html", "text/html"),
        (".css", "text/css"),
        (".toml", "application/toml"),
        (".yaml", "application/yaml"),
        (".yml", "application/yaml"),
    ];

    let lowered = path.to_lowercase();
    MIME_TABLE
        .iter()
        .find(|(suffix, _)| lowered.ends_with(suffix))
        .map_or("application/octet-stream", |(_, mime)| mime)
        .to_string()
}
195
/// Local, content-addressable on-disk cache of blobs.
pub struct BlobRepository {
    // Root directory of the cache (`<forge_dir>/blobs`).
    cache_dir: PathBuf,
}
200
201impl BlobRepository {
202    /// Create new blob repository
203    pub fn new(forge_dir: &Path) -> Result<Self> {
204        let cache_dir = forge_dir.join("blobs");
205        std::fs::create_dir_all(&cache_dir)?;
206
207        Ok(Self { cache_dir })
208    }
209
210    /// Store blob locally
211    pub async fn store_local(&self, blob: &Blob) -> Result<()> {
212        let hash = blob.hash();
213        let blob_path = self.get_blob_path(hash);
214
215        // Create directory structure (first 2 chars of hash)
216        if let Some(parent) = blob_path.parent() {
217            fs::create_dir_all(parent).await?;
218        }
219
220        let binary = blob.to_binary()?;
221        fs::write(&blob_path, binary).await?;
222
223        Ok(())
224    }
225
226    /// Load blob from local cache
227    pub async fn load_local(&self, hash: &str) -> Result<Blob> {
228        let blob_path = self.get_blob_path(hash);
229        let binary = fs::read(&blob_path)
230            .await
231            .context("Blob not found in cache")?;
232
233        Blob::from_binary(&binary)
234    }
235
236    /// Check if blob exists locally
237    pub async fn exists_local(&self, hash: &str) -> bool {
238        self.get_blob_path(hash).exists()
239    }
240
241    /// Get blob storage path (content-addressable)
242    fn get_blob_path(&self, hash: &str) -> PathBuf {
243        // Store blobs like Git: .dx/forge/blobs/ab/cdef1234...
244        let prefix = &hash[..2];
245        let suffix = &hash[2..];
246        self.cache_dir.join(prefix).join(suffix)
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    // Both tests exercise only synchronous APIs (`from_content`,
    // `to_binary`, `from_binary`, `compress`, `decompress`), so plain
    // `#[test]` suffices; the original `#[tokio::test]` spun up an async
    // runtime per test for nothing.
    #[test]
    fn test_blob_serialization() {
        let content = b"Hello, world!".to_vec();
        let blob = Blob::from_content("test.txt", content.clone());

        // Round-trip through the binary format.
        let binary = blob.to_binary().unwrap();
        let restored = Blob::from_binary(&binary).unwrap();

        assert_eq!(blob.metadata.hash, restored.metadata.hash);
        assert_eq!(blob.content, restored.content);
        assert_eq!(blob.metadata.path, restored.metadata.path);
    }

    #[test]
    fn test_blob_compression() {
        // Highly repetitive content guarantees LZ4 actually shrinks it.
        let content = b"Hello, world! ".repeat(1000);
        let mut blob = Blob::from_content("test.txt", content.clone());

        let original_size = blob.metadata.size;
        blob.compress().unwrap();
        let compressed_size = blob.metadata.size;

        assert!(compressed_size < original_size);
        assert_eq!(blob.metadata.compression, Some("lz4".to_string()));

        // Decompression must restore the exact original bytes.
        blob.decompress().unwrap();
        assert_eq!(blob.content, content);
        assert_eq!(blob.metadata.compression, None);
    }
}
283}