Skip to main content

rustrails_storage/
blob.rs

1//! Immutable file metadata records.
2
3use std::{collections::BTreeMap, fmt};
4
5use bytes::Bytes;
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use thiserror::Error;
10use uuid::Uuid;
11
12use crate::{detect_content_type, file_extension, sha256_hex, variant::Variant};
13
14/// Errors returned while building a [`Blob`].
15#[derive(Debug, Error, Clone, PartialEq, Eq)]
16pub enum BlobError {
17    /// The filename was empty.
18    #[error("filename must not be empty")]
19    EmptyFilename,
20    /// The blob key was empty.
21    #[error("blob key must not be empty")]
22    EmptyKey,
23    /// The supplied byte size does not match the payload size.
24    #[error("byte size mismatch: expected {expected}, actual {actual}")]
25    ByteSizeMismatch { expected: u64, actual: u64 },
26}
27
28/// Immutable metadata describing an uploaded file.
29#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
30pub struct Blob {
31    id: Uuid,
32    key: String,
33    filename: String,
34    content_type: Option<String>,
35    byte_size: u64,
36    checksum: String,
37    metadata: BTreeMap<String, Value>,
38    service_name: String,
39    created_at: DateTime<Utc>,
40}
41
42impl Blob {
43    /// Builds a blob from in-memory data and computes its checksum.
44    ///
45    /// # Errors
46    ///
47    /// Returns an error when the filename is empty.
48    pub fn create(
49        data: Bytes,
50        filename: impl Into<String>,
51        content_type: Option<&str>,
52        metadata: BTreeMap<String, Value>,
53        service_name: impl Into<String>,
54    ) -> Result<Self, BlobError> {
55        Self::create_with_key(
56            Self::generate_key(),
57            data,
58            filename,
59            content_type,
60            metadata,
61            service_name,
62        )
63    }
64
65    /// Builds a blob with an explicit key.
66    ///
67    /// # Errors
68    ///
69    /// Returns an error when the key or filename is empty.
70    pub fn create_with_key(
71        key: impl Into<String>,
72        data: Bytes,
73        filename: impl Into<String>,
74        content_type: Option<&str>,
75        metadata: BTreeMap<String, Value>,
76        service_name: impl Into<String>,
77    ) -> Result<Self, BlobError> {
78        let key = key.into();
79        if key.trim().is_empty() {
80            return Err(BlobError::EmptyKey);
81        }
82        let filename = filename.into();
83        if filename.trim().is_empty() {
84            return Err(BlobError::EmptyFilename);
85        }
86        Ok(Self {
87            id: Uuid::now_v7(),
88            key,
89            content_type: detect_content_type(&filename, content_type),
90            byte_size: data.len() as u64,
91            checksum: Self::checksum_for(&data),
92            filename,
93            metadata,
94            service_name: service_name.into(),
95            created_at: Utc::now(),
96        })
97    }
98
99    /// Builds a blob record for direct uploads before the bytes have been stored.
100    ///
101    /// # Errors
102    ///
103    /// Returns an error when the key or filename is empty.
104    pub fn create_before_direct_upload(
105        key: impl Into<String>,
106        filename: impl Into<String>,
107        byte_size: u64,
108        checksum: impl Into<String>,
109        content_type: Option<&str>,
110        metadata: BTreeMap<String, Value>,
111        service_name: impl Into<String>,
112    ) -> Result<Self, BlobError> {
113        let key = key.into();
114        if key.trim().is_empty() {
115            return Err(BlobError::EmptyKey);
116        }
117        let filename = filename.into();
118        if filename.trim().is_empty() {
119            return Err(BlobError::EmptyFilename);
120        }
121        Ok(Self {
122            id: Uuid::now_v7(),
123            key,
124            content_type: detect_content_type(&filename, content_type),
125            byte_size,
126            checksum: checksum.into(),
127            filename,
128            metadata,
129            service_name: service_name.into(),
130            created_at: Utc::now(),
131        })
132    }
133
134    /// Composes several blobs into a single logical blob.
135    ///
136    /// # Errors
137    ///
138    /// Returns an error when the filename is empty.
139    pub fn compose(
140        blobs: &[Self],
141        filename: impl Into<String>,
142        service_name: impl Into<String>,
143    ) -> Result<Self, BlobError> {
144        let filename = filename.into();
145        if filename.trim().is_empty() {
146            return Err(BlobError::EmptyFilename);
147        }
148        let content_type = blobs.iter().find_map(|blob| blob.content_type.clone());
149        let byte_size = blobs.iter().map(|blob| blob.byte_size).sum();
150        let mut metadata = BTreeMap::new();
151        metadata.insert("composed".to_owned(), Value::Bool(true));
152        metadata.insert("parts".to_owned(), Value::from(blobs.len() as u64));
153        Ok(Self {
154            id: Uuid::now_v7(),
155            key: Self::generate_key(),
156            filename,
157            content_type,
158            byte_size,
159            checksum: String::new(),
160            metadata,
161            service_name: service_name.into(),
162            created_at: Utc::now(),
163        })
164    }
165
166    /// Generates a variant request for this blob.
167    #[must_use]
168    pub fn variant(&self, transformations: BTreeMap<String, Value>) -> Variant {
169        Variant::new(self.clone(), transformations)
170    }
171
172    /// Returns the blob identifier.
173    #[must_use]
174    pub fn id(&self) -> Uuid {
175        self.id
176    }
177
178    /// Returns the storage key.
179    #[must_use]
180    pub fn key(&self) -> &str {
181        &self.key
182    }
183
184    /// Returns the original filename.
185    #[must_use]
186    pub fn filename(&self) -> &str {
187        &self.filename
188    }
189
190    /// Returns the detected content type.
191    #[must_use]
192    pub fn content_type(&self) -> Option<&str> {
193        self.content_type.as_deref()
194    }
195
196    /// Returns the recorded size in bytes.
197    #[must_use]
198    pub fn byte_size(&self) -> u64 {
199        self.byte_size
200    }
201
202    /// Returns the SHA-256 checksum encoded as lowercase hexadecimal.
203    #[must_use]
204    pub fn checksum(&self) -> &str {
205        &self.checksum
206    }
207
208    /// Returns custom metadata.
209    #[must_use]
210    pub fn metadata(&self) -> &BTreeMap<String, Value> {
211        &self.metadata
212    }
213
214    /// Returns the storage service name.
215    #[must_use]
216    pub fn service_name(&self) -> &str {
217        &self.service_name
218    }
219
220    /// Returns the creation timestamp.
221    #[must_use]
222    pub fn created_at(&self) -> DateTime<Utc> {
223        self.created_at
224    }
225
226    /// Returns the file extension, if present.
227    #[must_use]
228    pub fn extension(&self) -> Option<&str> {
229        file_extension(&self.filename)
230    }
231
232    /// Returns whether the blob can be treated as an image.
233    #[must_use]
234    pub fn is_image(&self) -> bool {
235        self.content_type
236            .as_deref()
237            .is_some_and(|content_type| content_type.starts_with("image/"))
238    }
239
240    /// Returns whether the blob can be treated as video.
241    #[must_use]
242    pub fn is_video(&self) -> bool {
243        self.content_type
244            .as_deref()
245            .is_some_and(|content_type| content_type.starts_with("video/"))
246    }
247
248    /// Returns whether the blob can be treated as audio.
249    #[must_use]
250    pub fn is_audio(&self) -> bool {
251        self.content_type
252            .as_deref()
253            .is_some_and(|content_type| content_type.starts_with("audio/"))
254    }
255
256    /// Returns whether the blob contains text-like content.
257    #[must_use]
258    pub fn is_text(&self) -> bool {
259        self.content_type.as_deref().is_some_and(|content_type| {
260            content_type.starts_with("text/")
261                || matches!(content_type, "application/json" | "application/xml")
262        })
263    }
264
265    /// Returns a clone with merged metadata values.
266    #[must_use]
267    pub fn with_metadata(mut self, metadata: BTreeMap<String, Value>) -> Self {
268        self.metadata.extend(metadata);
269        self
270    }
271
272    /// Returns a clone with a different content type.
273    #[must_use]
274    pub fn with_content_type(mut self, content_type: Option<String>) -> Self {
275        self.content_type = content_type;
276        self
277    }
278
279    /// Validates the blob against a downloaded payload.
280    ///
281    /// # Errors
282    ///
283    /// Returns an error when the payload size differs from the recorded size.
284    pub fn validate_payload(&self, data: &Bytes) -> Result<(), BlobError> {
285        let actual = data.len() as u64;
286        if self.byte_size != actual {
287            return Err(BlobError::ByteSizeMismatch {
288                expected: self.byte_size,
289                actual,
290            });
291        }
292        Ok(())
293    }
294
295    /// Computes the checksum for a payload.
296    #[must_use]
297    pub fn checksum_for(data: &Bytes) -> String {
298        sha256_hex(data)
299    }
300
301    /// Generates a 28-character lowercase base36-ish key.
302    #[must_use]
303    pub fn generate_key() -> String {
304        const ALPHABET: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";
305        let seed = sha256_hex(Uuid::now_v7().as_bytes());
306        seed.as_bytes()
307            .iter()
308            .take(28)
309            .map(|byte| ALPHABET[(usize::from(*byte)) % ALPHABET.len()] as char)
310            .collect()
311    }
312}
313
314impl fmt::Display for Blob {
315    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
316        write!(formatter, "{} ({})", self.filename, self.key)
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a blob from `data` on the `memory` service with empty metadata.
    fn sample_blob(filename: &str, content_type: Option<&str>, data: &[u8]) -> Blob {
        let payload = Bytes::copy_from_slice(data);
        let built = Blob::create(
            payload,
            filename.to_owned(),
            content_type,
            BTreeMap::new(),
            "memory",
        );
        built.expect("blob should build")
    }

    #[test]
    fn test_create_computes_byte_size() {
        let record = sample_blob("hello.txt", None, b"Hello world");
        assert_eq!(record.byte_size(), 11);
    }

    #[test]
    fn test_create_computes_checksum() {
        let expected = Blob::checksum_for(&Bytes::from_static(b"Hello world"));
        let record = sample_blob("hello.txt", None, b"Hello world");
        assert_eq!(record.checksum(), expected);
    }

    #[test]
    fn test_create_detects_content_type_from_extension() {
        let record = sample_blob("hello.txt", None, b"Hello world");
        assert_eq!(record.content_type(), Some("text/plain"));
    }

    #[test]
    fn test_create_prefers_explicit_content_type() {
        let explicit = Some("application/custom");
        let record = sample_blob("hello.txt", explicit, b"Hello world");
        assert_eq!(record.content_type(), explicit);
    }

    #[test]
    fn test_create_replaces_octet_stream_with_filename_hint() {
        let generic = Some("application/octet-stream");
        let record = sample_blob("hello.txt", generic, b"Hello world");
        assert_eq!(record.content_type(), Some("text/plain"));
    }

    #[test]
    fn test_create_with_custom_key() {
        let built = Blob::create_with_key(
            "custom-key",
            Bytes::from_static(b"Hello world"),
            "hello.txt",
            None,
            BTreeMap::new(),
            "memory",
        );
        let record = built.expect("blob should build");
        assert_eq!(record.key(), "custom-key");
    }

    #[test]
    fn test_create_with_key_rejects_empty_key() {
        let outcome = Blob::create_with_key(
            "   ",
            Bytes::new(),
            "hello.txt",
            None,
            BTreeMap::new(),
            "memory",
        );
        assert_eq!(outcome.expect_err("should fail"), BlobError::EmptyKey);
    }

    #[test]
    fn test_create_rejects_empty_filename() {
        let outcome = Blob::create(Bytes::new(), "", None, BTreeMap::new(), "memory");
        assert_eq!(outcome.expect_err("should fail"), BlobError::EmptyFilename);
    }

    #[test]
    fn test_create_before_direct_upload_preserves_checksum() {
        let built = Blob::create_before_direct_upload(
            "direct-key",
            "racecar.jpg",
            42,
            "checksum",
            Some("image/jpeg"),
            BTreeMap::new(),
            "memory",
        );
        let record = built.expect("blob should build");
        assert_eq!(record.checksum(), "checksum");
        assert_eq!(record.byte_size(), 42);
    }

    #[test]
    fn test_create_before_direct_upload_preserves_metadata_and_service_name() {
        let supplied = BTreeMap::from([
            ("identified".to_owned(), Value::Bool(false)),
            ("custom".to_owned(), serde_json::json!({"width": 640})),
        ]);

        let built = Blob::create_before_direct_upload(
            "direct-key",
            "racecar.jpg",
            42,
            "checksum",
            Some("image/jpeg"),
            supplied.clone(),
            "mirror",
        );
        let record = built.expect("blob should build");

        assert_eq!(record.metadata(), &supplied);
        assert_eq!(record.service_name(), "mirror");
    }

    #[test]
    fn test_create_before_direct_upload_rejects_empty_key() {
        let outcome = Blob::create_before_direct_upload(
            "   ",
            "racecar.jpg",
            42,
            "checksum",
            Some("image/jpeg"),
            BTreeMap::new(),
            "memory",
        );

        assert_eq!(outcome.expect_err("should fail"), BlobError::EmptyKey);
    }

    #[test]
    fn test_direct_upload_without_content_type_leaves_type_helpers_false() {
        let built = Blob::create_before_direct_upload(
            "direct-key",
            "unknown_file",
            100,
            "checksum",
            None,
            BTreeMap::new(),
            "memory",
        );
        let record = built.expect("blob should build");

        assert_eq!(record.content_type(), None);
        let helpers = [
            record.is_image(),
            record.is_video(),
            record.is_audio(),
            record.is_text(),
        ];
        assert_eq!(helpers, [false; 4]);
    }

    #[test]
    fn test_generate_key_has_expected_shape() {
        let key = Blob::generate_key();
        assert_eq!(key.len(), 28);
        let only_base36 = key
            .chars()
            .all(|symbol| matches!(symbol, 'a'..='z' | '0'..='9'));
        assert!(only_base36);
    }

    #[test]
    fn test_generate_key_is_unique_enough_for_two_calls() {
        let first = Blob::generate_key();
        let second = Blob::generate_key();
        assert_ne!(first, second);
    }

    #[test]
    fn test_blob_type_helpers_for_image() {
        let record = sample_blob("image.png", None, b"png");
        assert!(record.is_image());
        assert!(!record.is_video());
        assert!(!record.is_audio());
    }

    #[test]
    fn test_blob_type_helpers_for_video() {
        let record = sample_blob("movie.mp4", None, b"mp4");
        assert!(record.is_video());
        assert!(!record.is_text());
    }

    #[test]
    fn test_blob_type_helpers_for_audio() {
        let record = sample_blob("sound.mp3", None, b"mp3");
        assert!(record.is_audio());
        assert!(!record.is_image());
    }

    #[test]
    fn test_blob_type_helpers_for_text() {
        assert!(sample_blob("hello.txt", None, b"Hello world").is_text());
    }

    #[test]
    fn test_create_normalizes_explicit_content_type() {
        let record = sample_blob("hello", Some(" TEXT/PLAIN "), b"Hello world");

        assert_eq!(record.content_type(), Some("text/plain"));
    }

    #[test]
    fn test_extension_reads_filename_extension() {
        let record = sample_blob("archive.tar", None, b"x");
        assert_eq!(record.extension(), Some("tar"));
    }

    #[test]
    fn test_extension_returns_none_for_extensionless_filename() {
        let record = sample_blob("archive", None, b"x");

        assert_eq!(record.extension(), None);
    }

    #[test]
    fn test_with_metadata_merges_values() {
        let extra = BTreeMap::from([("width".to_owned(), Value::from(100))]);
        let record = sample_blob("image.png", None, b"png").with_metadata(extra.clone());
        assert_eq!(record.metadata(), &extra);
    }

    #[test]
    fn test_with_metadata_overwrites_conflicting_keys() {
        let initial = BTreeMap::from([
            ("width".to_owned(), Value::from(100)),
            ("height".to_owned(), Value::from(200)),
        ]);
        let updates = BTreeMap::from([
            ("width".to_owned(), Value::from(400)),
            ("identified".to_owned(), Value::Bool(true)),
        ]);

        let built = Blob::create(
            Bytes::from_static(b"png"),
            "image.png",
            None,
            initial,
            "memory",
        );
        let record = built.expect("blob should build").with_metadata(updates);
        let merged = record.metadata();

        assert_eq!(merged.get("width"), Some(&Value::from(400)));
        assert_eq!(merged.get("height"), Some(&Value::from(200)));
        assert_eq!(merged.get("identified"), Some(&Value::Bool(true)));
    }

    #[test]
    fn test_with_content_type_replaces_value() {
        let replacement = Some("application/json".to_owned());
        let record = sample_blob("hello.txt", None, b"Hello world").with_content_type(replacement);
        assert_eq!(record.content_type(), Some("application/json"));
    }

    #[test]
    fn test_validate_payload_rejects_wrong_size() {
        let record = sample_blob("hello.txt", None, b"Hello world");
        let outcome = record.validate_payload(&Bytes::from_static(b"short"));
        let expected = BlobError::ByteSizeMismatch {
            expected: 11,
            actual: 5,
        };
        assert_eq!(outcome.expect_err("size mismatch should fail"), expected);
    }

    #[test]
    fn test_validate_payload_accepts_matching_size() {
        let record = sample_blob("hello.txt", None, b"Hello");
        let outcome = record.validate_payload(&Bytes::from_static(b"Hello"));

        outcome.expect("matching payload should validate");
    }

    #[test]
    fn test_zero_byte_blob_is_supported() {
        let record = sample_blob("empty.txt", None, b"");
        assert_eq!(record.byte_size(), 0);
        assert!(record.is_text());
    }

    #[test]
    fn test_compose_sums_sizes_and_marks_metadata() {
        let parts = [
            sample_blob("part-1.txt", None, b"123"),
            sample_blob("part-2.txt", None, b"456"),
        ];
        let composite = Blob::compose(&parts, "all.txt", "memory").expect("compose should work");
        assert_eq!(composite.byte_size(), 6);
        let composed_flag = composite.metadata().get("composed");
        assert_eq!(composed_flag, Some(&Value::Bool(true)));
    }

    #[test]
    fn test_compose_uses_first_available_content_type_and_empty_checksum() {
        let untyped = Blob::create_before_direct_upload(
            "first",
            "first",
            1,
            "checksum-1",
            None,
            BTreeMap::new(),
            "memory",
        )
        .expect("blob should build");
        let typed = sample_blob("image.png", Some("image/png"), b"png");

        let parts = [untyped, typed];
        let composite = Blob::compose(&parts, "all.bin", "archive").expect("compose should work");

        assert_eq!(composite.content_type(), Some("image/png"));
        assert_eq!(composite.checksum(), "");
        assert_eq!(composite.service_name(), "archive");
        assert_eq!(composite.metadata().get("parts"), Some(&Value::from(2_u64)));
    }

    #[test]
    fn test_variant_builder_clones_blob_metadata() {
        let record = sample_blob("racecar.jpg", None, b"jpg");
        let variant = record.variant(BTreeMap::new());
        assert_eq!(variant.blob().filename(), "racecar.jpg");
    }
}