adaptive_pipeline_domain/value_objects/
chunk_metadata.rs

1// /////////////////////////////////////////////////////////////////////////////
2// Adaptive Pipeline
3// Copyright (c) 2025 Michael Gardner, A Bit of Help, Inc.
4// SPDX-License-Identifier: BSD-3-Clause
5// See LICENSE file in the project root.
6// /////////////////////////////////////////////////////////////////////////////
7
8//! # Chunk Metadata Value Object
9//!
10//! This module defines the chunk metadata value object for the adaptive
11//! pipeline system. It encapsulates all metadata needed to track and manage
12//! file chunks throughout the pipeline processing lifecycle.
13//!
14//! ## Overview
15//!
16//! The chunk metadata provides:
17//!
18//! - **Chunk Identification**: Unique identification and description of chunks
19//! - **Size Tracking**: Accurate tracking of chunk sizes and boundaries
20//! - **Integrity Verification**: Checksums and validation for chunk integrity
21//! - **Processing Context**: Context about processing stages and operations
22//! - **Temporal Tracking**: Timestamps for chunk lifecycle management
23//!
24//! ## Architecture
25//!
26//! The metadata follows Domain-Driven Design principles:
27//!
28//! - **Value Object**: Immutable value object with equality semantics
29//! - **Rich Domain Model**: Encapsulates chunk-related business logic
30//! - **Validation**: Comprehensive validation of metadata consistency
31//! - **Serialization**: Support for persistence and transmission
32//!
33//! ## Key Features
34//!
35//! ### Chunk Identification
36//!
37//! - **Unique Identifiers**: Unique identification for each chunk
38//! - **Descriptive Names**: Human-readable chunk descriptions
39//! - **Hierarchical Organization**: Support for chunk hierarchies
40//! - **Context Preservation**: Maintain context across processing stages
41//!
42//! ### Size and Boundary Management
43//!
44//! - **Accurate Sizing**: Precise tracking of chunk sizes in bytes
45//! - **Boundary Information**: Track chunk boundaries within files
46//! - **Compression Tracking**: Track size changes during compression
47//! - **Memory Management**: Support for memory-efficient processing
48//!
49//! ### Integrity and Validation
50//!
51//! - **Checksum Support**: Multiple checksum algorithms for verification
52//! - **Integrity Validation**: Comprehensive integrity checking
53//! - **Corruption Detection**: Detect and report chunk corruption
54//! - **Recovery Information**: Information for chunk recovery
55//!
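//! ## Usage Examples
//!
//! ### Creating Chunk Metadata
//!
//! ```rust,ignore
//! let metadata = ChunkMetadata::new(1024, "chunk_0".to_string())?;
//! assert_eq!(metadata.chunk_size(), 1024);
//! assert_eq!(metadata.identifier(), "chunk_0");
//! ```
//!
//! ### Working with Attributes
//!
//! ```rust,ignore
//! let metadata = ChunkMetadata::new(1024, "chunk_0".to_string())?
//!     .with_attribute("compression_ratio".to_string(), "0.7".to_string());
//! assert_eq!(metadata.get_attribute("compression_ratio"), Some("0.7"));
//! assert_eq!(metadata.get_attribute("missing"), None);
//! ```
//!
//! ### Integrity Verification
//!
//! The checksum is stored as an opaque string; computing it and comparing it
//! against the chunk data is the caller's responsibility.
//!
//! ```rust,ignore
//! let metadata = ChunkMetadata::new(1024, "chunk_0".to_string())?
//!     .with_checksum("abc123".to_string());
//! assert_eq!(metadata.checksum(), Some("abc123"));
//! ```
//!
//! ### Processing Stage Tracking
//!
//! ```rust,ignore
//! let metadata = ChunkMetadata::new(1024, "chunk_0".to_string())?
//!     .with_stage("compression".to_string());
//! assert_eq!(metadata.stage(), Some("compression"));
//! ```
//!
//! ### Serialization and Persistence
//!
//! `ChunkMetadata` derives `serde::Serialize` and `serde::Deserialize`, so it
//! can be persisted or transmitted with any serde-compatible format.
//!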
73//! ## Metadata Attributes
74//!
75//! ### Standard Attributes
76//!
77//! Common attributes used across the system:
78//!
79//! - **compression_ratio**: Compression ratio achieved
80//! - **algorithm**: Algorithm used for processing
81//! - **level**: Processing level or quality setting
82//! - **original_size**: Original size before processing
83//! - **processing_time_ms**: Time taken for processing
84//!
85//! ### Custom Attributes
86//!
87//! Applications can define custom attributes:
88//!
89//! - **Application-specific**: Custom metadata for specific use cases
90//! - **Processing Context**: Context-specific information
91//! - **Performance Metrics**: Custom performance measurements
92//! - **Business Logic**: Domain-specific business information
93//!
94//! ## Integrity Verification
95//!
96//! ### Checksum Algorithms
97//!
98//! Supported checksum algorithms:
99//!
100//! - **SHA-256**: Primary checksum algorithm
101//! - **Blake3**: High-performance alternative
102//! - **CRC32**: Fast integrity checking
103//! - **MD5**: Legacy support (not recommended)
104//!
105//! ### Verification Process
106//!
107//! 1. **Calculate Checksum**: Calculate checksum of chunk data
108//! 2. **Compare**: Compare with stored checksum
109//! 3. **Validate**: Validate checksum format and algorithm
110//! 4. **Report**: Report verification results
111//!
112//! ## Performance Considerations
113//!
114//! ### Memory Efficiency
115//!
116//! - **Compact Storage**: Efficient storage of metadata
117//! - **Lazy Evaluation**: Lazy evaluation of expensive operations
118//! - **String Interning**: Intern common strings to reduce memory usage
119//!
120//! ### Processing Performance
121//!
122//! - **Fast Access**: Optimized access to metadata fields
123//! - **Efficient Serialization**: Fast serialization/deserialization
124//! - **Minimal Overhead**: Minimal overhead during processing
125//!
126//! ## Validation Rules
127//!
128//! ### Size Validation
129//!
130//! - **Positive Size**: Chunk size must be positive
131//! - **Reasonable Limits**: Size must be within reasonable limits
132//! - **Consistency**: Size must be consistent with actual data
133//!
134//! ### Identifier Validation
135//!
136//! - **Non-empty**: Identifier cannot be empty
137//! - **Valid Characters**: Must contain only valid characters
138//! - **Uniqueness**: Should be unique within context
139//!
140//! ### Checksum Validation
141//!
142//! - **Format Validation**: Validate checksum format
143//! - **Algorithm Support**: Verify algorithm is supported
144//! - **Length Validation**: Validate checksum length
145//!
146//! ## Error Handling
147//!
148//! ### Validation Errors
149//!
150//! - **Invalid Size**: Chunk size is invalid
151//! - **Invalid Identifier**: Identifier is invalid
152//! - **Invalid Checksum**: Checksum format is invalid
153//! - **Inconsistent Data**: Metadata is inconsistent
154//!
155//! ### Processing Errors
156//!
157//! - **Checksum Calculation**: Errors during checksum calculation
158//! - **Serialization Errors**: Errors during serialization
159//! - **Attribute Errors**: Errors with attribute operations
160//!
161//! ## Integration
162//!
163//! The chunk metadata integrates with:
164//!
165//! - **File Chunks**: Associated with file chunk data
166//! - **Processing Pipeline**: Used throughout processing pipeline
167//! - **Storage Systems**: Persisted with chunk data
168//! - **Monitoring**: Used for monitoring and metrics
169//!
170//! ## Thread Safety
171//!
172//! The chunk metadata is designed for thread safety:
173//!
174//! - **Immutable**: Metadata is immutable after creation
175//! - **Safe Sharing**: Safe to share between threads
176//! - **Concurrent Access**: Safe concurrent access to metadata
177//!
178//! ## Future Enhancements
179//!
180//! Planned enhancements include:
181//!
182//! - **Extended Attributes**: More comprehensive attribute system
183//! - **Compression Metadata**: Enhanced compression-specific metadata
184//! - **Performance Metrics**: Built-in performance metrics
185//! - **Validation Framework**: Enhanced validation capabilities
186
187use chrono::{DateTime, Utc};
188use serde::{Deserialize, Serialize};
189use std::collections::HashMap;
190
191use crate::PipelineError;
192
193/// Metadata associated with a file chunk during processing
194///
195/// This value object encapsulates all metadata needed to track and manage
196/// chunks throughout the pipeline processing lifecycle, following DDD
197/// principles.
198///
199/// # Key Features
200///
201/// - **Chunk Identification**: Unique identification and description
202/// - **Size Tracking**: Accurate size tracking in bytes
203/// - **Integrity Verification**: Checksum-based integrity checking
204/// - **Processing Context**: Track processing stages and operations
205/// - **Temporal Tracking**: Timestamp-based lifecycle management
206/// - **Extensible Attributes**: Custom metadata through key-value attributes
207///
208/// # Examples
209#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
210pub struct ChunkMetadata {
211    /// Size of the chunk in bytes
212    chunk_size: usize,
213
214    /// Identifier or description of the chunk
215    identifier: String,
216
217    /// Checksum for integrity verification
218    checksum: Option<String>,
219
220    /// Processing stage that created this metadata
221    stage: Option<String>,
222
223    /// Timestamp when this metadata was created
224    created_at: DateTime<Utc>,
225
226    /// Additional metadata as key-value pairs
227    attributes: HashMap<String, String>,
228}
229
230impl ChunkMetadata {
231    /// Creates new chunk metadata with required fields
232    ///
233    /// # Arguments
234    /// * `chunk_size` - Size of the chunk in bytes
235    /// * `identifier` - Unique identifier or description for the chunk
236    ///
237    /// # Returns
238    /// * `Result<ChunkMetadata, PipelineError>` - New metadata instance or
239    ///   error
240    pub fn new(chunk_size: usize, identifier: String) -> Result<Self, PipelineError> {
241        if chunk_size == 0 {
242            return Err(PipelineError::ValidationError(
243                "Chunk size must be greater than zero".to_string(),
244            ));
245        }
246
247        if identifier.trim().is_empty() {
248            return Err(PipelineError::ValidationError(
249                "Chunk identifier cannot be empty".to_string(),
250            ));
251        }
252
253        Ok(Self {
254            chunk_size,
255            identifier: identifier.trim().to_string(),
256            checksum: None,
257            stage: None,
258            created_at: chrono::Utc::now(),
259            attributes: HashMap::new(),
260        })
261    }
262
263    /// Creates chunk metadata with all fields for testing
264    pub fn new_for_testing(
265        chunk_size: usize,
266        identifier: String,
267        checksum: Option<String>,
268        stage: Option<String>,
269    ) -> Self {
270        Self {
271            chunk_size,
272            identifier,
273            checksum,
274            stage,
275            created_at: chrono::Utc::now(),
276            attributes: HashMap::new(),
277        }
278    }
279
280    /// Gets the chunk size
281    pub fn chunk_size(&self) -> usize {
282        self.chunk_size
283    }
284
285    /// Gets the chunk identifier
286    pub fn identifier(&self) -> &str {
287        &self.identifier
288    }
289
290    /// Gets the checksum if available
291    pub fn checksum(&self) -> Option<&str> {
292        self.checksum.as_deref()
293    }
294
295    /// Gets the processing stage if available
296    pub fn stage(&self) -> Option<&str> {
297        self.stage.as_deref()
298    }
299
300    /// Gets the creation timestamp
301    pub fn created_at(&self) -> DateTime<Utc> {
302        self.created_at
303    }
304
305    /// Sets the checksum for integrity verification
306    pub fn with_checksum(mut self, checksum: String) -> Self {
307        self.checksum = Some(checksum);
308        self
309    }
310
311    /// Sets the processing stage
312    pub fn with_stage(mut self, stage: String) -> Self {
313        self.stage = Some(stage);
314        self
315    }
316
317    /// Adds a custom attribute
318    pub fn with_attribute(mut self, key: String, value: String) -> Self {
319        self.attributes.insert(key, value);
320        self
321    }
322
323    /// Gets a custom attribute
324    pub fn get_attribute(&self, key: &str) -> Option<&str> {
325        self.attributes.get(key).map(|s| s.as_str())
326    }
327
328    /// Gets all attributes
329    pub fn attributes(&self) -> &HashMap<String, String> {
330        &self.attributes
331    }
332
333    /// Validates the metadata integrity
334    pub fn validate(&self) -> Result<(), PipelineError> {
335        if self.chunk_size == 0 {
336            return Err(PipelineError::ValidationError(
337                "Invalid chunk size: must be greater than zero".to_string(),
338            ));
339        }
340
341        if self.identifier.trim().is_empty() {
342            return Err(PipelineError::ValidationError(
343                "Invalid identifier: cannot be empty".to_string(),
344            ));
345        }
346
347        Ok(())
348    }
349}
350
351impl Default for ChunkMetadata {
352    fn default() -> Self {
353        Self {
354            chunk_size: 1024, // 1KB default
355            identifier: "default_chunk".to_string(),
356            checksum: None,
357            stage: None,
358            created_at: chrono::Utc::now(),
359            attributes: HashMap::new(),
360        }
361    }
362}
363
364impl std::fmt::Display for ChunkMetadata {
365    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
366        write!(
367            f,
368            "ChunkMetadata(id: {}, size: {} bytes, stage: {:?})",
369            self.identifier, self.chunk_size, self.stage
370        )
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    /// Tests chunk metadata creation with basic properties.
    ///
    /// Builds metadata from a size and an identifier and checks that both
    /// are returned unchanged by their getters, while the optional checksum
    /// and stage start out unset.
    #[test]
    fn test_chunk_metadata_creation() {
        let created = ChunkMetadata::new(1024, String::from("test_chunk")).unwrap();

        assert_eq!(created.chunk_size(), 1024);
        assert_eq!(created.identifier(), "test_chunk");
        assert_eq!(created.checksum(), None);
        assert_eq!(created.stage(), None);
    }

    /// Tests chunk metadata validation rules and constraints.
    ///
    /// Every listed input must be rejected by `ChunkMetadata::new`:
    /// a zero size, an empty identifier, and a whitespace-only identifier.
    #[test]
    fn test_chunk_metadata_validation() {
        let rejected_inputs = [
            (0_usize, "test"), // zero size
            (1024, ""),        // empty identifier
            (1024, "   "),     // whitespace-only identifier
        ];

        for &(size, identifier) in &rejected_inputs {
            let outcome = ChunkMetadata::new(size, identifier.to_string());
            assert!(outcome.is_err());
        }
    }

    /// Tests chunk metadata builder pattern for fluent construction.
    ///
    /// Chains `with_checksum()`, `with_stage()` and `with_attribute()` onto
    /// a freshly created value and checks that the original size and
    /// identifier survive and that each added field can be read back.
    #[test]
    fn test_chunk_metadata_builder_pattern() {
        let base = ChunkMetadata::new(2048, String::from("test_chunk")).unwrap();
        let built = base
            .with_checksum(String::from("abc123"))
            .with_stage(String::from("compression"))
            .with_attribute(String::from("compression_ratio"), String::from("0.7"));

        assert_eq!(built.chunk_size(), 2048);
        assert_eq!(built.identifier(), "test_chunk");
        assert_eq!(built.checksum(), Some("abc123"));
        assert_eq!(built.stage(), Some("compression"));
        assert_eq!(built.get_attribute("compression_ratio"), Some("0.7"));
    }

    /// Tests chunk metadata display formatting and string representation.
    ///
    /// Renders metadata that has a stage set and checks that the identifier,
    /// the size and the stage name all appear in the `Display` output.
    #[test]
    fn test_chunk_metadata_display() {
        let rendered = ChunkMetadata::new(1024, String::from("test_chunk"))
            .unwrap()
            .with_stage(String::from("encryption"))
            .to_string();

        for expected_fragment in ["test_chunk", "1024", "encryption"].iter() {
            assert!(rendered.contains(expected_fragment));
        }
    }
}