adaptive_pipeline_domain/value_objects/chunk_metadata.rs
1// /////////////////////////////////////////////////////////////////////////////
2// Adaptive Pipeline
3// Copyright (c) 2025 Michael Gardner, A Bit of Help, Inc.
4// SPDX-License-Identifier: BSD-3-Clause
5// See LICENSE file in the project root.
6// /////////////////////////////////////////////////////////////////////////////
7
8//! # Chunk Metadata Value Object
9//!
10//! This module defines the chunk metadata value object for the adaptive
11//! pipeline system. It encapsulates all metadata needed to track and manage
12//! file chunks throughout the pipeline processing lifecycle.
13//!
14//! ## Overview
15//!
16//! The chunk metadata provides:
17//!
18//! - **Chunk Identification**: Unique identification and description of chunks
19//! - **Size Tracking**: Accurate tracking of chunk sizes and boundaries
20//! - **Integrity Verification**: Checksums and validation for chunk integrity
21//! - **Processing Context**: Context about processing stages and operations
22//! - **Temporal Tracking**: Timestamps for chunk lifecycle management
23//!
24//! ## Architecture
25//!
26//! The metadata follows Domain-Driven Design principles:
27//!
28//! - **Value Object**: Immutable value object with equality semantics
29//! - **Rich Domain Model**: Encapsulates chunk-related business logic
30//! - **Validation**: Comprehensive validation of metadata consistency
31//! - **Serialization**: Support for persistence and transmission
32//!
33//! ## Key Features
34//!
35//! ### Chunk Identification
36//!
37//! - **Unique Identifiers**: Unique identification for each chunk
38//! - **Descriptive Names**: Human-readable chunk descriptions
39//! - **Hierarchical Organization**: Support for chunk hierarchies
40//! - **Context Preservation**: Maintain context across processing stages
41//!
42//! ### Size and Boundary Management
43//!
44//! - **Accurate Sizing**: Precise tracking of chunk sizes in bytes
45//! - **Boundary Information**: Track chunk boundaries within files
46//! - **Compression Tracking**: Track size changes during compression
47//! - **Memory Management**: Support for memory-efficient processing
48//!
49//! ### Integrity and Validation
50//!
51//! - **Checksum Support**: Multiple checksum algorithms for verification
52//! - **Integrity Validation**: Comprehensive integrity checking
53//! - **Corruption Detection**: Detect and report chunk corruption
54//! - **Recovery Information**: Information for chunk recovery
55//!
56//! ## Usage Examples
57//!
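//! ### Creating Chunk Metadata
//!
//! Call `ChunkMetadata::new(chunk_size, identifier)`. It returns a
//! `PipelineError::ValidationError` when the size is zero or the identifier is
//! blank, and otherwise stores the identifier with surrounding whitespace
//! trimmed.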
60//!
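//! ### Working with Attributes
//!
//! `with_attribute(key, value)` inserts a string attribute (replacing any
//! previous value for the same key); `get_attribute(key)` and `attributes()`
//! read attributes back.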
63//!
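//! ### Integrity Verification
//!
//! `with_checksum(checksum)` attaches a caller-computed checksum string and
//! `checksum()` returns it. The methods defined in this module store the value
//! as given; they do not compute or compare checksums.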
66//!
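//! ### Processing Stage Tracking
//!
//! `with_stage(stage)` records the name of a processing stage and `stage()`
//! returns it, or `None` when no stage has been set.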
69//!
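//! ### Serialization and Persistence
//!
//! `ChunkMetadata` derives serde's `Serialize` and `Deserialize`, so it can be
//! written and read with any serde-compatible data format.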
72//!
73//! ## Metadata Attributes
74//!
75//! ### Standard Attributes
76//!
77//! Common attributes used across the system:
78//!
79//! - **compression_ratio**: Compression ratio achieved
80//! - **algorithm**: Algorithm used for processing
81//! - **level**: Processing level or quality setting
82//! - **original_size**: Original size before processing
83//! - **processing_time_ms**: Time taken for processing
84//!
85//! ### Custom Attributes
86//!
87//! Applications can define custom attributes:
88//!
89//! - **Application-specific**: Custom metadata for specific use cases
90//! - **Processing Context**: Context-specific information
91//! - **Performance Metrics**: Custom performance measurements
92//! - **Business Logic**: Domain-specific business information
93//!
94//! ## Integrity Verification
95//!
96//! ### Checksum Algorithms
97//!
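//! Checksum algorithms the pipeline is expected to use (this module stores the
//! checksum as an opaque string and neither computes nor validates it):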
99//!
100//! - **SHA-256**: Primary checksum algorithm
101//! - **Blake3**: High-performance alternative
102//! - **CRC32**: Fast integrity checking
103//! - **MD5**: Legacy support (not recommended)
104//!
105//! ### Verification Process
106//!
107//! 1. **Calculate Checksum**: Calculate checksum of chunk data
108//! 2. **Compare**: Compare with stored checksum
109//! 3. **Validate**: Validate checksum format and algorithm
110//! 4. **Report**: Report verification results
111//!
112//! ## Performance Considerations
113//!
114//! ### Memory Efficiency
115//!
116//! - **Compact Storage**: Efficient storage of metadata
117//! - **Lazy Evaluation**: Lazy evaluation of expensive operations
118//! - **String Interning**: Intern common strings to reduce memory usage
119//!
120//! ### Processing Performance
121//!
122//! - **Fast Access**: Optimized access to metadata fields
123//! - **Efficient Serialization**: Fast serialization/deserialization
124//! - **Minimal Overhead**: Minimal overhead during processing
125//!
126//! ## Validation Rules
127//!
128//! ### Size Validation
129//!
130//! - **Positive Size**: Chunk size must be positive
131//! - **Reasonable Limits**: Size must be within reasonable limits
132//! - **Consistency**: Size must be consistent with actual data
133//!
134//! ### Identifier Validation
135//!
136//! - **Non-empty**: Identifier cannot be empty
137//! - **Valid Characters**: Must contain only valid characters
138//! - **Uniqueness**: Should be unique within context
139//!
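//! ### Checksum Validation
//!
//! These checks are not performed by `ChunkMetadata::validate`, which inspects
//! only the size and identifier; the checksum is stored exactly as supplied:
//!
//! - **Format Validation**: Validate checksum format
//! - **Algorithm Support**: Verify algorithm is supported
//! - **Length Validation**: Validate checksum length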
145//!
146//! ## Error Handling
147//!
148//! ### Validation Errors
149//!
150//! - **Invalid Size**: Chunk size is invalid
151//! - **Invalid Identifier**: Identifier is invalid
152//! - **Invalid Checksum**: Checksum format is invalid
153//! - **Inconsistent Data**: Metadata is inconsistent
154//!
155//! ### Processing Errors
156//!
157//! - **Checksum Calculation**: Errors during checksum calculation
158//! - **Serialization Errors**: Errors during serialization
159//! - **Attribute Errors**: Errors with attribute operations
160//!
161//! ## Integration
162//!
163//! The chunk metadata integrates with:
164//!
165//! - **File Chunks**: Associated with file chunk data
166//! - **Processing Pipeline**: Used throughout processing pipeline
167//! - **Storage Systems**: Persisted with chunk data
168//! - **Monitoring**: Used for monitoring and metrics
169//!
170//! ## Thread Safety
171//!
172//! The chunk metadata is designed for thread safety:
173//!
174//! - **Immutable**: Metadata is immutable after creation
175//! - **Safe Sharing**: Safe to share between threads
176//! - **Concurrent Access**: Safe concurrent access to metadata
177//!
178//! ## Future Enhancements
179//!
180//! Planned enhancements include:
181//!
182//! - **Extended Attributes**: More comprehensive attribute system
183//! - **Compression Metadata**: Enhanced compression-specific metadata
184//! - **Performance Metrics**: Built-in performance metrics
185//! - **Validation Framework**: Enhanced validation capabilities
186
187use chrono::{DateTime, Utc};
188use serde::{Deserialize, Serialize};
189use std::collections::HashMap;
190
191use crate::PipelineError;
192
193/// Metadata associated with a file chunk during processing
194///
195/// This value object encapsulates all metadata needed to track and manage
196/// chunks throughout the pipeline processing lifecycle, following DDD
197/// principles.
198///
199/// # Key Features
200///
201/// - **Chunk Identification**: Unique identification and description
202/// - **Size Tracking**: Accurate size tracking in bytes
203/// - **Integrity Verification**: Checksum-based integrity checking
204/// - **Processing Context**: Track processing stages and operations
205/// - **Temporal Tracking**: Timestamp-based lifecycle management
206/// - **Extensible Attributes**: Custom metadata through key-value attributes
207///
208/// # Examples
209#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
210pub struct ChunkMetadata {
211 /// Size of the chunk in bytes
212 chunk_size: usize,
213
214 /// Identifier or description of the chunk
215 identifier: String,
216
217 /// Checksum for integrity verification
218 checksum: Option<String>,
219
220 /// Processing stage that created this metadata
221 stage: Option<String>,
222
223 /// Timestamp when this metadata was created
224 created_at: DateTime<Utc>,
225
226 /// Additional metadata as key-value pairs
227 attributes: HashMap<String, String>,
228}
229
230impl ChunkMetadata {
231 /// Creates new chunk metadata with required fields
232 ///
233 /// # Arguments
234 /// * `chunk_size` - Size of the chunk in bytes
235 /// * `identifier` - Unique identifier or description for the chunk
236 ///
237 /// # Returns
238 /// * `Result<ChunkMetadata, PipelineError>` - New metadata instance or
239 /// error
240 pub fn new(chunk_size: usize, identifier: String) -> Result<Self, PipelineError> {
241 if chunk_size == 0 {
242 return Err(PipelineError::ValidationError(
243 "Chunk size must be greater than zero".to_string(),
244 ));
245 }
246
247 if identifier.trim().is_empty() {
248 return Err(PipelineError::ValidationError(
249 "Chunk identifier cannot be empty".to_string(),
250 ));
251 }
252
253 Ok(Self {
254 chunk_size,
255 identifier: identifier.trim().to_string(),
256 checksum: None,
257 stage: None,
258 created_at: chrono::Utc::now(),
259 attributes: HashMap::new(),
260 })
261 }
262
263 /// Creates chunk metadata with all fields for testing
264 pub fn new_for_testing(
265 chunk_size: usize,
266 identifier: String,
267 checksum: Option<String>,
268 stage: Option<String>,
269 ) -> Self {
270 Self {
271 chunk_size,
272 identifier,
273 checksum,
274 stage,
275 created_at: chrono::Utc::now(),
276 attributes: HashMap::new(),
277 }
278 }
279
280 /// Gets the chunk size
281 pub fn chunk_size(&self) -> usize {
282 self.chunk_size
283 }
284
285 /// Gets the chunk identifier
286 pub fn identifier(&self) -> &str {
287 &self.identifier
288 }
289
290 /// Gets the checksum if available
291 pub fn checksum(&self) -> Option<&str> {
292 self.checksum.as_deref()
293 }
294
295 /// Gets the processing stage if available
296 pub fn stage(&self) -> Option<&str> {
297 self.stage.as_deref()
298 }
299
300 /// Gets the creation timestamp
301 pub fn created_at(&self) -> DateTime<Utc> {
302 self.created_at
303 }
304
305 /// Sets the checksum for integrity verification
306 pub fn with_checksum(mut self, checksum: String) -> Self {
307 self.checksum = Some(checksum);
308 self
309 }
310
311 /// Sets the processing stage
312 pub fn with_stage(mut self, stage: String) -> Self {
313 self.stage = Some(stage);
314 self
315 }
316
317 /// Adds a custom attribute
318 pub fn with_attribute(mut self, key: String, value: String) -> Self {
319 self.attributes.insert(key, value);
320 self
321 }
322
323 /// Gets a custom attribute
324 pub fn get_attribute(&self, key: &str) -> Option<&str> {
325 self.attributes.get(key).map(|s| s.as_str())
326 }
327
328 /// Gets all attributes
329 pub fn attributes(&self) -> &HashMap<String, String> {
330 &self.attributes
331 }
332
333 /// Validates the metadata integrity
334 pub fn validate(&self) -> Result<(), PipelineError> {
335 if self.chunk_size == 0 {
336 return Err(PipelineError::ValidationError(
337 "Invalid chunk size: must be greater than zero".to_string(),
338 ));
339 }
340
341 if self.identifier.trim().is_empty() {
342 return Err(PipelineError::ValidationError(
343 "Invalid identifier: cannot be empty".to_string(),
344 ));
345 }
346
347 Ok(())
348 }
349}
350
351impl Default for ChunkMetadata {
352 fn default() -> Self {
353 Self {
354 chunk_size: 1024, // 1KB default
355 identifier: "default_chunk".to_string(),
356 checksum: None,
357 stage: None,
358 created_at: chrono::Utc::now(),
359 attributes: HashMap::new(),
360 }
361 }
362}
363
364impl std::fmt::Display for ChunkMetadata {
365 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
366 write!(
367 f,
368 "ChunkMetadata(id: {}, size: {} bytes, stage: {:?})",
369 self.identifier, self.chunk_size, self.stage
370 )
371 }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction through `new` keeps the given size and identifier and
    /// leaves the optional fields unset.
    ///
    /// # Assertions
    ///
    /// - Chunk size matches input
    /// - Identifier matches input
    /// - Checksum is initially `None`
    /// - Stage is initially `None`
    #[test]
    fn test_chunk_metadata_creation() {
        let created = ChunkMetadata::new(1024, String::from("test_chunk"));
        let metadata = created.unwrap();

        assert_eq!(metadata.chunk_size(), 1024);
        assert_eq!(metadata.identifier(), "test_chunk");
        assert!(metadata.checksum().is_none());
        assert!(metadata.stage().is_none());
    }

    /// `new` rejects inputs that violate the size and identifier rules.
    ///
    /// # Assertions
    ///
    /// - Zero size creation fails
    /// - Empty identifier creation fails
    /// - Whitespace-only identifier creation fails
    #[test]
    fn test_chunk_metadata_validation() {
        // A zero-byte chunk is never valid, whatever the identifier.
        assert!(ChunkMetadata::new(0, String::from("test")).is_err());

        // An empty identifier is rejected.
        assert!(ChunkMetadata::new(1024, String::from("")).is_err());

        // An identifier made only of whitespace is treated as empty.
        assert!(ChunkMetadata::new(1024, String::from(" ")).is_err());
    }

    /// The consuming `with_*` builders can be chained and each stores its
    /// value without disturbing the fields set by `new`.
    ///
    /// # Assertions
    ///
    /// - Chunk size is preserved
    /// - Identifier is preserved
    /// - Checksum is set correctly
    /// - Stage is set correctly
    /// - Custom attribute is stored and retrievable
    #[test]
    fn test_chunk_metadata_builder_pattern() {
        let base = ChunkMetadata::new(2048, String::from("test_chunk")).unwrap();
        let with_checksum = base.with_checksum(String::from("abc123"));
        let with_stage = with_checksum.with_stage(String::from("compression"));
        let metadata =
            with_stage.with_attribute(String::from("compression_ratio"), String::from("0.7"));

        assert_eq!(metadata.chunk_size(), 2048);
        assert_eq!(metadata.identifier(), "test_chunk");
        assert_eq!(metadata.checksum(), Some("abc123"));
        assert_eq!(metadata.stage(), Some("compression"));
        assert_eq!(metadata.get_attribute("compression_ratio"), Some("0.7"));
    }

    /// The `Display` output mentions the identifier, the size, and the stage.
    ///
    /// # Assertions
    ///
    /// - Display contains identifier
    /// - Display contains size
    /// - Display contains stage information
    #[test]
    fn test_chunk_metadata_display() {
        let metadata = ChunkMetadata::new(1024, String::from("test_chunk"))
            .unwrap()
            .with_stage(String::from("encryption"));

        let rendered = metadata.to_string();
        assert!(rendered.contains("test_chunk"));
        assert!(rendered.contains("1024"));
        assert!(rendered.contains("encryption"));
    }
}
535}