// adaptive_pipeline/application/use_cases/restore_file.rs
1// /////////////////////////////////////////////////////////////////////////////
2// Adaptive Pipeline
3// Copyright (c) 2025 Michael Gardner, A Bit of Help, Inc.
4// SPDX-License-Identifier: BSD-3-Clause
5// See LICENSE file in the project root.
6// /////////////////////////////////////////////////////////////////////////////
7
8//! # File Restoration Module
9//!
10//! This module provides comprehensive file restoration capabilities for the
11//! adaptive pipeline system. It handles the creation and execution of
12//! restoration pipelines that reverse the processing operations applied to
13//! files, restoring them from the `.adapipe` binary format back to their
14//! original state.
15//!
16//! ## Overview
17//!
18//! The restoration module implements the inverse operations of the processing
19//! pipeline:
20//!
21//! - **Metadata Analysis**: Parses `.adapipe` file headers to understand
22//! processing history
23//! - **Pipeline Reconstruction**: Creates restoration pipelines that reverse
24//! original processing
25//! - **Stage Reversal**: Applies processing stages in reverse order (LIFO)
26//! - **Integrity Validation**: Verifies checksums and data integrity during
27//! restoration
28//! - **Error Recovery**: Handles restoration failures and provides detailed
29//! error reporting
30//!
31//! ## Architecture
32//!
33//! The restoration system follows Domain-Driven Design principles:
34//!
35//! - **Domain Entities**: `Pipeline` serves as the aggregate root for
36//! restoration operations
37//! - **Value Objects**: Type-safe identifiers (`PipelineId`, `StageId`) ensure
38//! correctness
39//! - **Immutability**: Restoration pipelines are immutable once created
40//! - **Error Handling**: Comprehensive validation and error propagation
41//! throughout
42//! - **Separation of Concerns**: Restoration logic is isolated from main
43//! application logic
44//!
45//! ## Restoration Process
46//!
47//! ### 1. Metadata Parsing
48//! The restoration process begins by parsing the `.adapipe` file header to
49//! extract:
50//! - Original processing pipeline configuration
51//! - Processing steps and their parameters
52//! - Checksums for integrity validation
53//! - File metadata and compression information
54//!
55//! ### 2. Pipeline Creation
56//! An ephemeral restoration pipeline is created that:
57//! - Reverses the original processing order (LIFO)
58//! - Configures inverse operations for each stage
59//! - Includes checksum validation stages
60//! - Maintains processing context and metadata
61//!
62//! ### 3. Stage Execution
63//! Processing stages are executed in reverse order:
64//! - **Decompression**: Reverses compression operations
65//! - **Decryption**: Reverses encryption operations
66//! - **Validation**: Verifies checksums and data integrity
67//! - **Output**: Writes restored file to target location
68//!
69//! ## Usage Examples
70//!
71//! ### Basic File Restoration
72
73//!
74//! ### Batch Restoration
75
76//!
77//! ### Advanced Restoration with Validation
78
79//!
80//! ## Error Handling
81//!
82//! The restoration module provides comprehensive error handling for:
83//!
84//! - **Metadata Parsing Errors**: Invalid or corrupted `.adapipe` headers
85//! - **Pipeline Creation Errors**: Invalid processing steps or configurations
86//! - **Stage Configuration Errors**: Unsupported algorithms or parameters
87//! - **Validation Errors**: Checksum mismatches or data corruption
88//! - **I/O Errors**: File access, permission, or disk space issues
89//!
90//! ## Performance Considerations
91//!
92//! - **Memory Usage**: Restoration pipelines are lightweight and ephemeral
93//! - **Processing Order**: LIFO stage execution ensures correct restoration
94//! sequence
95//! - **Streaming**: Large files are processed in chunks to minimize memory
96//! usage
97//! - **Validation**: Checksum validation provides integrity guarantees with
98//! minimal overhead
99//!
100//! ## Security Considerations
101//!
102//! - **Decryption**: Encrypted files require appropriate decryption keys
103//! - **Integrity**: Checksum validation ensures data hasn't been tampered with
104//! - **Permissions**: Restored files maintain appropriate access permissions
105//! - **Audit Trail**: Restoration operations are logged for security auditing
106//!
107//! ## Integration
108//!
109//! The restoration module integrates with:
110//!
111//! - **CLI Interface**: Command-line restoration operations
112//! - **Pipeline System**: Core pipeline execution engine
113//! - **File I/O Services**: Reading `.adapipe` files and writing restored files
114//! - **Validation Services**: Checksum verification and integrity checking
115//! - **Logging System**: Comprehensive operation logging and error reporting
116
117use adaptive_pipeline_domain::entities::pipeline::Pipeline;
118use adaptive_pipeline_domain::entities::pipeline_stage::{PipelineStage, StageConfiguration, StageType};
119use adaptive_pipeline_domain::value_objects::binary_file_format::FileHeader;
120use adaptive_pipeline_domain::PipelineError;
121use chrono::Utc;
122use tracing::info;
123
124type Result<T> = std::result::Result<T, PipelineError>;
125
126/// Creates an ephemeral restoration pipeline from `.adapipe` file metadata.
127///
128/// This function is the core of the restoration system, responsible for
129/// analyzing the processing history stored in `.adapipe` file headers and
130/// creating a corresponding restoration pipeline that can reverse the original
131/// processing operations.
132///
133/// ## Functionality
134///
135/// The function performs the following operations:
136///
137/// 1. **Metadata Analysis**: Parses the file header to extract processing steps
138/// 2. **Pipeline Generation**: Creates a unique restoration pipeline identifier
139/// 3. **Stage Reversal**: Configures processing stages in reverse order (LIFO)
140/// 4. **Validation Setup**: Includes checksum validation stages for integrity
141/// 5. **Error Handling**: Provides comprehensive error reporting and validation
142///
143/// ## Architecture
144///
145/// The function follows Domain-Driven Design principles:
146///
147/// - **Domain Entity**: `Pipeline` serves as the aggregate root for restoration
148/// - **Value Objects**: Type-safe identifiers (`PipelineId`, `StageId`) ensure
149/// correctness
150/// - **Immutability**: Created pipeline stages are immutable and thread-safe
151/// - **Error Handling**: Comprehensive validation with detailed error
152/// propagation
153/// - **Business Logic**: Encapsulates restoration domain knowledge and rules
154///
155/// ## Processing Logic
156///
157/// ### Stage Reversal (LIFO)
158/// Processing stages are applied in reverse order to undo the original
159/// operations:
160/// - **Last Applied First**: The last processing step becomes the first
161/// restoration step
162/// - **Parameter Inversion**: Stage parameters are configured for reverse
163/// operations
164/// - **Checksum Validation**: Automatic inclusion of integrity validation
165/// stages
166///
167/// ### Automatic Stage Management
168/// The pipeline automatically includes:
169/// - **Input Checksum**: Validates `.adapipe` file integrity
170/// - **Output Checksum**: Verifies restored file integrity
171/// - **Processing Stages**: User-defined stages in reverse order
172///
173/// ## Parameters
174///
175/// * `metadata` - File header containing processing history and configuration
176/// - Must contain valid processing steps and pipeline information
177/// - Used to determine the restoration sequence and parameters
178/// - Provides checksums for integrity validation
179///
180/// ## Returns
181///
182/// Returns a `Result<Pipeline>` containing:
183/// - **Success**: Fully configured restoration pipeline ready for execution
184/// - **Error**: Detailed error information if pipeline creation fails
185///
186/// ## Errors
187///
188/// This function can return errors for:
189///
190/// - **Invalid Metadata**: Corrupted or malformed file headers
191/// - **Unsupported Algorithms**: Processing steps with unknown algorithms
192/// - **Configuration Errors**: Invalid stage parameters or configurations
193/// - **Pipeline Creation**: Errors during pipeline assembly
194///
195/// ## Usage Examples
196///
197/// ### Basic Restoration Pipeline
198///
199///
200/// ### Validation and Error Handling
201///
202///
203/// ### Complex Processing History
204///
205///
206/// ## Performance Characteristics
207///
208/// - **Lightweight**: Pipeline creation is fast and memory-efficient
209/// - **Ephemeral**: Pipelines exist only for the duration of restoration
210/// - **Thread-Safe**: Created pipelines are immutable and thread-safe
211/// - **Scalable**: Can handle complex processing histories efficiently
212///
213/// ## Security Considerations
214///
215/// - **Integrity Validation**: Automatic checksum verification
216/// - **Algorithm Validation**: Only supported algorithms are allowed
217/// - **Parameter Validation**: Stage parameters are validated for safety
218/// - **Audit Trail**: Pipeline creation is logged for security auditing
219pub async fn create_restoration_pipeline(metadata: &FileHeader) -> Result<Pipeline> {
220 let mut stages = Vec::new();
221
222 // Generate unique pipeline ID for restoration
223 let pipeline_name = format!("__restore__{}_{}", metadata.pipeline_id, Utc::now().timestamp_millis());
224
225 // Note: Pipeline::new will automatically add input_checksum and output_checksum
226 // stages So we only need to create the user-defined stages
227
228 // 2. Process steps in REVERSE order (LIFO for restoration)
229 let processing_steps = &metadata.processing_steps;
230 for step in processing_steps.iter().rev() {
231 let step_name = step.algorithm.to_lowercase();
232
233 // Skip checksum steps as they're handled separately
234 if step_name.contains("checksum") {
235 info!(
236 "Skipping checksum step: {} (from step order {}) - used for validation only",
237 step.algorithm, step.order
238 );
239 continue;
240 }
241
242 // Handle transformative custom steps (compression, encryption implemented as
243 // custom)
244 let stage_type = if step_name == "compression" {
245 StageType::Compression
246 } else if step_name == "encryption" {
247 StageType::Encryption
248 } else {
249 // For custom algorithms, infer type from algorithm name
250 if step.algorithm.contains("brotli") || step.algorithm.contains("gzip") || step.algorithm.contains("lz4") {
251 StageType::Compression
252 } else if step.algorithm.contains("aes")
253 || step.algorithm.contains("chacha")
254 || step.algorithm.contains("xchacha")
255 {
256 StageType::Encryption
257 } else {
258 // Default to pass-through for unknown algorithms
259 StageType::PassThrough
260 }
261 };
262
263 let stage_name = match stage_type {
264 StageType::Compression => "decompression",
265 StageType::Encryption => "decryption",
266 _ => &step_name,
267 };
268
269 let stage = PipelineStage::new(
270 stage_name.to_string(),
271 stage_type,
272 StageConfiguration {
273 algorithm: step.algorithm.clone(),
274 operation: adaptive_pipeline_domain::entities::Operation::Reverse, // REVERSE for restoration!
275 chunk_size: Some(metadata.chunk_size as usize),
276 parallel_processing: false, // Sequential for restoration
277 parameters: Default::default(),
278 },
279 0, // Order will be set by Pipeline::new
280 )?;
281
282 stages.push(stage);
283 }
284
285 // 3. Verification stage (always present for integrity)
286 let verification_stage = PipelineStage::new(
287 "verification".to_string(),
288 StageType::Checksum,
289 StageConfiguration {
290 algorithm: "sha256".to_string(),
291 operation: adaptive_pipeline_domain::entities::Operation::Reverse, // REVERSE for restoration!
292 chunk_size: Some(metadata.chunk_size as usize),
293 parallel_processing: false,
294 parameters: Default::default(),
295 },
296 0, // Order will be set by Pipeline::new
297 )?;
298 stages.push(verification_stage);
299
300 // Create pipeline with restoration stages (input_checksum and output_checksum
301 // will be added automatically)
302 let pipeline = Pipeline::new(pipeline_name, stages)?;
303
304 info!(
305 "Created restoration pipeline with {} stages for file: {}",
306 pipeline.stages().len(),
307 metadata.original_filename
308 );
309
310 Ok(pipeline)
311}