hedl_cli/batch/traits.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Batch operation traits.
19
20use crate::error::CliError;
21use std::path::Path;
22
23/// Trait for batch operations on HEDL files.
24///
25/// Implement this trait to define custom batch operations. The operation must be
26/// thread-safe (Send + Sync) to support parallel processing.
27///
28/// # Type Parameters
29///
30/// * `Output` - The type returned on successful processing of a file
31///
32/// # Examples
33///
34/// ```rust
35/// use hedl_cli::batch::BatchOperation;
36/// use hedl_cli::error::CliError;
37/// use std::path::Path;
38///
39/// struct CountLinesOperation;
40///
41/// impl BatchOperation for CountLinesOperation {
42/// type Output = usize;
43///
44/// fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
45/// let content = std::fs::read_to_string(path)
46/// .map_err(|e| CliError::io_error(path, e))?;
47/// Ok(content.lines().count())
48/// }
49///
50/// fn name(&self) -> &str {
51/// "count-lines"
52/// }
53/// }
54/// ```
55pub trait BatchOperation: Send + Sync {
56 /// The output type for successful processing
57 type Output: Send;
58
59 /// Process a single file and return the result.
60 ///
61 /// # Arguments
62 ///
63 /// * `path` - The path to the file to process
64 ///
65 /// # Returns
66 ///
67 /// * `Ok(Output)` - On successful processing
68 /// * `Err(CliError)` - On any error
69 ///
70 /// # Errors
71 ///
72 /// Should return appropriate `CliError` variants for different failure modes.
73 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError>;
74
75 /// Get a human-readable name for this operation.
76 ///
77 /// Used for progress reporting and logging.
78 fn name(&self) -> &str;
79}
80
81/// Trait for streaming batch operations on HEDL files.
82///
83/// Unlike `BatchOperation` which loads entire files into memory,
84/// streaming operations process files incrementally with constant memory usage.
85/// This is ideal for processing large files (>100MB) or when memory is constrained.
86///
87/// # Memory Characteristics
88///
89/// - **Standard operations**: `O(num_threads` × `file_size`)
90/// - **Streaming operations**: `O(buffer_size` + `ID_set`) ≈ constant
91///
92/// # Type Parameters
93///
94/// * `Output` - The type returned on successful processing of a file
95///
96/// # Examples
97///
98/// ```rust
99/// use hedl_cli::batch::StreamingBatchOperation;
100/// use hedl_cli::error::CliError;
101/// use std::path::Path;
102///
103/// struct StreamingCountOperation;
104///
105/// impl StreamingBatchOperation for StreamingCountOperation {
106/// type Output = usize;
107///
108/// fn process_file_streaming(&self, path: &Path) -> Result<Self::Output, CliError> {
109/// use std::io::BufReader;
110/// use std::fs::File;
111/// use hedl_stream::StreamingParser;
112///
113/// let file = File::open(path).map_err(|e| CliError::io_error(path, e))?;
114/// let reader = BufReader::new(file);
115/// let parser = StreamingParser::new(reader)
116/// .map_err(|e| CliError::parse(e.to_string()))?;
117///
118/// let count = parser.filter(|e| {
119/// matches!(e, Ok(hedl_stream::NodeEvent::Node(_)))
120/// }).count();
121///
122/// Ok(count)
123/// }
124///
125/// fn name(&self) -> &str {
126/// "count-streaming"
127/// }
128/// }
129/// ```
130pub trait StreamingBatchOperation: Send + Sync {
131 /// The output type for successful processing
132 type Output: Send;
133
134 /// Process a file using streaming parser.
135 ///
136 /// # Arguments
137 ///
138 /// * `path` - File path to process
139 ///
140 /// # Returns
141 ///
142 /// * `Ok(Output)` - On successful processing
143 /// * `Err(CliError)` - On any error
144 ///
145 /// # Memory Guarantee
146 ///
147 /// Implementations should maintain O(1) memory usage regardless of file size,
148 /// processing the file incrementally using the streaming parser.
149 fn process_file_streaming(&self, path: &Path) -> Result<Self::Output, CliError>;
150
151 /// Get operation name for progress reporting
152 fn name(&self) -> &str;
153
154 /// Indicate if this operation can run in streaming mode.
155 ///
156 /// Some operations (like formatting) may require full document.
157 /// Default: true
158 fn supports_streaming(&self) -> bool {
159 true
160 }
161}