Skip to main content

hedl_cli/batch/
traits.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Batch operation traits.
19
20use crate::error::CliError;
21use std::path::Path;
22
23/// Trait for batch operations on HEDL files.
24///
25/// Implement this trait to define custom batch operations. The operation must be
26/// thread-safe (Send + Sync) to support parallel processing.
27///
28/// # Type Parameters
29///
30/// * `Output` - The type returned on successful processing of a file
31///
32/// # Examples
33///
34/// ```rust
35/// use hedl_cli::batch::BatchOperation;
36/// use hedl_cli::error::CliError;
37/// use std::path::Path;
38///
39/// struct CountLinesOperation;
40///
41/// impl BatchOperation for CountLinesOperation {
42///     type Output = usize;
43///
44///     fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
45///         let content = std::fs::read_to_string(path)
46///             .map_err(|e| CliError::io_error(path, e))?;
47///         Ok(content.lines().count())
48///     }
49///
50///     fn name(&self) -> &str {
51///         "count-lines"
52///     }
53/// }
54/// ```
55pub trait BatchOperation: Send + Sync {
56    /// The output type for successful processing
57    type Output: Send;
58
59    /// Process a single file and return the result.
60    ///
61    /// # Arguments
62    ///
63    /// * `path` - The path to the file to process
64    ///
65    /// # Returns
66    ///
67    /// * `Ok(Output)` - On successful processing
68    /// * `Err(CliError)` - On any error
69    ///
70    /// # Errors
71    ///
72    /// Should return appropriate `CliError` variants for different failure modes.
73    fn process_file(&self, path: &Path) -> Result<Self::Output, CliError>;
74
75    /// Get a human-readable name for this operation.
76    ///
77    /// Used for progress reporting and logging.
78    fn name(&self) -> &str;
79}
80
81/// Trait for streaming batch operations on HEDL files.
82///
83/// Unlike `BatchOperation` which loads entire files into memory,
84/// streaming operations process files incrementally with constant memory usage.
85/// This is ideal for processing large files (>100MB) or when memory is constrained.
86///
87/// # Memory Characteristics
88///
89/// - **Standard operations**: `O(num_threads` × `file_size`)
90/// - **Streaming operations**: `O(buffer_size` + `ID_set`) ≈ constant
91///
92/// # Type Parameters
93///
94/// * `Output` - The type returned on successful processing of a file
95///
96/// # Examples
97///
98/// ```rust
99/// use hedl_cli::batch::StreamingBatchOperation;
100/// use hedl_cli::error::CliError;
101/// use std::path::Path;
102///
103/// struct StreamingCountOperation;
104///
105/// impl StreamingBatchOperation for StreamingCountOperation {
106///     type Output = usize;
107///
108///     fn process_file_streaming(&self, path: &Path) -> Result<Self::Output, CliError> {
109///         use std::io::BufReader;
110///         use std::fs::File;
111///         use hedl_stream::StreamingParser;
112///
113///         let file = File::open(path).map_err(|e| CliError::io_error(path, e))?;
114///         let reader = BufReader::new(file);
115///         let parser = StreamingParser::new(reader)
116///             .map_err(|e| CliError::parse(e.to_string()))?;
117///
118///         let count = parser.filter(|e| {
119///             matches!(e, Ok(hedl_stream::NodeEvent::Node(_)))
120///         }).count();
121///
122///         Ok(count)
123///     }
124///
125///     fn name(&self) -> &str {
126///         "count-streaming"
127///     }
128/// }
129/// ```
130pub trait StreamingBatchOperation: Send + Sync {
131    /// The output type for successful processing
132    type Output: Send;
133
134    /// Process a file using streaming parser.
135    ///
136    /// # Arguments
137    ///
138    /// * `path` - File path to process
139    ///
140    /// # Returns
141    ///
142    /// * `Ok(Output)` - On successful processing
143    /// * `Err(CliError)` - On any error
144    ///
145    /// # Memory Guarantee
146    ///
147    /// Implementations should maintain O(1) memory usage regardless of file size,
148    /// processing the file incrementally using the streaming parser.
149    fn process_file_streaming(&self, path: &Path) -> Result<Self::Output, CliError>;
150
151    /// Get operation name for progress reporting
152    fn name(&self) -> &str;
153
154    /// Indicate if this operation can run in streaming mode.
155    ///
156    /// Some operations (like formatting) may require full document.
157    /// Default: true
158    fn supports_streaming(&self) -> bool {
159        true
160    }
161}