//! bnto_core/processor.rs
1// =============================================================================
2// NodeProcessor Trait — The Contract Every Node Type Must Implement
3// =============================================================================
4
5use crate::context::ProcessContext;
6use crate::errors::BntoError;
7use crate::metadata::{NodeCategory, NodeMetadata};
8use crate::progress::ProgressReporter;
9
10// =============================================================================
11// Input and Output Types
12// =============================================================================
13
14/// The input data that a node receives for processing.
15pub struct NodeInput {
16    /// The raw file data (bytes). For an image, this is the JPEG/PNG/WebP
17    /// binary data. For a CSV, this is the UTF-8 text content as bytes.
18    pub data: Vec<u8>,
19
20    /// The original filename (e.g., "photo.jpg", "data.csv").
21    /// Used to determine the file format and to generate output filenames.
22    pub filename: String,
23
24    /// The MIME type of the input (e.g., "image/jpeg", "text/csv").
25    /// `None` when the MIME type wasn't provided by the caller.
26    pub mime_type: Option<String>,
27
28    /// Configuration parameters for the node (e.g., quality level, target
29    /// format, dimensions). A JSON-compatible map where keys are parameter
30    /// names from @bnto/nodes schemas.
31    pub params: serde_json::Map<String, serde_json::Value>,
32}
33
34/// The output from a node after processing.
35///
36/// A node can produce one or more output files. For example, the
37/// compress-images node takes one image in and produces one compressed
38/// image out. A future "split PDF" node might produce many pages.
39pub struct NodeOutput {
40    /// The processed file data. Each entry is one output file.
41    pub files: Vec<OutputFile>,
42
43    /// Optional metadata about the processing (timing, compression ratio,
44    /// rows removed, etc.). Displayed in the UI's results panel.
45    pub metadata: serde_json::Map<String, serde_json::Value>,
46}
47
/// A single output file produced by a node.
#[derive(Debug, Clone)]
pub struct OutputFile {
    /// The raw file data (bytes) of the processed output.
    pub data: Vec<u8>,

    /// The filename for this output (e.g., "photo-compressed.jpg").
    pub filename: String,

    /// The MIME type of the output (e.g., "image/jpeg").
    pub mime_type: String,
}
59
60/// Input for batch processors that need all files at once (e.g., merge, zip).
61///
62/// Unlike `NodeInput` (one file), this carries the full set of pipeline files
63/// plus the shared configuration parameters.
64pub struct BatchInput {
65    /// All files to process as a group.
66    pub files: Vec<BatchFile>,
67
68    /// Configuration parameters for the node (same as `NodeInput.params`).
69    pub params: serde_json::Map<String, serde_json::Value>,
70}
71
/// A single file within a batch input.
#[derive(Debug, Clone)]
pub struct BatchFile {
    /// The raw file data (bytes).
    pub data: Vec<u8>,

    /// The original filename.
    pub filename: String,

    /// The MIME type, if known.
    pub mime_type: Option<String>,
}
83
84// =============================================================================
85// The NodeProcessor Trait
86// =============================================================================
87
88/// The contract that every node type must implement.
89///
90/// Currently synchronous -- async is handled at the Web Worker level.
91/// wasm-bindgen doesn't support async trait methods across the WASM boundary.
92pub trait NodeProcessor {
93    /// The unique name of this node type (e.g., "compress-images").
94    /// Used for logging and progress reporting.
95    fn name(&self) -> &str;
96
97    /// Process a single input file and produce output.
98    ///
99    /// Arguments:
100    ///   - `&self` — reference to the node processor instance
101    ///   - `input` — the file data, filename, MIME type, and config params
102    ///   - `progress` — callback to report progress to the UI (0-100%)
103    ///   - `ctx` — system access boundary (commands, temp files, env vars)
104    ///
105    /// Returns:
106    ///   - `Ok(NodeOutput)` — processing succeeded, here are the results
107    ///   - `Err(BntoError)` — processing failed, here's what went wrong
108    fn process(
109        &self,
110        input: NodeInput,
111        progress: &ProgressReporter,
112        ctx: &dyn ProcessContext,
113    ) -> Result<NodeOutput, BntoError>;
114
115    /// Validate the input parameters before processing.
116    ///
117    /// This is called BEFORE `process()` to catch configuration errors
118    /// early (missing required params, invalid values, etc.) without
119    /// doing any expensive file processing.
120    ///
121    /// Returns a list of validation errors (empty = valid).
122    ///
123    /// Default implementation passes validation. Override in specific
124    /// node types to add parameter validation.
125    fn validate(&self, _params: &serde_json::Map<String, serde_json::Value>) -> Vec<String> {
126        Vec::new()
127    }
128
129    /// Process a batch of files together, producing combined output.
130    ///
131    /// Override this for processors with `InputCardinality::Batch` (merge, zip,
132    /// concat). The default falls back to calling `process()` per file and
133    /// concatenating results — suitable for `PerFile` processors.
134    fn process_batch(
135        &self,
136        input: BatchInput,
137        progress: &ProgressReporter,
138        ctx: &dyn ProcessContext,
139    ) -> Result<NodeOutput, BntoError> {
140        let total = input.files.len();
141        let mut all_files = Vec::new();
142        let mut combined_metadata = serde_json::Map::new();
143
144        for (i, file) in input.files.into_iter().enumerate() {
145            let pct = ((i as u32) * 100) / (total as u32).max(1);
146            progress.report(pct, &format!("Processing file {} of {total}...", i + 1));
147
148            let single_input = NodeInput {
149                data: file.data,
150                filename: file.filename,
151                mime_type: file.mime_type,
152                params: input.params.clone(),
153            };
154            let output = self.process(single_input, progress, ctx)?;
155            all_files.extend(output.files);
156            // Merge metadata from the last file processed.
157            combined_metadata = output.metadata;
158        }
159
160        Ok(NodeOutput {
161            files: all_files,
162            metadata: combined_metadata,
163        })
164    }
165
166    /// Return the processor's self-describing metadata.
167    ///
168    /// This tells consumers everything about this processor: what it's called,
169    /// what category it belongs to, what parameters it accepts, what files it
170    /// handles, and whether it runs in the browser.
171    ///
172    /// Every concrete processor SHOULD override this with its real metadata.
173    /// The default returns a placeholder "unknown" metadata — useful for tests
174    /// and mocks that don't need real metadata.
175    fn metadata(&self) -> NodeMetadata {
176        NodeMetadata {
177            node_type: "unknown".to_string(),
178            name: self.name().to_string(),
179            description: String::new(),
180            category: NodeCategory::Data,
181            accepts: vec![],
182            platforms: vec![],
183            parameters: vec![],
184            input_cardinality: Default::default(),
185            requires: vec![],
186        }
187    }
188}
189
190// =============================================================================
191// Tests
192// =============================================================================
193
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::NoopContext;

    // --- Test helpers ---
    // A pair of minimal mock processors exercises the trait contract from
    // both the success and the failure side.

    /// Mock processor: hands its input straight back as a single output file.
    struct EchoProcessor;

    impl NodeProcessor for EchoProcessor {
        fn name(&self) -> &str {
            "echo"
        }

        fn process(
            &self,
            input: NodeInput,
            _progress: &ProgressReporter,
            _ctx: &dyn ProcessContext,
        ) -> Result<NodeOutput, BntoError> {
            // Fall back to a generic MIME type when none was supplied.
            let mime = input
                .mime_type
                .unwrap_or_else(|| "application/octet-stream".to_string());
            let echoed = OutputFile {
                data: input.data,
                filename: input.filename,
                mime_type: mime,
            };
            Ok(NodeOutput {
                files: vec![echoed],
                metadata: serde_json::Map::new(),
            })
        }
    }

    /// Mock processor that always errors — exercises the failure path.
    struct FailProcessor;

    impl NodeProcessor for FailProcessor {
        fn name(&self) -> &str {
            "fail"
        }

        fn process(
            &self,
            _input: NodeInput,
            _progress: &ProgressReporter,
            _ctx: &dyn ProcessContext,
        ) -> Result<NodeOutput, BntoError> {
            Err(BntoError::ProcessingFailed(
                "intentional test failure".to_string(),
            ))
        }
    }

    /// Build a minimal `NodeInput` from raw bytes and a filename.
    fn make_test_input(data: &[u8], filename: &str) -> NodeInput {
        NodeInput {
            data: data.to_vec(),
            filename: filename.to_string(),
            mime_type: None,
            params: serde_json::Map::new(),
        }
    }

    // --- Tests ---

    #[test]
    fn test_echo_processor_name() {
        assert_eq!(EchoProcessor.name(), "echo");
    }

    #[test]
    fn test_echo_processor_echoes_data() {
        let progress = ProgressReporter::new_noop();
        let input = make_test_input(b"hello world", "test.txt");

        let output = EchoProcessor
            .process(input, &progress, &NoopContext)
            .unwrap();

        // Exactly one output, carrying the original bytes and name.
        assert_eq!(output.files.len(), 1);
        let echoed = &output.files[0];
        assert_eq!(echoed.data, b"hello world");
        assert_eq!(echoed.filename, "test.txt");
    }

    #[test]
    fn test_fail_processor_returns_error() {
        let progress = ProgressReporter::new_noop();
        let input = make_test_input(b"data", "test.txt");

        let result = FailProcessor.process(input, &progress, &NoopContext);
        assert!(result.is_err());

        // The error message should surface the failure reason.
        let message = result.unwrap_err().to_string();
        assert!(message.contains("intentional test failure"));
    }

    #[test]
    fn test_default_validate_returns_empty() {
        // The trait's default validate() accepts any parameter map.
        let errors = EchoProcessor.validate(&serde_json::Map::new());
        assert!(errors.is_empty());
    }

    // --- Batch Processing Tests ---

    #[test]
    fn test_default_process_batch_falls_back_to_per_file() {
        let progress = ProgressReporter::new_noop();
        let batch = BatchInput {
            files: [("a.txt", b"file1".as_slice()), ("b.txt", b"file2".as_slice())]
                .into_iter()
                .map(|(name, bytes)| BatchFile {
                    data: bytes.to_vec(),
                    filename: name.to_string(),
                    mime_type: None,
                })
                .collect(),
            params: serde_json::Map::new(),
        };

        let output = EchoProcessor
            .process_batch(batch, &progress, &NoopContext)
            .unwrap();

        // Default batch falls back to per-file: 2 inputs → 2 outputs.
        assert_eq!(output.files.len(), 2);
        assert_eq!(output.files[0].filename, "a.txt");
        assert_eq!(output.files[0].data, b"file1");
        assert_eq!(output.files[1].filename, "b.txt");
        assert_eq!(output.files[1].data, b"file2");
    }

    #[test]
    fn test_default_process_batch_empty_input() {
        let progress = ProgressReporter::new_noop();
        let batch = BatchInput {
            files: Vec::new(),
            params: serde_json::Map::new(),
        };

        // No inputs → no outputs, and no error.
        let output = EchoProcessor
            .process_batch(batch, &progress, &NoopContext)
            .unwrap();
        assert!(output.files.is_empty());
    }

    #[test]
    fn test_default_process_batch_propagates_errors() {
        let progress = ProgressReporter::new_noop();
        let batch = BatchInput {
            files: vec![BatchFile {
                data: b"data".to_vec(),
                filename: "test.txt".to_string(),
                mime_type: None,
            }],
            params: serde_json::Map::new(),
        };

        // A per-file failure must bubble out of the batch wrapper.
        assert!(FailProcessor
            .process_batch(batch, &progress, &NoopContext)
            .is_err());
    }
}