// bnto_core/processor.rs
1// =============================================================================
2// NodeProcessor Trait — The Contract Every Node Type Must Implement
3// =============================================================================
4
5use crate::context::ProcessContext;
6use crate::errors::BntoError;
7use crate::metadata::{NodeCategory, NodeMetadata};
8use crate::progress::ProgressReporter;
9
10// =============================================================================
11// Input and Output Types
12// =============================================================================
13
14/// The input data that a node receives for processing.
15pub struct NodeInput {
16 /// The raw file data (bytes). For an image, this is the JPEG/PNG/WebP
17 /// binary data. For a CSV, this is the UTF-8 text content as bytes.
18 pub data: Vec<u8>,
19
20 /// The original filename (e.g., "photo.jpg", "data.csv").
21 /// Used to determine the file format and to generate output filenames.
22 pub filename: String,
23
24 /// The MIME type of the input (e.g., "image/jpeg", "text/csv").
25 /// `None` when the MIME type wasn't provided by the caller.
26 pub mime_type: Option<String>,
27
28 /// Configuration parameters for the node (e.g., quality level, target
29 /// format, dimensions). A JSON-compatible map where keys are parameter
30 /// names from @bnto/nodes schemas.
31 pub params: serde_json::Map<String, serde_json::Value>,
32}
33
/// The output from a node after processing.
///
/// A node can produce one or more output files. For example, the
/// compress-images node takes one image in and produces one compressed
/// image out. A future "split PDF" node might produce many pages.
///
/// Returned by both `NodeProcessor::process` (single file) and
/// `NodeProcessor::process_batch` (whole batch).
pub struct NodeOutput {
    /// The processed file data. Each entry is one output file.
    pub files: Vec<OutputFile>,

    /// Optional metadata about the processing (timing, compression ratio,
    /// rows removed, etc.). Displayed in the UI's results panel.
    /// An empty map means "no extra metadata to show".
    pub metadata: serde_json::Map<String, serde_json::Value>,
}
47
/// A single output file produced by a node.
///
/// Carried inside [`NodeOutput::files`]; one entry per produced file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OutputFile {
    /// The raw file data (bytes) of the processed output.
    pub data: Vec<u8>,

    /// The filename for this output (e.g., "photo-compressed.jpg").
    pub filename: String,

    /// The MIME type of the output (e.g., "image/jpeg").
    pub mime_type: String,
}
59
/// Input for batch processors that need all files at once (e.g., merge, zip).
///
/// Unlike `NodeInput` (one file), this carries the full set of pipeline files
/// plus the shared configuration parameters. Consumed by
/// `NodeProcessor::process_batch`.
pub struct BatchInput {
    /// All files to process as a group. May be empty; the default
    /// `process_batch` implementation then produces an empty output.
    pub files: Vec<BatchFile>,

    /// Configuration parameters for the node (same as `NodeInput.params`).
    /// Shared across every file in the batch.
    pub params: serde_json::Map<String, serde_json::Value>,
}
71
/// A single file within a batch input.
///
/// Mirrors [`NodeInput`] minus the per-file `params` (batch params are
/// shared and live on [`BatchInput`] instead).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BatchFile {
    /// The raw file data (bytes).
    pub data: Vec<u8>,

    /// The original filename.
    pub filename: String,

    /// The MIME type, if known.
    pub mime_type: Option<String>,
}
83
84// =============================================================================
85// The NodeProcessor Trait
86// =============================================================================
87
88/// The contract that every node type must implement.
89///
90/// Currently synchronous -- async is handled at the Web Worker level.
91/// wasm-bindgen doesn't support async trait methods across the WASM boundary.
92pub trait NodeProcessor {
93 /// The unique name of this node type (e.g., "compress-images").
94 /// Used for logging and progress reporting.
95 fn name(&self) -> &str;
96
97 /// Process a single input file and produce output.
98 ///
99 /// Arguments:
100 /// - `&self` — reference to the node processor instance
101 /// - `input` — the file data, filename, MIME type, and config params
102 /// - `progress` — callback to report progress to the UI (0-100%)
103 /// - `ctx` — system access boundary (commands, temp files, env vars)
104 ///
105 /// Returns:
106 /// - `Ok(NodeOutput)` — processing succeeded, here are the results
107 /// - `Err(BntoError)` — processing failed, here's what went wrong
108 fn process(
109 &self,
110 input: NodeInput,
111 progress: &ProgressReporter,
112 ctx: &dyn ProcessContext,
113 ) -> Result<NodeOutput, BntoError>;
114
115 /// Validate the input parameters before processing.
116 ///
117 /// This is called BEFORE `process()` to catch configuration errors
118 /// early (missing required params, invalid values, etc.) without
119 /// doing any expensive file processing.
120 ///
121 /// Returns a list of validation errors (empty = valid).
122 ///
123 /// Default implementation passes validation. Override in specific
124 /// node types to add parameter validation.
125 fn validate(&self, _params: &serde_json::Map<String, serde_json::Value>) -> Vec<String> {
126 Vec::new()
127 }
128
129 /// Process a batch of files together, producing combined output.
130 ///
131 /// Override this for processors with `InputCardinality::Batch` (merge, zip,
132 /// concat). The default falls back to calling `process()` per file and
133 /// concatenating results — suitable for `PerFile` processors.
134 fn process_batch(
135 &self,
136 input: BatchInput,
137 progress: &ProgressReporter,
138 ctx: &dyn ProcessContext,
139 ) -> Result<NodeOutput, BntoError> {
140 let total = input.files.len();
141 let mut all_files = Vec::new();
142 let mut combined_metadata = serde_json::Map::new();
143
144 for (i, file) in input.files.into_iter().enumerate() {
145 let pct = ((i as u32) * 100) / (total as u32).max(1);
146 progress.report(pct, &format!("Processing file {} of {total}...", i + 1));
147
148 let single_input = NodeInput {
149 data: file.data,
150 filename: file.filename,
151 mime_type: file.mime_type,
152 params: input.params.clone(),
153 };
154 let output = self.process(single_input, progress, ctx)?;
155 all_files.extend(output.files);
156 // Merge metadata from the last file processed.
157 combined_metadata = output.metadata;
158 }
159
160 Ok(NodeOutput {
161 files: all_files,
162 metadata: combined_metadata,
163 })
164 }
165
166 /// Return the processor's self-describing metadata.
167 ///
168 /// This tells consumers everything about this processor: what it's called,
169 /// what category it belongs to, what parameters it accepts, what files it
170 /// handles, and whether it runs in the browser.
171 ///
172 /// Every concrete processor SHOULD override this with its real metadata.
173 /// The default returns a placeholder "unknown" metadata — useful for tests
174 /// and mocks that don't need real metadata.
175 fn metadata(&self) -> NodeMetadata {
176 NodeMetadata {
177 node_type: "unknown".to_string(),
178 name: self.name().to_string(),
179 description: String::new(),
180 category: NodeCategory::Data,
181 accepts: vec![],
182 platforms: vec![],
183 parameters: vec![],
184 input_cardinality: Default::default(),
185 requires: vec![],
186 }
187 }
188}
189
190// =============================================================================
191// Tests
192// =============================================================================
193
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::NoopContext;

    // --- Test helpers ---
    // We create a simple mock processor to test the trait contract.

    /// A mock node processor for testing. Does nothing — just echoes
    /// the input back as output.
    struct EchoProcessor;

    impl NodeProcessor for EchoProcessor {
        fn name(&self) -> &str {
            "echo"
        }

        fn process(
            &self,
            input: NodeInput,
            _progress: &ProgressReporter,
            _ctx: &dyn ProcessContext,
        ) -> Result<NodeOutput, BntoError> {
            // Just echo the input data back as output.
            Ok(NodeOutput {
                files: vec![OutputFile {
                    data: input.data,
                    filename: input.filename,
                    mime_type: input
                        .mime_type
                        .unwrap_or_else(|| "application/octet-stream".to_string()),
                }],
                metadata: serde_json::Map::new(),
            })
        }
    }

    /// A mock processor that always fails — for testing error handling.
    struct FailProcessor;

    impl NodeProcessor for FailProcessor {
        fn name(&self) -> &str {
            "fail"
        }

        fn process(
            &self,
            _input: NodeInput,
            _progress: &ProgressReporter,
            _ctx: &dyn ProcessContext,
        ) -> Result<NodeOutput, BntoError> {
            Err(BntoError::ProcessingFailed(
                "intentional test failure".to_string(),
            ))
        }
    }

    /// Helper to create a simple test input.
    fn make_test_input(data: &[u8], filename: &str) -> NodeInput {
        NodeInput {
            data: data.to_vec(),
            filename: filename.to_string(),
            mime_type: None,
            params: serde_json::Map::new(),
        }
    }

    // --- Tests ---

    #[test]
    fn test_echo_processor_name() {
        let processor = EchoProcessor;
        assert_eq!(processor.name(), "echo");
    }

    #[test]
    fn test_echo_processor_echoes_data() {
        let processor = EchoProcessor;
        let progress = ProgressReporter::new_noop();
        let input = make_test_input(b"hello world", "test.txt");

        let output = processor.process(input, &progress, &NoopContext).unwrap();

        assert_eq!(output.files.len(), 1);
        assert_eq!(output.files[0].data, b"hello world");
        assert_eq!(output.files[0].filename, "test.txt");
    }

    #[test]
    fn test_fail_processor_returns_error() {
        let processor = FailProcessor;
        let progress = ProgressReporter::new_noop();
        let input = make_test_input(b"data", "test.txt");

        let result = processor.process(input, &progress, &NoopContext);
        assert!(result.is_err());

        if let Err(e) = result {
            assert!(e.to_string().contains("intentional test failure"));
        }
    }

    #[test]
    fn test_default_validate_returns_empty() {
        let processor = EchoProcessor;
        let params = serde_json::Map::new();

        // The default validate() should return no errors.
        // (Fix: the argument here was mojibake `¶ms` — `&params` had been
        // corrupted through the HTML entity `&para;` and did not compile.)
        let errors = processor.validate(&params);
        assert!(errors.is_empty());
    }

    // --- Batch Processing Tests ---

    #[test]
    fn test_default_process_batch_falls_back_to_per_file() {
        let processor = EchoProcessor;
        let progress = ProgressReporter::new_noop();
        let input = BatchInput {
            files: vec![
                BatchFile {
                    data: b"file1".to_vec(),
                    filename: "a.txt".to_string(),
                    mime_type: None,
                },
                BatchFile {
                    data: b"file2".to_vec(),
                    filename: "b.txt".to_string(),
                    mime_type: None,
                },
            ],
            params: serde_json::Map::new(),
        };

        let output = processor
            .process_batch(input, &progress, &NoopContext)
            .unwrap();

        // Default batch falls back to per-file: 2 inputs → 2 outputs.
        assert_eq!(output.files.len(), 2);
        assert_eq!(output.files[0].filename, "a.txt");
        assert_eq!(output.files[0].data, b"file1");
        assert_eq!(output.files[1].filename, "b.txt");
        assert_eq!(output.files[1].data, b"file2");
    }

    #[test]
    fn test_default_process_batch_empty_input() {
        let processor = EchoProcessor;
        let progress = ProgressReporter::new_noop();
        let input = BatchInput {
            files: vec![],
            params: serde_json::Map::new(),
        };

        let output = processor
            .process_batch(input, &progress, &NoopContext)
            .unwrap();
        assert_eq!(output.files.len(), 0);
    }

    #[test]
    fn test_default_process_batch_propagates_errors() {
        let processor = FailProcessor;
        let progress = ProgressReporter::new_noop();
        let input = BatchInput {
            files: vec![BatchFile {
                data: b"data".to_vec(),
                filename: "test.txt".to_string(),
                mime_type: None,
            }],
            params: serde_json::Map::new(),
        };

        let result = processor.process_batch(input, &progress, &NoopContext);
        assert!(result.is_err());
    }
}
371}