// hedl_cli/batch.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Batch processing for multiple HEDL files with parallel execution and progress reporting.
19//!
20//! This module provides efficient batch processing capabilities for operations on multiple
21//! HEDL files. It uses Rayon for parallel processing when beneficial and provides real-time
22//! progress reporting with detailed error tracking.
23//!
24//! # Features
25//!
26//! - **Parallel Processing**: Automatic parallelization using Rayon's work-stealing scheduler
27//! - **Progress Reporting**: Real-time progress with file counts and success/failure tracking
28//! - **Error Resilience**: Continues processing on errors, collecting all failures for reporting
29//! - **Performance Optimization**: Intelligent parallel/serial mode selection based on workload
30//! - **Type Safety**: Strongly typed operation definitions with compile-time guarantees
31//!
32//! # Architecture
33//!
34//! The batch processing system uses a functional architecture with:
35//! - Operation trait for extensible batch operations
36//! - Result aggregation with detailed error context
37//! - Atomic counters for thread-safe progress tracking
38//! - Zero-copy file path handling
39//!
40//! # Examples
41//!
42//! ```rust,no_run
43//! use hedl_cli::batch::{BatchProcessor, BatchConfig, ValidationOperation};
44//! use std::path::PathBuf;
45//!
46//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
47//! // Create a batch processor with default configuration
48//! let processor = BatchProcessor::new(BatchConfig::default());
49//!
50//! // Validate multiple files in parallel
51//! let files = vec![
52//! PathBuf::from("file1.hedl"),
53//! PathBuf::from("file2.hedl"),
54//! PathBuf::from("file3.hedl"),
55//! ];
56//!
57//! let operation = ValidationOperation { strict: true };
58//! let results = processor.process(&files, operation, true)?;
59//!
60//! println!("Processed {} files, {} succeeded, {} failed",
61//! results.total_files(),
62//! results.success_count(),
63//! results.failure_count()
64//! );
65//! # Ok(())
66//! # }
67//! ```
68//!
69//! # Performance Characteristics
70//!
71//! - **Small batches (< 10 files)**: Serial processing to avoid overhead
72//! - **Medium batches (10-100 files)**: Parallel with Rayon thread pool
73//! - **Large batches (> 100 files)**: Chunked parallel processing with progress updates
74//!
75//! # Thread Safety
76//!
77//! All progress tracking uses atomic operations for lock-free concurrent access.
78//! Operations are required to be Send + Sync for parallel execution.
79
80use crate::error::CliError;
81use colored::Colorize;
82use rayon::prelude::*;
83use std::path::{Path, PathBuf};
84use std::sync::atomic::{AtomicUsize, Ordering};
85use std::sync::Arc;
86use std::time::Instant;
87
/// Configuration for batch processing operations.
///
/// Controls parallelization strategy, progress reporting, and error handling behavior.
/// Consumed by `BatchProcessor`, which reads it to pick serial vs. parallel
/// execution and to drive progress output.
///
/// # Examples
///
/// ```rust
/// use hedl_cli::batch::BatchConfig;
///
/// // Default configuration (auto parallelization)
/// let config = BatchConfig::default();
///
/// // Custom configuration
/// let config = BatchConfig {
///     parallel_threshold: 5,   // Parallelize if >= 5 files
///     max_threads: Some(4),    // Use at most 4 threads
///     progress_interval: 10,   // Update progress every 10 files
///     verbose: true,           // Show detailed progress
/// };
/// ```
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Minimum number of files to trigger parallel processing.
    ///
    /// Files below this threshold are processed serially to avoid thread pool overhead.
    /// Default: 10
    pub parallel_threshold: usize,

    /// Maximum number of threads to use for parallel processing.
    ///
    /// None means use Rayon's default (typically number of CPU cores).
    /// Applied to Rayon's *global* pool; if the pool was already initialized,
    /// the cap set first wins for the lifetime of the process.
    /// Default: None
    pub max_threads: Option<usize>,

    /// Number of files between progress updates.
    ///
    /// Progress is printed every N files processed. Set to 0 to disable.
    /// Default: 1 (update after each file)
    pub progress_interval: usize,

    /// Enable verbose progress reporting.
    ///
    /// When true, shows file names and detailed status for each file.
    /// Default: false
    pub verbose: bool,
}
134
135impl Default for BatchConfig {
136 fn default() -> Self {
137 Self {
138 parallel_threshold: 10,
139 max_threads: None,
140 progress_interval: 1,
141 verbose: false,
142 }
143 }
144}
145
/// Result of processing a single file in a batch operation.
///
/// Contains the file path and either a success value or an error.
/// Note: the derived `Clone` additionally requires `T: Clone` and
/// `CliError: Clone`.
///
/// # Type Parameters
///
/// * `T` - The success type returned by the operation
#[derive(Debug, Clone)]
pub struct FileResult<T> {
    /// The file path that was processed
    pub path: PathBuf,
    /// The result of processing (Ok or Err)
    pub result: Result<T, CliError>,
}
160
161impl<T> FileResult<T> {
162 /// Create a successful file result.
163 pub fn success(path: PathBuf, value: T) -> Self {
164 Self {
165 path,
166 result: Ok(value),
167 }
168 }
169
170 /// Create a failed file result.
171 pub fn failure(path: PathBuf, error: CliError) -> Self {
172 Self {
173 path,
174 result: Err(error),
175 }
176 }
177
178 /// Check if the result is successful.
179 pub fn is_success(&self) -> bool {
180 self.result.is_ok()
181 }
182
183 /// Check if the result is a failure.
184 pub fn is_failure(&self) -> bool {
185 self.result.is_err()
186 }
187}
188
/// Aggregated results from a batch processing operation.
///
/// Contains all individual file results and provides statistics.
/// NOTE(review): results appear to line up one-to-one with the input file
/// list for both serial and parallel runs — confirm against `BatchProcessor`
/// callers before relying on ordering.
///
/// # Type Parameters
///
/// * `T` - The success type returned by the operation
#[derive(Debug, Clone)]
pub struct BatchResults<T> {
    /// Individual results for each processed file
    pub results: Vec<FileResult<T>>,
    /// Total processing time in milliseconds
    pub elapsed_ms: u128,
}
203
204impl<T> BatchResults<T> {
205 /// Create new batch results from a vector of file results.
206 pub fn new(results: Vec<FileResult<T>>, elapsed_ms: u128) -> Self {
207 Self { results, elapsed_ms }
208 }
209
210 /// Get the total number of files processed.
211 pub fn total_files(&self) -> usize {
212 self.results.len()
213 }
214
215 /// Get the number of successfully processed files.
216 pub fn success_count(&self) -> usize {
217 self.results.iter().filter(|r| r.is_success()).count()
218 }
219
220 /// Get the number of failed files.
221 pub fn failure_count(&self) -> usize {
222 self.results.iter().filter(|r| r.is_failure()).count()
223 }
224
225 /// Check if all files were processed successfully.
226 pub fn all_succeeded(&self) -> bool {
227 self.results.iter().all(|r| r.is_success())
228 }
229
230 /// Check if any files failed.
231 pub fn has_failures(&self) -> bool {
232 self.results.iter().any(|r| r.is_failure())
233 }
234
235 /// Get an iterator over successful results.
236 pub fn successes(&self) -> impl Iterator<Item = &FileResult<T>> {
237 self.results.iter().filter(|r| r.is_success())
238 }
239
240 /// Get an iterator over failed results.
241 pub fn failures(&self) -> impl Iterator<Item = &FileResult<T>> {
242 self.results.iter().filter(|r| r.is_failure())
243 }
244
245 /// Get processing throughput in files per second.
246 pub fn throughput(&self) -> f64 {
247 if self.elapsed_ms == 0 {
248 0.0
249 } else {
250 (self.total_files() as f64) / (self.elapsed_ms as f64 / 1000.0)
251 }
252 }
253}
254
/// Trait for batch operations on HEDL files.
///
/// Implement this trait to define custom batch operations. The operation must be
/// thread-safe (Send + Sync) to support parallel processing: `process_file`
/// may be invoked concurrently from multiple Rayon worker threads.
///
/// # Type Parameters
///
/// * `Output` - The type returned on successful processing of a file
///
/// # Examples
///
/// ```rust
/// use hedl_cli::batch::BatchOperation;
/// use hedl_cli::error::CliError;
/// use std::path::Path;
///
/// struct CountLinesOperation;
///
/// impl BatchOperation for CountLinesOperation {
///     type Output = usize;
///
///     fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
///         let content = std::fs::read_to_string(path)
///             .map_err(|e| CliError::io_error(path, e))?;
///         Ok(content.lines().count())
///     }
///
///     fn name(&self) -> &str {
///         "count-lines"
///     }
/// }
/// ```
pub trait BatchOperation: Send + Sync {
    /// The output type for successful processing
    type Output: Send;

    /// Process a single file and return the result.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to the file to process
    ///
    /// # Returns
    ///
    /// * `Ok(Output)` - On successful processing
    /// * `Err(CliError)` - On any error
    ///
    /// # Errors
    ///
    /// Should return appropriate CliError variants for different failure modes.
    fn process_file(&self, path: &Path) -> Result<Self::Output, CliError>;

    /// Get a human-readable name for this operation.
    ///
    /// Used for progress reporting and logging.
    fn name(&self) -> &str;
}
312
/// Progress tracker for batch operations.
///
/// Uses atomic counters for lock-free concurrent progress tracking; shared
/// across Rayon workers via `Arc` during parallel runs.
#[derive(Debug)]
struct ProgressTracker {
    /// Total number of files in the batch (fixed at construction).
    total: usize,
    /// Files completed so far (successes + failures).
    processed: AtomicUsize,
    /// Files that completed successfully.
    succeeded: AtomicUsize,
    /// Files that returned an error.
    failed: AtomicUsize,
    /// Emit a progress line every `interval` completions; 0 disables reporting.
    interval: usize,
    /// When true, print one status line per file instead of aggregate progress.
    verbose: bool,
    /// Batch start time, used to compute throughput.
    start_time: Instant,
}
326
327impl ProgressTracker {
328 /// Create a new progress tracker.
329 fn new(total: usize, interval: usize, verbose: bool) -> Self {
330 Self {
331 total,
332 processed: AtomicUsize::new(0),
333 succeeded: AtomicUsize::new(0),
334 failed: AtomicUsize::new(0),
335 interval,
336 verbose,
337 start_time: Instant::now(),
338 }
339 }
340
341 /// Record a successful file processing.
342 fn record_success(&self, path: &Path) {
343 let processed = self.processed.fetch_add(1, Ordering::Relaxed) + 1;
344 self.succeeded.fetch_add(1, Ordering::Relaxed);
345
346 if self.should_report(processed) {
347 self.report_progress(path, true);
348 }
349 }
350
351 /// Record a failed file processing.
352 fn record_failure(&self, path: &Path, error: &CliError) {
353 let processed = self.processed.fetch_add(1, Ordering::Relaxed) + 1;
354 self.failed.fetch_add(1, Ordering::Relaxed);
355
356 if self.verbose {
357 eprintln!("{} {} - {}", "✗".red().bold(), path.display(), error);
358 }
359
360 if self.should_report(processed) {
361 self.report_progress(path, false);
362 }
363 }
364
365 /// Check if progress should be reported for this count.
366 fn should_report(&self, processed: usize) -> bool {
367 self.interval > 0 && (processed.is_multiple_of(self.interval) || processed == self.total)
368 }
369
370 /// Report current progress to stderr.
371 fn report_progress(&self, current_file: &Path, success: bool) {
372 let processed = self.processed.load(Ordering::Relaxed);
373 let succeeded = self.succeeded.load(Ordering::Relaxed);
374 let failed = self.failed.load(Ordering::Relaxed);
375 let elapsed = self.start_time.elapsed();
376 let rate = processed as f64 / elapsed.as_secs_f64();
377
378 if self.verbose {
379 let status = if success {
380 "✓".green().bold()
381 } else {
382 "✗".red().bold()
383 };
384 eprintln!(
385 "{} [{}/{}] {} ({:.1} files/s)",
386 status,
387 processed,
388 self.total,
389 current_file.display(),
390 rate
391 );
392 } else {
393 eprintln!(
394 "Progress: [{}/{}] {} succeeded, {} failed ({:.1} files/s)",
395 processed, self.total, succeeded, failed, rate
396 );
397 }
398 }
399
400 /// Print final summary.
401 fn print_summary(&self, operation_name: &str) {
402 let processed = self.processed.load(Ordering::Relaxed);
403 let succeeded = self.succeeded.load(Ordering::Relaxed);
404 let failed = self.failed.load(Ordering::Relaxed);
405 let elapsed = self.start_time.elapsed();
406
407 println!();
408 println!("{}", "═".repeat(60).bright_blue());
409 println!(
410 "{} {}",
411 "Batch Operation:".bright_blue().bold(),
412 operation_name.bright_white()
413 );
414 println!("{}", "═".repeat(60).bright_blue());
415 println!(
416 " {} {}",
417 "Total files:".bright_cyan(),
418 processed.to_string().bright_white()
419 );
420 println!(
421 " {} {}",
422 "Succeeded:".green().bold(),
423 succeeded.to_string().bright_white()
424 );
425 println!(
426 " {} {}",
427 "Failed:".red().bold(),
428 failed.to_string().bright_white()
429 );
430 println!(
431 " {} {:.2}s",
432 "Elapsed:".bright_cyan(),
433 elapsed.as_secs_f64()
434 );
435 println!(
436 " {} {:.1} files/s",
437 "Throughput:".bright_cyan(),
438 processed as f64 / elapsed.as_secs_f64()
439 );
440 println!("{}", "═".repeat(60).bright_blue());
441 }
442}
443
/// High-performance batch processor for HEDL files.
///
/// Orchestrates parallel or serial processing based on configuration and workload.
/// Provides progress tracking and comprehensive error collection.
///
/// # Thread Safety
///
/// BatchProcessor is thread-safe and can be shared across threads via Arc.
///
/// # Examples
///
/// ```rust,no_run
/// use hedl_cli::batch::{BatchProcessor, BatchConfig, ValidationOperation};
/// use std::path::PathBuf;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let processor = BatchProcessor::new(BatchConfig {
///     parallel_threshold: 5,
///     verbose: true,
///     ..Default::default()
/// });
///
/// let files: Vec<PathBuf> = vec![
///     "file1.hedl".into(),
///     "file2.hedl".into(),
/// ];
///
/// let results = processor.process(
///     &files,
///     ValidationOperation { strict: false },
///     true,
/// )?;
///
/// if results.has_failures() {
///     eprintln!("Some files failed validation");
///     for failure in results.failures() {
///         eprintln!("  - {}: {:?}", failure.path.display(), failure.result);
///     }
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct BatchProcessor {
    /// Processing configuration (parallel threshold, thread cap, progress options).
    config: BatchConfig,
}
490
491impl BatchProcessor {
492 /// Create a new batch processor with the given configuration.
493 pub fn new(config: BatchConfig) -> Self {
494 Self { config }
495 }
496
497 /// Create a batch processor with default configuration.
498 pub fn default_config() -> Self {
499 Self::new(BatchConfig::default())
500 }
501
502 /// Process multiple files with the given operation.
503 ///
504 /// Automatically selects parallel or serial processing based on configuration
505 /// and file count. Provides progress reporting and collects all results.
506 ///
507 /// # Arguments
508 ///
509 /// * `files` - Slice of file paths to process
510 /// * `operation` - The operation to perform on each file
511 /// * `show_progress` - Whether to show progress updates
512 ///
513 /// # Returns
514 ///
515 /// * `Ok(BatchResults)` - Always succeeds and collects all individual results
516 /// * `Err(CliError)` - Only on catastrophic failures (e.g., thread pool creation)
517 ///
518 /// # Performance
519 ///
520 /// - Uses parallel processing if `files.len() >= config.parallel_threshold`
521 /// - Serial processing for small batches to avoid thread pool overhead
522 /// - Lock-free progress tracking using atomic counters
523 ///
524 /// # Examples
525 ///
526 /// ```rust,no_run
527 /// use hedl_cli::batch::{BatchProcessor, BatchConfig, FormatOperation};
528 /// use std::path::PathBuf;
529 ///
530 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
531 /// let processor = BatchProcessor::default_config();
532 /// let files = vec![PathBuf::from("a.hedl"), PathBuf::from("b.hedl")];
533 ///
534 /// let results = processor.process(
535 /// &files,
536 /// FormatOperation {
537 /// check: false,
538 /// ditto: true,
539 /// with_counts: false,
540 /// },
541 /// true,
542 /// )?;
543 ///
544 /// println!("Formatted {} files", results.success_count());
545 /// # Ok(())
546 /// # }
547 /// ```
548 pub fn process<O>(
549 &self,
550 files: &[PathBuf],
551 operation: O,
552 show_progress: bool,
553 ) -> Result<BatchResults<O::Output>, CliError>
554 where
555 O: BatchOperation,
556 {
557 let start_time = Instant::now();
558
559 if files.is_empty() {
560 return Ok(BatchResults::new(vec![], 0));
561 }
562
563 // Configure thread pool if max_threads is specified
564 if let Some(max_threads) = self.config.max_threads {
565 rayon::ThreadPoolBuilder::new()
566 .num_threads(max_threads)
567 .build_global()
568 .ok(); // Ignore error if already initialized
569 }
570
571 let results = if files.len() < self.config.parallel_threshold {
572 // Serial processing for small batches
573 self.process_serial(files, &operation, show_progress)
574 } else {
575 // Parallel processing for larger batches
576 self.process_parallel(files, &operation, show_progress)
577 };
578
579 let elapsed_ms = start_time.elapsed().as_millis();
580
581 Ok(BatchResults::new(results, elapsed_ms))
582 }
583
584 /// Process files serially (single-threaded).
585 fn process_serial<O>(
586 &self,
587 files: &[PathBuf],
588 operation: &O,
589 show_progress: bool,
590 ) -> Vec<FileResult<O::Output>>
591 where
592 O: BatchOperation,
593 {
594 let tracker = if show_progress {
595 Some(ProgressTracker::new(
596 files.len(),
597 self.config.progress_interval,
598 self.config.verbose,
599 ))
600 } else {
601 None
602 };
603
604 let results: Vec<FileResult<O::Output>> = files
605 .iter()
606 .map(|path| {
607 let result = operation.process_file(path);
608
609 if let Some(ref t) = tracker {
610 match &result {
611 Ok(_) => t.record_success(path),
612 Err(e) => t.record_failure(path, e),
613 }
614 }
615
616 FileResult {
617 path: path.clone(),
618 result: result.map_err(|e| e.clone()),
619 }
620 })
621 .collect();
622
623 if show_progress {
624 if let Some(tracker) = tracker {
625 tracker.print_summary(operation.name());
626 }
627 }
628
629 results
630 }
631
632 /// Process files in parallel using Rayon.
633 fn process_parallel<O>(
634 &self,
635 files: &[PathBuf],
636 operation: &O,
637 show_progress: bool,
638 ) -> Vec<FileResult<O::Output>>
639 where
640 O: BatchOperation,
641 {
642 let tracker = if show_progress {
643 Some(Arc::new(ProgressTracker::new(
644 files.len(),
645 self.config.progress_interval,
646 self.config.verbose,
647 )))
648 } else {
649 None
650 };
651
652 let results: Vec<FileResult<O::Output>> = files
653 .par_iter()
654 .map(|path| {
655 let result = operation.process_file(path);
656
657 if let Some(ref t) = tracker {
658 match &result {
659 Ok(_) => t.record_success(path),
660 Err(e) => t.record_failure(path, e),
661 }
662 }
663
664 FileResult {
665 path: path.clone(),
666 result: result.map_err(|e| e.clone()),
667 }
668 })
669 .collect();
670
671 if show_progress {
672 if let Some(tracker) = tracker {
673 tracker.print_summary(operation.name());
674 }
675 }
676
677 results
678 }
679}
680
681// ============================================================================
682// Standard Operations
683// ============================================================================
684
/// Batch validation operation.
///
/// Validates multiple HEDL files in parallel, checking syntax and optionally
/// enforcing strict reference resolution.
///
/// On success each file yields `()`; all detail is carried by the error path.
#[derive(Debug, Clone)]
pub struct ValidationOperation {
    /// Enable strict reference validation (sets `ParseOptions::strict_refs`)
    pub strict: bool,
}
694
695impl BatchOperation for ValidationOperation {
696 type Output = ();
697
698 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
699 use hedl_core::{parse_with_limits, ParseOptions};
700
701 let content =
702 std::fs::read_to_string(path).map_err(|e| CliError::io_error(path, e))?;
703
704 let options = ParseOptions {
705 strict_refs: self.strict,
706 ..ParseOptions::default()
707 };
708
709 parse_with_limits(content.as_bytes(), options)
710 .map_err(|e| CliError::parse(e.to_string()))?;
711
712 Ok(())
713 }
714
715 fn name(&self) -> &str {
716 "validate"
717 }
718}
719
/// Batch format operation.
///
/// Formats multiple HEDL files to canonical form, optionally checking if files
/// are already canonical.
///
/// The canonical text is returned as the operation output; this operation
/// never writes files itself.
#[derive(Debug, Clone)]
pub struct FormatOperation {
    /// Only check if files are canonical (don't write); a non-canonical
    /// file is reported as `CliError::NotCanonical`
    pub check: bool,
    /// Use ditto optimization
    pub ditto: bool,
    /// Add count hints to matrix lists
    pub with_counts: bool,
}
733
734impl BatchOperation for FormatOperation {
735 type Output = String;
736
737 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
738 use hedl_c14n::{canonicalize_with_config, CanonicalConfig};
739 use hedl_core::parse;
740
741 let content =
742 std::fs::read_to_string(path).map_err(|e| CliError::io_error(path, e))?;
743
744 let mut doc = parse(content.as_bytes()).map_err(|e| CliError::parse(e.to_string()))?;
745
746 // Add count hints if requested
747 if self.with_counts {
748 add_count_hints(&mut doc);
749 }
750
751 let config = CanonicalConfig::new()
752 .with_ditto(self.ditto);
753
754 let canonical = canonicalize_with_config(&doc, &config)
755 .map_err(|e| CliError::canonicalization(e.to_string()))?;
756
757 if self.check && canonical != content {
758 return Err(CliError::NotCanonical);
759 }
760
761 Ok(canonical)
762 }
763
764 fn name(&self) -> &str {
765 if self.check {
766 "format-check"
767 } else {
768 "format"
769 }
770 }
771}
772
/// Batch lint operation.
///
/// Lints multiple HEDL files for best practices and common issues.
/// The output is the list of rendered diagnostic messages for the file.
#[derive(Debug, Clone)]
pub struct LintOperation {
    /// Treat warnings as errors (any diagnostic fails the whole file)
    pub warn_error: bool,
}
781
782impl BatchOperation for LintOperation {
783 type Output = Vec<String>;
784
785 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
786 use hedl_core::parse;
787 use hedl_lint::lint;
788
789 let content =
790 std::fs::read_to_string(path).map_err(|e| CliError::io_error(path, e))?;
791
792 let doc = parse(content.as_bytes()).map_err(|e| CliError::parse(e.to_string()))?;
793
794 let diagnostics = lint(&doc);
795
796 if self.warn_error && !diagnostics.is_empty() {
797 return Err(CliError::LintErrors);
798 }
799
800 Ok(diagnostics.iter().map(|d| d.to_string()).collect())
801 }
802
803 fn name(&self) -> &str {
804 "lint"
805 }
806}
807
808// ============================================================================
809// Helper Functions for Count Hints
810// ============================================================================
811
812/// Recursively add count hints to all matrix lists in the document
813fn add_count_hints(doc: &mut hedl_core::Document) {
814
815
816 for item in doc.root.values_mut() {
817 add_count_hints_to_item(item);
818 }
819}
820
821/// Recursively add count hints to an item
822fn add_count_hints_to_item(item: &mut hedl_core::Item) {
823 use hedl_core::Item;
824
825 match item {
826 Item::List(list) => {
827 // Set count hint based on actual row count
828 list.count_hint = Some(list.rows.len());
829
830 // Recursively add child counts to each node
831 for node in &mut list.rows {
832 add_child_count_to_node(node);
833 }
834 }
835 Item::Object(map) => {
836 // Recursively process nested objects
837 for nested_item in map.values_mut() {
838 add_count_hints_to_item(nested_item);
839 }
840 }
841 Item::Scalar(_) => {
842 // Scalars don't have matrix lists
843 }
844 }
845}
846
847/// Recursively set child_count on nodes that have children
848fn add_child_count_to_node(node: &mut hedl_core::Node) {
849 // Calculate total number of direct children across all child types
850 let total_children: usize = node.children.values().map(|v| v.len()).sum();
851
852 if total_children > 0 {
853 node.child_count = Some(total_children);
854
855 // Recursively process all child nodes
856 for child_list in node.children.values_mut() {
857 for child_node in child_list {
858 add_child_count_to_node(child_node);
859 }
860 }
861 }
862}
863
#[cfg(test)]
mod tests {
    use super::*;

    // Defaults must match the values documented on BatchConfig.
    #[test]
    fn test_batch_config_default() {
        let config = BatchConfig::default();
        assert_eq!(config.parallel_threshold, 10);
        assert!(config.max_threads.is_none());
        assert_eq!(config.progress_interval, 1);
        assert!(!config.verbose);
    }

    #[test]
    fn test_file_result_success() {
        let result = FileResult::success(PathBuf::from("test.hedl"), 42);
        assert!(result.is_success());
        assert!(!result.is_failure());
        assert_eq!(result.result.unwrap(), 42);
    }

    #[test]
    fn test_file_result_failure() {
        let result: FileResult<()> =
            FileResult::failure(PathBuf::from("test.hedl"), CliError::NotCanonical);
        assert!(!result.is_success());
        assert!(result.is_failure());
        assert!(result.result.is_err());
    }

    // Mixed success/failure batch exercises every statistic accessor.
    #[test]
    fn test_batch_results_statistics() {
        let results = vec![
            FileResult::success(PathBuf::from("a.hedl"), ()),
            FileResult::success(PathBuf::from("b.hedl"), ()),
            FileResult::failure(PathBuf::from("c.hedl"), CliError::NotCanonical),
        ];

        let batch = BatchResults::new(results, 1000);

        assert_eq!(batch.total_files(), 3);
        assert_eq!(batch.success_count(), 2);
        assert_eq!(batch.failure_count(), 1);
        assert!(!batch.all_succeeded());
        assert!(batch.has_failures());
        assert_eq!(batch.successes().count(), 2);
        assert_eq!(batch.failures().count(), 1);
    }

    // Throughput: 2 files in 1000 ms is 2 files/s; zero elapsed time must
    // not divide by zero.
    #[test]
    fn test_batch_results_throughput() {
        let results = vec![
            FileResult::success(PathBuf::from("a.hedl"), ()),
            FileResult::success(PathBuf::from("b.hedl"), ()),
        ];

        let batch = BatchResults::new(results, 1000); // 1 second
        assert!((batch.throughput() - 2.0).abs() < 0.01);

        let batch_zero: BatchResults<()> = BatchResults::new(vec![], 0);
        assert_eq!(batch_zero.throughput(), 0.0);
    }

    // Reporting is due only on interval multiples and on the final file.
    #[test]
    fn test_progress_tracker_should_report() {
        let tracker = ProgressTracker::new(100, 10, false);

        assert!(!tracker.should_report(1));
        assert!(!tracker.should_report(9));
        assert!(tracker.should_report(10)); // Interval boundary
        assert!(tracker.should_report(100)); // End
    }

    // Mock operation for testing: never touches the filesystem, and either
    // echoes the path or fails with NotCanonical depending on `should_fail`.
    struct MockOperation {
        should_fail: bool,
    }

    impl BatchOperation for MockOperation {
        type Output = String;

        fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
            if self.should_fail {
                Err(CliError::NotCanonical)
            } else {
                Ok(path.to_string_lossy().to_string())
            }
        }

        fn name(&self) -> &str {
            "mock"
        }
    }

    // An empty input slice short-circuits to an empty, all-succeeded result.
    #[test]
    fn test_batch_processor_empty() {
        let processor = BatchProcessor::default_config();
        let results = processor
            .process(&[], MockOperation { should_fail: false }, false)
            .unwrap();

        assert_eq!(results.total_files(), 0);
        assert!(results.all_succeeded());
    }

    #[test]
    fn test_batch_processor_serial_success() {
        let processor = BatchProcessor::new(BatchConfig {
            parallel_threshold: 100, // Force serial for small batch
            ..Default::default()
        });

        let files = vec![
            PathBuf::from("a.hedl"),
            PathBuf::from("b.hedl"),
            PathBuf::from("c.hedl"),
        ];

        let results = processor
            .process(&files, MockOperation { should_fail: false }, false)
            .unwrap();

        assert_eq!(results.total_files(), 3);
        assert_eq!(results.success_count(), 3);
        assert_eq!(results.failure_count(), 0);
        assert!(results.all_succeeded());
    }

    // Failures are collected per-file rather than aborting the batch.
    #[test]
    fn test_batch_processor_serial_with_failures() {
        let processor = BatchProcessor::new(BatchConfig {
            parallel_threshold: 100,
            ..Default::default()
        });

        let files = vec![PathBuf::from("a.hedl"), PathBuf::from("b.hedl")];

        let results = processor
            .process(&files, MockOperation { should_fail: true }, false)
            .unwrap();

        assert_eq!(results.total_files(), 2);
        assert_eq!(results.success_count(), 0);
        assert_eq!(results.failure_count(), 2);
        assert!(!results.all_succeeded());
        assert!(results.has_failures());
    }

    // A low threshold forces the Rayon path even for a modest batch.
    #[test]
    fn test_batch_processor_parallel() {
        let processor = BatchProcessor::new(BatchConfig {
            parallel_threshold: 2, // Force parallel
            ..Default::default()
        });

        let files: Vec<PathBuf> = (0..20).map(|i| PathBuf::from(format!("file{}.hedl", i))).collect();

        let results = processor
            .process(&files, MockOperation { should_fail: false }, false)
            .unwrap();

        assert_eq!(results.total_files(), 20);
        assert_eq!(results.success_count(), 20);
    }
}
1028}