// hedl_cli/batch.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Batch processing for multiple HEDL files with parallel execution and progress reporting.
19//!
20//! This module provides efficient batch processing capabilities for operations on multiple
21//! HEDL files. It uses Rayon for parallel processing when beneficial and provides real-time
22//! progress reporting with detailed error tracking.
23//!
24//! # Features
25//!
26//! - **Parallel Processing**: Automatic parallelization using Rayon's work-stealing scheduler
27//! - **Progress Reporting**: Real-time progress with file counts and success/failure tracking
28//! - **Error Resilience**: Continues processing on errors, collecting all failures for reporting
29//! - **Performance Optimization**: Intelligent parallel/serial mode selection based on workload
30//! - **Type Safety**: Strongly typed operation definitions with compile-time guarantees
31//!
32//! # Architecture
33//!
34//! The batch processing system uses a functional architecture with:
35//! - Operation trait for extensible batch operations
36//! - Result aggregation with detailed error context
37//! - Atomic counters for thread-safe progress tracking
38//! - Zero-copy file path handling
39//!
40//! # Examples
41//!
42//! ```rust,no_run
43//! use hedl_cli::batch::{BatchProcessor, BatchConfig, ValidationOperation};
44//! use std::path::PathBuf;
45//!
46//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
47//! // Create a batch processor with default configuration
48//! let processor = BatchProcessor::new(BatchConfig::default());
49//!
50//! // Validate multiple files in parallel
51//! let files = vec![
52//! PathBuf::from("file1.hedl"),
53//! PathBuf::from("file2.hedl"),
54//! PathBuf::from("file3.hedl"),
55//! ];
56//!
57//! let operation = ValidationOperation { strict: true };
58//! let results = processor.process(&files, operation, true)?;
59//!
60//! println!("Processed {} files, {} succeeded, {} failed",
61//! results.total_files(),
62//! results.success_count(),
63//! results.failure_count()
64//! );
65//! # Ok(())
66//! # }
67//! ```
68//!
69//! # Performance Characteristics
70//!
71//! - **Small batches (< 10 files)**: Serial processing to avoid overhead
72//! - **Medium batches (10-100 files)**: Parallel with Rayon thread pool
73//! - **Large batches (> 100 files)**: Chunked parallel processing with progress updates
74//!
75//! # Thread Safety
76//!
77//! All progress tracking uses atomic operations for lock-free concurrent access.
78//! Operations are required to be Send + Sync for parallel execution.
79
80use crate::error::CliError;
81use colored::Colorize;
82use rayon::prelude::*;
83use std::path::{Path, PathBuf};
84use std::sync::atomic::{AtomicUsize, Ordering};
85use std::sync::Arc;
86use std::time::Instant;
87
/// Configuration for batch processing operations.
///
/// Controls parallelization strategy, progress reporting, and error handling behavior.
/// Consumed by `BatchProcessor`, which reads it to pick serial vs. parallel
/// execution and to drive progress output.
///
/// # Examples
///
/// ```rust
/// use hedl_cli::batch::BatchConfig;
///
/// // Default configuration (auto parallelization)
/// let config = BatchConfig::default();
///
/// // Custom configuration
/// let config = BatchConfig {
///     parallel_threshold: 5,   // Parallelize if >= 5 files
///     max_threads: Some(4),    // Use at most 4 threads
///     progress_interval: 10,   // Update progress every 10 files
///     verbose: true,           // Show detailed progress
/// };
/// ```
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Minimum number of files to trigger parallel processing.
    ///
    /// Files below this threshold are processed serially to avoid thread pool overhead.
    /// Default: 10
    pub parallel_threshold: usize,

    /// Maximum number of threads to use for parallel processing.
    ///
    /// None means use Rayon's default (typically number of CPU cores).
    /// Applied to Rayon's *global* pool; if the pool was already initialized,
    /// the cap set first wins for the lifetime of the process.
    /// Default: None
    pub max_threads: Option<usize>,

    /// Number of files between progress updates.
    ///
    /// Progress is printed every N files processed. Set to 0 to disable.
    /// Default: 1 (update after each file)
    pub progress_interval: usize,

    /// Enable verbose progress reporting.
    ///
    /// When true, shows file names and detailed status for each file.
    /// Default: false
    pub verbose: bool,
}
134
135impl Default for BatchConfig {
136 fn default() -> Self {
137 Self {
138 parallel_threshold: 10,
139 max_threads: None,
140 progress_interval: 1,
141 verbose: false,
142 }
143 }
144}
145
/// Result of processing a single file in a batch operation.
///
/// Contains the file path and either a success value or an error.
/// Note: the derived `Clone` additionally requires `T: Clone` and
/// `CliError: Clone`.
///
/// # Type Parameters
///
/// * `T` - The success type returned by the operation
#[derive(Debug, Clone)]
pub struct FileResult<T> {
    /// The file path that was processed
    pub path: PathBuf,
    /// The result of processing (Ok or Err)
    pub result: Result<T, CliError>,
}
160
161impl<T> FileResult<T> {
162 /// Create a successful file result.
163 pub fn success(path: PathBuf, value: T) -> Self {
164 Self {
165 path,
166 result: Ok(value),
167 }
168 }
169
170 /// Create a failed file result.
171 pub fn failure(path: PathBuf, error: CliError) -> Self {
172 Self {
173 path,
174 result: Err(error),
175 }
176 }
177
178 /// Check if the result is successful.
179 pub fn is_success(&self) -> bool {
180 self.result.is_ok()
181 }
182
183 /// Check if the result is a failure.
184 pub fn is_failure(&self) -> bool {
185 self.result.is_err()
186 }
187}
188
/// Aggregated results from a batch processing operation.
///
/// Contains all individual file results and provides statistics.
/// NOTE(review): results appear to line up one-to-one with the input file
/// list for both serial and parallel runs — confirm against `BatchProcessor`
/// callers before relying on ordering.
///
/// # Type Parameters
///
/// * `T` - The success type returned by the operation
#[derive(Debug, Clone)]
pub struct BatchResults<T> {
    /// Individual results for each processed file
    pub results: Vec<FileResult<T>>,
    /// Total processing time in milliseconds
    pub elapsed_ms: u128,
}
203
204impl<T> BatchResults<T> {
205 /// Create new batch results from a vector of file results.
206 pub fn new(results: Vec<FileResult<T>>, elapsed_ms: u128) -> Self {
207 Self { results, elapsed_ms }
208 }
209
210 /// Get the total number of files processed.
211 pub fn total_files(&self) -> usize {
212 self.results.len()
213 }
214
215 /// Get the number of successfully processed files.
216 pub fn success_count(&self) -> usize {
217 self.results.iter().filter(|r| r.is_success()).count()
218 }
219
220 /// Get the number of failed files.
221 pub fn failure_count(&self) -> usize {
222 self.results.iter().filter(|r| r.is_failure()).count()
223 }
224
225 /// Check if all files were processed successfully.
226 pub fn all_succeeded(&self) -> bool {
227 self.results.iter().all(|r| r.is_success())
228 }
229
230 /// Check if any files failed.
231 pub fn has_failures(&self) -> bool {
232 self.results.iter().any(|r| r.is_failure())
233 }
234
235 /// Get an iterator over successful results.
236 pub fn successes(&self) -> impl Iterator<Item = &FileResult<T>> {
237 self.results.iter().filter(|r| r.is_success())
238 }
239
240 /// Get an iterator over failed results.
241 pub fn failures(&self) -> impl Iterator<Item = &FileResult<T>> {
242 self.results.iter().filter(|r| r.is_failure())
243 }
244
245 /// Get processing throughput in files per second.
246 pub fn throughput(&self) -> f64 {
247 if self.elapsed_ms == 0 {
248 0.0
249 } else {
250 (self.total_files() as f64) / (self.elapsed_ms as f64 / 1000.0)
251 }
252 }
253}
254
/// Trait for batch operations on HEDL files.
///
/// Implement this trait to define custom batch operations. The operation must be
/// thread-safe (Send + Sync) to support parallel processing: `process_file`
/// may be invoked concurrently from multiple Rayon worker threads.
///
/// # Type Parameters
///
/// * `Output` - The type returned on successful processing of a file
///
/// # Examples
///
/// ```rust
/// use hedl_cli::batch::BatchOperation;
/// use hedl_cli::error::CliError;
/// use std::path::Path;
///
/// struct CountLinesOperation;
///
/// impl BatchOperation for CountLinesOperation {
///     type Output = usize;
///
///     fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
///         let content = std::fs::read_to_string(path)
///             .map_err(|e| CliError::io_error(path, e))?;
///         Ok(content.lines().count())
///     }
///
///     fn name(&self) -> &str {
///         "count-lines"
///     }
/// }
/// ```
pub trait BatchOperation: Send + Sync {
    /// The output type for successful processing
    type Output: Send;

    /// Process a single file and return the result.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to the file to process
    ///
    /// # Returns
    ///
    /// * `Ok(Output)` - On successful processing
    /// * `Err(CliError)` - On any error
    ///
    /// # Errors
    ///
    /// Should return appropriate CliError variants for different failure modes.
    fn process_file(&self, path: &Path) -> Result<Self::Output, CliError>;

    /// Get a human-readable name for this operation.
    ///
    /// Used for progress reporting and logging.
    fn name(&self) -> &str;
}
312
/// Progress tracker for batch operations.
///
/// Uses atomic counters for lock-free concurrent progress tracking; shared
/// across Rayon workers via `Arc` during parallel runs.
#[derive(Debug)]
struct ProgressTracker {
    /// Total number of files in the batch (fixed at construction).
    total: usize,
    /// Files completed so far (successes + failures).
    processed: AtomicUsize,
    /// Files that completed successfully.
    succeeded: AtomicUsize,
    /// Files that returned an error.
    failed: AtomicUsize,
    /// Emit a progress line every `interval` completions; 0 disables reporting.
    interval: usize,
    /// When true, print one status line per file instead of aggregate progress.
    verbose: bool,
    /// Batch start time, used to compute throughput.
    start_time: Instant,
}
326
327impl ProgressTracker {
328 /// Create a new progress tracker.
329 fn new(total: usize, interval: usize, verbose: bool) -> Self {
330 Self {
331 total,
332 processed: AtomicUsize::new(0),
333 succeeded: AtomicUsize::new(0),
334 failed: AtomicUsize::new(0),
335 interval,
336 verbose,
337 start_time: Instant::now(),
338 }
339 }
340
341 /// Record a successful file processing.
342 fn record_success(&self, path: &Path) {
343 let processed = self.processed.fetch_add(1, Ordering::Relaxed) + 1;
344 self.succeeded.fetch_add(1, Ordering::Relaxed);
345
346 if self.should_report(processed) {
347 self.report_progress(path, true);
348 }
349 }
350
351 /// Record a failed file processing.
352 fn record_failure(&self, path: &Path, error: &CliError) {
353 let processed = self.processed.fetch_add(1, Ordering::Relaxed) + 1;
354 self.failed.fetch_add(1, Ordering::Relaxed);
355
356 if self.verbose {
357 eprintln!("{} {} - {}", "✗".red().bold(), path.display(), error);
358 }
359
360 if self.should_report(processed) {
361 self.report_progress(path, false);
362 }
363 }
364
365 /// Check if progress should be reported for this count.
366 fn should_report(&self, processed: usize) -> bool {
367 self.interval > 0 && (processed.is_multiple_of(self.interval) || processed == self.total)
368 }
369
370 /// Report current progress to stderr.
371 fn report_progress(&self, current_file: &Path, success: bool) {
372 let processed = self.processed.load(Ordering::Relaxed);
373 let succeeded = self.succeeded.load(Ordering::Relaxed);
374 let failed = self.failed.load(Ordering::Relaxed);
375 let elapsed = self.start_time.elapsed();
376 let rate = processed as f64 / elapsed.as_secs_f64();
377
378 if self.verbose {
379 let status = if success {
380 "✓".green().bold()
381 } else {
382 "✗".red().bold()
383 };
384 eprintln!(
385 "{} [{}/{}] {} ({:.1} files/s)",
386 status,
387 processed,
388 self.total,
389 current_file.display(),
390 rate
391 );
392 } else {
393 eprintln!(
394 "Progress: [{}/{}] {} succeeded, {} failed ({:.1} files/s)",
395 processed, self.total, succeeded, failed, rate
396 );
397 }
398 }
399
400 /// Print final summary.
401 fn print_summary(&self, operation_name: &str) {
402 let processed = self.processed.load(Ordering::Relaxed);
403 let succeeded = self.succeeded.load(Ordering::Relaxed);
404 let failed = self.failed.load(Ordering::Relaxed);
405 let elapsed = self.start_time.elapsed();
406
407 println!();
408 println!("{}", "═".repeat(60).bright_blue());
409 println!(
410 "{} {}",
411 "Batch Operation:".bright_blue().bold(),
412 operation_name.bright_white()
413 );
414 println!("{}", "═".repeat(60).bright_blue());
415 println!(
416 " {} {}",
417 "Total files:".bright_cyan(),
418 processed.to_string().bright_white()
419 );
420 println!(
421 " {} {}",
422 "Succeeded:".green().bold(),
423 succeeded.to_string().bright_white()
424 );
425 println!(
426 " {} {}",
427 "Failed:".red().bold(),
428 failed.to_string().bright_white()
429 );
430 println!(
431 " {} {:.2}s",
432 "Elapsed:".bright_cyan(),
433 elapsed.as_secs_f64()
434 );
435 println!(
436 " {} {:.1} files/s",
437 "Throughput:".bright_cyan(),
438 processed as f64 / elapsed.as_secs_f64()
439 );
440 println!("{}", "═".repeat(60).bright_blue());
441 }
442}
443
/// High-performance batch processor for HEDL files.
///
/// Orchestrates parallel or serial processing based on configuration and workload.
/// Provides progress tracking and comprehensive error collection.
///
/// # Thread Safety
///
/// BatchProcessor is thread-safe and can be shared across threads via Arc.
///
/// # Examples
///
/// ```rust,no_run
/// use hedl_cli::batch::{BatchProcessor, BatchConfig, ValidationOperation};
/// use std::path::PathBuf;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let processor = BatchProcessor::new(BatchConfig {
///     parallel_threshold: 5,
///     verbose: true,
///     ..Default::default()
/// });
///
/// let files: Vec<PathBuf> = vec![
///     "file1.hedl".into(),
///     "file2.hedl".into(),
/// ];
///
/// let results = processor.process(
///     &files,
///     ValidationOperation { strict: false },
///     true,
/// )?;
///
/// if results.has_failures() {
///     eprintln!("Some files failed validation");
///     for failure in results.failures() {
///         eprintln!("  - {}: {:?}", failure.path.display(), failure.result);
///     }
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct BatchProcessor {
    /// Processing configuration (parallel threshold, thread cap, progress options).
    config: BatchConfig,
}
490
491impl BatchProcessor {
492 /// Create a new batch processor with the given configuration.
493 pub fn new(config: BatchConfig) -> Self {
494 Self { config }
495 }
496
497 /// Create a batch processor with default configuration.
498 pub fn default_config() -> Self {
499 Self::new(BatchConfig::default())
500 }
501
502 /// Process multiple files with the given operation.
503 ///
504 /// Automatically selects parallel or serial processing based on configuration
505 /// and file count. Provides progress reporting and collects all results.
506 ///
507 /// # Arguments
508 ///
509 /// * `files` - Slice of file paths to process
510 /// * `operation` - The operation to perform on each file
511 /// * `show_progress` - Whether to show progress updates
512 ///
513 /// # Returns
514 ///
515 /// * `Ok(BatchResults)` - Always succeeds and collects all individual results
516 /// * `Err(CliError)` - Only on catastrophic failures (e.g., thread pool creation)
517 ///
518 /// # Performance
519 ///
520 /// - Uses parallel processing if `files.len() >= config.parallel_threshold`
521 /// - Serial processing for small batches to avoid thread pool overhead
522 /// - Lock-free progress tracking using atomic counters
523 ///
524 /// # Examples
525 ///
526 /// ```rust,no_run
527 /// use hedl_cli::batch::{BatchProcessor, BatchConfig, FormatOperation};
528 /// use std::path::PathBuf;
529 ///
530 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
531 /// let processor = BatchProcessor::default_config();
532 /// let files = vec![PathBuf::from("a.hedl"), PathBuf::from("b.hedl")];
533 ///
534 /// let results = processor.process(
535 /// &files,
536 /// FormatOperation {
537 /// check: false,
538 /// ditto: true,
539 /// with_counts: false,
540 /// },
541 /// true,
542 /// )?;
543 ///
544 /// println!("Formatted {} files", results.success_count());
545 /// # Ok(())
546 /// # }
547 /// ```
548 pub fn process<O>(
549 &self,
550 files: &[PathBuf],
551 operation: O,
552 show_progress: bool,
553 ) -> Result<BatchResults<O::Output>, CliError>
554 where
555 O: BatchOperation,
556 {
557 let start_time = Instant::now();
558
559 if files.is_empty() {
560 return Ok(BatchResults::new(vec![], 0));
561 }
562
563 // Configure thread pool if max_threads is specified
564 if let Some(max_threads) = self.config.max_threads {
565 rayon::ThreadPoolBuilder::new()
566 .num_threads(max_threads)
567 .build_global()
568 .ok(); // Ignore error if already initialized
569 }
570
571 let results = if files.len() < self.config.parallel_threshold {
572 // Serial processing for small batches
573 self.process_serial(files, &operation, show_progress)
574 } else {
575 // Parallel processing for larger batches
576 self.process_parallel(files, &operation, show_progress)
577 };
578
579 let elapsed_ms = start_time.elapsed().as_millis();
580
581 Ok(BatchResults::new(results, elapsed_ms))
582 }
583
584 /// Process files serially (single-threaded).
585 fn process_serial<O>(
586 &self,
587 files: &[PathBuf],
588 operation: &O,
589 show_progress: bool,
590 ) -> Vec<FileResult<O::Output>>
591 where
592 O: BatchOperation,
593 {
594 let tracker = if show_progress {
595 Some(ProgressTracker::new(
596 files.len(),
597 self.config.progress_interval,
598 self.config.verbose,
599 ))
600 } else {
601 None
602 };
603
604 let results: Vec<FileResult<O::Output>> = files
605 .iter()
606 .map(|path| {
607 let result = operation.process_file(path);
608
609 if let Some(ref t) = tracker {
610 match &result {
611 Ok(_) => t.record_success(path),
612 Err(e) => t.record_failure(path, e),
613 }
614 }
615
616 FileResult {
617 path: path.clone(),
618 result: result.map_err(|e| e.clone()),
619 }
620 })
621 .collect();
622
623 if show_progress {
624 if let Some(tracker) = tracker {
625 tracker.print_summary(operation.name());
626 }
627 }
628
629 results
630 }
631
632 /// Process files in parallel using Rayon.
633 fn process_parallel<O>(
634 &self,
635 files: &[PathBuf],
636 operation: &O,
637 show_progress: bool,
638 ) -> Vec<FileResult<O::Output>>
639 where
640 O: BatchOperation,
641 {
642 let tracker = if show_progress {
643 Some(Arc::new(ProgressTracker::new(
644 files.len(),
645 self.config.progress_interval,
646 self.config.verbose,
647 )))
648 } else {
649 None
650 };
651
652 let results: Vec<FileResult<O::Output>> = files
653 .par_iter()
654 .map(|path| {
655 let result = operation.process_file(path);
656
657 if let Some(ref t) = tracker {
658 match &result {
659 Ok(_) => t.record_success(path),
660 Err(e) => t.record_failure(path, e),
661 }
662 }
663
664 FileResult {
665 path: path.clone(),
666 result: result.map_err(|e| e.clone()),
667 }
668 })
669 .collect();
670
671 if show_progress {
672 if let Some(tracker) = tracker {
673 tracker.print_summary(operation.name());
674 }
675 }
676
677 results
678 }
679}
680
681// ============================================================================
682// Standard Operations
683// ============================================================================
684
/// Batch validation operation.
///
/// Validates multiple HEDL files in parallel, checking syntax and optionally
/// enforcing strict reference resolution.
///
/// On success each file yields `()`; all detail is carried by the error path.
#[derive(Debug, Clone)]
pub struct ValidationOperation {
    /// Enable strict reference validation (sets `ParseOptions::strict_refs`)
    pub strict: bool,
}
694
695impl BatchOperation for ValidationOperation {
696 type Output = ();
697
698 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
699 use hedl_core::{parse_with_limits, ParseOptions};
700
701 let content =
702 std::fs::read_to_string(path).map_err(|e| CliError::io_error(path, e))?;
703
704 let options = ParseOptions {
705 strict_refs: self.strict,
706 ..ParseOptions::default()
707 };
708
709 parse_with_limits(content.as_bytes(), options)
710 .map_err(|e| CliError::parse(e.to_string()))?;
711
712 Ok(())
713 }
714
715 fn name(&self) -> &str {
716 "validate"
717 }
718}
719
/// Batch format operation.
///
/// Formats multiple HEDL files to canonical form, optionally checking if files
/// are already canonical.
///
/// The canonical text is returned as the operation output; this operation
/// never writes files itself.
#[derive(Debug, Clone)]
pub struct FormatOperation {
    /// Only check if files are canonical (don't write); a non-canonical
    /// file is reported as `CliError::NotCanonical`
    pub check: bool,
    /// Use ditto optimization
    pub ditto: bool,
    /// Add count hints to matrix lists
    pub with_counts: bool,
}
733
734impl BatchOperation for FormatOperation {
735 type Output = String;
736
737 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
738 use hedl_c14n::{canonicalize_with_config, CanonicalConfig};
739 use hedl_core::parse;
740
741 let content =
742 std::fs::read_to_string(path).map_err(|e| CliError::io_error(path, e))?;
743
744 let mut doc = parse(content.as_bytes()).map_err(|e| CliError::parse(e.to_string()))?;
745
746 // Add count hints if requested
747 if self.with_counts {
748 add_count_hints(&mut doc);
749 }
750
751 let config = CanonicalConfig::new()
752 .with_ditto(self.ditto);
753
754 let canonical = canonicalize_with_config(&doc, &config)
755 .map_err(|e| CliError::canonicalization(e.to_string()))?;
756
757 if self.check && canonical != content {
758 return Err(CliError::NotCanonical);
759 }
760
761 Ok(canonical)
762 }
763
764 fn name(&self) -> &str {
765 if self.check {
766 "format-check"
767 } else {
768 "format"
769 }
770 }
771}
772
/// Batch lint operation.
///
/// Lints multiple HEDL files for best practices and common issues.
/// The output is the list of rendered diagnostic messages for the file.
#[derive(Debug, Clone)]
pub struct LintOperation {
    /// Treat warnings as errors (any diagnostic fails the whole file)
    pub warn_error: bool,
}
781
782impl BatchOperation for LintOperation {
783 type Output = Vec<String>;
784
785 fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
786 use hedl_core::parse;
787 use hedl_lint::lint;
788
789 let content =
790 std::fs::read_to_string(path).map_err(|e| CliError::io_error(path, e))?;
791
792 let doc = parse(content.as_bytes()).map_err(|e| CliError::parse(e.to_string()))?;
793
794 let diagnostics = lint(&doc);
795
796 if self.warn_error && !diagnostics.is_empty() {
797 return Err(CliError::LintErrors);
798 }
799
800 Ok(diagnostics.iter().map(|d| d.to_string()).collect())
801 }
802
803 fn name(&self) -> &str {
804 "lint"
805 }
806}
807
808// ============================================================================
809// Helper Functions for Count Hints
810// ============================================================================
811
812/// Recursively add count hints to all matrix lists in the document
813fn add_count_hints(doc: &mut hedl_core::Document) {
814
815
816 for item in doc.root.values_mut() {
817 add_count_hints_to_item(item);
818 }
819}
820
821/// Recursively add count hints to an item
822fn add_count_hints_to_item(item: &mut hedl_core::Item) {
823 use hedl_core::Item;
824
825 match item {
826 Item::List(list) => {
827 // Set count hint based on actual row count
828 list.count_hint = Some(list.rows.len());
829
830 // Recursively add child counts to each node
831 for node in &mut list.rows {
832 add_child_count_to_node(node);
833 }
834 }
835 Item::Object(map) => {
836 // Recursively process nested objects
837 for nested_item in map.values_mut() {
838 add_count_hints_to_item(nested_item);
839 }
840 }
841 Item::Scalar(_) => {
842 // Scalars don't have matrix lists
843 }
844 }
845}
846
847/// Recursively set child_count on nodes that have children
848fn add_child_count_to_node(node: &mut hedl_core::Node) {
849 // Calculate total number of direct children across all child types
850 let total_children: usize = node.children.values().map(|v| v.len()).sum();
851
852 if total_children > 0 {
853 node.child_count = Some(total_children);
854
855 // Recursively process all child nodes
856 for child_list in node.children.values_mut() {
857 for child_node in child_list {
858 add_child_count_to_node(child_node);
859 }
860 }
861 }
862}
863
#[cfg(test)]
mod tests {
    use super::*;

    // Defaults must match the values documented on BatchConfig.
    #[test]
    fn test_batch_config_default() {
        let config = BatchConfig::default();
        assert_eq!(config.parallel_threshold, 10);
        assert!(config.max_threads.is_none());
        assert_eq!(config.progress_interval, 1);
        assert!(!config.verbose);
    }

    #[test]
    fn test_file_result_success() {
        let result = FileResult::success(PathBuf::from("test.hedl"), 42);
        assert!(result.is_success());
        assert!(!result.is_failure());
        assert_eq!(result.result.unwrap(), 42);
    }

    #[test]
    fn test_file_result_failure() {
        let result: FileResult<()> =
            FileResult::failure(PathBuf::from("test.hedl"), CliError::NotCanonical);
        assert!(!result.is_success());
        assert!(result.is_failure());
        assert!(result.result.is_err());
    }

    // Mixed success/failure batch exercises every statistic accessor.
    #[test]
    fn test_batch_results_statistics() {
        let results = vec![
            FileResult::success(PathBuf::from("a.hedl"), ()),
            FileResult::success(PathBuf::from("b.hedl"), ()),
            FileResult::failure(PathBuf::from("c.hedl"), CliError::NotCanonical),
        ];

        let batch = BatchResults::new(results, 1000);

        assert_eq!(batch.total_files(), 3);
        assert_eq!(batch.success_count(), 2);
        assert_eq!(batch.failure_count(), 1);
        assert!(!batch.all_succeeded());
        assert!(batch.has_failures());
        assert_eq!(batch.successes().count(), 2);
        assert_eq!(batch.failures().count(), 1);
    }

    // Throughput: 2 files in 1000 ms is 2 files/s; zero elapsed time must
    // not divide by zero.
    #[test]
    fn test_batch_results_throughput() {
        let results = vec![
            FileResult::success(PathBuf::from("a.hedl"), ()),
            FileResult::success(PathBuf::from("b.hedl"), ()),
        ];

        let batch = BatchResults::new(results, 1000); // 1 second
        assert!((batch.throughput() - 2.0).abs() < 0.01);

        let batch_zero: BatchResults<()> = BatchResults::new(vec![], 0);
        assert_eq!(batch_zero.throughput(), 0.0);
    }

    // Reporting is due only on interval multiples and on the final file.
    #[test]
    fn test_progress_tracker_should_report() {
        let tracker = ProgressTracker::new(100, 10, false);

        assert!(!tracker.should_report(1));
        assert!(!tracker.should_report(9));
        assert!(tracker.should_report(10)); // Interval boundary
        assert!(tracker.should_report(100)); // End
    }

    // Mock operation for testing: never touches the filesystem, and either
    // echoes the path or fails with NotCanonical depending on `should_fail`.
    struct MockOperation {
        should_fail: bool,
    }

    impl BatchOperation for MockOperation {
        type Output = String;

        fn process_file(&self, path: &Path) -> Result<Self::Output, CliError> {
            if self.should_fail {
                Err(CliError::NotCanonical)
            } else {
                Ok(path.to_string_lossy().to_string())
            }
        }

        fn name(&self) -> &str {
            "mock"
        }
    }

    // An empty input slice short-circuits to an empty, all-succeeded result.
    #[test]
    fn test_batch_processor_empty() {
        let processor = BatchProcessor::default_config();
        let results = processor
            .process(&[], MockOperation { should_fail: false }, false)
            .unwrap();

        assert_eq!(results.total_files(), 0);
        assert!(results.all_succeeded());
    }

    #[test]
    fn test_batch_processor_serial_success() {
        let processor = BatchProcessor::new(BatchConfig {
            parallel_threshold: 100, // Force serial for small batch
            ..Default::default()
        });

        let files = vec![
            PathBuf::from("a.hedl"),
            PathBuf::from("b.hedl"),
            PathBuf::from("c.hedl"),
        ];

        let results = processor
            .process(&files, MockOperation { should_fail: false }, false)
            .unwrap();

        assert_eq!(results.total_files(), 3);
        assert_eq!(results.success_count(), 3);
        assert_eq!(results.failure_count(), 0);
        assert!(results.all_succeeded());
    }

    // Failures are collected per-file rather than aborting the batch.
    #[test]
    fn test_batch_processor_serial_with_failures() {
        let processor = BatchProcessor::new(BatchConfig {
            parallel_threshold: 100,
            ..Default::default()
        });

        let files = vec![PathBuf::from("a.hedl"), PathBuf::from("b.hedl")];

        let results = processor
            .process(&files, MockOperation { should_fail: true }, false)
            .unwrap();

        assert_eq!(results.total_files(), 2);
        assert_eq!(results.success_count(), 0);
        assert_eq!(results.failure_count(), 2);
        assert!(!results.all_succeeded());
        assert!(results.has_failures());
    }

    // A low threshold forces the Rayon path even for a modest batch.
    #[test]
    fn test_batch_processor_parallel() {
        let processor = BatchProcessor::new(BatchConfig {
            parallel_threshold: 2, // Force parallel
            ..Default::default()
        });

        let files: Vec<PathBuf> = (0..20).map(|i| PathBuf::from(format!("file{}.hedl", i))).collect();

        let results = processor
            .process(&files, MockOperation { should_fail: false }, false)
            .unwrap();

        assert_eq!(results.total_files(), 20);
        assert_eq!(results.success_count(), 20);
    }
}
1028}