Skip to main content

edgeparse_core/api/
batch.rs

1//! Batch processing API — process multiple PDF files with progress tracking.
2
3use std::path::{Path, PathBuf};
4use std::time::{Duration, Instant};
5
6use crate::api::config::ProcessingConfig;
7use crate::EdgePdfError;
8
/// Outcome recorded for one input file of a batch run.
///
/// [`process_batch`] fills this in from the callback's return value: an `Ok`
/// yields `success == true` with a `page_count`, an `Err` yields
/// `success == false` with an `error` message.
#[derive(Debug, Clone)]
pub struct BatchFileResult {
    /// Path of the input file this result describes.
    pub input_path: PathBuf,
    /// `true` when the file was processed without error.
    pub success: bool,
    /// Failure description; `None` when no error was recorded.
    pub error: Option<String>,
    /// Time spent processing this file.
    pub duration: Duration,
    /// Number of pages reported for the file, when known.
    pub page_count: Option<u32>,
}
23
24/// Aggregate results of a batch processing run.
25#[derive(Debug, Clone)]
26pub struct BatchResult {
27    /// Individual file results.
28    pub files: Vec<BatchFileResult>,
29    /// Total duration of the batch.
30    pub total_duration: Duration,
31}
32
33impl BatchResult {
34    /// Number of successfully processed files.
35    pub fn success_count(&self) -> usize {
36        self.files.iter().filter(|f| f.success).count()
37    }
38
39    /// Number of failed files.
40    pub fn failure_count(&self) -> usize {
41        self.files.iter().filter(|f| !f.success).count()
42    }
43
44    /// Total number of files.
45    pub fn total_count(&self) -> usize {
46        self.files.len()
47    }
48
49    /// Average processing time per file.
50    pub fn avg_duration(&self) -> Duration {
51        if self.files.is_empty() {
52            return Duration::ZERO;
53        }
54        self.total_duration / self.files.len() as u32
55    }
56
57    /// Summary string.
58    pub fn summary(&self) -> String {
59        format!(
60            "Batch complete: {}/{} succeeded, {} failed, {:.1}s total",
61            self.success_count(),
62            self.total_count(),
63            self.failure_count(),
64            self.total_duration.as_secs_f64(),
65        )
66    }
67}
68
69/// A batch processing request.
70#[derive(Debug, Clone)]
71pub struct BatchRequest {
72    /// Input file paths.
73    pub files: Vec<PathBuf>,
74    /// Processing configuration.
75    pub config: ProcessingConfig,
76    /// Output directory (if any).
77    pub output_dir: Option<PathBuf>,
78}
79
80impl BatchRequest {
81    /// Create a new batch request from a list of file paths.
82    pub fn new(files: Vec<PathBuf>, config: ProcessingConfig) -> Self {
83        Self {
84            files,
85            config,
86            output_dir: None,
87        }
88    }
89
90    /// Set the output directory.
91    pub fn with_output_dir(mut self, dir: PathBuf) -> Self {
92        self.output_dir = Some(dir);
93        self
94    }
95}
96
97/// Collect PDF files from a directory (non-recursive).
98pub fn collect_pdf_files(dir: &Path) -> Result<Vec<PathBuf>, EdgePdfError> {
99    let mut files = Vec::new();
100
101    let entries = std::fs::read_dir(dir)?;
102
103    for entry in entries {
104        let entry = entry?;
105        let path = entry.path();
106        if path.is_file() {
107            if let Some(ext) = path.extension() {
108                if ext.eq_ignore_ascii_case("pdf") {
109                    files.push(path);
110                }
111            }
112        }
113    }
114
115    files.sort();
116    Ok(files)
117}
118
119/// Collect PDF files recursively from a directory.
120pub fn collect_pdf_files_recursive(dir: &Path) -> Result<Vec<PathBuf>, EdgePdfError> {
121    let mut files = Vec::new();
122    collect_recursive(dir, &mut files)?;
123    files.sort();
124    Ok(files)
125}
126
127fn collect_recursive(dir: &Path, files: &mut Vec<PathBuf>) -> Result<(), EdgePdfError> {
128    let entries = std::fs::read_dir(dir)?;
129
130    for entry in entries {
131        let entry = entry?;
132        let path = entry.path();
133        if path.is_dir() {
134            collect_recursive(&path, files)?;
135        } else if path.is_file() {
136            if let Some(ext) = path.extension() {
137                if ext.eq_ignore_ascii_case("pdf") {
138                    files.push(path);
139                }
140            }
141        }
142    }
143
144    Ok(())
145}
146
147/// Process a batch of files sequentially (placeholder — actual processing
148/// would call the full pipeline for each file).
149pub fn process_batch<F>(request: &BatchRequest, mut process_fn: F) -> BatchResult
150where
151    F: FnMut(&Path, &ProcessingConfig) -> Result<u32, String>,
152{
153    let batch_start = Instant::now();
154    let mut results = Vec::with_capacity(request.files.len());
155
156    for file_path in &request.files {
157        let file_start = Instant::now();
158        match process_fn(file_path, &request.config) {
159            Ok(page_count) => {
160                results.push(BatchFileResult {
161                    input_path: file_path.clone(),
162                    success: true,
163                    error: None,
164                    duration: file_start.elapsed(),
165                    page_count: Some(page_count),
166                });
167            }
168            Err(e) => {
169                results.push(BatchFileResult {
170                    input_path: file_path.clone(),
171                    success: false,
172                    error: Some(e),
173                    duration: file_start.elapsed(),
174                    page_count: None,
175                });
176            }
177        }
178    }
179
180    BatchResult {
181        files: results,
182        total_duration: batch_start.elapsed(),
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a per-file result for `name`: `Ok` carries a page count,
    /// `Err` carries an error message.
    fn file_result(name: &str, millis: u64, outcome: Result<u32, &str>) -> BatchFileResult {
        BatchFileResult {
            input_path: PathBuf::from(name),
            success: outcome.is_ok(),
            error: outcome.err().map(str::to_string),
            duration: Duration::from_millis(millis),
            page_count: outcome.ok(),
        }
    }

    /// Builds an empty request with the default configuration.
    fn empty_request() -> BatchRequest {
        BatchRequest::new(vec![], ProcessingConfig::default())
    }

    // Two successes and one failure must be reflected in all three counters.
    #[test]
    fn test_batch_result_counts() {
        let files = vec![
            file_result("a.pdf", 100, Ok(5)),
            file_result("b.pdf", 10, Err("bad")),
            file_result("c.pdf", 200, Ok(10)),
        ];
        let result = BatchResult {
            files,
            total_duration: Duration::from_millis(310),
        };

        assert_eq!(result.success_count(), 2);
        assert_eq!(result.failure_count(), 1);
        assert_eq!(result.total_count(), 3);
    }

    // The callback succeeds for "test1" only, so one success and one failure.
    #[test]
    fn test_process_batch() {
        let inputs = vec![PathBuf::from("test1.pdf"), PathBuf::from("test2.pdf")];
        let request = BatchRequest::new(inputs, ProcessingConfig::default());

        let result = process_batch(&request, |path, _config| {
            let name = path.to_str().unwrap();
            if name.contains("test1") {
                Ok(5)
            } else {
                Err("not found".to_string())
            }
        });

        assert_eq!(result.success_count(), 1);
        assert_eq!(result.failure_count(), 1);
    }

    // The builder-style setter stores the output directory.
    #[test]
    fn test_batch_request_with_output() {
        let expected = PathBuf::from("/tmp/output");
        let req = empty_request().with_output_dir(expected.clone());
        assert_eq!(req.output_dir, Some(expected));
    }

    // An empty request produces an empty result.
    #[test]
    fn test_empty_batch() {
        let result = process_batch(&empty_request(), |_, _| Ok(0));
        assert_eq!(result.total_count(), 0);
        assert_eq!(result.success_count(), 0);
    }

    // The summary of an empty batch reports zero of zero files.
    #[test]
    fn test_summary() {
        let result = BatchResult {
            files: vec![],
            total_duration: Duration::from_secs(5),
        };
        assert!(result.summary().contains("0/0"));
    }
}