Skip to main content

oxidize_pdf/operations/
mod.rs

//! PDF operations module
//!
//! This module provides high-level operations for manipulating PDF documents,
//! such as splitting, merging, rotating pages, and reordering.
5
6pub mod extract_images;
7pub mod merge;
8pub mod page_analysis;
9pub mod page_extraction;
10pub mod pdf_ocr_converter;
11pub mod reorder;
12pub mod rotate;
13pub mod split;
14
15pub use extract_images::{
16    extract_images_from_pages, extract_images_from_pdf, ExtractImagesOptions, ExtractedImage,
17    ImageExtractor,
18};
19pub use merge::{merge_pdf_files, merge_pdfs, MergeInput, MergeOptions, PdfMerger};
20pub use page_analysis::{AnalysisOptions, ContentAnalysis, PageContentAnalyzer, PageType};
21pub use page_extraction::{
22    extract_page, extract_page_range, extract_page_range_to_file, extract_page_to_file,
23    extract_pages, extract_pages_to_file, PageExtractionOptions, PageExtractor,
24};
25pub use pdf_ocr_converter::{ConversionOptions, ConversionResult, PdfOcrConverter};
26pub use reorder::{
27    move_pdf_page, reorder_pdf_pages, reverse_pdf_pages, swap_pdf_pages, PageReorderer,
28    ReorderOptions,
29};
30pub use rotate::{rotate_all_pages, rotate_pdf_pages, PageRotator, RotateOptions, RotationAngle};
31pub use split::{split_into_pages, split_pdf, PdfSplitter, SplitMode, SplitOptions};
32
33use crate::error::PdfError;
34
35/// Result type for operations
36pub type OperationResult<T> = Result<T, OperationError>;
37
38/// Operation-specific errors
39#[derive(Debug, thiserror::Error)]
40pub enum OperationError {
41    /// Page index out of bounds
42    #[error("Page index {0} out of bounds (document has {1} pages)")]
43    PageIndexOutOfBounds(usize, usize),
44
45    /// Invalid page range
46    #[error("Invalid page range: {0}")]
47    InvalidPageRange(String),
48
49    /// No pages to process
50    #[error("No pages to process")]
51    NoPagesToProcess,
52
53    /// Resource conflict during merge
54    #[error("Resource conflict: {0}")]
55    ResourceConflict(String),
56
57    /// Invalid rotation angle
58    #[error("Invalid rotation angle: {0} (must be 0, 90, 180, or 270)")]
59    InvalidRotation(i32),
60
61    /// Parse error
62    #[error("Parse error: {0}")]
63    ParseError(String),
64
65    /// Invalid file path
66    #[error("Invalid file path: {reason}")]
67    InvalidPath { reason: String },
68
69    /// IO error
70    #[error("IO error: {0}")]
71    Io(#[from] std::io::Error),
72
73    /// Core PDF error
74    #[error("PDF error: {0}")]
75    PdfError(#[from] PdfError),
76
77    /// General processing error
78    #[error("Processing error: {0}")]
79    ProcessingError(String),
80}
81
/// A selection of pages within a document.
///
/// Every index stored in a variant is 0-based. Values can be built directly
/// or obtained from a 1-based textual specification via [`PageRange::parse`].
///
/// Two selections compare equal when they are the same variant holding the
/// same indices; for [`PageRange::List`] the order of the entries matters.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PageRange {
    /// Every page of the document.
    All,
    /// Exactly one page (0-based index).
    Single(usize),
    /// A contiguous span of pages, both ends inclusive (0-based indices,
    /// start first, then end).
    Range(usize, usize),
    /// An explicit list of pages (0-based indices), kept in the given order.
    List(Vec<usize>),
}
94
95impl PageRange {
96    /// Parse a page range from a string
97    ///
98    /// Examples:
99    /// - "all" -> All pages
100    /// - "1" -> Single page (converts to 0-based)
101    /// - "1-5" -> Range of pages (converts to 0-based)
102    /// - "1,3,5" -> List of pages (converts to 0-based)
103    pub fn parse(s: &str) -> Result<Self, OperationError> {
104        let s = s.trim();
105
106        if s.eq_ignore_ascii_case("all") {
107            return Ok(PageRange::All);
108        }
109
110        // Try single page
111        if let Ok(page) = s.parse::<usize>() {
112            if page == 0 {
113                return Err(OperationError::InvalidPageRange(
114                    "Page numbers start at 1".to_string(),
115                ));
116            }
117            return Ok(PageRange::Single(page - 1));
118        }
119
120        // Try range (e.g., "1-5")
121        if let Some((start, end)) = s.split_once('-') {
122            let start = start
123                .trim()
124                .parse::<usize>()
125                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid start: {start}")))?;
126            let end = end
127                .trim()
128                .parse::<usize>()
129                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid end: {end}")))?;
130
131            if start == 0 || end == 0 {
132                return Err(OperationError::InvalidPageRange(
133                    "Page numbers start at 1".to_string(),
134                ));
135            }
136
137            if start > end {
138                return Err(OperationError::InvalidPageRange(format!(
139                    "Start {start} is greater than end {end}"
140                )));
141            }
142
143            return Ok(PageRange::Range(start - 1, end - 1));
144        }
145
146        // Try list (e.g., "1,3,5")
147        if s.contains(',') {
148            let pages: Result<Vec<usize>, _> = s
149                .split(',')
150                .map(|p| {
151                    let page = p.trim().parse::<usize>().map_err(|_| {
152                        OperationError::InvalidPageRange(format!("Invalid page: {p}"))
153                    })?;
154                    if page == 0 {
155                        return Err(OperationError::InvalidPageRange(
156                            "Page numbers start at 1".to_string(),
157                        ));
158                    }
159                    Ok(page - 1)
160                })
161                .collect();
162
163            return Ok(PageRange::List(pages?));
164        }
165
166        Err(OperationError::InvalidPageRange(format!(
167            "Invalid format: {s}"
168        )))
169    }
170
171    /// Get the page indices for this range
172    pub fn get_indices(&self, total_pages: usize) -> Result<Vec<usize>, OperationError> {
173        match self {
174            PageRange::All => Ok((0..total_pages).collect()),
175            PageRange::Single(idx) => {
176                if *idx >= total_pages {
177                    Err(OperationError::PageIndexOutOfBounds(*idx, total_pages))
178                } else {
179                    Ok(vec![*idx])
180                }
181            }
182            PageRange::Range(start, end) => {
183                if *start >= total_pages {
184                    Err(OperationError::PageIndexOutOfBounds(*start, total_pages))
185                } else if *end >= total_pages {
186                    Err(OperationError::PageIndexOutOfBounds(*end, total_pages))
187                } else {
188                    Ok((*start..=*end).collect())
189                }
190            }
191            PageRange::List(pages) => {
192                for &page in pages {
193                    if page >= total_pages {
194                        return Err(OperationError::PageIndexOutOfBounds(page, total_pages));
195                    }
196                }
197                Ok(pages.clone())
198            }
199        }
200    }
201}
202
203#[cfg(test)]
204mod error_tests;
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `spec`, panicking if the parser rejects it.
    fn parse_ok(spec: &str) -> PageRange {
        PageRange::parse(spec).unwrap()
    }

    /// Unwrap a `Single` selection into its index.
    fn as_single(range: PageRange) -> usize {
        match range {
            PageRange::Single(idx) => idx,
            _ => panic!("Expected Single"),
        }
    }

    /// Unwrap a `Range` selection into `(start, end)`.
    fn as_range(range: PageRange) -> (usize, usize) {
        match range {
            PageRange::Range(start, end) => (start, end),
            _ => panic!("Expected Range"),
        }
    }

    /// Unwrap a `List` selection into its indices.
    fn as_list(range: PageRange) -> Vec<usize> {
        match range {
            PageRange::List(pages) => pages,
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_page_range_parsing() {
        assert!(matches!(parse_ok("all"), PageRange::All));
        assert!(matches!(parse_ok("ALL"), PageRange::All));

        // Textual page numbers are 1-based, stored indices are 0-based.
        assert_eq!(as_single(parse_ok("5")), 4);
        assert_eq!(as_range(parse_ok("2-5")), (1, 4));
        assert_eq!(as_list(parse_ok("1,3,5,7")), vec![0, 2, 4, 6]);

        assert!(PageRange::parse("0").is_err());
        assert!(PageRange::parse("5-2").is_err());
        assert!(PageRange::parse("invalid").is_err());
    }

    #[test]
    fn test_page_range_indices() {
        let total = 10;

        let everything = PageRange::All.get_indices(total).unwrap();
        assert_eq!(everything, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);

        let single = PageRange::Single(5).get_indices(total).unwrap();
        assert_eq!(single, vec![5]);

        let span = PageRange::Range(2, 5).get_indices(total).unwrap();
        assert_eq!(span, vec![2, 3, 4, 5]);

        let listed = PageRange::List(vec![1, 3, 5]).get_indices(total).unwrap();
        assert_eq!(listed, vec![1, 3, 5]);

        assert!(PageRange::Single(10).get_indices(total).is_err());
        assert!(PageRange::Range(8, 15).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_empty_list() {
        // An empty specification is rejected.
        assert!(PageRange::parse("").is_err());

        // So is a list that consists of separators only.
        assert!(PageRange::parse(",,").is_err());
    }

    #[test]
    fn test_page_range_list_with_zero() {
        // A 0 entry anywhere in a list must be reported as an invalid range.
        let outcome = PageRange::parse("1,0,3");
        assert!(outcome.is_err());
        match outcome.unwrap_err() {
            OperationError::InvalidPageRange(msg) => {
                assert!(msg.contains("Page numbers start at 1"));
            }
            _ => panic!("Expected InvalidPageRange error"),
        }
    }

    #[test]
    fn test_page_range_with_extra_spaces() {
        // Whitespace around list entries is ignored.
        assert_eq!(as_list(parse_ok(" 1 , 3 , 5 ")), vec![0, 2, 4]);

        // Whitespace around range bounds is ignored as well.
        assert_eq!(as_range(parse_ok(" 2 - 5 ")), (1, 4));
    }

    #[test]
    fn test_page_range_equal_start_end() {
        // A range whose start equals its end is valid.
        assert_eq!(as_range(parse_ok("5-5")), (4, 4));

        // ...and resolves to exactly that one page.
        let one_page = PageRange::Range(4, 4);
        assert_eq!(one_page.get_indices(10).unwrap(), vec![4]);
    }

    #[test]
    fn test_page_range_list_out_of_bounds() {
        // The first out-of-bounds entry of a list is the one reported.
        let outcome = PageRange::List(vec![2, 5, 15]).get_indices(10);
        assert!(outcome.is_err());
        match outcome.unwrap_err() {
            OperationError::PageIndexOutOfBounds(idx, total) => {
                assert_eq!(idx, 15);
                assert_eq!(total, 10);
            }
            _ => panic!("Expected PageIndexOutOfBounds error"),
        }
    }

    #[test]
    fn test_page_range_empty_document() {
        // Resolving selections against a document without pages.
        let total = 0;
        let nothing: Vec<usize> = Vec::new();

        // `All` selects nothing.
        assert_eq!(PageRange::All.get_indices(total).unwrap(), nothing);

        // Any concrete index is out of bounds.
        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());

        // An empty list is still fine.
        assert_eq!(PageRange::List(vec![]).get_indices(total).unwrap(), nothing);
    }

    #[test]
    fn test_page_range_additional_invalid_formats() {
        // Multiple dashes
        assert!(PageRange::parse("1-2-3").is_err());
        // Non-numeric
        assert!(PageRange::parse("abc").is_err());
        // Decimal
        assert!(PageRange::parse("1.5").is_err());
        // Missing start
        assert!(PageRange::parse("-5").is_err());
        // Missing end
        assert!(PageRange::parse("1-").is_err());
        // Only a dash
        assert!(PageRange::parse("-").is_err());
    }

    #[test]
    fn test_module_exports() {
        // Compile-time check only: the option and enum types of the
        // submodules must be reachable through their module paths.
        use super::{
            extract_images::ExtractImagesOptions,
            merge::MergeOptions,
            page_analysis::{AnalysisOptions, PageType},
            page_extraction::PageExtractionOptions,
            rotate::{RotateOptions, RotationAngle},
            split::{SplitMode, SplitOptions},
        };

        // Declaring (never initializing) a binding of each type is enough.
        let _extract: ExtractImagesOptions;
        let _merge: MergeOptions;
        let _analysis: AnalysisOptions;
        let _extraction: PageExtractionOptions;
        let _rotate: RotateOptions;
        let _split: SplitOptions;
        let _angle: RotationAngle;
        let _page_type: PageType;
        let _mode: SplitMode;
    }

    #[test]
    fn test_operation_error_variants() {
        let samples = [
            OperationError::PageIndexOutOfBounds(5, 3),
            OperationError::InvalidPageRange("test".to_string()),
            OperationError::NoPagesToProcess,
            OperationError::ResourceConflict("test".to_string()),
            OperationError::InvalidRotation(45),
            OperationError::ParseError("test".to_string()),
            OperationError::ProcessingError("test".to_string()),
        ];

        // Every variant must render a non-empty message.
        for sample in samples.iter() {
            assert!(!sample.to_string().is_empty());
        }
    }

    #[test]
    fn test_page_range_edge_cases() {
        // Surrounding whitespace
        assert!(matches!(parse_ok("  all  "), PageRange::All));
        assert!(matches!(parse_ok(" 5 "), PageRange::Single(4)));

        // Spaces inside a list
        assert_eq!(as_list(parse_ok(" 1 , 3 , 5 ")), vec![0, 2, 4]);

        // Spaces inside a range
        assert_eq!(as_range(parse_ok(" 2 - 5 ")), (1, 4));
    }

    #[test]
    fn test_page_range_invalid_formats() {
        assert!(PageRange::parse("").is_err());
        assert!(PageRange::parse("abc").is_err());
        assert!(PageRange::parse("1-").is_err());
        assert!(PageRange::parse("-5").is_err());
        assert!(PageRange::parse("1-2-3").is_err());
        assert!(PageRange::parse("1,0,3").is_err());
        assert!(PageRange::parse("0-5").is_err());
        assert!(PageRange::parse("5-0").is_err());
        assert!(PageRange::parse("1,,3").is_err());
        assert!(PageRange::parse("1.5").is_err());
    }

    #[test]
    fn test_page_range_get_indices_empty_document() {
        let total = 0;

        let everything = PageRange::All.get_indices(total).unwrap();
        assert_eq!(everything, Vec::<usize>::new());

        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
        assert!(PageRange::List(vec![0]).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_get_indices_single_page_document() {
        let total = 1;
        let only_page = vec![0];

        assert_eq!(PageRange::All.get_indices(total).unwrap(), only_page);
        assert_eq!(PageRange::Single(0).get_indices(total).unwrap(), only_page);
        assert!(PageRange::Single(1).get_indices(total).is_err());
        assert_eq!(PageRange::Range(0, 0).get_indices(total).unwrap(), only_page);
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_list_duplicates() {
        // Repeated entries are kept as they were written.
        assert_eq!(as_list(parse_ok("1,1,2,2,3")), vec![0, 0, 1, 1, 2]);
    }

    #[test]
    fn test_page_range_list_unordered() {
        // The order of the entries is kept as written.
        assert_eq!(as_list(parse_ok("5,2,8,1,3")), vec![4, 1, 7, 0, 2]);
    }

    #[test]
    fn test_operation_error_display() {
        let out_of_bounds = OperationError::PageIndexOutOfBounds(10, 5);
        assert_eq!(
            out_of_bounds.to_string(),
            "Page index 10 out of bounds (document has 5 pages)"
        );

        let bad_rotation = OperationError::InvalidRotation(45);
        assert_eq!(
            bad_rotation.to_string(),
            "Invalid rotation angle: 45 (must be 0, 90, 180, or 270)"
        );

        let no_pages = OperationError::NoPagesToProcess;
        assert_eq!(no_pages.to_string(), "No pages to process");
    }

    #[test]
    fn test_page_range_large_document() {
        let total = 1000;

        // Whole document
        let everything = PageRange::All.get_indices(total).unwrap();
        assert_eq!(everything.len(), 1000);
        assert_eq!(everything[0], 0);
        assert_eq!(everything[999], 999);

        // Inclusive range: 100..=200 holds 101 indices
        let span = PageRange::Range(100, 200).get_indices(total).unwrap();
        assert_eq!(span.len(), 101);
        assert_eq!(span[0], 100);
        assert_eq!(span[100], 200);
    }

    #[test]
    fn test_page_range_parse_case_insensitive() {
        assert!(matches!(parse_ok("all"), PageRange::All));
        assert!(matches!(parse_ok("ALL"), PageRange::All));
        assert!(matches!(parse_ok("All"), PageRange::All));
        assert!(matches!(parse_ok("aLL"), PageRange::All));
    }

    #[test]
    fn test_operation_result_type() {
        // The alias must be usable as a return type for both outcomes.
        fn succeeds() -> OperationResult<usize> {
            Ok(42)
        }

        fn fails() -> OperationResult<usize> {
            Err(OperationError::NoPagesToProcess)
        }

        assert_eq!(succeeds().unwrap(), 42);
        assert!(fails().is_err());
    }

    #[test]
    fn test_page_range_boundary_values() {
        // A very large page number still parses and converts to 0-based.
        let large_page = usize::MAX / 2;
        let spec = large_page.to_string();
        assert_eq!(as_single(parse_ok(&spec)), large_page - 1);

        // Resolution against an actual document size
        let single = PageRange::Single(5).get_indices(10).unwrap();
        assert_eq!(single, vec![5]);

        // A range that covers the document exactly
        let whole = PageRange::Range(0, 9).get_indices(10).unwrap();
        assert_eq!(whole.len(), 10);
    }

    #[test]
    fn test_error_from_io() {
        use std::io;

        let source = io::Error::new(io::ErrorKind::NotFound, "File not found");
        let converted = OperationError::from(source);

        assert!(
            matches!(converted, OperationError::Io(_)),
            "Expected Io variant"
        );
    }

    #[test]
    fn test_page_range_fmt_debug() {
        // The derived Debug output names the variant (and its payload).
        let rendered = format!("{:?}", PageRange::All);
        assert!(rendered.contains("All"));

        let rendered = format!("{:?}", PageRange::Single(5));
        assert!(rendered.contains("Single"));
        assert!(rendered.contains("5"));

        let rendered = format!("{:?}", PageRange::Range(1, 10));
        assert!(rendered.contains("Range"));

        let rendered = format!("{:?}", PageRange::List(vec![1, 2, 3]));
        assert!(rendered.contains("List"));
    }

    #[test]
    fn test_page_range_clone() {
        let original = PageRange::List(vec![1, 2, 3]);
        let cloned = original.clone();

        if let (PageRange::List(orig), PageRange::List(clone)) = (original, cloned) {
            assert_eq!(orig, clone);
        } else {
            panic!("Clone failed");
        }
    }
}