Skip to main content

oxidize_pdf/operations/
mod.rs

//! PDF operations module
//!
//! This module provides high-level operations for manipulating PDF documents,
//! such as splitting, merging, rotating pages, and reordering.
5
6pub mod extract_images;
7pub mod merge;
8pub mod overlay;
9pub mod page_analysis;
10pub mod page_extraction;
11pub mod pdf_ocr_converter;
12pub mod reorder;
13pub mod rotate;
14pub mod split;
15
16pub use extract_images::{
17    extract_images_from_pages, extract_images_from_pdf, ExtractImagesOptions, ExtractedImage,
18    ImageExtractor,
19};
20pub use merge::{merge_pdf_files, merge_pdfs, MergeInput, MergeOptions, PdfMerger};
21pub use overlay::{overlay_pdf, OverlayOptions, OverlayPosition, PdfOverlay};
22pub use page_analysis::{AnalysisOptions, ContentAnalysis, PageContentAnalyzer, PageType};
23pub use page_extraction::{
24    extract_page, extract_page_range, extract_page_range_to_file, extract_page_to_file,
25    extract_pages, extract_pages_to_file, PageExtractionOptions, PageExtractor,
26};
27pub use pdf_ocr_converter::{ConversionOptions, ConversionResult, PdfOcrConverter};
28pub use reorder::{
29    move_pdf_page, reorder_pdf_pages, reverse_pdf_pages, swap_pdf_pages, PageReorderer,
30    ReorderOptions,
31};
32pub use rotate::{rotate_all_pages, rotate_pdf_pages, PageRotator, RotateOptions, RotationAngle};
33pub use split::{split_into_pages, split_pdf, PdfSplitter, SplitMode, SplitOptions};
34
35use crate::error::PdfError;
36
37/// Result type for operations
38pub type OperationResult<T> = Result<T, OperationError>;
39
40/// Operation-specific errors
41#[derive(Debug, thiserror::Error)]
42pub enum OperationError {
43    /// Page index out of bounds
44    #[error("Page index {0} out of bounds (document has {1} pages)")]
45    PageIndexOutOfBounds(usize, usize),
46
47    /// Invalid page range
48    #[error("Invalid page range: {0}")]
49    InvalidPageRange(String),
50
51    /// No pages to process
52    #[error("No pages to process")]
53    NoPagesToProcess,
54
55    /// Resource conflict during merge
56    #[error("Resource conflict: {0}")]
57    ResourceConflict(String),
58
59    /// Invalid rotation angle
60    #[error("Invalid rotation angle: {0} (must be 0, 90, 180, or 270)")]
61    InvalidRotation(i32),
62
63    /// Parse error
64    #[error("Parse error: {0}")]
65    ParseError(String),
66
67    /// Invalid file path
68    #[error("Invalid file path: {reason}")]
69    InvalidPath { reason: String },
70
71    /// IO error
72    #[error("IO error: {0}")]
73    Io(#[from] std::io::Error),
74
75    /// Core PDF error
76    #[error("PDF error: {0}")]
77    PdfError(#[from] PdfError),
78
79    /// General processing error
80    #[error("Processing error: {0}")]
81    ProcessingError(String),
82}
83
/// Selection of pages within a document.
///
/// All indices stored in the variants are 0-based. Use [`PageRange::parse`]
/// to build a value from 1-based, user-facing text and
/// [`PageRange::get_indices`] to resolve it against a concrete page count.
///
/// Values can be compared with `==`; two ranges are equal when they are the
/// same variant with the same payload (a `List` compares element by element,
/// so order and duplicates matter).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PageRange {
    /// All pages
    All,
    /// Single page (0-based index)
    Single(usize),
    /// Range of pages (inclusive on both ends, 0-based)
    Range(usize, usize),
    /// List of specific pages (0-based indices), kept in the given order
    List(Vec<usize>),
}
96
97impl PageRange {
98    /// Parse a page range from a string
99    ///
100    /// Examples:
101    /// - "all" -> All pages
102    /// - "1" -> Single page (converts to 0-based)
103    /// - "1-5" -> Range of pages (converts to 0-based)
104    /// - "1,3,5" -> List of pages (converts to 0-based)
105    pub fn parse(s: &str) -> Result<Self, OperationError> {
106        let s = s.trim();
107
108        if s.eq_ignore_ascii_case("all") {
109            return Ok(PageRange::All);
110        }
111
112        // Try single page
113        if let Ok(page) = s.parse::<usize>() {
114            if page == 0 {
115                return Err(OperationError::InvalidPageRange(
116                    "Page numbers start at 1".to_string(),
117                ));
118            }
119            return Ok(PageRange::Single(page - 1));
120        }
121
122        // Try range (e.g., "1-5")
123        if let Some((start, end)) = s.split_once('-') {
124            let start = start
125                .trim()
126                .parse::<usize>()
127                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid start: {start}")))?;
128            let end = end
129                .trim()
130                .parse::<usize>()
131                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid end: {end}")))?;
132
133            if start == 0 || end == 0 {
134                return Err(OperationError::InvalidPageRange(
135                    "Page numbers start at 1".to_string(),
136                ));
137            }
138
139            if start > end {
140                return Err(OperationError::InvalidPageRange(format!(
141                    "Start {start} is greater than end {end}"
142                )));
143            }
144
145            return Ok(PageRange::Range(start - 1, end - 1));
146        }
147
148        // Try list (e.g., "1,3,5")
149        if s.contains(',') {
150            let pages: Result<Vec<usize>, _> = s
151                .split(',')
152                .map(|p| {
153                    let page = p.trim().parse::<usize>().map_err(|_| {
154                        OperationError::InvalidPageRange(format!("Invalid page: {p}"))
155                    })?;
156                    if page == 0 {
157                        return Err(OperationError::InvalidPageRange(
158                            "Page numbers start at 1".to_string(),
159                        ));
160                    }
161                    Ok(page - 1)
162                })
163                .collect();
164
165            return Ok(PageRange::List(pages?));
166        }
167
168        Err(OperationError::InvalidPageRange(format!(
169            "Invalid format: {s}"
170        )))
171    }
172
173    /// Get the page indices for this range
174    pub fn get_indices(&self, total_pages: usize) -> Result<Vec<usize>, OperationError> {
175        match self {
176            PageRange::All => Ok((0..total_pages).collect()),
177            PageRange::Single(idx) => {
178                if *idx >= total_pages {
179                    Err(OperationError::PageIndexOutOfBounds(*idx, total_pages))
180                } else {
181                    Ok(vec![*idx])
182                }
183            }
184            PageRange::Range(start, end) => {
185                if *start >= total_pages {
186                    Err(OperationError::PageIndexOutOfBounds(*start, total_pages))
187                } else if *end >= total_pages {
188                    Err(OperationError::PageIndexOutOfBounds(*end, total_pages))
189                } else {
190                    Ok((*start..=*end).collect())
191                }
192            }
193            PageRange::List(pages) => {
194                for &page in pages {
195                    if page >= total_pages {
196                        return Err(OperationError::PageIndexOutOfBounds(page, total_pages));
197                    }
198                }
199                Ok(pages.clone())
200            }
201        }
202    }
203}
204
205#[cfg(test)]
206mod error_tests;
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and returns the index held by the resulting `Single`.
    ///
    /// Panics when parsing fails or a different variant comes back.
    fn parse_single(input: &str) -> usize {
        match PageRange::parse(input).unwrap() {
            PageRange::Single(idx) => idx,
            _ => panic!("Expected Single"),
        }
    }

    /// Parses `input` and returns the `(start, end)` pair of the resulting `Range`.
    ///
    /// Panics when parsing fails or a different variant comes back.
    fn parse_range(input: &str) -> (usize, usize) {
        match PageRange::parse(input).unwrap() {
            PageRange::Range(start, end) => (start, end),
            _ => panic!("Expected Range"),
        }
    }

    /// Parses `input` and returns the indices of the resulting `List`.
    ///
    /// Panics when parsing fails or a different variant comes back.
    fn parse_list(input: &str) -> Vec<usize> {
        match PageRange::parse(input).unwrap() {
            PageRange::List(pages) => pages,
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_page_range_parsing() {
        assert!(matches!(PageRange::parse("all"), Ok(PageRange::All)));
        assert!(matches!(PageRange::parse("ALL"), Ok(PageRange::All)));

        // 1-based input is stored 0-based.
        assert_eq!(parse_single("5"), 4);
        assert_eq!(parse_range("2-5"), (1, 4));
        assert_eq!(parse_list("1,3,5,7"), vec![0, 2, 4, 6]);

        for rejected in ["0", "5-2", "invalid"].iter() {
            assert!(
                PageRange::parse(rejected).is_err(),
                "{:?} should be rejected",
                rejected
            );
        }
    }

    #[test]
    fn test_page_range_indices() {
        let total = 10;

        let everything = PageRange::All.get_indices(total).unwrap();
        assert_eq!(everything, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);

        let single = PageRange::Single(5).get_indices(total).unwrap();
        assert_eq!(single, vec![5]);

        let span = PageRange::Range(2, 5).get_indices(total).unwrap();
        assert_eq!(span, vec![2, 3, 4, 5]);

        let picked = PageRange::List(vec![1, 3, 5]).get_indices(total).unwrap();
        assert_eq!(picked, vec![1, 3, 5]);

        // Index 10 is one past the last page of a 10-page document.
        assert!(PageRange::Single(10).get_indices(total).is_err());
        assert!(PageRange::Range(8, 15).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_empty_list() {
        // An empty string is not a valid specification.
        assert!(PageRange::parse("").is_err());

        // Commas with nothing between them contain no page numbers.
        assert!(PageRange::parse(",,").is_err());
    }

    #[test]
    fn test_page_range_list_with_zero() {
        // A 0 anywhere in a list is rejected because page numbers are 1-based.
        match PageRange::parse("1,0,3") {
            Err(OperationError::InvalidPageRange(msg)) => {
                assert!(msg.contains("Page numbers start at 1"));
            }
            Err(_) => panic!("Expected InvalidPageRange error"),
            Ok(_) => panic!("Expected parsing to fail"),
        }
    }

    #[test]
    fn test_page_range_with_extra_spaces() {
        // Whitespace around list items is ignored.
        assert_eq!(parse_list(" 1 , 3 , 5 "), vec![0, 2, 4]);

        // Whitespace around range bounds is ignored.
        assert_eq!(parse_range(" 2 - 5 "), (1, 4));
    }

    #[test]
    fn test_page_range_equal_start_end() {
        // A range whose bounds coincide is valid and covers one page.
        assert_eq!(parse_range("5-5"), (4, 4));

        let indices = PageRange::Range(4, 4).get_indices(10).unwrap();
        assert_eq!(indices, vec![4]);
    }

    #[test]
    fn test_page_range_list_out_of_bounds() {
        // The error reports the first list entry that is past the end.
        match PageRange::List(vec![2, 5, 15]).get_indices(10) {
            Err(OperationError::PageIndexOutOfBounds(idx, total)) => {
                assert_eq!(idx, 15);
                assert_eq!(total, 10);
            }
            Err(_) => panic!("Expected PageIndexOutOfBounds error"),
            Ok(_) => panic!("Expected get_indices to fail"),
        }
    }

    #[test]
    fn test_page_range_empty_document() {
        let total = 0;
        let nothing = Vec::<usize>::new();

        // `All` on an empty document selects nothing.
        assert_eq!(PageRange::All.get_indices(total).unwrap(), nothing);

        // Any concrete index is out of bounds.
        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());

        // An empty list has no index to reject.
        assert_eq!(PageRange::List(vec![]).get_indices(total).unwrap(), nothing);
    }

    #[test]
    fn test_page_range_additional_invalid_formats() {
        let rejected = [
            "1-2-3", // multiple dashes
            "abc",   // non-numeric
            "1.5",   // decimal
            "-5",    // missing start
            "1-",    // missing end
            "-",     // only a dash
        ];

        for input in rejected.iter() {
            assert!(
                PageRange::parse(input).is_err(),
                "{:?} should be rejected",
                input
            );
        }
    }

    #[test]
    fn test_module_exports() {
        // Compile-time check that the option/enum types are reachable through
        // their submodules; nothing is constructed at runtime.
        use super::extract_images::ExtractImagesOptions;
        use super::merge::MergeOptions;
        use super::page_analysis::{AnalysisOptions, PageType};
        use super::page_extraction::PageExtractionOptions;
        use super::rotate::{RotateOptions, RotationAngle};
        use super::split::{SplitMode, SplitOptions};

        fn assert_type_accessible<T>() {}

        assert_type_accessible::<ExtractImagesOptions>();
        assert_type_accessible::<MergeOptions>();
        assert_type_accessible::<AnalysisOptions>();
        assert_type_accessible::<PageExtractionOptions>();
        assert_type_accessible::<RotateOptions>();
        assert_type_accessible::<SplitOptions>();
        assert_type_accessible::<RotationAngle>();
        assert_type_accessible::<PageType>();
        assert_type_accessible::<SplitMode>();
    }

    #[test]
    fn test_operation_error_variants() {
        let samples = [
            OperationError::PageIndexOutOfBounds(5, 3),
            OperationError::InvalidPageRange("test".to_string()),
            OperationError::NoPagesToProcess,
            OperationError::ResourceConflict("test".to_string()),
            OperationError::InvalidRotation(45),
            OperationError::ParseError("test".to_string()),
            OperationError::ProcessingError("test".to_string()),
        ];

        // Every variant must render a non-empty message.
        for sample in samples.iter() {
            assert!(!sample.to_string().is_empty());
        }
    }

    #[test]
    fn test_page_range_edge_cases() {
        // Surrounding whitespace is ignored for every form.
        assert!(matches!(PageRange::parse("  all  "), Ok(PageRange::All)));
        assert!(matches!(PageRange::parse(" 5 "), Ok(PageRange::Single(4))));
        assert_eq!(parse_list(" 1 , 3 , 5 "), vec![0, 2, 4]);
        assert_eq!(parse_range(" 2 - 5 "), (1, 4));
    }

    #[test]
    fn test_page_range_invalid_formats() {
        let rejected = [
            "", "abc", "1-", "-5", "1-2-3", "1,0,3", "0-5", "5-0", "1,,3", "1.5",
        ];

        for input in rejected.iter() {
            assert!(
                PageRange::parse(input).is_err(),
                "{:?} should be rejected",
                input
            );
        }
    }

    #[test]
    fn test_page_range_get_indices_empty_document() {
        let total = 0;

        let all = PageRange::All.get_indices(total).unwrap();
        assert!(all.is_empty());

        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
        assert!(PageRange::List(vec![0]).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_get_indices_single_page_document() {
        let total = 1;
        let only_page = vec![0];

        assert_eq!(PageRange::All.get_indices(total).unwrap(), only_page);
        assert_eq!(PageRange::Single(0).get_indices(total).unwrap(), only_page);
        assert_eq!(PageRange::Range(0, 0).get_indices(total).unwrap(), only_page);

        assert!(PageRange::Single(1).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_list_duplicates() {
        // Repeated page numbers are kept as given.
        assert_eq!(parse_list("1,1,2,2,3"), vec![0, 0, 1, 1, 2]);
    }

    #[test]
    fn test_page_range_list_unordered() {
        // The list keeps the caller's order rather than sorting.
        assert_eq!(parse_list("5,2,8,1,3"), vec![4, 1, 7, 0, 2]);
    }

    #[test]
    fn test_operation_error_display() {
        assert_eq!(
            OperationError::PageIndexOutOfBounds(10, 5).to_string(),
            "Page index 10 out of bounds (document has 5 pages)"
        );
        assert_eq!(
            OperationError::InvalidRotation(45).to_string(),
            "Invalid rotation angle: 45 (must be 0, 90, 180, or 270)"
        );
        assert_eq!(
            OperationError::NoPagesToProcess.to_string(),
            "No pages to process"
        );
    }

    #[test]
    fn test_page_range_large_document() {
        let total = 1000;

        // Every page of the document.
        let all = PageRange::All.get_indices(total).unwrap();
        assert_eq!(all.len(), 1000);
        assert_eq!(all.first(), Some(&0));
        assert_eq!(all.last(), Some(&999));

        // An inclusive range of 101 pages.
        let span = PageRange::Range(100, 200).get_indices(total).unwrap();
        assert_eq!(span.len(), 101);
        assert_eq!(span.first(), Some(&100));
        assert_eq!(span.last(), Some(&200));
    }

    #[test]
    fn test_page_range_parse_case_insensitive() {
        for spelling in ["all", "ALL", "All", "aLL"].iter() {
            assert!(
                matches!(PageRange::parse(spelling), Ok(PageRange::All)),
                "{:?} should parse as All",
                spelling
            );
        }
    }

    #[test]
    fn test_operation_result_type() {
        // `OperationResult` must be usable as an ordinary `Result` in signatures.
        fn succeeds() -> OperationResult<usize> {
            Ok(42)
        }

        fn fails() -> OperationResult<usize> {
            Err(OperationError::NoPagesToProcess)
        }

        assert_eq!(succeeds().unwrap(), 42);
        assert!(fails().is_err());
    }

    #[test]
    fn test_page_range_boundary_values() {
        // A very large page number still converts to a 0-based index.
        let large_page = usize::MAX / 2;
        assert_eq!(parse_single(&large_page.to_string()), large_page - 1);

        // Resolving against an actual page count.
        let single = PageRange::Single(5).get_indices(10).unwrap();
        assert_eq!(single, vec![5]);

        // A range covering the whole document, up to the last valid index.
        let whole = PageRange::Range(0, 9).get_indices(10).unwrap();
        assert_eq!(whole.len(), 10);
    }

    #[test]
    fn test_error_from_io() {
        use std::io;

        let source = io::Error::new(io::ErrorKind::NotFound, "File not found");
        let converted = OperationError::from(source);

        assert!(
            matches!(converted, OperationError::Io(_)),
            "Expected Io variant"
        );
    }

    #[test]
    fn test_page_range_fmt_debug() {
        // The derived Debug output names the variant and shows its payload.
        let all = format!("{:?}", PageRange::All);
        assert!(all.contains("All"));

        let single = format!("{:?}", PageRange::Single(5));
        assert!(single.contains("Single"));
        assert!(single.contains("5"));

        let range = format!("{:?}", PageRange::Range(1, 10));
        assert!(range.contains("Range"));

        let list = format!("{:?}", PageRange::List(vec![1, 2, 3]));
        assert!(list.contains("List"));
    }

    #[test]
    fn test_page_range_clone() {
        let original = PageRange::List(vec![1, 2, 3]);
        let cloned = original.clone();

        match (&original, &cloned) {
            (PageRange::List(expected), PageRange::List(actual)) => {
                assert_eq!(expected, actual);
            }
            _ => panic!("Clone failed"),
        }
    }
}
632            }
633            _ => panic!("Clone failed"),
634        }
635    }
636}