oxidize_pdf/operations/
mod.rs

1//! PDF operations module
2//!
//! This module provides high-level operations for manipulating PDF documents,
//! such as splitting, merging, rotating and reordering pages, extracting pages
//! and images, and analyzing page content.
5
6pub mod extract_images;
7pub mod merge;
8pub mod page_analysis;
9pub mod page_extraction;
10pub mod reorder;
11pub mod rotate;
12pub mod split;
13
14pub use extract_images::{
15    extract_images_from_pages, extract_images_from_pdf, ExtractImagesOptions, ExtractedImage,
16    ImageExtractor,
17};
18pub use merge::{merge_pdf_files, merge_pdfs, MergeInput, MergeOptions, PdfMerger};
19pub use page_analysis::{AnalysisOptions, ContentAnalysis, PageContentAnalyzer, PageType};
20pub use page_extraction::{
21    extract_page, extract_page_range, extract_page_range_to_file, extract_page_to_file,
22    extract_pages, extract_pages_to_file, PageExtractionOptions, PageExtractor,
23};
24pub use reorder::{
25    move_pdf_page, reorder_pdf_pages, reverse_pdf_pages, swap_pdf_pages, PageReorderer,
26    ReorderOptions,
27};
28pub use rotate::{rotate_all_pages, rotate_pdf_pages, PageRotator, RotateOptions, RotationAngle};
29pub use split::{split_into_pages, split_pdf, PdfSplitter, SplitMode, SplitOptions};
30
31use crate::error::PdfError;
32
/// Result type for operations.
///
/// Shorthand for `Result<T, OperationError>`.
pub type OperationResult<T> = Result<T, OperationError>;
35
/// Operation-specific errors.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. The `#[from]` attributes on the `Io` and `PdfError` variants
/// generate `From` conversions, so those source errors can be propagated
/// with `?`.
#[derive(Debug, thiserror::Error)]
pub enum OperationError {
    /// Page index out of bounds.
    ///
    /// Fields are the rejected index followed by the document's page count
    /// (the order in which the message prints them).
    #[error("Page index {0} out of bounds (document has {1} pages)")]
    PageIndexOutOfBounds(usize, usize),

    /// Invalid page range; the string describes what was wrong with it.
    #[error("Invalid page range: {0}")]
    InvalidPageRange(String),

    /// No pages to process
    #[error("No pages to process")]
    NoPagesToProcess,

    /// Resource conflict during merge; the string carries the details.
    #[error("Resource conflict: {0}")]
    ResourceConflict(String),

    /// Invalid rotation angle; the field is the rejected angle.
    ///
    /// The message lists 0, 90, 180 and 270 as the accepted values.
    #[error("Invalid rotation angle: {0} (must be 0, 90, 180, or 270)")]
    InvalidRotation(i32),

    /// Parse error; the string carries the details.
    #[error("Parse error: {0}")]
    ParseError(String),

    /// IO error, converted automatically from `std::io::Error`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Core PDF error, converted automatically from the crate's `PdfError`.
    #[error("PDF error: {0}")]
    PdfError(#[from] PdfError),

    /// General processing error; the string carries the details.
    #[error("Processing error: {0}")]
    ProcessingError(String),
}
75
/// Page range specification.
///
/// Every index stored in a variant is 0-based. `PageRange::parse` accepts
/// 1-based page numbers and converts them before building a value.
#[derive(Debug, Clone)]
pub enum PageRange {
    /// All pages
    All,
    /// Single page (0-based index)
    Single(usize),
    /// Range of pages as `(start, end)` (inclusive, 0-based)
    Range(usize, usize),
    /// List of specific pages (0-based indices).
    ///
    /// `get_indices` returns the entries in the stored order, duplicates
    /// included.
    List(Vec<usize>),
}
88
89impl PageRange {
90    /// Parse a page range from a string
91    ///
92    /// Examples:
93    /// - "all" -> All pages
94    /// - "1" -> Single page (converts to 0-based)
95    /// - "1-5" -> Range of pages (converts to 0-based)
96    /// - "1,3,5" -> List of pages (converts to 0-based)
97    pub fn parse(s: &str) -> Result<Self, OperationError> {
98        let s = s.trim();
99
100        if s.eq_ignore_ascii_case("all") {
101            return Ok(PageRange::All);
102        }
103
104        // Try single page
105        if let Ok(page) = s.parse::<usize>() {
106            if page == 0 {
107                return Err(OperationError::InvalidPageRange(
108                    "Page numbers start at 1".to_string(),
109                ));
110            }
111            return Ok(PageRange::Single(page - 1));
112        }
113
114        // Try range (e.g., "1-5")
115        if let Some((start, end)) = s.split_once('-') {
116            let start = start
117                .trim()
118                .parse::<usize>()
119                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid start: {start}")))?;
120            let end = end
121                .trim()
122                .parse::<usize>()
123                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid end: {end}")))?;
124
125            if start == 0 || end == 0 {
126                return Err(OperationError::InvalidPageRange(
127                    "Page numbers start at 1".to_string(),
128                ));
129            }
130
131            if start > end {
132                return Err(OperationError::InvalidPageRange(format!(
133                    "Start {start} is greater than end {end}"
134                )));
135            }
136
137            return Ok(PageRange::Range(start - 1, end - 1));
138        }
139
140        // Try list (e.g., "1,3,5")
141        if s.contains(',') {
142            let pages: Result<Vec<usize>, _> = s
143                .split(',')
144                .map(|p| {
145                    let page = p.trim().parse::<usize>().map_err(|_| {
146                        OperationError::InvalidPageRange(format!("Invalid page: {p}"))
147                    })?;
148                    if page == 0 {
149                        return Err(OperationError::InvalidPageRange(
150                            "Page numbers start at 1".to_string(),
151                        ));
152                    }
153                    Ok(page - 1)
154                })
155                .collect();
156
157            return Ok(PageRange::List(pages?));
158        }
159
160        Err(OperationError::InvalidPageRange(format!(
161            "Invalid format: {s}"
162        )))
163    }
164
165    /// Get the page indices for this range
166    pub fn get_indices(&self, total_pages: usize) -> Result<Vec<usize>, OperationError> {
167        match self {
168            PageRange::All => Ok((0..total_pages).collect()),
169            PageRange::Single(idx) => {
170                if *idx >= total_pages {
171                    Err(OperationError::PageIndexOutOfBounds(*idx, total_pages))
172                } else {
173                    Ok(vec![*idx])
174                }
175            }
176            PageRange::Range(start, end) => {
177                if *start >= total_pages {
178                    Err(OperationError::PageIndexOutOfBounds(*start, total_pages))
179                } else if *end >= total_pages {
180                    Err(OperationError::PageIndexOutOfBounds(*end, total_pages))
181                } else {
182                    Ok((*start..=*end).collect())
183                }
184            }
185            PageRange::List(pages) => {
186                for &page in pages {
187                    if page >= total_pages {
188                        return Err(OperationError::PageIndexOutOfBounds(page, total_pages));
189                    }
190                }
191                Ok(pages.clone())
192            }
193        }
194    }
195}
196
197#[cfg(test)]
198mod error_tests;
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` and return the indices of the resulting `PageRange::List`.
    ///
    /// Panics when parsing fails or produces a different variant.
    fn parse_list(input: &str) -> Vec<usize> {
        match PageRange::parse(input).unwrap() {
            PageRange::List(pages) => pages,
            other => panic!("Expected List for {input:?}, got {other:?}"),
        }
    }

    /// Parse `input` and return the `(start, end)` of the resulting
    /// `PageRange::Range`.
    ///
    /// Panics when parsing fails or produces a different variant.
    fn parse_range(input: &str) -> (usize, usize) {
        match PageRange::parse(input).unwrap() {
            PageRange::Range(start, end) => (start, end),
            other => panic!("Expected Range for {input:?}, got {other:?}"),
        }
    }

    /// Assert that `PageRange::parse` rejects every input, naming the
    /// offending input on failure.
    fn assert_rejected(inputs: &[&str]) {
        for &input in inputs {
            assert!(
                PageRange::parse(input).is_err(),
                "expected {input:?} to be rejected"
            );
        }
    }

    #[test]
    fn test_page_range_parsing() {
        assert!(matches!(PageRange::parse("all").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("ALL").unwrap(), PageRange::All));

        // Input is 1-based, stored indices are 0-based.
        assert!(matches!(
            PageRange::parse("5").unwrap(),
            PageRange::Single(4)
        ));
        assert_eq!(parse_range("2-5"), (1, 4));
        assert_eq!(parse_list("1,3,5,7"), vec![0, 2, 4, 6]);

        assert_rejected(&["0", "5-2", "invalid"]);
    }

    #[test]
    fn test_page_range_indices() {
        let total = 10;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        assert_eq!(PageRange::Single(5).get_indices(total).unwrap(), vec![5]);

        assert_eq!(
            PageRange::Range(2, 5).get_indices(total).unwrap(),
            vec![2, 3, 4, 5]
        );

        assert_eq!(
            PageRange::List(vec![1, 3, 5]).get_indices(total).unwrap(),
            vec![1, 3, 5]
        );

        assert!(PageRange::Single(10).get_indices(total).is_err());
        assert!(PageRange::Range(8, 15).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_empty_list() {
        // An empty string is not a valid specification, and a list made
        // only of separators has no parsable entries.
        assert_rejected(&["", ",,"]);
    }

    #[test]
    fn test_page_range_list_with_zero() {
        // Page numbers are 1-based, so a 0 anywhere in a list is rejected.
        match PageRange::parse("1,0,3").unwrap_err() {
            OperationError::InvalidPageRange(msg) => {
                assert!(msg.contains("Page numbers start at 1"));
            }
            other => panic!("Expected InvalidPageRange error, got {other:?}"),
        }
    }

    #[test]
    fn test_page_range_with_extra_spaces() {
        // Whitespace around list entries is ignored.
        assert_eq!(parse_list(" 1 , 3 , 5 "), vec![0, 2, 4]);

        // Whitespace around range bounds is ignored.
        assert_eq!(parse_range(" 2 - 5 "), (1, 4));
    }

    #[test]
    fn test_page_range_equal_start_end() {
        // A range whose start equals its end is valid and selects one page.
        assert_eq!(parse_range("5-5"), (4, 4));

        let range = PageRange::Range(4, 4);
        assert_eq!(range.get_indices(10).unwrap(), vec![4]);
    }

    #[test]
    fn test_page_range_list_out_of_bounds() {
        // The first out-of-bounds entry of a list is the one reported.
        let pages = PageRange::List(vec![2, 5, 15]);
        match pages.get_indices(10).unwrap_err() {
            OperationError::PageIndexOutOfBounds(idx, total) => {
                assert_eq!(idx, 15);
                assert_eq!(total, 10);
            }
            other => panic!("Expected PageIndexOutOfBounds error, got {other:?}"),
        }
    }

    #[test]
    fn test_page_range_empty_document() {
        let total = 0;

        // All pages of an empty document is an empty selection.
        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            Vec::<usize>::new()
        );

        // Any concrete index is out of bounds.
        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());

        // An empty list references no index, so it succeeds.
        assert_eq!(
            PageRange::List(vec![]).get_indices(total).unwrap(),
            Vec::<usize>::new()
        );
    }

    #[test]
    fn test_page_range_additional_invalid_formats() {
        assert_rejected(&[
            "1-2-3", // Multiple dashes
            "abc",   // Non-numeric
            "1.5",   // Decimal
            "-5",    // Missing start
            "1-",    // Missing end
            "-",     // Only dash
        ]);
    }

    #[test]
    fn test_module_exports() {
        // Compile-time check only: each option/enum type must be reachable
        // through its submodule path.
        use super::extract_images::ExtractImagesOptions;
        use super::merge::MergeOptions;
        use super::page_analysis::{AnalysisOptions, PageType};
        use super::page_extraction::PageExtractionOptions;
        use super::rotate::{RotateOptions, RotationAngle};
        use super::split::{SplitMode, SplitOptions};

        // Declaring a binding of each type is enough to prove it resolves.
        let _extract: ExtractImagesOptions;
        let _merge: MergeOptions;
        let _analysis: AnalysisOptions;
        let _extraction: PageExtractionOptions;
        let _rotate: RotateOptions;
        let _split: SplitOptions;
        let _angle: RotationAngle;
        let _page_type: PageType;
        let _mode: SplitMode;
    }

    #[test]
    fn test_operation_error_variants() {
        // Every variant constructible without an external source error must
        // render a non-empty message.
        let errors = [
            OperationError::PageIndexOutOfBounds(5, 3),
            OperationError::InvalidPageRange("test".to_string()),
            OperationError::NoPagesToProcess,
            OperationError::ResourceConflict("test".to_string()),
            OperationError::InvalidRotation(45),
            OperationError::ParseError("test".to_string()),
            OperationError::ProcessingError("test".to_string()),
        ];

        for error in errors {
            assert!(!error.to_string().is_empty());
        }
    }

    #[test]
    fn test_page_range_edge_cases() {
        // Surrounding whitespace is ignored for every form.
        assert!(matches!(
            PageRange::parse("  all  ").unwrap(),
            PageRange::All
        ));
        assert!(matches!(
            PageRange::parse(" 5 ").unwrap(),
            PageRange::Single(4)
        ));
        assert_eq!(parse_list(" 1 , 3 , 5 "), vec![0, 2, 4]);
        assert_eq!(parse_range(" 2 - 5 "), (1, 4));
    }

    #[test]
    fn test_page_range_invalid_formats() {
        assert_rejected(&[
            "", "abc", "1-", "-5", "1-2-3", "1,0,3", "0-5", "5-0", "1,,3", "1.5",
        ]);
    }

    #[test]
    fn test_page_range_get_indices_empty_document() {
        let total = 0;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            Vec::<usize>::new()
        );
        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
        assert!(PageRange::List(vec![0]).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_get_indices_single_page_document() {
        let total = 1;

        assert_eq!(PageRange::All.get_indices(total).unwrap(), vec![0]);
        assert_eq!(PageRange::Single(0).get_indices(total).unwrap(), vec![0]);
        assert!(PageRange::Single(1).get_indices(total).is_err());
        assert_eq!(PageRange::Range(0, 0).get_indices(total).unwrap(), vec![0]);
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_list_duplicates() {
        // Duplicates in a list are kept as given.
        assert_eq!(parse_list("1,1,2,2,3"), vec![0, 0, 1, 1, 2]);
    }

    #[test]
    fn test_page_range_list_unordered() {
        // The order of a list is kept as given.
        assert_eq!(parse_list("5,2,8,1,3"), vec![4, 1, 7, 0, 2]);
    }

    #[test]
    fn test_operation_error_display() {
        let error = OperationError::PageIndexOutOfBounds(10, 5);
        assert_eq!(
            error.to_string(),
            "Page index 10 out of bounds (document has 5 pages)"
        );

        let error = OperationError::InvalidRotation(45);
        assert_eq!(
            error.to_string(),
            "Invalid rotation angle: 45 (must be 0, 90, 180, or 270)"
        );

        let error = OperationError::NoPagesToProcess;
        assert_eq!(error.to_string(), "No pages to process");
    }

    #[test]
    fn test_page_range_large_document() {
        let total = 1000;

        // All pages
        let indices = PageRange::All.get_indices(total).unwrap();
        assert_eq!(indices.len(), 1000);
        assert_eq!(indices[0], 0);
        assert_eq!(indices[999], 999);

        // Inclusive range: 100..=200 holds 101 entries
        let indices = PageRange::Range(100, 200).get_indices(total).unwrap();
        assert_eq!(indices.len(), 101);
        assert_eq!(indices[0], 100);
        assert_eq!(indices[100], 200);
    }

    #[test]
    fn test_page_range_parse_case_insensitive() {
        for spelling in ["all", "ALL", "All", "aLL"] {
            assert!(
                matches!(PageRange::parse(spelling).unwrap(), PageRange::All),
                "expected {spelling:?} to parse as All"
            );
        }
    }

    #[test]
    fn test_operation_result_type() {
        // OperationResult must work as an ordinary Result alias.
        fn test_function() -> OperationResult<usize> {
            Ok(42)
        }

        fn test_error_function() -> OperationResult<usize> {
            Err(OperationError::NoPagesToProcess)
        }

        assert_eq!(test_function().unwrap(), 42);
        assert!(test_error_function().is_err());
    }

    #[test]
    fn test_page_range_boundary_values() {
        // A very large page number still parses and converts to 0-based.
        let large_page = usize::MAX / 2;

        match PageRange::parse(&large_page.to_string()).unwrap() {
            PageRange::Single(idx) => assert_eq!(idx, large_page - 1),
            other => panic!("Expected Single, got {other:?}"),
        }

        // Resolving against an actual page count
        let indices = PageRange::Single(5).get_indices(10).unwrap();
        assert_eq!(indices, vec![5]);

        // A range covering the whole document
        let indices = PageRange::Range(0, 9).get_indices(10).unwrap();
        assert_eq!(indices.len(), 10);
    }

    #[test]
    fn test_error_from_io() {
        use std::io;

        let io_error = io::Error::new(io::ErrorKind::NotFound, "File not found");
        let op_error: OperationError = io_error.into();

        assert!(
            matches!(op_error, OperationError::Io(_)),
            "Expected Io variant, got {op_error:?}"
        );
    }

    #[test]
    fn test_page_range_fmt_debug() {
        // The derived Debug output names the variant (and its payload).
        let range = PageRange::All;
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("All"));

        let range = PageRange::Single(5);
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("Single"));
        assert!(debug_str.contains("5"));

        let range = PageRange::Range(1, 10);
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("Range"));

        let range = PageRange::List(vec![1, 2, 3]);
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("List"));
    }

    #[test]
    fn test_page_range_clone() {
        let original = PageRange::List(vec![1, 2, 3]);
        let cloned = original.clone();

        match (original, cloned) {
            (PageRange::List(orig), PageRange::List(clone)) => {
                assert_eq!(orig, clone);
            }
            _ => panic!("Clone failed"),
        }
    }
}