oxidize_pdf/operations/
mod.rs

//! PDF operations module
//!
//! This module provides high-level operations for manipulating PDF documents,
//! such as splitting, merging, rotating, reordering, and extracting pages,
//! as well as image extraction, page content analysis, and OCR conversion.
5
6pub mod extract_images;
7pub mod merge;
8pub mod page_analysis;
9pub mod page_extraction;
10pub mod pdf_ocr_converter;
11pub mod reorder;
12pub mod rotate;
13pub mod split;
14
15pub use extract_images::{
16    extract_images_from_pages, extract_images_from_pdf, ExtractImagesOptions, ExtractedImage,
17    ImageExtractor,
18};
19pub use merge::{merge_pdf_files, merge_pdfs, MergeInput, MergeOptions, PdfMerger};
20pub use page_analysis::{AnalysisOptions, ContentAnalysis, PageContentAnalyzer, PageType};
21pub use page_extraction::{
22    extract_page, extract_page_range, extract_page_range_to_file, extract_page_to_file,
23    extract_pages, extract_pages_to_file, PageExtractionOptions, PageExtractor,
24};
25pub use pdf_ocr_converter::{ConversionOptions, ConversionResult, PdfOcrConverter};
26pub use reorder::{
27    move_pdf_page, reorder_pdf_pages, reverse_pdf_pages, swap_pdf_pages, PageReorderer,
28    ReorderOptions,
29};
30pub use rotate::{rotate_all_pages, rotate_pdf_pages, PageRotator, RotateOptions, RotationAngle};
31pub use split::{split_into_pages, split_pdf, PdfSplitter, SplitMode, SplitOptions};
32
33use crate::error::PdfError;
34
35/// Result type for operations
36pub type OperationResult<T> = Result<T, OperationError>;
37
38/// Operation-specific errors
39#[derive(Debug, thiserror::Error)]
40pub enum OperationError {
41    /// Page index out of bounds
42    #[error("Page index {0} out of bounds (document has {1} pages)")]
43    PageIndexOutOfBounds(usize, usize),
44
45    /// Invalid page range
46    #[error("Invalid page range: {0}")]
47    InvalidPageRange(String),
48
49    /// No pages to process
50    #[error("No pages to process")]
51    NoPagesToProcess,
52
53    /// Resource conflict during merge
54    #[error("Resource conflict: {0}")]
55    ResourceConflict(String),
56
57    /// Invalid rotation angle
58    #[error("Invalid rotation angle: {0} (must be 0, 90, 180, or 270)")]
59    InvalidRotation(i32),
60
61    /// Parse error
62    #[error("Parse error: {0}")]
63    ParseError(String),
64
65    /// IO error
66    #[error("IO error: {0}")]
67    Io(#[from] std::io::Error),
68
69    /// Core PDF error
70    #[error("PDF error: {0}")]
71    PdfError(#[from] PdfError),
72
73    /// General processing error
74    #[error("Processing error: {0}")]
75    ProcessingError(String),
76}
77
/// Page range specification.
///
/// All indices stored in the variants are 0-based. Values compare equal when
/// they are the same variant with the same payload; a `List` is compared
/// element by element, so order and duplicates matter.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PageRange {
    /// All pages
    All,
    /// Single page (0-based index)
    Single(usize),
    /// Range of pages (inclusive on both ends, 0-based): `Range(start, end)`
    Range(usize, usize),
    /// List of specific pages (0-based indices), in the order given
    List(Vec<usize>),
}
90
91impl PageRange {
92    /// Parse a page range from a string
93    ///
94    /// Examples:
95    /// - "all" -> All pages
96    /// - "1" -> Single page (converts to 0-based)
97    /// - "1-5" -> Range of pages (converts to 0-based)
98    /// - "1,3,5" -> List of pages (converts to 0-based)
99    pub fn parse(s: &str) -> Result<Self, OperationError> {
100        let s = s.trim();
101
102        if s.eq_ignore_ascii_case("all") {
103            return Ok(PageRange::All);
104        }
105
106        // Try single page
107        if let Ok(page) = s.parse::<usize>() {
108            if page == 0 {
109                return Err(OperationError::InvalidPageRange(
110                    "Page numbers start at 1".to_string(),
111                ));
112            }
113            return Ok(PageRange::Single(page - 1));
114        }
115
116        // Try range (e.g., "1-5")
117        if let Some((start, end)) = s.split_once('-') {
118            let start = start
119                .trim()
120                .parse::<usize>()
121                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid start: {start}")))?;
122            let end = end
123                .trim()
124                .parse::<usize>()
125                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid end: {end}")))?;
126
127            if start == 0 || end == 0 {
128                return Err(OperationError::InvalidPageRange(
129                    "Page numbers start at 1".to_string(),
130                ));
131            }
132
133            if start > end {
134                return Err(OperationError::InvalidPageRange(format!(
135                    "Start {start} is greater than end {end}"
136                )));
137            }
138
139            return Ok(PageRange::Range(start - 1, end - 1));
140        }
141
142        // Try list (e.g., "1,3,5")
143        if s.contains(',') {
144            let pages: Result<Vec<usize>, _> = s
145                .split(',')
146                .map(|p| {
147                    let page = p.trim().parse::<usize>().map_err(|_| {
148                        OperationError::InvalidPageRange(format!("Invalid page: {p}"))
149                    })?;
150                    if page == 0 {
151                        return Err(OperationError::InvalidPageRange(
152                            "Page numbers start at 1".to_string(),
153                        ));
154                    }
155                    Ok(page - 1)
156                })
157                .collect();
158
159            return Ok(PageRange::List(pages?));
160        }
161
162        Err(OperationError::InvalidPageRange(format!(
163            "Invalid format: {s}"
164        )))
165    }
166
167    /// Get the page indices for this range
168    pub fn get_indices(&self, total_pages: usize) -> Result<Vec<usize>, OperationError> {
169        match self {
170            PageRange::All => Ok((0..total_pages).collect()),
171            PageRange::Single(idx) => {
172                if *idx >= total_pages {
173                    Err(OperationError::PageIndexOutOfBounds(*idx, total_pages))
174                } else {
175                    Ok(vec![*idx])
176                }
177            }
178            PageRange::Range(start, end) => {
179                if *start >= total_pages {
180                    Err(OperationError::PageIndexOutOfBounds(*start, total_pages))
181                } else if *end >= total_pages {
182                    Err(OperationError::PageIndexOutOfBounds(*end, total_pages))
183                } else {
184                    Ok((*start..=*end).collect())
185                }
186            }
187            PageRange::List(pages) => {
188                for &page in pages {
189                    if page >= total_pages {
190                        return Err(OperationError::PageIndexOutOfBounds(page, total_pages));
191                    }
192                }
193                Ok(pages.clone())
194            }
195        }
196    }
197}
198
199#[cfg(test)]
200mod error_tests;
201
#[cfg(test)]
mod tests {
    //! Unit tests for [`PageRange`] parsing and index resolution, and for the
    //! [`OperationError`] / [`OperationResult`] types.

    use super::*;

    #[test]
    fn test_page_range_parsing() {
        assert!(matches!(PageRange::parse("all").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("ALL").unwrap(), PageRange::All));

        match PageRange::parse("5").unwrap() {
            PageRange::Single(idx) => assert_eq!(idx, 4),
            _ => panic!("Expected Single"),
        }

        match PageRange::parse("2-5").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }

        match PageRange::parse("1,3,5,7").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 2, 4, 6]);
            }
            _ => panic!("Expected List"),
        }

        assert!(PageRange::parse("0").is_err());
        assert!(PageRange::parse("5-2").is_err());
        assert!(PageRange::parse("invalid").is_err());
    }

    #[test]
    fn test_page_range_indices() {
        let total = 10;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        assert_eq!(PageRange::Single(5).get_indices(total).unwrap(), vec![5]);

        assert_eq!(
            PageRange::Range(2, 5).get_indices(total).unwrap(),
            vec![2, 3, 4, 5]
        );

        assert_eq!(
            PageRange::List(vec![1, 3, 5]).get_indices(total).unwrap(),
            vec![1, 3, 5]
        );

        assert!(PageRange::Single(10).get_indices(total).is_err());
        assert!(PageRange::Range(8, 15).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_empty_list() {
        // An empty string matches none of the accepted forms.
        assert!(PageRange::parse("").is_err());

        // A list made only of commas has no parsable page numbers.
        assert!(PageRange::parse(",,").is_err());
    }

    #[test]
    fn test_page_range_list_with_zero() {
        // A 0 anywhere in a list is rejected because page numbers are 1-based.
        match PageRange::parse("1,0,3") {
            Err(OperationError::InvalidPageRange(msg)) => {
                assert!(msg.contains("Page numbers start at 1"));
            }
            other => panic!("Expected InvalidPageRange error, got {other:?}"),
        }
    }

    #[test]
    fn test_page_range_with_extra_spaces() {
        // Whitespace around list items is ignored.
        match PageRange::parse(" 1 , 3 , 5 ").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 2, 4]);
            }
            _ => panic!("Expected List"),
        }

        // Whitespace around range bounds is ignored.
        match PageRange::parse(" 2 - 5 ").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }
    }

    #[test]
    fn test_page_range_equal_start_end() {
        // A range whose start equals its end is valid.
        match PageRange::parse("5-5").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 4);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }

        // It resolves to exactly one index.
        let range = PageRange::Range(4, 4);
        assert_eq!(range.get_indices(10).unwrap(), vec![4]);
    }

    #[test]
    fn test_page_range_list_out_of_bounds() {
        // The first out-of-bounds entry of a list is the one reported.
        let pages = PageRange::List(vec![2, 5, 15]);
        match pages.get_indices(10) {
            Err(OperationError::PageIndexOutOfBounds(idx, total)) => {
                assert_eq!(idx, 15);
                assert_eq!(total, 10);
            }
            other => panic!("Expected PageIndexOutOfBounds error, got {other:?}"),
        }
    }

    #[test]
    fn test_page_range_empty_document() {
        // Resolve ranges against a document with 0 pages.
        let total = 0;

        // All yields an empty vector.
        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            Vec::<usize>::new()
        );

        // Single fails: index 0 does not exist.
        assert!(PageRange::Single(0).get_indices(total).is_err());

        // Range fails for the same reason.
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());

        // An empty list has nothing to validate, so it succeeds.
        assert_eq!(
            PageRange::List(vec![]).get_indices(total).unwrap(),
            Vec::<usize>::new()
        );
    }

    #[test]
    fn test_page_range_additional_invalid_formats() {
        assert!(PageRange::parse("1-2-3").is_err()); // Multiple dashes
        assert!(PageRange::parse("abc").is_err()); // Non-numeric
        assert!(PageRange::parse("1.5").is_err()); // Decimal
        assert!(PageRange::parse("-5").is_err()); // Negative without start
        assert!(PageRange::parse("1-").is_err()); // Missing end
        assert!(PageRange::parse("-").is_err()); // Only dash
    }

    #[test]
    fn test_module_exports() {
        // Compile-time check only: the option/enum types of each operation
        // must be reachable through their submodules.
        use super::extract_images::ExtractImagesOptions;
        use super::merge::MergeOptions;
        use super::page_analysis::{AnalysisOptions, PageType};
        use super::page_extraction::PageExtractionOptions;
        use super::rotate::{RotateOptions, RotationAngle};
        use super::split::{SplitMode, SplitOptions};

        // Declaring (never initializing) a binding is enough to name each type.
        let _extract: ExtractImagesOptions;
        let _merge: MergeOptions;
        let _analysis: AnalysisOptions;
        let _extraction: PageExtractionOptions;
        let _rotate: RotateOptions;
        let _split: SplitOptions;
        let _angle: RotationAngle;
        let _page_type: PageType;
        let _mode: SplitMode;
    }

    #[test]
    fn test_operation_error_variants() {
        let errors = [
            OperationError::PageIndexOutOfBounds(5, 3),
            OperationError::InvalidPageRange("test".to_string()),
            OperationError::NoPagesToProcess,
            OperationError::ResourceConflict("test".to_string()),
            OperationError::InvalidRotation(45),
            OperationError::ParseError("test".to_string()),
            OperationError::ProcessingError("test".to_string()),
        ];

        // Every variant must render a non-empty Display message.
        for error in errors {
            let message = error.to_string();
            assert!(!message.is_empty());
        }
    }

    #[test]
    fn test_page_range_edge_cases() {
        // Whitespace handling
        assert!(matches!(
            PageRange::parse("  all  ").unwrap(),
            PageRange::All
        ));
        assert!(matches!(
            PageRange::parse(" 5 ").unwrap(),
            PageRange::Single(4)
        ));

        // List with spaces
        match PageRange::parse(" 1 , 3 , 5 ").unwrap() {
            PageRange::List(pages) => assert_eq!(pages, vec![0, 2, 4]),
            _ => panic!("Expected List"),
        }

        // Range with spaces
        match PageRange::parse(" 2 - 5 ").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }
    }

    #[test]
    fn test_page_range_invalid_formats() {
        assert!(PageRange::parse("").is_err());
        assert!(PageRange::parse("abc").is_err());
        assert!(PageRange::parse("1-").is_err());
        assert!(PageRange::parse("-5").is_err());
        assert!(PageRange::parse("1-2-3").is_err());
        assert!(PageRange::parse("1,0,3").is_err());
        assert!(PageRange::parse("0-5").is_err());
        assert!(PageRange::parse("5-0").is_err());
        assert!(PageRange::parse("1,,3").is_err());
        assert!(PageRange::parse("1.5").is_err());
    }

    #[test]
    fn test_page_range_get_indices_empty_document() {
        let total = 0;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            Vec::<usize>::new()
        );
        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
        assert!(PageRange::List(vec![0]).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_get_indices_single_page_document() {
        let total = 1;

        assert_eq!(PageRange::All.get_indices(total).unwrap(), vec![0]);
        assert_eq!(PageRange::Single(0).get_indices(total).unwrap(), vec![0]);
        assert!(PageRange::Single(1).get_indices(total).is_err());
        assert_eq!(PageRange::Range(0, 0).get_indices(total).unwrap(), vec![0]);
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
    }

    #[test]
    fn test_page_range_list_duplicates() {
        // Duplicates in a list are preserved as given.
        match PageRange::parse("1,1,2,2,3").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 0, 1, 1, 2]);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_page_range_list_unordered() {
        // List order is preserved; no sorting takes place.
        match PageRange::parse("5,2,8,1,3").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![4, 1, 7, 0, 2]);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_operation_error_display() {
        let error = OperationError::PageIndexOutOfBounds(10, 5);
        assert_eq!(
            error.to_string(),
            "Page index 10 out of bounds (document has 5 pages)"
        );

        let error = OperationError::InvalidRotation(45);
        assert_eq!(
            error.to_string(),
            "Invalid rotation angle: 45 (must be 0, 90, 180, or 270)"
        );

        let error = OperationError::NoPagesToProcess;
        assert_eq!(error.to_string(), "No pages to process");
    }

    #[test]
    fn test_page_range_large_document() {
        let total = 1000;

        // All pages
        let indices = PageRange::All.get_indices(total).unwrap();
        assert_eq!(indices.len(), 1000);
        assert_eq!(indices[0], 0);
        assert_eq!(indices[999], 999);

        // Large inclusive range: 100..=200 is 101 pages
        let indices = PageRange::Range(100, 200).get_indices(total).unwrap();
        assert_eq!(indices.len(), 101);
        assert_eq!(indices[0], 100);
        assert_eq!(indices[100], 200);
    }

    #[test]
    fn test_page_range_parse_case_insensitive() {
        assert!(matches!(PageRange::parse("all").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("ALL").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("All").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("aLL").unwrap(), PageRange::All));
    }

    #[test]
    fn test_operation_result_type() {
        // OperationResult must be usable as an ordinary Result alias.
        fn test_function() -> OperationResult<usize> {
            Ok(42)
        }

        fn test_error_function() -> OperationResult<usize> {
            Err(OperationError::NoPagesToProcess)
        }

        assert_eq!(test_function().unwrap(), 42);
        assert!(test_error_function().is_err());
    }

    #[test]
    fn test_page_range_boundary_values() {
        // A very large (but representable) page number still parses.
        let large_page = usize::MAX / 2;

        match PageRange::parse(&large_page.to_string()).unwrap() {
            PageRange::Single(idx) => assert_eq!(idx, large_page - 1),
            _ => panic!("Expected Single"),
        }

        // Resolution against an actual page count
        let indices = PageRange::Single(5).get_indices(10).unwrap();
        assert_eq!(indices, vec![5]);

        // Range covering the whole document
        let indices = PageRange::Range(0, 9).get_indices(10).unwrap();
        assert_eq!(indices.len(), 10);
    }

    #[test]
    fn test_error_from_io() {
        use std::io;

        let io_error = io::Error::new(io::ErrorKind::NotFound, "File not found");
        let op_error: OperationError = io_error.into();

        match op_error {
            OperationError::Io(_) => {}
            _ => panic!("Expected Io variant"),
        }
    }

    #[test]
    fn test_page_range_fmt_debug() {
        let range = PageRange::All;
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("All"));

        let range = PageRange::Single(5);
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("Single"));
        assert!(debug_str.contains("5"));

        let range = PageRange::Range(1, 10);
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("Range"));

        let range = PageRange::List(vec![1, 2, 3]);
        let debug_str = format!("{range:?}");
        assert!(debug_str.contains("List"));
    }

    #[test]
    fn test_page_range_clone() {
        let original = PageRange::List(vec![1, 2, 3]);
        let cloned = original.clone();

        match (original, cloned) {
            (PageRange::List(orig), PageRange::List(clone)) => {
                assert_eq!(orig, clone);
            }
            _ => panic!("Clone failed"),
        }
    }
}