oxidize_pdf/operations/
mod.rs

//! PDF operations module
//!
//! This module provides high-level operations for manipulating PDF documents,
//! such as splitting, merging, rotating and reordering pages, extracting
//! images, and analyzing page content.
5
6pub mod extract_images;
7pub mod merge;
8pub mod page_analysis;
9pub mod rotate;
10pub mod split;
11
12pub use extract_images::{
13    extract_images_from_pages, extract_images_from_pdf, ExtractImagesOptions, ExtractedImage,
14    ImageExtractor,
15};
16pub use merge::{merge_pdf_files, merge_pdfs, MergeInput, MergeOptions, PdfMerger};
17pub use page_analysis::{AnalysisOptions, ContentAnalysis, PageContentAnalyzer, PageType};
18pub use rotate::{rotate_all_pages, rotate_pdf_pages, PageRotator, RotateOptions, RotationAngle};
19pub use split::{split_into_pages, split_pdf, PdfSplitter, SplitMode, SplitOptions};
20
21use crate::error::PdfError;
22
23/// Result type for operations
24pub type OperationResult<T> = Result<T, OperationError>;
25
26/// Operation-specific errors
27#[derive(Debug, thiserror::Error)]
28pub enum OperationError {
29    /// Page index out of bounds
30    #[error("Page index {0} out of bounds (document has {1} pages)")]
31    PageIndexOutOfBounds(usize, usize),
32
33    /// Invalid page range
34    #[error("Invalid page range: {0}")]
35    InvalidPageRange(String),
36
37    /// No pages to process
38    #[error("No pages to process")]
39    NoPagesToProcess,
40
41    /// Resource conflict during merge
42    #[error("Resource conflict: {0}")]
43    ResourceConflict(String),
44
45    /// Invalid rotation angle
46    #[error("Invalid rotation angle: {0} (must be 0, 90, 180, or 270)")]
47    InvalidRotation(i32),
48
49    /// Parse error
50    #[error("Parse error: {0}")]
51    ParseError(String),
52
53    /// IO error
54    #[error("IO error: {0}")]
55    Io(#[from] std::io::Error),
56
57    /// Core PDF error
58    #[error("PDF error: {0}")]
59    PdfError(#[from] PdfError),
60
61    /// General processing error
62    #[error("Processing error: {0}")]
63    ProcessingError(String),
64}
65
/// Selection of pages within a document.
///
/// Every index stored in a variant is 0-based. Use [`PageRange::parse`] to
/// build a value from 1-based, human-readable input.
///
/// The type is comparable (`PartialEq`/`Eq`) so that parsed values can be
/// checked directly with `==` or `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PageRange {
    /// Every page of the document.
    All,
    /// One page, identified by its 0-based index.
    Single(usize),
    /// A contiguous run of pages: 0-based start and end, both inclusive.
    Range(usize, usize),
    /// An explicit list of 0-based page indices, in the order given.
    List(Vec<usize>),
}
78
79impl PageRange {
80    /// Parse a page range from a string
81    ///
82    /// Examples:
83    /// - "all" -> All pages
84    /// - "1" -> Single page (converts to 0-based)
85    /// - "1-5" -> Range of pages (converts to 0-based)
86    /// - "1,3,5" -> List of pages (converts to 0-based)
87    pub fn parse(s: &str) -> Result<Self, OperationError> {
88        let s = s.trim();
89
90        if s.eq_ignore_ascii_case("all") {
91            return Ok(PageRange::All);
92        }
93
94        // Try single page
95        if let Ok(page) = s.parse::<usize>() {
96            if page == 0 {
97                return Err(OperationError::InvalidPageRange(
98                    "Page numbers start at 1".to_string(),
99                ));
100            }
101            return Ok(PageRange::Single(page - 1));
102        }
103
104        // Try range (e.g., "1-5")
105        if let Some((start, end)) = s.split_once('-') {
106            let start = start
107                .trim()
108                .parse::<usize>()
109                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid start: {start}")))?;
110            let end = end
111                .trim()
112                .parse::<usize>()
113                .map_err(|_| OperationError::InvalidPageRange(format!("Invalid end: {end}")))?;
114
115            if start == 0 || end == 0 {
116                return Err(OperationError::InvalidPageRange(
117                    "Page numbers start at 1".to_string(),
118                ));
119            }
120
121            if start > end {
122                return Err(OperationError::InvalidPageRange(format!(
123                    "Start {start} is greater than end {end}"
124                )));
125            }
126
127            return Ok(PageRange::Range(start - 1, end - 1));
128        }
129
130        // Try list (e.g., "1,3,5")
131        if s.contains(',') {
132            let pages: Result<Vec<usize>, _> = s
133                .split(',')
134                .map(|p| {
135                    let page = p.trim().parse::<usize>().map_err(|_| {
136                        OperationError::InvalidPageRange(format!("Invalid page: {p}"))
137                    })?;
138                    if page == 0 {
139                        return Err(OperationError::InvalidPageRange(
140                            "Page numbers start at 1".to_string(),
141                        ));
142                    }
143                    Ok(page - 1)
144                })
145                .collect();
146
147            return Ok(PageRange::List(pages?));
148        }
149
150        Err(OperationError::InvalidPageRange(format!(
151            "Invalid format: {s}"
152        )))
153    }
154
155    /// Get the page indices for this range
156    pub fn get_indices(&self, total_pages: usize) -> Result<Vec<usize>, OperationError> {
157        match self {
158            PageRange::All => Ok((0..total_pages).collect()),
159            PageRange::Single(idx) => {
160                if *idx >= total_pages {
161                    Err(OperationError::PageIndexOutOfBounds(*idx, total_pages))
162                } else {
163                    Ok(vec![*idx])
164                }
165            }
166            PageRange::Range(start, end) => {
167                if *start >= total_pages {
168                    Err(OperationError::PageIndexOutOfBounds(*start, total_pages))
169                } else if *end >= total_pages {
170                    Err(OperationError::PageIndexOutOfBounds(*end, total_pages))
171                } else {
172                    Ok((*start..=*end).collect())
173                }
174            }
175            PageRange::List(pages) => {
176                for &page in pages {
177                    if page >= total_pages {
178                        return Err(OperationError::PageIndexOutOfBounds(page, total_pages));
179                    }
180                }
181                Ok(pages.clone())
182            }
183        }
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Each accepted textual form maps to the expected variant, with 1-based
    /// input converted to 0-based indices.
    #[test]
    fn test_page_range_parsing() {
        assert!(matches!(PageRange::parse("all").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("ALL").unwrap(), PageRange::All));

        match PageRange::parse("5").unwrap() {
            PageRange::Single(idx) => assert_eq!(idx, 4),
            _ => panic!("Expected Single"),
        }

        match PageRange::parse("2-5").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }

        match PageRange::parse("1,3,5,7").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 2, 4, 6]);
            }
            _ => panic!("Expected List"),
        }

        assert!(PageRange::parse("0").is_err());
        assert!(PageRange::parse("5-2").is_err());
        assert!(PageRange::parse("invalid").is_err());
    }

    /// Whitespace around the whole input and around each number is ignored.
    #[test]
    fn test_page_range_parsing_whitespace() {
        assert!(matches!(PageRange::parse("  all ").unwrap(), PageRange::All));
        assert!(matches!(
            PageRange::parse("  3 ").unwrap(),
            PageRange::Single(2)
        ));
        assert!(matches!(
            PageRange::parse("1 - 3").unwrap(),
            PageRange::Range(0, 2)
        ));

        match PageRange::parse("1, 2 ,4").unwrap() {
            PageRange::List(pages) => assert_eq!(pages, vec![0, 1, 3]),
            _ => panic!("Expected List"),
        }
    }

    /// Zero page numbers and malformed input are rejected with
    /// `InvalidPageRange` in every textual form.
    #[test]
    fn test_page_range_parsing_rejects_malformed_input() {
        for input in ["", "0-5", "3-0", "1,0,2", "1-", "-3", "a,b", "1,,2", "1,3-5"] {
            assert!(
                matches!(
                    PageRange::parse(input),
                    Err(OperationError::InvalidPageRange(_))
                ),
                "expected InvalidPageRange for {input:?}"
            );
        }
    }

    /// In-bounds selections resolve to the expected indices; out-of-bounds
    /// ones fail.
    #[test]
    fn test_page_range_indices() {
        let total = 10;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        assert_eq!(PageRange::Single(5).get_indices(total).unwrap(), vec![5]);

        assert_eq!(
            PageRange::Range(2, 5).get_indices(total).unwrap(),
            vec![2, 3, 4, 5]
        );

        assert_eq!(
            PageRange::List(vec![1, 3, 5]).get_indices(total).unwrap(),
            vec![1, 3, 5]
        );

        assert!(PageRange::Single(10).get_indices(total).is_err());
        assert!(PageRange::Range(8, 15).get_indices(total).is_err());
    }

    /// Boundary behaviour: empty documents, last valid index, and the exact
    /// index reported by `PageIndexOutOfBounds`.
    #[test]
    fn test_page_range_indices_bounds() {
        assert!(PageRange::All.get_indices(0).unwrap().is_empty());
        assert!(PageRange::Single(0).get_indices(0).is_err());

        assert_eq!(PageRange::Single(9).get_indices(10).unwrap(), vec![9]);
        assert_eq!(PageRange::Range(9, 9).get_indices(10).unwrap(), vec![9]);

        assert!(matches!(
            PageRange::Single(10).get_indices(10),
            Err(OperationError::PageIndexOutOfBounds(10, 10))
        ));
        assert!(matches!(
            PageRange::Range(10, 12).get_indices(10),
            Err(OperationError::PageIndexOutOfBounds(10, 10))
        ));
        assert!(matches!(
            PageRange::Range(8, 15).get_indices(10),
            Err(OperationError::PageIndexOutOfBounds(15, 10))
        ));
        assert!(matches!(
            PageRange::List(vec![1, 10, 11]).get_indices(10),
            Err(OperationError::PageIndexOutOfBounds(10, 10))
        ));
    }
}