1pub mod chunk_page_mapper;
7pub mod extract_images;
8pub mod merge;
9pub mod overlay;
10pub mod page_analysis;
11pub mod page_extraction;
12pub mod pdf_ocr_converter;
13pub mod reorder;
14pub mod rotate;
15pub mod semantic_redactor;
16pub mod source_highlighter;
17pub mod split;
18
19pub use chunk_page_mapper::ChunkPageMapper;
20pub use extract_images::{
21 extract_images_from_pages, extract_images_from_pdf, ExtractImagesOptions, ExtractedImage,
22 ImageExtractor,
23};
24pub use merge::{merge_pdf_files, merge_pdfs, MergeInput, MergeOptions, PdfMerger};
25pub use overlay::{overlay_pdf, OverlayOptions, OverlayPosition, PdfOverlay};
26pub use page_analysis::{AnalysisOptions, ContentAnalysis, PageContentAnalyzer, PageType};
27pub use page_extraction::{
28 extract_page, extract_page_range, extract_page_range_to_file, extract_page_to_file,
29 extract_pages, extract_pages_to_file, PageExtractionOptions, PageExtractor,
30};
31pub use pdf_ocr_converter::{ConversionOptions, ConversionResult, PdfOcrConverter};
32pub use reorder::{
33 move_pdf_page, reorder_pdf_pages, reverse_pdf_pages, swap_pdf_pages, PageReorderer,
34 ReorderOptions,
35};
36pub use rotate::{rotate_all_pages, rotate_pdf_pages, PageRotator, RotateOptions, RotationAngle};
37pub use semantic_redactor::{
38 RedactionConfig, RedactionEntry, RedactionReport, RedactionStyle, SemanticRedactor,
39 SemanticRedactorError, SemanticRedactorResult,
40};
41pub use source_highlighter::{
42 fragment_to_highlight_rect, HighlightStyle, IndexedFragment, SourceHighlighter,
43 SourceHighlighterError, SourceHighlighterResult, TextPositionIndex,
44};
45pub use split::{split_into_pages, split_pdf, PdfSplitter, SplitMode, SplitOptions};
46
47use crate::error::PdfError;
48
/// Convenience alias for a `Result` whose error type is [`OperationError`].
pub type OperationResult<T> = Result<T, OperationError>;
51
/// Error type shared by this module; it is the error half of [`OperationResult`].
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
/// The `Io` and `PdfError` variants also get `From` conversions through `#[from]`,
/// so `?` can be used on those underlying error types.
#[derive(Debug, thiserror::Error)]
pub enum OperationError {
    /// A page index was not smaller than the document's page count.
    /// Fields: the offending index, then the total number of pages.
    #[error("Page index {0} out of bounds (document has {1} pages)")]
    PageIndexOutOfBounds(usize, usize),

    /// A page-range specification was rejected; the payload explains why.
    #[error("Invalid page range: {0}")]
    InvalidPageRange(String),

    /// There were no pages to process.
    #[error("No pages to process")]
    NoPagesToProcess,

    /// A resource conflict, described by the contained message.
    #[error("Resource conflict: {0}")]
    ResourceConflict(String),

    /// A rotation angle other than 0, 90, 180 or 270; the payload is the rejected value.
    #[error("Invalid rotation angle: {0} (must be 0, 90, 180, or 270)")]
    InvalidRotation(i32),

    /// A parse failure, described by the contained message.
    #[error("Parse error: {0}")]
    ParseError(String),

    /// A file path was rejected; `reason` is inserted into the message.
    #[error("Invalid file path: {reason}")]
    InvalidPath { reason: String },

    /// An underlying I/O error (converted automatically via `From<std::io::Error>`).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// An underlying crate-level PDF error (converted automatically via `From<PdfError>`).
    #[error("PDF error: {0}")]
    PdfError(#[from] PdfError),

    /// A processing failure, described by the contained message.
    #[error("Processing error: {0}")]
    ProcessingError(String),
}
95
/// A selection of pages, stored as zero-based indices.
///
/// `PageRange::parse` builds a value from 1-based text input, and
/// `PageRange::get_indices` expands a value against a concrete page count.
#[derive(Debug, Clone)]
pub enum PageRange {
    /// Every page of the document.
    All,
    /// A single page, by zero-based index.
    Single(usize),
    /// An inclusive span `(start, end)` of zero-based indices.
    Range(usize, usize),
    /// An explicit list of zero-based indices; order and duplicates are kept as given.
    List(Vec<usize>),
}
108
109impl PageRange {
110 pub fn parse(s: &str) -> Result<Self, OperationError> {
118 let s = s.trim();
119
120 if s.eq_ignore_ascii_case("all") {
121 return Ok(PageRange::All);
122 }
123
124 if let Ok(page) = s.parse::<usize>() {
126 if page == 0 {
127 return Err(OperationError::InvalidPageRange(
128 "Page numbers start at 1".to_string(),
129 ));
130 }
131 return Ok(PageRange::Single(page - 1));
132 }
133
134 if let Some((start, end)) = s.split_once('-') {
136 let start = start
137 .trim()
138 .parse::<usize>()
139 .map_err(|_| OperationError::InvalidPageRange(format!("Invalid start: {start}")))?;
140 let end = end
141 .trim()
142 .parse::<usize>()
143 .map_err(|_| OperationError::InvalidPageRange(format!("Invalid end: {end}")))?;
144
145 if start == 0 || end == 0 {
146 return Err(OperationError::InvalidPageRange(
147 "Page numbers start at 1".to_string(),
148 ));
149 }
150
151 if start > end {
152 return Err(OperationError::InvalidPageRange(format!(
153 "Start {start} is greater than end {end}"
154 )));
155 }
156
157 return Ok(PageRange::Range(start - 1, end - 1));
158 }
159
160 if s.contains(',') {
162 let pages: Result<Vec<usize>, _> = s
163 .split(',')
164 .map(|p| {
165 let page = p.trim().parse::<usize>().map_err(|_| {
166 OperationError::InvalidPageRange(format!("Invalid page: {p}"))
167 })?;
168 if page == 0 {
169 return Err(OperationError::InvalidPageRange(
170 "Page numbers start at 1".to_string(),
171 ));
172 }
173 Ok(page - 1)
174 })
175 .collect();
176
177 return Ok(PageRange::List(pages?));
178 }
179
180 Err(OperationError::InvalidPageRange(format!(
181 "Invalid format: {s}"
182 )))
183 }
184
185 pub fn get_indices(&self, total_pages: usize) -> Result<Vec<usize>, OperationError> {
187 match self {
188 PageRange::All => Ok((0..total_pages).collect()),
189 PageRange::Single(idx) => {
190 if *idx >= total_pages {
191 Err(OperationError::PageIndexOutOfBounds(*idx, total_pages))
192 } else {
193 Ok(vec![*idx])
194 }
195 }
196 PageRange::Range(start, end) => {
197 if *start >= total_pages {
198 Err(OperationError::PageIndexOutOfBounds(*start, total_pages))
199 } else if *end >= total_pages {
200 Err(OperationError::PageIndexOutOfBounds(*end, total_pages))
201 } else {
202 Ok((*start..=*end).collect())
203 }
204 }
205 PageRange::List(pages) => {
206 for &page in pages {
207 if page >= total_pages {
208 return Err(OperationError::PageIndexOutOfBounds(page, total_pages));
209 }
210 }
211 Ok(pages.clone())
212 }
213 }
214 }
215}
216
217#[cfg(test)]
218mod error_tests;
219
// Unit tests for `PageRange` parsing and index expansion, for the `Display`
// output and conversions of `OperationError`, and for the module's re-exports.
#[cfg(test)]
mod tests {
    use super::*;

    // Each accepted input form maps 1-based text to the expected 0-based variant;
    // page 0, a reversed range and non-numeric text are rejected.
    #[test]
    fn test_page_range_parsing() {
        assert!(matches!(PageRange::parse("all").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("ALL").unwrap(), PageRange::All));

        match PageRange::parse("5").unwrap() {
            PageRange::Single(idx) => assert_eq!(idx, 4),
            _ => panic!("Expected Single"),
        }

        match PageRange::parse("2-5").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }

        match PageRange::parse("1,3,5,7").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 2, 4, 6]);
            }
            _ => panic!("Expected List"),
        }

        assert!(PageRange::parse("0").is_err());
        assert!(PageRange::parse("5-2").is_err());
        assert!(PageRange::parse("invalid").is_err());
    }

    // `get_indices` expands every variant against a 10-page document and rejects
    // indices at or beyond the page count.
    #[test]
    fn test_page_range_indices() {
        let total = 10;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        assert_eq!(PageRange::Single(5).get_indices(total).unwrap(), vec![5]);

        assert_eq!(
            PageRange::Range(2, 5).get_indices(total).unwrap(),
            vec![2, 3, 4, 5]
        );

        assert_eq!(
            PageRange::List(vec![1, 3, 5]).get_indices(total).unwrap(),
            vec![1, 3, 5]
        );

        assert!(PageRange::Single(10).get_indices(total).is_err());
        assert!(PageRange::Range(8, 15).get_indices(total).is_err());
    }

    // Empty input and a list consisting only of separators are both rejected.
    #[test]
    fn test_page_range_empty_list() {
        let result = PageRange::parse("");
        assert!(result.is_err());

        let result2 = PageRange::parse(",,");
        assert!(result2.is_err());
    }

    // A zero inside a list is reported with the "Page numbers start at 1" message.
    #[test]
    fn test_page_range_list_with_zero() {
        let result = PageRange::parse("1,0,3");
        assert!(result.is_err());
        if let Err(e) = result {
            match e {
                OperationError::InvalidPageRange(msg) => {
                    assert!(msg.contains("Page numbers start at 1"));
                }
                _ => panic!("Expected InvalidPageRange error"),
            }
        }
    }

    // Whitespace around list items and around the range dash is tolerated.
    #[test]
    fn test_page_range_with_extra_spaces() {
        match PageRange::parse(" 1 , 3 , 5 ").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 2, 4]);
            }
            _ => panic!("Expected List"),
        }

        match PageRange::parse(" 2 - 5 ").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }
    }

    // A range whose start equals its end is valid and expands to one index.
    #[test]
    fn test_page_range_equal_start_end() {
        match PageRange::parse("5-5").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 4);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }

        let range = PageRange::Range(4, 4);
        assert_eq!(range.get_indices(10).unwrap(), vec![4]);
    }

    // The out-of-bounds error carries the offending list index and the page count.
    #[test]
    fn test_page_range_list_out_of_bounds() {
        let pages = PageRange::List(vec![2, 5, 15]);
        let result = pages.get_indices(10);
        assert!(result.is_err());
        if let Err(e) = result {
            match e {
                OperationError::PageIndexOutOfBounds(idx, total) => {
                    assert_eq!(idx, 15);
                    assert_eq!(total, 10);
                }
                _ => panic!("Expected PageIndexOutOfBounds error"),
            }
        }
    }

    // With zero pages, `All` and an empty `List` expand to nothing, while any
    // concrete index is out of bounds.
    #[test]
    fn test_page_range_empty_document() {
        let total = 0;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            Vec::<usize>::new()
        );

        assert!(PageRange::Single(0).get_indices(total).is_err());

        assert!(PageRange::Range(0, 1).get_indices(total).is_err());

        assert_eq!(
            PageRange::List(vec![]).get_indices(total).unwrap(),
            Vec::<usize>::new()
        );
    }

    // Further malformed inputs: multiple dashes, letters, decimals and dangling dashes.
    #[test]
    fn test_page_range_additional_invalid_formats() {
        assert!(PageRange::parse("1-2-3").is_err()); assert!(PageRange::parse("abc").is_err()); assert!(PageRange::parse("1.5").is_err()); assert!(PageRange::parse("-5").is_err()); assert!(PageRange::parse("1-").is_err()); assert!(PageRange::parse("-").is_err()); }

    // Compile-time check only: the typed, uninitialised bindings verify that these
    // items are reachable through their submodules; nothing is constructed.
    #[test]
    fn test_module_exports() {
        use super::extract_images::ExtractImagesOptions;
        use super::merge::MergeOptions;
        use super::page_analysis::{AnalysisOptions, PageType};
        use super::page_extraction::PageExtractionOptions;
        use super::rotate::{RotateOptions, RotationAngle};
        use super::split::{SplitMode, SplitOptions};

        let _extract: ExtractImagesOptions;
        let _merge: MergeOptions;
        let _analysis: AnalysisOptions;
        let _extraction: PageExtractionOptions;
        let _rotate: RotateOptions;
        let _split: SplitOptions;
        let _angle: RotationAngle;
        let _page_type: PageType;
        let _mode: SplitMode;
    }

    // Every variant listed here renders a non-empty `Display` message
    // (`InvalidPath`, `Io` and `PdfError` are not part of this list).
    #[test]
    fn test_operation_error_variants() {
        let errors = vec![
            OperationError::PageIndexOutOfBounds(5, 3),
            OperationError::InvalidPageRange("test".to_string()),
            OperationError::NoPagesToProcess,
            OperationError::ResourceConflict("test".to_string()),
            OperationError::InvalidRotation(45),
            OperationError::ParseError("test".to_string()),
            OperationError::ProcessingError("test".to_string()),
        ];

        for error in errors {
            let message = error.to_string();
            assert!(!message.is_empty());
        }
    }

    // Padded input parses the same as unpadded input for every form.
    #[test]
    fn test_page_range_edge_cases() {
        assert!(matches!(
            PageRange::parse(" all ").unwrap(),
            PageRange::All
        ));
        assert!(matches!(
            PageRange::parse(" 5 ").unwrap(),
            PageRange::Single(4)
        ));

        match PageRange::parse(" 1 , 3 , 5 ").unwrap() {
            PageRange::List(pages) => assert_eq!(pages, vec![0, 2, 4]),
            _ => panic!("Expected List"),
        }

        match PageRange::parse(" 2 - 5 ").unwrap() {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 4);
            }
            _ => panic!("Expected Range"),
        }
    }

    // A catalogue of inputs that `parse` must reject.
    #[test]
    fn test_page_range_invalid_formats() {
        assert!(PageRange::parse("").is_err());
        assert!(PageRange::parse("abc").is_err());
        assert!(PageRange::parse("1-").is_err());
        assert!(PageRange::parse("-5").is_err());
        assert!(PageRange::parse("1-2-3").is_err());
        assert!(PageRange::parse("1,0,3").is_err());
        assert!(PageRange::parse("0-5").is_err());
        assert!(PageRange::parse("5-0").is_err());
        assert!(PageRange::parse("1,,3").is_err());
        assert!(PageRange::parse("1.5").is_err());
    }

    // With zero pages, only `All` succeeds; a non-empty `List` is out of bounds.
    #[test]
    fn test_page_range_get_indices_empty_document() {
        let total = 0;

        assert_eq!(
            PageRange::All.get_indices(total).unwrap(),
            vec![] as Vec<usize>
        );
        assert!(PageRange::Single(0).get_indices(total).is_err());
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
        assert!(PageRange::List(vec![0]).get_indices(total).is_err());
    }

    // With exactly one page, index 0 is the only valid index.
    #[test]
    fn test_page_range_get_indices_single_page_document() {
        let total = 1;

        assert_eq!(PageRange::All.get_indices(total).unwrap(), vec![0]);
        assert_eq!(PageRange::Single(0).get_indices(total).unwrap(), vec![0]);
        assert!(PageRange::Single(1).get_indices(total).is_err());
        assert_eq!(PageRange::Range(0, 0).get_indices(total).unwrap(), vec![0]);
        assert!(PageRange::Range(0, 1).get_indices(total).is_err());
    }

    // Duplicate pages in a list are kept, not deduplicated.
    #[test]
    fn test_page_range_list_duplicates() {
        match PageRange::parse("1,1,2,2,3").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![0, 0, 1, 1, 2]);
            }
            _ => panic!("Expected List"),
        }
    }

    // List order is kept as written, not sorted.
    #[test]
    fn test_page_range_list_unordered() {
        match PageRange::parse("5,2,8,1,3").unwrap() {
            PageRange::List(pages) => {
                assert_eq!(pages, vec![4, 1, 7, 0, 2]);
            }
            _ => panic!("Expected List"),
        }
    }

    // Exact `Display` text for three representative variants.
    #[test]
    fn test_operation_error_display() {
        let error = OperationError::PageIndexOutOfBounds(10, 5);
        assert_eq!(
            error.to_string(),
            "Page index 10 out of bounds (document has 5 pages)"
        );

        let error = OperationError::InvalidRotation(45);
        assert_eq!(
            error.to_string(),
            "Invalid rotation angle: 45 (must be 0, 90, 180, or 270)"
        );

        let error = OperationError::NoPagesToProcess;
        assert_eq!(error.to_string(), "No pages to process");
    }

    // Expansion against a 1000-page document, checking lengths and end points.
    #[test]
    fn test_page_range_large_document() {
        let total = 1000;

        let indices = PageRange::All.get_indices(total).unwrap();
        assert_eq!(indices.len(), 1000);
        assert_eq!(indices[0], 0);
        assert_eq!(indices[999], 999);

        let indices = PageRange::Range(100, 200).get_indices(total).unwrap();
        assert_eq!(indices.len(), 101);
        assert_eq!(indices[0], 100);
        assert_eq!(indices[100], 200);
    }

    // "all" is matched without regard to ASCII case.
    #[test]
    fn test_page_range_parse_case_insensitive() {
        assert!(matches!(PageRange::parse("all").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("ALL").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("All").unwrap(), PageRange::All));
        assert!(matches!(PageRange::parse("aLL").unwrap(), PageRange::All));
    }

    // `OperationResult<T>` works as an ordinary `Result` in function signatures.
    #[test]
    fn test_operation_result_type() {
        fn test_function() -> OperationResult<usize> {
            Ok(42)
        }

        fn test_error_function() -> OperationResult<usize> {
            Err(OperationError::NoPagesToProcess)
        }

        assert_eq!(test_function().unwrap(), 42);
        assert!(test_error_function().is_err());
    }

    // A very large page number (`usize::MAX / 2`) parses, and the last index of a
    // document is accepted.
    #[test]
    fn test_page_range_boundary_values() {
        let large_page = usize::MAX / 2;

        match PageRange::parse(&large_page.to_string()).unwrap() {
            PageRange::Single(idx) => assert_eq!(idx, large_page - 1),
            _ => panic!("Expected Single"),
        }

        let indices = PageRange::Single(5).get_indices(10).unwrap();
        assert_eq!(indices, vec![5]);

        let indices = PageRange::Range(0, 9).get_indices(10).unwrap();
        assert_eq!(indices.len(), 10);
    }

    // `std::io::Error` converts into `OperationError::Io` through `From`.
    #[test]
    fn test_error_from_io() {
        use std::io;

        let io_error = io::Error::new(io::ErrorKind::NotFound, "File not found");
        let op_error: OperationError = io_error.into();

        match op_error {
            OperationError::Io(_) => {}
            _ => panic!("Expected Io variant"),
        }
    }

    // The derived `Debug` output names the variant (and its payload for `Single`).
    #[test]
    fn test_page_range_fmt_debug() {
        let range = PageRange::All;
        let debug_str = format!("{:?}", range);
        assert!(debug_str.contains("All"));

        let range = PageRange::Single(5);
        let debug_str = format!("{:?}", range);
        assert!(debug_str.contains("Single"));
        assert!(debug_str.contains("5"));

        let range = PageRange::Range(1, 10);
        let debug_str = format!("{:?}", range);
        assert!(debug_str.contains("Range"));

        let range = PageRange::List(vec![1, 2, 3]);
        let debug_str = format!("{:?}", range);
        assert!(debug_str.contains("List"));
    }

    // The derived `Clone` produces an equal list.
    #[test]
    fn test_page_range_clone() {
        let original = PageRange::List(vec![1, 2, 3]);
        let cloned = original.clone();

        match (original, cloned) {
            (PageRange::List(orig), PageRange::List(clone)) => {
                assert_eq!(orig, clone);
            }
            _ => panic!("Clone failed"),
        }
    }
}