1use std::fmt;
9
10use crate::unicode_norm::UnicodeNorm;
11
/// Errors reported while processing a PDF document.
///
/// Every payload is a plain `String` message, so the type is cheap to clone
/// and supports total equality (`Eq`) in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PdfError {
    /// Parsing failure; displayed as `parse error: <message>`.
    ParseError(String),
    /// I/O failure; displayed as `I/O error: <message>`.
    IoError(String),
    /// Font-related failure; displayed as `font error: <message>`.
    FontError(String),
    /// Interpreter failure; displayed as `interpreter error: <message>`.
    InterpreterError(String),
    /// A resource limit was exceeded; displayed as
    /// `resource limit exceeded: <message>`.
    ResourceLimitExceeded(String),
    /// The PDF is encrypted and requires a password.
    PasswordRequired,
    /// The supplied password is incorrect.
    InvalidPassword,
    /// Any other failure; displayed as the bare message with no prefix.
    Other(String),
}
35
36impl fmt::Display for PdfError {
37 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
38 match self {
39 PdfError::ParseError(msg) => write!(f, "parse error: {msg}"),
40 PdfError::IoError(msg) => write!(f, "I/O error: {msg}"),
41 PdfError::FontError(msg) => write!(f, "font error: {msg}"),
42 PdfError::InterpreterError(msg) => write!(f, "interpreter error: {msg}"),
43 PdfError::ResourceLimitExceeded(msg) => write!(f, "resource limit exceeded: {msg}"),
44 PdfError::PasswordRequired => write!(f, "PDF is encrypted and requires a password"),
45 PdfError::InvalidPassword => write!(f, "the supplied password is incorrect"),
46 PdfError::Other(msg) => write!(f, "{msg}"),
47 }
48 }
49}
50
// `PdfError` stores only message strings and no underlying source error, so
// the default `std::error::Error` method implementations are used as-is.
impl std::error::Error for PdfError {}
52
53impl From<std::io::Error> for PdfError {
54 fn from(err: std::io::Error) -> Self {
55 PdfError::IoError(err.to_string())
56 }
57}
58
/// A warning message with optional context about where it arose.
///
/// All fields are strings or optional integers/strings, so the type supports
/// total equality (`Eq`) in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractWarning {
    /// Human-readable description of the issue; always present.
    pub description: String,
    /// Page the warning relates to, if known.
    // NOTE(review): whether this is 0- or 1-based is not established here.
    pub page: Option<usize>,
    /// Free-form description of the affected element, if any.
    pub element: Option<String>,
    /// Index of the related operator, if any.
    pub operator_index: Option<usize>,
    /// Name of the related font, if any.
    pub font_name: Option<String>,
}
78
79impl ExtractWarning {
80 pub fn new(description: impl Into<String>) -> Self {
82 Self {
83 description: description.into(),
84 page: None,
85 element: None,
86 operator_index: None,
87 font_name: None,
88 }
89 }
90
91 pub fn on_page(description: impl Into<String>, page: usize) -> Self {
93 Self {
94 description: description.into(),
95 page: Some(page),
96 element: None,
97 operator_index: None,
98 font_name: None,
99 }
100 }
101
102 pub fn with_context(
104 description: impl Into<String>,
105 page: usize,
106 element: impl Into<String>,
107 ) -> Self {
108 Self {
109 description: description.into(),
110 page: Some(page),
111 element: Some(element.into()),
112 operator_index: None,
113 font_name: None,
114 }
115 }
116
117 pub fn with_operator_context(
122 description: impl Into<String>,
123 operator_index: usize,
124 font_name: impl Into<String>,
125 ) -> Self {
126 Self {
127 description: description.into(),
128 page: None,
129 element: None,
130 operator_index: Some(operator_index),
131 font_name: Some(font_name.into()),
132 }
133 }
134}
135
136impl fmt::Display for ExtractWarning {
137 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138 write!(f, "{}", self.description)?;
139 if let Some(page) = self.page {
140 write!(f, " (page {page})")?;
141 }
142 if let Some(ref font_name) = self.font_name {
143 write!(f, " [font {font_name}]")?;
144 }
145 if let Some(index) = self.operator_index {
146 write!(f, " [operator #{index}]")?;
147 }
148 if let Some(ref element) = self.element {
149 write!(f, " [{element}]")?;
150 }
151 Ok(())
152 }
153}
154
155#[derive(Debug, Clone)]
159pub struct ExtractResult<T> {
160 pub value: T,
162 pub warnings: Vec<ExtractWarning>,
164}
165
166impl<T> ExtractResult<T> {
167 pub fn ok(value: T) -> Self {
169 Self {
170 value,
171 warnings: Vec::new(),
172 }
173 }
174
175 pub fn with_warnings(value: T, warnings: Vec<ExtractWarning>) -> Self {
177 Self { value, warnings }
178 }
179
180 pub fn is_clean(&self) -> bool {
182 self.warnings.is_empty()
183 }
184
185 pub fn map<U>(self, f: impl FnOnce(T) -> U) -> ExtractResult<U> {
187 ExtractResult {
188 value: f(self.value),
189 warnings: self.warnings,
190 }
191 }
192}
193
194#[derive(Debug, Clone)]
199pub struct ExtractOptions {
200 pub max_recursion_depth: usize,
202 pub max_objects_per_page: usize,
204 pub max_stream_bytes: usize,
206 pub collect_warnings: bool,
208 pub unicode_norm: UnicodeNorm,
210}
211
212impl Default for ExtractOptions {
213 fn default() -> Self {
214 Self {
215 max_recursion_depth: 10,
216 max_objects_per_page: 100_000,
217 max_stream_bytes: 100 * 1024 * 1024,
218 collect_warnings: true,
219 unicode_norm: UnicodeNorm::None,
220 }
221 }
222}
223
#[cfg(test)]
mod tests {
    use super::*;
    use crate::unicode_norm::UnicodeNorm;

    // ---- PdfError -------------------------------------------------------

    #[test]
    fn pdf_error_parse_error_creation() {
        let rendered = PdfError::ParseError(String::from("invalid xref")).to_string();
        assert_eq!(rendered, "parse error: invalid xref");
    }

    #[test]
    fn pdf_error_io_error_creation() {
        let rendered = PdfError::IoError(String::from("file not found")).to_string();
        assert_eq!(rendered, "I/O error: file not found");
    }

    #[test]
    fn pdf_error_font_error_creation() {
        let rendered = PdfError::FontError(String::from("missing glyph widths")).to_string();
        assert_eq!(rendered, "font error: missing glyph widths");
    }

    #[test]
    fn pdf_error_interpreter_error_creation() {
        let rendered = PdfError::InterpreterError(String::from("unknown operator")).to_string();
        assert_eq!(rendered, "interpreter error: unknown operator");
    }

    #[test]
    fn pdf_error_resource_limit_exceeded() {
        let rendered =
            PdfError::ResourceLimitExceeded(String::from("too many objects")).to_string();
        assert_eq!(rendered, "resource limit exceeded: too many objects");
    }

    #[test]
    fn pdf_error_password_required() {
        let rendered = PdfError::PasswordRequired.to_string();
        assert_eq!(rendered, "PDF is encrypted and requires a password");
    }

    #[test]
    fn pdf_error_invalid_password() {
        let rendered = PdfError::InvalidPassword.to_string();
        assert_eq!(rendered, "the supplied password is incorrect");
    }

    #[test]
    fn pdf_error_password_required_clone_and_eq() {
        let original = PdfError::PasswordRequired;
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    #[test]
    fn pdf_error_invalid_password_clone_and_eq() {
        let original = PdfError::InvalidPassword;
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    #[test]
    fn pdf_error_other() {
        let rendered = PdfError::Other(String::from("something went wrong")).to_string();
        assert_eq!(rendered, "something went wrong");
    }

    #[test]
    fn pdf_error_implements_std_error() {
        // Coercion to a trait object proves the `std::error::Error` impl exists.
        let boxed: Box<dyn std::error::Error> =
            Box::new(PdfError::ParseError(String::from("test")));
        assert_eq!(boxed.to_string(), "parse error: test");
    }

    #[test]
    fn pdf_error_clone_and_eq() {
        let original = PdfError::ParseError(String::from("test"));
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    #[test]
    fn pdf_error_from_io_error() {
        let source = std::io::Error::new(std::io::ErrorKind::NotFound, "missing file");
        let converted = PdfError::from(source);
        assert!(matches!(converted, PdfError::IoError(_)));
        assert!(converted.to_string().contains("missing file"));
    }

    // ---- ExtractWarning -------------------------------------------------

    #[test]
    fn warning_new_with_description_only() {
        let warning = ExtractWarning::new("missing font metrics");
        assert_eq!(warning.description, "missing font metrics");
        assert_eq!(warning.page, None);
        assert_eq!(warning.element, None);
        assert_eq!(warning.operator_index, None);
        assert_eq!(warning.font_name, None);
        assert_eq!(warning.to_string(), "missing font metrics");
    }

    #[test]
    fn warning_on_page() {
        let warning = ExtractWarning::on_page("unknown operator", 3);
        assert_eq!(warning.description, "unknown operator");
        assert_eq!(warning.page, Some(3));
        assert_eq!(warning.element, None);
        assert_eq!(warning.operator_index, None);
        assert_eq!(warning.font_name, None);
        assert_eq!(warning.to_string(), "unknown operator (page 3)");
    }

    #[test]
    fn warning_with_full_context() {
        let warning = ExtractWarning::with_context("missing width", 1, "char at offset 42");
        assert_eq!(warning.description, "missing width");
        assert_eq!(warning.page, Some(1));
        assert_eq!(warning.element, Some(String::from("char at offset 42")));
        assert_eq!(warning.operator_index, None);
        assert_eq!(warning.font_name, None);
        assert_eq!(
            warning.to_string(),
            "missing width (page 1) [char at offset 42]"
        );
    }

    #[test]
    fn warning_with_operator_context() {
        let warning =
            ExtractWarning::with_operator_context("font not found in resources", 5, "Helvetica");
        assert_eq!(warning.description, "font not found in resources");
        assert_eq!(warning.page, None);
        assert_eq!(warning.element, None);
        assert_eq!(warning.operator_index, Some(5));
        assert_eq!(warning.font_name, Some(String::from("Helvetica")));
        assert_eq!(
            warning.to_string(),
            "font not found in resources [font Helvetica] [operator #5]"
        );
    }

    #[test]
    fn warning_display_with_all_fields() {
        let warning = ExtractWarning {
            description: String::from("test warning"),
            page: Some(2),
            element: Some(String::from("extra context")),
            operator_index: Some(10),
            font_name: Some(String::from("Arial")),
        };
        assert_eq!(
            warning.to_string(),
            "test warning (page 2) [font Arial] [operator #10] [extra context]"
        );
    }

    #[test]
    fn warning_clone_and_eq() {
        let original = ExtractWarning::on_page("test warning", 0);
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    #[test]
    fn warning_with_operator_context_clone_and_eq() {
        let original = ExtractWarning::with_operator_context("test", 3, "Times");
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    // ---- ExtractResult --------------------------------------------------

    #[test]
    fn extract_result_ok_no_warnings() {
        let outcome = ExtractResult::ok(42);
        assert_eq!(outcome.value, 42);
        assert!(outcome.warnings.is_empty());
        assert!(outcome.is_clean());
    }

    #[test]
    fn extract_result_with_warnings() {
        let outcome = ExtractResult::with_warnings(
            "hello",
            vec![
                ExtractWarning::new("warn 1"),
                ExtractWarning::on_page("warn 2", 0),
            ],
        );
        assert_eq!(outcome.value, "hello");
        assert_eq!(outcome.warnings.len(), 2);
        assert!(!outcome.is_clean());
    }

    #[test]
    fn extract_result_map_preserves_warnings() {
        let doubled =
            ExtractResult::with_warnings(10, vec![ExtractWarning::new("test")]).map(|v| v * 2);
        assert_eq!(doubled.value, 20);
        assert_eq!(doubled.warnings.len(), 1);
        assert_eq!(doubled.warnings[0].description, "test");
    }

    #[test]
    fn extract_result_collect_multiple_warnings() {
        let mut outcome = ExtractResult::ok(Vec::<String>::new());
        outcome.warnings.extend([
            ExtractWarning::new("first"),
            ExtractWarning::on_page("second", 1),
            ExtractWarning::with_context("third", 2, "char 'A'"),
        ]);
        assert_eq!(outcome.warnings.len(), 3);
    }

    // ---- ExtractOptions -------------------------------------------------

    #[test]
    fn extract_options_default_values() {
        let defaults = ExtractOptions::default();
        assert_eq!(defaults.max_recursion_depth, 10);
        assert_eq!(defaults.max_objects_per_page, 100_000);
        assert_eq!(defaults.max_stream_bytes, 100 * 1024 * 1024);
        assert!(defaults.collect_warnings);
        assert_eq!(defaults.unicode_norm, UnicodeNorm::None);
    }

    #[test]
    fn extract_options_custom_values() {
        let custom = ExtractOptions {
            max_recursion_depth: 5,
            max_objects_per_page: 50_000,
            max_stream_bytes: 10 * 1024 * 1024,
            collect_warnings: false,
            unicode_norm: UnicodeNorm::None,
        };
        assert_eq!(custom.max_recursion_depth, 5);
        assert_eq!(custom.max_objects_per_page, 50_000);
        assert_eq!(custom.max_stream_bytes, 10 * 1024 * 1024);
        assert!(!custom.collect_warnings);
    }

    #[test]
    fn extract_options_clone() {
        let original = ExtractOptions::default();
        let copy = original.clone();
        assert_eq!(copy.max_recursion_depth, original.max_recursion_depth);
        assert_eq!(copy.collect_warnings, original.collect_warnings);
    }
}