1use std::fmt;
9
/// Error type whose variants each carry a human-readable message.
///
/// Payloads are plain `String`s (no wrapped source errors), which lets the
/// type derive `Clone`, `PartialEq` and `Eq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PdfError {
    /// Displayed as `parse error: <message>`.
    ParseError(String),
    /// Displayed as `I/O error: <message>`. The `From<std::io::Error>`
    /// conversion produces this variant from the source error's display text.
    IoError(String),
    /// Displayed as `font error: <message>`.
    FontError(String),
    /// Displayed as `interpreter error: <message>`.
    InterpreterError(String),
    /// Displayed as `resource limit exceeded: <message>`.
    ResourceLimitExceeded(String),
    /// Catch-all variant; displayed as the bare message with no prefix.
    Other(String),
}
29
30impl fmt::Display for PdfError {
31 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32 match self {
33 PdfError::ParseError(msg) => write!(f, "parse error: {msg}"),
34 PdfError::IoError(msg) => write!(f, "I/O error: {msg}"),
35 PdfError::FontError(msg) => write!(f, "font error: {msg}"),
36 PdfError::InterpreterError(msg) => write!(f, "interpreter error: {msg}"),
37 PdfError::ResourceLimitExceeded(msg) => write!(f, "resource limit exceeded: {msg}"),
38 PdfError::Other(msg) => write!(f, "{msg}"),
39 }
40 }
41}
42
43impl std::error::Error for PdfError {}
44
45impl From<std::io::Error> for PdfError {
46 fn from(err: std::io::Error) -> Self {
47 PdfError::IoError(err.to_string())
48 }
49}
50
/// A warning message with optional pieces of context attached.
///
/// Only `description` is mandatory; each context field is `None` unless the
/// constructor used (or the caller) fills it in.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractWarning {
    /// Human-readable description of the issue.
    pub description: String,
    /// Page the warning refers to, if any.
    /// NOTE(review): whether this is 0- or 1-based is not established in this file.
    pub page: Option<usize>,
    /// Free-form description of the affected element, if any.
    pub element: Option<String>,
    /// Index of the operator the warning refers to, if any.
    pub operator_index: Option<usize>,
    /// Name of the font involved, if any.
    pub font_name: Option<String>,
}

impl ExtractWarning {
    /// Creates a warning that carries only a description; every context field
    /// is `None`.
    pub fn new(description: impl Into<String>) -> Self {
        Self {
            description: description.into(),
            page: None,
            element: None,
            operator_index: None,
            font_name: None,
        }
    }

    /// Creates a warning tied to `page`; all other context fields are `None`.
    pub fn on_page(description: impl Into<String>, page: usize) -> Self {
        Self {
            page: Some(page),
            ..Self::new(description)
        }
    }

    /// Creates a warning tied to `page` and to a described `element`.
    ///
    /// `operator_index` and `font_name` are left as `None`.
    pub fn with_context(
        description: impl Into<String>,
        page: usize,
        element: impl Into<String>,
    ) -> Self {
        Self {
            element: Some(element.into()),
            ..Self::on_page(description, page)
        }
    }

    /// Creates a warning tied to an operator index and a font name.
    ///
    /// `page` and `element` are left as `None`.
    pub fn with_operator_context(
        description: impl Into<String>,
        operator_index: usize,
        font_name: impl Into<String>,
    ) -> Self {
        Self {
            operator_index: Some(operator_index),
            font_name: Some(font_name.into()),
            ..Self::new(description)
        }
    }
}
127
128impl fmt::Display for ExtractWarning {
129 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130 write!(f, "{}", self.description)?;
131 if let Some(page) = self.page {
132 write!(f, " (page {page})")?;
133 }
134 if let Some(ref font_name) = self.font_name {
135 write!(f, " [font {font_name}]")?;
136 }
137 if let Some(index) = self.operator_index {
138 write!(f, " [operator #{index}]")?;
139 }
140 if let Some(ref element) = self.element {
141 write!(f, " [{element}]")?;
142 }
143 Ok(())
144 }
145}
146
147#[derive(Debug, Clone)]
151pub struct ExtractResult<T> {
152 pub value: T,
154 pub warnings: Vec<ExtractWarning>,
156}
157
158impl<T> ExtractResult<T> {
159 pub fn ok(value: T) -> Self {
161 Self {
162 value,
163 warnings: Vec::new(),
164 }
165 }
166
167 pub fn with_warnings(value: T, warnings: Vec<ExtractWarning>) -> Self {
169 Self { value, warnings }
170 }
171
172 pub fn is_clean(&self) -> bool {
174 self.warnings.is_empty()
175 }
176
177 pub fn map<U>(self, f: impl FnOnce(T) -> U) -> ExtractResult<U> {
179 ExtractResult {
180 value: f(self.value),
181 warnings: self.warnings,
182 }
183 }
184}
185
/// Limits and switches for extraction.
///
/// NOTE(review): the code that enforces these settings is not part of this
/// file; the field descriptions below follow the field names and should be
/// confirmed against the consumers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractOptions {
    /// Upper bound on recursion depth (default: 10).
    pub max_recursion_depth: usize,
    /// Upper bound on the number of objects per page (default: 100,000).
    pub max_objects_per_page: usize,
    /// Upper bound on stream size in bytes (default: 100 MiB).
    pub max_stream_bytes: usize,
    /// Whether warnings should be collected (default: `true`).
    pub collect_warnings: bool,
}

impl Default for ExtractOptions {
    /// Returns the default option set: recursion depth 10, 100,000 objects
    /// per page, 100 MiB of stream data, warning collection enabled.
    fn default() -> Self {
        Self {
            max_recursion_depth: 10,
            max_objects_per_page: 100_000,
            max_stream_bytes: 100 * 1024 * 1024,
            collect_warnings: true,
        }
    }
}
212
/// Unit tests for the error, warning, result and options types in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // --- PdfError ---

    #[test]
    fn pdf_error_parse_error_creation() {
        let err = PdfError::ParseError("invalid xref".to_string());
        assert_eq!(err.to_string(), "parse error: invalid xref");
    }

    #[test]
    fn pdf_error_io_error_creation() {
        let err = PdfError::IoError("file not found".to_string());
        assert_eq!(err.to_string(), "I/O error: file not found");
    }

    #[test]
    fn pdf_error_font_error_creation() {
        let err = PdfError::FontError("missing glyph widths".to_string());
        assert_eq!(err.to_string(), "font error: missing glyph widths");
    }

    #[test]
    fn pdf_error_interpreter_error_creation() {
        let err = PdfError::InterpreterError("unknown operator".to_string());
        assert_eq!(err.to_string(), "interpreter error: unknown operator");
    }

    #[test]
    fn pdf_error_resource_limit_exceeded() {
        let err = PdfError::ResourceLimitExceeded("too many objects".to_string());
        assert_eq!(err.to_string(), "resource limit exceeded: too many objects");
    }

    #[test]
    fn pdf_error_other() {
        let err = PdfError::Other("something went wrong".to_string());
        assert_eq!(err.to_string(), "something went wrong");
    }

    #[test]
    fn pdf_error_implements_std_error() {
        let err: Box<dyn std::error::Error> = Box::new(PdfError::ParseError("test".to_string()));
        assert_eq!(err.to_string(), "parse error: test");
    }

    #[test]
    fn pdf_error_clone_and_eq() {
        let err1 = PdfError::ParseError("test".to_string());
        let err2 = err1.clone();
        assert_eq!(err1, err2);
    }

    #[test]
    fn pdf_error_from_io_error() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "missing file");
        let pdf_err: PdfError = io_err.into();
        assert!(matches!(pdf_err, PdfError::IoError(_)));
        assert!(pdf_err.to_string().contains("missing file"));
    }

    // --- ExtractWarning ---

    #[test]
    fn warning_new_with_description_only() {
        let w = ExtractWarning::new("missing font metrics");
        assert_eq!(w.description, "missing font metrics");
        assert_eq!(w.page, None);
        assert_eq!(w.element, None);
        assert_eq!(w.operator_index, None);
        assert_eq!(w.font_name, None);
        assert_eq!(w.to_string(), "missing font metrics");
    }

    #[test]
    fn warning_on_page() {
        let w = ExtractWarning::on_page("unknown operator", 3);
        assert_eq!(w.description, "unknown operator");
        assert_eq!(w.page, Some(3));
        assert_eq!(w.element, None);
        assert_eq!(w.operator_index, None);
        assert_eq!(w.font_name, None);
        assert_eq!(w.to_string(), "unknown operator (page 3)");
    }

    #[test]
    fn warning_with_full_context() {
        let w = ExtractWarning::with_context("missing width", 1, "char at offset 42");
        assert_eq!(w.description, "missing width");
        assert_eq!(w.page, Some(1));
        assert_eq!(w.element, Some("char at offset 42".to_string()));
        assert_eq!(w.operator_index, None);
        assert_eq!(w.font_name, None);
        assert_eq!(w.to_string(), "missing width (page 1) [char at offset 42]");
    }

    #[test]
    fn warning_with_operator_context() {
        let w =
            ExtractWarning::with_operator_context("font not found in resources", 5, "Helvetica");
        assert_eq!(w.description, "font not found in resources");
        assert_eq!(w.page, None);
        assert_eq!(w.element, None);
        assert_eq!(w.operator_index, Some(5));
        assert_eq!(w.font_name, Some("Helvetica".to_string()));
        assert_eq!(
            w.to_string(),
            "font not found in resources [font Helvetica] [operator #5]"
        );
    }

    #[test]
    fn warning_display_with_all_fields() {
        let w = ExtractWarning {
            description: "test warning".to_string(),
            page: Some(2),
            element: Some("extra context".to_string()),
            operator_index: Some(10),
            font_name: Some("Arial".to_string()),
        };
        assert_eq!(
            w.to_string(),
            "test warning (page 2) [font Arial] [operator #10] [extra context]"
        );
    }

    #[test]
    fn warning_clone_and_eq() {
        let w1 = ExtractWarning::on_page("test warning", 0);
        let w2 = w1.clone();
        assert_eq!(w1, w2);
    }

    #[test]
    fn warning_with_operator_context_clone_and_eq() {
        let w1 = ExtractWarning::with_operator_context("test", 3, "Times");
        let w2 = w1.clone();
        assert_eq!(w1, w2);
    }

    // --- ExtractResult ---

    #[test]
    fn extract_result_ok_no_warnings() {
        let result = ExtractResult::ok(42);
        assert_eq!(result.value, 42);
        assert!(result.warnings.is_empty());
        assert!(result.is_clean());
    }

    #[test]
    fn extract_result_with_warnings() {
        let warnings = vec![
            ExtractWarning::new("warn 1"),
            ExtractWarning::on_page("warn 2", 0),
        ];
        let result = ExtractResult::with_warnings("hello", warnings);
        assert_eq!(result.value, "hello");
        assert_eq!(result.warnings.len(), 2);
        assert!(!result.is_clean());
    }

    #[test]
    fn extract_result_map_preserves_warnings() {
        let warnings = vec![ExtractWarning::new("test")];
        let result = ExtractResult::with_warnings(10, warnings);
        let mapped = result.map(|v| v * 2);
        assert_eq!(mapped.value, 20);
        assert_eq!(mapped.warnings.len(), 1);
        assert_eq!(mapped.warnings[0].description, "test");
    }

    #[test]
    fn extract_result_collect_multiple_warnings() {
        let mut result = ExtractResult::ok(Vec::<String>::new());
        result.warnings.push(ExtractWarning::new("first"));
        result.warnings.push(ExtractWarning::on_page("second", 1));
        result
            .warnings
            .push(ExtractWarning::with_context("third", 2, "char 'A'"));
        assert_eq!(result.warnings.len(), 3);
    }

    // --- ExtractOptions ---

    #[test]
    fn extract_options_default_values() {
        let opts = ExtractOptions::default();
        assert_eq!(opts.max_recursion_depth, 10);
        assert_eq!(opts.max_objects_per_page, 100_000);
        assert_eq!(opts.max_stream_bytes, 100 * 1024 * 1024);
        assert!(opts.collect_warnings);
    }

    #[test]
    fn extract_options_custom_values() {
        let opts = ExtractOptions {
            max_recursion_depth: 5,
            max_objects_per_page: 50_000,
            max_stream_bytes: 10 * 1024 * 1024,
            collect_warnings: false,
        };
        assert_eq!(opts.max_recursion_depth, 5);
        assert_eq!(opts.max_objects_per_page, 50_000);
        assert_eq!(opts.max_stream_bytes, 10 * 1024 * 1024);
        assert!(!opts.collect_warnings);
    }

    #[test]
    fn extract_options_clone() {
        let opts1 = ExtractOptions::default();
        let opts2 = opts1.clone();
        // Compare every field so that no part of the clone goes unchecked.
        assert_eq!(opts2.max_recursion_depth, opts1.max_recursion_depth);
        assert_eq!(opts2.max_objects_per_page, opts1.max_objects_per_page);
        assert_eq!(opts2.max_stream_bytes, opts1.max_stream_bytes);
        assert_eq!(opts2.collect_warnings, opts1.collect_warnings);
    }
}