// Source: justpdf_core/annot/parse.rs

1use crate::error::Result;
2use crate::object::{IndirectRef, PdfDict, PdfObject};
3use crate::page::{collect_pages, PageInfo, Rect};
4use crate::parser::PdfDocument;
5
6use super::types::*;
7
8/// Get all annotations from a specific page.
9pub fn get_annotations(doc: &PdfDocument, page: &PageInfo) -> Result<Vec<Annotation>> {
10    let page_obj = doc.resolve(&page.page_ref)?;
11    let page_dict = match page_obj.as_dict() {
12        Some(d) => d,
13        None => return Ok(Vec::new()),
14    };
15
16    let annots_arr = match page_dict.get(b"Annots") {
17        Some(PdfObject::Array(arr)) => arr.clone(),
18        Some(PdfObject::Reference(r)) => {
19            let resolved = doc.resolve(r)?;
20            match resolved.as_array() {
21                Some(arr) => arr.to_vec(),
22                None => return Ok(Vec::new()),
23            }
24        }
25        _ => return Ok(Vec::new()),
26    };
27
28    let mut annotations = Vec::new();
29    for item in &annots_arr {
30        let (annot_dict, obj_ref) = match item {
31            PdfObject::Reference(r) => {
32                let resolved = doc.resolve(r)?;
33                match resolved {
34                    PdfObject::Dict(d) => (d, Some(r.clone())),
35                    _ => continue,
36                }
37            }
38            PdfObject::Dict(d) => (d.clone(), None),
39            _ => continue,
40        };
41        if let Some(annot) = parse_annotation_dict(&annot_dict, obj_ref.as_ref()) {
42            annotations.push(annot);
43        }
44    }
45    Ok(annotations)
46}
47
48/// Get all annotations from all pages.
49pub fn get_all_annotations(doc: &PdfDocument) -> Result<Vec<(usize, Vec<Annotation>)>> {
50    let pages = collect_pages(doc)?;
51    let mut result = Vec::new();
52    for page in &pages {
53        let annots = get_annotations(doc, page)?;
54        if !annots.is_empty() {
55            result.push((page.index, annots));
56        }
57    }
58    Ok(result)
59}
60
61/// Parse a single annotation dictionary.
62fn parse_annotation_dict(dict: &PdfDict, obj_ref: Option<&IndirectRef>) -> Option<Annotation> {
63    let subtype = dict.get_name(b"Subtype")?;
64    let annot_type = AnnotationType::from_name(subtype);
65
66    let rect_arr = dict.get_array(b"Rect")?;
67    let rect = Rect::from_pdf_array(rect_arr)?;
68
69    let contents = dict
70        .get(b"Contents")
71        .and_then(|o| o.as_str())
72        .map(|b| String::from_utf8_lossy(b).into_owned());
73
74    let name = dict
75        .get(b"NM")
76        .and_then(|o| o.as_str())
77        .map(|b| String::from_utf8_lossy(b).into_owned());
78
79    let modified = dict
80        .get(b"M")
81        .and_then(|o| o.as_str())
82        .map(|b| String::from_utf8_lossy(b).into_owned());
83
84    let flags = AnnotationFlags(
85        dict.get_i64(b"F").unwrap_or(0) as u32,
86    );
87
88    let color = dict
89        .get_array(b"C")
90        .and_then(AnnotColor::from_array);
91
92    let border = parse_border_style(dict);
93
94    let appearance_ref = dict
95        .get_dict(b"AP")
96        .and_then(|ap| ap.get_ref(b"N"))
97        .cloned();
98
99    let popup_ref = dict.get_ref(b"Popup").cloned();
100
101    let data = parse_annotation_data(dict, &annot_type);
102
103    Some(Annotation {
104        annot_type,
105        rect,
106        contents,
107        name,
108        modified,
109        flags,
110        color,
111        border,
112        appearance_ref,
113        popup_ref,
114        obj_ref: obj_ref.cloned(),
115        data,
116    })
117}
118
119/// Parse border style from /BS dict or /Border array.
120fn parse_border_style(dict: &PdfDict) -> Option<BorderStyle> {
121    if let Some(bs) = dict.get_dict(b"BS") {
122        let width = bs
123            .get(b"W")
124            .and_then(|o| o.as_f64())
125            .unwrap_or(1.0);
126        let style = bs
127            .get_name(b"S")
128            .map(BorderStyleType::from_name)
129            .unwrap_or(BorderStyleType::Solid);
130        let dash_pattern = bs
131            .get_array(b"D")
132            .map(|arr| arr.iter().filter_map(|o| o.as_f64()).collect())
133            .unwrap_or_default();
134        return Some(BorderStyle {
135            width,
136            style,
137            dash_pattern,
138        });
139    }
140
141    if let Some(border_arr) = dict.get_array(b"Border")
142        && border_arr.len() >= 3
143    {
144        let width = border_arr[2].as_f64().unwrap_or(1.0);
145        let dash_pattern = if border_arr.len() > 3 {
146            border_arr[3]
147                .as_array()
148                .map(|arr| arr.iter().filter_map(|o| o.as_f64()).collect())
149                .unwrap_or_default()
150        } else {
151            Vec::new()
152        };
153        return Some(BorderStyle {
154            width,
155            style: if dash_pattern.is_empty() {
156                BorderStyleType::Solid
157            } else {
158                BorderStyleType::Dashed
159            },
160            dash_pattern,
161        });
162    }
163
164    None
165}
166
167/// Parse type-specific annotation data.
168fn parse_annotation_data(dict: &PdfDict, annot_type: &AnnotationType) -> AnnotationData {
169    match annot_type {
170        AnnotationType::Highlight
171        | AnnotationType::Underline
172        | AnnotationType::StrikeOut
173        | AnnotationType::Squiggly => {
174            let quad_points = dict
175                .get_array(b"QuadPoints")
176                .map(|arr| arr.iter().filter_map(|o| o.as_f64()).collect())
177                .unwrap_or_default();
178            AnnotationData::Markup { quad_points }
179        }
180
181        AnnotationType::Line => {
182            let l = dict
183                .get_array(b"L")
184                .map(|arr| arr.iter().filter_map(|o| o.as_f64()).collect::<Vec<_>>())
185                .unwrap_or_default();
186            let start = if l.len() >= 2 { (l[0], l[1]) } else { (0.0, 0.0) };
187            let end = if l.len() >= 4 { (l[2], l[3]) } else { (0.0, 0.0) };
188
189            let line_endings = dict
190                .get_array(b"LE")
191                .map(|arr| {
192                    let s = arr
193                        .first()
194                        .and_then(|o| o.as_name())
195                        .map(LineEndingStyle::from_name)
196                        .unwrap_or(LineEndingStyle::None);
197                    let e = arr
198                        .get(1)
199                        .and_then(|o| o.as_name())
200                        .map(LineEndingStyle::from_name)
201                        .unwrap_or(LineEndingStyle::None);
202                    (s, e)
203                })
204                .unwrap_or((LineEndingStyle::None, LineEndingStyle::None));
205
206            let leader_line_length = dict
207                .get(b"LL")
208                .and_then(|o| o.as_f64())
209                .unwrap_or(0.0);
210            let leader_line_extension = dict
211                .get(b"LLE")
212                .and_then(|o| o.as_f64())
213                .unwrap_or(0.0);
214            let caption = dict
215                .get(b"Cap")
216                .and_then(|o| o.as_bool())
217                .unwrap_or(false);
218            let interior_color = dict
219                .get_array(b"IC")
220                .and_then(AnnotColor::from_array);
221
222            AnnotationData::Line {
223                start,
224                end,
225                line_endings,
226                leader_line_length,
227                leader_line_extension,
228                caption,
229                interior_color,
230            }
231        }
232
233        AnnotationType::Ink => {
234            let ink_list = dict
235                .get_array(b"InkList")
236                .map(|arr| {
237                    arr.iter()
238                        .filter_map(|item| {
239                            item.as_array().map(|coords| {
240                                coords
241                                    .chunks(2)
242                                    .filter_map(|pair| {
243                                        if pair.len() == 2 {
244                                            Some((pair[0].as_f64()?, pair[1].as_f64()?))
245                                        } else {
246                                            None
247                                        }
248                                    })
249                                    .collect()
250                            })
251                        })
252                        .collect()
253                })
254                .unwrap_or_default();
255            AnnotationData::Ink { ink_list }
256        }
257
258        AnnotationType::Link => {
259            let uri = dict
260                .get_dict(b"A")
261                .and_then(|a| a.get(b"URI"))
262                .and_then(|o| o.as_str())
263                .map(|b| String::from_utf8_lossy(b).into_owned());
264            let dest = dict.get(b"Dest").cloned();
265            AnnotationData::Link { uri, dest }
266        }
267
268        AnnotationType::FreeText => {
269            let da = dict
270                .get(b"DA")
271                .and_then(|o| o.as_str())
272                .map(|b| String::from_utf8_lossy(b).into_owned())
273                .unwrap_or_default();
274            let justification = dict.get_i64(b"Q").unwrap_or(0);
275            AnnotationData::FreeText { da, justification }
276        }
277
278        AnnotationType::FileAttachment => {
279            let fs_ref = dict.get_ref(b"FS").cloned();
280            let icon_name = dict
281                .get_name(b"Name")
282                .map(|n| String::from_utf8_lossy(n).into_owned())
283                .unwrap_or_else(|| "PushPin".to_string());
284            AnnotationData::FileAttachment { fs_ref, icon_name }
285        }
286
287        AnnotationType::Stamp => {
288            let icon_name = dict
289                .get_name(b"Name")
290                .map(|n| String::from_utf8_lossy(n).into_owned())
291                .unwrap_or_else(|| "Draft".to_string());
292            AnnotationData::Stamp { icon_name }
293        }
294
295        AnnotationType::Square
296        | AnnotationType::Circle
297        | AnnotationType::Polygon
298        | AnnotationType::PolyLine => {
299            let vertices = dict
300                .get_array(b"Vertices")
301                .map(|arr| {
302                    arr.chunks(2)
303                        .filter_map(|pair| {
304                            if pair.len() == 2 {
305                                Some((pair[0].as_f64()?, pair[1].as_f64()?))
306                            } else {
307                                None
308                            }
309                        })
310                        .collect()
311                })
312                .unwrap_or_default();
313            let interior_color = dict
314                .get_array(b"IC")
315                .and_then(AnnotColor::from_array);
316            AnnotationData::Shape {
317                vertices,
318                interior_color,
319            }
320        }
321
322        AnnotationType::Redact => {
323            let overlay_text = dict
324                .get(b"OverlayText")
325                .and_then(|o| o.as_str())
326                .map(|b| String::from_utf8_lossy(b).into_owned());
327            let repeat = dict
328                .get(b"Repeat")
329                .and_then(|o| o.as_bool())
330                .unwrap_or(false);
331            let interior_color = dict
332                .get_array(b"IC")
333                .and_then(AnnotColor::from_array);
334            AnnotationData::Redact {
335                overlay_text,
336                repeat,
337                interior_color,
338            }
339        }
340
341        _ => AnnotationData::None,
342    }
343}
344
#[cfg(test)]
mod tests {
    use super::*;

    /// Array object made of integer entries.
    fn ints(values: &[i64]) -> PdfObject {
        PdfObject::Array(values.iter().map(|&v| PdfObject::Integer(v)).collect())
    }

    /// Array object made of real-number entries.
    fn reals(values: &[f64]) -> PdfObject {
        PdfObject::Array(values.iter().map(|&v| PdfObject::Real(v)).collect())
    }

    /// Name object holding `value`.
    fn name_obj(value: &[u8]) -> PdfObject {
        PdfObject::Name(value.to_vec())
    }

    /// Store `value` under `key` in `dict`.
    fn put(dict: &mut PdfDict, key: &[u8], value: PdfObject) {
        dict.insert(key.to_vec(), value);
    }

    /// Dictionary carrying the two mandatory entries, `/Subtype` and `/Rect`.
    fn annot_dict(subtype: &[u8], rect: PdfObject) -> PdfDict {
        let mut dict = PdfDict::new();
        put(&mut dict, b"Subtype", name_obj(subtype));
        put(&mut dict, b"Rect", rect);
        dict
    }

    #[test]
    fn test_parse_highlight_annotation() {
        let mut dict = PdfDict::new();
        put(&mut dict, b"Type", name_obj(b"Annot"));
        put(&mut dict, b"Subtype", name_obj(b"Highlight"));
        put(&mut dict, b"Rect", reals(&[100.0, 200.0, 300.0, 220.0]));
        put(&mut dict, b"C", reals(&[1.0, 1.0, 0.0]));
        put(
            &mut dict,
            b"QuadPoints",
            reals(&[100.0, 220.0, 300.0, 220.0, 100.0, 200.0, 300.0, 200.0]),
        );
        put(
            &mut dict,
            b"Contents",
            PdfObject::String(b"Test highlight".to_vec()),
        );

        let annot = parse_annotation_dict(&dict, None).unwrap();
        assert_eq!(annot.annot_type, AnnotationType::Highlight);
        assert_eq!(annot.rect.llx, 100.0);
        assert_eq!(annot.contents, Some("Test highlight".to_string()));
        assert_eq!(annot.color, Some(AnnotColor::Rgb(1.0, 1.0, 0.0)));
        let AnnotationData::Markup { quad_points } = &annot.data else {
            panic!("expected Markup data");
        };
        assert_eq!(quad_points.len(), 8);
    }

    #[test]
    fn test_parse_link_annotation() {
        let mut action = PdfDict::new();
        put(&mut action, b"S", name_obj(b"URI"));
        put(
            &mut action,
            b"URI",
            PdfObject::String(b"https://example.com".to_vec()),
        );

        let mut dict = annot_dict(b"Link", ints(&[72, 700, 200, 720]));
        put(&mut dict, b"A", PdfObject::Dict(action));

        let annot = parse_annotation_dict(&dict, None).unwrap();
        assert_eq!(annot.annot_type, AnnotationType::Link);
        let AnnotationData::Link { uri, .. } = &annot.data else {
            panic!("expected Link data");
        };
        assert_eq!(uri.as_deref(), Some("https://example.com"));
    }

    #[test]
    fn test_parse_ink_annotation() {
        let mut dict = annot_dict(b"Ink", ints(&[0, 0, 100, 100]));
        let stroke = reals(&[10.0, 20.0, 30.0, 40.0, 50.0, 60.0]);
        put(&mut dict, b"InkList", PdfObject::Array(vec![stroke]));

        let annot = parse_annotation_dict(&dict, None).unwrap();
        let AnnotationData::Ink { ink_list } = &annot.data else {
            panic!("expected Ink data");
        };
        assert_eq!(ink_list.len(), 1);
        assert_eq!(ink_list[0].len(), 3);
        assert_eq!(ink_list[0][0], (10.0, 20.0));
        assert_eq!(ink_list[0][2], (50.0, 60.0));
    }

    #[test]
    fn test_parse_line_annotation() {
        let mut dict = annot_dict(b"Line", ints(&[100, 100, 300, 300]));
        put(&mut dict, b"L", reals(&[100.0, 100.0, 300.0, 300.0]));
        put(
            &mut dict,
            b"LE",
            PdfObject::Array(vec![name_obj(b"OpenArrow"), name_obj(b"ClosedArrow")]),
        );
        put(&mut dict, b"LL", PdfObject::Real(10.0));

        let annot = parse_annotation_dict(&dict, None).unwrap();
        let AnnotationData::Line {
            start,
            end,
            line_endings,
            leader_line_length,
            ..
        } = &annot.data
        else {
            panic!("expected Line data");
        };
        assert_eq!(*start, (100.0, 100.0));
        assert_eq!(*end, (300.0, 300.0));
        assert_eq!(line_endings.0, LineEndingStyle::OpenArrow);
        assert_eq!(line_endings.1, LineEndingStyle::ClosedArrow);
        assert_eq!(*leader_line_length, 10.0);
    }

    #[test]
    fn test_parse_missing_subtype() {
        let mut dict = PdfDict::new();
        put(&mut dict, b"Rect", ints(&[0, 0, 100, 100]));
        assert!(parse_annotation_dict(&dict, None).is_none());
    }

    #[test]
    fn test_parse_missing_rect() {
        let mut dict = PdfDict::new();
        put(&mut dict, b"Subtype", name_obj(b"Text"));
        assert!(parse_annotation_dict(&dict, None).is_none());
    }

    #[test]
    fn test_parse_border_style() {
        let mut bs = PdfDict::new();
        put(&mut bs, b"W", PdfObject::Real(2.0));
        put(&mut bs, b"S", name_obj(b"D"));
        put(&mut bs, b"D", ints(&[3, 1]));

        let mut dict = annot_dict(b"Text", ints(&[0, 0, 100, 100]));
        put(&mut dict, b"BS", PdfObject::Dict(bs));

        let annot = parse_annotation_dict(&dict, None).unwrap();
        let border = annot.border.unwrap();
        assert_eq!(border.width, 2.0);
        assert_eq!(border.style, BorderStyleType::Dashed);
        assert_eq!(border.dash_pattern, vec![3.0, 1.0]);
    }
}