1use lopdf::{Document, Object};
7use serde::{Deserialize, Serialize};
8
/// The kind of a PDF annotation, derived from the `Subtype` name entry of
/// its dictionary.
///
/// Each named variant corresponds to the identically spelled `Subtype`
/// value; any other name is preserved in [`AnnotationType::Other`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnnotationType {
    /// `Subtype` is `Text`.
    Text,
    /// `Subtype` is `Highlight`.
    Highlight,
    /// `Subtype` is `Underline`.
    Underline,
    /// `Subtype` is `StrikeOut`.
    StrikeOut,
    /// `Subtype` is `FreeText`.
    FreeText,
    /// `Subtype` is `Link`.
    Link,
    /// `Subtype` is `Stamp`.
    Stamp,
    /// `Subtype` is `Ink`.
    Ink,
    /// `Subtype` is `FileAttachment`.
    FileAttachment,
    /// `Subtype` is `Popup`.
    Popup,
    /// Any subtype name not listed above, stored as a lossy UTF-8
    /// conversion of the raw name bytes.
    Other(String),
}
35
/// A single annotation read from a page of a PDF document.
///
/// Every optional field is `None` when the corresponding dictionary entry
/// is missing or does not have the expected object type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PdfAnnotation {
    /// Kind of annotation, taken from the `Subtype` entry.
    pub annotation_type: AnnotationType,
    /// Text of the `Contents` entry.
    pub contents: Option<String>,
    /// Text of the `T` entry, exposed here as the author.
    pub author: Option<String>,
    /// Page number the annotation was found on, as keyed by
    /// `Document::get_pages`.
    pub page_number: u32,
    /// First four numbers of the `Rect` entry, in the order they are stored.
    pub rect: Option<[f64; 4]>,
    /// Text of the `Subj` entry.
    pub subject: Option<String>,
    /// Raw text of the `CreationDate` entry; the date is not parsed.
    pub creation_date: Option<String>,
    /// Raw text of the `M` entry; the date is not parsed.
    pub modification_date: Option<String>,
}
56
57pub fn extract_annotations(doc: &Document) -> Vec<PdfAnnotation> {
59 let mut annotations = Vec::new();
60
61 let pages = doc.get_pages();
62 let mut page_ids: Vec<(u32, lopdf::ObjectId)> = pages.into_iter().collect();
63 page_ids.sort_by_key(|(num, _)| *num);
64
65 for (page_num, page_id) in page_ids {
66 let page_dict = match doc.get_object(page_id).and_then(|o| o.as_dict()) {
67 Ok(d) => d,
68 Err(_) => continue,
69 };
70
71 let annots_obj = match page_dict.get(b"Annots") {
72 Ok(obj) => resolve(doc, obj),
73 Err(_) => continue,
74 };
75
76 let annots_array = match annots_obj.as_array() {
77 Ok(a) => a,
78 Err(_) => continue,
79 };
80
81 for annot_ref in annots_array {
82 let annot_obj = resolve(doc, annot_ref);
83 if let Ok(dict) = annot_obj.as_dict() {
84 if let Some(annot) = parse_annotation(dict, page_num) {
85 annotations.push(annot);
86 }
87 }
88 }
89 }
90
91 annotations
92}
93
94fn parse_annotation(dict: &lopdf::Dictionary, page_number: u32) -> Option<PdfAnnotation> {
96 let annotation_type = match dict.get(b"Subtype") {
97 Ok(Object::Name(name)) => match name.as_slice() {
98 b"Text" => AnnotationType::Text,
99 b"Highlight" => AnnotationType::Highlight,
100 b"Underline" => AnnotationType::Underline,
101 b"StrikeOut" => AnnotationType::StrikeOut,
102 b"FreeText" => AnnotationType::FreeText,
103 b"Link" => AnnotationType::Link,
104 b"Stamp" => AnnotationType::Stamp,
105 b"Ink" => AnnotationType::Ink,
106 b"FileAttachment" => AnnotationType::FileAttachment,
107 b"Popup" => AnnotationType::Popup,
108 other => AnnotationType::Other(String::from_utf8_lossy(other).to_string()),
109 },
110 _ => return None,
111 };
112
113 let contents = get_string(dict, b"Contents");
114 let author = get_string(dict, b"T");
115 let subject = get_string(dict, b"Subj");
116 let creation_date = get_string(dict, b"CreationDate");
117 let modification_date = get_string(dict, b"M");
118 let rect = get_rect(dict);
119
120 Some(PdfAnnotation {
121 annotation_type,
122 contents,
123 author,
124 page_number,
125 rect,
126 subject,
127 creation_date,
128 modification_date,
129 })
130}
131
132fn get_string(dict: &lopdf::Dictionary, key: &[u8]) -> Option<String> {
134 dict.get(key).ok().and_then(|obj| match obj {
135 Object::String(bytes, _) => Some(String::from_utf8_lossy(bytes).to_string()),
136 _ => None,
137 })
138}
139
140fn get_rect(dict: &lopdf::Dictionary) -> Option<[f64; 4]> {
142 let rect_obj = dict.get(b"Rect").ok()?;
143 let arr = rect_obj.as_array().ok()?;
144 if arr.len() < 4 {
145 return None;
146 }
147 let mut result = [0.0f64; 4];
148 for (i, obj) in arr.iter().enumerate().take(4) {
149 result[i] = match obj {
150 Object::Real(f) => *f,
151 Object::Integer(i) => *i as f64,
152 _ => return None,
153 };
154 }
155 Some(result)
156}
157
158fn resolve<'a>(doc: &'a Document, obj: &'a Object) -> &'a Object {
160 match obj {
161 Object::Reference(id) => doc.get_object(*id).unwrap_or(obj),
162 _ => obj,
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a dictionary whose only entry is the given `Subtype` name.
    fn dict_with_subtype(subtype: &[u8]) -> lopdf::Dictionary {
        let mut dict = lopdf::Dictionary::new();
        dict.set("Subtype", Object::Name(subtype.to_vec()));
        dict
    }

    #[test]
    fn test_parse_annotation_text() {
        let mut dict = dict_with_subtype(b"Text");
        let comment = Object::String(b"A comment".to_vec(), lopdf::StringFormat::Literal);
        dict.set("Contents", comment);

        let annot = parse_annotation(&dict, 1).unwrap();

        assert_eq!(annot.annotation_type, AnnotationType::Text);
        assert_eq!(annot.contents.as_deref(), Some("A comment"));
        assert_eq!(annot.page_number, 1);
    }

    #[test]
    fn test_parse_annotation_highlight() {
        let dict = dict_with_subtype(b"Highlight");

        let annot = parse_annotation(&dict, 3).unwrap();

        assert_eq!(annot.annotation_type, AnnotationType::Highlight);
        assert_eq!(annot.page_number, 3);
    }

    #[test]
    fn test_parse_annotation_no_subtype() {
        // An empty dictionary has no `Subtype`, so nothing can be parsed.
        let empty = lopdf::Dictionary::new();
        assert!(parse_annotation(&empty, 1).is_none());
    }

    #[test]
    fn test_get_rect() {
        let corners: Vec<Object> = vec![10.0, 20.0, 100.0, 50.0]
            .into_iter()
            .map(Object::Real)
            .collect();
        let mut dict = lopdf::Dictionary::new();
        dict.set("Rect", Object::Array(corners));

        let rect = get_rect(&dict).unwrap();

        assert_eq!(rect, [10.0, 20.0, 100.0, 50.0]);
    }
}