1use crate::pdf::annotation_extractor::{AnnotationType, PdfAnnotation};
7
8#[derive(Debug, Clone)]
10pub struct AnnotationGroup {
11 pub annotation_type: AnnotationType,
13 pub page_number: u32,
15 pub annotations: Vec<PdfAnnotation>,
17}
18
/// Per-category counts over a set of annotations, as filled in by
/// [`compute_stats`]. All counters default to zero.
#[derive(Debug, Clone, Default)]
pub struct AnnotationStats {
    /// Number of annotations examined.
    pub total: usize,
    /// Text-markup annotations: highlight, underline and strike-out.
    pub highlights: usize,
    /// Note-like annotations: text and free-text.
    pub comments: usize,
    /// Link annotations.
    pub links: usize,
    /// Stamp annotations.
    pub stamps: usize,
    /// Annotations that fall into none of the categories above.
    pub other: usize,
    /// Number of distinct page numbers that carry at least one annotation.
    pub pages_with_annotations: usize,
}
37
38pub fn group_by_type_and_page(annotations: &[PdfAnnotation]) -> Vec<AnnotationGroup> {
40 use std::collections::BTreeMap;
41
42 let mut groups: BTreeMap<(u32, String), Vec<PdfAnnotation>> = BTreeMap::new();
44
45 for ann in annotations {
46 let type_key = type_to_key(&ann.annotation_type);
47 groups
48 .entry((ann.page_number, type_key))
49 .or_default()
50 .push(ann.clone());
51 }
52
53 groups
54 .into_iter()
55 .map(|((page_number, _), anns)| {
56 let annotation_type = anns[0].annotation_type.clone();
57 AnnotationGroup {
58 annotation_type,
59 page_number,
60 annotations: anns,
61 }
62 })
63 .collect()
64}
65
66pub fn filter_user_annotations(annotations: &[PdfAnnotation]) -> Vec<PdfAnnotation> {
68 annotations
69 .iter()
70 .filter(|a| {
71 !matches!(
72 a.annotation_type,
73 AnnotationType::Popup | AnnotationType::Link
74 )
75 })
76 .cloned()
77 .collect()
78}
79
80pub fn filter_with_content(annotations: &[PdfAnnotation]) -> Vec<PdfAnnotation> {
82 annotations
83 .iter()
84 .filter(|a| a.contents.as_ref().is_some_and(|c| !c.trim().is_empty()))
85 .cloned()
86 .collect()
87}
88
89pub fn compute_stats(annotations: &[PdfAnnotation]) -> AnnotationStats {
91 let mut stats = AnnotationStats {
92 total: annotations.len(),
93 ..Default::default()
94 };
95
96 let mut pages = std::collections::HashSet::new();
97
98 for ann in annotations {
99 pages.insert(ann.page_number);
100 match &ann.annotation_type {
101 AnnotationType::Highlight | AnnotationType::Underline | AnnotationType::StrikeOut => {
102 stats.highlights += 1;
103 }
104 AnnotationType::Text | AnnotationType::FreeText => {
105 stats.comments += 1;
106 }
107 AnnotationType::Link => {
108 stats.links += 1;
109 }
110 AnnotationType::Stamp => {
111 stats.stamps += 1;
112 }
113 _ => {
114 stats.other += 1;
115 }
116 }
117 }
118
119 stats.pages_with_annotations = pages.len();
120 stats
121}
122
123pub fn annotations_to_markdown(annotations: &[PdfAnnotation]) -> String {
125 if annotations.is_empty() {
126 return String::new();
127 }
128
129 let mut out = String::from("## Annotations\n\n");
130 let mut current_page = 0u32;
131
132 for ann in annotations {
133 if ann.page_number != current_page {
134 current_page = ann.page_number;
135 out.push_str(&format!("### Page {}\n\n", current_page));
136 }
137 let type_label = type_to_key(&ann.annotation_type);
138 let content = ann.contents.as_deref().unwrap_or("(no content)");
139 let author = ann
140 .author
141 .as_deref()
142 .map(|a| format!(" — {}", a))
143 .unwrap_or_default();
144 out.push_str(&format!("- **[{}]** {}{}\n", type_label, content, author));
145 }
146
147 out
148}
149
150fn type_to_key(t: &AnnotationType) -> String {
151 match t {
152 AnnotationType::Text => "Text".to_string(),
153 AnnotationType::Highlight => "Highlight".to_string(),
154 AnnotationType::Underline => "Underline".to_string(),
155 AnnotationType::StrikeOut => "StrikeOut".to_string(),
156 AnnotationType::FreeText => "FreeText".to_string(),
157 AnnotationType::Link => "Link".to_string(),
158 AnnotationType::Stamp => "Stamp".to_string(),
159 AnnotationType::Ink => "Ink".to_string(),
160 AnnotationType::FileAttachment => "FileAttachment".to_string(),
161 AnnotationType::Popup => "Popup".to_string(),
162 AnnotationType::Other(s) => s.clone(),
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal annotation: only the type, page and contents vary,
    /// every other field is `None`.
    fn make_ann(ann_type: AnnotationType, page: u32, content: Option<&str>) -> PdfAnnotation {
        PdfAnnotation {
            page_number: page,
            annotation_type: ann_type,
            contents: content.map(str::to_owned),
            author: None,
            subject: None,
            rect: None,
            creation_date: None,
            modification_date: None,
        }
    }

    #[test]
    fn test_group_by_type_and_page() {
        let input = [
            make_ann(AnnotationType::Highlight, 1, Some("important")),
            make_ann(AnnotationType::Highlight, 1, Some("also important")),
            make_ann(AnnotationType::Text, 1, Some("note")),
            make_ann(AnnotationType::Highlight, 2, Some("another")),
        ];

        let groups = group_by_type_and_page(&input);

        // Distinct (page, type) pairs: page 1 highlights, page 1 text and
        // page 2 highlights.
        assert_eq!(groups.len(), 3);
        // The first group is the page 1 highlights, which has two members.
        assert_eq!(groups[0].annotations.len(), 2);
    }

    #[test]
    fn test_filter_user_annotations() {
        let input = [
            make_ann(AnnotationType::Highlight, 1, Some("yes")),
            make_ann(AnnotationType::Popup, 1, None),
            make_ann(AnnotationType::Link, 1, None),
            make_ann(AnnotationType::Text, 2, Some("note")),
        ];

        // The popup and the link are removed; the highlight and text remain.
        let kept = filter_user_annotations(&input);
        assert_eq!(kept.len(), 2);
    }

    #[test]
    fn test_filter_with_content() {
        let input = [
            make_ann(AnnotationType::Highlight, 1, Some("has content")),
            make_ann(AnnotationType::Highlight, 1, None),
            make_ann(AnnotationType::Highlight, 1, Some("   ")),
        ];

        // Missing and whitespace-only contents are both rejected.
        let kept = filter_with_content(&input);
        assert_eq!(kept.len(), 1);
    }

    #[test]
    fn test_compute_stats() {
        let input = [
            make_ann(AnnotationType::Highlight, 1, Some("a")),
            make_ann(AnnotationType::Text, 1, Some("b")),
            make_ann(AnnotationType::Link, 2, None),
            make_ann(AnnotationType::Stamp, 3, None),
        ];

        let stats = compute_stats(&input);

        assert_eq!(stats.total, 4);
        assert_eq!(stats.highlights, 1);
        assert_eq!(stats.comments, 1);
        assert_eq!(stats.links, 1);
        assert_eq!(stats.stamps, 1);
        // Pages 1, 2 and 3 each carry at least one annotation.
        assert_eq!(stats.pages_with_annotations, 3);
    }

    #[test]
    fn test_annotations_to_markdown() {
        let input = [make_ann(AnnotationType::Highlight, 1, Some("key point"))];

        let md = annotations_to_markdown(&input);

        for fragment in ["## Annotations", "### Page 1", "[Highlight]", "key point"] {
            assert!(md.contains(fragment));
        }
    }
}