use crate::types::{BoundingBox, PdfAnnotation, PdfAnnotationType};
use pdfium_render::prelude::*;
/// Collects the annotations found on every page of `document`.
///
/// Pages are visited in document order and each resulting entry records a
/// 1-based `page_number`. Annotations whose pdfium type is `Widget`,
/// `XfaWidget` or `Popup` are left out of the result.
///
/// For every remaining annotation the pdfium type is translated with
/// `map_annotation_type`, the textual payload is taken from
/// `extract_annotation_content`, and the bounding box is filled in only when
/// `bounds()` succeeds (otherwise it is `None`).
pub fn extract_annotations_from_document(document: &PdfDocument<'_>) -> Vec<PdfAnnotation> {
    let mut collected = Vec::new();

    for (zero_based_index, page) in document.pages().iter().enumerate() {
        let extracted = page.annotations().iter().filter_map(|entry| {
            let kind = entry.annotation_type();
            let is_excluded = matches!(
                kind,
                PdfPageAnnotationType::Widget
                    | PdfPageAnnotationType::XfaWidget
                    | PdfPageAnnotationType::Popup
            );
            if is_excluded {
                return None;
            }

            Some(PdfAnnotation {
                annotation_type: map_annotation_type(kind),
                content: extract_annotation_content(&entry),
                // Callers receive page numbers starting at 1, not the iterator index.
                page_number: zero_based_index + 1,
                bounding_box: match entry.bounds() {
                    // left/bottom become (x0, y0); right/top become (x1, y1).
                    Ok(rect) => Some(BoundingBox {
                        x0: rect.left().value as f64,
                        y0: rect.bottom().value as f64,
                        x1: rect.right().value as f64,
                        y1: rect.top().value as f64,
                    }),
                    Err(_) => None,
                },
            })
        });
        collected.extend(extracted);
    }

    collected
}
/// Translates a pdfium annotation type into this crate's `PdfAnnotationType`.
///
/// `Text` and `FreeText` both collapse into `PdfAnnotationType::Text`.
/// `Highlight`, `Underline`, `Strikeout`, `Link` and `Stamp` map to their
/// same-named counterparts, and every other pdfium type becomes
/// `PdfAnnotationType::Other`.
fn map_annotation_type(pdfium_type: PdfPageAnnotationType) -> PdfAnnotationType {
    match pdfium_type {
        // Both note-style variants are reported as plain text annotations.
        PdfPageAnnotationType::Text | PdfPageAnnotationType::FreeText => PdfAnnotationType::Text,

        // Text markup variants.
        PdfPageAnnotationType::Highlight => PdfAnnotationType::Highlight,
        PdfPageAnnotationType::Underline => PdfAnnotationType::Underline,
        PdfPageAnnotationType::Strikeout => PdfAnnotationType::StrikeOut,

        PdfPageAnnotationType::Link => PdfAnnotationType::Link,
        PdfPageAnnotationType::Stamp => PdfAnnotationType::Stamp,

        // Anything without a dedicated counterpart.
        _ => PdfAnnotationType::Other,
    }
}
fn extract_annotation_content(annotation: &PdfPageAnnotation<'_>) -> Option<String> {
if let Some(link_annot) = annotation.as_link_annotation()
&& let Ok(link) = link_annot.link()
&& let Some(action) = link.action()
&& let Some(uri_action) = action.as_uri_action()
&& let Ok(uri) = uri_action.uri()
&& !uri.is_empty()
{
return Some(uri);
}
let contents = annotation.contents();
contents.filter(|s| !s.is_empty())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `source` is translated to `expected` by `map_annotation_type`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather than
    /// at this helper.
    #[track_caller]
    fn assert_maps_to(source: PdfPageAnnotationType, expected: PdfAnnotationType) {
        assert_eq!(map_annotation_type(source), expected);
    }

    #[test]
    fn test_map_annotation_type_text() {
        assert_maps_to(PdfPageAnnotationType::Text, PdfAnnotationType::Text);
    }

    #[test]
    fn test_map_annotation_type_free_text() {
        // FreeText has no dedicated variant and is reported as Text.
        assert_maps_to(PdfPageAnnotationType::FreeText, PdfAnnotationType::Text);
    }

    #[test]
    fn test_map_annotation_type_highlight() {
        assert_maps_to(
            PdfPageAnnotationType::Highlight,
            PdfAnnotationType::Highlight,
        );
    }

    #[test]
    fn test_map_annotation_type_link() {
        assert_maps_to(PdfPageAnnotationType::Link, PdfAnnotationType::Link);
    }

    #[test]
    fn test_map_annotation_type_stamp() {
        assert_maps_to(PdfPageAnnotationType::Stamp, PdfAnnotationType::Stamp);
    }

    #[test]
    fn test_map_annotation_type_underline() {
        assert_maps_to(
            PdfPageAnnotationType::Underline,
            PdfAnnotationType::Underline,
        );
    }

    #[test]
    fn test_map_annotation_type_strikeout() {
        assert_maps_to(
            PdfPageAnnotationType::Strikeout,
            PdfAnnotationType::StrikeOut,
        );
    }

    #[test]
    fn test_map_annotation_type_other() {
        // Types without a dedicated mapping all fall through to `Other`.
        for unmapped in [
            PdfPageAnnotationType::Ink,
            PdfPageAnnotationType::Circle,
            PdfPageAnnotationType::Square,
        ] {
            assert_maps_to(unmapped, PdfAnnotationType::Other);
        }
    }
}