use super::OxideDocument;
use crate::types::{BoundingBox, PdfAnnotation, PdfAnnotationType};
/// Collects the annotations of every page in `doc` into one flat list.
///
/// Pages are visited in index order and each emitted annotation carries a
/// 1-based `page_number` (`page_index + 1`). Annotations whose subtype is
/// `Widget` or `Popup` are left out of the result.
///
/// Failures are only logged at debug level, never propagated: when the page
/// count cannot be read an empty vector is returned, and a page whose
/// annotations cannot be read is skipped while the remaining pages are still
/// processed.
pub(crate) fn extract_annotations(doc: &mut OxideDocument) -> Vec<PdfAnnotation> {
    let total_pages = match doc.doc.page_count() {
        Ok(total) => total,
        Err(e) => {
            tracing::debug!("pdf_oxide: failed to get page count for annotations: {e}");
            return Vec::new();
        }
    };

    let mut collected = Vec::new();
    for page_index in 0..total_pages {
        let on_page = match doc.doc.get_annotations(page_index) {
            Ok(found) => found,
            Err(e) => {
                tracing::debug!(page = page_index, "pdf_oxide: failed to get annotations: {e}");
                continue;
            }
        };
        let page_number = page_index + 1;

        // Drop the subtypes that are not reported, then convert what is left.
        let reported = on_page.into_iter().filter(|annot| {
            !matches!(
                annot.subtype_enum,
                pdf_oxide::AnnotationSubtype::Widget | pdf_oxide::AnnotationSubtype::Popup
            )
        });
        collected.extend(reported.map(|annot| PdfAnnotation {
            annotation_type: map_annotation_subtype(annot.subtype_enum),
            content: extract_annotation_content(&annot),
            page_number,
            // The rectangle's four entries are taken in order as x0, y0, x1, y1.
            bounding_box: annot.rect.map(|rect| BoundingBox {
                x0: rect[0],
                y0: rect[1],
                x1: rect[2],
                y1: rect[3],
            }),
        }));
    }
    collected
}
fn map_annotation_subtype(subtype: pdf_oxide::AnnotationSubtype) -> PdfAnnotationType {
match subtype {
pdf_oxide::AnnotationSubtype::Text | pdf_oxide::AnnotationSubtype::FreeText => PdfAnnotationType::Text,
pdf_oxide::AnnotationSubtype::Highlight => PdfAnnotationType::Highlight,
pdf_oxide::AnnotationSubtype::Link => PdfAnnotationType::Link,
pdf_oxide::AnnotationSubtype::Stamp => PdfAnnotationType::Stamp,
pdf_oxide::AnnotationSubtype::Underline => PdfAnnotationType::Underline,
pdf_oxide::AnnotationSubtype::StrikeOut => PdfAnnotationType::StrikeOut,
_ => PdfAnnotationType::Other,
}
}
fn extract_annotation_content(annot: &pdf_oxide::Annotation) -> Option<String> {
if annot.subtype_enum == pdf_oxide::AnnotationSubtype::Link
&& let Some(ref action) = annot.action
{
match action {
pdf_oxide::LinkAction::Uri(uri) if !uri.is_empty() => {
return Some(uri.clone());
}
_ => {}
}
}
annot.contents.as_ref().filter(|s| !s.is_empty()).cloned()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an annotation of the given subtype with `contents`, `action` and
    /// every other optional field unset, so each test only spells out the
    /// fields it actually cares about (via struct-update syntax).
    fn blank_annotation(
        subtype_name: &str,
        subtype_enum: pdf_oxide::AnnotationSubtype,
    ) -> pdf_oxide::Annotation {
        pdf_oxide::Annotation {
            annotation_type: "Annot".to_string(),
            subtype: Some(subtype_name.to_string()),
            subtype_enum,
            contents: None,
            rect: None,
            author: None,
            creation_date: None,
            modification_date: None,
            subject: None,
            destination: None,
            action: None,
            quad_points: None,
            color: None,
            opacity: None,
            flags: pdf_oxide::AnnotationFlags::empty(),
            border: None,
            interior_color: None,
            field_type: None,
            field_name: None,
            field_value: None,
            default_value: None,
            field_flags: None,
            options: None,
            appearance_state: None,
            raw_dict: None,
        }
    }

    #[test]
    fn test_map_annotation_subtype_text() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::Text),
            PdfAnnotationType::Text
        );
    }

    #[test]
    fn test_map_annotation_subtype_free_text() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::FreeText),
            PdfAnnotationType::Text
        );
    }

    #[test]
    fn test_map_annotation_subtype_highlight() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::Highlight),
            PdfAnnotationType::Highlight
        );
    }

    #[test]
    fn test_map_annotation_subtype_link() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::Link),
            PdfAnnotationType::Link
        );
    }

    #[test]
    fn test_map_annotation_subtype_stamp() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::Stamp),
            PdfAnnotationType::Stamp
        );
    }

    #[test]
    fn test_map_annotation_subtype_underline() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::Underline),
            PdfAnnotationType::Underline
        );
    }

    #[test]
    fn test_map_annotation_subtype_strikeout() {
        assert_eq!(
            map_annotation_subtype(pdf_oxide::AnnotationSubtype::StrikeOut),
            PdfAnnotationType::StrikeOut
        );
    }

    #[test]
    fn test_map_annotation_subtype_other() {
        // Subtypes without a dedicated variant all collapse to `Other`.
        for subtype in [
            pdf_oxide::AnnotationSubtype::Ink,
            pdf_oxide::AnnotationSubtype::Circle,
            pdf_oxide::AnnotationSubtype::Square,
        ] {
            assert_eq!(map_annotation_subtype(subtype), PdfAnnotationType::Other);
        }
    }

    #[test]
    fn test_extract_annotation_content_uri() {
        let annot = pdf_oxide::Annotation {
            action: Some(pdf_oxide::LinkAction::Uri("https://example.com".to_string())),
            ..blank_annotation("Link", pdf_oxide::AnnotationSubtype::Link)
        };
        let content = extract_annotation_content(&annot);
        assert_eq!(content, Some("https://example.com".to_string()));
    }

    #[test]
    fn test_extract_annotation_content_fallback() {
        let annot = pdf_oxide::Annotation {
            contents: Some("A note".to_string()),
            ..blank_annotation("Text", pdf_oxide::AnnotationSubtype::Text)
        };
        let content = extract_annotation_content(&annot);
        assert_eq!(content, Some("A note".to_string()));
    }

    #[test]
    fn test_extract_annotation_content_empty_uri_falls_back_to_contents() {
        // An empty URI must not win over the annotation's own contents.
        let annot = pdf_oxide::Annotation {
            contents: Some("See docs".to_string()),
            action: Some(pdf_oxide::LinkAction::Uri(String::new())),
            ..blank_annotation("Link", pdf_oxide::AnnotationSubtype::Link)
        };
        let content = extract_annotation_content(&annot);
        assert_eq!(content, Some("See docs".to_string()));
    }

    #[test]
    fn test_extract_annotation_content_ignores_uri_on_non_link() {
        // The URI is only consulted for `Link` annotations.
        let annot = pdf_oxide::Annotation {
            contents: Some("A note".to_string()),
            action: Some(pdf_oxide::LinkAction::Uri("https://example.com".to_string())),
            ..blank_annotation("Text", pdf_oxide::AnnotationSubtype::Text)
        };
        let content = extract_annotation_content(&annot);
        assert_eq!(content, Some("A note".to_string()));
    }

    #[test]
    fn test_extract_annotation_content_empty_contents_is_none() {
        let annot = pdf_oxide::Annotation {
            contents: Some(String::new()),
            ..blank_annotation("Text", pdf_oxide::AnnotationSubtype::Text)
        };
        assert_eq!(extract_annotation_content(&annot), None);
    }

    #[test]
    fn test_extract_annotation_content_none_when_nothing_set() {
        let annot = blank_annotation("Link", pdf_oxide::AnnotationSubtype::Link);
        assert_eq!(extract_annotation_content(&annot), None);
    }
}