use super::common::*;
use lopdf::{Document, Object};
fn link_annotation_count(bytes: &[u8]) -> usize {
count_substr(bytes, b"/Subtype/Link")
}
fn is_link(d: &lopdf::Dictionary) -> bool {
d.get(b"Subtype")
.and_then(|o| o.as_name())
.map(|n| n == b"Link")
.unwrap_or(false)
}
fn deref_once<'a>(doc: &'a Document, obj: &'a Object) -> Option<&'a Object> {
match obj {
Object::Reference(id) => doc.objects.get(id),
other => Some(other),
}
}
fn collect_link_annotations(bytes: &[u8]) -> Vec<(String, Option<String>)> {
let doc = Document::load_mem(bytes).expect("PDF must parse");
let mut out = Vec::new();
let mut consume = |d: &lopdf::Dictionary| {
if !is_link(d) {
return;
}
let uri = d
.get(b"A")
.and_then(|o| o.as_dict())
.and_then(|a| a.get(b"URI"))
.and_then(|o| o.as_str())
.ok()
.map(|b| String::from_utf8_lossy(b).into_owned());
let contents = d
.get(b"Contents")
.and_then(|o| o.as_str())
.ok()
.map(|b| String::from_utf8_lossy(b).into_owned());
if let Some(u) = uri {
out.push((u, contents));
}
};
for (_, obj) in doc.objects.iter() {
if let Object::Dictionary(d) = obj {
consume(d);
}
}
for pid in doc.page_iter() {
let Some(Object::Dictionary(page)) = doc.objects.get(&pid) else {
continue;
};
let Ok(annots) = page.get(b"Annots") else {
continue;
};
let Some(Object::Array(items)) = deref_once(&doc, annots) else {
continue;
};
for item in items {
let Some(resolved) = deref_once(&doc, item) else {
continue;
};
if let Object::Dictionary(d) = resolved {
consume(d);
}
}
}
out
}
fn collect_link_rects(bytes: &[u8]) -> Vec<[f32; 4]> {
let doc = Document::load_mem(bytes).expect("PDF must parse");
let mut out = Vec::new();
let mut consume = |d: &lopdf::Dictionary| {
if !is_link(d) {
return;
}
let Ok(Object::Array(items)) = d.get(b"Rect") else {
return;
};
if items.len() != 4 {
return;
}
let mut r = [0.0_f32; 4];
for (i, v) in items.iter().enumerate() {
match v {
Object::Real(f) => r[i] = *f,
Object::Integer(n) => r[i] = *n as f32,
_ => return,
}
}
out.push(r);
};
for (_, obj) in doc.objects.iter() {
if let Object::Dictionary(d) = obj {
consume(d);
}
}
for pid in doc.page_iter() {
let Some(Object::Dictionary(page)) = doc.objects.get(&pid) else {
continue;
};
let Ok(annots) = page.get(b"Annots") else {
continue;
};
let Some(Object::Array(items)) = deref_once(&doc, annots) else {
continue;
};
for item in items {
let Some(resolved) = deref_once(&doc, item) else {
continue;
};
if let Object::Dictionary(d) = resolved {
consume(d);
}
}
}
out
}
#[test]
fn inline_anchor_renders_as_clickable_link() {
let md = "Click <a href=\"https://example.com\">here</a>.\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert_eq!(
link_annotation_count(&bytes),
1,
"inline <a> should produce one URI annotation"
);
assert!(
contains_text(&bytes, "https://example.com"),
"URL should appear in the link annotation"
);
assert!(
!contains_text(&bytes, "<a href"),
"raw <a> markup must not leak into the rendered text"
);
}
#[test]
fn anchor_title_attaches_pdf_tooltip() {
let md = "Visit <a href=\"https://example.com\" title=\"Tip\">x</a>.\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert!(
contains_text(&bytes, "(Tip)"),
"title attribute should be injected as the annotation /Contents tooltip"
);
}
#[test]
fn div_wrapper_drops_around_markdown_content() {
let md = "<div>\n\nWrapped paragraph body.\n\n</div>\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert!(
!contains_text(&bytes, "<div>") && !contains_text(&bytes, "</div>"),
"div framing tags must not render as literal text"
);
assert!(
contains_text(&bytes, "Wrapped paragraph body"),
"wrapped markdown content must still render"
);
}
#[test]
fn section_figure_figcaption_wrappers_drop() {
let md = "\
<section>
Section body.
</section>
<figure>
Figure body.
<figcaption>
Caption body.
</figcaption>
</figure>
";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
for tag in [
"<section>",
"</section>",
"<figure>",
"</figure>",
"<figcaption>",
"</figcaption>",
] {
assert!(
!contains_text(&bytes, tag),
"framing tag {tag} must not render as literal text"
);
}
for body in ["Section body", "Figure body", "Caption body"] {
assert!(
contains_text(&bytes, body),
"wrapped content '{body}' should still render"
);
}
}
#[test]
fn unknown_block_tag_still_passes_through() {
let md = "<custom-element>content here</custom-element>\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert!(
contains_text(&bytes, "custom-element") || contains(&bytes, b"custom-element"),
"unrecognised tags must keep the existing pass-through behavior"
);
}
#[test]
fn unclosed_anchor_degrades_to_literal_without_panic() {
let md = "before <a href=\"https://example.com\">no close here.\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert_eq!(
link_annotation_count(&bytes),
0,
"unclosed <a> should not emit a link annotation"
);
assert!(
contains_text(&bytes, "no close here"),
"literal fallback content must still render"
);
}
#[test]
fn anchor_inside_emphasis_links_within_styling() {
let md = "*click <a href=\"https://example.com\">here</a> now*\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert_eq!(link_annotation_count(&bytes), 1);
assert!(contains_text(&bytes, "https://example.com"));
}
#[test]
fn link_annotation_has_nonempty_clickable_rect() {
let bytes = render(
"Click <a href=\"https://example.com\">here</a>.\n",
"",
);
let rects = collect_link_rects(&bytes);
assert_eq!(rects.len(), 1);
let [llx, lly, urx, ury] = rects[0];
assert!(urx > llx, "rect must have positive width: {:?}", rects[0]);
assert!(ury > lly, "rect must have positive height: {:?}", rects[0]);
assert!(urx - llx >= 1.0, "click target too narrow: {:?}", rects[0]);
}
#[test]
fn multiple_anchors_in_one_paragraph_each_get_annotation() {
let md = "see <a href=\"https://a.example\">one</a> and \
<a href=\"https://b.example\" title=\"second\">two</a> here.\n";
let bytes = render(md, "");
let links = collect_link_annotations(&bytes);
assert_eq!(links.len(), 2);
let mut uris: Vec<String> = links.iter().map(|(u, _)| u.clone()).collect();
uris.sort();
assert_eq!(uris, vec!["https://a.example", "https://b.example"]);
let with_tip: Vec<&String> = links
.iter()
.find(|(u, _)| u == "https://b.example")
.and_then(|(_, c)| c.as_ref())
.into_iter()
.collect();
assert_eq!(with_tip.len(), 1);
assert_eq!(with_tip[0], "second");
}
#[test]
fn nested_block_wrappers_drop_both_levels() {
let md = "\
<div>
<section>
Inner body.
</section>
</div>
";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
for tag in ["<div", "</div>", "<section", "</section>"] {
assert!(
!contains_text(&bytes, tag),
"nested wrapper {tag} leaked into output"
);
}
assert!(contains_text(&bytes, "Inner body"));
}
#[test]
fn wrapper_with_attributes_drops() {
let md = "<div class=\"hero\" id=\"top\">\n\nBody text.\n\n</div>\n";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert!(!contains_text(&bytes, "<div"));
assert!(!contains_text(&bytes, "class=\"hero\""));
assert!(contains_text(&bytes, "Body text"));
}
#[test]
fn wrapper_preserves_inner_markdown_link() {
let md = "<section>\n\nSee [docs](https://docs.example) for more.\n\n</section>\n";
let bytes = render(md, "");
let links = collect_link_annotations(&bytes);
assert_eq!(links.len(), 1);
assert_eq!(links[0].0, "https://docs.example");
}
#[test]
fn wrapper_with_inline_anchor_inside_paragraph() {
let md = "\
<div>
Body with <a href=\"https://x.example\">inline anchor</a> inside.
</div>
";
let bytes = render(md, "");
let links = collect_link_annotations(&bytes);
assert_eq!(links.len(), 1);
assert_eq!(links[0].0, "https://x.example");
assert!(!contains_text(&bytes, "<a href"));
}
#[test]
fn anchor_inside_heading_renders_link() {
let md = "# Section with <a href=\"https://x.example\">a link</a>\n\nBody.\n";
let bytes = render(md, "");
assert_eq!(link_annotation_count(&bytes), 1);
}
#[test]
fn anchor_inside_list_item_renders_link() {
let md = "- first item with <a href=\"https://x.example\">link</a>\n- second\n";
let bytes = render(md, "");
assert_eq!(link_annotation_count(&bytes), 1);
}
#[test]
fn anchor_inside_table_cell_renders_link() {
let md = "\
| left | right |
| ---- | ----- |
| <a href=\"https://x.example\">go</a> | stay |
";
let bytes = render(md, "");
assert_eq!(link_annotation_count(&bytes), 1);
}
#[test]
fn anchor_inside_blockquote_renders_link() {
let md = "> quoted text with <a href=\"https://x.example\">a link</a> inside\n";
let bytes = render(md, "");
assert_eq!(link_annotation_count(&bytes), 1);
}
#[test]
fn anchor_with_query_and_fragment_preserves_url() {
let md = "go to <a href=\"https://example.com/p?q=1&r=2#sec\">there</a>.\n";
let bytes = render(md, "");
let links = collect_link_annotations(&bytes);
assert_eq!(links.len(), 1);
assert!(
links[0].0.contains("p?q=1") && links[0].0.contains("#sec"),
"URL parts missing: {:?}",
links[0]
);
}
#[test]
fn uppercase_anchor_tag_works_end_to_end() {
let md = "go <A HREF=\"https://example.com\">here</A> now\n";
let bytes = render(md, "");
assert_eq!(link_annotation_count(&bytes), 1);
assert!(contains_text(&bytes, "https://example.com"));
}
#[test]
fn self_closing_anchor_emits_no_annotation() {
let bytes = render("go <a href=\"https://example.com\" /> bye\n", "");
assert!(pdf_well_formed(&bytes));
assert_eq!(link_annotation_count(&bytes), 0);
}
#[test]
fn anchor_without_href_emits_no_annotation() {
let bytes = render("see <a name=\"foo\">target</a>\n", "");
assert!(pdf_well_formed(&bytes));
assert_eq!(link_annotation_count(&bytes), 0);
}
#[test]
fn unrecognised_inline_tag_passes_through_as_text() {
let bytes = render("text <weird>inside</weird> more\n", "");
assert!(pdf_well_formed(&bytes));
assert_eq!(link_annotation_count(&bytes), 0);
assert!(contains_text(&bytes, "weird"));
}
#[test]
fn standalone_html_comment_remains_invisible() {
let bytes = render("<!-- private note -->\nBody.\n", "");
assert!(!contains_text(&bytes, "private note"));
assert!(contains_text(&bytes, "Body"));
}
#[test]
fn markdown_link_title_tooltip_still_works() {
let bytes = render(
"see [docs](https://docs.example \"Documentation\") here\n",
"",
);
let links = collect_link_annotations(&bytes);
assert_eq!(links.len(), 1);
assert_eq!(links[0].1.as_deref(), Some("Documentation"));
}
#[test]
fn unknown_block_tag_treats_inner_content_as_code_block() {
let bytes = render("<aside>\n\ncontent\n\n</aside>\n", "");
assert!(pdf_well_formed(&bytes));
assert!(
contains_text(&bytes, "aside") || contains(&bytes, b"aside"),
"unknown block tag was silently dropped"
);
}
#[test]
fn unclosed_anchor_does_not_capture_next_paragraph() {
let md = "\
First: <a href=\"https://example.com/missing\">no close — falls through.
Second: <a name=\"bookmark\">not a link</a> here.
";
let bytes = render(md, "");
assert!(pdf_well_formed(&bytes));
assert_eq!(
link_annotation_count(&bytes),
0,
"neither malformed anchor should produce a link annotation"
);
}
#[test]
fn empty_anchor_body_still_emits_annotation() {
let bytes = render(
"Trailing <a href=\"https://example.com\"></a> link.\n",
"",
);
assert!(pdf_well_formed(&bytes));
assert!(link_annotation_count(&bytes) <= 1);
}