use libxml::parser::Parser;
use libxml::tree::{Document, Node};
/// Duplicates the first child of a namespaced root into a new document and
/// checks that the copy has `<a>` as its root with at least one child, and
/// that it still serializes after the source document has been dropped.
#[test]
fn dup_node_into_new_doc_basic() {
    let source_xml = "<root xmlns=\"http://example.com/ns\"><a id=\"x\"><b/></a><c/></root>";
    let parser = Parser::default();
    let src = parser.parse_string(source_xml).expect("parse src");

    // The fixture has no whitespace between tags, so the first child is `<a>`.
    let src_root = src.get_root_element().expect("src root");
    let first_child = src_root.get_first_child().expect("first child");

    let copy = Document::dup_node_into_new_doc(&first_child).expect("dup");
    let copy_root = copy.get_root_element().expect("sub root");
    assert_eq!(copy_root.get_name(), "a");
    assert!(copy_root.get_first_child().is_some());

    // Serialize the copy only once the source document is gone.
    drop(src);
    let serialized = copy.to_string();
    assert!(serialized.contains("<a"));
    assert!(serialized.contains("<b"));
}
/// Walks the root's children and duplicates every `<s>` sibling into its own
/// document while the source is still alive, then serializes all copies after
/// the source document has been dropped.
#[test]
fn dup_node_into_new_doc_multi_siblings() {
    let source_xml = "<root xmlns=\"http://example.com/ns\">\
        <s id=\"s1\"><t>one</t></s>\
        <s id=\"s2\"><t>two</t></s>\
        <s id=\"s3\"><t>three</t></s>\
        </root>";
    let parser = Parser::default();
    let src = parser.parse_string(source_xml).expect("parse src");
    let root = src.get_root_element().expect("src root");

    let mut cursor = root.get_first_child();
    let mut subdocs = Vec::new();
    let mut count = 0;
    while let Some(node) = cursor {
        if node.get_name() == "s" {
            let sub = Document::dup_node_into_new_doc(&node)
                .expect("dup_node_into_new_doc must succeed for every sibling");
            let sub_root_name = sub.get_root_element().unwrap().get_name();
            assert_eq!(
                sub_root_name, "s",
                "sub-document #{count} should have <s> as root"
            );
            subdocs.push(sub);
            count += 1;
        }
        // The next sibling is looked up only after the current node was handled.
        cursor = node.get_next_sibling();
    }
    assert_eq!(count, 3, "all three siblings extracted");

    // Every copy must still serialize with the source gone.
    drop(src);
    for (i, doc) in subdocs.iter().enumerate() {
        let xml = doc.to_string();
        assert!(xml.contains("<s"), "subdoc {i} has <s>");
        assert!(xml.contains("<t"), "subdoc {i} has <t>");
    }
}
/// Creates the copy inside an inner scope so that the parser, the source
/// document and the source node handles have all gone out of scope before the
/// copy is serialized and inspected.
#[test]
fn dup_node_into_new_doc_source_dropped_first() {
    let source_xml = "<root xmlns=\"http://example.com/ns\" xmlns:x=\"http://example.com/x\">\
        <a x:tag=\"hi\"><b>text</b></a></root>";

    // Only the duplicated document escapes this block.
    let detached = {
        let parser = Parser::default();
        let src = parser.parse_string(source_xml).expect("parse src");
        let src_root = src.get_root_element().unwrap();
        let first_child = src_root.get_first_child().unwrap();
        Document::dup_node_into_new_doc(&first_child).expect("dup")
    };

    let serialized = detached.to_string();
    assert!(serialized.contains("<a"));
    assert!(serialized.contains("<b"));
    assert!(serialized.contains("text"));
}
/// Collects the `<s>` children of the root, unlinks each of them from the
/// source tree, and then duplicates every unlinked node into its own
/// document. The copies are serialized after the source document is dropped.
#[test]
fn dup_node_into_new_doc_after_unlink_chain() {
    let source_xml = "<root xmlns=\"http://example.com/ns\">\
        <s id=\"s1\"><t>one</t></s>\
        <s id=\"s2\"><t>two</t></s>\
        <s id=\"s3\"><t>three</t></s>\
        </root>";
    let parser = Parser::default();
    let src = parser.parse_string(source_xml).expect("parse src");
    let root = src.get_root_element().expect("src root");

    // Gather first, unlink afterwards: every sibling link is read before any
    // node is detached.
    let mut pages: Vec<Node> =
        std::iter::successors(root.get_first_child(), |n| n.get_next_sibling())
            .filter(|n| n.get_name() == "s")
            .collect();
    for page in &mut pages {
        page.unlink_node();
    }

    let mut subdocs = Vec::new();
    for (i, page) in pages.iter().enumerate() {
        let sub = Document::dup_node_into_new_doc(page)
            .unwrap_or_else(|_| panic!("dup #{i} failed for unlinked sibling"));
        let sub_root = sub.get_root_element().expect("sub root");
        assert_eq!(sub_root.get_name(), "s");
        subdocs.push(sub);
    }
    assert_eq!(subdocs.len(), 3);

    drop(src);
    for doc in &subdocs {
        let xml = doc.to_string();
        // Accept the element names with or without a namespace prefix.
        assert!(
            xml.contains("<s ") || xml.contains("<s>") || xml.contains(":s "),
            "subdoc must contain element s: {xml}"
        );
        assert!(
            xml.contains("<t>") || xml.contains(":t>"),
            "subdoc must contain element t: {xml}"
        );
    }
}
/// Unlinks the `<s>` children of the root, then for each one attempts an
/// attribute write through a cloned handle, runs an attribute-matching XPath
/// query on it, and duplicates it into a new document. Each step is traced
/// to stderr. The copies are serialized after both the collected node
/// handles and the source document have been dropped.
#[test]
fn dup_node_into_new_doc_after_xpath_and_attr_mutation() {
let parser = Parser::default();
let src = parser
.parse_string(
"<root xmlns=\"http://example.com/ns\">\
<s xml:id=\"s1\"><t>one</t></s>\
<s xml:id=\"s2\"><t>two</t></s>\
<s xml:id=\"s3\"><t>three</t></s>\
</root>",
)
.expect("parse src");
let root = src.get_root_element().unwrap();
// Collect the <s> children; the next sibling is read before the current
// node is examined, and nothing is unlinked until the walk has finished.
let mut pages: Vec<Node> = Vec::new();
let mut cur = root.get_first_child();
while let Some(n) = cur {
let next = n.get_next_sibling();
if n.get_name() == "s" {
pages.push(n);
}
cur = next;
}
// Detach every collected node from the source tree.
for p in pages.iter_mut() {
p.unlink_node();
}
let mut subdocs = Vec::new();
for (i, p) in pages.iter().enumerate() {
eprintln!("[iter {i}] start");
// `p` is only borrowed immutably here, so the attribute write goes
// through a cloned handle.
// NOTE(review): the Result is discarded via `.ok()`, so a failed
// `set_attribute` would go unnoticed and the node would be duplicated
// without the attribute — confirm that best-effort is intended.
let mut p_mut = p.clone();
p_mut.set_attribute("inlist", "toc").ok();
eprintln!("[iter {i}] post set_attribute");
// A failed XPath evaluation is treated as an empty result; the hits are
// only counted for the trace line below and stay in scope during the dup.
let xpath_hits = p.findnodes("descendant-or-self::*[@*]").unwrap_or_default();
eprintln!("[iter {i}] post findnodes (hits={})", xpath_hits.len());
let sub = Document::dup_node_into_new_doc(p)
.unwrap_or_else(|_| panic!("dup #{i} failed after xpath/attr mutation"));
eprintln!("[iter {i}] post dup");
subdocs.push(sub);
}
assert_eq!(subdocs.len(), 3);
// Release the node handles first, then the source document.
drop(pages);
drop(src);
// Serialization output is discarded; the loop only checks that each copy
// can still be serialized.
for s in &subdocs {
let _ = s.to_string();
}
}
/// Duplicates five unlinked `<s>` siblings whose descendants carry attributes
/// in three different prefixed namespaces declared on the root, then
/// serializes every copy after the source document has been dropped.
#[test]
fn dup_node_into_new_doc_many_ns_repeated() {
    let parser = Parser::default();
    let src_xml = "<root xmlns=\"http://example.com/ns\" \
        xmlns:a=\"http://example.com/a\" \
        xmlns:b=\"http://example.com/b\" \
        xmlns:c=\"http://example.com/c\">\
        <s id=\"s1\"><t a:k=\"v\"><u b:k=\"v\"/></t></s>\
        <s id=\"s2\"><t a:k=\"v\"><u c:k=\"v\"/></t></s>\
        <s id=\"s3\"><t b:k=\"v\"><u a:k=\"v\"/></t></s>\
        <s id=\"s4\"><t c:k=\"v\"><u b:k=\"v\"/></t></s>\
        <s id=\"s5\"><t a:k=\"v\"><u c:k=\"v\"/></t></s>\
        </root>";
    let src = parser.parse_string(src_xml).expect("parse src");
    let root = src.get_root_element().unwrap();

    // Collect all <s> children before detaching any of them.
    let mut pages: Vec<Node> =
        std::iter::successors(root.get_first_child(), |n| n.get_next_sibling())
            .filter(|n| n.get_name() == "s")
            .collect();
    for page in &mut pages {
        page.unlink_node();
    }

    // One new document per unlinked sibling, in document order.
    let subdocs: Vec<_> = pages
        .iter()
        .enumerate()
        .map(|(i, page)| {
            Document::dup_node_into_new_doc(page)
                .unwrap_or_else(|_| panic!("ns-stress dup #{i} failed"))
        })
        .collect();
    assert_eq!(subdocs.len(), 5);

    drop(src);
    // The output is discarded; each copy only has to serialize.
    for doc in &subdocs {
        let _ = doc.to_string();
    }
}
/// Runs the unlink-then-duplicate sequence against an on-disk fixture,
/// targeting every `section` element whose parent is named `chapter`.
/// Returns early, without failing, when the fixture file is not present.
#[test]
fn dup_node_into_new_doc_large_doc_siblings() {
    let parser = Parser::default();
    let path = "tests/resources/large_doc.xml";
    if std::fs::metadata(path).is_err() {
        eprintln!("skipping: {path} not present");
        return;
    }
    let src = parser.parse_file(path).expect("parse large doc");
    let root = src.get_root_element().expect("root");

    // A node qualifies only when it has a parent and that parent is a chapter.
    let has_chapter_parent = |n: &Node| {
        n.get_parent()
            .map_or(false, |parent| parent.get_name() == "chapter")
    };
    let mut pages: Vec<Node> = root
        .findnodes("descendant::*[local-name()='section']")
        .unwrap_or_default()
        .into_iter()
        .filter(has_chapter_parent)
        .collect();
    assert!(pages.len() >= 2, "need at least 2 section siblings to repro");

    for section in &mut pages {
        section.unlink_node();
    }

    let mut subdocs = Vec::new();
    for (i, section) in pages.iter().enumerate() {
        eprintln!("[large_doc] dup #{i} of {}", section.get_name());
        let sub = Document::dup_node_into_new_doc(section)
            .unwrap_or_else(|_| panic!("dup #{i} of section returned NULL"));
        subdocs.push(sub);
    }
    eprintln!("[large_doc] all dups OK, count={}", subdocs.len());

    // Release the node handles first, then the source document.
    drop(pages);
    drop(src);
    for doc in &subdocs {
        let _ = doc.to_string();
    }
}
/// Generates a document with five `<s>` elements of 200 `<p><e/></p>` pairs
/// each (every element carrying an `xml:id`), unlinks the `<s>` elements,
/// and for each one runs a descendant XPath query before duplicating it into
/// a new document.
#[test]
fn dup_node_into_new_doc_xpath_then_dup_at_scale() {
    let mut xml = String::from("<root xmlns=\"http://example.com/ns\">");
    for i in 0..5 {
        xml += &format!("<s xml:id=\"s{i}\">");
        for j in 0..200 {
            xml += &format!("<p xml:id=\"s{i}.p{j}\"><e xml:id=\"s{i}.p{j}.e\"/></p>");
        }
        xml += "</s>";
    }
    xml += "</root>";

    let parser = Parser::default();
    let src = parser.parse_string(&xml).expect("parse");
    let root = src.get_root_element().unwrap();

    // Collect all <s> children before detaching any of them.
    let mut pages: Vec<Node> =
        std::iter::successors(root.get_first_child(), |n| n.get_next_sibling())
            .filter(|n| n.get_name() == "s")
            .collect();
    for page in &mut pages {
        page.unlink_node();
    }

    let mut subdocs = Vec::new();
    for (i, page) in pages.iter().enumerate() {
        // The XPath results stay in scope while the node is duplicated.
        let hits = page
            .findnodes("descendant-or-self::*[@*[local-name()='id']]")
            .unwrap_or_default();
        assert!(hits.len() > 100, "expected many xml:id hits, got {}", hits.len());
        let sub = Document::dup_node_into_new_doc(page)
            .unwrap_or_else(|_| panic!("dup #{i} returned NULL after XPath descent"));
        subdocs.push(sub);
    }

    // Release the node handles first, then the source document.
    drop(pages);
    drop(src);
    for doc in &subdocs {
        let _ = doc.to_string();
    }
}
/// Generates a document with five `<resource>` elements followed by seven
/// `<s>` elements of 400 `<p><e/></p>` pairs each, unlinks the `<s>`
/// elements, and for each one runs an XPath query on the unlinked node and
/// another on the source root before duplicating the node into a new document.
#[test]
fn dup_node_into_new_doc_mixed_xpath_at_scale() {
    let mut xml = String::from("<root xmlns=\"http://example.com/ns\">");
    for r in 0..5 {
        xml += &format!("<resource src=\"r{r}.css\"/>");
    }
    for i in 0..7 {
        xml += &format!("<s xml:id=\"s{i}\">");
        for j in 0..400 {
            xml += &format!("<p xml:id=\"s{i}.p{j}\"><e xml:id=\"s{i}.p{j}.e\"/></p>");
        }
        xml += "</s>";
    }
    xml += "</root>";

    let parser = Parser::default();
    let src = parser.parse_string(&xml).expect("parse");
    let root = src.get_root_element().unwrap();

    // Collect all <s> children before detaching any of them.
    let mut pages: Vec<Node> =
        std::iter::successors(root.get_first_child(), |n| n.get_next_sibling())
            .filter(|n| n.get_name() == "s")
            .collect();
    for page in &mut pages {
        page.unlink_node();
    }

    let mut subdocs = Vec::new();
    for (i, page) in pages.iter().enumerate() {
        // Query the unlinked node first, then the still-attached source root;
        // both result sets stay in scope while the node is duplicated.
        let id_hits = page
            .findnodes("descendant-or-self::*[@*[local-name()='id']]")
            .unwrap_or_default();
        assert!(id_hits.len() > 100, "iter {i}: too few id hits");
        let res_hits = root
            .findnodes("descendant::*[local-name()='resource']")
            .unwrap_or_default();
        assert_eq!(res_hits.len(), 5, "iter {i}: expected 5 resource hits");
        let sub = Document::dup_node_into_new_doc(page)
            .unwrap_or_else(|_| panic!("dup #{i} returned NULL after mixed XPath"));
        subdocs.push(sub);
    }

    // Release the node handles first, then the source document.
    drop(pages);
    drop(src);
    for doc in &subdocs {
        let _ = doc.to_string();
    }
}