use skyscraper::{html, xpath};
/// Applies `//div | //span` to a document holding one `div`, one `span` and one `p`,
/// and expects exactly two items back.
#[test]
fn union_bar_combines_results() {
    let markup = r#"<html><body><div>a</div><span>b</span><p>c</p></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div | //span").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 2, "should return div and span: {items:?}");
}
/// Same scenario as the `|` form, but spelled with the `union` keyword:
/// `//div union //span` is expected to yield two items.
#[test]
fn union_keyword_combines_results() {
    let markup = r#"<html><body><div>a</div><span>b</span><p>c</p></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div union //span").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 2, "should return div and span: {items:?}");
}
/// Unions `//div` with itself over a document containing two `div` elements and
/// expects two items, not four.
#[test]
fn union_deduplicates() {
    let markup = r#"<html><body><div class="a">x</div><div class="b">y</div></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div | //div").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 2, "should deduplicate: {items:?}");
}
/// Chains two `|` operators (`//div | //span | //p`) and expects one item per
/// element in the three-element body.
#[test]
fn union_three_sets() {
    let markup = r#"<html><body><div>a</div><span>b</span><p>c</p></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div | //span | //p").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 3, "should return all three elements: {items:?}");
}
/// Checks that a union is returned in document order rather than operand order.
///
/// The expression lists `//div` before `//span`, but the `span` precedes the `div`
/// in the markup, so the `span` is expected first and the `div` second.
#[test]
fn union_document_order() {
    let text = r#"<html><body><span>first</span><div>second</div></body></html>"#;
    let document = html::parse(text).unwrap();
    let xpath = xpath::parse("//div | //span").unwrap();
    let items = xpath.apply(&document).unwrap();
    assert_eq!(items.len(), 2);

    let first = items[0].extract_as_node().extract_as_element_node();
    assert_eq!(first.name, "span", "span should come first in document order");

    // Also pin the second position so the whole ordering is verified,
    // not only its head.
    let second = items[1].extract_as_node().extract_as_element_node();
    assert_eq!(second.name, "div", "div should come second in document order");
}
/// Intersects `//div` with `//div[@class]` over three `div` elements, two of which
/// carry a `class` attribute, and expects two items.
#[test]
fn intersect_filters_common() {
    let markup =
        r#"<html><body><div class="a">x</div><div>y</div><div class="b">z</div></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div intersect //div[@class]").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 2, "should return only divs with class attr: {items:?}");
}
/// Subtracts `//div[@class]` from `//div` over three `div` elements, two of which
/// carry a `class` attribute, and expects a single item to remain.
#[test]
fn except_removes_matching() {
    let markup =
        r#"<html><body><div class="a">x</div><div>y</div><div class="b">z</div></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div except //div[@class]").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 1, "should return only the div without class: {items:?}");
}
/// Checks that `except` with a disjoint right-hand side leaves the left-hand side intact.
///
/// `//div except //span` runs over a document with one `div` and one `span`; the
/// result is expected to be exactly one item, and that item is expected to be the `div`.
#[test]
fn except_disjoint_returns_lhs() {
    let text = r#"<html><body><div>a</div><span>b</span></body></html>"#;
    let document = html::parse(text).unwrap();
    let xpath = xpath::parse("//div except //span").unwrap();
    let items = xpath.apply(&document).unwrap();
    assert_eq!(items.len(), 1, "should return div: {items:?}");

    // A count of one alone would also be satisfied by the span, so confirm
    // the surviving item really is the left-hand operand's element.
    let remaining = items[0].extract_as_node().extract_as_element_node();
    assert_eq!(remaining.name, "div", "the remaining item should be the div");
}
/// Intersects `//div` with `//span` over a document with one of each and expects
/// no items at all.
#[test]
fn intersect_disjoint_returns_empty() {
    let markup = r#"<html><body><div>a</div><span>b</span></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div intersect //span").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 0, "should return empty: {items:?}");
}
/// Evaluates `count((1, 2, 2, 3))` against an empty body and expects the integer 4,
/// i.e. the repeated `2` in the sequence literal is counted twice.
#[test]
fn sequence_preserves_duplicates() {
    use skyscraper::xpath::grammar::data_model::{AnyAtomicType, XpathItem};

    let tree = html::parse("<html><body></body></html>").unwrap();
    let expr = xpath::parse("count((1, 2, 2, 3))").unwrap();
    let items = expr.apply(&tree).unwrap();

    let expected = XpathItem::AnyAtomicType(AnyAtomicType::Integer(4));
    assert_eq!(items[0], expected, "Sequences must preserve duplicate values");
}
/// Unions `//div` with itself over a document containing a single `div` and
/// expects that one node to appear only once in the result.
#[test]
fn union_still_deduplicates_nodes() {
    let markup = r#"<html><body><div>x</div></body></html>"#;
    let tree = html::parse(markup).unwrap();
    let expr = xpath::parse("//div | //div").unwrap();
    let items = expr.apply(&tree).unwrap();

    let count = items.len();
    assert_eq!(count, 1, "Union of same nodes should deduplicate: {items:?}");
}