Struct Tag

Source

pub struct Tag<'a> { /* private fields */ }

Expand description

A reference to an element in the document.

Tag provides navigation and content extraction methods. It borrows from the underlying Document, so a Tag cannot outlive the document it refers to.

§Design

- The Copy trait enables cheap passing without ownership concerns.
- The lifetime 'a is tied to the Document, which prevents dangling references.
- The NodeId enables O(1) node access via the arena.

§Examples

§Accessing Attributes

use scrape_core::Soup;

let soup = Soup::parse("<a href=\"https://example.com\" class=\"link\">Link</a>");
if let Ok(Some(link)) = soup.find("a") {
    assert_eq!(link.get("href"), Some("https://example.com"));
    assert!(link.has_class("link"));
}

§Tree Navigation

use scrape_core::Soup;

let soup = Soup::parse("<div><span>Child</span></div>");
if let Ok(Some(span)) = soup.find("span") {
    if let Some(parent) = span.parent() {
        assert_eq!(parent.name(), Some("div"));
    }
}

Implementations§

Source §

impl<'a> Tag<'a>

Source

pub fn node_id(&self) -> NodeId

Returns the node ID.

Source

pub fn document(&self) -> &'a Document

Returns a reference to the document containing this tag.

This method is primarily useful for advanced operations that need direct document access, such as custom serialization or traversal.

Source

pub fn name(&self) -> Option<&str>

Returns the tag name (e.g., “div”, “span”, “a”).

Returns None if this is not an element node.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div></div>");
if let Ok(Some(div)) = soup.find("div") {
    assert_eq!(div.name(), Some("div"));
}

Source

pub fn get(&self, attr: &str) -> Option<&str>

Returns the value of an attribute, if present.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<a href=\"/page\">Link</a>");
if let Ok(Some(link)) = soup.find("a") {
    assert_eq!(link.get("href"), Some("/page"));
    assert_eq!(link.get("class"), None);
}

Source

pub fn has_attr(&self, attr: &str) -> bool

Checks if this element has the specified attribute.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<input disabled type=\"text\">");
if let Ok(Some(input)) = soup.find("input") {
    assert!(input.has_attr("disabled"));
    assert!(input.has_attr("type"));
    assert!(!input.has_attr("value"));
}

Source

pub fn attrs(&self) -> Option<&HashMap<String, String>>

Returns all attributes on this element.

Returns None if this is not an element node.

Source

pub fn has_class(&self, class: &str) -> bool

Checks if this element has the specified class.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div class=\"foo bar\"></div>");
if let Ok(Some(div)) = soup.find("div") {
    assert!(div.has_class("foo"));
    assert!(div.has_class("bar"));
    assert!(!div.has_class("baz"));
}

Source

pub fn classes(&self) -> impl Iterator<Item = &str>

Returns all classes on this element.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div class=\"foo bar baz\"></div>");
if let Ok(Some(div)) = soup.find("div") {
    let classes: Vec<_> = div.classes().collect();
    assert_eq!(classes, vec!["foo", "bar", "baz"]);
}

Source

pub fn text(&self) -> String

Returns the text content of this element and its descendants.

HTML tags are stripped and only text nodes are included. Text from multiple nodes is concatenated with no separator.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
if let Ok(Some(div)) = soup.find("div") {
    assert_eq!(div.text(), "Hello World!");
}

Source

pub fn text_into(&self, buf: &mut String)

Collects text content into the provided buffer.

This method allows buffer reuse for repeated text extraction, avoiding allocations in performance-critical paths.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div>Hello</div><div>World</div>");
let mut buffer = String::new();

for div in soup.find_all("div").unwrap() {
    buffer.clear();
    div.text_into(&mut buffer);
    println!("{}", buffer);
}

Source

pub fn inner_html(&self) -> String

Returns the inner HTML of this element.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div><span>Hello</span></div>");
if let Ok(Some(div)) = soup.find("div") {
    assert_eq!(div.inner_html(), "<span>Hello</span>");
}

Source

pub fn outer_html(&self) -> String

Returns the outer HTML of this element (including the tag itself).

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div><span>Hello</span></div>");
if let Ok(Some(span)) = soup.find("span") {
    assert_eq!(span.outer_html(), "<span>Hello</span>");
}

Source

pub fn parent(&self) -> Option<Tag<'a>>

Returns the parent element, if any.

Returns None for the root element or if the parent is not an element.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div><span>text</span></div>");
if let Ok(Some(span)) = soup.find("span") {
    let parent = span.parent().unwrap();
    assert_eq!(parent.name(), Some("div"));
}

Source

pub fn children(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over direct child elements.

Only element nodes are included (text and comments are skipped).

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let children: Vec<_> = ul.children().collect();
    assert_eq!(children.len(), 3);
}

Source

pub fn next_sibling(&self) -> Option<Tag<'a>>

Returns the next sibling element.

Skips text and comment nodes.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li id=\"a\">A</li><li id=\"b\">B</li></ul>");
if let Ok(Some(first)) = soup.find("li") {
    if let Some(next) = first.next_sibling() {
        assert_eq!(next.get("id"), Some("b"));
    }
}

Source

pub fn prev_sibling(&self) -> Option<Tag<'a>>

Returns the previous sibling element.

Skips text and comment nodes.

Source

pub fn descendants(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all descendant elements.

Only element nodes are included (text and comments are skipped).

Source

pub fn parents(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all ancestor elements.

Iterates from parent toward root (does not include the element itself). Only element nodes are included (text and comments are skipped).

§Complexity

- Time: O(depth) - iterates from node to root
- Space: O(1) - lazy evaluation

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<html><body><div><span>text</span></div></body></html>");
if let Ok(Some(span)) = soup.find("span") {
    let names: Vec<_> = span.parents().filter_map(|t| t.name().map(String::from)).collect();
    assert_eq!(names, vec!["div", "body", "html"]);
}

Source

pub fn ancestors(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all ancestor elements.

Alias for Tag::parents.

Source

pub fn closest(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>

Finds the nearest ancestor matching the CSS selector.

Iterates from parent toward root, returning the first match. Returns Ok(None) if no ancestor matches. Does not match the element itself.

§Complexity

- Time: O(depth × selector_complexity) - tests each ancestor against selector
- Space: O(1) - no allocation

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div class='outer'><div class='inner'><span>text</span></div></div>");
if let Ok(Some(span)) = soup.find("span") {
    let inner = span.closest("div.inner").unwrap();
    assert!(inner.is_some());
    assert!(inner.unwrap().has_class("inner"));

    let outer = span.closest("div.outer").unwrap();
    assert!(outer.is_some());
    assert!(outer.unwrap().has_class("outer"));

    let none = span.closest("section").unwrap();
    assert!(none.is_none());
}

Source

pub fn next_siblings(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over following sibling elements.

Does not include the element itself. Only element nodes are included.

§Complexity

- Time: O(width) - iterates through siblings until end
- Space: O(1) - lazy evaluation

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(first)) = soup.find("li") {
    let ids: Vec<_> =
        first.next_siblings().filter_map(|t| t.get("id").map(String::from)).collect();
    assert_eq!(ids, vec!["b", "c"]);
}

Source

pub fn prev_siblings(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over preceding sibling elements.

Does not include the element itself. Only element nodes are included. Iterates in reverse order (from immediate predecessor toward first sibling).

§Complexity

- Time: O(width) - iterates through siblings until start
- Space: O(1) - lazy evaluation

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(last)) = soup.find("li#c") {
    let ids: Vec<_> =
        last.prev_siblings().filter_map(|t| t.get("id").map(String::from)).collect();
    assert_eq!(ids, vec!["b", "a"]); // Reverse order
}

Source

pub fn siblings(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all sibling elements (excluding self).

Iterates in document order (from first sibling to last). Only element nodes are included.

§Complexity

- Time: O(width) - iterates through all siblings
- Space: O(1) - lazy evaluation

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(middle)) = soup.find("li#b") {
    let ids: Vec<_> = middle.siblings().filter_map(|t| t.get("id").map(String::from)).collect();
    assert_eq!(ids, vec!["a", "c"]); // Document order
}

Source

pub fn find(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>

Finds the first descendant matching the selector.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div><ul><li class=\"item\">text</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
    let item = div.find(".item").unwrap();
    assert!(item.is_some());
}

Source

pub fn find_all(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>

Finds all descendants matching the selector.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let items = ul.find_all("li").unwrap();
    assert_eq!(items.len(), 3);
}

Source

pub fn select(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>

Selects descendants using a CSS selector.

Alias for Tag::find_all.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

Source

pub fn find_compiled(&self, selector: &CompiledSelector) -> Option<Tag<'a>>

Finds the first descendant matching a pre-compiled selector.

§Examples

use scrape_core::{Soup, query::CompiledSelector};

let selector = CompiledSelector::compile(".item").unwrap();
let soup = Soup::parse("<div><ul><li class=\"item\">text</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
    let item = div.find_compiled(&selector);
    assert!(item.is_some());
}

Source

pub fn select_compiled(&self, selector: &CompiledSelector) -> Vec<Tag<'a>>

Finds all descendants matching a pre-compiled selector.

§Examples

use scrape_core::{Soup, query::CompiledSelector};

let selector = CompiledSelector::compile("li").unwrap();
let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let items = ul.select_compiled(&selector);
    assert_eq!(items.len(), 3);
}

Source

pub fn select_text(&self, selector: &str) -> QueryResult<Vec<String>>

Extracts text content from all descendants matching a CSS selector.

Returns the concatenated text content of each matching element within this element’s subtree.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div><ul><li>First</li><li>Second</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
    let texts = div.select_text("li").unwrap();
    assert_eq!(texts, vec!["First", "Second"]);
}

Source

pub fn select_attr(&self, selector: &str, attr: &str) -> QueryResult<Vec<Option<String>>>

Extracts attribute values from all descendants matching a CSS selector.

For each matching element, the result contains Some(value) if the attribute exists and None if it doesn’t.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<nav><a href='/1'>1</a><a href='/2'>2</a></nav>");
if let Ok(Some(nav)) = soup.find("nav") {
    let hrefs = nav.select_attr("a", "href").unwrap();
    assert_eq!(hrefs, vec![Some("/1".to_string()), Some("/2".to_string())]);
}

Source

pub fn text_nodes(&self) -> TextNodesIter<'a>

Returns an iterator over all text nodes in this subtree.

Only text node content is returned; element tags and comments are skipped. Iterates in depth-first order.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
if let Ok(Some(div)) = soup.find("div") {
    let texts: Vec<_> = div.text_nodes().collect();
    assert_eq!(texts, vec!["Hello ", "World", "!"]);
}

Source

pub fn children_by_name(&self, name: &'a str) -> impl Iterator<Item = Tag<'a>> + 'a

Returns an iterator over child elements with the given tag name.

Only direct children are included (not descendants). Tag name matching is case-insensitive.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<ul><li>A</li><span>X</span><li>B</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let lis: Vec<_> = ul.children_by_name("li").collect();
    assert_eq!(lis.len(), 2);
}

Source

pub fn children_by_class(&self, class: &'a str) -> impl Iterator<Item = Tag<'a>> + 'a

Returns an iterator over child elements with the given class.

Only direct children are included (not descendants). Elements are matched if they have the class in their class attribute.

§Examples

use scrape_core::Soup;

let soup = Soup::parse("<div><span class=\"a\">A</span><span class=\"b\">B</span></div>");
if let Ok(Some(div)) = soup.find("div") {
    let results: Vec<_> = div.children_by_class("a").collect();
    assert_eq!(results.len(), 1);
}