Skip to main content

Tag

Struct Tag 

Source
pub struct Tag<'a> { /* private fields */ }
Expand description

A reference to an element in the document.

Tag provides navigation and content extraction methods. It borrows from the underlying Document, ensuring the tag remains valid while in use.

§Design

  • Copy trait enables cheap passing without ownership concerns
  • Lifetime 'a tied to Document prevents dangling references
  • NodeId enables O(1) node access via arena

§Examples

§Accessing Attributes

use scrape_core::Soup;

let soup = Soup::parse("<a href=\"https://example.com\" class=\"link\">Link</a>");
if let Ok(Some(link)) = soup.find("a") {
    assert_eq!(link.get("href"), Some("https://example.com"));
    assert!(link.has_class("link"));
}

§Tree Navigation

use scrape_core::Soup;

let soup = Soup::parse("<div><span>Child</span></div>");
if let Ok(Some(span)) = soup.find("span") {
    if let Some(parent) = span.parent() {
        assert_eq!(parent.name(), Some("div"));
    }
}

Implementations§

Source§

impl<'a> Tag<'a>

Source

pub fn node_id(&self) -> NodeId

Returns the node ID.

Source

pub fn document(&self) -> &'a Document

Returns a reference to the document containing this tag.

This method is primarily useful for advanced operations that need direct document access, such as custom serialization or traversal.

Source

pub fn name(&self) -> Option<&str>

Returns the tag name (e.g., “div”, “span”, “a”).

Returns None if this is not an element node.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div></div>");
if let Ok(Some(div)) = soup.find("div") {
    assert_eq!(div.name(), Some("div"));
}
Source

pub fn get(&self, attr: &str) -> Option<&str>

Returns the value of an attribute, if present.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<a href=\"/page\">Link</a>");
if let Ok(Some(link)) = soup.find("a") {
    assert_eq!(link.get("href"), Some("/page"));
    assert_eq!(link.get("class"), None);
}
Source

pub fn has_attr(&self, attr: &str) -> bool

Checks if this element has the specified attribute.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<input disabled type=\"text\">");
if let Ok(Some(input)) = soup.find("input") {
    assert!(input.has_attr("disabled"));
    assert!(input.has_attr("type"));
    assert!(!input.has_attr("value"));
}
Source

pub fn attrs(&self) -> Option<&HashMap<String, String>>

Returns all attributes on this element.

Returns None if this is not an element node.

Source

pub fn has_class(&self, class: &str) -> bool

Checks if this element has the specified class.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div class=\"foo bar\"></div>");
if let Ok(Some(div)) = soup.find("div") {
    assert!(div.has_class("foo"));
    assert!(div.has_class("bar"));
    assert!(!div.has_class("baz"));
}
Source

pub fn classes(&self) -> impl Iterator<Item = &str>

Returns all classes on this element.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div class=\"foo bar baz\"></div>");
if let Ok(Some(div)) = soup.find("div") {
    let classes: Vec<_> = div.classes().collect();
    assert_eq!(classes, vec!["foo", "bar", "baz"]);
}
Source

pub fn text(&self) -> String

Returns the text content of this element and its descendants.

HTML tags are stripped and only text nodes are included. Text from multiple nodes is concatenated with no separator.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
if let Ok(Some(div)) = soup.find("div") {
    assert_eq!(div.text(), "Hello World!");
}
Source

pub fn text_into(&self, buf: &mut String)

Collects text content into the provided buffer.

This method allows buffer reuse for repeated text extraction, avoiding allocations in performance-critical paths.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div>Hello</div><div>World</div>");
let mut buffer = String::new();

for div in soup.find_all("div").unwrap() {
    buffer.clear();
    div.text_into(&mut buffer);
    println!("{}", buffer);
}
Source

pub fn inner_html(&self) -> String

Returns the inner HTML of this element.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div><span>Hello</span></div>");
if let Ok(Some(div)) = soup.find("div") {
    assert_eq!(div.inner_html(), "<span>Hello</span>");
}
Source

pub fn outer_html(&self) -> String

Returns the outer HTML of this element (including the tag itself).

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div><span>Hello</span></div>");
if let Ok(Some(span)) = soup.find("span") {
    assert_eq!(span.outer_html(), "<span>Hello</span>");
}
Source

pub fn parent(&self) -> Option<Tag<'a>>

Returns the parent element, if any.

Returns None for the root element or if the parent is not an element.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div><span>text</span></div>");
if let Ok(Some(span)) = soup.find("span") {
    let parent = span.parent().unwrap();
    assert_eq!(parent.name(), Some("div"));
}
Source

pub fn children(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over direct child elements.

Only element nodes are included (text and comments are skipped).

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let children: Vec<_> = ul.children().collect();
    assert_eq!(children.len(), 3);
}
Source

pub fn next_sibling(&self) -> Option<Tag<'a>>

Returns the next sibling element.

Skips text and comment nodes.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li id=\"a\">A</li><li id=\"b\">B</li></ul>");
if let Ok(Some(first)) = soup.find("li") {
    if let Some(next) = first.next_sibling() {
        assert_eq!(next.get("id"), Some("b"));
    }
}
Source

pub fn prev_sibling(&self) -> Option<Tag<'a>>

Returns the previous sibling element.

Skips text and comment nodes.

Source

pub fn descendants(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all descendant elements.

Only element nodes are included (text and comments are skipped).

Source

pub fn parents(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all ancestor elements.

Iterates from parent toward root (does not include the element itself). Only element nodes are included (text and comments are skipped).

§Complexity
  • Time: O(depth) - iterates from node to root
  • Space: O(1) - lazy evaluation
§Examples
use scrape_core::Soup;

let soup = Soup::parse("<html><body><div><span>text</span></div></body></html>");
if let Ok(Some(span)) = soup.find("span") {
    let names: Vec<_> = span.parents().filter_map(|t| t.name().map(String::from)).collect();
    assert_eq!(names, vec!["div", "body", "html"]);
}
Source

pub fn ancestors(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all ancestor elements.

Alias for Tag::parents.

Source

pub fn closest(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>

Finds the nearest ancestor matching the CSS selector.

Iterates from parent toward root, returning the first match. Returns Ok(None) if no ancestor matches. Does not match the element itself.

§Complexity
  • Time: O(depth × selector_complexity) - tests each ancestor against selector
  • Space: O(1) - no allocation
§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div class='outer'><div class='inner'><span>text</span></div></div>");
if let Ok(Some(span)) = soup.find("span") {
    let inner = span.closest("div.inner").unwrap();
    assert!(inner.is_some());
    assert!(inner.unwrap().has_class("inner"));

    let outer = span.closest("div.outer").unwrap();
    assert!(outer.is_some());
    assert!(outer.unwrap().has_class("outer"));

    let none = span.closest("section").unwrap();
    assert!(none.is_none());
}
Source

pub fn next_siblings(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over following sibling elements.

Does not include the element itself. Only element nodes are included.

§Complexity
  • Time: O(width) - iterates through siblings until end
  • Space: O(1) - lazy evaluation
§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(first)) = soup.find("li") {
    let ids: Vec<_> =
        first.next_siblings().filter_map(|t| t.get("id").map(String::from)).collect();
    assert_eq!(ids, vec!["b", "c"]);
}
Source

pub fn prev_siblings(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over preceding sibling elements.

Does not include the element itself. Only element nodes are included. Iterates in reverse order (from immediate predecessor toward first sibling).

§Complexity
  • Time: O(width) - iterates through siblings until start
  • Space: O(1) - lazy evaluation
§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(last)) = soup.find("li#c") {
    let ids: Vec<_> =
        last.prev_siblings().filter_map(|t| t.get("id").map(String::from)).collect();
    assert_eq!(ids, vec!["b", "a"]); // Reverse order
}
Source

pub fn siblings(&self) -> impl Iterator<Item = Tag<'a>>

Returns an iterator over all sibling elements (excluding self).

Iterates in document order (from first sibling to last). Only element nodes are included.

§Complexity
  • Time: O(width) - iterates through all siblings
  • Space: O(1) - lazy evaluation
§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(middle)) = soup.find("li#b") {
    let ids: Vec<_> = middle.siblings().filter_map(|t| t.get("id").map(String::from)).collect();
    assert_eq!(ids, vec!["a", "c"]); // Document order
}
Source

pub fn find(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>

Finds the first descendant matching the selector.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div><ul><li class=\"item\">text</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
    let item = div.find(".item").unwrap();
    assert!(item.is_some());
}
Source

pub fn find_all(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>

Finds all descendants matching the selector.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let items = ul.find_all("li").unwrap();
    assert_eq!(items.len(), 3);
}
Source

pub fn select(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>

Selects descendants using a CSS selector.

Alias for Tag::find_all.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

Source

pub fn find_compiled(&self, selector: &CompiledSelector) -> Option<Tag<'a>>

Finds the first descendant using a pre-compiled selector.

§Examples
use scrape_core::{Soup, query::CompiledSelector};

let selector = CompiledSelector::compile(".item").unwrap();
let soup = Soup::parse("<div><ul><li class=\"item\">text</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
    let item = div.find_compiled(&selector);
    assert!(item.is_some());
}
Source

pub fn select_compiled(&self, selector: &CompiledSelector) -> Vec<Tag<'a>>

Finds all descendants using a pre-compiled selector.

§Examples
use scrape_core::{Soup, query::CompiledSelector};

let selector = CompiledSelector::compile("li").unwrap();
let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let items = ul.select_compiled(&selector);
    assert_eq!(items.len(), 3);
}
Source

pub fn select_text(&self, selector: &str) -> QueryResult<Vec<String>>

Extracts text content from all descendants matching a CSS selector.

Returns the concatenated text content of each matching element within this element’s subtree.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div><ul><li>First</li><li>Second</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
    let texts = div.select_text("li").unwrap();
    assert_eq!(texts, vec!["First", "Second"]);
}
Source

pub fn select_attr( &self, selector: &str, attr: &str, ) -> QueryResult<Vec<Option<String>>>

Extracts attribute values from all descendants matching a CSS selector.

The result contains one entry per matching element: Some(value) if that element has the attribute, None if it doesn’t.

§Errors

Returns QueryError::InvalidSelector if the selector syntax is invalid.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<nav><a href='/1'>1</a><a href='/2'>2</a></nav>");
if let Ok(Some(nav)) = soup.find("nav") {
    let hrefs = nav.select_attr("a", "href").unwrap();
    assert_eq!(hrefs, vec![Some("/1".to_string()), Some("/2".to_string())]);
}
Source

pub fn text_nodes(&self) -> TextNodesIter<'a>

Returns an iterator over all text nodes in this subtree.

Only text node content is returned; element tags and comments are skipped. Iterates in depth-first order.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
if let Ok(Some(div)) = soup.find("div") {
    let texts: Vec<_> = div.text_nodes().collect();
    assert_eq!(texts, vec!["Hello ", "World", "!"]);
}
Source

pub fn children_by_name( &self, name: &'a str, ) -> impl Iterator<Item = Tag<'a>> + 'a

Returns an iterator over child elements with the given tag name.

Only direct children are included (not descendants). Tag name matching is case-insensitive.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<ul><li>A</li><span>X</span><li>B</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
    let lis: Vec<_> = ul.children_by_name("li").collect();
    assert_eq!(lis.len(), 2);
}
Source

pub fn children_by_class( &self, class: &'a str, ) -> impl Iterator<Item = Tag<'a>> + 'a

Returns an iterator over child elements with the given class.

Only direct children are included (not descendants). Elements are matched if they have the class in their class attribute.

§Examples
use scrape_core::Soup;

let soup = Soup::parse("<div><span class=\"a\">A</span><span class=\"b\">B</span></div>");
if let Ok(Some(div)) = soup.find("div") {
    let results: Vec<_> = div.children_by_class("a").collect();
    assert_eq!(results.len(), 1);
}

Trait Implementations§

Source§

impl<'a> Clone for Tag<'a>

Source§

fn clone(&self) -> Tag<'a>

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<'a> Debug for Tag<'a>

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl HtmlSerializer for Tag<'_>

Source§

fn serialize_html_into(&self, buf: &mut String)

Serializes this node to HTML, appending to the provided buffer.
Source§

fn serialize_inner_into(&self, buf: &mut String)

Serializes children to HTML, appending to the provided buffer.
Source§

fn extract_text_into(&self, buf: &mut String)

Extracts text content, appending to the provided buffer.
Source§

fn serialize_html(&self) -> String

Serializes this node and its subtree to HTML. Read more
Source§

fn serialize_inner(&self) -> String

Serializes only the children of this node to HTML. Read more
Source§

fn extract_text(&self) -> String

Extracts text content from this node and its descendants. Read more
Source§

impl PartialEq for Tag<'_>

Source§

fn eq(&self, other: &Self) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<'a> Copy for Tag<'a>

Source§

impl Eq for Tag<'_>

Auto Trait Implementations§

§

impl<'a> Freeze for Tag<'a>

§

impl<'a> RefUnwindSafe for Tag<'a>

§

impl<'a> Send for Tag<'a>

§

impl<'a> Sync for Tag<'a>

§

impl<'a> Unpin for Tag<'a>

§

impl<'a> UnwindSafe for Tag<'a>

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.