pub struct Tag<'a> { /* private fields */ }
Expand description
A reference to an element in the document.
Tag provides navigation and content extraction methods. It borrows from
the underlying Document, ensuring the tag remains valid while in use.
§Design
- Copy trait enables cheap passing without ownership concerns
- Lifetime 'a tied to Document prevents dangling references
- NodeId enables O(1) node access via arena
§Examples
§Accessing Attributes
use scrape_core::Soup;
let soup = Soup::parse("<a href=\"https://example.com\" class=\"link\">Link</a>");
if let Ok(Some(link)) = soup.find("a") {
assert_eq!(link.get("href"), Some("https://example.com"));
assert!(link.has_class("link"));
}
§Tree Navigation
use scrape_core::Soup;
let soup = Soup::parse("<div><span>Child</span></div>");
if let Ok(Some(span)) = soup.find("span") {
if let Some(parent) = span.parent() {
assert_eq!(parent.name(), Some("div"));
}
}
Implementations§
Source§
impl<'a> Tag<'a>
impl<'a> Tag<'a>
Source
pub fn document(&self) -> &'a Document
pub fn document(&self) -> &'a Document
Returns a reference to the document containing this tag.
This method is primarily useful for advanced operations that need direct document access, such as custom serialization or traversal.
Source
pub fn name(&self) -> Option<&str>
pub fn name(&self) -> Option<&str>
Returns the tag name (e.g., “div”, “span”, “a”).
Returns None if this is not an element node.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div></div>");
if let Ok(Some(div)) = soup.find("div") {
assert_eq!(div.name(), Some("div"));
}
Source
pub fn get(&self, attr: &str) -> Option<&str>
pub fn get(&self, attr: &str) -> Option<&str>
Returns the value of an attribute, if present.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<a href=\"/page\">Link</a>");
if let Ok(Some(link)) = soup.find("a") {
assert_eq!(link.get("href"), Some("/page"));
assert_eq!(link.get("class"), None);
}
Source
pub fn has_attr(&self, attr: &str) -> bool
pub fn has_attr(&self, attr: &str) -> bool
Checks if this element has the specified attribute.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<input disabled type=\"text\">");
if let Ok(Some(input)) = soup.find("input") {
assert!(input.has_attr("disabled"));
assert!(input.has_attr("type"));
assert!(!input.has_attr("value"));
}
Source
pub fn attrs(&self) -> Option<&HashMap<String, String>>
pub fn attrs(&self) -> Option<&HashMap<String, String>>
Returns all attributes on this element.
Returns None if this is not an element node.
Source
pub fn has_class(&self, class: &str) -> bool
pub fn has_class(&self, class: &str) -> bool
Checks if this element has the specified class.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div class=\"foo bar\"></div>");
if let Ok(Some(div)) = soup.find("div") {
assert!(div.has_class("foo"));
assert!(div.has_class("bar"));
assert!(!div.has_class("baz"));
}
Source
pub fn classes(&self) -> impl Iterator<Item = &str>
pub fn classes(&self) -> impl Iterator<Item = &str>
Returns all classes on this element.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div class=\"foo bar baz\"></div>");
if let Ok(Some(div)) = soup.find("div") {
let classes: Vec<_> = div.classes().collect();
assert_eq!(classes, vec!["foo", "bar", "baz"]);
}
Source
pub fn text(&self) -> String
pub fn text(&self) -> String
Returns the text content of this element and its descendants.
HTML tags are stripped and only text nodes are included. Text from multiple nodes is concatenated with no separator.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
if let Ok(Some(div)) = soup.find("div") {
assert_eq!(div.text(), "Hello World!");
}
Source
pub fn text_into(&self, buf: &mut String)
pub fn text_into(&self, buf: &mut String)
Collects text content into the provided buffer.
This method allows buffer reuse for repeated text extraction, avoiding allocations in performance-critical paths.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div>Hello</div><div>World</div>");
let mut buffer = String::new();
for div in soup.find_all("div").unwrap() {
buffer.clear();
div.text_into(&mut buffer);
println!("{}", buffer);
}
Source
pub fn inner_html(&self) -> String
pub fn inner_html(&self) -> String
Returns the inner HTML of this element.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><span>Hello</span></div>");
if let Ok(Some(div)) = soup.find("div") {
assert_eq!(div.inner_html(), "<span>Hello</span>");
}
Source
pub fn outer_html(&self) -> String
pub fn outer_html(&self) -> String
Returns the outer HTML of this element (including the tag itself).
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><span>Hello</span></div>");
if let Ok(Some(span)) = soup.find("span") {
assert_eq!(span.outer_html(), "<span>Hello</span>");
}
Source
pub fn parent(&self) -> Option<Tag<'a>>
pub fn parent(&self) -> Option<Tag<'a>>
Returns the parent element, if any.
Returns None for the root element or if the parent is not an element.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><span>text</span></div>");
if let Ok(Some(span)) = soup.find("span") {
let parent = span.parent().unwrap();
assert_eq!(parent.name(), Some("div"));
}
Source
pub fn children(&self) -> impl Iterator<Item = Tag<'a>>
pub fn children(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over direct child elements.
Only element nodes are included (text and comments are skipped).
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
let children: Vec<_> = ul.children().collect();
assert_eq!(children.len(), 3);
}
Source
pub fn next_sibling(&self) -> Option<Tag<'a>>
pub fn next_sibling(&self) -> Option<Tag<'a>>
Returns the next sibling element.
Skips text and comment nodes.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li id=\"a\">A</li><li id=\"b\">B</li></ul>");
if let Ok(Some(first)) = soup.find("li") {
if let Some(next) = first.next_sibling() {
assert_eq!(next.get("id"), Some("b"));
}
}
Source
pub fn prev_sibling(&self) -> Option<Tag<'a>>
pub fn prev_sibling(&self) -> Option<Tag<'a>>
Returns the previous sibling element.
Skips text and comment nodes.
Source
pub fn descendants(&self) -> impl Iterator<Item = Tag<'a>>
pub fn descendants(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over all descendant elements.
Only element nodes are included (text and comments are skipped).
Source
pub fn parents(&self) -> impl Iterator<Item = Tag<'a>>
pub fn parents(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over all ancestor elements.
Iterates from parent toward root (does not include the element itself). Only element nodes are included (text and comments are skipped).
§Complexity
- Time: O(depth) — iterates from node to root
- Space: O(1) — lazy evaluation
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><body><div><span>text</span></div></body></html>");
if let Ok(Some(span)) = soup.find("span") {
let names: Vec<_> = span.parents().filter_map(|t| t.name().map(String::from)).collect();
assert_eq!(names, vec!["div", "body", "html"]);
}
Source
pub fn ancestors(&self) -> impl Iterator<Item = Tag<'a>>
pub fn ancestors(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over all ancestor elements.
Alias for parents.
Source
pub fn closest(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>
pub fn closest(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>
Finds the nearest ancestor matching the CSS selector.
Iterates from parent toward root, returning the first match.
Returns Ok(None) if no ancestor matches. Does not match the element itself.
§Complexity
- Time: O(depth × selector_complexity) — tests each ancestor against the selector
- Space: O(1) — no allocation
§Errors
Returns [QueryError::InvalidSelector] if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div class='outer'><div class='inner'><span>text</span></div></div>");
if let Ok(Some(span)) = soup.find("span") {
let inner = span.closest("div.inner").unwrap();
assert!(inner.is_some());
assert!(inner.unwrap().has_class("inner"));
let outer = span.closest("div.outer").unwrap();
assert!(outer.is_some());
assert!(outer.unwrap().has_class("outer"));
let none = span.closest("section").unwrap();
assert!(none.is_none());
}
Source
pub fn next_siblings(&self) -> impl Iterator<Item = Tag<'a>>
pub fn next_siblings(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over following sibling elements.
Does not include the element itself. Only element nodes are included.
§Complexity
- Time: O(width) — iterates through siblings until the end
- Space: O(1) — lazy evaluation
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(first)) = soup.find("li") {
let ids: Vec<_> =
first.next_siblings().filter_map(|t| t.get("id").map(String::from)).collect();
assert_eq!(ids, vec!["b", "c"]);
}
Source
pub fn prev_siblings(&self) -> impl Iterator<Item = Tag<'a>>
pub fn prev_siblings(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over preceding sibling elements.
Does not include the element itself. Only element nodes are included. Iterates in reverse order (from immediate predecessor toward first sibling).
§Complexity
- Time: O(width) — iterates through siblings until the start
- Space: O(1) — lazy evaluation
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(last)) = soup.find("li#c") {
let ids: Vec<_> =
last.prev_siblings().filter_map(|t| t.get("id").map(String::from)).collect();
assert_eq!(ids, vec!["b", "a"]); // Reverse order
}
Source
pub fn siblings(&self) -> impl Iterator<Item = Tag<'a>>
pub fn siblings(&self) -> impl Iterator<Item = Tag<'a>>
Returns an iterator over all sibling elements (excluding self).
Iterates in document order (from first sibling to last). Only element nodes are included.
§Complexity
- Time: O(width) — iterates through all siblings
- Space: O(1) — lazy evaluation
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li id='a'>A</li><li id='b'>B</li><li id='c'>C</li></ul>");
if let Ok(Some(middle)) = soup.find("li#b") {
let ids: Vec<_> = middle.siblings().filter_map(|t| t.get("id").map(String::from)).collect();
assert_eq!(ids, vec!["a", "c"]); // Document order
}
Source
pub fn find(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>
pub fn find(&self, selector: &str) -> QueryResult<Option<Tag<'a>>>
Finds the first descendant matching the selector.
§Errors
Returns [QueryError::InvalidSelector] if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><ul><li class=\"item\">text</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
let item = div.find(".item").unwrap();
assert!(item.is_some());
}
Source
pub fn find_all(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>
pub fn find_all(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>
Finds all descendants matching the selector.
§Errors
Returns [QueryError::InvalidSelector] if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
let items = ul.find_all("li").unwrap();
assert_eq!(items.len(), 3);
}
Source
pub fn select(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>
pub fn select(&self, selector: &str) -> QueryResult<Vec<Tag<'a>>>
Selects descendants using a CSS selector.
Alias for Tag::find_all.
§Errors
Returns [QueryError::InvalidSelector] if the selector syntax is invalid.
Source
pub fn find_compiled(&self, selector: &CompiledSelector) -> Option<Tag<'a>>
pub fn find_compiled(&self, selector: &CompiledSelector) -> Option<Tag<'a>>
Finds the first descendant using a pre-compiled selector.
§Examples
use scrape_core::{Soup, query::CompiledSelector};
let selector = CompiledSelector::compile(".item").unwrap();
let soup = Soup::parse("<div><ul><li class=\"item\">text</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
let item = div.find_compiled(&selector);
assert!(item.is_some());
}
Source
pub fn select_compiled(&self, selector: &CompiledSelector) -> Vec<Tag<'a>>
pub fn select_compiled(&self, selector: &CompiledSelector) -> Vec<Tag<'a>>
Finds all descendants using a pre-compiled selector.
§Examples
use scrape_core::{Soup, query::CompiledSelector};
let selector = CompiledSelector::compile("li").unwrap();
let soup = Soup::parse("<ul><li>A</li><li>B</li><li>C</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
let items = ul.select_compiled(&selector);
assert_eq!(items.len(), 3);
}
Source
pub fn select_text(&self, selector: &str) -> QueryResult<Vec<String>>
pub fn select_text(&self, selector: &str) -> QueryResult<Vec<String>>
Extracts text content from all descendants matching a CSS selector.
Returns the concatenated text content of each matching element within this element’s subtree.
§Errors
Returns [QueryError::InvalidSelector] if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><ul><li>First</li><li>Second</li></ul></div>");
if let Ok(Some(div)) = soup.find("div") {
let texts = div.select_text("li").unwrap();
assert_eq!(texts, vec!["First", "Second"]);
}
Source
pub fn select_attr(
    &self,
    selector: &str,
    attr: &str,
) -> QueryResult<Vec<Option<String>>>
pub fn select_attr( &self, selector: &str, attr: &str, ) -> QueryResult<Vec<Option<String>>>
Extracts attribute values from all descendants matching a CSS selector.
Returns Some(value) if the attribute exists, None if it doesn’t.
§Errors
Returns [QueryError::InvalidSelector] if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<nav><a href='/1'>1</a><a href='/2'>2</a></nav>");
if let Ok(Some(nav)) = soup.find("nav") {
let hrefs = nav.select_attr("a", "href").unwrap();
assert_eq!(hrefs, vec![Some("/1".to_string()), Some("/2".to_string())]);
}
Source
pub fn text_nodes(&self) -> TextNodesIter<'a> ⓘ
pub fn text_nodes(&self) -> TextNodesIter<'a> ⓘ
Returns an iterator over all text nodes in this subtree.
Only text node content is returned; element tags and comments are skipped. Iterates in depth-first order.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
if let Ok(Some(div)) = soup.find("div") {
let texts: Vec<_> = div.text_nodes().collect();
assert_eq!(texts, vec!["Hello ", "World", "!"]);
}
Source
pub fn children_by_name(
    &self,
    name: &'a str,
) -> impl Iterator<Item = Tag<'a>> + 'a
pub fn children_by_name( &self, name: &'a str, ) -> impl Iterator<Item = Tag<'a>> + 'a
Returns an iterator over child elements with the given tag name.
Only direct children are included (not descendants). Tag name matching is case-insensitive.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li>A</li><span>X</span><li>B</li></ul>");
if let Ok(Some(ul)) = soup.find("ul") {
let lis: Vec<_> = ul.children_by_name("li").collect();
assert_eq!(lis.len(), 2);
}
Source
pub fn children_by_class(
    &self,
    class: &'a str,
) -> impl Iterator<Item = Tag<'a>> + 'a
pub fn children_by_class( &self, class: &'a str, ) -> impl Iterator<Item = Tag<'a>> + 'a
Returns an iterator over child elements with the given class.
Only direct children are included (not descendants). Elements are matched if they have the class in their class attribute.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><span class=\"a\">A</span><span class=\"b\">B</span></div>");
if let Ok(Some(div)) = soup.find("div") {
let results: Vec<_> = div.children_by_class("a").collect();
assert_eq!(results.len(), 1);
}