pub struct Soup { /* private fields */ }
A parsed HTML document.
Soup is the main entry point for parsing and querying HTML documents.
It provides methods for finding elements by CSS selector or tag name.
§Examples
§Basic Parsing
use scrape_core::Soup;
let html = "<html><body><h1>Hello, World!</h1></body></html>";
let soup = Soup::parse(html);
if let Ok(Some(h1)) = soup.find("h1") {
assert_eq!(h1.text(), "Hello, World!");
}
§CSS Selectors
use scrape_core::Soup;
let html = r#"
<div class="container">
<span class="item">One</span>
<span class="item">Two</span>
</div>
"#;
let soup = Soup::parse(html);
let items = soup.select("div.container > span.item").unwrap();
assert_eq!(items.len(), 2);
§Implementations
impl Soup
pub fn parse(html: &str) -> Self
Parses an HTML string into a Soup document.
This uses the default configuration. For custom configuration,
use Soup::parse_with_config.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><body>Hello</body></html>");
pub fn parse_with_config(html: &str, config: SoupConfig) -> Self
Parses an HTML string with custom configuration.
§Examples
use scrape_core::{Soup, SoupConfig};
let config = SoupConfig::builder().max_depth(128).build();
let soup = Soup::parse_with_config("<html>...</html>", config);
pub fn parse_fragment(html: &str) -> Self
Parses an HTML fragment without wrapping in html/body tags.
Unlike Soup::parse, this does not wrap the content in an <html><body> structure.
The fragment is parsed as if it appeared inside a <body> element.
§Examples
use scrape_core::Soup;
let soup = Soup::parse_fragment("<span>A</span><span>B</span>");
let spans = soup.find_all("span").unwrap();
assert_eq!(spans.len(), 2);
pub fn parse_fragment_with_context(html: &str, context: &str) -> Self
Parses an HTML fragment with a custom context element.
The context element determines parsing behavior:
- "body": Standard HTML elements (default)
- "table": Allows tr/td without explicit tbody
- "tbody": Allows tr directly
§Examples
use scrape_core::Soup;
let soup = Soup::parse_fragment_with_context("<tr><td>A</td></tr>", "tbody");
let tr = soup.find("tr").unwrap();
assert!(tr.is_some());
pub fn parse_fragment_with_config(html: &str, context: &str, config: SoupConfig) -> Self
Parses an HTML fragment with custom context and configuration.
pub fn find(&self, selector: &str) -> QueryResult<Option<Tag<'_>>>
Finds the first element matching the given CSS selector.
§Errors
Returns QueryError::InvalidSelector if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><span class=\"item\">Hello</span></div>");
let span = soup.find("span.item").unwrap().unwrap();
assert_eq!(span.text(), "Hello");
pub fn select(&self, selector: &str) -> QueryResult<Vec<Tag<'_>>>
Selects elements using a CSS selector.
This is an alias for Soup::find_all for users familiar with
the CSS selector API.
§Errors
Returns QueryError::InvalidSelector if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div class=\"a\"><span class=\"b\">Text</span></div>");
let results = soup.select("div.a > span.b").unwrap();
assert_eq!(results.len(), 1);
pub fn find_compiled(&self, selector: &CompiledSelector) -> Option<Tag<'_>>
Finds the first element using a pre-compiled selector.
§Examples
use scrape_core::{Soup, query::CompiledSelector};
let selector = CompiledSelector::compile("div.item").unwrap();
let soup = Soup::parse("<div class=\"item\">Text</div>");
let result = soup.find_compiled(&selector);
assert!(result.is_some());
pub fn select_compiled(&self, selector: &CompiledSelector) -> Vec<Tag<'_>>
Finds all elements using a pre-compiled selector.
§Examples
use scrape_core::{Soup, query::CompiledSelector};
let selector = CompiledSelector::compile("li").unwrap();
let soup = Soup::parse("<ul><li>A</li><li>B</li></ul>");
let items = soup.select_compiled(&selector);
assert_eq!(items.len(), 2);
pub fn select_text(&self, selector: &str) -> QueryResult<Vec<String>>
Extracts text content from all elements matching a CSS selector.
Returns the concatenated text content of each matching element.
§Errors
Returns QueryError::InvalidSelector if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<ul><li>First</li><li>Second</li></ul>");
let texts = soup.select_text("li").unwrap();
assert_eq!(texts, vec!["First", "Second"]);
pub fn select_attr(&self, selector: &str, attr: &str) -> QueryResult<Vec<Option<String>>>
Extracts attribute values from all elements matching a CSS selector.
The result contains one entry per matching element: Some(value) if the element has the attribute, None if it doesn’t.
§Errors
Returns QueryError::InvalidSelector if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<a href='/a'>A</a><a>B</a>");
let hrefs = soup.select_attr("a", "href").unwrap();
assert_eq!(hrefs, vec![Some("/a".to_string()), None]);
pub fn root(&self) -> Option<Tag<'_>>
Returns the root element of the document.
This is typically the <html> element.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><body>text</body></html>");
if let Some(root) = soup.root() {
assert_eq!(root.name(), Some("html"));
}
pub fn title(&self) -> Option<String>
Returns the document’s title, if present.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><head><title>My Page</title></head></html>");
assert_eq!(soup.title(), Some("My Page".to_string()));