pub struct Soup { /* private fields */ }
A parsed HTML document.
Soup is the main entry point for parsing and querying HTML documents.
It provides methods for finding elements by CSS selector or tag name.
§Examples
§Basic Parsing
use scrape_core::Soup;
let html = "<html><body><h1>Hello, World!</h1></body></html>";
let soup = Soup::parse(html);
if let Ok(Some(h1)) = soup.find("h1") {
assert_eq!(h1.text(), "Hello, World!");
}
§CSS Selectors
use scrape_core::Soup;
let html = r#"
<div class="container">
<span class="item">One</span>
<span class="item">Two</span>
</div>
"#;
let soup = Soup::parse(html);
let items = soup.select("div.container > span.item").unwrap();
assert_eq!(items.len(), 2);
§Implementations
impl Soup
pub fn parse(html: &str) -> Self
Parses an HTML string into a Soup document.
This uses the default configuration. For custom configuration,
use Soup::parse_with_config.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><body>Hello</body></html>");
pub fn parse_with_config(html: &str, config: SoupConfig) -> Self
Parses an HTML string with custom configuration.
§Examples
use scrape_core::{Soup, SoupConfig};
let config = SoupConfig::builder().max_depth(128).build();
let soup = Soup::parse_with_config("<html>...</html>", config);
pub fn find(&self, selector: &str) -> QueryResult<Option<Tag<'_>>>
Finds the first element matching the given CSS selector.
§Errors
Returns QueryError::InvalidSelector if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div><span class=\"item\">Hello</span></div>");
let span = soup.find("span.item").unwrap().unwrap();
assert_eq!(span.text(), "Hello");
pub fn select(&self, selector: &str) -> QueryResult<Vec<Tag<'_>>>
Selects elements using a CSS selector.
This is an alias for Soup::find_all for users familiar with
the CSS selector API.
§Errors
Returns QueryError::InvalidSelector if the selector syntax is invalid.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<div class=\"a\"><span class=\"b\">Text</span></div>");
let results = soup.select("div.a > span.b").unwrap();
assert_eq!(results.len(), 1);
pub fn root(&self) -> Option<Tag<'_>>
Returns the root element of the document.
This is typically the <html> element.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><body>text</body></html>");
if let Some(root) = soup.root() {
assert_eq!(root.name(), Some("html"));
}
pub fn title(&self) -> Option<String>
Returns the document’s title, if present.
§Examples
use scrape_core::Soup;
let soup = Soup::parse("<html><head><title>My Page</title></head></html>");
assert_eq!(soup.title(), Some("My Page".to_string()));