pub struct HtmlParser { /* private fields */ }
Expand description
Main HTML parser
§Example
use halldyll_parser::HtmlParser;
let html = r#"
<html>
<head><title>Test</title></head>
<body><p>Hello world</p></body>
</html>
"#;
let parser = HtmlParser::new();
let result = parser.parse(html).unwrap();
println!("Title: {:?}", result.metadata.title);
Implementations§
Source§impl HtmlParser
impl HtmlParser
Sourcepub fn with_config(config: ParserConfig) -> Self
pub fn with_config(config: ParserConfig) -> Self
Create a parser with custom configuration
Sourcepub fn with_base_url(url: &str) -> ParserResult<Self>
pub fn with_base_url(url: &str) -> ParserResult<Self>
Create a parser with a base URL
Sourcepub fn set_base_url(&mut self, url: &str) -> ParserResult<()>
pub fn set_base_url(&mut self, url: &str) -> ParserResult<()>
Set the base URL for resolving relative URLs
Sourcepub fn config(&self) -> &ParserConfig
pub fn config(&self) -> &ParserConfig
Get the current configuration
Sourcepub fn config_mut(&mut self) -> &mut ParserConfig
pub fn config_mut(&mut self) -> &mut ParserConfig
Get mutable configuration
Sourcepub fn parse(&self, html: &str) -> ParserResult<ParsedContent>
pub fn parse(&self, html: &str) -> ParserResult<ParsedContent>
Parse HTML and extract all content
Sourcepub fn parse_fragment(&self, html: &str) -> ParserResult<ParsedContent>
pub fn parse_fragment(&self, html: &str) -> ParserResult<ParsedContent>
Parse HTML fragment (not a full document)
Sourcepub fn extract_metadata(&self, html: &str) -> ParserResult<PageMetadata>
pub fn extract_metadata(&self, html: &str) -> ParserResult<PageMetadata>
Extract only metadata
Sourcepub fn extract_text(&self, html: &str) -> ParserResult<TextContent>
pub fn extract_text(&self, html: &str) -> ParserResult<TextContent>
Extract only text content
Sourcepub fn extract_headings(&self, html: &str) -> ParserResult<Vec<Heading>>
pub fn extract_headings(&self, html: &str) -> ParserResult<Vec<Heading>>
Extract only headings
Sourcepub fn extract_links(&self, html: &str) -> ParserResult<Vec<Link>>
pub fn extract_links(&self, html: &str) -> ParserResult<Vec<Link>>
Extract only links
Sourcepub fn extract_images(&self, html: &str) -> ParserResult<Vec<Image>>
pub fn extract_images(&self, html: &str) -> ParserResult<Vec<Image>>
Extract only images
Sourcepub fn extract_lists(&self, html: &str) -> ParserResult<Vec<ListContent>>
pub fn extract_lists(&self, html: &str) -> ParserResult<Vec<ListContent>>
Extract only lists
Sourcepub fn extract_tables(&self, html: &str) -> ParserResult<Vec<TableContent>>
pub fn extract_tables(&self, html: &str) -> ParserResult<Vec<TableContent>>
Extract only tables
Sourcepub fn extract_code_blocks(&self, html: &str) -> ParserResult<Vec<CodeBlock>>
pub fn extract_code_blocks(&self, html: &str) -> ParserResult<Vec<CodeBlock>>
Extract only code blocks
Sourcepub fn extract_quotes(&self, html: &str) -> ParserResult<Vec<Quote>>
pub fn extract_quotes(&self, html: &str) -> ParserResult<Vec<Quote>>
Extract only quotes
Sourcepub fn extract_structured_data(&self, html: &str) -> Vec<StructuredData>
pub fn extract_structured_data(&self, html: &str) -> Vec<StructuredData>
Extract only structured data
Sourcepub fn resolve_url(&self, href: &str) -> Option<String>
pub fn resolve_url(&self, href: &str) -> Option<String>
Resolve a relative URL to absolute using the parser’s base URL
Sourcepub fn has_base_url(&self) -> bool
pub fn has_base_url(&self) -> bool
Check if the parser has a base URL configured
Trait Implementations§
Source§impl Clone for HtmlParser
impl Clone for HtmlParser
Source§fn clone(&self) -> HtmlParser
fn clone(&self) -> HtmlParser
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for HtmlParser
impl Debug for HtmlParser
Auto Trait Implementations§
impl Freeze for HtmlParser
impl RefUnwindSafe for HtmlParser
impl Send for HtmlParser
impl Sync for HtmlParser
impl Unpin for HtmlParser
impl UnwindSafe for HtmlParser
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more