pub struct ParserConfig {
pub base_url: Option<Url>,
pub max_text_length: usize,
pub extract_images: bool,
pub extract_links: bool,
pub extract_tables: bool,
pub extract_code_blocks: bool,
pub extract_structured_data: bool,
pub compute_readability: bool,
pub min_paragraph_length: usize,
pub content_selectors: Vec<String>,
pub remove_selectors: Vec<String>,
pub preserve_whitespace: bool,
}
Configuration for the HTML parser.
Fields§
`base_url: Option<Url>` — Base URL for resolving relative URLs
`max_text_length: usize` — Maximum text length to extract
`extract_images: bool` — Whether to extract images
`extract_links: bool` — Whether to extract links
`extract_tables: bool` — Whether to extract tables
`extract_code_blocks: bool` — Whether to extract code blocks
`extract_structured_data: bool` — Whether to extract structured data
`compute_readability: bool` — Whether to compute readability scores
`min_paragraph_length: usize` — Minimum paragraph length to include
`content_selectors: Vec<String>` — Content selectors (CSS selectors for main content)
`remove_selectors: Vec<String>` — Selectors for elements to remove (ads, nav, footer, etc.)
`preserve_whitespace: bool` — Whether to preserve whitespace
Implementations§
Source§impl ParserConfig
impl ParserConfig
pub fn with_base_url(url: impl AsRef<str>) -> Result<Self, ParseError>
Create a new config with the given base URL.
pub fn add_content_selector(self, selector: impl Into<String>) -> Self
Add a content selector (a CSS selector for main content) and return the updated config.
pub fn add_remove_selector(self, selector: impl Into<String>) -> Self
Add a selector for elements to remove and return the updated config.
Trait Implementations§
Source§impl Clone for ParserConfig
impl Clone for ParserConfig
Source§fn clone(&self) -> ParserConfig
fn clone(&self) -> ParserConfig
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for ParserConfig
impl Debug for ParserConfig
Auto Trait Implementations§
impl Freeze for ParserConfig
impl RefUnwindSafe for ParserConfig
impl Send for ParserConfig
impl Sync for ParserConfig
impl Unpin for ParserConfig
impl UnwindSafe for ParserConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more