pub struct ContentExtractor;
Expand description
Content extraction functionality
Implementations§
Source§impl ContentExtractor
impl ContentExtractor
Source pub async fn extract_main_content(page: &PageHandle) -> Result<ExtractedContent>
pub async fn extract_main_content(page: &PageHandle) -> Result<ExtractedContent>
Extract main content from the page
Source pub async fn extract_from_selector(
page: &PageHandle,
selector: &str,
) -> Result<ExtractedContent>
pub async fn extract_from_selector( page: &PageHandle, selector: &str, ) -> Result<ExtractedContent>
Extract content from a specific selector
Source pub async fn extract_all_text(page: &PageHandle) -> Result<String>
pub async fn extract_all_text(page: &PageHandle) -> Result<String>
Extract all text from the page body
Source pub fn html_to_text(html: &str) -> String
pub fn html_to_text(html: &str) -> String
Convert HTML to plain text
Source pub fn decode_html_entities(text: &str) -> String
pub fn decode_html_entities(text: &str) -> String
Decode common HTML entities
Source pub fn html_to_markdown(html: &str) -> String
pub fn html_to_markdown(html: &str) -> String
Convert HTML to markdown
Source pub fn normalize_whitespace(text: &str) -> String
pub fn normalize_whitespace(text: &str) -> String
Normalize whitespace in text
Auto Trait Implementations§
impl Freeze for ContentExtractor
impl RefUnwindSafe for ContentExtractor
impl Send for ContentExtractor
impl Sync for ContentExtractor
impl Unpin for ContentExtractor
impl UnwindSafe for ContentExtractor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where T: ?Sized,
impl<T> BorrowMut<T> for T where T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more