Struct MetaScraper

Source

pub struct MetaScraper { /* private fields */ }

Expand description

Holds a parsed HTML document and exposes accessors for common page metadata.

Construct one with MetaScraper::new, then call the per-field accessors. The document is parsed once at construction time; accessors are read-only and may be called repeatedly.

Implementations§

Source §

impl MetaScraper

Source

pub fn new(html: &str) -> Self

Parse an HTML document.

Parsing is eager and lenient — malformed HTML will not panic.

Source

pub fn extract_title(&self) -> Option<String>

Returns the trimmed text of the first <title> element.

Returns None if the tag is absent, empty, or whitespace-only.

§Example

let m = MetaScraper::new("<title>Page Title</title>");
assert_eq!(m.extract_title().as_deref(), Some("Page Title"));

Source

pub fn extract_og_title(&self) -> Option<String>

Returns the OpenGraph title from <meta property="og:title">.

Also accepts the non-conformant <meta name="og:title"> variant emitted by some CMSes. An empty content attribute is treated as absent.

§Example

let m = MetaScraper::new(r#"<meta property="og:title" content="Hello" />"#);
assert_eq!(m.extract_og_title().as_deref(), Some("Hello"));

Source

pub fn extract_twitter_title(&self) -> Option<String>

Returns the Twitter Card title from <meta name="twitter:title">.

Also accepts <meta property="twitter:title">, which appears in the wild. An empty content attribute is treated as absent.

§Example

let m = MetaScraper::new(r#"<meta name="twitter:title" content="Hello" />"#);
assert_eq!(m.extract_twitter_title().as_deref(), Some("Hello"));

Source

pub fn title(&self) -> Option<String>

Returns the page title, trying each source in turn and returning the first match:

extract_og_title — og:title
extract_twitter_title — twitter:title
extract_title — <title>

Source

pub fn extract_description(&self) -> Option<String>

Returns the standard description from <meta name="description">.

An empty content attribute is treated as absent.

§Example

let m = MetaScraper::new(r#"<meta name="description" content="A page." />"#);
assert_eq!(m.extract_description().as_deref(), Some("A page."));

Source

pub fn extract_og_description(&self) -> Option<String>

Returns the OpenGraph description from <meta property="og:description">.

Also accepts the non-conformant <meta name="og:description"> variant. An empty content attribute is treated as absent.

§Example

let m = MetaScraper::new(r#"<meta property="og:description" content="A page." />"#);
assert_eq!(m.extract_og_description().as_deref(), Some("A page."));

Source

pub fn extract_twitter_description(&self) -> Option<String>

Returns the Twitter Card description from <meta name="twitter:description">.

Also accepts <meta property="twitter:description">. An empty content attribute is treated as absent.

§Example

let m = MetaScraper::new(r#"<meta name="twitter:description" content="A page." />"#);
assert_eq!(m.extract_twitter_description().as_deref(), Some("A page."));

Source

pub fn description(&self) -> Option<String>

Returns the page description, trying each source in turn and returning the first match:

extract_og_description — og:description
extract_twitter_description — twitter:description
extract_description — <meta name="description">

Source

pub fn favicon(&self) -> Option<String>

Returns the href of the first <link> whose rel contains icon as a whitespace-separated token.

Matches rel="icon", rel="shortcut icon", rel="icon shortcut", and similar forms. Does not match apple-touch-icon (that’s a single different token).

§Example

let m = MetaScraper::new(r#"<link rel="shortcut icon" href="/favicon.ico" />"#);
assert_eq!(m.favicon().as_deref(), Some("/favicon.ico"));

Source

pub fn extract_og_image(&self) -> Option<String>

Returns the first OpenGraph image URL from <meta property="og:image">.

Also accepts the non-conformant <meta name="og:image"> variant. An empty content attribute is treated as absent. For pages that declare multiple images, see extract_og_images.

§Example

let m = MetaScraper::new(r#"<meta property="og:image" content="https://example.com/i.jpg" />"#);
assert_eq!(m.extract_og_image().as_deref(), Some("https://example.com/i.jpg"));

Source

pub fn extract_og_images(&self) -> Vec<String>

Returns every OpenGraph image URL in document order.

Both <meta property="og:image"> and <meta name="og:image"> contribute. Empty content attributes are skipped. Returns an empty Vec if none are declared.

§Example

let m = MetaScraper::new(r#"
    <meta property="og:image" content="https://example.com/a.jpg" />
    <meta property="og:image" content="https://example.com/b.png" />
"#);
assert_eq!(
    m.extract_og_images(),
    vec!["https://example.com/a.jpg", "https://example.com/b.png"],
);

Source

pub fn extract_twitter_image(&self) -> Option<String>

Returns the Twitter Card image URL from <meta name="twitter:image">.

Also accepts <meta property="twitter:image">. An empty content attribute is treated as absent. Related tags such as twitter:image:alt are not returned.

§Example

let m = MetaScraper::new(r#"<meta name="twitter:image" content="https://example.com/i.jpg" />"#);
assert_eq!(m.extract_twitter_image().as_deref(), Some("https://example.com/i.jpg"));

Source

pub fn image(&self) -> Option<String>

Returns the page image URL, trying each source in turn and returning the first match:

extract_og_image — og:image
extract_twitter_image — twitter:image

There is no native HTML element to fall back to, so an absent result simply means neither tag was declared.

Source

pub fn lang(&self) -> Option<String>

Returns the value of the lang attribute on the root <html> element.

§Example

let m = MetaScraper::new(r#"<html lang="en"><head></head></html>"#);
assert_eq!(m.lang().as_deref(), Some("en"));

Auto Trait Implementations§

§

impl UnwindSafe for MetaScraper

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

MetaScraper

Struct MetaScraper Copy item path

Implementations§

impl MetaScraper

pub fn new(html: &str) -> Self

pub fn extract_title(&self) -> Option<String>

§Example

pub fn extract_og_title(&self) -> Option<String>

§Example

pub fn extract_twitter_title(&self) -> Option<String>

§Example

pub fn title(&self) -> Option<String>

pub fn extract_description(&self) -> Option<String>

§Example

pub fn extract_og_description(&self) -> Option<String>

§Example

pub fn extract_twitter_description(&self) -> Option<String>

§Example

pub fn description(&self) -> Option<String>

pub fn favicon(&self) -> Option<String>

§Example

pub fn extract_og_image(&self) -> Option<String>

§Example

pub fn extract_og_images(&self) -> Vec<String>

§Example

pub fn extract_twitter_image(&self) -> Option<String>

§Example

pub fn image(&self) -> Option<String>

pub fn lang(&self) -> Option<String>

§Example

Auto Trait Implementations§

impl Freeze for MetaScraper

impl !RefUnwindSafe for MetaScraper

impl !Send for MetaScraper

impl !Sync for MetaScraper

impl Unpin for MetaScraper

impl UnsafeUnpin for MetaScraper

impl UnwindSafe for MetaScraper

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct MetaScraper

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,