pub struct MetaScraper { /* private fields */ }
Holds a parsed HTML document and exposes accessors for common page metadata.
Construct one with MetaScraper::new, then call the per-field
accessors. The document is parsed once at construction time; accessors
are read-only and may be called repeatedly.
Implementations
impl MetaScraper
pub fn new(html: &str) -> Self
Parse an HTML document.
Parsing is eager and lenient — malformed HTML will not panic.
pub fn extract_title(&self) -> Option<String>
Returns the trimmed text of the first <title> element.
Returns None if the tag is absent, empty, or whitespace-only.
§Example
let m = MetaScraper::new("<title>Page Title</title>");
assert_eq!(m.extract_title().as_deref(), Some("Page Title"));
pub fn extract_og_title(&self) -> Option<String>
Returns the OpenGraph title from <meta property="og:title">.
Also accepts the non-conformant <meta name="og:title"> variant
emitted by some CMSes. An empty content attribute is treated as
absent.
§Example
let m = MetaScraper::new(r#"<meta property="og:title" content="Hello" />"#);
assert_eq!(m.extract_og_title().as_deref(), Some("Hello"));
pub fn extract_twitter_title(&self) -> Option<String>
Returns the Twitter Card title from <meta name="twitter:title">.
Also accepts <meta property="twitter:title">, which appears in
the wild. An empty content attribute is treated as absent.
§Example
let m = MetaScraper::new(r#"<meta name="twitter:title" content="Hello" />"#);
assert_eq!(m.extract_twitter_title().as_deref(), Some("Hello"));
pub fn title(&self) -> Option<String>
Returns the page title, trying each source in turn and returning the first match:
1. extract_og_title — og:title
2. extract_twitter_title — twitter:title
3. extract_title — <title>
pub fn extract_description(&self) -> Option<String>
Returns the standard description from <meta name="description">.
An empty content attribute is treated as absent.
§Example
let m = MetaScraper::new(r#"<meta name="description" content="A page." />"#);
assert_eq!(m.extract_description().as_deref(), Some("A page."));
pub fn extract_og_description(&self) -> Option<String>
Returns the OpenGraph description from
<meta property="og:description">.
Also accepts the non-conformant <meta name="og:description">
variant. An empty content attribute is treated as absent.
§Example
let m = MetaScraper::new(r#"<meta property="og:description" content="A page." />"#);
assert_eq!(m.extract_og_description().as_deref(), Some("A page."));
pub fn extract_twitter_description(&self) -> Option<String>
Returns the Twitter Card description from
<meta name="twitter:description">.
Also accepts <meta property="twitter:description">. An empty
content attribute is treated as absent.
§Example
let m = MetaScraper::new(r#"<meta name="twitter:description" content="A page." />"#);
assert_eq!(m.extract_twitter_description().as_deref(), Some("A page."));
pub fn description(&self) -> Option<String>
Returns the page description, trying each source in turn and returning the first match:
1. extract_og_description — og:description
2. extract_twitter_description — twitter:description
3. extract_description — <meta name="description">
pub fn favicon(&self) -> Option<String>
Returns the href of the first <link> whose rel contains
icon as a whitespace-separated token.
Matches rel="icon", rel="shortcut icon", rel="icon shortcut",
and similar forms. Does not match apple-touch-icon (that’s a
single different token).
§Example
let m = MetaScraper::new(r#"<link rel="shortcut icon" href="/favicon.ico" />"#);
assert_eq!(m.favicon().as_deref(), Some("/favicon.ico"));
pub fn extract_og_image(&self) -> Option<String>
Returns the first OpenGraph image URL from
<meta property="og:image">.
Also accepts the non-conformant <meta name="og:image"> variant.
An empty content attribute is treated as absent. For pages that
declare multiple images, see extract_og_images.
§Example
let m = MetaScraper::new(r#"<meta property="og:image" content="https://example.com/i.jpg" />"#);
assert_eq!(m.extract_og_image().as_deref(), Some("https://example.com/i.jpg"));
pub fn extract_og_images(&self) -> Vec<String>
Returns every OpenGraph image URL in document order.
Both <meta property="og:image"> and <meta name="og:image">
contribute. Empty content attributes are skipped. Returns an
empty Vec if none are declared.
§Example
let m = MetaScraper::new(r#"
<meta property="og:image" content="https://example.com/a.jpg" />
<meta property="og:image" content="https://example.com/b.png" />
"#);
assert_eq!(
m.extract_og_images(),
vec!["https://example.com/a.jpg", "https://example.com/b.png"],
);
pub fn extract_twitter_image(&self) -> Option<String>
Returns the Twitter Card image URL from
<meta name="twitter:image">.
Also accepts <meta property="twitter:image">. An empty content
attribute is treated as absent. Related tags such as
twitter:image:alt are not returned.
§Example
let m = MetaScraper::new(r#"<meta name="twitter:image" content="https://example.com/i.jpg" />"#);
assert_eq!(m.extract_twitter_image().as_deref(), Some("https://example.com/i.jpg"));
pub fn image(&self) -> Option<String>
Returns the page image URL, trying each source in turn and returning the first match:
1. extract_og_image — og:image
2. extract_twitter_image — twitter:image
There is no native HTML element to fall back to, so an absent result simply means neither tag was declared.