use scraper::{Html, Selector};
use std::collections::HashMap;
use crate::types::document::OpenGraph;
/// Stateless extractor for Open Graph (`og:*`) and Twitter Card (`twitter:*`)
/// `<meta>` tags found in an HTML document.
///
/// The type carries no data, so every instance is interchangeable; `Default`
/// is derived rather than hand-written, and the value is freely copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct OpenGraphExtractor;
impl OpenGraphExtractor {
    /// Creates a new extractor.
    ///
    /// The extractor holds no state, so this is equivalent to
    /// `OpenGraphExtractor::default()`.
    pub fn new() -> Self {
        Self
    }

    /// Extracts Open Graph metadata from `html`.
    ///
    /// Every `<meta>` element whose `property` attribute starts with `og:` and
    /// that also has a `content` attribute is considered. Recognised
    /// properties fill the matching [`OpenGraph`] field; any other `og:*`
    /// property is stored in `extra`, keyed by the property name without the
    /// `og:` prefix.
    ///
    /// When a property is repeated, the first occurrence (in document order)
    /// wins, as the Open Graph protocol prescribes for conflicting tags.
    /// Structured properties (`image:width`, `image:height`, `video:type`,
    /// `video:width`, `video:height`) are only accepted while they can still
    /// belong to the first `og:image` / `og:video`, so the dimensions of a
    /// later image are never paired with the first image's URL.
    ///
    /// Numeric values are trimmed before parsing; values that still fail to
    /// parse leave the field unset.
    pub fn extract(&self, html: &str) -> OpenGraph {
        let document = Html::parse_document(html);
        let selector = Selector::parse(r#"meta[property^="og:"]"#)
            .expect("hard-coded Open Graph selector must be valid CSS");

        let mut og = OpenGraph::default();
        let mut extra = HashMap::new();
        // Number of root `og:image` / `og:video` tags seen so far; used to tie
        // structured properties to the first root object only.
        let mut images_seen = 0usize;
        let mut videos_seen = 0usize;

        for meta in document.select(&selector) {
            let element = meta.value();
            let property = match element.attr("property") {
                Some(raw) => raw.strip_prefix("og:").unwrap_or(raw),
                None => continue,
            };
            let content = match element.attr("content") {
                Some(value) => value,
                None => continue,
            };

            match property {
                "title" => Self::keep_first_text(&mut og.title, content),
                "type" => Self::keep_first_text(&mut og.og_type, content),
                "url" => Self::keep_first_text(&mut og.url, content),
                "description" => Self::keep_first_text(&mut og.description, content),
                "image" => {
                    images_seen += 1;
                    Self::keep_first_text(&mut og.image, content);
                }
                "image:width" if images_seen <= 1 => {
                    Self::keep_first_parsed(&mut og.image_width, content)
                }
                "image:height" if images_seen <= 1 => {
                    Self::keep_first_parsed(&mut og.image_height, content)
                }
                "video" => {
                    videos_seen += 1;
                    Self::keep_first_text(&mut og.video, content);
                }
                "video:type" if videos_seen <= 1 => {
                    Self::keep_first_text(&mut og.video_type, content)
                }
                "video:width" if videos_seen <= 1 => {
                    Self::keep_first_parsed(&mut og.video_width, content)
                }
                "video:height" if videos_seen <= 1 => {
                    Self::keep_first_parsed(&mut og.video_height, content)
                }
                // Structured properties of a second or later image/video:
                // dropped so they are neither attached to the first object
                // nor leaked into `extra` under a misleading key.
                "image:width" | "image:height" | "video:type" | "video:width"
                | "video:height" => {}
                "audio" => Self::keep_first_text(&mut og.audio, content),
                "site_name" => Self::keep_first_text(&mut og.site_name, content),
                "locale" => Self::keep_first_text(&mut og.locale, content),
                _ => {
                    extra
                        .entry(property.to_string())
                        .or_insert_with(|| content.to_string());
                }
            }
        }

        og.extra = extra;
        og
    }

    /// Extracts Twitter Card metadata from `html`.
    ///
    /// Every `<meta>` element whose `name` attribute starts with `twitter:`
    /// and that also has a `content` attribute is considered. Recognised
    /// names (`card`, `site`, `creator`, `title`, `description`, `image`,
    /// `image:alt`) fill the matching [`TwitterCard`] field; other names are
    /// ignored. If a name is repeated, the last occurrence wins.
    pub fn extract_twitter_cards(&self, html: &str) -> TwitterCard {
        let document = Html::parse_document(html);
        let selector = Selector::parse(r#"meta[name^="twitter:"]"#)
            .expect("hard-coded Twitter Card selector must be valid CSS");

        let mut card = TwitterCard::default();
        for meta in document.select(&selector) {
            let element = meta.value();
            let name = match element.attr("name") {
                Some(raw) => raw.strip_prefix("twitter:").unwrap_or(raw),
                None => continue,
            };
            let content = match element.attr("content") {
                Some(value) => value,
                None => continue,
            };
            // Pick the destination first so unknown names cost no allocation.
            let slot = match name {
                "card" => &mut card.card,
                "site" => &mut card.site,
                "creator" => &mut card.creator,
                "title" => &mut card.title,
                "description" => &mut card.description,
                "image" => &mut card.image,
                "image:alt" => &mut card.image_alt,
                _ => continue,
            };
            *slot = Some(content.to_string());
        }
        card
    }

    /// Stores `content` in `slot` unless an earlier tag already filled it.
    fn keep_first_text(slot: &mut Option<String>, content: &str) {
        if slot.is_none() {
            *slot = Some(content.to_string());
        }
    }

    /// Parses the trimmed `content` into `slot` unless an earlier tag already
    /// filled it. A value that fails to parse leaves the slot empty, so a
    /// later well-formed tag can still provide it.
    fn keep_first_parsed<T: std::str::FromStr>(slot: &mut Option<T>, content: &str) {
        if slot.is_none() {
            *slot = content.trim().parse().ok();
        }
    }
}
/// Twitter Card metadata collected from `<meta name="twitter:*">` tags.
///
/// Every field is `None` when the corresponding tag was absent or had no
/// `content` attribute. `PartialEq`/`Eq` are derived so cards can be compared
/// directly, e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TwitterCard {
    /// Content of the `twitter:card` tag.
    pub card: Option<String>,
    /// Content of the `twitter:site` tag.
    pub site: Option<String>,
    /// Content of the `twitter:creator` tag.
    pub creator: Option<String>,
    /// Content of the `twitter:title` tag.
    pub title: Option<String>,
    /// Content of the `twitter:description` tag.
    pub description: Option<String>,
    /// Content of the `twitter:image` tag.
    pub image: Option<String>,
    /// Content of the `twitter:image:alt` tag.
    pub image_alt: Option<String>,
}