halldyll_core/parse/
opengraph.rs

//! Open Graph and Twitter Card metadata extraction from HTML documents.
2
3use scraper::{Html, Selector};
4use std::collections::HashMap;
5
6use crate::types::document::OpenGraph;
7
/// Stateless extractor for Open Graph (`og:*`) and Twitter Card
/// (`twitter:*`) metadata found in HTML `<meta>` tags.
///
/// The type carries no data, so it is free to copy and every instance
/// behaves identically; `Default` yields the same value as `new()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct OpenGraphExtractor;
16
17impl OpenGraphExtractor {
18    /// New extractor
19    pub fn new() -> Self {
20        Self
21    }
22
23    /// Extract all OpenGraph metadata
24    pub fn extract(&self, html: &str) -> OpenGraph {
25        let document = Html::parse_document(html);
26        let selector = Selector::parse(r#"meta[property^="og:"]"#).unwrap();
27        
28        let mut og = OpenGraph::default();
29        let mut extra = HashMap::new();
30
31        for meta in document.select(&selector) {
32            let property = match meta.value().attr("property") {
33                Some(p) => p.strip_prefix("og:").unwrap_or(p),
34                None => continue,
35            };
36            let content = match meta.value().attr("content") {
37                Some(c) => c.to_string(),
38                None => continue,
39            };
40
41            match property {
42                "title" => og.title = Some(content),
43                "type" => og.og_type = Some(content),
44                "url" => og.url = Some(content),
45                "description" => og.description = Some(content),
46                "image" => og.image = Some(content),
47                "image:width" => og.image_width = content.parse().ok(),
48                "image:height" => og.image_height = content.parse().ok(),
49                "video" => og.video = Some(content),
50                "video:type" => og.video_type = Some(content),
51                "video:width" => og.video_width = content.parse().ok(),
52                "video:height" => og.video_height = content.parse().ok(),
53                "audio" => og.audio = Some(content),
54                "site_name" => og.site_name = Some(content),
55                "locale" => og.locale = Some(content),
56                _ => {
57                    extra.insert(property.to_string(), content);
58                }
59            }
60        }
61
62        og.extra = extra;
63        og
64    }
65
66    /// Extract Twitter Cards
67    pub fn extract_twitter_cards(&self, html: &str) -> TwitterCard {
68        let document = Html::parse_document(html);
69        let selector = Selector::parse(r#"meta[name^="twitter:"]"#).unwrap();
70        
71        let mut card = TwitterCard::default();
72
73        for meta in document.select(&selector) {
74            let name = match meta.value().attr("name") {
75                Some(n) => n.strip_prefix("twitter:").unwrap_or(n),
76                None => continue,
77            };
78            let content = match meta.value().attr("content") {
79                Some(c) => c.to_string(),
80                None => continue,
81            };
82
83            match name {
84                "card" => card.card = Some(content),
85                "site" => card.site = Some(content),
86                "creator" => card.creator = Some(content),
87                "title" => card.title = Some(content),
88                "description" => card.description = Some(content),
89                "image" => card.image = Some(content),
90                "image:alt" => card.image_alt = Some(content),
91                _ => {}
92            }
93        }
94
95        card
96    }
97}
98
/// Twitter Card metadata read from `twitter:*` `<meta>` tags.
///
/// Each field holds the `content` of the corresponding tag, or `None` when
/// the tag was not present. `Default` yields a card with every field unset.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TwitterCard {
    /// Card type from `twitter:card` (e.g. `summary`, `summary_large_image`,
    /// `player`, `app`).
    pub card: Option<String>,
    /// Site `@username` from `twitter:site`.
    pub site: Option<String>,
    /// Creator `@username` from `twitter:creator`.
    pub creator: Option<String>,
    /// Title from `twitter:title`.
    pub title: Option<String>,
    /// Description from `twitter:description`.
    pub description: Option<String>,
    /// Image URL from `twitter:image`.
    pub image: Option<String>,
    /// Image alt text from `twitter:image:alt`.
    pub image_alt: Option<String>,
}