halldyll_core/parse/
opengraph.rs1use scraper::{Html, Selector};
4use std::collections::HashMap;
5
6use crate::types::document::OpenGraph;
7
/// Stateless extractor for Open Graph and Twitter Card metadata found in
/// HTML `<meta>` tags.
///
/// The type carries no data, so it is free to copy and to construct either
/// through `Default` or through its `new` constructor.
#[derive(Debug, Clone, Copy, Default)]
pub struct OpenGraphExtractor;
16
17impl OpenGraphExtractor {
18 pub fn new() -> Self {
20 Self
21 }
22
23 pub fn extract(&self, html: &str) -> OpenGraph {
25 let document = Html::parse_document(html);
26 let selector = Selector::parse(r#"meta[property^="og:"]"#).unwrap();
27
28 let mut og = OpenGraph::default();
29 let mut extra = HashMap::new();
30
31 for meta in document.select(&selector) {
32 let property = match meta.value().attr("property") {
33 Some(p) => p.strip_prefix("og:").unwrap_or(p),
34 None => continue,
35 };
36 let content = match meta.value().attr("content") {
37 Some(c) => c.to_string(),
38 None => continue,
39 };
40
41 match property {
42 "title" => og.title = Some(content),
43 "type" => og.og_type = Some(content),
44 "url" => og.url = Some(content),
45 "description" => og.description = Some(content),
46 "image" => og.image = Some(content),
47 "image:width" => og.image_width = content.parse().ok(),
48 "image:height" => og.image_height = content.parse().ok(),
49 "video" => og.video = Some(content),
50 "video:type" => og.video_type = Some(content),
51 "video:width" => og.video_width = content.parse().ok(),
52 "video:height" => og.video_height = content.parse().ok(),
53 "audio" => og.audio = Some(content),
54 "site_name" => og.site_name = Some(content),
55 "locale" => og.locale = Some(content),
56 _ => {
57 extra.insert(property.to_string(), content);
58 }
59 }
60 }
61
62 og.extra = extra;
63 og
64 }
65
66 pub fn extract_twitter_cards(&self, html: &str) -> TwitterCard {
68 let document = Html::parse_document(html);
69 let selector = Selector::parse(r#"meta[name^="twitter:"]"#).unwrap();
70
71 let mut card = TwitterCard::default();
72
73 for meta in document.select(&selector) {
74 let name = match meta.value().attr("name") {
75 Some(n) => n.strip_prefix("twitter:").unwrap_or(n),
76 None => continue,
77 };
78 let content = match meta.value().attr("content") {
79 Some(c) => c.to_string(),
80 None => continue,
81 };
82
83 match name {
84 "card" => card.card = Some(content),
85 "site" => card.site = Some(content),
86 "creator" => card.creator = Some(content),
87 "title" => card.title = Some(content),
88 "description" => card.description = Some(content),
89 "image" => card.image = Some(content),
90 "image:alt" => card.image_alt = Some(content),
91 _ => {}
92 }
93 }
94
95 card
96 }
97}
98
/// Twitter Card metadata collected from `<meta name="twitter:*">` tags.
///
/// Each field holds the raw `content` attribute of the corresponding tag,
/// or `None` when the document does not provide it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TwitterCard {
    /// Value of `twitter:card`.
    pub card: Option<String>,
    /// Value of `twitter:site`.
    pub site: Option<String>,
    /// Value of `twitter:creator`.
    pub creator: Option<String>,
    /// Value of `twitter:title`.
    pub title: Option<String>,
    /// Value of `twitter:description`.
    pub description: Option<String>,
    /// Value of `twitter:image`.
    pub image: Option<String>,
    /// Value of `twitter:image:alt`.
    pub image_alt: Option<String>,
}