webpage_info/
opengraph.rs

1//! OpenGraph metadata extraction
2//!
3//! Parses [OpenGraph](https://ogp.me/) protocol metadata from HTML documents.
4
5use std::collections::HashMap;
6
7use serde::{Deserialize, Serialize};
8
// Security limit for media collections.
//
// Upper bound on the number of entries kept in each individual media list
// (images, videos, audios). Once a list already holds this many entries, any
// further media item for that list is dropped instead of stored, so a page
// that repeats media tags endlessly cannot make the lists grow without bound.
const MAX_MEDIA_ITEMS: usize = 100;
11
/// OpenGraph metadata for a webpage.
///
/// OpenGraph is a protocol for structured data in web pages, originally
/// developed by Facebook. It allows websites to control how content appears
/// when shared on social media platforms.
///
/// Values are filled in one property at a time through [`Opengraph::extend`]:
/// the `Option` fields keep the most recent value they were given, while the
/// `Vec` fields accumulate entries in the order the properties arrive.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Opengraph {
    /// The type of object (e.g., "website", "article", "video.movie"),
    /// taken from the `type` property.
    pub og_type: Option<String>,

    /// The title of the object, taken from the `title` property.
    pub title: Option<String>,

    /// A brief description of the content, taken from the `description` property.
    pub description: Option<String>,

    /// The canonical URL of the object, taken from the `url` property.
    pub url: Option<String>,

    /// The name of the site, taken from the `site_name` property.
    pub site_name: Option<String>,

    /// The locale of the content (e.g., "en_US"), taken from the `locale` property.
    pub locale: Option<String>,

    /// Alternative locales available; one entry is appended for every
    /// `locale:alternate` property.
    pub locale_alternates: Vec<String>,

    /// Images associated with the object.
    ///
    /// An `image` or `image:url` property starts a new entry; other `image:*`
    /// properties refine the most recently added entry.
    pub images: Vec<OpengraphMedia>,

    /// Videos associated with the object (same rules as `images`, using the
    /// `video` properties).
    pub videos: Vec<OpengraphMedia>,

    /// Audio files associated with the object (same rules as `images`, using
    /// the `audio` properties).
    pub audios: Vec<OpengraphMedia>,

    /// Additional properties not covered by standard fields, keyed by the
    /// property name exactly as it was passed to [`Opengraph::extend`].
    /// A repeated name replaces the earlier value.
    pub properties: HashMap<String, String>,
}
52
/// Media object (image, video, or audio) in OpenGraph.
///
/// The same structure is used for all three media kinds; which kind an entry
/// represents is determined by the collection of [`Opengraph`] that holds it.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OpengraphMedia {
    /// URL of the media (the content of the bare media property or of its
    /// `:url` sub-property).
    pub url: String,

    /// Secure (HTTPS) URL of the media, from the `:secure_url` sub-property.
    pub secure_url: Option<String>,

    /// MIME type (e.g., "image/jpeg"), from the `:type` sub-property.
    pub mime_type: Option<String>,

    /// Width in pixels, from the `:width` sub-property.
    /// `None` when absent or when the value could not be parsed as a `u32`.
    pub width: Option<u32>,

    /// Height in pixels, from the `:height` sub-property.
    /// `None` when absent or when the value could not be parsed as a `u32`.
    pub height: Option<u32>,

    /// Alternative text description, from the `:alt` sub-property.
    pub alt: Option<String>,

    /// Additional properties: any other sub-property, keyed by the part of
    /// the name that follows the media prefix (e.g. "foo" for "image:foo").
    pub properties: HashMap<String, String>,
}
77
78impl OpengraphMedia {
79    /// Create a new media object with the given URL.
80    pub fn new(url: impl Into<String>) -> Self {
81        Self {
82            url: url.into(),
83            ..Default::default()
84        }
85    }
86}
87
88impl Opengraph {
89    /// Create an empty OpenGraph structure.
90    pub fn new() -> Self {
91        Self::default()
92    }
93
94    /// Extend the OpenGraph data with a property and its content.
95    ///
96    /// Property names should be without the "og:" prefix (e.g., "title" not "og:title").
97    pub fn extend(&mut self, property: &str, content: String) {
98        match property {
99            "type" => self.og_type = Some(content),
100            "title" => self.title = Some(content),
101            "description" => self.description = Some(content),
102            "url" => self.url = Some(content),
103            "site_name" => self.site_name = Some(content),
104            "locale" => self.locale = Some(content),
105            "locale:alternate" => self.locale_alternates.push(content),
106            _ if property.starts_with("image") => {
107                Self::extend_media("image", property, content, &mut self.images);
108            }
109            _ if property.starts_with("video") => {
110                Self::extend_media("video", property, content, &mut self.videos);
111            }
112            _ if property.starts_with("audio") => {
113                Self::extend_media("audio", property, content, &mut self.audios);
114            }
115            _ => {
116                self.properties.insert(property.to_string(), content);
117            }
118        }
119    }
120
121    /// Parse media properties (image, video, audio).
122    fn extend_media(
123        media_type: &str,
124        property: &str,
125        content: String,
126        collection: &mut Vec<OpengraphMedia>,
127    ) {
128        // "image" or "image:url" starts a new image
129        if property == media_type || property.strip_prefix(media_type) == Some(":url") {
130            // Enforce limit to prevent resource exhaustion
131            if collection.len() < MAX_MEDIA_ITEMS {
132                collection.push(OpengraphMedia::new(content));
133            }
134            return;
135        }
136
137        // Other properties modify the last media item
138        if let Some(media) = collection.last_mut() {
139            // Avoid allocation: check prefix without format!()
140            let prefix_len = media_type.len() + 1; // "image:" length
141            let suffix = if property.len() > prefix_len
142                && property.starts_with(media_type)
143                && property.as_bytes().get(media_type.len()) == Some(&b':')
144            {
145                &property[prefix_len..]
146            } else {
147                ""
148            };
149
150            match suffix {
151                "secure_url" => media.secure_url = Some(content),
152                "type" => media.mime_type = Some(content),
153                "width" => media.width = content.parse().ok(),
154                "height" => media.height = content.parse().ok(),
155                "alt" => media.alt = Some(content),
156                "" => {}
157                _ => {
158                    media.properties.insert(suffix.to_string(), content);
159                }
160            }
161        }
162    }
163
164    /// Check if the OpenGraph data is empty (no meaningful content).
165    pub fn is_empty(&self) -> bool {
166        self.og_type.is_none()
167            && self.title.is_none()
168            && self.description.is_none()
169            && self.url.is_none()
170            && self.images.is_empty()
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Feed the given `(property, content)` pairs, in order, into a fresh
    /// [`Opengraph`] and hand back the result.
    fn build(pairs: &[(&str, &str)]) -> Opengraph {
        let mut og = Opengraph::new();
        for &(property, content) in pairs {
            og.extend(property, content.to_string());
        }
        og
    }

    /// Scalar properties land in their dedicated fields.
    #[test]
    fn test_basic_properties() {
        let og = build(&[
            ("type", "article"),
            ("title", "Test Article"),
            ("description", "A test description"),
        ]);

        assert_eq!(og.og_type.as_deref(), Some("article"));
        assert_eq!(og.title.as_deref(), Some("Test Article"));
        assert_eq!(og.description.as_deref(), Some("A test description"));
    }

    /// Sub-properties that follow an image are attached to that image.
    #[test]
    fn test_image_with_properties() {
        let og = build(&[
            ("image", "http://example.org/image.png"),
            ("image:secure_url", "https://example.org/image.png"),
            ("image:width", "800"),
            ("image:height", "600"),
            ("image:alt", "Example image"),
        ]);

        assert_eq!(og.images.len(), 1);
        let only = og.images.first().expect("one image was added");
        assert_eq!(only.url, "http://example.org/image.png");
        assert_eq!(
            only.secure_url.as_deref(),
            Some("https://example.org/image.png")
        );
        assert_eq!(only.width, Some(800));
        assert_eq!(only.height, Some(600));
        assert_eq!(only.alt.as_deref(), Some("Example image"));
    }

    /// Each bare `image` starts a new entry; widths stay with their own image.
    #[test]
    fn test_multiple_images() {
        let og = build(&[
            ("image", "http://example.org/image1.png"),
            ("image:width", "100"),
            ("image", "http://example.org/image2.png"),
            ("image:width", "200"),
        ]);

        let expected = [
            ("http://example.org/image1.png", Some(100)),
            ("http://example.org/image2.png", Some(200)),
        ];
        assert_eq!(og.images.len(), expected.len());
        for (image, &(url, width)) in og.images.iter().zip(expected.iter()) {
            assert_eq!(image.url, url);
            assert_eq!(image.width, width);
        }
    }

    /// A fresh value is empty; setting a title makes it non-empty.
    #[test]
    fn test_is_empty() {
        assert!(Opengraph::new().is_empty());

        let titled = build(&[("title", "Test")]);
        assert!(!titled.is_empty());
    }
}