metadata_gen/
metatags.rs

1//! Meta tag generation and extraction module.
2//!
3//! This module provides functionality for generating HTML meta tags from metadata
4//! and extracting meta tags from HTML content.
5
6use crate::error::MetadataError;
7use scraper::{Html, Selector};
8use std::{collections::HashMap, fmt};
9
/// Rendered meta tag markup, bucketed by target platform or category.
///
/// Every field stores the markup for one bucket as a single `String`.
/// The derived `Default` leaves all buckets empty.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct MetaTagGroups {
    /// Markup for the `apple` meta tags.
    pub apple: String,
    /// Markup for the primary meta tags.
    pub primary: String,
    /// Markup for the `og` meta tags.
    pub og: String,
    /// Markup for the `ms` meta tags.
    pub ms: String,
    /// Markup for the `twitter` meta tags.
    pub twitter: String,
}
24
/// A single meta tag, reduced to its identifying key and its content.
///
/// Derives `Hash` and `Default` in addition to the comparison traits so tags
/// can be stored in hashed collections (e.g. for de-duplication) and
/// default-constructed, matching what `MetaTagGroups` already offers.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct MetaTag {
    /// The name or property of the meta tag.
    pub name: String,
    /// The content of the meta tag.
    pub content: String,
}
33
34impl MetaTagGroups {
35    /// Adds a custom meta tag to the appropriate group.
36    ///
37    /// # Arguments
38    ///
39    /// * `name` - The name of the meta tag.
40    /// * `content` - The content of the meta tag.
41    pub fn add_custom_tag(&mut self, name: &str, content: &str) {
42        let formatted_tag = self.format_meta_tag(name, content);
43
44        // Match based on specific prefixes for Apple, MS, OG, Twitter, etc.
45        if name.starts_with("apple-") {
46            // println!("Adding Apple meta tag: {}", formatted_tag);  // Debugging output
47            self.apple.push_str(&formatted_tag);
48        } else if name.starts_with("msapplication-") {
49            // println!("Adding MS meta tag: {}", formatted_tag);  // Debugging output
50            self.ms.push_str(&formatted_tag);
51        } else if name.starts_with("og:") {
52            // println!("Adding OG meta tag: {}", formatted_tag);  // Debugging output
53            self.og.push_str(&formatted_tag);
54        } else if name.starts_with("twitter:") {
55            // println!("Adding Twitter meta tag: {}", formatted_tag);  // Debugging output
56            self.twitter.push_str(&formatted_tag);
57        } else {
58            // println!("Adding Primary meta tag: {}", formatted_tag);  // Debugging output
59            self.primary.push_str(&formatted_tag);
60        }
61    }
62
63    /// Formats a single meta tag.
64    ///
65    /// # Arguments
66    ///
67    /// * `name` - The name of the meta tag.
68    /// * `content` - The content of the meta tag.
69    ///
70    /// # Returns
71    ///
72    /// A formatted meta tag string.
73    pub fn format_meta_tag(&self, name: &str, content: &str) -> String {
74        format!(
75            r#"<meta name="{}" content="{}">"#,
76            name,
77            content.replace('"', "&quot;")
78        )
79    }
80
81    /// Generates meta tags for Apple devices.
82    ///
83    /// # Arguments
84    ///
85    /// * `metadata` - A reference to a HashMap containing the metadata.
86    pub fn generate_apple_meta_tags(
87        &mut self,
88        metadata: &HashMap<String, String>,
89    ) {
90        const APPLE_TAGS: [&str; 3] = [
91            "apple-mobile-web-app-capable",
92            "apple-mobile-web-app-status-bar-style",
93            "apple-mobile-web-app-title",
94        ];
95        self.apple = self.generate_tags(metadata, &APPLE_TAGS);
96    }
97
98    /// Generates primary meta tags like `author`, `description`, and `keywords`.
99    ///
100    /// # Arguments
101    ///
102    /// * `metadata` - A reference to a HashMap containing the metadata.
103    pub fn generate_primary_meta_tags(
104        &mut self,
105        metadata: &HashMap<String, String>,
106    ) {
107        const PRIMARY_TAGS: [&str; 4] =
108            ["author", "description", "keywords", "viewport"];
109        self.primary = self.generate_tags(metadata, &PRIMARY_TAGS);
110    }
111
112    /// Generates Open Graph (`og`) meta tags for social media.
113    ///
114    /// # Arguments
115    ///
116    /// * `metadata` - A reference to a HashMap containing the metadata.
117    pub fn generate_og_meta_tags(
118        &mut self,
119        metadata: &HashMap<String, String>,
120    ) {
121        const OG_TAGS: [&str; 5] = [
122            "og:title",
123            "og:description",
124            "og:image",
125            "og:url",
126            "og:type",
127        ];
128        self.og = self.generate_tags(metadata, &OG_TAGS);
129    }
130
131    /// Generates Microsoft-specific meta tags.
132    ///
133    /// # Arguments
134    ///
135    /// * `metadata` - A reference to a HashMap containing the metadata.
136    pub fn generate_ms_meta_tags(
137        &mut self,
138        metadata: &HashMap<String, String>,
139    ) {
140        const MS_TAGS: [&str; 2] =
141            ["msapplication-TileColor", "msapplication-TileImage"];
142        self.ms = self.generate_tags(metadata, &MS_TAGS);
143    }
144
145    /// Generates Twitter meta tags for embedding rich media in tweets.
146    ///
147    /// # Arguments
148    ///
149    /// * `metadata` - A reference to a HashMap containing the metadata.
150    pub fn generate_twitter_meta_tags(
151        &mut self,
152        metadata: &HashMap<String, String>,
153    ) {
154        const TWITTER_TAGS: [&str; 5] = [
155            "twitter:card",
156            "twitter:site",
157            "twitter:title",
158            "twitter:description",
159            "twitter:image",
160        ];
161        self.twitter = self.generate_tags(metadata, &TWITTER_TAGS);
162    }
163
164    /// Generates meta tags based on the provided list of tag names.
165    ///
166    /// # Arguments
167    ///
168    /// * `metadata` - A reference to a `HashMap` containing the metadata.
169    /// * `tags` - A reference to an array of tag names.
170    ///
171    /// # Returns
172    ///
173    /// A string containing the generated meta tags.
174    pub fn generate_tags(
175        &self,
176        metadata: &HashMap<String, String>,
177        tags: &[&str],
178    ) -> String {
179        tags.iter()
180            .filter_map(|&tag| {
181                metadata
182                    .get(tag)
183                    .map(|value| self.format_meta_tag(tag, value))
184            })
185            .collect::<Vec<_>>()
186            .join("\n")
187    }
188}
189
190/// Implement `Display` for `MetaTagGroups`.
191impl fmt::Display for MetaTagGroups {
192    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193        write!(
194            f,
195            "{}\n{}\n{}\n{}\n{}",
196            self.apple, self.primary, self.og, self.ms, self.twitter
197        )
198    }
199}
200
201/// Generates HTML meta tags based on the provided metadata.
202///
203/// This function takes metadata from a `HashMap` and generates meta tags for various platforms (e.g., Apple, Open Graph, Twitter).
204///
205/// # Arguments
206///
207/// * `metadata` - A reference to a `HashMap` containing the metadata.
208///
209/// # Returns
210///
211/// A `MetaTagGroups` structure with meta tags grouped by platform.
212pub fn generate_metatags(
213    metadata: &HashMap<String, String>,
214) -> MetaTagGroups {
215    let mut meta_tag_groups = MetaTagGroups::default();
216    meta_tag_groups.generate_apple_meta_tags(metadata);
217    meta_tag_groups.generate_primary_meta_tags(metadata);
218    meta_tag_groups.generate_og_meta_tags(metadata);
219    meta_tag_groups.generate_ms_meta_tags(metadata);
220    meta_tag_groups.generate_twitter_meta_tags(metadata);
221    meta_tag_groups
222}
223
224/// Extracts meta tags from HTML content.
225///
226/// This function parses the given HTML content and extracts all meta tags,
227/// including both `name` and `property` attributes.
228///
229/// # Arguments
230///
231/// * `html_content` - A string slice containing the HTML content to parse.
232///
233/// # Returns
234///
235/// Returns a `Result` containing a `Vec<MetaTag>` if successful, or a `MetadataError` if parsing fails.
236///
237/// # Errors
238///
239/// This function will return a `MetadataError` if:
240/// - The HTML content cannot be parsed.
241/// - The meta tag selector cannot be created.
242pub fn extract_meta_tags(
243    html_content: &str,
244) -> Result<Vec<MetaTag>, MetadataError> {
245    let document = Html::parse_document(html_content);
246
247    let meta_selector = Selector::parse("meta").map_err(|e| {
248        MetadataError::ExtractionError {
249            message: format!(
250                "Failed to create meta tag selector: {}",
251                e
252            ),
253        }
254    })?;
255
256    let mut meta_tags = Vec::new();
257
258    for element in document.select(&meta_selector) {
259        let name = element
260            .value()
261            .attr("name")
262            .or_else(|| element.value().attr("property"))
263            .or_else(|| element.value().attr("http-equiv"));
264
265        let content = element.value().attr("content");
266
267        if let (Some(name), Some(content)) = (name, content) {
268            meta_tags.push(MetaTag {
269                name: name.to_string(),
270                content: content.to_string(),
271            });
272        }
273    }
274
275    Ok(meta_tags)
276}
277
278/// Converts a vector of MetaTags into a HashMap for easier access.
279///
280/// # Arguments
281///
282/// * `meta_tags` - A vector of MetaTag structs.
283///
284/// # Returns
285///
286/// A HashMap where the keys are the meta tag names and the values are the contents.
287pub fn meta_tags_to_hashmap(
288    meta_tags: Vec<MetaTag>,
289) -> HashMap<String, String> {
290    meta_tags
291        .into_iter()
292        .map(|tag| (tag.name, tag.content))
293        .collect()
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned metadata map from borrowed key/value pairs.
    fn metadata_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Builds an owned `MetaTag` from borrowed parts.
    fn tag(name: &str, content: &str) -> MetaTag {
        MetaTag {
            name: name.to_string(),
            content: content.to_string(),
        }
    }

    /// Returns `true` when `tags` holds an entry with exactly this name and
    /// content.
    fn has_tag(tags: &[MetaTag], name: &str, content: &str) -> bool {
        tags.iter()
            .any(|tag| tag.name == name && tag.content == content)
    }

    #[test]
    fn test_generate_metatags() {
        let metadata = metadata_from(&[
            ("title", "Test Page"),
            ("description", "A test page"),
            ("og:title", "OG Test Page"),
        ]);

        let meta_tags = generate_metatags(&metadata);

        assert!(meta_tags.primary.contains("description"));
        assert!(meta_tags.og.contains("og:title"));
    }

    #[test]
    fn test_extract_meta_tags() {
        let html = r#"
        <html>
          <head>
            <meta name="description" content="A sample page">
            <meta property="og:title" content="Sample Title">
            <meta http-equiv="content-type" content="text/html; charset=UTF-8">
          </head>
          <body>
            <p>Some content</p>
          </body>
        </html>
        "#;

        let meta_tags = extract_meta_tags(html).unwrap();

        assert_eq!(meta_tags.len(), 3);
        assert!(has_tag(&meta_tags, "description", "A sample page"));
        assert!(has_tag(&meta_tags, "og:title", "Sample Title"));
        assert!(has_tag(
            &meta_tags,
            "content-type",
            "text/html; charset=UTF-8"
        ));
    }

    #[test]
    fn test_extract_meta_tags_empty_html() {
        let html = "<html><head></head><body></body></html>";

        let meta_tags = extract_meta_tags(html).unwrap();

        assert_eq!(meta_tags.len(), 0);
    }

    #[test]
    fn test_meta_tags_to_hashmap() {
        let meta_tags = vec![
            tag("description", "A sample page"),
            tag("og:title", "Sample Title"),
        ];

        let hashmap = meta_tags_to_hashmap(meta_tags);

        assert_eq!(hashmap.len(), 2);
        assert_eq!(
            hashmap.get("description").map(String::as_str),
            Some("A sample page")
        );
        assert_eq!(
            hashmap.get("og:title").map(String::as_str),
            Some("Sample Title")
        );
    }

    #[test]
    fn test_meta_tag_groups_display() {
        let groups = MetaTagGroups {
            apple: "<meta name=\"apple-mobile-web-app-capable\" content=\"yes\">".to_string(),
            primary: "<meta name=\"description\" content=\"A test page\">".to_string(),
            og: "<meta property=\"og:title\" content=\"Test Page\">".to_string(),
            ms: "<meta name=\"msapplication-TileColor\" content=\"#ffffff\">".to_string(),
            twitter: "<meta name=\"twitter:card\" content=\"summary\">".to_string(),
        };

        let display = groups.to_string();

        // One representative marker per group must survive rendering.
        let expected_markers = [
            "apple-mobile-web-app-capable",
            "description",
            "og:title",
            "msapplication-TileColor",
            "twitter:card",
        ];
        for marker in expected_markers.iter() {
            assert!(display.contains(marker));
        }
    }

    #[test]
    fn test_format_meta_tag() {
        let groups = MetaTagGroups::default();

        let tag = groups.format_meta_tag("test", "Test \"Value\"");

        assert_eq!(
            tag,
            r#"<meta name="test" content="Test &quot;Value&quot;">"#
        );
    }
}