Skip to main content

metadata_gen/
utils.rs

1//! Utility functions for metadata processing and HTML manipulation.
2//!
3//! This module provides various utility functions for tasks such as HTML escaping,
4//! asynchronous file reading, and metadata extraction from files.
5
6use crate::error::MetadataError;
7use crate::extract_and_prepare_metadata;
8use crate::metatags::MetaTagGroups;
9use std::collections::HashMap;
10use tokio::fs::File;
11use tokio::io::AsyncReadExt;
12
/// Converts the HTML-significant characters of a string into entities.
///
/// Each of the following characters is rewritten; everything else is copied
/// through unchanged:
/// - `&` becomes `&amp;`
/// - `<` becomes `&lt;`
/// - `>` becomes `&gt;`
/// - `"` becomes `&quot;`
/// - `'` becomes `&#x27;`
///
/// # Arguments
///
/// * `value` - The text to escape.
///
/// # Returns
///
/// A newly allocated `String` holding the escaped text.
///
/// # Examples
///
/// ```
/// use metadata_gen::utils::escape_html;
///
/// let input = "Hello, <world>!";
/// let expected = "Hello, &lt;world&gt;!";
///
/// assert_eq!(escape_html(input), expected);
/// ```
///
/// # Security
///
/// Escaping these characters helps guard against XSS (Cross-Site Scripting),
/// but it should not be the only sanitisation step applied to user input
/// that ends up in an HTML context.
pub fn escape_html(value: &str) -> String {
    // The output is never shorter than the input, so start from that size.
    let mut escaped = String::with_capacity(value.len());

    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }

    escaped
}
54
/// Unescapes HTML entities in a string.
///
/// This function replaces HTML entities with their corresponding characters:
/// - `&amp;` becomes `&`
/// - `&lt;` becomes `<`
/// - `&gt;` becomes `>`
/// - `&quot;` becomes `"`
/// - `&#x27;` and `&#39;` become `'`
/// - `&#x2F;` and `&#x2f;` become `/`
///
/// The input is decoded in a single left-to-right pass, so every entity is
/// decoded exactly once and decoded output is never scanned again. As a
/// result `&amp;lt;` yields the text `&lt;` (not `<`), which makes this
/// function the exact inverse of [`escape_html`]. Unrecognised entities and
/// stray `&` characters are copied through unchanged.
///
/// # Arguments
///
/// * `value` - The string to unescape.
///
/// # Returns
///
/// A new string with HTML entities unescaped.
///
/// # Examples
///
/// ```
/// use metadata_gen::utils::unescape_html;
///
/// let input = "Hello, &lt;world&gt;!";
/// let expected = "Hello, <world>!";
///
/// assert_eq!(unescape_html(input), expected);
///
/// // Double-escaped input is only decoded one level.
/// assert_eq!(unescape_html("&amp;lt;"), "&lt;");
/// ```
///
/// # Security
///
/// This function should be used with caution, especially on user-supplied input,
/// as it can potentially introduce security vulnerabilities if the unescaped content
/// is then rendered as HTML.
pub fn unescape_html(value: &str) -> String {
    // Entities understood by this function, paired with their decoded form.
    const ENTITIES: [(&str, char); 8] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&#x27;", '\''),
        ("&#39;", '\''),
        ("&#x2F;", '/'),
        ("&#x2f;", '/'),
    ];

    // Decoding never grows the text, so the input length is a safe capacity.
    let mut decoded = String::with_capacity(value.len());
    let mut rest = value;

    while let Some(pos) = rest.find('&') {
        // Copy everything before the ampersand verbatim.
        decoded.push_str(&rest[..pos]);
        let tail = &rest[pos..];

        match ENTITIES
            .iter()
            .find(|(entity, _)| tail.starts_with(*entity))
        {
            Some((entity, ch)) => {
                decoded.push(*ch);
                // Skip past the whole entity so its decoded form is never
                // re-interpreted as the start of another entity.
                rest = &tail[entity.len()..];
            }
            None => {
                // Not a known entity: keep the literal `&` (one byte) and
                // continue scanning right after it.
                decoded.push('&');
                rest = &tail[1..];
            }
        }
    }

    decoded.push_str(rest);
    decoded
}
100
101/// Asynchronously reads a file and extracts metadata from its content.
102///
103/// This function reads the content of a file asynchronously and then extracts
104/// metadata, generates keywords, and prepares meta tag groups.
105///
106/// # Arguments
107///
108/// * `file_path` - A string slice representing the path to the file.
109///
110/// # Returns
111///
112/// Returns a Result containing a tuple with:
113/// * `HashMap<String, String>`: Extracted metadata
114/// * `Vec<String>`: A list of keywords
115/// * `MetaTagGroups`: A structure containing various meta tags
116///
117/// # Errors
118///
119/// This function will return a `MetadataError` if:
120/// - File reading fails (e.g., file not found, permission denied)
121/// - Metadata extraction or processing fails
122///
123/// # Examples
124///
125/// ```no_run
126/// use metadata_gen::utils::async_extract_metadata_from_file;
127///
128/// #[tokio::main]
129/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
130///     let (metadata, keywords, meta_tags) = async_extract_metadata_from_file("path/to/file.md").await?;
131///     println!("Metadata: {:?}", metadata);
132///     println!("Keywords: {:?}", keywords);
133///     println!("Meta tags: {}", meta_tags);
134///     Ok(())
135/// }
136/// ```
137///
138/// # Security
139///
140/// This function reads files from the file system. Ensure that the `file_path`
141/// is properly sanitized and validated to prevent potential security issues like
142/// path traversal attacks.
143pub async fn async_extract_metadata_from_file(
144    file_path: &str,
145) -> Result<
146    (HashMap<String, String>, Vec<String>, MetaTagGroups),
147    MetadataError,
148> {
149    let mut file = File::open(file_path)
150        .await
151        .map_err(MetadataError::IoError)?;
152
153    let mut content = String::new();
154    file.read_to_string(&mut content)
155        .await
156        .map_err(MetadataError::IoError)?;
157
158    if content.trim().is_empty() {
159        // If file is empty, return empty structures
160        return Ok((
161            HashMap::new(),
162            Vec::new(),
163            MetaTagGroups {
164                primary: String::new(),
165                apple: String::new(),
166                ms: String::new(),
167                og: String::new(),
168                twitter: String::new(),
169            },
170        ));
171    }
172
173    extract_and_prepare_metadata(&content)
174}
175
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;
    use tokio::fs::File;
    use tokio::io::AsyncWriteExt;

    /// Creates `path`, writes `bytes` to it and flushes, so the data is on
    /// disk and the handle is closed before the caller reads the file back.
    async fn write_fixture(path: &std::path::Path, bytes: &[u8]) {
        let mut handle = File::create(path).await.unwrap();
        handle.write_all(bytes).await.unwrap();
        handle.flush().await.unwrap();
        drop(handle);
    }

    /// Angle brackets, ampersands and double quotes are all escaped.
    #[test]
    fn test_escape_html() {
        assert_eq!(
            escape_html("Hello, <world> & \"friends\"!"),
            "Hello, &lt;world&gt; &amp; &quot;friends&quot;!"
        );
    }

    /// Apostrophes and nested tags are escaped as well.
    #[test]
    fn test_escape_html_special_characters() {
        let escaped =
            escape_html("It's <b>bold</b> & it's <i>italic</i>");
        assert_eq!(
            escaped,
            "It&#x27;s &lt;b&gt;bold&lt;/b&gt; &amp; it&#x27;s &lt;i&gt;italic&lt;/i&gt;"
        );
    }

    /// The common named entities decode back to their characters.
    #[test]
    fn test_unescape_html() {
        assert_eq!(
            unescape_html(
                "Hello, &lt;world&gt; &amp; &quot;friends&quot;!"
            ),
            "Hello, <world> & \"friends\"!"
        );
    }

    /// Back-to-back entities, including the numeric forms, all decode.
    #[test]
    fn test_unescape_html_edge_cases() {
        let decoded =
            unescape_html("&lt;&amp;&gt;&quot;&#x27;&#39;&#x2F;");
        assert_eq!(decoded, "<&>\"''/");
    }

    /// Escaping and then unescaping yields the original text.
    #[test]
    fn test_escape_unescape_roundtrip() {
        let original = "Test <script>alert('XSS');</script> & other \"special\" chars";
        let round_tripped = unescape_html(&escape_html(original));
        assert_eq!(original, round_tripped);
    }

    /// A file with front matter produces metadata, keywords and meta tags.
    #[tokio::test]
    async fn test_async_extract_metadata_from_file() {
        // Scratch directory that is removed when `scratch` is dropped.
        let scratch = tempdir().unwrap();
        let md_path = scratch.path().join("test.md");

        let document = r#"---
title: Test Page
description: A test page for metadata extraction
keywords: test, metadata, extraction
---
# Test Content
This is a test file for metadata extraction."#;

        write_fixture(&md_path, document.as_bytes()).await;

        let outcome =
            async_extract_metadata_from_file(md_path.to_str().unwrap())
                .await;
        assert!(outcome.is_ok());

        let (metadata, keywords, meta_tags) = outcome.unwrap();
        assert_eq!(
            metadata.get("title").map(String::as_str),
            Some("Test Page")
        );
        assert_eq!(
            metadata.get("description").map(String::as_str),
            Some("A test page for metadata extraction")
        );
        assert_eq!(keywords, vec!["test", "metadata", "extraction"]);
        assert!(!meta_tags.primary.is_empty());
    }

    /// An empty file succeeds and yields empty structures.
    #[tokio::test]
    async fn test_async_extract_metadata_from_empty_file() {
        let scratch = tempdir().unwrap();
        let empty_path = scratch.path().join("empty.md");

        write_fixture(&empty_path, b"").await;

        let outcome = async_extract_metadata_from_file(
            empty_path.to_str().unwrap(),
        )
        .await;

        assert!(outcome.is_ok());
        let (metadata, keywords, meta_tags) = outcome.unwrap();
        assert!(metadata.is_empty());
        assert!(keywords.is_empty());
        assert!(meta_tags.primary.is_empty());
    }

    /// A missing file surfaces as `MetadataError::IoError`.
    #[tokio::test]
    async fn test_async_extract_metadata_from_nonexistent_file() {
        let outcome =
            async_extract_metadata_from_file("nonexistent_file.md")
                .await;
        assert!(outcome.is_err());
        assert!(matches!(
            outcome.unwrap_err(),
            MetadataError::IoError(_)
        ));
    }
}