metadata_gen/utils.rs
1//! Utility functions for metadata processing and HTML manipulation.
2//!
3//! This module provides various utility functions for tasks such as HTML escaping,
4//! asynchronous file reading, and metadata extraction from files.
5
6use crate::error::MetadataError;
7use crate::extract_and_prepare_metadata;
8use crate::metatags::MetaTagGroups;
9use std::collections::HashMap;
10use tokio::fs::File;
11use tokio::io::AsyncReadExt;
12
/// Escapes special HTML characters in a string.
///
/// This function replaces the following characters with their HTML entity equivalents:
/// - `&` becomes `&amp;`
/// - `<` becomes `&lt;`
/// - `>` becomes `&gt;`
/// - `"` becomes `&quot;`
/// - `'` becomes `&#x27;`
///
/// Every other character, including non-ASCII text, is copied through unchanged.
///
/// # Arguments
///
/// * `value` - The string to escape.
///
/// # Returns
///
/// A new string with special HTML characters escaped.
///
/// # Examples
///
/// ```
/// use metadata_gen::utils::escape_html;
///
/// let input = "Hello, <world>!";
/// let expected = "Hello, &lt;world&gt;!";
///
/// assert_eq!(escape_html(input), expected);
/// ```
///
/// # Security
///
/// This function is designed to prevent XSS (Cross-Site Scripting) attacks by escaping
/// potentially dangerous characters. However, it should not be relied upon as the sole
/// method of sanitizing user input for use in HTML contexts.
pub fn escape_html(value: &str) -> String {
    // The output is at least as long as the input, so reserving `value.len()`
    // avoids the first few reallocations.
    let mut escaped = String::with_capacity(value.len());

    // Single pass over the input: each character is examined exactly once, so
    // an ampersand that belongs to a freshly written entity is never escaped
    // a second time.
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }

    escaped
}
54
/// Unescapes HTML entities in a string.
///
/// This function replaces HTML entities with their corresponding characters:
/// - `&amp;` becomes `&`
/// - `&lt;` becomes `<`
/// - `&gt;` becomes `>`
/// - `&quot;` becomes `"`
/// - `&#x27;` and `&#39;` become `'`
/// - `&#x2F;` and `&#47;` become `/`
///
/// Entity matching is exact (case-sensitive). Any other `&` sequence is copied
/// through unchanged.
///
/// The input is decoded in a single left-to-right pass, so each entity is
/// unescaped exactly once: `&amp;lt;` becomes `&lt;`, not `<`. This keeps
/// `unescape_html(&escape_html(s)) == s` for every input string.
///
/// # Arguments
///
/// * `value` - The string to unescape.
///
/// # Returns
///
/// A new string with HTML entities unescaped.
///
/// # Examples
///
/// ```
/// use metadata_gen::utils::unescape_html;
///
/// let input = "Hello, &lt;world&gt;!";
/// let expected = "Hello, <world>!";
///
/// assert_eq!(unescape_html(input), expected);
/// ```
///
/// # Security
///
/// This function should be used with caution, especially on user-supplied input,
/// as it can potentially introduce security vulnerabilities if the unescaped content
/// is then rendered as HTML.
pub fn unescape_html(value: &str) -> String {
    // Recognised entities and the character each one decodes to.
    const ENTITIES: [(&str, char); 8] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&#x27;", '\''),
        ("&#39;", '\''),
        ("&#x2F;", '/'),
        ("&#47;", '/'),
    ];

    let mut unescaped = String::with_capacity(value.len());
    let mut rest = value;

    // Jump from ampersand to ampersand; everything in between is literal text.
    while let Some(pos) = rest.find('&') {
        unescaped.push_str(&rest[..pos]);
        let tail = &rest[pos..];

        match ENTITIES
            .iter()
            .find(|(entity, _)| tail.starts_with(*entity))
        {
            Some((entity, ch)) => {
                unescaped.push(*ch);
                // Skip the whole entity so the decoded character is never
                // re-scanned (this is what prevents double-unescaping).
                rest = &tail[entity.len()..];
            }
            None => {
                // Not a recognised entity: keep the ampersand verbatim.
                // `&` is a single byte, so slicing at 1 is a char boundary.
                unescaped.push('&');
                rest = &tail[1..];
            }
        }
    }

    unescaped.push_str(rest);
    unescaped
}
100
101/// Asynchronously reads a file and extracts metadata from its content.
102///
103/// This function reads the content of a file asynchronously and then extracts
104/// metadata, generates keywords, and prepares meta tag groups.
105///
106/// # Arguments
107///
108/// * `file_path` - A string slice representing the path to the file.
109///
110/// # Returns
111///
112/// Returns a Result containing a tuple with:
113/// * `HashMap<String, String>`: Extracted metadata
114/// * `Vec<String>`: A list of keywords
115/// * `MetaTagGroups`: A structure containing various meta tags
116///
117/// # Errors
118///
119/// This function will return a `MetadataError` if:
120/// - File reading fails (e.g., file not found, permission denied)
121/// - Metadata extraction or processing fails
122///
123/// # Examples
124///
125/// ```no_run
126/// use metadata_gen::utils::async_extract_metadata_from_file;
127///
128/// #[tokio::main]
129/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
130/// let (metadata, keywords, meta_tags) = async_extract_metadata_from_file("path/to/file.md").await?;
131/// println!("Metadata: {:?}", metadata);
132/// println!("Keywords: {:?}", keywords);
133/// println!("Meta tags: {}", meta_tags);
134/// Ok(())
135/// }
136/// ```
137///
138/// # Security
139///
140/// This function reads files from the file system. Ensure that the `file_path`
141/// is properly sanitized and validated to prevent potential security issues like
142/// path traversal attacks.
143pub async fn async_extract_metadata_from_file(
144 file_path: &str,
145) -> Result<
146 (HashMap<String, String>, Vec<String>, MetaTagGroups),
147 MetadataError,
148> {
149 let mut file = File::open(file_path)
150 .await
151 .map_err(MetadataError::IoError)?;
152
153 let mut content = String::new();
154 file.read_to_string(&mut content)
155 .await
156 .map_err(MetadataError::IoError)?;
157
158 if content.trim().is_empty() {
159 // If file is empty, return empty structures
160 return Ok((
161 HashMap::new(),
162 Vec::new(),
163 MetaTagGroups {
164 primary: String::new(),
165 apple: String::new(),
166 ms: String::new(),
167 og: String::new(),
168 twitter: String::new(),
169 },
170 ));
171 }
172
173 extract_and_prepare_metadata(&content)
174}
175
/// Unit tests for the HTML escaping helpers and the async file reader.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;
    use tokio::fs::File;
    use tokio::io::AsyncWriteExt;

    /// `<`, `>`, `&` and `"` are each replaced by their entity.
    #[test]
    fn test_escape_html() {
        let input = "Hello, <world> & \"friends\"!";
        let expected =
            "Hello, &lt;world&gt; &amp; &quot;friends&quot;!";
        assert_eq!(escape_html(input), expected);
    }

    /// Apostrophes and markup tags are escaped as well.
    #[test]
    fn test_escape_html_special_characters() {
        let input = "It's <b>bold</b> & it's <i>italic</i>";
        let expected = "It&#x27;s &lt;b&gt;bold&lt;/b&gt; &amp; it&#x27;s &lt;i&gt;italic&lt;/i&gt;";
        assert_eq!(escape_html(input), expected);
    }

    /// The common named entities are decoded back to their characters.
    #[test]
    fn test_unescape_html() {
        let input = "Hello, &lt;world&gt; &amp; &quot;friends&quot;!";
        let expected = "Hello, <world> & \"friends\"!";
        assert_eq!(unescape_html(input), expected);
    }

    /// Adjacent entities, including both apostrophe spellings and a slash
    /// entity, are all decoded.
    #[test]
    fn test_unescape_html_edge_cases() {
        let input = "&lt;&amp;&gt;&quot;&#x27;&#39;&#x2F;";
        let expected = "<&>\"''/";
        assert_eq!(unescape_html(input), expected);
    }

    /// Escaping followed by unescaping yields the original text.
    #[test]
    fn test_escape_unescape_roundtrip() {
        let original = "Test <script>alert('XSS');</script> & other \"special\" chars";
        let escaped = escape_html(original);
        let unescaped = unescape_html(&escaped);
        assert_eq!(original, unescaped);
    }

    /// A file with front matter yields metadata, keywords and primary tags.
    #[tokio::test]
    async fn test_async_extract_metadata_from_file() {
        // Create a temporary directory and file
        let temp_dir = tempdir().unwrap();
        let file_path = temp_dir.path().join("test.md");

        // Write test content to the file
        let content = r#"---
title: Test Page
description: A test page for metadata extraction
keywords: test, metadata, extraction
---
# Test Content
This is a test file for metadata extraction."#;

        let mut file = File::create(&file_path).await.unwrap();
        file.write_all(content.as_bytes()).await.unwrap();
        file.flush().await.unwrap();
        // Close the handle before the function under test reopens the file.
        drop(file);

        // Test the async_extract_metadata_from_file function
        let result = async_extract_metadata_from_file(
            file_path.to_str().unwrap(),
        )
        .await;
        assert!(result.is_ok());

        let (metadata, keywords, meta_tags) = result.unwrap();
        assert_eq!(
            metadata.get("title"),
            Some(&"Test Page".to_string())
        );
        assert_eq!(
            metadata.get("description"),
            Some(&"A test page for metadata extraction".to_string())
        );
        assert_eq!(keywords, vec!["test", "metadata", "extraction"]);
        assert!(!meta_tags.primary.is_empty());
    }

    /// An empty file succeeds with empty metadata, keywords and meta tags.
    #[tokio::test]
    async fn test_async_extract_metadata_from_empty_file() {
        let temp_dir = tempdir().unwrap();
        let file_path = temp_dir.path().join("empty.md");

        // Create an empty file
        let mut file = File::create(&file_path).await.unwrap();
        file.write_all(b"").await.unwrap();
        file.flush().await.unwrap();
        drop(file);

        let result = async_extract_metadata_from_file(
            file_path.to_str().unwrap(),
        )
        .await;

        // Ensure the result is empty metadata, keywords, and meta tags
        assert!(result.is_ok());
        let (metadata, keywords, meta_tags) = result.unwrap();
        assert!(metadata.is_empty());
        assert!(keywords.is_empty());
        assert!(meta_tags.primary.is_empty());
    }

    /// A missing file is reported as `MetadataError::IoError`.
    #[tokio::test]
    async fn test_async_extract_metadata_from_nonexistent_file() {
        let result =
            async_extract_metadata_from_file("nonexistent_file.md")
                .await;
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            MetadataError::IoError(_)
        ));
    }
}
295}