Skip to main content

threatflux_cache/backends/
filesystem.rs

1//! Filesystem storage backend
2
3use async_trait::async_trait;
4use serde::{de::DeserializeOwned, Deserialize, Serialize};
5use std::collections::HashMap;
6use std::hash::Hash;
7use std::path::{Path, PathBuf};
8use tokio::fs::{self, File};
9use tokio::io::AsyncWriteExt;
10
11use crate::{storage::SerializationFormat, CacheEntry, EntryMetadata, Result, StorageBackend};
12
/// Zero-sized marker that ties the key (`K`), value (`V`) and metadata (`M`)
/// type parameters to a backend without storing a value of any of them.
type PhantomTypes<K, V, M> = std::marker::PhantomData<(K, V, M)>;
15
16/// Filesystem storage backend
17#[allow(clippy::type_complexity)]
18pub struct FilesystemBackend<K, V, M = ()>
19where
20    K: Hash + Eq + Clone + Send + Sync,
21    V: Clone + Send + Sync,
22    M: Clone + Send + Sync,
23{
24    base_path: PathBuf,
25    format: SerializationFormat,
26    _phantom: PhantomTypes<K, V, M>,
27}
28
29impl<K, V, M> FilesystemBackend<K, V, M>
30where
31    K: Hash + Eq + Clone + Send + Sync,
32    V: Clone + Send + Sync,
33    M: Clone + Send + Sync,
34{
35    /// Create a new filesystem backend with the given base path
36    pub async fn new<P: AsRef<Path>>(base_path: P) -> Result<Self> {
37        let base_path = base_path.as_ref().to_path_buf();
38        fs::create_dir_all(&base_path).await?;
39
40        Ok(Self {
41            base_path,
42            #[cfg(feature = "json-serialization")]
43            format: SerializationFormat::Json,
44            #[cfg(all(not(feature = "json-serialization"), feature = "bincode-serialization"))]
45            format: SerializationFormat::Bincode,
46            _phantom: std::marker::PhantomData,
47        })
48    }
49
50    /// Set the serialization format
51    pub fn with_format(mut self, format: SerializationFormat) -> Self {
52        self.format = format;
53        self
54    }
55
56    /// Sanitize a filename by removing or replacing dangerous characters
57    fn sanitize_filename(filename: &str) -> String {
58        // Replace path separators and other dangerous characters with safe alternatives
59        let mut result = filename
60            .chars()
61            .map(|c| match c {
62                '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
63                c if c.is_control() => '_', // Replace control characters
64                c => c,
65            })
66            .collect::<String>();
67
68        // Replace leading dots to prevent hidden files
69        if result.starts_with('.') {
70            result = result.replacen('.', "_", 1);
71        }
72
73        // Clean up trailing dots and whitespace
74        result.trim_matches('.').trim().to_string()
75    }
76
77    /// Get the path for a cache file
78    fn get_cache_file_path(&self, key: &str) -> PathBuf {
79        let sanitized_key = Self::sanitize_filename(key);
80        // Ensure the filename isn't empty after sanitization
81        let safe_key = if sanitized_key.is_empty() {
82            "cache_entry".to_string()
83        } else {
84            sanitized_key
85        };
86
87        self.base_path
88            .join(format!("{}.{}", safe_key, self.format.extension()))
89    }
90
91    /// Get the metadata file path
92    fn get_metadata_path(&self) -> PathBuf {
93        self.base_path
94            .join(format!("metadata.{}", self.format.extension()))
95    }
96}
97
98#[async_trait]
99impl<K, V, M> StorageBackend for FilesystemBackend<K, V, M>
100where
101    K: Serialize + DeserializeOwned + Hash + Eq + Clone + Send + Sync + std::fmt::Display + 'static,
102    V: Serialize + DeserializeOwned + Clone + Send + Sync + 'static,
103    M: Serialize + DeserializeOwned + Clone + Send + Sync + EntryMetadata,
104{
105    type Key = K;
106    type Value = V;
107    type Metadata = M;
108
109    async fn save(&self, entries: &HashMap<K, Vec<CacheEntry<K, V, M>>>) -> Result<()> {
110        // Save each key's entries to a separate file
111        for (key, entry_vec) in entries {
112            let file_path = self.get_cache_file_path(&key.to_string());
113            let data = self.format.serialize(entry_vec)?;
114
115            let mut file = File::create(&file_path).await?;
116            file.write_all(&data).await?;
117            file.flush().await?;
118        }
119
120        // Save metadata about the cache
121        let metadata = CacheMetadata {
122            total_keys: entries.len(),
123            last_updated: chrono::Utc::now(),
124        };
125
126        let metadata_path = self.get_metadata_path();
127        let data = self.format.serialize(&metadata)?;
128
129        let mut file = File::create(&metadata_path).await?;
130        file.write_all(&data).await?;
131        file.flush().await?;
132
133        Ok(())
134    }
135
136    async fn load(&self) -> Result<HashMap<K, Vec<CacheEntry<K, V, M>>>> {
137        let mut entries = HashMap::new();
138
139        // Read all cache files
140        let mut dir_entries = fs::read_dir(&self.base_path).await?;
141
142        while let Some(entry) = dir_entries.next_entry().await? {
143            let path = entry.path();
144
145            // Skip non-cache files
146            if path.extension().and_then(|s| s.to_str()) != Some(self.format.extension()) {
147                continue;
148            }
149
150            // Skip metadata file
151            if path.file_stem().and_then(|s| s.to_str()) == Some("metadata") {
152                continue;
153            }
154
155            // Read and deserialize the file
156            match fs::read(&path).await {
157                Ok(data) => {
158                    match self.format.deserialize::<Vec<CacheEntry<K, V, M>>>(&data) {
159                        Ok(entry_vec) => {
160                            if let Some(first_entry) = entry_vec.first() {
161                                entries.insert(first_entry.key.clone(), entry_vec);
162                            }
163                        }
164                        Err(e) => {
165                            // Log error but continue loading other files
166                            eprintln!("Failed to deserialize cache file {:?}: {}", path, e);
167                        }
168                    }
169                }
170                Err(e) => {
171                    // Log error but continue loading other files
172                    eprintln!("Failed to read cache file {:?}: {}", path, e);
173                }
174            }
175        }
176
177        Ok(entries)
178    }
179
180    async fn remove(&self, key: &K) -> Result<()> {
181        let file_path = self.get_cache_file_path(&key.to_string());
182        if file_path.exists() {
183            fs::remove_file(&file_path).await?;
184        }
185        Ok(())
186    }
187
188    async fn clear(&self) -> Result<()> {
189        let mut dir_entries = fs::read_dir(&self.base_path).await?;
190
191        while let Some(entry) = dir_entries.next_entry().await? {
192            let path = entry.path();
193
194            // Only remove cache files
195            if path.extension().and_then(|s| s.to_str()) == Some(self.format.extension()) {
196                fs::remove_file(&path).await?;
197            }
198        }
199
200        Ok(())
201    }
202
203    async fn contains(&self, key: &K) -> Result<bool> {
204        let file_path = self.get_cache_file_path(&key.to_string());
205        Ok(file_path.exists())
206    }
207
208    async fn size_bytes(&self) -> Result<u64> {
209        let mut total_size = 0u64;
210        let mut dir_entries = fs::read_dir(&self.base_path).await?;
211
212        while let Some(entry) = dir_entries.next_entry().await? {
213            if let Ok(metadata) = entry.metadata().await {
214                total_size += metadata.len();
215            }
216        }
217
218        Ok(total_size)
219    }
220
221    async fn compact(&self) -> Result<()> {
222        // For filesystem backend, compaction could involve:
223        // - Removing expired entries
224        // - Consolidating small files
225        // - Rewriting files with compression
226        // For now, just a no-op
227        Ok(())
228    }
229}
230
231/// Metadata about the cache stored on filesystem
232#[derive(Debug, Clone, Serialize, Deserialize)]
233struct CacheMetadata {
234    total_keys: usize,
235    last_updated: chrono::DateTime<chrono::Utc>,
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Backend flavour exercised by every test in this module.
    type StringBackend = FilesystemBackend<String, String>;

    /// Map type accepted by `StringBackend::save`.
    type StringEntries = HashMap<String, Vec<CacheEntry<String, String, ()>>>;

    /// Opens a backend rooted at `dir`.
    async fn open_backend(dir: &Path) -> StringBackend {
        StringBackend::new(dir).await.unwrap()
    }

    /// Builds a map with exactly one key that holds a single entry.
    fn single_entry(key: &str, value: &str) -> StringEntries {
        let mut entries = HashMap::new();
        entries.insert(
            key.to_string(),
            vec![CacheEntry::new(key.to_string(), value.to_string())],
        );
        entries
    }

    /// Shorthand for the sanitizer under test.
    fn sanitize(input: &str) -> String {
        StringBackend::sanitize_filename(input)
    }

    #[tokio::test]
    async fn test_filesystem_backend_operations() {
        let temp_dir = TempDir::new().unwrap();
        let backend = open_backend(temp_dir.path()).await;
        let key1 = "key1".to_string();
        let key2 = "key2".to_string();

        // A fresh directory yields no entries.
        assert!(backend.load().await.unwrap().is_empty());

        // Saved entries come back on load.
        let entries = single_entry("key1", "value1");
        backend.save(&entries).await.unwrap();
        let loaded = backend.load().await.unwrap();
        assert_eq!(loaded.len(), 1);
        assert!(loaded.contains_key("key1"));

        // `contains` only reports keys that were stored.
        assert!(backend.contains(&key1).await.unwrap());
        assert!(!backend.contains(&key2).await.unwrap());

        // `remove` deletes the stored key.
        backend.remove(&key1).await.unwrap();
        assert!(!backend.contains(&key1).await.unwrap());

        // `clear` wipes everything that was saved.
        backend.save(&entries).await.unwrap();
        backend.clear().await.unwrap();
        assert!(backend.load().await.unwrap().is_empty());
    }

    #[tokio::test]
    async fn test_filesystem_backend_persistence() {
        let temp_dir = TempDir::new().unwrap();
        let path = temp_dir.path().to_path_buf();

        // Write through one backend instance and drop it.
        {
            let writer = open_backend(&path).await;
            let entries = single_entry("persistent_key", "persistent_value");
            writer.save(&entries).await.unwrap();
        }

        // A second instance on the same directory sees the data.
        let reader = open_backend(&path).await;
        let loaded = reader.load().await.unwrap();
        assert_eq!(loaded.len(), 1);
        assert!(loaded.contains_key("persistent_key"));

        let stored = &loaded["persistent_key"];
        assert_eq!(stored[0].value, "persistent_value");
    }

    #[tokio::test]
    async fn test_filesystem_backend_size() {
        let temp_dir = TempDir::new().unwrap();
        let backend = open_backend(temp_dir.path()).await;

        // Store five small entries.
        let entries: HashMap<_, _> = (0..5)
            .map(|i| {
                let key = format!("key{}", i);
                let entry = CacheEntry::new(key.clone(), format!("value{}", i));
                (key, vec![entry])
            })
            .collect();
        backend.save(&entries).await.unwrap();

        // Something must have been written.
        let size = backend.size_bytes().await.unwrap();
        assert!(size > 0);
    }

    #[tokio::test]
    async fn test_path_traversal_protection() {
        let temp_dir = TempDir::new().unwrap();
        let backend = open_backend(temp_dir.path()).await;

        // Keys that try to escape the base directory.
        let malicious_keys = [
            "../etc/passwd",
            "..\\windows\\system32\\config\\sam",
            "/etc/shadow",
            "C:\\Windows\\System32\\config\\SAM",
            "../../sensitive_file",
            "./../../../etc/hosts",
            "../",
            "..",
            "test/../../../etc/passwd",
            "normal_file/../../../etc/passwd",
        ];

        for malicious_key in malicious_keys.iter().copied() {
            let path = backend.get_cache_file_path(malicious_key);

            // The resolved path must stay below the base directory.
            assert!(
                path.starts_with(&backend.base_path),
                "Malicious key '{}' resulted in path outside base directory: {:?}",
                malicious_key,
                path
            );

            // The final component must not smuggle in a separator.
            let filename = path.file_name().unwrap().to_str().unwrap();
            assert!(
                !filename.contains('/') && !filename.contains('\\'),
                "Filename '{}' contains path separators for key '{}'",
                filename,
                malicious_key
            );
        }
    }

    #[test]
    fn test_filename_sanitization() {
        // (input, expected sanitized output)
        let cases = [
            ("../etc/passwd", "_._etc_passwd"),
            ("file\\name", "file_name"),
            ("file:name", "file_name"),
            ("file*name", "file_name"),
            ("file?name", "file_name"),
            ("file\"name", "file_name"),
            ("file<name>", "file_name_"),
            ("file|name", "file_name"),
            (".hidden", "_hidden"),
            ("...", "_"),
            ("", ""),
            ("   ", ""),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(sanitize(input), expected);
        }

        // The property that matters most for security: no path traversal.
        let result = sanitize("../etc/passwd");
        assert!(!result.contains('/'));
        assert!(!result.contains('\\'));
        assert!(!result.starts_with('.'));
    }
}