Skip to main content

reinhardt_utils/staticfiles/
storage.rs

1use async_trait::async_trait;
2use std::collections::HashMap;
3use std::collections::hash_map::DefaultHasher;
4use std::fs;
5use std::hash::{Hash, Hasher};
6use std::io;
7use std::path::PathBuf;
8use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
9use tracing;
10
11/// Acquire a read lock, recovering from poisoning with a warning log.
12fn read_or_recover<'a, T>(lock: &'a RwLock<T>, context: &str) -> RwLockReadGuard<'a, T> {
13	lock.read().unwrap_or_else(|e| {
14		tracing::warn!("RwLock poisoned during read ({}), recovering", context);
15		e.into_inner()
16	})
17}
18
19/// Acquire a write lock, recovering from poisoning with a warning log.
20fn write_or_recover<'a, T>(lock: &'a RwLock<T>, context: &str) -> RwLockWriteGuard<'a, T> {
21	lock.write().unwrap_or_else(|e| {
22		tracing::warn!("RwLock poisoned during write ({}), recovering", context);
23		e.into_inner()
24	})
25}
26
27// Cloud storage backends
28#[cfg(feature = "s3")]
29pub mod s3;
30
31#[cfg(feature = "s3")]
32pub use s3::{S3Config, S3Storage};
33
34#[cfg(feature = "azure")]
35pub mod azure;
36
37#[cfg(feature = "azure")]
38pub use azure::{AzureBlobConfig, AzureBlobStorage};
39
40#[cfg(feature = "gcs")]
41pub mod gcs;
42
43#[cfg(feature = "gcs")]
44pub use gcs::{GcsConfig, GcsStorage};
45
46// Storage registry
47pub mod registry;
48pub use registry::StorageRegistry;
49
/// Storage trait for static files
///
/// Abstracts over the place where file content is kept. Implementors must be
/// `Send + Sync`. Operations that move file content (`save`, `open`, `delete`)
/// are async and report failures as `io::Error`; `exists` and `url` are
/// synchronous and infallible.
#[async_trait]
pub trait Storage: Send + Sync {
	/// Saves content under the given name and returns the URL.
	///
	/// # Errors
	///
	/// Returns an `io::Error` when the content cannot be stored.
	async fn save(&self, name: &str, content: &[u8]) -> io::Result<String>;
	/// Returns whether a file with the given name exists in storage.
	fn exists(&self, name: &str) -> bool;
	/// Reads and returns the content of the file with the given name.
	///
	/// # Errors
	///
	/// Returns an `io::Error` when the file cannot be read.
	async fn open(&self, name: &str) -> io::Result<Vec<u8>>;
	/// Deletes the file with the given name from storage.
	///
	/// # Errors
	///
	/// Returns an `io::Error` when the deletion fails.
	async fn delete(&self, name: &str) -> io::Result<()>;
	/// Returns the URL for accessing the file with the given name.
	fn url(&self, name: &str) -> String;
}
64
65/// A storage backend that reads and writes files on the local filesystem.
66pub struct FileSystemStorage {
67	/// The root directory where files are stored.
68	pub location: PathBuf,
69	/// The base URL prefix used to generate file URLs.
70	pub base_url: String,
71}
72
73impl FileSystemStorage {
74	/// Creates a new filesystem storage rooted at the given location.
75	pub fn new<P: Into<PathBuf>>(location: P, base_url: &str) -> Self {
76		Self {
77			location: location.into(),
78			base_url: base_url.to_string(),
79		}
80	}
81
82	fn normalize_path(&self, name: &str) -> PathBuf {
83		let name = name.trim_start_matches('/');
84		// Use safe_path_join to prevent directory traversal attacks.
85		// Falls back to simple join only if safe_path_join succeeds.
86		match crate::safe_path_join(&self.location, name) {
87			Ok(safe_path) => safe_path,
88			Err(_) => {
89				tracing::warn!(
90					"Path traversal attempt blocked in FileSystemStorage: {}",
91					name
92				);
93				// Return a path that won't resolve to anything valid outside base
94				self.location.join("__invalid_path__")
95			}
96		}
97	}
98
99	fn normalize_url(&self, base: &str, name: &str) -> String {
100		let base = base.trim_end_matches('/');
101		let name = name.trim_start_matches('/');
102		format!("{}/{}", base, name)
103	}
104}
105
106#[async_trait]
107impl Storage for FileSystemStorage {
108	async fn save(&self, name: &str, content: &[u8]) -> io::Result<String> {
109		let file_path = self.normalize_path(name);
110
111		// Create parent directories if they don't exist
112		if let Some(parent) = file_path.parent() {
113			tokio::fs::create_dir_all(parent).await?;
114		}
115
116		tokio::fs::write(&file_path, content).await?;
117		Ok(self.url(name))
118	}
119
120	fn exists(&self, name: &str) -> bool {
121		self.normalize_path(name).exists()
122	}
123
124	async fn open(&self, name: &str) -> io::Result<Vec<u8>> {
125		tokio::fs::read(self.normalize_path(name)).await
126	}
127
128	async fn delete(&self, name: &str) -> io::Result<()> {
129		let file_path = self.normalize_path(name);
130		if file_path.exists() {
131			tokio::fs::remove_file(file_path).await?;
132		}
133		Ok(())
134	}
135
136	fn url(&self, name: &str) -> String {
137		self.normalize_url(&self.base_url, name)
138	}
139}
140
/// A storage backend that keeps files in memory, useful for testing.
pub struct MemoryStorage {
	/// URL prefix placed in front of file names.
	base_url: String,
	/// File contents keyed by the name they were saved under.
	files: Arc<RwLock<HashMap<String, Vec<u8>>>>,
}

impl MemoryStorage {
	/// Creates a new in-memory storage with the given base URL.
	pub fn new(base_url: &str) -> Self {
		let files = Arc::new(RwLock::new(HashMap::new()));
		Self {
			base_url: base_url.to_owned(),
			files,
		}
	}

	/// Joins `base` and `name` with exactly one `/` between them.
	fn normalize_url(&self, base: &str, name: &str) -> String {
		let prefix = base.trim_end_matches('/');
		let suffix = name.trim_start_matches('/');
		let mut url = String::with_capacity(prefix.len() + suffix.len() + 1);
		url.push_str(prefix);
		url.push('/');
		url.push_str(suffix);
		url
	}
}
162
163#[async_trait]
164impl Storage for MemoryStorage {
165	async fn save(&self, name: &str, content: &[u8]) -> io::Result<String> {
166		let mut files = write_or_recover(&self.files, "MemoryStorage::save");
167		files.insert(name.to_string(), content.to_vec());
168		Ok(self.url(name))
169	}
170
171	fn exists(&self, name: &str) -> bool {
172		read_or_recover(&self.files, "MemoryStorage::exists").contains_key(name)
173	}
174
175	async fn open(&self, name: &str) -> io::Result<Vec<u8>> {
176		read_or_recover(&self.files, "MemoryStorage::open")
177			.get(name)
178			.cloned()
179			.ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))
180	}
181
182	async fn delete(&self, name: &str) -> io::Result<()> {
183		write_or_recover(&self.files, "MemoryStorage::delete").remove(name);
184		Ok(())
185	}
186
187	fn url(&self, name: &str) -> String {
188		self.normalize_url(&self.base_url, name)
189	}
190}
191
192impl Default for MemoryStorage {
193	fn default() -> Self {
194		Self::new("/static/")
195	}
196}
197
/// Configuration for the static files system.
#[derive(Debug, Clone)]
pub struct StaticFilesConfig {
	/// The directory where collected static files are stored for deployment.
	pub static_root: PathBuf,
	/// The URL prefix for serving static files (e.g., `"/static/"`).
	pub static_url: String,
	/// Source directories containing static files to be collected.
	pub staticfiles_dirs: Vec<PathBuf>,
	/// Optional URL prefix for user-uploaded media files.
	pub media_url: Option<String>,
}

impl Default for StaticFilesConfig {
	/// Defaults: root `static`, URL prefix `/static/`, no source directories
	/// and no media URL.
	fn default() -> Self {
		let static_root = PathBuf::from("static");
		let static_url = String::from("/static/");
		Self {
			static_root,
			static_url,
			staticfiles_dirs: vec![],
			media_url: None,
		}
	}
}
221
222/// Locates static files across multiple source directories.
223pub struct StaticFilesFinder {
224	/// The list of directories to search for static files.
225	pub directories: Vec<PathBuf>,
226}
227
228impl StaticFilesFinder {
229	/// Creates a new finder that searches the given directories.
230	pub fn new(directories: Vec<PathBuf>) -> Self {
231		Self { directories }
232	}
233
234	/// Finds the first file matching the given path across all configured directories.
235	pub fn find(&self, path: &str) -> Result<PathBuf, io::Error> {
236		let path = path.trim_start_matches('/');
237		for dir in &self.directories {
238			// Use safe_path_join to prevent directory traversal attacks
239			match crate::safe_path_join(dir, path) {
240				Ok(file_path) => {
241					if file_path.exists() {
242						return Ok(file_path);
243					}
244				}
245				Err(_) => {
246					tracing::warn!(
247						"Path traversal attempt blocked in StaticFilesFinder: {}",
248						path
249					);
250					continue;
251				}
252			}
253		}
254		Err(io::Error::new(
255			io::ErrorKind::NotFound,
256			format!("File not found in any directory: {}", path),
257		))
258	}
259
260	/// Find all static files across all configured directories
261	///
262	/// Returns a vector of all static file paths found in the configured directories.
263	/// Each path is relative to its source directory.
264	///
265	/// # Example
266	///
267	/// ```rust,no_run
268	/// use reinhardt_utils::staticfiles::StaticFilesFinder;
269	/// use std::path::PathBuf;
270	///
271	/// let finder = StaticFilesFinder::new(vec![
272	///     PathBuf::from("static"),
273	///     PathBuf::from("assets"),
274	/// ]);
275	///
276	/// let files = finder.find_all();
277	/// // Returns: ["css/style.css", "js/app.js", "images/logo.png", ...]
278	/// ```
279	pub fn find_all(&self) -> Vec<String> {
280		let mut all_files = Vec::new();
281
282		for dir in &self.directories {
283			if !dir.exists() || !dir.is_dir() {
284				continue;
285			}
286
287			if let Ok(entries) = self.walk_directory(dir, dir) {
288				all_files.extend(entries);
289			}
290		}
291
292		all_files
293	}
294
295	/// Recursively walk a directory and collect all file paths
296	#[allow(clippy::only_used_in_recursion)]
297	fn walk_directory(&self, base_dir: &PathBuf, current_dir: &PathBuf) -> io::Result<Vec<String>> {
298		let mut files = Vec::new();
299
300		for entry in fs::read_dir(current_dir)? {
301			let entry = entry?;
302			let path = entry.path();
303
304			if path.is_file() {
305				// Get relative path from base directory
306				if let Ok(relative) = path.strip_prefix(base_dir)
307					&& let Some(path_str) = relative.to_str()
308				{
309					files.push(path_str.to_string());
310				}
311			} else if path.is_dir() {
312				// Recursively walk subdirectories
313				if let Ok(sub_files) = self.walk_directory(base_dir, &path) {
314					files.extend(sub_files);
315				}
316			}
317		}
318
319		Ok(files)
320	}
321}
322
323/// A storage backend that renames files with a content hash for cache busting.
324pub struct HashedFileStorage {
325	/// The root directory where hashed files are stored.
326	pub location: PathBuf,
327	/// The base URL prefix used to generate file URLs.
328	pub base_url: String,
329	hashed_files: Arc<RwLock<HashMap<String, String>>>,
330}
331
332impl HashedFileStorage {
333	/// Creates a new hashed file storage rooted at the given location.
334	pub fn new<P: Into<PathBuf>>(location: P, base_url: &str) -> Self {
335		Self {
336			location: location.into(),
337			base_url: base_url.to_string(),
338			hashed_files: Arc::new(RwLock::new(HashMap::new())),
339		}
340	}
341
342	fn hash_content(content: &[u8]) -> String {
343		use std::collections::hash_map::DefaultHasher;
344		use std::hash::{Hash, Hasher};
345		let mut hasher = DefaultHasher::new();
346		content.hash(&mut hasher);
347		format!("{:x}", hasher.finish())
348	}
349
350	fn get_hashed_name(&self, name: &str, content: &[u8]) -> String {
351		let hash = Self::hash_content(content);
352		let hash_short = &hash[..12];
353		if let Some(dot_pos) = name.rfind('.') {
354			format!("{}.{}{}", &name[..dot_pos], hash_short, &name[dot_pos..])
355		} else {
356			format!("{}.{}", name, hash_short)
357		}
358	}
359
360	/// Saves a file with a content-hashed filename and returns the hashed name.
361	pub async fn save(&self, name: &str, content: &[u8]) -> io::Result<String> {
362		let hashed_name = self.get_hashed_name(name, content);
363		let file_path = self.location.join(&hashed_name);
364
365		if let Some(parent) = file_path.parent() {
366			tokio::fs::create_dir_all(parent).await?;
367		}
368
369		tokio::fs::write(&file_path, content).await?;
370
371		let mut hashed_files = write_or_recover(&self.hashed_files, "HashedFileStorage::save");
372		hashed_files.insert(name.to_string(), hashed_name.clone());
373
374		Ok(hashed_name)
375	}
376
377	/// Saves multiple files with inter-file dependency resolution (e.g., CSS URL rewriting).
378	///
379	/// Returns the number of files processed.
380	pub async fn save_with_dependencies(
381		&self,
382		files: HashMap<String, Vec<u8>>,
383	) -> io::Result<usize> {
384		let mut hashed_map = HashMap::new();
385		let mut processed_files = HashMap::new();
386
387		// First pass: hash all files to build the mapping
388		for (name, content) in &files {
389			let hashed_name = self.get_hashed_name(name, content);
390			hashed_map.insert(name.clone(), hashed_name);
391		}
392
393		// Second pass: process CSS files to update references, then save all files
394		for (name, content) in files {
395			let mut final_content = content;
396
397			// If it's a CSS file, update URL references
398			if name.ends_with(".css") {
399				let content_str = String::from_utf8_lossy(&final_content);
400				let mut updated = content_str.to_string();
401
402				// Replace all references to other files with their hashed names
403				for (orig_name, hashed_name) in &hashed_map {
404					if orig_name != &name {
405						updated = updated.replace(orig_name, hashed_name);
406					}
407				}
408
409				final_content = updated.into_bytes();
410			}
411
412			let hashed_name = hashed_map.get(&name).unwrap();
413			let file_path = self.location.join(hashed_name);
414
415			if let Some(parent) = file_path.parent() {
416				tokio::fs::create_dir_all(parent).await?;
417			}
418
419			tokio::fs::write(&file_path, &final_content).await?;
420			processed_files.insert(name, hashed_name.clone());
421		}
422
423		// Update the internal mapping
424		let mut hashed_files = write_or_recover(
425			&self.hashed_files,
426			"HashedFileStorage::save_with_dependencies",
427		);
428		for (orig, hashed) in processed_files {
429			hashed_files.insert(orig, hashed);
430		}
431
432		Ok(hashed_map.len())
433	}
434
435	/// Opens and reads the content of a previously saved file by its original name.
436	pub async fn open(&self, name: &str) -> io::Result<Vec<u8>> {
437		let hashed_name = {
438			let hashed_files = read_or_recover(&self.hashed_files, "HashedFileStorage::open");
439			hashed_files
440				.get(name)
441				.ok_or_else(|| {
442					io::Error::new(io::ErrorKind::NotFound, "File not found in mapping")
443				})?
444				.clone()
445		};
446
447		let file_path = self.location.join(&hashed_name);
448		tokio::fs::read(file_path).await
449	}
450
451	/// Returns the URL for a file, using the hashed name if available.
452	pub fn url(&self, name: &str) -> String {
453		let hashed_files = read_or_recover(&self.hashed_files, "HashedFileStorage::url");
454		if let Some(hashed_name) = hashed_files.get(name) {
455			format!("{}{}", self.base_url, hashed_name)
456		} else {
457			format!("{}{}", self.base_url, name)
458		}
459	}
460
461	/// Returns whether a file with the given name exists in the hashed storage.
462	pub fn exists(&self, name: &str) -> bool {
463		let hashed_files = read_or_recover(&self.hashed_files, "HashedFileStorage::exists");
464		if let Some(hashed_name) = hashed_files.get(name) {
465			self.location.join(hashed_name).exists()
466		} else {
467			false
468		}
469	}
470
471	/// Returns the hashed filename for the given original name, if available.
472	pub fn get_hashed_path(&self, name: &str) -> Option<String> {
473		let hashed_files =
474			read_or_recover(&self.hashed_files, "HashedFileStorage::get_hashed_path");
475		hashed_files.get(name).cloned()
476	}
477}
478
/// Manifest file format version.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ManifestVersion {
	/// Version 1 of the manifest format.
	V1,
}

/// Manifest file structure that maps original filenames to hashed filenames.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Manifest {
	/// The manifest format version.
	pub version: ManifestVersion,
	/// Mapping from original file paths to their hashed counterparts.
	pub paths: std::collections::HashMap<String, String>,
}
492
493/// A storage backend that persists a JSON manifest mapping original names to hashed names.
494pub struct ManifestStaticFilesStorage {
495	/// The root directory where files and the manifest are stored.
496	pub location: PathBuf,
497	/// The base URL prefix used to generate file URLs.
498	pub base_url: String,
499	/// The filename of the manifest file (default: `"staticfiles.json"`).
500	pub manifest_name: String,
501	/// If true, lookups for unmapped files will fail rather than fall back.
502	pub manifest_strict: bool,
503	hashed_files: Arc<RwLock<HashMap<String, String>>>,
504}
505
506impl ManifestStaticFilesStorage {
507	/// Creates a new manifest-based storage at the given location.
508	pub fn new<P: Into<PathBuf>>(location: P, base_url: &str) -> Self {
509		Self {
510			location: location.into(),
511			base_url: base_url.to_string(),
512			manifest_name: "staticfiles.json".to_string(),
513			manifest_strict: true,
514			hashed_files: Arc::new(RwLock::new(HashMap::new())),
515		}
516	}
517
518	/// Configures whether strict mode is enabled for manifest lookups.
519	pub fn with_manifest_strict(mut self, strict: bool) -> Self {
520		self.manifest_strict = strict;
521		self
522	}
523
524	fn hash_content(content: &[u8]) -> String {
525		let mut hasher = DefaultHasher::new();
526		content.hash(&mut hasher);
527		format!("{:x}", hasher.finish())
528	}
529
530	fn get_hashed_name(&self, name: &str, content: &[u8]) -> String {
531		let hash = Self::hash_content(content);
532		let hash_short = &hash[..12];
533
534		if let Some(dot_pos) = name.rfind('.') {
535			format!("{}.{}{}", &name[..dot_pos], hash_short, &name[dot_pos..])
536		} else {
537			format!("{}.{}", name, hash_short)
538		}
539	}
540
541	fn normalize_path(&self, name: &str) -> PathBuf {
542		let name = name.trim_start_matches('/');
543		self.location.join(name)
544	}
545
546	fn normalize_url(&self, base: &str, name: &str) -> String {
547		let base = base.trim_end_matches('/');
548		let name = name.trim_start_matches('/');
549		format!("{}/{}", base, name)
550	}
551
552	/// Save multiple files with dependency resolution
553	pub async fn save_with_dependencies(
554		&self,
555		files: HashMap<String, Vec<u8>>,
556	) -> io::Result<usize> {
557		let mut hashed_map = HashMap::new();
558		let mut processed_files = HashMap::new();
559
560		// First pass: hash all files and create mapping
561		for (name, content) in &files {
562			let hashed_name = self.get_hashed_name(name, content);
563			hashed_map.insert(name.clone(), hashed_name);
564		}
565
566		// Second pass: update CSS references and save files
567		for (name, content) in files {
568			let mut final_content = content;
569
570			// If this is a CSS file, update image references
571			if name.ends_with(".css") {
572				let content_str = String::from_utf8_lossy(&final_content);
573				let mut updated = content_str.to_string();
574
575				// Update all url() references
576				for (orig_name, hashed_name) in &hashed_map {
577					if orig_name != &name {
578						updated = updated.replace(orig_name, hashed_name);
579					}
580				}
581
582				final_content = updated.into_bytes();
583			}
584
585			let hashed_name = hashed_map.get(&name).unwrap();
586			let file_path = self.normalize_path(hashed_name);
587
588			if let Some(parent) = file_path.parent() {
589				tokio::fs::create_dir_all(parent).await?;
590			}
591
592			tokio::fs::write(&file_path, &final_content).await?;
593			processed_files.insert(name, hashed_name.clone());
594		}
595
596		// Update internal mapping
597		{
598			let mut hashed_files = write_or_recover(
599				&self.hashed_files,
600				"ManifestStaticFilesStorage::save_with_dependencies",
601			);
602			hashed_files.extend(processed_files);
603		}
604
605		// Save manifest
606		self.save_manifest().await?;
607
608		Ok(hashed_map.len())
609	}
610
611	async fn save_manifest(&self) -> io::Result<()> {
612		let (manifest_path, manifest_json) = {
613			let hashed_files = read_or_recover(
614				&self.hashed_files,
615				"ManifestStaticFilesStorage::save_manifest",
616			);
617			let manifest_path = self.normalize_path(&self.manifest_name);
618
619			// Create manifest with "paths" key to match Django's manifest structure
620			let manifest_data = serde_json::json!({
621				"paths": *hashed_files
622			});
623
624			let manifest_json =
625				serde_json::to_string_pretty(&manifest_data).map_err(io::Error::other)?;
626
627			(manifest_path, manifest_json)
628		};
629
630		tokio::fs::write(manifest_path, manifest_json).await
631	}
632
633	/// Load manifest from disk
634	pub async fn load_manifest(&self) -> io::Result<()> {
635		let manifest_path = self.normalize_path(&self.manifest_name);
636
637		if !manifest_path.exists() {
638			// No manifest file exists yet, that's okay
639			return Ok(());
640		}
641
642		let manifest_content = tokio::fs::read_to_string(manifest_path).await?;
643		let manifest_data: serde_json::Value =
644			serde_json::from_str(&manifest_content).map_err(io::Error::other)?;
645
646		// Extract "paths" object from manifest
647		if let Some(paths) = manifest_data.get("paths").and_then(|p| p.as_object()) {
648			let mut hashed_files = write_or_recover(
649				&self.hashed_files,
650				"ManifestStaticFilesStorage::load_manifest",
651			);
652			for (key, value) in paths {
653				if let Some(hashed_name) = value.as_str() {
654					hashed_files.insert(key.clone(), hashed_name.to_string());
655				}
656			}
657		}
658
659		Ok(())
660	}
661
662	/// Get the hashed path for a given file
663	pub fn get_hashed_path(&self, name: &str) -> Option<String> {
664		let hashed_files = read_or_recover(
665			&self.hashed_files,
666			"ManifestStaticFilesStorage::get_hashed_path",
667		);
668		hashed_files.get(name).cloned()
669	}
670
671	/// Returns whether a file with the given name exists (checking both hashed and original paths).
672	pub fn exists(&self, name: &str) -> bool {
673		// First check if we have a hashed version of this file
674		let hashed_files =
675			read_or_recover(&self.hashed_files, "ManifestStaticFilesStorage::exists");
676		if let Some(hashed_name) = hashed_files.get(name) {
677			// Check hashed file path
678			let hashed_path = self.normalize_path(hashed_name);
679			if hashed_path.exists() {
680				return true;
681			}
682		}
683		drop(hashed_files);
684
685		// Fall back to checking original path
686		self.normalize_path(name).exists()
687	}
688
689	/// Open a file by its original name
690	pub async fn open(&self, name: &str) -> io::Result<Vec<u8>> {
691		let actual_name = {
692			let hashed_files =
693				read_or_recover(&self.hashed_files, "ManifestStaticFilesStorage::open");
694			hashed_files
695				.get(name)
696				.cloned()
697				.unwrap_or_else(|| name.to_string())
698		};
699
700		let file_path = self.normalize_path(&actual_name);
701		tokio::fs::read(file_path).await
702	}
703
704	/// Get URL for a file
705	pub fn url(&self, name: &str) -> String {
706		let hashed_files = read_or_recover(&self.hashed_files, "ManifestStaticFilesStorage::url");
707		let actual_name = hashed_files
708			.get(name)
709			.cloned()
710			.unwrap_or_else(|| name.to_string());
711		drop(hashed_files);
712
713		self.normalize_url(&self.base_url, &actual_name)
714	}
715}