sherpack_core/
files.rs

1//! Files API for accessing pack files from templates
2//!
3//! This module provides a sandboxed file access API that allows templates
4//! to read files from within the pack directory. All operations are restricted
5//! to the pack root to prevent path traversal attacks.
6//!
7//! # Security
8//!
9//! - All paths are resolved relative to the pack root
10//! - Absolute paths are rejected
11//! - Path traversal attempts (../) are detected and rejected
12//! - Files outside the pack directory cannot be accessed
13//!
14//! # Example
15//!
16//! ```jinja2
17//! {# Read a file #}
18//! data:
19//!   nginx.conf: {{ files.get("config/nginx.conf") | b64encode }}
20//!
21//! {# Check if file exists #}
22//! {% if files.exists("config/custom.yaml") %}
23//!   custom: {{ files.get("config/custom.yaml") }}
24//! {% endif %}
25//!
26//! {# Iterate over files matching a glob pattern #}
27//! {% for file in files.glob("scripts/*.sh") %}
28//!   {{ file.name }}: {{ file.content | b64encode }}
29//! {% endfor %}
30//! ```
31
32use std::collections::HashMap;
33use std::path::{Path, PathBuf};
34use std::sync::{Arc, RwLock};
35
36use serde::{Deserialize, Serialize};
37
38use crate::error::{CoreError, Result};
39
40/// Trait for file access providers
41///
42/// This trait allows for different implementations:
43/// - `SandboxedFileProvider`: Real filesystem access (sandboxed to pack root)
44/// - `MockFileProvider`: In-memory files for testing
45/// - `ArchiveFileProvider`: Read files from a tar.gz archive (future)
46pub trait FileProvider: Send + Sync {
47    /// Read the contents of a file as bytes
48    fn get(&self, path: &str) -> Result<Vec<u8>>;
49
50    /// Check if a file exists
51    fn exists(&self, path: &str) -> bool;
52
53    /// List files matching a glob pattern
54    fn glob(&self, pattern: &str) -> Result<Vec<FileEntry>>;
55
56    /// Read a file as lines
57    fn lines(&self, path: &str) -> Result<Vec<String>>;
58
59    /// Read the contents of a file as a string (UTF-8)
60    fn get_string(&self, path: &str) -> Result<String> {
61        let bytes = self.get(path)?;
62        String::from_utf8(bytes).map_err(|e| CoreError::FileAccess {
63            path: path.to_string(),
64            message: format!("file is not valid UTF-8: {}", e),
65        })
66    }
67}
68
69/// A file entry returned by glob operations
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct FileEntry {
72    /// Relative path from pack root
73    pub path: String,
74    /// File name (without directory)
75    pub name: String,
76    /// File content as string (UTF-8 lossy)
77    pub content: String,
78    /// File size in bytes
79    pub size: usize,
80}
81
82/// Sandboxed file provider that restricts access to the pack directory
83///
84/// This is the default provider used during template rendering.
85/// It ensures that templates cannot access files outside the pack root.
86#[derive(Debug)]
87pub struct SandboxedFileProvider {
88    /// The root directory of the pack (all paths are relative to this)
89    root: PathBuf,
90    /// Canonicalized root for security checks
91    canonical_root: PathBuf,
92    /// Cache of file contents to avoid repeated reads
93    cache: Arc<RwLock<HashMap<PathBuf, Vec<u8>>>>,
94}
95
96impl SandboxedFileProvider {
97    /// Create a new sandboxed file provider
98    ///
99    /// # Arguments
100    ///
101    /// * `pack_root` - The root directory of the pack
102    ///
103    /// # Errors
104    ///
105    /// Returns an error if the pack root doesn't exist or cannot be canonicalized.
106    pub fn new(pack_root: impl AsRef<Path>) -> Result<Self> {
107        let root = pack_root.as_ref().to_path_buf();
108
109        if !root.exists() {
110            return Err(CoreError::FileAccess {
111                path: root.display().to_string(),
112                message: "pack root directory does not exist".to_string(),
113            });
114        }
115
116        let canonical_root = root.canonicalize().map_err(|e| CoreError::FileAccess {
117            path: root.display().to_string(),
118            message: format!("failed to canonicalize pack root: {}", e),
119        })?;
120
121        Ok(Self {
122            root,
123            canonical_root,
124            cache: Arc::new(RwLock::new(HashMap::new())),
125        })
126    }
127
128    /// Resolve a relative path and verify it's within the sandbox
129    ///
130    /// # Security
131    ///
132    /// This method:
133    /// 1. Rejects absolute paths
134    /// 2. Joins the path with the pack root
135    /// 3. Canonicalizes to resolve symlinks and `..` components
136    /// 4. Verifies the result is still within the pack root
137    fn resolve_path(&self, relative: &str) -> Result<PathBuf> {
138        let requested = Path::new(relative);
139
140        // Reject absolute paths
141        if requested.is_absolute() {
142            return Err(CoreError::FileAccess {
143                path: relative.to_string(),
144                message: "absolute paths are not allowed in templates".to_string(),
145            });
146        }
147
148        // Quick check for obvious traversal attempts
149        if relative.contains("..") {
150            // Still do the full check, but this catches simple cases early
151        }
152
153        // Build the full path
154        let full_path = self.root.join(relative);
155
156        // Check if the file exists before canonicalizing
157        if !full_path.exists() {
158            return Err(CoreError::FileAccess {
159                path: relative.to_string(),
160                message: "file not found".to_string(),
161            });
162        }
163
164        // Canonicalize to resolve symlinks and .. components
165        let canonical = full_path
166            .canonicalize()
167            .map_err(|e| CoreError::FileAccess {
168                path: relative.to_string(),
169                message: format!("failed to resolve path: {}", e),
170            })?;
171
172        // Verify the path is within the sandbox
173        if !canonical.starts_with(&self.canonical_root) {
174            return Err(CoreError::FileAccess {
175                path: relative.to_string(),
176                message: "path escapes pack directory (sandbox violation)".to_string(),
177            });
178        }
179
180        Ok(canonical)
181    }
182
183    /// Check if a path is valid without reading the file
184    fn is_valid_path(&self, relative: &str) -> bool {
185        self.resolve_path(relative).is_ok()
186    }
187}
188
189impl FileProvider for SandboxedFileProvider {
190    fn get(&self, path: &str) -> Result<Vec<u8>> {
191        let resolved = self.resolve_path(path)?;
192
193        // Check cache first
194        {
195            let cache = self.cache.read().map_err(|_| CoreError::FileAccess {
196                path: path.to_string(),
197                message: "cache lock poisoned".to_string(),
198            })?;
199
200            if let Some(content) = cache.get(&resolved) {
201                return Ok(content.clone());
202            }
203        }
204
205        // Read the file
206        let content = std::fs::read(&resolved).map_err(|e| CoreError::FileAccess {
207            path: path.to_string(),
208            message: format!("failed to read file: {}", e),
209        })?;
210
211        // Update cache
212        {
213            let mut cache = self.cache.write().map_err(|_| CoreError::FileAccess {
214                path: path.to_string(),
215                message: "cache lock poisoned".to_string(),
216            })?;
217
218            cache.insert(resolved, content.clone());
219        }
220
221        Ok(content)
222    }
223
224    fn exists(&self, path: &str) -> bool {
225        self.is_valid_path(path)
226    }
227
228    fn glob(&self, pattern: &str) -> Result<Vec<FileEntry>> {
229        // Validate the glob pattern
230        let glob_pattern = glob::Pattern::new(pattern).map_err(|e| CoreError::GlobPattern {
231            message: format!("invalid glob pattern '{}': {}", pattern, e),
232        })?;
233
234        let mut entries = Vec::new();
235
236        // Walk the pack directory
237        for entry in walkdir::WalkDir::new(&self.root)
238            .into_iter()
239            .filter_map(|e| e.ok())
240            .filter(|e| e.file_type().is_file())
241        {
242            // Get relative path
243            let rel_path = match entry.path().strip_prefix(&self.root) {
244                Ok(p) => p,
245                Err(_) => continue,
246            };
247
248            let rel_str = rel_path.to_string_lossy();
249
250            // Check if it matches the pattern
251            if glob_pattern.matches(&rel_str) {
252                // Read the file content
253                let content = match std::fs::read_to_string(entry.path()) {
254                    Ok(c) => c,
255                    Err(_) => {
256                        // For binary files, use lossy conversion
257                        match std::fs::read(entry.path()) {
258                            Ok(bytes) => String::from_utf8_lossy(&bytes).to_string(),
259                            Err(_) => continue,
260                        }
261                    }
262                };
263
264                let size = content.len();
265
266                entries.push(FileEntry {
267                    path: rel_str.to_string(),
268                    name: entry.file_name().to_string_lossy().to_string(),
269                    content,
270                    size,
271                });
272            }
273        }
274
275        // Sort for deterministic output (important for reproducible templates)
276        entries.sort_by(|a, b| a.path.cmp(&b.path));
277
278        Ok(entries)
279    }
280
281    fn lines(&self, path: &str) -> Result<Vec<String>> {
282        let content = self.get_string(path)?;
283        Ok(content.lines().map(String::from).collect())
284    }
285}
286
/// Mock file provider for testing
///
/// This provider stores files in memory, allowing tests to run
/// without filesystem access.
#[derive(Debug, Default, Clone)]
pub struct MockFileProvider {
    /// File contents keyed by the exact path string they were registered under.
    files: HashMap<String, Vec<u8>>,
}

impl MockFileProvider {
    /// Create a new empty mock provider
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a file to the mock filesystem
    ///
    /// Registering the same `path` again replaces the earlier content.
    pub fn with_file(mut self, path: &str, content: impl Into<Vec<u8>>) -> Self {
        self.files.insert(path.to_string(), content.into());
        self
    }

    /// Add a text file to the mock filesystem
    pub fn with_text_file(self, path: &str, content: &str) -> Self {
        self.with_file(path, content)
    }

    /// Add multiple text files at once
    ///
    /// `files` yields `(path, content)` pairs. The strings only need to live
    /// for the duration of the call (they are copied), so both literals and
    /// strings built at runtime are accepted.
    pub fn with_files<'p, 'c>(
        mut self,
        files: impl IntoIterator<Item = (&'p str, &'c str)>,
    ) -> Self {
        self.files.extend(
            files
                .into_iter()
                .map(|(path, content)| (path.to_string(), content.as_bytes().to_vec())),
        );
        self
    }
}
325
326impl FileProvider for MockFileProvider {
327    fn get(&self, path: &str) -> Result<Vec<u8>> {
328        self.files
329            .get(path)
330            .cloned()
331            .ok_or_else(|| CoreError::FileAccess {
332                path: path.to_string(),
333                message: "file not found".to_string(),
334            })
335    }
336
337    fn exists(&self, path: &str) -> bool {
338        self.files.contains_key(path)
339    }
340
341    fn glob(&self, pattern: &str) -> Result<Vec<FileEntry>> {
342        let glob_pattern = glob::Pattern::new(pattern).map_err(|e| CoreError::GlobPattern {
343            message: format!("invalid glob pattern '{}': {}", pattern, e),
344        })?;
345
346        let mut entries: Vec<_> = self
347            .files
348            .iter()
349            .filter(|(path, _)| glob_pattern.matches(path))
350            .map(|(path, content)| {
351                let name = Path::new(path)
352                    .file_name()
353                    .map(|s| s.to_string_lossy().to_string())
354                    .unwrap_or_default();
355
356                FileEntry {
357                    path: path.clone(),
358                    name,
359                    content: String::from_utf8_lossy(content).to_string(),
360                    size: content.len(),
361                }
362            })
363            .collect();
364
365        // Sort for deterministic output
366        entries.sort_by(|a, b| a.path.cmp(&b.path));
367
368        Ok(entries)
369    }
370
371    fn lines(&self, path: &str) -> Result<Vec<String>> {
372        let content = self.get_string(path)?;
373        Ok(content.lines().map(String::from).collect())
374    }
375}
376
377/// A wrapper that provides the Files API to templates
378///
379/// This struct is what gets injected into the template context as `files`.
380/// It wraps any `FileProvider` implementation.
381#[derive(Clone)]
382pub struct Files {
383    provider: Arc<dyn FileProvider>,
384}
385
386impl Files {
387    /// Create a new Files wrapper from a provider
388    pub fn new(provider: impl FileProvider + 'static) -> Self {
389        Self {
390            provider: Arc::new(provider),
391        }
392    }
393
394    /// Create Files from an Arc'd provider (avoids double-Arc)
395    pub fn from_arc(provider: Arc<dyn FileProvider>) -> Self {
396        Self { provider }
397    }
398
399    /// Create a sandboxed Files instance for a pack
400    pub fn for_pack(pack_root: impl AsRef<Path>) -> Result<Self> {
401        let provider = SandboxedFileProvider::new(pack_root)?;
402        Ok(Self::new(provider))
403    }
404
405    /// Create a mock Files instance for testing
406    pub fn mock() -> MockFileProvider {
407        MockFileProvider::new()
408    }
409
410    /// Get file contents as string
411    pub fn get(&self, path: &str) -> Result<String> {
412        self.provider.get_string(path)
413    }
414
415    /// Get file contents as bytes
416    pub fn get_bytes(&self, path: &str) -> Result<Vec<u8>> {
417        self.provider.get(path)
418    }
419
420    /// Check if file exists
421    pub fn exists(&self, path: &str) -> bool {
422        self.provider.exists(path)
423    }
424
425    /// Glob for files
426    pub fn glob(&self, pattern: &str) -> Result<Vec<FileEntry>> {
427        self.provider.glob(pattern)
428    }
429
430    /// Read file as lines
431    pub fn lines(&self, path: &str) -> Result<Vec<String>> {
432        self.provider.lines(path)
433    }
434}
435
436impl std::fmt::Debug for Files {
437    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
438        f.debug_struct("Files").finish_non_exhaustive()
439    }
440}
441
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a temporary pack with two config files, one script and a README.
    ///
    /// The returned `TempDir` must be kept alive for as long as the pack is used.
    fn create_test_pack() -> TempDir {
        let temp = TempDir::new().unwrap();

        // Create directory structure
        std::fs::create_dir_all(temp.path().join("config")).unwrap();
        std::fs::create_dir_all(temp.path().join("scripts")).unwrap();

        // Create test files
        std::fs::write(temp.path().join("config/app.yaml"), "key: value").unwrap();
        std::fs::write(temp.path().join("config/db.yaml"), "host: localhost").unwrap();
        std::fs::write(
            temp.path().join("scripts/init.sh"),
            "#!/bin/bash\necho hello",
        )
        .unwrap();
        std::fs::write(temp.path().join("README.md"), "# Test Pack").unwrap();

        temp
    }

    #[test]
    fn test_sandboxed_provider_read_file() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        let content = provider.get_string("config/app.yaml").unwrap();
        assert_eq!(content, "key: value");
    }

    #[test]
    fn test_sandboxed_provider_exists() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        assert!(provider.exists("config/app.yaml"));
        assert!(provider.exists("README.md"));
        assert!(!provider.exists("nonexistent.txt"));
    }

    #[test]
    fn test_sandboxed_provider_glob() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        let entries = provider.glob("config/*.yaml").unwrap();
        assert_eq!(entries.len(), 2);

        // Check sorted order
        assert_eq!(entries[0].name, "app.yaml");
        assert_eq!(entries[1].name, "db.yaml");
    }

    #[test]
    fn test_sandboxed_provider_lines() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        let lines = provider.lines("scripts/init.sh").unwrap();
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "#!/bin/bash");
        assert_eq!(lines[1], "echo hello");
    }

    #[test]
    fn test_sandbox_prevents_absolute_paths() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        // Use platform-appropriate absolute path
        #[cfg(windows)]
        let abs_path = "C:\\Windows\\System32\\config\\SAM";
        #[cfg(not(windows))]
        let abs_path = "/etc/passwd";

        let result = provider.get(abs_path);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("absolute paths"));
    }

    #[test]
    fn test_sandbox_prevents_path_traversal() {
        // Nest the pack one level inside the temp dir so that the file
        // "outside the pack" still lives inside `outer` and is removed with
        // it, instead of being left behind in the shared system temp dir.
        let outer = TempDir::new().unwrap();
        let pack_root = outer.path().join("pack");
        std::fs::create_dir_all(&pack_root).unwrap();
        std::fs::write(outer.path().join("secret.txt"), "secret data").unwrap();

        let provider = SandboxedFileProvider::new(&pack_root).unwrap();

        // Try to access it via path traversal
        let result = provider.get("../secret.txt");
        assert!(result.is_err());

        // The target exists, so the failure must come from the sandbox check.
        let err = result.unwrap_err().to_string();
        assert!(err.contains("sandbox"));
    }

    #[test]
    fn test_sandbox_prevents_deep_traversal() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        let result = provider.get("config/../../../../../../etc/passwd");
        assert!(result.is_err());
    }

    #[test]
    fn test_mock_provider() {
        let provider = MockFileProvider::new()
            .with_text_file("config/app.yaml", "key: value")
            .with_text_file("config/db.yaml", "host: localhost");

        assert!(provider.exists("config/app.yaml"));
        assert!(!provider.exists("nonexistent.txt"));

        let content = provider.get_string("config/app.yaml").unwrap();
        assert_eq!(content, "key: value");
    }

    #[test]
    fn test_mock_provider_glob() {
        let provider = MockFileProvider::new()
            .with_text_file("config/a.yaml", "a")
            .with_text_file("config/b.yaml", "b")
            .with_text_file("other/c.yaml", "c");

        let entries = provider.glob("config/*.yaml").unwrap();
        assert_eq!(entries.len(), 2);

        // Verify sorted order
        assert_eq!(entries[0].path, "config/a.yaml");
        assert_eq!(entries[1].path, "config/b.yaml");
    }

    #[test]
    fn test_files_wrapper() {
        let mock = MockFileProvider::new().with_text_file("test.txt", "hello world");

        let files = Files::new(mock);

        assert!(files.exists("test.txt"));
        assert_eq!(files.get("test.txt").unwrap(), "hello world");
    }

    #[test]
    fn test_glob_deterministic_order() {
        // Create files in non-alphabetical order
        let provider = MockFileProvider::new()
            .with_text_file("z.yaml", "z")
            .with_text_file("a.yaml", "a")
            .with_text_file("m.yaml", "m");

        let entries = provider.glob("*.yaml").unwrap();
        let paths: Vec<_> = entries.iter().map(|e| e.path.as_str()).collect();

        assert_eq!(paths, vec!["a.yaml", "m.yaml", "z.yaml"]);
    }

    #[test]
    fn test_file_caching() {
        let temp = create_test_pack();
        let provider = SandboxedFileProvider::new(temp.path()).unwrap();

        // First read
        let content1 = provider.get("config/app.yaml").unwrap();

        // Modify the file
        std::fs::write(temp.path().join("config/app.yaml"), "modified").unwrap();

        // Second read should return cached content
        let content2 = provider.get("config/app.yaml").unwrap();

        assert_eq!(content1, content2);
    }

    #[test]
    fn test_binary_file_handling() {
        let temp = TempDir::new().unwrap();

        // Create a binary file
        let binary_data = vec![0u8, 1, 2, 255, 254, 253];
        std::fs::write(temp.path().join("binary.bin"), &binary_data).unwrap();

        let provider = SandboxedFileProvider::new(temp.path()).unwrap();
        let content = provider.get("binary.bin").unwrap();

        assert_eq!(content, binary_data);
    }

    #[test]
    fn test_glob_pattern_validation() {
        let provider = MockFileProvider::new();

        // Invalid glob pattern (unclosed bracket)
        let result = provider.glob("[invalid");
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("invalid glob pattern")
        );
    }
}