Skip to main content

runmat_filesystem/
sandbox.rs

1#[cfg(not(target_arch = "wasm32"))]
2use crate::data_contract::{
3    DataChunkUploadRequest, DataChunkUploadTarget, DataManifestDescriptor, DataManifestRequest,
4};
5#[cfg(not(target_arch = "wasm32"))]
6use crate::{DirEntry, FsFileType, FsMetadata, FsProvider, OpenFlags};
7#[cfg(not(target_arch = "wasm32"))]
8use async_trait::async_trait;
9#[cfg(not(target_arch = "wasm32"))]
10use chrono::Utc;
11#[cfg(not(target_arch = "wasm32"))]
12use serde_json::Value as JsonValue;
13#[cfg(not(target_arch = "wasm32"))]
14use std::ffi::OsString;
15#[cfg(not(target_arch = "wasm32"))]
16use std::fs;
17#[cfg(not(target_arch = "wasm32"))]
18use std::io;
19#[cfg(not(target_arch = "wasm32"))]
20use std::path::{Component, Path, PathBuf};
21
#[cfg(not(target_arch = "wasm32"))]
/// Filesystem provider that confines every operation to a fixed root directory.
///
/// Incoming paths (absolute or relative) are normalized lexically and re-anchored at the
/// sandbox root. `..` components that would climb above the root are discarded, so a resolved
/// path always stays textually beneath the root. The normalization does not inspect the
/// filesystem, so symbolic links that already exist inside the root are not examined by it.
#[derive(Debug)]
pub struct SandboxFsProvider {
    /// Host directory that all virtual paths are resolved against; stored in canonical form
    /// by [`SandboxFsProvider::new`].
    root: PathBuf,
}
30
31#[cfg(not(target_arch = "wasm32"))]
32impl SandboxFsProvider {
33    /// Create a new sandbox rooted at `root`. The directory is created if it does not exist.
34    pub fn new(root: PathBuf) -> io::Result<Self> {
35        if !root.exists() {
36            fs::create_dir_all(&root)?;
37        }
38        let canonical = fs::canonicalize(root)?;
39        Ok(Self { root: canonical })
40    }
41
42    /// Return the sandbox root on the host filesystem.
43    pub fn root(&self) -> &Path {
44        &self.root
45    }
46
47    fn resolve(&self, path: &Path) -> PathBuf {
48        let mut segments: Vec<OsString> = Vec::new();
49        for component in path.components() {
50            match component {
51                Component::Prefix(_) | Component::RootDir => {
52                    segments.clear();
53                }
54                Component::CurDir => {}
55                Component::ParentDir => {
56                    segments.pop();
57                }
58                Component::Normal(seg) => segments.push(seg.to_os_string()),
59            }
60        }
61        let mut target = self.root.clone();
62        for seg in segments {
63            target.push(seg);
64        }
65        target
66    }
67
68    fn virtualize(&self, real: &Path) -> PathBuf {
69        let relative = real.strip_prefix(&self.root).unwrap_or(Path::new(""));
70        if relative.as_os_str().is_empty() {
71            return PathBuf::from("/");
72        }
73
74        let mut path = String::from("/");
75        path.push_str(&relative.to_string_lossy().replace('\\', "/"));
76        PathBuf::from(path)
77    }
78
79    fn make_dir_entry(&self, real_path: PathBuf, file_name: OsString) -> DirEntry {
80        let file_type = fs::metadata(&real_path)
81            .ok()
82            .map(|m| FsFileType::from(m.file_type()))
83            .unwrap_or(FsFileType::Unknown);
84        DirEntry {
85            path: self.virtualize(&real_path),
86            file_name,
87            file_type,
88        }
89    }
90}
91
92#[cfg(not(target_arch = "wasm32"))]
93#[async_trait(?Send)]
94impl FsProvider for SandboxFsProvider {
95    fn current_dir_override(&self) -> Option<PathBuf> {
96        Some(PathBuf::from("/"))
97    }
98
99    fn open(&self, path: &Path, flags: &OpenFlags) -> io::Result<Box<dyn crate::FileHandle>> {
100        let target = self.resolve(path);
101        if let Some(parent) = target.parent() {
102            fs::create_dir_all(parent)?;
103        }
104        let mut opts = fs::OpenOptions::new();
105        opts.read(flags.read);
106        opts.write(flags.write);
107        opts.append(flags.append);
108        opts.truncate(flags.truncate);
109        opts.create(flags.create);
110        opts.create_new(flags.create_new);
111        let file = opts.open(&target)?;
112        Ok(Box::new(file))
113    }
114
115    async fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
116        let target = self.resolve(path);
117        fs::read(target)
118    }
119
120    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
121        let target = self.resolve(path);
122        if let Some(parent) = target.parent() {
123            fs::create_dir_all(parent)?;
124        }
125        fs::write(target, data)
126    }
127
128    async fn remove_file(&self, path: &Path) -> io::Result<()> {
129        let target = self.resolve(path);
130        if target.exists() {
131            fs::remove_file(target)?;
132        }
133        Ok(())
134    }
135
136    async fn metadata(&self, path: &Path) -> io::Result<FsMetadata> {
137        let target = self.resolve(path);
138        fs::metadata(target).map(FsMetadata::from)
139    }
140
141    async fn symlink_metadata(&self, path: &Path) -> io::Result<FsMetadata> {
142        let target = self.resolve(path);
143        fs::symlink_metadata(target).map(FsMetadata::from)
144    }
145
146    async fn read_dir(&self, path: &Path) -> io::Result<Vec<DirEntry>> {
147        let target = self.resolve(path);
148        let entries = fs::read_dir(&target)?;
149        let mut out = Vec::new();
150        for entry in entries {
151            let entry = entry?;
152            out.push(self.make_dir_entry(entry.path(), entry.file_name()));
153        }
154        Ok(out)
155    }
156
157    async fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
158        let target = self.resolve(path);
159        let real = fs::canonicalize(target)?;
160        Ok(self.virtualize(&real))
161    }
162
163    async fn create_dir(&self, path: &Path) -> io::Result<()> {
164        let target = self.resolve(path);
165        fs::create_dir(&target)
166    }
167
168    async fn create_dir_all(&self, path: &Path) -> io::Result<()> {
169        let target = self.resolve(path);
170        fs::create_dir_all(&target)
171    }
172
173    async fn remove_dir(&self, path: &Path) -> io::Result<()> {
174        let target = self.resolve(path);
175        fs::remove_dir(&target)
176    }
177
178    async fn remove_dir_all(&self, path: &Path) -> io::Result<()> {
179        let target = self.resolve(path);
180        if target.exists() {
181            fs::remove_dir_all(&target)?;
182        }
183        Ok(())
184    }
185
186    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
187        let src = self.resolve(from);
188        let dst = self.resolve(to);
189        if let Some(parent) = dst.parent() {
190            fs::create_dir_all(parent)?;
191        }
192        fs::rename(src, dst)
193    }
194
195    async fn set_readonly(&self, path: &Path, readonly: bool) -> io::Result<()> {
196        let target = self.resolve(path);
197        let mut perms = fs::metadata(&target)?.permissions();
198        perms.set_readonly(readonly);
199        fs::set_permissions(target, perms)
200    }
201
202    async fn data_manifest_descriptor(
203        &self,
204        request: &DataManifestRequest,
205    ) -> io::Result<DataManifestDescriptor> {
206        let manifest_path = if request.path.ends_with(".json") {
207            PathBuf::from(&request.path)
208        } else {
209            PathBuf::from(&request.path).join("manifest.json")
210        };
211        let bytes = self.read(&manifest_path).await?;
212        let json: JsonValue = serde_json::from_slice(&bytes)
213            .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
214        Ok(DataManifestDescriptor {
215            schema_version: json
216                .get("schema_version")
217                .or_else(|| json.get("schemaVersion"))
218                .and_then(|v| v.as_u64())
219                .unwrap_or(1) as u32,
220            format: json
221                .get("format")
222                .and_then(|v| v.as_str())
223                .unwrap_or("runmat-data")
224                .to_string(),
225            dataset_id: json
226                .get("dataset_id")
227                .or_else(|| json.get("datasetId"))
228                .and_then(|v| v.as_str())
229                .unwrap_or_default()
230                .to_string(),
231            updated_at: json
232                .get("updated_at")
233                .or_else(|| json.get("updatedAt"))
234                .and_then(|v| v.as_str())
235                .map(ToString::to_string)
236                .unwrap_or_else(|| Utc::now().to_rfc3339()),
237            txn_sequence: json
238                .get("txn_sequence")
239                .or_else(|| json.get("txnSequence"))
240                .and_then(|v| v.as_u64())
241                .unwrap_or(0),
242        })
243    }
244
245    async fn data_chunk_upload_targets(
246        &self,
247        request: &DataChunkUploadRequest,
248    ) -> io::Result<Vec<DataChunkUploadTarget>> {
249        let root = PathBuf::from(&request.dataset_path)
250            .join("arrays")
251            .join(sanitize_segment(&request.array))
252            .join("chunks");
253        self.create_dir_all(&root).await?;
254        request
255            .chunks
256            .iter()
257            .map(|chunk| {
258                let path = root.join(format!("{}.bin", sanitize_segment(&chunk.object_id)));
259                Ok(DataChunkUploadTarget {
260                    key: chunk.key.clone(),
261                    method: "PUT".to_string(),
262                    upload_url: format!("sandbox://{}", path.to_string_lossy()),
263                    headers: std::collections::HashMap::new(),
264                })
265            })
266            .collect()
267    }
268
269    async fn data_upload_chunk(
270        &self,
271        target: &DataChunkUploadTarget,
272        data: &[u8],
273    ) -> io::Result<()> {
274        if !target.method.eq_ignore_ascii_case("PUT") {
275            return Err(io::Error::new(
276                io::ErrorKind::InvalidInput,
277                format!("unsupported upload method '{}'", target.method),
278            ));
279        }
280        let path = target
281            .upload_url
282            .strip_prefix("sandbox://")
283            .ok_or_else(|| {
284                io::Error::new(io::ErrorKind::InvalidInput, "invalid sandbox upload url")
285            })?;
286        self.write(Path::new(path), data).await
287    }
288}
289
#[cfg(not(target_arch = "wasm32"))]
/// Turn arbitrary text into a string that is safe to use as a single path segment.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character becomes `_`.
/// Results that would not name a real entry (the empty string, `.` and `..`) are replaced
/// by underscores so the segment can neither vanish from a joined path nor walk to another
/// directory.
fn sanitize_segment(input: &str) -> String {
    let cleaned: String = input
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();

    if cleaned.is_empty() || cleaned == "." || cleaned == ".." {
        // "" and "." become "_", ".." becomes "__".
        "_".repeat(cleaned.len().max(1))
    } else {
        cleaned
    }
}
303
#[cfg(all(not(target_arch = "wasm32"), test))]
mod tests {
    use super::SandboxFsProvider;
    use crate::FsProvider;
    use futures::executor::block_on;
    use std::path::Path;
    use tempfile::tempdir;

    /// Render a path with forward slashes so assertions read the same on every platform.
    fn virtual_path(path: &Path) -> String {
        path.to_string_lossy().replace('\\', "/")
    }

    #[test]
    fn sandbox_prevents_root_escape_and_virtualizes_paths() {
        let scratch = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(scratch.path().to_path_buf()).expect("sandbox");
        block_on(sandbox.create_dir_all(Path::new("nested/sub"))).expect("create dir");
        block_on(sandbox.write(Path::new("nested/sub/file.txt"), b"hello")).expect("write");

        // A `..` that would leave the root is clamped, so the file lands in the sandbox root.
        block_on(sandbox.write(Path::new("../evil.txt"), b"nope"))
            .expect("write outside clamped");
        let root_listing = block_on(sandbox.read_dir(Path::new("."))).expect("read root");
        let found_evil = root_listing
            .iter()
            .any(|entry| entry.file_name() == "evil.txt");
        assert!(found_evil);

        // Directory entries are reported with sandbox-relative (virtual) paths.
        let nested_listing =
            block_on(sandbox.read_dir(Path::new("nested"))).expect("list nested");
        let has_sub = nested_listing
            .iter()
            .any(|entry| virtual_path(entry.path()) == "/nested/sub");
        assert!(has_sub);

        // An absolute virtual path is resolved inside the sandbox as well.
        let contents =
            block_on(sandbox.read(Path::new("/nested/sub/file.txt"))).expect("vfs read");
        assert_eq!(contents, b"hello");
    }

    #[test]
    fn canonicalize_returns_virtual_paths() {
        let scratch = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(scratch.path().to_path_buf()).expect("sandbox");
        block_on(sandbox.create_dir_all(Path::new("data"))).expect("create dir");
        block_on(sandbox.write(Path::new("data/file.bin"), b"bytes")).expect("write");

        let resolved =
            block_on(sandbox.canonicalize(Path::new("./data/./file.bin"))).expect("canonicalize");
        assert_eq!(virtual_path(&resolved), "/data/file.bin");
    }
}