Skip to main content

runmat_filesystem/
sandbox.rs

1#[cfg(not(target_arch = "wasm32"))]
2use crate::data_contract::{
3    DataChunkUploadRequest, DataChunkUploadTarget, DataManifestDescriptor, DataManifestRequest,
4};
5#[cfg(not(target_arch = "wasm32"))]
6use crate::{DirEntry, FsFileType, FsMetadata, FsProvider, OpenFlags};
7#[cfg(not(target_arch = "wasm32"))]
8use async_trait::async_trait;
9#[cfg(not(target_arch = "wasm32"))]
10use chrono::Utc;
11#[cfg(not(target_arch = "wasm32"))]
12use serde_json::Value as JsonValue;
13#[cfg(not(target_arch = "wasm32"))]
14use std::ffi::OsString;
15#[cfg(not(target_arch = "wasm32"))]
16use std::fs;
17#[cfg(not(target_arch = "wasm32"))]
18use std::io;
19#[cfg(not(target_arch = "wasm32"))]
20use std::path::{Component, Path, PathBuf};
21
#[cfg(not(target_arch = "wasm32"))]
/// Filesystem provider that sandboxes all operations under a fixed root directory.
///
/// Incoming paths (absolute or relative) are normalized and resolved relative to the sandbox root.
/// Attempts to traverse outside the root using `..` simply clamp to the root, preventing escape
/// through path syntax.
///
/// NOTE(review): the normalization is purely lexical. Symbolic links that live inside the root
/// are still followed by the underlying filesystem calls — confirm whether callers need a
/// stronger guarantee.
#[derive(Debug, Clone)]
pub struct SandboxFsProvider {
    /// Canonicalized host directory that every virtual path is resolved against.
    root: PathBuf,
}
30
31#[cfg(not(target_arch = "wasm32"))]
32impl SandboxFsProvider {
33    /// Create a new sandbox rooted at `root`. The directory is created if it does not exist.
34    pub fn new(root: PathBuf) -> io::Result<Self> {
35        if !root.exists() {
36            fs::create_dir_all(&root)?;
37        }
38        let canonical = fs::canonicalize(root)?;
39        Ok(Self { root: canonical })
40    }
41
42    /// Return the sandbox root on the host filesystem.
43    pub fn root(&self) -> &Path {
44        &self.root
45    }
46
47    fn resolve(&self, path: &Path) -> PathBuf {
48        let mut segments: Vec<OsString> = Vec::new();
49        for component in path.components() {
50            match component {
51                Component::Prefix(_) | Component::RootDir => {
52                    segments.clear();
53                }
54                Component::CurDir => {}
55                Component::ParentDir => {
56                    segments.pop();
57                }
58                Component::Normal(seg) => segments.push(seg.to_os_string()),
59            }
60        }
61        let mut target = self.root.clone();
62        for seg in segments {
63            target.push(seg);
64        }
65        target
66    }
67
68    fn virtualize(&self, real: &Path) -> PathBuf {
69        let relative = real.strip_prefix(&self.root).unwrap_or(Path::new(""));
70        let mut virt = PathBuf::new();
71        #[cfg(windows)]
72        {
73            let prefix = self
74                .root
75                .components()
76                .next()
77                .and_then(|component| match component {
78                    Component::Prefix(prefix) => Some(prefix.as_os_str()),
79                    _ => None,
80                });
81            if let Some(prefix) = prefix {
82                let mut root = OsString::from(prefix);
83                root.push(std::path::MAIN_SEPARATOR.to_string());
84                virt.push(root);
85            } else {
86                virt.push(std::path::MAIN_SEPARATOR.to_string());
87            }
88        }
89        #[cfg(not(windows))]
90        {
91            virt.push(std::path::MAIN_SEPARATOR.to_string());
92        }
93        if !relative.as_os_str().is_empty() {
94            virt.push(relative);
95        }
96        virt
97    }
98
99    fn make_dir_entry(&self, real_path: PathBuf, file_name: OsString) -> DirEntry {
100        let file_type = fs::metadata(&real_path)
101            .ok()
102            .map(|m| FsFileType::from(m.file_type()))
103            .unwrap_or(FsFileType::Unknown);
104        DirEntry {
105            path: self.virtualize(&real_path),
106            file_name,
107            file_type,
108        }
109    }
110}
111
112#[cfg(not(target_arch = "wasm32"))]
113#[async_trait(?Send)]
114impl FsProvider for SandboxFsProvider {
115    fn open(&self, path: &Path, flags: &OpenFlags) -> io::Result<Box<dyn crate::FileHandle>> {
116        let target = self.resolve(path);
117        if let Some(parent) = target.parent() {
118            fs::create_dir_all(parent)?;
119        }
120        let mut opts = fs::OpenOptions::new();
121        opts.read(flags.read);
122        opts.write(flags.write);
123        opts.append(flags.append);
124        opts.truncate(flags.truncate);
125        opts.create(flags.create);
126        opts.create_new(flags.create_new);
127        let file = opts.open(&target)?;
128        Ok(Box::new(file))
129    }
130
131    async fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
132        let target = self.resolve(path);
133        fs::read(target)
134    }
135
136    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
137        let target = self.resolve(path);
138        if let Some(parent) = target.parent() {
139            fs::create_dir_all(parent)?;
140        }
141        fs::write(target, data)
142    }
143
144    async fn remove_file(&self, path: &Path) -> io::Result<()> {
145        let target = self.resolve(path);
146        if target.exists() {
147            fs::remove_file(target)?;
148        }
149        Ok(())
150    }
151
152    async fn metadata(&self, path: &Path) -> io::Result<FsMetadata> {
153        let target = self.resolve(path);
154        fs::metadata(target).map(FsMetadata::from)
155    }
156
157    async fn symlink_metadata(&self, path: &Path) -> io::Result<FsMetadata> {
158        let target = self.resolve(path);
159        fs::symlink_metadata(target).map(FsMetadata::from)
160    }
161
162    async fn read_dir(&self, path: &Path) -> io::Result<Vec<DirEntry>> {
163        let target = self.resolve(path);
164        let entries = fs::read_dir(&target)?;
165        let mut out = Vec::new();
166        for entry in entries {
167            let entry = entry?;
168            out.push(self.make_dir_entry(entry.path(), entry.file_name()));
169        }
170        Ok(out)
171    }
172
173    async fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
174        let target = self.resolve(path);
175        let real = fs::canonicalize(target)?;
176        Ok(self.virtualize(&real))
177    }
178
179    async fn create_dir(&self, path: &Path) -> io::Result<()> {
180        let target = self.resolve(path);
181        fs::create_dir(&target)
182    }
183
184    async fn create_dir_all(&self, path: &Path) -> io::Result<()> {
185        let target = self.resolve(path);
186        fs::create_dir_all(&target)
187    }
188
189    async fn remove_dir(&self, path: &Path) -> io::Result<()> {
190        let target = self.resolve(path);
191        fs::remove_dir(&target)
192    }
193
194    async fn remove_dir_all(&self, path: &Path) -> io::Result<()> {
195        let target = self.resolve(path);
196        if target.exists() {
197            fs::remove_dir_all(&target)?;
198        }
199        Ok(())
200    }
201
202    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
203        let src = self.resolve(from);
204        let dst = self.resolve(to);
205        if let Some(parent) = dst.parent() {
206            fs::create_dir_all(parent)?;
207        }
208        fs::rename(src, dst)
209    }
210
211    async fn set_readonly(&self, path: &Path, readonly: bool) -> io::Result<()> {
212        let target = self.resolve(path);
213        let mut perms = fs::metadata(&target)?.permissions();
214        perms.set_readonly(readonly);
215        fs::set_permissions(target, perms)
216    }
217
218    async fn data_manifest_descriptor(
219        &self,
220        request: &DataManifestRequest,
221    ) -> io::Result<DataManifestDescriptor> {
222        let manifest_path = if request.path.ends_with(".json") {
223            PathBuf::from(&request.path)
224        } else {
225            PathBuf::from(&request.path).join("manifest.json")
226        };
227        let bytes = self.read(&manifest_path).await?;
228        let json: JsonValue = serde_json::from_slice(&bytes)
229            .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
230        Ok(DataManifestDescriptor {
231            schema_version: json
232                .get("schema_version")
233                .or_else(|| json.get("schemaVersion"))
234                .and_then(|v| v.as_u64())
235                .unwrap_or(1) as u32,
236            format: json
237                .get("format")
238                .and_then(|v| v.as_str())
239                .unwrap_or("runmat-data")
240                .to_string(),
241            dataset_id: json
242                .get("dataset_id")
243                .or_else(|| json.get("datasetId"))
244                .and_then(|v| v.as_str())
245                .unwrap_or_default()
246                .to_string(),
247            updated_at: json
248                .get("updated_at")
249                .or_else(|| json.get("updatedAt"))
250                .and_then(|v| v.as_str())
251                .map(ToString::to_string)
252                .unwrap_or_else(|| Utc::now().to_rfc3339()),
253            txn_sequence: json
254                .get("txn_sequence")
255                .or_else(|| json.get("txnSequence"))
256                .and_then(|v| v.as_u64())
257                .unwrap_or(0),
258        })
259    }
260
261    async fn data_chunk_upload_targets(
262        &self,
263        request: &DataChunkUploadRequest,
264    ) -> io::Result<Vec<DataChunkUploadTarget>> {
265        let root = PathBuf::from(&request.dataset_path)
266            .join("arrays")
267            .join(sanitize_segment(&request.array))
268            .join("chunks");
269        self.create_dir_all(&root).await?;
270        request
271            .chunks
272            .iter()
273            .map(|chunk| {
274                let path = root.join(format!("{}.bin", sanitize_segment(&chunk.object_id)));
275                Ok(DataChunkUploadTarget {
276                    key: chunk.key.clone(),
277                    method: "PUT".to_string(),
278                    upload_url: format!("sandbox://{}", path.to_string_lossy()),
279                    headers: std::collections::HashMap::new(),
280                })
281            })
282            .collect()
283    }
284
285    async fn data_upload_chunk(
286        &self,
287        target: &DataChunkUploadTarget,
288        data: &[u8],
289    ) -> io::Result<()> {
290        if !target.method.eq_ignore_ascii_case("PUT") {
291            return Err(io::Error::new(
292                io::ErrorKind::InvalidInput,
293                format!("unsupported upload method '{}'", target.method),
294            ));
295        }
296        let path = target
297            .upload_url
298            .strip_prefix("sandbox://")
299            .ok_or_else(|| {
300                io::Error::new(io::ErrorKind::InvalidInput, "invalid sandbox upload url")
301            })?;
302        self.write(Path::new(path), data).await
303    }
304}
305
#[cfg(not(target_arch = "wasm32"))]
/// Turn `input` into a string that is safe to use as a single path segment.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character becomes `_`.
/// Results that would not behave as an ordinary segment are replaced with placeholders:
/// an empty string and `.` become `_`, and `..` becomes `__`. Without this, joining the
/// result onto a path could collapse or climb out of the intended directory level.
fn sanitize_segment(input: &str) -> String {
    let cleaned: String = input
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();
    match cleaned.as_str() {
        "" | "." => "_".to_string(),
        ".." => "__".to_string(),
        _ => cleaned,
    }
}
319
#[cfg(all(not(target_arch = "wasm32"), test))]
mod tests {
    use super::SandboxFsProvider;
    use crate::FsProvider;
    use futures::executor::block_on;
    use std::path::Path;
    use tempfile::tempdir;

    /// Build a provider rooted in a fresh temporary directory.
    ///
    /// The directory guard is returned alongside the provider so the sandbox root stays alive
    /// for the duration of the test.
    fn fresh_sandbox() -> (tempfile::TempDir, SandboxFsProvider) {
        let guard = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(guard.path().to_path_buf()).expect("sandbox");
        (guard, sandbox)
    }

    #[test]
    fn sandbox_prevents_root_escape_and_virtualizes_paths() {
        let (_guard, sandbox) = fresh_sandbox();
        block_on(sandbox.create_dir_all(Path::new("nested/sub"))).expect("create dir");
        block_on(sandbox.write(Path::new("nested/sub/file.txt"), b"hello")).expect("write");

        // A `..` at the top level must land inside the sandbox root, not beside it.
        block_on(sandbox.write(Path::new("../evil.txt"), b"nope")).expect("write outside clamped");
        let root_listing = block_on(sandbox.read_dir(Path::new("."))).expect("read root");
        let clamped = root_listing
            .iter()
            .any(|entry| entry.file_name() == "evil.txt");
        assert!(clamped);

        // Directory entries expose virtual paths relative to the sandbox root.
        let nested_listing = block_on(sandbox.read_dir(Path::new("nested"))).expect("list nested");
        let has_sub = nested_listing
            .iter()
            .any(|entry| entry.path().ends_with(Path::new("nested/sub")));
        assert!(has_sub);

        // Absolute virtual paths resolve against the sandbox root as well.
        let contents =
            block_on(sandbox.read(Path::new("/nested/sub/file.txt"))).expect("vfs read");
        assert_eq!(contents, b"hello");
    }

    #[test]
    fn canonicalize_returns_virtual_paths() {
        let (_guard, sandbox) = fresh_sandbox();
        block_on(sandbox.create_dir_all(Path::new("data"))).expect("create dir");
        block_on(sandbox.write(Path::new("data/file.bin"), b"bytes")).expect("write");

        let resolved =
            block_on(sandbox.canonicalize(Path::new("./data/./file.bin"))).expect("canonicalize");
        assert!(resolved.ends_with(Path::new("data/file.bin")));
        assert!(resolved.is_absolute());
    }
}
364}