runmat_filesystem/
sandbox.rs1#[cfg(not(target_arch = "wasm32"))]
2use crate::data_contract::{
3 DataChunkUploadRequest, DataChunkUploadTarget, DataManifestDescriptor, DataManifestRequest,
4};
5#[cfg(not(target_arch = "wasm32"))]
6use crate::{DirEntry, FsFileType, FsMetadata, FsProvider, OpenFlags};
7#[cfg(not(target_arch = "wasm32"))]
8use async_trait::async_trait;
9#[cfg(not(target_arch = "wasm32"))]
10use chrono::Utc;
11#[cfg(not(target_arch = "wasm32"))]
12use serde_json::Value as JsonValue;
13#[cfg(not(target_arch = "wasm32"))]
14use std::ffi::OsString;
15#[cfg(not(target_arch = "wasm32"))]
16use std::fs;
17#[cfg(not(target_arch = "wasm32"))]
18use std::io;
19#[cfg(not(target_arch = "wasm32"))]
20use std::path::{Component, Path, PathBuf};
21
/// Filesystem provider that maps every path it is given onto a location
/// underneath a single host directory.
///
/// Callers address files with virtual paths whose root `/` corresponds to
/// `root`; `resolve` turns a virtual path into a host path and `virtualize`
/// performs the reverse mapping.
#[cfg(not(target_arch = "wasm32"))]
pub struct SandboxFsProvider {
    /// Host directory backing the virtual root; `new` stores it in canonical form.
    root: PathBuf,
}
30
31#[cfg(not(target_arch = "wasm32"))]
32impl SandboxFsProvider {
33 pub fn new(root: PathBuf) -> io::Result<Self> {
35 if !root.exists() {
36 fs::create_dir_all(&root)?;
37 }
38 let canonical = fs::canonicalize(root)?;
39 Ok(Self { root: canonical })
40 }
41
42 pub fn root(&self) -> &Path {
44 &self.root
45 }
46
47 fn resolve(&self, path: &Path) -> PathBuf {
48 let mut segments: Vec<OsString> = Vec::new();
49 for component in path.components() {
50 match component {
51 Component::Prefix(_) | Component::RootDir => {
52 segments.clear();
53 }
54 Component::CurDir => {}
55 Component::ParentDir => {
56 segments.pop();
57 }
58 Component::Normal(seg) => segments.push(seg.to_os_string()),
59 }
60 }
61 let mut target = self.root.clone();
62 for seg in segments {
63 target.push(seg);
64 }
65 target
66 }
67
68 fn virtualize(&self, real: &Path) -> PathBuf {
69 let relative = real.strip_prefix(&self.root).unwrap_or(Path::new(""));
70 if relative.as_os_str().is_empty() {
71 return PathBuf::from("/");
72 }
73
74 let mut path = String::from("/");
75 path.push_str(&relative.to_string_lossy().replace('\\', "/"));
76 PathBuf::from(path)
77 }
78
79 fn make_dir_entry(&self, real_path: PathBuf, file_name: OsString) -> DirEntry {
80 let file_type = fs::metadata(&real_path)
81 .ok()
82 .map(|m| FsFileType::from(m.file_type()))
83 .unwrap_or(FsFileType::Unknown);
84 DirEntry {
85 path: self.virtualize(&real_path),
86 file_name,
87 file_type,
88 }
89 }
90}
91
92#[cfg(not(target_arch = "wasm32"))]
93#[async_trait(?Send)]
94impl FsProvider for SandboxFsProvider {
95 fn current_dir_override(&self) -> Option<PathBuf> {
96 Some(PathBuf::from("/"))
97 }
98
99 fn open(&self, path: &Path, flags: &OpenFlags) -> io::Result<Box<dyn crate::FileHandle>> {
100 let target = self.resolve(path);
101 if let Some(parent) = target.parent() {
102 fs::create_dir_all(parent)?;
103 }
104 let mut opts = fs::OpenOptions::new();
105 opts.read(flags.read);
106 opts.write(flags.write);
107 opts.append(flags.append);
108 opts.truncate(flags.truncate);
109 opts.create(flags.create);
110 opts.create_new(flags.create_new);
111 let file = opts.open(&target)?;
112 Ok(Box::new(file))
113 }
114
115 async fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
116 let target = self.resolve(path);
117 fs::read(target)
118 }
119
120 async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
121 let target = self.resolve(path);
122 if let Some(parent) = target.parent() {
123 fs::create_dir_all(parent)?;
124 }
125 fs::write(target, data)
126 }
127
128 async fn remove_file(&self, path: &Path) -> io::Result<()> {
129 let target = self.resolve(path);
130 if target.exists() {
131 fs::remove_file(target)?;
132 }
133 Ok(())
134 }
135
136 async fn metadata(&self, path: &Path) -> io::Result<FsMetadata> {
137 let target = self.resolve(path);
138 fs::metadata(target).map(FsMetadata::from)
139 }
140
141 async fn symlink_metadata(&self, path: &Path) -> io::Result<FsMetadata> {
142 let target = self.resolve(path);
143 fs::symlink_metadata(target).map(FsMetadata::from)
144 }
145
146 async fn read_dir(&self, path: &Path) -> io::Result<Vec<DirEntry>> {
147 let target = self.resolve(path);
148 let entries = fs::read_dir(&target)?;
149 let mut out = Vec::new();
150 for entry in entries {
151 let entry = entry?;
152 out.push(self.make_dir_entry(entry.path(), entry.file_name()));
153 }
154 Ok(out)
155 }
156
157 async fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
158 let target = self.resolve(path);
159 let real = fs::canonicalize(target)?;
160 Ok(self.virtualize(&real))
161 }
162
163 async fn create_dir(&self, path: &Path) -> io::Result<()> {
164 let target = self.resolve(path);
165 fs::create_dir(&target)
166 }
167
168 async fn create_dir_all(&self, path: &Path) -> io::Result<()> {
169 let target = self.resolve(path);
170 fs::create_dir_all(&target)
171 }
172
173 async fn remove_dir(&self, path: &Path) -> io::Result<()> {
174 let target = self.resolve(path);
175 fs::remove_dir(&target)
176 }
177
178 async fn remove_dir_all(&self, path: &Path) -> io::Result<()> {
179 let target = self.resolve(path);
180 if target.exists() {
181 fs::remove_dir_all(&target)?;
182 }
183 Ok(())
184 }
185
186 async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
187 let src = self.resolve(from);
188 let dst = self.resolve(to);
189 if let Some(parent) = dst.parent() {
190 fs::create_dir_all(parent)?;
191 }
192 fs::rename(src, dst)
193 }
194
195 async fn set_readonly(&self, path: &Path, readonly: bool) -> io::Result<()> {
196 let target = self.resolve(path);
197 let mut perms = fs::metadata(&target)?.permissions();
198 perms.set_readonly(readonly);
199 fs::set_permissions(target, perms)
200 }
201
202 async fn data_manifest_descriptor(
203 &self,
204 request: &DataManifestRequest,
205 ) -> io::Result<DataManifestDescriptor> {
206 let manifest_path = if request.path.ends_with(".json") {
207 PathBuf::from(&request.path)
208 } else {
209 PathBuf::from(&request.path).join("manifest.json")
210 };
211 let bytes = self.read(&manifest_path).await?;
212 let json: JsonValue = serde_json::from_slice(&bytes)
213 .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
214 Ok(DataManifestDescriptor {
215 schema_version: json
216 .get("schema_version")
217 .or_else(|| json.get("schemaVersion"))
218 .and_then(|v| v.as_u64())
219 .unwrap_or(1) as u32,
220 format: json
221 .get("format")
222 .and_then(|v| v.as_str())
223 .unwrap_or("runmat-data")
224 .to_string(),
225 dataset_id: json
226 .get("dataset_id")
227 .or_else(|| json.get("datasetId"))
228 .and_then(|v| v.as_str())
229 .unwrap_or_default()
230 .to_string(),
231 updated_at: json
232 .get("updated_at")
233 .or_else(|| json.get("updatedAt"))
234 .and_then(|v| v.as_str())
235 .map(ToString::to_string)
236 .unwrap_or_else(|| Utc::now().to_rfc3339()),
237 txn_sequence: json
238 .get("txn_sequence")
239 .or_else(|| json.get("txnSequence"))
240 .and_then(|v| v.as_u64())
241 .unwrap_or(0),
242 })
243 }
244
245 async fn data_chunk_upload_targets(
246 &self,
247 request: &DataChunkUploadRequest,
248 ) -> io::Result<Vec<DataChunkUploadTarget>> {
249 let root = PathBuf::from(&request.dataset_path)
250 .join("arrays")
251 .join(sanitize_segment(&request.array))
252 .join("chunks");
253 self.create_dir_all(&root).await?;
254 request
255 .chunks
256 .iter()
257 .map(|chunk| {
258 let path = root.join(format!("{}.bin", sanitize_segment(&chunk.object_id)));
259 Ok(DataChunkUploadTarget {
260 key: chunk.key.clone(),
261 method: "PUT".to_string(),
262 upload_url: format!("sandbox://{}", path.to_string_lossy()),
263 headers: std::collections::HashMap::new(),
264 })
265 })
266 .collect()
267 }
268
269 async fn data_upload_chunk(
270 &self,
271 target: &DataChunkUploadTarget,
272 data: &[u8],
273 ) -> io::Result<()> {
274 if !target.method.eq_ignore_ascii_case("PUT") {
275 return Err(io::Error::new(
276 io::ErrorKind::InvalidInput,
277 format!("unsupported upload method '{}'", target.method),
278 ));
279 }
280 let path = target
281 .upload_url
282 .strip_prefix("sandbox://")
283 .ok_or_else(|| {
284 io::Error::new(io::ErrorKind::InvalidInput, "invalid sandbox upload url")
285 })?;
286 self.write(Path::new(path), data).await
287 }
288}
289
/// Turns an arbitrary identifier into a string that is safe to use as exactly
/// one path segment.
///
/// ASCII letters, ASCII digits, `-`, `_` and `.` are kept; every other
/// character becomes `_`. Results that would not name a real entry when joined
/// onto a path (empty, `.` or `..`) are replaced by underscores, so the segment
/// can neither vanish from the joined path nor step into the parent directory.
#[cfg(not(target_arch = "wasm32"))]
fn sanitize_segment(input: &str) -> String {
    let cleaned: String = input
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();

    match cleaned.as_str() {
        // Keep the length where there is one so `.` and `..` stay distinct.
        "" | "." | ".." => "_".repeat(cleaned.len().max(1)),
        _ => cleaned,
    }
}
303
#[cfg(all(not(target_arch = "wasm32"), test))]
mod tests {
    use super::SandboxFsProvider;
    use crate::FsProvider;
    use futures::executor;
    use std::path::Path;
    use tempfile::tempdir;

    /// Renders a path with forward slashes so assertions do not depend on the
    /// host's separator.
    fn virtual_path(path: &Path) -> String {
        let rendered = path.to_string_lossy();
        rendered.replace('\\', "/")
    }

    #[test]
    fn sandbox_prevents_root_escape_and_virtualizes_paths() {
        let scratch = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(scratch.path().to_path_buf()).expect("sandbox");

        executor::block_on(async {
            sandbox
                .create_dir_all(Path::new("nested/sub"))
                .await
                .expect("create dir");
            sandbox
                .write(Path::new("nested/sub/file.txt"), b"hello")
                .await
                .expect("write");

            // A leading `..` is clamped, so the file lands in the sandbox root.
            sandbox
                .write(Path::new("../evil.txt"), b"nope")
                .await
                .expect("write outside clamped");
            let root_entries = sandbox
                .read_dir(Path::new("."))
                .await
                .expect("read root");
            assert!(root_entries
                .iter()
                .any(|entry| entry.file_name() == "evil.txt"));

            // Directory listings expose virtual paths rooted at `/`.
            let nested_entries = sandbox
                .read_dir(Path::new("nested"))
                .await
                .expect("list nested");
            assert!(nested_entries
                .iter()
                .any(|entry| virtual_path(entry.path()) == "/nested/sub"));

            // Absolute virtual paths resolve relative to the sandbox root.
            let contents = sandbox
                .read(Path::new("/nested/sub/file.txt"))
                .await
                .expect("vfs read");
            assert_eq!(contents, b"hello");
        });
    }

    #[test]
    fn canonicalize_returns_virtual_paths() {
        let scratch = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(scratch.path().to_path_buf()).expect("sandbox");

        executor::block_on(async {
            sandbox
                .create_dir_all(Path::new("data"))
                .await
                .expect("create dir");
            sandbox
                .write(Path::new("data/file.bin"), b"bytes")
                .await
                .expect("write");

            let canonical = sandbox
                .canonicalize(Path::new("./data/./file.bin"))
                .await
                .expect("canonicalize");
            assert_eq!(virtual_path(&canonical), "/data/file.bin");
        });
    }
}