runmat_filesystem/
sandbox.rs1#[cfg(not(target_arch = "wasm32"))]
2use crate::data_contract::{
3 DataChunkUploadRequest, DataChunkUploadTarget, DataManifestDescriptor, DataManifestRequest,
4};
5#[cfg(not(target_arch = "wasm32"))]
6use crate::{DirEntry, FsFileType, FsMetadata, FsProvider, OpenFlags};
7#[cfg(not(target_arch = "wasm32"))]
8use async_trait::async_trait;
9#[cfg(not(target_arch = "wasm32"))]
10use chrono::Utc;
11#[cfg(not(target_arch = "wasm32"))]
12use serde_json::Value as JsonValue;
13#[cfg(not(target_arch = "wasm32"))]
14use std::ffi::OsString;
15#[cfg(not(target_arch = "wasm32"))]
16use std::fs;
17#[cfg(not(target_arch = "wasm32"))]
18use std::io;
19#[cfg(not(target_arch = "wasm32"))]
20use std::path::{Component, Path, PathBuf};
21
#[cfg(not(target_arch = "wasm32"))]
/// Filesystem provider that maps every caller-supplied path onto a location
/// underneath a single host directory.
///
/// The type is cheap to clone (it only owns the root path) and implements
/// `Debug` so it can appear in logs and in other derived `Debug` output.
#[derive(Debug, Clone)]
pub struct SandboxFsProvider {
    /// Host directory all paths are resolved against. `SandboxFsProvider::new`
    /// stores the canonicalized form of the directory it was given.
    root: PathBuf,
}
30
31#[cfg(not(target_arch = "wasm32"))]
32impl SandboxFsProvider {
33 pub fn new(root: PathBuf) -> io::Result<Self> {
35 if !root.exists() {
36 fs::create_dir_all(&root)?;
37 }
38 let canonical = fs::canonicalize(root)?;
39 Ok(Self { root: canonical })
40 }
41
42 pub fn root(&self) -> &Path {
44 &self.root
45 }
46
47 fn resolve(&self, path: &Path) -> PathBuf {
48 let mut segments: Vec<OsString> = Vec::new();
49 for component in path.components() {
50 match component {
51 Component::Prefix(_) | Component::RootDir => {
52 segments.clear();
53 }
54 Component::CurDir => {}
55 Component::ParentDir => {
56 segments.pop();
57 }
58 Component::Normal(seg) => segments.push(seg.to_os_string()),
59 }
60 }
61 let mut target = self.root.clone();
62 for seg in segments {
63 target.push(seg);
64 }
65 target
66 }
67
68 fn virtualize(&self, real: &Path) -> PathBuf {
69 let relative = real.strip_prefix(&self.root).unwrap_or(Path::new(""));
70 let mut virt = PathBuf::new();
71 #[cfg(windows)]
72 {
73 let prefix = self
74 .root
75 .components()
76 .next()
77 .and_then(|component| match component {
78 Component::Prefix(prefix) => Some(prefix.as_os_str()),
79 _ => None,
80 });
81 if let Some(prefix) = prefix {
82 let mut root = OsString::from(prefix);
83 root.push(std::path::MAIN_SEPARATOR.to_string());
84 virt.push(root);
85 } else {
86 virt.push(std::path::MAIN_SEPARATOR.to_string());
87 }
88 }
89 #[cfg(not(windows))]
90 {
91 virt.push(std::path::MAIN_SEPARATOR.to_string());
92 }
93 if !relative.as_os_str().is_empty() {
94 virt.push(relative);
95 }
96 virt
97 }
98
99 fn make_dir_entry(&self, real_path: PathBuf, file_name: OsString) -> DirEntry {
100 let file_type = fs::metadata(&real_path)
101 .ok()
102 .map(|m| FsFileType::from(m.file_type()))
103 .unwrap_or(FsFileType::Unknown);
104 DirEntry {
105 path: self.virtualize(&real_path),
106 file_name,
107 file_type,
108 }
109 }
110}
111
112#[cfg(not(target_arch = "wasm32"))]
113#[async_trait(?Send)]
114impl FsProvider for SandboxFsProvider {
115 fn open(&self, path: &Path, flags: &OpenFlags) -> io::Result<Box<dyn crate::FileHandle>> {
116 let target = self.resolve(path);
117 if let Some(parent) = target.parent() {
118 fs::create_dir_all(parent)?;
119 }
120 let mut opts = fs::OpenOptions::new();
121 opts.read(flags.read);
122 opts.write(flags.write);
123 opts.append(flags.append);
124 opts.truncate(flags.truncate);
125 opts.create(flags.create);
126 opts.create_new(flags.create_new);
127 let file = opts.open(&target)?;
128 Ok(Box::new(file))
129 }
130
131 async fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
132 let target = self.resolve(path);
133 fs::read(target)
134 }
135
136 async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
137 let target = self.resolve(path);
138 if let Some(parent) = target.parent() {
139 fs::create_dir_all(parent)?;
140 }
141 fs::write(target, data)
142 }
143
144 async fn remove_file(&self, path: &Path) -> io::Result<()> {
145 let target = self.resolve(path);
146 if target.exists() {
147 fs::remove_file(target)?;
148 }
149 Ok(())
150 }
151
152 async fn metadata(&self, path: &Path) -> io::Result<FsMetadata> {
153 let target = self.resolve(path);
154 fs::metadata(target).map(FsMetadata::from)
155 }
156
157 async fn symlink_metadata(&self, path: &Path) -> io::Result<FsMetadata> {
158 let target = self.resolve(path);
159 fs::symlink_metadata(target).map(FsMetadata::from)
160 }
161
162 async fn read_dir(&self, path: &Path) -> io::Result<Vec<DirEntry>> {
163 let target = self.resolve(path);
164 let entries = fs::read_dir(&target)?;
165 let mut out = Vec::new();
166 for entry in entries {
167 let entry = entry?;
168 out.push(self.make_dir_entry(entry.path(), entry.file_name()));
169 }
170 Ok(out)
171 }
172
173 async fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
174 let target = self.resolve(path);
175 let real = fs::canonicalize(target)?;
176 Ok(self.virtualize(&real))
177 }
178
179 async fn create_dir(&self, path: &Path) -> io::Result<()> {
180 let target = self.resolve(path);
181 fs::create_dir(&target)
182 }
183
184 async fn create_dir_all(&self, path: &Path) -> io::Result<()> {
185 let target = self.resolve(path);
186 fs::create_dir_all(&target)
187 }
188
189 async fn remove_dir(&self, path: &Path) -> io::Result<()> {
190 let target = self.resolve(path);
191 fs::remove_dir(&target)
192 }
193
194 async fn remove_dir_all(&self, path: &Path) -> io::Result<()> {
195 let target = self.resolve(path);
196 if target.exists() {
197 fs::remove_dir_all(&target)?;
198 }
199 Ok(())
200 }
201
202 async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
203 let src = self.resolve(from);
204 let dst = self.resolve(to);
205 if let Some(parent) = dst.parent() {
206 fs::create_dir_all(parent)?;
207 }
208 fs::rename(src, dst)
209 }
210
211 async fn set_readonly(&self, path: &Path, readonly: bool) -> io::Result<()> {
212 let target = self.resolve(path);
213 let mut perms = fs::metadata(&target)?.permissions();
214 perms.set_readonly(readonly);
215 fs::set_permissions(target, perms)
216 }
217
218 async fn data_manifest_descriptor(
219 &self,
220 request: &DataManifestRequest,
221 ) -> io::Result<DataManifestDescriptor> {
222 let manifest_path = if request.path.ends_with(".json") {
223 PathBuf::from(&request.path)
224 } else {
225 PathBuf::from(&request.path).join("manifest.json")
226 };
227 let bytes = self.read(&manifest_path).await?;
228 let json: JsonValue = serde_json::from_slice(&bytes)
229 .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
230 Ok(DataManifestDescriptor {
231 schema_version: json
232 .get("schema_version")
233 .or_else(|| json.get("schemaVersion"))
234 .and_then(|v| v.as_u64())
235 .unwrap_or(1) as u32,
236 format: json
237 .get("format")
238 .and_then(|v| v.as_str())
239 .unwrap_or("runmat-data")
240 .to_string(),
241 dataset_id: json
242 .get("dataset_id")
243 .or_else(|| json.get("datasetId"))
244 .and_then(|v| v.as_str())
245 .unwrap_or_default()
246 .to_string(),
247 updated_at: json
248 .get("updated_at")
249 .or_else(|| json.get("updatedAt"))
250 .and_then(|v| v.as_str())
251 .map(ToString::to_string)
252 .unwrap_or_else(|| Utc::now().to_rfc3339()),
253 txn_sequence: json
254 .get("txn_sequence")
255 .or_else(|| json.get("txnSequence"))
256 .and_then(|v| v.as_u64())
257 .unwrap_or(0),
258 })
259 }
260
261 async fn data_chunk_upload_targets(
262 &self,
263 request: &DataChunkUploadRequest,
264 ) -> io::Result<Vec<DataChunkUploadTarget>> {
265 let root = PathBuf::from(&request.dataset_path)
266 .join("arrays")
267 .join(sanitize_segment(&request.array))
268 .join("chunks");
269 self.create_dir_all(&root).await?;
270 request
271 .chunks
272 .iter()
273 .map(|chunk| {
274 let path = root.join(format!("{}.bin", sanitize_segment(&chunk.object_id)));
275 Ok(DataChunkUploadTarget {
276 key: chunk.key.clone(),
277 method: "PUT".to_string(),
278 upload_url: format!("sandbox://{}", path.to_string_lossy()),
279 headers: std::collections::HashMap::new(),
280 })
281 })
282 .collect()
283 }
284
285 async fn data_upload_chunk(
286 &self,
287 target: &DataChunkUploadTarget,
288 data: &[u8],
289 ) -> io::Result<()> {
290 if !target.method.eq_ignore_ascii_case("PUT") {
291 return Err(io::Error::new(
292 io::ErrorKind::InvalidInput,
293 format!("unsupported upload method '{}'", target.method),
294 ));
295 }
296 let path = target
297 .upload_url
298 .strip_prefix("sandbox://")
299 .ok_or_else(|| {
300 io::Error::new(io::ErrorKind::InvalidInput, "invalid sandbox upload url")
301 })?;
302 self.write(Path::new(path), data).await
303 }
304}
305
/// Turns an arbitrary identifier into a single safe path segment.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character
/// (path separators included) becomes `_`. Because `.` is allowed, the
/// results `"."` and `".."` would act as current/parent-directory components
/// once joined into a path, and an empty result would contribute no segment
/// at all. Those three cases are rewritten to underscores (`"_"`, `"__"`,
/// `"_"`) so the output always names exactly one ordinary directory entry.
#[cfg(not(target_arch = "wasm32"))]
fn sanitize_segment(input: &str) -> String {
    let cleaned: String = input
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '.' {
                ch
            } else {
                '_'
            }
        })
        .collect();
    match cleaned.as_str() {
        "" => "_".to_string(),
        "." | ".." => cleaned.replace('.', "_"),
        _ => cleaned,
    }
}
319
#[cfg(all(not(target_arch = "wasm32"), test))]
mod tests {
    use super::SandboxFsProvider;
    use crate::FsProvider;
    use futures::executor;
    use std::path::Path;
    use tempfile::tempdir;

    /// Writes through `..` must land inside the sandbox, listings must report
    /// virtual paths, and absolute virtual paths must resolve under the root.
    #[test]
    fn sandbox_prevents_root_escape_and_virtualizes_paths() {
        let scratch = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(scratch.path().to_path_buf()).expect("sandbox");

        executor::block_on(async {
            sandbox
                .create_dir_all(Path::new("nested/sub"))
                .await
                .expect("create dir");
            sandbox
                .write(Path::new("nested/sub/file.txt"), b"hello")
                .await
                .expect("write");

            // The parent-directory hop is clamped, so the file appears at the
            // sandbox root rather than next to the temp directory.
            sandbox
                .write(Path::new("../evil.txt"), b"nope")
                .await
                .expect("write outside clamped");
            let root_entries = sandbox.read_dir(Path::new(".")).await.expect("read root");
            assert!(root_entries
                .iter()
                .any(|entry| entry.file_name() == "evil.txt"));

            let nested_entries = sandbox
                .read_dir(Path::new("nested"))
                .await
                .expect("list nested");
            assert!(nested_entries
                .iter()
                .any(|entry| entry.path().ends_with(Path::new("nested/sub"))));

            // A leading root separator is interpreted relative to the sandbox.
            let contents = sandbox
                .read(Path::new("/nested/sub/file.txt"))
                .await
                .expect("vfs read");
            assert_eq!(contents, b"hello");
        });
    }

    /// `canonicalize` must hand back an absolute virtual path, not a host one.
    #[test]
    fn canonicalize_returns_virtual_paths() {
        let scratch = tempdir().expect("tempdir");
        let sandbox = SandboxFsProvider::new(scratch.path().to_path_buf()).expect("sandbox");

        executor::block_on(async {
            sandbox
                .create_dir_all(Path::new("data"))
                .await
                .expect("create dir");
            sandbox
                .write(Path::new("data/file.bin"), b"bytes")
                .await
                .expect("write");

            let resolved = sandbox
                .canonicalize(Path::new("./data/./file.bin"))
                .await
                .expect("canonicalize");
            assert!(resolved.ends_with(Path::new("data/file.bin")));
            assert!(resolved.is_absolute());
        });
    }
}