Skip to main content

oxios_markdown/
fs.rs

1//! Sandboxed filesystem abstraction for the knowledge base.
2//!
3//! Ported from files.md (`server/fs/fs.go`, `core/fs.rs`) by Artem Zakirullin.
4//! Each knowledge base has its own root directory. All paths are validated
5//! to prevent path traversal attacks.
6
7use std::cmp::Reverse;
8use std::collections::HashMap;
9use std::io::{Read, Write};
10use std::path::{Path, PathBuf};
11use std::time::SystemTime;
12
13use md5::{Digest as Md5Digest, Md5};
14
15use crate::types::{
16    DIR_ARCHIVE, DIR_JOURNAL, DIR_MEDIA, DIR_USER_ROOT, FileEntry, FsError, MAX_TEXT_SIZE,
17};
18
/// Forbidden filename characters and their safe replacements.
///
/// Each pair is `(forbidden, replacement)`. A replacement is a visually
/// similar Unicode character that is legal in filenames, so a sanitized
/// name still reads like the original title. NUL has no look-alike and is
/// simply removed.
///
/// The full-width replacements are spelled as `\u{…}` escapes so that
/// tooling which applies Unicode compatibility normalization cannot fold
/// them back into the very ASCII characters they are meant to replace.
const FORBIDDEN_CHARS: &[(&str, &str)] = &[
    ("<", "\u{FF1C}"),  // FULLWIDTH LESS-THAN SIGN
    (">", "\u{FF1E}"),  // FULLWIDTH GREATER-THAN SIGN
    (":", "\u{A789}"),  // MODIFIER LETTER COLON
    ("\"", "\u{2033}"), // DOUBLE PRIME
    ("|", "\u{2F01}"),  // KANGXI RADICAL LINE
    ("\\", "\u{FF3C}"), // FULLWIDTH REVERSE SOLIDUS
    ("?", "\u{FF1F}"),  // FULLWIDTH QUESTION MARK
    ("*", "\u{FE61}"),  // SMALL ASTERISK
    ("\x00", ""),       // NUL is dropped entirely
    ("/", "\u{FF0F}"),  // FULLWIDTH SOLIDUS
];
32
/// Directory names managed by the system; filtered out of user-facing
/// listings (see `exclude_system_dirs`).
pub const SYSTEM_DIRS: &[&str] = &[
    "archive",
    "media",
    "journal",
    "insights",
    "img",
];

/// File names managed by the system; filtered out of user-facing listings
/// (see `exclude_system_files`).
pub const SYSTEM_FILES: &[&str] = &[
    "Chat.md",
    "Later.md",
    "Done.md",
    "Shop.md",
    "Watch.md",
    "Read.md",
];

/// Entry names that directory listings skip outright.
const IGNORED_NAMES: &[&str] = &[
    ".",
    "..",
    ".obsidian",
    ".gitignore",
    ".DS_Store",
    ".git",
];
43
44/// Maximum size for a single read() / read_to_string() call. Protects
45/// against OOM when a huge file ends up inside the sandbox (F23). Text
46/// content syncs are also bounded by [`MAX_TEXT_SIZE`].
47pub const MAX_READ_SIZE: u64 = MAX_TEXT_SIZE as u64;
48
49/// Minimum number of hex chars required for `unhash()` lookups. Shorter
50/// prefixes match too many files and let callers resolve arbitrary
51/// handles (F18). 5 chars = short_hash width, ~10^6 collision space.
52const MIN_UNHASH_LEN: usize = 5;
53
54// ============================================================================
55// VirtualFs
56// ============================================================================
57
/// A filesystem view confined to one knowledge base.
///
/// Every operation resolves its path relative to `root`; attempts to
/// traverse outside of it are rejected.
#[derive(Debug, Clone)]
pub struct VirtualFs {
    /// Directory that all paths are resolved against.
    root: PathBuf,
    /// Storage quota in kilobytes; 0 means unlimited.
    quota_kb: i64,
}
67
68impl VirtualFs {
69    /// Create a new VirtualFs rooted at the given directory.
70    ///
71    /// Creates the directory if it doesn't exist.
72    pub fn new(root: PathBuf) -> std::io::Result<Self> {
73        if !root.exists() {
74            std::fs::create_dir_all(&root)?;
75        }
76        Ok(Self { root, quota_kb: 0 })
77    }
78
79    /// Set a storage quota in kilobytes (0 = unlimited).
80    pub fn with_quota(mut self, quota_kb: i64) -> Self {
81        self.quota_kb = quota_kb;
82        self
83    }
84
85    /// Get the root path.
86    pub fn root(&self) -> &Path {
87        &self.root
88    }
89
90    /// Get the configured quota in KB (0 = unlimited).
91    pub fn quota_kb(&self) -> i64 {
92        self.quota_kb
93    }
94
95    /// Resolve a sandboxed path under `dir` for `filename`, verifying the
96    /// final location stays under the knowledge root (rejects `..`, absolute,
97    /// and symlink escapes).
98    pub fn safe_path(&self, dir: &str, filename: &str) -> Result<PathBuf, FsError> {
99        let dir_trimmed = dir.trim();
100        if dir_trimmed.starts_with("..") {
101            return Err(FsError::UnsafePath);
102        }
103
104        let relative: PathBuf = if dir == DIR_USER_ROOT {
105            if filename.is_empty() {
106                return Ok(self.root.clone());
107            }
108            PathBuf::from(filename)
109        } else {
110            PathBuf::from(dir).join(filename)
111        };
112
113        let rel_str = relative.to_string_lossy();
114        if rel_str.starts_with('/') || rel_str.starts_with("../") {
115            return Err(FsError::UnsafePath);
116        }
117
118        let full = self.root.join(&relative);
119
120        // Normalize and verify we didn't escape root
121        let stripped = full
122            .strip_prefix(&self.root)
123            .map_err(|_| FsError::UnsafePath)?;
124        let (normalized, escaped) = normalize_path(stripped);
125        if escaped || normalized.to_string_lossy().contains("..") {
126            return Err(FsError::UnsafePath);
127        }
128
129        let final_path = self.root.join(&normalized);
130        // Re-verify containment by resolving symlinks: a symlink planted
131        // inside the root (e.g. via archive restore or external mount)
132        // could otherwise point outside and let read/write/delete escape
133        // the sandbox (F4).
134        self.verify_under_root(&final_path)?;
135        Ok(final_path)
136    }
137
138    /// Resolve `target` (and its longest existing ancestor when the target
139    /// does not yet exist) via `canonicalize` and confirm the result still
140    /// lives under the canonicalized root. Rejects symlink escapes.
141    fn verify_under_root(&self, target: &Path) -> Result<(), FsError> {
142        let canonical_root = self.root.canonicalize().map_err(|_| FsError::UnsafePath)?;
143
144        let canonical_target = match target.canonicalize() {
145            Ok(p) => p,
146            Err(_) => {
147                // Target doesn't exist yet (typical for writes). Walk up
148                // to the nearest existing ancestor, canonicalize it, then
149                // re-append the non-existent tail components. Tail items
150                // can't be symlinks yet, so they don't change containment.
151                let mut existing = target.to_path_buf();
152                let mut tail: Vec<std::ffi::OsString> = Vec::new();
153                while std::fs::symlink_metadata(&existing).is_err() {
154                    let Some(name) = existing.file_name() else {
155                        return Err(FsError::UnsafePath);
156                    };
157                    tail.push(name.to_owned());
158                    if !existing.pop() {
159                        return Err(FsError::UnsafePath);
160                    }
161                }
162                let mut c = existing.canonicalize().map_err(|_| FsError::UnsafePath)?;
163                for name in tail.into_iter().rev() {
164                    c.push(name);
165                }
166                c
167            }
168        };
169
170        if !canonical_target.starts_with(&canonical_root) {
171            return Err(FsError::UnsafePath);
172        }
173        Ok(())
174    }
175
    // ── POSIX Path API (single path string) ────────────────────
177
178    /// Read file content by POSIX-style relative path.
179    /// `path` examples: "Rust.md", "brain/Rust.md", "journal/2024.08 August.md"
180    pub fn read_path(&self, path: &str) -> Result<String, FsError> {
181        let (dir, filename) = split_posix_path(path);
182        self.read(dir, filename)
183    }
184
185    /// Write file content by POSIX-style relative path.
186    pub fn write_path(&self, path: &str, content: &str) -> Result<(), FsError> {
187        let (dir, filename) = split_posix_path(path);
188        self.write(dir, filename, content)
189    }
190
191    /// Delete file by POSIX-style relative path.
192    pub fn delete_path(&self, path: &str) -> Result<(), FsError> {
193        let (dir, filename) = split_posix_path(path);
194        self.del(dir, filename)
195    }
196
197    /// Rename/move file by POSIX-style relative paths.
198    pub fn rename_path(&self, old_path: &str, new_path: &str) -> Result<(), FsError> {
199        let (old_dir, old_filename) = split_posix_path(old_path);
200        let (new_dir, new_filename) = split_posix_path(new_path);
201        self.rename(old_dir, old_filename, new_dir, new_filename)
202    }
203
204    /// Check if file exists by POSIX-style relative path.
205    pub fn exists_path(&self, path: &str) -> Result<bool, FsError> {
206        let (dir, filename) = split_posix_path(path);
207        self.exists(dir, filename)
208    }
209
210    /// Get mtime by POSIX-style relative path.
211    pub fn mtime_path(&self, path: &str) -> Result<i64, FsError> {
212        let (dir, filename) = split_posix_path(path);
213        self.mtime(dir, filename)
214    }
215
216    // ── Basic I/O ───────────────────────────────────────────
217
218    /// Check if a file or directory exists.
219    pub fn exists(&self, dir: &str, filename: &str) -> Result<bool, FsError> {
220        let path = self.safe_path(dir, filename)?;
221        Ok(path.exists())
222    }
223
224    /// Read file contents as a string.
225    ///
226    /// Refuses files larger than [`MAX_READ_SIZE`] so a giant file inside
227    /// the sandbox (or arriving via sync) cannot OOM the process (F23).
228    pub fn read(&self, dir: &str, filename: &str) -> Result<String, FsError> {
229        let path = self.safe_path(dir, filename)?;
230        let meta = std::fs::metadata(&path)?;
231        if meta.len() > MAX_READ_SIZE {
232            return Err(FsError::TooLarge);
233        }
234        let mut file = std::fs::File::open(&path)?;
235        let mut contents = String::new();
236        file.read_to_string(&mut contents)?;
237        Ok(contents)
238    }
239
240    /// Write content to a file, creating parent directories as needed.
241    pub fn write(&self, dir: &str, filename: &str, content: &str) -> Result<(), FsError> {
242        let path = self.safe_path(dir, filename)?;
243        self.atomic_write(&path, content.as_bytes())
244    }
245
246    /// Read a file as raw bytes.
247    pub fn read_bytes(&self, dir: &str, filename: &str) -> Result<Vec<u8>, FsError> {
248        let path = self.safe_path(dir, filename)?;
249        let meta = std::fs::metadata(&path)?;
250        if meta.len() > MAX_READ_SIZE {
251            return Err(FsError::TooLarge);
252        }
253        Ok(std::fs::read(&path)?)
254    }
255
256    /// Write raw bytes to a file, creating parent directories as needed.
257    /// Respects the configured quota (same logic as `write()`).
258    pub fn write_bytes(&self, dir: &str, filename: &str, data: &[u8]) -> Result<(), FsError> {
259        let path = self.safe_path(dir, filename)?;
260        self.atomic_write(&path, data)
261    }
262
263    /// Atomic write: serialize to a sibling temp file, fsync, then rename.
264    ///
265    /// `std::fs::rename` is atomic on the same filesystem, so a crash
266    /// between truncate and write_all can no longer leave a 0-byte or
267    /// partial file at `path` (F5). The temp name carries a UUID so two
268    /// concurrent writers cannot stomp on each other's scratch file.
269    fn atomic_write(&self, path: &Path, data: &[u8]) -> Result<(), FsError> {
270        if let Some(parent) = path.parent() {
271            std::fs::create_dir_all(parent)?;
272        }
273
274        if self.quota_kb > 0 {
275            let new_size = data.len() as i64;
276            let old_size = std::fs::metadata(path).map(|m| m.len() as i64).unwrap_or(0);
277            // Skip the recursive quota walk when this write doesn't grow
278            // the total (F17) — overwriting with same/smaller content is
279            // always within quota.
280            if new_size > old_size {
281                let used = self.calculate_used_quota()?;
282                let available = (self.quota_kb * 1024) - used;
283                if (new_size - old_size) > available {
284                    return Err(FsError::QuotaExceeded);
285                }
286            }
287        }
288
289        let dir = path.parent().unwrap_or_else(|| Path::new("."));
290        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("file");
291        // Hidden temp file kept inside the target directory so the rename
292        // stays on the same filesystem (required for atomicity).
293        let tmp_path = dir.join(format!(".{file_name}.{}.tmp", uuid::Uuid::new_v4()));
294
295        let result: Result<(), FsError> = (|| {
296            let mut file = std::fs::File::create(&tmp_path)?;
297            file.write_all(data)?;
298            // Durably flush before the rename so the renamed file isn't
299            // half-written after a crash.
300            file.sync_all()?;
301            drop(file);
302            std::fs::rename(&tmp_path, path)?;
303            Ok(())
304        })();
305
306        if result.is_err() {
307            // Best-effort cleanup; error is reported from the actual op.
308            let _ = std::fs::remove_file(&tmp_path);
309        }
310        result
311    }
312
313    /// Read a file by POSIX path as raw bytes.
314    pub fn read_path_bytes(&self, path: &str) -> Result<Vec<u8>, FsError> {
315        let (dir, filename) = split_posix_path(path);
316        self.read_bytes(dir, filename)
317    }
318
319    /// Write raw bytes to a file by POSIX path.
320    pub fn write_path_bytes(&self, path: &str, data: &[u8]) -> Result<(), FsError> {
321        let (dir, filename) = split_posix_path(path);
322        self.write_bytes(dir, filename, data)
323    }
324
325    /// Delete a file.
326    pub fn del(&self, dir: &str, filename: &str) -> Result<(), FsError> {
327        let path = self.safe_path(dir, filename)?;
328        std::fs::remove_file(&path)?;
329        Ok(())
330    }
331
332    /// Rename/move a file.
333    pub fn rename(
334        &self,
335        old_dir: &str,
336        old_filename: &str,
337        new_dir: &str,
338        new_filename: &str,
339    ) -> Result<(), FsError> {
340        let old_path = self.safe_path(old_dir, old_filename)?;
341        let new_path = self.safe_path(new_dir, new_filename)?;
342        if let Some(parent) = new_path.parent() {
343            std::fs::create_dir_all(parent)?;
344        }
345        std::fs::rename(&old_path, &new_path)?;
346        Ok(())
347    }
348
349    /// Create a directory.
350    pub fn make_dir(&self, dir: &str) -> Result<(), FsError> {
351        let path = self.safe_path(dir, "")?;
352        std::fs::create_dir_all(&path)?;
353        Ok(())
354    }
355
356    /// Touch a file: create if missing, update mtime if present.
357    pub fn touch(&self, dir: &str, filename: &str) -> Result<(), FsError> {
358        let path = self.safe_path(dir, filename)?;
359        if path.exists() {
360            let now = SystemTime::now();
361            filetime::set_file_mtime(&path, filetime::FileTime::from_system_time(now))?;
362        } else {
363            self.write(dir, filename, "")?;
364        }
365        Ok(())
366    }
367
368    // ── Metadata ─────────────────────────────────────────────
369
370    /// Get the ctime/mtime of a file in milliseconds since epoch.
371    pub fn ctime(&self, dir: &str, filename: &str) -> Result<i64, FsError> {
372        let path = self.safe_path(dir, filename)?;
373        let meta = std::fs::metadata(&path)?;
374        Ok(mtime_to_ms(meta.modified()?))
375    }
376
377    /// Get the modification time of a file in milliseconds since epoch.
378    pub fn mtime(&self, dir: &str, filename: &str) -> Result<i64, FsError> {
379        let path = self.safe_path(dir, filename)?;
380        let meta = std::fs::metadata(&path)?;
381        Ok(mtime_to_ms(meta.modified()?))
382    }
383
384    /// Recursively collect mtimes for all files with given extensions.
385    pub fn mtimes(&self, root: &str, extensions: &[&str]) -> Result<HashMap<String, i64>, FsError> {
386        let root_path = self.safe_path(root, "")?;
387        let mut result = HashMap::new();
388        self.walk_dir(&root_path, &root_path, extensions, &mut result)?;
389        Ok(result)
390    }
391
392    // ── Listing ─────────────────────────────────────────────
393
394    /// List files and directories in a directory.
395    pub fn files_and_dirs(&self, dir: &str) -> Result<Vec<FileEntry>, FsError> {
396        let user_path = self.safe_path(dir, "")?;
397        if !user_path.exists() {
398            return Ok(vec![]);
399        }
400
401        let mut entries = Vec::new();
402        for entry in std::fs::read_dir(&user_path)? {
403            let entry = entry?;
404            let path = entry.path();
405            let name = path
406                .file_name()
407                .and_then(|n| n.to_str())
408                .unwrap_or("")
409                .to_string();
410
411            if IGNORED_NAMES.contains(&name.as_str()) {
412                continue;
413            }
414
415            let meta = std::fs::metadata(&path)?;
416            let is_dir = meta.is_dir();
417            let ctime = mtime_to_ms(meta.modified().unwrap_or(SystemTime::UNIX_EPOCH));
418            let hash = hash_filename(&name);
419            let display_name = display_name(&name);
420            let has_content = !is_dir && meta.len() > 0;
421
422            entries.push(FileEntry::new(
423                name,
424                hash,
425                display_name,
426                ctime,
427                has_content,
428                is_dir,
429                dir.to_string(),
430            ));
431        }
432        Ok(entries)
433    }
434
435    /// List only directories in the root.
436    pub fn dirs(&self) -> Result<Vec<FileEntry>, FsError> {
437        Ok(self
438            .files_and_dirs(DIR_USER_ROOT)?
439            .into_iter()
440            .filter(|f| f.is_dir)
441            .collect())
442    }
443
444    /// Check if a file has non-whitespace content.
445    pub fn is_multiline(&self, dir: &str, filename: &str) -> Result<bool, FsError> {
446        let content = self.read(dir, filename)?;
447        Ok(!content.trim().is_empty())
448    }
449
450    /// Create the standard system directories (archive, media, journal).
451    pub fn create_system_dirs(&self) -> Result<(), FsError> {
452        for dir in [DIR_ARCHIVE, DIR_MEDIA, DIR_JOURNAL] {
453            self.make_dir(dir)?;
454        }
455        Ok(())
456    }
457
458    /// Reverse a hash to find the original filename.
459    ///
460    /// Requires at least [`MIN_UNHASH_LEN`] hex chars so short prefixes
461    /// cannot resolve to arbitrary files (F18). Ambiguous matches (more
462    /// than one candidate) return [`FsError::CannotUnhash`] instead of
463    /// picking the first hit.
464    pub fn unhash(&self, dir: &str, filename_hash: &str) -> Result<String, FsError> {
465        if dir == DIR_USER_ROOT && filename_hash == DIR_USER_ROOT {
466            return Ok(DIR_USER_ROOT.to_string());
467        }
468        if filename_hash.len() < MIN_UNHASH_LEN {
469            return Err(FsError::CannotUnhash);
470        }
471        let files = self.files_and_dirs(dir)?;
472
473        // Primary pass: hash_prefix match. Collect all matches and refuse
474        // to guess when more than one file qualifies.
475        let mut hash_matches: Vec<&FileEntry> = files
476            .iter()
477            .filter(|f| hash_filename(&f.name).starts_with(filename_hash))
478            .collect();
479        if hash_matches.len() == 1 {
480            return Ok(hash_matches.remove(0).name.clone());
481        }
482        if !hash_matches.is_empty() {
483            return Err(FsError::CannotUnhash);
484        }
485
486        // Secondary pass: exact filename-prefix match (callers passing a
487        // human-readable prefix, e.g. "Chat" for "Chat.md"). Same single-
488        // match rule to prevent handle hijacking.
489        let mut name_matches: Vec<&FileEntry> = files
490            .iter()
491            .filter(|f| f.name.starts_with(filename_hash))
492            .collect();
493        if name_matches.len() == 1 {
494            return Ok(name_matches.remove(0).name.clone());
495        }
496        Err(FsError::CannotUnhash)
497    }
498
499    /// Search files by name across the entire knowledge base.
500    pub fn search_files_by_name(&self, query: &str) -> Result<Vec<FileEntry>, FsError> {
501        let query_lower = query.to_lowercase().trim().to_string();
502        if query_lower.contains('/') {
503            return Err(FsError::UnsafePath);
504        }
505
506        let mut notes = Vec::new();
507        self.collect_md_files(&self.root, &self.root, &mut notes)?;
508
509        if !query_lower.is_empty() {
510            let matching: Vec<FileEntry> = notes
511                .iter()
512                .filter(|f| {
513                    let top = f.parent_dir.split('/').next().unwrap_or("");
514                    top.to_lowercase().starts_with(&query_lower)
515                        || f.display_name.to_lowercase().contains(&query_lower)
516                })
517                .cloned()
518                .collect();
519            if !matching.is_empty() {
520                notes = matching;
521            }
522        }
523
524        notes.sort_by_key(|a| Reverse(a.ctime));
525        Ok(notes)
526    }
527
528    /// List all `.md` files in the vault with their sizes.
529    /// Returns `(posix_path, size_bytes)` pairs. Skips dot-files and dot-dirs.
530    pub fn all_md_files(&self) -> Result<Vec<(String, i64)>, FsError> {
531        let mut result = Vec::new();
532        self.collect_md_paths(&self.root, &self.root, &mut result)?;
533        Ok(result)
534    }
535
536    // ── Private helpers ─────────────────────────────────────
537
538    #[allow(clippy::only_used_in_recursion)]
539    fn walk_dir(
540        &self,
541        root_path: &Path,
542        current_path: &Path,
543        extensions: &[&str],
544        result: &mut HashMap<String, i64>,
545    ) -> Result<(), FsError> {
546        if !current_path.is_dir() {
547            return Ok(());
548        }
549        for entry in std::fs::read_dir(current_path)? {
550            let entry = entry?;
551            let path = entry.path();
552            let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
553
554            if filename.starts_with('.') {
555                continue;
556            }
557
558            // Use the entry's own metadata (symlink_metadata on Unix) so a
559            // symlink planted inside the root cannot redirect the walk to
560            // files outside the sandbox (F6).
561            let meta = match entry.metadata() {
562                Ok(m) => m,
563                Err(_) => continue,
564            };
565            if meta.file_type().is_symlink() {
566                continue;
567            }
568
569            if meta.is_dir() {
570                self.walk_dir(root_path, &path, extensions, result)?;
571            } else {
572                if !extensions.is_empty() {
573                    let ext = path
574                        .extension()
575                        .and_then(|e| e.to_str())
576                        .map(|e| format!(".{e}"));
577                    let ext_match = ext
578                        .as_ref()
579                        .map(|e| extensions.contains(&e.as_str()))
580                        .unwrap_or(false);
581                    if !ext_match {
582                        continue;
583                    }
584                }
585
586                let rel = path
587                    .strip_prefix(root_path)
588                    .map_err(|_| FsError::UnsafePath)?;
589                let display = rel.to_string_lossy();
590                let display_path = if display.starts_with('/') || display.starts_with('\\') {
591                    display[1..].to_string()
592                } else {
593                    display.to_string()
594                };
595
596                result.insert(display_path, mtime_to_ms(meta.modified()?));
597            }
598        }
599        Ok(())
600    }
601
602    #[allow(clippy::only_used_in_recursion)]
603    fn collect_md_files(
604        &self,
605        root_path: &Path,
606        current_path: &Path,
607        files: &mut Vec<FileEntry>,
608    ) -> Result<(), FsError> {
609        if !current_path.is_dir() {
610            return Ok(());
611        }
612        for entry in std::fs::read_dir(current_path)? {
613            let entry = entry?;
614            let path = entry.path();
615            let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
616
617            // entry.metadata() does not traverse the entry's own symlink,
618            // and we skip any symlink outright so the walk stays inside
619            // the sandbox (F6).
620            let meta = match entry.metadata() {
621                Ok(m) => m,
622                Err(_) => continue,
623            };
624            if meta.file_type().is_symlink() {
625                continue;
626            }
627
628            if meta.is_dir() {
629                if filename.starts_with('.') {
630                    continue;
631                }
632                self.collect_md_files(root_path, &path, files)?;
633            } else {
634                if !filename.ends_with(".md") || filename.starts_with('.') {
635                    continue;
636                }
637
638                let rel = path
639                    .strip_prefix(root_path)
640                    .map_err(|_| FsError::UnsafePath)?;
641                let parent = rel
642                    .parent()
643                    .map(|p| p.to_string_lossy().to_string())
644                    .unwrap_or_default();
645                let parent_str = if parent.is_empty() || parent == "." {
646                    DIR_USER_ROOT.to_string()
647                } else {
648                    parent
649                };
650
651                let ctime = mtime_to_ms(meta.modified().unwrap_or(SystemTime::UNIX_EPOCH));
652                let hash = hash_filename(filename);
653                let display_name = display_name(filename);
654
655                files.push(FileEntry::new(
656                    filename.to_string(),
657                    hash,
658                    display_name,
659                    ctime,
660                    meta.len() > 0,
661                    false,
662                    parent_str,
663                ));
664            }
665        }
666        Ok(())
667    }
668
669    /// Collect all .md file paths and sizes (for frontmatter scanning).
670    #[allow(clippy::only_used_in_recursion)]
671    fn collect_md_paths(
672        &self,
673        root_path: &Path,
674        current_path: &Path,
675        result: &mut Vec<(String, i64)>,
676    ) -> Result<(), FsError> {
677        if !current_path.is_dir() {
678            return Ok(());
679        }
680        for entry in std::fs::read_dir(current_path)? {
681            let entry = entry?;
682            let path = entry.path();
683            let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
684            if filename.starts_with('.') {
685                continue;
686            }
687            let meta = match entry.metadata() {
688                Ok(m) => m,
689                Err(_) => continue,
690            };
691            if meta.file_type().is_symlink() {
692                continue;
693            }
694            if meta.is_dir() {
695                self.collect_md_paths(root_path, &path, result)?;
696            } else if filename.ends_with(".md") {
697                let rel = path
698                    .strip_prefix(root_path)
699                    .map_err(|_| FsError::UnsafePath)?;
700                result.push((rel.to_string_lossy().to_string(), meta.len() as i64));
701            }
702        }
703        Ok(())
704    }
705
706    fn calculate_used_quota(&self) -> std::io::Result<i64> {
707        let mut total = 0i64;
708        if self.root.exists() {
709            for entry in std::fs::read_dir(&self.root)? {
710                let entry = entry?;
711                let meta = entry.metadata()?;
712                if meta.is_file() {
713                    total += meta.len() as i64;
714                } else if meta.is_dir() {
715                    total += dir_size(entry.path())?;
716                }
717            }
718        }
719        Ok(total)
720    }
721}
722
723// ============================================================================
724// Free Functions
725// ============================================================================
726
727/// Compute MD5 hash of a filename (first 11 hex characters).
728pub fn hash_filename(filename: &str) -> String {
729    let mut hasher = Md5::new();
730    hasher.update(filename.as_bytes());
731    hex::encode(hasher.finalize())[..11].to_string()
732}
733
734/// Compute short hash (first 5 hex characters).
735pub fn short_hash(filename: &str) -> String {
736    let mut hasher = Md5::new();
737    hasher.update(filename.as_bytes());
738    hex::encode(hasher.finalize())[..5].to_string()
739}
740
741/// Sanitize a filename by replacing forbidden characters.
742pub fn sanitize_filename(filename: &str) -> String {
743    let mut result = filename.to_string();
744    for (forbidden, safe) in FORBIDDEN_CHARS {
745        result = result.replace(forbidden, safe);
746    }
747    result
748}
749
750/// Reverse sanitize: restore original forbidden characters.
751pub fn unsanitize_filename(filename: &str) -> String {
752    let mut result = filename.to_string();
753    for (forbidden, safe) in FORBIDDEN_CHARS {
754        if !forbidden.is_empty() && *forbidden != "\x00" {
755            result = result.replace(safe, forbidden);
756        }
757    }
758    result
759}
760
/// Derive the display name of a file: surrounding whitespace trimmed, one
/// trailing `.md` removed, first character upper-cased.
pub fn display_name(filename: &str) -> String {
    let name = filename.trim();
    let stem = name.strip_suffix(".md").unwrap_or(name);

    let Some(initial) = stem.chars().next() else {
        return String::new();
    };

    let mut shown = String::with_capacity(stem.len());
    // Upper-casing may expand to several characters (e.g. 'ß' -> "SS").
    shown.extend(initial.to_uppercase());
    shown.push_str(&stem[initial.len_utf8()..]);
    shown
}
771
/// Decide whether a filename denotes a checklist item.
///
/// After trimming, the name must start with `-` and contain a further `-`
/// that is not its last character.
pub fn is_checklist_item(filename: &str) -> bool {
    let name = filename.trim();
    name.starts_with('-')
        && matches!(name.rfind('-'), Some(last) if last > 0 && last + 1 < name.len())
}
784
785/// Filter: exclude checklist files.
786pub fn exclude_checklists(files: &[FileEntry]) -> Vec<FileEntry> {
787    files
788        .iter()
789        .filter(|f| {
790            let name = f.name.trim_end_matches(".md");
791            !(name.starts_with('_') && name.ends_with('_'))
792        })
793        .cloned()
794        .collect()
795}
796
797/// Filter: exclude system directories.
798pub fn exclude_system_dirs(files: &[FileEntry]) -> Vec<FileEntry> {
799    files
800        .iter()
801        .filter(|f| !SYSTEM_DIRS.contains(&f.name.as_str()))
802        .cloned()
803        .collect()
804}
805
806/// Filter: exclude system files.
807pub fn exclude_system_files(files: &[FileEntry]) -> Vec<FileEntry> {
808    files
809        .iter()
810        .filter(|f| !SYSTEM_FILES.contains(&f.name.as_str()))
811        .cloned()
812        .collect()
813}
814
815/// Filter: only directories.
816pub fn only_dirs(files: &[FileEntry]) -> Vec<FileEntry> {
817    files.iter().filter(|f| f.is_dir).cloned().collect()
818}
819
820/// Filter: only files (not directories).
821pub fn only_files(files: &[FileEntry]) -> Vec<FileEntry> {
822    files.iter().filter(|f| !f.is_dir).cloned().collect()
823}
824
825/// Filter: only user markdown files (exclude system files, dirs, non-md).
826pub fn only_user_md_files(files: &[FileEntry]) -> Vec<FileEntry> {
827    files
828        .iter()
829        .filter(|f| {
830            !f.is_dir && f.name.ends_with(".md") && !SYSTEM_FILES.contains(&f.name.as_str())
831        })
832        .cloned()
833        .collect()
834}
835
836/// Sort files by ctime descending (newest first).
837pub fn sort_by_ctime_desc(files: &mut [FileEntry]) {
838    files.sort_by_key(|a| Reverse(a.ctime));
839}
840
841/// Extract filenames from a list of file entries.
842pub fn only_filenames(files: &[FileEntry]) -> Vec<String> {
843    files.iter().map(|f| f.name.clone()).collect()
844}
845
846/// Split a POSIX-style path like "brain/Rust.md" into (dir, filename).
847/// Root-level files like "Chat.md" become ("/", "Chat.md").
848pub fn split_posix_path(path: &str) -> (&str, &str) {
849    let path = path.trim_start_matches('/');
850    if let Some(slash_pos) = path.rfind('/') {
851        let (dir, file) = path.split_at(slash_pos);
852        (dir, &file[1..])
853    } else {
854        (crate::types::DIR_USER_ROOT, path)
855    }
856}
857
858// ── Internal helpers ────────────────────────────────────────
859
/// Lexically resolves `.` and `..` in `path` without touching the filesystem.
///
/// Returns the resolved path together with a flag that is `true` when some
/// `..` tried to climb above the starting point. Root and prefix components
/// are discarded, so the returned path is always relative.
fn normalize_path(path: &Path) -> (PathBuf, bool) {
    use std::path::Component;

    let mut kept = Vec::new();
    let mut escaped = false;
    for part in path.components() {
        match part {
            Component::Normal(segment) => kept.push(segment),
            Component::ParentDir => {
                // Nothing left to pop means the path climbed past its start.
                if kept.pop().is_none() {
                    escaped = true;
                }
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }
    (kept.into_iter().collect(), escaped)
}
879
/// Converts `time` to whole milliseconds since the Unix epoch.
///
/// Instants earlier than the epoch yield `0`.
fn mtime_to_ms(time: SystemTime) -> i64 {
    match time.duration_since(SystemTime::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as i64,
        Err(_) => 0,
    }
}
885
/// Recursively sums the byte length of every regular file beneath `path`.
///
/// Entries that are neither regular files nor directories add nothing. The
/// first I/O error encountered aborts the walk and is returned.
fn dir_size(path: PathBuf) -> std::io::Result<i64> {
    std::fs::read_dir(path)?.try_fold(0i64, |running, entry| -> std::io::Result<i64> {
        let entry = entry?;
        let meta = entry.metadata()?;
        let bytes = if meta.is_file() {
            meta.len() as i64
        } else if meta.is_dir() {
            dir_size(entry.path())?
        } else {
            0
        };
        Ok(running + bytes)
    })
}
899
900// ============================================================================
901// Tests
902// ============================================================================
903
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a `VirtualFs` rooted in a fresh temporary directory.
    ///
    /// The `TempDir` is handed back as well so each test can keep it bound
    /// for as long as it uses the filesystem.
    fn test_fs() -> (VirtualFs, TempDir) {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().to_path_buf();
        (VirtualFs::new(root).unwrap(), tmp)
    }

    #[test]
    fn test_write_and_read() {
        let (vfs, _guard) = test_fs();
        vfs.write("brain", "test.md", "Hello").unwrap();
        let content = vfs.read("brain", "test.md").unwrap();
        assert_eq!(content, "Hello");
    }

    #[test]
    fn test_exists() {
        let (vfs, _guard) = test_fs();
        let before = vfs.exists("/", "nope.md").unwrap();
        assert!(!before);
        vfs.write("/", "exists.md", "x").unwrap();
        let after = vfs.exists("/", "exists.md").unwrap();
        assert!(after);
    }

    #[test]
    fn test_delete() {
        let (vfs, _guard) = test_fs();
        vfs.write("/", "del.md", "x").unwrap();
        vfs.del("/", "del.md").unwrap();
        let still_there = vfs.exists("/", "del.md").unwrap();
        assert!(!still_there);
    }

    #[test]
    fn test_rename() {
        let (vfs, _guard) = test_fs();
        vfs.write("/", "old.md", "data").unwrap();
        vfs.rename("/", "old.md", "/", "new.md").unwrap();
        // The old name must be gone and the content reachable under the new one.
        let old_present = vfs.exists("/", "old.md").unwrap();
        assert!(!old_present);
        let moved = vfs.read("/", "new.md").unwrap();
        assert_eq!(moved, "data");
    }

    #[test]
    fn test_path_traversal_rejected() {
        let (vfs, _guard) = test_fs();
        // `..` in the directory argument.
        let via_dir = vfs.safe_path("../etc", "passwd");
        assert!(via_dir.is_err());
        // `..` in the filename argument.
        let via_name = vfs.safe_path("a", "../../etc/passwd");
        assert!(via_name.is_err());
    }

    #[test]
    fn test_touch_creates_file() {
        let (vfs, _guard) = test_fs();
        vfs.touch("/", "new.md").unwrap();
        let created = vfs.exists("/", "new.md").unwrap();
        assert!(created);
    }

    #[test]
    fn test_hash_filename_deterministic() {
        let first = hash_filename("test.md");
        let second = hash_filename("test.md");
        assert_eq!(first, second);
        assert_eq!(first.len(), 11);
    }

    #[test]
    fn test_display_name() {
        let cases = [("rust.md", "Rust"), (" filename ", "Filename")];
        for &(input, expected) in cases.iter() {
            assert_eq!(display_name(input), expected);
        }
    }

    #[test]
    fn test_sanitize_roundtrip() {
        let raw = "test/file:name";
        let safe = sanitize_filename(raw);
        assert_ne!(safe, raw);
        let restored = unsanitize_filename(&safe);
        assert_eq!(restored, raw);
    }

    #[test]
    fn test_files_and_dirs() {
        let (vfs, _guard) = test_fs();
        vfs.make_dir("brain").unwrap();
        vfs.write("brain", "Rust.md", "content").unwrap();
        let listing = vfs.files_and_dirs("brain").unwrap();
        assert_eq!(listing.len(), 1);
        assert_eq!(listing[0].name, "Rust.md");
    }

    #[test]
    fn test_create_system_dirs() {
        let (vfs, _guard) = test_fs();
        vfs.create_system_dirs().unwrap();
        let has_archive = vfs.exists(DIR_ARCHIVE, "").unwrap();
        assert!(has_archive);
        let has_media = vfs.exists(DIR_MEDIA, "").unwrap();
        assert!(has_media);
        let has_journal = vfs.exists(DIR_JOURNAL, "").unwrap();
        assert!(has_journal);
    }

    #[test]
    fn test_mtimes() {
        let (vfs, _guard) = test_fs();
        vfs.write("/", "a.md", "a").unwrap();
        let stamps = vfs.mtimes("/", &[".md"]).unwrap();
        assert!(stamps.contains_key("a.md"));
    }

    #[test]
    fn test_search_files_by_name() {
        let (vfs, _guard) = test_fs();
        vfs.make_dir("brain").unwrap();
        vfs.write("brain", "Rust.md", "").unwrap();
        let hits = vfs.search_files_by_name("brain").unwrap();
        assert_eq!(hits.len(), 1);
    }

    #[test]
    fn test_unhash() {
        let (vfs, _guard) = test_fs();
        vfs.write("/", "target.md", "x").unwrap();
        let handle = hash_filename("target.md");
        let resolved = vfs.unhash("/", &handle).unwrap();
        assert_eq!(resolved, "target.md");
    }

    #[test]
    fn test_filter_functions() {
        let file_entry = FileEntry::new(
            "a.md".into(),
            "h".into(),
            "A".into(),
            0,
            true,
            false,
            "/".into(),
        );
        let dir_entry = FileEntry::new(
            "dir".into(),
            "h".into(),
            "Dir".into(),
            0,
            false,
            true,
            "/".into(),
        );
        // One file plus one directory: each filter must keep exactly one.
        let mixed = [file_entry, dir_entry];
        assert_eq!(only_dirs(&mixed).len(), 1);
        assert_eq!(only_files(&mixed).len(), 1);
    }

    #[test]
    fn test_quota_enforcement() {
        let tmp = TempDir::new().unwrap();
        let vfs = VirtualFs::new(tmp.path().to_path_buf())
            .unwrap()
            .with_quota(1); // 1 KB
        let oversized = "x".repeat(2048);
        assert!(vfs.write("/", "big.md", &oversized).is_err());
    }

    #[test]
    fn test_read_write_bytes() {
        let (vfs, _guard) = test_fs();
        // PNG header fragment
        let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A];
        vfs.write_bytes("media", "image.png", png_header).unwrap();
        let loaded = vfs.read_bytes("media", "image.png").unwrap();
        assert_eq!(loaded, png_header);
    }

    #[test]
    fn test_write_bytes_quota() {
        let tmp = TempDir::new().unwrap();
        let vfs = VirtualFs::new(tmp.path().to_path_buf())
            .unwrap()
            .with_quota(1); // 1 KB
        let oversized = vec![0u8; 2048];
        assert!(vfs.write_bytes("/", "big.bin", &oversized).is_err());
    }

    #[test]
    fn test_path_bytes_roundtrip() {
        let (vfs, _guard) = test_fs();
        let payload = b"\x00\x01\x02\xFF binary data";
        vfs.write_path_bytes("sub/file.bin", payload).unwrap();
        let loaded = vfs.read_path_bytes("sub/file.bin").unwrap();
        assert_eq!(loaded, payload);
    }
}