Skip to main content

braid_core/fs/
scanner.rs

1//! File scanning module for BraidFS.
2//!
3//! Implements periodic directory scanning to catch any changes
4//! missed by the filesystem watcher.
5
6use crate::core::{BraidError, Result};
7use crate::fs::config::{get_root_dir, skip_file};
8use crate::fs::mapping;
9use std::collections::HashMap;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12use std::time::{Duration, SystemTime};
13use tokio::sync::RwLock;
14
/// Bookkeeping shared between scan passes.
#[derive(Debug, Default)]
pub struct ScanState {
    /// Modification time recorded for each tracked file at its last scan.
    pub file_mtimes: HashMap<PathBuf, u128>,
    /// `true` while a scan pass is in progress.
    pub running: bool,
    /// Running count of events the filesystem watcher failed to report.
    pub watcher_misses: u32,
}

impl ScanState {
    /// Creates an empty state: no tracked files, not running, zero misses.
    pub fn new() -> Self {
        Self {
            file_mtimes: HashMap::new(),
            running: false,
            watcher_misses: 0,
        }
    }
}
31
32/// Scan the root directory for file changes.
33///
34/// Returns a list of files that have changed since the last scan.
35pub async fn scan_files(
36    root_dir: &Path,
37    state: &Arc<RwLock<ScanState>>,
38    sync_urls: &HashMap<String, bool>,
39) -> Result<Vec<PathBuf>> {
40    // Check if already running
41    {
42        let mut s = state.write().await;
43        if s.running {
44            return Ok(Vec::new());
45        }
46        s.running = true;
47    }
48
49    let start_time = std::time::Instant::now();
50    let mut changed_files = Vec::new();
51
52    // Recursively scan directory
53    let result = scan_directory(root_dir, root_dir, state, sync_urls, &mut changed_files).await;
54
55    // Mark as done
56    {
57        let mut s = state.write().await;
58        s.running = false;
59    }
60
61    let elapsed = start_time.elapsed();
62    tracing::debug!(
63        "scan_files completed in {:?}, found {} changes",
64        elapsed,
65        changed_files.len()
66    );
67
68    if let Err(e) = result {
69        tracing::error!("Error during scan: {}", e);
70    }
71
72    Ok(changed_files)
73}
74
75/// Recursively scan a directory.
76async fn scan_directory(
77    dir: &Path,
78    root: &Path,
79    state: &Arc<RwLock<ScanState>>,
80    sync_urls: &HashMap<String, bool>,
81    changed: &mut Vec<PathBuf>,
82) -> Result<()> {
83    let mut entries = tokio::fs::read_dir(dir)
84        .await
85        .map_err(|e| BraidError::Io(e))?;
86
87    while let Some(entry) = entries.next_entry().await.map_err(|e| BraidError::Io(e))? {
88        let path = entry.path();
89        let rel_path = path.strip_prefix(root).unwrap_or(&path);
90        let rel_str = rel_path.to_string_lossy();
91
92        // Skip ignored files
93        if skip_file(&rel_str) {
94            continue;
95        }
96
97        let metadata = entry.metadata().await.map_err(|e| BraidError::Io(e))?;
98
99        if metadata.is_dir() {
100            // Recurse into subdirectories
101            Box::pin(scan_directory(&path, root, state, sync_urls, changed)).await?;
102        } else if metadata.is_file() {
103            // Check if this file is being synced
104        } else if metadata.is_file() {
105            // Check if this file is being synced
106            if let Ok(url) = mapping::path_to_url(&path) {
107                if !sync_urls.get(&url).copied().unwrap_or(false) {
108                    continue;
109                }
110
111                // Check modification time
112                let mtime = metadata
113                    .modified()
114                    .unwrap_or(SystemTime::UNIX_EPOCH)
115                    .duration_since(SystemTime::UNIX_EPOCH)
116                    .unwrap_or_default()
117                    .as_nanos();
118
119                let needs_sync = {
120                    let s = state.read().await;
121                    match s.file_mtimes.get(&path) {
122                        Some(&last_mtime) => mtime != last_mtime,
123                        None => true, // New file
124                    }
125                };
126
127                if needs_sync {
128                    changed.push(path.clone());
129
130                    // Update mtime
131                    let mut s = state.write().await;
132                    s.file_mtimes.insert(path, mtime);
133                }
134            }
135        }
136    }
137
138    Ok(())
139}
140
/// Normalize a URL by removing every trailing `/index` segment and every
/// trailing slash, in any order and combination.
///
/// For example `http://example.com/index/` and
/// `http://example.com/index//index` both become `http://example.com`.
/// A URL without such a suffix is returned unchanged.
///
/// Intended to match JS `normalize_url()` from braidfs/index.js.
pub fn normalize_url(url: &str) -> String {
    let mut rest = url;

    // Strip either suffix repeatedly so interleaved forms such as
    // "/index/" or "//index" are fully removed; work on a slice so the
    // only allocation is the final one.
    while let Some(stripped) = rest
        .strip_suffix("/index")
        .or_else(|| rest.strip_suffix('/'))
    {
        rest = stripped;
    }

    rest.to_string()
}
159
160/// Check if a URL is well-formed and absolute.
161pub fn is_well_formed_absolute_url(url: &str) -> bool {
162    url::Url::parse(url).is_ok()
163}
164
165/// Start the periodic file scanning loop.
166pub async fn start_scan_loop(
167    state: Arc<RwLock<ScanState>>,
168    sync_urls: Arc<RwLock<HashMap<String, bool>>>,
169    scan_interval: Duration,
170    on_change: impl Fn(PathBuf) + Send + Sync + 'static,
171) {
172    let on_change = Arc::new(on_change);
173
174    loop {
175        tokio::time::sleep(scan_interval).await;
176
177        let root_dir = match get_root_dir() {
178            Ok(dir) => dir,
179            Err(e) => {
180                tracing::error!("Failed to get root dir: {}", e);
181                continue;
182            }
183        };
184
185        let urls = sync_urls.read().await.clone();
186        match scan_files(&root_dir, &state, &urls).await {
187            Ok(changed) => {
188                for path in changed {
189                    on_change(path);
190                }
191            }
192            Err(e) => {
193                tracing::error!("Scan error: {}", e);
194            }
195        }
196    }
197}
198
199/// Called when the file watcher misses an event.
200pub async fn on_watcher_miss(state: &Arc<RwLock<ScanState>>, message: &str, trigger_scan: bool) {
201    {
202        let mut s = state.write().await;
203        s.watcher_misses += 1;
204        tracing::warn!("watcher miss: {} [total: {}]", message, s.watcher_misses);
205    }
206
207    if trigger_scan {
208        // Trigger a scan shortly
209        tracing::info!("Triggering scan due to watcher miss");
210    }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Trailing `/index` segments and trailing slashes are stripped; a URL
    /// without either is returned unchanged.
    #[test]
    fn test_normalize_url() {
        let cases = [
            ("http://example.com/path/index", "http://example.com/path"),
            ("http://example.com/index/index", "http://example.com"),
            ("http://example.com/path/", "http://example.com/path"),
            ("http://example.com", "http://example.com"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(normalize_url(input), expected);
        }
    }

    /// Absolute URLs are accepted; bare words and relative paths are not.
    #[test]
    fn test_is_well_formed_absolute_url() {
        let accepted = ["http://example.com", "https://braid.org/path"];
        let rejected = ["not-a-url", "relative/path"];

        for candidate in accepted.iter() {
            assert!(is_well_formed_absolute_url(candidate));
        }
        for candidate in rejected.iter() {
            assert!(!is_well_formed_absolute_url(candidate));
        }
    }
}