rs_web/
tracker.rs

1//! Build dependency tracker for incremental builds
2
3use dashmap::DashMap;
4use parking_lot::Mutex;
5use std::cell::RefCell;
6use std::collections::HashMap;
7use std::hash::{Hash, Hasher};
8use std::path::PathBuf;
9use std::sync::Arc;
10use std::time::SystemTime;
11use twox_hash::XxHash64;
12
13#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
14pub struct FileState {
15    pub hash: u64,
16    pub mtime_secs: u64,
17    pub mtime_nanos: u32,
18}
19
20impl FileState {
21    pub fn new(hash: u64, mtime: SystemTime) -> Self {
22        let duration = mtime
23            .duration_since(SystemTime::UNIX_EPOCH)
24            .unwrap_or_default();
25        Self {
26            hash,
27            mtime_secs: duration.as_secs(),
28            mtime_nanos: duration.subsec_nanos(),
29        }
30    }
31
32    pub fn mtime(&self) -> SystemTime {
33        SystemTime::UNIX_EPOCH + std::time::Duration::new(self.mtime_secs, self.mtime_nanos)
34    }
35}
36
37thread_local! {
38    static LOCAL_READS: RefCell<Vec<(PathBuf, FileState)>> = RefCell::default();
39    static LOCAL_WRITES: RefCell<Vec<(PathBuf, FileState)>> = RefCell::default();
40}
41
/// Key of a memoized result: which function produced it and for which input.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MemoKey {
    /// Name of the memoized function.
    pub function: &'static str,
    /// Hash identifying the input the function was called with.
    pub input_hash: u64,
}
47
/// A reference to an asset found in rendered HTML
/// (script src, link href, img src, and similar attributes).
#[derive(Debug, Clone)]
pub struct AssetRef {
    /// URL path exactly as written in the HTML, e.g. "/js/editor.js".
    pub url_path: String,
    /// Source file behind that URL when it could be determined,
    /// e.g. "static/js/editor.js"; `None` otherwise.
    pub source_path: Option<PathBuf>,
}
56
57#[derive(Debug)]
58pub struct BuildTracker {
59    reads: Mutex<HashMap<PathBuf, FileState>>,
60    writes: Mutex<HashMap<PathBuf, FileState>>,
61    /// Maps output page path -> asset references found in its HTML
62    html_refs: Mutex<HashMap<PathBuf, Vec<AssetRef>>>,
63    /// Maps source asset path -> output pages that reference it
64    asset_to_pages: Mutex<HashMap<PathBuf, Vec<PathBuf>>>,
65    memo: DashMap<MemoKey, Vec<u8>>,
66    enabled: bool,
67}
68
69impl Default for BuildTracker {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
75impl BuildTracker {
76    pub fn new() -> Self {
77        Self {
78            reads: Mutex::new(HashMap::new()),
79            writes: Mutex::new(HashMap::new()),
80            html_refs: Mutex::new(HashMap::new()),
81            asset_to_pages: Mutex::new(HashMap::new()),
82            memo: DashMap::new(),
83            enabled: true,
84        }
85    }
86
87    pub fn disabled() -> Self {
88        Self {
89            reads: Mutex::new(HashMap::new()),
90            writes: Mutex::new(HashMap::new()),
91            html_refs: Mutex::new(HashMap::new()),
92            asset_to_pages: Mutex::new(HashMap::new()),
93            memo: DashMap::new(),
94            enabled: false,
95        }
96    }
97
98    pub fn is_enabled(&self) -> bool {
99        self.enabled
100    }
101
102    pub fn record_read(&self, path: PathBuf, content: &[u8]) {
103        if !self.enabled {
104            return;
105        }
106        let hash = hash_content(content);
107        let mtime = std::fs::metadata(&path)
108            .and_then(|m| m.modified())
109            .unwrap_or(SystemTime::UNIX_EPOCH);
110        LOCAL_READS.with(|reads| {
111            reads.borrow_mut().push((path, FileState::new(hash, mtime)));
112        });
113    }
114
115    /// Record a file read directly to shared state (for async contexts like tokio)
116    /// Use this instead of record_read when running in async tasks where
117    /// thread-locals won't be merged properly
118    pub fn record_read_async(&self, path: PathBuf, content: &[u8]) {
119        if !self.enabled {
120            return;
121        }
122        let hash = hash_content(content);
123        let mtime = std::fs::metadata(&path)
124            .and_then(|m| m.modified())
125            .unwrap_or(SystemTime::UNIX_EPOCH);
126        self.reads.lock().insert(path, FileState::new(hash, mtime));
127    }
128
129    pub fn record_read_with_hash(&self, path: PathBuf, hash: u64, mtime: SystemTime) {
130        if !self.enabled {
131            return;
132        }
133        LOCAL_READS.with(|reads| {
134            reads.borrow_mut().push((path, FileState::new(hash, mtime)));
135        });
136    }
137
138    pub fn record_write(&self, path: PathBuf, content: &[u8]) {
139        if !self.enabled {
140            return;
141        }
142        let hash = hash_content(content);
143        let mtime = std::fs::metadata(&path)
144            .and_then(|m| m.modified())
145            .unwrap_or(SystemTime::now());
146        LOCAL_WRITES.with(|writes| {
147            writes
148                .borrow_mut()
149                .push((path, FileState::new(hash, mtime)));
150        });
151    }
152
153    /// Record a file write directly to shared state (for async contexts like tokio)
154    /// Use this instead of record_write when running in async tasks where
155    /// thread-locals won't be merged properly
156    pub fn record_write_async(&self, path: PathBuf, content: &[u8]) {
157        if !self.enabled {
158            return;
159        }
160        let hash = hash_content(content);
161        let mtime = std::fs::metadata(&path)
162            .and_then(|m| m.modified())
163            .unwrap_or(SystemTime::now());
164        self.writes.lock().insert(path, FileState::new(hash, mtime));
165    }
166
167    pub fn merge_thread_locals(&self) {
168        if !self.enabled {
169            return;
170        }
171        LOCAL_READS.with(|reads| {
172            let mut local = reads.borrow_mut();
173            if !local.is_empty() {
174                let mut main = self.reads.lock();
175                for (path, state) in local.drain(..) {
176                    main.insert(path, state);
177                }
178            }
179        });
180        LOCAL_WRITES.with(|writes| {
181            let mut local = writes.borrow_mut();
182            if !local.is_empty() {
183                let mut main = self.writes.lock();
184                for (path, state) in local.drain(..) {
185                    main.insert(path, state);
186                }
187            }
188        });
189    }
190
191    pub fn merge_all_threads(&self) {
192        if !self.enabled {
193            return;
194        }
195        self.merge_thread_locals();
196        rayon::broadcast(|_| {
197            self.merge_thread_locals();
198        });
199    }
200
201    pub fn memo_get(&self, function: &'static str, input_hash: u64) -> Option<Vec<u8>> {
202        if !self.enabled {
203            return None;
204        }
205        let key = MemoKey {
206            function,
207            input_hash,
208        };
209        self.memo.get(&key).map(|v| v.clone())
210    }
211
212    pub fn memo_set(&self, function: &'static str, input_hash: u64, output: Vec<u8>) {
213        if !self.enabled {
214            return;
215        }
216        let key = MemoKey {
217            function,
218            input_hash,
219        };
220        self.memo.insert(key, output);
221    }
222
223    pub fn get_reads(&self) -> HashMap<PathBuf, FileState> {
224        self.merge_thread_locals();
225        self.reads.lock().clone()
226    }
227
228    pub fn get_writes(&self) -> HashMap<PathBuf, FileState> {
229        self.merge_thread_locals();
230        self.writes.lock().clone()
231    }
232
233    pub fn clear(&self) {
234        LOCAL_READS.with(|r| r.borrow_mut().clear());
235        LOCAL_WRITES.with(|w| w.borrow_mut().clear());
236        self.reads.lock().clear();
237        self.writes.lock().clear();
238        self.html_refs.lock().clear();
239        self.asset_to_pages.lock().clear();
240        self.memo.clear();
241    }
242
243    /// Clear only writes (for incremental rebuilds)
244    pub fn clear_writes(&self) {
245        LOCAL_WRITES.with(|w| w.borrow_mut().clear());
246        self.writes.lock().clear();
247    }
248
249    /// Record HTML asset references for a rendered page
250    pub fn record_html_refs(&self, page_path: PathBuf, refs: Vec<AssetRef>) {
251        if !self.enabled || refs.is_empty() {
252            return;
253        }
254        let mut html_refs = self.html_refs.lock();
255        let mut asset_to_pages = self.asset_to_pages.lock();
256
257        // Build reverse mapping
258        for asset_ref in &refs {
259            if let Some(ref source) = asset_ref.source_path {
260                asset_to_pages
261                    .entry(source.clone())
262                    .or_default()
263                    .push(page_path.clone());
264            }
265        }
266
267        html_refs.insert(page_path, refs);
268    }
269
270    /// Get pages that reference a given asset (by source path)
271    pub fn get_pages_for_asset(&self, asset_path: &PathBuf) -> Vec<PathBuf> {
272        self.asset_to_pages
273            .lock()
274            .get(asset_path)
275            .cloned()
276            .unwrap_or_default()
277    }
278
279    /// Check if an asset is referenced by any page
280    pub fn is_asset_referenced(&self, asset_path: &PathBuf) -> bool {
281        self.asset_to_pages.lock().contains_key(asset_path)
282    }
283
284    /// Get all HTML refs
285    pub fn get_html_refs(&self) -> HashMap<PathBuf, Vec<AssetRef>> {
286        self.html_refs.lock().clone()
287    }
288
289    /// Get reverse mapping of assets to pages
290    pub fn get_asset_to_pages(&self) -> HashMap<PathBuf, Vec<PathBuf>> {
291        self.asset_to_pages.lock().clone()
292    }
293
294    pub fn get_changed_files(&self, cached: &CachedDeps) -> Vec<PathBuf> {
295        let mut changed = Vec::new();
296        for (path, old_state) in &cached.reads {
297            if let Ok(metadata) = std::fs::metadata(path) {
298                if let Ok(mtime) = metadata.modified() {
299                    if mtime != old_state.mtime() {
300                        if let Ok(content) = std::fs::read(path) {
301                            if hash_content(&content) != old_state.hash {
302                                changed.push(path.clone());
303                            }
304                        } else {
305                            changed.push(path.clone());
306                        }
307                    }
308                } else {
309                    changed.push(path.clone());
310                }
311            } else {
312                changed.push(path.clone());
313            }
314        }
315        changed
316    }
317}
318
319#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
320pub struct CachedDeps {
321    pub reads: HashMap<PathBuf, FileState>,
322    pub writes: HashMap<PathBuf, FileState>,
323    /// Maps source asset path -> output pages that reference it
324    #[serde(default)]
325    pub asset_to_pages: HashMap<PathBuf, Vec<PathBuf>>,
326}
327
328impl CachedDeps {
329    pub fn from_tracker(tracker: &BuildTracker) -> Self {
330        Self {
331            reads: tracker.get_reads(),
332            writes: tracker.get_writes(),
333            asset_to_pages: tracker.get_asset_to_pages(),
334        }
335    }
336
337    pub fn load(path: &std::path::Path) -> Option<Self> {
338        let content = std::fs::read(path).ok()?;
339        postcard::from_bytes(&content).ok()
340    }
341
342    pub fn save(&self, path: &std::path::Path) -> std::io::Result<()> {
343        if let Some(parent) = path.parent() {
344            std::fs::create_dir_all(parent)?;
345        }
346        let encoded = postcard::to_allocvec(self).map_err(std::io::Error::other)?;
347        std::fs::write(path, encoded)
348    }
349}
350
351pub fn hash_content(content: &[u8]) -> u64 {
352    let mut hasher = XxHash64::with_seed(0);
353    hasher.write(content);
354    hasher.finish()
355}
356
357pub fn hash_str(s: &str) -> u64 {
358    hash_content(s.as_bytes())
359}
360
361/// Extract asset references from HTML content.
362/// Looks for: script src, link href, img src, video src, audio src, source src
363pub fn extract_html_asset_refs(html: &str) -> Vec<String> {
364    use regex::Regex;
365    use std::sync::LazyLock;
366
367    static PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
368        vec![
369            // <script src="...">
370            Regex::new(r#"<script[^>]+src=["']([^"']+)["']"#).unwrap(),
371            // <link href="..."> (for CSS)
372            Regex::new(r#"<link[^>]+href=["']([^"']+)["']"#).unwrap(),
373            // <img src="...">
374            Regex::new(r#"<img[^>]+src=["']([^"']+)["']"#).unwrap(),
375            // <video src="...">
376            Regex::new(r#"<video[^>]+src=["']([^"']+)["']"#).unwrap(),
377            // <audio src="...">
378            Regex::new(r#"<audio[^>]+src=["']([^"']+)["']"#).unwrap(),
379            // <source src="...">
380            Regex::new(r#"<source[^>]+src=["']([^"']+)["']"#).unwrap(),
381            // CSS url(...)
382            Regex::new(r#"url\(["']?([^"')]+)["']?\)"#).unwrap(),
383        ]
384    });
385
386    let mut refs = Vec::new();
387    for pattern in PATTERNS.iter() {
388        for cap in pattern.captures_iter(html) {
389            if let Some(path) = cap.get(1) {
390                let path = path.as_str();
391                // Only include local paths (starting with / but not //)
392                if path.starts_with('/') && !path.starts_with("//") {
393                    refs.push(path.to_string());
394                }
395            }
396        }
397    }
398    refs.sort();
399    refs.dedup();
400    refs
401}
402
403/// Extract image references from markdown content
404/// Looks for: ![alt](url) and ![alt](url "title")
405pub fn extract_markdown_asset_refs(markdown: &str) -> Vec<String> {
406    use regex::Regex;
407    use std::sync::LazyLock;
408
409    static IMG_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
410        // ![alt](url) or ![alt](url "title")
411        Regex::new(r#"!\[[^\]]*\]\(([^)"'\s]+)"#).unwrap()
412    });
413
414    let mut refs = Vec::new();
415    for cap in IMG_PATTERN.captures_iter(markdown) {
416        if let Some(path) = cap.get(1) {
417            let path = path.as_str();
418            // Only include local paths
419            if path.starts_with('/') && !path.starts_with("//") {
420                refs.push(path.to_string());
421            }
422        }
423    }
424    refs.sort();
425    refs.dedup();
426    refs
427}
428
429/// Map a URL path (e.g., "/js/editor.js") to a source path (e.g., "static/js/editor.js")
430/// Uses the writes map to find what source file produced the output
431pub fn resolve_url_to_source(
432    url_path: &str,
433    output_dir: &std::path::Path,
434    writes: &HashMap<PathBuf, FileState>,
435    project_dir: &std::path::Path,
436) -> Option<PathBuf> {
437    // Convert URL path to output file path
438    // e.g., "/js/editor.js" -> "{output_dir}/js/editor.js"
439    let url_path = url_path.trim_start_matches('/');
440    let output_path = output_dir.join(url_path);
441    let output_canonical = output_path.canonicalize().ok()?;
442
443    // Check if this output was written during the build
444    if writes.contains_key(&output_canonical) {
445        // Try common source mappings:
446        // 1. static/{path} -> {output_dir}/{path}
447        // 2. {path} -> {output_dir}/{path}
448        let candidates = [
449            project_dir.join("static").join(url_path),
450            project_dir.join(url_path),
451        ];
452
453        for candidate in candidates {
454            if candidate.exists() {
455                return candidate.canonicalize().ok();
456            }
457        }
458    }
459
460    None
461}
462
463pub type SharedTracker = Arc<BuildTracker>;
464
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing is deterministic and reacts to a change in content.
    #[test]
    fn test_hash_content() {
        let baseline = hash_content(b"hello world");
        assert_eq!(baseline, hash_content(b"hello world"));
        assert_ne!(baseline, hash_content(b"hello world!"));
    }

    /// One recorded read and one recorded write show up after merging.
    #[test]
    fn test_tracker_read_write() {
        let tracker = BuildTracker::new();

        let source = PathBuf::from("test.txt");
        let target = PathBuf::from("output.txt");
        tracker.record_read(source, b"content");
        tracker.record_write(target, b"output");

        tracker.merge_thread_locals();

        assert_eq!(tracker.get_reads().len(), 1);
        assert_eq!(tracker.get_writes().len(), 1);
    }

    /// A stored memo entry is returned for its key and nothing else.
    #[test]
    fn test_memo() {
        let tracker = BuildTracker::new();
        let payload = b"cached".to_vec();

        tracker.memo_set("render_markdown", 12345, payload.clone());

        assert_eq!(tracker.memo_get("render_markdown", 12345), Some(payload));
        assert_eq!(tracker.memo_get("render_markdown", 99999), None);
    }
}
509}