rs_web/
tracker.rs

1//! Build dependency tracker for incremental builds
2
3use dashmap::DashMap;
4use parking_lot::Mutex;
5use std::cell::RefCell;
6use std::collections::HashMap;
7use std::hash::{Hash, Hasher};
8use std::path::PathBuf;
9use std::sync::Arc;
10use std::time::SystemTime;
11use twox_hash::XxHash64;
12
13#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
14pub struct FileState {
15    pub hash: u64,
16    pub mtime_secs: u64,
17    pub mtime_nanos: u32,
18}
19
20impl FileState {
21    pub fn new(hash: u64, mtime: SystemTime) -> Self {
22        let duration = mtime
23            .duration_since(SystemTime::UNIX_EPOCH)
24            .unwrap_or_default();
25        Self {
26            hash,
27            mtime_secs: duration.as_secs(),
28            mtime_nanos: duration.subsec_nanos(),
29        }
30    }
31
32    pub fn mtime(&self) -> SystemTime {
33        SystemTime::UNIX_EPOCH + std::time::Duration::new(self.mtime_secs, self.mtime_nanos)
34    }
35}
36
37thread_local! {
38    static LOCAL_READS: RefCell<Vec<(PathBuf, FileState)>> = RefCell::default();
39    static LOCAL_WRITES: RefCell<Vec<(PathBuf, FileState)>> = RefCell::default();
40}
41
/// Key of the memoization cache: a function name paired with a hash of its input.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct MemoKey {
    /// Name identifying the memoized function.
    pub function: &'static str,
    /// Hash identifying the input the cached output belongs to.
    pub input_hash: u64,
}
47
/// A reference to an asset found in rendered HTML (script src, link href, img src, ...).
#[derive(Clone, Debug)]
pub struct AssetRef {
    /// URL path exactly as written in the HTML, e.g. "/js/editor.js".
    pub url_path: String,
    /// Source file behind that URL when it could be determined,
    /// e.g. "static/js/editor.js"; `None` otherwise.
    pub source_path: Option<PathBuf>,
}
56
57#[derive(Debug)]
58pub struct BuildTracker {
59    reads: Mutex<HashMap<PathBuf, FileState>>,
60    writes: Mutex<HashMap<PathBuf, FileState>>,
61    /// Maps output page path -> asset references found in its HTML
62    html_refs: Mutex<HashMap<PathBuf, Vec<AssetRef>>>,
63    /// Maps source asset path -> output pages that reference it
64    asset_to_pages: Mutex<HashMap<PathBuf, Vec<PathBuf>>>,
65    memo: DashMap<MemoKey, Vec<u8>>,
66    enabled: bool,
67}
68
69impl Default for BuildTracker {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
75impl BuildTracker {
76    pub fn new() -> Self {
77        Self {
78            reads: Mutex::new(HashMap::new()),
79            writes: Mutex::new(HashMap::new()),
80            html_refs: Mutex::new(HashMap::new()),
81            asset_to_pages: Mutex::new(HashMap::new()),
82            memo: DashMap::new(),
83            enabled: true,
84        }
85    }
86
87    pub fn disabled() -> Self {
88        Self {
89            reads: Mutex::new(HashMap::new()),
90            writes: Mutex::new(HashMap::new()),
91            html_refs: Mutex::new(HashMap::new()),
92            asset_to_pages: Mutex::new(HashMap::new()),
93            memo: DashMap::new(),
94            enabled: false,
95        }
96    }
97
98    pub fn is_enabled(&self) -> bool {
99        self.enabled
100    }
101
102    pub fn record_read(&self, path: PathBuf, content: &[u8]) {
103        if !self.enabled {
104            return;
105        }
106        let hash = hash_content(content);
107        let mtime = std::fs::metadata(&path)
108            .and_then(|m| m.modified())
109            .unwrap_or(SystemTime::UNIX_EPOCH);
110        LOCAL_READS.with(|reads| {
111            reads.borrow_mut().push((path, FileState::new(hash, mtime)));
112        });
113    }
114
115    pub fn record_read_with_hash(&self, path: PathBuf, hash: u64, mtime: SystemTime) {
116        if !self.enabled {
117            return;
118        }
119        LOCAL_READS.with(|reads| {
120            reads.borrow_mut().push((path, FileState::new(hash, mtime)));
121        });
122    }
123
124    pub fn record_write(&self, path: PathBuf, content: &[u8]) {
125        if !self.enabled {
126            return;
127        }
128        let hash = hash_content(content);
129        let mtime = std::fs::metadata(&path)
130            .and_then(|m| m.modified())
131            .unwrap_or(SystemTime::now());
132        LOCAL_WRITES.with(|writes| {
133            writes
134                .borrow_mut()
135                .push((path, FileState::new(hash, mtime)));
136        });
137    }
138
139    pub fn merge_thread_locals(&self) {
140        if !self.enabled {
141            return;
142        }
143        LOCAL_READS.with(|reads| {
144            let mut local = reads.borrow_mut();
145            if !local.is_empty() {
146                let mut main = self.reads.lock();
147                for (path, state) in local.drain(..) {
148                    main.insert(path, state);
149                }
150            }
151        });
152        LOCAL_WRITES.with(|writes| {
153            let mut local = writes.borrow_mut();
154            if !local.is_empty() {
155                let mut main = self.writes.lock();
156                for (path, state) in local.drain(..) {
157                    main.insert(path, state);
158                }
159            }
160        });
161    }
162
163    pub fn merge_all_threads(&self) {
164        if !self.enabled {
165            return;
166        }
167        self.merge_thread_locals();
168        rayon::broadcast(|_| {
169            self.merge_thread_locals();
170        });
171    }
172
173    pub fn memo_get(&self, function: &'static str, input_hash: u64) -> Option<Vec<u8>> {
174        if !self.enabled {
175            return None;
176        }
177        let key = MemoKey {
178            function,
179            input_hash,
180        };
181        self.memo.get(&key).map(|v| v.clone())
182    }
183
184    pub fn memo_set(&self, function: &'static str, input_hash: u64, output: Vec<u8>) {
185        if !self.enabled {
186            return;
187        }
188        let key = MemoKey {
189            function,
190            input_hash,
191        };
192        self.memo.insert(key, output);
193    }
194
195    pub fn get_reads(&self) -> HashMap<PathBuf, FileState> {
196        self.merge_thread_locals();
197        self.reads.lock().clone()
198    }
199
200    pub fn get_writes(&self) -> HashMap<PathBuf, FileState> {
201        self.merge_thread_locals();
202        self.writes.lock().clone()
203    }
204
205    pub fn clear(&self) {
206        LOCAL_READS.with(|r| r.borrow_mut().clear());
207        LOCAL_WRITES.with(|w| w.borrow_mut().clear());
208        self.reads.lock().clear();
209        self.writes.lock().clear();
210        self.html_refs.lock().clear();
211        self.asset_to_pages.lock().clear();
212        self.memo.clear();
213    }
214
215    /// Record HTML asset references for a rendered page
216    pub fn record_html_refs(&self, page_path: PathBuf, refs: Vec<AssetRef>) {
217        if !self.enabled || refs.is_empty() {
218            return;
219        }
220        let mut html_refs = self.html_refs.lock();
221        let mut asset_to_pages = self.asset_to_pages.lock();
222
223        // Build reverse mapping
224        for asset_ref in &refs {
225            if let Some(ref source) = asset_ref.source_path {
226                asset_to_pages
227                    .entry(source.clone())
228                    .or_default()
229                    .push(page_path.clone());
230            }
231        }
232
233        html_refs.insert(page_path, refs);
234    }
235
236    /// Get pages that reference a given asset (by source path)
237    pub fn get_pages_for_asset(&self, asset_path: &PathBuf) -> Vec<PathBuf> {
238        self.asset_to_pages
239            .lock()
240            .get(asset_path)
241            .cloned()
242            .unwrap_or_default()
243    }
244
245    /// Check if an asset is referenced by any page
246    pub fn is_asset_referenced(&self, asset_path: &PathBuf) -> bool {
247        self.asset_to_pages.lock().contains_key(asset_path)
248    }
249
250    /// Get all HTML refs
251    pub fn get_html_refs(&self) -> HashMap<PathBuf, Vec<AssetRef>> {
252        self.html_refs.lock().clone()
253    }
254
255    /// Get reverse mapping of assets to pages
256    pub fn get_asset_to_pages(&self) -> HashMap<PathBuf, Vec<PathBuf>> {
257        self.asset_to_pages.lock().clone()
258    }
259
260    pub fn get_changed_files(&self, cached: &CachedDeps) -> Vec<PathBuf> {
261        let mut changed = Vec::new();
262        for (path, old_state) in &cached.reads {
263            if let Ok(metadata) = std::fs::metadata(path) {
264                if let Ok(mtime) = metadata.modified() {
265                    if mtime != old_state.mtime() {
266                        if let Ok(content) = std::fs::read(path) {
267                            if hash_content(&content) != old_state.hash {
268                                changed.push(path.clone());
269                            }
270                        } else {
271                            changed.push(path.clone());
272                        }
273                    }
274                } else {
275                    changed.push(path.clone());
276                }
277            } else {
278                changed.push(path.clone());
279            }
280        }
281        changed
282    }
283}
284
285#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
286pub struct CachedDeps {
287    pub reads: HashMap<PathBuf, FileState>,
288    pub writes: HashMap<PathBuf, FileState>,
289    /// Maps source asset path -> output pages that reference it
290    #[serde(default)]
291    pub asset_to_pages: HashMap<PathBuf, Vec<PathBuf>>,
292}
293
294impl CachedDeps {
295    pub fn from_tracker(tracker: &BuildTracker) -> Self {
296        Self {
297            reads: tracker.get_reads(),
298            writes: tracker.get_writes(),
299            asset_to_pages: tracker.get_asset_to_pages(),
300        }
301    }
302
303    pub fn load(path: &std::path::Path) -> Option<Self> {
304        let content = std::fs::read(path).ok()?;
305        postcard::from_bytes(&content).ok()
306    }
307
308    pub fn save(&self, path: &std::path::Path) -> std::io::Result<()> {
309        if let Some(parent) = path.parent() {
310            std::fs::create_dir_all(parent)?;
311        }
312        let encoded = postcard::to_allocvec(self).map_err(std::io::Error::other)?;
313        std::fs::write(path, encoded)
314    }
315}
316
317pub fn hash_content(content: &[u8]) -> u64 {
318    let mut hasher = XxHash64::with_seed(0);
319    hasher.write(content);
320    hasher.finish()
321}
322
323pub fn hash_str(s: &str) -> u64 {
324    hash_content(s.as_bytes())
325}
326
327/// Extract asset references from HTML content.
328/// Looks for: script src, link href, img src, video src, audio src, source src
329pub fn extract_html_asset_refs(html: &str) -> Vec<String> {
330    use regex::Regex;
331    use std::sync::LazyLock;
332
333    static PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
334        vec![
335            // <script src="...">
336            Regex::new(r#"<script[^>]+src=["']([^"']+)["']"#).unwrap(),
337            // <link href="..."> (for CSS)
338            Regex::new(r#"<link[^>]+href=["']([^"']+)["']"#).unwrap(),
339            // <img src="...">
340            Regex::new(r#"<img[^>]+src=["']([^"']+)["']"#).unwrap(),
341            // <video src="...">
342            Regex::new(r#"<video[^>]+src=["']([^"']+)["']"#).unwrap(),
343            // <audio src="...">
344            Regex::new(r#"<audio[^>]+src=["']([^"']+)["']"#).unwrap(),
345            // <source src="...">
346            Regex::new(r#"<source[^>]+src=["']([^"']+)["']"#).unwrap(),
347            // CSS url(...)
348            Regex::new(r#"url\(["']?([^"')]+)["']?\)"#).unwrap(),
349        ]
350    });
351
352    let mut refs = Vec::new();
353    for pattern in PATTERNS.iter() {
354        for cap in pattern.captures_iter(html) {
355            if let Some(path) = cap.get(1) {
356                let path = path.as_str();
357                // Only include local paths (starting with / but not //)
358                if path.starts_with('/') && !path.starts_with("//") {
359                    refs.push(path.to_string());
360                }
361            }
362        }
363    }
364    refs.sort();
365    refs.dedup();
366    refs
367}
368
369/// Extract image references from markdown content
370/// Looks for: ![alt](url) and ![alt](url "title")
371pub fn extract_markdown_asset_refs(markdown: &str) -> Vec<String> {
372    use regex::Regex;
373    use std::sync::LazyLock;
374
375    static IMG_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
376        // ![alt](url) or ![alt](url "title")
377        Regex::new(r#"!\[[^\]]*\]\(([^)"'\s]+)"#).unwrap()
378    });
379
380    let mut refs = Vec::new();
381    for cap in IMG_PATTERN.captures_iter(markdown) {
382        if let Some(path) = cap.get(1) {
383            let path = path.as_str();
384            // Only include local paths
385            if path.starts_with('/') && !path.starts_with("//") {
386                refs.push(path.to_string());
387            }
388        }
389    }
390    refs.sort();
391    refs.dedup();
392    refs
393}
394
395/// Map a URL path (e.g., "/js/editor.js") to a source path (e.g., "static/js/editor.js")
396/// Uses the writes map to find what source file produced the output
397pub fn resolve_url_to_source(
398    url_path: &str,
399    output_dir: &std::path::Path,
400    writes: &HashMap<PathBuf, FileState>,
401    project_dir: &std::path::Path,
402) -> Option<PathBuf> {
403    // Convert URL path to output file path
404    // e.g., "/js/editor.js" -> "{output_dir}/js/editor.js"
405    let url_path = url_path.trim_start_matches('/');
406    let output_path = output_dir.join(url_path);
407    let output_canonical = output_path.canonicalize().ok()?;
408
409    // Check if this output was written during the build
410    if writes.contains_key(&output_canonical) {
411        // Try common source mappings:
412        // 1. static/{path} -> {output_dir}/{path}
413        // 2. {path} -> {output_dir}/{path}
414        let candidates = [
415            project_dir.join("static").join(url_path),
416            project_dir.join(url_path),
417        ];
418
419        for candidate in candidates {
420            if candidate.exists() {
421                return candidate.canonicalize().ok();
422            }
423        }
424    }
425
426    None
427}
428
429pub type SharedTracker = Arc<BuildTracker>;
430
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing is deterministic and reacts to a change in the content.
    #[test]
    fn test_hash_content() {
        let baseline = hash_content(b"hello world");
        assert_eq!(baseline, hash_content(b"hello world"));
        assert_ne!(baseline, hash_content(b"hello world!"));
    }

    /// One recorded read and one recorded write show up after merging.
    #[test]
    fn test_tracker_read_write() {
        let tracker = BuildTracker::new();

        tracker.record_read(PathBuf::from("test.txt"), b"content");
        tracker.record_write(PathBuf::from("output.txt"), b"output");
        tracker.merge_thread_locals();

        assert_eq!(tracker.get_reads().len(), 1);
        assert_eq!(tracker.get_writes().len(), 1);
    }

    /// A stored memo entry is returned for its key and nothing for another key.
    #[test]
    fn test_memo() {
        let tracker = BuildTracker::new();
        tracker.memo_set("render_markdown", 12345, b"cached".to_vec());

        let hit = tracker.memo_get("render_markdown", 12345);
        let miss = tracker.memo_get("render_markdown", 99999);

        assert_eq!(hit, Some(b"cached".to_vec()));
        assert_eq!(miss, None);
    }
}
475}