Skip to main content

harn_vm/
metadata.rs

1//! Project metadata store for `.burin/metadata/` sharded JSON files.
2//!
3//! Provides `metadata_get`, `metadata_set`, `metadata_save`, `metadata_stale`,
4//! and `metadata_refresh_hashes` builtins. Compatible with the Swift
5//! DirectoryMetadataStore format (sharded by package root).
6//!
7//! Resolution uses hierarchical inheritance: child directories inherit from
8//! parent directories, with overrides at each level.
9
10use std::cell::RefCell;
11use std::collections::BTreeMap;
12use std::path::{Path, PathBuf};
13use std::rc::Rc;
14
15use crate::value::{VmError, VmValue};
16use crate::vm::Vm;
17
/// Name of a metadata namespace (the outer key inside a directory's metadata).
type Namespace = String;
/// Name of a single field stored inside a namespace.
type FieldKey = String;
20
/// Per-directory metadata: namespaces -> keys -> JSON values.
///
/// `Default` yields an entry with no namespaces at all.
#[derive(Clone, Default)]
struct DirectoryMetadata {
    /// Namespace name -> (field key -> arbitrary JSON value).
    namespaces: BTreeMap<Namespace, BTreeMap<FieldKey, serde_json::Value>>,
}
26
/// The full metadata store (all directories).
struct MetadataState {
    /// Directory key (e.g. `"."`, `""` or `"src/foo"`) -> metadata recorded
    /// directly at that directory (before any inheritance is applied).
    entries: BTreeMap<String, DirectoryMetadata>,
    /// Project root; shard files live under `<base_dir>/.burin/metadata`.
    base_dir: PathBuf,
    /// Set once the first load attempt has been made, so disk is read at most once.
    loaded: bool,
    /// True when in-memory entries have changes that `save` has not written yet.
    dirty: bool,
}
34
35impl MetadataState {
36    fn new(base_dir: &Path) -> Self {
37        Self {
38            entries: BTreeMap::new(),
39            base_dir: base_dir.to_path_buf(),
40            loaded: false,
41            dirty: false,
42        }
43    }
44
45    fn metadata_dir(&self) -> PathBuf {
46        self.base_dir.join(".burin").join("metadata")
47    }
48
49    fn ensure_loaded(&mut self) {
50        if self.loaded {
51            return;
52        }
53        self.loaded = true;
54        let meta_dir = self.metadata_dir();
55        let entries = match std::fs::read_dir(&meta_dir) {
56            Ok(e) => e,
57            Err(_) => return,
58        };
59        for entry in entries.flatten() {
60            let path = entry.path();
61            if path.extension().map(|e| e == "json").unwrap_or(false) {
62                if let Ok(contents) = std::fs::read_to_string(&path) {
63                    self.load_shard(&contents);
64                }
65            }
66        }
67    }
68
69    fn load_shard(&mut self, contents: &str) {
70        let parsed: serde_json::Value = match serde_json::from_str(contents) {
71            Ok(v) => v,
72            Err(_) => return,
73        };
74        let shard_entries = match parsed.get("entries").and_then(|e| e.as_object()) {
75            Some(e) => e,
76            None => return,
77        };
78        for (dir, meta_val) in shard_entries {
79            let meta = parse_directory_metadata(meta_val);
80            self.entries.insert(dir.clone(), meta);
81        }
82    }
83
84    /// Resolve metadata for a directory with hierarchical inheritance.
85    /// Walks from root (".") through each path component, merging at each level.
86    fn resolve(&mut self, directory: &str) -> DirectoryMetadata {
87        self.ensure_loaded();
88        let mut result = DirectoryMetadata::default();
89
90        // Start with root
91        if let Some(root) = self.entries.get(".").or_else(|| self.entries.get("")) {
92            merge_metadata(&mut result, root);
93        }
94
95        // Walk path components
96        let components: Vec<&str> = directory
97            .split('/')
98            .filter(|c| !c.is_empty() && *c != ".")
99            .collect();
100        let mut current = String::new();
101        for component in components {
102            if current.is_empty() {
103                current = component.to_string();
104            } else {
105                current = format!("{current}/{component}");
106            }
107            if let Some(meta) = self.entries.get(&current) {
108                merge_metadata(&mut result, meta);
109            }
110        }
111
112        result
113    }
114
115    /// Get a specific namespace for a resolved directory.
116    fn get_namespace(
117        &mut self,
118        directory: &str,
119        namespace: &str,
120    ) -> Option<BTreeMap<FieldKey, serde_json::Value>> {
121        let resolved = self.resolve(directory);
122        resolved.namespaces.get(namespace).cloned()
123    }
124
125    /// Set metadata for a directory + namespace.
126    fn set_namespace(
127        &mut self,
128        directory: &str,
129        namespace: &str,
130        data: BTreeMap<FieldKey, serde_json::Value>,
131    ) {
132        self.ensure_loaded();
133        let meta = self.entries.entry(directory.to_string()).or_default();
134        let ns = meta.namespaces.entry(namespace.to_string()).or_default();
135        for (k, v) in data {
136            ns.insert(k, v);
137        }
138        self.dirty = true;
139    }
140
141    /// Save all metadata back to sharded JSON files.
142    fn save(&mut self) -> Result<(), String> {
143        if !self.dirty {
144            return Ok(());
145        }
146        let meta_dir = self.metadata_dir();
147        std::fs::create_dir_all(&meta_dir).map_err(|e| format!("metadata mkdir: {e}"))?;
148
149        // Shard by simple strategy: everything in one "root" shard for now.
150        // This matches Swift behavior for single-package projects.
151        let mut shard = serde_json::Map::new();
152        for (dir, meta) in &self.entries {
153            shard.insert(dir.clone(), serialize_directory_metadata(meta));
154        }
155
156        let store_obj = serde_json::json!({
157            "version": 2,
158            "generatedAt": chrono_now_iso(),
159            "entries": serde_json::Value::Object(shard)
160        });
161
162        let json =
163            serde_json::to_string_pretty(&store_obj).map_err(|e| format!("metadata json: {e}"))?;
164
165        let shard_path = meta_dir.join("root.json");
166        std::fs::write(&shard_path, json).map_err(|e| format!("metadata write: {e}"))?;
167        self.dirty = false;
168        Ok(())
169    }
170}
171
/// Current system time as a UTC ISO 8601 timestamp (`YYYY-MM-DDTHH:MM:SSZ`),
/// computed with the standard library only.
///
/// A clock that reads earlier than the Unix epoch is treated as the epoch.
fn chrono_now_iso() -> String {
    let is_leap = |year: i64| year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);

    let total_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let second_of_day = total_secs % 86400;
    // Whole days since 1970-01-01; whittled down to a day-of-month below.
    let mut day_offset = (total_secs / 86400) as i64;

    // Peel off whole years.
    let mut year = 1970i64;
    loop {
        let year_len: i64 = if is_leap(year) { 366 } else { 365 };
        if day_offset < year_len {
            break;
        }
        day_offset -= year_len;
        year += 1;
    }

    // Peel off whole months of the final year.
    let february: i64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month = 1usize;
    for &len in month_lengths.iter() {
        if day_offset < len {
            break;
        }
        day_offset -= len;
        month += 1;
    }

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year,
        month,
        day_offset + 1,
        second_of_day / 3600,
        (second_of_day % 3600) / 60,
        second_of_day % 60
    )
}
233
234fn merge_metadata(target: &mut DirectoryMetadata, source: &DirectoryMetadata) {
235    for (ns, fields) in &source.namespaces {
236        let target_ns = target.namespaces.entry(ns.clone()).or_default();
237        for (k, v) in fields {
238            target_ns.insert(k.clone(), v.clone());
239        }
240    }
241}
242
243fn parse_directory_metadata(val: &serde_json::Value) -> DirectoryMetadata {
244    let mut meta = DirectoryMetadata::default();
245    let obj = match val.as_object() {
246        Some(o) => o,
247        None => return meta,
248    };
249    // Parse "namespaces" key (the standard format)
250    if let Some(ns_obj) = obj.get("namespaces").and_then(|n| n.as_object()) {
251        for (ns_name, fields_val) in ns_obj {
252            if let Some(fields) = fields_val.as_object() {
253                let mut field_map = BTreeMap::new();
254                for (k, v) in fields {
255                    field_map.insert(k.clone(), v.clone());
256                }
257                meta.namespaces.insert(ns_name.clone(), field_map);
258            }
259        }
260    }
261    meta
262}
263
264fn serialize_directory_metadata(meta: &DirectoryMetadata) -> serde_json::Value {
265    let mut ns_obj = serde_json::Map::new();
266    for (ns_name, fields) in &meta.namespaces {
267        let mut fields_obj = serde_json::Map::new();
268        for (k, v) in fields {
269            fields_obj.insert(k.clone(), v.clone());
270        }
271        ns_obj.insert(ns_name.clone(), serde_json::Value::Object(fields_obj));
272    }
273    serde_json::json!({ "namespaces": serde_json::Value::Object(ns_obj) })
274}
275
276fn vm_to_json(val: &VmValue) -> serde_json::Value {
277    match val {
278        VmValue::String(s) => serde_json::Value::String(s.to_string()),
279        VmValue::Int(n) => serde_json::json!(*n),
280        VmValue::Float(n) => serde_json::json!(*n),
281        VmValue::Bool(b) => serde_json::Value::Bool(*b),
282        VmValue::Nil => serde_json::Value::Null,
283        VmValue::List(items) => serde_json::Value::Array(items.iter().map(vm_to_json).collect()),
284        VmValue::Dict(map) => {
285            let obj: serde_json::Map<String, serde_json::Value> = map
286                .iter()
287                .map(|(k, v)| (k.clone(), vm_to_json(v)))
288                .collect();
289            serde_json::Value::Object(obj)
290        }
291        _ => serde_json::Value::Null,
292    }
293}
294
295fn json_to_vm(jv: &serde_json::Value) -> VmValue {
296    match jv {
297        serde_json::Value::Null => VmValue::Nil,
298        serde_json::Value::Bool(b) => VmValue::Bool(*b),
299        serde_json::Value::Number(n) => {
300            if let Some(i) = n.as_i64() {
301                VmValue::Int(i)
302            } else {
303                VmValue::Float(n.as_f64().unwrap_or(0.0))
304            }
305        }
306        serde_json::Value::String(s) => VmValue::String(Rc::from(s.as_str())),
307        serde_json::Value::Array(arr) => {
308            VmValue::List(Rc::new(arr.iter().map(json_to_vm).collect()))
309        }
310        serde_json::Value::Object(map) => {
311            let mut m = BTreeMap::new();
312            for (k, v) in map {
313                m.insert(k.clone(), json_to_vm(v));
314            }
315            VmValue::Dict(Rc::new(m))
316        }
317    }
318}
319
320/// Register metadata builtins on a VM.
321///
322/// In standalone mode, these operate directly on `.burin/metadata/` files.
323/// In bridge mode, these are registered **before** bridge builtins so the
324/// host can override them if needed (but typically the VM handles this natively).
325pub fn register_metadata_builtins(vm: &mut Vm, base_dir: &Path) {
326    let state = Rc::new(RefCell::new(MetadataState::new(base_dir)));
327
328    // metadata_get(dir, namespace?) -> dict | nil
329    let s = Rc::clone(&state);
330    vm.register_builtin("metadata_get", move |args, _out| {
331        let dir = args.first().map(|a| a.display()).unwrap_or_default();
332        let namespace = args.get(1).and_then(|a| {
333            if matches!(a, VmValue::Nil) {
334                None
335            } else {
336                Some(a.display())
337            }
338        });
339
340        let mut st = s.borrow_mut();
341        if let Some(ns) = namespace {
342            match st.get_namespace(&dir, &ns) {
343                Some(fields) => {
344                    let mut m = BTreeMap::new();
345                    for (k, v) in fields {
346                        m.insert(k, json_to_vm(&v));
347                    }
348                    Ok(VmValue::Dict(Rc::new(m)))
349                }
350                None => Ok(VmValue::Nil),
351            }
352        } else {
353            // Return all namespaces flattened
354            let resolved = st.resolve(&dir);
355            let mut m = BTreeMap::new();
356            for fields in resolved.namespaces.values() {
357                for (k, v) in fields {
358                    m.insert(k.clone(), json_to_vm(v));
359                }
360            }
361            if m.is_empty() {
362                Ok(VmValue::Nil)
363            } else {
364                Ok(VmValue::Dict(Rc::new(m)))
365            }
366        }
367    });
368
369    // metadata_set(dir, namespace, data_dict)
370    let s = Rc::clone(&state);
371    vm.register_builtin("metadata_set", move |args, _out| {
372        let dir = args.first().map(|a| a.display()).unwrap_or_default();
373        let namespace = args.get(1).map(|a| a.display()).unwrap_or_default();
374        let data_val = args.get(2).unwrap_or(&VmValue::Nil);
375
376        let mut data = BTreeMap::new();
377        if let VmValue::Dict(dict) = data_val {
378            for (k, v) in dict.iter() {
379                data.insert(k.clone(), vm_to_json(v));
380            }
381        }
382
383        if !data.is_empty() {
384            s.borrow_mut().set_namespace(&dir, &namespace, data);
385        }
386        Ok(VmValue::Nil)
387    });
388
389    // metadata_save()
390    let s = Rc::clone(&state);
391    vm.register_builtin("metadata_save", move |_args, _out| {
392        s.borrow_mut().save().map_err(VmError::Runtime)?;
393        Ok(VmValue::Nil)
394    });
395
396    // metadata_stale(project) -> {any_stale: bool, tier1: [dirs], tier2: [dirs]}
397    // Compare stored structureHash/contentHash against current filesystem state.
398    let s = Rc::clone(&state);
399    let base2 = base_dir.to_path_buf();
400    vm.register_builtin("metadata_stale", move |_args, _out| {
401        s.borrow_mut().ensure_loaded();
402        let state = s.borrow();
403        let mut tier1_stale: Vec<VmValue> = Vec::new();
404        let mut tier2_stale: Vec<VmValue> = Vec::new();
405
406        for (dir, meta) in &state.entries {
407            let full_dir = if dir.is_empty() {
408                base2.clone()
409            } else {
410                base2.join(dir)
411            };
412            // Tier 1: structureHash — file list + sizes
413            if let Some(stored_hash) = meta
414                .namespaces
415                .get("classification")
416                .and_then(|ns| ns.get("structureHash"))
417                .and_then(|v| v.as_str())
418            {
419                let current_hash = compute_structure_hash(&full_dir);
420                if current_hash != stored_hash {
421                    tier1_stale.push(VmValue::String(Rc::from(dir.as_str())));
422                    continue; // If structure changed, skip tier2 check
423                }
424            }
425            // Tier 2: contentHash — file content digest
426            if let Some(stored_hash) = meta
427                .namespaces
428                .get("classification")
429                .and_then(|ns| ns.get("contentHash"))
430                .and_then(|v| v.as_str())
431            {
432                let current_hash = compute_content_hash_for_dir(&full_dir);
433                if current_hash != stored_hash {
434                    tier2_stale.push(VmValue::String(Rc::from(dir.as_str())));
435                }
436            }
437        }
438
439        let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
440        let mut m = BTreeMap::new();
441        m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
442        m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
443        m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
444        Ok(VmValue::Dict(Rc::new(m)))
445    });
446
447    // metadata_refresh_hashes(project) -> nil
448    // Recompute and store structureHash for all directories.
449    let s = Rc::clone(&state);
450    let base3 = base_dir.to_path_buf();
451    vm.register_builtin("metadata_refresh_hashes", move |_args, _out| {
452        let mut state = s.borrow_mut();
453        state.ensure_loaded();
454        let dirs: Vec<String> = state.entries.keys().cloned().collect();
455        for dir in dirs {
456            let full_dir = if dir.is_empty() {
457                base3.clone()
458            } else {
459                base3.join(&dir)
460            };
461            let hash = compute_structure_hash(&full_dir);
462            let entry = state.entries.entry(dir).or_default();
463            let ns = entry
464                .namespaces
465                .entry("classification".to_string())
466                .or_default();
467            ns.insert("structureHash".to_string(), serde_json::Value::String(hash));
468        }
469        state.dirty = true;
470        Ok(VmValue::Nil)
471    });
472
473    // compute_content_hash(dir) -> string
474    // Hash of file list + sizes + mtimes in directory for staleness tracking
475    let base = base_dir.to_path_buf();
476    vm.register_builtin("compute_content_hash", move |args, _out| {
477        let dir = args.first().map(|a| a.display()).unwrap_or_default();
478        let full_dir = if dir.is_empty() {
479            base.clone()
480        } else {
481            base.join(&dir)
482        };
483        let hash = compute_content_hash_for_dir(&full_dir);
484        Ok(VmValue::String(Rc::from(hash)))
485    });
486
487    // invalidate_facts(dir) -> nil (no-op — facts live in metadata namespace now)
488    vm.register_builtin("invalidate_facts", |_args, _out| Ok(VmValue::Nil));
489
490    // Also register scan builtins (scan_directory)
491    register_scan_builtins(vm, base_dir);
492}
493
494/// Compute structure hash for a directory (file names + sizes).
495fn compute_structure_hash(dir: &Path) -> String {
496    let mut entries: Vec<String> = Vec::new();
497    if let Ok(rd) = std::fs::read_dir(dir) {
498        for entry in rd.flatten() {
499            if let Ok(meta) = entry.metadata() {
500                let name = entry.file_name().to_string_lossy().to_string();
501                entries.push(format!("{}:{}", name, meta.len()));
502            }
503        }
504    }
505    entries.sort();
506    let joined = entries.join("|");
507    format!("{:x}", fnv_hash(joined.as_bytes()))
508}
509
510/// Compute content hash for a directory (file names + sizes + mtimes).
511fn compute_content_hash_for_dir(dir: &Path) -> String {
512    let mut entries: Vec<String> = Vec::new();
513    if let Ok(rd) = std::fs::read_dir(dir) {
514        for entry in rd.flatten() {
515            if let Ok(meta) = entry.metadata() {
516                let name = entry.file_name().to_string_lossy().to_string();
517                let mtime = meta
518                    .modified()
519                    .ok()
520                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
521                    .map(|d| d.as_secs())
522                    .unwrap_or(0);
523                entries.push(format!("{}:{}:{}", name, meta.len(), mtime));
524            }
525        }
526    }
527    entries.sort();
528    let joined = entries.join("|");
529    format!("{:x}", fnv_hash(joined.as_bytes()))
530}
531
/// 64-bit FNV-1a hash of `data`.
///
/// Not cryptographic; used here only to detect changes cheaply.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    // FNV-1a: XOR the byte in first, then multiply by the prime.
    data.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
541
542/// Register scan_directory builtin: native Rust file enumeration.
543pub fn register_scan_builtins(vm: &mut Vm, base_dir: &Path) {
544    let base = base_dir.to_path_buf();
545    // scan_directory(path?, pattern?) -> [{path, size, modified, is_dir}, ...]
546    vm.register_builtin("scan_directory", move |args, _out| {
547        let rel_dir = args.first().map(|a| a.display()).unwrap_or_default();
548        let pattern = args.get(1).and_then(|a| {
549            if matches!(a, VmValue::Nil) {
550                None
551            } else {
552                Some(a.display())
553            }
554        });
555        let full_dir = if rel_dir.is_empty() {
556            base.clone()
557        } else {
558            base.join(&rel_dir)
559        };
560        let mut results: Vec<VmValue> = Vec::new();
561        scan_dir_recursive(&full_dir, &base, &pattern, &mut results, 0, 5);
562        Ok(VmValue::List(Rc::new(results)))
563    });
564}
565
566fn scan_dir_recursive(
567    dir: &Path,
568    base: &Path,
569    pattern: &Option<String>,
570    results: &mut Vec<VmValue>,
571    depth: usize,
572    max_depth: usize,
573) {
574    if depth > max_depth {
575        return;
576    }
577    let rd = match std::fs::read_dir(dir) {
578        Ok(rd) => rd,
579        Err(_) => return,
580    };
581    for entry in rd.flatten() {
582        let meta = match entry.metadata() {
583            Ok(m) => m,
584            Err(_) => continue,
585        };
586        let name = entry.file_name().to_string_lossy().to_string();
587        // Skip hidden files and .burin directory
588        if name.starts_with('.') {
589            continue;
590        }
591        let rel_path = entry
592            .path()
593            .strip_prefix(base)
594            .unwrap_or(entry.path().as_path())
595            .to_string_lossy()
596            .to_string();
597        // Apply glob-like pattern filter
598        if let Some(pat) = pattern {
599            if !glob_match(pat, &rel_path) {
600                if meta.is_dir() {
601                    scan_dir_recursive(&entry.path(), base, pattern, results, depth + 1, max_depth);
602                }
603                continue;
604            }
605        }
606        let mtime = meta
607            .modified()
608            .ok()
609            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
610            .map(|d| d.as_secs() as i64)
611            .unwrap_or(0);
612        let mut m = BTreeMap::new();
613        m.insert("path".to_string(), VmValue::String(Rc::from(rel_path)));
614        m.insert("size".to_string(), VmValue::Int(meta.len() as i64));
615        m.insert("modified".to_string(), VmValue::Int(mtime));
616        m.insert("is_dir".to_string(), VmValue::Bool(meta.is_dir()));
617        results.push(VmValue::Dict(Rc::new(m)));
618        if meta.is_dir() {
619            scan_dir_recursive(&entry.path(), base, pattern, results, depth + 1, max_depth);
620        }
621    }
622}
623
/// Simple glob matching (supports `*` and `**`, any number of times).
///
/// Semantics:
/// * A pattern without `*` matches when it occurs anywhere in `path`
///   (plain substring test).
/// * Otherwise the pattern is cut into literal pieces at every wildcard. The
///   first piece must be a prefix of `path`, the last piece a suffix, and the
///   pieces in between must appear in order in the remainder, with no two
///   pieces overlapping.
/// * Both `*` and `**` match any run of characters, including `/`.
/// * Slashes directly next to `**` are optional, so `src/**/x` matches
///   `src/x` and `**/x` matches `x`.
///
/// Compared with the previous implementation this fixes two defects:
/// patterns with more than one wildcard (e.g. `**/*.rs`) used to compare the
/// remaining `*` literally and therefore practically never matched, and the
/// prefix and suffix were allowed to overlap (`ab*bc` matched `abc`).
fn glob_match(pattern: &str, path: &str) -> bool {
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    // Cut at `**` first so the slashes around it can be made optional, then
    // cut each part at single `*`. Every boundary between two consecutive
    // literals stands for one wildcard.
    let deep_parts: Vec<&str> = pattern.split("**").collect();
    let last_part = deep_parts.len() - 1;
    let mut literals: Vec<&str> = Vec::new();
    for (idx, part) in deep_parts.iter().enumerate() {
        let mut piece = *part;
        if idx > 0 {
            piece = piece.trim_start_matches('/');
        }
        if idx < last_part {
            piece = piece.trim_end_matches('/');
        }
        literals.extend(piece.split('*'));
    }

    // The pattern contains at least one `*`, so there are at least two literals.
    let (first, tail) = match literals.split_first() {
        Some(parts) => parts,
        None => return false,
    };
    let (last, middle) = match tail.split_last() {
        Some(parts) => parts,
        None => return false,
    };

    // Anchor both ends; stripping the suffix from what is left after the
    // prefix is what rules out overlap.
    let mut remaining = match path
        .strip_prefix(*first)
        .and_then(|rest| rest.strip_suffix(*last))
    {
        Some(rest) => rest,
        None => return false,
    };

    // Interior literals: take the leftmost occurrence each time, which leaves
    // the most room for the literals that follow.
    for &literal in middle {
        match remaining.find(literal) {
            Some(pos) => remaining = &remaining[pos + literal.len()..],
            None => return false,
        }
    }
    true
}