Skip to main content

harn_vm/
metadata.rs

1//! Project metadata store for `.burin/metadata/` sharded JSON files.
2//!
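//! Provides the `metadata_get`, `metadata_resolve`, `metadata_entries`,
//! `metadata_set`, `metadata_save`, `metadata_stale`, `metadata_refresh_hashes`,
//! and `metadata_status` builtins, plus the `compute_content_hash`,
//! `invalidate_facts`, and `scan_directory` helpers. Compatible with the Swift
//! DirectoryMetadataStore format (sharded by package root).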
6//!
7//! Resolution uses hierarchical inheritance: child directories inherit from
8//! parent directories, with overrides at each level.
9
10use std::cell::RefCell;
11use std::collections::BTreeMap;
12use std::path::{Path, PathBuf};
13use std::rc::Rc;
14
15use crate::value::{VmError, VmValue};
16use crate::vm::Vm;
17
18type Namespace = String;
19type FieldKey = String;
20
21/// Per-directory metadata: namespaces -> keys -> JSON values.
22#[derive(Clone, Default)]
23struct DirectoryMetadata {
24    namespaces: BTreeMap<Namespace, BTreeMap<FieldKey, serde_json::Value>>,
25}
26
27/// The full metadata store (all directories).
28struct MetadataState {
29    entries: BTreeMap<String, DirectoryMetadata>,
30    base_dir: PathBuf,
31    loaded: bool,
32    dirty: bool,
33}
34
35impl MetadataState {
36    fn new(base_dir: &Path) -> Self {
37        Self {
38            entries: BTreeMap::new(),
39            base_dir: base_dir.to_path_buf(),
40            loaded: false,
41            dirty: false,
42        }
43    }
44
45    fn metadata_dir(&self) -> PathBuf {
46        self.base_dir.join(".burin").join("metadata")
47    }
48
49    fn ensure_loaded(&mut self) {
50        if self.loaded {
51            return;
52        }
53        self.loaded = true;
54        let meta_dir = self.metadata_dir();
55        let entries = match std::fs::read_dir(&meta_dir) {
56            Ok(e) => e,
57            Err(_) => return,
58        };
59        for entry in entries.flatten() {
60            let path = entry.path();
61            if path.extension().map(|e| e == "json").unwrap_or(false) {
62                if let Ok(contents) = std::fs::read_to_string(&path) {
63                    self.load_shard(&contents);
64                }
65            }
66        }
67    }
68
69    fn load_shard(&mut self, contents: &str) {
70        let parsed: serde_json::Value = match serde_json::from_str(contents) {
71            Ok(v) => v,
72            Err(_) => return,
73        };
74        let shard_entries = match parsed.get("entries").and_then(|e| e.as_object()) {
75            Some(e) => e,
76            None => return,
77        };
78        for (dir, meta_val) in shard_entries {
79            let meta = parse_directory_metadata(meta_val);
80            self.entries.insert(dir.clone(), meta);
81        }
82    }
83
84    /// Resolve metadata for a directory with hierarchical inheritance.
85    /// Walks from root (".") through each path component, merging at each level.
86    fn resolve(&mut self, directory: &str) -> DirectoryMetadata {
87        self.ensure_loaded();
88        let mut result = DirectoryMetadata::default();
89
90        // Start with root
91        if let Some(root) = self.entries.get(".").or_else(|| self.entries.get("")) {
92            merge_metadata(&mut result, root);
93        }
94
95        // Walk path components
96        let components: Vec<&str> = directory
97            .split('/')
98            .filter(|c| !c.is_empty() && *c != ".")
99            .collect();
100        let mut current = String::new();
101        for component in components {
102            if current.is_empty() {
103                current = component.to_string();
104            } else {
105                current = format!("{current}/{component}");
106            }
107            if let Some(meta) = self.entries.get(&current) {
108                merge_metadata(&mut result, meta);
109            }
110        }
111
112        result
113    }
114
115    /// Get a specific namespace for a resolved directory.
116    fn get_namespace(
117        &mut self,
118        directory: &str,
119        namespace: &str,
120    ) -> Option<BTreeMap<FieldKey, serde_json::Value>> {
121        let resolved = self.resolve(directory);
122        resolved.namespaces.get(namespace).cloned()
123    }
124
125    fn local_directory(&mut self, directory: &str) -> DirectoryMetadata {
126        self.ensure_loaded();
127        self.entries.get(directory).cloned().unwrap_or_default()
128    }
129
130    /// Set metadata for a directory + namespace.
131    fn set_namespace(
132        &mut self,
133        directory: &str,
134        namespace: &str,
135        data: BTreeMap<FieldKey, serde_json::Value>,
136    ) {
137        self.ensure_loaded();
138        let meta = self.entries.entry(directory.to_string()).or_default();
139        let ns = meta.namespaces.entry(namespace.to_string()).or_default();
140        for (k, v) in data {
141            ns.insert(k, v);
142        }
143        self.dirty = true;
144    }
145
146    /// Save all metadata back to sharded JSON files.
147    fn save(&mut self) -> Result<(), String> {
148        if !self.dirty {
149            return Ok(());
150        }
151        let meta_dir = self.metadata_dir();
152        std::fs::create_dir_all(&meta_dir).map_err(|e| format!("metadata mkdir: {e}"))?;
153
154        // Shard by simple strategy: everything in one "root" shard for now.
155        // This matches Swift behavior for single-package projects.
156        let mut shard = serde_json::Map::new();
157        for (dir, meta) in &self.entries {
158            shard.insert(dir.clone(), serialize_directory_metadata(meta));
159        }
160
161        let store_obj = serde_json::json!({
162            "version": 2,
163            "generatedAt": chrono_now_iso(),
164            "entries": serde_json::Value::Object(shard)
165        });
166
167        let json =
168            serde_json::to_string_pretty(&store_obj).map_err(|e| format!("metadata json: {e}"))?;
169
170        let shard_path = meta_dir.join("root.json");
171        std::fs::write(&shard_path, json).map_err(|e| format!("metadata write: {e}"))?;
172        self.dirty = false;
173        Ok(())
174    }
175}
176
/// Formats the current system time as a UTC ISO 8601 timestamp
/// (`YYYY-MM-DDTHH:MM:SSZ`) without pulling in a date/time crate.
///
/// A clock that reads earlier than the Unix epoch is treated as the epoch.
fn chrono_now_iso() -> String {
    const SECS_PER_DAY: u64 = 86400;
    let is_leap = |year: i64| year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);

    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // Time of day.
    let second_of_day = since_epoch % SECS_PER_DAY;
    let hh = second_of_day / 3600;
    let mm = (second_of_day % 3600) / 60;
    let ss = second_of_day % 60;

    // Peel off whole years until the remaining day count falls inside `year`.
    let mut year = 1970i64;
    let mut day_of_year = (since_epoch / SECS_PER_DAY) as i64;
    loop {
        let year_len: i64 = if is_leap(year) { 366 } else { 365 };
        if day_of_year < year_len {
            break;
        }
        day_of_year -= year_len;
        year += 1;
    }

    // Then peel off whole months; `day` is tracked 1-based.
    let february: i64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month = 1;
    let mut day = day_of_year + 1;
    for len in month_lengths.iter() {
        if day <= *len {
            break;
        }
        day -= *len;
        month += 1;
    }

    format!("{year:04}-{month:02}-{day:02}T{hh:02}:{mm:02}:{ss:02}Z")
}
238
239fn merge_metadata(target: &mut DirectoryMetadata, source: &DirectoryMetadata) {
240    for (ns, fields) in &source.namespaces {
241        let target_ns = target.namespaces.entry(ns.clone()).or_default();
242        for (k, v) in fields {
243            target_ns.insert(k.clone(), v.clone());
244        }
245    }
246}
247
248fn parse_directory_metadata(val: &serde_json::Value) -> DirectoryMetadata {
249    let mut meta = DirectoryMetadata::default();
250    let obj = match val.as_object() {
251        Some(o) => o,
252        None => return meta,
253    };
254    // Parse "namespaces" key (the standard format)
255    if let Some(ns_obj) = obj.get("namespaces").and_then(|n| n.as_object()) {
256        for (ns_name, fields_val) in ns_obj {
257            if let Some(fields) = fields_val.as_object() {
258                let mut field_map = BTreeMap::new();
259                for (k, v) in fields {
260                    field_map.insert(k.clone(), v.clone());
261                }
262                meta.namespaces.insert(ns_name.clone(), field_map);
263            }
264        }
265    }
266    meta
267}
268
269fn serialize_directory_metadata(meta: &DirectoryMetadata) -> serde_json::Value {
270    let mut ns_obj = serde_json::Map::new();
271    for (ns_name, fields) in &meta.namespaces {
272        let mut fields_obj = serde_json::Map::new();
273        for (k, v) in fields {
274            fields_obj.insert(k.clone(), v.clone());
275        }
276        ns_obj.insert(ns_name.clone(), serde_json::Value::Object(fields_obj));
277    }
278    serde_json::json!({ "namespaces": serde_json::Value::Object(ns_obj) })
279}
280
281fn vm_to_json(val: &VmValue) -> serde_json::Value {
282    match val {
283        VmValue::String(s) => serde_json::Value::String(s.to_string()),
284        VmValue::Int(n) => serde_json::json!(*n),
285        VmValue::Float(n) => serde_json::json!(*n),
286        VmValue::Bool(b) => serde_json::Value::Bool(*b),
287        VmValue::Nil => serde_json::Value::Null,
288        VmValue::List(items) => serde_json::Value::Array(items.iter().map(vm_to_json).collect()),
289        VmValue::Dict(map) => {
290            let obj: serde_json::Map<String, serde_json::Value> = map
291                .iter()
292                .map(|(k, v)| (k.clone(), vm_to_json(v)))
293                .collect();
294            serde_json::Value::Object(obj)
295        }
296        _ => serde_json::Value::Null,
297    }
298}
299
300fn json_to_vm(jv: &serde_json::Value) -> VmValue {
301    match jv {
302        serde_json::Value::Null => VmValue::Nil,
303        serde_json::Value::Bool(b) => VmValue::Bool(*b),
304        serde_json::Value::Number(n) => {
305            if let Some(i) = n.as_i64() {
306                VmValue::Int(i)
307            } else {
308                VmValue::Float(n.as_f64().unwrap_or(0.0))
309            }
310        }
311        serde_json::Value::String(s) => VmValue::String(Rc::from(s.as_str())),
312        serde_json::Value::Array(arr) => {
313            VmValue::List(Rc::new(arr.iter().map(json_to_vm).collect()))
314        }
315        serde_json::Value::Object(map) => {
316            let mut m = BTreeMap::new();
317            for (k, v) in map {
318                m.insert(k.clone(), json_to_vm(v));
319            }
320            VmValue::Dict(Rc::new(m))
321        }
322    }
323}
324
325fn namespace_fields_to_vm(fields: &BTreeMap<FieldKey, serde_json::Value>) -> VmValue {
326    let mut map = BTreeMap::new();
327    for (k, v) in fields {
328        map.insert(k.clone(), json_to_vm(v));
329    }
330    VmValue::Dict(Rc::new(map))
331}
332
333fn directory_metadata_to_vm(meta: &DirectoryMetadata) -> VmValue {
334    let mut namespaces = BTreeMap::new();
335    for (ns, fields) in &meta.namespaces {
336        namespaces.insert(ns.clone(), namespace_fields_to_vm(fields));
337    }
338    VmValue::Dict(Rc::new(namespaces))
339}
340
/// Returns the directory key to show to scripts: an empty or whitespace-only
/// key is reported as `"."`; any other key is returned unchanged (no trimming).
fn normalize_directory_key(dir: &str) -> String {
    let shown = match dir {
        "." => ".",
        blank if blank.trim().is_empty() => ".",
        other => other,
    };
    shown.to_string()
}
348
/// Options that control a `scan_directory` traversal.
///
/// `Debug` is derived so parsed options can be inspected in logs and tests.
#[derive(Clone, Debug)]
struct ScanOptions {
    /// Glob-like filter applied to each entry's relative path; `None` disables filtering.
    pattern: Option<String>,
    /// Recursion limit: directories deeper than this many levels below the start are not read.
    max_depth: usize,
    /// When `false`, entries whose name starts with `.` are skipped.
    include_hidden: bool,
    /// Whether directories are wanted in the results.
    include_dirs: bool,
    /// Whether files are wanted in the results.
    include_files: bool,
}

impl Default for ScanOptions {
    /// No pattern, depth limit of 5, hidden entries excluded, files and
    /// directories both included.
    fn default() -> Self {
        Self {
            pattern: None,
            max_depth: 5,
            include_hidden: false,
            include_dirs: true,
            include_files: true,
        }
    }
}
369
370fn bool_arg(map: &BTreeMap<String, VmValue>, key: &str, default: bool) -> bool {
371    match map.get(key) {
372        Some(VmValue::Bool(value)) => *value,
373        _ => default,
374    }
375}
376
377fn usize_arg(map: &BTreeMap<String, VmValue>, key: &str, default: usize) -> usize {
378    match map.get(key) {
379        Some(VmValue::Int(value)) if *value >= 0 => *value as usize,
380        _ => default,
381    }
382}
383
384fn parse_scan_options(
385    pattern_or_options: Option<&VmValue>,
386    explicit_options: Option<&VmValue>,
387) -> ScanOptions {
388    let mut options = ScanOptions::default();
389    if let Some(VmValue::String(pattern)) = pattern_or_options {
390        options.pattern = Some(pattern.to_string());
391    } else if let Some(VmValue::Dict(dict)) = pattern_or_options {
392        apply_scan_options_dict(&mut options, dict);
393    }
394    if let Some(VmValue::Dict(dict)) = explicit_options {
395        apply_scan_options_dict(&mut options, dict);
396    }
397    options
398}
399
400fn apply_scan_options_dict(options: &mut ScanOptions, dict: &BTreeMap<String, VmValue>) {
401    if let Some(pattern) = dict.get("pattern").map(|value| value.display()) {
402        if !pattern.is_empty() {
403            options.pattern = Some(pattern);
404        }
405    }
406    options.max_depth = usize_arg(dict, "max_depth", options.max_depth);
407    options.include_hidden = bool_arg(dict, "include_hidden", options.include_hidden);
408    options.include_dirs = bool_arg(dict, "include_dirs", options.include_dirs);
409    options.include_files = bool_arg(dict, "include_files", options.include_files);
410}
411
412fn resolve_scan_root(base_dir: &Path, rel_dir: &str) -> PathBuf {
413    let candidate = PathBuf::from(rel_dir);
414    if candidate.is_absolute() {
415        return candidate;
416    }
417    if let Some(cwd) =
418        crate::stdlib::process::current_execution_context().and_then(|context| context.cwd)
419    {
420        return PathBuf::from(cwd).join(candidate);
421    }
422    if let Ok(cwd) = std::env::current_dir() {
423        return cwd.join(candidate);
424    }
425    base_dir.join(candidate)
426}
427
428/// Register metadata builtins on a VM.
429///
430/// In standalone mode, these operate directly on `.burin/metadata/` files.
431/// In bridge mode, these are registered **before** bridge builtins so the
432/// host can override them if needed (but typically the VM handles this natively).
433pub fn register_metadata_builtins(vm: &mut Vm, base_dir: &Path) {
434    let state = Rc::new(RefCell::new(MetadataState::new(base_dir)));
435
436    // metadata_get(dir, namespace?) -> dict | nil
437    let s = Rc::clone(&state);
438    vm.register_builtin("metadata_get", move |args, _out| {
439        let dir = args.first().map(|a| a.display()).unwrap_or_default();
440        let namespace = args.get(1).and_then(|a| {
441            if matches!(a, VmValue::Nil) {
442                None
443            } else {
444                Some(a.display())
445            }
446        });
447
448        let mut st = s.borrow_mut();
449        if let Some(ns) = namespace {
450            match st.get_namespace(&dir, &ns) {
451                Some(fields) => {
452                    let mut m = BTreeMap::new();
453                    for (k, v) in fields {
454                        m.insert(k, json_to_vm(&v));
455                    }
456                    Ok(VmValue::Dict(Rc::new(m)))
457                }
458                None => Ok(VmValue::Nil),
459            }
460        } else {
461            // Return all namespaces flattened
462            let resolved = st.resolve(&dir);
463            let mut m = BTreeMap::new();
464            for fields in resolved.namespaces.values() {
465                for (k, v) in fields {
466                    m.insert(k.clone(), json_to_vm(v));
467                }
468            }
469            if m.is_empty() {
470                Ok(VmValue::Nil)
471            } else {
472                Ok(VmValue::Dict(Rc::new(m)))
473            }
474        }
475    });
476
477    // metadata_resolve(dir, namespace?) -> dict | nil
478    let s = Rc::clone(&state);
479    vm.register_builtin("metadata_resolve", move |args, _out| {
480        let dir = args.first().map(|a| a.display()).unwrap_or_default();
481        let namespace = args.get(1).and_then(|a| {
482            if matches!(a, VmValue::Nil) {
483                None
484            } else {
485                Some(a.display())
486            }
487        });
488        let mut st = s.borrow_mut();
489        let resolved = st.resolve(&dir);
490        if let Some(ns) = namespace {
491            match resolved.namespaces.get(&ns) {
492                Some(fields) => Ok(namespace_fields_to_vm(fields)),
493                None => Ok(VmValue::Nil),
494            }
495        } else if resolved.namespaces.is_empty() {
496            Ok(VmValue::Nil)
497        } else {
498            Ok(directory_metadata_to_vm(&resolved))
499        }
500    });
501
502    // metadata_entries(namespace?) -> list
503    let s = Rc::clone(&state);
504    vm.register_builtin("metadata_entries", move |args, _out| {
505        let namespace = args.first().and_then(|a| {
506            if matches!(a, VmValue::Nil) {
507                None
508            } else {
509                Some(a.display())
510            }
511        });
512        let mut st = s.borrow_mut();
513        st.ensure_loaded();
514        let directories: Vec<String> = st.entries.keys().cloned().collect();
515        let mut items = Vec::new();
516        for dir in directories {
517            let local = st.local_directory(&dir);
518            let resolved = st.resolve(&dir);
519            let mut item = BTreeMap::new();
520            item.insert(
521                "dir".to_string(),
522                VmValue::String(Rc::from(normalize_directory_key(&dir))),
523            );
524            match &namespace {
525                Some(ns) => {
526                    item.insert(
527                        "local".to_string(),
528                        local
529                            .namespaces
530                            .get(ns)
531                            .map(namespace_fields_to_vm)
532                            .unwrap_or(VmValue::Nil),
533                    );
534                    item.insert(
535                        "resolved".to_string(),
536                        resolved
537                            .namespaces
538                            .get(ns)
539                            .map(namespace_fields_to_vm)
540                            .unwrap_or(VmValue::Nil),
541                    );
542                }
543                None => {
544                    item.insert("local".to_string(), directory_metadata_to_vm(&local));
545                    item.insert("resolved".to_string(), directory_metadata_to_vm(&resolved));
546                }
547            }
548            items.push(VmValue::Dict(Rc::new(item)));
549        }
550        Ok(VmValue::List(Rc::new(items)))
551    });
552
553    // metadata_set(dir, namespace, data_dict)
554    let s = Rc::clone(&state);
555    vm.register_builtin("metadata_set", move |args, _out| {
556        let dir = args.first().map(|a| a.display()).unwrap_or_default();
557        let namespace = args.get(1).map(|a| a.display()).unwrap_or_default();
558        let data_val = args.get(2).unwrap_or(&VmValue::Nil);
559
560        let mut data = BTreeMap::new();
561        if let VmValue::Dict(dict) = data_val {
562            for (k, v) in dict.iter() {
563                data.insert(k.clone(), vm_to_json(v));
564            }
565        }
566
567        if !data.is_empty() {
568            s.borrow_mut().set_namespace(&dir, &namespace, data);
569        }
570        Ok(VmValue::Nil)
571    });
572
573    // metadata_save()
574    let s = Rc::clone(&state);
575    vm.register_builtin("metadata_save", move |_args, _out| {
576        s.borrow_mut().save().map_err(VmError::Runtime)?;
577        Ok(VmValue::Nil)
578    });
579
580    // metadata_stale(project) -> {any_stale: bool, tier1: [dirs], tier2: [dirs]}
581    // Compare stored structureHash/contentHash against current filesystem state.
582    let s = Rc::clone(&state);
583    let base2 = base_dir.to_path_buf();
584    vm.register_builtin("metadata_stale", move |_args, _out| {
585        s.borrow_mut().ensure_loaded();
586        let state = s.borrow();
587        let mut tier1_stale: Vec<VmValue> = Vec::new();
588        let mut tier2_stale: Vec<VmValue> = Vec::new();
589
590        for (dir, meta) in &state.entries {
591            let full_dir = if dir.is_empty() {
592                base2.clone()
593            } else {
594                base2.join(dir)
595            };
596            // Tier 1: structureHash — file list + sizes
597            if let Some(stored_hash) = meta
598                .namespaces
599                .get("classification")
600                .and_then(|ns| ns.get("structureHash"))
601                .and_then(|v| v.as_str())
602            {
603                let current_hash = compute_structure_hash(&full_dir);
604                if current_hash != stored_hash {
605                    tier1_stale.push(VmValue::String(Rc::from(dir.as_str())));
606                    continue; // If structure changed, skip tier2 check
607                }
608            }
609            // Tier 2: contentHash — file content digest
610            if let Some(stored_hash) = meta
611                .namespaces
612                .get("classification")
613                .and_then(|ns| ns.get("contentHash"))
614                .and_then(|v| v.as_str())
615            {
616                let current_hash = compute_content_hash_for_dir(&full_dir);
617                if current_hash != stored_hash {
618                    tier2_stale.push(VmValue::String(Rc::from(dir.as_str())));
619                }
620            }
621        }
622
623        let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
624        let mut m = BTreeMap::new();
625        m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
626        m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
627        m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
628        Ok(VmValue::Dict(Rc::new(m)))
629    });
630
631    // metadata_refresh_hashes(project) -> nil
632    // Recompute and store structureHash for all directories.
633    let s = Rc::clone(&state);
634    let base3 = base_dir.to_path_buf();
635    vm.register_builtin("metadata_refresh_hashes", move |_args, _out| {
636        let mut state = s.borrow_mut();
637        state.ensure_loaded();
638        let dirs: Vec<String> = state.entries.keys().cloned().collect();
639        for dir in dirs {
640            let full_dir = if dir.is_empty() {
641                base3.clone()
642            } else {
643                base3.join(&dir)
644            };
645            let hash = compute_structure_hash(&full_dir);
646            let entry = state.entries.entry(dir).or_default();
647            let ns = entry
648                .namespaces
649                .entry("classification".to_string())
650                .or_default();
651            ns.insert("structureHash".to_string(), serde_json::Value::String(hash));
652        }
653        state.dirty = true;
654        Ok(VmValue::Nil)
655    });
656
657    // metadata_status(namespace?) -> dict
658    let s = Rc::clone(&state);
659    let base4 = base_dir.to_path_buf();
660    vm.register_builtin("metadata_status", move |args, _out| {
661        let namespace = args.first().and_then(|a| {
662            if matches!(a, VmValue::Nil) {
663                None
664            } else {
665                Some(a.display())
666            }
667        });
668        s.borrow_mut().ensure_loaded();
669        let state = s.borrow();
670        let mut namespaces = BTreeMap::new();
671        let mut directories = Vec::new();
672        let mut missing_structure_hash = Vec::new();
673        let mut missing_content_hash = Vec::new();
674        for (dir, meta) in &state.entries {
675            directories.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
676            for ns in meta.namespaces.keys() {
677                namespaces.insert(ns.clone(), VmValue::Bool(true));
678            }
679            let full_dir = if dir.is_empty() {
680                base4.clone()
681            } else {
682                base4.join(dir)
683            };
684            let relevant = namespace
685                .as_ref()
686                .and_then(|name| meta.namespaces.get(name))
687                .or_else(|| meta.namespaces.get("classification"));
688            if let Some(fields) = relevant {
689                if !fields.contains_key("structureHash") && full_dir.exists() {
690                    missing_structure_hash
691                        .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
692                }
693                if !fields.contains_key("contentHash") && full_dir.exists() {
694                    missing_content_hash
695                        .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
696                }
697            }
698        }
699        let stale = metadata_stale_value(&state, &base4);
700        let mut result = BTreeMap::new();
701        result.insert(
702            "directory_count".to_string(),
703            VmValue::Int(state.entries.len() as i64),
704        );
705        result.insert(
706            "namespace_count".to_string(),
707            VmValue::Int(namespaces.len() as i64),
708        );
709        result.insert(
710            "namespaces".to_string(),
711            VmValue::List(Rc::new(
712                namespaces
713                    .keys()
714                    .cloned()
715                    .map(|name| VmValue::String(Rc::from(name)))
716                    .collect(),
717            )),
718        );
719        result.insert(
720            "directories".to_string(),
721            VmValue::List(Rc::new(directories)),
722        );
723        result.insert(
724            "missing_structure_hash".to_string(),
725            VmValue::List(Rc::new(missing_structure_hash)),
726        );
727        result.insert(
728            "missing_content_hash".to_string(),
729            VmValue::List(Rc::new(missing_content_hash)),
730        );
731        result.insert("stale".to_string(), stale);
732        Ok(VmValue::Dict(Rc::new(result)))
733    });
734
735    // compute_content_hash(dir) -> string
736    // Hash of file list + sizes + mtimes in directory for staleness tracking
737    let base = base_dir.to_path_buf();
738    vm.register_builtin("compute_content_hash", move |args, _out| {
739        let dir = args.first().map(|a| a.display()).unwrap_or_default();
740        let full_dir = if dir.is_empty() {
741            base.clone()
742        } else {
743            base.join(&dir)
744        };
745        let hash = compute_content_hash_for_dir(&full_dir);
746        Ok(VmValue::String(Rc::from(hash)))
747    });
748
749    // invalidate_facts(dir) -> nil (no-op — facts live in metadata namespace now)
750    vm.register_builtin("invalidate_facts", |_args, _out| Ok(VmValue::Nil));
751
752    // Also register scan builtins (scan_directory)
753    register_scan_builtins(vm, base_dir);
754}
755
756/// Compute structure hash for a directory (file names + sizes).
757fn compute_structure_hash(dir: &Path) -> String {
758    let mut entries: Vec<String> = Vec::new();
759    if let Ok(rd) = std::fs::read_dir(dir) {
760        for entry in rd.flatten() {
761            if let Ok(meta) = entry.metadata() {
762                let name = entry.file_name().to_string_lossy().to_string();
763                entries.push(format!("{}:{}", name, meta.len()));
764            }
765        }
766    }
767    entries.sort();
768    let joined = entries.join("|");
769    format!("{:x}", fnv_hash(joined.as_bytes()))
770}
771
772/// Compute content hash for a directory (file names + sizes + mtimes).
773fn compute_content_hash_for_dir(dir: &Path) -> String {
774    let mut entries: Vec<String> = Vec::new();
775    if let Ok(rd) = std::fs::read_dir(dir) {
776        for entry in rd.flatten() {
777            if let Ok(meta) = entry.metadata() {
778                let name = entry.file_name().to_string_lossy().to_string();
779                let mtime = meta
780                    .modified()
781                    .ok()
782                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
783                    .map(|d| d.as_secs())
784                    .unwrap_or(0);
785                entries.push(format!("{}:{}:{}", name, meta.len(), mtime));
786            }
787        }
788    }
789    entries.sort();
790    let joined = entries.join("|");
791    format!("{:x}", fnv_hash(joined.as_bytes()))
792}
793
/// FNV-1a hash (not crypto-grade, just for staleness detection).
///
/// 64-bit variant: each byte is XORed into the state, which is then multiplied
/// by the FNV prime with wrapping arithmetic. Empty input returns the offset basis.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    data.iter().fold(OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
803
804/// Register scan_directory builtin: native Rust file enumeration.
805pub fn register_scan_builtins(vm: &mut Vm, base_dir: &Path) {
806    let base = base_dir.to_path_buf();
807    // scan_directory(path?, pattern?) -> [{path, size, modified, is_dir}, ...]
808    vm.register_builtin("scan_directory", move |args, _out| {
809        let rel_dir = args.first().map(|a| a.display()).unwrap_or_default();
810        let options = parse_scan_options(args.get(1), args.get(2));
811        let scan_base = resolve_scan_root(&base, ".");
812        let full_dir = if rel_dir.is_empty() {
813            scan_base.clone()
814        } else {
815            scan_base.join(&rel_dir)
816        };
817        let mut results: Vec<VmValue> = Vec::new();
818        scan_dir_recursive(&full_dir, &scan_base, &options, &mut results, 0);
819        Ok(VmValue::List(Rc::new(results)))
820    });
821}
822
823fn metadata_stale_value(state: &MetadataState, base_dir: &Path) -> VmValue {
824    let mut tier1_stale: Vec<VmValue> = Vec::new();
825    let mut tier2_stale: Vec<VmValue> = Vec::new();
826    for (dir, meta) in &state.entries {
827        let full_dir = if dir.is_empty() {
828            base_dir.to_path_buf()
829        } else {
830            base_dir.join(dir)
831        };
832        if let Some(stored_hash) = meta
833            .namespaces
834            .get("classification")
835            .and_then(|ns| ns.get("structureHash"))
836            .and_then(|v| v.as_str())
837        {
838            let current_hash = compute_structure_hash(&full_dir);
839            if current_hash != stored_hash {
840                tier1_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
841                continue;
842            }
843        }
844        if let Some(stored_hash) = meta
845            .namespaces
846            .get("classification")
847            .and_then(|ns| ns.get("contentHash"))
848            .and_then(|v| v.as_str())
849        {
850            let current_hash = compute_content_hash_for_dir(&full_dir);
851            if current_hash != stored_hash {
852                tier2_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
853            }
854        }
855    }
856    let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
857    let mut m = BTreeMap::new();
858    m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
859    m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
860    m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
861    VmValue::Dict(Rc::new(m))
862}
863
864fn scan_dir_recursive(
865    dir: &Path,
866    base: &Path,
867    options: &ScanOptions,
868    results: &mut Vec<VmValue>,
869    depth: usize,
870) {
871    if depth > options.max_depth {
872        return;
873    }
874    let rd = match std::fs::read_dir(dir) {
875        Ok(rd) => rd,
876        Err(_) => return,
877    };
878    for entry in rd.flatten() {
879        let meta = match entry.metadata() {
880            Ok(m) => m,
881            Err(_) => continue,
882        };
883        let name = entry.file_name().to_string_lossy().to_string();
884        // Skip hidden files and .burin directory
885        if !options.include_hidden && name.starts_with('.') {
886            continue;
887        }
888        let rel_path = entry
889            .path()
890            .strip_prefix(base)
891            .unwrap_or(entry.path().as_path())
892            .to_string_lossy()
893            .to_string();
894        // Apply glob-like pattern filter
895        if let Some(pat) = &options.pattern {
896            if !glob_match(pat, &rel_path) {
897                if meta.is_dir() {
898                    scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
899                }
900                continue;
901            }
902        }
903        let mtime = meta
904            .modified()
905            .ok()
906            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
907            .map(|d| d.as_secs() as i64)
908            .unwrap_or(0);
909        let mut m = BTreeMap::new();
910        m.insert("path".to_string(), VmValue::String(Rc::from(rel_path)));
911        m.insert("size".to_string(), VmValue::Int(meta.len() as i64));
912        m.insert("modified".to_string(), VmValue::Int(mtime));
913        m.insert("is_dir".to_string(), VmValue::Bool(meta.is_dir()));
914        if (meta.is_dir() && options.include_dirs) || (!meta.is_dir() && options.include_files) {
915            results.push(VmValue::Dict(Rc::new(m)));
916        }
917        if meta.is_dir() {
918            scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
919        }
920    }
921}
922
/// Simple glob matching (supports `*` and `**` patterns).
///
/// Matching rules:
///
/// * A pattern without any `*` is a plain substring test (`".txt"` matches
///   `"project/root.txt"`).
/// * Otherwise every `*` or `**` matches any run of characters, including
///   `/` and the empty string. The text before the first wildcard must be a
///   prefix of `path`, the text after the last wildcard must be a suffix, and
///   any literal fragments in between must occur in order without
///   overlapping one another.
/// * Slashes directly adjacent to `**` are ignored, so `src/**/x.rs` also
///   matches `src/x.rs`.
///
/// Wildcards may be combined freely (`**/*.rs`, `src/**/*.rs`, `a*b*c`).
///
/// Returns `true` when `path` satisfies `pattern`.
fn glob_match(pattern: &str, path: &str) -> bool {
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    // Break the pattern into the literal fragments between wildcards. `**`
    // is split first so the slashes hugging it can be dropped, letting it
    // stand for "zero or more directories".
    let segments: Vec<&str> = pattern.split("**").collect();
    let last_segment = segments.len() - 1;
    let mut literals: Vec<&str> = Vec::new();
    for (idx, segment) in segments.iter().enumerate() {
        let mut segment = *segment;
        if idx > 0 {
            segment = segment.trim_start_matches('/');
        }
        if idx < last_segment {
            segment = segment.trim_end_matches('/');
        }
        literals.extend(segment.split('*'));
    }

    match literals.as_slice() {
        [first, middle @ .., last] => {
            // Anchor the leading literal, then consume the path left to
            // right so fragments can never overlap each other.
            let mut rest = match path.strip_prefix(*first) {
                Some(rest) => rest,
                None => return false,
            };
            for literal in middle {
                match rest.find(*literal) {
                    Some(at) => rest = &rest[at + literal.len()..],
                    None => return false,
                }
            }
            rest.ends_with(*last)
        }
        // Unreachable when the pattern contains `*` (there are always at
        // least two fragments); kept as a non-panicking fallback.
        _ => path.contains(pattern),
    }
}
943
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a scratch path under the system temp directory whose name
    /// embeds `name` and the current time in nanoseconds. The path is not
    /// created.
    fn temp_path(name: &str) -> PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos();
        let mut path = std::env::temp_dir();
        path.push(format!("harn-metadata-{name}-{nanos}"));
        path
    }

    /// Resolving a child directory merges the parent's keys with the child's
    /// own keys inside the same namespace.
    #[test]
    fn metadata_resolve_preserves_namespace_structure() {
        let base = temp_path("resolve");
        let mut state = MetadataState::new(&base);

        // Root-level field, expected to be inherited by `src`.
        state.set_namespace(
            "".into(),
            "classification",
            BTreeMap::from([("language".into(), serde_json::json!("rust"))]),
        );
        // Field set directly on `src`.
        state.set_namespace(
            "src".into(),
            "classification",
            BTreeMap::from([("owner".into(), serde_json::json!("vm"))]),
        );

        let resolved = state.resolve("src");
        let classification = resolved.namespaces.get("classification").unwrap();

        let language = classification.get("language");
        assert_eq!(language, Some(&serde_json::json!("rust")));
        let owner = classification.get("owner");
        assert_eq!(owner, Some(&serde_json::json!("vm")));
    }

    /// With `max_depth: 0`, hidden entries excluded and a `.txt` pattern,
    /// only the visible top-level text file is reported.
    #[test]
    fn scan_options_filter_hidden_and_depth() {
        let base = temp_path("scan");
        let project = base.join("project");
        std::fs::create_dir_all(project.join("deep")).unwrap();
        std::fs::write(project.join("root.txt"), "root").unwrap();
        std::fs::write(project.join(".hidden.txt"), "hidden").unwrap();
        std::fs::write(project.join("deep").join("nested.txt"), "nested").unwrap();

        let options = ScanOptions {
            include_files: true,
            include_dirs: false,
            include_hidden: false,
            max_depth: 0,
            pattern: Some(".txt".into()),
        };
        let mut results = Vec::new();
        scan_dir_recursive(&project, &base, &options, &mut results, 0);

        let mut paths: Vec<String> = Vec::new();
        for value in results {
            let path = match value {
                VmValue::Dict(dict) => dict.get("path").unwrap().display(),
                _ => String::new(),
            };
            paths.push(path);
        }
        assert_eq!(paths, vec![String::from("project/root.txt")]);

        // Best-effort cleanup of the scratch tree.
        let _ = std::fs::remove_dir_all(base);
    }
}