Skip to main content

harn_vm/
metadata.rs

1//! Project metadata store for Harn's runtime state root.
2//!
//! Provides the `metadata_get`, `metadata_resolve`, `metadata_entries`,
//! `metadata_set`, `metadata_save`, `metadata_stale`,
//! `metadata_refresh_hashes`, and `metadata_status` builtins. Stores sharded
//! JSON files (one `entries.json` per namespace) by package root.
6//!
7//! Resolution uses hierarchical inheritance: child directories inherit from
8//! parent directories, with overrides at each level.
9
10use std::cell::RefCell;
11use std::collections::BTreeMap;
12use std::path::{Path, PathBuf};
13use std::rc::Rc;
14
15use crate::value::{VmError, VmValue};
16use crate::vm::Vm;
17
18type Namespace = String;
19type FieldKey = String;
20const LEGACY_SHARD_NAME: &str = "root.json";
21const NAMESPACE_ENTRIES_FILE: &str = "entries.json";
22
/// Per-directory metadata: namespaces -> keys -> JSON values.
///
/// One instance holds everything stored for a single directory key.
#[derive(Clone, Default)]
struct DirectoryMetadata {
    /// Namespace name -> (field key -> JSON value).
    namespaces: BTreeMap<Namespace, BTreeMap<FieldKey, serde_json::Value>>,
}
28
/// Storage backend for the metadata store.
///
/// `load` and `save` both receive the metadata root directory and exchange the
/// complete map of directory key -> [`DirectoryMetadata`]. Failures are
/// reported as plain strings.
trait MetadataBackend {
    /// Short identifier for this backend (the filesystem backend records it in
    /// each shard's `"backend"` field).
    fn backend_name(&self) -> &'static str;
    /// Read every entry stored under `root`.
    fn load(&self, root: &Path) -> Result<BTreeMap<String, DirectoryMetadata>, String>;
    /// Persist `entries` under `root`.
    fn save(
        &self,
        root: &Path,
        entries: &BTreeMap<String, DirectoryMetadata>,
    ) -> Result<(), String>;
}
38
/// Backend that keeps metadata as JSON shard files on the local filesystem.
///
/// The type carries no state; all paths are supplied per call.
#[derive(Default)]
struct FilesystemMetadataBackend;

impl FilesystemMetadataBackend {
    /// Create the filesystem backend.
    fn new() -> Self {
        FilesystemMetadataBackend
    }
}
47
/// The full metadata store (all directories).
struct MetadataState {
    /// Directory key -> metadata stored locally for that directory.
    entries: BTreeMap<String, DirectoryMetadata>,
    /// Base directory handed to `runtime_paths::metadata_dir` to locate the
    /// on-disk metadata root.
    base_dir: PathBuf,
    /// Backend used to load and save `entries`.
    backend: Box<dyn MetadataBackend>,
    /// Set once the first load attempt has been made (successful or not).
    loaded: bool,
    /// Set when `entries` has changes that have not been saved yet.
    dirty: bool,
}
56
57impl MetadataState {
58    fn new(base_dir: &Path) -> Self {
59        Self {
60            entries: BTreeMap::new(),
61            base_dir: base_dir.to_path_buf(),
62            backend: Box::new(FilesystemMetadataBackend::new()),
63            loaded: false,
64            dirty: false,
65        }
66    }
67
68    fn metadata_dir(&self) -> PathBuf {
69        crate::runtime_paths::metadata_dir(&self.base_dir)
70    }
71
72    fn ensure_loaded(&mut self) {
73        if self.loaded {
74            return;
75        }
76        self.loaded = true;
77        if let Ok(entries) = self.backend.load(&self.metadata_dir()) {
78            self.entries = entries;
79        }
80    }
81
82    /// Resolve metadata for a directory with hierarchical inheritance.
83    /// Walks from root (".") through each path component, merging at each level.
84    fn resolve(&mut self, directory: &str) -> DirectoryMetadata {
85        self.ensure_loaded();
86        let mut result = DirectoryMetadata::default();
87
88        if let Some(root) = self.entries.get(".").or_else(|| self.entries.get("")) {
89            merge_metadata(&mut result, root);
90        }
91
92        let components: Vec<&str> = directory
93            .split('/')
94            .filter(|c| !c.is_empty() && *c != ".")
95            .collect();
96        let mut current = String::new();
97        for component in components {
98            if current.is_empty() {
99                current = component.to_string();
100            } else {
101                current = format!("{current}/{component}");
102            }
103            if let Some(meta) = self.entries.get(&current) {
104                merge_metadata(&mut result, meta);
105            }
106        }
107
108        result
109    }
110
111    /// Get a specific namespace for a resolved directory.
112    fn get_namespace(
113        &mut self,
114        directory: &str,
115        namespace: &str,
116    ) -> Option<BTreeMap<FieldKey, serde_json::Value>> {
117        let resolved = self.resolve(directory);
118        resolved.namespaces.get(namespace).cloned()
119    }
120
121    fn local_directory(&mut self, directory: &str) -> DirectoryMetadata {
122        self.ensure_loaded();
123        self.entries.get(directory).cloned().unwrap_or_default()
124    }
125
126    /// Set metadata for a directory + namespace.
127    fn set_namespace(
128        &mut self,
129        directory: &str,
130        namespace: &str,
131        data: BTreeMap<FieldKey, serde_json::Value>,
132    ) {
133        self.ensure_loaded();
134        let meta = self.entries.entry(directory.to_string()).or_default();
135        let ns = meta.namespaces.entry(namespace.to_string()).or_default();
136        for (k, v) in data {
137            ns.insert(k, v);
138        }
139        self.dirty = true;
140    }
141
142    /// Save all metadata back to sharded JSON files.
143    fn save(&mut self) -> Result<(), String> {
144        if !self.dirty {
145            return Ok(());
146        }
147        let meta_dir = self.metadata_dir();
148        self.backend.save(&meta_dir, &self.entries)?;
149        self.dirty = false;
150        Ok(())
151    }
152}
153
154impl MetadataBackend for FilesystemMetadataBackend {
155    fn backend_name(&self) -> &'static str {
156        "filesystem"
157    }
158
159    fn load(&self, root: &Path) -> Result<BTreeMap<String, DirectoryMetadata>, String> {
160        let mut entries = BTreeMap::new();
161        let legacy_path = root.join(LEGACY_SHARD_NAME);
162        if let Ok(contents) = std::fs::read_to_string(&legacy_path) {
163            entries = parse_legacy_entries(&contents);
164        }
165
166        let namespace_dirs = match std::fs::read_dir(root) {
167            Ok(read_dir) => read_dir,
168            Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(entries),
169            Err(error) => return Err(format!("metadata load: {error}")),
170        };
171
172        let mut dirs = namespace_dirs
173            .flatten()
174            .filter(|entry| entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false))
175            .collect::<Vec<_>>();
176        dirs.sort_by_key(|entry| entry.file_name());
177
178        for dir in dirs {
179            let shard_path = dir.path().join(NAMESPACE_ENTRIES_FILE);
180            let Ok(contents) = std::fs::read_to_string(&shard_path) else {
181                continue;
182            };
183            merge_namespace_entries(&mut entries, &contents);
184        }
185
186        Ok(entries)
187    }
188
189    fn save(
190        &self,
191        root: &Path,
192        entries: &BTreeMap<String, DirectoryMetadata>,
193    ) -> Result<(), String> {
194        std::fs::create_dir_all(root).map_err(|error| format!("metadata mkdir: {error}"))?;
195
196        let mut namespaces: BTreeMap<String, serde_json::Map<String, serde_json::Value>> =
197            BTreeMap::new();
198        for (dir, meta) in entries {
199            for (namespace, fields) in &meta.namespaces {
200                namespaces
201                    .entry(namespace.clone())
202                    .or_default()
203                    .insert(dir.clone(), serialize_namespace_fields(fields));
204            }
205        }
206
207        for (namespace, shard_entries) in namespaces {
208            let namespace_dir = root.join(namespace_path_component(&namespace));
209            std::fs::create_dir_all(&namespace_dir)
210                .map_err(|error| format!("metadata mkdir: {error}"))?;
211            let shard = serde_json::json!({
212                "version": 1,
213                "namespace": namespace,
214                "backend": self.backend_name(),
215                "generatedAt": chrono_now_iso(),
216                "entries": serde_json::Value::Object(shard_entries),
217            });
218            let json = serde_json::to_string_pretty(&shard)
219                .map_err(|error| format!("metadata json: {error}"))?;
220            std::fs::write(namespace_dir.join(NAMESPACE_ENTRIES_FILE), json)
221                .map_err(|error| format!("metadata write: {error}"))?;
222        }
223
224        Ok(())
225    }
226}
227
/// ISO 8601 timestamp (e.g. `2026-03-29T14:00:00Z`) without a chrono dependency.
///
/// Reads the system clock and formats it as UTC. A clock set before the Unix
/// epoch is treated as the epoch itself.
fn chrono_now_iso() -> String {
    let secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    format_unix_secs_iso(secs)
}

/// Format seconds since the Unix epoch as `YYYY-MM-DDTHH:MM:SSZ` (UTC).
fn format_unix_secs_iso(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;

    fn is_leap(year: i64) -> bool {
        year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)
    }

    let time_secs = secs % SECS_PER_DAY;
    let hours = time_secs / 3600;
    let minutes = (time_secs % 3600) / 60;
    let seconds = time_secs % 60;

    // Peel off whole years, then whole months, from the day count.
    let mut year = 1970i64;
    let mut remaining = (secs / SECS_PER_DAY) as i64;
    loop {
        let days_in_year: i64 = if is_leap(year) { 366 } else { 365 };
        if remaining < days_in_year {
            break;
        }
        remaining -= days_in_year;
        year += 1;
    }

    let february: i64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month = 1u32;
    for length in month_lengths.iter() {
        if remaining < *length {
            break;
        }
        remaining -= *length;
        month += 1;
    }

    // `remaining` is now the zero-based day within the month.
    let day = remaining + 1;
    format!("{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}Z")
}
287
288fn merge_metadata(target: &mut DirectoryMetadata, source: &DirectoryMetadata) {
289    for (ns, fields) in &source.namespaces {
290        let target_ns = target.namespaces.entry(ns.clone()).or_default();
291        for (k, v) in fields {
292            target_ns.insert(k.clone(), v.clone());
293        }
294    }
295}
296
297fn parse_namespace_fields(val: &serde_json::Value) -> BTreeMap<FieldKey, serde_json::Value> {
298    let mut fields = BTreeMap::new();
299    let Some(obj) = val.as_object() else {
300        return fields;
301    };
302    for (key, value) in obj {
303        fields.insert(key.clone(), value.clone());
304    }
305    fields
306}
307
308fn serialize_namespace_fields(fields: &BTreeMap<FieldKey, serde_json::Value>) -> serde_json::Value {
309    let mut fields_obj = serde_json::Map::new();
310    for (k, v) in fields {
311        fields_obj.insert(k.clone(), v.clone());
312    }
313    serde_json::Value::Object(fields_obj)
314}
315
316fn parse_directory_metadata(val: &serde_json::Value) -> DirectoryMetadata {
317    let mut meta = DirectoryMetadata::default();
318    let obj = match val.as_object() {
319        Some(o) => o,
320        None => return meta,
321    };
322    if let Some(ns_obj) = obj.get("namespaces").and_then(|n| n.as_object()) {
323        for (ns_name, fields_val) in ns_obj {
324            if let Some(fields) = fields_val.as_object() {
325                let mut field_map = BTreeMap::new();
326                for (k, v) in fields {
327                    field_map.insert(k.clone(), v.clone());
328                }
329                meta.namespaces.insert(ns_name.clone(), field_map);
330            }
331        }
332    }
333    meta
334}
335
336fn parse_legacy_entries(contents: &str) -> BTreeMap<String, DirectoryMetadata> {
337    let mut entries = BTreeMap::new();
338    let parsed: serde_json::Value = match serde_json::from_str(contents) {
339        Ok(v) => v,
340        Err(_) => return entries,
341    };
342    let Some(shard_entries) = parsed.get("entries").and_then(|e| e.as_object()) else {
343        return entries;
344    };
345    for (dir, meta_val) in shard_entries {
346        entries.insert(dir.clone(), parse_directory_metadata(meta_val));
347    }
348    entries
349}
350
351fn merge_namespace_entries(entries: &mut BTreeMap<String, DirectoryMetadata>, contents: &str) {
352    let parsed: serde_json::Value = match serde_json::from_str(contents) {
353        Ok(v) => v,
354        Err(_) => return,
355    };
356    let Some(namespace) = parsed.get("namespace").and_then(|value| value.as_str()) else {
357        return;
358    };
359    let Some(shard_entries) = parsed.get("entries").and_then(|value| value.as_object()) else {
360        return;
361    };
362    for (dir, fields_val) in shard_entries {
363        let directory = entries.entry(dir.clone()).or_default();
364        directory
365            .namespaces
366            .insert(namespace.to_string(), parse_namespace_fields(fields_val));
367    }
368}
369
/// Turn a namespace name into a single directory name.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character is
/// written as `_` followed by its code point in upper-case hex (at least two
/// digits). Results that would be empty, `.` or `..` become `_`.
fn namespace_path_component(namespace: &str) -> String {
    let encoded: String = namespace
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch.to_string()
            } else {
                format!("_{:02X}", u32::from(ch))
            }
        })
        .collect();
    match encoded.as_str() {
        "" | "." | ".." => String::from("_"),
        _ => encoded,
    }
}
384
385fn vm_to_json(val: &VmValue) -> serde_json::Value {
386    match val {
387        VmValue::String(s) => serde_json::Value::String(s.to_string()),
388        VmValue::Int(n) => serde_json::json!(*n),
389        VmValue::Float(n) => serde_json::json!(*n),
390        VmValue::Bool(b) => serde_json::Value::Bool(*b),
391        VmValue::Nil => serde_json::Value::Null,
392        VmValue::List(items) => serde_json::Value::Array(items.iter().map(vm_to_json).collect()),
393        VmValue::Dict(map) => {
394            let obj: serde_json::Map<String, serde_json::Value> = map
395                .iter()
396                .map(|(k, v)| (k.clone(), vm_to_json(v)))
397                .collect();
398            serde_json::Value::Object(obj)
399        }
400        _ => serde_json::Value::Null,
401    }
402}
403
404fn json_to_vm(jv: &serde_json::Value) -> VmValue {
405    match jv {
406        serde_json::Value::Null => VmValue::Nil,
407        serde_json::Value::Bool(b) => VmValue::Bool(*b),
408        serde_json::Value::Number(n) => {
409            if let Some(i) = n.as_i64() {
410                VmValue::Int(i)
411            } else {
412                VmValue::Float(n.as_f64().unwrap_or(0.0))
413            }
414        }
415        serde_json::Value::String(s) => VmValue::String(Rc::from(s.as_str())),
416        serde_json::Value::Array(arr) => {
417            VmValue::List(Rc::new(arr.iter().map(json_to_vm).collect()))
418        }
419        serde_json::Value::Object(map) => {
420            let mut m = BTreeMap::new();
421            for (k, v) in map {
422                m.insert(k.clone(), json_to_vm(v));
423            }
424            VmValue::Dict(Rc::new(m))
425        }
426    }
427}
428
429fn namespace_fields_to_vm(fields: &BTreeMap<FieldKey, serde_json::Value>) -> VmValue {
430    let mut map = BTreeMap::new();
431    for (k, v) in fields {
432        map.insert(k.clone(), json_to_vm(v));
433    }
434    VmValue::Dict(Rc::new(map))
435}
436
437fn directory_metadata_to_vm(meta: &DirectoryMetadata) -> VmValue {
438    let mut namespaces = BTreeMap::new();
439    for (ns, fields) in &meta.namespaces {
440        namespaces.insert(ns.clone(), namespace_fields_to_vm(fields));
441    }
442    VmValue::Dict(Rc::new(namespaces))
443}
444
/// Directory key as reported to scripts: an empty or all-whitespace key, or
/// a key that is exactly ".", is shown as "."; any other key is returned
/// unchanged.
fn normalize_directory_key(dir: &str) -> String {
    let shown = match dir {
        "." => ".",
        blank if blank.trim().is_empty() => ".",
        other => other,
    };
    shown.to_owned()
}
452
/// Options for a directory scan, filled in by `parse_scan_options`.
#[derive(Clone)]
struct ScanOptions {
    /// Optional pattern supplied by the caller; `None` when no pattern was given.
    pattern: Option<String>,
    /// Depth limit for the scan (read from the `max_depth` option).
    max_depth: usize,
    /// Value of the `include_hidden` option.
    include_hidden: bool,
    /// Value of the `include_dirs` option.
    include_dirs: bool,
    /// Value of the `include_files` option.
    include_files: bool,
}

impl Default for ScanOptions {
    /// Defaults: no pattern, depth 5, hidden entries excluded, both files and
    /// directories included.
    fn default() -> Self {
        ScanOptions {
            pattern: None,
            max_depth: 5,
            include_hidden: false,
            include_dirs: true,
            include_files: true,
        }
    }
}
473
474fn bool_arg(map: &BTreeMap<String, VmValue>, key: &str, default: bool) -> bool {
475    match map.get(key) {
476        Some(VmValue::Bool(value)) => *value,
477        _ => default,
478    }
479}
480
481fn usize_arg(map: &BTreeMap<String, VmValue>, key: &str, default: usize) -> usize {
482    match map.get(key) {
483        Some(VmValue::Int(value)) if *value >= 0 => *value as usize,
484        _ => default,
485    }
486}
487
488fn parse_scan_options(
489    pattern_or_options: Option<&VmValue>,
490    explicit_options: Option<&VmValue>,
491) -> ScanOptions {
492    let mut options = ScanOptions::default();
493    if let Some(VmValue::String(pattern)) = pattern_or_options {
494        options.pattern = Some(pattern.to_string());
495    } else if let Some(VmValue::Dict(dict)) = pattern_or_options {
496        apply_scan_options_dict(&mut options, dict);
497    }
498    if let Some(VmValue::Dict(dict)) = explicit_options {
499        apply_scan_options_dict(&mut options, dict);
500    }
501    options
502}
503
504fn apply_scan_options_dict(options: &mut ScanOptions, dict: &BTreeMap<String, VmValue>) {
505    if let Some(pattern) = dict.get("pattern").map(|value| value.display()) {
506        if !pattern.is_empty() {
507            options.pattern = Some(pattern);
508        }
509    }
510    options.max_depth = usize_arg(dict, "max_depth", options.max_depth);
511    options.include_hidden = bool_arg(dict, "include_hidden", options.include_hidden);
512    options.include_dirs = bool_arg(dict, "include_dirs", options.include_dirs);
513    options.include_files = bool_arg(dict, "include_files", options.include_files);
514}
515
516fn resolve_scan_root(rel_dir: &str) -> PathBuf {
517    let candidate = PathBuf::from(rel_dir);
518    if candidate.is_absolute() {
519        return candidate;
520    }
521    crate::stdlib::process::resolve_source_relative_path(rel_dir)
522}
523
524/// Register metadata builtins on a VM.
525///
526/// In standalone mode, these operate directly on the resolved Harn metadata
527/// state root.
528/// In bridge mode, these are registered **before** bridge builtins so the
529/// host can override them if needed (but typically the VM handles this natively).
530pub fn register_metadata_builtins(vm: &mut Vm, base_dir: &Path) {
531    let state = Rc::new(RefCell::new(MetadataState::new(base_dir)));
532
533    // metadata_get(dir, namespace?) -> dict | nil
534    let s = Rc::clone(&state);
535    vm.register_builtin("metadata_get", move |args, _out| {
536        let dir = args.first().map(|a| a.display()).unwrap_or_default();
537        let namespace = args.get(1).and_then(|a| {
538            if matches!(a, VmValue::Nil) {
539                None
540            } else {
541                Some(a.display())
542            }
543        });
544
545        let mut st = s.borrow_mut();
546        if let Some(ns) = namespace {
547            match st.get_namespace(&dir, &ns) {
548                Some(fields) => {
549                    let mut m = BTreeMap::new();
550                    for (k, v) in fields {
551                        m.insert(k, json_to_vm(&v));
552                    }
553                    Ok(VmValue::Dict(Rc::new(m)))
554                }
555                None => Ok(VmValue::Nil),
556            }
557        } else {
558            // Return all namespaces flattened.
559            let resolved = st.resolve(&dir);
560            let mut m = BTreeMap::new();
561            for fields in resolved.namespaces.values() {
562                for (k, v) in fields {
563                    m.insert(k.clone(), json_to_vm(v));
564                }
565            }
566            if m.is_empty() {
567                Ok(VmValue::Nil)
568            } else {
569                Ok(VmValue::Dict(Rc::new(m)))
570            }
571        }
572    });
573
574    // metadata_resolve(dir, namespace?) -> dict | nil
575    let s = Rc::clone(&state);
576    vm.register_builtin("metadata_resolve", move |args, _out| {
577        let dir = args.first().map(|a| a.display()).unwrap_or_default();
578        let namespace = args.get(1).and_then(|a| {
579            if matches!(a, VmValue::Nil) {
580                None
581            } else {
582                Some(a.display())
583            }
584        });
585        let mut st = s.borrow_mut();
586        let resolved = st.resolve(&dir);
587        if let Some(ns) = namespace {
588            match resolved.namespaces.get(&ns) {
589                Some(fields) => Ok(namespace_fields_to_vm(fields)),
590                None => Ok(VmValue::Nil),
591            }
592        } else if resolved.namespaces.is_empty() {
593            Ok(VmValue::Nil)
594        } else {
595            Ok(directory_metadata_to_vm(&resolved))
596        }
597    });
598
599    // metadata_entries(namespace?) -> list
600    let s = Rc::clone(&state);
601    vm.register_builtin("metadata_entries", move |args, _out| {
602        let namespace = args.first().and_then(|a| {
603            if matches!(a, VmValue::Nil) {
604                None
605            } else {
606                Some(a.display())
607            }
608        });
609        let mut st = s.borrow_mut();
610        st.ensure_loaded();
611        let directories: Vec<String> = st.entries.keys().cloned().collect();
612        let mut items = Vec::new();
613        for dir in directories {
614            let local = st.local_directory(&dir);
615            let resolved = st.resolve(&dir);
616            let mut item = BTreeMap::new();
617            item.insert(
618                "dir".to_string(),
619                VmValue::String(Rc::from(normalize_directory_key(&dir))),
620            );
621            match &namespace {
622                Some(ns) => {
623                    item.insert(
624                        "local".to_string(),
625                        local
626                            .namespaces
627                            .get(ns)
628                            .map(namespace_fields_to_vm)
629                            .unwrap_or(VmValue::Nil),
630                    );
631                    item.insert(
632                        "resolved".to_string(),
633                        resolved
634                            .namespaces
635                            .get(ns)
636                            .map(namespace_fields_to_vm)
637                            .unwrap_or(VmValue::Nil),
638                    );
639                }
640                None => {
641                    item.insert("local".to_string(), directory_metadata_to_vm(&local));
642                    item.insert("resolved".to_string(), directory_metadata_to_vm(&resolved));
643                }
644            }
645            items.push(VmValue::Dict(Rc::new(item)));
646        }
647        Ok(VmValue::List(Rc::new(items)))
648    });
649
650    // metadata_set(dir, namespace, data_dict)
651    let s = Rc::clone(&state);
652    vm.register_builtin("metadata_set", move |args, _out| {
653        let dir = args.first().map(|a| a.display()).unwrap_or_default();
654        let namespace = args.get(1).map(|a| a.display()).unwrap_or_default();
655        let data_val = args.get(2).unwrap_or(&VmValue::Nil);
656
657        let mut data = BTreeMap::new();
658        if let VmValue::Dict(dict) = data_val {
659            for (k, v) in dict.iter() {
660                data.insert(k.clone(), vm_to_json(v));
661            }
662        }
663
664        if !data.is_empty() {
665            s.borrow_mut().set_namespace(&dir, &namespace, data);
666        }
667        Ok(VmValue::Nil)
668    });
669
670    // metadata_save()
671    let s = Rc::clone(&state);
672    vm.register_builtin("metadata_save", move |_args, _out| {
673        s.borrow_mut().save().map_err(VmError::Runtime)?;
674        Ok(VmValue::Nil)
675    });
676
677    // metadata_stale(project) -> {any_stale: bool, tier1: [dirs], tier2: [dirs]}
678    // Compare stored structureHash/contentHash against current filesystem state.
679    let s = Rc::clone(&state);
680    let base2 = base_dir.to_path_buf();
681    vm.register_builtin("metadata_stale", move |_args, _out| {
682        s.borrow_mut().ensure_loaded();
683        let state = s.borrow();
684        let mut tier1_stale: Vec<VmValue> = Vec::new();
685        let mut tier2_stale: Vec<VmValue> = Vec::new();
686
687        for (dir, meta) in &state.entries {
688            let full_dir = if dir.is_empty() {
689                base2.clone()
690            } else {
691                base2.join(dir)
692            };
693            // Tier 1: structureHash — file list + sizes.
694            if let Some(stored_hash) = meta
695                .namespaces
696                .get("classification")
697                .and_then(|ns| ns.get("structureHash"))
698                .and_then(|v| v.as_str())
699            {
700                let current_hash = compute_structure_hash(&full_dir);
701                if current_hash != stored_hash {
702                    tier1_stale.push(VmValue::String(Rc::from(dir.as_str())));
703                    // Structure changed — skip the tier 2 content check.
704                    continue;
705                }
706            }
707            // Tier 2: contentHash — file content digest.
708            if let Some(stored_hash) = meta
709                .namespaces
710                .get("classification")
711                .and_then(|ns| ns.get("contentHash"))
712                .and_then(|v| v.as_str())
713            {
714                let current_hash = compute_content_hash_for_dir(&full_dir);
715                if current_hash != stored_hash {
716                    tier2_stale.push(VmValue::String(Rc::from(dir.as_str())));
717                }
718            }
719        }
720
721        let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
722        let mut m = BTreeMap::new();
723        m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
724        m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
725        m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
726        Ok(VmValue::Dict(Rc::new(m)))
727    });
728
729    // metadata_refresh_hashes(project) -> nil
730    // Recompute and store structureHash for all directories.
731    let s = Rc::clone(&state);
732    let base3 = base_dir.to_path_buf();
733    vm.register_builtin("metadata_refresh_hashes", move |_args, _out| {
734        let mut state = s.borrow_mut();
735        state.ensure_loaded();
736        let dirs: Vec<String> = state.entries.keys().cloned().collect();
737        for dir in dirs {
738            let full_dir = if dir.is_empty() {
739                base3.clone()
740            } else {
741                base3.join(&dir)
742            };
743            let hash = compute_structure_hash(&full_dir);
744            let entry = state.entries.entry(dir).or_default();
745            let ns = entry
746                .namespaces
747                .entry("classification".to_string())
748                .or_default();
749            ns.insert("structureHash".to_string(), serde_json::Value::String(hash));
750        }
751        state.dirty = true;
752        Ok(VmValue::Nil)
753    });
754
755    // metadata_status(namespace?) -> dict
756    let s = Rc::clone(&state);
757    let base4 = base_dir.to_path_buf();
758    vm.register_builtin("metadata_status", move |args, _out| {
759        let namespace = args.first().and_then(|a| {
760            if matches!(a, VmValue::Nil) {
761                None
762            } else {
763                Some(a.display())
764            }
765        });
766        s.borrow_mut().ensure_loaded();
767        let state = s.borrow();
768        let mut namespaces = BTreeMap::new();
769        let mut directories = Vec::new();
770        let mut missing_structure_hash = Vec::new();
771        let mut missing_content_hash = Vec::new();
772        for (dir, meta) in &state.entries {
773            directories.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
774            for ns in meta.namespaces.keys() {
775                namespaces.insert(ns.clone(), VmValue::Bool(true));
776            }
777            let full_dir = if dir.is_empty() {
778                base4.clone()
779            } else {
780                base4.join(dir)
781            };
782            let relevant = namespace
783                .as_ref()
784                .and_then(|name| meta.namespaces.get(name))
785                .or_else(|| meta.namespaces.get("classification"));
786            if let Some(fields) = relevant {
787                if !fields.contains_key("structureHash") && full_dir.exists() {
788                    missing_structure_hash
789                        .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
790                }
791                if !fields.contains_key("contentHash") && full_dir.exists() {
792                    missing_content_hash
793                        .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
794                }
795            }
796        }
797        let stale = metadata_stale_value(&state, &base4);
798        let mut result = BTreeMap::new();
799        result.insert(
800            "directory_count".to_string(),
801            VmValue::Int(state.entries.len() as i64),
802        );
803        result.insert(
804            "namespace_count".to_string(),
805            VmValue::Int(namespaces.len() as i64),
806        );
807        result.insert(
808            "namespaces".to_string(),
809            VmValue::List(Rc::new(
810                namespaces
811                    .keys()
812                    .cloned()
813                    .map(|name| VmValue::String(Rc::from(name)))
814                    .collect(),
815            )),
816        );
817        result.insert(
818            "directories".to_string(),
819            VmValue::List(Rc::new(directories)),
820        );
821        result.insert(
822            "missing_structure_hash".to_string(),
823            VmValue::List(Rc::new(missing_structure_hash)),
824        );
825        result.insert(
826            "missing_content_hash".to_string(),
827            VmValue::List(Rc::new(missing_content_hash)),
828        );
829        result.insert("stale".to_string(), stale);
830        Ok(VmValue::Dict(Rc::new(result)))
831    });
832
833    // compute_content_hash(dir) -> string of file list + sizes + mtimes for staleness tracking.
834    let base = base_dir.to_path_buf();
835    vm.register_builtin("compute_content_hash", move |args, _out| {
836        let dir = args.first().map(|a| a.display()).unwrap_or_default();
837        let full_dir = if dir.is_empty() {
838            base.clone()
839        } else {
840            base.join(&dir)
841        };
842        let hash = compute_content_hash_for_dir(&full_dir);
843        Ok(VmValue::String(Rc::from(hash)))
844    });
845
846    // invalidate_facts is a no-op: facts live in the metadata namespace.
847    vm.register_builtin("invalidate_facts", |_args, _out| Ok(VmValue::Nil));
848
849    register_scan_builtins(vm);
850}
851
852/// Compute structure hash for a directory (file names + sizes).
853fn compute_structure_hash(dir: &Path) -> String {
854    let mut entries: Vec<String> = Vec::new();
855    if let Ok(rd) = std::fs::read_dir(dir) {
856        for entry in rd.flatten() {
857            if let Ok(meta) = entry.metadata() {
858                let name = entry.file_name().to_string_lossy().into_owned();
859                entries.push(format!("{}:{}", name, meta.len()));
860            }
861        }
862    }
863    entries.sort();
864    let joined = entries.join("|");
865    format!("{:x}", fnv_hash(joined.as_bytes()))
866}
867
868/// Compute content hash for a directory (file names + sizes + mtimes).
869fn compute_content_hash_for_dir(dir: &Path) -> String {
870    let mut entries: Vec<String> = Vec::new();
871    if let Ok(rd) = std::fs::read_dir(dir) {
872        for entry in rd.flatten() {
873            if let Ok(meta) = entry.metadata() {
874                let name = entry.file_name().to_string_lossy().into_owned();
875                let mtime = meta
876                    .modified()
877                    .ok()
878                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
879                    .map(|d| d.as_secs())
880                    .unwrap_or(0);
881                entries.push(format!("{}:{}:{}", name, meta.len(), mtime));
882            }
883        }
884    }
885    entries.sort();
886    let joined = entries.join("|");
887    format!("{:x}", fnv_hash(joined.as_bytes()))
888}
889
/// 64-bit FNV-1a digest of `data`.
///
/// Starts from the FNV offset basis and, for every input byte, XORs the byte
/// into the state and then multiplies by the FNV prime (wrapping on
/// overflow). Not cryptographically secure; intended only for staleness
/// detection.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;
    data.iter().fold(OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
899
900/// Register scan_directory builtin: native Rust file enumeration.
901pub fn register_scan_builtins(vm: &mut Vm) {
902    // scan_directory(path?, pattern?) -> [{path, size, modified, is_dir}, ...]
903    vm.register_builtin("scan_directory", move |args, _out| {
904        let rel_dir = args.first().map(|a| a.display()).unwrap_or_default();
905        let options = parse_scan_options(args.get(1), args.get(2));
906        let scan_base = resolve_scan_root(".");
907        let full_dir = if rel_dir.is_empty() {
908            scan_base.clone()
909        } else {
910            scan_base.join(&rel_dir)
911        };
912        let mut results: Vec<VmValue> = Vec::new();
913        scan_dir_recursive(&full_dir, &scan_base, &options, &mut results, 0);
914        Ok(VmValue::List(Rc::new(results)))
915    });
916}
917
918fn metadata_stale_value(state: &MetadataState, base_dir: &Path) -> VmValue {
919    let mut tier1_stale: Vec<VmValue> = Vec::new();
920    let mut tier2_stale: Vec<VmValue> = Vec::new();
921    for (dir, meta) in &state.entries {
922        let full_dir = if dir.is_empty() {
923            base_dir.to_path_buf()
924        } else {
925            base_dir.join(dir)
926        };
927        if let Some(stored_hash) = meta
928            .namespaces
929            .get("classification")
930            .and_then(|ns| ns.get("structureHash"))
931            .and_then(|v| v.as_str())
932        {
933            let current_hash = compute_structure_hash(&full_dir);
934            if current_hash != stored_hash {
935                tier1_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
936                continue;
937            }
938        }
939        if let Some(stored_hash) = meta
940            .namespaces
941            .get("classification")
942            .and_then(|ns| ns.get("contentHash"))
943            .and_then(|v| v.as_str())
944        {
945            let current_hash = compute_content_hash_for_dir(&full_dir);
946            if current_hash != stored_hash {
947                tier2_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
948            }
949        }
950    }
951    let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
952    let mut m = BTreeMap::new();
953    m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
954    m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
955    m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
956    VmValue::Dict(Rc::new(m))
957}
958
959fn scan_dir_recursive(
960    dir: &Path,
961    base: &Path,
962    options: &ScanOptions,
963    results: &mut Vec<VmValue>,
964    depth: usize,
965) {
966    if depth > options.max_depth {
967        return;
968    }
969    let rd = match std::fs::read_dir(dir) {
970        Ok(rd) => rd,
971        Err(_) => return,
972    };
973    for entry in rd.flatten() {
974        let meta = match entry.metadata() {
975            Ok(m) => m,
976            Err(_) => continue,
977        };
978        let name = entry.file_name().to_string_lossy().into_owned();
979        if !options.include_hidden && name.starts_with('.') {
980            continue;
981        }
982        let rel_path = entry
983            .path()
984            .strip_prefix(base)
985            .unwrap_or(entry.path().as_path())
986            .to_string_lossy()
987            .into_owned();
988        if let Some(pat) = &options.pattern {
989            if !glob_match(pat, &rel_path) {
990                if meta.is_dir() {
991                    scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
992                }
993                continue;
994            }
995        }
996        let mtime = meta
997            .modified()
998            .ok()
999            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
1000            .map(|d| d.as_secs() as i64)
1001            .unwrap_or(0);
1002        let mut m = BTreeMap::new();
1003        m.insert("path".to_string(), VmValue::String(Rc::from(rel_path)));
1004        m.insert("size".to_string(), VmValue::Int(meta.len() as i64));
1005        m.insert("modified".to_string(), VmValue::Int(mtime));
1006        m.insert("is_dir".to_string(), VmValue::Bool(meta.is_dir()));
1007        if (meta.is_dir() && options.include_dirs) || (!meta.is_dir() && options.include_files) {
1008            results.push(VmValue::Dict(Rc::new(m)));
1009        }
1010        if meta.is_dir() {
1011            scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
1012        }
1013    }
1014}
1015
/// Simple glob matching of `pattern` against a relative `path`.
///
/// * A pattern without `*` matches when it occurs anywhere in `path`
///   (plain substring search).
/// * Otherwise every run of `*` is a wildcard that matches any sequence of
///   characters, `/` included. The literal text before the first wildcard
///   must start `path`, the literal text after the last wildcard must end
///   it, and the literals in between must occur in order without
///   overlapping.
/// * A run of two or more stars (`**`) additionally swallows the `/`
///   written directly next to it, so `src/**/mod.rs` also matches
///   `src/mod.rs`.
///
/// Any number of wildcards may be combined, e.g. `**/*.rs` or `*test*`.
fn glob_match(pattern: &str, path: &str) -> bool {
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    // Split the pattern into the literal pieces that sit between wildcard runs.
    let mut literals: Vec<&str> = Vec::new();
    let mut rest = pattern;
    let mut after_globstar = false;
    while let Some(star) = rest.find('*') {
        let mut literal = &rest[..star];
        let tail = &rest[star..];
        let run_len = tail.len() - tail.trim_start_matches('*').len();
        let globstar = run_len >= 2;
        // `**` may stand for zero directories, so the separators that hug it
        // are optional and are dropped from the neighbouring literals.
        if after_globstar {
            literal = literal.trim_start_matches('/');
        }
        if globstar {
            literal = literal.trim_end_matches('/');
        }
        literals.push(literal);
        after_globstar = globstar;
        rest = &tail[run_len..];
    }
    literals.push(if after_globstar {
        rest.trim_start_matches('/')
    } else {
        rest
    });

    // The pattern contains at least one `*`, so there are always two or more
    // literals (possibly empty); the `None` arms are unreachable safeguards.
    let (head, remaining) = match literals.split_first() {
        Some(parts) => parts,
        None => return false,
    };
    let (tail, middle) = match remaining.split_last() {
        Some(parts) => parts,
        None => return false,
    };

    // Anchor the first and last literal; stripping them one after the other
    // guarantees they cannot overlap inside `path`.
    let body = match path
        .strip_prefix(*head)
        .and_then(|without_head| without_head.strip_suffix(*tail))
    {
        Some(body) => body,
        None => return false,
    };

    // Leftmost matching of the inner literals is sufficient because every
    // wildcard accepts any sequence of characters.
    let mut cursor = body;
    for literal in middle {
        match cursor.find(*literal) {
            Some(at) => cursor = &cursor[at + literal.len()..],
            None => return false,
        }
    }
    true
}
1036
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a scratch path under the system temp directory.
    ///
    /// The path is only computed, not created. The nanosecond timestamp keeps
    /// successive runs apart and `name` keeps the tests of one run apart.
    fn temp_path(name: &str) -> PathBuf {
        let unique = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos();
        std::env::temp_dir().join(format!("harn-metadata-{name}-{unique}"))
    }

    /// A child directory sees both its own fields and those of its parent
    /// within the same namespace.
    #[test]
    fn metadata_resolve_preserves_namespace_structure() {
        let base = temp_path("resolve");
        let mut state = MetadataState::new(&base);
        state.set_namespace(
            "",
            "classification",
            BTreeMap::from([("language".into(), serde_json::json!("rust"))]),
        );
        state.set_namespace(
            "src",
            "classification",
            BTreeMap::from([("owner".into(), serde_json::json!("vm"))]),
        );

        let resolved = state.resolve("src");
        let classification = resolved.namespaces.get("classification").unwrap();
        assert_eq!(
            classification.get("language"),
            Some(&serde_json::json!("rust"))
        );
        assert_eq!(classification.get("owner"), Some(&serde_json::json!("vm")));
    }

    /// Saving writes one `entries.json` shard per namespace.
    #[test]
    fn metadata_save_writes_namespace_shards() {
        let base = temp_path("save");
        let mut state = MetadataState::new(&base);
        state.set_namespace(
            ".",
            "classification",
            BTreeMap::from([("language".into(), serde_json::json!("rust"))]),
        );
        state.set_namespace(
            "src",
            "coding-enrichment-v1",
            BTreeMap::from([("_deep_scan".into(), serde_json::json!({"version": 1}))]),
        );
        state.save().expect("save");

        let metadata_root = crate::runtime_paths::metadata_dir(&base);
        let classification = std::fs::read_to_string(
            metadata_root
                .join("classification")
                .join(NAMESPACE_ENTRIES_FILE),
        )
        .expect("classification shard");
        let classification =
            serde_json::from_str::<serde_json::Value>(&classification).expect("json");
        let enrichment = std::fs::read_to_string(
            metadata_root
                .join("coding-enrichment-v1")
                .join(NAMESPACE_ENTRIES_FILE),
        )
        .expect("enrichment shard");
        let enrichment = serde_json::from_str::<serde_json::Value>(&enrichment).expect("json");

        // Best-effort cleanup before asserting, so a failing assertion does
        // not leave files behind. NOTE(review): presumably the metadata root
        // lives beneath `base` — confirm against `runtime_paths::metadata_dir`.
        let _ = std::fs::remove_dir_all(&base);

        assert_eq!(
            classification
                .get("namespace")
                .and_then(|value| value.as_str()),
            Some("classification")
        );
        assert!(classification
            .get("entries")
            .and_then(|value| value.get("."))
            .is_some());
        assert!(enrichment
            .get("entries")
            .and_then(|value| value.get("src"))
            .is_some());
    }

    /// Loading merges the legacy `root.json` shard with per-namespace shards.
    #[test]
    fn metadata_load_merges_legacy_and_namespace_shards() {
        let base = temp_path("load");
        let metadata_root = crate::runtime_paths::metadata_dir(&base);
        std::fs::create_dir_all(metadata_root.join("facts")).unwrap();
        std::fs::write(
            metadata_root.join(LEGACY_SHARD_NAME),
            serde_json::json!({
                "version": 2,
                "entries": {
                    ".": {
                        "namespaces": {
                            "classification": {
                                "language": "rust"
                            }
                        }
                    }
                }
            })
            .to_string(),
        )
        .unwrap();
        std::fs::write(
            metadata_root.join("facts").join(NAMESPACE_ENTRIES_FILE),
            serde_json::json!({
                "version": 1,
                "namespace": "facts",
                "entries": {
                    "src": {
                        "kind": "module"
                    }
                }
            })
            .to_string(),
        )
        .unwrap();

        let mut state = MetadataState::new(&base);
        state.ensure_loaded();

        // Best-effort cleanup before asserting; the assertions below only
        // read the in-memory `entries` field. NOTE(review): presumably the
        // metadata root lives beneath `base` — confirm against
        // `runtime_paths::metadata_dir`.
        let _ = std::fs::remove_dir_all(&base);

        assert_eq!(
            state
                .entries
                .get(".")
                .and_then(|meta| meta.namespaces.get("classification"))
                .and_then(|fields| fields.get("language")),
            Some(&serde_json::json!("rust"))
        );
        assert_eq!(
            state
                .entries
                .get("src")
                .and_then(|meta| meta.namespaces.get("facts"))
                .and_then(|fields| fields.get("kind")),
            Some(&serde_json::json!("module"))
        );
    }

    /// With `max_depth: 0` and hidden entries excluded, only the visible
    /// top-level file matching the pattern is reported.
    #[test]
    fn scan_options_filter_hidden_and_depth() {
        let base = temp_path("scan");
        std::fs::create_dir_all(base.join("project/deep")).unwrap();
        std::fs::write(base.join("project/root.txt"), "root").unwrap();
        std::fs::write(base.join("project/.hidden.txt"), "hidden").unwrap();
        std::fs::write(base.join("project/deep/nested.txt"), "nested").unwrap();

        let options = ScanOptions {
            pattern: Some(".txt".into()),
            max_depth: 0,
            include_hidden: false,
            include_dirs: false,
            include_files: true,
        };
        let mut results = Vec::new();
        scan_dir_recursive(&base.join("project"), &base, &options, &mut results, 0);
        let paths: Vec<String> = results
            .into_iter()
            .map(|value| match value {
                VmValue::Dict(dict) => dict.get("path").unwrap().display(),
                _ => String::new(),
            })
            .collect();

        // Clean up before asserting so a failure does not leak the fixture.
        let _ = std::fs::remove_dir_all(&base);

        assert_eq!(paths, vec!["project/root.txt".to_string()]);
    }
}