1use std::cell::RefCell;
11use std::collections::BTreeMap;
12use std::path::{Path, PathBuf};
13use std::rc::Rc;
14
15use crate::value::{VmError, VmValue};
16use crate::vm::Vm;
17
/// Name of a metadata namespace (for example `"classification"`).
type Namespace = String;
/// Name of a single field stored inside a namespace.
type FieldKey = String;
/// File name of the legacy shard that is read directly from the metadata root.
const LEGACY_SHARD_NAME: &str = "root.json";
/// File name of the shard stored inside each per-namespace directory.
const NAMESPACE_ENTRIES_FILE: &str = "entries.json";
22
23#[derive(Clone, Default)]
25struct DirectoryMetadata {
26 namespaces: BTreeMap<Namespace, BTreeMap<FieldKey, serde_json::Value>>,
27}
28
29trait MetadataBackend {
30 fn backend_name(&self) -> &'static str;
31 fn load(&self, root: &Path) -> Result<BTreeMap<String, DirectoryMetadata>, String>;
32 fn save(
33 &self,
34 root: &Path,
35 entries: &BTreeMap<String, DirectoryMetadata>,
36 ) -> Result<(), String>;
37}
38
/// Backend that keeps metadata shards as JSON files on the local filesystem.
#[derive(Default)]
struct FilesystemMetadataBackend;

impl FilesystemMetadataBackend {
    /// Creates the stateless filesystem backend.
    fn new() -> Self {
        FilesystemMetadataBackend
    }
}
47
48struct MetadataState {
50 entries: BTreeMap<String, DirectoryMetadata>,
51 base_dir: PathBuf,
52 backend: Box<dyn MetadataBackend>,
53 loaded: bool,
54 dirty: bool,
55}
56
57impl MetadataState {
58 fn new(base_dir: &Path) -> Self {
59 Self {
60 entries: BTreeMap::new(),
61 base_dir: base_dir.to_path_buf(),
62 backend: Box::new(FilesystemMetadataBackend::new()),
63 loaded: false,
64 dirty: false,
65 }
66 }
67
68 fn metadata_dir(&self) -> PathBuf {
69 crate::runtime_paths::metadata_dir(&self.base_dir)
70 }
71
72 fn ensure_loaded(&mut self) {
73 if self.loaded {
74 return;
75 }
76 self.loaded = true;
77 if let Ok(entries) = self.backend.load(&self.metadata_dir()) {
78 self.entries = entries;
79 }
80 }
81
82 fn resolve(&mut self, directory: &str) -> DirectoryMetadata {
85 self.ensure_loaded();
86 let mut result = DirectoryMetadata::default();
87
88 if let Some(root) = self.entries.get(".").or_else(|| self.entries.get("")) {
89 merge_metadata(&mut result, root);
90 }
91
92 let components: Vec<&str> = directory
93 .split('/')
94 .filter(|c| !c.is_empty() && *c != ".")
95 .collect();
96 let mut current = String::new();
97 for component in components {
98 if current.is_empty() {
99 current = component.to_string();
100 } else {
101 current = format!("{current}/{component}");
102 }
103 if let Some(meta) = self.entries.get(¤t) {
104 merge_metadata(&mut result, meta);
105 }
106 }
107
108 result
109 }
110
111 fn get_namespace(
113 &mut self,
114 directory: &str,
115 namespace: &str,
116 ) -> Option<BTreeMap<FieldKey, serde_json::Value>> {
117 let resolved = self.resolve(directory);
118 resolved.namespaces.get(namespace).cloned()
119 }
120
121 fn local_directory(&mut self, directory: &str) -> DirectoryMetadata {
122 self.ensure_loaded();
123 self.entries.get(directory).cloned().unwrap_or_default()
124 }
125
126 fn set_namespace(
128 &mut self,
129 directory: &str,
130 namespace: &str,
131 data: BTreeMap<FieldKey, serde_json::Value>,
132 ) {
133 self.ensure_loaded();
134 let meta = self.entries.entry(directory.to_string()).or_default();
135 let ns = meta.namespaces.entry(namespace.to_string()).or_default();
136 for (k, v) in data {
137 ns.insert(k, v);
138 }
139 self.dirty = true;
140 }
141
142 fn save(&mut self) -> Result<(), String> {
144 if !self.dirty {
145 return Ok(());
146 }
147 let meta_dir = self.metadata_dir();
148 self.backend.save(&meta_dir, &self.entries)?;
149 self.dirty = false;
150 Ok(())
151 }
152}
153
154impl MetadataBackend for FilesystemMetadataBackend {
155 fn backend_name(&self) -> &'static str {
156 "filesystem"
157 }
158
159 fn load(&self, root: &Path) -> Result<BTreeMap<String, DirectoryMetadata>, String> {
160 let mut entries = BTreeMap::new();
161 let legacy_path = root.join(LEGACY_SHARD_NAME);
162 if let Ok(contents) = std::fs::read_to_string(&legacy_path) {
163 entries = parse_legacy_entries(&contents);
164 }
165
166 let namespace_dirs = match std::fs::read_dir(root) {
167 Ok(read_dir) => read_dir,
168 Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(entries),
169 Err(error) => return Err(format!("metadata load: {error}")),
170 };
171
172 let mut dirs = namespace_dirs
173 .flatten()
174 .filter(|entry| entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false))
175 .collect::<Vec<_>>();
176 dirs.sort_by_key(|entry| entry.file_name());
177
178 for dir in dirs {
179 let shard_path = dir.path().join(NAMESPACE_ENTRIES_FILE);
180 let Ok(contents) = std::fs::read_to_string(&shard_path) else {
181 continue;
182 };
183 merge_namespace_entries(&mut entries, &contents);
184 }
185
186 Ok(entries)
187 }
188
189 fn save(
190 &self,
191 root: &Path,
192 entries: &BTreeMap<String, DirectoryMetadata>,
193 ) -> Result<(), String> {
194 std::fs::create_dir_all(root).map_err(|error| format!("metadata mkdir: {error}"))?;
195
196 let mut namespaces: BTreeMap<String, serde_json::Map<String, serde_json::Value>> =
197 BTreeMap::new();
198 for (dir, meta) in entries {
199 for (namespace, fields) in &meta.namespaces {
200 namespaces
201 .entry(namespace.clone())
202 .or_default()
203 .insert(dir.clone(), serialize_namespace_fields(fields));
204 }
205 }
206
207 for (namespace, shard_entries) in namespaces {
208 let namespace_dir = root.join(namespace_path_component(&namespace));
209 std::fs::create_dir_all(&namespace_dir)
210 .map_err(|error| format!("metadata mkdir: {error}"))?;
211 let shard = serde_json::json!({
212 "version": 1,
213 "namespace": namespace,
214 "backend": self.backend_name(),
215 "generatedAt": chrono_now_iso(),
216 "entries": serde_json::Value::Object(shard_entries),
217 });
218 let json = serde_json::to_string_pretty(&shard)
219 .map_err(|error| format!("metadata json: {error}"))?;
220 std::fs::write(namespace_dir.join(NAMESPACE_ENTRIES_FILE), json)
221 .map_err(|error| format!("metadata write: {error}"))?;
222 }
223
224 Ok(())
225 }
226}
227
/// Formats the current system time as `YYYY-MM-DDTHH:MM:SSZ` using only the
/// standard library. A clock before the Unix epoch is treated as the epoch.
fn chrono_now_iso() -> String {
    const SECS_PER_DAY: u64 = 86400;
    let is_leap = |year: i64| year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);

    let epoch_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let secs_of_day = epoch_secs % SECS_PER_DAY;

    // Peel whole years off the day count, starting from 1970.
    let mut day_of_year = (epoch_secs / SECS_PER_DAY) as i64;
    let mut year = 1970i64;
    loop {
        let year_len: i64 = if is_leap(year) { 366 } else { 365 };
        if day_of_year < year_len {
            break;
        }
        day_of_year -= year_len;
        year += 1;
    }

    // Then peel whole months off what is left of the year.
    let february: i64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month_index = 0usize;
    let mut day_index = day_of_year;
    for length in &month_lengths {
        if day_index < *length {
            break;
        }
        day_index -= *length;
        month_index += 1;
    }

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year,
        month_index + 1,
        day_index + 1,
        secs_of_day / 3600,
        (secs_of_day % 3600) / 60,
        secs_of_day % 60
    )
}
287
288fn merge_metadata(target: &mut DirectoryMetadata, source: &DirectoryMetadata) {
289 for (ns, fields) in &source.namespaces {
290 let target_ns = target.namespaces.entry(ns.clone()).or_default();
291 for (k, v) in fields {
292 target_ns.insert(k.clone(), v.clone());
293 }
294 }
295}
296
297fn parse_namespace_fields(val: &serde_json::Value) -> BTreeMap<FieldKey, serde_json::Value> {
298 let mut fields = BTreeMap::new();
299 let Some(obj) = val.as_object() else {
300 return fields;
301 };
302 for (key, value) in obj {
303 fields.insert(key.clone(), value.clone());
304 }
305 fields
306}
307
308fn serialize_namespace_fields(fields: &BTreeMap<FieldKey, serde_json::Value>) -> serde_json::Value {
309 let mut fields_obj = serde_json::Map::new();
310 for (k, v) in fields {
311 fields_obj.insert(k.clone(), v.clone());
312 }
313 serde_json::Value::Object(fields_obj)
314}
315
316fn parse_directory_metadata(val: &serde_json::Value) -> DirectoryMetadata {
317 let mut meta = DirectoryMetadata::default();
318 let obj = match val.as_object() {
319 Some(o) => o,
320 None => return meta,
321 };
322 if let Some(ns_obj) = obj.get("namespaces").and_then(|n| n.as_object()) {
323 for (ns_name, fields_val) in ns_obj {
324 if let Some(fields) = fields_val.as_object() {
325 let mut field_map = BTreeMap::new();
326 for (k, v) in fields {
327 field_map.insert(k.clone(), v.clone());
328 }
329 meta.namespaces.insert(ns_name.clone(), field_map);
330 }
331 }
332 }
333 meta
334}
335
336fn parse_legacy_entries(contents: &str) -> BTreeMap<String, DirectoryMetadata> {
337 let mut entries = BTreeMap::new();
338 let parsed: serde_json::Value = match serde_json::from_str(contents) {
339 Ok(v) => v,
340 Err(_) => return entries,
341 };
342 let Some(shard_entries) = parsed.get("entries").and_then(|e| e.as_object()) else {
343 return entries;
344 };
345 for (dir, meta_val) in shard_entries {
346 entries.insert(dir.clone(), parse_directory_metadata(meta_val));
347 }
348 entries
349}
350
351fn merge_namespace_entries(entries: &mut BTreeMap<String, DirectoryMetadata>, contents: &str) {
352 let parsed: serde_json::Value = match serde_json::from_str(contents) {
353 Ok(v) => v,
354 Err(_) => return,
355 };
356 let Some(namespace) = parsed.get("namespace").and_then(|value| value.as_str()) else {
357 return;
358 };
359 let Some(shard_entries) = parsed.get("entries").and_then(|value| value.as_object()) else {
360 return;
361 };
362 for (dir, fields_val) in shard_entries {
363 let directory = entries.entry(dir.clone()).or_default();
364 directory
365 .namespaces
366 .insert(namespace.to_string(), parse_namespace_fields(fields_val));
367 }
368}
369
/// Turns a namespace name into a single path component.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character is
/// replaced by `_` followed by its code point in upper-case hex (at least two
/// digits). An empty result, `.` and `..` are all mapped to `_`.
fn namespace_path_component(namespace: &str) -> String {
    let mut encoded = String::new();
    for ch in namespace.chars() {
        if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
            encoded.push(ch);
        } else {
            encoded.push_str(&format!("_{:02X}", ch as u32));
        }
    }
    if matches!(encoded.as_str(), "" | "." | "..") {
        "_".to_string()
    } else {
        encoded
    }
}
384
385fn vm_to_json(val: &VmValue) -> serde_json::Value {
386 match val {
387 VmValue::String(s) => serde_json::Value::String(s.to_string()),
388 VmValue::Int(n) => serde_json::json!(*n),
389 VmValue::Float(n) => serde_json::json!(*n),
390 VmValue::Bool(b) => serde_json::Value::Bool(*b),
391 VmValue::Nil => serde_json::Value::Null,
392 VmValue::List(items) => serde_json::Value::Array(items.iter().map(vm_to_json).collect()),
393 VmValue::Dict(map) => {
394 let obj: serde_json::Map<String, serde_json::Value> = map
395 .iter()
396 .map(|(k, v)| (k.clone(), vm_to_json(v)))
397 .collect();
398 serde_json::Value::Object(obj)
399 }
400 _ => serde_json::Value::Null,
401 }
402}
403
404fn json_to_vm(jv: &serde_json::Value) -> VmValue {
405 match jv {
406 serde_json::Value::Null => VmValue::Nil,
407 serde_json::Value::Bool(b) => VmValue::Bool(*b),
408 serde_json::Value::Number(n) => {
409 if let Some(i) = n.as_i64() {
410 VmValue::Int(i)
411 } else {
412 VmValue::Float(n.as_f64().unwrap_or(0.0))
413 }
414 }
415 serde_json::Value::String(s) => VmValue::String(Rc::from(s.as_str())),
416 serde_json::Value::Array(arr) => {
417 VmValue::List(Rc::new(arr.iter().map(json_to_vm).collect()))
418 }
419 serde_json::Value::Object(map) => {
420 let mut m = BTreeMap::new();
421 for (k, v) in map {
422 m.insert(k.clone(), json_to_vm(v));
423 }
424 VmValue::Dict(Rc::new(m))
425 }
426 }
427}
428
429fn namespace_fields_to_vm(fields: &BTreeMap<FieldKey, serde_json::Value>) -> VmValue {
430 let mut map = BTreeMap::new();
431 for (k, v) in fields {
432 map.insert(k.clone(), json_to_vm(v));
433 }
434 VmValue::Dict(Rc::new(map))
435}
436
437fn directory_metadata_to_vm(meta: &DirectoryMetadata) -> VmValue {
438 let mut namespaces = BTreeMap::new();
439 for (ns, fields) in &meta.namespaces {
440 namespaces.insert(ns.clone(), namespace_fields_to_vm(fields));
441 }
442 VmValue::Dict(Rc::new(namespaces))
443}
444
/// Returns `"."` for a blank (empty or whitespace-only) key or for `"."`
/// itself; any other key is returned unchanged.
fn normalize_directory_key(dir: &str) -> String {
    let is_root = dir == "." || dir.trim().is_empty();
    let key = if is_root { "." } else { dir };
    key.to_owned()
}
452
/// Options controlling a directory scan.
#[derive(Clone)]
struct ScanOptions {
    /// Optional path filter; entries whose path does not match are not reported.
    pattern: Option<String>,
    /// Deepest recursion level that is still scanned (the start directory is level 0).
    max_depth: usize,
    /// Whether entries whose name starts with `.` are visited.
    include_hidden: bool,
    /// Whether directories are reported.
    include_dirs: bool,
    /// Whether non-directory entries are reported.
    include_files: bool,
}

impl Default for ScanOptions {
    /// No pattern, depth limit 5, hidden entries skipped, both files and
    /// directories reported.
    fn default() -> Self {
        ScanOptions {
            include_files: true,
            include_dirs: true,
            include_hidden: false,
            max_depth: 5,
            pattern: None,
        }
    }
}
473
474fn bool_arg(map: &BTreeMap<String, VmValue>, key: &str, default: bool) -> bool {
475 match map.get(key) {
476 Some(VmValue::Bool(value)) => *value,
477 _ => default,
478 }
479}
480
481fn usize_arg(map: &BTreeMap<String, VmValue>, key: &str, default: usize) -> usize {
482 match map.get(key) {
483 Some(VmValue::Int(value)) if *value >= 0 => *value as usize,
484 _ => default,
485 }
486}
487
488fn parse_scan_options(
489 pattern_or_options: Option<&VmValue>,
490 explicit_options: Option<&VmValue>,
491) -> ScanOptions {
492 let mut options = ScanOptions::default();
493 if let Some(VmValue::String(pattern)) = pattern_or_options {
494 options.pattern = Some(pattern.to_string());
495 } else if let Some(VmValue::Dict(dict)) = pattern_or_options {
496 apply_scan_options_dict(&mut options, dict);
497 }
498 if let Some(VmValue::Dict(dict)) = explicit_options {
499 apply_scan_options_dict(&mut options, dict);
500 }
501 options
502}
503
504fn apply_scan_options_dict(options: &mut ScanOptions, dict: &BTreeMap<String, VmValue>) {
505 if let Some(pattern) = dict.get("pattern").map(|value| value.display()) {
506 if !pattern.is_empty() {
507 options.pattern = Some(pattern);
508 }
509 }
510 options.max_depth = usize_arg(dict, "max_depth", options.max_depth);
511 options.include_hidden = bool_arg(dict, "include_hidden", options.include_hidden);
512 options.include_dirs = bool_arg(dict, "include_dirs", options.include_dirs);
513 options.include_files = bool_arg(dict, "include_files", options.include_files);
514}
515
516fn resolve_scan_root(rel_dir: &str) -> PathBuf {
517 let candidate = PathBuf::from(rel_dir);
518 if candidate.is_absolute() {
519 return candidate;
520 }
521 crate::stdlib::process::resolve_source_relative_path(rel_dir)
522}
523
524pub fn register_metadata_builtins(vm: &mut Vm, base_dir: &Path) {
531 let state = Rc::new(RefCell::new(MetadataState::new(base_dir)));
532
533 let s = Rc::clone(&state);
535 vm.register_builtin("metadata_get", move |args, _out| {
536 let dir = args.first().map(|a| a.display()).unwrap_or_default();
537 let namespace = args.get(1).and_then(|a| {
538 if matches!(a, VmValue::Nil) {
539 None
540 } else {
541 Some(a.display())
542 }
543 });
544
545 let mut st = s.borrow_mut();
546 if let Some(ns) = namespace {
547 match st.get_namespace(&dir, &ns) {
548 Some(fields) => {
549 let mut m = BTreeMap::new();
550 for (k, v) in fields {
551 m.insert(k, json_to_vm(&v));
552 }
553 Ok(VmValue::Dict(Rc::new(m)))
554 }
555 None => Ok(VmValue::Nil),
556 }
557 } else {
558 let resolved = st.resolve(&dir);
560 let mut m = BTreeMap::new();
561 for fields in resolved.namespaces.values() {
562 for (k, v) in fields {
563 m.insert(k.clone(), json_to_vm(v));
564 }
565 }
566 if m.is_empty() {
567 Ok(VmValue::Nil)
568 } else {
569 Ok(VmValue::Dict(Rc::new(m)))
570 }
571 }
572 });
573
574 let s = Rc::clone(&state);
576 vm.register_builtin("metadata_resolve", move |args, _out| {
577 let dir = args.first().map(|a| a.display()).unwrap_or_default();
578 let namespace = args.get(1).and_then(|a| {
579 if matches!(a, VmValue::Nil) {
580 None
581 } else {
582 Some(a.display())
583 }
584 });
585 let mut st = s.borrow_mut();
586 let resolved = st.resolve(&dir);
587 if let Some(ns) = namespace {
588 match resolved.namespaces.get(&ns) {
589 Some(fields) => Ok(namespace_fields_to_vm(fields)),
590 None => Ok(VmValue::Nil),
591 }
592 } else if resolved.namespaces.is_empty() {
593 Ok(VmValue::Nil)
594 } else {
595 Ok(directory_metadata_to_vm(&resolved))
596 }
597 });
598
599 let s = Rc::clone(&state);
601 vm.register_builtin("metadata_entries", move |args, _out| {
602 let namespace = args.first().and_then(|a| {
603 if matches!(a, VmValue::Nil) {
604 None
605 } else {
606 Some(a.display())
607 }
608 });
609 let mut st = s.borrow_mut();
610 st.ensure_loaded();
611 let directories: Vec<String> = st.entries.keys().cloned().collect();
612 let mut items = Vec::new();
613 for dir in directories {
614 let local = st.local_directory(&dir);
615 let resolved = st.resolve(&dir);
616 let mut item = BTreeMap::new();
617 item.insert(
618 "dir".to_string(),
619 VmValue::String(Rc::from(normalize_directory_key(&dir))),
620 );
621 match &namespace {
622 Some(ns) => {
623 item.insert(
624 "local".to_string(),
625 local
626 .namespaces
627 .get(ns)
628 .map(namespace_fields_to_vm)
629 .unwrap_or(VmValue::Nil),
630 );
631 item.insert(
632 "resolved".to_string(),
633 resolved
634 .namespaces
635 .get(ns)
636 .map(namespace_fields_to_vm)
637 .unwrap_or(VmValue::Nil),
638 );
639 }
640 None => {
641 item.insert("local".to_string(), directory_metadata_to_vm(&local));
642 item.insert("resolved".to_string(), directory_metadata_to_vm(&resolved));
643 }
644 }
645 items.push(VmValue::Dict(Rc::new(item)));
646 }
647 Ok(VmValue::List(Rc::new(items)))
648 });
649
650 let s = Rc::clone(&state);
652 vm.register_builtin("metadata_set", move |args, _out| {
653 let dir = args.first().map(|a| a.display()).unwrap_or_default();
654 let namespace = args.get(1).map(|a| a.display()).unwrap_or_default();
655 let data_val = args.get(2).unwrap_or(&VmValue::Nil);
656
657 let mut data = BTreeMap::new();
658 if let VmValue::Dict(dict) = data_val {
659 for (k, v) in dict.iter() {
660 data.insert(k.clone(), vm_to_json(v));
661 }
662 }
663
664 if !data.is_empty() {
665 s.borrow_mut().set_namespace(&dir, &namespace, data);
666 }
667 Ok(VmValue::Nil)
668 });
669
670 let s = Rc::clone(&state);
672 vm.register_builtin("metadata_save", move |_args, _out| {
673 s.borrow_mut().save().map_err(VmError::Runtime)?;
674 Ok(VmValue::Nil)
675 });
676
677 let s = Rc::clone(&state);
680 let base2 = base_dir.to_path_buf();
681 vm.register_builtin("metadata_stale", move |_args, _out| {
682 s.borrow_mut().ensure_loaded();
683 let state = s.borrow();
684 let mut tier1_stale: Vec<VmValue> = Vec::new();
685 let mut tier2_stale: Vec<VmValue> = Vec::new();
686
687 for (dir, meta) in &state.entries {
688 let full_dir = if dir.is_empty() {
689 base2.clone()
690 } else {
691 base2.join(dir)
692 };
693 if let Some(stored_hash) = meta
695 .namespaces
696 .get("classification")
697 .and_then(|ns| ns.get("structureHash"))
698 .and_then(|v| v.as_str())
699 {
700 let current_hash = compute_structure_hash(&full_dir);
701 if current_hash != stored_hash {
702 tier1_stale.push(VmValue::String(Rc::from(dir.as_str())));
703 continue;
705 }
706 }
707 if let Some(stored_hash) = meta
709 .namespaces
710 .get("classification")
711 .and_then(|ns| ns.get("contentHash"))
712 .and_then(|v| v.as_str())
713 {
714 let current_hash = compute_content_hash_for_dir(&full_dir);
715 if current_hash != stored_hash {
716 tier2_stale.push(VmValue::String(Rc::from(dir.as_str())));
717 }
718 }
719 }
720
721 let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
722 let mut m = BTreeMap::new();
723 m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
724 m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
725 m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
726 Ok(VmValue::Dict(Rc::new(m)))
727 });
728
729 let s = Rc::clone(&state);
732 let base3 = base_dir.to_path_buf();
733 vm.register_builtin("metadata_refresh_hashes", move |_args, _out| {
734 let mut state = s.borrow_mut();
735 state.ensure_loaded();
736 let dirs: Vec<String> = state.entries.keys().cloned().collect();
737 for dir in dirs {
738 let full_dir = if dir.is_empty() {
739 base3.clone()
740 } else {
741 base3.join(&dir)
742 };
743 let hash = compute_structure_hash(&full_dir);
744 let entry = state.entries.entry(dir).or_default();
745 let ns = entry
746 .namespaces
747 .entry("classification".to_string())
748 .or_default();
749 ns.insert("structureHash".to_string(), serde_json::Value::String(hash));
750 }
751 state.dirty = true;
752 Ok(VmValue::Nil)
753 });
754
755 let s = Rc::clone(&state);
757 let base4 = base_dir.to_path_buf();
758 vm.register_builtin("metadata_status", move |args, _out| {
759 let namespace = args.first().and_then(|a| {
760 if matches!(a, VmValue::Nil) {
761 None
762 } else {
763 Some(a.display())
764 }
765 });
766 s.borrow_mut().ensure_loaded();
767 let state = s.borrow();
768 let mut namespaces = BTreeMap::new();
769 let mut directories = Vec::new();
770 let mut missing_structure_hash = Vec::new();
771 let mut missing_content_hash = Vec::new();
772 for (dir, meta) in &state.entries {
773 directories.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
774 for ns in meta.namespaces.keys() {
775 namespaces.insert(ns.clone(), VmValue::Bool(true));
776 }
777 let full_dir = if dir.is_empty() {
778 base4.clone()
779 } else {
780 base4.join(dir)
781 };
782 let relevant = namespace
783 .as_ref()
784 .and_then(|name| meta.namespaces.get(name))
785 .or_else(|| meta.namespaces.get("classification"));
786 if let Some(fields) = relevant {
787 if !fields.contains_key("structureHash") && full_dir.exists() {
788 missing_structure_hash
789 .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
790 }
791 if !fields.contains_key("contentHash") && full_dir.exists() {
792 missing_content_hash
793 .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
794 }
795 }
796 }
797 let stale = metadata_stale_value(&state, &base4);
798 let mut result = BTreeMap::new();
799 result.insert(
800 "directory_count".to_string(),
801 VmValue::Int(state.entries.len() as i64),
802 );
803 result.insert(
804 "namespace_count".to_string(),
805 VmValue::Int(namespaces.len() as i64),
806 );
807 result.insert(
808 "namespaces".to_string(),
809 VmValue::List(Rc::new(
810 namespaces
811 .keys()
812 .cloned()
813 .map(|name| VmValue::String(Rc::from(name)))
814 .collect(),
815 )),
816 );
817 result.insert(
818 "directories".to_string(),
819 VmValue::List(Rc::new(directories)),
820 );
821 result.insert(
822 "missing_structure_hash".to_string(),
823 VmValue::List(Rc::new(missing_structure_hash)),
824 );
825 result.insert(
826 "missing_content_hash".to_string(),
827 VmValue::List(Rc::new(missing_content_hash)),
828 );
829 result.insert("stale".to_string(), stale);
830 Ok(VmValue::Dict(Rc::new(result)))
831 });
832
833 let base = base_dir.to_path_buf();
835 vm.register_builtin("compute_content_hash", move |args, _out| {
836 let dir = args.first().map(|a| a.display()).unwrap_or_default();
837 let full_dir = if dir.is_empty() {
838 base.clone()
839 } else {
840 base.join(&dir)
841 };
842 let hash = compute_content_hash_for_dir(&full_dir);
843 Ok(VmValue::String(Rc::from(hash)))
844 });
845
846 vm.register_builtin("invalidate_facts", |_args, _out| Ok(VmValue::Nil));
848
849 register_scan_builtins(vm);
850}
851
852fn compute_structure_hash(dir: &Path) -> String {
854 let mut entries: Vec<String> = Vec::new();
855 if let Ok(rd) = std::fs::read_dir(dir) {
856 for entry in rd.flatten() {
857 if let Ok(meta) = entry.metadata() {
858 let name = entry.file_name().to_string_lossy().into_owned();
859 entries.push(format!("{}:{}", name, meta.len()));
860 }
861 }
862 }
863 entries.sort();
864 let joined = entries.join("|");
865 format!("{:x}", fnv_hash(joined.as_bytes()))
866}
867
868fn compute_content_hash_for_dir(dir: &Path) -> String {
870 let mut entries: Vec<String> = Vec::new();
871 if let Ok(rd) = std::fs::read_dir(dir) {
872 for entry in rd.flatten() {
873 if let Ok(meta) = entry.metadata() {
874 let name = entry.file_name().to_string_lossy().into_owned();
875 let mtime = meta
876 .modified()
877 .ok()
878 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
879 .map(|d| d.as_secs())
880 .unwrap_or(0);
881 entries.push(format!("{}:{}:{}", name, meta.len(), mtime));
882 }
883 }
884 }
885 entries.sort();
886 let joined = entries.join("|");
887 format!("{:x}", fnv_hash(joined.as_bytes()))
888}
889
/// 64-bit FNV-1a: starting from the offset basis, each byte is XOR-ed in and
/// the result multiplied by the FNV prime with wrapping arithmetic.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;
    data.iter()
        .fold(OFFSET_BASIS, |hash, &byte| (hash ^ u64::from(byte)).wrapping_mul(PRIME))
}
899
900pub fn register_scan_builtins(vm: &mut Vm) {
902 vm.register_builtin("scan_directory", move |args, _out| {
904 let rel_dir = args.first().map(|a| a.display()).unwrap_or_default();
905 let options = parse_scan_options(args.get(1), args.get(2));
906 let scan_base = resolve_scan_root(".");
907 let full_dir = if rel_dir.is_empty() {
908 scan_base.clone()
909 } else {
910 scan_base.join(&rel_dir)
911 };
912 let mut results: Vec<VmValue> = Vec::new();
913 scan_dir_recursive(&full_dir, &scan_base, &options, &mut results, 0);
914 Ok(VmValue::List(Rc::new(results)))
915 });
916}
917
918fn metadata_stale_value(state: &MetadataState, base_dir: &Path) -> VmValue {
919 let mut tier1_stale: Vec<VmValue> = Vec::new();
920 let mut tier2_stale: Vec<VmValue> = Vec::new();
921 for (dir, meta) in &state.entries {
922 let full_dir = if dir.is_empty() {
923 base_dir.to_path_buf()
924 } else {
925 base_dir.join(dir)
926 };
927 if let Some(stored_hash) = meta
928 .namespaces
929 .get("classification")
930 .and_then(|ns| ns.get("structureHash"))
931 .and_then(|v| v.as_str())
932 {
933 let current_hash = compute_structure_hash(&full_dir);
934 if current_hash != stored_hash {
935 tier1_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
936 continue;
937 }
938 }
939 if let Some(stored_hash) = meta
940 .namespaces
941 .get("classification")
942 .and_then(|ns| ns.get("contentHash"))
943 .and_then(|v| v.as_str())
944 {
945 let current_hash = compute_content_hash_for_dir(&full_dir);
946 if current_hash != stored_hash {
947 tier2_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
948 }
949 }
950 }
951 let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
952 let mut m = BTreeMap::new();
953 m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
954 m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
955 m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
956 VmValue::Dict(Rc::new(m))
957}
958
959fn scan_dir_recursive(
960 dir: &Path,
961 base: &Path,
962 options: &ScanOptions,
963 results: &mut Vec<VmValue>,
964 depth: usize,
965) {
966 if depth > options.max_depth {
967 return;
968 }
969 let rd = match std::fs::read_dir(dir) {
970 Ok(rd) => rd,
971 Err(_) => return,
972 };
973 for entry in rd.flatten() {
974 let meta = match entry.metadata() {
975 Ok(m) => m,
976 Err(_) => continue,
977 };
978 let name = entry.file_name().to_string_lossy().into_owned();
979 if !options.include_hidden && name.starts_with('.') {
980 continue;
981 }
982 let rel_path = entry
983 .path()
984 .strip_prefix(base)
985 .unwrap_or(entry.path().as_path())
986 .to_string_lossy()
987 .into_owned();
988 if let Some(pat) = &options.pattern {
989 if !glob_match(pat, &rel_path) {
990 if meta.is_dir() {
991 scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
992 }
993 continue;
994 }
995 }
996 let mtime = meta
997 .modified()
998 .ok()
999 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
1000 .map(|d| d.as_secs() as i64)
1001 .unwrap_or(0);
1002 let mut m = BTreeMap::new();
1003 m.insert("path".to_string(), VmValue::String(Rc::from(rel_path)));
1004 m.insert("size".to_string(), VmValue::Int(meta.len() as i64));
1005 m.insert("modified".to_string(), VmValue::Int(mtime));
1006 m.insert("is_dir".to_string(), VmValue::Bool(meta.is_dir()));
1007 if (meta.is_dir() && options.include_dirs) || (!meta.is_dir() && options.include_files) {
1008 results.push(VmValue::Dict(Rc::new(m)));
1009 }
1010 if meta.is_dir() {
1011 scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
1012 }
1013 }
1014}
1015
/// Minimal glob matcher used by the directory scan.
///
/// * A pattern without `*` matches when it occurs anywhere in `path`.
/// * Otherwise the pattern is split at each run of `*` into literal segments.
///   The first segment must be a prefix of `path`, the last a suffix, and any
///   segments in between must occur in order without overlapping.
/// * A run of two or more `*` additionally drops the `/` characters directly
///   next to it, so `src/**/*.rs` matches `src/main.rs` as well as
///   `src/vm/mod.rs`.
///
/// Wildcards match any characters, including `/`.
///
/// Compared with the earlier implementation, prefix and suffix may no longer
/// overlap (`a*a` does not match `a`), and patterns with several wildcards are
/// matched instead of being compared as literal text.
fn glob_match(pattern: &str, path: &str) -> bool {
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    // Split the pattern into the literal pieces around each wildcard run.
    let mut segments: Vec<&str> = Vec::new();
    let mut rest = pattern;
    let mut after_recursive = false;
    while let Some(star_at) = rest.find('*') {
        // '*' is ASCII, so byte offsets around the run are char boundaries.
        let run_len = rest[star_at..].bytes().take_while(|b| *b == b'*').count();
        let recursive = run_len >= 2;
        let mut literal = &rest[..star_at];
        if after_recursive {
            literal = literal.trim_start_matches('/');
        }
        if recursive {
            literal = literal.trim_end_matches('/');
        }
        segments.push(literal);
        after_recursive = recursive;
        rest = &rest[star_at + run_len..];
    }
    segments.push(if after_recursive {
        rest.trim_start_matches('/')
    } else {
        rest
    });

    // There is at least one wildcard, hence at least two segments.
    let Some((first, tail)) = segments.split_first() else {
        return false;
    };
    let Some((last, middle)) = tail.split_last() else {
        return false;
    };

    if !path.starts_with(*first) {
        return false;
    }
    let mut remaining = &path[first.len()..];
    for segment in middle {
        match remaining.find(*segment) {
            Some(found_at) => remaining = &remaining[found_at + segment.len()..],
            None => return false,
        }
    }
    // Checking the suffix against what is left keeps it from overlapping
    // text already consumed by earlier segments.
    remaining.ends_with(*last)
}
1036
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a fresh path under the system temp directory; the nanosecond
    /// timestamp keeps paths from different runs apart.
    fn temp_path(name: &str) -> PathBuf {
        let unique = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos();
        std::env::temp_dir().join(format!("harn-metadata-{name}-{unique}"))
    }

    /// Best-effort removal of everything a test may have created for `base`.
    /// The metadata directory is removed explicitly because this module does
    /// not show whether it lives underneath `base`.
    fn cleanup(base: &Path) {
        let _ = std::fs::remove_dir_all(crate::runtime_paths::metadata_dir(base));
        let _ = std::fs::remove_dir_all(base);
    }

    /// Resolving a child directory inherits the root's fields within the
    /// same namespace.
    #[test]
    fn metadata_resolve_preserves_namespace_structure() {
        let base = temp_path("resolve");
        let mut state = MetadataState::new(&base);
        state.set_namespace(
            "",
            "classification",
            BTreeMap::from([("language".into(), serde_json::json!("rust"))]),
        );
        state.set_namespace(
            "src",
            "classification",
            BTreeMap::from([("owner".into(), serde_json::json!("vm"))]),
        );

        let resolved = state.resolve("src");
        let classification = resolved.namespaces.get("classification").unwrap();
        assert_eq!(
            classification.get("language"),
            Some(&serde_json::json!("rust"))
        );
        assert_eq!(classification.get("owner"), Some(&serde_json::json!("vm")));
    }

    /// Saving writes one `entries.json` per namespace.
    #[test]
    fn metadata_save_writes_namespace_shards() {
        let base = temp_path("save");
        let mut state = MetadataState::new(&base);
        state.set_namespace(
            ".",
            "classification",
            BTreeMap::from([("language".into(), serde_json::json!("rust"))]),
        );
        state.set_namespace(
            "src",
            "coding-enrichment-v1",
            BTreeMap::from([("_deep_scan".into(), serde_json::json!({"version": 1}))]),
        );
        state.save().expect("save");

        let metadata_root = crate::runtime_paths::metadata_dir(&base);
        let classification = std::fs::read_to_string(
            metadata_root
                .join("classification")
                .join(NAMESPACE_ENTRIES_FILE),
        )
        .expect("classification shard");
        let parsed = serde_json::from_str::<serde_json::Value>(&classification).expect("json");
        assert_eq!(
            parsed.get("namespace").and_then(|value| value.as_str()),
            Some("classification")
        );
        assert!(parsed
            .get("entries")
            .and_then(|value| value.get("."))
            .is_some());

        let enrichment = std::fs::read_to_string(
            metadata_root
                .join("coding-enrichment-v1")
                .join(NAMESPACE_ENTRIES_FILE),
        )
        .expect("enrichment shard");
        let parsed = serde_json::from_str::<serde_json::Value>(&enrichment).expect("json");
        assert!(parsed
            .get("entries")
            .and_then(|value| value.get("src"))
            .is_some());

        cleanup(&base);
    }

    /// Loading combines the legacy shard with per-namespace shards.
    #[test]
    fn metadata_load_merges_legacy_and_namespace_shards() {
        let base = temp_path("load");
        let metadata_root = crate::runtime_paths::metadata_dir(&base);
        std::fs::create_dir_all(metadata_root.join("facts")).unwrap();
        std::fs::write(
            metadata_root.join(LEGACY_SHARD_NAME),
            serde_json::json!({
                "version": 2,
                "entries": {
                    ".": {
                        "namespaces": {
                            "classification": {
                                "language": "rust"
                            }
                        }
                    }
                }
            })
            .to_string(),
        )
        .unwrap();
        std::fs::write(
            metadata_root.join("facts").join(NAMESPACE_ENTRIES_FILE),
            serde_json::json!({
                "version": 1,
                "namespace": "facts",
                "entries": {
                    "src": {
                        "kind": "module"
                    }
                }
            })
            .to_string(),
        )
        .unwrap();

        let mut state = MetadataState::new(&base);
        state.ensure_loaded();
        assert_eq!(
            state
                .entries
                .get(".")
                .and_then(|meta| meta.namespaces.get("classification"))
                .and_then(|fields| fields.get("language")),
            Some(&serde_json::json!("rust"))
        );
        assert_eq!(
            state
                .entries
                .get("src")
                .and_then(|meta| meta.namespaces.get("facts"))
                .and_then(|fields| fields.get("kind")),
            Some(&serde_json::json!("module"))
        );

        cleanup(&base);
    }

    /// With depth 0 and hidden entries excluded, only the visible top-level
    /// file matching the pattern is reported.
    #[test]
    fn scan_options_filter_hidden_and_depth() {
        let base = temp_path("scan");
        std::fs::create_dir_all(base.join("project/deep")).unwrap();
        std::fs::write(base.join("project/root.txt"), "root").unwrap();
        std::fs::write(base.join("project/.hidden.txt"), "hidden").unwrap();
        std::fs::write(base.join("project/deep/nested.txt"), "nested").unwrap();

        let options = ScanOptions {
            pattern: Some(".txt".into()),
            max_depth: 0,
            include_hidden: false,
            include_dirs: false,
            include_files: true,
        };
        let mut results = Vec::new();
        scan_dir_recursive(&base.join("project"), &base, &options, &mut results, 0);
        let paths: Vec<String> = results
            .into_iter()
            .map(|value| match value {
                VmValue::Dict(dict) => dict.get("path").unwrap().display(),
                _ => String::new(),
            })
            .collect();
        assert_eq!(paths, vec!["project/root.txt".to_string()]);
        let _ = std::fs::remove_dir_all(base);
    }
}