1use std::cell::RefCell;
11use std::collections::BTreeMap;
12use std::path::{Path, PathBuf};
13use std::rc::Rc;
14
15use crate::value::{VmError, VmValue};
16use crate::vm::Vm;
17
/// Name of a metadata namespace; used as the key of `DirectoryMetadata::namespaces`.
type Namespace = String;
/// Name of a single field stored inside a namespace.
type FieldKey = String;
20
/// Metadata attached to a single directory, grouped by namespace.
#[derive(Clone, Default)]
struct DirectoryMetadata {
    /// Namespace name -> (field name -> JSON value).
    namespaces: BTreeMap<Namespace, BTreeMap<FieldKey, serde_json::Value>>,
}
26
/// In-memory view of the metadata store kept under `<base_dir>/.burin/metadata`.
struct MetadataState {
    /// Per-directory metadata, keyed by the directory string found in the
    /// shard files or passed to `set_namespace`.
    entries: BTreeMap<String, DirectoryMetadata>,
    /// Directory the `.burin/metadata` folder is resolved against.
    base_dir: PathBuf,
    /// Set on the first `ensure_loaded` call so the disk is read at most once.
    loaded: bool,
    /// True when `entries` has changes that `save` has not yet written.
    dirty: bool,
}
34
35impl MetadataState {
36 fn new(base_dir: &Path) -> Self {
37 Self {
38 entries: BTreeMap::new(),
39 base_dir: base_dir.to_path_buf(),
40 loaded: false,
41 dirty: false,
42 }
43 }
44
45 fn metadata_dir(&self) -> PathBuf {
46 self.base_dir.join(".burin").join("metadata")
47 }
48
49 fn ensure_loaded(&mut self) {
50 if self.loaded {
51 return;
52 }
53 self.loaded = true;
54 let meta_dir = self.metadata_dir();
55 let entries = match std::fs::read_dir(&meta_dir) {
56 Ok(e) => e,
57 Err(_) => return,
58 };
59 for entry in entries.flatten() {
60 let path = entry.path();
61 if path.extension().map(|e| e == "json").unwrap_or(false) {
62 if let Ok(contents) = std::fs::read_to_string(&path) {
63 self.load_shard(&contents);
64 }
65 }
66 }
67 }
68
69 fn load_shard(&mut self, contents: &str) {
70 let parsed: serde_json::Value = match serde_json::from_str(contents) {
71 Ok(v) => v,
72 Err(_) => return,
73 };
74 let shard_entries = match parsed.get("entries").and_then(|e| e.as_object()) {
75 Some(e) => e,
76 None => return,
77 };
78 for (dir, meta_val) in shard_entries {
79 let meta = parse_directory_metadata(meta_val);
80 self.entries.insert(dir.clone(), meta);
81 }
82 }
83
84 fn resolve(&mut self, directory: &str) -> DirectoryMetadata {
87 self.ensure_loaded();
88 let mut result = DirectoryMetadata::default();
89
90 if let Some(root) = self.entries.get(".").or_else(|| self.entries.get("")) {
92 merge_metadata(&mut result, root);
93 }
94
95 let components: Vec<&str> = directory
97 .split('/')
98 .filter(|c| !c.is_empty() && *c != ".")
99 .collect();
100 let mut current = String::new();
101 for component in components {
102 if current.is_empty() {
103 current = component.to_string();
104 } else {
105 current = format!("{current}/{component}");
106 }
107 if let Some(meta) = self.entries.get(¤t) {
108 merge_metadata(&mut result, meta);
109 }
110 }
111
112 result
113 }
114
115 fn get_namespace(
117 &mut self,
118 directory: &str,
119 namespace: &str,
120 ) -> Option<BTreeMap<FieldKey, serde_json::Value>> {
121 let resolved = self.resolve(directory);
122 resolved.namespaces.get(namespace).cloned()
123 }
124
125 fn local_directory(&mut self, directory: &str) -> DirectoryMetadata {
126 self.ensure_loaded();
127 self.entries.get(directory).cloned().unwrap_or_default()
128 }
129
130 fn set_namespace(
132 &mut self,
133 directory: &str,
134 namespace: &str,
135 data: BTreeMap<FieldKey, serde_json::Value>,
136 ) {
137 self.ensure_loaded();
138 let meta = self.entries.entry(directory.to_string()).or_default();
139 let ns = meta.namespaces.entry(namespace.to_string()).or_default();
140 for (k, v) in data {
141 ns.insert(k, v);
142 }
143 self.dirty = true;
144 }
145
146 fn save(&mut self) -> Result<(), String> {
148 if !self.dirty {
149 return Ok(());
150 }
151 let meta_dir = self.metadata_dir();
152 std::fs::create_dir_all(&meta_dir).map_err(|e| format!("metadata mkdir: {e}"))?;
153
154 let mut shard = serde_json::Map::new();
157 for (dir, meta) in &self.entries {
158 shard.insert(dir.clone(), serialize_directory_metadata(meta));
159 }
160
161 let store_obj = serde_json::json!({
162 "version": 2,
163 "generatedAt": chrono_now_iso(),
164 "entries": serde_json::Value::Object(shard)
165 });
166
167 let json =
168 serde_json::to_string_pretty(&store_obj).map_err(|e| format!("metadata json: {e}"))?;
169
170 let shard_path = meta_dir.join("root.json");
171 std::fs::write(&shard_path, json).map_err(|e| format!("metadata write: {e}"))?;
172 self.dirty = false;
173 Ok(())
174 }
175}
176
/// Formats the current system time as a UTC timestamp of the form
/// `YYYY-MM-DDTHH:MM:SSZ`, using only the standard library.
///
/// A clock set before the Unix epoch is treated as the epoch itself.
fn chrono_now_iso() -> String {
    fn is_leap_year(year: i64) -> bool {
        year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)
    }

    let epoch_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // Time of day.
    let second_of_day = epoch_secs % 86400;
    let hours = second_of_day / 3600;
    let minutes = (second_of_day % 3600) / 60;
    let seconds = second_of_day % 60;

    // Peel off whole years, leaving a zero-based day within `year`.
    let mut year = 1970i64;
    let mut day = (epoch_secs / 86400) as i64;
    loop {
        let year_len: i64 = if is_leap_year(year) { 366 } else { 365 };
        if day < year_len {
            break;
        }
        day -= year_len;
        year += 1;
    }

    // Peel off whole months, leaving a zero-based day within the month.
    let february: i64 = if is_leap_year(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month = 1usize;
    for month_len in month_lengths.iter() {
        if day < *month_len {
            break;
        }
        day -= *month_len;
        month += 1;
    }

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year,
        month,
        day + 1,
        hours,
        minutes,
        seconds
    )
}
238
239fn merge_metadata(target: &mut DirectoryMetadata, source: &DirectoryMetadata) {
240 for (ns, fields) in &source.namespaces {
241 let target_ns = target.namespaces.entry(ns.clone()).or_default();
242 for (k, v) in fields {
243 target_ns.insert(k.clone(), v.clone());
244 }
245 }
246}
247
248fn parse_directory_metadata(val: &serde_json::Value) -> DirectoryMetadata {
249 let mut meta = DirectoryMetadata::default();
250 let obj = match val.as_object() {
251 Some(o) => o,
252 None => return meta,
253 };
254 if let Some(ns_obj) = obj.get("namespaces").and_then(|n| n.as_object()) {
256 for (ns_name, fields_val) in ns_obj {
257 if let Some(fields) = fields_val.as_object() {
258 let mut field_map = BTreeMap::new();
259 for (k, v) in fields {
260 field_map.insert(k.clone(), v.clone());
261 }
262 meta.namespaces.insert(ns_name.clone(), field_map);
263 }
264 }
265 }
266 meta
267}
268
269fn serialize_directory_metadata(meta: &DirectoryMetadata) -> serde_json::Value {
270 let mut ns_obj = serde_json::Map::new();
271 for (ns_name, fields) in &meta.namespaces {
272 let mut fields_obj = serde_json::Map::new();
273 for (k, v) in fields {
274 fields_obj.insert(k.clone(), v.clone());
275 }
276 ns_obj.insert(ns_name.clone(), serde_json::Value::Object(fields_obj));
277 }
278 serde_json::json!({ "namespaces": serde_json::Value::Object(ns_obj) })
279}
280
281fn vm_to_json(val: &VmValue) -> serde_json::Value {
282 match val {
283 VmValue::String(s) => serde_json::Value::String(s.to_string()),
284 VmValue::Int(n) => serde_json::json!(*n),
285 VmValue::Float(n) => serde_json::json!(*n),
286 VmValue::Bool(b) => serde_json::Value::Bool(*b),
287 VmValue::Nil => serde_json::Value::Null,
288 VmValue::List(items) => serde_json::Value::Array(items.iter().map(vm_to_json).collect()),
289 VmValue::Dict(map) => {
290 let obj: serde_json::Map<String, serde_json::Value> = map
291 .iter()
292 .map(|(k, v)| (k.clone(), vm_to_json(v)))
293 .collect();
294 serde_json::Value::Object(obj)
295 }
296 _ => serde_json::Value::Null,
297 }
298}
299
300fn json_to_vm(jv: &serde_json::Value) -> VmValue {
301 match jv {
302 serde_json::Value::Null => VmValue::Nil,
303 serde_json::Value::Bool(b) => VmValue::Bool(*b),
304 serde_json::Value::Number(n) => {
305 if let Some(i) = n.as_i64() {
306 VmValue::Int(i)
307 } else {
308 VmValue::Float(n.as_f64().unwrap_or(0.0))
309 }
310 }
311 serde_json::Value::String(s) => VmValue::String(Rc::from(s.as_str())),
312 serde_json::Value::Array(arr) => {
313 VmValue::List(Rc::new(arr.iter().map(json_to_vm).collect()))
314 }
315 serde_json::Value::Object(map) => {
316 let mut m = BTreeMap::new();
317 for (k, v) in map {
318 m.insert(k.clone(), json_to_vm(v));
319 }
320 VmValue::Dict(Rc::new(m))
321 }
322 }
323}
324
325fn namespace_fields_to_vm(fields: &BTreeMap<FieldKey, serde_json::Value>) -> VmValue {
326 let mut map = BTreeMap::new();
327 for (k, v) in fields {
328 map.insert(k.clone(), json_to_vm(v));
329 }
330 VmValue::Dict(Rc::new(map))
331}
332
333fn directory_metadata_to_vm(meta: &DirectoryMetadata) -> VmValue {
334 let mut namespaces = BTreeMap::new();
335 for (ns, fields) in &meta.namespaces {
336 namespaces.insert(ns.clone(), namespace_fields_to_vm(fields));
337 }
338 VmValue::Dict(Rc::new(namespaces))
339}
340
/// Returns the key used when reporting a directory to scripts: an empty or
/// whitespace-only string, or exactly `"."`, is reported as `"."`; any other
/// input is returned unchanged.
fn normalize_directory_key(dir: &str) -> String {
    let is_root = dir == "." || dir.trim().is_empty();
    if is_root {
        String::from(".")
    } else {
        dir.to_owned()
    }
}
348
/// Options controlling a directory scan.
#[derive(Clone)]
struct ScanOptions {
    /// Optional pattern that an entry's relative path is matched against.
    pattern: Option<String>,
    /// Deepest recursion level visited; the starting directory is level 0.
    max_depth: usize,
    /// Whether entries whose name starts with `.` are visited.
    include_hidden: bool,
    /// Whether directories are reported in the results.
    include_dirs: bool,
    /// Whether non-directory entries are reported in the results.
    include_files: bool,
}

impl Default for ScanOptions {
    /// No pattern, depth limit of 5, hidden entries skipped, and both
    /// directories and files reported.
    fn default() -> Self {
        ScanOptions {
            include_files: true,
            include_dirs: true,
            include_hidden: false,
            max_depth: 5,
            pattern: None,
        }
    }
}
369
370fn bool_arg(map: &BTreeMap<String, VmValue>, key: &str, default: bool) -> bool {
371 match map.get(key) {
372 Some(VmValue::Bool(value)) => *value,
373 _ => default,
374 }
375}
376
377fn usize_arg(map: &BTreeMap<String, VmValue>, key: &str, default: usize) -> usize {
378 match map.get(key) {
379 Some(VmValue::Int(value)) if *value >= 0 => *value as usize,
380 _ => default,
381 }
382}
383
384fn parse_scan_options(
385 pattern_or_options: Option<&VmValue>,
386 explicit_options: Option<&VmValue>,
387) -> ScanOptions {
388 let mut options = ScanOptions::default();
389 if let Some(VmValue::String(pattern)) = pattern_or_options {
390 options.pattern = Some(pattern.to_string());
391 } else if let Some(VmValue::Dict(dict)) = pattern_or_options {
392 apply_scan_options_dict(&mut options, dict);
393 }
394 if let Some(VmValue::Dict(dict)) = explicit_options {
395 apply_scan_options_dict(&mut options, dict);
396 }
397 options
398}
399
400fn apply_scan_options_dict(options: &mut ScanOptions, dict: &BTreeMap<String, VmValue>) {
401 if let Some(pattern) = dict.get("pattern").map(|value| value.display()) {
402 if !pattern.is_empty() {
403 options.pattern = Some(pattern);
404 }
405 }
406 options.max_depth = usize_arg(dict, "max_depth", options.max_depth);
407 options.include_hidden = bool_arg(dict, "include_hidden", options.include_hidden);
408 options.include_dirs = bool_arg(dict, "include_dirs", options.include_dirs);
409 options.include_files = bool_arg(dict, "include_files", options.include_files);
410}
411
412fn resolve_scan_root(base_dir: &Path, rel_dir: &str) -> PathBuf {
413 let candidate = PathBuf::from(rel_dir);
414 if candidate.is_absolute() {
415 return candidate;
416 }
417 if let Some(cwd) =
418 crate::stdlib::process::current_execution_context().and_then(|context| context.cwd)
419 {
420 return PathBuf::from(cwd).join(candidate);
421 }
422 if let Ok(cwd) = std::env::current_dir() {
423 return cwd.join(candidate);
424 }
425 base_dir.join(candidate)
426}
427
/// Registers the metadata builtins on `vm` and then the scan builtins.
///
/// All metadata builtins share one `MetadataState` rooted at `base_dir`,
/// which is read from disk lazily on first use. Builtins registered here:
/// `metadata_get`, `metadata_resolve`, `metadata_entries`, `metadata_set`,
/// `metadata_save`, `metadata_stale`, `metadata_refresh_hashes`,
/// `metadata_status`, `compute_content_hash` and `invalidate_facts`.
pub fn register_metadata_builtins(vm: &mut Vm, base_dir: &Path) {
    // One store shared by every closure below; each closure owns its own handle.
    let state = Rc::new(RefCell::new(MetadataState::new(base_dir)));

    let s = Rc::clone(&state);
    // metadata_get(dir, namespace?): resolved fields of one namespace, or,
    // without a namespace, the fields of every namespace flattened into a
    // single dict. Returns Nil when nothing is found.
    vm.register_builtin("metadata_get", move |args, _out| {
        let dir = args.first().map(|a| a.display()).unwrap_or_default();
        // A missing or Nil second argument means "no namespace filter".
        let namespace = args.get(1).and_then(|a| {
            if matches!(a, VmValue::Nil) {
                None
            } else {
                Some(a.display())
            }
        });

        let mut st = s.borrow_mut();
        if let Some(ns) = namespace {
            match st.get_namespace(&dir, &ns) {
                Some(fields) => {
                    let mut m = BTreeMap::new();
                    for (k, v) in fields {
                        m.insert(k, json_to_vm(&v));
                    }
                    Ok(VmValue::Dict(Rc::new(m)))
                }
                None => Ok(VmValue::Nil),
            }
        } else {
            let resolved = st.resolve(&dir);
            // Namespaces are visited in key order, so when two namespaces
            // share a field name the later namespace's value wins.
            let mut m = BTreeMap::new();
            for fields in resolved.namespaces.values() {
                for (k, v) in fields {
                    m.insert(k.clone(), json_to_vm(v));
                }
            }
            if m.is_empty() {
                Ok(VmValue::Nil)
            } else {
                Ok(VmValue::Dict(Rc::new(m)))
            }
        }
    });

    let s = Rc::clone(&state);
    // metadata_resolve(dir, namespace?): like metadata_get, but without a
    // namespace the result keeps the `{ namespace: { field: value } }` shape.
    vm.register_builtin("metadata_resolve", move |args, _out| {
        let dir = args.first().map(|a| a.display()).unwrap_or_default();
        let namespace = args.get(1).and_then(|a| {
            if matches!(a, VmValue::Nil) {
                None
            } else {
                Some(a.display())
            }
        });
        let mut st = s.borrow_mut();
        let resolved = st.resolve(&dir);
        if let Some(ns) = namespace {
            match resolved.namespaces.get(&ns) {
                Some(fields) => Ok(namespace_fields_to_vm(fields)),
                None => Ok(VmValue::Nil),
            }
        } else if resolved.namespaces.is_empty() {
            Ok(VmValue::Nil)
        } else {
            Ok(directory_metadata_to_vm(&resolved))
        }
    });

    let s = Rc::clone(&state);
    // metadata_entries(namespace?): one `{ dir, local, resolved }` dict per
    // stored directory. With a namespace, `local` and `resolved` hold only
    // that namespace's fields (Nil when absent).
    vm.register_builtin("metadata_entries", move |args, _out| {
        let namespace = args.first().and_then(|a| {
            if matches!(a, VmValue::Nil) {
                None
            } else {
                Some(a.display())
            }
        });
        let mut st = s.borrow_mut();
        st.ensure_loaded();
        // Copy the keys first: `local_directory` and `resolve` need `&mut st`.
        let directories: Vec<String> = st.entries.keys().cloned().collect();
        let mut items = Vec::new();
        for dir in directories {
            let local = st.local_directory(&dir);
            let resolved = st.resolve(&dir);
            let mut item = BTreeMap::new();
            item.insert(
                "dir".to_string(),
                VmValue::String(Rc::from(normalize_directory_key(&dir))),
            );
            match &namespace {
                Some(ns) => {
                    item.insert(
                        "local".to_string(),
                        local
                            .namespaces
                            .get(ns)
                            .map(namespace_fields_to_vm)
                            .unwrap_or(VmValue::Nil),
                    );
                    item.insert(
                        "resolved".to_string(),
                        resolved
                            .namespaces
                            .get(ns)
                            .map(namespace_fields_to_vm)
                            .unwrap_or(VmValue::Nil),
                    );
                }
                None => {
                    item.insert("local".to_string(), directory_metadata_to_vm(&local));
                    item.insert("resolved".to_string(), directory_metadata_to_vm(&resolved));
                }
            }
            items.push(VmValue::Dict(Rc::new(item)));
        }
        Ok(VmValue::List(Rc::new(items)))
    });

    let s = Rc::clone(&state);
    // metadata_set(dir, namespace, data): merges the dict `data` into the
    // namespace. A non-dict or empty `data` is a silent no-op.
    vm.register_builtin("metadata_set", move |args, _out| {
        let dir = args.first().map(|a| a.display()).unwrap_or_default();
        let namespace = args.get(1).map(|a| a.display()).unwrap_or_default();
        let data_val = args.get(2).unwrap_or(&VmValue::Nil);

        let mut data = BTreeMap::new();
        if let VmValue::Dict(dict) = data_val {
            for (k, v) in dict.iter() {
                data.insert(k.clone(), vm_to_json(v));
            }
        }

        if !data.is_empty() {
            s.borrow_mut().set_namespace(&dir, &namespace, data);
        }
        Ok(VmValue::Nil)
    });

    let s = Rc::clone(&state);
    // metadata_save(): writes pending changes to disk; a failure is raised
    // as `VmError::Runtime`.
    vm.register_builtin("metadata_save", move |_args, _out| {
        s.borrow_mut().save().map_err(VmError::Runtime)?;
        Ok(VmValue::Nil)
    });

    let s = Rc::clone(&state);
    let base2 = base_dir.to_path_buf();
    // metadata_stale(): compares the hashes stored in the `classification`
    // namespace with freshly computed ones and returns
    // `{ any_stale, tier1, tier2 }`.
    // NOTE(review): this duplicates `metadata_stale_value`, except that it
    // reports raw directory keys while the helper passes them through
    // `normalize_directory_key` — confirm whether the difference is intended.
    vm.register_builtin("metadata_stale", move |_args, _out| {
        // The mutable borrow ends with this statement, before `borrow()` below.
        s.borrow_mut().ensure_loaded();
        let state = s.borrow();
        let mut tier1_stale: Vec<VmValue> = Vec::new();
        let mut tier2_stale: Vec<VmValue> = Vec::new();

        for (dir, meta) in &state.entries {
            let full_dir = if dir.is_empty() {
                base2.clone()
            } else {
                base2.join(dir)
            };
            // Tier 1: structure hash mismatch.
            if let Some(stored_hash) = meta
                .namespaces
                .get("classification")
                .and_then(|ns| ns.get("structureHash"))
                .and_then(|v| v.as_str())
            {
                let current_hash = compute_structure_hash(&full_dir);
                if current_hash != stored_hash {
                    tier1_stale.push(VmValue::String(Rc::from(dir.as_str())));
                    // A tier-1 directory is not also checked for tier 2.
                    continue;
                }
            }
            // Tier 2: content hash mismatch.
            if let Some(stored_hash) = meta
                .namespaces
                .get("classification")
                .and_then(|ns| ns.get("contentHash"))
                .and_then(|v| v.as_str())
            {
                let current_hash = compute_content_hash_for_dir(&full_dir);
                if current_hash != stored_hash {
                    tier2_stale.push(VmValue::String(Rc::from(dir.as_str())));
                }
            }
        }

        let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
        let mut m = BTreeMap::new();
        m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
        m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
        m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
        Ok(VmValue::Dict(Rc::new(m)))
    });

    let s = Rc::clone(&state);
    let base3 = base_dir.to_path_buf();
    // metadata_refresh_hashes(): recomputes `classification.structureHash`
    // for every stored directory. Only the structure hash is written;
    // `contentHash` is left as is. The store is marked dirty even when there
    // are no entries.
    vm.register_builtin("metadata_refresh_hashes", move |_args, _out| {
        let mut state = s.borrow_mut();
        state.ensure_loaded();
        let dirs: Vec<String> = state.entries.keys().cloned().collect();
        for dir in dirs {
            let full_dir = if dir.is_empty() {
                base3.clone()
            } else {
                base3.join(&dir)
            };
            let hash = compute_structure_hash(&full_dir);
            let entry = state.entries.entry(dir).or_default();
            let ns = entry
                .namespaces
                .entry("classification".to_string())
                .or_default();
            ns.insert("structureHash".to_string(), serde_json::Value::String(hash));
        }
        state.dirty = true;
        Ok(VmValue::Nil)
    });

    let s = Rc::clone(&state);
    let base4 = base_dir.to_path_buf();
    // metadata_status(namespace?): summary dict with directory and namespace
    // counts, the namespace and directory lists, directories whose relevant
    // namespace lacks a structure or content hash, and the staleness report.
    vm.register_builtin("metadata_status", move |args, _out| {
        let namespace = args.first().and_then(|a| {
            if matches!(a, VmValue::Nil) {
                None
            } else {
                Some(a.display())
            }
        });
        s.borrow_mut().ensure_loaded();
        let state = s.borrow();
        // Used as a set: the keys are the distinct namespace names.
        let mut namespaces = BTreeMap::new();
        let mut directories = Vec::new();
        let mut missing_structure_hash = Vec::new();
        let mut missing_content_hash = Vec::new();
        for (dir, meta) in &state.entries {
            directories.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
            for ns in meta.namespaces.keys() {
                namespaces.insert(ns.clone(), VmValue::Bool(true));
            }
            let full_dir = if dir.is_empty() {
                base4.clone()
            } else {
                base4.join(dir)
            };
            // Inspect the requested namespace, falling back to
            // `classification` when it is not given or not present here.
            let relevant = namespace
                .as_ref()
                .and_then(|name| meta.namespaces.get(name))
                .or_else(|| meta.namespaces.get("classification"));
            if let Some(fields) = relevant {
                // Directories that no longer exist on disk are not reported.
                if !fields.contains_key("structureHash") && full_dir.exists() {
                    missing_structure_hash
                        .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
                }
                if !fields.contains_key("contentHash") && full_dir.exists() {
                    missing_content_hash
                        .push(VmValue::String(Rc::from(normalize_directory_key(dir))));
                }
            }
        }
        let stale = metadata_stale_value(&state, &base4);
        let mut result = BTreeMap::new();
        result.insert(
            "directory_count".to_string(),
            VmValue::Int(state.entries.len() as i64),
        );
        result.insert(
            "namespace_count".to_string(),
            VmValue::Int(namespaces.len() as i64),
        );
        result.insert(
            "namespaces".to_string(),
            VmValue::List(Rc::new(
                namespaces
                    .keys()
                    .cloned()
                    .map(|name| VmValue::String(Rc::from(name)))
                    .collect(),
            )),
        );
        result.insert(
            "directories".to_string(),
            VmValue::List(Rc::new(directories)),
        );
        result.insert(
            "missing_structure_hash".to_string(),
            VmValue::List(Rc::new(missing_structure_hash)),
        );
        result.insert(
            "missing_content_hash".to_string(),
            VmValue::List(Rc::new(missing_content_hash)),
        );
        result.insert("stale".to_string(), stale);
        Ok(VmValue::Dict(Rc::new(result)))
    });

    let base = base_dir.to_path_buf();
    // compute_content_hash(dir): content hash of `dir` relative to `base_dir`;
    // an empty `dir` hashes `base_dir` itself.
    vm.register_builtin("compute_content_hash", move |args, _out| {
        let dir = args.first().map(|a| a.display()).unwrap_or_default();
        let full_dir = if dir.is_empty() {
            base.clone()
        } else {
            base.join(&dir)
        };
        let hash = compute_content_hash_for_dir(&full_dir);
        Ok(VmValue::String(Rc::from(hash)))
    });

    // invalidate_facts(): accepts any arguments and does nothing.
    vm.register_builtin("invalidate_facts", |_args, _out| Ok(VmValue::Nil));

    register_scan_builtins(vm, base_dir);
}
755
756fn compute_structure_hash(dir: &Path) -> String {
758 let mut entries: Vec<String> = Vec::new();
759 if let Ok(rd) = std::fs::read_dir(dir) {
760 for entry in rd.flatten() {
761 if let Ok(meta) = entry.metadata() {
762 let name = entry.file_name().to_string_lossy().to_string();
763 entries.push(format!("{}:{}", name, meta.len()));
764 }
765 }
766 }
767 entries.sort();
768 let joined = entries.join("|");
769 format!("{:x}", fnv_hash(joined.as_bytes()))
770}
771
772fn compute_content_hash_for_dir(dir: &Path) -> String {
774 let mut entries: Vec<String> = Vec::new();
775 if let Ok(rd) = std::fs::read_dir(dir) {
776 for entry in rd.flatten() {
777 if let Ok(meta) = entry.metadata() {
778 let name = entry.file_name().to_string_lossy().to_string();
779 let mtime = meta
780 .modified()
781 .ok()
782 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
783 .map(|d| d.as_secs())
784 .unwrap_or(0);
785 entries.push(format!("{}:{}:{}", name, meta.len(), mtime));
786 }
787 }
788 }
789 entries.sort();
790 let joined = entries.join("|");
791 format!("{:x}", fnv_hash(joined.as_bytes()))
792}
793
/// 64-bit FNV-1a hash of `data`: starting from the offset basis, each byte
/// is XORed in and the result multiplied by the FNV prime with wrapping.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    data.iter().fold(OFFSET_BASIS, |hash, &byte| {
        (hash ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
803
804pub fn register_scan_builtins(vm: &mut Vm, base_dir: &Path) {
806 let base = base_dir.to_path_buf();
807 vm.register_builtin("scan_directory", move |args, _out| {
809 let rel_dir = args.first().map(|a| a.display()).unwrap_or_default();
810 let options = parse_scan_options(args.get(1), args.get(2));
811 let scan_base = resolve_scan_root(&base, ".");
812 let full_dir = if rel_dir.is_empty() {
813 scan_base.clone()
814 } else {
815 scan_base.join(&rel_dir)
816 };
817 let mut results: Vec<VmValue> = Vec::new();
818 scan_dir_recursive(&full_dir, &scan_base, &options, &mut results, 0);
819 Ok(VmValue::List(Rc::new(results)))
820 });
821}
822
823fn metadata_stale_value(state: &MetadataState, base_dir: &Path) -> VmValue {
824 let mut tier1_stale: Vec<VmValue> = Vec::new();
825 let mut tier2_stale: Vec<VmValue> = Vec::new();
826 for (dir, meta) in &state.entries {
827 let full_dir = if dir.is_empty() {
828 base_dir.to_path_buf()
829 } else {
830 base_dir.join(dir)
831 };
832 if let Some(stored_hash) = meta
833 .namespaces
834 .get("classification")
835 .and_then(|ns| ns.get("structureHash"))
836 .and_then(|v| v.as_str())
837 {
838 let current_hash = compute_structure_hash(&full_dir);
839 if current_hash != stored_hash {
840 tier1_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
841 continue;
842 }
843 }
844 if let Some(stored_hash) = meta
845 .namespaces
846 .get("classification")
847 .and_then(|ns| ns.get("contentHash"))
848 .and_then(|v| v.as_str())
849 {
850 let current_hash = compute_content_hash_for_dir(&full_dir);
851 if current_hash != stored_hash {
852 tier2_stale.push(VmValue::String(Rc::from(normalize_directory_key(dir))));
853 }
854 }
855 }
856 let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
857 let mut m = BTreeMap::new();
858 m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
859 m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
860 m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
861 VmValue::Dict(Rc::new(m))
862}
863
864fn scan_dir_recursive(
865 dir: &Path,
866 base: &Path,
867 options: &ScanOptions,
868 results: &mut Vec<VmValue>,
869 depth: usize,
870) {
871 if depth > options.max_depth {
872 return;
873 }
874 let rd = match std::fs::read_dir(dir) {
875 Ok(rd) => rd,
876 Err(_) => return,
877 };
878 for entry in rd.flatten() {
879 let meta = match entry.metadata() {
880 Ok(m) => m,
881 Err(_) => continue,
882 };
883 let name = entry.file_name().to_string_lossy().to_string();
884 if !options.include_hidden && name.starts_with('.') {
886 continue;
887 }
888 let rel_path = entry
889 .path()
890 .strip_prefix(base)
891 .unwrap_or(entry.path().as_path())
892 .to_string_lossy()
893 .to_string();
894 if let Some(pat) = &options.pattern {
896 if !glob_match(pat, &rel_path) {
897 if meta.is_dir() {
898 scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
899 }
900 continue;
901 }
902 }
903 let mtime = meta
904 .modified()
905 .ok()
906 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
907 .map(|d| d.as_secs() as i64)
908 .unwrap_or(0);
909 let mut m = BTreeMap::new();
910 m.insert("path".to_string(), VmValue::String(Rc::from(rel_path)));
911 m.insert("size".to_string(), VmValue::Int(meta.len() as i64));
912 m.insert("modified".to_string(), VmValue::Int(mtime));
913 m.insert("is_dir".to_string(), VmValue::Bool(meta.is_dir()));
914 if (meta.is_dir() && options.include_dirs) || (!meta.is_dir() && options.include_files) {
915 results.push(VmValue::Dict(Rc::new(m)));
916 }
917 if meta.is_dir() {
918 scan_dir_recursive(&entry.path(), base, options, results, depth + 1);
919 }
920 }
921}
922
/// Lenient glob matcher used to filter scanned paths.
///
/// * A pattern without `*` matches when it occurs anywhere in `path`
///   (substring match).
/// * Otherwise the pattern must match the whole path. Every `*` or `**`
///   matches any run of characters, including `/` and the empty run.
/// * Slashes directly next to a `**` are absorbed into it, so `src/**/x.rs`
///   also matches `src/x.rs`.
///
/// Any number of wildcards is supported (e.g. `**/*.rs`, `*.test.*`), and
/// the literal text before and after a wildcard may not overlap in `path`:
/// `ab*ba` does not match `aba`.
fn glob_match(pattern: &str, path: &str) -> bool {
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    // Collapse each `**`, together with its adjoining slashes, into one `*`.
    let pieces: Vec<&str> = pattern.split("**").collect();
    let last_piece = pieces.len() - 1;
    let normalized = pieces
        .iter()
        .copied()
        .enumerate()
        .map(|(index, piece)| {
            let piece = if index > 0 {
                piece.trim_start_matches('/')
            } else {
                piece
            };
            if index < last_piece {
                piece.trim_end_matches('/')
            } else {
                piece
            }
        })
        .collect::<Vec<&str>>()
        .join("*");

    // Literal runs between wildcards: the first is anchored at the start,
    // the last at the end, and the rest must appear in order in between.
    let segments: Vec<&str> = normalized.split('*').collect();
    let (first, rest) = match segments.split_first() {
        Some(split) => split,
        None => return false,
    };
    let (last, middle) = match rest.split_last() {
        Some(split) => split,
        None => return path == *first,
    };

    let after_prefix = match path.strip_prefix(*first) {
        Some(remaining) => remaining,
        None => return false,
    };
    // Stripping the suffix from what is left after the prefix rules out overlap.
    let mut remaining = match after_prefix.strip_suffix(*last) {
        Some(remaining) => remaining,
        None => return false,
    };
    for segment in middle {
        match remaining.find(*segment) {
            Some(at) => remaining = &remaining[at + segment.len()..],
            None => return false,
        }
    }
    true
}
943
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a path under the system temp directory that is unique per call,
    /// using the current time in nanoseconds. The path is not created.
    fn temp_path(name: &str) -> PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos();
        let mut path = std::env::temp_dir();
        path.push(format!("harn-metadata-{name}-{nanos}"));
        path
    }

    /// Fields set on the root and on `src` must both be visible, inside the
    /// same namespace, when `src` is resolved.
    #[test]
    fn metadata_resolve_preserves_namespace_structure() {
        let root = temp_path("resolve");
        let mut state = MetadataState::new(&root);

        let mut root_fields = BTreeMap::new();
        root_fields.insert("language".to_string(), serde_json::json!("rust"));
        state.set_namespace("", "classification", root_fields);

        let mut src_fields = BTreeMap::new();
        src_fields.insert("owner".to_string(), serde_json::json!("vm"));
        state.set_namespace("src", "classification", src_fields);

        let resolved = state.resolve("src");
        let classification = resolved.namespaces.get("classification").unwrap();
        assert_eq!(
            classification.get("language"),
            Some(&serde_json::json!("rust"))
        );
        assert_eq!(classification.get("owner"), Some(&serde_json::json!("vm")));
    }

    /// With `max_depth` 0 and hidden entries excluded, only the visible
    /// top-level file matching the pattern is reported.
    #[test]
    fn scan_options_filter_hidden_and_depth() {
        let root = temp_path("scan");
        let project = root.join("project");
        std::fs::create_dir_all(project.join("deep")).unwrap();
        std::fs::write(project.join("root.txt"), "root").unwrap();
        std::fs::write(project.join(".hidden.txt"), "hidden").unwrap();
        std::fs::write(project.join("deep/nested.txt"), "nested").unwrap();

        let options = ScanOptions {
            pattern: Some(".txt".into()),
            max_depth: 0,
            include_hidden: false,
            include_dirs: false,
            include_files: true,
        };
        let mut results = Vec::new();
        scan_dir_recursive(&project, &root, &options, &mut results, 0);

        let mut paths: Vec<String> = Vec::new();
        for value in results {
            paths.push(match value {
                VmValue::Dict(dict) => dict.get("path").unwrap().display(),
                _ => String::new(),
            });
        }
        assert_eq!(paths, vec!["project/root.txt".to_string()]);
        let _ = std::fs::remove_dir_all(root);
    }
}