1use std::cell::RefCell;
11use std::collections::BTreeMap;
12use std::path::{Path, PathBuf};
13use std::rc::Rc;
14
15use crate::value::{VmError, VmValue};
16use crate::vm::Vm;
17
/// Name of a metadata namespace: the key that groups related fields within
/// one directory's metadata.
type Namespace = String;
/// Key of a single field stored inside a namespace.
type FieldKey = String;
20
/// Metadata recorded for one directory, grouped by namespace.
#[derive(Clone, Default)]
struct DirectoryMetadata {
    /// Namespace name -> (field key -> JSON value).
    namespaces: BTreeMap<Namespace, BTreeMap<FieldKey, serde_json::Value>>,
}
26
/// In-memory view of the metadata store that lives under `base_dir`.
struct MetadataState {
    /// Directory key -> metadata stored for that directory.
    entries: BTreeMap<String, DirectoryMetadata>,
    /// Root directory; shard files are read from and written to
    /// `<base_dir>/.burin/metadata`.
    base_dir: PathBuf,
    /// Set once `ensure_loaded` has attempted to read the shard files.
    loaded: bool,
    /// Set when `entries` has changes that `save` has not written yet.
    dirty: bool,
}
34
35impl MetadataState {
36 fn new(base_dir: &Path) -> Self {
37 Self {
38 entries: BTreeMap::new(),
39 base_dir: base_dir.to_path_buf(),
40 loaded: false,
41 dirty: false,
42 }
43 }
44
45 fn metadata_dir(&self) -> PathBuf {
46 self.base_dir.join(".burin").join("metadata")
47 }
48
49 fn ensure_loaded(&mut self) {
50 if self.loaded {
51 return;
52 }
53 self.loaded = true;
54 let meta_dir = self.metadata_dir();
55 let entries = match std::fs::read_dir(&meta_dir) {
56 Ok(e) => e,
57 Err(_) => return,
58 };
59 for entry in entries.flatten() {
60 let path = entry.path();
61 if path.extension().map(|e| e == "json").unwrap_or(false) {
62 if let Ok(contents) = std::fs::read_to_string(&path) {
63 self.load_shard(&contents);
64 }
65 }
66 }
67 }
68
69 fn load_shard(&mut self, contents: &str) {
70 let parsed: serde_json::Value = match serde_json::from_str(contents) {
71 Ok(v) => v,
72 Err(_) => return,
73 };
74 let shard_entries = match parsed.get("entries").and_then(|e| e.as_object()) {
75 Some(e) => e,
76 None => return,
77 };
78 for (dir, meta_val) in shard_entries {
79 let meta = parse_directory_metadata(meta_val);
80 self.entries.insert(dir.clone(), meta);
81 }
82 }
83
84 fn resolve(&mut self, directory: &str) -> DirectoryMetadata {
87 self.ensure_loaded();
88 let mut result = DirectoryMetadata::default();
89
90 if let Some(root) = self.entries.get(".").or_else(|| self.entries.get("")) {
92 merge_metadata(&mut result, root);
93 }
94
95 let components: Vec<&str> = directory
97 .split('/')
98 .filter(|c| !c.is_empty() && *c != ".")
99 .collect();
100 let mut current = String::new();
101 for component in components {
102 if current.is_empty() {
103 current = component.to_string();
104 } else {
105 current = format!("{current}/{component}");
106 }
107 if let Some(meta) = self.entries.get(¤t) {
108 merge_metadata(&mut result, meta);
109 }
110 }
111
112 result
113 }
114
115 fn get_namespace(
117 &mut self,
118 directory: &str,
119 namespace: &str,
120 ) -> Option<BTreeMap<FieldKey, serde_json::Value>> {
121 let resolved = self.resolve(directory);
122 resolved.namespaces.get(namespace).cloned()
123 }
124
125 fn set_namespace(
127 &mut self,
128 directory: &str,
129 namespace: &str,
130 data: BTreeMap<FieldKey, serde_json::Value>,
131 ) {
132 self.ensure_loaded();
133 let meta = self.entries.entry(directory.to_string()).or_default();
134 let ns = meta.namespaces.entry(namespace.to_string()).or_default();
135 for (k, v) in data {
136 ns.insert(k, v);
137 }
138 self.dirty = true;
139 }
140
141 fn save(&mut self) -> Result<(), String> {
143 if !self.dirty {
144 return Ok(());
145 }
146 let meta_dir = self.metadata_dir();
147 std::fs::create_dir_all(&meta_dir).map_err(|e| format!("metadata mkdir: {e}"))?;
148
149 let mut shard = serde_json::Map::new();
152 for (dir, meta) in &self.entries {
153 shard.insert(dir.clone(), serialize_directory_metadata(meta));
154 }
155
156 let store_obj = serde_json::json!({
157 "version": 2,
158 "generatedAt": chrono_now_iso(),
159 "entries": serde_json::Value::Object(shard)
160 });
161
162 let json =
163 serde_json::to_string_pretty(&store_obj).map_err(|e| format!("metadata json: {e}"))?;
164
165 let shard_path = meta_dir.join("root.json");
166 std::fs::write(&shard_path, json).map_err(|e| format!("metadata write: {e}"))?;
167 self.dirty = false;
168 Ok(())
169 }
170}
171
/// Formats the current system time as a UTC timestamp of the form
/// `YYYY-MM-DDTHH:MM:SSZ`, using only the standard library.
///
/// A clock that reports a time before the Unix epoch is treated as the epoch
/// itself.
fn chrono_now_iso() -> String {
    const SECS_PER_DAY: u64 = 86400;

    fn is_leap(year: i64) -> bool {
        year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)
    }

    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // Time-of-day part.
    let second_of_day = since_epoch % SECS_PER_DAY;
    let hh = second_of_day / 3600;
    let mm = (second_of_day % 3600) / 60;
    let ss = second_of_day % 60;

    // Peel off whole years, starting at 1970, until the remainder fits
    // inside the current year.
    let mut day_of_year = (since_epoch / SECS_PER_DAY) as i64;
    let mut year = 1970i64;
    loop {
        let year_len: i64 = if is_leap(year) { 366 } else { 365 };
        if day_of_year < year_len {
            break;
        }
        day_of_year -= year_len;
        year += 1;
    }

    // Then peel off whole months of that year.
    let february: i64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month_index = 0usize;
    for &len in month_lengths.iter() {
        if day_of_year < len {
            break;
        }
        day_of_year -= len;
        month_index += 1;
    }

    // Month and day are zero-based at this point.
    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year,
        month_index + 1,
        day_of_year + 1,
        hh,
        mm,
        ss
    )
}
233
234fn merge_metadata(target: &mut DirectoryMetadata, source: &DirectoryMetadata) {
235 for (ns, fields) in &source.namespaces {
236 let target_ns = target.namespaces.entry(ns.clone()).or_default();
237 for (k, v) in fields {
238 target_ns.insert(k.clone(), v.clone());
239 }
240 }
241}
242
243fn parse_directory_metadata(val: &serde_json::Value) -> DirectoryMetadata {
244 let mut meta = DirectoryMetadata::default();
245 let obj = match val.as_object() {
246 Some(o) => o,
247 None => return meta,
248 };
249 if let Some(ns_obj) = obj.get("namespaces").and_then(|n| n.as_object()) {
251 for (ns_name, fields_val) in ns_obj {
252 if let Some(fields) = fields_val.as_object() {
253 let mut field_map = BTreeMap::new();
254 for (k, v) in fields {
255 field_map.insert(k.clone(), v.clone());
256 }
257 meta.namespaces.insert(ns_name.clone(), field_map);
258 }
259 }
260 }
261 meta
262}
263
264fn serialize_directory_metadata(meta: &DirectoryMetadata) -> serde_json::Value {
265 let mut ns_obj = serde_json::Map::new();
266 for (ns_name, fields) in &meta.namespaces {
267 let mut fields_obj = serde_json::Map::new();
268 for (k, v) in fields {
269 fields_obj.insert(k.clone(), v.clone());
270 }
271 ns_obj.insert(ns_name.clone(), serde_json::Value::Object(fields_obj));
272 }
273 serde_json::json!({ "namespaces": serde_json::Value::Object(ns_obj) })
274}
275
276fn vm_to_json(val: &VmValue) -> serde_json::Value {
277 match val {
278 VmValue::String(s) => serde_json::Value::String(s.to_string()),
279 VmValue::Int(n) => serde_json::json!(*n),
280 VmValue::Float(n) => serde_json::json!(*n),
281 VmValue::Bool(b) => serde_json::Value::Bool(*b),
282 VmValue::Nil => serde_json::Value::Null,
283 VmValue::List(items) => serde_json::Value::Array(items.iter().map(vm_to_json).collect()),
284 VmValue::Dict(map) => {
285 let obj: serde_json::Map<String, serde_json::Value> = map
286 .iter()
287 .map(|(k, v)| (k.clone(), vm_to_json(v)))
288 .collect();
289 serde_json::Value::Object(obj)
290 }
291 _ => serde_json::Value::Null,
292 }
293}
294
295fn json_to_vm(jv: &serde_json::Value) -> VmValue {
296 match jv {
297 serde_json::Value::Null => VmValue::Nil,
298 serde_json::Value::Bool(b) => VmValue::Bool(*b),
299 serde_json::Value::Number(n) => {
300 if let Some(i) = n.as_i64() {
301 VmValue::Int(i)
302 } else {
303 VmValue::Float(n.as_f64().unwrap_or(0.0))
304 }
305 }
306 serde_json::Value::String(s) => VmValue::String(Rc::from(s.as_str())),
307 serde_json::Value::Array(arr) => {
308 VmValue::List(Rc::new(arr.iter().map(json_to_vm).collect()))
309 }
310 serde_json::Value::Object(map) => {
311 let mut m = BTreeMap::new();
312 for (k, v) in map {
313 m.insert(k.clone(), json_to_vm(v));
314 }
315 VmValue::Dict(Rc::new(m))
316 }
317 }
318}
319
320pub fn register_metadata_builtins(vm: &mut Vm, base_dir: &Path) {
326 let state = Rc::new(RefCell::new(MetadataState::new(base_dir)));
327
328 let s = Rc::clone(&state);
330 vm.register_builtin("metadata_get", move |args, _out| {
331 let dir = args.first().map(|a| a.display()).unwrap_or_default();
332 let namespace = args.get(1).and_then(|a| {
333 if matches!(a, VmValue::Nil) {
334 None
335 } else {
336 Some(a.display())
337 }
338 });
339
340 let mut st = s.borrow_mut();
341 if let Some(ns) = namespace {
342 match st.get_namespace(&dir, &ns) {
343 Some(fields) => {
344 let mut m = BTreeMap::new();
345 for (k, v) in fields {
346 m.insert(k, json_to_vm(&v));
347 }
348 Ok(VmValue::Dict(Rc::new(m)))
349 }
350 None => Ok(VmValue::Nil),
351 }
352 } else {
353 let resolved = st.resolve(&dir);
355 let mut m = BTreeMap::new();
356 for fields in resolved.namespaces.values() {
357 for (k, v) in fields {
358 m.insert(k.clone(), json_to_vm(v));
359 }
360 }
361 if m.is_empty() {
362 Ok(VmValue::Nil)
363 } else {
364 Ok(VmValue::Dict(Rc::new(m)))
365 }
366 }
367 });
368
369 let s = Rc::clone(&state);
371 vm.register_builtin("metadata_set", move |args, _out| {
372 let dir = args.first().map(|a| a.display()).unwrap_or_default();
373 let namespace = args.get(1).map(|a| a.display()).unwrap_or_default();
374 let data_val = args.get(2).unwrap_or(&VmValue::Nil);
375
376 let mut data = BTreeMap::new();
377 if let VmValue::Dict(dict) = data_val {
378 for (k, v) in dict.iter() {
379 data.insert(k.clone(), vm_to_json(v));
380 }
381 }
382
383 if !data.is_empty() {
384 s.borrow_mut().set_namespace(&dir, &namespace, data);
385 }
386 Ok(VmValue::Nil)
387 });
388
389 let s = Rc::clone(&state);
391 vm.register_builtin("metadata_save", move |_args, _out| {
392 s.borrow_mut().save().map_err(VmError::Runtime)?;
393 Ok(VmValue::Nil)
394 });
395
396 let s = Rc::clone(&state);
399 let base2 = base_dir.to_path_buf();
400 vm.register_builtin("metadata_stale", move |_args, _out| {
401 s.borrow_mut().ensure_loaded();
402 let state = s.borrow();
403 let mut tier1_stale: Vec<VmValue> = Vec::new();
404 let mut tier2_stale: Vec<VmValue> = Vec::new();
405
406 for (dir, meta) in &state.entries {
407 let full_dir = if dir.is_empty() {
408 base2.clone()
409 } else {
410 base2.join(dir)
411 };
412 if let Some(stored_hash) = meta
414 .namespaces
415 .get("classification")
416 .and_then(|ns| ns.get("structureHash"))
417 .and_then(|v| v.as_str())
418 {
419 let current_hash = compute_structure_hash(&full_dir);
420 if current_hash != stored_hash {
421 tier1_stale.push(VmValue::String(Rc::from(dir.as_str())));
422 continue; }
424 }
425 if let Some(stored_hash) = meta
427 .namespaces
428 .get("classification")
429 .and_then(|ns| ns.get("contentHash"))
430 .and_then(|v| v.as_str())
431 {
432 let current_hash = compute_content_hash_for_dir(&full_dir);
433 if current_hash != stored_hash {
434 tier2_stale.push(VmValue::String(Rc::from(dir.as_str())));
435 }
436 }
437 }
438
439 let any_stale = !tier1_stale.is_empty() || !tier2_stale.is_empty();
440 let mut m = BTreeMap::new();
441 m.insert("any_stale".to_string(), VmValue::Bool(any_stale));
442 m.insert("tier1".to_string(), VmValue::List(Rc::new(tier1_stale)));
443 m.insert("tier2".to_string(), VmValue::List(Rc::new(tier2_stale)));
444 Ok(VmValue::Dict(Rc::new(m)))
445 });
446
447 let s = Rc::clone(&state);
450 let base3 = base_dir.to_path_buf();
451 vm.register_builtin("metadata_refresh_hashes", move |_args, _out| {
452 let mut state = s.borrow_mut();
453 state.ensure_loaded();
454 let dirs: Vec<String> = state.entries.keys().cloned().collect();
455 for dir in dirs {
456 let full_dir = if dir.is_empty() {
457 base3.clone()
458 } else {
459 base3.join(&dir)
460 };
461 let hash = compute_structure_hash(&full_dir);
462 let entry = state.entries.entry(dir).or_default();
463 let ns = entry
464 .namespaces
465 .entry("classification".to_string())
466 .or_default();
467 ns.insert("structureHash".to_string(), serde_json::Value::String(hash));
468 }
469 state.dirty = true;
470 Ok(VmValue::Nil)
471 });
472
473 let base = base_dir.to_path_buf();
476 vm.register_builtin("compute_content_hash", move |args, _out| {
477 let dir = args.first().map(|a| a.display()).unwrap_or_default();
478 let full_dir = if dir.is_empty() {
479 base.clone()
480 } else {
481 base.join(&dir)
482 };
483 let hash = compute_content_hash_for_dir(&full_dir);
484 Ok(VmValue::String(Rc::from(hash)))
485 });
486
487 vm.register_builtin("invalidate_facts", |_args, _out| Ok(VmValue::Nil));
489
490 register_scan_builtins(vm, base_dir);
492}
493
494fn compute_structure_hash(dir: &Path) -> String {
496 let mut entries: Vec<String> = Vec::new();
497 if let Ok(rd) = std::fs::read_dir(dir) {
498 for entry in rd.flatten() {
499 if let Ok(meta) = entry.metadata() {
500 let name = entry.file_name().to_string_lossy().to_string();
501 entries.push(format!("{}:{}", name, meta.len()));
502 }
503 }
504 }
505 entries.sort();
506 let joined = entries.join("|");
507 format!("{:x}", fnv_hash(joined.as_bytes()))
508}
509
510fn compute_content_hash_for_dir(dir: &Path) -> String {
512 let mut entries: Vec<String> = Vec::new();
513 if let Ok(rd) = std::fs::read_dir(dir) {
514 for entry in rd.flatten() {
515 if let Ok(meta) = entry.metadata() {
516 let name = entry.file_name().to_string_lossy().to_string();
517 let mtime = meta
518 .modified()
519 .ok()
520 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
521 .map(|d| d.as_secs())
522 .unwrap_or(0);
523 entries.push(format!("{}:{}:{}", name, meta.len(), mtime));
524 }
525 }
526 }
527 entries.sort();
528 let joined = entries.join("|");
529 format!("{:x}", fnv_hash(joined.as_bytes()))
530}
531
/// 64-bit FNV-1a hash of `data`: starting from the offset basis, each byte
/// is XORed in and the result multiplied by the FNV prime with wrapping
/// arithmetic. Empty input returns the offset basis.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;
    data.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
541
542pub fn register_scan_builtins(vm: &mut Vm, base_dir: &Path) {
544 let base = base_dir.to_path_buf();
545 vm.register_builtin("scan_directory", move |args, _out| {
547 let rel_dir = args.first().map(|a| a.display()).unwrap_or_default();
548 let pattern = args.get(1).and_then(|a| {
549 if matches!(a, VmValue::Nil) {
550 None
551 } else {
552 Some(a.display())
553 }
554 });
555 let full_dir = if rel_dir.is_empty() {
556 base.clone()
557 } else {
558 base.join(&rel_dir)
559 };
560 let mut results: Vec<VmValue> = Vec::new();
561 scan_dir_recursive(&full_dir, &base, &pattern, &mut results, 0, 5);
562 Ok(VmValue::List(Rc::new(results)))
563 });
564}
565
566fn scan_dir_recursive(
567 dir: &Path,
568 base: &Path,
569 pattern: &Option<String>,
570 results: &mut Vec<VmValue>,
571 depth: usize,
572 max_depth: usize,
573) {
574 if depth > max_depth {
575 return;
576 }
577 let rd = match std::fs::read_dir(dir) {
578 Ok(rd) => rd,
579 Err(_) => return,
580 };
581 for entry in rd.flatten() {
582 let meta = match entry.metadata() {
583 Ok(m) => m,
584 Err(_) => continue,
585 };
586 let name = entry.file_name().to_string_lossy().to_string();
587 if name.starts_with('.') {
589 continue;
590 }
591 let rel_path = entry
592 .path()
593 .strip_prefix(base)
594 .unwrap_or(entry.path().as_path())
595 .to_string_lossy()
596 .to_string();
597 if let Some(pat) = pattern {
599 if !glob_match(pat, &rel_path) {
600 if meta.is_dir() {
601 scan_dir_recursive(&entry.path(), base, pattern, results, depth + 1, max_depth);
602 }
603 continue;
604 }
605 }
606 let mtime = meta
607 .modified()
608 .ok()
609 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
610 .map(|d| d.as_secs() as i64)
611 .unwrap_or(0);
612 let mut m = BTreeMap::new();
613 m.insert("path".to_string(), VmValue::String(Rc::from(rel_path)));
614 m.insert("size".to_string(), VmValue::Int(meta.len() as i64));
615 m.insert("modified".to_string(), VmValue::Int(mtime));
616 m.insert("is_dir".to_string(), VmValue::Bool(meta.is_dir()));
617 results.push(VmValue::Dict(Rc::new(m)));
618 if meta.is_dir() {
619 scan_dir_recursive(&entry.path(), base, pattern, results, depth + 1, max_depth);
620 }
621 }
622}
623
/// Lenient glob matching of `path` against `pattern`.
///
/// * A pattern without `*` matches when it occurs anywhere in `path`
///   (substring match).
/// * `*` and `**` both match any run of characters, including `/`.
/// * A `/` directly next to `**` is optional, so `**/x` also matches `x` at
///   the top level and `src/**` matches `src` itself.
/// * The text before the first wildcard must be a prefix of `path`, the text
///   after the last wildcard must be a suffix, and the pieces in between
///   must occur in order without overlapping.
///
/// Any number of wildcards is supported, so combinations such as `**/*.rs`,
/// `src/**/*.rs` and `*foo*` work.
fn glob_match(pattern: &str, path: &str) -> bool {
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    // Break the pattern into the literal pieces between wildcards. Slashes
    // touching a `**` are dropped so the separator is optional there.
    let deep_parts: Vec<&str> = pattern.split("**").collect();
    let last_part = deep_parts.len() - 1;
    let mut literals: Vec<&str> = Vec::new();
    for (index, part) in deep_parts.iter().enumerate() {
        let mut part = *part;
        if index > 0 {
            part = part.trim_start_matches('/');
        }
        if index < last_part {
            part = part.trim_end_matches('/');
        }
        literals.extend(part.split('*'));
    }

    // A pattern containing `*` always yields at least two pieces; the
    // fallbacks only keep the function total.
    let (head, rest) = match literals.split_first() {
        Some(split) => split,
        None => return false,
    };
    let (tail, middle) = match rest.split_last() {
        Some(split) => split,
        None => return path == *head,
    };

    // Anchor the first piece at the start, find the middle pieces leftmost
    // in order, then anchor the last piece at the end of what is left.
    let mut remaining = match path.strip_prefix(*head) {
        Some(after_head) => after_head,
        None => return false,
    };
    for literal in middle {
        match remaining.find(*literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }
    remaining.ends_with(*tail)
}