Skip to main content

haystack_core/xeto/
loader.rs

1//! Xeto library loader — parses .xeto source, resolves names, produces Specs.
2
3use std::collections::{HashMap, HashSet};
4use std::path::{Path, PathBuf};
5
/// Maximum size in bytes of a single .xeto file (10 MB, i.e. 10 * 1024 * 1024).
///
/// `load_xeto_dir` and `scan_xeto_dir` compare each file's reported metadata
/// length against this limit and fail with a "file too large" error before
/// reading any file that exceeds it.
const MAX_XETO_FILE_SIZE: u64 = 10 * 1024 * 1024;
8
9use crate::ontology::{DefNamespace, Lib};
10use crate::xeto::XetoError;
11use crate::xeto::parser::parse_xeto;
12use crate::xeto::resolver::XetoResolver;
13use crate::xeto::spec::{Spec, spec_from_def};
14
15/// Load a Xeto library from source text.
16///
17/// Parses the source, resolves names against already-loaded libraries in `ns`,
18/// and returns the library metadata plus resolved specs.
19pub fn load_xeto_source(
20    source: &str,
21    lib_name: &str,
22    ns: &DefNamespace,
23) -> Result<(Lib, Vec<Spec>), XetoError> {
24    let xeto_file = parse_xeto(source)?;
25    load_from_ast(xeto_file, lib_name, ns)
26}
27
28/// Load a Xeto library from a directory containing .xeto files.
29///
30/// Reads all .xeto files in the directory, concatenates them (pragma from
31/// the first file that has one), and processes as a single library.
32pub fn load_xeto_dir(dir: &Path, ns: &DefNamespace) -> Result<(String, Lib, Vec<Spec>), XetoError> {
33    let mut all_source = String::new();
34    let mut lib_name: Option<String> = None;
35
36    // Read .xeto files sorted by name for deterministic ordering
37    let mut entries: Vec<_> = std::fs::read_dir(dir)
38        .map_err(|e| XetoError::Load(format!("cannot read directory: {e}")))?
39        .filter_map(|e| e.ok())
40        .filter(|e| e.path().extension().is_some_and(|ext| ext == "xeto"))
41        .collect();
42    entries.sort_by_key(|e| e.file_name());
43
44    if entries.is_empty() {
45        return Err(XetoError::Load("no .xeto files found in directory".into()));
46    }
47
48    for entry in &entries {
49        // Skip symlinks for security (consistent with scan_xeto_dir)
50        let file_type = entry
51            .file_type()
52            .map_err(|e| XetoError::Load(format!("cannot read file type: {e}")))?;
53        if file_type.is_symlink() {
54            continue;
55        }
56
57        // Check file size
58        let metadata = entry
59            .metadata()
60            .map_err(|e| XetoError::Load(format!("cannot read metadata: {e}")))?;
61        if metadata.len() > MAX_XETO_FILE_SIZE {
62            return Err(XetoError::Load(format!(
63                "file too large ({} bytes): {}",
64                metadata.len(),
65                entry.path().display()
66            )));
67        }
68
69        let content = std::fs::read_to_string(entry.path())
70            .map_err(|e| XetoError::Load(format!("cannot read {:?}: {e}", entry.path())))?;
71
72        // Try to extract lib name from pragma if we haven't found one yet
73        if lib_name.is_none()
74            && let Ok(xf) = parse_xeto(&content)
75            && let Some(ref pragma) = xf.pragma
76        {
77            lib_name = Some(pragma.name.clone());
78        }
79
80        all_source.push_str(&content);
81        all_source.push('\n');
82    }
83
84    // Fall back to directory name if no pragma found
85    let name = lib_name.unwrap_or_else(|| {
86        dir.file_name()
87            .and_then(|n| n.to_str())
88            .unwrap_or("unknown")
89            .to_string()
90    });
91
92    let (lib, specs) = load_xeto_source(&all_source, &name, ns)?;
93    Ok((name, lib, specs))
94}
95
96/// Scan a directory for its Xeto library name and dependency list without
97/// fully loading or resolving.  Returns `(lib_name, depends, source)`.
98fn scan_xeto_dir(dir: &Path) -> Result<(String, Vec<String>, String), XetoError> {
99    let mut all_source = String::new();
100    let mut lib_name: Option<String> = None;
101    let mut depends: Vec<String> = Vec::new();
102
103    let mut entries: Vec<_> = std::fs::read_dir(dir)
104        .map_err(|e| XetoError::Load(format!("cannot read directory {:?}: {e}", dir)))?
105        .filter_map(|e| e.ok())
106        .filter(|e| e.path().extension().is_some_and(|ext| ext == "xeto"))
107        .collect();
108    entries.sort_by_key(|e| e.file_name());
109
110    if entries.is_empty() {
111        return Err(XetoError::Load(format!(
112            "no .xeto files found in {:?}",
113            dir
114        )));
115    }
116
117    for entry in &entries {
118        // Check for symlinks
119        let file_type = entry
120            .file_type()
121            .map_err(|e| XetoError::Load(format!("cannot read file type: {e}")))?;
122        if file_type.is_symlink() {
123            continue; // Skip symlinks for security
124        }
125
126        // Check file size
127        let metadata = entry
128            .metadata()
129            .map_err(|e| XetoError::Load(format!("cannot read metadata: {e}")))?;
130        if metadata.len() > MAX_XETO_FILE_SIZE {
131            return Err(XetoError::Load(format!(
132                "file too large ({} bytes): {}",
133                metadata.len(),
134                entry.path().display()
135            )));
136        }
137
138        let content = std::fs::read_to_string(entry.path())
139            .map_err(|e| XetoError::Load(format!("cannot read {:?}: {e}", entry.path())))?;
140
141        if lib_name.is_none()
142            && let Ok(xf) = parse_xeto(&content)
143            && let Some(ref pragma) = xf.pragma
144        {
145            if !pragma.name.is_empty() {
146                lib_name = Some(pragma.name.clone());
147            }
148            depends = pragma.depends.clone();
149        }
150
151        all_source.push_str(&content);
152        all_source.push('\n');
153    }
154
155    let name = lib_name.unwrap_or_else(|| {
156        dir.file_name()
157            .and_then(|n| n.to_str())
158            .unwrap_or("unknown")
159            .to_string()
160    });
161
162    Ok((name, depends, all_source))
163}
164
165/// Load multiple Xeto libraries from directories with automatic dependency resolution.
166///
167/// Scans each directory for library metadata (name and dependencies from pragmas),
168/// performs topological sort to determine load order, detects circular dependencies,
169/// and loads each library into `ns` in the correct order.
170///
171/// Returns the library names in the order they were loaded.
172pub fn load_xeto_with_deps(
173    dirs: &[PathBuf],
174    ns: &mut DefNamespace,
175) -> Result<Vec<String>, XetoError> {
176    // Phase 1: Scan all directories for lib names and dependencies
177    let mut scanned: Vec<(String, Vec<String>, String, PathBuf)> = Vec::new();
178    let mut seen_names = HashSet::new();
179
180    for dir in dirs {
181        let canonical_dir = dir
182            .canonicalize()
183            .map_err(|e| XetoError::Load(format!("cannot resolve path {}: {e}", dir.display())))?;
184        let (name, depends, source) = scan_xeto_dir(&canonical_dir)?;
185        // Verify resolved path is still under the canonical directory
186        let file_canonical = canonical_dir
187            .canonicalize()
188            .map_err(|e| XetoError::Load(format!("cannot resolve: {e}")))?;
189        if !file_canonical.starts_with(&canonical_dir) {
190            return Err(XetoError::Load(format!(
191                "path traversal detected: {}",
192                dir.display()
193            )));
194        }
195
196        if !seen_names.insert(name.clone()) {
197            return Err(XetoError::Load(format!(
198                "duplicate library name '{}' in {:?}",
199                name, dir
200            )));
201        }
202        scanned.push((name, depends, source, canonical_dir));
203    }
204
205    // Phase 2: Build resolver and compute topological order
206    let mut resolver = XetoResolver::new();
207
208    // Add already-loaded libs from the namespace
209    for (name, lib) in ns.libs() {
210        let all_names: HashSet<String> = ns
211            .specs(Some(name))
212            .iter()
213            .map(|s| s.name.clone())
214            .collect();
215        resolver.add_lib(name, all_names, lib.depends.clone());
216    }
217
218    // Add scanned libs (with empty spec sets — we only need deps for ordering)
219    for (name, depends, _, _) in &scanned {
220        resolver.add_lib(name, HashSet::new(), depends.clone());
221    }
222
223    let order = resolver.dependency_order()?;
224
225    // Phase 3: Load in dependency order (skip libs already in namespace)
226    let scanned_map: HashMap<String, (String, PathBuf)> = scanned
227        .into_iter()
228        .map(|(name, _, source, dir)| (name.clone(), (source, dir)))
229        .collect();
230
231    let mut loaded = Vec::new();
232    for lib_name in &order {
233        if ns.libs().contains_key(lib_name.as_str()) {
234            continue; // already loaded (e.g. bundled libs)
235        }
236        if let Some((source, _)) = scanned_map.get(lib_name) {
237            ns.load_xeto_str(source, lib_name)?;
238            loaded.push(lib_name.clone());
239        }
240        // libs not in scanned_map and not in ns are transitive deps
241        // that must already be loaded — the load_xeto_source call will
242        // validate this via the dependency check
243    }
244
245    Ok(loaded)
246}
247
248/// Internal: convert a parsed XetoFile into a Lib + Vec<Spec>.
249fn load_from_ast(
250    xeto_file: crate::xeto::ast::XetoFile,
251    lib_name: &str,
252    ns: &DefNamespace,
253) -> Result<(Lib, Vec<Spec>), XetoError> {
254    // Build resolver with known libs
255    let mut resolver = XetoResolver::new();
256    for (name, lib) in ns.libs() {
257        let mut all_names: HashSet<String> = ns
258            .specs(Some(name))
259            .iter()
260            .map(|s| s.name.clone())
261            .collect();
262        // Also include def names for resolution
263        for def_name in lib.defs.keys() {
264            all_names.insert(def_name.clone());
265        }
266        resolver.add_lib(name, all_names, lib.depends.clone());
267    }
268
269    // Register this library's spec names for self-resolution
270    let own_names: HashSet<String> = xeto_file.specs.iter().map(|s| s.name.clone()).collect();
271    let depends: Vec<String> = xeto_file
272        .pragma
273        .as_ref()
274        .map(|p| p.depends.clone())
275        .unwrap_or_default();
276    resolver.add_lib(lib_name, own_names, depends.clone());
277
278    // Validate dependencies exist
279    for dep in &depends {
280        if !ns.libs().contains_key(dep.as_str()) {
281            return Err(XetoError::Load(format!(
282                "library '{}' depends on '{}' which is not loaded",
283                lib_name, dep
284            )));
285        }
286    }
287
288    // Resolve names and convert to Specs
289    let mut specs = Vec::new();
290    for spec_def in &xeto_file.specs {
291        let mut resolved = spec_from_def(spec_def, lib_name);
292
293        // Resolve base type name
294        if let Some(ref base) = resolved.base
295            && let Some(resolved_name) = resolver.resolve(base, lib_name)
296        {
297            resolved.base = Some(resolved_name);
298        }
299
300        // Resolve slot type_refs
301        for slot in &mut resolved.slots {
302            if let Some(ref type_ref) = slot.type_ref
303                && let Some(resolved_name) = resolver.resolve(type_ref, lib_name)
304            {
305                slot.type_ref = Some(resolved_name);
306            }
307        }
308
309        specs.push(resolved);
310    }
311
312    // Build Lib metadata
313    let pragma = xeto_file.pragma.as_ref();
314    let lib = Lib {
315        name: lib_name.to_string(),
316        version: pragma
317            .map(|p| p.version.clone())
318            .unwrap_or_else(|| "0.0.0".into()),
319        doc: pragma.map(|p| p.doc.clone()).unwrap_or_default(),
320        depends,
321        defs: HashMap::new(), // Specs are registered separately
322    };
323
324    Ok((lib, specs))
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// One spec with a typed slot and a marker slot.
    const SIMPLE_SPEC: &str = r#"
Foo: Obj {
  name: Str
  active
}
"#;

    /// A pragma with doc and version (no name, no depends) plus one spec.
    const PRAGMA_AND_SPEC: &str = r#"
pragma: Lib <
  doc: "Test library"
  version: "1.0.0"
>

Bar: Obj {
  count: Number
}
"#;

    /// Two specs where the second inherits from the first.
    const PARENT_AND_CHILD: &str = r#"
Parent: Obj {
  equip
}

Child: Parent {
  ahu
  dis: Str
}
"#;

    /// A library that declares a dependency which is never loaded.
    const NEEDS_MISSING_LIB: &str = r#"
pragma: Lib <
  doc: "Needs base"
  version: "1.0.0"
  depends: { { lib: "nonexistent" } }
>

Foo: Obj { tag }
"#;

    /// Fresh namespace with no libraries loaded.
    fn empty_ns() -> DefNamespace {
        DefNamespace::new()
    }

    /// Create `parent/dir_name`, write `contents` to `file_name` inside it,
    /// and return the path of the new directory.
    fn make_lib_dir(parent: &Path, dir_name: &str, file_name: &str, contents: &str) -> PathBuf {
        let lib_dir = parent.join(dir_name);
        std::fs::create_dir(&lib_dir).unwrap();
        std::fs::write(lib_dir.join(file_name), contents).unwrap();
        lib_dir
    }

    #[test]
    fn load_simple_spec() {
        let ns = empty_ns();
        let (lib, specs) = load_xeto_source(SIMPLE_SPEC, "test", &ns).unwrap();
        assert_eq!(lib.name, "test");
        assert_eq!(specs.len(), 1);
        let foo = &specs[0];
        assert_eq!(foo.qname, "test::Foo");
        assert_eq!(foo.slots.len(), 2);
    }

    #[test]
    fn load_with_pragma() {
        let ns = empty_ns();
        let (lib, specs) = load_xeto_source(PRAGMA_AND_SPEC, "testlib", &ns).unwrap();
        assert_eq!(lib.version, "1.0.0");
        assert_eq!(lib.doc, "Test library");
        assert_eq!(specs.len(), 1);
    }

    #[test]
    fn load_multiple_specs() {
        let ns = empty_ns();
        let (_, specs) = load_xeto_source(PARENT_AND_CHILD, "test", &ns).unwrap();
        assert_eq!(specs.len(), 2);
        // The base type must come back qualified with the library name.
        let child = specs.iter().find(|spec| spec.name == "Child").unwrap();
        assert_eq!(child.base.as_deref(), Some("test::Parent"));
    }

    #[test]
    fn load_registers_in_namespace() {
        let mut ns = DefNamespace::new();
        let qnames = ns.load_xeto_str("Baz: Obj { tag }", "mylib").unwrap();
        assert_eq!(qnames, vec!["mylib::Baz"]);
        assert!(ns.get_spec("mylib::Baz").is_some());
    }

    #[test]
    fn load_missing_dependency_fails() {
        let ns = empty_ns();
        let outcome = load_xeto_source(NEEDS_MISSING_LIB, "test", &ns);
        assert!(outcome.is_err());
    }

    #[test]
    fn load_and_unload_roundtrip() {
        let mut ns = DefNamespace::new();
        ns.load_xeto_str("Foo: Obj { marker }", "temp").unwrap();
        assert!(ns.get_spec("temp::Foo").is_some());
        ns.unload_lib("temp").unwrap();
        assert!(ns.get_spec("temp::Foo").is_none());
    }

    #[test]
    fn load_with_deps_single_dir() {
        let tmp = tempfile::tempdir().unwrap();
        // The pragma has no name, so the library is named after the directory.
        let lib_dir = make_lib_dir(
            tmp.path(),
            "mylib",
            "lib.xeto",
            r#"
pragma: Lib <
  doc: "My lib"
  version: "1.0.0"
>

Widget: Obj {
  label: Str
}
"#,
        );

        let mut ns = DefNamespace::new();
        let loaded = load_xeto_with_deps(&[lib_dir], &mut ns).unwrap();
        assert_eq!(loaded, vec!["mylib"]);
        assert!(ns.get_spec("mylib::Widget").is_some());
    }

    #[test]
    fn load_with_deps_respects_order() {
        let tmp = tempfile::tempdir().unwrap();

        // Base lib — no dependencies
        let base_dir = make_lib_dir(
            tmp.path(),
            "base",
            "lib.xeto",
            r#"
pragma: Lib <
  doc: "Base"
  version: "1.0.0"
>

BaseType: Obj { core }
"#,
        );

        // App lib — depends on base
        let app_dir = make_lib_dir(
            tmp.path(),
            "app",
            "lib.xeto",
            r#"
pragma: Lib <
  doc: "App"
  version: "1.0.0"
  depends: { { lib: "base" } }
>

AppType: BaseType { extra }
"#,
        );

        // Pass dirs in reverse order — topo sort should still load base first
        let mut ns = DefNamespace::new();
        let loaded = load_xeto_with_deps(&[app_dir, base_dir], &mut ns).unwrap();
        assert_eq!(loaded, vec!["base", "app"]);
        assert!(ns.get_spec("base::BaseType").is_some());
        assert!(ns.get_spec("app::AppType").is_some());
    }

    #[test]
    fn load_with_deps_circular_detected() {
        let tmp = tempfile::tempdir().unwrap();

        // "a" depends on "b" ...
        let dir_a = make_lib_dir(
            tmp.path(),
            "a",
            "lib.xeto",
            r#"
pragma: Lib <
  doc: "A"
  version: "1.0.0"
  depends: { { lib: "b" } }
>

A: Obj { tag }
"#,
        );

        // ... and "b" depends on "a".
        let dir_b = make_lib_dir(
            tmp.path(),
            "b",
            "lib.xeto",
            r#"
pragma: Lib <
  doc: "B"
  version: "1.0.0"
  depends: { { lib: "a" } }
>

B: Obj { tag }
"#,
        );

        let mut ns = DefNamespace::new();
        let outcome = load_xeto_with_deps(&[dir_a, dir_b], &mut ns);
        assert!(outcome.is_err());
        let err = outcome.unwrap_err().to_string();
        assert!(err.contains("circular"), "expected circular error: {err}");
    }

    #[test]
    fn load_with_deps_duplicate_name() {
        let tmp = tempfile::tempdir().unwrap();

        // First dir gets the lib name "samename" from its directory name (no pragma)
        let by_dir_name = make_lib_dir(tmp.path(), "samename", "a.xeto", "Foo: Obj { x }");

        // Second dir uses a pragma with name: "samename" to produce the duplicate
        let by_pragma = make_lib_dir(
            tmp.path(),
            "other",
            "lib.xeto",
            "pragma: Lib < name: \"samename\", version: \"1.0.0\" >\nBar: Obj { y }",
        );

        let mut ns = DefNamespace::new();
        let outcome = load_xeto_with_deps(&[by_dir_name, by_pragma], &mut ns);
        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().to_string().contains("duplicate"));
    }
}