Skip to main content

ontologos_parser/
load.rs

1use std::path::{Component, Path, PathBuf};
2
3use ontologos_core::Ontology;
4
5use crate::limits::ParseLimits;
6use crate::map::map_to_core;
7use crate::read::{read_horned_owl, sniff_file_header};
8use crate::{
9    detect_format, detect_format_from_bytes, detect_functional_from_bytes,
10    detect_turtle_from_bytes, Error, Format, Result,
11};
12
13/// Resolve and validate a path before loading an ontology file.
14pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
15    let normalized = normalize_path(path)?;
16
17    if let Some(base) = base {
18        let base_normalized = normalize_path(base)?;
19        if !normalized.starts_with(&base_normalized) {
20            return Err(Error::Parse(format!(
21                "path {} escapes allowed base {}",
22                normalized.display(),
23                base_normalized.display()
24            )));
25        }
26    }
27
28    Ok(normalized)
29}
30
31/// Load an ontology from a validated file path.
32pub fn load_ontology(path: &Path) -> Result<Ontology> {
33    load_ontology_with_limits(path, ParseLimits::default())
34}
35
36/// Load an ontology constrained to stay under `base` (untrusted uploads).
37pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
38    load_ontology_with_limits_and_base(path, ParseLimits::default(), Some(base))
39}
40
41/// Load an ontology with custom [`ParseLimits`].
42pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
43    load_ontology_with_limits_and_base(path, limits, None)
44}
45
46/// Load an ontology with custom limits and optional sandbox base directory.
47pub fn load_ontology_with_limits_and_base(
48    path: &Path,
49    limits: ParseLimits,
50    base: Option<&Path>,
51) -> Result<Ontology> {
52    let validated = validate_load_path(path, base)?;
53    if !validated.is_file() {
54        return Err(Error::Parse(format!("not a file: {}", validated.display())));
55    }
56
57    let format = detect_format_with_sniff(&validated)?;
58    let set_ontology = read_horned_owl(&validated, format, limits)?;
59    let (mut ontology, report) = map_to_core(&set_ontology, limits)?;
60    ontology.set_parse_meta(report.into_meta());
61    Ok(ontology)
62}
63
64fn detect_format_with_sniff(path: &Path) -> Result<Format> {
65    if let Some(format) = detect_format(path) {
66        return Ok(format);
67    }
68
69    let header = sniff_file_header(path, 4096)?;
70    if let Some(format) = detect_format_from_bytes(&header) {
71        return Ok(format);
72    }
73    if detect_turtle_from_bytes(&header) {
74        return Ok(Format::Turtle);
75    }
76    if detect_functional_from_bytes(&header) {
77        return Ok(Format::Functional);
78    }
79
80    Err(Error::UnsupportedFormat(format!(
81        "could not detect OWL/RDF format for {}",
82        path.display()
83    )))
84}
85
86fn normalize_path(path: &Path) -> Result<PathBuf> {
87    let base = if path.is_absolute() {
88        PathBuf::new()
89    } else {
90        std::env::current_dir().map_err(|e| Error::Parse(e.to_string()))?
91    };
92
93    let mut normalized = base;
94    for component in path.components() {
95        match component {
96            Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
97            Component::CurDir => {}
98            Component::ParentDir => {
99                if !normalized.pop() {
100                    return Err(Error::Parse("path escapes beyond filesystem root".into()));
101                }
102            }
103            Component::Normal(part) => normalized.push(part),
104        }
105    }
106
107    if normalized.exists() {
108        normalized = normalized
109            .canonicalize()
110            .map_err(|e| Error::Parse(e.to_string()))?;
111    }
112
113    Ok(normalized)
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A relative path that climbs out of the current directory must be refused when the
    /// current directory is used as the sandbox base.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let sandbox = std::env::current_dir().expect("cwd");
        let escaping = Path::new("../../../etc/passwd");

        let outcome = validate_load_path(escaping, Some(&sandbox));

        let failure = outcome.expect_err("traversal");
        assert!(matches!(failure, Error::Parse(_)));
    }
}