Skip to main content

ontologos_parser/
load.rs

1use std::path::{Component, Path, PathBuf};
2
3use ontologos_core::Ontology;
4
5use crate::limits::ParseLimits;
6use crate::map::map_to_core;
7use crate::read::{read_horned_owl, sniff_file_header};
8use crate::{
9    detect_format, detect_format_from_bytes, detect_functional_from_bytes,
10    detect_turtle_from_bytes, Error, Format, Result,
11};
12
13/// Resolve and validate a path before loading an ontology file.
14pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
15    let normalized = normalize_path(path)?;
16
17    if let Some(base) = base {
18        let base_normalized = normalize_path(base)?;
19        if !path_is_under_base(&normalized, &base_normalized) {
20            return Err(Error::Parse(format!(
21                "path {} escapes allowed base {}",
22                normalized.display(),
23                base_normalized.display()
24            )));
25        }
26    }
27
28    Ok(normalized)
29}
30
31/// Load an ontology from a validated file path.
32pub fn load_ontology(path: &Path) -> Result<Ontology> {
33    load_ontology_with_limits(path, ParseLimits::default())
34}
35
36/// Load an ontology constrained to stay under `base` (untrusted uploads).
37pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
38    load_ontology_with_limits_and_base(path, ParseLimits::default(), Some(base))
39}
40
41/// Load an ontology with custom [`ParseLimits`].
42pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
43    load_ontology_with_limits_and_base(path, limits, None)
44}
45
46/// Load an ontology with custom limits and optional sandbox base directory.
47pub fn load_ontology_with_limits_and_base(
48    path: &Path,
49    limits: ParseLimits,
50    base: Option<&Path>,
51) -> Result<Ontology> {
52    let validated = validate_load_path(path, base)?;
53    if !validated.is_file() {
54        return Err(Error::Parse(format!("not a file: {}", validated.display())));
55    }
56
57    let format = detect_format_with_sniff(&validated)?;
58    let set_ontology = read_horned_owl(&validated, format, limits)?;
59    let (mut ontology, report) = map_to_core(&set_ontology, limits)?;
60    ontology.set_parse_meta(report.into_meta());
61    Ok(ontology)
62}
63
64fn detect_format_with_sniff(path: &Path) -> Result<Format> {
65    if let Some(format) = detect_format(path) {
66        return Ok(format);
67    }
68
69    let header = sniff_file_header(path, 4096)?;
70    if let Some(format) = detect_format_from_bytes(&header) {
71        return Ok(format);
72    }
73    if detect_turtle_from_bytes(&header) {
74        return Ok(Format::Turtle);
75    }
76    if detect_functional_from_bytes(&header) {
77        return Ok(Format::Functional);
78    }
79
80    Err(Error::UnsupportedFormat(format!(
81        "could not detect OWL/RDF format for {}",
82        path.display()
83    )))
84}
85
86fn normalize_path(path: &Path) -> Result<PathBuf> {
87    let base = if path.is_absolute() {
88        PathBuf::new()
89    } else {
90        std::env::current_dir().map_err(|e| Error::Parse(e.to_string()))?
91    };
92
93    let mut normalized = base;
94    for component in path.components() {
95        match component {
96            Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
97            Component::CurDir => {}
98            Component::ParentDir => {
99                if !normalized.pop() {
100                    return Err(Error::Parse("path escapes beyond filesystem root".into()));
101                }
102            }
103            Component::Normal(part) => normalized.push(part),
104        }
105    }
106
107    if normalized.exists() {
108        normalized = normalized
109            .canonicalize()
110            .map_err(|e| Error::Parse(e.to_string()))?;
111    }
112
113    Ok(normalized)
114}
115
/// Returns `true` when `path` equals `base` or is nested beneath it.
///
/// The comparison is made on whole path components, so a sibling directory
/// whose name merely begins with the base's name (`uploads_evil` next to
/// `uploads`) is not treated as nested. An empty `base` matches every path.
fn path_is_under_base(path: &Path, base: &Path) -> bool {
    // `Path::starts_with` compares component by component, never by raw
    // string prefix.
    path.starts_with(base)
}
127
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A relative path that climbs out of the working directory must be
    /// rejected when the working directory is the sandbox base.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let base = std::env::current_dir().expect("cwd");
        let err = validate_load_path(Path::new("../../../etc/passwd"), Some(&base))
            .expect_err("traversal");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A sibling directory whose name starts with the base's name must not be
    /// accepted as being inside the base.
    #[test]
    fn rejects_path_prefix_bypass() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_uploads_base");
        let evil = parent.join("ontologos_uploads_base_evil");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::create_dir_all(&evil).expect("create evil sibling");
        let file = evil.join("secret.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let outcome = validate_load_path(&file, Some(&base));

        // Clean up before asserting so a failing assertion does not leave
        // fixtures behind in the temp directory.
        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&evil);
        let _ = std::fs::remove_dir(&base);

        let err = outcome.expect_err("prefix bypass");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A file in a subdirectory of the base validates and is reported as
    /// nested under the canonicalized base.
    #[test]
    fn path_is_under_base_accepts_nested_file() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_nested_base");
        let nested = base.join("nested");
        std::fs::create_dir_all(&nested).expect("create nested");
        let file = nested.join("ontology.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        // Everything that touches the filesystem happens before cleanup; the
        // final check is purely on path components.
        let outcome = validate_load_path(&file, Some(&base));
        let canonical_base = base.canonicalize();

        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&nested);
        let _ = std::fs::remove_dir(&base);

        let validated = outcome.expect("nested file under base");
        let canonical_base = canonical_base.expect("canonicalize base");
        assert!(path_is_under_base(&validated, &canonical_base));
    }
}