1use std::path::{Component, Path, PathBuf};
2
3use ontologos_core::Ontology;
4
5use crate::limits::ParseLimits;
6use crate::map::map_to_core;
7use crate::read::{read_horned_owl, sniff_file_header};
8use crate::{
9 detect_format, detect_format_from_bytes, detect_functional_from_bytes,
10 detect_turtle_from_bytes, Error, Format, Result,
11};
12
13pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
15 let normalized = normalize_path(path)?;
16
17 if let Some(base) = base {
18 let base_normalized = normalize_path(base)?;
19 if !path_is_under_base(&normalized, &base_normalized) {
20 return Err(Error::Parse(format!(
21 "path {} escapes allowed base {}",
22 normalized.display(),
23 base_normalized.display()
24 )));
25 }
26 }
27
28 Ok(normalized)
29}
30
31pub fn load_ontology(path: &Path) -> Result<Ontology> {
33 load_ontology_with_limits(path, ParseLimits::default())
34}
35
36pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
38 load_ontology_with_limits_and_base(path, ParseLimits::default(), Some(base))
39}
40
41pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
43 load_ontology_with_limits_and_base(path, limits, None)
44}
45
46pub fn load_ontology_with_limits_and_base(
48 path: &Path,
49 limits: ParseLimits,
50 base: Option<&Path>,
51) -> Result<Ontology> {
52 let validated = validate_load_path(path, base)?;
53 if !validated.is_file() {
54 return Err(Error::Parse(format!("not a file: {}", validated.display())));
55 }
56
57 let format = detect_format_with_sniff(&validated)?;
58 let set_ontology = read_horned_owl(&validated, format, limits)?;
59 let (mut ontology, report) = map_to_core(&set_ontology, limits)?;
60 ontology.set_parse_meta(report.into_meta());
61 Ok(ontology)
62}
63
64fn detect_format_with_sniff(path: &Path) -> Result<Format> {
65 if let Some(format) = detect_format(path) {
66 return Ok(format);
67 }
68
69 let header = sniff_file_header(path, 4096)?;
70 if let Some(format) = detect_format_from_bytes(&header) {
71 return Ok(format);
72 }
73 if detect_turtle_from_bytes(&header) {
74 return Ok(Format::Turtle);
75 }
76 if detect_functional_from_bytes(&header) {
77 return Ok(Format::Functional);
78 }
79
80 Err(Error::UnsupportedFormat(format!(
81 "could not detect OWL/RDF format for {}",
82 path.display()
83 )))
84}
85
86fn normalize_path(path: &Path) -> Result<PathBuf> {
87 let base = if path.is_absolute() {
88 PathBuf::new()
89 } else {
90 std::env::current_dir().map_err(|e| Error::Parse(e.to_string()))?
91 };
92
93 let mut normalized = base;
94 for component in path.components() {
95 match component {
96 Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
97 Component::CurDir => {}
98 Component::ParentDir => {
99 if !normalized.pop() {
100 return Err(Error::Parse("path escapes beyond filesystem root".into()));
101 }
102 }
103 Component::Normal(part) => normalized.push(part),
104 }
105 }
106
107 if normalized.exists() {
108 normalized = normalized
109 .canonicalize()
110 .map_err(|e| Error::Parse(e.to_string()))?;
111 }
112
113 Ok(normalized)
114}
115
/// Reports whether `path` equals `base` or is located somewhere beneath it.
///
/// The comparison is made on whole path components, so `/data/base_evil` is not considered
/// to be under `/data/base`. No filesystem access or normalization happens here; callers
/// are expected to pass already normalized paths.
fn path_is_under_base(path: &Path, base: &Path) -> bool {
    // `Path::starts_with` performs the component-wise prefix check.
    path.starts_with(base)
}
127
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Scratch directory under the system temp dir whose name includes the process id.
    ///
    /// The directory tree is removed when the value is dropped, so fixtures are cleaned up
    /// even when an assertion panics, and concurrent test processes do not share state.
    struct ScratchDir(std::path::PathBuf);

    impl ScratchDir {
        /// Creates an empty scratch directory; `label` must be unique per test.
        fn new(label: &str) -> Self {
            let root = std::env::temp_dir().join(format!(
                "ontologos_{}_{}",
                label,
                std::process::id()
            ));
            // Clear leftovers from an earlier run that happened to use the same name.
            let _ = std::fs::remove_dir_all(&root);
            std::fs::create_dir_all(&root).expect("create scratch dir");
            Self(root)
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// A relative path climbing out of the current directory must be rejected.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let base = std::env::current_dir().expect("cwd");
        let err = validate_load_path(Path::new("../../../etc/passwd"), Some(&base))
            .expect_err("traversal");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A sibling directory sharing the base's name as a string prefix is not under the base.
    #[test]
    fn rejects_path_prefix_bypass() {
        let scratch = ScratchDir::new("prefix_bypass");
        let base = scratch.path().join("uploads_base");
        let evil = scratch.path().join("uploads_base_evil");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::create_dir_all(&evil).expect("create evil sibling");
        let file = evil.join("secret.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let err = validate_load_path(&file, Some(&base)).expect_err("prefix bypass");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A file nested below the base validates and compares as being under it.
    #[test]
    fn path_is_under_base_accepts_nested_file() {
        let scratch = ScratchDir::new("nested_base");
        let base = scratch.path().join("base");
        let nested = base.join("nested");
        std::fs::create_dir_all(&nested).expect("create nested");
        let file = nested.join("ontology.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let validated = validate_load_path(&file, Some(&base)).expect("nested file under base");
        assert!(path_is_under_base(
            &validated,
            &base.canonicalize().expect("canonicalize base")
        ));
    }
}