1use std::fs::{File, OpenOptions};
2use std::io::{Read, Seek, SeekFrom};
3use std::path::{Component, Path, PathBuf};
4
5use ontologos_core::Ontology;
6
7use crate::limits::ParseLimits;
8use crate::map::map_to_core;
9use crate::read::{read_horned_owl_from_reader, sniff_and_rewind};
10use crate::{
11 detect_format, detect_format_from_bytes, detect_functional_from_bytes,
12 detect_turtle_from_bytes, Error, Format, Result,
13};
14
// `open(2)` flag that makes the call fail instead of following a symbolic link in the
// final path component. The numeric value is part of the platform ABI, so it has to be
// selected per operating system and, on Linux, per CPU architecture. Passing the wrong
// bit does not fail: it silently requests a different (often no-op) flag, which disables
// the protection.

/// `O_NOFOLLOW` for Linux/Android architectures that use the ARM-style flag layout.
#[cfg(all(
    any(target_os = "linux", target_os = "android"),
    any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "m68k"
    )
))]
const O_NOFOLLOW: i32 = 0o100_000;

/// `O_NOFOLLOW` for the remaining Linux/Android architectures (x86, x86_64, RISC-V, MIPS,
/// s390x, ...), which use the `asm-generic` value. On these targets `0o100_000` is
/// `O_LARGEFILE`, not `O_NOFOLLOW`.
#[cfg(all(
    any(target_os = "linux", target_os = "android"),
    not(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "m68k"
    ))
))]
const O_NOFOLLOW: i32 = 0o400_000;

/// `O_NOFOLLOW` for Darwin and the BSD family (`0x40` is `O_ASYNC` on these systems).
#[cfg(any(
    target_os = "macos",
    target_os = "ios",
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "openbsd",
    target_os = "dragonfly"
))]
const O_NOFOLLOW: i32 = 0x0000_0100;

/// Fallback for other Unix targets whose flag value is not known here: no extra flag is
/// passed, so the open itself does not refuse symlinks on these platforms.
#[cfg(all(
    unix,
    not(any(
        target_os = "linux",
        target_os = "android",
        target_os = "macos",
        target_os = "ios",
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "openbsd",
        target_os = "dragonfly"
    ))
))]
const O_NOFOLLOW: i32 = 0;
21
22pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
24 let normalized = normalize_path(path)?;
25
26 if let Some(base) = base {
27 let base_normalized = normalize_path(base)?;
28 if !path_is_under_base(&normalized, &base_normalized) {
29 return Err(Error::Parse(format!(
30 "path {} escapes allowed base {}",
31 normalized.display(),
32 base_normalized.display()
33 )));
34 }
35 }
36
37 Ok(normalized)
38}
39
40pub fn load_ontology(path: &Path) -> Result<Ontology> {
42 load_ontology_with_limits(path, ParseLimits::default())
43}
44
45pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
47 load_ontology_with_limits_and_base(path, ParseLimits::default(), Some(base))
48}
49
50pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
52 load_ontology_with_limits_and_base(path, limits, None)
53}
54
55pub fn load_ontology_with_limits_and_base(
57 path: &Path,
58 limits: ParseLimits,
59 base: Option<&Path>,
60) -> Result<Ontology> {
61 let validated = validate_load_path(path, base)?;
62 if !validated.is_file() {
63 return Err(Error::Parse(format!("not a file: {}", validated.display())));
64 }
65
66 let mut file = open_for_load(&validated, base)?;
67 let file_len = file
68 .metadata()
69 .map_err(|e| Error::Parse(e.to_string()))?
70 .len();
71 if file_len as usize > limits.max_file_bytes {
72 return Err(Error::Parse(format!(
73 "file size {file_len} exceeds limit of {} bytes",
74 limits.max_file_bytes
75 )));
76 }
77 let format = detect_format_with_sniff(path, &mut file)?;
78 let set_ontology = read_horned_owl_from_reader(&mut file, format, limits)?;
79 let (mut ontology, report) = map_to_core(&set_ontology, limits)?;
80 ontology.set_parse_meta(report.into_meta());
81 Ok(ontology)
82}
83
84fn open_for_load(path: &Path, base: Option<&Path>) -> Result<File> {
85 let pre_meta = std::fs::symlink_metadata(path).map_err(|e| Error::Parse(e.to_string()))?;
86 let file = open_readonly_nofollow(path)?;
87 if let Some(base) = base {
88 verify_opened_under_base(&file, base, path, &pre_meta)?;
89 }
90 Ok(file)
91}
92
93fn open_readonly_nofollow(path: &Path) -> Result<File> {
94 #[cfg(unix)]
95 {
96 use std::os::unix::fs::OpenOptionsExt;
97 OpenOptions::new()
98 .read(true)
99 .custom_flags(O_NOFOLLOW)
100 .open(path)
101 .map_err(|e| Error::Parse(e.to_string()))
102 }
103 #[cfg(not(unix))]
104 {
105 File::open(path).map_err(|e| Error::Parse(e.to_string()))
106 }
107}
108
109fn verify_opened_under_base(
110 file: &File,
111 base: &Path,
112 validated: &Path,
113 pre_meta: &std::fs::Metadata,
114) -> Result<()> {
115 #[cfg(unix)]
116 use std::os::unix::fs::MetadataExt;
117
118 let file_meta = file.metadata().map_err(|e| Error::Parse(e.to_string()))?;
119 #[cfg(unix)]
120 if pre_meta.dev() != file_meta.dev() || pre_meta.ino() != file_meta.ino() {
121 return Err(Error::Parse(
122 "ontology path changed between validation and open".into(),
123 ));
124 }
125 #[cfg(not(unix))]
126 let _ = (pre_meta, file_meta);
127
128 let base_normalized = normalize_path(base)?;
129 let base_canon = base_normalized
130 .canonicalize()
131 .map_err(|e| Error::Parse(e.to_string()))?;
132
133 if let Ok(opened) = opened_path(file) {
134 let opened_canon = opened
135 .canonicalize()
136 .map_err(|e| Error::Parse(e.to_string()))?;
137 if !path_is_under_base(&opened_canon, &base_canon) {
138 return Err(Error::Parse(format!(
139 "opened file {} escapes allowed base {}",
140 opened_canon.display(),
141 base_canon.display()
142 )));
143 }
144 return Ok(());
145 }
146
147 let validated_canon = validated
148 .canonicalize()
149 .map_err(|e| Error::Parse(e.to_string()))?;
150 if !path_is_under_base(&validated_canon, &base_canon) {
151 return Err(Error::Parse(format!(
152 "path {} escapes allowed base {}",
153 validated_canon.display(),
154 base_canon.display()
155 )));
156 }
157 Ok(())
158}
159
160#[cfg(target_os = "linux")]
161fn opened_path(file: &File) -> Result<PathBuf> {
162 use std::os::unix::io::AsRawFd;
163 let fd = file.as_raw_fd();
164 std::fs::read_link(format!("/proc/self/fd/{fd}")).map_err(|e| Error::Parse(e.to_string()))
165}
166
/// Returns the path the open descriptor of `file` refers to, obtained with
/// `fcntl(fd, F_GETPATH, buf)`.
///
/// # Errors
///
/// Returns [`Error::Parse`] when `fcntl` reports failure (`-1`) or when the buffer contains
/// no NUL terminator.
#[cfg(target_os = "macos")]
fn opened_path(file: &File) -> Result<PathBuf> {
    use std::ffi::CStr;
    use std::os::unix::io::AsRawFd;

    // NOTE(review): 50 is presumably the value of `F_GETPATH` from macOS <fcntl.h> — confirm
    // against the `libc` crate constant.
    const F_GETPATH: i32 = 50;
    let fd = file.as_raw_fd();
    // NOTE(review): the size assumes MAXPATHLEN is 1024 on macOS — TODO confirm.
    let mut buf = [0u8; 1024];
    // SAFETY: `fd` is a live descriptor borrowed from `file` for the duration of the call,
    // and `buf` is a writable, zero-initialized 1024-byte buffer that outlives the call.
    // Soundness relies on F_GETPATH writing no more than the buffer size (see note above).
    let rc = unsafe { libc::fcntl(fd, F_GETPATH, buf.as_mut_ptr()) };
    if rc == -1 {
        return Err(Error::Parse("fcntl(F_GETPATH) failed".into()));
    }
    // The buffer was zero-filled, so the first NUL marks the end of the written path.
    let cstr = CStr::from_bytes_until_nul(&buf).map_err(|e| Error::Parse(e.to_string()))?;
    // Non-UTF-8 bytes are replaced lossily when building the returned path.
    Ok(PathBuf::from(cstr.to_string_lossy().into_owned()))
}
182
/// Fallback for targets other than Linux and macOS: the path of an open descriptor cannot
/// be resolved here, so this always returns an [`Error::Parse`].
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
fn opened_path(_file: &File) -> Result<PathBuf> {
    let reason = "fd path resolution unavailable";
    Err(Error::Parse(reason.into()))
}
187
188fn detect_format_with_sniff(path: &Path, reader: &mut (impl Read + Seek)) -> Result<Format> {
189 if let Some(format) = detect_format(path) {
190 reader
191 .seek(SeekFrom::Start(0))
192 .map_err(|e| Error::Parse(e.to_string()))?;
193 return Ok(format);
194 }
195
196 let header = sniff_and_rewind(reader, 4096)?;
197 if let Some(format) = detect_format_from_bytes(&header) {
198 return Ok(format);
199 }
200 if detect_turtle_from_bytes(&header) {
201 return Ok(Format::Turtle);
202 }
203 if detect_functional_from_bytes(&header) {
204 return Ok(Format::Functional);
205 }
206
207 Err(Error::UnsupportedFormat(format!(
208 "could not detect OWL/RDF format for {}",
209 path.display()
210 )))
211}
212
213fn normalize_path(path: &Path) -> Result<PathBuf> {
214 let base = if path.is_absolute() {
215 PathBuf::new()
216 } else {
217 std::env::current_dir().map_err(|e| Error::Parse(e.to_string()))?
218 };
219
220 let mut normalized = base;
221 for component in path.components() {
222 match component {
223 Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
224 Component::CurDir => {}
225 Component::ParentDir => {
226 if !normalized.pop() {
227 return Err(Error::Parse("path escapes beyond filesystem root".into()));
228 }
229 }
230 Component::Normal(part) => normalized.push(part),
231 }
232 }
233
234 if normalized.exists() {
235 normalized = normalized
236 .canonicalize()
237 .map_err(|e| Error::Parse(e.to_string()))?;
238 }
239
240 Ok(normalized)
241}
242
/// Returns `true` when `path` equals `base` or is located beneath it.
///
/// The comparison is made on whole path components, never on raw string prefixes, so
/// `/a/bc` is not considered to be under `/a/b`. An empty `base` matches every path.
fn path_is_under_base(path: &Path, base: &Path) -> bool {
    // `Path::starts_with` performs exactly this component-wise prefix comparison.
    path.starts_with(base)
}
254
/// Tests for path validation and sandboxed loading.
///
/// Each test removes its filesystem fixtures *before* asserting, so a failing assertion
/// does not leave artifacts behind that would break the next run.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A relative path that climbs out of the base directory must be rejected.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let base = std::env::current_dir().expect("cwd");
        let err = validate_load_path(Path::new("../../../etc/passwd"), Some(&base))
            .expect_err("traversal");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A sibling directory whose name merely starts with the base's name is not inside it.
    #[test]
    fn rejects_path_prefix_bypass() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_uploads_base");
        let evil = parent.join("ontologos_uploads_base_evil");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::create_dir_all(&evil).expect("create evil sibling");
        let file = evil.join("secret.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let result = validate_load_path(&file, Some(&base));

        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&evil);
        let _ = std::fs::remove_dir(&base);

        let err = result.expect_err("prefix bypass");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A file nested below the base directory is accepted and stays under the base.
    #[test]
    fn path_is_under_base_accepts_nested_file() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_nested_base");
        let nested = base.join("nested");
        std::fs::create_dir_all(&nested).expect("create nested");
        let file = nested.join("ontology.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        // Both values must be computed while the fixtures still exist.
        let result = validate_load_path(&file, Some(&base));
        let base_canon = base.canonicalize();

        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&nested);
        let _ = std::fs::remove_dir(&base);

        let validated = result.expect("nested file under base");
        assert!(path_is_under_base(
            &validated,
            &base_canon.expect("canonicalize base")
        ));
    }

    /// A symlink inside the base that points outside of it must be rejected by the sandbox
    /// itself, not merely fail later because the target is not a parseable ontology.
    #[cfg(unix)]
    #[test]
    fn sandboxed_load_does_not_follow_symlink_to_outside_file() {
        use std::os::unix::fs::symlink;

        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_sandbox_base");
        let outside = parent.join("ontologos_outside_secret.owl");
        let link = base.join("ontology.owl");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::write(&outside, b"OUTSIDE_SECRET_CONTENT").expect("write outside");

        // A link left over from an aborted run would make `symlink` fail with EEXIST.
        let _ = std::fs::remove_file(&link);
        symlink(&outside, &link).expect("symlink");

        let result = load_ontology_in(&base, &link);

        let _ = std::fs::remove_file(&link);
        let _ = std::fs::remove_file(&outside);
        let _ = std::fs::remove_dir(&base);

        // The outside file is not a valid ontology, so *any* error would also occur if the
        // link had been followed. Require the sandbox rejection specifically.
        match result.expect_err("symlink escape") {
            Error::Parse(msg) => assert!(
                msg.contains("escapes allowed base"),
                "unexpected parse error: {msg}"
            ),
            other => panic!("expected a sandbox rejection, got {other:?}"),
        }
    }
}