1use std::path::{Component, Path, PathBuf};
2
3use ontologos_core::Ontology;
4
5use crate::limits::ParseLimits;
6use crate::map::map_to_core;
7use crate::read::{read_horned_owl, sniff_file_header};
8use crate::{
9 detect_format, detect_format_from_bytes, detect_functional_from_bytes,
10 detect_turtle_from_bytes, Error, Format, Result,
11};
12
13pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
15 let normalized = normalize_path(path)?;
16
17 if let Some(base) = base {
18 let base_normalized = normalize_path(base)?;
19 if !normalized.starts_with(&base_normalized) {
20 return Err(Error::Parse(format!(
21 "path {} escapes allowed base {}",
22 normalized.display(),
23 base_normalized.display()
24 )));
25 }
26 }
27
28 Ok(normalized)
29}
30
31pub fn load_ontology(path: &Path) -> Result<Ontology> {
33 load_ontology_with_limits(path, ParseLimits::default())
34}
35
36pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
38 load_ontology_with_limits_and_base(path, ParseLimits::default(), Some(base))
39}
40
41pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
43 load_ontology_with_limits_and_base(path, limits, None)
44}
45
46pub fn load_ontology_with_limits_and_base(
48 path: &Path,
49 limits: ParseLimits,
50 base: Option<&Path>,
51) -> Result<Ontology> {
52 let validated = validate_load_path(path, base)?;
53 if !validated.is_file() {
54 return Err(Error::Parse(format!("not a file: {}", validated.display())));
55 }
56
57 let format = detect_format_with_sniff(&validated)?;
58 let set_ontology = read_horned_owl(&validated, format, limits)?;
59 let (mut ontology, report) = map_to_core(&set_ontology, limits)?;
60 ontology.set_parse_meta(report.into_meta());
61 Ok(ontology)
62}
63
64fn detect_format_with_sniff(path: &Path) -> Result<Format> {
65 if let Some(format) = detect_format(path) {
66 return Ok(format);
67 }
68
69 let header = sniff_file_header(path, 4096)?;
70 if let Some(format) = detect_format_from_bytes(&header) {
71 return Ok(format);
72 }
73 if detect_turtle_from_bytes(&header) {
74 return Ok(Format::Turtle);
75 }
76 if detect_functional_from_bytes(&header) {
77 return Ok(Format::Functional);
78 }
79
80 Err(Error::UnsupportedFormat(format!(
81 "could not detect OWL/RDF format for {}",
82 path.display()
83 )))
84}
85
86fn normalize_path(path: &Path) -> Result<PathBuf> {
87 let base = if path.is_absolute() {
88 PathBuf::new()
89 } else {
90 std::env::current_dir().map_err(|e| Error::Parse(e.to_string()))?
91 };
92
93 let mut normalized = base;
94 for component in path.components() {
95 match component {
96 Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
97 Component::CurDir => {}
98 Component::ParentDir => {
99 if !normalized.pop() {
100 return Err(Error::Parse("path escapes beyond filesystem root".into()));
101 }
102 }
103 Component::Normal(part) => normalized.push(part),
104 }
105 }
106
107 if normalized.exists() {
108 normalized = normalized
109 .canonicalize()
110 .map_err(|e| Error::Parse(e.to_string()))?;
111 }
112
113 Ok(normalized)
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// With the working directory as the allowed base, a relative path that
    /// climbs out of it must be refused with a parse error.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let cwd = std::env::current_dir().expect("cwd");
        let escaping = Path::new("../../../etc/passwd");

        let err = validate_load_path(escaping, Some(cwd.as_path())).expect_err("traversal");

        assert!(matches!(err, Error::Parse(_)));
    }
}