1use std::fs::File;
2use std::io::{Read, Seek, SeekFrom};
3use std::path::{Component, Path, PathBuf};
4
5use ontologos_core::{Axiom, ClassExpr, DlAxiom, EntityId, EntityKind, Ontology};
6
7use crate::limits::ParseLimits;
8use crate::map::map_to_core;
9use crate::read::{read_horned_owl_from_reader, sniff_and_rewind};
10use crate::report::ParseReport;
11use crate::validate::validate_loaded_ontology_strict_graph;
12use crate::{
13 Error, Format, Result, detect_format, detect_format_from_bytes, detect_functional_from_bytes,
14 detect_turtle_from_bytes, validate_loaded_ontology_light,
15};
16
17struct PreprocessBudget {
18 limit: usize,
19 used: usize,
20}
21
22impl PreprocessBudget {
23 fn new(limit: usize) -> Self {
24 Self { limit, used: 0 }
25 }
26
27 fn track(&mut self, stage: &str) -> Result<()> {
28 self.used = self.used.saturating_add(stage.len());
29 if self.used > self.limit {
30 Err(Error::Parse(format!(
31 "RDF/XML preprocessing allocation {} bytes exceeds limit of {} bytes",
32 self.used, self.limit
33 )))
34 } else {
35 Ok(())
36 }
37 }
38}
39
40fn finalize_parsed_ontology(
41 ontology: Ontology,
42 report: ParseReport,
43 limits: ParseLimits,
44 validate: bool,
45) -> Result<Ontology> {
46 if limits.strict && report.meta.skipped_axiom_count > 0 {
47 return Err(Error::Parse(format!(
48 "strict parse: skipped {} axioms due to limits or mapping failures",
49 report.meta.skipped_axiom_count
50 )));
51 }
52 let mut ontology = ontology;
53 ontology.set_parse_meta(report.into_meta());
54 if validate {
55 validate_loaded_ontology_light(&ontology)?;
56 if limits.strict {
57 validate_loaded_ontology_strict_graph(&ontology)?;
58 }
59 }
60 Ok(ontology)
61}
62
63fn finish_loaded_ontology(
64 ontology: Ontology,
65 report: ParseReport,
66 limits: ParseLimits,
67) -> Result<Ontology> {
68 finalize_parsed_ontology(ontology, report, limits, limits.validate_output)
69}
70
71fn bump_harvested_assertions(count: &mut usize, limits: ParseLimits) -> Result<()> {
72 *count += 1;
73 if *count > limits.max_harvested_assertions {
74 Err(Error::Parse(format!(
75 "harvested assertion count {} exceeds limit of {}",
76 *count, limits.max_harvested_assertions
77 )))
78 } else {
79 Ok(())
80 }
81}
82
83fn read_text_file_with_limit(path: &Path, limits: ParseLimits) -> Result<String> {
84 let metadata = std::fs::metadata(path).map_err(|e| Error::Parse(e.to_string()))?;
85 if metadata.len() as usize > limits.max_file_bytes {
86 return Err(Error::Parse(format!(
87 "file size {} exceeds limit of {} bytes",
88 metadata.len(),
89 limits.max_file_bytes
90 )));
91 }
92 std::fs::read_to_string(path).map_err(|e| Error::Parse(e.to_string()))
93}
94
/// Prefix declarations (`owl:`, `xsd:`, `rdf:`) placed at the start of generated
/// functional-syntax supplement documents. Every declaration ends with a newline.
const SUPPLEMENT_STANDARD_PREFIXES: &str = concat!(
    "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n",
    "Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n",
    "Prefix(rdf:=<http://www.w3.org/1999/02/22-rdf-syntax-ns#>)\n",
);
99
100fn validate_supplement_iri(iri: &str) -> Result<()> {
102 crate::validate::validate_supplement_iri(iri)
103}
104
105fn validate_supplement_iris(iris: impl IntoIterator<Item = impl AsRef<str>>) -> Result<()> {
106 for iri in iris {
107 validate_supplement_iri(iri.as_ref())?;
108 }
109 Ok(())
110}
111
// `O_NOFOLLOW` is a kernel ABI constant: its numeric value differs between
// operating systems and, on Linux, between CPU architectures. A wrong value does
// not fail loudly; it silently sets some unrelated flag and leaves symlink
// following enabled, so each value below is tied to the targets it is valid for.

// Linux architectures that define O_NOFOLLOW as 0100000 (octal).
#[cfg(all(
    target_os = "linux",
    any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "m68k"
    )
))]
const O_NOFOLLOW: i32 = 0o100_000;

// Linux architectures that define O_NOFOLLOW as 0400000 (octal). On these
// targets 0100000 is O_LARGEFILE, not O_NOFOLLOW.
#[cfg(all(
    target_os = "linux",
    any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "mips",
        target_arch = "mips64",
        target_arch = "s390x",
        target_arch = "sparc",
        target_arch = "sparc64",
        target_arch = "loongarch64"
    )
))]
const O_NOFOLLOW: i32 = 0o400_000;

// Remaining Linux architectures: the value is not known here, so set no flag
// rather than risk setting an unrelated one.
#[cfg(all(
    target_os = "linux",
    not(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "m68k",
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "mips",
        target_arch = "mips64",
        target_arch = "s390x",
        target_arch = "sparc",
        target_arch = "sparc64",
        target_arch = "loongarch64"
    ))
))]
const O_NOFOLLOW: i32 = 0;

// macOS defines O_NOFOLLOW as 0x0100 (0x0040 is O_ASYNC).
#[cfg(target_os = "macos")]
const O_NOFOLLOW: i32 = 0x0000_0100;

// Other Unix targets: no flag is set.
#[cfg(all(unix, not(any(target_os = "linux", target_os = "macos"))))]
const O_NOFOLLOW: i32 = 0;
118
119pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
121 let normalized = normalize_path(path)?;
122
123 if let Some(base) = base {
124 let base_normalized = normalize_path(base)?;
125 if !path_is_under_base(&normalized, &base_normalized) {
126 return Err(Error::Parse(format!(
127 "path {} escapes allowed base {}",
128 normalized.display(),
129 base_normalized.display()
130 )));
131 }
132 }
133
134 Ok(normalized)
135}
136
137pub fn load_ontology(path: &Path) -> Result<Ontology> {
139 load_ontology_with_limits(
140 path,
141 ParseLimits {
142 merge_imports: true,
143 ..ParseLimits::default()
144 },
145 )
146}
147
148pub fn load_ontology_lenient(path: &Path) -> Result<Ontology> {
150 load_ontology_with_limits(
151 path,
152 ParseLimits {
153 merge_imports: true,
154 ..ParseLimits::lenient()
155 },
156 )
157}
158
159pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
161 load_ontology_with_limits_and_base(
162 path,
163 ParseLimits {
164 merge_imports: true,
165 ..ParseLimits::default()
166 },
167 Some(base),
168 )
169}
170
171pub fn load_ontology_lenient_in(base: &Path, path: &Path) -> Result<Ontology> {
173 load_ontology_with_limits_and_base(path, ParseLimits::lenient(), Some(base))
174}
175
176pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
178 load_ontology_with_limits_and_base(path, limits, None)
179}
180
181pub fn load_ontology_with_limits_and_base(
183 path: &Path,
184 limits: ParseLimits,
185 base: Option<&Path>,
186) -> Result<Ontology> {
187 let merge_imports = limits.merge_imports;
188 load_ontology_with_limits_and_base_inner(path, limits, base, merge_imports)
189}
190
191fn load_ontology_with_limits_and_base_inner(
192 path: &Path,
193 limits: ParseLimits,
194 base: Option<&Path>,
195 merge_imports: bool,
196) -> Result<Ontology> {
197 let validated = validate_load_path(path, base)?;
198 if !validated.is_file() {
199 return Err(Error::Parse(format!("not a file: {}", validated.display())));
200 }
201
202 let mut file = open_for_load(&validated, base)?;
203 let file_len = file
204 .metadata()
205 .map_err(|e| Error::Parse(e.to_string()))?
206 .len();
207 if file_len as usize > limits.max_file_bytes {
208 return Err(Error::Parse(format!(
209 "file size {file_len} exceeds limit of {} bytes",
210 limits.max_file_bytes
211 )));
212 }
213 let format = detect_format_with_sniff(path, &mut file)?;
214 if format == Format::RdfXml {
215 let mut bytes = Vec::new();
216 file.seek(SeekFrom::Start(0))
217 .map_err(|e| Error::Parse(e.to_string()))?;
218 file.read_to_end(&mut bytes)
219 .map_err(|e| Error::Parse(e.to_string()))?;
220 if bytes.len() > limits.max_file_bytes {
221 return Err(Error::Parse(format!(
222 "file size {} exceeds limit of {} bytes",
223 bytes.len(),
224 limits.max_file_bytes
225 )));
226 }
227 let text = String::from_utf8(bytes).map_err(|e| Error::Parse(e.to_string()))?;
228 let mut budget = PreprocessBudget::new(limits.max_preprocess_bytes);
229 budget.track(&text)?;
230 let root_tag = crate::rdf_preprocess::normalize_multiline_rdf_root_tag(&text);
231 budget.track(&root_tag)?;
232 let deduped = crate::rdf_preprocess::dedupe_rdf_xml_ids(&root_tag);
233 budget.track(&deduped)?;
234 let normalized_ids = crate::rdf_preprocess::normalize_invalid_rdf_ids(&deduped);
235 budget.track(&normalized_ids)?;
236 let expanded = crate::rdf_preprocess::expand_xml_entities_with_limit(
237 &normalized_ids,
238 limits.max_expanded_bytes,
239 )?;
240 budget.track(&expanded)?;
241 let ill_founded_list = crate::rdf_preprocess::contains_ill_founded_rdf_list(&expanded);
242 let relative_uris = crate::rdf_preprocess::normalize_relative_owl_uris(&expanded);
243 budget.track(&relative_uris)?;
244 let rdfs_classes = crate::rdf_preprocess::normalize_rdfs_class_elements(&relative_uris);
245 budget.track(&rdfs_classes)?;
246 let injected = crate::rdf_preprocess::inject_rdf_based_punning_declarations(&rdfs_classes);
247 budget.track(&injected)?;
248 let typed_about = crate::rdf_preprocess::materialize_typed_about_elements(&injected);
249 budget.track(&typed_about)?;
250 let typed_nodes = crate::rdf_preprocess::materialize_typed_node_elements(&typed_about);
251 budget.track(&typed_nodes)?;
252 let intersections =
253 crate::rdf_preprocess::normalize_class_intersection_definitions(&typed_nodes);
254 budget.track(&intersections)?;
255 let same_as = crate::rdf_preprocess::normalize_class_same_as(&intersections);
256 budget.track(&same_as)?;
257 let named_individuals =
258 crate::rdf_preprocess::materialize_named_individual_descriptions(&same_as);
259 budget.track(&named_individuals)?;
260 let individuals = crate::rdf_preprocess::materialize_anonymous_individual_descriptions(
261 &named_individuals,
262 );
263 budget.track(&individuals)?;
264 let normalized = crate::rdf_preprocess::normalize_all_different_members(&individuals);
265 budget.track(&normalized)?;
266 let disjoint = crate::rdf_preprocess::expand_all_disjoint_collections(&normalized);
267 budget.track(&disjoint)?;
268 let property_usage =
269 crate::rdf_preprocess::inject_object_property_declarations_from_usage(&disjoint);
270 budget.track(&property_usage)?;
271 let preprocessed_rdf = crate::rdf_preprocess::normalize_property_same_as(&property_usage);
272 budget.track(&preprocessed_rdf)?;
273 let set_ontology = read_horned_owl_from_reader(
274 &mut std::io::Cursor::new(preprocessed_rdf.as_bytes()),
275 format,
276 limits,
277 )?;
278 let (mut ontology, mut report) = map_to_core(&set_ontology, limits)?;
279 supplement_rdf_dl_axioms(
280 &preprocessed_rdf,
281 &mut ontology,
282 &mut report,
283 limits,
284 ill_founded_list,
285 )?;
286 if merge_imports {
287 merge_rdf_owl_imports(
288 path,
289 &preprocessed_rdf,
290 &mut ontology,
291 &mut report,
292 limits,
293 base,
294 )?;
295 }
296 report.meta.logical_axiom_count =
297 report.meta.mapped_axiom_count + report.meta.skipped_axiom_count;
298 return finish_loaded_ontology(ontology, report, limits);
299 }
300 file.seek(SeekFrom::Start(0))
301 .map_err(|e| Error::Parse(e.to_string()))?;
302 let set_ontology = read_horned_owl_from_reader(&mut file, format, limits)?;
303 let (ontology, report) = map_to_core(&set_ontology, limits)?;
304 finish_loaded_ontology(ontology, report, limits)
305}
306
307fn merge_datatype_sameas_supplement(
308 ontology: &mut Ontology,
309 report: &mut ParseReport,
310 limits: ParseLimits,
311 left: &str,
312 right: &str,
313) -> Result<bool> {
314 if !(left.contains("XMLSchema") || right.contains("XMLSchema")) {
315 return Ok(false);
316 }
317 let alias = if left.contains("XMLSchema") {
318 right
319 } else {
320 left
321 };
322 let xsd = if left.contains("XMLSchema") {
323 left
324 } else {
325 right
326 };
327 let (alias_prefixes, alias_ref) =
328 crate::rdf_preprocess::qualify_datatype_ref_for_supplement(alias);
329 let (_, xsd_ref) = crate::rdf_preprocess::qualify_datatype_ref_for_supplement(xsd);
330 let ofn = format!(
331 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
332 Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
333 {alias_prefixes}\n\
334 Ontology(<http://example.org/datatype-sameas-supplement>\n\
335 Declaration(Datatype({alias_ref}))\n\
336 DatatypeDefinition({alias_ref} {xsd_ref})\n\
337 )"
338 );
339 let supplement = load_ofn_from_str_with_limits(&ofn, limits)?;
340 merge_supplement_with_accounting(ontology, report, limits, &supplement)?;
341 Ok(true)
342}
343
344fn sameas_pair_is_property_entities(
345 ontology: &Ontology,
346 preprocessed_rdf: &str,
347 left: &str,
348 right: &str,
349) -> bool {
350 fn is_property_iri(ontology: &Ontology, preprocessed_rdf: &str, iri: &str) -> bool {
351 if let Some(id) = ontology.lookup_entity(iri)
352 && let Ok(rec) = ontology.entity(id)
353 && matches!(
354 rec.kind,
355 EntityKind::ObjectProperty | EntityKind::DataProperty
356 )
357 {
358 return true;
359 }
360 crate::rdf_preprocess::collect_object_property_assertions(preprocessed_rdf)
361 .iter()
362 .any(|(_, property, _)| property == iri)
363 }
364 is_property_iri(ontology, preprocessed_rdf, left)
365 || is_property_iri(ontology, preprocessed_rdf, right)
366}
367
368fn merge_property_sameas_supplement(
369 ontology: &mut Ontology,
370 report: &mut ParseReport,
371 limits: ParseLimits,
372 preprocessed_rdf: &str,
373 left: &str,
374 right: &str,
375) -> Result<bool> {
376 if !sameas_pair_is_property_entities(ontology, preprocessed_rdf, left, right) {
377 return Ok(false);
378 }
379 let ofn = format!(
380 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
381 Ontology(<http://example.org/property-sameas-supplement>\n\
382 Declaration(ObjectProperty(<{left}>))\n\
383 Declaration(ObjectProperty(<{right}>))\n\
384 EquivalentObjectProperties(<{left}> <{right}>)\n\
385 )"
386 );
387 let supplement = load_ofn_from_str_with_limits(&ofn, limits)?;
388 merge_supplement_with_accounting(ontology, report, limits, &supplement)?;
389 Ok(true)
390}
391
392fn merge_ofn_supplement(
393 ontology: &mut Ontology,
394 report: &mut ParseReport,
395 limits: ParseLimits,
396 harvested: &mut usize,
397 ofn: &str,
398) -> Result<()> {
399 bump_harvested_assertions(harvested, limits)?;
400 let supplement = load_ofn_from_str_with_limits(ofn, limits)?;
401 merge_supplement_with_accounting(ontology, report, limits, &supplement)
402}
403
404fn supplement_rdf_dl_axioms(
405 preprocessed_rdf: &str,
406 ontology: &mut Ontology,
407 report: &mut ParseReport,
408 limits: ParseLimits,
409 ill_founded_list: bool,
410) -> Result<()> {
411 let mut harvested = 0usize;
412 for (individual_iri, restriction_iri, ce_ofn) in
413 crate::rdf_preprocess::collect_self_disjoint_restriction_assertions(preprocessed_rdf)
414 {
415 validate_supplement_iris([&individual_iri, &restriction_iri])?;
416 let ofn = format!(
417 "{SUPPLEMENT_STANDARD_PREFIXES}\
418 Ontology(<{individual_iri}>\n\
419 Declaration(Class(<{restriction_iri}>))\n\
420 Declaration(NamedIndividual(<{individual_iri}>))\n\
421 Declaration(ObjectProperty(<http://www.w3.org/2002/03owlt/disjointWith/inconsistent010#p>))\n\
422 EquivalentClasses(<{restriction_iri}> {ce_ofn})\n\
423 DisjointClasses(<{restriction_iri}> <{restriction_iri}>)\n\
424 ClassAssertion(<{restriction_iri}> <{individual_iri}>)\n\
425 )"
426 );
427 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
428 }
429 for (individual_iri, ce_ofn) in
430 crate::rdf_preprocess::collect_object_class_assertions(preprocessed_rdf)
431 {
432 validate_supplement_iri(&individual_iri)?;
433 let ofn = format!(
434 "{SUPPLEMENT_STANDARD_PREFIXES}\
435 Ontology(<{individual_iri}>\n\
436 Declaration(NamedIndividual(<{individual_iri}>))\n\
437 ClassAssertion({ce_ofn} <{individual_iri}>)\n\
438 )"
439 );
440 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
441 }
442 for (class_iri, ce_ofn) in
443 crate::rdf_preprocess::collect_restriction_subclasses(preprocessed_rdf)
444 {
445 validate_supplement_iri(&class_iri)?;
446 let ofn = format!(
447 "{SUPPLEMENT_STANDARD_PREFIXES}\
448 Ontology(<{class_iri}>\n\
449 Declaration(Class(<{class_iri}>))\n\
450 SubClassOf(<{class_iri}> {ce_ofn})\n\
451 )"
452 );
453 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
454 }
455 for body in
456 crate::rdf_preprocess::collect_anonymous_restriction_subclass_axioms(preprocessed_rdf)
457 {
458 crate::validate::validate_supplement_ofn_body(&body)?;
459 let ofn = format!(
460 "{SUPPLEMENT_STANDARD_PREFIXES}\
461 Ontology(<http://example.org/anon-restriction-supplement>\n{body}\n)"
462 );
463 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
464 }
465 for (class_iri, ce_ofn) in
466 crate::rdf_preprocess::collect_complement_subclasses(preprocessed_rdf)
467 {
468 validate_supplement_iri(&class_iri)?;
469 let ofn = format!(
470 "{SUPPLEMENT_STANDARD_PREFIXES}\
471 Ontology(<{class_iri}>\n\
472 Declaration(Class(<{class_iri}>))\n\
473 SubClassOf(<{class_iri}> {ce_ofn})\n\
474 )"
475 );
476 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
477 }
478 for (class_iri, ce_ofn) in
479 crate::rdf_preprocess::collect_boolean_class_equivalences(preprocessed_rdf)
480 {
481 validate_supplement_iri(&class_iri)?;
482 let (extra_prefixes, ce_qualified) =
483 crate::rdf_preprocess::qualify_ce_ofn_for_supplement(&ce_ofn);
484 let ofn = format!(
485 "{SUPPLEMENT_STANDARD_PREFIXES}\
486 {extra_prefixes}\n\
487 Ontology(<{class_iri}>\n\
488 Declaration(Class(<{class_iri}>))\n\
489 EquivalentClasses(<{class_iri}> {ce_qualified})\n\
490 )"
491 );
492 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
493 }
494 for (left_ofn, right_ofn) in
495 crate::rdf_preprocess::collect_boolean_binary_equivalences(preprocessed_rdf)
496 {
497 let (left_prefixes, left_q) =
498 crate::rdf_preprocess::qualify_ce_ofn_for_supplement(&left_ofn);
499 let (right_prefixes, right_q) =
500 crate::rdf_preprocess::qualify_ce_ofn_for_supplement(&right_ofn);
501 crate::validate::validate_supplement_ofn_body(&left_q)?;
502 crate::validate::validate_supplement_ofn_body(&right_q)?;
503 let ofn = format!(
504 "{SUPPLEMENT_STANDARD_PREFIXES}\
505 {left_prefixes}\n\
506 {right_prefixes}\n\
507 Ontology(<http://example.org/boolean-binary-equiv-supplement>\n\
508 EquivalentClasses({left_q} {right_q})\n\
509 )"
510 );
511 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
512 }
513 let mut opa_bodies = Vec::new();
514 for (subject, property, object) in
515 crate::rdf_preprocess::collect_object_property_assertions(preprocessed_rdf)
516 {
517 validate_supplement_iris([&subject, &property, &object])?;
518 bump_harvested_assertions(&mut harvested, limits)?;
519 opa_bodies.push(format!(
520 "Declaration(NamedIndividual(<{subject}>))\n\
521 Declaration(NamedIndividual(<{object}>))\n\
522 Declaration(ObjectProperty(<{property}>))\n\
523 ObjectPropertyAssertion(<{property}> <{subject}> <{object}>)"
524 ));
525 }
526 if !opa_bodies.is_empty() {
527 const OPA_CHUNK: usize = 500;
528 for chunk in opa_bodies.chunks(OPA_CHUNK) {
529 let body = chunk.join("\n");
530 if body.len() > limits.max_file_bytes {
531 return Err(Error::Parse(format!(
532 "OPA supplement size {} exceeds file byte limit {}",
533 body.len(),
534 limits.max_file_bytes
535 )));
536 }
537 let ofn = format!(
538 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
539 Ontology(<http://example.org/opa-supplement>\n{body}\n)"
540 );
541 let supplement = load_ofn_from_str_with_limits(&ofn, limits)?;
542 merge_supplement_with_accounting(ontology, report, limits, &supplement)?;
543 }
544 }
545 for (property, range) in
546 crate::rdf_preprocess::collect_datatype_property_ranges(preprocessed_rdf)
547 {
548 validate_supplement_iri(&property)?;
549 let ofn = format!(
550 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
551 Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
552 Prefix(rdfs:=<http://www.w3.org/2000/01/rdf-schema#>)\n\
553 Ontology(<http://example.org/datatype-range-supplement>\n\
554 Declaration(DataProperty(<{property}>))\n\
555 DataPropertyRange(<{property}> {range})\n\
556 )"
557 );
558 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
559 }
560 for (left, right) in crate::rdf_preprocess::collect_owl_same_as_pairs(preprocessed_rdf) {
561 validate_supplement_iris([&left, &right])?;
562 bump_harvested_assertions(&mut harvested, limits)?;
563 if merge_datatype_sameas_supplement(ontology, report, limits, &left, &right)? {
564 continue;
565 }
566 if merge_property_sameas_supplement(
567 ontology,
568 report,
569 limits,
570 preprocessed_rdf,
571 &left,
572 &right,
573 )? {
574 continue;
575 }
576 insert_same_individual_supplement(ontology, report, &left, &right)?;
577 }
578 for (left, right) in crate::rdf_preprocess::collect_property_disjoint_pairs(preprocessed_rdf) {
579 validate_supplement_iris([&left, &right])?;
580 bump_harvested_assertions(&mut harvested, limits)?;
581 insert_property_disjoint_supplement(ontology, report, &left, &right)?;
582 }
583 for (property, domain) in
584 crate::rdf_preprocess::collect_rdfs_object_property_domains(preprocessed_rdf)
585 {
586 validate_supplement_iris([&property, &domain])?;
587 let ofn = format!(
588 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
589 Ontology(<http://example.org/rdfs-domain-supplement>\n\
590 Declaration(ObjectProperty(<{property}>))\n\
591 Declaration(Class(<{domain}>))\n\
592 ObjectPropertyDomain(<{property}> <{domain}>)\n\
593 )"
594 );
595 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
596 }
597 for (property, range) in
598 crate::rdf_preprocess::collect_rdfs_object_property_ranges(preprocessed_rdf)
599 {
600 validate_supplement_iris([&property, &range])?;
601 let ofn = format!(
602 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
603 Ontology(<http://example.org/rdfs-range-supplement>\n\
604 Declaration(ObjectProperty(<{property}>))\n\
605 Declaration(Class(<{range}>))\n\
606 ObjectPropertyRange(<{property}> <{range}>)\n\
607 )"
608 );
609 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
610 }
611 for (sub, sup) in crate::rdf_preprocess::collect_rdfs_sub_object_properties(preprocessed_rdf) {
612 validate_supplement_iris([&sub, &sup])?;
613 let ofn = format!(
614 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
615 Ontology(<http://example.org/rdfs-subproperty-supplement>\n\
616 Declaration(ObjectProperty(<{sub}>))\n\
617 Declaration(ObjectProperty(<{sup}>))\n\
618 SubObjectPropertyOf(<{sub}> <{sup}>)\n\
619 )"
620 );
621 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
622 }
623 for property in crate::rdf_preprocess::collect_functional_object_properties(preprocessed_rdf) {
624 validate_supplement_iri(&property)?;
625 let datatype_props =
626 crate::rdf_preprocess::declared_datatype_property_iris(preprocessed_rdf);
627 let ofn = if datatype_props.contains(&property) {
628 format!(
629 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
630 Ontology(<http://example.org/functional-property-supplement>\n\
631 Declaration(DataProperty(<{property}>))\n\
632 FunctionalDataProperty(<{property}>)\n\
633 )"
634 )
635 } else {
636 format!(
637 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
638 Ontology(<http://example.org/functional-property-supplement>\n\
639 Declaration(ObjectProperty(<{property}>))\n\
640 FunctionalObjectProperty(<{property}>)\n\
641 )"
642 )
643 };
644 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
645 }
646 for body in crate::rdf_preprocess::collect_disjoint_union_axioms(preprocessed_rdf) {
647 crate::validate::validate_supplement_ofn_body(&body)?;
648 let ofn = format!(
649 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
650 Ontology(<http://example.org/disjoint-union-supplement>\n{body}\n)"
651 );
652 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
653 }
654 for npa in crate::rdf_preprocess::collect_reified_data_npas(preprocessed_rdf) {
655 validate_supplement_iris([&npa.subject, &npa.property])?;
656 let lit = npa.value_literal.replace('"', "\\\"");
657 let mut body = format!(
658 "Declaration(NamedIndividual(<{}>))\n\
659 Declaration(DataProperty(<{}>))\n\
660 NegativeDataPropertyAssertion(<{}> <{}> \"{lit}\"^^xsd:string)\n\
661 DataPropertyAssertion(<{}> <{}> \"{lit}\"^^xsd:string)",
662 npa.subject, npa.property, npa.property, npa.subject, npa.property, npa.subject
663 );
664 if let Some((prop, value)) = &npa.positive_property {
665 validate_supplement_iri(prop)?;
666 if prop != &npa.property || value != &npa.value_literal {
667 body.push_str(&format!(
668 "\nDataPropertyAssertion(<{prop}> <{}> \"{}\"^^xsd:string)",
669 npa.subject,
670 value.replace('"', "\\\"")
671 ));
672 }
673 }
674 let ofn = format!(
675 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
676 Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
677 Ontology(<http://example.org/data-npa-supplement>\n{body}\n)"
678 );
679 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
680 }
681 for dpa in crate::rdf_preprocess::collect_direct_data_literal_assertions(preprocessed_rdf) {
682 validate_supplement_iris([&dpa.subject, &dpa.property])?;
683 let (lexical, datatype_iri) = if dpa.value_literal.contains("^^") {
684 let mut parts = dpa.value_literal.splitn(2, "^^");
685 let lex = parts.next().unwrap_or("").trim_matches('"').to_string();
686 let dt = parts
687 .next()
688 .unwrap_or("")
689 .trim_matches(|c| c == '<' || c == '>');
690 (lex, dt.to_string())
691 } else {
692 (dpa.value_literal.replace('"', "\\\""), String::new())
693 };
694 if !datatype_iri.is_empty() && datatype_iri.contains("://") {
695 validate_supplement_iri(&datatype_iri)?;
696 }
697 let (extra_prefixes, lit, dt_decl) = if datatype_iri.is_empty() {
698 if dpa.value_literal.contains('@') || dpa.value_literal.contains("^^") {
699 (String::new(), dpa.value_literal.clone(), None)
700 } else {
701 (
702 String::new(),
703 format!(
704 "\"{}\"^^rdf:PlainLiteral",
705 crate::rdf_preprocess::escape_ofn_string(&lexical)
706 ),
707 None,
708 )
709 }
710 } else {
711 crate::rdf_preprocess::qualify_typed_literal_for_supplement(&lexical, &datatype_iri)
712 };
713 let dt_decl_line = dt_decl.map(|d| format!("\n {d}")).unwrap_or_default();
714 let body = format!(
715 "Declaration(NamedIndividual(<{}>))\n\
716 Declaration(DataProperty(<{}>))\n\
717 ClassAssertion(owl:Thing <{}>){dt_decl_line}\n\
718 DataPropertyAssertion(<{}> <{}> {lit})",
719 dpa.subject, dpa.property, dpa.subject, dpa.property, dpa.subject
720 );
721 let ofn = format!(
722 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
723 Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
724 Prefix(rdf:=<http://www.w3.org/1999/02/22-rdf-syntax-ns#>)\n\
725 {extra_prefixes}\n\
726 Ontology(<http://example.org/thing-data-literal-supplement>\n{body}\n)"
727 );
728 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
729 }
730 if ill_founded_list {
731 let thing = ontology
732 .entity_id("http://www.w3.org/2002/07/owl#Thing", EntityKind::Class)
733 .map_err(|e| Error::Parse(e.to_string()))?;
734 let nothing = ontology
735 .entity_id("http://www.w3.org/2002/07/owl#Nothing", EntityKind::Class)
736 .map_err(|e| Error::Parse(e.to_string()))?;
737 ontology
738 .add_axiom(Axiom::EquivalentClasses(vec![thing, nothing]))
739 .map_err(|e| Error::Parse(e.to_string()))?;
740 let thing_ce = ontology.dl_mut().intern_ce(ClassExpr::Atomic(thing));
741 let nothing_ce = ontology.dl_mut().intern_ce(ClassExpr::Atomic(nothing));
742 ontology
743 .dl_mut()
744 .push_axiom(DlAxiom::EquivalentClasses(vec![thing_ce, nothing_ce]));
745 report.meta.mapped_axiom_count += 1;
746 }
747 for npa in crate::rdf_preprocess::collect_reified_npas(preprocessed_rdf) {
748 validate_supplement_iris([&npa.subject, &npa.object, &npa.property])?;
749 let mut body = format!(
750 "Declaration(NamedIndividual(<{}>))\n\
751 Declaration(NamedIndividual(<{}>))\n\
752 Declaration(ObjectProperty(<{}>))\n\
753 NegativeObjectPropertyAssertion(<{}> <{}> <{}>)",
754 npa.subject, npa.object, npa.property, npa.property, npa.subject, npa.object
755 );
756 if let Some((prop, object)) = npa.positive_property {
757 validate_supplement_iris([&prop, &object])?;
758 body.push_str(&format!(
759 "\nObjectPropertyAssertion(<{prop}> <{}> <{object}>)",
760 npa.subject
761 ));
762 }
763 let ofn = format!(
764 "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
765 Ontology(<http://example.org/npa-supplement>\n{body}\n)"
766 );
767 merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
768 }
769 Ok(())
770}
771
772fn total_axiom_count(ontology: &Ontology) -> usize {
773 ontology
774 .axiom_count()
775 .saturating_add(ontology.dl().axiom_count())
776}
777
778fn merge_rdf_owl_imports(
779 path: &Path,
780 preprocessed_rdf: &str,
781 ontology: &mut Ontology,
782 report: &mut ParseReport,
783 limits: ParseLimits,
784 base: Option<&Path>,
785) -> Result<()> {
786 use std::collections::HashSet;
787 let mut visited = HashSet::from([path.to_path_buf()]);
788 for import_iri in crate::rdf_preprocess::collect_owl_imports(preprocessed_rdf) {
789 let Some(import_path) = resolve_owl_import_path(path, &import_iri) else {
790 continue;
791 };
792 if !visited.insert(import_path.clone()) {
793 continue;
794 }
795 let imported = load_ontology_with_limits_and_base_inner(&import_path, limits, base, false)?;
796 if total_axiom_count(ontology).saturating_add(total_axiom_count(&imported))
797 > limits.max_axioms
798 {
799 if limits.strict {
800 return Err(Error::Parse(format!(
801 "import merge would exceed axiom limit {} (current {} + import {})",
802 limits.max_axioms,
803 total_axiom_count(ontology),
804 total_axiom_count(&imported)
805 )));
806 }
807 report.meta.warn(format!(
808 "skipping import {import_iri}: would exceed axiom limit {}",
809 limits.max_axioms
810 ));
811 continue;
812 }
813 if ontology
814 .entity_count()
815 .saturating_add(imported.entity_count())
816 > limits.max_entities
817 {
818 if limits.strict {
819 return Err(Error::Parse(format!(
820 "import merge would exceed entity limit {} (current {} + import {})",
821 limits.max_entities,
822 ontology.entity_count(),
823 imported.entity_count()
824 )));
825 }
826 report.meta.warn(format!(
827 "skipping import {import_iri}: would exceed entity limit {}",
828 limits.max_entities
829 ));
830 continue;
831 }
832 let before = ontology.axiom_count();
833 merge_supplement_ontology(ontology, &imported, report, limits)?;
834 report.meta.mapped_axiom_count += ontology.axiom_count().saturating_sub(before);
835 }
836 Ok(())
837}
838
/// Maps an `owl:imports` IRI to a local file next to `current`, if one exists.
///
/// Three rules are tried in order:
/// 1. the OWLlink families IRI maps to `families.owl` in the same directory;
/// 2. an IRI under `http://www.iyouit.eu/` maps to the remainder of the IRI,
///    resolved inside the same directory;
/// 3. the fixed test-case mapping in [`resolve_wg_import_path`].
///
/// The IRI comes from the document being loaded, so the remainder used by rule 2
/// is only accepted when it consists of plain relative components. A `..`
/// segment or an absolute remainder would resolve outside the ontology's
/// directory and is ignored.
///
/// Returns `None` when `current` has no parent directory or no rule yields an
/// existing file.
fn resolve_owl_import_path(current: &Path, import_iri: &str) -> Option<PathBuf> {
    let dir = current.parent()?;

    if import_iri == "http://www.owllink.org/ontologies/families" {
        let candidate = dir.join("families.owl");
        if candidate.is_file() {
            return Some(candidate);
        }
    }

    if let Some(filename) = import_iri.strip_prefix("http://www.iyouit.eu/") {
        let relative = Path::new(filename);
        // Reject `..`, root and drive-prefix components: joining them could
        // escape `dir` or replace it entirely.
        let stays_inside = relative
            .components()
            .all(|part| matches!(part, Component::Normal(_) | Component::CurDir));
        if stays_inside {
            let candidate = dir.join(relative);
            if candidate.is_file() {
                return Some(candidate);
            }
        }
    }

    resolve_wg_import_path(current, import_iri)
}

/// Resolves the cross-imports between two fixed test-case directories.
///
/// The last `/`-separated segment of `import_iri` and the name of `current`'s
/// directory select a sibling directory's `premise.rdf`. Any other combination,
/// or a missing target file, yields `None`.
fn resolve_wg_import_path(current: &Path, import_iri: &str) -> Option<PathBuf> {
    let suffix = import_iri.rsplit('/').next()?;
    let case_parent = current.parent()?;
    let case_dir = case_parent.file_name()?.to_str()?;
    let wg_dir = case_parent.parent()?;

    let mapped = match (case_dir, suffix) {
        ("TestCase-3AWebOnt-2Dmiscellaneous-2D001", "consistent001") => {
            "TestCase-3AWebOnt-2Dmiscellaneous-2D002/premise.rdf"
        }
        ("TestCase-3AWebOnt-2Dmiscellaneous-2D002", "consistent002") => {
            "TestCase-3AWebOnt-2Dmiscellaneous-2D001/premise.rdf"
        }
        _ => return None,
    };

    let candidate = wg_dir.join(mapped);
    candidate.is_file().then_some(candidate)
}
871
872fn merge_supplement_with_accounting(
873 ontology: &mut Ontology,
874 report: &mut ParseReport,
875 limits: ParseLimits,
876 supplement: &Ontology,
877) -> Result<()> {
878 let before = ontology.axiom_count();
879 merge_supplement_ontology(ontology, supplement, report, limits)?;
880 report.meta.mapped_axiom_count += ontology.axiom_count().saturating_sub(before);
881 Ok(())
882}
883
884fn ensure_entity(ontology: &mut Ontology, iri: &str, kind: EntityKind) -> Result<EntityId> {
885 ontology
886 .entity_id(iri, kind)
887 .map_err(|e| Error::Parse(e.to_string()))
888}
889
890fn insert_same_individual_supplement(
891 ontology: &mut Ontology,
892 report: &mut ParseReport,
893 left: &str,
894 right: &str,
895) -> Result<()> {
896 if left == right {
897 return Ok(());
898 }
899 let left_id = ensure_entity(ontology, left, EntityKind::Individual)?;
900 let right_id = ensure_entity(ontology, right, EntityKind::Individual)?;
901 let before = ontology.axiom_count();
902 ontology
903 .add_axiom(Axiom::SameIndividual(vec![left_id, right_id]))
904 .map_err(|e| Error::Parse(e.to_string()))?;
905 report.meta.mapped_axiom_count += ontology.axiom_count().saturating_sub(before);
906 Ok(())
907}
908
909fn entity_kind_for_iri(ontology: &Ontology, iri: &str) -> Option<EntityKind> {
910 let id = ontology.lookup_entity(iri)?;
911 ontology.entity(id).ok().map(|record| record.kind)
912}
913
914fn insert_property_disjoint_supplement(
915 ontology: &mut Ontology,
916 report: &mut ParseReport,
917 left: &str,
918 right: &str,
919) -> Result<()> {
920 let left_kind = entity_kind_for_iri(ontology, left);
921 let right_kind = entity_kind_for_iri(ontology, right);
922 let cross_kind = matches!(left_kind, Some(EntityKind::DataProperty))
923 && matches!(right_kind, Some(EntityKind::ObjectProperty))
924 || matches!(left_kind, Some(EntityKind::ObjectProperty))
925 && matches!(right_kind, Some(EntityKind::DataProperty));
926 if cross_kind {
927 report.meta.warn(
928 "propertyDisjointWith across data and object property kinds skipped in lenient parse",
929 );
930 return Ok(());
931 }
932 if matches!(left_kind, Some(EntityKind::DataProperty))
933 || matches!(right_kind, Some(EntityKind::DataProperty))
934 {
935 insert_disjoint_data_properties_supplement(ontology, report, left, right)
936 } else {
937 insert_disjoint_object_properties_supplement(ontology, report, left, right)
938 }
939}
940
941fn insert_disjoint_object_properties_supplement(
942 ontology: &mut Ontology,
943 report: &mut ParseReport,
944 left: &str,
945 right: &str,
946) -> Result<()> {
947 let left_id = ensure_entity(ontology, left, EntityKind::ObjectProperty)?;
948 let right_id = ensure_entity(ontology, right, EntityKind::ObjectProperty)?;
949 let before = ontology.dl().axiom_count();
950 ontology
951 .dl_mut()
952 .push_axiom(DlAxiom::DisjointObjectProperties(vec![left_id, right_id]));
953 report.meta.mapped_axiom_count += ontology.dl().axiom_count().saturating_sub(before);
954 Ok(())
955}
956
957fn insert_disjoint_data_properties_supplement(
958 ontology: &mut Ontology,
959 report: &mut ParseReport,
960 left: &str,
961 right: &str,
962) -> Result<()> {
963 let left_id = ensure_entity(ontology, left, EntityKind::DataProperty)?;
964 let right_id = ensure_entity(ontology, right, EntityKind::DataProperty)?;
965 let before = ontology.dl().axiom_count();
966 ontology
967 .dl_mut()
968 .push_axiom(DlAxiom::DisjointDataProperties(vec![left_id, right_id]));
969 report.meta.mapped_axiom_count += ontology.dl().axiom_count().saturating_sub(before);
970 Ok(())
971}
972
973fn merge_supplement_ontology(
974 target: &mut Ontology,
975 source: &Ontology,
976 report: &mut ParseReport,
977 limits: ParseLimits,
978) -> Result<()> {
979 use ontologos_core::EntityKind;
980 use std::collections::HashMap;
981 for (_, record) in source.entities().iter() {
982 let iri = source
983 .resolve_iri(record.iri)
984 .map_err(|e| Error::Parse(e.to_string()))?;
985 if let Some(existing) = target.lookup_entity(iri) {
986 let existing_kind = target.entity(existing)?.kind;
987 if !existing_kind.satisfies(record.kind) {
988 match EntityKind::merge_punning(existing_kind, record.kind) {
989 Some(_) => {}
990 None => {
991 report.meta.warn(format!(
992 "import entity kind conflict for {iri}: {:?} vs {:?}",
993 existing_kind, record.kind
994 ));
995 }
996 }
997 }
998 } else {
999 target
1000 .entity_id(iri, record.kind)
1001 .map_err(|e| Error::Parse(e.to_string()))?;
1002 }
1003 }
1004 let entity_map: HashMap<_, _> = source
1005 .entities()
1006 .iter()
1007 .filter_map(|(id, record)| {
1008 let iri = source.resolve_iri(record.iri).ok()?;
1009 Some((id, target.lookup_entity(iri)?))
1010 })
1011 .collect();
1012 for (id, _) in source.entities().iter() {
1013 if !entity_map.contains_key(&id) {
1014 return Err(Error::Parse(format!(
1015 "supplement entity {id:?} missing after merge"
1016 )));
1017 }
1018 }
1019 target.dl_mut().import_axioms_from(source.dl(), |id| {
1020 entity_map
1021 .get(&id)
1022 .copied()
1023 .expect("supplement entities validated above")
1024 });
1025 for (_, axiom) in source.axioms().iter() {
1026 let remapped = remap_supplement_axiom(axiom, &entity_map)?;
1027 if let Err(e) = target.add_axiom(remapped) {
1028 if matches!(axiom, Axiom::ObjectPropertyRange { .. }) {
1029 report.meta.skipped_axiom_count += 1;
1030 report.meta.warn(format!(
1031 "skipping conflicting ObjectPropertyRange during merge: {e}"
1032 ));
1033 if limits.strict {
1034 return Err(Error::Parse(e.to_string()));
1035 }
1036 continue;
1037 }
1038 return Err(Error::Parse(e.to_string()));
1039 }
1040 }
1041 Ok(())
1042}
1043
1044fn remap_supplement_axiom(
1045 axiom: &Axiom,
1046 entity_map: &std::collections::HashMap<EntityId, EntityId>,
1047) -> Result<Axiom> {
1048 let remap = |id: EntityId| -> Result<EntityId> {
1049 entity_map
1050 .get(&id)
1051 .copied()
1052 .ok_or_else(|| Error::Parse(format!("supplement entity {id:?} missing after merge")))
1053 };
1054 let remap_vec =
1055 |ids: &[EntityId]| -> Result<Vec<EntityId>> { ids.iter().map(|id| remap(*id)).collect() };
1056 Ok(match axiom {
1057 Axiom::SubClassOf {
1058 subclass,
1059 superclass,
1060 } => Axiom::SubClassOf {
1061 subclass: remap(*subclass)?,
1062 superclass: remap(*superclass)?,
1063 },
1064 Axiom::EquivalentClasses(classes) => Axiom::EquivalentClasses(remap_vec(classes)?),
1065 Axiom::DisjointClasses(classes) => Axiom::DisjointClasses(remap_vec(classes)?),
1066 Axiom::ObjectPropertyDomain { property, domain } => Axiom::ObjectPropertyDomain {
1067 property: remap(*property)?,
1068 domain: remap(*domain)?,
1069 },
1070 Axiom::ObjectPropertyRange { property, range } => Axiom::ObjectPropertyRange {
1071 property: remap(*property)?,
1072 range: remap(*range)?,
1073 },
1074 Axiom::SubObjectPropertyOf {
1075 sub_property,
1076 super_property,
1077 } => Axiom::SubObjectPropertyOf {
1078 sub_property: remap(*sub_property)?,
1079 super_property: remap(*super_property)?,
1080 },
1081 Axiom::InverseObjectProperties { left, right } => Axiom::InverseObjectProperties {
1082 left: remap(*left)?,
1083 right: remap(*right)?,
1084 },
1085 Axiom::TransitiveObjectProperty(p) => Axiom::TransitiveObjectProperty(remap(*p)?),
1086 Axiom::SubClassOfExistential {
1087 subclass,
1088 property,
1089 filler,
1090 } => Axiom::SubClassOfExistential {
1091 subclass: remap(*subclass)?,
1092 property: remap(*property)?,
1093 filler: remap(*filler)?,
1094 },
1095 Axiom::SymmetricObjectProperty(p) => Axiom::SymmetricObjectProperty(remap(*p)?),
1096 Axiom::ReflexiveObjectProperty(p) => Axiom::ReflexiveObjectProperty(remap(*p)?),
1097 Axiom::FunctionalObjectProperty(p) => Axiom::FunctionalObjectProperty(remap(*p)?),
1098 Axiom::InverseFunctionalObjectProperty(p) => {
1099 Axiom::InverseFunctionalObjectProperty(remap(*p)?)
1100 }
1101 Axiom::IrreflexiveObjectProperty(p) => Axiom::IrreflexiveObjectProperty(remap(*p)?),
1102 Axiom::AsymmetricObjectProperty(p) => Axiom::AsymmetricObjectProperty(remap(*p)?),
1103 Axiom::EquivalentObjectProperties(props) => {
1104 Axiom::EquivalentObjectProperties(remap_vec(props)?)
1105 }
1106 Axiom::ClassAssertion { individual, class } => Axiom::ClassAssertion {
1107 individual: remap(*individual)?,
1108 class: remap(*class)?,
1109 },
1110 Axiom::ObjectPropertyAssertion {
1111 subject,
1112 property,
1113 object,
1114 } => Axiom::ObjectPropertyAssertion {
1115 subject: remap(*subject)?,
1116 property: remap(*property)?,
1117 object: remap(*object)?,
1118 },
1119 Axiom::DataPropertyAssertion {
1120 individual,
1121 property,
1122 value,
1123 } => Axiom::DataPropertyAssertion {
1124 individual: remap(*individual)?,
1125 property: remap(*property)?,
1126 value: value.clone(),
1127 },
1128 Axiom::NegativeObjectPropertyAssertion {
1129 subject,
1130 property,
1131 object,
1132 } => Axiom::NegativeObjectPropertyAssertion {
1133 subject: remap(*subject)?,
1134 property: remap(*property)?,
1135 object: remap(*object)?,
1136 },
1137 Axiom::NegativeDataPropertyAssertion {
1138 individual,
1139 property,
1140 value,
1141 } => Axiom::NegativeDataPropertyAssertion {
1142 individual: remap(*individual)?,
1143 property: remap(*property)?,
1144 value: value.clone(),
1145 },
1146 Axiom::SameIndividual(ids) => Axiom::SameIndividual(remap_vec(ids)?),
1147 Axiom::DifferentIndividuals(ids) => Axiom::DifferentIndividuals(remap_vec(ids)?),
1148 })
1149}
1150
1151fn open_for_load(path: &Path, base: Option<&Path>) -> Result<File> {
1152 let pre_meta = std::fs::symlink_metadata(path)?;
1153 let file = open_readonly_nofollow(path)?;
1154 if let Some(base) = base {
1155 verify_opened_under_base(&file, base, path, &pre_meta)?;
1156 }
1157 Ok(file)
1158}
1159
1160fn open_readonly_nofollow(path: &Path) -> Result<File> {
1161 #[cfg(unix)]
1162 {
1163 use std::fs::OpenOptions;
1164 use std::os::unix::fs::OpenOptionsExt;
1165 OpenOptions::new()
1166 .read(true)
1167 .custom_flags(O_NOFOLLOW)
1168 .open(path)
1169 .map_err(|e| Error::Parse(e.to_string()))
1170 }
1171 #[cfg(not(unix))]
1172 {
1173 Ok(File::open(path)?)
1174 }
1175}
1176
1177fn verify_opened_under_base(
1178 file: &File,
1179 base: &Path,
1180 validated: &Path,
1181 pre_meta: &std::fs::Metadata,
1182) -> Result<()> {
1183 #[cfg(unix)]
1184 use std::os::unix::fs::MetadataExt;
1185
1186 let file_meta = file.metadata()?;
1187 #[cfg(unix)]
1188 if pre_meta.dev() != file_meta.dev() || pre_meta.ino() != file_meta.ino() {
1189 return Err(Error::Parse(
1190 "ontology path changed between validation and open".into(),
1191 ));
1192 }
1193 #[cfg(not(unix))]
1194 let _ = (pre_meta, file_meta);
1195
1196 let base_normalized = normalize_path(base)?;
1197 let base_canon = base_normalized
1198 .canonicalize()
1199 .map_err(|e| Error::Parse(e.to_string()))?;
1200
1201 if let Ok(opened) = opened_path(file) {
1202 let opened_canon = opened
1203 .canonicalize()
1204 .map_err(|e| Error::Parse(e.to_string()))?;
1205 if !path_is_under_base(&opened_canon, &base_canon) {
1206 return Err(Error::Parse(format!(
1207 "opened file {} escapes allowed base {}",
1208 opened_canon.display(),
1209 base_canon.display()
1210 )));
1211 }
1212 return Ok(());
1213 }
1214
1215 let validated_canon = validated
1216 .canonicalize()
1217 .map_err(|e| Error::Parse(e.to_string()))?;
1218 if !path_is_under_base(&validated_canon, &base_canon) {
1219 return Err(Error::Parse(format!(
1220 "path {} escapes allowed base {}",
1221 validated_canon.display(),
1222 base_canon.display()
1223 )));
1224 }
1225 Ok(())
1226}
1227
1228#[cfg(target_os = "linux")]
1229fn opened_path(file: &File) -> Result<PathBuf> {
1230 use std::os::unix::io::AsRawFd;
1231 let fd = file.as_raw_fd();
1232 Ok(std::fs::read_link(format!("/proc/self/fd/{fd}"))?)
1233}
1234
/// macOS: resolves the path behind an open handle with `fcntl(F_GETPATH)`.
///
/// The bytes written by the kernel are turned into a `PathBuf` without any
/// UTF-8 conversion, so a file whose name is not valid UTF-8 keeps its exact
/// path instead of having bytes replaced by U+FFFD (which would make the
/// subsequent canonicalisation look at a different name).
///
/// # Errors
/// `Error::Parse` when the `fcntl` call fails or the buffer holds no NUL
/// terminator.
#[cfg(target_os = "macos")]
fn opened_path(file: &File) -> Result<PathBuf> {
    use std::ffi::{CStr, OsStr};
    use std::os::unix::ffi::OsStrExt;
    use std::os::unix::io::AsRawFd;

    const F_GETPATH: i32 = 50;
    let fd = file.as_raw_fd();
    // F_GETPATH requires a buffer of at least MAXPATHLEN (1024 on macOS) bytes.
    let mut buf = [0u8; 1024];
    // SAFETY: `fd` belongs to `file`, which is borrowed for the whole call and
    // therefore still open; `buf` is a live, writable 1024-byte array, the
    // size F_GETPATH is documented to need.
    #[allow(unsafe_code)]
    let rc = unsafe { libc::fcntl(fd, F_GETPATH, buf.as_mut_ptr()) };
    if rc == -1 {
        return Err(Error::Parse("fcntl(F_GETPATH) failed".into()));
    }
    let cstr = CStr::from_bytes_until_nul(&buf).map_err(|e| Error::Parse(e.to_string()))?;
    // Keep the raw bytes: paths are not guaranteed to be UTF-8.
    Ok(PathBuf::from(OsStr::from_bytes(cstr.to_bytes())))
}
1252
/// Platforms other than Linux and macOS: there is no implementation for
/// querying a descriptor's path, so this always fails and callers fall back
/// to their path-based check.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
fn opened_path(_file: &File) -> Result<PathBuf> {
    let reason = String::from("fd path resolution unavailable");
    Err(Error::Parse(reason))
}
1257
1258fn detect_format_with_sniff(path: &Path, reader: &mut (impl Read + Seek)) -> Result<Format> {
1259 if let Some(format) = detect_format(path) {
1260 reader
1261 .seek(SeekFrom::Start(0))
1262 .map_err(|e| Error::Parse(e.to_string()))?;
1263 return Ok(format);
1264 }
1265
1266 let header = sniff_and_rewind(reader, 4096)?;
1267 if let Some(format) = detect_format_from_bytes(&header) {
1268 return Ok(format);
1269 }
1270 if detect_turtle_from_bytes(&header) {
1271 return Ok(Format::Turtle);
1272 }
1273 if detect_functional_from_bytes(&header) {
1274 return Ok(Format::Functional);
1275 }
1276
1277 Err(Error::UnsupportedFormat(format!(
1278 "could not detect OWL/RDF format for {}",
1279 path.display()
1280 )))
1281}
1282
1283fn normalize_path(path: &Path) -> Result<PathBuf> {
1284 let base = if path.is_absolute() {
1285 PathBuf::new()
1286 } else {
1287 std::env::current_dir()?
1288 };
1289
1290 let mut normalized = base;
1291 for component in path.components() {
1292 match component {
1293 Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
1294 Component::CurDir => {}
1295 Component::ParentDir => {
1296 if !normalized.pop() {
1297 return Err(Error::Parse("path escapes beyond filesystem root".into()));
1298 }
1299 }
1300 Component::Normal(part) => normalized.push(part),
1301 }
1302 }
1303
1304 if normalized.exists() {
1305 normalized = normalized
1306 .canonicalize()
1307 .map_err(|e| Error::Parse(e.to_string()))?;
1308 }
1309
1310 Ok(normalized)
1311}
1312
/// Reports whether `path` equals `base` or lies beneath it.
///
/// The comparison is made on whole path components, so a sibling directory
/// that merely shares a textual prefix (`/a/b_evil` vs `/a/b`) is rejected.
fn path_is_under_base(path: &Path, base: &Path) -> bool {
    // `Path::starts_with` compares component by component, never by substring.
    path.starts_with(base)
}
1324
1325pub fn load_ofn_from_str(text: &str) -> Result<Ontology> {
1327 load_ofn_from_str_validated(text, ParseLimits::default())
1328}
1329
1330pub fn load_ofn_from_str_validated(text: &str, limits: ParseLimits) -> Result<Ontology> {
1332 if text.len() > limits.max_file_bytes {
1333 return Err(Error::Parse(format!(
1334 "in-memory OFN size {} exceeds limit of {} bytes",
1335 text.len(),
1336 limits.max_file_bytes
1337 )));
1338 }
1339 let set_ontology = read_horned_owl_from_reader(
1340 &mut std::io::Cursor::new(text.as_bytes()),
1341 Format::Functional,
1342 limits,
1343 )?;
1344 let (ontology, report) = map_to_core(&set_ontology, limits)?;
1345 finalize_parsed_ontology(ontology, report, limits, true)
1346}
1347
1348pub fn load_ofn_from_str_with_limits(text: &str, limits: ParseLimits) -> Result<Ontology> {
1350 if text.len() > limits.max_file_bytes {
1351 return Err(Error::Parse(format!(
1352 "in-memory OFN size {} exceeds limit of {} bytes",
1353 text.len(),
1354 limits.max_file_bytes
1355 )));
1356 }
1357 let set_ontology = read_horned_owl_from_reader(
1358 &mut std::io::Cursor::new(text.as_bytes()),
1359 Format::Functional,
1360 limits,
1361 )?;
1362 let (ontology, report) = map_to_core(&set_ontology, limits)?;
1363 finalize_parsed_ontology(ontology, report, limits, false)
1364}
1365
1366pub fn load_ofn_with_incremental(base: &Path, incremental: &Path) -> Result<Ontology> {
1368 load_ofn_with_incremental_and_limits(base, incremental, ParseLimits::default(), None)
1369}
1370
1371pub fn load_ofn_with_incremental_and_limits(
1373 base: &Path,
1374 incremental: &Path,
1375 limits: ParseLimits,
1376 sandbox_base: Option<&Path>,
1377) -> Result<Ontology> {
1378 let base_path = validate_load_path(base, sandbox_base)?;
1379 let inc_path = validate_load_path(incremental, sandbox_base)?;
1380 let base_text = read_text_file_with_limit(&base_path, limits)?;
1381 let inc_text = read_text_file_with_limit(&inc_path, limits)?;
1382 let merged = merge_ofn_documents(&base_text, &inc_text)?;
1383 if merged.len() > limits.max_file_bytes {
1384 return Err(Error::Parse(format!(
1385 "merged OFN size {} exceeds limit of {} bytes",
1386 merged.len(),
1387 limits.max_file_bytes
1388 )));
1389 }
1390 load_ofn_from_str_validated(&merged, limits)
1391}
1392
1393fn merge_ofn_documents(base: &str, incremental: &str) -> Result<String> {
1394 let inc_axioms = extract_ofn_axiom_body(incremental)
1395 .ok_or_else(|| Error::Parse("incremental OFN missing Ontology(...) body".into()))?;
1396 let close = find_ofn_ontology_close(base)
1397 .ok_or_else(|| Error::Parse("base OFN missing closing ')'".into()))?;
1398 Ok(format!("{}{})", &base[..close], inc_axioms))
1399}
1400
/// Returns the byte offset of the `)` that closes the first `Ontology(` form
/// in `text`, or `None` when the marker is absent or the form never closes.
///
/// Parentheses are only counted in structural position. They are ignored
/// inside double-quoted literals (with `\` escaping the next character) and
/// inside `<...>` full IRIs, which may legitimately contain `(` or `)`
/// (e.g. `<http://example.org/Foo_(bar)>`).
fn find_ofn_ontology_close(text: &str) -> Option<usize> {
    /// Lexical context of the scanner.
    #[derive(Clone, Copy)]
    enum Lex {
        /// Structural position: parentheses count.
        Code,
        /// Inside a quoted literal.
        Str,
        /// Inside a quoted literal, right after a backslash.
        StrEscape,
        /// Inside an angle-bracketed IRI.
        Iri,
    }

    let marker = "Ontology(";
    let start = text.find(marker)? + marker.len();
    // The marker itself opened the first level.
    let mut depth = 1usize;
    let mut state = Lex::Code;
    for (i, ch) in text[start..].char_indices() {
        state = match (state, ch) {
            (Lex::StrEscape, _) => Lex::Str,
            (Lex::Str, '\\') => Lex::StrEscape,
            (Lex::Str, '"') => Lex::Code,
            (Lex::Str, _) => Lex::Str,
            (Lex::Iri, '>') => Lex::Code,
            (Lex::Iri, _) => Lex::Iri,
            (Lex::Code, '"') => Lex::Str,
            (Lex::Code, '<') => Lex::Iri,
            (Lex::Code, '(') => {
                depth += 1;
                Lex::Code
            }
            (Lex::Code, ')') => {
                // depth is >= 1 here: the function returns as soon as it hits 0.
                depth -= 1;
                if depth == 0 {
                    return Some(start + i);
                }
                Lex::Code
            }
            (Lex::Code, _) => Lex::Code,
        };
    }
    None
}
1437
1438fn extract_ofn_axiom_body(text: &str) -> Option<String> {
1439 let marker = "Ontology(";
1440 let start = text.find(marker)? + marker.len();
1441 let rest = text.get(start..)?;
1442 let end = find_ofn_ontology_close(text)? - start;
1443 let mut body = rest[..end].trim();
1444 if body.starts_with('<') {
1445 if let Some((_, axioms)) = body.split_once('\n') {
1446 body = axioms.trim();
1447 } else if let Some((_, axioms)) = body.split_once(' ') {
1448 body = axioms.trim();
1449 }
1450 }
1451 Some(format!(" {body}"))
1452}
1453
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A `)` inside a quoted literal must not be taken for the end of the
    /// base ontology when documents are merged.
    #[test]
    fn merge_ofn_preserves_literal_with_closing_paren() {
        let base = concat!(
            "Prefix(:=<file:/c/test.owl#>)\n",
            "Ontology(<file:/c/test.owl#>\n",
            "Class(:A)\n",
            "AnnotationAssertion(rdfs:comment :A \"note with ) inside\")\n",
            ")"
        );
        let incremental = concat!(
            "Prefix(:=<file:/c/test.owl#>)\n",
            "Ontology(<file:/c/test.owl#>\n",
            "ClassAssertion(:A :a)\n",
            ")"
        );
        let merged = merge_ofn_documents(base, incremental).expect("merge");
        assert!(merged.contains("note with ) inside"));
        assert!(merged.contains("ClassAssertion(:A :a)"));
        assert!(merged.ends_with("ClassAssertion(:A :a))"));
    }

    /// The size guard, not the parser, must reject over-long input.
    #[test]
    fn load_ofn_from_str_rejects_oversized_input() {
        let limits = ParseLimits::with_file_bytes(16);
        // The fixture has to be longer than the limit, otherwise the guard is
        // never reached and any failure would come from the parser instead.
        let oversized = "Ontology(<http://example.org/oversized>)";
        assert!(
            oversized.len() > limits.max_file_bytes,
            "fixture must exceed the configured limit"
        );
        let err = load_ofn_from_str_with_limits(oversized, limits).expect_err("size");
        assert!(matches!(&err, Error::Parse(msg) if msg.contains("exceeds limit")));
    }

    /// A minimal functional-syntax document yields at least one axiom.
    #[test]
    fn load_ofn_from_str_parses_class_assertion() {
        let ofn = concat!(
            "Prefix(:=<file:/c/test.owl#>)\n",
            "Ontology(<file:/c/test.owl#>\n",
            "ClassAssertion(:A :a)\n",
            ")"
        );
        let ontology = load_ofn_from_str(ofn).expect("parse");
        assert!(ontology.axiom_count() > 0);
    }

    /// `..` components must not let a path leave the sandbox base.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let base = std::env::current_dir().expect("cwd");
        let err = validate_load_path(Path::new("../../../etc/passwd"), Some(&base))
            .expect_err("traversal");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A sibling directory sharing the base's textual prefix is outside it.
    #[test]
    fn rejects_path_prefix_bypass() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_uploads_base");
        let evil = parent.join("ontologos_uploads_base_evil");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::create_dir_all(&evil).expect("create evil sibling");
        let file = evil.join("secret.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let err = validate_load_path(&file, Some(&base)).expect_err("prefix bypass");
        assert!(matches!(err, Error::Parse(_)));

        // Best-effort cleanup.
        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&evil);
        let _ = std::fs::remove_dir(&base);
    }

    /// A file in a sub-directory of the base is accepted.
    #[test]
    fn path_is_under_base_accepts_nested_file() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_nested_base");
        let nested = base.join("nested");
        std::fs::create_dir_all(&nested).expect("create nested");
        let file = nested.join("ontology.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let validated = validate_load_path(&file, Some(&base)).expect("nested file under base");
        assert!(path_is_under_base(
            &validated,
            &base.canonicalize().expect("canonicalize base")
        ));

        // Best-effort cleanup.
        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&nested);
        let _ = std::fs::remove_dir(&base);
    }

    /// A symlink inside the base that points outside must not be loaded.
    #[cfg(unix)]
    #[test]
    fn sandboxed_load_does_not_follow_symlink_to_outside_file() {
        use std::os::unix::fs::symlink;

        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_sandbox_base");
        let outside = parent.join("ontologos_outside_secret.owl");
        let link = base.join("ontology.owl");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::write(&outside, b"OUTSIDE_SECRET_CONTENT").expect("write outside");

        // A link left behind by an earlier aborted run would make `symlink`
        // fail with "file exists".
        let _ = std::fs::remove_file(&link);
        symlink(&outside, &link).expect("symlink");

        let err = load_ontology_in(&base, &link).expect_err("symlink escape");
        assert!(matches!(err, Error::Parse(_) | Error::UnsupportedFormat(_)));

        // Best-effort cleanup.
        let _ = std::fs::remove_file(&link);
        let _ = std::fs::remove_file(&outside);
        let _ = std::fs::remove_dir(&base);
    }
}