Skip to main content

ontologos_parser/
load.rs

1use std::fs::File;
2use std::io::{Read, Seek, SeekFrom};
3use std::path::{Component, Path, PathBuf};
4
5use ontologos_core::{Axiom, ClassExpr, DlAxiom, EntityId, EntityKind, Ontology};
6
7use crate::limits::ParseLimits;
8use crate::map::map_to_core;
9use crate::read::{read_horned_owl_from_reader, sniff_and_rewind};
10use crate::report::ParseReport;
11use crate::validate::validate_loaded_ontology_strict_graph;
12use crate::{
13    Error, Format, Result, detect_format, detect_format_from_bytes, detect_functional_from_bytes,
14    detect_turtle_from_bytes, validate_loaded_ontology_light,
15};
16
/// Running byte tally for the RDF/XML preprocessing stages.
///
/// Each stage's output length is charged via `track`; the tally is cumulative
/// across stages rather than a per-stage maximum.
struct PreprocessBudget {
    /// Maximum cumulative number of bytes that may be charged.
    limit: usize,
    /// Bytes charged so far.
    used: usize,
}
21
22impl PreprocessBudget {
23    fn new(limit: usize) -> Self {
24        Self { limit, used: 0 }
25    }
26
27    fn track(&mut self, stage: &str) -> Result<()> {
28        self.used = self.used.saturating_add(stage.len());
29        if self.used > self.limit {
30            Err(Error::Parse(format!(
31                "RDF/XML preprocessing allocation {} bytes exceeds limit of {} bytes",
32                self.used, self.limit
33            )))
34        } else {
35            Ok(())
36        }
37    }
38}
39
40fn finalize_parsed_ontology(
41    ontology: Ontology,
42    report: ParseReport,
43    limits: ParseLimits,
44    validate: bool,
45) -> Result<Ontology> {
46    if limits.strict && report.meta.skipped_axiom_count > 0 {
47        return Err(Error::Parse(format!(
48            "strict parse: skipped {} axioms due to limits or mapping failures",
49            report.meta.skipped_axiom_count
50        )));
51    }
52    let mut ontology = ontology;
53    ontology.set_parse_meta(report.into_meta());
54    if validate {
55        validate_loaded_ontology_light(&ontology)?;
56        if limits.strict {
57            validate_loaded_ontology_strict_graph(&ontology)?;
58        }
59    }
60    Ok(ontology)
61}
62
63fn finish_loaded_ontology(
64    ontology: Ontology,
65    report: ParseReport,
66    limits: ParseLimits,
67) -> Result<Ontology> {
68    finalize_parsed_ontology(ontology, report, limits, limits.validate_output)
69}
70
71fn bump_harvested_assertions(count: &mut usize, limits: ParseLimits) -> Result<()> {
72    *count += 1;
73    if *count > limits.max_harvested_assertions {
74        Err(Error::Parse(format!(
75            "harvested assertion count {} exceeds limit of {}",
76            *count, limits.max_harvested_assertions
77        )))
78    } else {
79        Ok(())
80    }
81}
82
83fn read_text_file_with_limit(path: &Path, limits: ParseLimits) -> Result<String> {
84    let metadata = std::fs::metadata(path).map_err(|e| Error::Parse(e.to_string()))?;
85    if metadata.len() as usize > limits.max_file_bytes {
86        return Err(Error::Parse(format!(
87            "file size {} exceeds limit of {} bytes",
88            metadata.len(),
89            limits.max_file_bytes
90        )));
91    }
92    std::fs::read_to_string(path).map_err(|e| Error::Parse(e.to_string()))
93}
94
/// OWL Functional Syntax prefix declarations (`owl:`, `xsd:`, `rdf:`) that are
/// prepended to generated supplement documents. Ends with a newline so the
/// `Ontology(...)` header can follow directly.
const SUPPLEMENT_STANDARD_PREFIXES: &str = "\
Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
Prefix(rdf:=<http://www.w3.org/1999/02/22-rdf-syntax-ns#>)\n";
99
/// Reject IRIs that would break OWL Functional Syntax interpolation in supplements.
///
/// Thin local alias for [`crate::validate::validate_supplement_iri`]; the
/// actual rules live there.
///
/// # Errors
/// Propagates whatever error the underlying validator returns.
fn validate_supplement_iri(iri: &str) -> Result<()> {
    crate::validate::validate_supplement_iri(iri)
}
104
105fn validate_supplement_iris(iris: impl IntoIterator<Item = impl AsRef<str>>) -> Result<()> {
106    for iri in iris {
107        validate_supplement_iri(iri.as_ref())?;
108    }
109    Ok(())
110}
111
// Raw `O_NOFOLLOW` open(2) flag, spelled out per platform because this crate
// does not depend on `libc`. The value is ABI-specific: using the wrong one
// does not fail, it silently sets some other flag and symlinks get followed.

/// Linux on arm, aarch64, powerpc and m68k: these architectures define their
/// own fcntl flag layout in which `O_NOFOLLOW` is `0o100000`.
#[cfg(all(
    target_os = "linux",
    any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "m68k"
    )
))]
const O_NOFOLLOW: i32 = 0o100_000;
/// Linux on every other architecture (x86, x86_64, riscv, mips, sparc,
/// s390x, ...): `O_NOFOLLOW` is `0o400000`. On these targets `0o100000` is
/// `O_LARGEFILE`, which would leave symlink protection switched off.
#[cfg(all(
    target_os = "linux",
    not(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "m68k"
    ))
))]
const O_NOFOLLOW: i32 = 0o400_000;
/// macOS: `O_NOFOLLOW` is `0x0100` (`0x0040` is `O_ASYNC`).
#[cfg(target_os = "macos")]
const O_NOFOLLOW: i32 = 0x0000_0100;
/// Other Unix targets: value unknown here, so no flag is set.
/// NOTE(review): this means symlinks are NOT refused at open time on these
/// platforms; confirm that is acceptable or add their values.
#[cfg(all(unix, not(any(target_os = "linux", target_os = "macos"))))]
const O_NOFOLLOW: i32 = 0;
118
119/// Resolve and validate a path before loading an ontology file.
120pub fn validate_load_path(path: &Path, base: Option<&Path>) -> Result<PathBuf> {
121    let normalized = normalize_path(path)?;
122
123    if let Some(base) = base {
124        let base_normalized = normalize_path(base)?;
125        if !path_is_under_base(&normalized, &base_normalized) {
126            return Err(Error::Parse(format!(
127                "path {} escapes allowed base {}",
128                normalized.display(),
129                base_normalized.display()
130            )));
131        }
132    }
133
134    Ok(normalized)
135}
136
137/// Load an ontology from a validated file path (trusted local file; merges `owl:imports`).
138pub fn load_ontology(path: &Path) -> Result<Ontology> {
139    load_ontology_with_limits(
140        path,
141        ParseLimits {
142            merge_imports: true,
143            ..ParseLimits::default()
144        },
145    )
146}
147
148/// Load an ontology without failing on skipped axioms or incompatible declarations.
149pub fn load_ontology_lenient(path: &Path) -> Result<Ontology> {
150    load_ontology_with_limits(
151        path,
152        ParseLimits {
153            merge_imports: true,
154            ..ParseLimits::lenient()
155        },
156    )
157}
158
159/// Load an ontology constrained to stay under `base` (untrusted uploads).
160pub fn load_ontology_in(base: &Path, path: &Path) -> Result<Ontology> {
161    load_ontology_with_limits_and_base(
162        path,
163        ParseLimits {
164            merge_imports: true,
165            ..ParseLimits::default()
166        },
167        Some(base),
168    )
169}
170
/// Lenient sandboxed load for untrusted uploads that may skip axioms with warnings.
///
/// Uses `ParseLimits::lenient()` unchanged and confines the load to `base`.
///
/// NOTE(review): unlike `load_ontology_lenient` and `load_ontology_in`, this
/// does not force `merge_imports: true`, so import merging here depends on
/// what the lenient preset sets. Confirm whether that difference is intended.
///
/// # Errors
/// Propagates any error from [`load_ontology_with_limits_and_base`].
pub fn load_ontology_lenient_in(base: &Path, path: &Path) -> Result<Ontology> {
    load_ontology_with_limits_and_base(path, ParseLimits::lenient(), Some(base))
}
175
/// Load an ontology with custom [`ParseLimits`].
///
/// No sandbox base directory is applied; this forwards to
/// [`load_ontology_with_limits_and_base`] with `base = None`.
///
/// # Errors
/// Propagates any error from [`load_ontology_with_limits_and_base`].
pub fn load_ontology_with_limits(path: &Path, limits: ParseLimits) -> Result<Ontology> {
    load_ontology_with_limits_and_base(path, limits, None)
}
180
181/// Load an ontology with custom limits and optional sandbox base directory.
182pub fn load_ontology_with_limits_and_base(
183    path: &Path,
184    limits: ParseLimits,
185    base: Option<&Path>,
186) -> Result<Ontology> {
187    let merge_imports = limits.merge_imports;
188    load_ontology_with_limits_and_base_inner(path, limits, base, merge_imports)
189}
190
191fn load_ontology_with_limits_and_base_inner(
192    path: &Path,
193    limits: ParseLimits,
194    base: Option<&Path>,
195    merge_imports: bool,
196) -> Result<Ontology> {
197    let validated = validate_load_path(path, base)?;
198    if !validated.is_file() {
199        return Err(Error::Parse(format!("not a file: {}", validated.display())));
200    }
201
202    let mut file = open_for_load(&validated, base)?;
203    let file_len = file
204        .metadata()
205        .map_err(|e| Error::Parse(e.to_string()))?
206        .len();
207    if file_len as usize > limits.max_file_bytes {
208        return Err(Error::Parse(format!(
209            "file size {file_len} exceeds limit of {} bytes",
210            limits.max_file_bytes
211        )));
212    }
213    let format = detect_format_with_sniff(path, &mut file)?;
214    if format == Format::RdfXml {
215        let mut bytes = Vec::new();
216        file.seek(SeekFrom::Start(0))
217            .map_err(|e| Error::Parse(e.to_string()))?;
218        file.read_to_end(&mut bytes)
219            .map_err(|e| Error::Parse(e.to_string()))?;
220        if bytes.len() > limits.max_file_bytes {
221            return Err(Error::Parse(format!(
222                "file size {} exceeds limit of {} bytes",
223                bytes.len(),
224                limits.max_file_bytes
225            )));
226        }
227        let text = String::from_utf8(bytes).map_err(|e| Error::Parse(e.to_string()))?;
228        let mut budget = PreprocessBudget::new(limits.max_preprocess_bytes);
229        budget.track(&text)?;
230        let root_tag = crate::rdf_preprocess::normalize_multiline_rdf_root_tag(&text);
231        budget.track(&root_tag)?;
232        let deduped = crate::rdf_preprocess::dedupe_rdf_xml_ids(&root_tag);
233        budget.track(&deduped)?;
234        let normalized_ids = crate::rdf_preprocess::normalize_invalid_rdf_ids(&deduped);
235        budget.track(&normalized_ids)?;
236        let expanded = crate::rdf_preprocess::expand_xml_entities_with_limit(
237            &normalized_ids,
238            limits.max_expanded_bytes,
239        )?;
240        budget.track(&expanded)?;
241        let ill_founded_list = crate::rdf_preprocess::contains_ill_founded_rdf_list(&expanded);
242        let relative_uris = crate::rdf_preprocess::normalize_relative_owl_uris(&expanded);
243        budget.track(&relative_uris)?;
244        let rdfs_classes = crate::rdf_preprocess::normalize_rdfs_class_elements(&relative_uris);
245        budget.track(&rdfs_classes)?;
246        let injected = crate::rdf_preprocess::inject_rdf_based_punning_declarations(&rdfs_classes);
247        budget.track(&injected)?;
248        let typed_about = crate::rdf_preprocess::materialize_typed_about_elements(&injected);
249        budget.track(&typed_about)?;
250        let typed_nodes = crate::rdf_preprocess::materialize_typed_node_elements(&typed_about);
251        budget.track(&typed_nodes)?;
252        let intersections =
253            crate::rdf_preprocess::normalize_class_intersection_definitions(&typed_nodes);
254        budget.track(&intersections)?;
255        let same_as = crate::rdf_preprocess::normalize_class_same_as(&intersections);
256        budget.track(&same_as)?;
257        let named_individuals =
258            crate::rdf_preprocess::materialize_named_individual_descriptions(&same_as);
259        budget.track(&named_individuals)?;
260        let individuals = crate::rdf_preprocess::materialize_anonymous_individual_descriptions(
261            &named_individuals,
262        );
263        budget.track(&individuals)?;
264        let normalized = crate::rdf_preprocess::normalize_all_different_members(&individuals);
265        budget.track(&normalized)?;
266        let disjoint = crate::rdf_preprocess::expand_all_disjoint_collections(&normalized);
267        budget.track(&disjoint)?;
268        let property_usage =
269            crate::rdf_preprocess::inject_object_property_declarations_from_usage(&disjoint);
270        budget.track(&property_usage)?;
271        let preprocessed_rdf = crate::rdf_preprocess::normalize_property_same_as(&property_usage);
272        budget.track(&preprocessed_rdf)?;
273        let set_ontology = read_horned_owl_from_reader(
274            &mut std::io::Cursor::new(preprocessed_rdf.as_bytes()),
275            format,
276            limits,
277        )?;
278        let (mut ontology, mut report) = map_to_core(&set_ontology, limits)?;
279        supplement_rdf_dl_axioms(
280            &preprocessed_rdf,
281            &mut ontology,
282            &mut report,
283            limits,
284            ill_founded_list,
285        )?;
286        if merge_imports {
287            merge_rdf_owl_imports(
288                path,
289                &preprocessed_rdf,
290                &mut ontology,
291                &mut report,
292                limits,
293                base,
294            )?;
295        }
296        report.meta.logical_axiom_count =
297            report.meta.mapped_axiom_count + report.meta.skipped_axiom_count;
298        return finish_loaded_ontology(ontology, report, limits);
299    }
300    file.seek(SeekFrom::Start(0))
301        .map_err(|e| Error::Parse(e.to_string()))?;
302    let set_ontology = read_horned_owl_from_reader(&mut file, format, limits)?;
303    let (ontology, report) = map_to_core(&set_ontology, limits)?;
304    finish_loaded_ontology(ontology, report, limits)
305}
306
307fn merge_datatype_sameas_supplement(
308    ontology: &mut Ontology,
309    report: &mut ParseReport,
310    limits: ParseLimits,
311    left: &str,
312    right: &str,
313) -> Result<bool> {
314    if !(left.contains("XMLSchema") || right.contains("XMLSchema")) {
315        return Ok(false);
316    }
317    let alias = if left.contains("XMLSchema") {
318        right
319    } else {
320        left
321    };
322    let xsd = if left.contains("XMLSchema") {
323        left
324    } else {
325        right
326    };
327    let (alias_prefixes, alias_ref) =
328        crate::rdf_preprocess::qualify_datatype_ref_for_supplement(alias);
329    let (_, xsd_ref) = crate::rdf_preprocess::qualify_datatype_ref_for_supplement(xsd);
330    let ofn = format!(
331        "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
332         Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
333         {alias_prefixes}\n\
334         Ontology(<http://example.org/datatype-sameas-supplement>\n\
335           Declaration(Datatype({alias_ref}))\n\
336           DatatypeDefinition({alias_ref} {xsd_ref})\n\
337         )"
338    );
339    let supplement = load_ofn_from_str_with_limits(&ofn, limits)?;
340    merge_supplement_with_accounting(ontology, report, limits, &supplement)?;
341    Ok(true)
342}
343
344fn sameas_pair_is_property_entities(
345    ontology: &Ontology,
346    preprocessed_rdf: &str,
347    left: &str,
348    right: &str,
349) -> bool {
350    fn is_property_iri(ontology: &Ontology, preprocessed_rdf: &str, iri: &str) -> bool {
351        if let Some(id) = ontology.lookup_entity(iri)
352            && let Ok(rec) = ontology.entity(id)
353            && matches!(
354                rec.kind,
355                EntityKind::ObjectProperty | EntityKind::DataProperty
356            )
357        {
358            return true;
359        }
360        crate::rdf_preprocess::collect_object_property_assertions(preprocessed_rdf)
361            .iter()
362            .any(|(_, property, _)| property == iri)
363    }
364    is_property_iri(ontology, preprocessed_rdf, left)
365        || is_property_iri(ontology, preprocessed_rdf, right)
366}
367
368fn merge_property_sameas_supplement(
369    ontology: &mut Ontology,
370    report: &mut ParseReport,
371    limits: ParseLimits,
372    preprocessed_rdf: &str,
373    left: &str,
374    right: &str,
375) -> Result<bool> {
376    if !sameas_pair_is_property_entities(ontology, preprocessed_rdf, left, right) {
377        return Ok(false);
378    }
379    let ofn = format!(
380        "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
381         Ontology(<http://example.org/property-sameas-supplement>\n\
382           Declaration(ObjectProperty(<{left}>))\n\
383           Declaration(ObjectProperty(<{right}>))\n\
384           EquivalentObjectProperties(<{left}> <{right}>)\n\
385         )"
386    );
387    let supplement = load_ofn_from_str_with_limits(&ofn, limits)?;
388    merge_supplement_with_accounting(ontology, report, limits, &supplement)?;
389    Ok(true)
390}
391
392fn merge_ofn_supplement(
393    ontology: &mut Ontology,
394    report: &mut ParseReport,
395    limits: ParseLimits,
396    harvested: &mut usize,
397    ofn: &str,
398) -> Result<()> {
399    bump_harvested_assertions(harvested, limits)?;
400    let supplement = load_ofn_from_str_with_limits(ofn, limits)?;
401    merge_supplement_with_accounting(ontology, report, limits, &supplement)
402}
403
404fn supplement_rdf_dl_axioms(
405    preprocessed_rdf: &str,
406    ontology: &mut Ontology,
407    report: &mut ParseReport,
408    limits: ParseLimits,
409    ill_founded_list: bool,
410) -> Result<()> {
411    let mut harvested = 0usize;
412    for (individual_iri, restriction_iri, ce_ofn) in
413        crate::rdf_preprocess::collect_self_disjoint_restriction_assertions(preprocessed_rdf)
414    {
415        validate_supplement_iris([&individual_iri, &restriction_iri])?;
416        let ofn = format!(
417            "{SUPPLEMENT_STANDARD_PREFIXES}\
418             Ontology(<{individual_iri}>\n\
419               Declaration(Class(<{restriction_iri}>))\n\
420               Declaration(NamedIndividual(<{individual_iri}>))\n\
421               Declaration(ObjectProperty(<http://www.w3.org/2002/03owlt/disjointWith/inconsistent010#p>))\n\
422               EquivalentClasses(<{restriction_iri}> {ce_ofn})\n\
423               DisjointClasses(<{restriction_iri}> <{restriction_iri}>)\n\
424               ClassAssertion(<{restriction_iri}> <{individual_iri}>)\n\
425             )"
426        );
427        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
428    }
429    for (individual_iri, ce_ofn) in
430        crate::rdf_preprocess::collect_object_class_assertions(preprocessed_rdf)
431    {
432        validate_supplement_iri(&individual_iri)?;
433        let ofn = format!(
434            "{SUPPLEMENT_STANDARD_PREFIXES}\
435             Ontology(<{individual_iri}>\n\
436               Declaration(NamedIndividual(<{individual_iri}>))\n\
437               ClassAssertion({ce_ofn} <{individual_iri}>)\n\
438             )"
439        );
440        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
441    }
442    for (class_iri, ce_ofn) in
443        crate::rdf_preprocess::collect_restriction_subclasses(preprocessed_rdf)
444    {
445        validate_supplement_iri(&class_iri)?;
446        let ofn = format!(
447            "{SUPPLEMENT_STANDARD_PREFIXES}\
448             Ontology(<{class_iri}>\n\
449               Declaration(Class(<{class_iri}>))\n\
450               SubClassOf(<{class_iri}> {ce_ofn})\n\
451             )"
452        );
453        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
454    }
455    for body in
456        crate::rdf_preprocess::collect_anonymous_restriction_subclass_axioms(preprocessed_rdf)
457    {
458        crate::validate::validate_supplement_ofn_body(&body)?;
459        let ofn = format!(
460            "{SUPPLEMENT_STANDARD_PREFIXES}\
461             Ontology(<http://example.org/anon-restriction-supplement>\n{body}\n)"
462        );
463        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
464    }
465    for (class_iri, ce_ofn) in
466        crate::rdf_preprocess::collect_complement_subclasses(preprocessed_rdf)
467    {
468        validate_supplement_iri(&class_iri)?;
469        let ofn = format!(
470            "{SUPPLEMENT_STANDARD_PREFIXES}\
471             Ontology(<{class_iri}>\n\
472               Declaration(Class(<{class_iri}>))\n\
473               SubClassOf(<{class_iri}> {ce_ofn})\n\
474             )"
475        );
476        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
477    }
478    for (class_iri, ce_ofn) in
479        crate::rdf_preprocess::collect_boolean_class_equivalences(preprocessed_rdf)
480    {
481        validate_supplement_iri(&class_iri)?;
482        let (extra_prefixes, ce_qualified) =
483            crate::rdf_preprocess::qualify_ce_ofn_for_supplement(&ce_ofn);
484        let ofn = format!(
485            "{SUPPLEMENT_STANDARD_PREFIXES}\
486             {extra_prefixes}\n\
487             Ontology(<{class_iri}>\n\
488               Declaration(Class(<{class_iri}>))\n\
489               EquivalentClasses(<{class_iri}> {ce_qualified})\n\
490             )"
491        );
492        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
493    }
494    for (left_ofn, right_ofn) in
495        crate::rdf_preprocess::collect_boolean_binary_equivalences(preprocessed_rdf)
496    {
497        let (left_prefixes, left_q) =
498            crate::rdf_preprocess::qualify_ce_ofn_for_supplement(&left_ofn);
499        let (right_prefixes, right_q) =
500            crate::rdf_preprocess::qualify_ce_ofn_for_supplement(&right_ofn);
501        crate::validate::validate_supplement_ofn_body(&left_q)?;
502        crate::validate::validate_supplement_ofn_body(&right_q)?;
503        let ofn = format!(
504            "{SUPPLEMENT_STANDARD_PREFIXES}\
505             {left_prefixes}\n\
506             {right_prefixes}\n\
507             Ontology(<http://example.org/boolean-binary-equiv-supplement>\n\
508               EquivalentClasses({left_q} {right_q})\n\
509             )"
510        );
511        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
512    }
513    let mut opa_bodies = Vec::new();
514    for (subject, property, object) in
515        crate::rdf_preprocess::collect_object_property_assertions(preprocessed_rdf)
516    {
517        validate_supplement_iris([&subject, &property, &object])?;
518        bump_harvested_assertions(&mut harvested, limits)?;
519        opa_bodies.push(format!(
520            "Declaration(NamedIndividual(<{subject}>))\n\
521             Declaration(NamedIndividual(<{object}>))\n\
522             Declaration(ObjectProperty(<{property}>))\n\
523             ObjectPropertyAssertion(<{property}> <{subject}> <{object}>)"
524        ));
525    }
526    if !opa_bodies.is_empty() {
527        const OPA_CHUNK: usize = 500;
528        for chunk in opa_bodies.chunks(OPA_CHUNK) {
529            let body = chunk.join("\n");
530            if body.len() > limits.max_file_bytes {
531                return Err(Error::Parse(format!(
532                    "OPA supplement size {} exceeds file byte limit {}",
533                    body.len(),
534                    limits.max_file_bytes
535                )));
536            }
537            let ofn = format!(
538                "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
539                 Ontology(<http://example.org/opa-supplement>\n{body}\n)"
540            );
541            let supplement = load_ofn_from_str_with_limits(&ofn, limits)?;
542            merge_supplement_with_accounting(ontology, report, limits, &supplement)?;
543        }
544    }
545    for (property, range) in
546        crate::rdf_preprocess::collect_datatype_property_ranges(preprocessed_rdf)
547    {
548        validate_supplement_iri(&property)?;
549        let ofn = format!(
550            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
551             Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
552             Prefix(rdfs:=<http://www.w3.org/2000/01/rdf-schema#>)\n\
553             Ontology(<http://example.org/datatype-range-supplement>\n\
554               Declaration(DataProperty(<{property}>))\n\
555               DataPropertyRange(<{property}> {range})\n\
556             )"
557        );
558        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
559    }
560    for (left, right) in crate::rdf_preprocess::collect_owl_same_as_pairs(preprocessed_rdf) {
561        validate_supplement_iris([&left, &right])?;
562        bump_harvested_assertions(&mut harvested, limits)?;
563        if merge_datatype_sameas_supplement(ontology, report, limits, &left, &right)? {
564            continue;
565        }
566        if merge_property_sameas_supplement(
567            ontology,
568            report,
569            limits,
570            preprocessed_rdf,
571            &left,
572            &right,
573        )? {
574            continue;
575        }
576        insert_same_individual_supplement(ontology, report, &left, &right)?;
577    }
578    for (left, right) in crate::rdf_preprocess::collect_property_disjoint_pairs(preprocessed_rdf) {
579        validate_supplement_iris([&left, &right])?;
580        bump_harvested_assertions(&mut harvested, limits)?;
581        insert_property_disjoint_supplement(ontology, report, &left, &right)?;
582    }
583    for (property, domain) in
584        crate::rdf_preprocess::collect_rdfs_object_property_domains(preprocessed_rdf)
585    {
586        validate_supplement_iris([&property, &domain])?;
587        let ofn = format!(
588            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
589             Ontology(<http://example.org/rdfs-domain-supplement>\n\
590               Declaration(ObjectProperty(<{property}>))\n\
591               Declaration(Class(<{domain}>))\n\
592               ObjectPropertyDomain(<{property}> <{domain}>)\n\
593             )"
594        );
595        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
596    }
597    for (property, range) in
598        crate::rdf_preprocess::collect_rdfs_object_property_ranges(preprocessed_rdf)
599    {
600        validate_supplement_iris([&property, &range])?;
601        let ofn = format!(
602            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
603             Ontology(<http://example.org/rdfs-range-supplement>\n\
604               Declaration(ObjectProperty(<{property}>))\n\
605               Declaration(Class(<{range}>))\n\
606               ObjectPropertyRange(<{property}> <{range}>)\n\
607             )"
608        );
609        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
610    }
611    for (sub, sup) in crate::rdf_preprocess::collect_rdfs_sub_object_properties(preprocessed_rdf) {
612        validate_supplement_iris([&sub, &sup])?;
613        let ofn = format!(
614            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
615             Ontology(<http://example.org/rdfs-subproperty-supplement>\n\
616               Declaration(ObjectProperty(<{sub}>))\n\
617               Declaration(ObjectProperty(<{sup}>))\n\
618               SubObjectPropertyOf(<{sub}> <{sup}>)\n\
619             )"
620        );
621        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
622    }
623    for property in crate::rdf_preprocess::collect_functional_object_properties(preprocessed_rdf) {
624        validate_supplement_iri(&property)?;
625        let datatype_props =
626            crate::rdf_preprocess::declared_datatype_property_iris(preprocessed_rdf);
627        let ofn = if datatype_props.contains(&property) {
628            format!(
629                "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
630                 Ontology(<http://example.org/functional-property-supplement>\n\
631                   Declaration(DataProperty(<{property}>))\n\
632                   FunctionalDataProperty(<{property}>)\n\
633                 )"
634            )
635        } else {
636            format!(
637                "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
638                 Ontology(<http://example.org/functional-property-supplement>\n\
639                   Declaration(ObjectProperty(<{property}>))\n\
640                   FunctionalObjectProperty(<{property}>)\n\
641                 )"
642            )
643        };
644        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
645    }
646    for body in crate::rdf_preprocess::collect_disjoint_union_axioms(preprocessed_rdf) {
647        crate::validate::validate_supplement_ofn_body(&body)?;
648        let ofn = format!(
649            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
650             Ontology(<http://example.org/disjoint-union-supplement>\n{body}\n)"
651        );
652        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
653    }
654    for npa in crate::rdf_preprocess::collect_reified_data_npas(preprocessed_rdf) {
655        validate_supplement_iris([&npa.subject, &npa.property])?;
656        let lit = npa.value_literal.replace('"', "\\\"");
657        let mut body = format!(
658            "Declaration(NamedIndividual(<{}>))\n\
659             Declaration(DataProperty(<{}>))\n\
660             NegativeDataPropertyAssertion(<{}> <{}> \"{lit}\"^^xsd:string)\n\
661             DataPropertyAssertion(<{}> <{}> \"{lit}\"^^xsd:string)",
662            npa.subject, npa.property, npa.property, npa.subject, npa.property, npa.subject
663        );
664        if let Some((prop, value)) = &npa.positive_property {
665            validate_supplement_iri(prop)?;
666            if prop != &npa.property || value != &npa.value_literal {
667                body.push_str(&format!(
668                    "\nDataPropertyAssertion(<{prop}> <{}> \"{}\"^^xsd:string)",
669                    npa.subject,
670                    value.replace('"', "\\\"")
671                ));
672            }
673        }
674        let ofn = format!(
675            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
676             Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
677             Ontology(<http://example.org/data-npa-supplement>\n{body}\n)"
678        );
679        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
680    }
681    for dpa in crate::rdf_preprocess::collect_direct_data_literal_assertions(preprocessed_rdf) {
682        validate_supplement_iris([&dpa.subject, &dpa.property])?;
683        let (lexical, datatype_iri) = if dpa.value_literal.contains("^^") {
684            let mut parts = dpa.value_literal.splitn(2, "^^");
685            let lex = parts.next().unwrap_or("").trim_matches('"').to_string();
686            let dt = parts
687                .next()
688                .unwrap_or("")
689                .trim_matches(|c| c == '<' || c == '>');
690            (lex, dt.to_string())
691        } else {
692            (dpa.value_literal.replace('"', "\\\""), String::new())
693        };
694        if !datatype_iri.is_empty() && datatype_iri.contains("://") {
695            validate_supplement_iri(&datatype_iri)?;
696        }
697        let (extra_prefixes, lit, dt_decl) = if datatype_iri.is_empty() {
698            if dpa.value_literal.contains('@') || dpa.value_literal.contains("^^") {
699                (String::new(), dpa.value_literal.clone(), None)
700            } else {
701                (
702                    String::new(),
703                    format!(
704                        "\"{}\"^^rdf:PlainLiteral",
705                        crate::rdf_preprocess::escape_ofn_string(&lexical)
706                    ),
707                    None,
708                )
709            }
710        } else {
711            crate::rdf_preprocess::qualify_typed_literal_for_supplement(&lexical, &datatype_iri)
712        };
713        let dt_decl_line = dt_decl.map(|d| format!("\n       {d}")).unwrap_or_default();
714        let body = format!(
715            "Declaration(NamedIndividual(<{}>))\n\
716             Declaration(DataProperty(<{}>))\n\
717             ClassAssertion(owl:Thing <{}>){dt_decl_line}\n\
718             DataPropertyAssertion(<{}> <{}> {lit})",
719            dpa.subject, dpa.property, dpa.subject, dpa.property, dpa.subject
720        );
721        let ofn = format!(
722            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
723             Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)\n\
724             Prefix(rdf:=<http://www.w3.org/1999/02/22-rdf-syntax-ns#>)\n\
725             {extra_prefixes}\n\
726             Ontology(<http://example.org/thing-data-literal-supplement>\n{body}\n)"
727        );
728        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
729    }
730    if ill_founded_list {
731        let thing = ontology
732            .entity_id("http://www.w3.org/2002/07/owl#Thing", EntityKind::Class)
733            .map_err(|e| Error::Parse(e.to_string()))?;
734        let nothing = ontology
735            .entity_id("http://www.w3.org/2002/07/owl#Nothing", EntityKind::Class)
736            .map_err(|e| Error::Parse(e.to_string()))?;
737        ontology
738            .add_axiom(Axiom::EquivalentClasses(vec![thing, nothing]))
739            .map_err(|e| Error::Parse(e.to_string()))?;
740        let thing_ce = ontology.dl_mut().intern_ce(ClassExpr::Atomic(thing));
741        let nothing_ce = ontology.dl_mut().intern_ce(ClassExpr::Atomic(nothing));
742        ontology
743            .dl_mut()
744            .push_axiom(DlAxiom::EquivalentClasses(vec![thing_ce, nothing_ce]));
745        report.meta.mapped_axiom_count += 1;
746    }
747    for npa in crate::rdf_preprocess::collect_reified_npas(preprocessed_rdf) {
748        validate_supplement_iris([&npa.subject, &npa.object, &npa.property])?;
749        let mut body = format!(
750            "Declaration(NamedIndividual(<{}>))\n\
751             Declaration(NamedIndividual(<{}>))\n\
752             Declaration(ObjectProperty(<{}>))\n\
753             NegativeObjectPropertyAssertion(<{}> <{}> <{}>)",
754            npa.subject, npa.object, npa.property, npa.property, npa.subject, npa.object
755        );
756        if let Some((prop, object)) = npa.positive_property {
757            validate_supplement_iris([&prop, &object])?;
758            body.push_str(&format!(
759                "\nObjectPropertyAssertion(<{prop}> <{}> <{object}>)",
760                npa.subject
761            ));
762        }
763        let ofn = format!(
764            "Prefix(owl:=<http://www.w3.org/2002/07/owl#>)\n\
765             Ontology(<http://example.org/npa-supplement>\n{body}\n)"
766        );
767        merge_ofn_supplement(ontology, report, limits, &mut harvested, &ofn)?;
768    }
769    Ok(())
770}
771
772fn total_axiom_count(ontology: &Ontology) -> usize {
773    ontology
774        .axiom_count()
775        .saturating_add(ontology.dl().axiom_count())
776}
777
778fn merge_rdf_owl_imports(
779    path: &Path,
780    preprocessed_rdf: &str,
781    ontology: &mut Ontology,
782    report: &mut ParseReport,
783    limits: ParseLimits,
784    base: Option<&Path>,
785) -> Result<()> {
786    use std::collections::HashSet;
787    let mut visited = HashSet::from([path.to_path_buf()]);
788    for import_iri in crate::rdf_preprocess::collect_owl_imports(preprocessed_rdf) {
789        let Some(import_path) = resolve_owl_import_path(path, &import_iri) else {
790            continue;
791        };
792        if !visited.insert(import_path.clone()) {
793            continue;
794        }
795        let imported = load_ontology_with_limits_and_base_inner(&import_path, limits, base, false)?;
796        if total_axiom_count(ontology).saturating_add(total_axiom_count(&imported))
797            > limits.max_axioms
798        {
799            if limits.strict {
800                return Err(Error::Parse(format!(
801                    "import merge would exceed axiom limit {} (current {} + import {})",
802                    limits.max_axioms,
803                    total_axiom_count(ontology),
804                    total_axiom_count(&imported)
805                )));
806            }
807            report.meta.warn(format!(
808                "skipping import {import_iri}: would exceed axiom limit {}",
809                limits.max_axioms
810            ));
811            continue;
812        }
813        if ontology
814            .entity_count()
815            .saturating_add(imported.entity_count())
816            > limits.max_entities
817        {
818            if limits.strict {
819                return Err(Error::Parse(format!(
820                    "import merge would exceed entity limit {} (current {} + import {})",
821                    limits.max_entities,
822                    ontology.entity_count(),
823                    imported.entity_count()
824                )));
825            }
826            report.meta.warn(format!(
827                "skipping import {import_iri}: would exceed entity limit {}",
828                limits.max_entities
829            ));
830            continue;
831        }
832        let before = ontology.axiom_count();
833        merge_supplement_ontology(ontology, &imported, report, limits)?;
834        report.meta.mapped_axiom_count += ontology.axiom_count().saturating_sub(before);
835    }
836    Ok(())
837}
838
839fn resolve_owl_import_path(current: &Path, import_iri: &str) -> Option<PathBuf> {
840    if import_iri == "http://www.owllink.org/ontologies/families" {
841        let candidate = current.parent()?.join("families.owl");
842        if candidate.is_file() {
843            return Some(candidate);
844        }
845    }
846    if let Some(filename) = import_iri.strip_prefix("http://www.iyouit.eu/") {
847        let candidate = current.parent()?.join(filename);
848        if candidate.is_file() {
849            return Some(candidate);
850        }
851    }
852    resolve_wg_import_path(current, import_iri)
853}
854
/// Resolves the cross-imports between the two `WebOnt-miscellaneous` test cases.
///
/// `current` must live in a directory named after one of the two test cases and the last
/// `/`-separated segment of `import_iri` must be the matching `consistentNNN` name; the result
/// is then the `premise.rdf` of the *other* test case, located next to the current case
/// directory. Returns `None` for any other combination or when that file does not exist.
fn resolve_wg_import_path(current: &Path, import_iri: &str) -> Option<PathBuf> {
    let iri_tail = import_iri.rsplit('/').next()?;
    let case_path = current.parent()?;
    let case_name = case_path.file_name()?.to_str()?;
    let suite_root = case_path.parent()?;

    let sibling_premise = if case_name == "TestCase-3AWebOnt-2Dmiscellaneous-2D001"
        && iri_tail == "consistent001"
    {
        "TestCase-3AWebOnt-2Dmiscellaneous-2D002/premise.rdf"
    } else if case_name == "TestCase-3AWebOnt-2Dmiscellaneous-2D002"
        && iri_tail == "consistent002"
    {
        "TestCase-3AWebOnt-2Dmiscellaneous-2D001/premise.rdf"
    } else {
        return None;
    };

    let target = suite_root.join(sibling_premise);
    if target.is_file() {
        Some(target)
    } else {
        None
    }
}
871
872fn merge_supplement_with_accounting(
873    ontology: &mut Ontology,
874    report: &mut ParseReport,
875    limits: ParseLimits,
876    supplement: &Ontology,
877) -> Result<()> {
878    let before = ontology.axiom_count();
879    merge_supplement_ontology(ontology, supplement, report, limits)?;
880    report.meta.mapped_axiom_count += ontology.axiom_count().saturating_sub(before);
881    Ok(())
882}
883
884fn ensure_entity(ontology: &mut Ontology, iri: &str, kind: EntityKind) -> Result<EntityId> {
885    ontology
886        .entity_id(iri, kind)
887        .map_err(|e| Error::Parse(e.to_string()))
888}
889
890fn insert_same_individual_supplement(
891    ontology: &mut Ontology,
892    report: &mut ParseReport,
893    left: &str,
894    right: &str,
895) -> Result<()> {
896    if left == right {
897        return Ok(());
898    }
899    let left_id = ensure_entity(ontology, left, EntityKind::Individual)?;
900    let right_id = ensure_entity(ontology, right, EntityKind::Individual)?;
901    let before = ontology.axiom_count();
902    ontology
903        .add_axiom(Axiom::SameIndividual(vec![left_id, right_id]))
904        .map_err(|e| Error::Parse(e.to_string()))?;
905    report.meta.mapped_axiom_count += ontology.axiom_count().saturating_sub(before);
906    Ok(())
907}
908
909fn entity_kind_for_iri(ontology: &Ontology, iri: &str) -> Option<EntityKind> {
910    let id = ontology.lookup_entity(iri)?;
911    ontology.entity(id).ok().map(|record| record.kind)
912}
913
914fn insert_property_disjoint_supplement(
915    ontology: &mut Ontology,
916    report: &mut ParseReport,
917    left: &str,
918    right: &str,
919) -> Result<()> {
920    let left_kind = entity_kind_for_iri(ontology, left);
921    let right_kind = entity_kind_for_iri(ontology, right);
922    let cross_kind = matches!(left_kind, Some(EntityKind::DataProperty))
923        && matches!(right_kind, Some(EntityKind::ObjectProperty))
924        || matches!(left_kind, Some(EntityKind::ObjectProperty))
925            && matches!(right_kind, Some(EntityKind::DataProperty));
926    if cross_kind {
927        report.meta.warn(
928            "propertyDisjointWith across data and object property kinds skipped in lenient parse",
929        );
930        return Ok(());
931    }
932    if matches!(left_kind, Some(EntityKind::DataProperty))
933        || matches!(right_kind, Some(EntityKind::DataProperty))
934    {
935        insert_disjoint_data_properties_supplement(ontology, report, left, right)
936    } else {
937        insert_disjoint_object_properties_supplement(ontology, report, left, right)
938    }
939}
940
941fn insert_disjoint_object_properties_supplement(
942    ontology: &mut Ontology,
943    report: &mut ParseReport,
944    left: &str,
945    right: &str,
946) -> Result<()> {
947    let left_id = ensure_entity(ontology, left, EntityKind::ObjectProperty)?;
948    let right_id = ensure_entity(ontology, right, EntityKind::ObjectProperty)?;
949    let before = ontology.dl().axiom_count();
950    ontology
951        .dl_mut()
952        .push_axiom(DlAxiom::DisjointObjectProperties(vec![left_id, right_id]));
953    report.meta.mapped_axiom_count += ontology.dl().axiom_count().saturating_sub(before);
954    Ok(())
955}
956
957fn insert_disjoint_data_properties_supplement(
958    ontology: &mut Ontology,
959    report: &mut ParseReport,
960    left: &str,
961    right: &str,
962) -> Result<()> {
963    let left_id = ensure_entity(ontology, left, EntityKind::DataProperty)?;
964    let right_id = ensure_entity(ontology, right, EntityKind::DataProperty)?;
965    let before = ontology.dl().axiom_count();
966    ontology
967        .dl_mut()
968        .push_axiom(DlAxiom::DisjointDataProperties(vec![left_id, right_id]));
969    report.meta.mapped_axiom_count += ontology.dl().axiom_count().saturating_sub(before);
970    Ok(())
971}
972
973fn merge_supplement_ontology(
974    target: &mut Ontology,
975    source: &Ontology,
976    report: &mut ParseReport,
977    limits: ParseLimits,
978) -> Result<()> {
979    use ontologos_core::EntityKind;
980    use std::collections::HashMap;
981    for (_, record) in source.entities().iter() {
982        let iri = source
983            .resolve_iri(record.iri)
984            .map_err(|e| Error::Parse(e.to_string()))?;
985        if let Some(existing) = target.lookup_entity(iri) {
986            let existing_kind = target.entity(existing)?.kind;
987            if !existing_kind.satisfies(record.kind) {
988                match EntityKind::merge_punning(existing_kind, record.kind) {
989                    Some(_) => {}
990                    None => {
991                        report.meta.warn(format!(
992                            "import entity kind conflict for {iri}: {:?} vs {:?}",
993                            existing_kind, record.kind
994                        ));
995                    }
996                }
997            }
998        } else {
999            target
1000                .entity_id(iri, record.kind)
1001                .map_err(|e| Error::Parse(e.to_string()))?;
1002        }
1003    }
1004    let entity_map: HashMap<_, _> = source
1005        .entities()
1006        .iter()
1007        .filter_map(|(id, record)| {
1008            let iri = source.resolve_iri(record.iri).ok()?;
1009            Some((id, target.lookup_entity(iri)?))
1010        })
1011        .collect();
1012    for (id, _) in source.entities().iter() {
1013        if !entity_map.contains_key(&id) {
1014            return Err(Error::Parse(format!(
1015                "supplement entity {id:?} missing after merge"
1016            )));
1017        }
1018    }
1019    target.dl_mut().import_axioms_from(source.dl(), |id| {
1020        entity_map
1021            .get(&id)
1022            .copied()
1023            .expect("supplement entities validated above")
1024    });
1025    for (_, axiom) in source.axioms().iter() {
1026        let remapped = remap_supplement_axiom(axiom, &entity_map)?;
1027        if let Err(e) = target.add_axiom(remapped) {
1028            if matches!(axiom, Axiom::ObjectPropertyRange { .. }) {
1029                report.meta.skipped_axiom_count += 1;
1030                report.meta.warn(format!(
1031                    "skipping conflicting ObjectPropertyRange during merge: {e}"
1032                ));
1033                if limits.strict {
1034                    return Err(Error::Parse(e.to_string()));
1035                }
1036                continue;
1037            }
1038            return Err(Error::Parse(e.to_string()));
1039        }
1040    }
1041    Ok(())
1042}
1043
1044fn remap_supplement_axiom(
1045    axiom: &Axiom,
1046    entity_map: &std::collections::HashMap<EntityId, EntityId>,
1047) -> Result<Axiom> {
1048    let remap = |id: EntityId| -> Result<EntityId> {
1049        entity_map
1050            .get(&id)
1051            .copied()
1052            .ok_or_else(|| Error::Parse(format!("supplement entity {id:?} missing after merge")))
1053    };
1054    let remap_vec =
1055        |ids: &[EntityId]| -> Result<Vec<EntityId>> { ids.iter().map(|id| remap(*id)).collect() };
1056    Ok(match axiom {
1057        Axiom::SubClassOf {
1058            subclass,
1059            superclass,
1060        } => Axiom::SubClassOf {
1061            subclass: remap(*subclass)?,
1062            superclass: remap(*superclass)?,
1063        },
1064        Axiom::EquivalentClasses(classes) => Axiom::EquivalentClasses(remap_vec(classes)?),
1065        Axiom::DisjointClasses(classes) => Axiom::DisjointClasses(remap_vec(classes)?),
1066        Axiom::ObjectPropertyDomain { property, domain } => Axiom::ObjectPropertyDomain {
1067            property: remap(*property)?,
1068            domain: remap(*domain)?,
1069        },
1070        Axiom::ObjectPropertyRange { property, range } => Axiom::ObjectPropertyRange {
1071            property: remap(*property)?,
1072            range: remap(*range)?,
1073        },
1074        Axiom::SubObjectPropertyOf {
1075            sub_property,
1076            super_property,
1077        } => Axiom::SubObjectPropertyOf {
1078            sub_property: remap(*sub_property)?,
1079            super_property: remap(*super_property)?,
1080        },
1081        Axiom::InverseObjectProperties { left, right } => Axiom::InverseObjectProperties {
1082            left: remap(*left)?,
1083            right: remap(*right)?,
1084        },
1085        Axiom::TransitiveObjectProperty(p) => Axiom::TransitiveObjectProperty(remap(*p)?),
1086        Axiom::SubClassOfExistential {
1087            subclass,
1088            property,
1089            filler,
1090        } => Axiom::SubClassOfExistential {
1091            subclass: remap(*subclass)?,
1092            property: remap(*property)?,
1093            filler: remap(*filler)?,
1094        },
1095        Axiom::SymmetricObjectProperty(p) => Axiom::SymmetricObjectProperty(remap(*p)?),
1096        Axiom::ReflexiveObjectProperty(p) => Axiom::ReflexiveObjectProperty(remap(*p)?),
1097        Axiom::FunctionalObjectProperty(p) => Axiom::FunctionalObjectProperty(remap(*p)?),
1098        Axiom::InverseFunctionalObjectProperty(p) => {
1099            Axiom::InverseFunctionalObjectProperty(remap(*p)?)
1100        }
1101        Axiom::IrreflexiveObjectProperty(p) => Axiom::IrreflexiveObjectProperty(remap(*p)?),
1102        Axiom::AsymmetricObjectProperty(p) => Axiom::AsymmetricObjectProperty(remap(*p)?),
1103        Axiom::EquivalentObjectProperties(props) => {
1104            Axiom::EquivalentObjectProperties(remap_vec(props)?)
1105        }
1106        Axiom::ClassAssertion { individual, class } => Axiom::ClassAssertion {
1107            individual: remap(*individual)?,
1108            class: remap(*class)?,
1109        },
1110        Axiom::ObjectPropertyAssertion {
1111            subject,
1112            property,
1113            object,
1114        } => Axiom::ObjectPropertyAssertion {
1115            subject: remap(*subject)?,
1116            property: remap(*property)?,
1117            object: remap(*object)?,
1118        },
1119        Axiom::DataPropertyAssertion {
1120            individual,
1121            property,
1122            value,
1123        } => Axiom::DataPropertyAssertion {
1124            individual: remap(*individual)?,
1125            property: remap(*property)?,
1126            value: value.clone(),
1127        },
1128        Axiom::NegativeObjectPropertyAssertion {
1129            subject,
1130            property,
1131            object,
1132        } => Axiom::NegativeObjectPropertyAssertion {
1133            subject: remap(*subject)?,
1134            property: remap(*property)?,
1135            object: remap(*object)?,
1136        },
1137        Axiom::NegativeDataPropertyAssertion {
1138            individual,
1139            property,
1140            value,
1141        } => Axiom::NegativeDataPropertyAssertion {
1142            individual: remap(*individual)?,
1143            property: remap(*property)?,
1144            value: value.clone(),
1145        },
1146        Axiom::SameIndividual(ids) => Axiom::SameIndividual(remap_vec(ids)?),
1147        Axiom::DifferentIndividuals(ids) => Axiom::DifferentIndividuals(remap_vec(ids)?),
1148    })
1149}
1150
1151fn open_for_load(path: &Path, base: Option<&Path>) -> Result<File> {
1152    let pre_meta = std::fs::symlink_metadata(path)?;
1153    let file = open_readonly_nofollow(path)?;
1154    if let Some(base) = base {
1155        verify_opened_under_base(&file, base, path, &pre_meta)?;
1156    }
1157    Ok(file)
1158}
1159
1160fn open_readonly_nofollow(path: &Path) -> Result<File> {
1161    #[cfg(unix)]
1162    {
1163        use std::fs::OpenOptions;
1164        use std::os::unix::fs::OpenOptionsExt;
1165        OpenOptions::new()
1166            .read(true)
1167            .custom_flags(O_NOFOLLOW)
1168            .open(path)
1169            .map_err(|e| Error::Parse(e.to_string()))
1170    }
1171    #[cfg(not(unix))]
1172    {
1173        Ok(File::open(path)?)
1174    }
1175}
1176
1177fn verify_opened_under_base(
1178    file: &File,
1179    base: &Path,
1180    validated: &Path,
1181    pre_meta: &std::fs::Metadata,
1182) -> Result<()> {
1183    #[cfg(unix)]
1184    use std::os::unix::fs::MetadataExt;
1185
1186    let file_meta = file.metadata()?;
1187    #[cfg(unix)]
1188    if pre_meta.dev() != file_meta.dev() || pre_meta.ino() != file_meta.ino() {
1189        return Err(Error::Parse(
1190            "ontology path changed between validation and open".into(),
1191        ));
1192    }
1193    #[cfg(not(unix))]
1194    let _ = (pre_meta, file_meta);
1195
1196    let base_normalized = normalize_path(base)?;
1197    let base_canon = base_normalized
1198        .canonicalize()
1199        .map_err(|e| Error::Parse(e.to_string()))?;
1200
1201    if let Ok(opened) = opened_path(file) {
1202        let opened_canon = opened
1203            .canonicalize()
1204            .map_err(|e| Error::Parse(e.to_string()))?;
1205        if !path_is_under_base(&opened_canon, &base_canon) {
1206            return Err(Error::Parse(format!(
1207                "opened file {} escapes allowed base {}",
1208                opened_canon.display(),
1209                base_canon.display()
1210            )));
1211        }
1212        return Ok(());
1213    }
1214
1215    let validated_canon = validated
1216        .canonicalize()
1217        .map_err(|e| Error::Parse(e.to_string()))?;
1218    if !path_is_under_base(&validated_canon, &base_canon) {
1219        return Err(Error::Parse(format!(
1220            "path {} escapes allowed base {}",
1221            validated_canon.display(),
1222            base_canon.display()
1223        )));
1224    }
1225    Ok(())
1226}
1227
1228#[cfg(target_os = "linux")]
1229fn opened_path(file: &File) -> Result<PathBuf> {
1230    use std::os::unix::io::AsRawFd;
1231    let fd = file.as_raw_fd();
1232    Ok(std::fs::read_link(format!("/proc/self/fd/{fd}"))?)
1233}
1234
/// macOS: resolves an open file handle to a path via `fcntl(F_GETPATH)`.
///
/// The bytes returned by the kernel are turned into a `PathBuf` without any lossy UTF-8
/// conversion, so paths containing non-UTF-8 bytes still name the file that was opened.
///
/// # Errors
/// `Error::Parse` when `fcntl` fails or the buffer holds no NUL terminator.
#[cfg(target_os = "macos")]
fn opened_path(file: &File) -> Result<PathBuf> {
    use std::ffi::{CStr, OsStr};
    use std::os::unix::ffi::OsStrExt;
    use std::os::unix::io::AsRawFd;

    const F_GETPATH: i32 = 50;
    let fd = file.as_raw_fd();
    // Sized to macOS's MAXPATHLEN (1024), the buffer size `F_GETPATH` expects.
    let mut buf = [0u8; 1024];
    // SAFETY: `fcntl(F_GETPATH)` is the macOS API for resolving an open fd to its path; `fd`
    // belongs to the live `file` borrow and `buf` is a writable 1024-byte buffer.
    #[allow(unsafe_code)]
    let rc = unsafe { libc::fcntl(fd, F_GETPATH, buf.as_mut_ptr()) };
    if rc == -1 {
        return Err(Error::Parse("fcntl(F_GETPATH) failed".into()));
    }
    let cstr = CStr::from_bytes_until_nul(&buf).map_err(|e| Error::Parse(e.to_string()))?;
    // Keep the raw bytes: a lossy conversion would rewrite invalid UTF-8 and name another path.
    Ok(PathBuf::from(OsStr::from_bytes(cstr.to_bytes())))
}
1252
/// Other platforms: no way to map a handle back to a path, so this always fails and callers
/// fall back to the pre-validated path.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
fn opened_path(_file: &File) -> Result<PathBuf> {
    let reason = "fd path resolution unavailable";
    Err(Error::Parse(reason.into()))
}
1257
1258fn detect_format_with_sniff(path: &Path, reader: &mut (impl Read + Seek)) -> Result<Format> {
1259    if let Some(format) = detect_format(path) {
1260        reader
1261            .seek(SeekFrom::Start(0))
1262            .map_err(|e| Error::Parse(e.to_string()))?;
1263        return Ok(format);
1264    }
1265
1266    let header = sniff_and_rewind(reader, 4096)?;
1267    if let Some(format) = detect_format_from_bytes(&header) {
1268        return Ok(format);
1269    }
1270    if detect_turtle_from_bytes(&header) {
1271        return Ok(Format::Turtle);
1272    }
1273    if detect_functional_from_bytes(&header) {
1274        return Ok(Format::Functional);
1275    }
1276
1277    Err(Error::UnsupportedFormat(format!(
1278        "could not detect OWL/RDF format for {}",
1279        path.display()
1280    )))
1281}
1282
1283fn normalize_path(path: &Path) -> Result<PathBuf> {
1284    let base = if path.is_absolute() {
1285        PathBuf::new()
1286    } else {
1287        std::env::current_dir()?
1288    };
1289
1290    let mut normalized = base;
1291    for component in path.components() {
1292        match component {
1293            Component::Prefix(_) | Component::RootDir => normalized.push(component.as_os_str()),
1294            Component::CurDir => {}
1295            Component::ParentDir => {
1296                if !normalized.pop() {
1297                    return Err(Error::Parse("path escapes beyond filesystem root".into()));
1298                }
1299            }
1300            Component::Normal(part) => normalized.push(part),
1301        }
1302    }
1303
1304    if normalized.exists() {
1305        normalized = normalized
1306            .canonicalize()
1307            .map_err(|e| Error::Parse(e.to_string()))?;
1308    }
1309
1310    Ok(normalized)
1311}
1312
/// Reports whether `path` equals `base` or lies below it.
///
/// The comparison is made on whole path components, so `/a/bc` is not considered to be under
/// `/a/b`.
fn path_is_under_base(path: &Path, base: &Path) -> bool {
    path.starts_with(base)
}
1324
1325/// Parse OWL Functional Syntax from an in-memory document (no temp file).
1326pub fn load_ofn_from_str(text: &str) -> Result<Ontology> {
1327    load_ofn_from_str_validated(text, ParseLimits::default())
1328}
1329
1330/// Parse in-memory OFN and run post-load validation (public callers).
1331pub fn load_ofn_from_str_validated(text: &str, limits: ParseLimits) -> Result<Ontology> {
1332    if text.len() > limits.max_file_bytes {
1333        return Err(Error::Parse(format!(
1334            "in-memory OFN size {} exceeds limit of {} bytes",
1335            text.len(),
1336            limits.max_file_bytes
1337        )));
1338    }
1339    let set_ontology = read_horned_owl_from_reader(
1340        &mut std::io::Cursor::new(text.as_bytes()),
1341        Format::Functional,
1342        limits,
1343    )?;
1344    let (ontology, report) = map_to_core(&set_ontology, limits)?;
1345    finalize_parsed_ontology(ontology, report, limits, true)
1346}
1347
1348/// Parse OWL Functional Syntax from an in-memory document with custom limits.
1349pub fn load_ofn_from_str_with_limits(text: &str, limits: ParseLimits) -> Result<Ontology> {
1350    if text.len() > limits.max_file_bytes {
1351        return Err(Error::Parse(format!(
1352            "in-memory OFN size {} exceeds limit of {} bytes",
1353            text.len(),
1354            limits.max_file_bytes
1355        )));
1356    }
1357    let set_ontology = read_horned_owl_from_reader(
1358        &mut std::io::Cursor::new(text.as_bytes()),
1359        Format::Functional,
1360        limits,
1361    )?;
1362    let (ontology, report) = map_to_core(&set_ontology, limits)?;
1363    finalize_parsed_ontology(ontology, report, limits, false)
1364}
1365
1366/// Load an OFN ontology and append axioms from a second OFN fragment (same prefixes/IRIs).
1367pub fn load_ofn_with_incremental(base: &Path, incremental: &Path) -> Result<Ontology> {
1368    load_ofn_with_incremental_and_limits(base, incremental, ParseLimits::default(), None)
1369}
1370
1371/// Load and merge OFN documents with path validation and parse limits.
1372pub fn load_ofn_with_incremental_and_limits(
1373    base: &Path,
1374    incremental: &Path,
1375    limits: ParseLimits,
1376    sandbox_base: Option<&Path>,
1377) -> Result<Ontology> {
1378    let base_path = validate_load_path(base, sandbox_base)?;
1379    let inc_path = validate_load_path(incremental, sandbox_base)?;
1380    let base_text = read_text_file_with_limit(&base_path, limits)?;
1381    let inc_text = read_text_file_with_limit(&inc_path, limits)?;
1382    let merged = merge_ofn_documents(&base_text, &inc_text)?;
1383    if merged.len() > limits.max_file_bytes {
1384        return Err(Error::Parse(format!(
1385            "merged OFN size {} exceeds limit of {} bytes",
1386            merged.len(),
1387            limits.max_file_bytes
1388        )));
1389    }
1390    load_ofn_from_str_validated(&merged, limits)
1391}
1392
1393fn merge_ofn_documents(base: &str, incremental: &str) -> Result<String> {
1394    let inc_axioms = extract_ofn_axiom_body(incremental)
1395        .ok_or_else(|| Error::Parse("incremental OFN missing Ontology(...) body".into()))?;
1396    let close = find_ofn_ontology_close(base)
1397        .ok_or_else(|| Error::Parse("base OFN missing closing ')'".into()))?;
1398    Ok(format!("{}{})", &base[..close], inc_axioms))
1399}
1400
/// Index of the closing `)` for the outer `Ontology(...)` form.
///
/// Parentheses are ignored while inside a quoted string (with `\` escapes honoured) and while
/// inside a full IRI written as `<...>`, since IRIs may legally contain `(` and `)`.
///
/// Returns `None` when `text` has no `Ontology(` marker or the form is never closed.
fn find_ofn_ontology_close(text: &str) -> Option<usize> {
    let marker = "Ontology(";
    let start = text.find(marker)? + marker.len();
    let mut depth = 1usize;
    let mut in_str = false;
    let mut escape = false;
    let mut in_iri = false;
    for (i, ch) in text[start..].char_indices() {
        if in_str {
            if escape {
                // The escaped character is consumed verbatim.
                escape = false;
                continue;
            }
            if ch == '\\' {
                escape = true;
                continue;
            }
            if ch == '"' {
                in_str = false;
            }
            continue;
        }
        if in_iri {
            // Everything up to the terminating '>' belongs to the IRI, parentheses included.
            if ch == '>' {
                in_iri = false;
            }
            continue;
        }
        match ch {
            '"' => in_str = true,
            '<' => in_iri = true,
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(start + i);
                }
            }
            _ => {}
        }
    }
    None
}
1437
1438fn extract_ofn_axiom_body(text: &str) -> Option<String> {
1439    let marker = "Ontology(";
1440    let start = text.find(marker)? + marker.len();
1441    let rest = text.get(start..)?;
1442    let end = find_ofn_ontology_close(text)? - start;
1443    let mut body = rest[..end].trim();
1444    if body.starts_with('<') {
1445        if let Some((_, axioms)) = body.split_once('\n') {
1446            body = axioms.trim();
1447        } else if let Some((_, axioms)) = body.split_once(' ') {
1448            body = axioms.trim();
1449        }
1450    }
1451    Some(format!(" {body}"))
1452}
1453
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A `)` inside a string literal of the base document must not be mistaken for the end of
    /// the `Ontology(...)` form when splicing in the incremental axioms.
    #[test]
    fn merge_ofn_preserves_literal_with_closing_paren() {
        let base = concat!(
            "Prefix(:=<file:/c/test.owl#>)\n",
            "Ontology(<file:/c/test.owl#>\n",
            "Class(:A)\n",
            "AnnotationAssertion(rdfs:comment :A \"note with ) inside\")\n",
            ")"
        );
        let incremental = concat!(
            "Prefix(:=<file:/c/test.owl#>)\n",
            "Ontology(<file:/c/test.owl#>\n",
            "ClassAssertion(:A :a)\n",
            ")"
        );
        let merged = merge_ofn_documents(base, incremental).expect("merge");
        assert!(merged.contains("note with ) inside"));
        assert!(merged.contains("ClassAssertion(:A :a)"));
        assert!(merged.ends_with("ClassAssertion(:A :a))"));
    }

    /// Input longer than the configured byte limit is rejected up front.
    #[test]
    fn load_ofn_from_str_rejects_oversized_input() {
        let limits = ParseLimits::with_file_bytes(16);
        let err = load_ofn_from_str_with_limits("Ontology(<x>)", limits).expect_err("size");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A minimal document with one class assertion yields at least one axiom.
    #[test]
    fn load_ofn_from_str_parses_class_assertion() {
        let ofn = concat!(
            "Prefix(:=<file:/c/test.owl#>)\n",
            "Ontology(<file:/c/test.owl#>\n",
            "ClassAssertion(:A :a)\n",
            ")"
        );
        let ontology = load_ofn_from_str(ofn).expect("parse");
        assert!(ontology.axiom_count() > 0);
    }

    /// `..` components that climb out of the sandbox base are refused.
    #[test]
    fn rejects_path_traversal_outside_base() {
        let base = std::env::current_dir().expect("cwd");
        let err = validate_load_path(Path::new("../../../etc/passwd"), Some(&base))
            .expect_err("traversal");
        assert!(matches!(err, Error::Parse(_)));
    }

    /// A sibling directory whose name merely starts with the base directory's name is not
    /// "under" the base.
    #[test]
    fn rejects_path_prefix_bypass() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_uploads_base");
        let evil = parent.join("ontologos_uploads_base_evil");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::create_dir_all(&evil).expect("create evil sibling");
        let file = evil.join("secret.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let err = validate_load_path(&file, Some(&base)).expect_err("prefix bypass");
        assert!(matches!(err, Error::Parse(_)));

        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&evil);
        let _ = std::fs::remove_dir(&base);
    }

    /// A file in a nested directory below the base is accepted.
    #[test]
    fn path_is_under_base_accepts_nested_file() {
        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_nested_base");
        let nested = base.join("nested");
        std::fs::create_dir_all(&nested).expect("create nested");
        let file = nested.join("ontology.owl");
        std::fs::write(&file, b"<rdf:RDF/>").expect("write file");

        let validated = validate_load_path(&file, Some(&base)).expect("nested file under base");
        assert!(path_is_under_base(
            &validated,
            &base.canonicalize().expect("canonicalize base")
        ));

        let _ = std::fs::remove_file(&file);
        let _ = std::fs::remove_dir(&nested);
        let _ = std::fs::remove_dir(&base);
    }

    /// A symlink inside the sandbox that points at a file outside of it must not be loaded.
    #[cfg(unix)]
    #[test]
    fn sandboxed_load_does_not_follow_symlink_to_outside_file() {
        use std::os::unix::fs::symlink;

        let parent = std::env::temp_dir();
        let base = parent.join("ontologos_sandbox_base");
        let outside = parent.join("ontologos_outside_secret.owl");
        let link = base.join("ontology.owl");
        std::fs::create_dir_all(&base).expect("create base");
        std::fs::write(&outside, b"OUTSIDE_SECRET_CONTENT").expect("write outside");

        // A previous failed run may have left the link behind; `symlink` refuses to overwrite.
        let _ = std::fs::remove_file(&link);
        symlink(&outside, &link).expect("symlink");

        let err = load_ontology_in(&base, &link).expect_err("symlink escape");
        assert!(matches!(err, Error::Parse(_) | Error::UnsupportedFormat(_)));

        let _ = std::fs::remove_file(&link);
        let _ = std::fs::remove_file(&outside);
        let _ = std::fs::remove_dir(&base);
    }
}