Skip to main content

ontologos_parser/
lib.rs

1//! OWL and RDF syntax parsers for OntoLogos.
2//!
3//! v0.2 loads OWL/XML, RDF/XML, Turtle, and OWL Functional Syntax into
4//! [`ontologos_core::Ontology`] via [`load_ontology`].
5//!
6//! # Example
7//!
8//! ```no_run
9//! use ontologos_parser::load_ontology;
10//!
11//! let ontology = load_ontology(std::path::Path::new("ontology.owl"))?;
12//! println!("axioms: {}", ontology.axiom_count());
13//! # Ok::<(), ontologos_parser::Error>(())
14//! ```
15//!
16//! See [load guide](https://github.com/eddiethedean/ontologos/blob/main/docs/getting-started/load-owl-file.md).
17
18#![warn(missing_docs)]
19
20mod error;
21mod limits;
22mod load;
23mod map;
24mod map_dl;
25mod map_swrl;
26mod rdf_preprocess;
27mod read;
28mod report;
29mod validate;
30
31pub use error::{Error, Result};
32pub use limits::ParseLimits;
33pub use load::{
34    load_ofn_from_str, load_ofn_from_str_with_limits, load_ofn_with_incremental,
35    load_ofn_with_incremental_and_limits, load_ontology, load_ontology_in, load_ontology_lenient,
36    load_ontology_lenient_in, load_ontology_with_limits, load_ontology_with_limits_and_base,
37    validate_load_path,
38};
39pub use rdf_preprocess::{expand_xml_entities, expand_xml_entities_with_limit};
40pub use read::{detect_turtle_from_bytes, read_horned_owl_from_reader, sniff_file_header};
41pub use validate::{validate_loaded_ontology, validate_loaded_ontology_light};
42
/// Ontology serialization formats supported by this crate.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Format {
    /// OWL/XML serialization.
    OwlXml,
    /// RDF/XML serialization.
    RdfXml,
    /// Turtle (RDF Turtle) serialization.
    Turtle,
    /// OWL Functional Syntax (`.ofn`, `.func`).
    Functional,
}
55
56/// Detect format from file content bytes.
57#[must_use]
58pub fn detect_format_from_bytes(header: &[u8]) -> Option<Format> {
59    if detect_functional_from_bytes(header) {
60        return Some(Format::Functional);
61    }
62    let text = std::str::from_utf8(header).ok()?;
63    let trimmed = text.trim_start();
64    if trimmed.contains("rdf:RDF") || trimmed.contains("<rdf:RDF") {
65        return Some(Format::RdfXml);
66    }
67    if trimmed.contains("<Ontology ") || trimmed.contains(":Ontology ") {
68        return Some(Format::OwlXml);
69    }
70    if detect_turtle_from_bytes(header) {
71        return Some(Format::Turtle);
72    }
73    None
74}
75
/// Detect OWL Functional Syntax from a file header.
///
/// Returns `true` when the header, after leading whitespace is skipped,
/// starts with `Prefix(` or `Ontology(`.
///
/// `header` is typically a fixed-size prefix of a file and may therefore end
/// in the middle of a multi-byte UTF-8 character; such an incomplete trailing
/// sequence is ignored. Any other invalid UTF-8 yields `false`.
#[must_use]
pub fn detect_functional_from_bytes(header: &[u8]) -> bool {
    let text = match std::str::from_utf8(header) {
        Ok(text) => text,
        // `error_len() == None` means the input simply ended mid-character
        // (a truncated sniff buffer); everything before that point is valid.
        Err(err) if err.error_len().is_none() => {
            match std::str::from_utf8(&header[..err.valid_up_to()]) {
                Ok(text) => text,
                Err(_) => return false,
            }
        }
        Err(_) => return false,
    };
    let text = text.trim_start();
    ["Prefix(", "Ontology("]
        .iter()
        .any(|keyword| text.starts_with(keyword))
}
85
86/// Detect the most likely format from a file path and optional content sniffing.
87#[must_use]
88pub fn detect_format(path: &std::path::Path) -> Option<Format> {
89    match path.extension()?.to_str()? {
90        "owl" => sniff_xml_format(path),
91        "xml" => sniff_xml_format(path),
92        "rdf" => Some(Format::RdfXml),
93        "ttl" | "turtle" => Some(Format::Turtle),
94        "ofn" | "func" => Some(Format::Functional),
95        _ => None,
96    }
97}
98
99fn sniff_xml_format(path: &std::path::Path) -> Option<Format> {
100    let header = sniff_file_header(path, 4096).ok()?;
101    detect_format_from_bytes(&header)
102}
103
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Extensions that map to a format without reading the file.
    #[test]
    fn detect_format_by_extension() {
        let cases = [
            ("p.rdf", Some(Format::RdfXml)),
            ("p.ttl", Some(Format::Turtle)),
            ("p.turtle", Some(Format::Turtle)),
            ("p.ofn", Some(Format::Functional)),
            ("p.func", Some(Format::Functional)),
            ("p.txt", None),
            ("noext", None),
        ];
        for (name, expected) in cases {
            assert_eq!(detect_format(Path::new(name)), expected, "path: {name}");
        }
    }

    /// A `Prefix(`/`Ontology(` header is recognized as Functional Syntax.
    #[test]
    fn detect_functional_from_bytes_header() {
        let header = b"Prefix(:=<http://example.org/>)\nOntology(<http://example.org/o>)";
        assert!(detect_functional_from_bytes(header));
        assert_eq!(detect_format_from_bytes(header), Some(Format::Functional));
    }

    /// An unprefixed `<Ontology …>` element is recognized as OWL/XML.
    #[test]
    fn detect_format_from_bytes_owl_xml() {
        let header = br#"<?xml version="1.0"?><Ontology xmlns="http://www.w3.org/2002/07/owl#"/>"#;
        assert_eq!(detect_format_from_bytes(header), Some(Format::OwlXml));
    }

    /// An `rdf:RDF` root element is recognized as RDF/XML.
    #[test]
    fn detect_format_from_bytes_rdf_xml() {
        let header = br#"<?xml version="1.0"?><rdf:RDF/>"#;
        assert_eq!(detect_format_from_bytes(header), Some(Format::RdfXml));
    }

    /// A `.xml` file whose content is not an ontology yields no format.
    #[test]
    fn plain_xml_extension_without_sniff_returns_none() {
        let unique = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let path = std::env::temp_dir().join(format!(
            "ontologos_parser_test_{}_{}.xml",
            std::process::id(),
            unique
        ));
        std::fs::write(&path, b"<config><item/></config>").expect("write");

        let detected = detect_format(&path);
        // Clean up before asserting so a failing assertion cannot leave the
        // fixture behind in the temp directory.
        let _ = std::fs::remove_file(&path);

        assert_eq!(detected, None);
    }
}
157}