Skip to main content

ontologos_parser/
lib.rs

1//! OWL and RDF syntax parsers for OntoLogos.
2//!
3//! v0.2 loads OWL/XML, RDF/XML, Turtle, and OWL Functional Syntax into
4//! [`ontologos_core::Ontology`] via [`load_ontology`].
5//!
6//! # Example
7//!
8//! ```no_run
9//! use ontologos_parser::load_ontology;
10//!
11//! let ontology = load_ontology(std::path::Path::new("ontology.owl"))?;
12//! println!("axioms: {}", ontology.axiom_count());
13//! # Ok::<(), ontologos_parser::Error>(())
14//! ```
15//!
16//! See [load guide](https://github.com/eddiethedean/ontologos/blob/main/docs/getting-started/load-owl-file.md).
17
18#![warn(missing_docs)]
19
20mod error;
21mod limits;
22mod load;
23mod map;
24mod read;
25mod report;
26
27pub use error::{Error, Result};
28pub use limits::ParseLimits;
29pub use load::{
30    load_ontology, load_ontology_in, load_ontology_with_limits, load_ontology_with_limits_and_base,
31    validate_load_path,
32};
33pub use read::detect_turtle_from_bytes;
34
/// Ontology serialization formats recognised by the detection helpers in this crate.
///
/// The type is a plain, field-less enum: it is `Copy`, comparable, and hashable so it
/// can be used directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Format {
    /// OWL/XML syntax.
    OwlXml,
    /// RDF/XML syntax.
    RdfXml,
    /// Turtle / RDF Turtle.
    Turtle,
    /// OWL Functional Syntax (`.ofn`, `.func`).
    Functional,
}
47
48/// Detect format from file content bytes.
49#[must_use]
50pub fn detect_format_from_bytes(header: &[u8]) -> Option<Format> {
51    if detect_functional_from_bytes(header) {
52        return Some(Format::Functional);
53    }
54    let text = std::str::from_utf8(header).ok()?;
55    let trimmed = text.trim_start();
56    if trimmed.contains("rdf:RDF") || trimmed.contains("<rdf:RDF") {
57        return Some(Format::RdfXml);
58    }
59    if trimmed.contains("<Ontology ") || trimmed.contains(":Ontology ") {
60        return Some(Format::OwlXml);
61    }
62    if detect_turtle_from_bytes(header) {
63        return Some(Format::Turtle);
64    }
65    None
66}
67
/// Detect OWL Functional Syntax from a file header.
///
/// Returns `true` when the header, after skipping leading whitespace, begins with
/// `Prefix(` or `Ontology(`.
///
/// A header that ends in the middle of a multi-byte UTF-8 character (a fixed-size
/// prefix of a larger file) is examined up to its last complete character. A header
/// containing invalid UTF-8 anywhere else is reported as not functional syntax.
#[must_use]
pub fn detect_functional_from_bytes(header: &[u8]) -> bool {
    let text = match std::str::from_utf8(header) {
        Ok(text) => text,
        // `error_len() == None` signals an incomplete trailing character, i.e. a
        // truncated read rather than corrupt data.
        Err(err) if err.error_len().is_none() => {
            match std::str::from_utf8(&header[..err.valid_up_to()]) {
                Ok(prefix) => prefix,
                Err(_) => return false,
            }
        }
        Err(_) => return false,
    };
    let text = text.trim_start();
    text.starts_with("Prefix(") || text.starts_with("Ontology(")
}
77
78/// Detect the most likely format from a file path and optional content sniffing.
79#[must_use]
80pub fn detect_format(path: &std::path::Path) -> Option<Format> {
81    match path.extension()?.to_str()? {
82        "owl" => sniff_xml_format(path),
83        "xml" => sniff_xml_format(path),
84        "rdf" => Some(Format::RdfXml),
85        "ttl" | "turtle" => Some(Format::Turtle),
86        "ofn" | "func" => Some(Format::Functional),
87        _ => None,
88    }
89}
90
91fn sniff_xml_format(path: &std::path::Path) -> Option<Format> {
92    const SNIFF_BYTES: usize = 4096;
93    let mut header = vec![0_u8; SNIFF_BYTES];
94    let read = std::fs::File::open(path)
95        .and_then(|mut file| {
96            use std::io::Read;
97            file.read(&mut header)
98        })
99        .ok()?;
100    detect_format_from_bytes(&header[..read])
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::path::Path;

    /// Extensions that map straight to a format need no file on disk.
    #[test]
    fn detect_format_by_extension() {
        let cases = [
            ("p.rdf", Some(Format::RdfXml)),
            ("p.ttl", Some(Format::Turtle)),
            ("p.turtle", Some(Format::Turtle)),
            ("p.ofn", Some(Format::Functional)),
            ("p.func", Some(Format::Functional)),
            ("p.txt", None),
            ("noext", None),
        ];
        for &(file_name, expected) in &cases {
            assert_eq!(detect_format(Path::new(file_name)), expected);
        }
    }

    /// A functional-syntax header is recognised by both byte-level detectors.
    #[test]
    fn detect_functional_from_bytes_header() {
        let bytes = b"Prefix(:=<http://example.org/>)\nOntology(<http://example.org/o>)";
        assert!(detect_functional_from_bytes(bytes));
        assert_eq!(detect_format_from_bytes(bytes), Some(Format::Functional));
    }

    /// An `<Ontology ...>` root element identifies OWL/XML.
    #[test]
    fn detect_format_from_bytes_owl_xml() {
        let bytes = br#"<?xml version="1.0"?><Ontology xmlns="http://www.w3.org/2002/07/owl#"/>"#;
        let detected = detect_format_from_bytes(bytes);
        assert_eq!(detected, Some(Format::OwlXml));
    }

    /// An `rdf:RDF` root element identifies RDF/XML.
    #[test]
    fn detect_format_from_bytes_rdf_xml() {
        let bytes = br#"<?xml version="1.0"?><rdf:RDF/>"#;
        let detected = detect_format_from_bytes(bytes);
        assert_eq!(detected, Some(Format::RdfXml));
    }

    /// A `.xml` file whose content is not an ontology yields no format.
    #[test]
    fn plain_xml_extension_without_sniff_returns_none() {
        let scratch_dir = std::env::temp_dir().join("ontologos_parser_test");
        let _ = std::fs::create_dir_all(&scratch_dir);
        let xml_path = scratch_dir.join("config.xml");

        // The temporary `File` is dropped (closed) at the end of this statement,
        // before the path is sniffed.
        std::fs::File::create(&xml_path)
            .expect("create")
            .write_all(b"<config><item/></config>")
            .expect("write");

        assert_eq!(detect_format(&xml_path), None);
        let _ = std::fs::remove_file(&xml_path);
    }
}
151}