Skip to main content

ontologos_parser/
lib.rs

1//! OWL and RDF syntax parsers for OntoLogos.
2//!
3//! v0.2 loads OWL/XML, RDF/XML, Turtle, and OWL Functional Syntax into
4//! [`ontologos_core::Ontology`] via [`load_ontology`].
5//!
6//! # Example
7//!
8//! ```no_run
9//! use ontologos_parser::load_ontology;
10//!
11//! let ontology = load_ontology(std::path::Path::new("ontology.owl"))?;
12//! println!("axioms: {}", ontology.axiom_count());
13//! # Ok::<(), ontologos_parser::Error>(())
14//! ```
15//!
16//! See [load guide](https://github.com/eddiethedean/ontologos/blob/main/docs/getting-started/load-owl-file.md).
17
18#![warn(missing_docs)]
19
20mod error;
21mod limits;
22mod load;
23mod map;
24mod read;
25mod report;
26
27pub use error::{Error, Result};
28pub use limits::ParseLimits;
29pub use load::{
30    load_ontology, load_ontology_in, load_ontology_with_limits, load_ontology_with_limits_and_base,
31    validate_load_path,
32};
33pub use read::{
34    detect_turtle_from_bytes, read_horned_owl, read_horned_owl_from_reader, sniff_file_header,
35};
36
/// Ontology serialization syntaxes this crate can identify.
///
/// Values are produced by [`detect_format`] and [`detect_format_from_bytes`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Format {
    /// OWL/XML serialization.
    OwlXml,
    /// RDF/XML serialization.
    RdfXml,
    /// Turtle (RDF Turtle) serialization.
    Turtle,
    /// OWL Functional Syntax (file extensions `.ofn` and `.func`).
    Functional,
}
49
50/// Detect format from file content bytes.
51#[must_use]
52pub fn detect_format_from_bytes(header: &[u8]) -> Option<Format> {
53    if detect_functional_from_bytes(header) {
54        return Some(Format::Functional);
55    }
56    let text = std::str::from_utf8(header).ok()?;
57    let trimmed = text.trim_start();
58    if trimmed.contains("rdf:RDF") || trimmed.contains("<rdf:RDF") {
59        return Some(Format::RdfXml);
60    }
61    if trimmed.contains("<Ontology ") || trimmed.contains(":Ontology ") {
62        return Some(Format::OwlXml);
63    }
64    if detect_turtle_from_bytes(header) {
65        return Some(Format::Turtle);
66    }
67    None
68}
69
/// Detect OWL Functional Syntax from a file header.
///
/// Returns `true` when, after leading whitespace, the header starts with
/// `Prefix(` or `Ontology(`.
///
/// `header` is usually a fixed-size prefix of a file, so a multi-byte UTF-8
/// character cut off at the very end of the buffer is ignored rather than
/// treated as a decoding failure. Any other invalid UTF-8 yields `false`.
#[must_use]
pub fn detect_functional_from_bytes(header: &[u8]) -> bool {
    let text = match std::str::from_utf8(header) {
        Ok(text) => text,
        // `error_len() == None` means the input ended in the middle of a
        // character; everything before `valid_up_to()` is valid UTF-8.
        Err(err) if err.error_len().is_none() => {
            match std::str::from_utf8(&header[..err.valid_up_to()]) {
                Ok(prefix) => prefix,
                Err(_) => return false,
            }
        }
        Err(_) => return false,
    };
    let text = text.trim_start();
    text.starts_with("Prefix(") || text.starts_with("Ontology(")
}
79
80/// Detect the most likely format from a file path and optional content sniffing.
81#[must_use]
82pub fn detect_format(path: &std::path::Path) -> Option<Format> {
83    match path.extension()?.to_str()? {
84        "owl" => sniff_xml_format(path),
85        "xml" => sniff_xml_format(path),
86        "rdf" => Some(Format::RdfXml),
87        "ttl" | "turtle" => Some(Format::Turtle),
88        "ofn" | "func" => Some(Format::Functional),
89        _ => None,
90    }
91}
92
93fn sniff_xml_format(path: &std::path::Path) -> Option<Format> {
94    let header = sniff_file_header(path, 4096).ok()?;
95    detect_format_from_bytes(&header)
96}
97
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Extensions that map straight to a format need no file on disk.
    #[test]
    fn detect_format_by_extension() {
        assert_eq!(detect_format(Path::new("p.rdf")), Some(Format::RdfXml));
        assert_eq!(detect_format(Path::new("p.ttl")), Some(Format::Turtle));
        assert_eq!(detect_format(Path::new("p.turtle")), Some(Format::Turtle));
        assert_eq!(detect_format(Path::new("p.ofn")), Some(Format::Functional));
        assert_eq!(detect_format(Path::new("p.func")), Some(Format::Functional));
        assert_eq!(detect_format(Path::new("p.txt")), None);
        assert_eq!(detect_format(Path::new("noext")), None);
    }

    #[test]
    fn detect_functional_from_bytes_header() {
        let header = b"Prefix(:=<http://example.org/>)\nOntology(<http://example.org/o>)";
        assert!(detect_functional_from_bytes(header));
        assert_eq!(detect_format_from_bytes(header), Some(Format::Functional));
    }

    #[test]
    fn detect_format_from_bytes_owl_xml() {
        let header = br#"<?xml version="1.0"?><Ontology xmlns="http://www.w3.org/2002/07/owl#"/>"#;
        assert_eq!(detect_format_from_bytes(header), Some(Format::OwlXml));
    }

    #[test]
    fn detect_format_from_bytes_rdf_xml() {
        let header = br#"<?xml version="1.0"?><rdf:RDF/>"#;
        assert_eq!(detect_format_from_bytes(header), Some(Format::RdfXml));
    }

    /// An `.xml` file whose content is not an ontology is sniffed and yields
    /// no format.
    #[test]
    fn plain_xml_extension_without_sniff_returns_none() {
        // A per-process directory keeps concurrent test runs from sharing a file.
        let dir = std::env::temp_dir()
            .join(format!("ontologos_parser_test_{}", std::process::id()));
        std::fs::create_dir_all(&dir).expect("create temp dir");
        let path = dir.join("config.xml");
        std::fs::write(&path, b"<config><item/></config>").expect("write");

        let detected = detect_format(&path);
        // Clean up before asserting so a failure does not leave files behind.
        let _ = std::fs::remove_dir_all(&dir);

        assert_eq!(detected, None);
    }
}