Skip to main content

ontologos_parser/
read.rs

1use std::fs::File;
2use std::io::{BufReader, Read, Seek, SeekFrom};
3use std::panic::{catch_unwind, AssertUnwindSafe};
4use std::path::Path;
5
6use horned_owl::curie::PrefixMapping;
7use horned_owl::error::HornedError;
8use horned_owl::io::ofn::reader as ofn_reader;
9use horned_owl::io::owx::reader as owx_reader;
10use horned_owl::io::rdf::reader as rdf_reader;
11use horned_owl::io::{ParserConfiguration, RDFParserConfiguration};
12use horned_owl::model::RcStr;
13use horned_owl::ontology::set::SetOntology;
14use oxrdfio::RdfFormat;
15
16use crate::limits::ParseLimits;
17use crate::{Error, Format, Result};
18
19/// Read a horned-owl ontology from disk after format detection and size checks.
20///
21/// Prefer [`crate::load_ontology`] or [`crate::load_ontology_in`] for sandboxed loads.
22#[allow(dead_code)]
23pub fn read_horned_owl(
24    path: &Path,
25    format: Format,
26    limits: ParseLimits,
27) -> Result<SetOntology<RcStr>> {
28    let metadata = std::fs::metadata(path).map_err(|e| Error::Parse(e.to_string()))?;
29    check_file_size(metadata.len(), limits)?;
30    let file = File::open(path).map_err(|e| Error::Parse(e.to_string()))?;
31    read_horned_owl_from_reader(BufReader::new(file), format, limits)
32}
33
34/// Parse ontology bytes from an already-open reader (single-fd load path).
35///
36/// `limits` are enforced during axiom mapping in [`crate::map`]; horned-owl itself
37/// may allocate before mapping caps apply (see `docs/security.md`).
38pub fn read_horned_owl_from_reader<R: Read>(
39    reader: R,
40    format: Format,
41    _limits: ParseLimits,
42) -> Result<SetOntology<RcStr>> {
43    let config = parser_config(format);
44
45    let (ontology, _prefixes) = match format {
46        Format::OwlXml => guard_horned_parse(|| {
47            owx_reader::read(&mut BufReader::new(reader), config).map_err(map_horned_error)
48        })?,
49        Format::RdfXml | Format::Turtle => guard_horned_parse(|| {
50            let mut reader = BufReader::new(reader);
51            let (concrete, incomplete) =
52                rdf_reader::read(&mut reader, config).map_err(map_horned_error)?;
53            if !incomplete.is_complete() {
54                return Err(Error::Parse(
55                    "RDF parse incomplete: input truncated or malformed".into(),
56                ));
57            }
58            Ok((concrete.into(), PrefixMapping::default()))
59        })?,
60        Format::Functional => guard_horned_parse(|| {
61            let mut reader = BufReader::new(reader);
62            ofn_reader::read(&mut reader, config).map_err(map_horned_error)
63        })?,
64    };
65
66    Ok(ontology)
67}
68
69/// Horned-owl may panic on some malformed RDF/XML; convert to [`Error::Parse`] for callers.
70fn guard_horned_parse<T, F>(f: F) -> Result<T>
71where
72    F: FnOnce() -> Result<T>,
73{
74    match catch_unwind(AssertUnwindSafe(f)) {
75        Ok(result) => result,
76        Err(payload) => Err(Error::Parse(panic_payload_message(payload))),
77    }
78}
79
/// Build the error text for a caught panic payload.
///
/// `&str` and `String` payloads are embedded in the message; any other
/// payload type yields a generic "unknown panic" message.
fn panic_payload_message(payload: Box<dyn std::any::Any + Send>) -> String {
    let detail: Option<&str> = if let Some(text) = payload.downcast_ref::<&str>() {
        Some(*text)
    } else if let Some(text) = payload.downcast_ref::<String>() {
        Some(text.as_str())
    } else {
        None
    };

    match detail {
        Some(text) => format!("parser internal error: {text}"),
        None => "parser internal error (unknown panic)".into(),
    }
}
91
92fn check_file_size(len: u64, limits: ParseLimits) -> Result<()> {
93    if len as usize > limits.max_file_bytes {
94        return Err(Error::Parse(format!(
95            "file size {len} exceeds limit of {} bytes",
96            limits.max_file_bytes
97        )));
98    }
99    Ok(())
100}
101
102fn parser_config(format: Format) -> ParserConfiguration {
103    let rdf = match format {
104        Format::Turtle => RDFParserConfiguration {
105            format: Some(RdfFormat::Turtle),
106            ..RDFParserConfiguration::default()
107        },
108        Format::RdfXml => RDFParserConfiguration {
109            format: Some(RdfFormat::RdfXml),
110            ..RDFParserConfiguration::default()
111        },
112        _ => RDFParserConfiguration::default(),
113    };
114    ParserConfiguration {
115        rdf,
116        ..ParserConfiguration::default()
117    }
118}
119
120pub(crate) fn map_horned_error(err: HornedError) -> Error {
121    Error::Parse(err.to_string())
122}
123
124/// Sniff the first bytes of a file for Turtle `@prefix` or `PREFIX` declarations.
125pub fn detect_turtle_from_bytes(header: &[u8]) -> bool {
126    let text = match std::str::from_utf8(header) {
127        Ok(t) => strip_utf8_bom(t).trim_start(),
128        Err(_) => return false,
129    };
130    text.starts_with("@prefix")
131        || text.starts_with("@base")
132        || text.to_ascii_lowercase().starts_with("prefix ")
133        || text.contains("\n@prefix")
134        || text.to_ascii_lowercase().contains("\nprefix ")
135}
136
/// Return `text` without a single leading byte-order mark (U+FEFF), if present.
fn strip_utf8_bom(text: &str) -> &str {
    match text.strip_prefix('\u{feff}') {
        Some(rest) => rest,
        None => text,
    }
}
140
141/// Read up to `max` bytes from `path` for format sniffing.
142pub fn sniff_file_header(path: &Path, max: usize) -> Result<Vec<u8>> {
143    let mut file = File::open(path).map_err(|e| Error::Parse(e.to_string()))?;
144    sniff_reader(&mut file, max)
145}
146
147/// Read up to `max` bytes from a reader for format sniffing.
148pub fn sniff_reader(reader: &mut impl Read, max: usize) -> Result<Vec<u8>> {
149    let mut header = vec![0_u8; max];
150    let read = reader
151        .read(&mut header)
152        .map_err(|e| Error::Parse(e.to_string()))?;
153    header.truncate(read);
154    Ok(header)
155}
156
157/// Sniff from a seekable reader and rewind to the start.
158pub fn sniff_and_rewind(reader: &mut (impl Read + Seek), max: usize) -> Result<Vec<u8>> {
159    let header = sniff_reader(reader, max)?;
160    reader
161        .seek(SeekFrom::Start(0))
162        .map_err(|e| Error::Parse(e.to_string()))?;
163    Ok(header)
164}