1use std::fs::File;
2use std::io::{BufReader, Read, Seek, SeekFrom};
3use std::panic::{catch_unwind, AssertUnwindSafe};
4use std::path::Path;
5
6use horned_owl::curie::PrefixMapping;
7use horned_owl::error::HornedError;
8use horned_owl::io::ofn::reader as ofn_reader;
9use horned_owl::io::owx::reader as owx_reader;
10use horned_owl::io::rdf::reader as rdf_reader;
11use horned_owl::io::{ParserConfiguration, RDFParserConfiguration};
12use horned_owl::model::RcStr;
13use horned_owl::ontology::set::SetOntology;
14use oxrdfio::RdfFormat;
15
16use crate::limits::ParseLimits;
17use crate::{Error, Format, Result};
18
19#[allow(dead_code)]
23pub fn read_horned_owl(
24 path: &Path,
25 format: Format,
26 limits: ParseLimits,
27) -> Result<SetOntology<RcStr>> {
28 let metadata = std::fs::metadata(path).map_err(|e| Error::Parse(e.to_string()))?;
29 check_file_size(metadata.len(), limits)?;
30 let file = File::open(path).map_err(|e| Error::Parse(e.to_string()))?;
31 read_horned_owl_from_reader(BufReader::new(file), format, limits)
32}
33
34pub fn read_horned_owl_from_reader<R: Read>(
39 reader: R,
40 format: Format,
41 _limits: ParseLimits,
42) -> Result<SetOntology<RcStr>> {
43 let config = parser_config(format);
44
45 let (ontology, _prefixes) = match format {
46 Format::OwlXml => guard_horned_parse(|| {
47 owx_reader::read(&mut BufReader::new(reader), config).map_err(map_horned_error)
48 })?,
49 Format::RdfXml | Format::Turtle => guard_horned_parse(|| {
50 let mut reader = BufReader::new(reader);
51 let (concrete, incomplete) =
52 rdf_reader::read(&mut reader, config).map_err(map_horned_error)?;
53 if !incomplete.is_complete() {
54 return Err(Error::Parse(
55 "RDF parse incomplete: input truncated or malformed".into(),
56 ));
57 }
58 Ok((concrete.into(), PrefixMapping::default()))
59 })?,
60 Format::Functional => guard_horned_parse(|| {
61 let mut reader = BufReader::new(reader);
62 ofn_reader::read(&mut reader, config).map_err(map_horned_error)
63 })?,
64 };
65
66 Ok(ontology)
67}
68
69fn guard_horned_parse<T, F>(f: F) -> Result<T>
71where
72 F: FnOnce() -> Result<T>,
73{
74 match catch_unwind(AssertUnwindSafe(f)) {
75 Ok(result) => result,
76 Err(payload) => Err(Error::Parse(panic_payload_message(payload))),
77 }
78}
79
/// Builds a human-readable message from a caught panic payload.
///
/// Payloads that are a `&str` or a `String` are quoted after the
/// `parser internal error: ` prefix; any other payload type produces a fixed
/// "unknown panic" message.
fn panic_payload_message(payload: Box<dyn std::any::Any + Send>) -> String {
    let detail: Option<&str> = if let Some(text) = payload.downcast_ref::<&str>() {
        Some(*text)
    } else if let Some(text) = payload.downcast_ref::<String>() {
        Some(text.as_str())
    } else {
        None
    };

    match detail {
        Some(detail) => format!("parser internal error: {detail}"),
        None => "parser internal error (unknown panic)".into(),
    }
}
91
92fn check_file_size(len: u64, limits: ParseLimits) -> Result<()> {
93 if len as usize > limits.max_file_bytes {
94 return Err(Error::Parse(format!(
95 "file size {len} exceeds limit of {} bytes",
96 limits.max_file_bytes
97 )));
98 }
99 Ok(())
100}
101
102fn parser_config(format: Format) -> ParserConfiguration {
103 let rdf = match format {
104 Format::Turtle => RDFParserConfiguration {
105 format: Some(RdfFormat::Turtle),
106 ..RDFParserConfiguration::default()
107 },
108 Format::RdfXml => RDFParserConfiguration {
109 format: Some(RdfFormat::RdfXml),
110 ..RDFParserConfiguration::default()
111 },
112 _ => RDFParserConfiguration::default(),
113 };
114 ParserConfiguration {
115 rdf,
116 ..ParserConfiguration::default()
117 }
118}
119
120pub(crate) fn map_horned_error(err: HornedError) -> Error {
121 Error::Parse(err.to_string())
122}
123
/// Heuristically decides whether `header` (the first bytes of a document) is Turtle.
///
/// After removing a leading UTF-8 byte-order mark and leading whitespace, the header
/// counts as Turtle when it starts with `@prefix`, `@base`, or a case-insensitive
/// `prefix ` directive, or when one of those `@prefix` / `@base` / `prefix ` forms
/// appears directly after a newline.
///
/// `header` may be a fixed-size prefix of a longer document, so a multi-byte UTF-8
/// sequence cut off at the very end is tolerated: only the valid text before it is
/// inspected. Bytes that are invalid UTF-8 anywhere else make the function return
/// `false`.
pub fn detect_turtle_from_bytes(header: &[u8]) -> bool {
    let decoded = match std::str::from_utf8(header) {
        Ok(text) => text,
        // `error_len() == None` means the input ended in the middle of a character,
        // which is what a truncated header looks like; keep the valid prefix.
        Err(err) if err.error_len().is_none() => {
            match std::str::from_utf8(&header[..err.valid_up_to()]) {
                Ok(text) => text,
                Err(_) => return false,
            }
        }
        Err(_) => return false,
    };
    let text = strip_utf8_bom(decoded).trim_start();

    // Case-sensitive `@` directives first: they need no allocation.
    if text.starts_with("@prefix")
        || text.starts_with("@base")
        || text.contains("\n@prefix")
        || text.contains("\n@base")
    {
        return true;
    }

    // SPARQL-style `PREFIX` is case-insensitive; lowercase once for both checks.
    let lowered = text.to_ascii_lowercase();
    lowered.starts_with("prefix ") || lowered.contains("\nprefix ")
}

/// Returns `text` without a leading byte-order mark (U+FEFF), if one is present.
fn strip_utf8_bom(text: &str) -> &str {
    text.strip_prefix('\u{feff}').unwrap_or(text)
}
140
141pub fn sniff_file_header(path: &Path, max: usize) -> Result<Vec<u8>> {
143 let mut file = File::open(path).map_err(|e| Error::Parse(e.to_string()))?;
144 sniff_reader(&mut file, max)
145}
146
147pub fn sniff_reader(reader: &mut impl Read, max: usize) -> Result<Vec<u8>> {
149 let mut header = vec![0_u8; max];
150 let read = reader
151 .read(&mut header)
152 .map_err(|e| Error::Parse(e.to_string()))?;
153 header.truncate(read);
154 Ok(header)
155}
156
157pub fn sniff_and_rewind(reader: &mut (impl Read + Seek), max: usize) -> Result<Vec<u8>> {
159 let header = sniff_reader(reader, max)?;
160 reader
161 .seek(SeekFrom::Start(0))
162 .map_err(|e| Error::Parse(e.to_string()))?;
163 Ok(header)
164}