cognee_ontology/
loader.rs1use sophia_api::graph::{Graph, MutableGraph};
7use sophia_api::parser::TripleParser;
8use sophia_api::prelude::{Quad, QuadParser, QuadSource};
9use sophia_api::source::TripleSource;
10use sophia_inmem::graph::FastGraph;
11use sophia_jsonld::JsonLdParser;
12use sophia_turtle::parser::turtle;
13use sophia_xml::parser::RdfXmlParser;
14use std::io::Read;
15use std::path::{Path, PathBuf};
16use tracing::{info, warn};
17
18use crate::error::{OntologyError, OntologyResult};
19
/// Source(s) from which ontology data can be loaded.
///
/// Values can be built directly or through the `From` conversions defined
/// for `PathBuf`, `Vec<PathBuf>`, `&str` and `Vec<&str>`.
pub enum OntologyFileInput {
    /// A single ontology file on disk.
    Path(PathBuf),
    /// Several ontology files on disk, loaded one by one and merged into a single graph.
    Paths(Vec<PathBuf>),
    /// A single readable stream containing ontology text.
    Reader(Box<dyn Read>),
    /// Several readable streams, loaded one by one and merged into a single graph.
    Readers(Vec<Box<dyn Read>>),
}
33
34impl From<PathBuf> for OntologyFileInput {
35 fn from(path: PathBuf) -> Self {
36 OntologyFileInput::Path(path)
37 }
38}
39
40impl From<Vec<PathBuf>> for OntologyFileInput {
41 fn from(paths: Vec<PathBuf>) -> Self {
42 OntologyFileInput::Paths(paths)
43 }
44}
45
46impl<'a> From<&'a str> for OntologyFileInput {
47 fn from(path: &'a str) -> Self {
48 OntologyFileInput::Path(PathBuf::from(path))
49 }
50}
51
52impl From<Vec<&str>> for OntologyFileInput {
53 fn from(paths: Vec<&str>) -> Self {
54 OntologyFileInput::Paths(paths.into_iter().map(PathBuf::from).collect())
55 }
56}
57
/// RDF serialisations this loader can parse.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RdfFormat {
    /// Turtle, detected from the `.ttl` extension.
    Turtle,
    /// RDF/XML, detected from the `.rdf`, `.owl` and `.xml` extensions.
    RdfXml,
    /// N-Triples, detected from the `.nt` extension.
    NTriples,
    /// JSON-LD, detected from the `.jsonld` extension.
    JsonLd,
}

impl RdfFormat {
    /// Guesses the serialisation from the file extension of `path`.
    ///
    /// The comparison ignores case. Returns `None` when the path has no
    /// extension, the extension is not valid UTF-8, or it is not one of the
    /// recognised extensions.
    fn from_path(path: &Path) -> Option<Self> {
        let extension = path.extension()?.to_str()?.to_lowercase();
        let format = match extension.as_str() {
            "ttl" => Self::Turtle,
            "rdf" | "owl" | "xml" => Self::RdfXml,
            "nt" => Self::NTriples,
            "jsonld" => Self::JsonLd,
            _ => return None,
        };
        Some(format)
    }
}
81
82pub fn load_ontology_files(input: OntologyFileInput) -> OntologyResult<Option<FastGraph>> {
87 match input {
88 OntologyFileInput::Path(path) => load_single_path(&path),
89 OntologyFileInput::Paths(paths) => load_multiple_paths(&paths),
90 OntologyFileInput::Reader(reader) => load_single_reader(reader),
91 OntologyFileInput::Readers(readers) => load_multiple_readers(readers),
92 }
93}
94
95fn load_single_path(path: &Path) -> OntologyResult<Option<FastGraph>> {
96 if !path.exists() {
97 warn!(
98 "Ontology file '{}' not found. Skipping this file.",
99 path.display()
100 );
101 return Ok(None);
102 }
103
104 let content = std::fs::read_to_string(path).map_err(|e| {
105 OntologyError::FileNotFound(format!("Failed to read file '{}': {}", path.display(), e))
106 })?;
107
108 let format = RdfFormat::from_path(path).ok_or_else(|| {
109 OntologyError::ParseError(format!(
110 "Unknown RDF format for file '{}'. Supported: .ttl, .rdf, .owl, .nt, .jsonld",
111 path.display()
112 ))
113 })?;
114
115 let parse_result = match format {
116 RdfFormat::Turtle => parse_turtle_with_path_base(path, &content),
117 _ => parse_rdf(&content, format),
118 };
119
120 match parse_result {
121 Ok(graph) => {
122 info!("Ontology loaded successfully from file: {}", path.display());
123 Ok(Some(graph))
124 }
125 Err(e) => {
126 warn!("Failed to parse ontology file '{}': {}", path.display(), e);
127 Ok(None)
128 }
129 }
130}
131
132fn parse_turtle_with_path_base(path: &Path, content: &str) -> OntologyResult<FastGraph> {
133 let absolute = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
134 let base_iri = format!("file://{}", absolute.to_string_lossy());
135 let content_with_base = format!("@base <{base_iri}> .\n{content}");
136
137 parse_rdf(&content_with_base, RdfFormat::Turtle)
138}
139
140fn load_multiple_paths(paths: &[PathBuf]) -> OntologyResult<Option<FastGraph>> {
141 if paths.is_empty() {
142 info!("No ontology file provided. No owl ontology will be attached to the graph.");
143 return Ok(None);
144 }
145
146 let mut merged_graph = FastGraph::new();
147 let mut loaded_count = 0;
148
149 for path in paths {
150 match load_single_path(path) {
151 Ok(Some(graph)) => {
152 merged_graph.insert_all(graph.triples()).map_err(|e| {
153 OntologyError::ParseError(format!(
154 "Failed to merge graph from '{}': {}",
155 path.display(),
156 e
157 ))
158 })?;
159 loaded_count += 1;
160 }
161 Ok(None) => {}
162 Err(e) => warn!(
163 "Failed to process ontology file '{}': {}",
164 path.display(),
165 e
166 ),
167 }
168 }
169
170 if loaded_count == 0 {
171 info!("No valid ontology files found. No owl ontology will be attached to the graph.");
172 Ok(None)
173 } else {
174 info!("Total ontology files loaded: {}", loaded_count);
175 Ok(Some(merged_graph))
176 }
177}
178
179fn load_single_reader(mut reader: Box<dyn Read>) -> OntologyResult<Option<FastGraph>> {
180 let mut content = String::new();
181 reader
182 .read_to_string(&mut content)
183 .map_err(|e| OntologyError::FileNotFound(format!("Failed to read from reader: {e}")))?;
184
185 let parse_attempts = [
187 RdfFormat::RdfXml,
188 RdfFormat::Turtle,
189 RdfFormat::JsonLd,
190 RdfFormat::NTriples,
191 ];
192
193 let mut last_error: Option<OntologyError> = None;
194 let mut parsed_graph: Option<FastGraph> = None;
195
196 for format in parse_attempts {
197 match parse_rdf(&content, format) {
198 Ok(graph) => {
199 parsed_graph = Some(graph);
200 break;
201 }
202 Err(e) => last_error = Some(e),
203 }
204 }
205
206 match parsed_graph {
207 Some(graph) => {
208 info!("Ontology loaded successfully from reader");
209 Ok(Some(graph))
210 }
211 None => {
212 let err_message = last_error
213 .map(|e| e.to_string())
214 .unwrap_or_else(|| "Unknown parse error".to_string());
215 warn!("Failed to parse ontology from reader: {}", err_message);
216 Ok(None)
217 }
218 }
219}
220
221fn load_multiple_readers(readers: Vec<Box<dyn Read>>) -> OntologyResult<Option<FastGraph>> {
222 if readers.is_empty() {
223 info!("No ontology file provided. No owl ontology will be attached to the graph.");
224 return Ok(None);
225 }
226
227 let mut merged_graph = FastGraph::new();
228 let mut loaded_count = 0;
229
230 for reader in readers {
231 if let Some(graph) = load_single_reader(reader)? {
232 merged_graph.insert_all(graph.triples()).map_err(|e| {
233 OntologyError::ParseError(format!("Failed to merge graph from reader: {e}"))
234 })?;
235 loaded_count += 1;
236 }
237 }
238
239 if loaded_count == 0 {
240 info!("No valid ontology readers found. No owl ontology will be attached to the graph.");
241 Ok(None)
242 } else {
243 info!("Total ontology readers loaded: {}", loaded_count);
244 Ok(Some(merged_graph))
245 }
246}
247
248fn parse_rdf(content: &str, format: RdfFormat) -> OntologyResult<FastGraph> {
250 match format {
251 RdfFormat::Turtle | RdfFormat::NTriples => turtle::parse_str(content)
252 .collect_triples()
253 .map_err(|e| OntologyError::ParseError(format!("Turtle/N-Triples parse error: {e}"))),
254 RdfFormat::RdfXml => RdfXmlParser::default()
255 .parse_str(content)
256 .collect_triples()
257 .map_err(|e| OntologyError::ParseError(format!("RDF/XML parse error: {e}"))),
258 RdfFormat::JsonLd => JsonLdParser::new()
259 .parse_str(content)
260 .filter_quads(|q| q.g().is_none())
261 .map_quads(Quad::into_triple)
262 .collect_triples()
263 .map_err(|e| OntologyError::ParseError(format!("JSON-LD parse error: {e}"))),
264 }
265}
266
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test code — panics are acceptable failures"
)]
mod tests {
    use super::*;

    /// A `.ttl` extension is recognised as Turtle.
    #[test]
    fn test_format_detection_turtle() {
        let detected = RdfFormat::from_path(Path::new("ontology.ttl"));
        assert_eq!(detected, Some(RdfFormat::Turtle));
    }

    /// Both `.rdf` and `.owl` are recognised as RDF/XML.
    #[test]
    fn test_format_detection_rdfxml() {
        for file_name in ["ontology.rdf", "ontology.owl"] {
            let detected = RdfFormat::from_path(Path::new(file_name));
            assert_eq!(detected, Some(RdfFormat::RdfXml));
        }
    }

    /// An unrelated extension yields no format.
    #[test]
    fn test_format_detection_unknown() {
        let detected = RdfFormat::from_path(Path::new("ontology.txt"));
        assert_eq!(detected, None);
    }

    /// A missing file is reported as "nothing loaded", not as an error.
    #[test]
    fn test_load_missing_file() {
        let outcome = load_single_path(Path::new("nonexistent.ttl")).unwrap();
        assert!(outcome.is_none());
    }

    /// A minimal Turtle document produces at least one triple.
    #[test]
    fn test_parse_simple_turtle() {
        let ttl = r#"
            @prefix ex: <http://example.org#> .
            ex:Car a ex:Vehicle .
        "#;

        let graph = parse_rdf(ttl, RdfFormat::Turtle).unwrap();
        let triple_count = graph.triples().count();
        assert!(triple_count > 0);
    }

    /// A minimal RDF/XML document produces at least one triple.
    #[test]
    fn test_parse_simple_rdfxml() {
        let rdfxml = r#"<?xml version="1.0"?>
            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
                     xmlns:ex="http://example.org#">
                <rdf:Description rdf:about="http://example.org#Car">
                    <rdf:type rdf:resource="http://example.org#Vehicle"/>
                </rdf:Description>
            </rdf:RDF>"#;

        let graph = parse_rdf(rdfxml, RdfFormat::RdfXml).unwrap();
        let triple_count = graph.triples().count();
        assert!(triple_count > 0);
    }

    /// A minimal JSON-LD document produces at least one triple.
    #[test]
    fn test_parse_simple_jsonld() {
        let jsonld = r#"{
            "@context": {
                "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
                "ex": "http://example.org#",
                "type": {"@id": "rdf:type", "@type": "@id"}
            },
            "@id": "ex:Car",
            "type": "ex:Vehicle"
        }"#;

        let graph = parse_rdf(jsonld, RdfFormat::JsonLd).unwrap();
        let triple_count = graph.triples().count();
        assert!(triple_count > 0);
    }

    /// Text that is not Turtle is rejected.
    #[test]
    fn test_parse_invalid_turtle() {
        let outcome = parse_rdf("invalid turtle syntax !!!", RdfFormat::Turtle);
        assert!(outcome.is_err());
    }

    /// Unbalanced RDF/XML is rejected.
    #[test]
    fn test_parse_invalid_rdfxml() {
        let outcome = parse_rdf("<rdf:RDF><rdf:Description></rdf:RDF>", RdfFormat::RdfXml);
        assert!(outcome.is_err());
    }

    /// Malformed JSON is rejected by the JSON-LD parser.
    #[test]
    fn test_parse_invalid_jsonld() {
        let outcome = parse_rdf("{invalid json-ld}", RdfFormat::JsonLd);
        assert!(outcome.is_err());
    }

    /// An empty Turtle document parses to an empty graph.
    #[test]
    fn test_parse_empty_turtle() {
        let graph = parse_rdf("", RdfFormat::Turtle).unwrap();
        let triple_count = graph.triples().count();
        assert_eq!(triple_count, 0);
    }
}