ontoenv/
lib.rs

1use ::ontoenv::api::{OntoEnv as OntoEnvRs, ResolveTarget};
2use ::ontoenv::config;
3use ::ontoenv::consts::{IMPORTS, ONTOLOGY, TYPE};
4use ::ontoenv::ontology::OntologyLocation;
5use ::ontoenv::transform;
6use anyhow::Error;
7use oxigraph::model::{BlankNode, Literal, NamedNode, SubjectRef, Term};
8use pyo3::{
9    prelude::*,
10    types::{IntoPyDict, PyString, PyTuple},
11};
12use std::borrow::Borrow;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, Mutex, Once};
15
16fn anyhow_to_pyerr(e: Error) -> PyErr {
17    PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string())
18}
19
// Guards the one-time `env_logger::init()` call made from `OntoEnv::new`, so that
// constructing several `OntoEnv` objects does not initialise the logger more than once.
static INIT: Once = Once::new();
21
22#[allow(dead_code)]
23struct MyTerm(Term);
24impl From<Result<Bound<'_, PyAny>, pyo3::PyErr>> for MyTerm {
25    fn from(s: Result<Bound<'_, PyAny>, pyo3::PyErr>) -> Self {
26        let s = s.unwrap();
27        let typestr = s.get_type().name().unwrap();
28        let typestr = typestr.to_string();
29        let data_type: Option<NamedNode> = match s.getattr("datatype") {
30            Ok(dt) => {
31                if dt.is_none() {
32                    None
33                } else {
34                    Some(NamedNode::new(dt.to_string()).unwrap())
35                }
36            }
37            Err(_) => None,
38        };
39        let lang: Option<String> = match s.getattr("language") {
40            Ok(l) => {
41                if l.is_none() {
42                    None
43                } else {
44                    Some(l.to_string())
45                }
46            }
47            Err(_) => None,
48        };
49        let n: Term = match typestr.borrow() {
50            "URIRef" => Term::NamedNode(NamedNode::new(s.to_string()).unwrap()),
51            "Literal" => match (data_type, lang) {
52                (Some(dt), None) => Term::Literal(Literal::new_typed_literal(s.to_string(), dt)),
53                (None, Some(l)) => {
54                    Term::Literal(Literal::new_language_tagged_literal(s.to_string(), l).unwrap())
55                }
56                (_, _) => Term::Literal(Literal::new_simple_literal(s.to_string())),
57            },
58            "BNode" => Term::BlankNode(BlankNode::new(s.to_string()).unwrap()),
59            _ => Term::NamedNode(NamedNode::new(s.to_string()).unwrap()),
60        };
61        MyTerm(n)
62    }
63}
64
65fn term_to_python<'a>(
66    py: Python,
67    rdflib: &Bound<'a, PyModule>,
68    node: Term,
69) -> PyResult<Bound<'a, PyAny>> {
70    let dtype: Option<String> = match &node {
71        Term::Literal(lit) => {
72            let mut s = lit.datatype().to_string();
73            s.remove(0);
74            s.remove(s.len() - 1);
75            Some(s)
76        }
77        _ => None,
78    };
79    let lang: Option<&str> = match &node {
80        Term::Literal(lit) => lit.language(),
81        _ => None,
82    };
83
84    let res: Bound<'_, PyAny> = match &node {
85        Term::NamedNode(uri) => {
86            let mut uri = uri.to_string();
87            uri.remove(0);
88            uri.remove(uri.len() - 1);
89            rdflib.getattr("URIRef")?.call1((uri,))?
90        }
91        Term::Literal(literal) => {
92            match (dtype, lang) {
93                // prioritize 'lang' -> it implies String
94                (_, Some(lang)) => {
95                    rdflib
96                        .getattr("Literal")?
97                        .call1((literal.value(), lang, py.None()))?
98                }
99                (Some(dtype), None) => {
100                    rdflib
101                        .getattr("Literal")?
102                        .call1((literal.value(), py.None(), dtype))?
103                }
104                (None, None) => rdflib.getattr("Literal")?.call1((literal.value(),))?,
105            }
106        }
107        Term::BlankNode(id) => rdflib
108            .getattr("BNode")?
109            .call1((id.clone().into_string(),))?,
110        Term::Triple(_) => {
111            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
112                "Triples are not supported",
113            ))
114        }
115    };
116    Ok(res)
117}
118
119#[pyclass]
120#[derive(Clone)]
121struct Config {
122    cfg: config::Config,
123}
124
125#[pymethods]
126impl Config {
127    #[new]
128    #[pyo3(signature = (search_directories=None, require_ontology_names=false, strict=false, offline=false, resolution_policy="default".to_owned(), root=".".to_owned(), includes=None, excludes=None, temporary=false))]
129    fn new(
130        search_directories: Option<Vec<String>>,
131        require_ontology_names: bool,
132        strict: bool,
133        offline: bool,
134        resolution_policy: String,
135        root: String,
136        includes: Option<Vec<String>>,
137        excludes: Option<Vec<String>>,
138        temporary: bool,
139    ) -> PyResult<Self> {
140        Ok(Config {
141            cfg: config::Config::new(
142                root.to_string().into(),
143                search_directories.map(|dirs| {
144                    dirs.iter()
145                        .map(|s| s.to_string().into())
146                        .collect::<Vec<PathBuf>>()
147                }),
148                includes
149                    .unwrap_or_default()
150                    .iter()
151                    .map(|s| s.to_string())
152                    .collect::<Vec<_>>(),
153                excludes
154                    .unwrap_or_default()
155                    .iter()
156                    .map(|s| s.to_string())
157                    .collect::<Vec<_>>(),
158                require_ontology_names,
159                strict,
160                offline,
161                resolution_policy.to_string(),
162                false,
163                temporary,
164            )
165            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?,
166        })
167    }
168}
169
/// Python-visible handle to a Rust `OntoEnvRs` environment.
///
/// The environment is kept behind an `Arc<Mutex<..>>`; each method locks the mutex
/// before reading or modifying it.
#[pyclass]
struct OntoEnv {
    // Shared, mutex-protected Rust environment that all methods operate on.
    inner: Arc<Mutex<OntoEnvRs>>,
}
174
175#[pymethods]
176impl OntoEnv {
177    #[new]
178    #[pyo3(signature = (config=None, path=Some(Path::new(".").to_owned()), recreate=false, read_only=false))]
179    fn new(
180        _py: Python,
181        config: Option<Config>,
182        path: Option<PathBuf>,
183        recreate: bool,
184        read_only: bool,
185    ) -> PyResult<Self> {
186        // wrap env_logger::init() in a Once to ensure it's only called once. This can
187        // happen if a user script creates multiple OntoEnv instances
188        INIT.call_once(|| {
189            env_logger::init();
190        });
191
192        let config_path = path.unwrap_or_else(|| PathBuf::from("."));
193        let env = if let Some(c) = config {
194            // if temporary is true, create a new OntoEnv
195            if c.cfg.temporary {
196                OntoEnvRs::init(c.cfg, recreate).map_err(anyhow_to_pyerr)
197            } else if !recreate && config_path.join(".ontoenv").exists() {
198                // if temporary is false, load from the directory
199                OntoEnvRs::load_from_directory(config_path, read_only).map_err(anyhow_to_pyerr)
200            } else {
201                // if temporary is false and recreate is true or the directory doesn't exist, create a new OntoEnv
202                OntoEnvRs::init(c.cfg, recreate).map_err(anyhow_to_pyerr)
203            }
204        } else {
205            // If no config but a valid path is given, attempt to load from the directory
206            OntoEnvRs::load_from_directory(config_path, read_only).map_err(anyhow_to_pyerr)
207        }?;
208
209        let inner = Arc::new(Mutex::new(env));
210        let mut env = inner.lock().unwrap();
211        env.update().map_err(anyhow_to_pyerr)?;
212        env.save_to_directory().map_err(anyhow_to_pyerr)?;
213
214        Ok(OntoEnv {
215            inner: inner.clone(),
216        })
217    }
218
219    fn update(&self) -> PyResult<()> {
220        let inner = self.inner.clone();
221        let mut env = inner.lock().unwrap();
222        env.update().map_err(anyhow_to_pyerr)?;
223        env.save_to_directory().map_err(anyhow_to_pyerr)?;
224        Ok(())
225    }
226
227    // fn is_read_only(&self) -> PyResult<bool> {
228    //     let inner = self.inner.clone();
229    //     let env = inner.lock().unwrap();
230    //     Ok(env.is_read_only())
231    // }
232
233    fn __repr__(&self) -> PyResult<String> {
234        let inner = self.inner.clone();
235        let env = inner.lock().unwrap();
236        let stats = env.stats().map_err(anyhow_to_pyerr)?;
237        Ok(format!(
238            "<OntoEnv: {} ontologies, {} graphs, {} triples>",
239            stats.num_ontologies, stats.num_graphs, stats.num_triples,
240        ))
241    }
242
243    // The following methods will now access the inner OntoEnv in a thread-safe manner:
244
245    fn import_graph(
246        &self,
247        py: Python,
248        destination_graph: &Bound<'_, PyAny>,
249        uri: &str,
250    ) -> PyResult<()> {
251        let inner = self.inner.clone();
252        let env = inner.lock().unwrap();
253        let rdflib = py.import("rdflib")?;
254        let iri = NamedNode::new(uri)
255            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
256        let graphid = env
257            .resolve(ResolveTarget::Graph(iri.clone()).into())
258            .ok_or_else(|| {
259                PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
260                    "Failed to resolve graph for URI: {}",
261                    uri
262                ))
263            })?;
264        let mut graph = env.get_graph(&graphid).map_err(anyhow_to_pyerr)?;
265
266        let uriref_constructor = rdflib.getattr("URIRef")?;
267        let type_uri = uriref_constructor.call1((TYPE.as_str(),))?;
268        let ontology_uri = uriref_constructor.call1((ONTOLOGY.as_str(),))?;
269        let kwargs = [("predicate", type_uri), ("object", ontology_uri)].into_py_dict(py)?;
270        let result = destination_graph.call_method("value", (), Some(&kwargs))?;
271        if !result.is_none() {
272            let ontology = NamedNode::new(result.extract::<String>()?)
273                .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
274            let base_ontology: SubjectRef = SubjectRef::NamedNode(ontology.as_ref());
275
276            transform::rewrite_sh_prefixes_graph(&mut graph, base_ontology);
277            transform::remove_ontology_declarations_graph(&mut graph, base_ontology);
278        }
279        // remove the owl:import statement for the 'uri' ontology
280        transform::remove_owl_imports_graph(&mut graph, Some(&[(&iri).into()]));
281
282        Python::with_gil(|_py| {
283            for triple in graph.into_iter() {
284                let s: Term = triple.subject.into();
285                let p: Term = triple.predicate.into();
286                let o: Term = triple.object.into();
287
288                let t = PyTuple::new(
289                    py,
290                    &[
291                        term_to_python(py, &rdflib, s)?,
292                        term_to_python(py, &rdflib, p)?,
293                        term_to_python(py, &rdflib, o)?,
294                    ],
295                )?;
296
297                destination_graph.getattr("add")?.call1((t,))?;
298            }
299            Ok::<(), PyErr>(())
300        })?;
301        Ok(())
302    }
303
304    /// List the ontologies in the imports closure of the given ontology
305    #[pyo3(signature = (uri))]
306    fn list_closure(&self, _py: Python, uri: &str) -> PyResult<Vec<String>> {
307        let iri = NamedNode::new(uri)
308            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
309        let inner = self.inner.clone();
310        let env = inner.lock().unwrap();
311        let graphid = env
312            .resolve(ResolveTarget::Graph(iri.clone()).into())
313            .ok_or_else(|| {
314                PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
315                    "Failed to resolve graph for URI: {}",
316                    uri
317                ))
318            })?;
319        let ont = env.ontologies().get(&graphid).ok_or_else(|| {
320            PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Ontology {} not found", iri))
321        })?;
322        let closure = env
323            .get_dependency_closure(ont.id())
324            .map_err(anyhow_to_pyerr)?;
325        let names: Vec<String> = closure.iter().map(|ont| ont.name().to_string()).collect();
326        Ok(names)
327    }
328
329    /// Merge all graphs in the imports closure of the given ontology into a single graph. If
330    /// destination_graph is provided, add the merged graph to the destination_graph. If not,
331    /// return the merged graph.
332    #[pyo3(signature = (uri, destination_graph=None, rewrite_sh_prefixes=false, remove_owl_imports=false))]
333    fn get_closure<'a>(
334        &self,
335        py: Python<'a>,
336        uri: &str,
337        destination_graph: Option<&Bound<'a, PyAny>>,
338        rewrite_sh_prefixes: bool,
339        remove_owl_imports: bool,
340    ) -> PyResult<Bound<'a, PyAny>> {
341        let rdflib = py.import("rdflib")?;
342        let iri = NamedNode::new(uri)
343            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
344        let inner = self.inner.clone();
345        let env = inner.lock().unwrap();
346        let graphid = env
347            .resolve(ResolveTarget::Graph(iri.clone()).into())
348            .ok_or_else(|| {
349                PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
350                    "No graph with URI: {}",
351                    uri
352                ))
353            })?;
354        let ont = env.ontologies().get(&graphid).ok_or_else(|| {
355            PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Ontology {} not found", iri))
356        })?;
357        let closure = env
358            .get_dependency_closure(ont.id())
359            .map_err(anyhow_to_pyerr)?;
360        // if destination_graph is null, create a new rdflib.Graph()
361        let destination_graph = match destination_graph {
362            Some(g) => g.clone(),
363            None => rdflib.getattr("Graph")?.call0()?,
364        };
365        let union = env
366            .get_union_graph(
367                &closure,
368                Some(rewrite_sh_prefixes),
369                Some(remove_owl_imports),
370            )
371            .map_err(anyhow_to_pyerr)?;
372        Python::with_gil(|_py| {
373            for triple in union.dataset.into_iter() {
374                let s: Term = triple.subject.into();
375                let p: Term = triple.predicate.into();
376                let o: Term = triple.object.into();
377                let t = PyTuple::new(
378                    py,
379                    &[
380                        term_to_python(py, &rdflib, s)?,
381                        term_to_python(py, &rdflib, p)?,
382                        term_to_python(py, &rdflib, o)?,
383                    ],
384                )?;
385                destination_graph.getattr("add")?.call1((t,))?;
386            }
387
388            // Remove each successful_imports url in the closure from the destination_graph
389            if remove_owl_imports {
390                for graphid in union.graph_ids {
391                    let iri = term_to_python(py, &rdflib, Term::NamedNode(graphid.into()))?;
392                    let pred = term_to_python(py, &rdflib, IMPORTS.into())?;
393                    // remove triples with (None, pred, iri)
394                    let remove_tuple = PyTuple::new(py, &[py.None(), pred.into(), iri.into()])?;
395                    destination_graph
396                        .getattr("remove")?
397                        .call1((remove_tuple,))?;
398                }
399            }
400
401            // Remove each url in the closure from the destination_graph
402            return Ok::<Bound<'_, PyAny>, PyErr>(destination_graph);
403        })
404    }
405
406    /// Print the contents of the OntoEnv
407    #[pyo3(signature = (includes=None))]
408    fn dump(&self, _py: Python, includes: Option<String>) -> PyResult<()> {
409        let inner = self.inner.clone();
410        let env = inner.lock().unwrap();
411        env.dump(includes.as_deref());
412        Ok(())
413    }
414
415    /// Import the dependencies of the given graph into the graph. Removes the owl:imports
416    /// of all imported ontologies.
417    #[pyo3(signature = (graph))]
418    fn import_dependencies<'a>(
419        &self,
420        py: Python<'a>,
421        graph: &Bound<'a, PyAny>,
422    ) -> PyResult<Bound<'a, PyAny>> {
423        let rdflib = py.import("rdflib")?;
424        let py_rdf_type = term_to_python(py, &rdflib, Term::NamedNode(TYPE.into()))?;
425        let py_ontology = term_to_python(py, &rdflib, Term::NamedNode(ONTOLOGY.into()))?;
426        let value_fun: Py<PyAny> = graph.getattr("value")?.into();
427        let kwargs = [("predicate", py_rdf_type), ("object", py_ontology)].into_py_dict(py)?;
428        let ontology = value_fun.call(py, (), Some(&kwargs))?;
429
430        if ontology.is_none(py) {
431            return Ok(graph.clone());
432        }
433
434        let ontology = ontology.to_string();
435
436        self.get_closure(py, &ontology, Some(graph), true, true)
437    }
438
439    /// Add a new ontology to the OntoEnv
440    fn add(&self, location: &Bound<'_, PyAny>) -> PyResult<()> {
441        let inner = self.inner.clone();
442        let mut env = inner.lock().unwrap();
443        let location =
444            OntologyLocation::from_str(&location.to_string()).map_err(anyhow_to_pyerr)?;
445        env.add(location, true).map_err(anyhow_to_pyerr)?;
446        env.save_to_directory().map_err(anyhow_to_pyerr)?;
447        Ok(())
448    }
449
450    /// Refresh the OntoEnv by re-loading all remote graphs and loading
451    /// any local graphs which have changed since the last update
452    fn refresh(&self) -> PyResult<()> {
453        let inner = self.inner.clone();
454        let mut env = inner.lock().unwrap();
455        env.update().map_err(anyhow_to_pyerr)?;
456        env.save_to_directory().map_err(anyhow_to_pyerr)?;
457        Ok(())
458    }
459
460    /// Get the names of all ontologies that depend on the given ontology
461    fn get_dependents(&self, uri: &str) -> PyResult<Vec<String>> {
462        let iri = NamedNode::new(uri)
463            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
464        let inner = self.inner.clone();
465        let env = inner.lock().unwrap();
466        let dependents = env.get_dependents(&iri).map_err(anyhow_to_pyerr)?;
467        let names: Vec<String> = dependents
468            .iter()
469            .map(|ont| ont.name().to_string())
470            .collect();
471        Ok(names)
472    }
473
474    /// Export the graph with the given URI to an rdflib.Graph
475    fn get_graph(&self, py: Python, uri: &Bound<'_, PyString>) -> PyResult<Py<PyAny>> {
476        let rdflib = py.import("rdflib")?;
477        let iri = NamedNode::new(uri.to_string())
478            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
479        let graph = {
480            let inner = self.inner.clone();
481            let env = inner.lock().unwrap();
482            let graphid = env
483                .resolve(ResolveTarget::Graph(iri).into())
484                .ok_or_else(|| {
485                    PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
486                        "Failed to resolve graph for URI: {}",
487                        uri
488                    ))
489                })?;
490            println!("graphid: {:?}", graphid);
491            let graph = env.get_graph(&graphid).map_err(anyhow_to_pyerr)?;
492            graph
493        };
494        let res = rdflib.getattr("Graph")?.call0()?;
495        for triple in graph.into_iter() {
496            let s: Term = triple.subject.into();
497            let p: Term = triple.predicate.into();
498            let o: Term = triple.object.into();
499
500            let t = PyTuple::new(
501                py,
502                &[
503                    term_to_python(py, &rdflib, s)?,
504                    term_to_python(py, &rdflib, p)?,
505                    term_to_python(py, &rdflib, o)?,
506                ],
507            )?;
508
509            res.getattr("add")?.call1((t,))?;
510        }
511        Ok(res.into())
512    }
513
514    /// Get the names of all ontologies in the OntoEnv
515    fn get_ontology_names(&self) -> PyResult<Vec<String>> {
516        let inner = self.inner.clone();
517        let env = inner.lock().unwrap();
518        let names: Vec<String> = env
519            .ontologies()
520            .keys()
521            .map(|k| k.name().to_string())
522            .collect();
523        Ok(names)
524    }
525
526    /// Convert the OntoEnv to an rdflib.Dataset
527    fn to_rdflib_dataset(&self, py: Python) -> PyResult<Py<PyAny>> {
528        // rdflib.ConjunctiveGraph(store="Oxigraph")
529        let inner = self.inner.clone();
530        let env = inner.lock().unwrap();
531        let rdflib = py.import("rdflib")?;
532        let dataset = rdflib.getattr("Dataset")?;
533
534        // call Dataset(store="Oxigraph")
535        let kwargs = [("store", "Oxigraph")].into_py_dict(py)?;
536        let store = dataset.call((), Some(&kwargs))?;
537        let path = env.store_path().unwrap();
538        store.getattr("open")?.call1((path,))?;
539        Ok(store.into())
540    }
541
542    pub fn store_path(&self) -> PyResult<Option<String>> {
543        let inner = self.inner.clone();
544        let env = inner.lock().unwrap();
545        match env.store_path() {
546            Some(path) => Ok(Some(path.to_string_lossy().to_string())),
547            None => Ok(None), // Return None if the path doesn't exist (e.g., temporary env)
548        }
549    }
550
551    // Wrapper method to raise error if store_path is None, matching previous panic behavior
552    // but providing a Python-level error. Or tests can check for None.
553    // Let's keep the Option return type for flexibility and adjust tests.
554
555    pub fn flush(&mut self, py: Python<'_>) -> PyResult<()> {
556        py.allow_threads(|| {
557            let inner = self.inner.clone();
558            let mut env = inner.lock().unwrap();
559            env.flush().map_err(anyhow_to_pyerr)?;
560            Ok(())
561        })
562    }
563}
564
565#[pymodule]
566fn ontoenv(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
567    m.add_class::<Config>()?;
568    m.add_class::<OntoEnv>()?;
569    // add version attribute
570    m.add("version", env!("CARGO_PKG_VERSION"))?;
571    Ok(())
572}