sea_core/
kg_import.rs

1use crate::graph::Graph;
2use crate::kg::KnowledgeGraph;
3
4// These types are only used when the 'shacl' feature is enabled; make their
5// imports conditional to avoid unused-import warnings when the feature is off.
6#[cfg(feature = "shacl")]
7use crate::kg::{ShaclProperty, ShaclShape};
8use std::fmt;
9
10#[cfg(feature = "shacl")]
11use oxigraph::model::{GraphNameRef, Term};
12#[cfg(feature = "shacl")]
13use oxigraph::sparql::QueryResults;
14#[cfg(feature = "shacl")]
15use oxigraph::store::Store;
16
/// Error returned by the knowledge-graph import entry points in this module.
///
/// Each variant carries a fully formatted, human-readable message; the
/// `Display` output is that message verbatim.
#[derive(Debug)]
pub enum ImportError {
    /// SHACL validation could not be run, or it reported violations.
    ShaclValidation(String),
    /// Any other failure (parsing, serialization, conversion, unsupported build).
    Other(String),
}

impl fmt::Display for ImportError {
    /// Writes the message carried by the variant, with no extra decoration.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Both variants render identically, so bind the payload once.
        let message = match self {
            Self::ShaclValidation(message) | Self::Other(message) => message,
        };
        f.write_str(message)
    }
}

// The default `Error` methods suffice: no underlying source error is retained.
impl std::error::Error for ImportError {}
33
34pub fn import_kg_turtle(turtle: &str) -> Result<Graph, ImportError> {
35    match KnowledgeGraph::from_turtle(turtle) {
36        Ok(kg) => validate_and_convert(kg),
37        Err(e) => Err(ImportError::Other(format!(
38            "Failed to parse Turtle KG: {}",
39            e
40        ))),
41    }
42}
43
44fn validate_and_convert(kg: KnowledgeGraph) -> Result<Graph, ImportError> {
45    match kg.validate_shacl() {
46        Ok(vs) => {
47            if !vs.is_empty() {
48                let summary = vs
49                    .iter()
50                    .map(|v| format!("[{:?}] {}", v.severity, v.message))
51                    .collect::<Vec<_>>()
52                    .join("; ");
53                Err(ImportError::ShaclValidation(format!(
54                    "SHACL validation failed: {}",
55                    summary
56                )))
57            } else {
58                match kg.to_graph() {
59                    Ok(graph) => Ok(graph),
60                    Err(e) => Err(ImportError::Other(format!(
61                        "Failed to convert KG to Graph: {}",
62                        e
63                    ))),
64                }
65            }
66        }
67        Err(e) => Err(ImportError::ShaclValidation(format!(
68            "SHACL validation failed: {}",
69            e
70        ))),
71    }
72}
73
74pub fn import_kg_rdfxml(xml: &str) -> Result<Graph, ImportError> {
75    #[cfg(feature = "shacl")]
76    {
77        let store = Store::new()
78            .map_err(|e| ImportError::Other(format!("Failed to create oxigraph store: {}", e)))?;
79        let fmt = oxigraph::io::GraphFormat::RdfXml;
80
81        // Load the RDF/XML into the default graph
82        store
83            .load_graph(xml.as_bytes(), fmt, GraphNameRef::DefaultGraph, None)
84            .map_err(|e| {
85                ImportError::Other(format!("Failed to parse RDF/XML with oxigraph: {}", e))
86            })?;
87
88        // Serialize the parsed RDF/XML into Turtle so we can reuse KnowledgeGraph::from_turtle
89        let mut writer = Vec::new();
90        let turtle_fmt = oxigraph::io::GraphFormat::Turtle;
91        store
92            .dump_graph(&mut writer, turtle_fmt, GraphNameRef::DefaultGraph)
93            .map_err(|e| {
94                ImportError::Other(format!("Failed to serialize RDF/XML to Turtle: {}", e))
95            })?;
96        let turtle_str = String::from_utf8(writer)
97            .map_err(|e| ImportError::Other(format!("Invalid UTF-8: {}", e)))?;
98
99        let mut kg = KnowledgeGraph::from_turtle(&turtle_str)
100            .map_err(|e| ImportError::Other(format!("Failed to convert RDF/XML to KG: {}", e)))?;
101
102        // If we didn't pick up any shapes from the Turtle representation, try to extract
103        // SHACL shapes directly from the store via SPARQL.
104        if kg.shapes.is_empty() {
105            augment_shapes_from_store(&store, &mut kg)?;
106        }
107
108        validate_and_convert(kg)
109    }
110    #[cfg(not(feature = "shacl"))]
111    {
112        let _ = xml;
113        Err(ImportError::Other(
114            "RDF/XML import is not supported in this build (enable feature 'shacl')".to_string(),
115        ))
116    }
117}
118
/// Replaces `kg.shapes` with the SHACL node shapes found in `store`.
///
/// A SPARQL `SELECT` collects, for every `sh:NodeShape` that has at least one
/// `sh:property` with an `sh:path`, the optional `sh:targetClass` and the
/// property's optional `sh:datatype`, `sh:minCount`, `sh:maxCount` and
/// `sh:minExclusive`. Rows are grouped by shape IRI into `ShaclShape` values.
///
/// Details of the mapping:
///
/// * Rows whose shape or path is not a named node (e.g. a blank node) are
///   skipped.
/// * IRIs in the `sea:`, `rdfs:` (paths only) and `xsd:` (datatypes only)
///   namespaces are shortened to their prefixed form; others are kept as-is.
/// * A property with `sh:minExclusive` but no `sh:datatype` is given the
///   datatype `xsd:decimal`.
/// * A shape without a named `sh:targetClass` gets a target derived from its
///   IRI fragment (minus a trailing `Shape`), or the full IRI if it has no `#`.
/// * Shapes are emitted sorted by shape IRI so the result is stable across runs.
///
/// `kg.shapes` is left untouched when the query produces no usable shape.
///
/// # Errors
///
/// Returns `ImportError::Other` when the query cannot be executed or a
/// solution row cannot be read.
#[cfg(feature = "shacl")]
fn augment_shapes_from_store(store: &Store, kg: &mut KnowledgeGraph) -> Result<(), ImportError> {
    // Ordered map: iterating a HashMap would hand the shapes to `kg` in a
    // different, arbitrary order on every run.
    use std::collections::BTreeMap;

    const SEA_NS: &str = "http://domainforge.ai/sea#";
    const RDFS_NS: &str = "http://www.w3.org/2000/01/rdf-schema#";
    const XSD_NS: &str = "http://www.w3.org/2001/XMLSchema#";

    /// Returns the IRI of `term` if it is a named node.
    fn iri_of(term: &Term) -> Option<&str> {
        match term {
            Term::NamedNode(node) => Some(node.as_str()),
            _ => None,
        }
    }

    /// Returns the lexical form of `term` if it is a literal.
    fn lexical_of(term: &Term) -> Option<&str> {
        match term {
            Term::Literal(literal) => Some(literal.value()),
            _ => None,
        }
    }

    /// Rewrites `iri` as `prefix:local` using the first matching
    /// `(namespace, prefix)` pair, or returns it unchanged if none matches.
    fn compact_iri(iri: &str, namespaces: &[(&str, &str)]) -> String {
        namespaces
            .iter()
            .find_map(|&(namespace, prefix)| {
                iri.strip_prefix(namespace)
                    .map(|local| format!("{}:{}", prefix, local))
            })
            .unwrap_or_else(|| iri.to_string())
    }

    let q = r#"
        PREFIX sh: <http://www.w3.org/ns/shacl#>
        PREFIX sea: <http://domainforge.ai/sea#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        SELECT ?shape ?target ?path ?datatype ?minCount ?maxCount ?minExclusive WHERE {
            ?shape a sh:NodeShape .
            OPTIONAL { ?shape sh:targetClass ?target . }
            ?shape sh:property ?prop .
            ?prop sh:path ?path .
            OPTIONAL { ?prop sh:datatype ?datatype . }
            OPTIONAL { ?prop sh:minCount ?minCount . }
            OPTIONAL { ?prop sh:maxCount ?maxCount . }
            OPTIONAL { ?prop sh:minExclusive ?minExclusive . }
        }
    "#;

    let solutions = match store.query(q).map_err(|e| {
        ImportError::Other(format!("Failed to execute SHACL SPARQL query: {}", e))
    })? {
        QueryResults::Solutions(solutions) => solutions,
        // Any other result kind carries no rows to read; nothing to add.
        _ => return Ok(()),
    };

    // Shape IRI -> (target class IRI if any, properties in row order).
    let mut shapes_by_iri: BTreeMap<String, (Option<String>, Vec<ShaclProperty>)> =
        BTreeMap::new();

    // NOTE(review): each row is one (shape, target, property, ...) combination,
    // so a shape with several sh:targetClass values presumably repeats every
    // property once per target and keeps only the last target seen — confirm
    // whether multi-target shapes need dedicated handling.
    for row in solutions {
        let row = row.map_err(|e| {
            ImportError::Other(format!("Error reading SHACL SPARQL solution: {}", e))
        })?;

        let (shape_iri, path_iri) = match (
            row.get("shape").and_then(iri_of),
            row.get("path").and_then(iri_of),
        ) {
            (Some(shape), Some(path)) => (shape, path),
            // Blank-node shapes and non-IRI paths cannot be represented here.
            _ => continue,
        };

        let min_count = row
            .get("minCount")
            .and_then(lexical_of)
            .and_then(|v| v.parse::<u32>().ok());
        let max_count = row
            .get("maxCount")
            .and_then(lexical_of)
            .and_then(|v| v.parse::<u32>().ok());
        // Only the lexical form is kept (e.g. "0" for "0"^^xsd:decimal).
        let min_exclusive = row
            .get("minExclusive")
            .and_then(lexical_of)
            .map(String::from);

        // Explicit datatype wins; a bare sh:minExclusive implies xsd:decimal.
        let datatype = row
            .get("datatype")
            .and_then(iri_of)
            .map(|dt| compact_iri(dt, &[(XSD_NS, "xsd")]))
            .or_else(|| min_exclusive.as_ref().map(|_| "xsd:decimal".to_string()));

        let prop = ShaclProperty {
            path: compact_iri(path_iri, &[(SEA_NS, "sea"), (RDFS_NS, "rdfs")]),
            datatype,
            min_count,
            max_count,
            min_exclusive,
        };

        let entry = shapes_by_iri
            .entry(shape_iri.to_string())
            .or_insert_with(|| (None, Vec::new()));
        if let Some(target) = row.get("target").and_then(iri_of) {
            entry.0 = Some(target.to_string());
        }
        entry.1.push(prop);
    }

    let extracted_shapes: Vec<ShaclShape> = shapes_by_iri
        .into_iter()
        .map(|(shape_iri, (target, properties))| {
            let target_class = match target {
                Some(target) => compact_iri(&target, &[(SEA_NS, "sea")]),
                // No explicit target: derive one from the shape's own IRI,
                // e.g. "...#EntityShape" -> "sea:Entity".
                None => match shape_iri.find('#') {
                    Some(pos) => {
                        let fragment = &shape_iri[pos + 1..];
                        format!(
                            "sea:{}",
                            fragment.strip_suffix("Shape").unwrap_or(fragment)
                        )
                    }
                    None => shape_iri,
                },
            };
            ShaclShape {
                target_class,
                properties,
            }
        })
        .collect();

    // Never wipe existing shapes with an empty result.
    if !extracted_shapes.is_empty() {
        kg.shapes = extracted_shapes;
    }

    Ok(())
}