Skip to main content

tensorlogic_oxirs_bridge/schema/
inference.rs

1//! RDFS inference engine for materializing entailed triples.
2//!
3//! This module implements RDFS entailment rules to infer new triples from
4//! existing RDF data. The inference engine supports:
5//! - rdfs:subClassOf transitivity
6//! - Property domain/range inheritance
7//! - Type propagation through class hierarchies
8//! - Property inheritance through property hierarchies
9
10use anyhow::Result;
11use oxrdf::{Graph, NamedNode, NamedOrBlankNodeRef, TermRef, Triple};
12use std::collections::{HashMap, HashSet};
13
14use super::SchemaAnalyzer;
15
/// RDFS inference engine that materializes entailed triples.
///
/// The engine owns a working graph plus a second graph that records only the
/// triples produced by inference. The four private maps are IRI-keyed caches
/// that `materialize` fills before it applies any rule.
pub struct RdfsInferenceEngine {
    /// Working graph: the triples passed to `new`, plus every inferred triple
    /// (the rule methods insert each new triple here as well as in `inferred`).
    pub graph: Graph,
    /// Inferred triples (materialized); starts empty in `new`.
    pub inferred: Graph,
    /// Subclass hierarchy cache: class IRI -> IRIs of all its transitive
    /// superclasses (the class itself is removed from its own set).
    subclass_hierarchy: HashMap<String, HashSet<String>>,
    /// Subproperty hierarchy cache: property IRI -> IRIs of all its transitive
    /// superproperties (the property itself is removed from its own set).
    subproperty_hierarchy: HashMap<String, HashSet<String>>,
    /// Property domains cache: property IRI -> class IRIs declared via
    /// `rdfs:domain`.
    property_domains: HashMap<String, HashSet<String>>,
    /// Property ranges cache: property IRI -> class IRIs declared via
    /// `rdfs:range`.
    property_ranges: HashMap<String, HashSet<String>>,
}
31
32impl RdfsInferenceEngine {
33    /// Create a new inference engine from a graph
34    pub fn new(graph: Graph) -> Self {
35        RdfsInferenceEngine {
36            graph,
37            inferred: Graph::new(),
38            subclass_hierarchy: HashMap::new(),
39            subproperty_hierarchy: HashMap::new(),
40            property_domains: HashMap::new(),
41            property_ranges: HashMap::new(),
42        }
43    }
44
45    /// Run all RDFS inference rules and materialize entailed triples
46    pub fn materialize(&mut self) -> Result<()> {
47        // Build hierarchy caches first
48        self.build_subclass_hierarchy()?;
49        self.build_subproperty_hierarchy()?;
50        self.build_property_constraints()?;
51
52        // Apply inference rules iteratively until fixpoint
53        let mut changed = true;
54        let mut iterations = 0;
55        const MAX_ITERATIONS: usize = 100;
56
57        while changed && iterations < MAX_ITERATIONS {
58            changed = false;
59            iterations += 1;
60
61            // Rule: rdfs2 (domain type inference)
62            // If (x, p, y) and (p, rdfs:domain, D) then (x, rdf:type, D)
63            if self.apply_domain_inference()? {
64                changed = true;
65            }
66
67            // Rule: rdfs3 (range type inference)
68            // If (x, p, y) and (p, rdfs:range, R) then (y, rdf:type, R)
69            if self.apply_range_inference()? {
70                changed = true;
71            }
72
73            // Rule: rdfs9 (subclass inheritance)
74            // If (x, rdf:type, C) and (C, rdfs:subClassOf, D) then (x, rdf:type, D)
75            if self.apply_subclass_inference()? {
76                changed = true;
77            }
78
79            // Rule: rdfs7 (subproperty inference)
80            // If (x, p, y) and (p, rdfs:subPropertyOf, q) then (x, q, y)
81            if self.apply_subproperty_inference()? {
82                changed = true;
83            }
84        }
85
86        Ok(())
87    }
88
89    /// Get the complete graph (original + inferred triples)
90    pub fn get_complete_graph(&self) -> Graph {
91        let mut complete = self.graph.clone();
92        for triple in self.inferred.iter() {
93            complete.insert(triple);
94        }
95        complete
96    }
97
    /// Get only the inferred triples.
    ///
    /// Returns a borrow of the `inferred` graph, which `new` creates empty and
    /// which the rule methods run by `materialize` add to.
    pub fn get_inferred_triples(&self) -> &Graph {
        &self.inferred
    }
102
103    /// Build transitive closure of subclass hierarchy
104    fn build_subclass_hierarchy(&mut self) -> Result<()> {
105        let rdfs_subclass = NamedNode::new("http://www.w3.org/2000/01/rdf-schema#subClassOf")?;
106
107        // First, collect direct subclass relationships
108        let mut direct_subclasses: HashMap<String, HashSet<String>> = HashMap::new();
109
110        for triple in self.graph.iter() {
111            if triple.predicate == rdfs_subclass.as_ref() {
112                if let (NamedOrBlankNodeRef::NamedNode(subj), TermRef::NamedNode(obj)) =
113                    (triple.subject, triple.object)
114                {
115                    direct_subclasses
116                        .entry(subj.as_str().to_string())
117                        .or_default()
118                        .insert(obj.as_str().to_string());
119                }
120            }
121        }
122
123        // Compute transitive closure
124        for class in direct_subclasses.keys() {
125            let mut visited = HashSet::new();
126            let mut stack = vec![class.clone()];
127
128            while let Some(current) = stack.pop() {
129                if visited.contains(&current) {
130                    continue;
131                }
132                visited.insert(current.clone());
133
134                if let Some(parents) = direct_subclasses.get(&current) {
135                    for parent in parents {
136                        stack.push(parent.clone());
137                    }
138                }
139            }
140
141            visited.remove(class);
142            self.subclass_hierarchy.insert(class.clone(), visited);
143        }
144
145        Ok(())
146    }
147
148    /// Build transitive closure of subproperty hierarchy
149    fn build_subproperty_hierarchy(&mut self) -> Result<()> {
150        let rdfs_subproperty =
151            NamedNode::new("http://www.w3.org/2000/01/rdf-schema#subPropertyOf")?;
152
153        // First, collect direct subproperty relationships
154        let mut direct_subprops: HashMap<String, HashSet<String>> = HashMap::new();
155
156        for triple in self.graph.iter() {
157            if triple.predicate == rdfs_subproperty.as_ref() {
158                if let (NamedOrBlankNodeRef::NamedNode(subj), TermRef::NamedNode(obj)) =
159                    (triple.subject, triple.object)
160                {
161                    direct_subprops
162                        .entry(subj.as_str().to_string())
163                        .or_default()
164                        .insert(obj.as_str().to_string());
165                }
166            }
167        }
168
169        // Compute transitive closure
170        for prop in direct_subprops.keys() {
171            let mut visited = HashSet::new();
172            let mut stack = vec![prop.clone()];
173
174            while let Some(current) = stack.pop() {
175                if visited.contains(&current) {
176                    continue;
177                }
178                visited.insert(current.clone());
179
180                if let Some(parents) = direct_subprops.get(&current) {
181                    for parent in parents {
182                        stack.push(parent.clone());
183                    }
184                }
185            }
186
187            visited.remove(prop);
188            self.subproperty_hierarchy.insert(prop.clone(), visited);
189        }
190
191        Ok(())
192    }
193
194    /// Build property domain and range constraints
195    fn build_property_constraints(&mut self) -> Result<()> {
196        let rdfs_domain = NamedNode::new("http://www.w3.org/2000/01/rdf-schema#domain")?;
197        let rdfs_range = NamedNode::new("http://www.w3.org/2000/01/rdf-schema#range")?;
198
199        for triple in self.graph.iter() {
200            if triple.predicate == rdfs_domain.as_ref() {
201                if let (NamedOrBlankNodeRef::NamedNode(subj), TermRef::NamedNode(obj)) =
202                    (triple.subject, triple.object)
203                {
204                    self.property_domains
205                        .entry(subj.as_str().to_string())
206                        .or_default()
207                        .insert(obj.as_str().to_string());
208                }
209            } else if triple.predicate == rdfs_range.as_ref() {
210                if let (NamedOrBlankNodeRef::NamedNode(subj), TermRef::NamedNode(obj)) =
211                    (triple.subject, triple.object)
212                {
213                    self.property_ranges
214                        .entry(subj.as_str().to_string())
215                        .or_default()
216                        .insert(obj.as_str().to_string());
217                }
218            }
219        }
220
221        Ok(())
222    }
223
224    /// Apply domain inference rule (rdfs2)
225    fn apply_domain_inference(&mut self) -> Result<bool> {
226        let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
227        let mut new_triples = Vec::new();
228
229        for triple in self.graph.iter() {
230            let pred_str = triple.predicate.as_str().to_string();
231
232            if let Some(domains) = self.property_domains.get(&pred_str) {
233                if let NamedOrBlankNodeRef::NamedNode(subj) = triple.subject {
234                    for domain in domains {
235                        let domain_node = NamedNode::new(domain.clone())?;
236                        let type_triple =
237                            Triple::new(subj.into_owned(), rdf_type.clone(), domain_node.clone());
238
239                        // Check if triple already exists
240                        if !self.graph.contains(&type_triple)
241                            && !self.inferred.contains(&type_triple)
242                        {
243                            new_triples.push(type_triple);
244                        }
245                    }
246                }
247            }
248        }
249
250        let changed = !new_triples.is_empty();
251        for triple in new_triples {
252            self.inferred.insert(&triple);
253            self.graph.insert(&triple);
254        }
255
256        Ok(changed)
257    }
258
259    /// Apply range inference rule (rdfs3)
260    fn apply_range_inference(&mut self) -> Result<bool> {
261        let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
262        let mut new_triples = Vec::new();
263
264        for triple in self.graph.iter() {
265            let pred_str = triple.predicate.as_str().to_string();
266
267            if let Some(ranges) = self.property_ranges.get(&pred_str) {
268                if let TermRef::NamedNode(obj) = triple.object {
269                    for range in ranges {
270                        let range_node = NamedNode::new(range.clone())?;
271                        let type_triple =
272                            Triple::new(obj.into_owned(), rdf_type.clone(), range_node.clone());
273
274                        if !self.graph.contains(&type_triple)
275                            && !self.inferred.contains(&type_triple)
276                        {
277                            new_triples.push(type_triple);
278                        }
279                    }
280                }
281            }
282        }
283
284        let changed = !new_triples.is_empty();
285        for triple in new_triples {
286            self.inferred.insert(&triple);
287            self.graph.insert(&triple);
288        }
289
290        Ok(changed)
291    }
292
293    /// Apply subclass inference rule (rdfs9)
294    fn apply_subclass_inference(&mut self) -> Result<bool> {
295        let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?;
296        let mut new_triples = Vec::new();
297
298        for triple in self.graph.iter() {
299            if triple.predicate == rdf_type.as_ref() {
300                if let (NamedOrBlankNodeRef::NamedNode(subj), TermRef::NamedNode(class_obj)) =
301                    (triple.subject, triple.object)
302                {
303                    let class_str = class_obj.as_str().to_string();
304
305                    // Get all superclasses
306                    if let Some(superclasses) = self.subclass_hierarchy.get(&class_str) {
307                        for superclass in superclasses {
308                            let superclass_node = NamedNode::new(superclass.clone())?;
309                            let type_triple = Triple::new(
310                                subj.into_owned(),
311                                rdf_type.clone(),
312                                superclass_node.clone(),
313                            );
314
315                            if !self.graph.contains(&type_triple)
316                                && !self.inferred.contains(&type_triple)
317                            {
318                                new_triples.push(type_triple);
319                            }
320                        }
321                    }
322                }
323            }
324        }
325
326        let changed = !new_triples.is_empty();
327        for triple in new_triples {
328            self.inferred.insert(&triple);
329            self.graph.insert(&triple);
330        }
331
332        Ok(changed)
333    }
334
335    /// Apply subproperty inference rule (rdfs7)
336    fn apply_subproperty_inference(&mut self) -> Result<bool> {
337        let mut new_triples = Vec::new();
338
339        for triple in self.graph.iter() {
340            let pred_str = triple.predicate.as_str().to_string();
341
342            if let Some(superprops) = self.subproperty_hierarchy.get(&pred_str) {
343                for superprop in superprops {
344                    let superprop_node = NamedNode::new(superprop.clone())?;
345                    let new_triple = Triple::new(
346                        triple.subject.into_owned(),
347                        superprop_node.clone(),
348                        triple.object.into_owned(),
349                    );
350
351                    if !self.graph.contains(&new_triple) && !self.inferred.contains(&new_triple) {
352                        new_triples.push(new_triple);
353                    }
354                }
355            }
356        }
357
358        let changed = !new_triples.is_empty();
359        for triple in new_triples {
360            self.inferred.insert(&triple);
361            self.graph.insert(&triple);
362        }
363
364        Ok(changed)
365    }
366
367    /// Get all superclasses of a given class (including transitive)
368    pub fn get_all_superclasses(&self, class_iri: &str) -> HashSet<String> {
369        self.subclass_hierarchy
370            .get(class_iri)
371            .cloned()
372            .unwrap_or_default()
373    }
374
375    /// Get all superproperties of a given property (including transitive)
376    pub fn get_all_superproperties(&self, prop_iri: &str) -> HashSet<String> {
377        self.subproperty_hierarchy
378            .get(prop_iri)
379            .cloned()
380            .unwrap_or_default()
381    }
382
383    /// Check if class A is a subclass of class B (direct or transitive)
384    pub fn is_subclass_of(&self, class_a: &str, class_b: &str) -> bool {
385        if let Some(superclasses) = self.subclass_hierarchy.get(class_a) {
386            superclasses.contains(class_b)
387        } else {
388            false
389        }
390    }
391
392    /// Check if property A is a subproperty of property B (direct or transitive)
393    pub fn is_subproperty_of(&self, prop_a: &str, prop_b: &str) -> bool {
394        if let Some(superprops) = self.subproperty_hierarchy.get(prop_a) {
395            superprops.contains(prop_b)
396        } else {
397            false
398        }
399    }
400
401    /// Get statistics about inferred triples
402    pub fn get_inference_stats(&self) -> InferenceStats {
403        InferenceStats {
404            original_triples: self.graph.len() - self.inferred.len(),
405            inferred_triples: self.inferred.len(),
406            total_triples: self.graph.len(),
407            subclass_relations: self
408                .subclass_hierarchy
409                .values()
410                .map(|s| s.len())
411                .sum::<usize>(),
412            subproperty_relations: self
413                .subproperty_hierarchy
414                .values()
415                .map(|s| s.len())
416                .sum::<usize>(),
417        }
418    }
419}
420
/// Statistics about RDFS inference.
///
/// A plain value type: it can be cloned, compared for equality, and created
/// with all counters at zero via `Default`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct InferenceStats {
    /// Number of triples in the working graph that were not inferred.
    pub original_triples: usize,
    /// Number of triples produced by inference.
    pub inferred_triples: usize,
    /// Number of triples in the working graph (original plus inferred).
    pub total_triples: usize,
    /// Total number of (class, superclass) pairs in the subclass closure.
    pub subclass_relations: usize,
    /// Total number of (property, superproperty) pairs in the subproperty closure.
    pub subproperty_relations: usize,
}
430
431impl SchemaAnalyzer {
432    /// Create an RDFS inference engine from this analyzer's graph
433    pub fn create_inference_engine(&self) -> RdfsInferenceEngine {
434        RdfsInferenceEngine::new(self.graph.clone())
435    }
436
437    /// Run RDFS inference and return the materialized graph
438    pub fn materialize_rdfs_entailments(&self) -> Result<Graph> {
439        let mut engine = self.create_inference_engine();
440        engine.materialize()?;
441        Ok(engine.get_complete_graph())
442    }
443}