Skip to main content

oxirs_arq/executor/
dataset.rs

1//! Dataset Implementation
2//!
3//! This module provides dataset abstractions and implementations for query execution.
4
5use crate::algebra::{PropertyPath, Term as AlgebraTerm, TriplePattern};
6use crate::path::{PathDataset, PropertyPath as PathPropertyPath};
7use anyhow::{anyhow, Result};
8use oxirs_core::RdfTerm;
9use std::collections::HashSet;
10
11/// Dataset trait for data access during query execution
12pub trait Dataset: Send + Sync {
13    /// Find all triples matching the given pattern
14    fn find_triples(
15        &self,
16        pattern: &TriplePattern,
17    ) -> Result<Vec<(AlgebraTerm, AlgebraTerm, AlgebraTerm)>>;
18
19    /// Check if a triple exists in the dataset
20    fn contains_triple(
21        &self,
22        subject: &AlgebraTerm,
23        predicate: &AlgebraTerm,
24        object: &AlgebraTerm,
25    ) -> Result<bool>;
26
27    /// Get all subjects in the dataset
28    fn subjects(&self) -> Result<Vec<AlgebraTerm>>;
29
30    /// Get all predicates in the dataset
31    fn predicates(&self) -> Result<Vec<AlgebraTerm>>;
32
33    /// Get all objects in the dataset
34    fn objects(&self) -> Result<Vec<AlgebraTerm>>;
35}
36
37/// In-memory dataset implementation for testing
38#[derive(Debug, Clone)]
39pub struct InMemoryDataset {
40    triples: Vec<(AlgebraTerm, AlgebraTerm, AlgebraTerm)>,
41}
42
43impl InMemoryDataset {
44    pub fn new() -> Self {
45        Self {
46            triples: Vec::new(),
47        }
48    }
49
50    pub fn add_triple(
51        &mut self,
52        subject: AlgebraTerm,
53        predicate: AlgebraTerm,
54        object: AlgebraTerm,
55    ) {
56        self.triples.push((subject, predicate, object));
57    }
58
59    pub fn from_triples(triples: Vec<(AlgebraTerm, AlgebraTerm, AlgebraTerm)>) -> Self {
60        Self { triples }
61    }
62}
63
64impl Dataset for InMemoryDataset {
65    fn find_triples(
66        &self,
67        pattern: &TriplePattern,
68    ) -> Result<Vec<(AlgebraTerm, AlgebraTerm, AlgebraTerm)>> {
69        let results = self
70            .triples
71            .iter()
72            .filter(|(s, p, o)| {
73                matches_term(&pattern.subject, s)
74                    && matches_term(&pattern.predicate, p)
75                    && matches_term(&pattern.object, o)
76            })
77            .cloned()
78            .collect();
79        Ok(results)
80    }
81
82    fn contains_triple(
83        &self,
84        subject: &AlgebraTerm,
85        predicate: &AlgebraTerm,
86        object: &AlgebraTerm,
87    ) -> Result<bool> {
88        Ok(self
89            .triples
90            .iter()
91            .any(|(s, p, o)| s == subject && p == predicate && o == object))
92    }
93
94    fn subjects(&self) -> Result<Vec<AlgebraTerm>> {
95        let subjects: HashSet<_> = self.triples.iter().map(|(s, _, _)| s.clone()).collect();
96        Ok(subjects.into_iter().collect())
97    }
98
99    fn predicates(&self) -> Result<Vec<AlgebraTerm>> {
100        let predicates: HashSet<_> = self.triples.iter().map(|(_, p, _)| p.clone()).collect();
101        Ok(predicates.into_iter().collect())
102    }
103
104    fn objects(&self) -> Result<Vec<AlgebraTerm>> {
105        let objects: HashSet<_> = self.triples.iter().map(|(_, _, o)| o.clone()).collect();
106        Ok(objects.into_iter().collect())
107    }
108}
109
110impl Default for InMemoryDataset {
111    fn default() -> Self {
112        Self::new()
113    }
114}
115
116fn matches_term(pattern: &AlgebraTerm, term: &AlgebraTerm) -> bool {
117    match pattern {
118        AlgebraTerm::Variable(_) => true, // Variables match any term
119        _ => pattern == term,
120    }
121}
122
123/// Adapter to make Dataset implement PathDataset
124pub struct DatasetPathAdapter<'a> {
125    dataset: &'a dyn Dataset,
126}
127
128impl<'a> DatasetPathAdapter<'a> {
129    pub fn new(dataset: &'a dyn Dataset) -> Self {
130        Self { dataset }
131    }
132}
133
134impl<'a> PathDataset for DatasetPathAdapter<'a> {
135    fn find_outgoing(
136        &self,
137        subject: &AlgebraTerm,
138        predicate: &AlgebraTerm,
139    ) -> Result<Vec<AlgebraTerm>> {
140        let pattern = TriplePattern::new(
141            subject.clone(),
142            predicate.clone(),
143            AlgebraTerm::Variable(crate::algebra::Variable::new("?o")?),
144        );
145        let triples = self.dataset.find_triples(&pattern)?;
146        Ok(triples.into_iter().map(|(_, _, o)| o).collect())
147    }
148
149    fn find_incoming(
150        &self,
151        predicate: &AlgebraTerm,
152        object: &AlgebraTerm,
153    ) -> Result<Vec<AlgebraTerm>> {
154        let pattern = TriplePattern::new(
155            AlgebraTerm::Variable(crate::algebra::Variable::new("?s")?),
156            predicate.clone(),
157            object.clone(),
158        );
159        let triples = self.dataset.find_triples(&pattern)?;
160        Ok(triples.into_iter().map(|(s, _, _)| s).collect())
161    }
162
163    fn find_predicates(
164        &self,
165        subject: &AlgebraTerm,
166        object: &AlgebraTerm,
167    ) -> Result<Vec<AlgebraTerm>> {
168        let pattern = TriplePattern::new(
169            subject.clone(),
170            AlgebraTerm::Variable(crate::algebra::Variable::new("?p")?),
171            object.clone(),
172        );
173        let triples = self.dataset.find_triples(&pattern)?;
174        Ok(triples.into_iter().map(|(_, p, _)| p).collect())
175    }
176
177    fn get_predicates(&self) -> Result<Vec<AlgebraTerm>> {
178        self.dataset.predicates()
179    }
180
181    fn contains_triple(
182        &self,
183        subject: &AlgebraTerm,
184        predicate: &AlgebraTerm,
185        object: &AlgebraTerm,
186    ) -> Result<bool> {
187        self.dataset.contains_triple(subject, predicate, object)
188    }
189}
190
191/// Convert algebra PropertyPath to path module PropertyPath
192pub fn convert_property_path(path: &PropertyPath) -> Result<PathPropertyPath> {
193    match path {
194        PropertyPath::Iri(iri) => Ok(PathPropertyPath::Direct(AlgebraTerm::Iri(iri.clone()))),
195        PropertyPath::Variable(var) => {
196            Ok(PathPropertyPath::Direct(AlgebraTerm::Variable(var.clone())))
197        }
198        PropertyPath::Inverse(inner) => {
199            let inner_path = convert_property_path(inner)?;
200            Ok(PathPropertyPath::Inverse(Box::new(inner_path)))
201        }
202        PropertyPath::Sequence(left, right) => {
203            let left_path = convert_property_path(left)?;
204            let right_path = convert_property_path(right)?;
205            Ok(PathPropertyPath::Sequence(
206                Box::new(left_path),
207                Box::new(right_path),
208            ))
209        }
210        PropertyPath::Alternative(left, right) => {
211            let left_path = convert_property_path(left)?;
212            let right_path = convert_property_path(right)?;
213            Ok(PathPropertyPath::Alternative(
214                Box::new(left_path),
215                Box::new(right_path),
216            ))
217        }
218        PropertyPath::ZeroOrMore(inner) => {
219            let inner_path = convert_property_path(inner)?;
220            Ok(PathPropertyPath::ZeroOrMore(Box::new(inner_path)))
221        }
222        PropertyPath::OneOrMore(inner) => {
223            let inner_path = convert_property_path(inner)?;
224            Ok(PathPropertyPath::OneOrMore(Box::new(inner_path)))
225        }
226        PropertyPath::ZeroOrOne(inner) => {
227            let inner_path = convert_property_path(inner)?;
228            Ok(PathPropertyPath::ZeroOrOne(Box::new(inner_path)))
229        }
230        PropertyPath::NegatedPropertySet(paths) => {
231            let mut terms = Vec::new();
232            for p in paths {
233                match p {
234                    PropertyPath::Iri(iri) => terms.push(AlgebraTerm::Iri(iri.clone())),
235                    PropertyPath::Variable(var) => terms.push(AlgebraTerm::Variable(var.clone())),
236                    _ => {
237                        return Err(anyhow!(
238                            "Negated property set can only contain IRIs or variables"
239                        ))
240                    }
241                }
242            }
243            Ok(PathPropertyPath::NegatedPropertySet(terms))
244        }
245    }
246}
247
248/// Adapter to make ConcreteStore implement Dataset trait
249/// This is primarily for benchmarking and testing purposes
250pub struct ConcreteStoreDataset {
251    store: std::sync::Arc<oxirs_core::rdf_store::ConcreteStore>,
252}
253
254impl ConcreteStoreDataset {
255    pub fn new(store: oxirs_core::rdf_store::ConcreteStore) -> Self {
256        Self {
257            store: std::sync::Arc::new(store),
258        }
259    }
260
261    pub fn from_arc(store: std::sync::Arc<oxirs_core::rdf_store::ConcreteStore>) -> Self {
262        Self { store }
263    }
264}
265
266impl Clone for ConcreteStoreDataset {
267    fn clone(&self) -> Self {
268        Self {
269            store: std::sync::Arc::clone(&self.store),
270        }
271    }
272}
273
274impl Dataset for ConcreteStoreDataset {
275    fn find_triples(
276        &self,
277        pattern: &TriplePattern,
278    ) -> Result<Vec<(AlgebraTerm, AlgebraTerm, AlgebraTerm)>> {
279        use oxirs_core::rdf_store::Store;
280
281        // Convert pattern to ConcreteStore query
282        let subject = match &pattern.subject {
283            AlgebraTerm::Iri(iri) => Some(oxirs_core::model::Subject::NamedNode(iri.clone())),
284            AlgebraTerm::Variable(_) => None,
285            AlgebraTerm::BlankNode(id) => Some(oxirs_core::model::Subject::BlankNode(
286                oxirs_core::model::BlankNode::new(id)
287                    .map_err(|e| anyhow!("Invalid blank node: {}", e))?,
288            )),
289            _ => return Err(anyhow!("Invalid subject in pattern")),
290        };
291
292        let predicate = match &pattern.predicate {
293            AlgebraTerm::Iri(iri) => Some(oxirs_core::model::Predicate::NamedNode(iri.clone())),
294            AlgebraTerm::Variable(_) => None, // Wildcard - match any predicate
295            AlgebraTerm::PropertyPath(path) => {
296                // For simple property paths that are just IRIs, extract the IRI
297                match path {
298                    crate::algebra::PropertyPath::Iri(iri) => {
299                        Some(oxirs_core::model::Predicate::NamedNode(iri.clone()))
300                    }
301                    crate::algebra::PropertyPath::Variable(_) => None, // Wildcard
302                    _ => {
303                        return Err(anyhow!(
304                            "Complex property paths not yet supported in find_triples"
305                        ))
306                    }
307                }
308            }
309            _ => {
310                return Err(anyhow!(
311                    "Predicate must be IRI, variable, or property path, got: {:?}",
312                    pattern.predicate
313                ))
314            }
315        };
316
317        let object = match &pattern.object {
318            AlgebraTerm::Iri(iri) => Some(oxirs_core::model::Object::NamedNode(iri.clone())),
319            AlgebraTerm::Literal(lit) => Some(oxirs_core::model::Object::Literal(
320                oxirs_core::model::Literal::new(&lit.value),
321            )),
322            AlgebraTerm::BlankNode(id) => Some(oxirs_core::model::Object::BlankNode(
323                oxirs_core::model::BlankNode::new(id)
324                    .map_err(|e| anyhow!("Invalid blank node: {}", e))?,
325            )),
326            AlgebraTerm::Variable(_) => None,
327            _ => return Err(anyhow!("Invalid object in pattern")),
328        };
329
330        // Query the store
331        let quads = self.store.find_quads(
332            subject.as_ref(),
333            predicate.as_ref(),
334            object.as_ref(),
335            None, // default graph
336        )?;
337
338        // Convert quads to triples
339        let triples: Vec<_> = quads
340            .into_iter()
341            .filter_map(|quad| {
342                let s = match quad.subject() {
343                    oxirs_core::model::Subject::NamedNode(n) => {
344                        AlgebraTerm::Iri(oxirs_core::model::NamedNode::new(n.as_str()).ok()?)
345                    }
346                    oxirs_core::model::Subject::BlankNode(b) => {
347                        AlgebraTerm::BlankNode(b.as_str().to_string())
348                    }
349                    oxirs_core::model::Subject::Variable(v) => AlgebraTerm::Variable(v.clone()),
350                    oxirs_core::model::Subject::QuotedTriple(_) => {
351                        // Skip RDF-star quoted triples for now
352                        return None;
353                    }
354                };
355
356                let p = AlgebraTerm::Iri(
357                    oxirs_core::model::NamedNode::new(quad.predicate().as_str()).ok()?,
358                );
359
360                let o = match quad.object() {
361                    oxirs_core::model::Object::NamedNode(n) => {
362                        AlgebraTerm::Iri(oxirs_core::model::NamedNode::new(n.as_str()).ok()?)
363                    }
364                    oxirs_core::model::Object::Literal(l) => {
365                        AlgebraTerm::Literal(crate::algebra::Literal {
366                            value: l.value().to_string(),
367                            datatype: None,
368                            language: l.language().map(|s| s.to_string()),
369                        })
370                    }
371                    oxirs_core::model::Object::BlankNode(b) => {
372                        AlgebraTerm::BlankNode(b.as_str().to_string())
373                    }
374                    oxirs_core::model::Object::Variable(v) => AlgebraTerm::Variable(v.clone()),
375                    oxirs_core::model::Object::QuotedTriple(_) => {
376                        // Skip RDF-star quoted triples for now
377                        return None;
378                    }
379                };
380
381                Some((s, p, o))
382            })
383            .collect();
384
385        Ok(triples)
386    }
387
388    fn contains_triple(
389        &self,
390        subject: &AlgebraTerm,
391        predicate: &AlgebraTerm,
392        object: &AlgebraTerm,
393    ) -> Result<bool> {
394        let pattern = TriplePattern::new(subject.clone(), predicate.clone(), object.clone());
395        let triples = self.find_triples(&pattern)?;
396        Ok(!triples.is_empty())
397    }
398
399    fn subjects(&self) -> Result<Vec<AlgebraTerm>> {
400        use oxirs_core::rdf_store::Store;
401        let quads = self.store.find_quads(None, None, None, None)?;
402        let subjects: HashSet<_> = quads
403            .into_iter()
404            .filter_map(|quad| match quad.subject() {
405                oxirs_core::model::Subject::NamedNode(n) => Some(AlgebraTerm::Iri(
406                    oxirs_core::model::NamedNode::new(n.as_str()).ok()?,
407                )),
408                oxirs_core::model::Subject::BlankNode(b) => {
409                    Some(AlgebraTerm::BlankNode(b.as_str().to_string()))
410                }
411                oxirs_core::model::Subject::Variable(v) => Some(AlgebraTerm::Variable(v.clone())),
412                oxirs_core::model::Subject::QuotedTriple(_) => None,
413            })
414            .collect();
415        Ok(subjects.into_iter().collect())
416    }
417
418    fn predicates(&self) -> Result<Vec<AlgebraTerm>> {
419        use oxirs_core::rdf_store::Store;
420        let quads = self.store.find_quads(None, None, None, None)?;
421        let predicates: HashSet<_> = quads
422            .into_iter()
423            .filter_map(|quad| {
424                Some(AlgebraTerm::Iri(
425                    oxirs_core::model::NamedNode::new(quad.predicate().as_str()).ok()?,
426                ))
427            })
428            .collect();
429        Ok(predicates.into_iter().collect())
430    }
431
432    fn objects(&self) -> Result<Vec<AlgebraTerm>> {
433        use oxirs_core::rdf_store::Store;
434        let quads = self.store.find_quads(None, None, None, None)?;
435        let objects: HashSet<_> = quads
436            .into_iter()
437            .filter_map(|quad| match quad.object() {
438                oxirs_core::model::Object::NamedNode(n) => Some(AlgebraTerm::Iri(
439                    oxirs_core::model::NamedNode::new(n.as_str()).ok()?,
440                )),
441                oxirs_core::model::Object::Literal(l) => {
442                    Some(AlgebraTerm::Literal(crate::algebra::Literal {
443                        value: l.value().to_string(),
444                        datatype: None,
445                        language: l.language().map(|s| s.to_string()),
446                    }))
447                }
448                oxirs_core::model::Object::BlankNode(b) => {
449                    Some(AlgebraTerm::BlankNode(b.as_str().to_string()))
450                }
451                oxirs_core::model::Object::Variable(v) => Some(AlgebraTerm::Variable(v.clone())),
452                oxirs_core::model::Object::QuotedTriple(_) => None,
453            })
454            .collect();
455        Ok(objects.into_iter().collect())
456    }
457}