Skip to main content

oxirs_core/model/
dataset.rs

1//! RDF Dataset implementation
2
3use crate::model::{Graph, GraphName, GraphNameRef, Object, Predicate, Quad, QuadRef, Subject};
4use std::collections::HashMap;
5use std::iter::FromIterator;
6
7/// An in-memory RDF Dataset
8///
9/// A dataset is a collection of named graphs plus a default graph.
10/// Each named graph is identified by an IRI or blank node.
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub struct Dataset {
13    default_graph: Graph,
14    named_graphs: HashMap<GraphName, Graph>,
15}
16
17impl Dataset {
18    /// Creates a new empty dataset
19    pub fn new() -> Self {
20        Dataset {
21            default_graph: Graph::new(),
22            named_graphs: HashMap::new(),
23        }
24    }
25
26    /// Creates a new dataset with the specified capacity for named graphs
27    pub fn with_capacity(capacity: usize) -> Self {
28        Dataset {
29            default_graph: Graph::new(),
30            named_graphs: HashMap::with_capacity(capacity),
31        }
32    }
33
34    /// Returns a reference to the default graph
35    pub fn default_graph(&self) -> &Graph {
36        &self.default_graph
37    }
38
39    /// Returns a mutable reference to the default graph
40    pub fn default_graph_mut(&mut self) -> &mut Graph {
41        &mut self.default_graph
42    }
43
44    /// Returns a reference to the named graph with the given name
45    pub fn named_graph(&self, name: &GraphName) -> Option<&Graph> {
46        if name.is_default_graph() {
47            Some(&self.default_graph)
48        } else {
49            self.named_graphs.get(name)
50        }
51    }
52
53    /// Returns a mutable reference to the named graph with the given name
54    ///
55    /// Creates the graph if it doesn't exist.
56    pub fn named_graph_mut(&mut self, name: &GraphName) -> &mut Graph {
57        if name.is_default_graph() {
58            &mut self.default_graph
59        } else {
60            self.named_graphs.entry(name.clone()).or_default()
61        }
62    }
63
64    /// Inserts a quad into the dataset
65    ///
66    /// Returns `true` if the quad was not already present, `false` otherwise.
67    pub fn insert(&mut self, quad: Quad) -> bool {
68        let triple = quad.to_triple();
69        let graph = self.named_graph_mut(quad.graph_name());
70        graph.insert(triple)
71    }
72
73    /// Removes a quad from the dataset
74    ///
75    /// Returns `true` if the quad was present, `false` otherwise.
76    pub fn remove(&mut self, quad: &Quad) -> bool {
77        let triple = quad.to_triple();
78        if let Some(graph) = self.named_graphs.get_mut(quad.graph_name()) {
79            graph.remove(&triple)
80        } else if quad.graph_name().is_default_graph() {
81            self.default_graph.remove(&triple)
82        } else {
83            false
84        }
85    }
86
87    /// Returns `true` if the dataset contains the specified quad
88    pub fn contains(&self, quad: &Quad) -> bool {
89        let triple = quad.to_triple();
90        if let Some(graph) = self.named_graph(quad.graph_name()) {
91            graph.contains(&triple)
92        } else {
93            false
94        }
95    }
96
97    /// Returns the total number of quads in the dataset
98    pub fn len(&self) -> usize {
99        self.default_graph.len() + self.named_graphs.values().map(|g| g.len()).sum::<usize>()
100    }
101
102    /// Returns `true` if the dataset contains no quads
103    pub fn is_empty(&self) -> bool {
104        self.default_graph.is_empty() && self.named_graphs.values().all(|g| g.is_empty())
105    }
106
107    /// Returns the number of named graphs (excluding default graph)
108    pub fn named_graph_count(&self) -> usize {
109        self.named_graphs.len()
110    }
111
112    /// Returns an iterator over all graph names (excluding default graph)
113    pub fn graph_names(&self) -> impl Iterator<Item = &GraphName> {
114        self.named_graphs.keys()
115    }
116
117    /// Returns an iterator over all named graphs
118    pub fn named_graphs(&self) -> impl Iterator<Item = (&GraphName, &Graph)> {
119        self.named_graphs.iter()
120    }
121
122    /// Clears the dataset, removing all quads
123    pub fn clear(&mut self) {
124        self.default_graph.clear();
125        self.named_graphs.clear();
126    }
127
128    /// Removes a named graph from the dataset
129    ///
130    /// Returns the removed graph if it existed.
131    pub fn remove_graph(&mut self, name: &GraphName) -> Option<Graph> {
132        if name.is_default_graph() {
133            let mut graph = Graph::new();
134            std::mem::swap(&mut graph, &mut self.default_graph);
135            Some(graph)
136        } else {
137            self.named_graphs.remove(name)
138        }
139    }
140
141    /// Returns an iterator over all quads in the dataset
142    pub fn iter(&self) -> impl Iterator<Item = Quad> + '_ {
143        let default_quads = self
144            .default_graph
145            .iter()
146            .map(|triple| Quad::from_triple(triple.clone()));
147
148        let named_quads = self.named_graphs.iter().flat_map(|(name, graph)| {
149            graph
150                .iter()
151                .map(move |triple| Quad::from_triple_in_graph(triple.clone(), name.clone()))
152        });
153
154        default_quads.chain(named_quads)
155    }
156
157    /// Returns an iterator over all quads in the dataset as references
158    pub fn iter_ref(&self) -> impl Iterator<Item = QuadRef<'_>> + '_ {
159        let default_quads = self.default_graph.iter().map(|triple| {
160            QuadRef::new(
161                triple.subject().into(),
162                triple.predicate().into(),
163                triple.object().into(),
164                GraphNameRef::DefaultGraph,
165            )
166        });
167
168        let named_quads = self.named_graphs.iter().flat_map(|(name, graph)| {
169            graph.iter().map(move |triple| {
170                QuadRef::new(
171                    triple.subject().into(),
172                    triple.predicate().into(),
173                    triple.object().into(),
174                    name.into(),
175                )
176            })
177        });
178
179        default_quads.chain(named_quads)
180    }
181
182    /// Finds all quads matching the given pattern
183    ///
184    /// `None` values in the pattern act as wildcards.
185    pub fn quads_for_pattern<'a>(
186        &'a self,
187        subject: Option<&'a Subject>,
188        predicate: Option<&'a Predicate>,
189        object: Option<&'a Object>,
190        graph_name: Option<&'a GraphName>,
191    ) -> Box<dyn Iterator<Item = Quad> + 'a> {
192        if let Some(graph_name) = graph_name {
193            if let Some(graph) = self.named_graph(graph_name) {
194                let graph_name = graph_name.clone();
195                Box::new(
196                    graph
197                        .triples_for_pattern(subject, predicate, object)
198                        .map(move |triple| {
199                            Quad::from_triple_in_graph(triple.clone(), graph_name.clone())
200                        }),
201                ) as Box<dyn Iterator<Item = Quad> + '_>
202            } else {
203                Box::new(std::iter::empty())
204            }
205        } else {
206            // Search all graphs
207            Box::new(self.iter().filter(move |quad| {
208                let triple = quad.to_triple();
209                if let Some(s) = subject {
210                    if triple.subject() != s {
211                        return false;
212                    }
213                }
214                if let Some(p) = predicate {
215                    if triple.predicate() != p {
216                        return false;
217                    }
218                }
219                if let Some(o) = object {
220                    if triple.object() != o {
221                        return false;
222                    }
223                }
224                true
225            }))
226        }
227    }
228
229    /// Extends the dataset with quads from an iterator
230    pub fn extend<I>(&mut self, quads: I)
231    where
232        I: IntoIterator<Item = Quad>,
233    {
234        for quad in quads {
235            self.insert(quad);
236        }
237    }
238
239    /// Creates the union of this dataset with another dataset
240    pub fn union(&self, other: &Dataset) -> Dataset {
241        let mut result = self.clone();
242        result.extend(other.iter());
243        result
244    }
245}
246
247impl Default for Dataset {
248    fn default() -> Self {
249        Self::new()
250    }
251}
252
253impl FromIterator<Quad> for Dataset {
254    fn from_iter<T: IntoIterator<Item = Quad>>(iter: T) -> Self {
255        let mut dataset = Dataset::new();
256        dataset.extend(iter);
257        dataset
258    }
259}
260
261impl Extend<Quad> for Dataset {
262    fn extend<T: IntoIterator<Item = Quad>>(&mut self, iter: T) {
263        for quad in iter {
264            self.insert(quad);
265        }
266    }
267}
268
269impl IntoIterator for Dataset {
270    type Item = Quad;
271    type IntoIter = std::vec::IntoIter<Quad>;
272
273    fn into_iter(self) -> Self::IntoIter {
274        self.iter().collect::<Vec<_>>().into_iter()
275    }
276}
277
278#[cfg(test)]
279mod tests {
280    use super::*;
281    use crate::model::{Literal, NamedNode};
282
283    fn create_test_quad(graph_name: Option<NamedNode>) -> Quad {
284        let subject = NamedNode::new("http://example.org/subject").expect("valid IRI");
285        let predicate = NamedNode::new("http://example.org/predicate").expect("valid IRI");
286        let object = Literal::new("object");
287
288        if let Some(graph_name) = graph_name {
289            Quad::new(subject, predicate, object, graph_name)
290        } else {
291            Quad::new_default_graph(subject, predicate, object)
292        }
293    }
294
295    #[test]
296    fn test_dataset_basic_operations() {
297        let mut dataset = Dataset::new();
298        let quad = create_test_quad(None);
299
300        assert!(dataset.is_empty());
301        assert_eq!(dataset.len(), 0);
302        assert_eq!(dataset.named_graph_count(), 0);
303
304        assert!(dataset.insert(quad.clone()));
305        assert!(!dataset.is_empty());
306        assert_eq!(dataset.len(), 1);
307        assert!(dataset.contains(&quad));
308
309        assert!(!dataset.insert(quad.clone())); // Already exists
310        assert_eq!(dataset.len(), 1);
311
312        assert!(dataset.remove(&quad));
313        assert!(dataset.is_empty());
314        assert_eq!(dataset.len(), 0);
315        assert!(!dataset.contains(&quad));
316    }
317
318    #[test]
319    fn test_dataset_named_graphs() {
320        let mut dataset = Dataset::new();
321
322        let quad1 = create_test_quad(None); // Default graph
323        let graph_name = NamedNode::new("http://example.org/graph1").expect("valid IRI");
324        let quad2 = create_test_quad(Some(graph_name.clone()));
325
326        dataset.insert(quad1.clone());
327        dataset.insert(quad2.clone());
328
329        assert_eq!(dataset.len(), 2);
330        assert_eq!(dataset.named_graph_count(), 1);
331        assert_eq!(dataset.default_graph().len(), 1);
332
333        let named_graph = dataset
334            .named_graph(&GraphName::NamedNode(graph_name.clone()))
335            .expect("operation should succeed");
336        assert_eq!(named_graph.len(), 1);
337
338        // Test graph names iterator
339        let graph_names: Vec<_> = dataset.graph_names().collect();
340        assert_eq!(graph_names.len(), 1);
341        assert!(graph_names.contains(&&GraphName::NamedNode(graph_name)));
342    }
343
344    #[test]
345    fn test_dataset_pattern_matching() {
346        let mut dataset = Dataset::new();
347
348        let subject = NamedNode::new("http://example.org/subject").expect("valid IRI");
349        let predicate1 = NamedNode::new("http://example.org/predicate1").expect("valid IRI");
350        let predicate2 = NamedNode::new("http://example.org/predicate2").expect("valid IRI");
351        let object = Literal::new("object");
352        let graph_name = NamedNode::new("http://example.org/graph").expect("valid IRI");
353
354        let quad1 = Quad::new_default_graph(subject.clone(), predicate1.clone(), object.clone());
355        let quad2 = Quad::new(subject.clone(), predicate2, object, graph_name.clone());
356
357        dataset.insert(quad1.clone());
358        dataset.insert(quad2.clone());
359
360        // Find by subject
361        let by_subject: Vec<_> = dataset
362            .quads_for_pattern(Some(&Subject::NamedNode(subject.clone())), None, None, None)
363            .collect();
364        assert_eq!(by_subject.len(), 2);
365
366        // Find by graph
367        let by_graph: Vec<_> = dataset
368            .quads_for_pattern(None, None, None, Some(&GraphName::NamedNode(graph_name)))
369            .collect();
370        assert_eq!(by_graph.len(), 1);
371        assert_eq!(by_graph[0], quad2);
372
373        // Find in default graph
374        let by_default_graph: Vec<_> = dataset
375            .quads_for_pattern(None, None, None, Some(&GraphName::DefaultGraph))
376            .collect();
377        assert_eq!(by_default_graph.len(), 1);
378        assert_eq!(by_default_graph[0], quad1);
379    }
380
381    #[test]
382    fn test_dataset_iteration() {
383        let mut dataset = Dataset::new();
384
385        let quad1 = create_test_quad(None);
386        let graph_name = NamedNode::new("http://example.org/graph").expect("valid IRI");
387        let quad2 = create_test_quad(Some(graph_name));
388
389        dataset.insert(quad1.clone());
390        dataset.insert(quad2.clone());
391
392        let quads: Vec<_> = dataset.iter().collect();
393        assert_eq!(quads.len(), 2);
394        assert!(quads.contains(&quad1));
395        assert!(quads.contains(&quad2));
396    }
397
398    #[test]
399    fn test_dataset_remove_graph() {
400        let mut dataset = Dataset::new();
401
402        let graph_name = NamedNode::new("http://example.org/graph").expect("valid IRI");
403        let quad = create_test_quad(Some(graph_name.clone()));
404
405        dataset.insert(quad);
406        assert_eq!(dataset.named_graph_count(), 1);
407
408        let removed_graph = dataset.remove_graph(&GraphName::NamedNode(graph_name));
409        assert!(removed_graph.is_some());
410        assert_eq!(dataset.named_graph_count(), 0);
411        assert_eq!(dataset.len(), 0);
412    }
413}