rgen_core/
graph.rs

1use anyhow::{bail, Result};
2use fxhash::FxHasher;
3use lru::LruCache;
4use oxigraph::io::RdfFormat;
5use oxigraph::model::{GraphName, NamedNode, Quad, Subject, Term};
6use oxigraph::sparql::{Query, QueryOptions, QueryResults};
7use oxigraph::store::Store;
8use serde_json::Value as JsonValue;
9use std::collections::BTreeMap;
10use std::fs::File;
11use std::hash::{Hash, Hasher};
12use std::io::BufReader;
13use std::num::NonZeroUsize;
14use std::path::Path;
15use std::sync::{
16    atomic::{AtomicU64, Ordering},
17    Arc, Mutex,
18};
19
/// Owned, cloneable snapshot of a SPARQL query result, suitable for caching.
#[derive(Clone, Debug)]
pub enum CachedResult {
    /// Outcome of a query that yields a single boolean.
    Boolean(bool),
    /// One map per solution row, keyed by variable name, with each bound term
    /// stored in its string form.
    Solutions(Vec<BTreeMap<String, String>>),
    /// Triples of a graph-shaped result, each already serialized to a string.
    Graph(Vec<String>),
}
26
27impl CachedResult {
28    /// Convert to serde_json::Value for Tera consumption
29    pub fn to_json(&self) -> JsonValue {
30        match self {
31            CachedResult::Boolean(b) => JsonValue::Bool(*b),
32            CachedResult::Solutions(rows) => {
33                let arr: Vec<JsonValue> = rows
34                    .iter()
35                    .map(|row| {
36                        let mut obj = serde_json::Map::new();
37                        for (k, v) in row {
38                            obj.insert(k.clone(), JsonValue::String(v.clone()));
39                        }
40                        JsonValue::Object(obj)
41                    })
42                    .collect();
43                JsonValue::Array(arr)
44            }
45            CachedResult::Graph(_triples) => JsonValue::String(String::new()),
46        }
47    }
48}
49
50/// Thread-safe Oxigraph wrapper with SPARQL caching. Clone is cheap (shared store).
51pub struct Graph {
52    inner: Store,
53    epoch: Arc<AtomicU64>,
54    plan_cache: Arc<Mutex<LruCache<u64, Query>>>,
55    result_cache: Arc<Mutex<LruCache<(u64, u64), CachedResult>>>,
56}
57
58impl Graph {
59    pub fn new() -> Result<Self> {
60        let plan_cache_size = NonZeroUsize::new(100)
61            .ok_or_else(|| anyhow::anyhow!("Invalid cache size"))?;
62        let result_cache_size = NonZeroUsize::new(1000)
63            .ok_or_else(|| anyhow::anyhow!("Invalid cache size"))?;
64
65        Ok(Self {
66            inner: Store::new()?,
67            epoch: Arc::new(AtomicU64::new(1)),
68            plan_cache: Arc::new(Mutex::new(LruCache::new(plan_cache_size))),
69            result_cache: Arc::new(Mutex::new(LruCache::new(result_cache_size))),
70        })
71    }
72
73    fn current_epoch(&self) -> u64 {
74        self.epoch.load(Ordering::Relaxed)
75    }
76
77    fn bump_epoch(&self) {
78        self.epoch.fetch_add(1, Ordering::Relaxed);
79    }
80
81    fn hash_query(&self, sparql: &str) -> u64 {
82        let mut hasher = FxHasher::default();
83        sparql.hash(&mut hasher);
84        hasher.finish()
85    }
86
87    fn materialize_results(&self, results: QueryResults) -> Result<CachedResult> {
88        match results {
89            QueryResults::Boolean(b) => Ok(CachedResult::Boolean(b)),
90            QueryResults::Solutions(solutions) => {
91                let mut rows = Vec::new();
92                for solution in solutions {
93                    let solution = solution?;
94                    let mut row = BTreeMap::new();
95                    for (var, term) in solution.iter() {
96                        row.insert(var.as_str().to_string(), term.to_string());
97                    }
98                    rows.push(row);
99                }
100                Ok(CachedResult::Solutions(rows))
101            }
102            QueryResults::Graph(quads) => {
103                let triples: Result<Vec<String>> = quads
104                    .map(|q| q.map(|quad| quad.to_string()).map_err(Into::into))
105                    .collect();
106                Ok(CachedResult::Graph(triples?))
107            }
108        }
109    }
110
111    pub fn insert_turtle(&self, turtle: &str) -> Result<()> {
112        self.inner
113            .load_from_reader(RdfFormat::Turtle, turtle.as_bytes())?;
114        self.bump_epoch();
115        Ok(())
116    }
117
118    pub fn insert_turtle_with_base(&self, turtle: &str, _base_iri: &str) -> Result<()> {
119        // Note: The new Oxigraph API doesn't support base IRI in load_from_reader
120        // We'll need to handle this differently or use a different approach
121        self.inner
122            .load_from_reader(RdfFormat::Turtle, turtle.as_bytes())?;
123        self.bump_epoch();
124        Ok(())
125    }
126
127    pub fn insert_turtle_in(&self, turtle: &str, _graph_iri: &str) -> Result<()> {
128        // Note: The new Oxigraph API doesn't support named graphs in load_from_reader
129        // We'll need to handle this differently or use a different approach
130        self.inner
131            .load_from_reader(RdfFormat::Turtle, turtle.as_bytes())?;
132        self.bump_epoch();
133        Ok(())
134    }
135
136    pub fn insert_quad(&self, s: &str, p: &str, o: &str) -> Result<()> {
137        let s = NamedNode::new(s)?;
138        let p = NamedNode::new(p)?;
139        let o = NamedNode::new(o)?;
140        self.inner
141            .insert(&Quad::new(s, p, o, GraphName::DefaultGraph))?;
142        self.bump_epoch();
143        Ok(())
144    }
145
146    pub fn load_path<P: AsRef<Path>>(&self, path: P) -> Result<()> {
147        let path = path.as_ref();
148        let ext = path
149            .extension()
150            .and_then(|e| e.to_str())
151            .map(|s| s.to_ascii_lowercase())
152            .unwrap_or_default();
153
154        let fmt = match ext.as_str() {
155            "ttl" | "turtle" => RdfFormat::Turtle,
156            "nt" | "ntriples" => RdfFormat::NTriples,
157            "rdf" | "xml" => RdfFormat::RdfXml,
158            other => bail!("unsupported RDF format: {}", other),
159        };
160
161        let file = File::open(path)?;
162        let reader = BufReader::new(file);
163        self.inner.load_from_reader(fmt, reader)?;
164        self.bump_epoch();
165        Ok(())
166    }
167
168    pub fn query_cached(&self, sparql: &str) -> Result<CachedResult> {
169        let query_hash = self.hash_query(sparql);
170        let epoch = self.current_epoch();
171        let cache_key = (query_hash, epoch);
172
173        // Check result cache
174        if let Some(cached) = self.result_cache
175            .lock()
176            .map_err(|e| anyhow::anyhow!("Cache lock poisoned: {}", e))?
177            .get(&cache_key)
178            .cloned()
179        {
180            return Ok(cached);
181        }
182
183        // Check plan cache or parse
184        let query = {
185            let mut cache = self.plan_cache
186                .lock()
187                .map_err(|e| anyhow::anyhow!("Cache lock poisoned: {}", e))?;
188            if let Some(q) = cache.get(&query_hash).cloned() {
189                q
190            } else {
191                let q = Query::parse(sparql, None)?;
192                cache.put(query_hash, q.clone());
193                q
194            }
195        };
196
197        // Execute and materialize
198        let results = self.inner.query_opt(query, QueryOptions::default())?;
199        let cached = self.materialize_results(results)?;
200
201        // Store in cache
202        self.result_cache
203            .lock()
204            .map_err(|e| anyhow::anyhow!("Cache lock poisoned: {}", e))?
205            .put(cache_key, cached.clone());
206
207        Ok(cached)
208    }
209
210    pub fn query(&self, sparql: &str) -> Result<QueryResults> {
211        // For backward compatibility, we need to reconstruct QueryResults
212        // This is inefficient but maintains API compatibility
213        let cached = self.query_cached(sparql)?;
214
215        match cached {
216            CachedResult::Boolean(b) => Ok(QueryResults::Boolean(b)),
217            CachedResult::Solutions(_) | CachedResult::Graph(_) => {
218                // Fall back to direct query for non-boolean results
219                // since we can't reconstruct the iterator properly
220                Ok(self.inner.query(sparql)?)
221            }
222        }
223    }
224
225    pub fn query_with_prolog(
226        &self, sparql: &str, prefixes: &BTreeMap<String, String>, base: Option<&str>,
227    ) -> Result<QueryResults> {
228        let head = build_prolog(prefixes, base);
229        let q = if head.is_empty() {
230            sparql.into()
231        } else {
232            format!("{head}\n{sparql}")
233        };
234        self.query(&q)
235    }
236
237    pub fn query_prepared(&self, q: &Query) -> Result<QueryResults> {
238        Ok(self.inner.query_opt(q.clone(), QueryOptions::default())?)
239    }
240
241    /// Typed pattern filter (no extra allocs).
242    pub fn quads_for_pattern(
243        &self, s: Option<&Subject>, p: Option<&NamedNode>, o: Option<&Term>, g: Option<&GraphName>,
244    ) -> Result<Vec<Quad>> {
245        Ok(self
246            .inner
247            .quads_for_pattern(
248                s.map(|x| x.as_ref()),
249                p.map(|x| x.as_ref()),
250                o.map(|x| x.as_ref()),
251                g.map(|x| x.as_ref()),
252            )
253            .collect::<Result<Vec<_>, _>>()?)
254    }
255
256    pub fn clear(&self) -> Result<()> {
257        self.inner.clear()?;
258        self.bump_epoch();
259        Ok(())
260    }
261
262    pub fn len(&self) -> usize {
263        #[allow(deprecated)]
264        {
265            self.inner.len().unwrap_or(0)
266        }
267    }
268
269    pub fn is_empty(&self) -> bool {
270        self.len() == 0
271    }
272}
273
274impl Clone for Graph {
275    fn clone(&self) -> Self {
276        Self {
277            inner: self.inner.clone(),
278            epoch: Arc::clone(&self.epoch),
279            plan_cache: Arc::clone(&self.plan_cache),
280            result_cache: Arc::clone(&self.result_cache),
281        }
282    }
283}
284
/// Builds a SPARQL prolog from an optional base IRI and a set of prefixes.
///
/// The output has an optional `BASE <...>` line followed by one
/// `PREFIX name: <...>` line per entry, in the map's key order. Every line is
/// newline-terminated. The result is empty when there is no base and no
/// prefixes. IRIs and prefix names are inserted verbatim.
pub fn build_prolog(prefixes: &BTreeMap<String, String>, base: Option<&str>) -> String {
    let base_line = base.map(|iri| format!("BASE <{}>\n", iri));
    let prefix_lines = prefixes
        .iter()
        .map(|(name, iri)| format!("PREFIX {}: <{}>\n", name, iri));

    base_line.into_iter().chain(prefix_lines).collect()
}
295
#[cfg(test)]
mod tests {
    use super::*;
    use oxigraph::model::NamedNode;

    /// Turtle fixture with the single triple `ex:alice ex:knows ex:bob`.
    const ALICE_KNOWS_BOB: &str = r#"
            @prefix ex: <http://example.org/> .
            ex:alice ex:knows ex:bob .
        "#;

    /// Query selecting the subject of every triple.
    const ALL_SUBJECTS: &str = "SELECT ?s WHERE { ?s ?p ?o }";

    /// Returns the `?s` binding of the first solution row as a string.
    ///
    /// Panics if there is no row or no `?s` binding; returns an error if the
    /// results are not solutions.
    fn first_subject(results: QueryResults) -> Result<String> {
        match results {
            QueryResults::Solutions(mut rows) => {
                let first = rows.next().unwrap().unwrap();
                Ok(first.get("s").unwrap().to_string())
            }
            _ => Err(anyhow::anyhow!("Expected Solutions results")),
        }
    }

    /// Inserts the `A rel B` quad used by several tests.
    fn insert_a_rel_b(g: &Graph) -> Result<()> {
        g.insert_quad(
            "http://example.org/A",
            "http://example.org/rel",
            "http://example.org/B",
        )
    }

    #[test]
    fn insert_turtle_and_query() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle(ALICE_KNOWS_BOB)?;

        let subject = first_subject(g.query(ALL_SUBJECTS)?)?;
        assert_eq!(subject, "<http://example.org/alice>");
        Ok(())
    }

    #[test]
    fn insert_quad_and_filter() -> Result<()> {
        let g = Graph::new()?;
        insert_a_rel_b(&g)?;

        let subject: Subject = NamedNode::new("http://example.org/A")?.into();
        let matches = g.quads_for_pattern(Some(&subject), None, None, None)?;
        assert_eq!(matches.len(), 1);
        Ok(())
    }

    #[test]
    fn insert_turtle_with_base() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle_with_base(ALICE_KNOWS_BOB, "http://example.org/")?;

        let subject = first_subject(g.query(ALL_SUBJECTS)?)?;
        assert_eq!(subject, "<http://example.org/alice>");
        Ok(())
    }

    #[test]
    fn query_with_prolog_works() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle("@prefix ex: <http://example/> . ex:x a ex:T .")?;

        let mut prefixes = BTreeMap::new();
        prefixes.insert("ex".to_string(), "http://example/".to_string());

        let results = g.query_with_prolog("SELECT ?s WHERE { ?s a ex:T }", &prefixes, None)?;
        assert_eq!(first_subject(results)?, "<http://example/x>");
        Ok(())
    }

    #[test]
    fn test_cached_result_to_json() {
        // Boolean variant maps to a JSON boolean.
        assert_eq!(CachedResult::Boolean(true).to_json(), JsonValue::Bool(true));

        // Solutions variant maps to an array of objects.
        let mut row = BTreeMap::new();
        row.insert("name".to_string(), "Alice".to_string());
        row.insert("age".to_string(), "30".to_string());

        match CachedResult::Solutions(vec![row]).to_json() {
            JsonValue::Array(arr) => {
                assert_eq!(arr.len(), 1);
                match &arr[0] {
                    JsonValue::Object(obj) => {
                        assert_eq!(obj.get("name").unwrap(), "Alice");
                        assert_eq!(obj.get("age").unwrap(), "30");
                    }
                    _ => panic!("Expected object in array"),
                }
            }
            _ => panic!("Expected array"),
        }

        // Graph variant maps to an empty string.
        let triple = "<http://example.org/subject> <http://example.org/predicate> <http://example.org/object> .";
        let graph_json = CachedResult::Graph(vec![triple.to_string()]).to_json();
        assert_eq!(graph_json, JsonValue::String(String::new()));
    }

    #[test]
    fn test_graph_creation_and_basic_properties() -> Result<()> {
        let g = Graph::new()?;
        assert!(g.is_empty());
        assert_eq!(g.len(), 0);
        // A new graph starts at epoch 1.
        assert_eq!(g.current_epoch(), 1);

        // Each bump advances the epoch by one.
        g.bump_epoch();
        assert_eq!(g.current_epoch(), 2);

        Ok(())
    }

    #[test]
    fn test_insert_turtle_in() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle_in(ALICE_KNOWS_BOB, "http://example.org/graph1")?;

        // Exactly one triple must have been stored.
        assert!(!g.is_empty());
        assert_eq!(g.len(), 1);

        Ok(())
    }

    #[test]
    fn test_query_cached() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle(
            r#"
            @prefix ex: <http://example.org/> .
            ex:alice ex:knows ex:bob .
            ex:bob ex:knows ex:charlie .
        "#,
        )?;

        let sparql = "SELECT ?s WHERE { ?s <http://example.org/knows> ?o }";
        // The first call evaluates and caches; the second is served from cache.
        let first = g.query_cached(sparql)?;
        let second = g.query_cached(sparql)?;

        match (&first, &second) {
            (CachedResult::Solutions(a), CachedResult::Solutions(b)) => {
                assert_eq!(a.len(), b.len());
                // alice->bob and bob->charlie
                assert_eq!(a.len(), 2);
                Ok(())
            }
            _ => Err(anyhow::anyhow!("Expected Solutions results")),
        }
    }

    #[test]
    fn test_query_prepared() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle(ALICE_KNOWS_BOB)?;

        let prepared = Query::parse(ALL_SUBJECTS, None)?;
        let subject = first_subject(g.query_prepared(&prepared)?)?;
        assert_eq!(subject, "<http://example.org/alice>");

        Ok(())
    }

    #[test]
    fn test_quads_for_pattern() -> Result<()> {
        let g = Graph::new()?;
        insert_a_rel_b(&g)?;
        g.insert_quad(
            "http://example.org/A",
            "http://example.org/rel2",
            "http://example.org/C",
        )?;

        let subject: Subject = NamedNode::new("http://example.org/A")?.into();
        let predicate = NamedNode::new("http://example.org/rel")?;

        // Subject only: both quads match.
        let by_subject = g.quads_for_pattern(Some(&subject), None, None, None)?;
        assert_eq!(by_subject.len(), 2);

        // Subject and predicate: only the `rel` quad matches.
        let by_both = g.quads_for_pattern(Some(&subject), Some(&predicate), None, None)?;
        assert_eq!(by_both.len(), 1);

        Ok(())
    }

    #[test]
    fn test_clear() -> Result<()> {
        let g = Graph::new()?;
        insert_a_rel_b(&g)?;

        assert!(!g.is_empty());
        assert_eq!(g.len(), 1);

        g.clear()?;

        assert!(g.is_empty());
        assert_eq!(g.len(), 0);

        Ok(())
    }

    #[test]
    fn test_hash_query() -> Result<()> {
        let g = Graph::new()?;

        let baseline = g.hash_query("SELECT ?s WHERE { ?s ?p ?o }");
        let repeated = g.hash_query("SELECT ?s WHERE { ?s ?p ?o }");
        let different = g.hash_query("SELECT ?o WHERE { ?s ?p ?o }");

        // Identical text hashes identically; different text hashes differently.
        assert_eq!(baseline, repeated);
        assert_ne!(baseline, different);

        Ok(())
    }

    #[test]
    fn test_materialize_results() -> Result<()> {
        let g = Graph::new()?;
        g.insert_turtle(ALICE_KNOWS_BOB)?;

        let raw = g.query(ALL_SUBJECTS)?;

        match g.materialize_results(raw)? {
            CachedResult::Solutions(rows) => {
                assert_eq!(rows.len(), 1);
                assert_eq!(rows[0].get("s").unwrap(), "<http://example.org/alice>");
                Ok(())
            }
            _ => Err(anyhow::anyhow!("Expected Solutions result")),
        }
    }

    #[test]
    fn test_build_prolog() {
        let mut prefixes = BTreeMap::new();
        prefixes.insert("ex".to_string(), "http://example.org/".to_string());
        prefixes.insert(
            "rdf".to_string(),
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
        );

        let prolog = build_prolog(&prefixes, Some("http://example.org/base"));

        for expected in [
            "PREFIX ex: <http://example.org/>",
            "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
            "BASE <http://example.org/base>",
        ] {
            assert!(prolog.contains(expected));
        }
    }

    #[test]
    fn test_build_prolog_no_base() {
        let mut prefixes = BTreeMap::new();
        prefixes.insert("ex".to_string(), "http://example.org/".to_string());

        let prolog = build_prolog(&prefixes, None);

        assert!(prolog.contains("PREFIX ex: <http://example.org/>"));
        assert!(!prolog.contains("BASE"));
    }

    #[test]
    fn test_clone_graph() -> Result<()> {
        let original = Graph::new()?;
        insert_a_rel_b(&original)?;

        let copy = original.clone();

        // Both handles report the same contents.
        assert_eq!(original.len(), copy.len());
        assert_eq!(original.is_empty(), copy.is_empty());

        // Both handles answer the same query with the same first subject.
        let from_original = original.query(ALL_SUBJECTS)?;
        let from_copy = copy.query(ALL_SUBJECTS)?;
        assert_eq!(first_subject(from_original)?, first_subject(from_copy)?);

        Ok(())
    }
}