Skip to main content

reddb_rql/modes/
sparql.rs

1//! SPARQL Parser
2//!
3//! Parses W3C SPARQL-like queries for RDF-style graph patterns:
4//! - `SELECT ?host ?ip WHERE { ?host :hasIP ?ip }`
5//! - `PREFIX ex: <http://example.org/> SELECT ?x WHERE { ?x ex:type ?t }`
6//!
7//! # Supported Features
8//!
9//! - SELECT queries with variables (?var)
10//! - WHERE clause with triple patterns
11//! - PREFIX declarations
12//! - FILTER expressions
13//! - OPTIONAL patterns
14//! - LIMIT and OFFSET
15//!
16//! # Mapping to Graph Model
17//!
18//! SPARQL triple patterns map to our graph model:
19//! - Subject → Node
20//! - Predicate → Edge type
21//! - Object → Node or literal value
22
23use crate::ast::{
24    CompareOp, EdgeDirection, EdgePattern, FieldRef, Filter, GraphPattern, GraphQuery, NodePattern,
25    Projection, QueryExpr,
26};
27use reddb_types::types::Value;
28use std::collections::HashMap;
29
/// Error produced when a SPARQL query string cannot be parsed.
///
/// Implements `PartialEq`/`Eq` so callers and tests can compare errors
/// directly instead of matching on individual fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SparqlError {
    /// Human-readable description of what the parser expected or rejected.
    pub message: String,
    /// Byte offset into the query text at which the error was raised.
    pub position: usize,
}
36
37impl std::fmt::Display for SparqlError {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        write!(f, "SPARQL error at {}: {}", self.position, self.message)
40    }
41}
42
43impl std::error::Error for SparqlError {}
44
45/// A SPARQL query
46#[derive(Debug, Clone)]
47pub struct SparqlQuery {
48    /// PREFIX declarations
49    pub prefixes: HashMap<String, String>,
50    /// Selected variables
51    pub select: Vec<String>,
52    /// SELECT DISTINCT
53    pub distinct: bool,
54    /// WHERE clause patterns
55    pub where_patterns: Vec<TriplePattern>,
56    /// FILTER expressions
57    pub filters: Vec<SparqlFilter>,
58    /// OPTIONAL patterns
59    pub optionals: Vec<Vec<TriplePattern>>,
60    /// ORDER BY
61    pub order_by: Vec<(String, bool)>, // (var, ascending)
62    /// LIMIT
63    pub limit: Option<u64>,
64    /// OFFSET
65    pub offset: Option<u64>,
66}
67
68/// A triple pattern (subject, predicate, object)
69#[derive(Debug, Clone)]
70pub struct TriplePattern {
71    pub subject: SparqlTerm,
72    pub predicate: SparqlTerm,
73    pub object: SparqlTerm,
74}
75
/// A single term appearing in a triple pattern or on the right-hand side of a
/// filter comparison.
///
/// Implements `PartialEq` so parsed terms can be compared directly. `Eq` is
/// intentionally not derived because `Number` carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum SparqlTerm {
    /// Variable such as `?name` or `$name`; the sigil is not stored.
    Variable(String),
    /// Prefixed name `prefix:local`, stored as `(prefix, local)`.
    PrefixedName(String, String),
    /// Full IRI written as `<http://...>`, stored without the angle brackets.
    Iri(String),
    /// Plain string literal.
    Literal(String),
    /// Typed literal `"text"^^datatype`, stored as `(text, datatype)`.
    TypedLiteral(String, String),
    /// Numeric literal.
    Number(f64),
    /// Boolean literal `true` / `false`.
    Boolean(bool),
    /// The `a` keyword, SPARQL shorthand for `rdf:type`.
    A,
}
96
97/// SPARQL filter expression
98#[derive(Debug, Clone)]
99pub enum SparqlFilter {
100    /// Comparison: ?x = value
101    Compare(String, CompareOp, SparqlTerm),
102    /// REGEX filter
103    Regex(String, String, Option<String>),
104    /// BOUND(?var)
105    Bound(String),
106    /// !BOUND(?var)
107    NotBound(String),
108    /// isIRI(?var)
109    IsIri(String),
110    /// isLiteral(?var)
111    IsLiteral(String),
112    /// CONTAINS(?var, 'text')
113    Contains(String, String),
114    /// STRSTARTS(?var, 'prefix')
115    StrStarts(String, String),
116    /// STRENDS(?var, 'suffix')
117    StrEnds(String, String),
118    /// AND
119    And(Box<SparqlFilter>, Box<SparqlFilter>),
120    /// OR
121    Or(Box<SparqlFilter>, Box<SparqlFilter>),
122    /// NOT
123    Not(Box<SparqlFilter>),
124}
125
/// Cursor-based parser over a borrowed SPARQL query string.
pub struct SparqlParser<'a> {
    /// The complete query text being parsed.
    input: &'a str,
    /// Current read position, used as an index into `input`.
    pos: usize,
}
131
132impl<'a> SparqlParser<'a> {
133    /// Create a new parser
134    pub fn new(input: &'a str) -> Self {
135        Self { input, pos: 0 }
136    }
137
138    /// Parse a SPARQL query string
139    pub fn parse(input: &str) -> Result<SparqlQuery, SparqlError> {
140        let mut parser = SparqlParser::new(input);
141        parser.parse_query()
142    }
143
144    /// Parse a full query
145    fn parse_query(&mut self) -> Result<SparqlQuery, SparqlError> {
146        let mut query = SparqlQuery {
147            prefixes: HashMap::new(),
148            select: Vec::new(),
149            distinct: false,
150            where_patterns: Vec::new(),
151            filters: Vec::new(),
152            optionals: Vec::new(),
153            order_by: Vec::new(),
154            limit: None,
155            offset: None,
156        };
157
158        // Parse PREFIX declarations
159        while self.peek_keyword("PREFIX") {
160            self.consume_keyword("PREFIX")?;
161            let prefix = self.parse_prefix_name()?;
162            self.expect(':')?;
163            let iri = self.parse_iri()?;
164            query.prefixes.insert(prefix, iri);
165        }
166
167        // Parse SELECT
168        self.consume_keyword("SELECT")?;
169
170        // Check for DISTINCT
171        if self.peek_keyword("DISTINCT") {
172            self.consume_keyword("DISTINCT")?;
173            query.distinct = true;
174        }
175
176        // Parse selected variables or *
177        if self.consume_if("*") {
178            query.select.push("*".to_string());
179        } else {
180            loop {
181                self.skip_whitespace();
182                if self.peek() != Some('?') && self.peek() != Some('$') {
183                    break;
184                }
185                let var = self.parse_variable()?;
186                query.select.push(var);
187            }
188        }
189
190        // Parse WHERE clause
191        self.consume_keyword("WHERE")?;
192        self.expect('{')?;
193
194        // Parse patterns inside WHERE
195        self.parse_where_body(&mut query)?;
196
197        self.expect('}')?;
198
199        // Parse optional modifiers
200        while !self.is_at_end() {
201            self.skip_whitespace();
202
203            if self.peek_keyword("ORDER") {
204                self.consume_keyword("ORDER")?;
205                self.consume_keyword("BY")?;
206
207                loop {
208                    self.skip_whitespace();
209                    let ascending = if self.peek_keyword("DESC") {
210                        self.consume_keyword("DESC")?;
211                        self.expect('(')?;
212                        let var = self.parse_variable()?;
213                        self.expect(')')?;
214                        query.order_by.push((var, false));
215                        false
216                    } else if self.peek_keyword("ASC") {
217                        self.consume_keyword("ASC")?;
218                        self.expect('(')?;
219                        let var = self.parse_variable()?;
220                        self.expect(')')?;
221                        query.order_by.push((var, true));
222                        true
223                    } else if self.peek() == Some('?') || self.peek() == Some('$') {
224                        let var = self.parse_variable()?;
225                        query.order_by.push((var, true));
226                        true
227                    } else {
228                        break;
229                    };
230                    let _ = ascending;
231                }
232            } else if self.peek_keyword("FILTER") {
233                // FILTER can also appear after WHERE clause
234                self.consume_keyword("FILTER")?;
235                let filter = self.parse_filter()?;
236                query.filters.push(filter);
237            } else if self.peek_keyword("LIMIT") {
238                self.consume_keyword("LIMIT")?;
239                query.limit = Some(self.parse_integer()? as u64);
240            } else if self.peek_keyword("OFFSET") {
241                self.consume_keyword("OFFSET")?;
242                query.offset = Some(self.parse_integer()? as u64);
243            } else {
244                break;
245            }
246        }
247
248        Ok(query)
249    }
250
251    /// Parse the body of a WHERE clause
252    fn parse_where_body(&mut self, query: &mut SparqlQuery) -> Result<(), SparqlError> {
253        loop {
254            self.skip_whitespace();
255
256            if self.peek() == Some('}') {
257                break;
258            }
259
260            // Check for OPTIONAL
261            if self.peek_keyword("OPTIONAL") {
262                self.consume_keyword("OPTIONAL")?;
263                self.expect('{')?;
264                let mut optional_patterns = Vec::new();
265                self.parse_patterns(&mut optional_patterns)?;
266                self.expect('}')?;
267                query.optionals.push(optional_patterns);
268                continue;
269            }
270
271            // Check for FILTER
272            if self.peek_keyword("FILTER") {
273                self.consume_keyword("FILTER")?;
274                let filter = self.parse_filter()?;
275                query.filters.push(filter);
276                continue;
277            }
278
279            // Parse triple pattern
280            if let Ok(pattern) = self.parse_triple_pattern() {
281                query.where_patterns.push(pattern);
282
283                // Optional dot separator
284                self.skip_whitespace();
285                self.consume_if(".");
286            } else {
287                break;
288            }
289        }
290
291        Ok(())
292    }
293
294    /// Parse patterns into a vector
295    fn parse_patterns(&mut self, patterns: &mut Vec<TriplePattern>) -> Result<(), SparqlError> {
296        loop {
297            self.skip_whitespace();
298
299            if self.peek() == Some('}') {
300                break;
301            }
302
303            if let Ok(pattern) = self.parse_triple_pattern() {
304                patterns.push(pattern);
305                self.skip_whitespace();
306                self.consume_if(".");
307            } else {
308                break;
309            }
310        }
311        Ok(())
312    }
313
314    /// Parse a triple pattern
315    fn parse_triple_pattern(&mut self) -> Result<TriplePattern, SparqlError> {
316        self.skip_whitespace();
317        let subject = self.parse_term()?;
318
319        self.skip_whitespace();
320        let predicate = self.parse_term()?;
321
322        self.skip_whitespace();
323        let object = self.parse_term()?;
324
325        Ok(TriplePattern {
326            subject,
327            predicate,
328            object,
329        })
330    }
331
332    /// Parse a single term
333    fn parse_term(&mut self) -> Result<SparqlTerm, SparqlError> {
334        self.skip_whitespace();
335
336        // Variable
337        if self.peek() == Some('?') || self.peek() == Some('$') {
338            return Ok(SparqlTerm::Variable(self.parse_variable()?));
339        }
340
341        // Full IRI
342        if self.peek() == Some('<') {
343            return Ok(SparqlTerm::Iri(self.parse_iri()?));
344        }
345
346        // String literal
347        if self.peek() == Some('"') || self.peek() == Some('\'') {
348            let lit = self.parse_string()?;
349
350            // Check for type annotation
351            self.skip_whitespace();
352            if self.consume_if("^^") {
353                let datatype = self.parse_term()?;
354                if let SparqlTerm::Iri(dt) | SparqlTerm::PrefixedName(_, dt) = &datatype {
355                    return Ok(SparqlTerm::TypedLiteral(lit, dt.clone()));
356                }
357            }
358
359            return Ok(SparqlTerm::Literal(lit));
360        }
361
362        // Number
363        if self
364            .peek()
365            .map(|c| c.is_ascii_digit() || c == '-' || c == '+')
366            .unwrap_or(false)
367        {
368            return Ok(SparqlTerm::Number(self.parse_number()?));
369        }
370
371        // Boolean
372        if self.peek_keyword("true") {
373            self.consume_keyword("true")?;
374            return Ok(SparqlTerm::Boolean(true));
375        }
376        if self.peek_keyword("false") {
377            self.consume_keyword("false")?;
378            return Ok(SparqlTerm::Boolean(false));
379        }
380
381        // 'a' shorthand for rdf:type
382        if self.peek() == Some('a') {
383            let next = self.input.get(self.pos + 1..self.pos + 2);
384            if next
385                .map(|s| {
386                    s.chars()
387                        .next()
388                        .map(|c| !c.is_alphanumeric())
389                        .unwrap_or(true)
390                })
391                .unwrap_or(true)
392            {
393                self.pos += 1;
394                return Ok(SparqlTerm::A);
395            }
396        }
397
398        // Prefixed name: prefix:local
399        let prefix = self.parse_prefix_name()?;
400        if self.consume_if(":") {
401            let local = self.parse_local_name()?;
402            return Ok(SparqlTerm::PrefixedName(prefix, local));
403        }
404
405        // Just a local name with empty prefix
406        Ok(SparqlTerm::PrefixedName(String::new(), prefix))
407    }
408
409    /// Parse a FILTER expression
410    fn parse_filter(&mut self) -> Result<SparqlFilter, SparqlError> {
411        self.skip_whitespace();
412        self.expect('(')?;
413        let filter = self.parse_filter_expr()?;
414        self.expect(')')?;
415        Ok(filter)
416    }
417
418    /// Parse filter expression inside parentheses
419    fn parse_filter_expr(&mut self) -> Result<SparqlFilter, SparqlError> {
420        self.skip_whitespace();
421
422        // NOT
423        if self.peek() == Some('!') {
424            self.pos += 1;
425            let inner = self.parse_filter_expr()?;
426            return Ok(SparqlFilter::Not(Box::new(inner)));
427        }
428
429        // Function-style filters
430        if self.peek_keyword("BOUND") {
431            self.consume_keyword("BOUND")?;
432            self.expect('(')?;
433            let var = self.parse_variable()?;
434            self.expect(')')?;
435            return Ok(SparqlFilter::Bound(var));
436        }
437
438        if self.peek_keyword("isIRI") || self.peek_keyword("isURI") {
439            self.skip_identifier();
440            self.expect('(')?;
441            let var = self.parse_variable()?;
442            self.expect(')')?;
443            return Ok(SparqlFilter::IsIri(var));
444        }
445
446        if self.peek_keyword("isLiteral") {
447            self.consume_keyword("isLiteral")?;
448            self.expect('(')?;
449            let var = self.parse_variable()?;
450            self.expect(')')?;
451            return Ok(SparqlFilter::IsLiteral(var));
452        }
453
454        if self.peek_keyword("CONTAINS") {
455            self.consume_keyword("CONTAINS")?;
456            self.expect('(')?;
457            let var = self.parse_variable()?;
458            self.expect(',')?;
459            let pattern = self.parse_string()?;
460            self.expect(')')?;
461            return Ok(SparqlFilter::Contains(var, pattern));
462        }
463
464        if self.peek_keyword("STRSTARTS") {
465            self.consume_keyword("STRSTARTS")?;
466            self.expect('(')?;
467            let var = self.parse_variable()?;
468            self.expect(',')?;
469            let pattern = self.parse_string()?;
470            self.expect(')')?;
471            return Ok(SparqlFilter::StrStarts(var, pattern));
472        }
473
474        if self.peek_keyword("STRENDS") {
475            self.consume_keyword("STRENDS")?;
476            self.expect('(')?;
477            let var = self.parse_variable()?;
478            self.expect(',')?;
479            let pattern = self.parse_string()?;
480            self.expect(')')?;
481            return Ok(SparqlFilter::StrEnds(var, pattern));
482        }
483
484        if self.peek_keyword("REGEX") {
485            self.consume_keyword("REGEX")?;
486            self.expect('(')?;
487            let var = self.parse_variable()?;
488            self.expect(',')?;
489            let pattern = self.parse_string()?;
490            let flags = if self.consume_if(",") {
491                Some(self.parse_string()?)
492            } else {
493                None
494            };
495            self.expect(')')?;
496            return Ok(SparqlFilter::Regex(var, pattern, flags));
497        }
498
499        // Comparison expression: ?var op value
500        if self.peek() == Some('?') || self.peek() == Some('$') {
501            let var = self.parse_variable()?;
502            self.skip_whitespace();
503
504            let op = if self.consume_if("=") {
505                CompareOp::Eq
506            } else if self.consume_if("!=") {
507                CompareOp::Ne
508            } else if self.consume_if("<=") {
509                CompareOp::Le
510            } else if self.consume_if(">=") {
511                CompareOp::Ge
512            } else if self.consume_if("<") {
513                CompareOp::Lt
514            } else if self.consume_if(">") {
515                CompareOp::Gt
516            } else {
517                return Err(self.error("Expected comparison operator"));
518            };
519
520            self.skip_whitespace();
521            let value = self.parse_term()?;
522
523            return Ok(SparqlFilter::Compare(var, op, value));
524        }
525
526        Err(self.error("Invalid filter expression"))
527    }
528
529    // Helper methods
530
531    fn skip_whitespace(&mut self) {
532        while let Some(c) = self.peek() {
533            if c.is_whitespace() {
534                self.pos += 1;
535            } else if c == '#' {
536                // Skip comment
537                while let Some(c) = self.peek() {
538                    self.pos += 1;
539                    if c == '\n' {
540                        break;
541                    }
542                }
543            } else {
544                break;
545            }
546        }
547    }
548
549    fn peek(&self) -> Option<char> {
550        self.input[self.pos..].chars().next()
551    }
552
553    fn is_at_end(&self) -> bool {
554        self.pos >= self.input.len()
555    }
556
557    fn consume_if(&mut self, s: &str) -> bool {
558        self.skip_whitespace();
559        if self.input[self.pos..].starts_with(s) {
560            self.pos += s.len();
561            true
562        } else {
563            false
564        }
565    }
566
567    fn expect(&mut self, c: char) -> Result<(), SparqlError> {
568        self.skip_whitespace();
569        if self.peek() == Some(c) {
570            self.pos += 1;
571            Ok(())
572        } else {
573            Err(self.error(&format!("Expected '{}', found {:?}", c, self.peek())))
574        }
575    }
576
577    fn peek_keyword(&self, keyword: &str) -> bool {
578        let remaining = &self.input[self.pos..].trim_start();
579        if remaining.len() >= keyword.len() {
580            let word = &remaining[..keyword.len()];
581            word.eq_ignore_ascii_case(keyword)
582                && remaining
583                    .chars()
584                    .nth(keyword.len())
585                    .map(|c| !c.is_alphanumeric())
586                    .unwrap_or(true)
587        } else {
588            false
589        }
590    }
591
592    fn consume_keyword(&mut self, keyword: &str) -> Result<(), SparqlError> {
593        self.skip_whitespace();
594        if self.peek_keyword(keyword) {
595            self.pos += self.input[self.pos..].len() - self.input[self.pos..].trim_start().len();
596            self.pos += keyword.len();
597            Ok(())
598        } else {
599            Err(self.error(&format!("Expected keyword '{}'", keyword)))
600        }
601    }
602
603    fn skip_identifier(&mut self) {
604        while let Some(c) = self.peek() {
605            if c.is_alphanumeric() || c == '_' {
606                self.pos += 1;
607            } else {
608                break;
609            }
610        }
611    }
612
613    fn parse_variable(&mut self) -> Result<String, SparqlError> {
614        self.skip_whitespace();
615        if self.peek() != Some('?') && self.peek() != Some('$') {
616            return Err(self.error("Expected variable starting with ? or $"));
617        }
618        self.pos += 1;
619
620        let start = self.pos;
621        while let Some(c) = self.peek() {
622            if c.is_alphanumeric() || c == '_' {
623                self.pos += 1;
624            } else {
625                break;
626            }
627        }
628
629        Ok(self.input[start..self.pos].to_string())
630    }
631
632    fn parse_prefix_name(&mut self) -> Result<String, SparqlError> {
633        self.skip_whitespace();
634        let start = self.pos;
635        while let Some(c) = self.peek() {
636            if c.is_alphanumeric() || c == '_' || c == '-' {
637                self.pos += 1;
638            } else {
639                break;
640            }
641        }
642        Ok(self.input[start..self.pos].to_string())
643    }
644
645    fn parse_local_name(&mut self) -> Result<String, SparqlError> {
646        let start = self.pos;
647        while let Some(c) = self.peek() {
648            if c.is_alphanumeric() || c == '_' || c == '-' || c == '.' {
649                self.pos += 1;
650            } else {
651                break;
652            }
653        }
654        Ok(self.input[start..self.pos].to_string())
655    }
656
657    fn parse_iri(&mut self) -> Result<String, SparqlError> {
658        self.skip_whitespace();
659        self.expect('<')?;
660        let start = self.pos;
661        while let Some(c) = self.peek() {
662            if c == '>' {
663                let iri = self.input[start..self.pos].to_string();
664                self.pos += 1;
665                return Ok(iri);
666            }
667            self.pos += 1;
668        }
669        Err(self.error("Unterminated IRI"))
670    }
671
672    fn parse_string(&mut self) -> Result<String, SparqlError> {
673        self.skip_whitespace();
674        let quote = self.peek();
675        if quote != Some('"') && quote != Some('\'') {
676            return Err(self.error("Expected string"));
677        }
678        self.pos += 1;
679
680        let start = self.pos;
681        while let Some(c) = self.peek() {
682            if Some(c) == quote {
683                let s = self.input[start..self.pos].to_string();
684                self.pos += 1;
685                return Ok(s);
686            }
687            if c == '\\' {
688                self.pos += 2;
689            } else {
690                self.pos += 1;
691            }
692        }
693        Err(self.error("Unterminated string"))
694    }
695
696    fn parse_integer(&mut self) -> Result<i64, SparqlError> {
697        self.skip_whitespace();
698        let start = self.pos;
699        if self.peek() == Some('-') || self.peek() == Some('+') {
700            self.pos += 1;
701        }
702        while let Some(c) = self.peek() {
703            if c.is_ascii_digit() {
704                self.pos += 1;
705            } else {
706                break;
707            }
708        }
709        let s = &self.input[start..self.pos];
710        s.parse()
711            .map_err(|_| self.error(&format!("Invalid integer: {}", s)))
712    }
713
714    fn parse_number(&mut self) -> Result<f64, SparqlError> {
715        self.skip_whitespace();
716        let start = self.pos;
717        if self.peek() == Some('-') || self.peek() == Some('+') {
718            self.pos += 1;
719        }
720        while let Some(c) = self.peek() {
721            if c.is_ascii_digit() || c == '.' || c == 'e' || c == 'E' {
722                self.pos += 1;
723            } else {
724                break;
725            }
726        }
727        let s = &self.input[start..self.pos];
728        s.parse()
729            .map_err(|_| self.error(&format!("Invalid number: {}", s)))
730    }
731
732    fn error(&self, message: &str) -> SparqlError {
733        SparqlError {
734            message: message.to_string(),
735            position: self.pos,
736        }
737    }
738}
739
740impl SparqlQuery {
741    /// Convert SPARQL query to QueryExpr
742    pub fn to_query_expr(&self) -> QueryExpr {
743        let mut nodes: Vec<NodePattern> = Vec::new();
744        let mut edges: Vec<EdgePattern> = Vec::new();
745        let mut filters: Vec<Filter> = Vec::new();
746        let mut var_to_alias: HashMap<String, String> = HashMap::new();
747        let mut alias_counter = 0;
748
749        // Helper to get or create alias for a variable
750        let mut get_alias = |var: &str| -> String {
751            if let Some(alias) = var_to_alias.get(var) {
752                alias.clone()
753            } else {
754                let alias = format!("n{}", alias_counter);
755                alias_counter += 1;
756                var_to_alias.insert(var.to_string(), alias.clone());
757                nodes.push(NodePattern {
758                    alias: alias.clone(),
759                    node_label: None,
760                    properties: Vec::new(),
761                });
762                alias
763            }
764        };
765
766        // Convert triple patterns to edges
767        for pattern in &self.where_patterns {
768            let subject_alias = match &pattern.subject {
769                SparqlTerm::Variable(v) => get_alias(v),
770                _ => continue, // Skip non-variable subjects for now
771            };
772
773            let predicate_label = match &pattern.predicate {
774                SparqlTerm::PrefixedName(_, local) => Some(local.clone()),
775                SparqlTerm::A => Some("type".to_string()),
776                SparqlTerm::Iri(iri) => {
777                    // Extract local name from IRI
778                    iri.rsplit('/')
779                        .next()
780                        .or_else(|| iri.rsplit('#').next())
781                        .map(|s| s.to_string())
782                }
783                _ => None,
784            };
785
786            // Predicate label travels as a free-form string. Normalise
787            // camelCase to snake_case so SPARQL `:hasService` and Cypher
788            // `has_service` match the same edge label in storage.
789            let edge_label = predicate_label.as_ref().map(|l| {
790                let lower = l.to_lowercase();
791                match lower.as_str() {
792                    "hasservice" => "has_service".to_string(),
793                    "hasendpoint" => "has_endpoint".to_string(),
794                    "usestech" => "uses_tech".to_string(),
795                    "authaccess" => "auth_access".to_string(),
796                    "affectedby" => "affected_by".to_string(),
797                    "connectsto" => "connects_to".to_string(),
798                    "relatedto" => "related_to".to_string(),
799                    "hasuser" => "has_user".to_string(),
800                    "hascert" => "has_cert".to_string(),
801                    _ => lower,
802                }
803            });
804
805            match &pattern.object {
806                SparqlTerm::Variable(v) => {
807                    let object_alias = get_alias(v);
808                    edges.push(EdgePattern {
809                        alias: None,
810                        from: subject_alias.clone(),
811                        to: object_alias,
812                        edge_label,
813                        direction: EdgeDirection::Outgoing,
814                        min_hops: 1,
815                        max_hops: 1,
816                    });
817                }
818                SparqlTerm::Literal(lit) | SparqlTerm::TypedLiteral(lit, _) => {
819                    // Object is a literal - add as property filter
820                    if let Some(pred) = predicate_label {
821                        filters.push(Filter::Compare {
822                            field: FieldRef::NodeProperty {
823                                alias: subject_alias.clone(),
824                                property: pred,
825                            },
826                            op: CompareOp::Eq,
827                            value: Value::text(lit.clone()),
828                        });
829                    }
830                }
831                _ => {}
832            }
833        }
834
835        // Convert SPARQL filters
836        for filter in &self.filters {
837            if let Some(f) = convert_sparql_filter(filter) {
838                filters.push(f);
839            }
840        }
841
842        // Build projections
843        let projections = if self.select.contains(&"*".to_string()) {
844            // Return all node IDs for * projection
845            nodes
846                .iter()
847                .map(|n| {
848                    Projection::from_field(FieldRef::NodeId {
849                        alias: n.alias.clone(),
850                    })
851                })
852                .collect()
853        } else {
854            self.select
855                .iter()
856                .filter_map(|v| {
857                    var_to_alias.get(v).map(|alias| {
858                        Projection::from_field(FieldRef::NodeId {
859                            alias: alias.clone(),
860                        })
861                    })
862                })
863                .collect()
864        };
865
866        // Fold multiple filters into nested And
867        let combined_filter = if filters.is_empty() {
868            None
869        } else {
870            let mut iter = filters.into_iter();
871            let first = iter.next().unwrap();
872            Some(iter.fold(first, |acc, f| Filter::And(Box::new(acc), Box::new(f))))
873        };
874
875        QueryExpr::Graph(GraphQuery {
876            alias: None,
877            pattern: GraphPattern { nodes, edges },
878            filter: combined_filter,
879            return_: projections,
880            limit: self.limit,
881        })
882    }
883}
884
885/// Convert a SPARQL filter to our Filter type
886fn convert_sparql_filter(filter: &SparqlFilter) -> Option<Filter> {
887    // Helper to create FieldRef from SPARQL variable name
888    let var_to_field = |var: &str| -> FieldRef {
889        // Strip ? prefix if present
890        let clean = var.trim_start_matches('?');
891        FieldRef::NodeProperty {
892            alias: clean.to_string(),
893            property: "value".to_string(), // Default property
894        }
895    };
896
897    match filter {
898        SparqlFilter::Compare(var, op, term) => {
899            let value = match term {
900                SparqlTerm::Literal(s) => Value::text(s.clone()),
901                SparqlTerm::Number(n) => Value::Float(*n),
902                SparqlTerm::Boolean(b) => Value::Boolean(*b),
903                _ => return None,
904            };
905            Some(Filter::Compare {
906                field: var_to_field(var),
907                op: *op,
908                value,
909            })
910        }
911        SparqlFilter::Bound(var) => Some(Filter::IsNotNull(var_to_field(var))),
912        SparqlFilter::NotBound(var) => Some(Filter::IsNull(var_to_field(var))),
913        SparqlFilter::Contains(var, pattern) => Some(Filter::Like {
914            field: var_to_field(var),
915            pattern: format!("%{}%", pattern),
916        }),
917        SparqlFilter::StrStarts(var, prefix) => Some(Filter::StartsWith {
918            field: var_to_field(var),
919            prefix: prefix.clone(),
920        }),
921        SparqlFilter::StrEnds(var, suffix) => Some(Filter::EndsWith {
922            field: var_to_field(var),
923            suffix: suffix.clone(),
924        }),
925        SparqlFilter::And(a, b) => {
926            let fa = convert_sparql_filter(a)?;
927            let fb = convert_sparql_filter(b)?;
928            Some(Filter::And(Box::new(fa), Box::new(fb)))
929        }
930        SparqlFilter::Or(a, b) => {
931            let fa = convert_sparql_filter(a)?;
932            let fb = convert_sparql_filter(b)?;
933            Some(Filter::Or(Box::new(fa), Box::new(fb)))
934        }
935        SparqlFilter::Not(inner) => {
936            let fi = convert_sparql_filter(inner)?;
937            Some(Filter::Not(Box::new(fi)))
938        }
939        _ => None,
940    }
941}
942
#[cfg(test)]
mod tests {
    use super::*;

    // One triple pattern: one selected variable, one WHERE pattern.
    #[test]
    fn test_parse_simple_select() {
        let sparql = "SELECT ?host WHERE { ?host :hasIP ?ip }";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert_eq!(parsed.select, vec!["host"]);
        assert_eq!(parsed.where_patterns.len(), 1);
    }

    // A PREFIX declaration is recorded under its short name.
    #[test]
    fn test_parse_with_prefix() {
        let sparql = "PREFIX ex: <http://example.org/> SELECT ?x WHERE { ?x ex:type ?t }";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert!(parsed.prefixes.contains_key("ex"));
        assert_eq!(parsed.select, vec!["x"]);
    }

    // Triple patterns separated by `.` are collected individually.
    #[test]
    fn test_parse_multiple_patterns() {
        let sparql = "SELECT ?host ?ip WHERE { ?host :hasIP ?ip . ?host :hasName ?name }";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert_eq!(parsed.where_patterns.len(), 2);
    }

    // A trailing LIMIT clause is captured.
    #[test]
    fn test_parse_with_limit() {
        let sparql = "SELECT ?x WHERE { ?x :type ?t } LIMIT 10";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert_eq!(parsed.limit, Some(10));
    }

    // A FILTER following the WHERE block is collected as a filter.
    #[test]
    fn test_parse_with_filter() {
        let sparql = "SELECT ?host WHERE { ?host :port ?p } FILTER (?p > 1000)";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert_eq!(parsed.filters.len(), 1);
    }

    // `SELECT *` is kept as a literal `*` entry in the selection list.
    #[test]
    fn test_parse_select_star() {
        let sparql = "SELECT * WHERE { ?s ?p ?o }";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert!(parsed.select.iter().any(|var| var == "*"));
    }

    // Lowering a parsed query produces the graph flavour of `QueryExpr`.
    #[test]
    fn test_to_query_expr() {
        let sparql = "SELECT ?host ?ip WHERE { ?host :hasIP ?ip }";
        let parsed = SparqlParser::parse(sparql).unwrap();

        assert!(matches!(parsed.to_query_expr(), QueryExpr::Graph(_)));
    }

    // Exercises DISTINCT, comments, OPTIONAL, typed literals, the filter
    // functions, ORDER BY, LIMIT and OFFSET in a single query.
    #[test]
    fn test_parse_optional_order_offset_and_filter_functions() {
        let parsed = SparqlParser::parse(
            r#"
            PREFIX ex: <http://example.org/>
            SELECT DISTINCT ?host ?name WHERE {
                # comments are whitespace
                ?host a ex:Host .
                OPTIONAL {
                    ?host :hasName "web"^^<http://www.w3.org/2001/XMLSchema#string> .
                }
                FILTER (REGEX(?name, "web", "i"))
                FILTER (BOUND(?name))
                FILTER (!BOUND(?missing))
                FILTER (isIRI(?host))
                FILTER (isURI(?host))
                FILTER (isLiteral(?name))
                FILTER (CONTAINS(?name, "w"))
                FILTER (STRSTARTS(?name, "w"))
                FILTER (STRENDS(?name, "b"))
            }
            FILTER (?score >= 0.5)
            ORDER BY DESC(?score) ASC(?host) ?name
            LIMIT 5
            OFFSET 2
            "#,
        )
        .unwrap();

        assert!(parsed.distinct);
        assert_eq!(
            parsed.prefixes.get("ex").map(|iri| iri.as_str()),
            Some("http://example.org/")
        );

        assert_eq!(parsed.where_patterns.len(), 1);
        assert_eq!(parsed.optionals.len(), 1);
        // Nine filters inside the WHERE block plus the one after it.
        assert_eq!(parsed.filters.len(), 10);

        // Each entry is (variable, ascending).
        let expected_order = vec![
            (String::from("score"), false),
            (String::from("host"), true),
            (String::from("name"), true),
        ];
        assert_eq!(parsed.order_by, expected_order);

        assert_eq!(parsed.limit, Some(5));
        assert_eq!(parsed.offset, Some(2));

        let optional_triple = &parsed.optionals[0][0];
        assert!(matches!(
            &optional_triple.object,
            SparqlTerm::TypedLiteral(value, datatype)
                if value == "web"
                    && datatype == "http://www.w3.org/2001/XMLSchema#string"
        ));
    }

    // Checks the lowered graph: edge labels, star projection, literal filter, limit.
    #[test]
    fn test_to_query_expr_normalizes_edges_literals_and_star_projection() {
        let sparql = "SELECT * WHERE { ?host :hasService ?svc . ?host <http://example.org/connectsTo> ?peer . ?host :hasName 'web' . } LIMIT 3";
        let parsed = SparqlParser::parse(sparql).unwrap();

        let graph = match parsed.to_query_expr() {
            QueryExpr::Graph(graph) => graph,
            _ => panic!("SPARQL should lower to GraphQuery"),
        };

        let has_edge = |label: &str| {
            graph
                .pattern
                .edges
                .iter()
                .any(|edge| edge.edge_label.as_deref() == Some(label))
        };

        assert_eq!(graph.limit, Some(3));
        assert_eq!(graph.pattern.edges.len(), 2);
        assert!(has_edge("has_service"));
        assert!(has_edge("connects_to"));
        assert_eq!(graph.return_.len(), graph.pattern.nodes.len());
        assert!(graph.filter.is_some());
    }

    // Walks the supported filter variants, then the ones that must yield `None`.
    #[test]
    fn test_convert_sparql_filter_variants() {
        let name = || "name".to_string();
        let bound = |var: &str| Box::new(SparqlFilter::Bound(var.to_string()));

        // Comparisons against number, boolean and plain-literal terms.
        let numeric = SparqlFilter::Compare(
            "age".to_string(),
            CompareOp::Ge,
            SparqlTerm::Number(18.5),
        );
        assert!(matches!(
            convert_sparql_filter(&numeric),
            Some(Filter::Compare {
                op: CompareOp::Ge,
                value: Value::Float(18.5),
                ..
            })
        ));

        let boolean = SparqlFilter::Compare(
            "active".to_string(),
            CompareOp::Eq,
            SparqlTerm::Boolean(true),
        );
        assert!(matches!(
            convert_sparql_filter(&boolean),
            Some(Filter::Compare {
                value: Value::Boolean(true),
                ..
            })
        ));

        let textual = SparqlFilter::Compare(
            name(),
            CompareOp::Eq,
            SparqlTerm::Literal("alice".to_string()),
        );
        assert!(matches!(
            convert_sparql_filter(&textual),
            Some(Filter::Compare {
                value: Value::Text(text),
                ..
            }) if text.as_ref() == "alice"
        ));

        // Bound-ness checks.
        assert!(matches!(
            convert_sparql_filter(&SparqlFilter::Bound(name())),
            Some(Filter::IsNotNull(_))
        ));
        assert!(matches!(
            convert_sparql_filter(&SparqlFilter::NotBound(name())),
            Some(Filter::IsNull(_))
        ));

        // String functions.
        let contains = SparqlFilter::Contains(name(), "lic".to_string());
        assert!(matches!(
            convert_sparql_filter(&contains),
            Some(Filter::Like { pattern, .. }) if pattern == "%lic%"
        ));

        let starts = SparqlFilter::StrStarts(name(), "a".to_string());
        assert!(matches!(
            convert_sparql_filter(&starts),
            Some(Filter::StartsWith { prefix, .. }) if prefix == "a"
        ));

        let ends = SparqlFilter::StrEnds(name(), "e".to_string());
        assert!(matches!(
            convert_sparql_filter(&ends),
            Some(Filter::EndsWith { suffix, .. }) if suffix == "e"
        ));

        // Boolean combinators.
        let conjunction = SparqlFilter::And(bound("a"), bound("b"));
        assert!(matches!(
            convert_sparql_filter(&conjunction),
            Some(Filter::And(_, _))
        ));

        let disjunction = SparqlFilter::Or(bound("a"), bound("b"));
        assert!(matches!(
            convert_sparql_filter(&disjunction),
            Some(Filter::Or(_, _))
        ));

        let negation = SparqlFilter::Not(bound("a"));
        assert!(matches!(
            convert_sparql_filter(&negation),
            Some(Filter::Not(_))
        ));

        // Filters with no lowering must yield `None`.
        let unsupported = [
            SparqlFilter::Regex(name(), "a.*".to_string(), None),
            SparqlFilter::IsIri("s".to_string()),
            SparqlFilter::IsLiteral("s".to_string()),
            SparqlFilter::Compare(
                "iri".to_string(),
                CompareOp::Eq,
                SparqlTerm::Iri("http://example.org/id".to_string()),
            ),
        ];
        for filter in &unsupported {
            assert!(convert_sparql_filter(filter).is_none());
        }
    }
}