oxirs_core/query/
parser.rs

//! SPARQL query parser
//!
//! This is a placeholder implementation that will be enhanced with full
//! SPARQL 1.1 parsing capabilities in future iterations.
5
6use crate::model::{BlankNode, Literal, NamedNode, Variable};
7use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
8use crate::query::sparql_algebra::{GraphPattern, TermPattern, TriplePattern};
9use crate::query::sparql_query::Query;
10use crate::OxirsError;
11use std::collections::HashMap;
12
13/// A SPARQL parser
14#[derive(Debug, Clone, Default)]
15pub struct SparqlParser {
16    base_iri: Option<NamedNode>,
17    prefixes: HashMap<String, NamedNode>,
18}
19
20impl SparqlParser {
21    /// Creates a new SPARQL parser
22    pub fn new() -> Self {
23        Self::default()
24    }
25
26    /// Sets the base IRI for resolving relative IRIs
27    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, OxirsError> {
28        self.base_iri = Some(NamedNode::new(base_iri.into())?);
29        Ok(self)
30    }
31
32    /// Adds a prefix mapping
33    pub fn with_prefix(
34        mut self,
35        prefix: impl Into<String>,
36        iri: impl Into<String>,
37    ) -> Result<Self, OxirsError> {
38        self.prefixes
39            .insert(prefix.into(), NamedNode::new(iri.into())?);
40        Ok(self)
41    }
42
43    /// Parses a SPARQL query string - alias for parse_query
44    pub fn parse(&self, query: &str) -> Result<Query, OxirsError> {
45        self.parse_query(query)
46    }
47
48    /// Parses a SPARQL query string
49    pub fn parse_query(&self, query: &str) -> Result<Query, OxirsError> {
50        // This is a simplified parser for demonstration
51        // Full implementation would use a proper parser generator
52
53        let query = query.trim();
54
55        // Very basic SELECT query detection
56        if query.to_uppercase().starts_with("SELECT") {
57            self.parse_select_query(query)
58        } else if query.to_uppercase().starts_with("CONSTRUCT") {
59            self.parse_construct_query(query)
60        } else if query.to_uppercase().starts_with("ASK") {
61            self.parse_ask_query(query)
62        } else if query.to_uppercase().starts_with("DESCRIBE") {
63            self.parse_describe_query(query)
64        } else {
65            Err(OxirsError::Parse(
66                "Unsupported query form. Query must start with SELECT, CONSTRUCT, ASK, or DESCRIBE"
67                    .to_string(),
68            ))
69        }
70    }
71
72    // Private helper methods for parsing different query forms
73
74    fn parse_select_query(&self, query: &str) -> Result<Query, OxirsError> {
75        // Extract WHERE clause (simplified parsing)
76        let where_start = query
77            .to_uppercase()
78            .find("WHERE")
79            .ok_or_else(|| OxirsError::Parse("SELECT query must have WHERE clause".to_string()))?;
80
81        // Parse WHERE clause (simplified - just extract triple patterns)
82        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
83
84        Ok(Query::Select {
85            dataset: None,
86            pattern,
87            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
88        })
89    }
90
91    fn parse_construct_query(&self, query: &str) -> Result<Query, OxirsError> {
92        // Find CONSTRUCT template and WHERE clause
93        let construct_start = query.to_uppercase().find("CONSTRUCT").unwrap() + 9;
94        let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
95            OxirsError::Parse("CONSTRUCT query must have WHERE clause".to_string())
96        })?;
97
98        // Parse template (simplified - just get the content between braces)
99        let construct_clause = query[construct_start..where_start].trim();
100        let algebra_template = self.parse_construct_template(construct_clause)?;
101        let template: Vec<TriplePattern> = algebra_template
102            .iter()
103            .map(|p| self.convert_triple_pattern(p))
104            .collect();
105
106        // Parse WHERE clause
107        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
108
109        Ok(Query::Construct {
110            template,
111            dataset: None,
112            pattern,
113            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
114        })
115    }
116
117    fn parse_ask_query(&self, query: &str) -> Result<Query, OxirsError> {
118        let where_start = query
119            .to_uppercase()
120            .find("WHERE")
121            .ok_or_else(|| OxirsError::Parse("ASK query must have WHERE clause".to_string()))?;
122
123        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
124
125        Ok(Query::Ask {
126            dataset: None,
127            pattern,
128            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
129        })
130    }
131
132    fn parse_describe_query(&self, query: &str) -> Result<Query, OxirsError> {
133        let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
134            OxirsError::Parse("DESCRIBE query must have WHERE clause".to_string())
135        })?;
136
137        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
138
139        Ok(Query::Describe {
140            dataset: None,
141            pattern,
142            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
143        })
144    }
145
146    fn parse_construct_template(
147        &self,
148        template_text: &str,
149    ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
150        let content = template_text.trim();
151        if !content.starts_with('{') || !content.ends_with('}') {
152            return Err(OxirsError::Parse(
153                "CONSTRUCT template must be enclosed in {}".to_string(),
154            ));
155        }
156
157        let content = content[1..content.len() - 1].trim();
158        let mut triple_patterns: Vec<AlgebraTriplePattern> = Vec::new();
159
160        // Split by periods, but respect IRI brackets
161        let triple_strings = self.split_triples_by_period(content);
162
163        for triple_str in triple_strings {
164            let triple_str = triple_str.trim();
165            if triple_str.is_empty() || triple_str.starts_with("FILTER") {
166                continue;
167            }
168
169            // Parse triple pattern (subject predicate object)
170            let parts: Vec<&str> = triple_str.split_whitespace().collect();
171            if parts.len() < 3 {
172                return Err(OxirsError::Parse(format!(
173                    "Invalid triple pattern: '{triple_str}'"
174                )));
175            }
176
177            let subject = self.parse_term_pattern(parts[0])?;
178            let predicate = self.parse_term_pattern(parts[1])?;
179            let object = self.parse_term_pattern(parts[2])?;
180
181            // Validate subject pattern (literals can't be subjects)
182            if matches!(subject, TermPattern::Literal(_)) {
183                return Err(OxirsError::Parse("Literals cannot be subjects".to_string()));
184            }
185
186            // Validate predicate pattern (only named nodes and variables allowed)
187            if !matches!(
188                predicate,
189                TermPattern::NamedNode(_) | TermPattern::Variable(_)
190            ) {
191                return Err(OxirsError::Parse(
192                    "Predicates must be named nodes or variables".to_string(),
193                ));
194            }
195
196            // Convert sparql_algebra::TermPattern to algebra::TermPattern
197            let algebra_subject = self.convert_to_algebra_term(&subject)?;
198            let algebra_predicate = self.convert_to_algebra_term(&predicate)?;
199            let algebra_object = self.convert_to_algebra_term(&object)?;
200
201            triple_patterns.push(AlgebraTriplePattern::new(
202                algebra_subject,
203                algebra_predicate,
204                algebra_object,
205            ));
206        }
207
208        Ok(triple_patterns)
209    }
210
211    // Helper method to convert sparql_algebra::TermPattern to algebra::TermPattern
212    fn convert_to_algebra_term(
213        &self,
214        term: &TermPattern,
215    ) -> Result<AlgebraTermPattern, OxirsError> {
216        match term {
217            TermPattern::NamedNode(n) => Ok(AlgebraTermPattern::NamedNode(n.clone())),
218            TermPattern::BlankNode(b) => Ok(AlgebraTermPattern::BlankNode(b.clone())),
219            TermPattern::Literal(l) => Ok(AlgebraTermPattern::Literal(l.clone())),
220            TermPattern::Variable(v) => Ok(AlgebraTermPattern::Variable(v.clone())),
221            #[cfg(feature = "sparql-12")]
222            TermPattern::Triple(_) => Err(OxirsError::Parse(
223                "Quoted triples not supported in construct templates".to_string(),
224            )),
225        }
226    }
227
228    fn parse_where_clause(&self, where_text: &str) -> Result<GraphPattern, OxirsError> {
229        // Very simplified parsing - just extract basic triple patterns
230        let content = where_text.trim();
231        if !content.starts_with('{') || !content.ends_with('}') {
232            return Err(OxirsError::Parse(
233                "WHERE clause must be enclosed in {}".to_string(),
234            ));
235        }
236
237        let content = content[1..content.len() - 1].trim();
238        let mut triple_patterns: Vec<TriplePattern> = Vec::new();
239
240        // Split by periods, but respect IRI brackets
241        let triple_strings = self.split_triples_by_period(content);
242
243        for triple_str in triple_strings {
244            let triple_str = triple_str.trim();
245            if triple_str.is_empty() || triple_str.starts_with("FILTER") {
246                continue;
247            }
248
249            // Parse triple pattern (subject predicate object)
250            let parts: Vec<&str> = triple_str.split_whitespace().collect();
251            if parts.len() < 3 {
252                return Err(OxirsError::Parse(format!(
253                    "Invalid triple pattern: '{triple_str}'"
254                )));
255            }
256
257            let subject = self.parse_term_pattern(parts[0])?;
258            let predicate = self.parse_term_pattern(parts[1])?;
259            let object = self.parse_term_pattern(parts[2])?;
260
261            triple_patterns.push(TriplePattern::new(subject, predicate, object));
262        }
263
264        Ok(GraphPattern::Bgp {
265            patterns: triple_patterns,
266        })
267    }
268
269    fn parse_term_pattern(&self, term: &str) -> Result<TermPattern, OxirsError> {
270        if term.starts_with('?') || term.starts_with('$') {
271            Variable::new(term).map(TermPattern::Variable)
272        } else if term.starts_with('<') && term.ends_with('>') {
273            let iri = &term[1..term.len() - 1];
274            NamedNode::new(iri).map(TermPattern::NamedNode)
275        } else if term.starts_with('"') && term.ends_with('"') {
276            let value = &term[1..term.len() - 1];
277            Ok(TermPattern::Literal(Literal::new(value)))
278        } else if term.starts_with("_:") {
279            BlankNode::new(term).map(TermPattern::BlankNode)
280        } else if let Some(colon_pos) = term.find(':') {
281            // Prefixed name
282            let prefix = &term[..colon_pos];
283            let local = &term[colon_pos + 1..];
284
285            if let Some(namespace) = self.prefixes.get(prefix) {
286                let iri = format!("{}{}", namespace.as_str(), local);
287                NamedNode::new(iri).map(TermPattern::NamedNode)
288            } else {
289                Err(OxirsError::Parse(format!("Unknown prefix: {prefix}")))
290            }
291        } else {
292            Err(OxirsError::Parse(format!("Invalid term pattern: {term}")))
293        }
294    }
295
296    /// Convert algebra TermPattern to sparql_algebra TermPattern
297    fn convert_term_pattern(&self, term: &AlgebraTermPattern) -> TermPattern {
298        match term {
299            AlgebraTermPattern::NamedNode(n) => TermPattern::NamedNode(n.clone()),
300            AlgebraTermPattern::BlankNode(b) => TermPattern::BlankNode(b.clone()),
301            AlgebraTermPattern::Literal(l) => TermPattern::Literal(l.clone()),
302            AlgebraTermPattern::Variable(v) => TermPattern::Variable(v.clone()),
303        }
304    }
305
306    /// Convert AlgebraTriplePattern to sparql_algebra TriplePattern
307    fn convert_triple_pattern(&self, pattern: &AlgebraTriplePattern) -> TriplePattern {
308        TriplePattern::new(
309            self.convert_term_pattern(&pattern.subject),
310            self.convert_term_pattern(&pattern.predicate),
311            self.convert_term_pattern(&pattern.object),
312        )
313    }
314
315    /// Convert sparql_algebra TermPattern back to algebra TermPattern
316    fn convert_term_pattern_back(&self, term: &TermPattern) -> AlgebraTermPattern {
317        match term {
318            TermPattern::NamedNode(n) => AlgebraTermPattern::NamedNode(n.clone()),
319            TermPattern::BlankNode(b) => AlgebraTermPattern::BlankNode(b.clone()),
320            TermPattern::Literal(l) => AlgebraTermPattern::Literal(l.clone()),
321            TermPattern::Variable(v) => AlgebraTermPattern::Variable(v.clone()),
322            #[cfg(feature = "sparql-12")]
323            TermPattern::Triple(_) => {
324                // Triple patterns in term position are not yet supported
325                todo!("Triple patterns in term position are not yet fully implemented")
326            }
327        }
328    }
329
330    /// Convert sparql_algebra TriplePattern back to AlgebraTriplePattern
331    pub fn convert_triple_pattern_back(&self, pattern: &TriplePattern) -> AlgebraTriplePattern {
332        AlgebraTriplePattern::new(
333            self.convert_term_pattern_back(&pattern.subject),
334            self.convert_term_pattern_back(&pattern.predicate),
335            self.convert_term_pattern_back(&pattern.object),
336        )
337    }
338
339    /// Split triples by period while respecting IRI brackets
340    fn split_triples_by_period(&self, content: &str) -> Vec<String> {
341        let mut triples = Vec::new();
342        let mut current = String::new();
343        let mut in_iri = false;
344        let mut in_literal = false;
345        let mut escape_next = false;
346
347        for ch in content.chars() {
348            if escape_next {
349                current.push(ch);
350                escape_next = false;
351                continue;
352            }
353
354            match ch {
355                '\\' => {
356                    escape_next = true;
357                    current.push(ch);
358                }
359                '<' if !in_literal => {
360                    in_iri = true;
361                    current.push(ch);
362                }
363                '>' if in_iri && !in_literal => {
364                    in_iri = false;
365                    current.push(ch);
366                }
367                '"' => {
368                    in_literal = !in_literal;
369                    current.push(ch);
370                }
371                '.' if !in_iri && !in_literal => {
372                    // End of triple
373                    let trimmed = current.trim();
374                    if !trimmed.is_empty() {
375                        triples.push(trimmed.to_string());
376                    }
377                    current.clear();
378                }
379                _ => {
380                    current.push(ch);
381                }
382            }
383        }
384
385        // Don't forget the last triple if there's no trailing period
386        let trimmed = current.trim();
387        if !trimmed.is_empty() {
388            triples.push(trimmed.to_string());
389        }
390
391        triples
392    }
393}
394
#[cfg(test)]
mod tests {
    use super::*;

    /// A SELECT query with one all-variable triple yields a one-pattern BGP.
    #[test]
    fn test_simple_select_query() {
        let parsed = SparqlParser::new().parse_query("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }");
        assert!(parsed.is_ok());

        let pattern = match parsed {
            Ok(Query::Select { pattern, .. }) => pattern,
            _ => panic!("Expected SELECT query"),
        };
        let patterns = match pattern {
            GraphPattern::Bgp { patterns } => patterns,
            _ => panic!("Expected BGP pattern"),
        };

        assert_eq!(patterns.len(), 1);
        // Every position of the single triple must be a variable.
        let triple = &patterns[0];
        assert!(matches!(triple.subject, TermPattern::Variable(_)));
        assert!(matches!(triple.predicate, TermPattern::Variable(_)));
        assert!(matches!(triple.object, TermPattern::Variable(_)));
    }

    /// An ASK query with one triple yields a one-pattern BGP.
    #[test]
    fn test_ask_query() {
        let parsed = SparqlParser::new().parse_query("ASK WHERE { ?s ?p ?o . }");
        assert!(parsed.is_ok());

        let pattern = match parsed {
            Ok(Query::Ask { pattern, .. }) => pattern,
            _ => panic!("Expected ASK query"),
        };
        match pattern {
            GraphPattern::Bgp { patterns } => assert_eq!(patterns.len(), 1),
            _ => panic!("Expected BGP pattern"),
        }
    }

    /// A CONSTRUCT query yields a one-triple template and a one-pattern BGP.
    #[test]
    fn test_construct_query() {
        let parsed = SparqlParser::new().parse_query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o . }");
        assert!(parsed.is_ok());

        let (template, pattern) = match parsed {
            Ok(Query::Construct {
                template, pattern, ..
            }) => (template, pattern),
            _ => panic!("Expected CONSTRUCT query"),
        };

        assert_eq!(template.len(), 1);
        match pattern {
            GraphPattern::Bgp { patterns } => assert_eq!(patterns.len(), 1),
            _ => panic!("Expected BGP pattern"),
        }
    }

    /// A prefixed name resolves once its prefix has been registered.
    #[test]
    fn test_parse_with_prefix() {
        let parser = SparqlParser::new()
            .with_prefix("ex", "http://example.org/")
            .unwrap();

        let parsed = parser.parse_query("SELECT ?s WHERE { ex:subject ?p ?o . }");
        assert!(parsed.is_ok());
    }

    /// Text that starts with none of the supported keywords is rejected.
    #[test]
    fn test_invalid_query() {
        let parsed = SparqlParser::new().parse_query("INVALID QUERY");
        assert!(parsed.is_err());
    }
}
483}