// oxirs_core/query/parser.rs

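//! SPARQL query parser
//!
//! This is a placeholder implementation: it recognizes the four query forms
//! (SELECT, CONSTRUCT, ASK, DESCRIBE) and basic triple patterns only. It will be
//! enhanced with full SPARQL 1.1 parsing capabilities in future iterations.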
5
6use crate::model::{BlankNode, Literal, NamedNode, Variable};
7use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
8use crate::query::sparql_algebra::{GraphPattern, TermPattern, TriplePattern};
9use crate::query::sparql_query::Query;
10use crate::OxirsError;
11use std::collections::HashMap;
12
13/// A SPARQL parser
14#[derive(Debug, Clone, Default)]
15pub struct SparqlParser {
16    base_iri: Option<NamedNode>,
17    prefixes: HashMap<String, NamedNode>,
18}
19
20impl SparqlParser {
21    /// Creates a new SPARQL parser
22    pub fn new() -> Self {
23        Self::default()
24    }
25
26    /// Sets the base IRI for resolving relative IRIs
27    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, OxirsError> {
28        self.base_iri = Some(NamedNode::new(base_iri.into())?);
29        Ok(self)
30    }
31
32    /// Adds a prefix mapping
33    pub fn with_prefix(
34        mut self,
35        prefix: impl Into<String>,
36        iri: impl Into<String>,
37    ) -> Result<Self, OxirsError> {
38        self.prefixes
39            .insert(prefix.into(), NamedNode::new(iri.into())?);
40        Ok(self)
41    }
42
43    /// Parses a SPARQL query string - alias for parse_query
44    pub fn parse(&self, query: &str) -> Result<Query, OxirsError> {
45        self.parse_query(query)
46    }
47
48    /// Parses a SPARQL query string
49    pub fn parse_query(&self, query: &str) -> Result<Query, OxirsError> {
50        // This is a simplified parser for demonstration
51        // Full implementation would use a proper parser generator
52
53        let query = query.trim();
54
55        // Very basic SELECT query detection
56        if query.to_uppercase().starts_with("SELECT") {
57            self.parse_select_query(query)
58        } else if query.to_uppercase().starts_with("CONSTRUCT") {
59            self.parse_construct_query(query)
60        } else if query.to_uppercase().starts_with("ASK") {
61            self.parse_ask_query(query)
62        } else if query.to_uppercase().starts_with("DESCRIBE") {
63            self.parse_describe_query(query)
64        } else {
65            Err(OxirsError::Parse(
66                "Unsupported query form. Query must start with SELECT, CONSTRUCT, ASK, or DESCRIBE"
67                    .to_string(),
68            ))
69        }
70    }
71
72    // Private helper methods for parsing different query forms
73
74    fn parse_select_query(&self, query: &str) -> Result<Query, OxirsError> {
75        // Extract WHERE clause (simplified parsing)
76        let where_start = query
77            .to_uppercase()
78            .find("WHERE")
79            .ok_or_else(|| OxirsError::Parse("SELECT query must have WHERE clause".to_string()))?;
80
81        // Parse WHERE clause (simplified - just extract triple patterns)
82        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
83
84        Ok(Query::Select {
85            dataset: None,
86            pattern,
87            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
88        })
89    }
90
91    fn parse_construct_query(&self, query: &str) -> Result<Query, OxirsError> {
92        // Find CONSTRUCT template and WHERE clause
93        let construct_start = query
94            .to_uppercase()
95            .find("CONSTRUCT")
96            .expect("CONSTRUCT keyword should be present in construct query")
97            + 9;
98        let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
99            OxirsError::Parse("CONSTRUCT query must have WHERE clause".to_string())
100        })?;
101
102        // Parse template (simplified - just get the content between braces)
103        let construct_clause = query[construct_start..where_start].trim();
104        let algebra_template = self.parse_construct_template(construct_clause)?;
105        let template: Vec<TriplePattern> = algebra_template
106            .iter()
107            .map(|p| self.convert_triple_pattern(p))
108            .collect();
109
110        // Parse WHERE clause
111        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
112
113        Ok(Query::Construct {
114            template,
115            dataset: None,
116            pattern,
117            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
118        })
119    }
120
121    fn parse_ask_query(&self, query: &str) -> Result<Query, OxirsError> {
122        let where_start = query
123            .to_uppercase()
124            .find("WHERE")
125            .ok_or_else(|| OxirsError::Parse("ASK query must have WHERE clause".to_string()))?;
126
127        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
128
129        Ok(Query::Ask {
130            dataset: None,
131            pattern,
132            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
133        })
134    }
135
136    fn parse_describe_query(&self, query: &str) -> Result<Query, OxirsError> {
137        let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
138            OxirsError::Parse("DESCRIBE query must have WHERE clause".to_string())
139        })?;
140
141        let pattern = self.parse_where_clause(&query[where_start + 5..])?;
142
143        Ok(Query::Describe {
144            dataset: None,
145            pattern,
146            base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
147        })
148    }
149
150    fn parse_construct_template(
151        &self,
152        template_text: &str,
153    ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
154        let content = template_text.trim();
155        if !content.starts_with('{') || !content.ends_with('}') {
156            return Err(OxirsError::Parse(
157                "CONSTRUCT template must be enclosed in {}".to_string(),
158            ));
159        }
160
161        let content = content[1..content.len() - 1].trim();
162        let mut triple_patterns: Vec<AlgebraTriplePattern> = Vec::new();
163
164        // Split by periods, but respect IRI brackets
165        let triple_strings = self.split_triples_by_period(content);
166
167        for triple_str in triple_strings {
168            let triple_str = triple_str.trim();
169            if triple_str.is_empty() || triple_str.starts_with("FILTER") {
170                continue;
171            }
172
173            // Parse triple pattern (subject predicate object)
174            let parts: Vec<&str> = triple_str.split_whitespace().collect();
175            if parts.len() < 3 {
176                return Err(OxirsError::Parse(format!(
177                    "Invalid triple pattern: '{triple_str}'"
178                )));
179            }
180
181            let subject = self.parse_term_pattern(parts[0])?;
182            let predicate = self.parse_term_pattern(parts[1])?;
183            let object = self.parse_term_pattern(parts[2])?;
184
185            // Validate subject pattern (literals can't be subjects)
186            if matches!(subject, TermPattern::Literal(_)) {
187                return Err(OxirsError::Parse("Literals cannot be subjects".to_string()));
188            }
189
190            // Validate predicate pattern (only named nodes and variables allowed)
191            if !matches!(
192                predicate,
193                TermPattern::NamedNode(_) | TermPattern::Variable(_)
194            ) {
195                return Err(OxirsError::Parse(
196                    "Predicates must be named nodes or variables".to_string(),
197                ));
198            }
199
200            // Convert sparql_algebra::TermPattern to algebra::TermPattern
201            let algebra_subject = self.convert_to_algebra_term(&subject)?;
202            let algebra_predicate = self.convert_to_algebra_term(&predicate)?;
203            let algebra_object = self.convert_to_algebra_term(&object)?;
204
205            triple_patterns.push(AlgebraTriplePattern::new(
206                algebra_subject,
207                algebra_predicate,
208                algebra_object,
209            ));
210        }
211
212        Ok(triple_patterns)
213    }
214
215    // Helper method to convert sparql_algebra::TermPattern to algebra::TermPattern
216    fn convert_to_algebra_term(
217        &self,
218        term: &TermPattern,
219    ) -> Result<AlgebraTermPattern, OxirsError> {
220        match term {
221            TermPattern::NamedNode(n) => Ok(AlgebraTermPattern::NamedNode(n.clone())),
222            TermPattern::BlankNode(b) => Ok(AlgebraTermPattern::BlankNode(b.clone())),
223            TermPattern::Literal(l) => Ok(AlgebraTermPattern::Literal(l.clone())),
224            TermPattern::Variable(v) => Ok(AlgebraTermPattern::Variable(v.clone())),
225            #[cfg(feature = "sparql-12")]
226            TermPattern::Triple(_) => Err(OxirsError::Parse(
227                "Quoted triples not supported in construct templates".to_string(),
228            )),
229        }
230    }
231
232    fn parse_where_clause(&self, where_text: &str) -> Result<GraphPattern, OxirsError> {
233        // Very simplified parsing - just extract basic triple patterns
234        let content = where_text.trim();
235        if !content.starts_with('{') || !content.ends_with('}') {
236            return Err(OxirsError::Parse(
237                "WHERE clause must be enclosed in {}".to_string(),
238            ));
239        }
240
241        let content = content[1..content.len() - 1].trim();
242        let mut triple_patterns: Vec<TriplePattern> = Vec::new();
243
244        // Split by periods, but respect IRI brackets
245        let triple_strings = self.split_triples_by_period(content);
246
247        for triple_str in triple_strings {
248            let triple_str = triple_str.trim();
249            if triple_str.is_empty() || triple_str.starts_with("FILTER") {
250                continue;
251            }
252
253            // Parse triple pattern (subject predicate object)
254            let parts: Vec<&str> = triple_str.split_whitespace().collect();
255            if parts.len() < 3 {
256                return Err(OxirsError::Parse(format!(
257                    "Invalid triple pattern: '{triple_str}'"
258                )));
259            }
260
261            let subject = self.parse_term_pattern(parts[0])?;
262            let predicate = self.parse_term_pattern(parts[1])?;
263            let object = self.parse_term_pattern(parts[2])?;
264
265            triple_patterns.push(TriplePattern::new(subject, predicate, object));
266        }
267
268        Ok(GraphPattern::Bgp {
269            patterns: triple_patterns,
270        })
271    }
272
273    fn parse_term_pattern(&self, term: &str) -> Result<TermPattern, OxirsError> {
274        if term.starts_with('?') || term.starts_with('$') {
275            Variable::new(term).map(TermPattern::Variable)
276        } else if term.starts_with('<') && term.ends_with('>') {
277            let iri = &term[1..term.len() - 1];
278            NamedNode::new(iri).map(TermPattern::NamedNode)
279        } else if term.starts_with('"') && term.ends_with('"') {
280            let value = &term[1..term.len() - 1];
281            Ok(TermPattern::Literal(Literal::new(value)))
282        } else if term.starts_with("_:") {
283            BlankNode::new(term).map(TermPattern::BlankNode)
284        } else if let Some(colon_pos) = term.find(':') {
285            // Prefixed name
286            let prefix = &term[..colon_pos];
287            let local = &term[colon_pos + 1..];
288
289            if let Some(namespace) = self.prefixes.get(prefix) {
290                let iri = format!("{}{}", namespace.as_str(), local);
291                NamedNode::new(iri).map(TermPattern::NamedNode)
292            } else {
293                Err(OxirsError::Parse(format!("Unknown prefix: {prefix}")))
294            }
295        } else {
296            Err(OxirsError::Parse(format!("Invalid term pattern: {term}")))
297        }
298    }
299
300    /// Convert algebra TermPattern to sparql_algebra TermPattern
301    fn convert_term_pattern(&self, term: &AlgebraTermPattern) -> TermPattern {
302        match term {
303            AlgebraTermPattern::NamedNode(n) => TermPattern::NamedNode(n.clone()),
304            AlgebraTermPattern::BlankNode(b) => TermPattern::BlankNode(b.clone()),
305            AlgebraTermPattern::Literal(l) => TermPattern::Literal(l.clone()),
306            AlgebraTermPattern::Variable(v) => TermPattern::Variable(v.clone()),
307            AlgebraTermPattern::QuotedTriple(_) => {
308                panic!("RDF-star quoted triples not yet supported in SPARQL algebra conversion")
309            }
310        }
311    }
312
313    /// Convert AlgebraTriplePattern to sparql_algebra TriplePattern
314    fn convert_triple_pattern(&self, pattern: &AlgebraTriplePattern) -> TriplePattern {
315        TriplePattern::new(
316            self.convert_term_pattern(&pattern.subject),
317            self.convert_term_pattern(&pattern.predicate),
318            self.convert_term_pattern(&pattern.object),
319        )
320    }
321
322    /// Convert sparql_algebra TermPattern back to algebra TermPattern
323    #[allow(clippy::only_used_in_recursion)]
324    fn convert_term_pattern_back(&self, term: &TermPattern) -> AlgebraTermPattern {
325        match term {
326            TermPattern::NamedNode(n) => AlgebraTermPattern::NamedNode(n.clone()),
327            TermPattern::BlankNode(b) => AlgebraTermPattern::BlankNode(b.clone()),
328            TermPattern::Literal(l) => AlgebraTermPattern::Literal(l.clone()),
329            TermPattern::Variable(v) => AlgebraTermPattern::Variable(v.clone()),
330            #[cfg(feature = "sparql-12")]
331            TermPattern::Triple(triple_pattern) => {
332                // RDF-star: Triple patterns in term position (quoted triples)
333                // Convert the nested triple pattern recursively
334                let subject = self.convert_term_pattern_back(&triple_pattern.subject);
335                let predicate = self.convert_term_pattern_back(&triple_pattern.predicate);
336                let object = self.convert_term_pattern_back(&triple_pattern.object);
337
338                // Create a quoted triple pattern (RDF-star feature)
339                // This represents a triple that appears as a term in another triple
340                AlgebraTermPattern::QuotedTriple(Box::new(crate::query::AlgebraTriplePattern::new(
341                    subject, predicate, object,
342                )))
343            }
344        }
345    }
346
347    /// Convert sparql_algebra TriplePattern back to AlgebraTriplePattern
348    pub fn convert_triple_pattern_back(&self, pattern: &TriplePattern) -> AlgebraTriplePattern {
349        AlgebraTriplePattern::new(
350            self.convert_term_pattern_back(&pattern.subject),
351            self.convert_term_pattern_back(&pattern.predicate),
352            self.convert_term_pattern_back(&pattern.object),
353        )
354    }
355
356    /// Split triples by period while respecting IRI brackets
357    fn split_triples_by_period(&self, content: &str) -> Vec<String> {
358        let mut triples = Vec::new();
359        let mut current = String::new();
360        let mut in_iri = false;
361        let mut in_literal = false;
362        let mut escape_next = false;
363
364        for ch in content.chars() {
365            if escape_next {
366                current.push(ch);
367                escape_next = false;
368                continue;
369            }
370
371            match ch {
372                '\\' => {
373                    escape_next = true;
374                    current.push(ch);
375                }
376                '<' if !in_literal => {
377                    in_iri = true;
378                    current.push(ch);
379                }
380                '>' if in_iri && !in_literal => {
381                    in_iri = false;
382                    current.push(ch);
383                }
384                '"' => {
385                    in_literal = !in_literal;
386                    current.push(ch);
387                }
388                '.' if !in_iri && !in_literal => {
389                    // End of triple
390                    let trimmed = current.trim();
391                    if !trimmed.is_empty() {
392                        triples.push(trimmed.to_string());
393                    }
394                    current.clear();
395                }
396                _ => {
397                    current.push(ch);
398                }
399            }
400        }
401
402        // Don't forget the last triple if there's no trailing period
403        let trimmed = current.trim();
404        if !trimmed.is_empty() {
405            triples.push(trimmed.to_string());
406        }
407
408        triples
409    }
410}
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the triple patterns of a BGP and fails the test for any other pattern.
    fn bgp_patterns(pattern: GraphPattern) -> Vec<TriplePattern> {
        match pattern {
            GraphPattern::Bgp { patterns } => patterns,
            _ => panic!("Expected BGP pattern"),
        }
    }

    /// A SELECT with one all-variable triple yields a single-pattern BGP.
    #[test]
    fn test_simple_select_query() {
        let parser = SparqlParser::new();
        let result = parser.parse_query("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }");
        assert!(result.is_ok());

        let pattern = match result {
            Ok(Query::Select { pattern, .. }) => pattern,
            _ => panic!("Expected SELECT query"),
        };

        let patterns = bgp_patterns(pattern);
        assert_eq!(patterns.len(), 1);

        // Every position of the single triple must be a variable.
        let triple = &patterns[0];
        assert!(matches!(triple.subject, TermPattern::Variable(_)));
        assert!(matches!(triple.predicate, TermPattern::Variable(_)));
        assert!(matches!(triple.object, TermPattern::Variable(_)));
    }

    /// An ASK query yields a single-pattern BGP.
    #[test]
    fn test_ask_query() {
        let parser = SparqlParser::new();
        let result = parser.parse_query("ASK WHERE { ?s ?p ?o . }");
        assert!(result.is_ok());

        let pattern = match result {
            Ok(Query::Ask { pattern, .. }) => pattern,
            _ => panic!("Expected ASK query"),
        };

        assert_eq!(bgp_patterns(pattern).len(), 1);
    }

    /// A CONSTRUCT query yields one template triple and a single-pattern BGP.
    #[test]
    fn test_construct_query() {
        let parser = SparqlParser::new();
        let result = parser.parse_query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o . }");
        assert!(result.is_ok());

        let (template, pattern) = match result {
            Ok(Query::Construct {
                template, pattern, ..
            }) => (template, pattern),
            _ => panic!("Expected CONSTRUCT query"),
        };

        assert_eq!(template.len(), 1);
        assert_eq!(bgp_patterns(pattern).len(), 1);
    }

    /// A prefixed name parses once its prefix has been registered.
    #[test]
    fn test_parse_with_prefix() {
        let parser = SparqlParser::new()
            .with_prefix("ex", "http://example.org/")
            .expect("operation should succeed");

        let result = parser.parse_query("SELECT ?s WHERE { ex:subject ?p ?o . }");
        assert!(result.is_ok());
    }

    /// Text that starts with no supported query keyword is rejected.
    #[test]
    fn test_invalid_query() {
        let parser = SparqlParser::new();
        let result = parser.parse_query("INVALID QUERY");
        assert!(result.is_err());
    }
}