1use crate::model::{BlankNode, Literal, NamedNode, Variable};
7use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
8use crate::query::sparql_algebra::{GraphPattern, TermPattern, TriplePattern};
9use crate::query::sparql_query::Query;
10use crate::OxirsError;
11use std::collections::HashMap;
12
13#[derive(Debug, Clone, Default)]
15pub struct SparqlParser {
16 base_iri: Option<NamedNode>,
17 prefixes: HashMap<String, NamedNode>,
18}
19
20impl SparqlParser {
21 pub fn new() -> Self {
23 Self::default()
24 }
25
26 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, OxirsError> {
28 self.base_iri = Some(NamedNode::new(base_iri.into())?);
29 Ok(self)
30 }
31
32 pub fn with_prefix(
34 mut self,
35 prefix: impl Into<String>,
36 iri: impl Into<String>,
37 ) -> Result<Self, OxirsError> {
38 self.prefixes
39 .insert(prefix.into(), NamedNode::new(iri.into())?);
40 Ok(self)
41 }
42
43 pub fn parse(&self, query: &str) -> Result<Query, OxirsError> {
45 self.parse_query(query)
46 }
47
48 pub fn parse_query(&self, query: &str) -> Result<Query, OxirsError> {
50 let query = query.trim();
54
55 if query.to_uppercase().starts_with("SELECT") {
57 self.parse_select_query(query)
58 } else if query.to_uppercase().starts_with("CONSTRUCT") {
59 self.parse_construct_query(query)
60 } else if query.to_uppercase().starts_with("ASK") {
61 self.parse_ask_query(query)
62 } else if query.to_uppercase().starts_with("DESCRIBE") {
63 self.parse_describe_query(query)
64 } else {
65 Err(OxirsError::Parse(
66 "Unsupported query form. Query must start with SELECT, CONSTRUCT, ASK, or DESCRIBE"
67 .to_string(),
68 ))
69 }
70 }
71
72 fn parse_select_query(&self, query: &str) -> Result<Query, OxirsError> {
75 let where_start = query
77 .to_uppercase()
78 .find("WHERE")
79 .ok_or_else(|| OxirsError::Parse("SELECT query must have WHERE clause".to_string()))?;
80
81 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
83
84 Ok(Query::Select {
85 dataset: None,
86 pattern,
87 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
88 })
89 }
90
91 fn parse_construct_query(&self, query: &str) -> Result<Query, OxirsError> {
92 let construct_start = query
94 .to_uppercase()
95 .find("CONSTRUCT")
96 .expect("CONSTRUCT keyword should be present in construct query")
97 + 9;
98 let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
99 OxirsError::Parse("CONSTRUCT query must have WHERE clause".to_string())
100 })?;
101
102 let construct_clause = query[construct_start..where_start].trim();
104 let algebra_template = self.parse_construct_template(construct_clause)?;
105 let template: Vec<TriplePattern> = algebra_template
106 .iter()
107 .map(|p| self.convert_triple_pattern(p))
108 .collect();
109
110 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
112
113 Ok(Query::Construct {
114 template,
115 dataset: None,
116 pattern,
117 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
118 })
119 }
120
121 fn parse_ask_query(&self, query: &str) -> Result<Query, OxirsError> {
122 let where_start = query
123 .to_uppercase()
124 .find("WHERE")
125 .ok_or_else(|| OxirsError::Parse("ASK query must have WHERE clause".to_string()))?;
126
127 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
128
129 Ok(Query::Ask {
130 dataset: None,
131 pattern,
132 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
133 })
134 }
135
136 fn parse_describe_query(&self, query: &str) -> Result<Query, OxirsError> {
137 let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
138 OxirsError::Parse("DESCRIBE query must have WHERE clause".to_string())
139 })?;
140
141 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
142
143 Ok(Query::Describe {
144 dataset: None,
145 pattern,
146 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
147 })
148 }
149
150 fn parse_construct_template(
151 &self,
152 template_text: &str,
153 ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
154 let content = template_text.trim();
155 if !content.starts_with('{') || !content.ends_with('}') {
156 return Err(OxirsError::Parse(
157 "CONSTRUCT template must be enclosed in {}".to_string(),
158 ));
159 }
160
161 let content = content[1..content.len() - 1].trim();
162 let mut triple_patterns: Vec<AlgebraTriplePattern> = Vec::new();
163
164 let triple_strings = self.split_triples_by_period(content);
166
167 for triple_str in triple_strings {
168 let triple_str = triple_str.trim();
169 if triple_str.is_empty() || triple_str.starts_with("FILTER") {
170 continue;
171 }
172
173 let parts: Vec<&str> = triple_str.split_whitespace().collect();
175 if parts.len() < 3 {
176 return Err(OxirsError::Parse(format!(
177 "Invalid triple pattern: '{triple_str}'"
178 )));
179 }
180
181 let subject = self.parse_term_pattern(parts[0])?;
182 let predicate = self.parse_term_pattern(parts[1])?;
183 let object = self.parse_term_pattern(parts[2])?;
184
185 if matches!(subject, TermPattern::Literal(_)) {
187 return Err(OxirsError::Parse("Literals cannot be subjects".to_string()));
188 }
189
190 if !matches!(
192 predicate,
193 TermPattern::NamedNode(_) | TermPattern::Variable(_)
194 ) {
195 return Err(OxirsError::Parse(
196 "Predicates must be named nodes or variables".to_string(),
197 ));
198 }
199
200 let algebra_subject = self.convert_to_algebra_term(&subject)?;
202 let algebra_predicate = self.convert_to_algebra_term(&predicate)?;
203 let algebra_object = self.convert_to_algebra_term(&object)?;
204
205 triple_patterns.push(AlgebraTriplePattern::new(
206 algebra_subject,
207 algebra_predicate,
208 algebra_object,
209 ));
210 }
211
212 Ok(triple_patterns)
213 }
214
215 fn convert_to_algebra_term(
217 &self,
218 term: &TermPattern,
219 ) -> Result<AlgebraTermPattern, OxirsError> {
220 match term {
221 TermPattern::NamedNode(n) => Ok(AlgebraTermPattern::NamedNode(n.clone())),
222 TermPattern::BlankNode(b) => Ok(AlgebraTermPattern::BlankNode(b.clone())),
223 TermPattern::Literal(l) => Ok(AlgebraTermPattern::Literal(l.clone())),
224 TermPattern::Variable(v) => Ok(AlgebraTermPattern::Variable(v.clone())),
225 #[cfg(feature = "sparql-12")]
226 TermPattern::Triple(_) => Err(OxirsError::Parse(
227 "Quoted triples not supported in construct templates".to_string(),
228 )),
229 }
230 }
231
232 fn parse_where_clause(&self, where_text: &str) -> Result<GraphPattern, OxirsError> {
233 let content = where_text.trim();
235 if !content.starts_with('{') || !content.ends_with('}') {
236 return Err(OxirsError::Parse(
237 "WHERE clause must be enclosed in {}".to_string(),
238 ));
239 }
240
241 let content = content[1..content.len() - 1].trim();
242 let mut triple_patterns: Vec<TriplePattern> = Vec::new();
243
244 let triple_strings = self.split_triples_by_period(content);
246
247 for triple_str in triple_strings {
248 let triple_str = triple_str.trim();
249 if triple_str.is_empty() || triple_str.starts_with("FILTER") {
250 continue;
251 }
252
253 let parts: Vec<&str> = triple_str.split_whitespace().collect();
255 if parts.len() < 3 {
256 return Err(OxirsError::Parse(format!(
257 "Invalid triple pattern: '{triple_str}'"
258 )));
259 }
260
261 let subject = self.parse_term_pattern(parts[0])?;
262 let predicate = self.parse_term_pattern(parts[1])?;
263 let object = self.parse_term_pattern(parts[2])?;
264
265 triple_patterns.push(TriplePattern::new(subject, predicate, object));
266 }
267
268 Ok(GraphPattern::Bgp {
269 patterns: triple_patterns,
270 })
271 }
272
273 fn parse_term_pattern(&self, term: &str) -> Result<TermPattern, OxirsError> {
274 if term.starts_with('?') || term.starts_with('$') {
275 Variable::new(term).map(TermPattern::Variable)
276 } else if term.starts_with('<') && term.ends_with('>') {
277 let iri = &term[1..term.len() - 1];
278 NamedNode::new(iri).map(TermPattern::NamedNode)
279 } else if term.starts_with('"') && term.ends_with('"') {
280 let value = &term[1..term.len() - 1];
281 Ok(TermPattern::Literal(Literal::new(value)))
282 } else if term.starts_with("_:") {
283 BlankNode::new(term).map(TermPattern::BlankNode)
284 } else if let Some(colon_pos) = term.find(':') {
285 let prefix = &term[..colon_pos];
287 let local = &term[colon_pos + 1..];
288
289 if let Some(namespace) = self.prefixes.get(prefix) {
290 let iri = format!("{}{}", namespace.as_str(), local);
291 NamedNode::new(iri).map(TermPattern::NamedNode)
292 } else {
293 Err(OxirsError::Parse(format!("Unknown prefix: {prefix}")))
294 }
295 } else {
296 Err(OxirsError::Parse(format!("Invalid term pattern: {term}")))
297 }
298 }
299
300 fn convert_term_pattern(&self, term: &AlgebraTermPattern) -> TermPattern {
302 match term {
303 AlgebraTermPattern::NamedNode(n) => TermPattern::NamedNode(n.clone()),
304 AlgebraTermPattern::BlankNode(b) => TermPattern::BlankNode(b.clone()),
305 AlgebraTermPattern::Literal(l) => TermPattern::Literal(l.clone()),
306 AlgebraTermPattern::Variable(v) => TermPattern::Variable(v.clone()),
307 AlgebraTermPattern::QuotedTriple(_) => {
308 panic!("RDF-star quoted triples not yet supported in SPARQL algebra conversion")
309 }
310 }
311 }
312
313 fn convert_triple_pattern(&self, pattern: &AlgebraTriplePattern) -> TriplePattern {
315 TriplePattern::new(
316 self.convert_term_pattern(&pattern.subject),
317 self.convert_term_pattern(&pattern.predicate),
318 self.convert_term_pattern(&pattern.object),
319 )
320 }
321
322 #[allow(clippy::only_used_in_recursion)]
324 fn convert_term_pattern_back(&self, term: &TermPattern) -> AlgebraTermPattern {
325 match term {
326 TermPattern::NamedNode(n) => AlgebraTermPattern::NamedNode(n.clone()),
327 TermPattern::BlankNode(b) => AlgebraTermPattern::BlankNode(b.clone()),
328 TermPattern::Literal(l) => AlgebraTermPattern::Literal(l.clone()),
329 TermPattern::Variable(v) => AlgebraTermPattern::Variable(v.clone()),
330 #[cfg(feature = "sparql-12")]
331 TermPattern::Triple(triple_pattern) => {
332 let subject = self.convert_term_pattern_back(&triple_pattern.subject);
335 let predicate = self.convert_term_pattern_back(&triple_pattern.predicate);
336 let object = self.convert_term_pattern_back(&triple_pattern.object);
337
338 AlgebraTermPattern::QuotedTriple(Box::new(crate::query::AlgebraTriplePattern::new(
341 subject, predicate, object,
342 )))
343 }
344 }
345 }
346
347 pub fn convert_triple_pattern_back(&self, pattern: &TriplePattern) -> AlgebraTriplePattern {
349 AlgebraTriplePattern::new(
350 self.convert_term_pattern_back(&pattern.subject),
351 self.convert_term_pattern_back(&pattern.predicate),
352 self.convert_term_pattern_back(&pattern.object),
353 )
354 }
355
356 fn split_triples_by_period(&self, content: &str) -> Vec<String> {
358 let mut triples = Vec::new();
359 let mut current = String::new();
360 let mut in_iri = false;
361 let mut in_literal = false;
362 let mut escape_next = false;
363
364 for ch in content.chars() {
365 if escape_next {
366 current.push(ch);
367 escape_next = false;
368 continue;
369 }
370
371 match ch {
372 '\\' => {
373 escape_next = true;
374 current.push(ch);
375 }
376 '<' if !in_literal => {
377 in_iri = true;
378 current.push(ch);
379 }
380 '>' if in_iri && !in_literal => {
381 in_iri = false;
382 current.push(ch);
383 }
384 '"' => {
385 in_literal = !in_literal;
386 current.push(ch);
387 }
388 '.' if !in_iri && !in_literal => {
389 let trimmed = current.trim();
391 if !trimmed.is_empty() {
392 triples.push(trimmed.to_string());
393 }
394 current.clear();
395 }
396 _ => {
397 current.push(ch);
398 }
399 }
400 }
401
402 let trimmed = current.trim();
404 if !trimmed.is_empty() {
405 triples.push(trimmed.to_string());
406 }
407
408 triples
409 }
410}
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `query` through a parser that has no base IRI and no prefixes.
    fn parse_with_default_parser(query: &str) -> Result<Query, OxirsError> {
        SparqlParser::new().parse_query(query)
    }

    /// A SELECT over `?s ?p ?o` yields a BGP with one all-variable triple.
    #[test]
    fn test_simple_select_query() {
        let outcome = parse_with_default_parser("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }");
        assert!(outcome.is_ok());

        let pattern = match outcome {
            Ok(Query::Select { pattern, .. }) => pattern,
            _ => panic!("Expected SELECT query"),
        };
        let patterns = match pattern {
            GraphPattern::Bgp { patterns } => patterns,
            _ => panic!("Expected BGP pattern"),
        };

        assert_eq!(patterns.len(), 1);
        let only_triple = &patterns[0];
        assert!(matches!(only_triple.subject, TermPattern::Variable(_)));
        assert!(matches!(only_triple.predicate, TermPattern::Variable(_)));
        assert!(matches!(only_triple.object, TermPattern::Variable(_)));
    }

    /// An ASK query yields a BGP with a single triple pattern.
    #[test]
    fn test_ask_query() {
        let outcome = parse_with_default_parser("ASK WHERE { ?s ?p ?o . }");
        assert!(outcome.is_ok());

        let pattern = match outcome {
            Ok(Query::Ask { pattern, .. }) => pattern,
            _ => panic!("Expected ASK query"),
        };
        match pattern {
            GraphPattern::Bgp { patterns } => assert_eq!(patterns.len(), 1),
            _ => panic!("Expected BGP pattern"),
        }
    }

    /// A CONSTRUCT query yields one template triple and one WHERE triple.
    #[test]
    fn test_construct_query() {
        let outcome =
            parse_with_default_parser("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o . }");
        assert!(outcome.is_ok());

        let (template, pattern) = match outcome {
            Ok(Query::Construct {
                template, pattern, ..
            }) => (template, pattern),
            _ => panic!("Expected CONSTRUCT query"),
        };

        assert_eq!(template.len(), 1);
        match pattern {
            GraphPattern::Bgp { patterns } => assert_eq!(patterns.len(), 1),
            _ => panic!("Expected BGP pattern"),
        }
    }

    /// A prefixed name resolves once its prefix is registered on the parser.
    #[test]
    fn test_parse_with_prefix() {
        let prefixed_parser = SparqlParser::new()
            .with_prefix("ex", "http://example.org/")
            .expect("operation should succeed");

        let outcome = prefixed_parser.parse_query("SELECT ?s WHERE { ex:subject ?p ?o . }");
        assert!(outcome.is_ok());
    }

    /// Text that starts with no supported query keyword is rejected.
    #[test]
    fn test_invalid_query() {
        let outcome = parse_with_default_parser("INVALID QUERY");
        assert!(outcome.is_err());
    }
}