1use crate::model::{BlankNode, Literal, NamedNode, Variable};
7use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
8use crate::query::sparql_algebra::{GraphPattern, TermPattern, TriplePattern};
9use crate::query::sparql_query::Query;
10use crate::OxirsError;
11use std::collections::HashMap;
12
13#[derive(Debug, Clone, Default)]
15pub struct SparqlParser {
16 base_iri: Option<NamedNode>,
17 prefixes: HashMap<String, NamedNode>,
18}
19
20impl SparqlParser {
21 pub fn new() -> Self {
23 Self::default()
24 }
25
26 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, OxirsError> {
28 self.base_iri = Some(NamedNode::new(base_iri.into())?);
29 Ok(self)
30 }
31
32 pub fn with_prefix(
34 mut self,
35 prefix: impl Into<String>,
36 iri: impl Into<String>,
37 ) -> Result<Self, OxirsError> {
38 self.prefixes
39 .insert(prefix.into(), NamedNode::new(iri.into())?);
40 Ok(self)
41 }
42
43 pub fn parse(&self, query: &str) -> Result<Query, OxirsError> {
45 self.parse_query(query)
46 }
47
48 pub fn parse_query(&self, query: &str) -> Result<Query, OxirsError> {
50 let query = query.trim();
54
55 if query.to_uppercase().starts_with("SELECT") {
57 self.parse_select_query(query)
58 } else if query.to_uppercase().starts_with("CONSTRUCT") {
59 self.parse_construct_query(query)
60 } else if query.to_uppercase().starts_with("ASK") {
61 self.parse_ask_query(query)
62 } else if query.to_uppercase().starts_with("DESCRIBE") {
63 self.parse_describe_query(query)
64 } else {
65 Err(OxirsError::Parse(
66 "Unsupported query form. Query must start with SELECT, CONSTRUCT, ASK, or DESCRIBE"
67 .to_string(),
68 ))
69 }
70 }
71
72 fn parse_select_query(&self, query: &str) -> Result<Query, OxirsError> {
75 let where_start = query
77 .to_uppercase()
78 .find("WHERE")
79 .ok_or_else(|| OxirsError::Parse("SELECT query must have WHERE clause".to_string()))?;
80
81 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
83
84 Ok(Query::Select {
85 dataset: None,
86 pattern,
87 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
88 })
89 }
90
91 fn parse_construct_query(&self, query: &str) -> Result<Query, OxirsError> {
92 let construct_start = query.to_uppercase().find("CONSTRUCT").unwrap() + 9;
94 let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
95 OxirsError::Parse("CONSTRUCT query must have WHERE clause".to_string())
96 })?;
97
98 let construct_clause = query[construct_start..where_start].trim();
100 let algebra_template = self.parse_construct_template(construct_clause)?;
101 let template: Vec<TriplePattern> = algebra_template
102 .iter()
103 .map(|p| self.convert_triple_pattern(p))
104 .collect();
105
106 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
108
109 Ok(Query::Construct {
110 template,
111 dataset: None,
112 pattern,
113 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
114 })
115 }
116
117 fn parse_ask_query(&self, query: &str) -> Result<Query, OxirsError> {
118 let where_start = query
119 .to_uppercase()
120 .find("WHERE")
121 .ok_or_else(|| OxirsError::Parse("ASK query must have WHERE clause".to_string()))?;
122
123 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
124
125 Ok(Query::Ask {
126 dataset: None,
127 pattern,
128 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
129 })
130 }
131
132 fn parse_describe_query(&self, query: &str) -> Result<Query, OxirsError> {
133 let where_start = query.to_uppercase().find("WHERE").ok_or_else(|| {
134 OxirsError::Parse("DESCRIBE query must have WHERE clause".to_string())
135 })?;
136
137 let pattern = self.parse_where_clause(&query[where_start + 5..])?;
138
139 Ok(Query::Describe {
140 dataset: None,
141 pattern,
142 base_iri: self.base_iri.as_ref().map(|iri| iri.as_str().to_string()),
143 })
144 }
145
146 fn parse_construct_template(
147 &self,
148 template_text: &str,
149 ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
150 let content = template_text.trim();
151 if !content.starts_with('{') || !content.ends_with('}') {
152 return Err(OxirsError::Parse(
153 "CONSTRUCT template must be enclosed in {}".to_string(),
154 ));
155 }
156
157 let content = content[1..content.len() - 1].trim();
158 let mut triple_patterns: Vec<AlgebraTriplePattern> = Vec::new();
159
160 let triple_strings = self.split_triples_by_period(content);
162
163 for triple_str in triple_strings {
164 let triple_str = triple_str.trim();
165 if triple_str.is_empty() || triple_str.starts_with("FILTER") {
166 continue;
167 }
168
169 let parts: Vec<&str> = triple_str.split_whitespace().collect();
171 if parts.len() < 3 {
172 return Err(OxirsError::Parse(format!(
173 "Invalid triple pattern: '{triple_str}'"
174 )));
175 }
176
177 let subject = self.parse_term_pattern(parts[0])?;
178 let predicate = self.parse_term_pattern(parts[1])?;
179 let object = self.parse_term_pattern(parts[2])?;
180
181 if matches!(subject, TermPattern::Literal(_)) {
183 return Err(OxirsError::Parse("Literals cannot be subjects".to_string()));
184 }
185
186 if !matches!(
188 predicate,
189 TermPattern::NamedNode(_) | TermPattern::Variable(_)
190 ) {
191 return Err(OxirsError::Parse(
192 "Predicates must be named nodes or variables".to_string(),
193 ));
194 }
195
196 let algebra_subject = self.convert_to_algebra_term(&subject)?;
198 let algebra_predicate = self.convert_to_algebra_term(&predicate)?;
199 let algebra_object = self.convert_to_algebra_term(&object)?;
200
201 triple_patterns.push(AlgebraTriplePattern::new(
202 algebra_subject,
203 algebra_predicate,
204 algebra_object,
205 ));
206 }
207
208 Ok(triple_patterns)
209 }
210
211 fn convert_to_algebra_term(
213 &self,
214 term: &TermPattern,
215 ) -> Result<AlgebraTermPattern, OxirsError> {
216 match term {
217 TermPattern::NamedNode(n) => Ok(AlgebraTermPattern::NamedNode(n.clone())),
218 TermPattern::BlankNode(b) => Ok(AlgebraTermPattern::BlankNode(b.clone())),
219 TermPattern::Literal(l) => Ok(AlgebraTermPattern::Literal(l.clone())),
220 TermPattern::Variable(v) => Ok(AlgebraTermPattern::Variable(v.clone())),
221 #[cfg(feature = "sparql-12")]
222 TermPattern::Triple(_) => Err(OxirsError::Parse(
223 "Quoted triples not supported in construct templates".to_string(),
224 )),
225 }
226 }
227
228 fn parse_where_clause(&self, where_text: &str) -> Result<GraphPattern, OxirsError> {
229 let content = where_text.trim();
231 if !content.starts_with('{') || !content.ends_with('}') {
232 return Err(OxirsError::Parse(
233 "WHERE clause must be enclosed in {}".to_string(),
234 ));
235 }
236
237 let content = content[1..content.len() - 1].trim();
238 let mut triple_patterns: Vec<TriplePattern> = Vec::new();
239
240 let triple_strings = self.split_triples_by_period(content);
242
243 for triple_str in triple_strings {
244 let triple_str = triple_str.trim();
245 if triple_str.is_empty() || triple_str.starts_with("FILTER") {
246 continue;
247 }
248
249 let parts: Vec<&str> = triple_str.split_whitespace().collect();
251 if parts.len() < 3 {
252 return Err(OxirsError::Parse(format!(
253 "Invalid triple pattern: '{triple_str}'"
254 )));
255 }
256
257 let subject = self.parse_term_pattern(parts[0])?;
258 let predicate = self.parse_term_pattern(parts[1])?;
259 let object = self.parse_term_pattern(parts[2])?;
260
261 triple_patterns.push(TriplePattern::new(subject, predicate, object));
262 }
263
264 Ok(GraphPattern::Bgp {
265 patterns: triple_patterns,
266 })
267 }
268
269 fn parse_term_pattern(&self, term: &str) -> Result<TermPattern, OxirsError> {
270 if term.starts_with('?') || term.starts_with('$') {
271 Variable::new(term).map(TermPattern::Variable)
272 } else if term.starts_with('<') && term.ends_with('>') {
273 let iri = &term[1..term.len() - 1];
274 NamedNode::new(iri).map(TermPattern::NamedNode)
275 } else if term.starts_with('"') && term.ends_with('"') {
276 let value = &term[1..term.len() - 1];
277 Ok(TermPattern::Literal(Literal::new(value)))
278 } else if term.starts_with("_:") {
279 BlankNode::new(term).map(TermPattern::BlankNode)
280 } else if let Some(colon_pos) = term.find(':') {
281 let prefix = &term[..colon_pos];
283 let local = &term[colon_pos + 1..];
284
285 if let Some(namespace) = self.prefixes.get(prefix) {
286 let iri = format!("{}{}", namespace.as_str(), local);
287 NamedNode::new(iri).map(TermPattern::NamedNode)
288 } else {
289 Err(OxirsError::Parse(format!("Unknown prefix: {prefix}")))
290 }
291 } else {
292 Err(OxirsError::Parse(format!("Invalid term pattern: {term}")))
293 }
294 }
295
296 fn convert_term_pattern(&self, term: &AlgebraTermPattern) -> TermPattern {
298 match term {
299 AlgebraTermPattern::NamedNode(n) => TermPattern::NamedNode(n.clone()),
300 AlgebraTermPattern::BlankNode(b) => TermPattern::BlankNode(b.clone()),
301 AlgebraTermPattern::Literal(l) => TermPattern::Literal(l.clone()),
302 AlgebraTermPattern::Variable(v) => TermPattern::Variable(v.clone()),
303 }
304 }
305
306 fn convert_triple_pattern(&self, pattern: &AlgebraTriplePattern) -> TriplePattern {
308 TriplePattern::new(
309 self.convert_term_pattern(&pattern.subject),
310 self.convert_term_pattern(&pattern.predicate),
311 self.convert_term_pattern(&pattern.object),
312 )
313 }
314
315 fn convert_term_pattern_back(&self, term: &TermPattern) -> AlgebraTermPattern {
317 match term {
318 TermPattern::NamedNode(n) => AlgebraTermPattern::NamedNode(n.clone()),
319 TermPattern::BlankNode(b) => AlgebraTermPattern::BlankNode(b.clone()),
320 TermPattern::Literal(l) => AlgebraTermPattern::Literal(l.clone()),
321 TermPattern::Variable(v) => AlgebraTermPattern::Variable(v.clone()),
322 #[cfg(feature = "sparql-12")]
323 TermPattern::Triple(_) => {
324 todo!("Triple patterns in term position are not yet fully implemented")
326 }
327 }
328 }
329
330 pub fn convert_triple_pattern_back(&self, pattern: &TriplePattern) -> AlgebraTriplePattern {
332 AlgebraTriplePattern::new(
333 self.convert_term_pattern_back(&pattern.subject),
334 self.convert_term_pattern_back(&pattern.predicate),
335 self.convert_term_pattern_back(&pattern.object),
336 )
337 }
338
339 fn split_triples_by_period(&self, content: &str) -> Vec<String> {
341 let mut triples = Vec::new();
342 let mut current = String::new();
343 let mut in_iri = false;
344 let mut in_literal = false;
345 let mut escape_next = false;
346
347 for ch in content.chars() {
348 if escape_next {
349 current.push(ch);
350 escape_next = false;
351 continue;
352 }
353
354 match ch {
355 '\\' => {
356 escape_next = true;
357 current.push(ch);
358 }
359 '<' if !in_literal => {
360 in_iri = true;
361 current.push(ch);
362 }
363 '>' if in_iri && !in_literal => {
364 in_iri = false;
365 current.push(ch);
366 }
367 '"' => {
368 in_literal = !in_literal;
369 current.push(ch);
370 }
371 '.' if !in_iri && !in_literal => {
372 let trimmed = current.trim();
374 if !trimmed.is_empty() {
375 triples.push(trimmed.to_string());
376 }
377 current.clear();
378 }
379 _ => {
380 current.push(ch);
381 }
382 }
383 }
384
385 let trimmed = current.trim();
387 if !trimmed.is_empty() {
388 triples.push(trimmed.to_string());
389 }
390
391 triples
392 }
393}
394
#[cfg(test)]
mod tests {
    use super::*;

    /// Extracts the triple patterns of a basic graph pattern, failing the test
    /// for any other pattern kind.
    fn expect_bgp(pattern: GraphPattern) -> Vec<TriplePattern> {
        match pattern {
            GraphPattern::Bgp { patterns } => patterns,
            _ => panic!("Expected BGP pattern"),
        }
    }

    /// A SELECT over `?s ?p ?o` yields a single all-variable triple pattern.
    #[test]
    fn test_simple_select_query() {
        let parsed = SparqlParser::new()
            .parse_query("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }")
            .expect("SELECT query should parse");

        let patterns = match parsed {
            Query::Select { pattern, .. } => expect_bgp(pattern),
            _ => panic!("Expected SELECT query"),
        };

        assert_eq!(patterns.len(), 1);
        let triple = &patterns[0];
        assert!(matches!(triple.subject, TermPattern::Variable(_)));
        assert!(matches!(triple.predicate, TermPattern::Variable(_)));
        assert!(matches!(triple.object, TermPattern::Variable(_)));
    }

    /// An ASK query yields a basic graph pattern with one triple pattern.
    #[test]
    fn test_ask_query() {
        let parsed = SparqlParser::new()
            .parse_query("ASK WHERE { ?s ?p ?o . }")
            .expect("ASK query should parse");

        let patterns = match parsed {
            Query::Ask { pattern, .. } => expect_bgp(pattern),
            _ => panic!("Expected ASK query"),
        };

        assert_eq!(patterns.len(), 1);
    }

    /// A CONSTRUCT query yields one template triple and one WHERE triple.
    #[test]
    fn test_construct_query() {
        let parsed = SparqlParser::new()
            .parse_query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o . }")
            .expect("CONSTRUCT query should parse");

        let (template, patterns) = match parsed {
            Query::Construct {
                template, pattern, ..
            } => (template, expect_bgp(pattern)),
            _ => panic!("Expected CONSTRUCT query"),
        };

        assert_eq!(template.len(), 1);
        assert_eq!(patterns.len(), 1);
    }

    /// A prefixed name parses once its prefix has been registered.
    #[test]
    fn test_parse_with_prefix() {
        let parser = SparqlParser::new()
            .with_prefix("ex", "http://example.org/")
            .expect("prefix IRI should be valid");

        let outcome = parser.parse_query("SELECT ?s WHERE { ex:subject ?p ?o . }");
        assert!(outcome.is_ok());
    }

    /// Text that does not start with a supported query form is rejected.
    #[test]
    fn test_invalid_query() {
        let outcome = SparqlParser::new().parse_query("INVALID QUERY");
        assert!(outcome.is_err());
    }
}