Skip to main content

oxirs_core/sparql/
patterns.rs

1//! SPARQL pattern matching: OPTIONAL and UNION clauses
2
3use crate::model::{Quad, Term};
4use crate::rdf_store::VariableBinding;
5use crate::Result;
6
/// A triple pattern stored as raw SPARQL token text.
///
/// Each position keeps the token as written in the query (for example `?s`,
/// a prefixed name or a quoted literal). [`apply_optional_patterns`] treats a
/// token that starts with `?` as a variable name.
#[derive(Clone, Debug)]
pub struct SimpleTriplePattern {
    /// Token in subject position.
    pub subject: Option<String>,
    /// Token in predicate position.
    pub predicate: Option<String>,
    /// Token(s) in object position.
    pub object: Option<String>,
}
14
15/// Pattern group (required or optional)
16#[derive(Debug, Clone)]
17pub struct PatternGroup {
18    pub patterns: Vec<SimpleTriplePattern>,
19    pub optional: bool,
20}
21
22/// Union group for SPARQL UNION clause
23#[derive(Debug, Clone)]
24pub struct UnionGroup {
25    pub branches: Vec<Vec<PatternGroup>>,
26}
27
/// Report whether `sparql` appears to contain a `UNION` clause.
///
/// The check is textual and case-insensitive. It succeeds when the keyword
/// occurs surrounded by single spaces, alone on its own line, or directly
/// after an opening brace.
pub fn has_union(sparql: &str) -> bool {
    const UNION_MARKERS: [&str; 3] = [" UNION ", "\nUNION\n", "{UNION"];

    let upper = sparql.to_uppercase();
    UNION_MARKERS.iter().any(|marker| upper.contains(*marker))
}
35
/// Find the `}` that closes the `{` located at `start_pos`.
///
/// `start_pos` and the returned position are **byte** offsets into `text`,
/// i.e. the kind of offset produced by [`str::find`] and accepted by string
/// slicing. Nested brace pairs are skipped.
///
/// Returns `None` when `start_pos` is out of range, when the byte at
/// `start_pos` is not `{`, or when the brace is never closed.
///
/// Braces are counted purely textually; a brace inside a string literal or
/// IRI is counted like any other.
pub fn find_matching_brace(text: &str, start_pos: usize) -> Option<usize> {
    let bytes = text.as_bytes();
    if bytes.get(start_pos) != Some(&b'{') {
        return None;
    }

    // Scanning bytes is safe for UTF-8 input: the ASCII bytes `{` and `}`
    // never occur inside a multi-byte character.
    let mut depth: usize = 0;
    for (offset, &byte) in bytes.iter().enumerate().skip(start_pos) {
        match byte {
            b'{' => depth += 1,
            b'}' => {
                // `depth` is at least 1 here because the scan starts on `{`
                // and returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some(offset);
                }
            }
            _ => {}
        }
    }

    None
}
57
58/// Parse a simple triple pattern from text
59pub fn parse_simple_pattern(text: &str) -> Option<SimpleTriplePattern> {
60    // Simple pattern: ?s ?p ?o . or <uri> <uri> "literal" .
61    let text = text.trim();
62
63    // Split by periods and process each potential pattern
64    for line in text.split('.') {
65        let line = line.trim();
66        if line.is_empty() {
67            continue;
68        }
69
70        // Skip FILTER, BIND, VALUES, UNION keywords
71        let line_upper = line.to_uppercase();
72        if line_upper.contains("FILTER")
73            || line_upper.contains("BIND")
74            || line_upper.contains("VALUES")
75            || line_upper.contains("UNION")
76        {
77            continue;
78        }
79
80        let parts: Vec<&str> = line.split_whitespace().collect();
81        if parts.len() >= 3 {
82            return Some(SimpleTriplePattern {
83                subject: Some(parts[0].to_string()),
84                predicate: Some(parts[1].to_string()),
85                object: Some(parts[2..].join(" ")),
86            });
87        }
88    }
89
90    None
91}
92
93/// Extract pattern groups (required and optional) from WHERE clause
94pub fn extract_pattern_groups(sparql: &str) -> Result<Vec<PatternGroup>> {
95    let mut groups = Vec::new();
96
97    if let Some(where_start) = sparql.to_uppercase().find("WHERE") {
98        let where_clause = &sparql[where_start + 5..];
99
100        // Find the main WHERE block
101        if let Some(start_brace) = where_clause.find('{') {
102            if let Some(end_brace) = find_matching_brace(where_clause, start_brace) {
103                let pattern_text = &where_clause[start_brace + 1..end_brace];
104
105                // Check for OPTIONAL blocks
106                let sparql_upper = pattern_text.to_uppercase();
107                if sparql_upper.contains("OPTIONAL") {
108                    // Parse with OPTIONAL support
109                    let mut pos = 0;
110                    let mut required_patterns = Vec::new();
111
112                    while pos < pattern_text.len() {
113                        // Look for OPTIONAL keyword
114                        if let Some(opt_pos) = pattern_text[pos..].to_uppercase().find("OPTIONAL") {
115                            let abs_pos = pos + opt_pos;
116
117                            // Add any required patterns before OPTIONAL
118                            let before_optional = &pattern_text[pos..abs_pos];
119                            if let Some(req_pattern) = parse_simple_pattern(before_optional) {
120                                required_patterns.push(req_pattern);
121                            }
122
123                            // Find OPTIONAL block
124                            let after_optional = &pattern_text[abs_pos + 8..];
125                            if let Some(opt_brace) = after_optional.find('{') {
126                                if let Some(opt_end) =
127                                    find_matching_brace(after_optional, opt_brace)
128                                {
129                                    let optional_content = &after_optional[opt_brace + 1..opt_end];
130
131                                    // Parse optional patterns
132                                    if let Some(opt_pattern) =
133                                        parse_simple_pattern(optional_content)
134                                    {
135                                        groups.push(PatternGroup {
136                                            patterns: vec![opt_pattern],
137                                            optional: true,
138                                        });
139                                    }
140
141                                    pos = abs_pos + 8 + opt_end + 1;
142                                } else {
143                                    break;
144                                }
145                            } else {
146                                break;
147                            }
148                        } else {
149                            // No more OPTIONAL, add remaining as required
150                            if let Some(req_pattern) = parse_simple_pattern(&pattern_text[pos..]) {
151                                required_patterns.push(req_pattern);
152                            }
153                            break;
154                        }
155                    }
156
157                    // Add required patterns group
158                    if !required_patterns.is_empty() {
159                        groups.push(PatternGroup {
160                            patterns: required_patterns,
161                            optional: false,
162                        });
163                    }
164                } else {
165                    // No OPTIONAL - all patterns are required
166                    if let Some(pattern) = parse_simple_pattern(pattern_text) {
167                        groups.push(PatternGroup {
168                            patterns: vec![pattern],
169                            optional: false,
170                        });
171                    }
172                }
173            }
174        }
175    }
176
177    Ok(groups)
178}
179
180/// Apply optional patterns to extend existing bindings
181pub fn apply_optional_patterns<F>(
182    bindings: Vec<VariableBinding>,
183    patterns: &[SimpleTriplePattern],
184    query_quads: F,
185) -> Result<Vec<VariableBinding>>
186where
187    F: Fn(&SimpleTriplePattern) -> Result<Vec<Quad>>,
188{
189    let mut new_results = Vec::new();
190
191    for binding in bindings {
192        let mut extended = false;
193
194        // Try to extend this binding with optional patterns
195        for pattern in patterns {
196            let matching_quads = query_quads(pattern)?;
197
198            for quad in matching_quads {
199                let mut new_binding = binding.clone();
200                let mut compatible = true;
201
202                // Check subject compatibility
203                if let Some(var) = &pattern.subject {
204                    if let Some(var_name) = var.strip_prefix('?') {
205                        if let Some(existing) = binding.get(var_name) {
206                            let new_term = Term::from(quad.subject().clone());
207                            if format!("{:?}", existing) != format!("{:?}", new_term) {
208                                compatible = false;
209                            }
210                        } else {
211                            new_binding
212                                .bind(var_name.to_string(), Term::from(quad.subject().clone()));
213                        }
214                    }
215                }
216
217                // Check predicate compatibility
218                if compatible {
219                    if let Some(var) = &pattern.predicate {
220                        if let Some(var_name) = var.strip_prefix('?') {
221                            if let Some(existing) = binding.get(var_name) {
222                                let new_term = Term::from(quad.predicate().clone());
223                                if format!("{:?}", existing) != format!("{:?}", new_term) {
224                                    compatible = false;
225                                }
226                            } else {
227                                new_binding.bind(
228                                    var_name.to_string(),
229                                    Term::from(quad.predicate().clone()),
230                                );
231                            }
232                        }
233                    }
234                }
235
236                // Check object compatibility
237                if compatible {
238                    if let Some(var) = &pattern.object {
239                        if let Some(var_name) = var.strip_prefix('?') {
240                            if let Some(existing) = binding.get(var_name) {
241                                let new_term = Term::from(quad.object().clone());
242                                if format!("{:?}", existing) != format!("{:?}", new_term) {
243                                    compatible = false;
244                                }
245                            } else {
246                                new_binding
247                                    .bind(var_name.to_string(), Term::from(quad.object().clone()));
248                            }
249                        }
250                    }
251                }
252
253                if compatible {
254                    new_results.push(new_binding);
255                    extended = true;
256                }
257            }
258        }
259
260        // If no optional pattern matched, keep original binding
261        if !extended {
262            new_results.push(binding);
263        }
264    }
265
266    Ok(new_results)
267}
268
269/// Extract UNION groups from WHERE clause
270pub fn extract_union_groups(sparql: &str) -> Result<Option<UnionGroup>> {
271    if !has_union(sparql) {
272        return Ok(None);
273    }
274
275    if let Some(where_start) = sparql.to_uppercase().find("WHERE") {
276        let where_clause = &sparql[where_start + 5..];
277
278        if let Some(start_brace) = where_clause.find('{') {
279            if let Some(end_brace) = find_matching_brace(where_clause, start_brace) {
280                let content = &where_clause[start_brace + 1..end_brace];
281
282                // Split by UNION keyword
283                let mut branches = Vec::new();
284                let mut current_branch = String::new();
285
286                let mut pos = 0;
287                while pos < content.len() {
288                    if let Some(union_pos) = content[pos..].to_uppercase().find(" UNION ") {
289                        let abs_pos = pos + union_pos;
290                        current_branch.push_str(&content[pos..abs_pos]);
291
292                        // Parse the branch we accumulated
293                        if let Some(branch) = parse_union_branch(&current_branch)? {
294                            branches.push(branch);
295                        }
296
297                        current_branch.clear();
298                        pos = abs_pos + 7; // Skip " UNION "
299                    } else {
300                        // Last branch
301                        current_branch.push_str(&content[pos..]);
302                        break;
303                    }
304                }
305
306                // Parse final branch
307                if !current_branch.trim().is_empty() {
308                    if let Some(branch) = parse_union_branch(&current_branch)? {
309                        branches.push(branch);
310                    }
311                }
312
313                if !branches.is_empty() {
314                    return Ok(Some(UnionGroup { branches }));
315                }
316            }
317        }
318    }
319
320    Ok(None)
321}
322
323/// Parse a single UNION branch
324pub fn parse_union_branch(branch_text: &str) -> Result<Option<Vec<PatternGroup>>> {
325    let branch_text = branch_text.trim();
326
327    // Branch can be either { pattern } or just pattern
328    let pattern_text = if branch_text.starts_with('{') && branch_text.ends_with('}') {
329        &branch_text[1..branch_text.len() - 1]
330    } else {
331        branch_text
332    };
333
334    let mut groups = Vec::new();
335
336    // Check for OPTIONAL in the branch
337    if pattern_text.to_uppercase().contains("OPTIONAL") {
338        // Parse with OPTIONAL support
339        let mut pos = 0;
340        let mut required_patterns = Vec::new();
341
342        while pos < pattern_text.len() {
343            if let Some(opt_pos) = pattern_text[pos..].to_uppercase().find("OPTIONAL") {
344                let abs_pos = pos + opt_pos;
345
346                // Add required patterns before OPTIONAL
347                let before_optional = &pattern_text[pos..abs_pos];
348                if let Some(req_pattern) = parse_simple_pattern(before_optional) {
349                    required_patterns.push(req_pattern);
350                }
351
352                // Find OPTIONAL block
353                let after_optional = &pattern_text[abs_pos + 8..];
354                if let Some(opt_brace) = after_optional.find('{') {
355                    if let Some(opt_end) = find_matching_brace(after_optional, opt_brace) {
356                        let optional_content = &after_optional[opt_brace + 1..opt_end];
357
358                        if let Some(opt_pattern) = parse_simple_pattern(optional_content) {
359                            groups.push(PatternGroup {
360                                patterns: vec![opt_pattern],
361                                optional: true,
362                            });
363                        }
364
365                        pos = abs_pos + 8 + opt_end + 1;
366                    } else {
367                        break;
368                    }
369                } else {
370                    break;
371                }
372            } else {
373                // No more OPTIONAL
374                if let Some(req_pattern) = parse_simple_pattern(&pattern_text[pos..]) {
375                    required_patterns.push(req_pattern);
376                }
377                break;
378            }
379        }
380
381        if !required_patterns.is_empty() {
382            groups.push(PatternGroup {
383                patterns: required_patterns,
384                optional: false,
385            });
386        }
387    } else {
388        // No OPTIONAL - simple pattern
389        if let Some(pattern) = parse_simple_pattern(pattern_text) {
390            groups.push(PatternGroup {
391                patterns: vec![pattern],
392                optional: false,
393            });
394        }
395    }
396
397    if groups.is_empty() {
398        Ok(None)
399    } else {
400        Ok(Some(groups))
401    }
402}
403
/// Reference stub for executing a SELECT query that contains `UNION`.
///
/// Intentionally does nothing. The real `execute_union_query` has to live in
/// `RdfStore`, because it needs access to `self`; this function only marks the
/// spot in this module.
pub fn execute_union_query_placeholder() {}