Skip to main content

graphmind/query/
mod.rs

1//! # Query Processing Pipeline
2//!
3//! This module implements the full **query processing pipeline** for Graphmind's OpenCypher
4//! dialect, following the same staged architecture used by virtually every database engine
5//! and compiler:
6//!
7//! ```text
8//!   Source Text          Pest PEG Parser        Abstract Syntax Tree
9//!  ┌──────────┐        ┌──────────────┐        ┌──────────────────┐
10//!  │ MATCH    │──Lex──>│  cypher.pest │──AST──>│  Query struct    │
11//!  │ (n:Foo)  │ +Parse │  (PEG rules) │        │  (ast.rs)        │
12//!  │ RETURN n │        └──────────────┘        └────────┬─────────┘
13//!  └──────────┘                                         │
14//!                                                       │ plan()
15//!                                                       v
16//!                     Execution Plan             ┌──────────────┐
17//!                    ┌──────────────┐            │ QueryPlanner │
18//!                    │ Operator tree│<───────────│ (planner.rs) │
19//!                    │ (Volcano)    │            └──────────────┘
20//!                    └──────┬───────┘
21//!                           │ next() / next_mut()
22//!                           v
23//!                    ┌──────────────┐
24//!                    │  RecordBatch │  (final output)
25//!                    └──────────────┘
26//! ```
27//!
28//! This mirrors how compilers work: source code is lexed into tokens, parsed into an AST,
29//! lowered to an intermediate representation (the execution plan), and finally "executed"
30//! (in a compiler, that means code generation; here, it means pulling records through
31//! operators). The analogy is not accidental -- query languages *are* domain-specific
32//! programming languages.
33//!
34//! ## Parsing: PEG via Pest
35//!
36//! The parser uses [Pest](https://pest.rs), a Rust crate that implements **Parsing Expression
37//! Grammars (PEGs)**. Unlike context-free grammars (CFGs) used by yacc/bison, PEGs use an
38//! ordered-choice operator (`/`) that tries alternatives left-to-right and commits to the
39//! first match. This makes PEGs **always unambiguous** -- there is exactly one parse tree for
40//! any input, which eliminates an entire class of grammar debugging. The grammar lives in
41//! [`cypher.pest`](cypher.pest) and is compiled into a Rust parser at build time via a proc
42//! macro (`#[derive(Parser)]`).
43//!
44//! ## Execution: Volcano Iterator Model (ADR-007)
45//!
46//! Query execution follows the **Volcano iterator model** invented by Goetz Graefe. Each
47//! physical operator (scan, filter, expand, project, etc.) implements a `next()` method that
48//! **pulls** a single record from its child operator. Records flow upward through the
49//! operator tree one at a time, like a lazy iterator chain in Rust (`iter().filter().map()`).
50//! This is memory-efficient because intermediate results are never fully materialized -- each
51//! operator processes one record and immediately passes it upstream.
52//!
53//! ## LRU Parse Cache
54//!
55//! Parsing is expensive (PEG matching, AST construction, string allocation). Since many
56//! applications execute the same queries repeatedly with different parameters, this module
57//! maintains an **LRU (Least Recently Used) cache** of parsed ASTs. On a cache hit, we skip
58//! parsing entirely and jump straight to planning. The cache uses `Mutex<LruCache>` for
59//! thread safety, with lock-free `AtomicU64` counters for hit/miss statistics.
60//!
61//! ## Read vs Write Execution Paths
62//!
63//! Queries are split into two execution paths based on mutability:
64//! - **[`QueryExecutor`]**: read-only queries (MATCH, RETURN, EXPLAIN). Takes `&GraphStore`.
65//! - **[`MutQueryExecutor`]**: write queries (CREATE, DELETE, SET, MERGE). Takes `&mut GraphStore`.
66//!
67//! This separation mirrors Rust's ownership model -- shared references (`&T`) allow
68//! concurrent reads, while exclusive references (`&mut T`) guarantee single-writer access.
69//! The type system enforces at compile time that no read query can accidentally modify the
70//! graph.
71
72pub mod ast;
73pub mod executor;
74pub mod parser;
75
76use lru::LruCache;
77use std::collections::HashMap;
78use std::num::NonZeroUsize;
79use std::sync::atomic::{AtomicU64, Ordering};
80use std::sync::Mutex;
81
82// Re-export main types
83pub use ast::Query;
84pub use executor::{
85    ExecutionError,
86    ExecutionResult,
87    MutQueryExecutor, // Added for CREATE/DELETE/SET support
88    QueryExecutor,
89    Record,
90    RecordBatch,
91    Value,
92};
93pub use parser::{parse_query, ParseError, ParseResult};
94
95/// Default LRU cache capacity
96const DEFAULT_CACHE_CAPACITY: usize = 1024;
97
98/// Lock-free cache hit/miss counters.
99pub struct CacheStats {
100    hits: AtomicU64,
101    misses: AtomicU64,
102}
103
104impl CacheStats {
105    fn new() -> Self {
106        Self {
107            hits: AtomicU64::new(0),
108            misses: AtomicU64::new(0),
109        }
110    }
111
112    /// Total cache hits since engine creation.
113    pub fn hits(&self) -> u64 {
114        self.hits.load(Ordering::Relaxed)
115    }
116
117    /// Total cache misses since engine creation.
118    pub fn misses(&self) -> u64 {
119        self.misses.load(Ordering::Relaxed)
120    }
121
122    fn record_hit(&self) {
123        self.hits.fetch_add(1, Ordering::Relaxed);
124    }
125
126    fn record_miss(&self) {
127        self.misses.fetch_add(1, Ordering::Relaxed);
128    }
129}
130
131/// Query engine - high-level interface for executing queries
132///
133/// Includes an LRU AST cache that eliminates repeated parsing overhead
134/// for identical queries. The cache is keyed by whitespace-normalized
135/// query strings and evicts least-recently-used entries when full.
136pub struct QueryEngine {
137    /// Parsed AST cache: normalized query string -> Query AST
138    ast_cache: Mutex<LruCache<String, Query>>,
139    /// Lock-free hit/miss counters
140    stats: CacheStats,
141    /// Per-query timeout in seconds (0 = no timeout)
142    query_timeout_secs: u64,
143}
144
145impl QueryEngine {
146    /// Create a new query engine with the default cache capacity (1024 entries)
147    pub fn new() -> Self {
148        Self::with_capacity(DEFAULT_CACHE_CAPACITY)
149    }
150
151    /// Create a new query engine with a specific cache capacity
152    pub fn with_capacity(capacity: usize) -> Self {
153        let cap = NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::new(1).unwrap());
154        Self {
155            ast_cache: Mutex::new(LruCache::new(cap)),
156            stats: CacheStats::new(),
157            query_timeout_secs: 45,
158        }
159    }
160
161    /// Return a reference to the cache statistics (hits/misses).
162    pub fn cache_stats(&self) -> &CacheStats {
163        &self.stats
164    }
165
166    /// Return the current number of entries in the cache.
167    pub fn cache_len(&self) -> usize {
168        self.ast_cache.lock().unwrap().len()
169    }
170
171    /// Parse with caching — normalizes whitespace for cache hits
172    fn cached_parse(&self, query_str: &str) -> Result<Query, Box<dyn std::error::Error>> {
173        let normalized = query_str.split_whitespace().collect::<Vec<_>>().join(" ");
174
175        // Check cache (LruCache::get promotes to most-recently-used)
176        {
177            let mut cache = self.ast_cache.lock().unwrap();
178            if let Some(cached) = cache.get(&normalized) {
179                self.stats.record_hit();
180                return Ok(cached.clone());
181            }
182        }
183
184        self.stats.record_miss();
185
186        // Parse and cache (LRU evicts automatically when full)
187        let query = parse_query(query_str)?;
188        {
189            let mut cache = self.ast_cache.lock().unwrap();
190            cache.put(normalized, query.clone());
191        }
192        Ok(query)
193    }
194
195    /// Split a query string on semicolons, respecting quoted strings.
196    /// Returns individual statement strings (trimmed, non-empty).
197    /// Strip `// ...` line comments (not inside strings).
198    /// Returns a new string with comments removed.
199    pub fn strip_line_comments(input: &str) -> String {
200        let mut result = String::with_capacity(input.len());
201        let mut in_single = false;
202        let mut in_double = false;
203        let mut chars = input.chars().peekable();
204
205        while let Some(c) = chars.next() {
206            match c {
207                '\'' if !in_double => {
208                    in_single = !in_single;
209                    result.push(c);
210                }
211                '"' if !in_single => {
212                    in_double = !in_double;
213                    result.push(c);
214                }
215                '/' if !in_single && !in_double => {
216                    if chars.peek() == Some(&'/') {
217                        // Line comment — skip to end of line
218                        for remaining in chars.by_ref() {
219                            if remaining == '\n' {
220                                result.push('\n');
221                                break;
222                            }
223                        }
224                    } else if chars.peek() == Some(&'*') {
225                        // Block comment — skip to */
226                        chars.next(); // consume *
227                        let mut prev = ' ';
228                        for remaining in chars.by_ref() {
229                            if prev == '*' && remaining == '/' {
230                                break;
231                            }
232                            prev = remaining;
233                        }
234                        result.push(' '); // replace block comment with space
235                    } else {
236                        result.push(c);
237                    }
238                }
239                _ => result.push(c),
240            }
241        }
242        result
243    }
244
245    pub fn split_statements(input: &str) -> Vec<&str> {
246        let mut statements = Vec::new();
247        let mut start = 0;
248        let mut in_single_quote = false;
249        let mut in_double_quote = false;
250        let mut escape_next = false;
251        let bytes = input.as_bytes();
252
253        for i in 0..bytes.len() {
254            if escape_next {
255                escape_next = false;
256                continue;
257            }
258            match bytes[i] {
259                b'\\' => escape_next = true,
260                b'\'' if !in_double_quote => in_single_quote = !in_single_quote,
261                b'"' if !in_single_quote => in_double_quote = !in_double_quote,
262                b';' if !in_single_quote && !in_double_quote => {
263                    let stmt = input[start..i].trim();
264                    if !stmt.is_empty() {
265                        statements.push(stmt);
266                    }
267                    start = i + 1;
268                }
269                _ => {}
270            }
271        }
272        // Last segment (after final semicolon or no semicolons at all)
273        let last = input[start..].trim();
274        if !last.is_empty() {
275            statements.push(last);
276        }
277        statements
278    }
279
280    /// Rewrite multi-CREATE statements by inserting WITH clauses to carry variables forward.
281    /// E.g.: `CREATE (a:Person) CREATE (b:Person) CREATE (a)-[:KNOWS]->(b)`
282    /// becomes: `CREATE (a:Person) WITH a CREATE (b:Person) WITH a, b CREATE (a)-[:KNOWS]->(b)`
283    /// Expand `UNWIND [1,2,3] AS x CREATE ({num: x})` into individual CREATEs.
284    /// Returns None if the query is not an UNWIND+CREATE pattern.
285    fn expand_unwind_create(input: &str) -> Option<Vec<String>> {
286        let trimmed = input.trim();
287        let upper = trimmed.to_uppercase();
288
289        // Must start with UNWIND and contain CREATE but not MATCH/RETURN/WITH
290        if !upper.starts_with("UNWIND") || !upper.contains("CREATE") {
291            return None;
292        }
293        if upper.contains("MATCH")
294            || upper.contains("RETURN")
295            || upper.contains("WITH")
296            || upper.contains("MERGE")
297        {
298            return None;
299        }
300
301        // Extract the list and variable: UNWIND [...] AS var
302        // Find the list literal [...]
303        let list_start = trimmed.find('[')?;
304        let mut depth = 0;
305        let mut list_end = None;
306        for (i, ch) in trimmed[list_start..].char_indices() {
307            match ch {
308                '[' => depth += 1,
309                ']' => {
310                    depth -= 1;
311                    if depth == 0 {
312                        list_end = Some(list_start + i + 1);
313                        break;
314                    }
315                }
316                _ => {}
317            }
318        }
319        let list_end = list_end?;
320        let list_str = &trimmed[list_start..list_end];
321
322        // Find AS variable
323        let after_list = &trimmed[list_end..];
324        let as_pos = after_list.to_uppercase().find(" AS ")?;
325        let after_as = &after_list[as_pos + 4..].trim();
326        let var_end = after_as
327            .find(|c: char| !c.is_alphanumeric() && c != '_')
328            .unwrap_or(after_as.len());
329        let var_name = &after_as[..var_end].trim();
330
331        // Find CREATE clause
332        let create_pos = upper.find("CREATE")?;
333        let create_clause = &trimmed[create_pos..];
334
335        // Parse the list values (simple: split by comma, handle nested)
336        let inner = &list_str[1..list_str.len() - 1]; // strip [ ]
337        let mut elements = Vec::new();
338        let mut current = String::new();
339        let mut nest = 0;
340        let mut in_str = false;
341        let mut str_char = ' ';
342        for ch in inner.chars() {
343            match ch {
344                '\'' | '"' if !in_str => {
345                    in_str = true;
346                    str_char = ch;
347                    current.push(ch);
348                }
349                c if in_str && c == str_char => {
350                    in_str = false;
351                    current.push(ch);
352                }
353                '[' | '{' if !in_str => {
354                    nest += 1;
355                    current.push(ch);
356                }
357                ']' | '}' if !in_str => {
358                    nest -= 1;
359                    current.push(ch);
360                }
361                ',' if !in_str && nest == 0 => {
362                    elements.push(current.trim().to_string());
363                    current.clear();
364                }
365                _ => current.push(ch),
366            }
367        }
368        if !current.trim().is_empty() {
369            elements.push(current.trim().to_string());
370        }
371
372        // Generate one CREATE per element, substituting the variable
373        let mut result = Vec::new();
374        for element in &elements {
375            // Replace occurrences of the variable in the CREATE clause with the literal value
376            let create = create_clause.to_string();
377            // Simple replacement: replace `: var}` and `: var,` patterns
378            // Also handle `{prop: var}` and `{prop: var,`
379            // Use word-boundary replacement
380            let mut new_create = String::new();
381            let var_bytes = var_name.as_bytes();
382            let create_bytes = create.as_bytes();
383            let mut i = 0;
384            while i < create_bytes.len() {
385                if i + var_bytes.len() <= create_bytes.len()
386                    && &create_bytes[i..i + var_bytes.len()] == var_bytes
387                {
388                    // Check word boundaries
389                    let before_ok = i == 0
390                        || !create_bytes[i - 1].is_ascii_alphanumeric()
391                            && create_bytes[i - 1] != b'_';
392                    let after_ok = i + var_bytes.len() >= create_bytes.len()
393                        || !create_bytes[i + var_bytes.len()].is_ascii_alphanumeric()
394                            && create_bytes[i + var_bytes.len()] != b'_';
395                    if before_ok && after_ok {
396                        new_create.push_str(element);
397                        i += var_bytes.len();
398                        continue;
399                    }
400                }
401                new_create.push(create_bytes[i] as char);
402                i += 1;
403            }
404            result.push(new_create);
405        }
406
407        if result.is_empty() {
408            None
409        } else {
410            Some(result)
411        }
412    }
413
414    fn rewrite_multi_create(input: &str) -> String {
415        // Regex-free approach: split on CREATE keyword boundaries (not inside quotes)
416        let upper = input.to_uppercase();
417        if upper.matches("CREATE").count() <= 1 {
418            return input.to_string();
419        }
420
421        // Find positions of each CREATE keyword (not inside quotes)
422        let mut create_positions = Vec::new();
423        let bytes = input.as_bytes();
424        let upper_bytes = upper.as_bytes();
425        let mut in_single = false;
426        let mut in_double = false;
427        let mut i = 0;
428        while i < bytes.len() {
429            match bytes[i] {
430                b'\'' if !in_double => in_single = !in_single,
431                b'"' if !in_single => in_double = !in_double,
432                _ if !in_single && !in_double && i + 6 <= upper_bytes.len() => {
433                    if upper_bytes[i..i + 6] == *b"CREATE" {
434                        // Make sure it's a keyword boundary (not part of another word)
435                        let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric();
436                        let after_ok =
437                            i + 6 >= bytes.len() || !bytes[i + 6].is_ascii_alphanumeric();
438                        if before_ok && after_ok {
439                            create_positions.push(i);
440                        }
441                    }
442                }
443                _ => {}
444            }
445            i += 1;
446        }
447
448        if create_positions.len() <= 1 {
449            return input.to_string();
450        }
451
452        // If query already has WITH, don't rewrite (user is managing variable passing)
453        if upper.contains("WITH") {
454            return input.to_string();
455        }
456
457        // If query has MATCH, don't rewrite — the MATCH+CREATE pattern is handled
458        // natively by the grammar (MATCH ... CREATE pattern).
459        // For multiple CREATEs after a MATCH, users should use semicolons.
460        if upper.contains("MATCH") {
461            return input.to_string();
462        }
463
464        // Extract variables from each CREATE clause and insert WITH between them
465        let mut result = String::new();
466        let mut accumulated_vars: Vec<String> = Vec::new();
467
468        for (idx, &pos) in create_positions.iter().enumerate() {
469            let end = if idx + 1 < create_positions.len() {
470                create_positions[idx + 1]
471            } else {
472                input.len()
473            };
474            // For the first clause, include any MATCH prefix before the first CREATE
475            let start = if idx == 0 { 0 } else { pos };
476            let clause = input[start..end].trim();
477
478            // If not the first CREATE and we have accumulated variables, insert WITH
479            if idx > 0 && !accumulated_vars.is_empty() {
480                result.push_str(" WITH ");
481                result.push_str(&accumulated_vars.join(", "));
482                result.push(' ');
483            }
484
485            result.push_str(clause);
486
487            // Extract variable names from this CREATE clause: (varname:Label ...) or (varname)
488            let clause_bytes = clause.as_bytes();
489            let mut j = 0;
490            while j < clause_bytes.len() {
491                if clause_bytes[j] == b'(' {
492                    j += 1;
493                    // Skip whitespace
494                    while j < clause_bytes.len() && clause_bytes[j].is_ascii_whitespace() {
495                        j += 1;
496                    }
497                    // Read variable name (alphanumeric + underscore)
498                    let var_start = j;
499                    while j < clause_bytes.len()
500                        && (clause_bytes[j].is_ascii_alphanumeric() || clause_bytes[j] == b'_')
501                    {
502                        j += 1;
503                    }
504                    if j > var_start {
505                        let var = &clause[var_start..j];
506                        // Only collect if it looks like a variable (not a label after colon)
507                        if !accumulated_vars.contains(&var.to_string()) {
508                            accumulated_vars.push(var.to_string());
509                        }
510                    }
511                }
512                j += 1;
513            }
514        }
515
516        result
517    }
518
519    /// Parse and execute a read-only Cypher query (MATCH, RETURN, etc.)
520    /// Supports multiple semicolon-separated statements.
521    pub fn execute(
522        &self,
523        query_str: &str,
524        store: &crate::graph::GraphStore,
525    ) -> Result<RecordBatch, Box<dyn std::error::Error>> {
526        self.execute_with_params(query_str, store, &HashMap::new())
527    }
528
529    /// Execute a read-only query with parameters
530    pub fn execute_with_params(
531        &self,
532        query_str: &str,
533        store: &crate::graph::GraphStore,
534        params: &HashMap<String, crate::graph::PropertyValue>,
535    ) -> Result<RecordBatch, Box<dyn std::error::Error>> {
536        let cleaned = Self::strip_line_comments(query_str);
537        let statements = Self::split_statements(&cleaned);
538        let mut last_result = RecordBatch {
539            records: Vec::new(),
540            columns: Vec::new(),
541        };
542
543        for stmt in &statements {
544            let query = self.cached_parse(stmt)?;
545            let mut executor = QueryExecutor::new(store);
546            if !params.is_empty() {
547                executor = executor.with_params(params.clone());
548            }
549            if self.query_timeout_secs > 0 {
550                executor = executor.with_deadline(
551                    std::time::Instant::now()
552                        + std::time::Duration::from_secs(self.query_timeout_secs),
553                );
554            }
555            last_result = executor.execute(&query)?;
556        }
557
558        Ok(last_result)
559    }
560
561    /// Parse and execute a write Cypher query (CREATE, DELETE, SET, etc.)
562    /// Supports multiple semicolon-separated statements. Each statement
563    /// sees the effects of previous statements (shared store).
564    /// Also rewrites multi-CREATE queries to use WITH for variable sharing.
565    pub fn execute_mut(
566        &self,
567        query_str: &str,
568        store: &mut crate::graph::GraphStore,
569        tenant_id: &str,
570    ) -> Result<RecordBatch, Box<dyn std::error::Error>> {
571        self.execute_mut_with_params(query_str, store, tenant_id, &HashMap::new())
572    }
573
574    /// Execute a write query with parameters
575    pub fn execute_mut_with_params(
576        &self,
577        query_str: &str,
578        store: &mut crate::graph::GraphStore,
579        tenant_id: &str,
580        params: &HashMap<String, crate::graph::PropertyValue>,
581    ) -> Result<RecordBatch, Box<dyn std::error::Error>> {
582        let cleaned = Self::strip_line_comments(query_str);
583        let statements = Self::split_statements(&cleaned);
584        let mut last_result = RecordBatch {
585            records: Vec::new(),
586            columns: Vec::new(),
587        };
588
589        for stmt in &statements {
590            // Rewrite multi-CREATE to use WITH for variable sharing
591            let rewritten = Self::rewrite_multi_create(stmt);
592
593            // Handle UNWIND+CREATE: expand UNWIND list into per-element CREATEs
594            if let Some(expanded) = Self::expand_unwind_create(&rewritten) {
595                for create_stmt in &expanded {
596                    let query = self.cached_parse(create_stmt)?;
597                    let mut executor = MutQueryExecutor::new(store, tenant_id.to_string());
598                    if !params.is_empty() {
599                        executor = executor.with_params(params.clone());
600                    }
601                    last_result = executor.execute(&query)?;
602                }
603                continue;
604            }
605
606            let query = self.cached_parse(&rewritten)?;
607            let mut executor = MutQueryExecutor::new(store, tenant_id.to_string());
608            if !params.is_empty() {
609                executor = executor.with_params(params.clone());
610            }
611            last_result = executor.execute(&query)?;
612        }
613
614        Ok(last_result)
615    }
616}
617
618impl Default for QueryEngine {
619    fn default() -> Self {
620        Self::new()
621    }
622}
623
624#[cfg(test)]
625mod tests {
626    use super::*;
627    use crate::graph::GraphStore;
628
629    #[test]
630    fn test_query_engine_creation() {
631        let engine = QueryEngine::new();
632        drop(engine);
633    }
634
635    #[test]
636    fn test_end_to_end_simple_query() {
637        let mut store = GraphStore::new();
638
639        // Create test data
640        let alice = store.create_node("Person");
641        if let Some(node) = store.get_node_mut(alice) {
642            node.set_property("name", "Alice");
643            node.set_property("age", 30i64);
644        }
645
646        let bob = store.create_node("Person");
647        if let Some(node) = store.get_node_mut(bob) {
648            node.set_property("name", "Bob");
649            node.set_property("age", 25i64);
650        }
651
652        // Execute query
653        let engine = QueryEngine::new();
654        let result = engine.execute("MATCH (n:Person) RETURN n", &store);
655
656        assert!(result.is_ok());
657        let batch = result.unwrap();
658        assert_eq!(batch.len(), 2);
659        assert_eq!(batch.columns.len(), 1);
660        assert_eq!(batch.columns[0], "n");
661    }
662
663    #[test]
664    fn test_query_with_filter() {
665        let mut store = GraphStore::new();
666
667        let alice = store.create_node("Person");
668        if let Some(node) = store.get_node_mut(alice) {
669            node.set_property("name", "Alice");
670            node.set_property("age", 30i64);
671        }
672
673        let bob = store.create_node("Person");
674        if let Some(node) = store.get_node_mut(bob) {
675            node.set_property("name", "Bob");
676            node.set_property("age", 25i64);
677        }
678
679        let engine = QueryEngine::new();
680        let result = engine.execute("MATCH (n:Person) WHERE n.age > 28 RETURN n", &store);
681
682        assert!(result.is_ok());
683        let batch = result.unwrap();
684        assert_eq!(batch.len(), 1); // Only Alice
685    }
686
687    #[test]
688    fn test_query_with_limit() {
689        let mut store = GraphStore::new();
690
691        for i in 0..10 {
692            let node = store.create_node("Person");
693            if let Some(n) = store.get_node_mut(node) {
694                n.set_property("id", i as i64);
695            }
696        }
697
698        let engine = QueryEngine::new();
699        let result = engine.execute("MATCH (n:Person) RETURN n LIMIT 5", &store);
700
701        assert!(result.is_ok());
702        let batch = result.unwrap();
703        assert_eq!(batch.len(), 5);
704    }
705
706    #[test]
707    fn test_query_with_edge_traversal() {
708        let mut store = GraphStore::new();
709
710        let alice = store.create_node("Person");
711        if let Some(node) = store.get_node_mut(alice) {
712            node.set_property("name", "Alice");
713        }
714
715        let bob = store.create_node("Person");
716        if let Some(node) = store.get_node_mut(bob) {
717            node.set_property("name", "Bob");
718        }
719
720        store.create_edge(alice, bob, "KNOWS").unwrap();
721
722        let engine = QueryEngine::new();
723        let result = engine.execute("MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a, b", &store);
724
725        assert!(result.is_ok());
726        let batch = result.unwrap();
727        assert_eq!(batch.len(), 1);
728        assert_eq!(batch.columns.len(), 2);
729    }
730
731    #[test]
732    fn test_property_projection() {
733        let mut store = GraphStore::new();
734
735        let alice = store.create_node("Person");
736        if let Some(node) = store.get_node_mut(alice) {
737            node.set_property("name", "Alice");
738            node.set_property("age", 30i64);
739        }
740
741        let engine = QueryEngine::new();
742        let result = engine.execute("MATCH (n:Person) RETURN n.name, n.age", &store);
743
744        assert!(result.is_ok());
745        let batch = result.unwrap();
746        assert_eq!(batch.len(), 1);
747        assert_eq!(batch.columns.len(), 2);
748        assert_eq!(batch.columns[0], "n.name");
749        assert_eq!(batch.columns[1], "n.age");
750    }
751
752    // ==================== CREATE TESTS ====================
753
754    #[test]
755    fn test_create_single_node() {
756        // Test: CREATE (n:Person)
757        let mut store = GraphStore::new();
758        let engine = QueryEngine::new();
759
760        // Execute CREATE query
761        let result = engine.execute_mut(r#"CREATE (n:Person)"#, &mut store, "default");
762
763        assert!(result.is_ok(), "CREATE query should succeed");
764
765        // Verify node was created by querying it
766        let query_result = engine.execute("MATCH (n:Person) RETURN n", &store);
767        assert!(query_result.is_ok());
768        let batch = query_result.unwrap();
769        assert_eq!(batch.len(), 1, "Should have created 1 Person node");
770    }
771
772    #[test]
773    fn test_create_node_with_properties() {
774        // Test: CREATE (n:Person {name: "Alice", age: 30})
775        let mut store = GraphStore::new();
776        let engine = QueryEngine::new();
777
778        // Execute CREATE query with properties
779        let result = engine.execute_mut(
780            r#"CREATE (n:Person {name: "Alice", age: 30})"#,
781            &mut store,
782            "default",
783        );
784
785        assert!(
786            result.is_ok(),
787            "CREATE query with properties should succeed"
788        );
789
790        // Verify node was created with correct properties
791        let query_result = engine.execute("MATCH (n:Person) RETURN n.name, n.age", &store);
792        assert!(query_result.is_ok());
793        let batch = query_result.unwrap();
794        assert_eq!(batch.len(), 1, "Should have created 1 Person node");
795    }
796
797    #[test]
798    fn test_create_multiple_nodes() {
799        // Test multiple CREATE operations
800        let mut store = GraphStore::new();
801        let engine = QueryEngine::new();
802
803        // Create first node
804        let result1 = engine.execute_mut(
805            r#"CREATE (a:Person {name: "Alice"})"#,
806            &mut store,
807            "default",
808        );
809        assert!(result1.is_ok());
810
811        // Create second node
812        let result2 =
813            engine.execute_mut(r#"CREATE (b:Person {name: "Bob"})"#, &mut store, "default");
814        assert!(result2.is_ok());
815
816        // Verify both nodes exist
817        let query_result = engine.execute("MATCH (n:Person) RETURN n", &store);
818        assert!(query_result.is_ok());
819        let batch = query_result.unwrap();
820        assert_eq!(batch.len(), 2, "Should have created 2 Person nodes");
821    }
822
823    #[test]
824    fn test_create_returns_error_on_readonly_executor() {
825        // Test that using read-only executor for CREATE fails
826        let store = GraphStore::new();
827        let engine = QueryEngine::new();
828
829        // Try to execute CREATE with read-only execute() - should fail
830        let result = engine.execute(r#"CREATE (n:Person)"#, &store);
831
832        assert!(
833            result.is_err(),
834            "CREATE should fail with read-only executor"
835        );
836    }
837
838    // ==================== CREATE EDGE TESTS ====================
839
840    #[test]
841    fn test_create_edge_simple() {
842        // Test: CREATE (a:Person)-[:KNOWS]->(b:Person)
843        let mut store = GraphStore::new();
844        let engine = QueryEngine::new();
845
846        // Execute CREATE query with edge
847        let result = engine.execute_mut(
848            r#"CREATE (a:Person {name: "Alice"})-[:KNOWS]->(b:Person {name: "Bob"})"#,
849            &mut store,
850            "default",
851        );
852
853        assert!(
854            result.is_ok(),
855            "CREATE with edge should succeed: {:?}",
856            result.err()
857        );
858
859        // Verify nodes were created
860        let query_result = engine.execute("MATCH (n:Person) RETURN n", &store);
861        assert!(query_result.is_ok());
862        let batch = query_result.unwrap();
863        assert_eq!(batch.len(), 2, "Should have created 2 Person nodes");
864
865        // Verify edge was created by querying the relationship
866        let edge_result =
867            engine.execute("MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a, b", &store);
868        assert!(edge_result.is_ok(), "Edge query should succeed");
869        let edge_batch = edge_result.unwrap();
870        assert_eq!(edge_batch.len(), 1, "Should have 1 KNOWS relationship");
871    }
872
873    #[test]
874    fn test_create_edge_with_properties() {
875        // Test: CREATE (a:Person)-[:KNOWS {since: 2020}]->(b:Person)
876        let mut store = GraphStore::new();
877        let engine = QueryEngine::new();
878
879        // Execute CREATE query with edge properties
880        let result = engine.execute_mut(
881            r#"CREATE (a:Person {name: "Alice"})-[:FRIENDS {since: 2020}]->(b:Person {name: "Bob"})"#,
882            &mut store,
883            "default"
884        );
885
886        assert!(
887            result.is_ok(),
888            "CREATE with edge properties should succeed: {:?}",
889            result.err()
890        );
891
892        // Verify edge was created
893        let edge_result = engine.execute(
894            "MATCH (a:Person)-[r:FRIENDS]->(b:Person) RETURN a, r, b",
895            &store,
896        );
897        assert!(edge_result.is_ok(), "Edge query should succeed");
898        let edge_batch = edge_result.unwrap();
899        assert_eq!(edge_batch.len(), 1, "Should have 1 FRIENDS relationship");
900    }
901
902    #[test]
903    fn test_create_chain_pattern() {
904        // Test: CREATE (a:Person)-[:KNOWS]->(b:Person)-[:LIKES]->(c:Movie)
905        let mut store = GraphStore::new();
906        let engine = QueryEngine::new();
907
908        // Execute CREATE query with chain of edges
909        let result = engine.execute_mut(
910            r#"CREATE (a:Person {name: "Alice"})-[:KNOWS]->(b:Person {name: "Bob"})-[:LIKES]->(c:Movie {title: "Matrix"})"#,
911            &mut store,
912            "default"
913        );
914
915        assert!(
916            result.is_ok(),
917            "CREATE chain should succeed: {:?}",
918            result.err()
919        );
920
921        // Verify 2 Person nodes and 1 Movie node created
922        let person_result = engine.execute("MATCH (n:Person) RETURN n", &store);
923        assert!(person_result.is_ok());
924        assert_eq!(
925            person_result.unwrap().len(),
926            2,
927            "Should have 2 Person nodes"
928        );
929
930        let movie_result = engine.execute("MATCH (n:Movie) RETURN n", &store);
931        assert!(movie_result.is_ok());
932        assert_eq!(movie_result.unwrap().len(), 1, "Should have 1 Movie node");
933
934        // Verify both edges were created
935        let knows_result =
936            engine.execute("MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a, b", &store);
937        assert!(knows_result.is_ok());
938        assert_eq!(
939            knows_result.unwrap().len(),
940            1,
941            "Should have 1 KNOWS relationship"
942        );
943
944        let likes_result =
945            engine.execute("MATCH (a:Person)-[:LIKES]->(b:Movie) RETURN a, b", &store);
946        assert!(likes_result.is_ok());
947        assert_eq!(
948            likes_result.unwrap().len(),
949            1,
950            "Should have 1 LIKES relationship"
951        );
952    }
953
954    #[test]
955    fn test_cache_hit_miss_tracking() {
956        let store = GraphStore::new();
957        let engine = QueryEngine::new();
958
959        // First execution — cache miss
960        let _ = engine.execute("MATCH (n:Person) RETURN n", &store);
961        assert_eq!(engine.cache_stats().hits(), 0);
962        assert_eq!(engine.cache_stats().misses(), 1);
963        assert_eq!(engine.cache_len(), 1);
964
965        // Second identical query — cache hit
966        let _ = engine.execute("MATCH (n:Person) RETURN n", &store);
967        assert_eq!(engine.cache_stats().hits(), 1);
968        assert_eq!(engine.cache_stats().misses(), 1);
969
970        // Different query — cache miss
971        let _ = engine.execute("MATCH (n:Movie) RETURN n", &store);
972        assert_eq!(engine.cache_stats().hits(), 1);
973        assert_eq!(engine.cache_stats().misses(), 2);
974        assert_eq!(engine.cache_len(), 2);
975
976        // Whitespace-normalized hit
977        let _ = engine.execute("MATCH  (n:Person)  RETURN  n", &store);
978        assert_eq!(engine.cache_stats().hits(), 2);
979        assert_eq!(engine.cache_stats().misses(), 2);
980    }
981
982    #[test]
983    fn test_lru_eviction() {
984        let store = GraphStore::new();
985        let engine = QueryEngine::with_capacity(2);
986
987        // Fill cache to capacity
988        let _ = engine.execute("MATCH (a:Person) RETURN a", &store);
989        let _ = engine.execute("MATCH (b:Movie) RETURN b", &store);
990        assert_eq!(engine.cache_len(), 2);
991
992        // Third distinct query should evict the LRU entry
993        let _ = engine.execute("MATCH (c:Company) RETURN c", &store);
994        assert_eq!(engine.cache_len(), 2); // Still 2, not 3
995
996        // The first query should have been evicted (was LRU)
997        let _ = engine.execute("MATCH (a:Person) RETURN a", &store);
998        // If evicted: miss count goes up; if still cached: hit count goes up
999        // We had 3 misses so far, this should be a 4th miss
1000        assert_eq!(engine.cache_stats().misses(), 4);
1001    }
1002
1003    // ==================== MULTI-STATEMENT TESTS ====================
1004
1005    #[test]
1006    fn test_split_statements_basic() {
1007        let stmts = QueryEngine::split_statements("CREATE (a:Person); CREATE (b:Person)");
1008        assert_eq!(stmts, vec!["CREATE (a:Person)", "CREATE (b:Person)"]);
1009    }
1010
1011    #[test]
1012    fn test_split_statements_with_whitespace() {
1013        let stmts = QueryEngine::split_statements("  CREATE (a:Person) ;  CREATE (b:Person) ;  ");
1014        assert_eq!(stmts, vec!["CREATE (a:Person)", "CREATE (b:Person)"]);
1015    }
1016
1017    #[test]
1018    fn test_split_statements_no_semicolon() {
1019        let stmts = QueryEngine::split_statements("MATCH (n) RETURN n");
1020        assert_eq!(stmts, vec!["MATCH (n) RETURN n"]);
1021    }
1022
1023    #[test]
1024    fn test_split_statements_respects_single_quotes() {
1025        let stmts = QueryEngine::split_statements("CREATE (n:P {x: 'a;b'}); MATCH (n) RETURN n");
1026        assert_eq!(stmts, vec!["CREATE (n:P {x: 'a;b'})", "MATCH (n) RETURN n"]);
1027    }
1028
1029    #[test]
1030    fn test_split_statements_respects_double_quotes() {
1031        let stmts = QueryEngine::split_statements(r#"CREATE (n:P {x: "a;b"}); MATCH (n) RETURN n"#);
1032        assert_eq!(
1033            stmts,
1034            vec![r#"CREATE (n:P {x: "a;b"})"#, "MATCH (n) RETURN n"]
1035        );
1036    }
1037
1038    #[test]
1039    fn test_multi_create_with_semicolons() {
1040        let mut store = GraphStore::new();
1041        let engine = QueryEngine::new();
1042
1043        engine
1044            .execute_mut(
1045                "CREATE (a:Person {name: 'Alice'}); CREATE (b:Person {name: 'Bob'})",
1046                &mut store,
1047                "default",
1048            )
1049            .unwrap();
1050
1051        let result = engine
1052            .execute("MATCH (n:Person) RETURN count(n)", &store)
1053            .unwrap();
1054        let count = result.records[0]
1055            .get("count(n)")
1056            .unwrap()
1057            .as_property()
1058            .unwrap()
1059            .as_integer()
1060            .unwrap();
1061        assert_eq!(count, 2);
1062    }
1063
1064    #[test]
1065    fn test_multi_statement_create_then_match_relationship() {
1066        let mut store = GraphStore::new();
1067        let engine = QueryEngine::new();
1068
1069        // Create nodes and relationship in one multi-statement call
1070        engine
1071            .execute_mut(
1072                "CREATE (a:Person {name: 'Alice'}); \
1073             CREATE (b:Person {name: 'Bob'}); \
1074             MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) CREATE (a)-[:KNOWS]->(b)",
1075                &mut store,
1076                "default",
1077            )
1078            .unwrap();
1079
1080        // Verify
1081        let result = engine
1082            .execute(
1083                "MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a.name, b.name",
1084                &store,
1085            )
1086            .unwrap();
1087        assert_eq!(result.len(), 1);
1088    }
1089
1090    #[test]
1091    fn test_multi_statement_trailing_semicolon() {
1092        let mut store = GraphStore::new();
1093        let engine = QueryEngine::new();
1094
1095        // Trailing semicolons should be fine
1096        engine
1097            .execute_mut("CREATE (n:Test {val: 1});", &mut store, "default")
1098            .unwrap();
1099
1100        let result = engine
1101            .execute("MATCH (n:Test) RETURN count(n)", &store)
1102            .unwrap();
1103        let count = result.records[0]
1104            .get("count(n)")
1105            .unwrap()
1106            .as_property()
1107            .unwrap()
1108            .as_integer()
1109            .unwrap();
1110        assert_eq!(count, 1);
1111    }
1112
1113    #[test]
1114    fn test_multi_create_no_semicolons_shared_variables() {
1115        // This is the key user request: multi-line CREATE with variable sharing
1116        let mut store = GraphStore::new();
1117        let engine = QueryEngine::new();
1118
1119        engine
1120            .execute_mut(
1121                "CREATE (a:Person {name: 'Alice', age: 30})
1122                 CREATE (b:Person {name: 'Bob', age: 25})
1123                 CREATE (a)-[:KNOWS {since: 2020}]->(b)",
1124                &mut store,
1125                "default",
1126            )
1127            .unwrap();
1128
1129        // Verify nodes
1130        let result = engine
1131            .execute("MATCH (n:Person) RETURN count(n)", &store)
1132            .unwrap();
1133        let count = result.records[0]
1134            .get("count(n)")
1135            .unwrap()
1136            .as_property()
1137            .unwrap()
1138            .as_integer()
1139            .unwrap();
1140        assert_eq!(count, 2, "Should have 2 Person nodes");
1141
1142        // Verify relationship
1143        let result = engine
1144            .execute(
1145                "MATCH (a:Person)-[r:KNOWS]->(b:Person) RETURN a.name, b.name, r.since",
1146                &store,
1147            )
1148            .unwrap();
1149        assert_eq!(result.len(), 1, "Should have 1 KNOWS relationship");
1150    }
1151
1152    #[test]
1153    fn test_multi_create_with_match_no_semicolons() {
1154        let mut store = GraphStore::new();
1155        let engine = QueryEngine::new();
1156
1157        // Create then query in one go (use semicolons to separate CREATE+MATCH)
1158        engine
1159            .execute_mut(
1160                "CREATE (c:City {name: 'Paris'});
1161                 CREATE (p:Person {name: 'Alice'});
1162                 MATCH (p:Person {name: 'Alice'}), (c:City {name: 'Paris'})
1163                 CREATE (p)-[:LIVES_IN]->(c)",
1164                &mut store,
1165                "default",
1166            )
1167            .unwrap();
1168
1169        let result = engine
1170            .execute(
1171                "MATCH (p:Person)-[:LIVES_IN]->(c:City) RETURN p.name, c.name",
1172                &store,
1173            )
1174            .unwrap();
1175        assert_eq!(result.len(), 1);
1176    }
1177
1178    #[test]
1179    fn test_match_then_create_then_create_relationship() {
1180        // MATCH existing node, CREATE new node, CREATE relationship
1181        // Uses semicolons to separate MATCH+CREATE from the relationship CREATE
1182        let mut store = GraphStore::new();
1183        let engine = QueryEngine::new();
1184
1185        // Use semicolons: CREATE both nodes, then MATCH both to create relationship
1186        engine
1187            .execute_mut(
1188                "CREATE (f:Person {name: 'Fabisch Kamau'}); \
1189                 CREATE (g:Person {name: 'Gloria Muthoni'}); \
1190                 MATCH (f:Person {name: 'Fabisch Kamau'}), (g:Person {name: 'Gloria Muthoni'}) \
1191                 CREATE (f)-[:LOVES]->(g)",
1192                &mut store,
1193                "default",
1194            )
1195            .unwrap();
1196
1197        // Verify
1198        let result = engine
1199            .execute(
1200                "MATCH (a:Person)-[:LOVES]->(b:Person) RETURN a.name, b.name",
1201                &store,
1202            )
1203            .unwrap();
1204        assert_eq!(result.len(), 1);
1205    }
1206
1207    #[test]
1208    fn test_multi_merge_script_with_set_and_relationships() {
1209        let engine = QueryEngine::new();
1210        let mut store = GraphStore::new();
1211
1212        let script = r#"
1213// Projects
1214MERGE (p1:Project {name: "GraphMind"})
1215MERGE (p2:Project {name: "AI Assistant"})
1216// Skills
1217MERGE (s1:Skill {name: "Semantic Search"})
1218SET s1.description = "Search using embeddings", s1.confidence = 0.9
1219MERGE (s2:Skill {name: "Graph Traversal"})
1220SET s2.description = "Traverse relationships"
1221// Relationships
1222MERGE (s1)-[:BELONGS_TO_PROJECT]->(p1)
1223MERGE (s2)-[:BELONGS_TO_PROJECT]->(p1)
1224"#;
1225
1226        let result = engine.execute_mut(script, &mut store, "default");
1227        assert!(
1228            result.is_ok(),
1229            "Multi-MERGE script should execute: {:?}",
1230            result.err()
1231        );
1232
1233        // Verify nodes created
1234        assert!(
1235            store.node_count() >= 4,
1236            "Should have at least 4 nodes, got {}",
1237            store.node_count()
1238        );
1239
1240        // Verify edges created
1241        assert!(
1242            store.edge_count() >= 2,
1243            "Should have at least 2 edges, got {}",
1244            store.edge_count()
1245        );
1246
1247        // Verify SET worked
1248        let skills = store.get_nodes_by_label(&crate::graph::Label::new("Skill"));
1249        let s1 = skills.iter().find(|n| {
1250            n.get_property("name")
1251                == Some(&crate::graph::PropertyValue::String(
1252                    "Semantic Search".into(),
1253                ))
1254        });
1255        assert!(s1.is_some(), "Should find Semantic Search skill");
1256        assert_eq!(
1257            s1.unwrap().get_property("confidence"),
1258            Some(&crate::graph::PropertyValue::Float(0.9)),
1259            "SET should have applied confidence"
1260        );
1261    }
1262}