Skip to main content

grafeo_engine/
database.rs

1//! The main database struct and operations.
2//!
3//! Start here with [`GrafeoDB`] - it's your handle to everything.
4
5use std::path::Path;
6use std::sync::Arc;
7
8use parking_lot::RwLock;
9
10use grafeo_adapters::storage::wal::{WalConfig, WalManager, WalRecord, WalRecovery};
11use grafeo_common::memory::buffer::{BufferManager, BufferManagerConfig};
12use grafeo_common::utils::error::Result;
13use grafeo_core::graph::lpg::LpgStore;
14#[cfg(feature = "rdf")]
15use grafeo_core::graph::rdf::RdfStore;
16
17use crate::config::Config;
18use crate::query::cache::QueryCache;
19use crate::session::Session;
20use crate::transaction::TransactionManager;
21
/// Your handle to a Grafeo database.
///
/// Start here. Create one with [`new_in_memory()`](Self::new_in_memory) for
/// quick experiments, or [`open()`](Self::open) for persistent storage.
/// Then grab a [`session()`](Self::session) to start querying.
///
/// The graph store, transaction manager, and query cache are held behind
/// `Arc`s; [`session()`](Self::session) hands clones of those `Arc`s to every
/// session it creates, so all sessions share the same underlying instances.
///
/// # Examples
///
/// ```
/// use grafeo_engine::GrafeoDB;
///
/// // Quick in-memory database
/// let db = GrafeoDB::new_in_memory();
///
/// // Add some data
/// db.create_node(&["Person"]);
///
/// // Query it
/// let session = db.session();
/// let result = session.execute("MATCH (p:Person) RETURN p")?;
/// # Ok::<(), grafeo_common::utils::error::Error>(())
/// ```
pub struct GrafeoDB {
    /// Database configuration supplied at construction time.
    config: Config,
    /// The underlying graph store (shared with every session).
    store: Arc<LpgStore>,
    /// RDF triple store (if RDF feature is enabled).
    #[cfg(feature = "rdf")]
    rdf_store: Arc<RdfStore>,
    /// Transaction manager (shared with every session).
    tx_manager: Arc<TransactionManager>,
    /// Unified buffer manager.
    buffer_manager: Arc<BufferManager>,
    /// Write-ahead log manager (if durability is enabled).
    ///
    /// `None` when the config disables the WAL or carries no database path.
    wal: Option<Arc<WalManager>>,
    /// Query cache for parsed and optimized plans (shared with every session).
    query_cache: Arc<QueryCache>,
    /// Whether the database is open.
    ///
    /// Initialised to `true` on construction and set to `false` by a
    /// successful [`close()`](Self::close).
    is_open: RwLock<bool>,
}
63
64impl GrafeoDB {
65    /// Creates an in-memory database - fast to create, gone when dropped.
66    ///
67    /// Use this for tests, experiments, or when you don't need persistence.
68    /// For data that survives restarts, use [`open()`](Self::open) instead.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// use grafeo_engine::GrafeoDB;
74    ///
75    /// let db = GrafeoDB::new_in_memory();
76    /// let session = db.session();
77    /// session.execute("INSERT (:Person {name: 'Alice'})")?;
78    /// # Ok::<(), grafeo_common::utils::error::Error>(())
79    /// ```
80    #[must_use]
81    pub fn new_in_memory() -> Self {
82        Self::with_config(Config::in_memory()).expect("In-memory database creation should not fail")
83    }
84
85    /// Opens a database at the given path, creating it if it doesn't exist.
86    ///
87    /// If you've used this path before, Grafeo recovers your data from the
88    /// write-ahead log automatically. First open on a new path creates an
89    /// empty database.
90    ///
91    /// # Errors
92    ///
93    /// Returns an error if the path isn't writable or recovery fails.
94    ///
95    /// # Examples
96    ///
97    /// ```no_run
98    /// use grafeo_engine::GrafeoDB;
99    ///
100    /// let db = GrafeoDB::open("./my_social_network")?;
101    /// # Ok::<(), grafeo_common::utils::error::Error>(())
102    /// ```
103    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
104        Self::with_config(Config::persistent(path.as_ref()))
105    }
106
107    /// Creates a database with custom configuration.
108    ///
109    /// Use this when you need fine-grained control over memory limits,
110    /// thread counts, or persistence settings. For most cases,
111    /// [`new_in_memory()`](Self::new_in_memory) or [`open()`](Self::open)
112    /// are simpler.
113    ///
114    /// # Errors
115    ///
116    /// Returns an error if the database can't be created or recovery fails.
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use grafeo_engine::{GrafeoDB, Config};
122    ///
123    /// // In-memory with a 512MB limit
124    /// let config = Config::in_memory()
125    ///     .with_memory_limit(512 * 1024 * 1024);
126    ///
127    /// let db = GrafeoDB::with_config(config)?;
128    /// # Ok::<(), grafeo_common::utils::error::Error>(())
129    /// ```
130    pub fn with_config(config: Config) -> Result<Self> {
131        let store = Arc::new(LpgStore::new());
132        #[cfg(feature = "rdf")]
133        let rdf_store = Arc::new(RdfStore::new());
134        let tx_manager = Arc::new(TransactionManager::new());
135
136        // Create buffer manager with configured limits
137        let buffer_config = BufferManagerConfig {
138            budget: config.memory_limit.unwrap_or_else(|| {
139                (BufferManagerConfig::detect_system_memory() as f64 * 0.75) as usize
140            }),
141            spill_path: config
142                .spill_path
143                .clone()
144                .or_else(|| config.path.as_ref().map(|p| p.join("spill"))),
145            ..BufferManagerConfig::default()
146        };
147        let buffer_manager = BufferManager::new(buffer_config);
148
149        // Initialize WAL if persistence is enabled
150        let wal = if config.wal_enabled {
151            if let Some(ref db_path) = config.path {
152                // Create database directory if it doesn't exist
153                std::fs::create_dir_all(db_path)?;
154
155                let wal_path = db_path.join("wal");
156
157                // Check if WAL exists and recover if needed
158                if wal_path.exists() {
159                    let recovery = WalRecovery::new(&wal_path);
160                    let records = recovery.recover()?;
161                    Self::apply_wal_records(&store, &records)?;
162                }
163
164                // Open/create WAL manager
165                let wal_config = WalConfig::default();
166                let wal_manager = WalManager::with_config(&wal_path, wal_config)?;
167                Some(Arc::new(wal_manager))
168            } else {
169                None
170            }
171        } else {
172            None
173        };
174
175        // Create query cache with default capacity (1000 queries)
176        let query_cache = Arc::new(QueryCache::default());
177
178        Ok(Self {
179            config,
180            store,
181            #[cfg(feature = "rdf")]
182            rdf_store,
183            tx_manager,
184            buffer_manager,
185            wal,
186            query_cache,
187            is_open: RwLock::new(true),
188        })
189    }
190
191    /// Applies WAL records to restore the database state.
192    fn apply_wal_records(store: &LpgStore, records: &[WalRecord]) -> Result<()> {
193        for record in records {
194            match record {
195                WalRecord::CreateNode { id, labels } => {
196                    let label_refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
197                    store.create_node_with_id(*id, &label_refs);
198                }
199                WalRecord::DeleteNode { id } => {
200                    store.delete_node(*id);
201                }
202                WalRecord::CreateEdge {
203                    id,
204                    src,
205                    dst,
206                    edge_type,
207                } => {
208                    store.create_edge_with_id(*id, *src, *dst, edge_type);
209                }
210                WalRecord::DeleteEdge { id } => {
211                    store.delete_edge(*id);
212                }
213                WalRecord::SetNodeProperty { id, key, value } => {
214                    store.set_node_property(*id, key, value.clone());
215                }
216                WalRecord::SetEdgeProperty { id, key, value } => {
217                    store.set_edge_property(*id, key, value.clone());
218                }
219                WalRecord::AddNodeLabel { id, label } => {
220                    store.add_label(*id, label);
221                }
222                WalRecord::RemoveNodeLabel { id, label } => {
223                    store.remove_label(*id, label);
224                }
225                WalRecord::TxCommit { .. }
226                | WalRecord::TxAbort { .. }
227                | WalRecord::Checkpoint { .. } => {
228                    // Transaction control records don't need replay action
229                    // (recovery already filtered to only committed transactions)
230                }
231            }
232        }
233        Ok(())
234    }
235
236    /// Opens a new session for running queries.
237    ///
238    /// Sessions are cheap to create - spin up as many as you need. Each
239    /// gets its own transaction context, so concurrent sessions won't
240    /// block each other on reads.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// use grafeo_engine::GrafeoDB;
246    ///
247    /// let db = GrafeoDB::new_in_memory();
248    /// let session = db.session();
249    ///
250    /// // Run queries through the session
251    /// let result = session.execute("MATCH (n) RETURN count(n)")?;
252    /// # Ok::<(), grafeo_common::utils::error::Error>(())
253    /// ```
254    #[must_use]
255    pub fn session(&self) -> Session {
256        #[cfg(feature = "rdf")]
257        {
258            Session::with_rdf_store_and_adaptive(
259                Arc::clone(&self.store),
260                Arc::clone(&self.rdf_store),
261                Arc::clone(&self.tx_manager),
262                Arc::clone(&self.query_cache),
263                self.config.adaptive.clone(),
264                self.config.factorized_execution,
265            )
266        }
267        #[cfg(not(feature = "rdf"))]
268        {
269            Session::with_adaptive(
270                Arc::clone(&self.store),
271                Arc::clone(&self.tx_manager),
272                Arc::clone(&self.query_cache),
273                self.config.adaptive.clone(),
274                self.config.factorized_execution,
275            )
276        }
277    }
278
279    /// Returns the adaptive execution configuration.
280    #[must_use]
281    pub fn adaptive_config(&self) -> &crate::config::AdaptiveConfig {
282        &self.config.adaptive
283    }
284
285    /// Runs a query directly on the database.
286    ///
287    /// A convenience method that creates a temporary session behind the
288    /// scenes. If you're running multiple queries, grab a
289    /// [`session()`](Self::session) instead to avoid the overhead.
290    ///
291    /// # Errors
292    ///
293    /// Returns an error if parsing or execution fails.
294    pub fn execute(&self, query: &str) -> Result<QueryResult> {
295        let session = self.session();
296        session.execute(query)
297    }
298
299    /// Executes a query with parameters and returns the result.
300    ///
301    /// # Errors
302    ///
303    /// Returns an error if the query fails.
304    pub fn execute_with_params(
305        &self,
306        query: &str,
307        params: std::collections::HashMap<String, grafeo_common::types::Value>,
308    ) -> Result<QueryResult> {
309        let session = self.session();
310        session.execute_with_params(query, params)
311    }
312
313    /// Executes a Cypher query and returns the result.
314    ///
315    /// # Errors
316    ///
317    /// Returns an error if the query fails.
318    #[cfg(feature = "cypher")]
319    pub fn execute_cypher(&self, query: &str) -> Result<QueryResult> {
320        let session = self.session();
321        session.execute_cypher(query)
322    }
323
324    /// Executes a Cypher query with parameters and returns the result.
325    ///
326    /// # Errors
327    ///
328    /// Returns an error if the query fails.
329    #[cfg(feature = "cypher")]
330    pub fn execute_cypher_with_params(
331        &self,
332        query: &str,
333        params: std::collections::HashMap<String, grafeo_common::types::Value>,
334    ) -> Result<QueryResult> {
335        use crate::query::processor::{QueryLanguage, QueryProcessor};
336
337        // Create processor
338        let processor = QueryProcessor::for_lpg(Arc::clone(&self.store));
339        processor.process(query, QueryLanguage::Cypher, Some(&params))
340    }
341
342    /// Executes a Gremlin query and returns the result.
343    ///
344    /// # Errors
345    ///
346    /// Returns an error if the query fails.
347    #[cfg(feature = "gremlin")]
348    pub fn execute_gremlin(&self, query: &str) -> Result<QueryResult> {
349        let session = self.session();
350        session.execute_gremlin(query)
351    }
352
353    /// Executes a Gremlin query with parameters and returns the result.
354    ///
355    /// # Errors
356    ///
357    /// Returns an error if the query fails.
358    #[cfg(feature = "gremlin")]
359    pub fn execute_gremlin_with_params(
360        &self,
361        query: &str,
362        params: std::collections::HashMap<String, grafeo_common::types::Value>,
363    ) -> Result<QueryResult> {
364        let session = self.session();
365        session.execute_gremlin_with_params(query, params)
366    }
367
368    /// Executes a GraphQL query and returns the result.
369    ///
370    /// # Errors
371    ///
372    /// Returns an error if the query fails.
373    #[cfg(feature = "graphql")]
374    pub fn execute_graphql(&self, query: &str) -> Result<QueryResult> {
375        let session = self.session();
376        session.execute_graphql(query)
377    }
378
379    /// Executes a GraphQL query with parameters and returns the result.
380    ///
381    /// # Errors
382    ///
383    /// Returns an error if the query fails.
384    #[cfg(feature = "graphql")]
385    pub fn execute_graphql_with_params(
386        &self,
387        query: &str,
388        params: std::collections::HashMap<String, grafeo_common::types::Value>,
389    ) -> Result<QueryResult> {
390        let session = self.session();
391        session.execute_graphql_with_params(query, params)
392    }
393
394    /// Executes a SPARQL query and returns the result.
395    ///
396    /// SPARQL queries operate on the RDF triple store.
397    ///
398    /// # Errors
399    ///
400    /// Returns an error if the query fails.
401    ///
402    /// # Examples
403    ///
404    /// ```ignore
405    /// use grafeo_engine::GrafeoDB;
406    ///
407    /// let db = GrafeoDB::new_in_memory();
408    /// let result = db.execute_sparql("SELECT ?s ?p ?o WHERE { ?s ?p ?o }")?;
409    /// ```
410    #[cfg(all(feature = "sparql", feature = "rdf"))]
411    pub fn execute_sparql(&self, query: &str) -> Result<QueryResult> {
412        use crate::query::{
413            Executor, optimizer::Optimizer, planner_rdf::RdfPlanner, sparql_translator,
414        };
415
416        // Parse and translate the SPARQL query to a logical plan
417        let logical_plan = sparql_translator::translate(query)?;
418
419        // Optimize the plan
420        let optimizer = Optimizer::from_store(&self.store);
421        let optimized_plan = optimizer.optimize(logical_plan)?;
422
423        // Convert to physical plan using RDF planner
424        let planner = RdfPlanner::new(Arc::clone(&self.rdf_store));
425        let mut physical_plan = planner.plan(&optimized_plan)?;
426
427        // Execute the plan
428        let executor = Executor::with_columns(physical_plan.columns.clone());
429        executor.execute(physical_plan.operator.as_mut())
430    }
431
432    /// Returns the RDF store.
433    ///
434    /// This provides direct access to the RDF store for triple operations.
435    #[cfg(feature = "rdf")]
436    #[must_use]
437    pub fn rdf_store(&self) -> &Arc<RdfStore> {
438        &self.rdf_store
439    }
440
441    /// Executes a query and returns a single scalar value.
442    ///
443    /// # Errors
444    ///
445    /// Returns an error if the query fails or doesn't return exactly one row.
446    pub fn query_scalar<T: FromValue>(&self, query: &str) -> Result<T> {
447        let result = self.execute(query)?;
448        result.scalar()
449    }
450
451    /// Returns the configuration.
452    #[must_use]
453    pub fn config(&self) -> &Config {
454        &self.config
455    }
456
457    /// Returns the underlying store.
458    ///
459    /// This provides direct access to the LPG store for algorithm implementations.
460    #[must_use]
461    pub fn store(&self) -> &Arc<LpgStore> {
462        &self.store
463    }
464
465    /// Returns the buffer manager for memory-aware operations.
466    #[must_use]
467    pub fn buffer_manager(&self) -> &Arc<BufferManager> {
468        &self.buffer_manager
469    }
470
471    /// Closes the database, flushing all pending writes.
472    ///
473    /// For persistent databases, this ensures everything is safely on disk.
474    /// Called automatically when the database is dropped, but you can call
475    /// it explicitly if you need to guarantee durability at a specific point.
476    ///
477    /// # Errors
478    ///
479    /// Returns an error if the WAL can't be flushed (check disk space/permissions).
480    pub fn close(&self) -> Result<()> {
481        let mut is_open = self.is_open.write();
482        if !*is_open {
483            return Ok(());
484        }
485
486        // Commit and checkpoint WAL
487        if let Some(ref wal) = self.wal {
488            let epoch = self.store.current_epoch();
489
490            // Use the last assigned transaction ID, or create a checkpoint-only tx
491            let checkpoint_tx = self.tx_manager.last_assigned_tx_id().unwrap_or_else(|| {
492                // No transactions have been started; begin one for checkpoint
493                self.tx_manager.begin()
494            });
495
496            // Log a TxCommit to mark all pending records as committed
497            wal.log(&WalRecord::TxCommit {
498                tx_id: checkpoint_tx,
499            })?;
500
501            // Then checkpoint
502            wal.checkpoint(checkpoint_tx, epoch)?;
503            wal.sync()?;
504        }
505
506        *is_open = false;
507        Ok(())
508    }
509
510    /// Returns the WAL manager if available.
511    #[must_use]
512    pub fn wal(&self) -> Option<&Arc<WalManager>> {
513        self.wal.as_ref()
514    }
515
516    /// Logs a WAL record if WAL is enabled.
517    fn log_wal(&self, record: &WalRecord) -> Result<()> {
518        if let Some(ref wal) = self.wal {
519            wal.log(record)?;
520        }
521        Ok(())
522    }
523
524    /// Returns the number of nodes in the database.
525    #[must_use]
526    pub fn node_count(&self) -> usize {
527        self.store.node_count()
528    }
529
530    /// Returns the number of edges in the database.
531    #[must_use]
532    pub fn edge_count(&self) -> usize {
533        self.store.edge_count()
534    }
535
536    /// Returns the number of distinct labels in the database.
537    #[must_use]
538    pub fn label_count(&self) -> usize {
539        self.store.label_count()
540    }
541
542    /// Returns the number of distinct property keys in the database.
543    #[must_use]
544    pub fn property_key_count(&self) -> usize {
545        self.store.property_key_count()
546    }
547
548    /// Returns the number of distinct edge types in the database.
549    #[must_use]
550    pub fn edge_type_count(&self) -> usize {
551        self.store.edge_type_count()
552    }
553
554    // === Node Operations ===
555
556    /// Creates a node with the given labels and returns its ID.
557    ///
558    /// Labels categorize nodes - think of them like tags. A node can have
559    /// multiple labels (e.g., `["Person", "Employee"]`).
560    ///
561    /// # Examples
562    ///
563    /// ```
564    /// use grafeo_engine::GrafeoDB;
565    ///
566    /// let db = GrafeoDB::new_in_memory();
567    /// let alice = db.create_node(&["Person"]);
568    /// let company = db.create_node(&["Company", "Startup"]);
569    /// ```
570    pub fn create_node(&self, labels: &[&str]) -> grafeo_common::types::NodeId {
571        let id = self.store.create_node(labels);
572
573        // Log to WAL if enabled
574        if let Err(e) = self.log_wal(&WalRecord::CreateNode {
575            id,
576            labels: labels.iter().map(|s| s.to_string()).collect(),
577        }) {
578            tracing::warn!("Failed to log CreateNode to WAL: {}", e);
579        }
580
581        id
582    }
583
584    /// Creates a new node with labels and properties.
585    ///
586    /// If WAL is enabled, the operation is logged for durability.
587    pub fn create_node_with_props(
588        &self,
589        labels: &[&str],
590        properties: impl IntoIterator<
591            Item = (
592                impl Into<grafeo_common::types::PropertyKey>,
593                impl Into<grafeo_common::types::Value>,
594            ),
595        >,
596    ) -> grafeo_common::types::NodeId {
597        // Collect properties first so we can log them to WAL
598        let props: Vec<(
599            grafeo_common::types::PropertyKey,
600            grafeo_common::types::Value,
601        )> = properties
602            .into_iter()
603            .map(|(k, v)| (k.into(), v.into()))
604            .collect();
605
606        let id = self
607            .store
608            .create_node_with_props(labels, props.iter().map(|(k, v)| (k.clone(), v.clone())));
609
610        // Log node creation to WAL
611        if let Err(e) = self.log_wal(&WalRecord::CreateNode {
612            id,
613            labels: labels.iter().map(|s| s.to_string()).collect(),
614        }) {
615            tracing::warn!("Failed to log CreateNode to WAL: {}", e);
616        }
617
618        // Log each property to WAL for full durability
619        for (key, value) in props {
620            if let Err(e) = self.log_wal(&WalRecord::SetNodeProperty {
621                id,
622                key: key.to_string(),
623                value,
624            }) {
625                tracing::warn!("Failed to log SetNodeProperty to WAL: {}", e);
626            }
627        }
628
629        id
630    }
631
632    /// Gets a node by ID.
633    #[must_use]
634    pub fn get_node(
635        &self,
636        id: grafeo_common::types::NodeId,
637    ) -> Option<grafeo_core::graph::lpg::Node> {
638        self.store.get_node(id)
639    }
640
641    /// Deletes a node and all its edges.
642    ///
643    /// If WAL is enabled, the operation is logged for durability.
644    pub fn delete_node(&self, id: grafeo_common::types::NodeId) -> bool {
645        let result = self.store.delete_node(id);
646
647        if result {
648            if let Err(e) = self.log_wal(&WalRecord::DeleteNode { id }) {
649                tracing::warn!("Failed to log DeleteNode to WAL: {}", e);
650            }
651        }
652
653        result
654    }
655
656    /// Sets a property on a node.
657    ///
658    /// If WAL is enabled, the operation is logged for durability.
659    pub fn set_node_property(
660        &self,
661        id: grafeo_common::types::NodeId,
662        key: &str,
663        value: grafeo_common::types::Value,
664    ) {
665        // Log to WAL first
666        if let Err(e) = self.log_wal(&WalRecord::SetNodeProperty {
667            id,
668            key: key.to_string(),
669            value: value.clone(),
670        }) {
671            tracing::warn!("Failed to log SetNodeProperty to WAL: {}", e);
672        }
673
674        self.store.set_node_property(id, key, value);
675    }
676
677    /// Adds a label to an existing node.
678    ///
679    /// Returns `true` if the label was added, `false` if the node doesn't exist
680    /// or already has the label.
681    ///
682    /// # Examples
683    ///
684    /// ```
685    /// use grafeo_engine::GrafeoDB;
686    ///
687    /// let db = GrafeoDB::new_in_memory();
688    /// let alice = db.create_node(&["Person"]);
689    ///
690    /// // Promote Alice to Employee
691    /// let added = db.add_node_label(alice, "Employee");
692    /// assert!(added);
693    /// ```
694    pub fn add_node_label(&self, id: grafeo_common::types::NodeId, label: &str) -> bool {
695        let result = self.store.add_label(id, label);
696
697        if result {
698            // Log to WAL if enabled
699            if let Err(e) = self.log_wal(&WalRecord::AddNodeLabel {
700                id,
701                label: label.to_string(),
702            }) {
703                tracing::warn!("Failed to log AddNodeLabel to WAL: {}", e);
704            }
705        }
706
707        result
708    }
709
710    /// Removes a label from a node.
711    ///
712    /// Returns `true` if the label was removed, `false` if the node doesn't exist
713    /// or doesn't have the label.
714    ///
715    /// # Examples
716    ///
717    /// ```
718    /// use grafeo_engine::GrafeoDB;
719    ///
720    /// let db = GrafeoDB::new_in_memory();
721    /// let alice = db.create_node(&["Person", "Employee"]);
722    ///
723    /// // Remove Employee status
724    /// let removed = db.remove_node_label(alice, "Employee");
725    /// assert!(removed);
726    /// ```
727    pub fn remove_node_label(&self, id: grafeo_common::types::NodeId, label: &str) -> bool {
728        let result = self.store.remove_label(id, label);
729
730        if result {
731            // Log to WAL if enabled
732            if let Err(e) = self.log_wal(&WalRecord::RemoveNodeLabel {
733                id,
734                label: label.to_string(),
735            }) {
736                tracing::warn!("Failed to log RemoveNodeLabel to WAL: {}", e);
737            }
738        }
739
740        result
741    }
742
743    /// Gets all labels for a node.
744    ///
745    /// Returns `None` if the node doesn't exist.
746    ///
747    /// # Examples
748    ///
749    /// ```
750    /// use grafeo_engine::GrafeoDB;
751    ///
752    /// let db = GrafeoDB::new_in_memory();
753    /// let alice = db.create_node(&["Person", "Employee"]);
754    ///
755    /// let labels = db.get_node_labels(alice).unwrap();
756    /// assert!(labels.contains(&"Person".to_string()));
757    /// assert!(labels.contains(&"Employee".to_string()));
758    /// ```
759    #[must_use]
760    pub fn get_node_labels(&self, id: grafeo_common::types::NodeId) -> Option<Vec<String>> {
761        self.store
762            .get_node(id)
763            .map(|node| node.labels.iter().map(|s| s.to_string()).collect())
764    }
765
766    // === Edge Operations ===
767
768    /// Creates an edge (relationship) between two nodes.
769    ///
770    /// Edges connect nodes and have a type that describes the relationship.
771    /// They're directed - the order of `src` and `dst` matters.
772    ///
773    /// # Examples
774    ///
775    /// ```
776    /// use grafeo_engine::GrafeoDB;
777    ///
778    /// let db = GrafeoDB::new_in_memory();
779    /// let alice = db.create_node(&["Person"]);
780    /// let bob = db.create_node(&["Person"]);
781    ///
782    /// // Alice knows Bob (directed: Alice -> Bob)
783    /// let edge = db.create_edge(alice, bob, "KNOWS");
784    /// ```
785    pub fn create_edge(
786        &self,
787        src: grafeo_common::types::NodeId,
788        dst: grafeo_common::types::NodeId,
789        edge_type: &str,
790    ) -> grafeo_common::types::EdgeId {
791        let id = self.store.create_edge(src, dst, edge_type);
792
793        // Log to WAL if enabled
794        if let Err(e) = self.log_wal(&WalRecord::CreateEdge {
795            id,
796            src,
797            dst,
798            edge_type: edge_type.to_string(),
799        }) {
800            tracing::warn!("Failed to log CreateEdge to WAL: {}", e);
801        }
802
803        id
804    }
805
806    /// Creates a new edge with properties.
807    ///
808    /// If WAL is enabled, the operation is logged for durability.
809    pub fn create_edge_with_props(
810        &self,
811        src: grafeo_common::types::NodeId,
812        dst: grafeo_common::types::NodeId,
813        edge_type: &str,
814        properties: impl IntoIterator<
815            Item = (
816                impl Into<grafeo_common::types::PropertyKey>,
817                impl Into<grafeo_common::types::Value>,
818            ),
819        >,
820    ) -> grafeo_common::types::EdgeId {
821        // Collect properties first so we can log them to WAL
822        let props: Vec<(
823            grafeo_common::types::PropertyKey,
824            grafeo_common::types::Value,
825        )> = properties
826            .into_iter()
827            .map(|(k, v)| (k.into(), v.into()))
828            .collect();
829
830        let id = self.store.create_edge_with_props(
831            src,
832            dst,
833            edge_type,
834            props.iter().map(|(k, v)| (k.clone(), v.clone())),
835        );
836
837        // Log edge creation to WAL
838        if let Err(e) = self.log_wal(&WalRecord::CreateEdge {
839            id,
840            src,
841            dst,
842            edge_type: edge_type.to_string(),
843        }) {
844            tracing::warn!("Failed to log CreateEdge to WAL: {}", e);
845        }
846
847        // Log each property to WAL for full durability
848        for (key, value) in props {
849            if let Err(e) = self.log_wal(&WalRecord::SetEdgeProperty {
850                id,
851                key: key.to_string(),
852                value,
853            }) {
854                tracing::warn!("Failed to log SetEdgeProperty to WAL: {}", e);
855            }
856        }
857
858        id
859    }
860
861    /// Gets an edge by ID.
862    #[must_use]
863    pub fn get_edge(
864        &self,
865        id: grafeo_common::types::EdgeId,
866    ) -> Option<grafeo_core::graph::lpg::Edge> {
867        self.store.get_edge(id)
868    }
869
870    /// Deletes an edge.
871    ///
872    /// If WAL is enabled, the operation is logged for durability.
873    pub fn delete_edge(&self, id: grafeo_common::types::EdgeId) -> bool {
874        let result = self.store.delete_edge(id);
875
876        if result {
877            if let Err(e) = self.log_wal(&WalRecord::DeleteEdge { id }) {
878                tracing::warn!("Failed to log DeleteEdge to WAL: {}", e);
879            }
880        }
881
882        result
883    }
884
885    /// Sets a property on an edge.
886    ///
887    /// If WAL is enabled, the operation is logged for durability.
888    pub fn set_edge_property(
889        &self,
890        id: grafeo_common::types::EdgeId,
891        key: &str,
892        value: grafeo_common::types::Value,
893    ) {
894        // Log to WAL first
895        if let Err(e) = self.log_wal(&WalRecord::SetEdgeProperty {
896            id,
897            key: key.to_string(),
898            value: value.clone(),
899        }) {
900            tracing::warn!("Failed to log SetEdgeProperty to WAL: {}", e);
901        }
902        self.store.set_edge_property(id, key, value);
903    }
904
905    /// Removes a property from a node.
906    ///
907    /// Returns true if the property existed and was removed, false otherwise.
908    pub fn remove_node_property(&self, id: grafeo_common::types::NodeId, key: &str) -> bool {
909        // Note: RemoveProperty WAL records not yet implemented, but operation works in memory
910        self.store.remove_node_property(id, key).is_some()
911    }
912
913    /// Removes a property from an edge.
914    ///
915    /// Returns true if the property existed and was removed, false otherwise.
916    pub fn remove_edge_property(&self, id: grafeo_common::types::EdgeId, key: &str) -> bool {
917        // Note: RemoveProperty WAL records not yet implemented, but operation works in memory
918        self.store.remove_edge_property(id, key).is_some()
919    }
920
921    // =========================================================================
922    // PROPERTY INDEX API
923    // =========================================================================
924
    /// Creates an index on a node property for O(1) lookups by value.
    ///
    /// This is a thin wrapper that forwards `property` to the underlying store.
    /// After creating an index, calls to [`Self::find_nodes_by_property`] are
    /// intended to be O(1) instead of O(n) for this property, and the index is
    /// intended to be maintained automatically when properties are set or
    /// removed.
    ///
    /// NOTE(review): the complexity and maintenance guarantees are provided by
    /// the store implementation, not by this method; confirm against `LpgStore`.
    /// Index creation is not written to the WAL by this method.
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Create an index on the 'email' property
    /// db.create_property_index("email");
    ///
    /// // Now lookups by email are O(1)
    /// let nodes = db.find_nodes_by_property("email", &Value::from("alice@example.com"));
    /// ```
    pub fn create_property_index(&self, property: &str) {
        self.store.create_property_index(property);
    }
943
944    /// Creates a vector similarity index on a node property.
945    ///
946    /// This enables efficient approximate nearest-neighbor search on vector
947    /// properties. Currently validates the index parameters and scans existing
948    /// nodes to verify the property contains vectors of the expected dimensions.
949    ///
950    /// # Arguments
951    ///
952    /// * `label` - Node label to index (e.g., `"Doc"`)
953    /// * `property` - Property containing vector embeddings (e.g., `"embedding"`)
954    /// * `dimensions` - Expected vector dimensions (inferred from data if `None`)
955    /// * `metric` - Distance metric: `"cosine"` (default), `"euclidean"`, `"dot_product"`, `"manhattan"`
956    /// * `m` - HNSW links per node (default: 16). Higher = better recall, more memory.
957    /// * `ef_construction` - Construction beam width (default: 128). Higher = better index quality, slower build.
958    ///
959    /// # Errors
960    ///
961    /// Returns an error if the metric is invalid, no vectors are found, or
962    /// dimensions don't match.
963    pub fn create_vector_index(
964        &self,
965        label: &str,
966        property: &str,
967        dimensions: Option<usize>,
968        metric: Option<&str>,
969        m: Option<usize>,
970        ef_construction: Option<usize>,
971    ) -> Result<()> {
972        use grafeo_common::types::{PropertyKey, Value};
973        use grafeo_core::index::vector::DistanceMetric;
974
975        let metric = match metric {
976            Some(m) => DistanceMetric::from_str(m).ok_or_else(|| {
977                grafeo_common::utils::error::Error::Internal(format!(
978                    "Unknown distance metric '{}'. Use: cosine, euclidean, dot_product, manhattan",
979                    m
980                ))
981            })?,
982            None => DistanceMetric::Cosine,
983        };
984
985        // Scan nodes to validate vectors exist and check dimensions
986        let prop_key = PropertyKey::new(property);
987        let mut found_dims: Option<usize> = dimensions;
988        let mut vector_count = 0usize;
989
990        #[cfg(feature = "vector-index")]
991        let mut vectors: Vec<(grafeo_common::types::NodeId, Vec<f32>)> = Vec::new();
992
993        for node in self.store.nodes_with_label(label) {
994            if let Some(Value::Vector(v)) = node.properties.get(&prop_key) {
995                if let Some(expected) = found_dims {
996                    if v.len() != expected {
997                        return Err(grafeo_common::utils::error::Error::Internal(format!(
998                            "Vector dimension mismatch: expected {}, found {} on node {}",
999                            expected,
1000                            v.len(),
1001                            node.id.0
1002                        )));
1003                    }
1004                } else {
1005                    found_dims = Some(v.len());
1006                }
1007                vector_count += 1;
1008                #[cfg(feature = "vector-index")]
1009                vectors.push((node.id, v.to_vec()));
1010            }
1011        }
1012
1013        if vector_count == 0 {
1014            return Err(grafeo_common::utils::error::Error::Internal(format!(
1015                "No vector properties found on :{label}({property})"
1016            )));
1017        }
1018
1019        let dims = found_dims.unwrap_or(0);
1020
1021        // Build and populate the HNSW index
1022        #[cfg(feature = "vector-index")]
1023        {
1024            use grafeo_core::index::vector::{HnswConfig, HnswIndex};
1025
1026            let mut config = HnswConfig::new(dims, metric);
1027            if let Some(m_val) = m {
1028                config = config.with_m(m_val);
1029            }
1030            if let Some(ef_c) = ef_construction {
1031                config = config.with_ef_construction(ef_c);
1032            }
1033
1034            let index = HnswIndex::with_capacity(config, vectors.len());
1035            for (node_id, vec) in &vectors {
1036                index.insert(*node_id, vec);
1037            }
1038
1039            self.store
1040                .add_vector_index(label, property, Arc::new(index));
1041        }
1042
1043        // Suppress unused variable warnings when vector-index is off
1044        let _ = (m, ef_construction);
1045
1046        tracing::info!(
1047            "Vector index created: :{label}({property}) - {vector_count} vectors, {dims} dimensions, metric={metric_name}",
1048            metric_name = metric.name()
1049        );
1050
1051        Ok(())
1052    }
1053
1054    /// Searches for the k nearest neighbors of a query vector.
1055    ///
1056    /// Uses the HNSW index created by [`create_vector_index`](Self::create_vector_index).
1057    ///
1058    /// # Arguments
1059    ///
1060    /// * `label` - Node label that was indexed
1061    /// * `property` - Property that was indexed
1062    /// * `query` - Query vector (slice of floats)
1063    /// * `k` - Number of nearest neighbors to return
1064    /// * `ef` - Search beam width (higher = better recall, slower). Uses index default if `None`.
1065    ///
1066    /// # Returns
1067    ///
1068    /// Vector of `(NodeId, distance)` pairs sorted by distance ascending.
1069    #[cfg(feature = "vector-index")]
1070    pub fn vector_search(
1071        &self,
1072        label: &str,
1073        property: &str,
1074        query: &[f32],
1075        k: usize,
1076        ef: Option<usize>,
1077    ) -> Result<Vec<(grafeo_common::types::NodeId, f32)>> {
1078        let index = self.store.get_vector_index(label, property).ok_or_else(|| {
1079            grafeo_common::utils::error::Error::Internal(format!(
1080                "No vector index found for :{label}({property}). Call create_vector_index() first."
1081            ))
1082        })?;
1083
1084        let results = match ef {
1085            Some(ef_val) => index.search_with_ef(query, k, ef_val),
1086            None => index.search(query, k),
1087        };
1088
1089        Ok(results)
1090    }
1091
1092    /// Creates multiple nodes in bulk, each with a single vector property.
1093    ///
1094    /// Much faster than individual `create_node_with_props` calls because it
1095    /// acquires internal locks once and loops in Rust rather than crossing
1096    /// the FFI boundary per vector.
1097    ///
1098    /// # Arguments
1099    ///
1100    /// * `label` - Label applied to all created nodes
1101    /// * `property` - Property name for the vector data
1102    /// * `vectors` - Vector data for each node
1103    ///
1104    /// # Returns
1105    ///
1106    /// Vector of created `NodeId`s in the same order as the input vectors.
1107    pub fn batch_create_nodes(
1108        &self,
1109        label: &str,
1110        property: &str,
1111        vectors: Vec<Vec<f32>>,
1112    ) -> Vec<grafeo_common::types::NodeId> {
1113        use grafeo_common::types::{PropertyKey, Value};
1114
1115        let prop_key = PropertyKey::new(property);
1116        let labels: &[&str] = &[label];
1117
1118        vectors
1119            .into_iter()
1120            .map(|vec| {
1121                let value = Value::Vector(vec.into());
1122                let id = self.store.create_node_with_props(
1123                    labels,
1124                    std::iter::once((prop_key.clone(), value.clone())),
1125                );
1126
1127                // Log to WAL
1128                if let Err(e) = self.log_wal(&WalRecord::CreateNode {
1129                    id,
1130                    labels: labels.iter().map(|s| s.to_string()).collect(),
1131                }) {
1132                    tracing::warn!("Failed to log CreateNode to WAL: {}", e);
1133                }
1134                if let Err(e) = self.log_wal(&WalRecord::SetNodeProperty {
1135                    id,
1136                    key: property.to_string(),
1137                    value,
1138                }) {
1139                    tracing::warn!("Failed to log SetNodeProperty to WAL: {}", e);
1140                }
1141
1142                id
1143            })
1144            .collect()
1145    }
1146
1147    /// Searches for nearest neighbors for multiple query vectors in parallel.
1148    ///
1149    /// Uses rayon parallel iteration under the hood for multi-core throughput.
1150    ///
1151    /// # Arguments
1152    ///
1153    /// * `label` - Node label that was indexed
1154    /// * `property` - Property that was indexed
1155    /// * `queries` - Batch of query vectors
1156    /// * `k` - Number of nearest neighbors per query
1157    /// * `ef` - Search beam width (uses index default if `None`)
1158    #[cfg(feature = "vector-index")]
1159    pub fn batch_vector_search(
1160        &self,
1161        label: &str,
1162        property: &str,
1163        queries: &[Vec<f32>],
1164        k: usize,
1165        ef: Option<usize>,
1166    ) -> Result<Vec<Vec<(grafeo_common::types::NodeId, f32)>>> {
1167        let index = self.store.get_vector_index(label, property).ok_or_else(|| {
1168            grafeo_common::utils::error::Error::Internal(format!(
1169                "No vector index found for :{label}({property}). Call create_vector_index() first."
1170            ))
1171        })?;
1172
1173        let results = match ef {
1174            Some(ef_val) => index.batch_search_with_ef(queries, k, ef_val),
1175            None => index.batch_search(queries, k),
1176        };
1177
1178        Ok(results)
1179    }
1180
    /// Drops an index on a node property.
    ///
    /// Forwards to the underlying store and returns its answer unchanged:
    /// `true` if the index existed and was removed.
    ///
    /// This is the counterpart of [`Self::create_property_index`]; the drop is
    /// not written to the WAL by this method.
    pub fn drop_property_index(&self, property: &str) -> bool {
        self.store.drop_property_index(property)
    }
1187
    /// Returns `true` if the property has an index.
    ///
    /// Read-only query answered by the underlying store; see
    /// [`Self::create_property_index`] and [`Self::drop_property_index`].
    #[must_use]
    pub fn has_property_index(&self, property: &str) -> bool {
        self.store.has_property_index(property)
    }
1193
    /// Finds all nodes that have a specific property value.
    ///
    /// The lookup is delegated to the underlying store. If the property is
    /// indexed, this is intended to be O(1); otherwise the store scans all
    /// nodes, which is O(n). Use [`Self::create_property_index`] for frequently
    /// queried properties.
    ///
    /// NOTE(review): the complexity figures describe the store's contract and
    /// cannot be confirmed from this wrapper.
    ///
    /// # Arguments
    ///
    /// * `property` - Name of the property to match on
    /// * `value` - Value the property must equal
    ///
    /// # Returns
    ///
    /// The ids of all matching nodes, as returned by the store.
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Create index for fast lookups (optional but recommended)
    /// db.create_property_index("city");
    ///
    /// // Find all nodes where city = "NYC"
    /// let nyc_nodes = db.find_nodes_by_property("city", &Value::from("NYC"));
    /// ```
    #[must_use]
    pub fn find_nodes_by_property(
        &self,
        property: &str,
        value: &grafeo_common::types::Value,
    ) -> Vec<grafeo_common::types::NodeId> {
        self.store.find_nodes_by_property(property, value)
    }
1216
1217    // =========================================================================
1218    // ADMIN API: Introspection
1219    // =========================================================================
1220
1221    /// Returns true if this database is backed by a file (persistent).
1222    ///
1223    /// In-memory databases return false.
1224    #[must_use]
1225    pub fn is_persistent(&self) -> bool {
1226        self.config.path.is_some()
1227    }
1228
    /// Returns the database file path, if persistent.
    ///
    /// This is a borrowed view of the path stored in the configuration.
    /// In-memory databases (no configured path) return `None`.
    #[must_use]
    pub fn path(&self) -> Option<&Path> {
        self.config.path.as_deref()
    }
1236
1237    /// Returns high-level database information.
1238    ///
1239    /// Includes node/edge counts, persistence status, and mode (LPG/RDF).
1240    #[must_use]
1241    pub fn info(&self) -> crate::admin::DatabaseInfo {
1242        crate::admin::DatabaseInfo {
1243            mode: crate::admin::DatabaseMode::Lpg,
1244            node_count: self.store.node_count(),
1245            edge_count: self.store.edge_count(),
1246            is_persistent: self.is_persistent(),
1247            path: self.config.path.clone(),
1248            wal_enabled: self.config.wal_enabled,
1249            version: env!("CARGO_PKG_VERSION").to_string(),
1250        }
1251    }
1252
1253    /// Returns detailed database statistics.
1254    ///
1255    /// Includes counts, memory usage, and index information.
1256    #[must_use]
1257    pub fn detailed_stats(&self) -> crate::admin::DatabaseStats {
1258        let disk_bytes = self.config.path.as_ref().and_then(|p| {
1259            if p.exists() {
1260                Self::calculate_disk_usage(p).ok()
1261            } else {
1262                None
1263            }
1264        });
1265
1266        crate::admin::DatabaseStats {
1267            node_count: self.store.node_count(),
1268            edge_count: self.store.edge_count(),
1269            label_count: self.store.label_count(),
1270            edge_type_count: self.store.edge_type_count(),
1271            property_key_count: self.store.property_key_count(),
1272            index_count: 0, // TODO: implement index tracking
1273            memory_bytes: self.buffer_manager.allocated(),
1274            disk_bytes,
1275        }
1276    }
1277
1278    /// Calculates total disk usage for the database directory.
1279    fn calculate_disk_usage(path: &Path) -> Result<usize> {
1280        let mut total = 0usize;
1281        if path.is_dir() {
1282            for entry in std::fs::read_dir(path)? {
1283                let entry = entry?;
1284                let metadata = entry.metadata()?;
1285                if metadata.is_file() {
1286                    total += metadata.len() as usize;
1287                } else if metadata.is_dir() {
1288                    total += Self::calculate_disk_usage(&entry.path())?;
1289                }
1290            }
1291        }
1292        Ok(total)
1293    }
1294
1295    /// Returns schema information (labels, edge types, property keys).
1296    ///
1297    /// For LPG mode, returns label and edge type information.
1298    /// For RDF mode, returns predicate and named graph information.
1299    #[must_use]
1300    pub fn schema(&self) -> crate::admin::SchemaInfo {
1301        let labels = self
1302            .store
1303            .all_labels()
1304            .into_iter()
1305            .map(|name| crate::admin::LabelInfo {
1306                name: name.clone(),
1307                count: self.store.nodes_with_label(&name).count(),
1308            })
1309            .collect();
1310
1311        let edge_types = self
1312            .store
1313            .all_edge_types()
1314            .into_iter()
1315            .map(|name| crate::admin::EdgeTypeInfo {
1316                name: name.clone(),
1317                count: self.store.edges_with_type(&name).count(),
1318            })
1319            .collect();
1320
1321        let property_keys = self.store.all_property_keys();
1322
1323        crate::admin::SchemaInfo::Lpg(crate::admin::LpgSchemaInfo {
1324            labels,
1325            edge_types,
1326            property_keys,
1327        })
1328    }
1329
1330    /// Returns RDF schema information.
1331    ///
1332    /// Only available when the RDF feature is enabled.
1333    #[cfg(feature = "rdf")]
1334    #[must_use]
1335    pub fn rdf_schema(&self) -> crate::admin::SchemaInfo {
1336        let stats = self.rdf_store.stats();
1337
1338        let predicates = self
1339            .rdf_store
1340            .predicates()
1341            .into_iter()
1342            .map(|predicate| {
1343                let count = self.rdf_store.triples_with_predicate(&predicate).len();
1344                crate::admin::PredicateInfo {
1345                    iri: predicate.to_string(),
1346                    count,
1347                }
1348            })
1349            .collect();
1350
1351        crate::admin::SchemaInfo::Rdf(crate::admin::RdfSchemaInfo {
1352            predicates,
1353            named_graphs: Vec::new(), // Named graphs not yet implemented in RdfStore
1354            subject_count: stats.subject_count,
1355            object_count: stats.object_count,
1356        })
1357    }
1358
1359    /// Validates database integrity.
1360    ///
1361    /// Checks for:
1362    /// - Dangling edge references (edges pointing to non-existent nodes)
1363    /// - Internal index consistency
1364    ///
1365    /// Returns a list of errors and warnings. Empty errors = valid.
1366    #[must_use]
1367    pub fn validate(&self) -> crate::admin::ValidationResult {
1368        let mut result = crate::admin::ValidationResult::default();
1369
1370        // Check for dangling edge references
1371        for edge in self.store.all_edges() {
1372            if self.store.get_node(edge.src).is_none() {
1373                result.errors.push(crate::admin::ValidationError {
1374                    code: "DANGLING_SRC".to_string(),
1375                    message: format!(
1376                        "Edge {} references non-existent source node {}",
1377                        edge.id.0, edge.src.0
1378                    ),
1379                    context: Some(format!("edge:{}", edge.id.0)),
1380                });
1381            }
1382            if self.store.get_node(edge.dst).is_none() {
1383                result.errors.push(crate::admin::ValidationError {
1384                    code: "DANGLING_DST".to_string(),
1385                    message: format!(
1386                        "Edge {} references non-existent destination node {}",
1387                        edge.id.0, edge.dst.0
1388                    ),
1389                    context: Some(format!("edge:{}", edge.id.0)),
1390                });
1391            }
1392        }
1393
1394        // Add warnings for potential issues
1395        if self.store.node_count() > 0 && self.store.edge_count() == 0 {
1396            result.warnings.push(crate::admin::ValidationWarning {
1397                code: "NO_EDGES".to_string(),
1398                message: "Database has nodes but no edges".to_string(),
1399                context: None,
1400            });
1401        }
1402
1403        result
1404    }
1405
1406    /// Returns WAL (Write-Ahead Log) status.
1407    ///
1408    /// Returns None if WAL is not enabled.
1409    #[must_use]
1410    pub fn wal_status(&self) -> crate::admin::WalStatus {
1411        if let Some(ref wal) = self.wal {
1412            crate::admin::WalStatus {
1413                enabled: true,
1414                path: self.config.path.as_ref().map(|p| p.join("wal")),
1415                size_bytes: wal.size_bytes(),
1416                record_count: wal.record_count() as usize,
1417                last_checkpoint: wal.last_checkpoint_timestamp(),
1418                current_epoch: self.store.current_epoch().as_u64(),
1419            }
1420        } else {
1421            crate::admin::WalStatus {
1422                enabled: false,
1423                path: None,
1424                size_bytes: 0,
1425                record_count: 0,
1426                last_checkpoint: None,
1427                current_epoch: self.store.current_epoch().as_u64(),
1428            }
1429        }
1430    }
1431
1432    /// Forces a WAL checkpoint.
1433    ///
1434    /// Flushes all pending WAL records to the main storage.
1435    ///
1436    /// # Errors
1437    ///
1438    /// Returns an error if the checkpoint fails.
1439    pub fn wal_checkpoint(&self) -> Result<()> {
1440        if let Some(ref wal) = self.wal {
1441            let epoch = self.store.current_epoch();
1442            let tx_id = self
1443                .tx_manager
1444                .last_assigned_tx_id()
1445                .unwrap_or_else(|| self.tx_manager.begin());
1446            wal.checkpoint(tx_id, epoch)?;
1447            wal.sync()?;
1448        }
1449        Ok(())
1450    }
1451
1452    // =========================================================================
1453    // ADMIN API: Persistence Control
1454    // =========================================================================
1455
1456    /// Saves the database to a file path.
1457    ///
1458    /// - If in-memory: creates a new persistent database at path
1459    /// - If file-backed: creates a copy at the new path
1460    ///
1461    /// The original database remains unchanged.
1462    ///
1463    /// # Errors
1464    ///
1465    /// Returns an error if the save operation fails.
1466    pub fn save(&self, path: impl AsRef<Path>) -> Result<()> {
1467        let path = path.as_ref();
1468
1469        // Create target database with WAL enabled
1470        let target_config = Config::persistent(path);
1471        let target = Self::with_config(target_config)?;
1472
1473        // Copy all nodes using WAL-enabled methods
1474        for node in self.store.all_nodes() {
1475            let label_refs: Vec<&str> = node.labels.iter().map(|s| &**s).collect();
1476            target.store.create_node_with_id(node.id, &label_refs);
1477
1478            // Log to WAL
1479            target.log_wal(&WalRecord::CreateNode {
1480                id: node.id,
1481                labels: node.labels.iter().map(|s| s.to_string()).collect(),
1482            })?;
1483
1484            // Copy properties
1485            for (key, value) in node.properties {
1486                target
1487                    .store
1488                    .set_node_property(node.id, key.as_str(), value.clone());
1489                target.log_wal(&WalRecord::SetNodeProperty {
1490                    id: node.id,
1491                    key: key.to_string(),
1492                    value,
1493                })?;
1494            }
1495        }
1496
1497        // Copy all edges using WAL-enabled methods
1498        for edge in self.store.all_edges() {
1499            target
1500                .store
1501                .create_edge_with_id(edge.id, edge.src, edge.dst, &edge.edge_type);
1502
1503            // Log to WAL
1504            target.log_wal(&WalRecord::CreateEdge {
1505                id: edge.id,
1506                src: edge.src,
1507                dst: edge.dst,
1508                edge_type: edge.edge_type.to_string(),
1509            })?;
1510
1511            // Copy properties
1512            for (key, value) in edge.properties {
1513                target
1514                    .store
1515                    .set_edge_property(edge.id, key.as_str(), value.clone());
1516                target.log_wal(&WalRecord::SetEdgeProperty {
1517                    id: edge.id,
1518                    key: key.to_string(),
1519                    value,
1520                })?;
1521            }
1522        }
1523
1524        // Checkpoint and close the target database
1525        target.close()?;
1526
1527        Ok(())
1528    }
1529
1530    /// Creates an in-memory copy of this database.
1531    ///
1532    /// Returns a new database that is completely independent.
1533    /// Useful for:
1534    /// - Testing modifications without affecting the original
1535    /// - Faster operations when persistence isn't needed
1536    ///
1537    /// # Errors
1538    ///
1539    /// Returns an error if the copy operation fails.
1540    pub fn to_memory(&self) -> Result<Self> {
1541        let config = Config::in_memory();
1542        let target = Self::with_config(config)?;
1543
1544        // Copy all nodes
1545        for node in self.store.all_nodes() {
1546            let label_refs: Vec<&str> = node.labels.iter().map(|s| &**s).collect();
1547            target.store.create_node_with_id(node.id, &label_refs);
1548
1549            // Copy properties
1550            for (key, value) in node.properties {
1551                target.store.set_node_property(node.id, key.as_str(), value);
1552            }
1553        }
1554
1555        // Copy all edges
1556        for edge in self.store.all_edges() {
1557            target
1558                .store
1559                .create_edge_with_id(edge.id, edge.src, edge.dst, &edge.edge_type);
1560
1561            // Copy properties
1562            for (key, value) in edge.properties {
1563                target.store.set_edge_property(edge.id, key.as_str(), value);
1564            }
1565        }
1566
1567        Ok(target)
1568    }
1569
1570    /// Opens a database file and loads it entirely into memory.
1571    ///
1572    /// The returned database has no connection to the original file.
1573    /// Changes will NOT be written back to the file.
1574    ///
1575    /// # Errors
1576    ///
1577    /// Returns an error if the file can't be opened or loaded.
1578    pub fn open_in_memory(path: impl AsRef<Path>) -> Result<Self> {
1579        // Open the source database (triggers WAL recovery)
1580        let source = Self::open(path)?;
1581
1582        // Create in-memory copy
1583        let target = source.to_memory()?;
1584
1585        // Close the source (releases file handles)
1586        source.close()?;
1587
1588        Ok(target)
1589    }
1590
1591    // =========================================================================
1592    // ADMIN API: Iteration
1593    // =========================================================================
1594
    /// Returns an iterator over all nodes in the database.
    ///
    /// The iterator is produced by the underlying store, yields nodes by value
    /// and borrows this database for as long as it is alive.
    ///
    /// Useful for dump/export operations.
    pub fn iter_nodes(&self) -> impl Iterator<Item = grafeo_core::graph::lpg::Node> + '_ {
        self.store.all_nodes()
    }
1601
    /// Returns an iterator over all edges in the database.
    ///
    /// The iterator is produced by the underlying store, yields edges by value
    /// and borrows this database for as long as it is alive.
    ///
    /// Useful for dump/export operations.
    pub fn iter_edges(&self) -> impl Iterator<Item = grafeo_core::graph::lpg::Edge> + '_ {
        self.store.all_edges()
    }
1608}
1609
1610impl Drop for GrafeoDB {
1611    fn drop(&mut self) {
1612        if let Err(e) = self.close() {
1613            tracing::error!("Error closing database: {}", e);
1614        }
1615    }
1616}
1617
1618/// The result of running a query.
1619///
1620/// Contains rows and columns, like a table. Use [`iter()`](Self::iter) to
1621/// loop through rows, or [`scalar()`](Self::scalar) if you expect a single value.
1622///
1623/// # Examples
1624///
1625/// ```
1626/// use grafeo_engine::GrafeoDB;
1627///
1628/// let db = GrafeoDB::new_in_memory();
1629/// db.create_node(&["Person"]);
1630///
1631/// let result = db.execute("MATCH (p:Person) RETURN count(p) AS total")?;
1632///
1633/// // Check what we got
1634/// println!("Columns: {:?}", result.columns);
1635/// println!("Rows: {}", result.row_count());
1636///
1637/// // Iterate through results
1638/// for row in result.iter() {
1639///     println!("{:?}", row);
1640/// }
1641/// # Ok::<(), grafeo_common::utils::error::Error>(())
1642/// ```
1643#[derive(Debug)]
1644pub struct QueryResult {
1645    /// Column names from the RETURN clause.
1646    pub columns: Vec<String>,
1647    /// Column types - useful for distinguishing NodeId/EdgeId from plain integers.
1648    pub column_types: Vec<grafeo_common::types::LogicalType>,
1649    /// The actual result rows.
1650    pub rows: Vec<Vec<grafeo_common::types::Value>>,
1651    /// Query execution time in milliseconds (if timing was enabled).
1652    pub execution_time_ms: Option<f64>,
1653    /// Number of rows scanned during query execution (estimate).
1654    pub rows_scanned: Option<u64>,
1655}
1656
1657impl QueryResult {
1658    /// Creates a new empty query result.
1659    #[must_use]
1660    pub fn new(columns: Vec<String>) -> Self {
1661        let len = columns.len();
1662        Self {
1663            columns,
1664            column_types: vec![grafeo_common::types::LogicalType::Any; len],
1665            rows: Vec::new(),
1666            execution_time_ms: None,
1667            rows_scanned: None,
1668        }
1669    }
1670
1671    /// Creates a new empty query result with column types.
1672    #[must_use]
1673    pub fn with_types(
1674        columns: Vec<String>,
1675        column_types: Vec<grafeo_common::types::LogicalType>,
1676    ) -> Self {
1677        Self {
1678            columns,
1679            column_types,
1680            rows: Vec::new(),
1681            execution_time_ms: None,
1682            rows_scanned: None,
1683        }
1684    }
1685
1686    /// Sets the execution metrics on this result.
1687    pub fn with_metrics(mut self, execution_time_ms: f64, rows_scanned: u64) -> Self {
1688        self.execution_time_ms = Some(execution_time_ms);
1689        self.rows_scanned = Some(rows_scanned);
1690        self
1691    }
1692
1693    /// Returns the execution time in milliseconds, if available.
1694    #[must_use]
1695    pub fn execution_time_ms(&self) -> Option<f64> {
1696        self.execution_time_ms
1697    }
1698
1699    /// Returns the number of rows scanned, if available.
1700    #[must_use]
1701    pub fn rows_scanned(&self) -> Option<u64> {
1702        self.rows_scanned
1703    }
1704
1705    /// Returns the number of rows.
1706    #[must_use]
1707    pub fn row_count(&self) -> usize {
1708        self.rows.len()
1709    }
1710
1711    /// Returns the number of columns.
1712    #[must_use]
1713    pub fn column_count(&self) -> usize {
1714        self.columns.len()
1715    }
1716
1717    /// Returns true if the result is empty.
1718    #[must_use]
1719    pub fn is_empty(&self) -> bool {
1720        self.rows.is_empty()
1721    }
1722
1723    /// Extracts a single value from the result.
1724    ///
1725    /// Use this when your query returns exactly one row with one column,
1726    /// like `RETURN count(n)` or `RETURN sum(p.amount)`.
1727    ///
1728    /// # Errors
1729    ///
1730    /// Returns an error if the result has multiple rows or columns.
1731    pub fn scalar<T: FromValue>(&self) -> Result<T> {
1732        if self.rows.len() != 1 || self.columns.len() != 1 {
1733            return Err(grafeo_common::utils::error::Error::InvalidValue(
1734                "Expected single value".to_string(),
1735            ));
1736        }
1737        T::from_value(&self.rows[0][0])
1738    }
1739
1740    /// Returns an iterator over the rows.
1741    pub fn iter(&self) -> impl Iterator<Item = &Vec<grafeo_common::types::Value>> {
1742        self.rows.iter()
1743    }
1744}
1745
/// Converts a [`Value`](grafeo_common::types::Value) to a concrete Rust type.
///
/// Implemented in this file for `i64`, `f64`, `String`, and `bool`.
/// Used by [`QueryResult::scalar()`] to extract typed values.
///
/// Implementations borrow the value and return an owned `Self`.
pub trait FromValue: Sized {
    /// Attempts the conversion, returning an error on type mismatch.
    ///
    /// # Errors
    ///
    /// Implementations return an error when `value` does not hold the
    /// requested type.
    fn from_value(value: &grafeo_common::types::Value) -> Result<Self>;
}
1754
1755impl FromValue for i64 {
1756    fn from_value(value: &grafeo_common::types::Value) -> Result<Self> {
1757        value
1758            .as_int64()
1759            .ok_or_else(|| grafeo_common::utils::error::Error::TypeMismatch {
1760                expected: "INT64".to_string(),
1761                found: value.type_name().to_string(),
1762            })
1763    }
1764}
1765
1766impl FromValue for f64 {
1767    fn from_value(value: &grafeo_common::types::Value) -> Result<Self> {
1768        value
1769            .as_float64()
1770            .ok_or_else(|| grafeo_common::utils::error::Error::TypeMismatch {
1771                expected: "FLOAT64".to_string(),
1772                found: value.type_name().to_string(),
1773            })
1774    }
1775}
1776
1777impl FromValue for String {
1778    fn from_value(value: &grafeo_common::types::Value) -> Result<Self> {
1779        value.as_str().map(String::from).ok_or_else(|| {
1780            grafeo_common::utils::error::Error::TypeMismatch {
1781                expected: "STRING".to_string(),
1782                found: value.type_name().to_string(),
1783            }
1784        })
1785    }
1786}
1787
1788impl FromValue for bool {
1789    fn from_value(value: &grafeo_common::types::Value) -> Result<Self> {
1790        value
1791            .as_bool()
1792            .ok_or_else(|| grafeo_common::utils::error::Error::TypeMismatch {
1793                expected: "BOOL".to_string(),
1794                found: value.type_name().to_string(),
1795            })
1796    }
1797}
1798
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_create_in_memory_database() {
        // A brand-new in-memory database reports zero nodes and zero edges.
        let database = GrafeoDB::new_in_memory();
        assert_eq!(database.node_count(), 0);
        assert_eq!(database.edge_count(), 0);
    }

    #[test]
    fn test_database_config() {
        // Settings chosen on the config builder must be readable back from the database.
        let settings = Config::in_memory().with_threads(4).with_query_logging();
        let database = GrafeoDB::with_config(settings).unwrap();

        assert_eq!(database.config().threads, 4);
        assert!(database.config().query_logging);
    }

    #[test]
    fn test_database_session() {
        // Passes as long as obtaining a session does not panic.
        let database = GrafeoDB::new_in_memory();
        let _session = database.session();
    }

    #[test]
    fn test_persistent_database_recovery() {
        use grafeo_common::types::{NodeId, Value};
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let location = scratch.path().join("test_db");

        // First lifetime: two named `Person` nodes joined by a single edge.
        {
            let database = GrafeoDB::open(&location).unwrap();

            let alice = database.create_node(&["Person"]);
            database.set_node_property(alice, "name", Value::from("Alice"));

            let bob = database.create_node(&["Person"]);
            database.set_node_property(bob, "name", Value::from("Bob"));

            let _edge = database.create_edge(alice, bob, "KNOWS");

            // Close explicitly before the handle goes out of scope.
            database.close().unwrap();
        }

        // Second lifetime: the same path must yield the same graph.
        {
            let database = GrafeoDB::open(&location).unwrap();

            assert_eq!(database.node_count(), 2);
            assert_eq!(database.edge_count(), 1);

            // Both nodes are reachable under ids 0 and 1.
            assert!(database.get_node(NodeId::new(0)).is_some());
            assert!(database.get_node(NodeId::new(1)).is_some());
        }
    }

    #[test]
    fn test_wal_logging() {
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let location = scratch.path().join("wal_test_db");

        let database = GrafeoDB::open(&location).unwrap();

        // One create followed by one delete.
        let scratch_node = database.create_node(&["Test"]);
        database.delete_node(scratch_node);

        // The record count is only checked when the database exposes a WAL.
        if let Some(log) = database.wal() {
            assert!(log.record_count() > 0);
        }

        database.close().unwrap();
    }

    #[test]
    fn test_wal_recovery_multiple_sessions() {
        // Data written in earlier open/close cycles must still be there in later ones.
        use grafeo_common::types::{NodeId, Value};
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let location = scratch.path().join("multi_session_db");

        // Cycle 1: write the first node.
        {
            let database = GrafeoDB::open(&location).unwrap();
            let alice = database.create_node(&["Person"]);
            database.set_node_property(alice, "name", Value::from("Alice"));
            database.close().unwrap();
        }

        // Cycle 2: the first node is back; add a second one.
        {
            let database = GrafeoDB::open(&location).unwrap();
            assert_eq!(database.node_count(), 1);
            let bob = database.create_node(&["Person"]);
            database.set_node_property(bob, "name", Value::from("Bob"));
            database.close().unwrap();
        }

        // Cycle 3: both nodes are present and still carry their label.
        {
            let database = GrafeoDB::open(&location).unwrap();
            assert_eq!(database.node_count(), 2);

            let first = database.get_node(NodeId::new(0)).unwrap();
            assert!(first.labels.iter().any(|label| label.as_str() == "Person"));

            let second = database.get_node(NodeId::new(1)).unwrap();
            assert!(second.labels.iter().any(|label| label.as_str() == "Person"));
        }
    }

    #[test]
    fn test_database_consistency_after_mutations() {
        // A mix of creates and deletes must look the same after reopening.
        use grafeo_common::types::{NodeId, Value};
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let location = scratch.path().join("consistency_db");

        {
            let database = GrafeoDB::open(&location).unwrap();

            // Three nodes in a chain: a -> b -> c.
            let a = database.create_node(&["Node"]);
            let b = database.create_node(&["Node"]);
            let c = database.create_node(&["Node"]);

            let ab_edge = database.create_edge(a, b, "LINKS");
            let _bc_edge = database.create_edge(b, c, "LINKS");

            // Remove the a -> b edge, then the middle node itself.
            database.delete_edge(ab_edge);
            database.delete_node(b);

            // The two surviving nodes each get a property.
            database.set_node_property(a, "value", Value::Int64(1));
            database.set_node_property(c, "value", Value::Int64(3));

            database.close().unwrap();
        }

        // Reopen and check which nodes can still be fetched.
        {
            let database = GrafeoDB::open(&location).unwrap();

            // `node_count` is deliberately not asserted here; the test only
            // checks which ids resolve to a node.
            assert!(database.get_node(NodeId::new(0)).is_some());
            assert!(database.get_node(NodeId::new(2)).is_some());

            // The deleted middle node must not come back.
            assert!(database.get_node(NodeId::new(1)).is_none());
        }
    }

    #[test]
    fn test_close_is_idempotent() {
        // Closing twice in a row must succeed both times.
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let location = scratch.path().join("close_test_db");

        let database = GrafeoDB::open(&location).unwrap();
        database.create_node(&["Test"]);

        // Initial close.
        assert!(database.close().is_ok());

        // Repeated close on the same handle.
        assert!(database.close().is_ok());
    }

    #[test]
    fn test_query_result_has_metrics() {
        // A query over two matching nodes must report its execution metrics.
        let database = GrafeoDB::new_in_memory();
        database.create_node(&["Person"]);
        database.create_node(&["Person"]);

        #[cfg(feature = "gql")]
        {
            let outcome = database.execute("MATCH (n:Person) RETURN n").unwrap();

            // Both metrics are present, and they carry the expected values.
            assert!(outcome.execution_time_ms.is_some());
            assert!(outcome.rows_scanned.is_some());
            assert!(outcome.execution_time_ms.unwrap() >= 0.0);
            assert_eq!(outcome.rows_scanned.unwrap(), 2);
        }
    }

    #[test]
    fn test_empty_query_result_metrics() {
        // Metrics must still be reported when the query matches nothing.
        let database = GrafeoDB::new_in_memory();
        database.create_node(&["Person"]);

        #[cfg(feature = "gql")]
        {
            // No node carries the `NonExistent` label.
            let outcome = database.execute("MATCH (n:NonExistent) RETURN n").unwrap();

            assert!(outcome.execution_time_ms.is_some());
            assert!(outcome.rows_scanned.is_some());
            assert_eq!(outcome.rows_scanned.unwrap(), 0);
        }
    }
}