Skip to main content

sediment/
graph.rs

1//! Graph store using SQLite for relationship tracking between memories.
2//!
3//! Provides a graph layer alongside LanceDB for tracking
4//! relationships like RELATED, SUPERSEDES, and CO_ACCESSED between items.
5
6use std::path::Path;
7
8use rusqlite::{Connection, params};
9use tracing::debug;
10
11use crate::error::{Result, SedimentError};
12
13/// A relationship between two memory items in the graph.
14#[derive(Debug, Clone)]
15pub struct Edge {
16    pub target_id: String,
17    pub rel_type: String,
18    pub strength: f64,
19}
20
21/// A co-access relationship between two memory items.
22#[derive(Debug, Clone)]
23pub struct CoAccessEdge {
24    pub target_id: String,
25    pub count: i64,
26}
27
28/// Full connection info for a memory item.
29#[derive(Debug, Clone)]
30pub struct ConnectionInfo {
31    pub target_id: String,
32    pub rel_type: String,
33    pub strength: f64,
34    pub count: Option<i64>,
35}
36
37/// SQLite-backed graph store for memory relationships.
38pub struct GraphStore {
39    conn: Connection,
40}
41
42impl GraphStore {
43    /// Open or create the graph store using the given SQLite database path.
44    /// Shares the same file as access.db.
45    pub fn open(path: &Path) -> Result<Self> {
46        let conn = Connection::open(path).map_err(|e| {
47            SedimentError::Database(format!("Failed to open graph database: {}", e))
48        })?;
49
50        if let Err(e) = conn.execute_batch("PRAGMA journal_mode=WAL; PRAGMA busy_timeout=5000;") {
51            tracing::warn!("Failed to set SQLite PRAGMAs (graph): {}", e);
52        }
53
54        conn.execute_batch(
55            "CREATE TABLE IF NOT EXISTS graph_nodes (
56                id TEXT PRIMARY KEY,
57                project_id TEXT NOT NULL DEFAULT '',
58                created_at INTEGER NOT NULL
59            );
60
61            CREATE TABLE IF NOT EXISTS graph_edges (
62                from_id TEXT NOT NULL,
63                to_id TEXT NOT NULL,
64                edge_type TEXT NOT NULL,
65                strength REAL NOT NULL DEFAULT 0.0,
66                rel_type TEXT NOT NULL DEFAULT '',
67                count INTEGER NOT NULL DEFAULT 0,
68                last_at INTEGER NOT NULL DEFAULT 0,
69                created_at INTEGER NOT NULL,
70                UNIQUE(from_id, to_id, edge_type)
71            );
72
73            CREATE INDEX IF NOT EXISTS idx_edges_from ON graph_edges(from_id);
74            CREATE INDEX IF NOT EXISTS idx_edges_to ON graph_edges(to_id);",
75        )
76        .map_err(|e| SedimentError::Database(format!("Failed to create graph tables: {}", e)))?;
77
78        Ok(Self { conn })
79    }
80
81    /// Add a Memory node to the graph.
82    pub fn add_node(&self, id: &str, project_id: Option<&str>, created_at: i64) -> Result<()> {
83        let pid = project_id.unwrap_or("");
84
85        self.conn
86            .execute(
87                "INSERT OR IGNORE INTO graph_nodes (id, project_id, created_at) VALUES (?1, ?2, ?3)",
88                params![id, pid, created_at],
89            )
90            .map_err(|e| SedimentError::Database(format!("Failed to add node: {}", e)))?;
91
92        debug!("Added graph node: {}", id);
93        Ok(())
94    }
95
96    /// Ensure a node exists in the graph. Creates it if missing (for backfill).
97    pub fn ensure_node_exists(
98        &self,
99        id: &str,
100        project_id: Option<&str>,
101        created_at: i64,
102    ) -> Result<()> {
103        self.add_node(id, project_id, created_at)
104    }
105
106    /// Remove a Memory node and all its edges from the graph.
107    pub fn remove_node(&self, id: &str) -> Result<()> {
108        // Preserve incoming SUPERSEDES edges (where this node is the to_id)
109        // so that lineage/provenance chains remain intact after node removal.
110        self.conn
111            .execute(
112                "DELETE FROM graph_edges WHERE from_id = ?1 OR (to_id = ?1 AND edge_type != 'supersedes')",
113                params![id],
114            )
115            .map_err(|e| SedimentError::Database(format!("Failed to remove edges: {}", e)))?;
116
117        self.conn
118            .execute("DELETE FROM graph_nodes WHERE id = ?1", params![id])
119            .map_err(|e| SedimentError::Database(format!("Failed to remove node: {}", e)))?;
120
121        debug!("Removed graph node: {}", id);
122        Ok(())
123    }
124
125    /// Add a RELATED edge between two Memory nodes.
126    pub fn add_related_edge(
127        &self,
128        from_id: &str,
129        to_id: &str,
130        strength: f64,
131        rel_type: &str,
132    ) -> Result<()> {
133        let now = chrono::Utc::now().timestamp();
134
135        self.conn
136            .execute(
137                "INSERT OR IGNORE INTO graph_edges (from_id, to_id, edge_type, strength, rel_type, created_at)
138                 VALUES (?1, ?2, 'related', ?3, ?4, ?5)",
139                params![from_id, to_id, strength, rel_type, now],
140            )
141            .map_err(|e| SedimentError::Database(format!("Failed to add related edge: {}", e)))?;
142
143        debug!(
144            "Added RELATED edge: {} -> {} ({})",
145            from_id, to_id, rel_type
146        );
147        Ok(())
148    }
149
150    /// Add a SUPERSEDES edge from new_id to old_id.
151    pub fn add_supersedes_edge(&self, new_id: &str, old_id: &str) -> Result<()> {
152        let now = chrono::Utc::now().timestamp();
153
154        self.conn
155            .execute(
156                "INSERT OR IGNORE INTO graph_edges (from_id, to_id, edge_type, strength, created_at)
157                 VALUES (?1, ?2, 'supersedes', 1.0, ?3)",
158                params![new_id, old_id, now],
159            )
160            .map_err(|e| SedimentError::Database(format!("Failed to add supersedes edge: {}", e)))?;
161
162        debug!("Added SUPERSEDES edge: {} -> {}", new_id, old_id);
163        Ok(())
164    }
165
166    /// Get 1-hop neighbors of the given item IDs via RELATED or SUPERSEDES edges.
167    /// Returns (neighbor_id, rel_type, strength) tuples.
168    ///
169    /// Note on parameter binding: SQLite reuses the same positional parameters (?1..?N)
170    /// across all three IN clauses and the CASE expression. This is correct because
171    /// SQLite binds by position, so the same parameter set is applied to each reference.
172    pub fn get_neighbors(
173        &self,
174        ids: &[&str],
175        min_strength: f64,
176    ) -> Result<Vec<(String, String, f64)>> {
177        if ids.is_empty() {
178            return Ok(Vec::new());
179        }
180
181        let placeholders: Vec<String> = (1..=ids.len()).map(|i| format!("?{}", i)).collect();
182        let ph = placeholders.join(",");
183        let strength_idx = ids.len() + 1;
184
185        let sql = format!(
186            "SELECT
187                CASE WHEN from_id IN ({ph}) THEN to_id ELSE from_id END AS neighbor,
188                CASE WHEN edge_type = 'related' THEN rel_type ELSE 'supersedes' END AS rtype,
189                strength
190             FROM graph_edges
191             WHERE (from_id IN ({ph}) OR to_id IN ({ph}))
192               AND edge_type IN ('related', 'supersedes')
193               AND strength >= ?{strength_idx}
194             LIMIT 100"
195        );
196
197        let mut stmt = self.conn.prepare(&sql).map_err(|e| {
198            SedimentError::Database(format!("Failed to prepare neighbors query: {}", e))
199        })?;
200
201        let mut param_values: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
202        for id in ids {
203            param_values.push(Box::new(id.to_string()));
204        }
205        param_values.push(Box::new(min_strength));
206
207        let params_ref: Vec<&dyn rusqlite::types::ToSql> =
208            param_values.iter().map(|b| b.as_ref()).collect();
209
210        let rows = stmt
211            .query_map(params_ref.as_slice(), |row| {
212                Ok((
213                    row.get::<_, String>(0)?,
214                    row.get::<_, String>(1)?,
215                    row.get::<_, f64>(2)?,
216                ))
217            })
218            .map_err(|e| SedimentError::Database(format!("Failed to query neighbors: {}", e)))?;
219
220        // Filter out input IDs from results so we never return a query ID as its own neighbor
221        let input_set: std::collections::HashSet<&str> = ids.iter().copied().collect();
222        let mut results = Vec::new();
223        for row in rows {
224            let r = row
225                .map_err(|e| SedimentError::Database(format!("Failed to read neighbor: {}", e)))?;
226            if !input_set.contains(r.0.as_str()) {
227                results.push(r);
228            }
229        }
230
231        Ok(results)
232    }
233
234    /// Record co-access between pairs of item IDs.
235    /// Creates or increments CO_ACCESSED edges.
236    pub fn record_co_access(&self, item_ids: &[String]) -> Result<()> {
237        if item_ids.len() < 2 {
238            return Ok(());
239        }
240
241        // Performance optimization: limit co-access recording to the top 3 items
242        // by position to bound the O(n^2) pair generation. Items beyond position 3
243        // in recall results won't have co-access edges recorded.
244        let item_ids = if item_ids.len() > 3 {
245            &item_ids[..3]
246        } else {
247            item_ids
248        };
249
250        let now = chrono::Utc::now().timestamp();
251
252        for i in 0..item_ids.len() {
253            for j in (i + 1)..item_ids.len() {
254                // Normalize edge direction: smaller ID always goes first to prevent
255                // duplicate edges (A,B) and (B,A) from accumulating separately.
256                let (a, b) = if item_ids[i] <= item_ids[j] {
257                    (&item_ids[i], &item_ids[j])
258                } else {
259                    (&item_ids[j], &item_ids[i])
260                };
261
262                self.conn
263                    .execute(
264                        "INSERT INTO graph_edges (from_id, to_id, edge_type, count, last_at, created_at)
265                         VALUES (?1, ?2, 'co_accessed', 1, ?3, ?3)
266                         ON CONFLICT(from_id, to_id, edge_type)
267                         DO UPDATE SET count = count + 1, last_at = ?3",
268                        params![a, b, now],
269                    )
270                    .map_err(|e| {
271                        SedimentError::Database(format!("Failed to record co-access: {}", e))
272                    })?;
273            }
274        }
275
276        Ok(())
277    }
278
279    /// Get items that are frequently co-accessed with the given IDs.
280    /// Returns (neighbor_id, co_access_count) tuples.
281    pub fn get_co_accessed(&self, ids: &[&str], min_count: i64) -> Result<Vec<(String, i64)>> {
282        if ids.is_empty() {
283            return Ok(Vec::new());
284        }
285
286        let placeholders: Vec<String> = (1..=ids.len()).map(|i| format!("?{}", i)).collect();
287        let ph = placeholders.join(",");
288        let min_idx = ids.len() + 1;
289
290        let sql = format!(
291            "SELECT
292                CASE WHEN from_id IN ({ph}) THEN to_id ELSE from_id END AS neighbor,
293                count
294             FROM graph_edges
295             WHERE (from_id IN ({ph}) OR to_id IN ({ph}))
296               AND edge_type = 'co_accessed'
297               AND count >= ?{min_idx}
298             ORDER BY count DESC"
299        );
300
301        let mut stmt = self.conn.prepare(&sql).map_err(|e| {
302            SedimentError::Database(format!("Failed to prepare co-access query: {}", e))
303        })?;
304
305        let mut param_values: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
306        for id in ids {
307            param_values.push(Box::new(id.to_string()));
308        }
309        param_values.push(Box::new(min_count));
310
311        let params_ref: Vec<&dyn rusqlite::types::ToSql> =
312            param_values.iter().map(|b| b.as_ref()).collect();
313
314        let rows = stmt
315            .query_map(params_ref.as_slice(), |row| {
316                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
317            })
318            .map_err(|e| SedimentError::Database(format!("Failed to query co-access: {}", e)))?;
319
320        let mut results = Vec::new();
321        for row in rows {
322            let r = row
323                .map_err(|e| SedimentError::Database(format!("Failed to read co-access: {}", e)))?;
324            results.push(r);
325        }
326
327        // Deduplicate by target_id, keeping highest count
328        results.sort_by(|a, b| b.1.cmp(&a.1));
329        let mut seen = std::collections::HashSet::new();
330        results.retain(|(id, _)| seen.insert(id.clone()));
331
332        Ok(results)
333    }
334
335    /// Transfer all edges from one node to another (used during consolidation merge).
336    pub fn transfer_edges(&self, from_id: &str, to_id: &str) -> Result<()> {
337        // Get all RELATED edges connected to from_id (excluding edges to to_id)
338        let mut stmt = self
339            .conn
340            .prepare(
341                "SELECT from_id, to_id, strength, rel_type, created_at
342             FROM graph_edges
343             WHERE (from_id = ?1 OR to_id = ?1)
344               AND edge_type = 'related'
345               AND from_id != ?2 AND to_id != ?2",
346            )
347            .map_err(|e| {
348                SedimentError::Database(format!("Failed to prepare transfer query: {}", e))
349            })?;
350
351        let edges: Vec<(String, f64, String, i64)> = stmt
352            .query_map(params![from_id, to_id], |row| {
353                let fid: String = row.get(0)?;
354                let tid: String = row.get(1)?;
355                let neighbor = if fid == from_id { tid } else { fid };
356                Ok((neighbor, row.get(2)?, row.get(3)?, row.get(4)?))
357            })
358            .map_err(|e| {
359                SedimentError::Database(format!("Failed to query edges for transfer: {}", e))
360            })?
361            .filter_map(|r| match r {
362                Ok(v) => Some(v),
363                Err(e) => {
364                    tracing::warn!("transfer_edges: failed to read row: {}", e);
365                    None
366                }
367            })
368            .collect();
369
370        // Create edges on the new node
371        for (neighbor, strength, rel_type, _) in &edges {
372            if let Err(e) = self.add_related_edge(to_id, neighbor, *strength, rel_type) {
373                tracing::warn!("transfer edge to {} failed: {}", neighbor, e);
374            }
375        }
376
377        Ok(())
378    }
379
380    /// Detect triangles of RELATED items (for clustering).
381    /// Returns sets of 3 item IDs that form triangles.
382    pub fn detect_clusters(&self) -> Result<Vec<(String, String, String)>> {
383        let mut stmt = self
384            .conn
385            .prepare(
386                "WITH biedges AS (
387                SELECT from_id AS a, to_id AS b FROM graph_edges WHERE edge_type = 'related'
388                UNION ALL
389                SELECT to_id AS a, from_id AS b FROM graph_edges WHERE edge_type = 'related'
390            )
391            SELECT DISTINCT e1.a, e1.b, e2.b
392            FROM biedges e1
393            JOIN biedges e2 ON e1.b = e2.a
394            JOIN biedges e3 ON e2.b = e3.a AND e3.b = e1.a
395            WHERE e1.a < e1.b AND e1.b < e2.b
396            LIMIT 50",
397            )
398            .map_err(|e| SedimentError::Database(format!("Failed to detect clusters: {}", e)))?;
399
400        let rows = stmt
401            .query_map([], |row| {
402                Ok((
403                    row.get::<_, String>(0)?,
404                    row.get::<_, String>(1)?,
405                    row.get::<_, String>(2)?,
406                ))
407            })
408            .map_err(|e| SedimentError::Database(format!("Failed to read clusters: {}", e)))?;
409
410        let mut clusters = Vec::new();
411        for r in rows.flatten() {
412            clusters.push(r);
413        }
414
415        Ok(clusters)
416    }
417
418    /// Get full connection info for an item (all edge types).
419    pub fn get_full_connections(&self, item_id: &str) -> Result<Vec<ConnectionInfo>> {
420        let mut stmt = self
421            .conn
422            .prepare(
423                "SELECT
424                CASE WHEN from_id = ?1 THEN to_id ELSE from_id END AS neighbor,
425                edge_type,
426                strength,
427                rel_type,
428                count
429             FROM graph_edges
430             WHERE from_id = ?1 OR to_id = ?1",
431            )
432            .map_err(|e| {
433                SedimentError::Database(format!("Failed to prepare connections query: {}", e))
434            })?;
435
436        let rows = stmt
437            .query_map(params![item_id], |row| {
438                let neighbor: String = row.get(0)?;
439                let edge_type: String = row.get(1)?;
440                let strength: f64 = row.get(2)?;
441                let rel_type_val: String = row.get(3)?;
442                let count: i64 = row.get(4)?;
443
444                let display_type = match edge_type.as_str() {
445                    "related" => rel_type_val.clone(),
446                    "supersedes" => "supersedes".to_string(),
447                    "co_accessed" => "co_accessed".to_string(),
448                    _ => edge_type.clone(),
449                };
450
451                Ok(ConnectionInfo {
452                    target_id: neighbor,
453                    rel_type: display_type,
454                    strength,
455                    count: if edge_type == "co_accessed" {
456                        Some(count)
457                    } else {
458                        None
459                    },
460                })
461            })
462            .map_err(|e| SedimentError::Database(format!("Failed to query connections: {}", e)))?;
463
464        let mut connections = Vec::new();
465        for row in rows {
466            let r = row.map_err(|e| {
467                SedimentError::Database(format!("Failed to read connection: {}", e))
468            })?;
469            connections.push(r);
470        }
471
472        Ok(connections)
473    }
474
475    /// Get 1-hop neighbors grouped by source ID.
476    ///
477    /// For each input ID, returns a list of neighbor IDs from RELATED or SUPERSEDES edges.
478    /// Input IDs are excluded from results.
479    pub fn get_neighbors_mapped(
480        &self,
481        ids: &[&str],
482        min_strength: f64,
483    ) -> Result<std::collections::HashMap<String, Vec<String>>> {
484        if ids.is_empty() {
485            return Ok(std::collections::HashMap::new());
486        }
487
488        let placeholders: Vec<String> = (1..=ids.len()).map(|i| format!("?{}", i)).collect();
489        let ph = placeholders.join(",");
490        let strength_idx = ids.len() + 1;
491
492        let sql = format!(
493            "SELECT
494                CASE WHEN from_id IN ({ph}) THEN from_id ELSE to_id END AS source,
495                CASE WHEN from_id IN ({ph}) THEN to_id ELSE from_id END AS neighbor,
496                strength
497             FROM graph_edges
498             WHERE (from_id IN ({ph}) OR to_id IN ({ph}))
499               AND edge_type IN ('related', 'supersedes')
500               AND strength >= ?{strength_idx}
501             LIMIT 500"
502        );
503
504        let mut stmt = self.conn.prepare(&sql).map_err(|e| {
505            SedimentError::Database(format!("Failed to prepare neighbors_mapped query: {}", e))
506        })?;
507
508        let mut param_values: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
509        for id in ids {
510            param_values.push(Box::new(id.to_string()));
511        }
512        param_values.push(Box::new(min_strength));
513
514        let params_ref: Vec<&dyn rusqlite::types::ToSql> =
515            param_values.iter().map(|b| b.as_ref()).collect();
516
517        let rows = stmt
518            .query_map(params_ref.as_slice(), |row| {
519                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
520            })
521            .map_err(|e| {
522                SedimentError::Database(format!("Failed to query neighbors_mapped: {}", e))
523            })?;
524
525        let input_set: std::collections::HashSet<&str> = ids.iter().copied().collect();
526        let mut map: std::collections::HashMap<String, Vec<String>> =
527            std::collections::HashMap::new();
528
529        for row in rows {
530            let (source, neighbor) = row.map_err(|e| {
531                SedimentError::Database(format!("Failed to read neighbor_mapped: {}", e))
532            })?;
533            if !input_set.contains(neighbor.as_str()) {
534                map.entry(source).or_default().push(neighbor);
535            }
536        }
537
538        Ok(map)
539    }
540
541    /// Get edge counts for multiple items in a single query.
542    ///
543    /// Returns a map from item ID to its total edge count (all edge types).
544    pub fn get_edge_counts(&self, ids: &[&str]) -> Result<std::collections::HashMap<String, u32>> {
545        if ids.is_empty() {
546            return Ok(std::collections::HashMap::new());
547        }
548
549        // Build a VALUES CTE for the input IDs
550        let unions: String = (1..=ids.len())
551            .map(|i| {
552                if i == 1 {
553                    format!("SELECT ?{} AS id", i)
554                } else {
555                    format!("UNION ALL SELECT ?{}", i)
556                }
557            })
558            .collect::<Vec<_>>()
559            .join(" ");
560
561        // Count edges where the item appears on either side
562        let sql = format!(
563            "SELECT ids.id, COUNT(e.rowid) FROM ({unions}) ids
564            LEFT JOIN graph_edges e ON e.from_id = ids.id OR e.to_id = ids.id
565            GROUP BY ids.id"
566        );
567
568        let mut stmt = self.conn.prepare(&sql).map_err(|e| {
569            SedimentError::Database(format!("Failed to prepare edge_counts query: {}", e))
570        })?;
571
572        let params: Vec<&dyn rusqlite::types::ToSql> = ids
573            .iter()
574            .map(|id| id as &dyn rusqlite::types::ToSql)
575            .collect();
576
577        let rows = stmt
578            .query_map(params.as_slice(), |row| {
579                Ok((row.get::<_, String>(0)?, row.get::<_, u32>(1)?))
580            })
581            .map_err(|e| SedimentError::Database(format!("Failed to query edge_counts: {}", e)))?;
582
583        let mut map = std::collections::HashMap::new();
584        for row in rows {
585            let (id, count) = row.map_err(|e| {
586                SedimentError::Database(format!("Failed to read edge_count: {}", e))
587            })?;
588            map.insert(id, count);
589        }
590
591        Ok(map)
592    }
593
594    /// Migrate all graph nodes from one project ID to another.
595    ///
596    /// Used when a project's ID changes (e.g., UUID→git root commit hash).
597    pub fn migrate_project_id(&self, old_id: &str, new_id: &str) -> Result<usize> {
598        let updated = self
599            .conn
600            .execute(
601                "UPDATE graph_nodes SET project_id = ?1 WHERE project_id = ?2",
602                params![new_id, old_id],
603            )
604            .map_err(|e| {
605                SedimentError::Database(format!("Failed to migrate graph nodes: {}", e))
606            })?;
607
608        if updated > 0 {
609            debug!(
610                "Migrated {} graph nodes from project {} to {}",
611                updated, old_id, new_id
612            );
613        }
614        Ok(updated)
615    }
616
617    /// Get the edge count for an item (total number of edges of all types).
618    pub fn get_edge_count(&self, item_id: &str) -> Result<u32> {
619        let count: i64 = self
620            .conn
621            .query_row(
622                "SELECT COUNT(*) FROM graph_edges WHERE from_id = ?1 OR to_id = ?1",
623                params![item_id],
624                |row| row.get(0),
625            )
626            .map_err(|e| SedimentError::Database(format!("Failed to count edges: {}", e)))?;
627
628        Ok(count as u32)
629    }
630}
631
632#[cfg(test)]
633mod tests {
634    use super::*;
635    use tempfile::NamedTempFile;
636
637    fn open_test_graph() -> GraphStore {
638        let tmp = NamedTempFile::new().unwrap();
639        GraphStore::open(tmp.path()).unwrap()
640    }
641
642    #[test]
643    fn test_get_neighbors_excludes_input_ids() {
644        // Fix #7: get_neighbors should never return an input ID as a neighbor
645        let graph = open_test_graph();
646        let now = chrono::Utc::now().timestamp();
647        graph.add_node("A", Some("proj"), now).unwrap();
648        graph.add_node("B", Some("proj"), now).unwrap();
649        graph.add_node("C", Some("proj"), now).unwrap();
650
651        // Create edges A-B, B-C
652        graph.add_related_edge("A", "B", 0.9, "test").unwrap();
653        graph.add_related_edge("B", "C", 0.9, "test").unwrap();
654
655        // Query neighbors for [A, B] — should only return C, not A or B
656        let neighbors = graph.get_neighbors(&["A", "B"], 0.0).unwrap();
657        let neighbor_ids: Vec<&str> = neighbors.iter().map(|(id, _, _)| id.as_str()).collect();
658        assert!(neighbor_ids.contains(&"C"));
659        assert!(!neighbor_ids.contains(&"A"));
660        assert!(!neighbor_ids.contains(&"B"));
661    }
662
663    #[test]
664    fn test_co_access_normalized_direction() {
665        // Fix #8: co-access edges should be normalized so (A,B) and (B,A) don't create duplicates
666        let graph = open_test_graph();
667        let now = chrono::Utc::now().timestamp();
668        graph.add_node("Z", Some("proj"), now).unwrap();
669        graph.add_node("A", Some("proj"), now).unwrap();
670
671        // Record co-access with Z before A (Z > A lexicographically)
672        graph
673            .record_co_access(&["Z".to_string(), "A".to_string()])
674            .unwrap();
675        // Record again with reversed order
676        graph
677            .record_co_access(&["A".to_string(), "Z".to_string()])
678            .unwrap();
679
680        // Should only have 1 edge with count=2 (not 2 edges with count=1 each)
681        let count: i64 = graph
682            .conn
683            .query_row(
684                "SELECT COUNT(*) FROM graph_edges WHERE edge_type = 'co_accessed'",
685                [],
686                |row| row.get(0),
687            )
688            .unwrap();
689        assert_eq!(count, 1, "Should have exactly 1 co-access edge");
690
691        let edge_count: i64 = graph
692            .conn
693            .query_row(
694                "SELECT count FROM graph_edges WHERE edge_type = 'co_accessed'",
695                [],
696                |row| row.get(0),
697            )
698            .unwrap();
699        assert_eq!(edge_count, 2, "Edge count should be 2 (incremented twice)");
700    }
701
702    #[test]
703    fn test_transfer_edges_preserves_relationships() {
704        // Fix #6: transfer_edges should move edges from old node to new node
705        let graph = open_test_graph();
706        let now = chrono::Utc::now().timestamp();
707        graph.add_node("old", Some("proj"), now).unwrap();
708        graph.add_node("new", Some("proj"), now).unwrap();
709        graph.add_node("friend", Some("proj"), now).unwrap();
710
711        graph
712            .add_related_edge("old", "friend", 0.9, "test")
713            .unwrap();
714
715        // Transfer edges from old to new
716        graph.transfer_edges("old", "new").unwrap();
717
718        // New node should now have edge to friend
719        let neighbors = graph.get_neighbors(&["new"], 0.0).unwrap();
720        assert!(
721            !neighbors.is_empty(),
722            "New node should have inherited edges"
723        );
724        let neighbor_ids: Vec<&str> = neighbors.iter().map(|(id, _, _)| id.as_str()).collect();
725        assert!(neighbor_ids.contains(&"friend"));
726    }
727
728    #[test]
729    fn test_remove_node_preserves_incoming_supersedes() {
730        // Bug #4: remove_node should preserve incoming SUPERSEDES edges for lineage
731        let graph = open_test_graph();
732        let now = chrono::Utc::now().timestamp();
733        graph.add_node("new", Some("proj"), now).unwrap();
734        graph.add_node("old", Some("proj"), now).unwrap();
735
736        // Simulate replace workflow: create SUPERSEDES edge new -> old
737        graph.add_supersedes_edge("new", "old").unwrap();
738
739        // Now remove the old node (as execute_store does)
740        graph.remove_node("old").unwrap();
741
742        // The SUPERSEDES edge (new -> old) should survive because old is the to_id
743        let connections = graph.get_full_connections("new").unwrap();
744        assert_eq!(connections.len(), 1, "SUPERSEDES edge should be preserved");
745        assert_eq!(connections[0].target_id, "old");
746        assert_eq!(connections[0].rel_type, "supersedes");
747    }
748
749    #[test]
750    fn test_get_neighbors_bounded() {
751        // Bug #3: get_neighbors should return at most 100 results
752        let graph = open_test_graph();
753        let now = chrono::Utc::now().timestamp();
754        graph.add_node("center", Some("proj"), now).unwrap();
755
756        for i in 0..150 {
757            let id = format!("n{}", i);
758            graph.add_node(&id, Some("proj"), now).unwrap();
759            graph.add_related_edge("center", &id, 0.9, "test").unwrap();
760        }
761
762        let neighbors = graph.get_neighbors(&["center"], 0.0).unwrap();
763        assert!(
764            neighbors.len() <= 100,
765            "get_neighbors should return at most 100, got {}",
766            neighbors.len()
767        );
768    }
769
770    #[test]
771    fn test_get_neighbors_mapped() {
772        let graph = open_test_graph();
773        let now = chrono::Utc::now().timestamp();
774        graph.add_node("A", Some("proj"), now).unwrap();
775        graph.add_node("B", Some("proj"), now).unwrap();
776        graph.add_node("C", Some("proj"), now).unwrap();
777        graph.add_node("D", Some("proj"), now).unwrap();
778
779        graph.add_related_edge("A", "C", 0.9, "test").unwrap();
780        graph.add_related_edge("B", "D", 0.9, "test").unwrap();
781
782        let map = graph.get_neighbors_mapped(&["A", "B"], 0.0).unwrap();
783
784        // A should have C as neighbor, B should have D
785        assert!(map.get("A").unwrap().contains(&"C".to_string()));
786        assert!(map.get("B").unwrap().contains(&"D".to_string()));
787        // Input IDs should not appear as neighbors
788        for neighbors in map.values() {
789            assert!(!neighbors.contains(&"A".to_string()));
790            assert!(!neighbors.contains(&"B".to_string()));
791        }
792    }
793
794    #[test]
795    fn test_get_edge_counts() {
796        let graph = open_test_graph();
797        let now = chrono::Utc::now().timestamp();
798        graph.add_node("X", Some("proj"), now).unwrap();
799        graph.add_node("Y", Some("proj"), now).unwrap();
800        graph.add_node("Z", Some("proj"), now).unwrap();
801
802        graph.add_related_edge("X", "Y", 0.9, "test").unwrap();
803        graph.add_related_edge("X", "Z", 0.9, "test").unwrap();
804        graph.add_related_edge("Y", "Z", 0.9, "test").unwrap();
805
806        let counts = graph.get_edge_counts(&["X", "Y", "Z"]).unwrap();
807
808        // Verify batch counts match individual counts
809        assert_eq!(
810            counts.get("X").copied().unwrap_or(0),
811            graph.get_edge_count("X").unwrap()
812        );
813        assert_eq!(
814            counts.get("Y").copied().unwrap_or(0),
815            graph.get_edge_count("Y").unwrap()
816        );
817        assert_eq!(
818            counts.get("Z").copied().unwrap_or(0),
819            graph.get_edge_count("Z").unwrap()
820        );
821
822        // X has 2 edges (X-Y, X-Z), Y has 2 (X-Y, Y-Z), Z has 2 (X-Z, Y-Z)
823        assert_eq!(counts["X"], 2);
824        assert_eq!(counts["Y"], 2);
825        assert_eq!(counts["Z"], 2);
826    }
827}