Skip to main content

sqlite_graphrag/storage/
entities.rs

1//! Persistence layer for entities, relationships and their junction tables.
2//!
3//! The entity graph mirrors the conceptual content of memories: `entities`
4//! holds nodes, `relationships` holds typed edges and `memory_entities` and
5//! `memory_relationships` connect each memory to the graph slice it emitted.
6
7use crate::embedder::f32_to_bytes;
8use crate::entity_type::EntityType;
9use crate::errors::AppError;
10use crate::parsers::normalize_entity_name;
11use crate::storage::utils::with_busy_retry;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14
/// Input payload used to upsert a single entity.
///
/// [`upsert_entity`] validates `name` and normalizes it with
/// `normalize_entity_name` before it is stored. `description` is optional and
/// preserved across upserts when the new value is `None`.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct NewEntity {
    /// Entity name as supplied by the producer of the payload.
    pub name: String,
    /// Entity classification; also accepted under the key `type` when deserializing.
    #[serde(alias = "type")]
    pub entity_type: EntityType,
    /// Optional free-text description of the entity.
    pub description: Option<String>,
}
27
/// Input payload used to upsert a typed relationship between entities.
///
/// `strength` is expected to lie within `[0.0, 1.0]` and is written to the
/// `weight` column of the `relationships` table by [`upsert_relationship`].
/// NOTE(review): the range is not checked in this module; presumably the
/// caller or a table constraint enforces it — confirm.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct NewRelationship {
    /// Source endpoint; also accepted under the key `from` when deserializing.
    #[serde(alias = "from")]
    pub source: String,
    /// Target endpoint; also accepted under the key `to` when deserializing.
    #[serde(alias = "to")]
    pub target: String,
    /// Relation type label stored in the `relation` column.
    pub relation: String,
    /// Edge strength, stored as `weight`.
    pub strength: f64,
    /// Optional free-text description of the relationship.
    pub description: Option<String>,
}
43
44/// Validates entity name against quality rules.
45///
46/// Rejects names with newlines, names shorter than 2 characters, and
47/// ALL_CAPS abbreviations of 4 characters or fewer (common NER noise).
48///
49/// # Errors
50///
51/// Returns `Err(AppError::Validation)` when the name violates any rule.
52pub fn validate_entity_name(name: &str) -> Result<(), AppError> {
53    if name.len() < 2 {
54        return Err(AppError::Validation(format!(
55            "entity name '{name}' must be at least 2 characters"
56        )));
57    }
58    if name.contains('\n') || name.contains('\r') {
59        return Err(AppError::Validation(
60            "entity name must not contain newline characters".to_string(),
61        ));
62    }
63    if name.len() <= 4
64        && name
65            .chars()
66            .all(|c| c.is_ascii_uppercase() || c == '_' || c == '-')
67    {
68        return Err(AppError::Validation(format!(
69            "entity name '{name}' rejected: short ALL_CAPS names are typically NER noise"
70        )));
71    }
72    Ok(())
73}
74
75/// Upserts an entity and returns its primary key.
76///
77/// Uses `ON CONFLICT(namespace, name)` to keep one row per entity within a
78/// namespace, refreshing `type` and `description` opportunistically.
79///
80/// # Errors
81///
82/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
83pub fn upsert_entity(conn: &Connection, namespace: &str, e: &NewEntity) -> Result<i64, AppError> {
84    // Step 1: validate the original name — catches ALL_CAPS short noise (NER artefacts),
85    // newlines, and names shorter than 2 characters before any transformation.
86    validate_entity_name(&e.name)?;
87    // Step 2: normalize to kebab-case ASCII (NFKD, lowercase, spaces/underscores → hyphens).
88    let normalized_name = normalize_entity_name(&e.name);
89    // Step 3: guard post-normalization length — a valid original could collapse to < 2 chars
90    // (e.g. a single accented character that strips entirely).
91    if normalized_name.chars().count() < 2 {
92        return Err(AppError::Validation(format!(
93            "entity name '{}' normalizes to '{}' which is too short (minimum 2 characters)",
94            e.name, normalized_name
95        )));
96    }
97    conn.execute(
98        "INSERT INTO entities (namespace, name, type, description)
99         VALUES (?1, ?2, ?3, ?4)
100         ON CONFLICT(namespace, name) DO UPDATE SET
101           type        = excluded.type,
102           description = COALESCE(excluded.description, entities.description),
103           updated_at  = unixepoch()",
104        params![namespace, normalized_name, e.entity_type, e.description],
105    )?;
106    let id: i64 = conn.query_row(
107        "SELECT id FROM entities WHERE namespace = ?1 AND name = ?2",
108        params![namespace, normalized_name],
109        |r| r.get(0),
110    )?;
111    Ok(id)
112}
113
114/// Replaces the vector row for an entity in `vec_entities`.
115///
116/// vec0 virtual tables do not honour `INSERT OR REPLACE` when the primary key
117/// already exists — they raise a UNIQUE constraint error instead of silently
118/// replacing the row. The workaround is an explicit DELETE before INSERT so
119/// that the insert never conflicts. `embedding` must have length
120/// [`crate::constants::EMBEDDING_DIM`].
121///
122/// # Errors
123///
124/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
125pub fn upsert_entity_vec(
126    conn: &Connection,
127    entity_id: i64,
128    namespace: &str,
129    entity_type: EntityType,
130    embedding: &[f32],
131    name: &str,
132) -> Result<(), AppError> {
133    // Both statements wrapped in with_busy_retry: WAL concurrency can cause
134    // SQLITE_BUSY on vec0 virtual table writes when multiple CLI instances run.
135    let embedding_bytes = f32_to_bytes(embedding);
136    with_busy_retry(|| {
137        conn.execute(
138            "DELETE FROM vec_entities WHERE entity_id = ?1",
139            params![entity_id],
140        )?;
141        conn.execute(
142            "INSERT INTO vec_entities(entity_id, namespace, type, embedding, name)
143             VALUES (?1, ?2, ?3, ?4, ?5)",
144            params![entity_id, namespace, entity_type, &embedding_bytes, name],
145        )?;
146        Ok(())
147    })
148}
149
150/// Upserts a typed relationship between two entity ids.
151///
152/// Conflicts on `(source_id, target_id, relation)` refresh `weight` and
153/// preserve a non-null `description`. Returns the `rowid` of the stored row.
154///
155/// # Errors
156///
157/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
158pub fn upsert_relationship(
159    conn: &Connection,
160    namespace: &str,
161    source_id: i64,
162    target_id: i64,
163    rel: &NewRelationship,
164) -> Result<i64, AppError> {
165    conn.execute(
166        "INSERT INTO relationships (namespace, source_id, target_id, relation, weight, description)
167         VALUES (?1, ?2, ?3, ?4, ?5, ?6)
168         ON CONFLICT(source_id, target_id, relation) DO UPDATE SET
169           weight = excluded.weight,
170           description = COALESCE(excluded.description, relationships.description)",
171        params![
172            namespace,
173            source_id,
174            target_id,
175            rel.relation,
176            rel.strength,
177            rel.description
178        ],
179    )?;
180    let id: i64 = conn.query_row(
181        "SELECT id FROM relationships WHERE source_id=?1 AND target_id=?2 AND relation=?3",
182        params![source_id, target_id, rel.relation],
183        |r| r.get(0),
184    )?;
185    Ok(id)
186}
187
188pub fn link_memory_entity(
189    conn: &Connection,
190    memory_id: i64,
191    entity_id: i64,
192) -> Result<(), AppError> {
193    conn.execute(
194        "INSERT OR IGNORE INTO memory_entities (memory_id, entity_id) VALUES (?1, ?2)",
195        params![memory_id, entity_id],
196    )?;
197    Ok(())
198}
199
200pub fn link_memory_relationship(
201    conn: &Connection,
202    memory_id: i64,
203    rel_id: i64,
204) -> Result<(), AppError> {
205    conn.execute(
206        "INSERT OR IGNORE INTO memory_relationships (memory_id, relationship_id) VALUES (?1, ?2)",
207        params![memory_id, rel_id],
208    )?;
209    Ok(())
210}
211
212pub fn increment_degree(conn: &Connection, entity_id: i64) -> Result<(), AppError> {
213    conn.execute(
214        "UPDATE entities SET degree = degree + 1 WHERE id = ?1",
215        params![entity_id],
216    )?;
217    Ok(())
218}
219
220/// Looks up the entity by name and namespace. Returns the id when it exists.
221pub fn find_entity_id(
222    conn: &Connection,
223    namespace: &str,
224    name: &str,
225) -> Result<Option<i64>, AppError> {
226    // Normalize the lookup name so it matches the normalized names written by
227    // `upsert_entity`. Without this, an entity written through normalization
228    // (e.g. "Foo Bar" -> "foo-bar") would be unreachable by its original
229    // spelling, breaking delete-entity, reclassify, merge-entities, rename and
230    // memory-entities lookups.
231    let name = normalize_entity_name(name);
232    let mut stmt =
233        conn.prepare_cached("SELECT id FROM entities WHERE namespace = ?1 AND name = ?2")?;
234    match stmt.query_row(params![namespace, &name], |r| r.get::<_, i64>(0)) {
235        Ok(id) => Ok(Some(id)),
236        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
237        Err(e) => Err(AppError::Database(e)),
238    }
239}
240
/// A row read from the `relationships` table.
///
/// Field order mirrors the column order used by the `SELECT` statements in
/// this module (`id, namespace, source_id, target_id, relation, weight,
/// description`).
#[derive(Debug, Serialize)]
pub struct RelationshipRow {
    /// Primary key of the relationship.
    pub id: i64,
    /// Namespace stored on the relationship row.
    pub namespace: String,
    /// Id of the source entity.
    pub source_id: i64,
    /// Id of the target entity.
    pub target_id: i64,
    /// Relation type label.
    pub relation: String,
    /// Stored edge weight.
    pub weight: f64,
    /// Optional description; `None` when the column is NULL.
    pub description: Option<String>,
}
252
253/// Looks up a specific relation by (source_id, target_id, relation).
254pub fn find_relationship(
255    conn: &Connection,
256    source_id: i64,
257    target_id: i64,
258    relation: &str,
259) -> Result<Option<RelationshipRow>, AppError> {
260    let mut stmt = conn.prepare_cached(
261        "SELECT id, namespace, source_id, target_id, relation, weight, description
262         FROM relationships
263         WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
264    )?;
265    match stmt.query_row(params![source_id, target_id, relation], |r| {
266        Ok(RelationshipRow {
267            id: r.get(0)?,
268            namespace: r.get(1)?,
269            source_id: r.get(2)?,
270            target_id: r.get(3)?,
271            relation: r.get(4)?,
272            weight: r.get(5)?,
273            description: r.get(6)?,
274        })
275    }) {
276        Ok(row) => Ok(Some(row)),
277        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
278        Err(e) => Err(AppError::Database(e)),
279    }
280}
281
282/// Creates a relation if it does not exist (returns action="created")
283/// or returns the existing relation (action="already_exists") with updated weight.
284pub fn create_or_fetch_relationship(
285    conn: &Connection,
286    namespace: &str,
287    source_id: i64,
288    target_id: i64,
289    relation: &str,
290    weight: f64,
291    description: Option<&str>,
292) -> Result<(i64, bool), AppError> {
293    // Check if it exists first.
294    let existing = find_relationship(conn, source_id, target_id, relation)?;
295    if let Some(row) = existing {
296        return Ok((row.id, false));
297    }
298    conn.execute(
299        "INSERT INTO relationships (namespace, source_id, target_id, relation, weight, description)
300         VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
301        params![
302            namespace,
303            source_id,
304            target_id,
305            relation,
306            weight,
307            description
308        ],
309    )?;
310    let id: i64 = conn.query_row(
311        "SELECT id FROM relationships WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
312        params![source_id, target_id, relation],
313        |r| r.get(0),
314    )?;
315    Ok((id, true))
316}
317
318/// Removes a relation by id and cleans up memory_relationships.
319pub fn delete_relationship_by_id(conn: &Connection, relationship_id: i64) -> Result<(), AppError> {
320    conn.execute(
321        "DELETE FROM memory_relationships WHERE relationship_id = ?1",
322        params![relationship_id],
323    )?;
324    conn.execute(
325        "DELETE FROM relationships WHERE id = ?1",
326        params![relationship_id],
327    )?;
328    Ok(())
329}
330
331/// Recalculates the `degree` field of an entity.
332pub fn recalculate_degree(conn: &Connection, entity_id: i64) -> Result<(), AppError> {
333    conn.execute(
334        "UPDATE entities
335         SET degree = (SELECT COUNT(*) FROM relationships
336                       WHERE source_id = entities.id OR target_id = entities.id)
337         WHERE id = ?1",
338        params![entity_id],
339    )?;
340    Ok(())
341}
342
/// Entity row with enough data for graph export/query.
///
/// Populated by [`list_entities`] from the `id`, `name`, `namespace` and
/// `type` columns of `entities`.
#[derive(Debug, Serialize, Clone)]
pub struct EntityNode {
    /// Primary key of the entity.
    pub id: i64,
    /// Stored entity name.
    pub name: String,
    /// Namespace the entity belongs to.
    pub namespace: String,
    /// Value of the `type` column, read as a string.
    pub kind: String,
}
351
352/// Lists entities, filtering by namespace if provided.
353pub fn list_entities(
354    conn: &Connection,
355    namespace: Option<&str>,
356) -> Result<Vec<EntityNode>, AppError> {
357    if let Some(ns) = namespace {
358        let mut stmt = conn.prepare(
359            "SELECT id, name, namespace, type FROM entities WHERE namespace = ?1 ORDER BY id",
360        )?;
361        let rows = stmt
362            .query_map(params![ns], |r| {
363                Ok(EntityNode {
364                    id: r.get(0)?,
365                    name: r.get(1)?,
366                    namespace: r.get(2)?,
367                    kind: r.get(3)?,
368                })
369            })?
370            .collect::<Result<Vec<_>, _>>()?;
371        Ok(rows)
372    } else {
373        let mut stmt =
374            conn.prepare("SELECT id, name, namespace, type FROM entities ORDER BY namespace, id")?;
375        let rows = stmt
376            .query_map([], |r| {
377                Ok(EntityNode {
378                    id: r.get(0)?,
379                    name: r.get(1)?,
380                    namespace: r.get(2)?,
381                    kind: r.get(3)?,
382                })
383            })?
384            .collect::<Result<Vec<_>, _>>()?;
385        Ok(rows)
386    }
387}
388
389/// Lists relations filtered by namespace (of source/target entities).
390pub fn list_relationships_by_namespace(
391    conn: &Connection,
392    namespace: Option<&str>,
393) -> Result<Vec<RelationshipRow>, AppError> {
394    if let Some(ns) = namespace {
395        let mut stmt = conn.prepare(
396            "SELECT r.id, r.namespace, r.source_id, r.target_id, r.relation, r.weight, r.description
397             FROM relationships r
398             JOIN entities se ON se.id = r.source_id AND se.namespace = ?1
399             JOIN entities te ON te.id = r.target_id AND te.namespace = ?1
400             ORDER BY r.id",
401        )?;
402        let rows = stmt
403            .query_map(params![ns], |r| {
404                Ok(RelationshipRow {
405                    id: r.get(0)?,
406                    namespace: r.get(1)?,
407                    source_id: r.get(2)?,
408                    target_id: r.get(3)?,
409                    relation: r.get(4)?,
410                    weight: r.get(5)?,
411                    description: r.get(6)?,
412                })
413            })?
414            .collect::<Result<Vec<_>, _>>()?;
415        Ok(rows)
416    } else {
417        let mut stmt = conn.prepare(
418            "SELECT id, namespace, source_id, target_id, relation, weight, description
419             FROM relationships ORDER BY id",
420        )?;
421        let rows = stmt
422            .query_map([], |r| {
423                Ok(RelationshipRow {
424                    id: r.get(0)?,
425                    namespace: r.get(1)?,
426                    source_id: r.get(2)?,
427                    target_id: r.get(3)?,
428                    relation: r.get(4)?,
429                    weight: r.get(5)?,
430                    description: r.get(6)?,
431                })
432            })?
433            .collect::<Result<Vec<_>, _>>()?;
434        Ok(rows)
435    }
436}
437
438/// Locates orphan entities: no link in memory_entities and no relations.
439pub fn find_orphan_entity_ids(
440    conn: &Connection,
441    namespace: Option<&str>,
442) -> Result<Vec<i64>, AppError> {
443    if let Some(ns) = namespace {
444        let mut stmt = conn.prepare(
445            "SELECT e.id FROM entities e
446             WHERE e.namespace = ?1
447               AND NOT EXISTS (SELECT 1 FROM memory_entities me WHERE me.entity_id = e.id)
448               AND NOT EXISTS (
449                   SELECT 1 FROM relationships r
450                   WHERE r.source_id = e.id OR r.target_id = e.id
451               )",
452        )?;
453        let ids = stmt
454            .query_map(params![ns], |r| r.get::<_, i64>(0))?
455            .collect::<Result<Vec<_>, _>>()?;
456        Ok(ids)
457    } else {
458        let mut stmt = conn.prepare(
459            "SELECT e.id FROM entities e
460             WHERE NOT EXISTS (SELECT 1 FROM memory_entities me WHERE me.entity_id = e.id)
461               AND NOT EXISTS (
462                   SELECT 1 FROM relationships r
463                   WHERE r.source_id = e.id OR r.target_id = e.id
464               )",
465        )?;
466        let ids = stmt
467            .query_map([], |r| r.get::<_, i64>(0))?
468            .collect::<Result<Vec<_>, _>>()?;
469        Ok(ids)
470    }
471}
472
473/// Deletes entities and their associated vectors. Returns the number of entities removed.
474pub fn delete_entities_by_ids(conn: &Connection, entity_ids: &[i64]) -> Result<usize, AppError> {
475    if entity_ids.is_empty() {
476        return Ok(0);
477    }
478    let mut removed = 0usize;
479    for id in entity_ids {
480        // vec0 lacks FK CASCADE — clean vec_entities explicitly.
481        let _ = conn.execute("DELETE FROM vec_entities WHERE entity_id = ?1", params![id]);
482        let affected = conn.execute("DELETE FROM entities WHERE id = ?1", params![id])?;
483        removed += affected;
484    }
485    Ok(removed)
486}
487
488/// Counts relationships matching the given relation type within a namespace.
489///
490/// Used by `prune-relations --dry-run` to preview the number of relationships
491/// that would be deleted without actually modifying the database.
492///
493/// # Errors
494///
495/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
496pub fn count_relationships_by_relation(
497    conn: &Connection,
498    namespace: &str,
499    relation: &str,
500) -> Result<usize, AppError> {
501    let count: i64 = conn.query_row(
502        "SELECT COUNT(*) FROM relationships WHERE namespace = ?1 AND relation = ?2",
503        params![namespace, relation],
504        |r| r.get(0),
505    )?;
506    Ok(count as usize)
507}
508
509/// Returns unique entity names involved in relationships of the given type.
510///
511/// Queries both source and target sides of every matching relationship row,
512/// deduplicates via `DISTINCT`, and returns the names in alphabetical order.
513///
514/// # Errors
515///
516/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
517pub fn list_entity_names_by_relation(
518    conn: &Connection,
519    namespace: &str,
520    relation: &str,
521) -> Result<Vec<String>, AppError> {
522    let mut stmt = conn.prepare(
523        "SELECT DISTINCT e.name FROM entities e
524         INNER JOIN relationships r ON (e.id = r.source_id OR e.id = r.target_id)
525         WHERE r.namespace = ?1 AND r.relation = ?2
526         ORDER BY e.name",
527    )?;
528    let names: Vec<String> = stmt
529        .query_map(params![namespace, relation], |row| row.get(0))?
530        .collect::<Result<Vec<_>, _>>()?;
531    Ok(names)
532}
533
534/// Deletes all relationships matching a relation type within a namespace.
535///
536/// Operates in chunks of 1000 to avoid holding long write locks and blocking
537/// WAL readers. After deletion, recalculates degree for every affected entity.
538///
539/// Returns `(count_deleted, affected_entity_ids)`.
540///
541/// # Errors
542///
543/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
544pub fn delete_relationships_by_relation(
545    conn: &Connection,
546    namespace: &str,
547    relation: &str,
548) -> Result<(usize, Vec<i64>), AppError> {
549    // Step 1: collect all affected entity IDs before deletion.
550    let mut stmt = conn.prepare(
551        "SELECT DISTINCT source_id FROM relationships WHERE namespace = ?1 AND relation = ?2
552         UNION
553         SELECT DISTINCT target_id FROM relationships WHERE namespace = ?1 AND relation = ?2",
554    )?;
555    let entity_ids: Vec<i64> = stmt
556        .query_map(params![namespace, relation], |r| r.get::<_, i64>(0))?
557        .collect::<Result<Vec<_>, _>>()?;
558
559    // Step 2: collect relationship IDs to delete.
560    let mut id_stmt =
561        conn.prepare("SELECT id FROM relationships WHERE namespace = ?1 AND relation = ?2")?;
562    let rel_ids: Vec<i64> = id_stmt
563        .query_map(params![namespace, relation], |r| r.get::<_, i64>(0))?
564        .collect::<Result<Vec<_>, _>>()?;
565
566    // Step 3: delete in chunks of 1000 (memory_relationships + relationships).
567    let mut total_deleted: usize = 0;
568    for chunk in rel_ids.chunks(1000) {
569        for &rel_id in chunk {
570            conn.execute(
571                "DELETE FROM memory_relationships WHERE relationship_id = ?1",
572                params![rel_id],
573            )?;
574            let affected =
575                conn.execute("DELETE FROM relationships WHERE id = ?1", params![rel_id])?;
576            total_deleted += affected;
577        }
578    }
579
580    // Step 4: recalculate degree for all affected entities.
581    for &eid in &entity_ids {
582        recalculate_degree(conn, eid)?;
583    }
584
585    Ok((total_deleted, entity_ids))
586}
587
588pub fn knn_search(
589    conn: &Connection,
590    embedding: &[f32],
591    namespace: &str,
592    k: usize,
593) -> Result<Vec<(i64, f32)>, AppError> {
594    let bytes = f32_to_bytes(embedding);
595    let mut stmt = conn.prepare(
596        "SELECT entity_id, distance FROM vec_entities
597         WHERE embedding MATCH ?1 AND namespace = ?2
598         ORDER BY distance LIMIT ?3",
599    )?;
600    let rows = stmt
601        .query_map(params![bytes, namespace, k as i64], |r| {
602            Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
603        })?
604        .collect::<Result<Vec<_>, _>>()?;
605    Ok(rows)
606}
607
608#[cfg(test)]
609mod tests {
610    use super::*;
611    use crate::constants::EMBEDDING_DIM;
612    use crate::entity_type::EntityType;
613    use crate::storage::connection::register_vec_extension;
614    use rusqlite::Connection;
615    use tempfile::TempDir;
616
617    type TestResult = Result<(), Box<dyn std::error::Error>>;
618
619    fn setup_db() -> Result<(TempDir, Connection), Box<dyn std::error::Error>> {
620        register_vec_extension();
621        let tmp = TempDir::new()?;
622        let db_path = tmp.path().join("test.db");
623        let mut conn = Connection::open(&db_path)?;
624        crate::migrations::runner().run(&mut conn)?;
625        Ok((tmp, conn))
626    }
627
628    fn insert_memory(conn: &Connection) -> Result<i64, Box<dyn std::error::Error>> {
629        conn.execute(
630            "INSERT INTO memories (namespace, name, type, description, body, body_hash)
631             VALUES ('global', 'test-mem', 'user', 'desc', 'body', 'hash1')",
632            [],
633        )?;
634        Ok(conn.last_insert_rowid())
635    }
636
637    fn new_entity_helper(name: &str) -> NewEntity {
638        NewEntity {
639            name: name.to_string(),
640            entity_type: EntityType::Project,
641            description: None,
642        }
643    }
644
645    fn embedding_zero() -> Vec<f32> {
646        vec![0.0f32; EMBEDDING_DIM]
647    }
648
649    // ------------------------------------------------------------------ //
650    // upsert_entity
651    // ------------------------------------------------------------------ //
652
653    #[test]
654    fn test_upsert_entity_creates_new() -> TestResult {
655        let (_tmp, conn) = setup_db()?;
656        let e = new_entity_helper("projeto-alpha");
657        let id = upsert_entity(&conn, "global", &e)?;
658        assert!(id > 0);
659        Ok(())
660    }
661
662    #[test]
663    fn test_upsert_entity_idempotent_returns_same_id() -> TestResult {
664        let (_tmp, conn) = setup_db()?;
665        let e = new_entity_helper("projeto-beta");
666        let id1 = upsert_entity(&conn, "global", &e)?;
667        let id2 = upsert_entity(&conn, "global", &e)?;
668        assert_eq!(id1, id2);
669        Ok(())
670    }
671
672    #[test]
673    fn test_upsert_entity_updates_description() -> TestResult {
674        let (_tmp, conn) = setup_db()?;
675        let e1 = new_entity_helper("projeto-gamma");
676        let id1 = upsert_entity(&conn, "global", &e1)?;
677
678        let e2 = NewEntity {
679            name: "projeto-gamma".to_string(),
680            entity_type: EntityType::Tool,
681            description: Some("nova desc".to_string()),
682        };
683        let id2 = upsert_entity(&conn, "global", &e2)?;
684        assert_eq!(id1, id2);
685
686        let desc: Option<String> = conn.query_row(
687            "SELECT description FROM entities WHERE id = ?1",
688            params![id1],
689            |r| r.get(0),
690        )?;
691        assert_eq!(desc.as_deref(), Some("nova desc"));
692        Ok(())
693    }
694
695    #[test]
696    fn test_upsert_entity_different_namespaces_create_distinct_records() -> TestResult {
697        let (_tmp, conn) = setup_db()?;
698        let e = new_entity_helper("compartilhada");
699        let id1 = upsert_entity(&conn, "ns1", &e)?;
700        let id2 = upsert_entity(&conn, "ns2", &e)?;
701        assert_ne!(id1, id2);
702        Ok(())
703    }
704
705    // ------------------------------------------------------------------ //
706    // upsert_entity_vec — covers DELETE+INSERT (new branch after the OOM fix)
707    // ------------------------------------------------------------------ //
708
709    #[test]
710    fn test_upsert_entity_vec_first_time_without_conflict() -> TestResult {
711        let (_tmp, conn) = setup_db()?;
712        let e = new_entity_helper("vec-nova");
713        let entity_id = upsert_entity(&conn, "global", &e)?;
714        let emb = embedding_zero();
715
716        let result = upsert_entity_vec(
717            &conn,
718            entity_id,
719            "global",
720            EntityType::Project,
721            &emb,
722            "vec-nova",
723        );
724        assert!(result.is_ok(), "first insertion must succeed");
725
726        let count: i64 = conn.query_row(
727            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
728            params![entity_id],
729            |r| r.get(0),
730        )?;
731        assert_eq!(count, 1, "must have exactly one row after insertion");
732        Ok(())
733    }
734
735    #[test]
736    fn test_upsert_entity_vec_second_time_replaces_without_error() -> TestResult {
737        // Covers the branch where DELETE removes the existing row before INSERT.
738        let (_tmp, conn) = setup_db()?;
739        let e = new_entity_helper("vec-existente");
740        let entity_id = upsert_entity(&conn, "global", &e)?;
741        let emb = embedding_zero();
742
743        upsert_entity_vec(
744            &conn,
745            entity_id,
746            "global",
747            EntityType::Project,
748            &emb,
749            "vec-existente",
750        )?;
751
752        // Second call: DELETE returns 1 removed row, INSERT must succeed.
753        let result = upsert_entity_vec(
754            &conn,
755            entity_id,
756            "global",
757            EntityType::Tool,
758            &emb,
759            "vec-existente",
760        );
761        assert!(
762            result.is_ok(),
763            "second insertion (replace) must succeed: {result:?}"
764        );
765
766        let count: i64 = conn.query_row(
767            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
768            params![entity_id],
769            |r| r.get(0),
770        )?;
771        assert_eq!(count, 1, "must have exactly one row after replacement");
772        Ok(())
773    }
774
775    #[test]
776    fn test_upsert_entity_vec_multiple_independent_entities() -> TestResult {
777        let (_tmp, conn) = setup_db()?;
778        let emb = embedding_zero();
779
780        for i in 0..3i64 {
781            let nome = format!("ent-{i}");
782            let e = new_entity_helper(&nome);
783            let entity_id = upsert_entity(&conn, "global", &e)?;
784            upsert_entity_vec(&conn, entity_id, "global", EntityType::Project, &emb, &nome)?;
785        }
786
787        let count: i64 = conn.query_row("SELECT COUNT(*) FROM vec_entities", [], |r| r.get(0))?;
788        assert_eq!(count, 3, "must have three distinct rows in vec_entities");
789        Ok(())
790    }
791
792    // ------------------------------------------------------------------ //
793    // find_entity_id
794    // ------------------------------------------------------------------ //
795
796    #[test]
797    fn test_find_entity_id_existing_returns_some() -> TestResult {
798        let (_tmp, conn) = setup_db()?;
799        let e = new_entity_helper("entidade-busca");
800        let id_inserido = upsert_entity(&conn, "global", &e)?;
801        let id_encontrado = find_entity_id(&conn, "global", "entidade-busca")?;
802        assert_eq!(id_encontrado, Some(id_inserido));
803        Ok(())
804    }
805
806    #[test]
807    fn test_find_entity_id_missing_returns_none() -> TestResult {
808        let (_tmp, conn) = setup_db()?;
809        let id = find_entity_id(&conn, "global", "nao-existe")?;
810        assert_eq!(id, None);
811        Ok(())
812    }
813
814    // ------------------------------------------------------------------ //
815    // delete_entities_by_ids
816    // ------------------------------------------------------------------ //
817
818    #[test]
819    fn test_delete_entities_by_ids_empty_list_returns_zero() -> TestResult {
820        let (_tmp, conn) = setup_db()?;
821        let removed = delete_entities_by_ids(&conn, &[])?;
822        assert_eq!(removed, 0);
823        Ok(())
824    }
825
826    #[test]
827    fn test_delete_entities_by_ids_removes_valid_entity() -> TestResult {
828        let (_tmp, conn) = setup_db()?;
829        let e = new_entity_helper("to-delete");
830        let entity_id = upsert_entity(&conn, "global", &e)?;
831
832        let removed = delete_entities_by_ids(&conn, &[entity_id])?;
833        assert_eq!(removed, 1);
834
835        let id = find_entity_id(&conn, "global", "to-delete")?;
836        assert_eq!(id, None, "entity must have been removed");
837        Ok(())
838    }
839
840    #[test]
841    fn test_delete_entities_by_ids_missing_id_returns_zero() -> TestResult {
842        let (_tmp, conn) = setup_db()?;
843        let removed = delete_entities_by_ids(&conn, &[9999])?;
844        assert_eq!(removed, 0);
845        Ok(())
846    }
847
848    #[test]
849    fn test_delete_entities_by_ids_removes_multiple() -> TestResult {
850        let (_tmp, conn) = setup_db()?;
851        let id1 = upsert_entity(&conn, "global", &new_entity_helper("del-a"))?;
852        let id2 = upsert_entity(&conn, "global", &new_entity_helper("del-b"))?;
853        let id3 = upsert_entity(&conn, "global", &new_entity_helper("del-c"))?;
854
855        let removed = delete_entities_by_ids(&conn, &[id1, id2])?;
856        assert_eq!(removed, 2);
857
858        assert!(find_entity_id(&conn, "global", "del-a")?.is_none());
859        assert!(find_entity_id(&conn, "global", "del-b")?.is_none());
860        assert!(find_entity_id(&conn, "global", "del-c")?.is_some());
861        let _ = id3;
862        Ok(())
863    }
864
865    #[test]
866    fn test_delete_entities_by_ids_also_removes_vec() -> TestResult {
867        let (_tmp, conn) = setup_db()?;
868        let e = new_entity_helper("del-com-vec");
869        let entity_id = upsert_entity(&conn, "global", &e)?;
870        let emb = embedding_zero();
871        upsert_entity_vec(
872            &conn,
873            entity_id,
874            "global",
875            EntityType::Project,
876            &emb,
877            "del-com-vec",
878        )?;
879
880        let count_antes: i64 = conn.query_row(
881            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
882            params![entity_id],
883            |r| r.get(0),
884        )?;
885        assert_eq!(count_antes, 1);
886
887        delete_entities_by_ids(&conn, &[entity_id])?;
888
889        let count_depois: i64 = conn.query_row(
890            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
891            params![entity_id],
892            |r| r.get(0),
893        )?;
894        assert_eq!(
895            count_depois, 0,
896            "vec_entities deve ser limpo junto com entities"
897        );
898        Ok(())
899    }
900
901    // ------------------------------------------------------------------ //
902    // upsert_relationship / find_relationship
903    // ------------------------------------------------------------------ //
904
905    #[test]
906    fn test_upsert_relationship_creates_new() -> TestResult {
907        let (_tmp, conn) = setup_db()?;
908        let id_a = upsert_entity(&conn, "global", &new_entity_helper("rel-a"))?;
909        let id_b = upsert_entity(&conn, "global", &new_entity_helper("rel-b"))?;
910
911        let rel = NewRelationship {
912            source: "rel-a".to_string(),
913            target: "rel-b".to_string(),
914            relation: "uses".to_string(),
915            strength: 0.8,
916            description: None,
917        };
918        let rel_id = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
919        assert!(rel_id > 0);
920        Ok(())
921    }
922
923    #[test]
924    fn test_upsert_relationship_idempotent() -> TestResult {
925        let (_tmp, conn) = setup_db()?;
926        let id_a = upsert_entity(&conn, "global", &new_entity_helper("idem-a"))?;
927        let id_b = upsert_entity(&conn, "global", &new_entity_helper("idem-b"))?;
928
929        let rel = NewRelationship {
930            source: "idem-a".to_string(),
931            target: "idem-b".to_string(),
932            relation: "uses".to_string(),
933            strength: 0.5,
934            description: None,
935        };
936        let id1 = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
937        let id2 = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
938        assert_eq!(id1, id2);
939        Ok(())
940    }
941
942    #[test]
943    fn test_find_relationship_existing() -> TestResult {
944        let (_tmp, conn) = setup_db()?;
945        let id_a = upsert_entity(&conn, "global", &new_entity_helper("fr-a"))?;
946        let id_b = upsert_entity(&conn, "global", &new_entity_helper("fr-b"))?;
947
948        let rel = NewRelationship {
949            source: "fr-a".to_string(),
950            target: "fr-b".to_string(),
951            relation: "depends_on".to_string(),
952            strength: 0.7,
953            description: None,
954        };
955        upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
956
957        let encontrada = find_relationship(&conn, id_a, id_b, "depends_on")?;
958        let row = encontrada.ok_or("relationship should exist")?;
959        assert_eq!(row.source_id, id_a);
960        assert_eq!(row.target_id, id_b);
961        assert!((row.weight - 0.7).abs() < 1e-9);
962        Ok(())
963    }
964
965    #[test]
966    fn test_find_relationship_missing_returns_none() -> TestResult {
967        let (_tmp, conn) = setup_db()?;
968        let resultado = find_relationship(&conn, 9999, 8888, "uses")?;
969        assert!(resultado.is_none());
970        Ok(())
971    }
972
973    // ------------------------------------------------------------------ //
974    // link_memory_entity / link_memory_relationship
975    // ------------------------------------------------------------------ //
976
977    #[test]
978    fn test_link_memory_entity_idempotent() -> TestResult {
979        let (_tmp, conn) = setup_db()?;
980        let memory_id = insert_memory(&conn)?;
981        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("me-ent"))?;
982
983        link_memory_entity(&conn, memory_id, entity_id)?;
984        let resultado = link_memory_entity(&conn, memory_id, entity_id);
985        assert!(
986            resultado.is_ok(),
987            "INSERT OR IGNORE must not fail on duplicate"
988        );
989        Ok(())
990    }
991
992    #[test]
993    fn test_link_memory_relationship_idempotent() -> TestResult {
994        let (_tmp, conn) = setup_db()?;
995        let memory_id = insert_memory(&conn)?;
996        let id_a = upsert_entity(&conn, "global", &new_entity_helper("mr-a"))?;
997        let id_b = upsert_entity(&conn, "global", &new_entity_helper("mr-b"))?;
998
999        let rel = NewRelationship {
1000            source: "mr-a".to_string(),
1001            target: "mr-b".to_string(),
1002            relation: "uses".to_string(),
1003            strength: 0.5,
1004            description: None,
1005        };
1006        let rel_id = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
1007
1008        link_memory_relationship(&conn, memory_id, rel_id)?;
1009        let resultado = link_memory_relationship(&conn, memory_id, rel_id);
1010        assert!(
1011            resultado.is_ok(),
1012            "INSERT OR IGNORE must not fail on duplicate"
1013        );
1014        Ok(())
1015    }
1016
1017    // ------------------------------------------------------------------ //
1018    // increment_degree / recalculate_degree
1019    // ------------------------------------------------------------------ //
1020
1021    #[test]
1022    fn test_increment_degree_increases_counter() -> TestResult {
1023        let (_tmp, conn) = setup_db()?;
1024        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("grau-ent"))?;
1025
1026        increment_degree(&conn, entity_id)?;
1027        increment_degree(&conn, entity_id)?;
1028
1029        let degree: i64 = conn.query_row(
1030            "SELECT degree FROM entities WHERE id = ?1",
1031            params![entity_id],
1032            |r| r.get(0),
1033        )?;
1034        assert_eq!(degree, 2);
1035        Ok(())
1036    }
1037
1038    #[test]
1039    fn test_recalculate_degree_reflects_actual_relations() -> TestResult {
1040        let (_tmp, conn) = setup_db()?;
1041        let id_a = upsert_entity(&conn, "global", &new_entity_helper("rc-a"))?;
1042        let id_b = upsert_entity(&conn, "global", &new_entity_helper("rc-b"))?;
1043        let id_c = upsert_entity(&conn, "global", &new_entity_helper("rc-c"))?;
1044
1045        let rel1 = NewRelationship {
1046            source: "rc-a".to_string(),
1047            target: "rc-b".to_string(),
1048            relation: "uses".to_string(),
1049            strength: 0.5,
1050            description: None,
1051        };
1052        let rel2 = NewRelationship {
1053            source: "rc-c".to_string(),
1054            target: "rc-a".to_string(),
1055            relation: "depends_on".to_string(),
1056            strength: 0.5,
1057            description: None,
1058        };
1059        upsert_relationship(&conn, "global", id_a, id_b, &rel1)?;
1060        upsert_relationship(&conn, "global", id_c, id_a, &rel2)?;
1061
1062        recalculate_degree(&conn, id_a)?;
1063
1064        let degree: i64 = conn.query_row(
1065            "SELECT degree FROM entities WHERE id = ?1",
1066            params![id_a],
1067            |r| r.get(0),
1068        )?;
1069        assert_eq!(
1070            degree, 2,
1071            "rc-a appears in two relationships (source+target)"
1072        );
1073        Ok(())
1074    }
1075
1076    // ------------------------------------------------------------------ //
1077    // find_orphan_entity_ids
1078    // ------------------------------------------------------------------ //
1079
1080    #[test]
1081    fn test_find_orphan_entity_ids_without_orphans() -> TestResult {
1082        let (_tmp, conn) = setup_db()?;
1083        let memory_id = insert_memory(&conn)?;
1084        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("nao-orfa"))?;
1085        link_memory_entity(&conn, memory_id, entity_id)?;
1086
1087        let orfas = find_orphan_entity_ids(&conn, Some("global"))?;
1088        assert!(!orfas.contains(&entity_id));
1089        Ok(())
1090    }
1091
1092    #[test]
1093    fn test_find_orphan_entity_ids_detects_orphans() -> TestResult {
1094        let (_tmp, conn) = setup_db()?;
1095        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("sim-orfa"))?;
1096
1097        let orfas = find_orphan_entity_ids(&conn, Some("global"))?;
1098        assert!(orfas.contains(&entity_id));
1099        Ok(())
1100    }
1101
1102    #[test]
1103    fn test_find_orphan_entity_ids_without_namespace_returns_all() -> TestResult {
1104        let (_tmp, conn) = setup_db()?;
1105        let id1 = upsert_entity(&conn, "ns-a", &new_entity_helper("orfa-a"))?;
1106        let id2 = upsert_entity(&conn, "ns-b", &new_entity_helper("orfa-b"))?;
1107
1108        let orfas = find_orphan_entity_ids(&conn, None)?;
1109        assert!(orfas.contains(&id1));
1110        assert!(orfas.contains(&id2));
1111        Ok(())
1112    }
1113
1114    // ------------------------------------------------------------------ //
1115    // list_entities / list_relationships_by_namespace
1116    // ------------------------------------------------------------------ //
1117
1118    #[test]
1119    fn test_list_entities_with_namespace() -> TestResult {
1120        let (_tmp, conn) = setup_db()?;
1121        upsert_entity(&conn, "le-ns", &new_entity_helper("le-ent-1"))?;
1122        upsert_entity(&conn, "le-ns", &new_entity_helper("le-ent-2"))?;
1123        upsert_entity(&conn, "outro-ns", &new_entity_helper("le-ent-3"))?;
1124
1125        let lista = list_entities(&conn, Some("le-ns"))?;
1126        assert_eq!(lista.len(), 2);
1127        assert!(lista.iter().all(|e| e.namespace == "le-ns"));
1128        Ok(())
1129    }
1130
1131    #[test]
1132    fn test_list_entities_without_namespace_returns_all() -> TestResult {
1133        let (_tmp, conn) = setup_db()?;
1134        upsert_entity(&conn, "ns1", &new_entity_helper("all-ent-1"))?;
1135        upsert_entity(&conn, "ns2", &new_entity_helper("all-ent-2"))?;
1136
1137        let lista = list_entities(&conn, None)?;
1138        assert!(lista.len() >= 2);
1139        Ok(())
1140    }
1141
1142    #[test]
1143    fn test_list_relationships_by_namespace_filters_correctly() -> TestResult {
1144        let (_tmp, conn) = setup_db()?;
1145        let id_a = upsert_entity(&conn, "rel-ns", &new_entity_helper("lr-a"))?;
1146        let id_b = upsert_entity(&conn, "rel-ns", &new_entity_helper("lr-b"))?;
1147
1148        let rel = NewRelationship {
1149            source: "lr-a".to_string(),
1150            target: "lr-b".to_string(),
1151            relation: "uses".to_string(),
1152            strength: 0.5,
1153            description: None,
1154        };
1155        upsert_relationship(&conn, "rel-ns", id_a, id_b, &rel)?;
1156
1157        let lista = list_relationships_by_namespace(&conn, Some("rel-ns"))?;
1158        assert!(!lista.is_empty());
1159        assert!(lista.iter().all(|r| r.namespace == "rel-ns"));
1160        Ok(())
1161    }
1162
1163    // ------------------------------------------------------------------ //
1164    // delete_relationship_by_id / create_or_fetch_relationship
1165    // ------------------------------------------------------------------ //
1166
1167    #[test]
1168    fn test_delete_relationship_by_id_removes_relation() -> TestResult {
1169        let (_tmp, conn) = setup_db()?;
1170        let id_a = upsert_entity(&conn, "global", &new_entity_helper("dr-a"))?;
1171        let id_b = upsert_entity(&conn, "global", &new_entity_helper("dr-b"))?;
1172
1173        let rel = NewRelationship {
1174            source: "dr-a".to_string(),
1175            target: "dr-b".to_string(),
1176            relation: "uses".to_string(),
1177            strength: 0.5,
1178            description: None,
1179        };
1180        let rel_id = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
1181
1182        delete_relationship_by_id(&conn, rel_id)?;
1183
1184        let encontrada = find_relationship(&conn, id_a, id_b, "uses")?;
1185        assert!(encontrada.is_none(), "relationship must have been removed");
1186        Ok(())
1187    }
1188
1189    #[test]
1190    fn test_create_or_fetch_relationship_creates_new() -> TestResult {
1191        let (_tmp, conn) = setup_db()?;
1192        let id_a = upsert_entity(&conn, "global", &new_entity_helper("cf-a"))?;
1193        let id_b = upsert_entity(&conn, "global", &new_entity_helper("cf-b"))?;
1194
1195        let (rel_id, created) =
1196            create_or_fetch_relationship(&conn, "global", id_a, id_b, "uses", 0.5, None)?;
1197        assert!(rel_id > 0);
1198        assert!(created);
1199        Ok(())
1200    }
1201
1202    #[test]
1203    fn test_create_or_fetch_relationship_returns_existing() -> TestResult {
1204        let (_tmp, conn) = setup_db()?;
1205        let id_a = upsert_entity(&conn, "global", &new_entity_helper("cf2-a"))?;
1206        let id_b = upsert_entity(&conn, "global", &new_entity_helper("cf2-b"))?;
1207
1208        create_or_fetch_relationship(&conn, "global", id_a, id_b, "uses", 0.5, None)?;
1209        let (_, created) =
1210            create_or_fetch_relationship(&conn, "global", id_a, id_b, "uses", 0.5, None)?;
1211        assert!(
1212            !created,
1213            "second call must return the existing relationship"
1214        );
1215        Ok(())
1216    }
1217
1218    // ------------------------------------------------------------------ //
1219    // serde alias: field "type" accepted as a synonym for "entity_type"
1220    // ------------------------------------------------------------------ //
1221
1222    #[test]
1223    fn accepts_type_field_as_alias() -> TestResult {
1224        let json = r#"{"name": "X", "type": "concept"}"#;
1225        let ent: NewEntity = serde_json::from_str(json)?;
1226        assert_eq!(ent.entity_type, EntityType::Concept);
1227        Ok(())
1228    }
1229
1230    #[test]
1231    fn accepts_canonical_entity_type_field() -> TestResult {
1232        let json = r#"{"name": "X", "entity_type": "concept"}"#;
1233        let ent: NewEntity = serde_json::from_str(json)?;
1234        assert_eq!(ent.entity_type, EntityType::Concept);
1235        Ok(())
1236    }
1237
1238    #[test]
1239    fn both_fields_present_yields_duplicate_error() {
1240        // having both entity_type and type in the same JSON is a duplicate and must fail
1241        let json = r#"{"name": "X", "entity_type": "concept", "type": "person"}"#;
1242        let resultado: Result<NewEntity, _> = serde_json::from_str(json);
1243        assert!(
1244            resultado.is_err(),
1245            "both fields in the same JSON are a duplicate"
1246        );
1247    }
1248
1249    #[test]
1250    fn validate_entity_name_accepts_valid() {
1251        assert!(validate_entity_name("rust-lang").is_ok());
1252        assert!(validate_entity_name("sqlite-graphrag").is_ok());
1253        assert!(validate_entity_name("ab").is_ok());
1254    }
1255
1256    #[test]
1257    fn validate_entity_name_rejects_short() {
1258        assert!(validate_entity_name("a").is_err());
1259        assert!(validate_entity_name("").is_err());
1260    }
1261
1262    #[test]
1263    fn validate_entity_name_rejects_newlines() {
1264        assert!(validate_entity_name("foo\nbar").is_err());
1265        assert!(validate_entity_name("foo\rbar").is_err());
1266    }
1267
1268    #[test]
1269    fn validate_entity_name_rejects_short_allcaps() {
1270        assert!(validate_entity_name("RAM").is_err());
1271        assert!(validate_entity_name("NAO").is_err());
1272        assert!(validate_entity_name("OK").is_err());
1273    }
1274
1275    #[test]
1276    fn validate_entity_name_accepts_long_allcaps() {
1277        assert!(validate_entity_name("SQLITE").is_ok());
1278        assert!(validate_entity_name("GRAPHRAG").is_ok());
1279    }
1280
1281    #[test]
1282    fn validate_entity_name_accepts_mixed_case() {
1283        assert!(validate_entity_name("FTS5").is_ok()); // 4 chars but has digit
1284        assert!(validate_entity_name("WAL").is_err()); // 3 chars ALL_CAPS
1285    }
1286}