Skip to main content

sqlite_graphrag/storage/
entities.rs

1//! Persistence layer for entities, relationships and their junction tables.
2//!
3//! The entity graph mirrors the conceptual content of memories: `entities`
4//! holds nodes, `relationships` holds typed edges and `memory_entities` and
5//! `memory_relationships` connect each memory to the graph slice it emitted.
6
7use crate::embedder::f32_to_bytes;
8use crate::entity_type::EntityType;
9use crate::errors::AppError;
10use crate::parsers::normalize_entity_name;
11use crate::storage::utils::with_busy_retry;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14
/// Input payload used to upsert a single entity.
///
/// `name` may be passed in its original spelling: [`upsert_entity`] validates
/// it and normalizes it to kebab-case before the row is written.
/// `description` is optional; when an upsert supplies `None`, the description
/// already stored for the entity is kept.
///
/// Deserialization rejects unknown fields, and `entity_type` may also be
/// supplied under the key `type`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct NewEntity {
    /// Entity name as supplied by the caller.
    pub name: String,
    /// Entity classification; also accepted under the serde alias `type`.
    #[serde(alias = "type")]
    pub entity_type: EntityType,
    /// Optional free-text description.
    pub description: Option<String>,
}
27
/// Input payload used to upsert a typed relationship between entities.
///
/// `strength` is expected to lie within `[0.0, 1.0]` and is written to the
/// `weight` column of the `relationships` table by [`upsert_relationship`].
/// The range is not enforced by this type.
///
/// Deserialization rejects unknown fields; `source` and `target` may also be
/// supplied under the keys `from` and `to`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct NewRelationship {
    /// Source endpoint; also accepted under the serde alias `from`.
    // NOTE(review): presumably an entity name that the caller resolves to an
    // id before calling `upsert_relationship` — confirm against callers.
    #[serde(alias = "from")]
    pub source: String,
    /// Target endpoint; also accepted under the serde alias `to`.
    #[serde(alias = "to")]
    pub target: String,
    /// Relation label stored in the `relation` column.
    pub relation: String,
    /// Edge strength, stored as `weight`.
    pub strength: f64,
    /// Optional free-text description of the edge.
    pub description: Option<String>,
}
43
44/// Validates entity name against quality rules.
45///
46/// Rejects names with newlines, names shorter than 2 characters, and
47/// ALL_CAPS abbreviations of 4 characters or fewer (common NER noise).
48///
49/// # Errors
50///
51/// Returns `Err(AppError::Validation)` when the name violates any rule.
52pub fn validate_entity_name(name: &str) -> Result<(), AppError> {
53    if name.len() < 2 {
54        return Err(AppError::Validation(format!(
55            "entity name '{name}' must be at least 2 characters"
56        )));
57    }
58    if name.contains('\n') || name.contains('\r') {
59        return Err(AppError::Validation(
60            "entity name must not contain newline characters".to_string(),
61        ));
62    }
63    if name.len() <= 4
64        && name
65            .chars()
66            .all(|c| c.is_ascii_uppercase() || c == '_' || c == '-')
67    {
68        return Err(AppError::Validation(format!(
69            "entity name '{name}' rejected: short ALL_CAPS names are typically NER noise"
70        )));
71    }
72    Ok(())
73}
74
75/// Upserts an entity and returns its primary key.
76///
77/// Uses `ON CONFLICT(namespace, name)` to keep one row per entity within a
78/// namespace, refreshing `type` and `description` opportunistically.
79///
80/// # Errors
81///
82/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
83pub fn upsert_entity(conn: &Connection, namespace: &str, e: &NewEntity) -> Result<i64, AppError> {
84    // Step 1: validate the original name — catches ALL_CAPS short noise (NER artefacts),
85    // newlines, and names shorter than 2 characters before any transformation.
86    validate_entity_name(&e.name)?;
87    // Step 2: normalize to kebab-case ASCII (NFKD, lowercase, spaces/underscores → hyphens).
88    let normalized_name = normalize_entity_name(&e.name);
89    // Step 3: guard post-normalization length — a valid original could collapse to < 2 chars
90    // (e.g. a single accented character that strips entirely).
91    if normalized_name.chars().count() < 2 {
92        return Err(AppError::Validation(format!(
93            "entity name '{}' normalizes to '{}' which is too short (minimum 2 characters)",
94            e.name, normalized_name
95        )));
96    }
97    conn.execute(
98        "INSERT INTO entities (namespace, name, type, description)
99         VALUES (?1, ?2, ?3, ?4)
100         ON CONFLICT(namespace, name) DO UPDATE SET
101           type        = excluded.type,
102           description = COALESCE(excluded.description, entities.description),
103           updated_at  = unixepoch()",
104        params![namespace, normalized_name, e.entity_type, e.description],
105    )?;
106    let id: i64 = conn.query_row(
107        "SELECT id FROM entities WHERE namespace = ?1 AND name = ?2",
108        params![namespace, normalized_name],
109        |r| r.get(0),
110    )?;
111    Ok(id)
112}
113
114/// Replaces the vector row for an entity in `vec_entities`.
115///
116/// vec0 virtual tables do not honour `INSERT OR REPLACE` when the primary key
117/// already exists — they raise a UNIQUE constraint error instead of silently
118/// replacing the row. The workaround is an explicit DELETE before INSERT so
119/// that the insert never conflicts. `embedding` must have length
120/// [`crate::constants::EMBEDDING_DIM`].
121///
122/// # Errors
123///
124/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
125pub fn upsert_entity_vec(
126    conn: &Connection,
127    entity_id: i64,
128    namespace: &str,
129    entity_type: EntityType,
130    embedding: &[f32],
131    name: &str,
132) -> Result<(), AppError> {
133    // Both statements wrapped in with_busy_retry: WAL concurrency can cause
134    // SQLITE_BUSY on vec0 virtual table writes when multiple CLI instances run.
135    let embedding_bytes = f32_to_bytes(embedding);
136    with_busy_retry(|| {
137        conn.execute(
138            "DELETE FROM vec_entities WHERE entity_id = ?1",
139            params![entity_id],
140        )?;
141        conn.execute(
142            "INSERT INTO vec_entities(entity_id, namespace, type, embedding, name)
143             VALUES (?1, ?2, ?3, ?4, ?5)",
144            params![entity_id, namespace, entity_type, &embedding_bytes, name],
145        )?;
146        Ok(())
147    })
148}
149
150/// Upserts a typed relationship between two entity ids.
151///
152/// Conflicts on `(source_id, target_id, relation)` refresh `weight` and
153/// preserve a non-null `description`. Returns the `rowid` of the stored row.
154///
155/// # Errors
156///
157/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
158pub fn upsert_relationship(
159    conn: &Connection,
160    namespace: &str,
161    source_id: i64,
162    target_id: i64,
163    rel: &NewRelationship,
164) -> Result<i64, AppError> {
165    conn.execute(
166        "INSERT INTO relationships (namespace, source_id, target_id, relation, weight, description)
167         VALUES (?1, ?2, ?3, ?4, ?5, ?6)
168         ON CONFLICT(source_id, target_id, relation) DO UPDATE SET
169           weight = excluded.weight,
170           description = COALESCE(excluded.description, relationships.description)",
171        params![
172            namespace,
173            source_id,
174            target_id,
175            rel.relation,
176            rel.strength,
177            rel.description
178        ],
179    )?;
180    let id: i64 = conn.query_row(
181        "SELECT id FROM relationships WHERE source_id=?1 AND target_id=?2 AND relation=?3",
182        params![source_id, target_id, rel.relation],
183        |r| r.get(0),
184    )?;
185    Ok(id)
186}
187
188pub fn link_memory_entity(
189    conn: &Connection,
190    memory_id: i64,
191    entity_id: i64,
192) -> Result<(), AppError> {
193    conn.execute(
194        "INSERT OR IGNORE INTO memory_entities (memory_id, entity_id) VALUES (?1, ?2)",
195        params![memory_id, entity_id],
196    )?;
197    Ok(())
198}
199
200pub fn link_memory_relationship(
201    conn: &Connection,
202    memory_id: i64,
203    rel_id: i64,
204) -> Result<(), AppError> {
205    conn.execute(
206        "INSERT OR IGNORE INTO memory_relationships (memory_id, relationship_id) VALUES (?1, ?2)",
207        params![memory_id, rel_id],
208    )?;
209    Ok(())
210}
211
212pub fn increment_degree(conn: &Connection, entity_id: i64) -> Result<(), AppError> {
213    conn.execute(
214        "UPDATE entities SET degree = degree + 1 WHERE id = ?1",
215        params![entity_id],
216    )?;
217    Ok(())
218}
219
220/// Looks up the entity by name and namespace. Returns the id when it exists.
221pub fn find_entity_id(
222    conn: &Connection,
223    namespace: &str,
224    name: &str,
225) -> Result<Option<i64>, AppError> {
226    // Normalize the lookup name so it matches the normalized names written by
227    // `upsert_entity`. Without this, an entity written through normalization
228    // (e.g. "Foo Bar" -> "foo-bar") would be unreachable by its original
229    // spelling, breaking delete-entity, reclassify, merge-entities, rename and
230    // memory-entities lookups.
231    let name = normalize_entity_name(name);
232    let mut stmt =
233        conn.prepare_cached("SELECT id FROM entities WHERE namespace = ?1 AND name = ?2")?;
234    match stmt.query_row(params![namespace, &name], |r| r.get::<_, i64>(0)) {
235        Ok(id) => Ok(Some(id)),
236        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
237        Err(e) => Err(AppError::Database(e)),
238    }
239}
240
/// A row read from the `relationships` table.
///
/// Produced by [`find_relationship`] and [`list_relationships_by_namespace`];
/// each field carries the column of the same name.
#[derive(Debug, Serialize)]
pub struct RelationshipRow {
    /// Primary key of the relationship.
    pub id: i64,
    /// Namespace stored on the relationship row.
    pub namespace: String,
    /// Id of the entity the edge starts from.
    pub source_id: i64,
    /// Id of the entity the edge points to.
    pub target_id: i64,
    /// Relation label.
    pub relation: String,
    /// Edge weight.
    pub weight: f64,
    /// Optional free-text description; `None` when the column is NULL.
    pub description: Option<String>,
}
252
253/// Looks up a specific relation by (source_id, target_id, relation).
254pub fn find_relationship(
255    conn: &Connection,
256    source_id: i64,
257    target_id: i64,
258    relation: &str,
259) -> Result<Option<RelationshipRow>, AppError> {
260    let mut stmt = conn.prepare_cached(
261        "SELECT id, namespace, source_id, target_id, relation, weight, description
262         FROM relationships
263         WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
264    )?;
265    match stmt.query_row(params![source_id, target_id, relation], |r| {
266        Ok(RelationshipRow {
267            id: r.get(0)?,
268            namespace: r.get(1)?,
269            source_id: r.get(2)?,
270            target_id: r.get(3)?,
271            relation: r.get(4)?,
272            weight: r.get(5)?,
273            description: r.get(6)?,
274        })
275    }) {
276        Ok(row) => Ok(Some(row)),
277        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
278        Err(e) => Err(AppError::Database(e)),
279    }
280}
281
282/// Creates a relation if it does not exist (returns action="created")
283/// or returns the existing relation (action="already_exists") with updated weight.
284pub fn create_or_fetch_relationship(
285    conn: &Connection,
286    namespace: &str,
287    source_id: i64,
288    target_id: i64,
289    relation: &str,
290    weight: f64,
291    description: Option<&str>,
292) -> Result<(i64, bool), AppError> {
293    // Check if it exists first; update weight if different.
294    let existing = find_relationship(conn, source_id, target_id, relation)?;
295    if let Some(row) = existing {
296        if (row.weight - weight).abs() > f64::EPSILON {
297            conn.execute(
298                "UPDATE relationships SET weight = ?1 WHERE id = ?2",
299                params![weight, row.id],
300            )?;
301        }
302        return Ok((row.id, false));
303    }
304    conn.execute(
305        "INSERT INTO relationships (namespace, source_id, target_id, relation, weight, description)
306         VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
307        params![
308            namespace,
309            source_id,
310            target_id,
311            relation,
312            weight,
313            description
314        ],
315    )?;
316    let id: i64 = conn.query_row(
317        "SELECT id FROM relationships WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
318        params![source_id, target_id, relation],
319        |r| r.get(0),
320    )?;
321    Ok((id, true))
322}
323
324/// Removes a relation by id and cleans up memory_relationships.
325pub fn delete_relationship_by_id(conn: &Connection, relationship_id: i64) -> Result<(), AppError> {
326    conn.execute(
327        "DELETE FROM memory_relationships WHERE relationship_id = ?1",
328        params![relationship_id],
329    )?;
330    conn.execute(
331        "DELETE FROM relationships WHERE id = ?1",
332        params![relationship_id],
333    )?;
334    Ok(())
335}
336
337/// Recalculates the `degree` field of an entity.
338pub fn recalculate_degree(conn: &Connection, entity_id: i64) -> Result<(), AppError> {
339    conn.execute(
340        "UPDATE entities
341         SET degree = (SELECT COUNT(*) FROM relationships
342                       WHERE source_id = entities.id OR target_id = entities.id)
343         WHERE id = ?1",
344        params![entity_id],
345    )?;
346    Ok(())
347}
348
/// Entity row with enough data for graph export/query.
///
/// Produced by [`list_entities`].
#[derive(Debug, Serialize, Clone)]
pub struct EntityNode {
    /// Primary key of the entity.
    pub id: i64,
    /// Stored entity name.
    pub name: String,
    /// Namespace the entity belongs to.
    pub namespace: String,
    /// Value of the `type` column, read as text.
    pub kind: String,
}
357
358/// Lists entities, filtering by namespace if provided.
359pub fn list_entities(
360    conn: &Connection,
361    namespace: Option<&str>,
362) -> Result<Vec<EntityNode>, AppError> {
363    if let Some(ns) = namespace {
364        let mut stmt = conn.prepare(
365            "SELECT id, name, namespace, type FROM entities WHERE namespace = ?1 ORDER BY id",
366        )?;
367        let rows = stmt
368            .query_map(params![ns], |r| {
369                Ok(EntityNode {
370                    id: r.get(0)?,
371                    name: r.get(1)?,
372                    namespace: r.get(2)?,
373                    kind: r.get(3)?,
374                })
375            })?
376            .collect::<Result<Vec<_>, _>>()?;
377        Ok(rows)
378    } else {
379        let mut stmt =
380            conn.prepare("SELECT id, name, namespace, type FROM entities ORDER BY namespace, id")?;
381        let rows = stmt
382            .query_map([], |r| {
383                Ok(EntityNode {
384                    id: r.get(0)?,
385                    name: r.get(1)?,
386                    namespace: r.get(2)?,
387                    kind: r.get(3)?,
388                })
389            })?
390            .collect::<Result<Vec<_>, _>>()?;
391        Ok(rows)
392    }
393}
394
395/// Lists relations filtered by namespace (of source/target entities).
396pub fn list_relationships_by_namespace(
397    conn: &Connection,
398    namespace: Option<&str>,
399) -> Result<Vec<RelationshipRow>, AppError> {
400    if let Some(ns) = namespace {
401        let mut stmt = conn.prepare(
402            "SELECT r.id, r.namespace, r.source_id, r.target_id, r.relation, r.weight, r.description
403             FROM relationships r
404             JOIN entities se ON se.id = r.source_id AND se.namespace = ?1
405             JOIN entities te ON te.id = r.target_id AND te.namespace = ?1
406             ORDER BY r.id",
407        )?;
408        let rows = stmt
409            .query_map(params![ns], |r| {
410                Ok(RelationshipRow {
411                    id: r.get(0)?,
412                    namespace: r.get(1)?,
413                    source_id: r.get(2)?,
414                    target_id: r.get(3)?,
415                    relation: r.get(4)?,
416                    weight: r.get(5)?,
417                    description: r.get(6)?,
418                })
419            })?
420            .collect::<Result<Vec<_>, _>>()?;
421        Ok(rows)
422    } else {
423        let mut stmt = conn.prepare(
424            "SELECT id, namespace, source_id, target_id, relation, weight, description
425             FROM relationships ORDER BY id",
426        )?;
427        let rows = stmt
428            .query_map([], |r| {
429                Ok(RelationshipRow {
430                    id: r.get(0)?,
431                    namespace: r.get(1)?,
432                    source_id: r.get(2)?,
433                    target_id: r.get(3)?,
434                    relation: r.get(4)?,
435                    weight: r.get(5)?,
436                    description: r.get(6)?,
437                })
438            })?
439            .collect::<Result<Vec<_>, _>>()?;
440        Ok(rows)
441    }
442}
443
444/// Locates orphan entities: no link in memory_entities and no relations.
445pub fn find_orphan_entity_ids(
446    conn: &Connection,
447    namespace: Option<&str>,
448) -> Result<Vec<i64>, AppError> {
449    if let Some(ns) = namespace {
450        let mut stmt = conn.prepare(
451            "SELECT e.id FROM entities e
452             WHERE e.namespace = ?1
453               AND NOT EXISTS (SELECT 1 FROM memory_entities me WHERE me.entity_id = e.id)
454               AND NOT EXISTS (
455                   SELECT 1 FROM relationships r
456                   WHERE r.source_id = e.id OR r.target_id = e.id
457               )",
458        )?;
459        let ids = stmt
460            .query_map(params![ns], |r| r.get::<_, i64>(0))?
461            .collect::<Result<Vec<_>, _>>()?;
462        Ok(ids)
463    } else {
464        let mut stmt = conn.prepare(
465            "SELECT e.id FROM entities e
466             WHERE NOT EXISTS (SELECT 1 FROM memory_entities me WHERE me.entity_id = e.id)
467               AND NOT EXISTS (
468                   SELECT 1 FROM relationships r
469                   WHERE r.source_id = e.id OR r.target_id = e.id
470               )",
471        )?;
472        let ids = stmt
473            .query_map([], |r| r.get::<_, i64>(0))?
474            .collect::<Result<Vec<_>, _>>()?;
475        Ok(ids)
476    }
477}
478
479/// Deletes entities and their associated vectors. Returns the number of entities removed.
480pub fn delete_entities_by_ids(conn: &Connection, entity_ids: &[i64]) -> Result<usize, AppError> {
481    if entity_ids.is_empty() {
482        return Ok(0);
483    }
484    let mut removed = 0usize;
485    for id in entity_ids {
486        // vec0 lacks FK CASCADE — clean vec_entities explicitly.
487        let _ = conn.execute("DELETE FROM vec_entities WHERE entity_id = ?1", params![id]);
488        let affected = conn.execute("DELETE FROM entities WHERE id = ?1", params![id])?;
489        removed += affected;
490    }
491    Ok(removed)
492}
493
494/// Counts relationships matching the given relation type within a namespace.
495///
496/// Used by `prune-relations --dry-run` to preview the number of relationships
497/// that would be deleted without actually modifying the database.
498///
499/// # Errors
500///
501/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
502pub fn count_relationships_by_relation(
503    conn: &Connection,
504    namespace: &str,
505    relation: &str,
506) -> Result<usize, AppError> {
507    let count: i64 = conn.query_row(
508        "SELECT COUNT(*) FROM relationships WHERE namespace = ?1 AND relation = ?2",
509        params![namespace, relation],
510        |r| r.get(0),
511    )?;
512    Ok(count as usize)
513}
514
515/// Returns unique entity names involved in relationships of the given type.
516///
517/// Queries both source and target sides of every matching relationship row,
518/// deduplicates via `DISTINCT`, and returns the names in alphabetical order.
519///
520/// # Errors
521///
522/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
523pub fn list_entity_names_by_relation(
524    conn: &Connection,
525    namespace: &str,
526    relation: &str,
527) -> Result<Vec<String>, AppError> {
528    let mut stmt = conn.prepare(
529        "SELECT DISTINCT e.name FROM entities e
530         INNER JOIN relationships r ON (e.id = r.source_id OR e.id = r.target_id)
531         WHERE r.namespace = ?1 AND r.relation = ?2
532         ORDER BY e.name",
533    )?;
534    let names: Vec<String> = stmt
535        .query_map(params![namespace, relation], |row| row.get(0))?
536        .collect::<Result<Vec<_>, _>>()?;
537    Ok(names)
538}
539
540/// Deletes all relationships matching a relation type within a namespace.
541///
542/// Operates in chunks of 1000 to avoid holding long write locks and blocking
543/// WAL readers. After deletion, recalculates degree for every affected entity.
544///
545/// Returns `(count_deleted, affected_entity_ids)`.
546///
547/// # Errors
548///
549/// Returns `Err(AppError::Database)` on any `rusqlite` failure.
550pub fn delete_relationships_by_relation(
551    conn: &Connection,
552    namespace: &str,
553    relation: &str,
554) -> Result<(usize, Vec<i64>), AppError> {
555    // Step 1: collect all affected entity IDs before deletion.
556    let mut stmt = conn.prepare(
557        "SELECT DISTINCT source_id FROM relationships WHERE namespace = ?1 AND relation = ?2
558         UNION
559         SELECT DISTINCT target_id FROM relationships WHERE namespace = ?1 AND relation = ?2",
560    )?;
561    let entity_ids: Vec<i64> = stmt
562        .query_map(params![namespace, relation], |r| r.get::<_, i64>(0))?
563        .collect::<Result<Vec<_>, _>>()?;
564
565    // Step 2: collect relationship IDs to delete.
566    let mut id_stmt =
567        conn.prepare("SELECT id FROM relationships WHERE namespace = ?1 AND relation = ?2")?;
568    let rel_ids: Vec<i64> = id_stmt
569        .query_map(params![namespace, relation], |r| r.get::<_, i64>(0))?
570        .collect::<Result<Vec<_>, _>>()?;
571
572    // Step 3: delete in chunks of 1000 (memory_relationships + relationships).
573    let mut total_deleted: usize = 0;
574    for chunk in rel_ids.chunks(1000) {
575        for &rel_id in chunk {
576            conn.execute(
577                "DELETE FROM memory_relationships WHERE relationship_id = ?1",
578                params![rel_id],
579            )?;
580            let affected =
581                conn.execute("DELETE FROM relationships WHERE id = ?1", params![rel_id])?;
582            total_deleted += affected;
583        }
584    }
585
586    // Step 4: recalculate degree for all affected entities.
587    for &eid in &entity_ids {
588        recalculate_degree(conn, eid)?;
589    }
590
591    Ok((total_deleted, entity_ids))
592}
593
594pub fn knn_search(
595    conn: &Connection,
596    embedding: &[f32],
597    namespace: &str,
598    k: usize,
599) -> Result<Vec<(i64, f32)>, AppError> {
600    let bytes = f32_to_bytes(embedding);
601    let mut stmt = conn.prepare(
602        "SELECT entity_id, distance FROM vec_entities
603         WHERE embedding MATCH ?1 AND namespace = ?2
604         ORDER BY distance LIMIT ?3",
605    )?;
606    let rows = stmt
607        .query_map(params![bytes, namespace, k as i64], |r| {
608            Ok((r.get::<_, i64>(0)?, r.get::<_, f32>(1)?))
609        })?
610        .collect::<Result<Vec<_>, _>>()?;
611    Ok(rows)
612}
613
614#[cfg(test)]
615mod tests {
616    use super::*;
617    use crate::constants::EMBEDDING_DIM;
618    use crate::entity_type::EntityType;
619    use crate::storage::connection::register_vec_extension;
620    use rusqlite::Connection;
621    use tempfile::TempDir;
622
623    type TestResult = Result<(), Box<dyn std::error::Error>>;
624
625    fn setup_db() -> Result<(TempDir, Connection), Box<dyn std::error::Error>> {
626        register_vec_extension();
627        let tmp = TempDir::new()?;
628        let db_path = tmp.path().join("test.db");
629        let mut conn = Connection::open(&db_path)?;
630        crate::migrations::runner().run(&mut conn)?;
631        Ok((tmp, conn))
632    }
633
634    fn insert_memory(conn: &Connection) -> Result<i64, Box<dyn std::error::Error>> {
635        conn.execute(
636            "INSERT INTO memories (namespace, name, type, description, body, body_hash)
637             VALUES ('global', 'test-mem', 'user', 'desc', 'body', 'hash1')",
638            [],
639        )?;
640        Ok(conn.last_insert_rowid())
641    }
642
643    fn new_entity_helper(name: &str) -> NewEntity {
644        NewEntity {
645            name: name.to_string(),
646            entity_type: EntityType::Project,
647            description: None,
648        }
649    }
650
651    fn embedding_zero() -> Vec<f32> {
652        vec![0.0f32; EMBEDDING_DIM]
653    }
654
655    // ------------------------------------------------------------------ //
656    // upsert_entity
657    // ------------------------------------------------------------------ //
658
659    #[test]
660    fn test_upsert_entity_creates_new() -> TestResult {
661        let (_tmp, conn) = setup_db()?;
662        let e = new_entity_helper("projeto-alpha");
663        let id = upsert_entity(&conn, "global", &e)?;
664        assert!(id > 0);
665        Ok(())
666    }
667
668    #[test]
669    fn test_upsert_entity_idempotent_returns_same_id() -> TestResult {
670        let (_tmp, conn) = setup_db()?;
671        let e = new_entity_helper("projeto-beta");
672        let id1 = upsert_entity(&conn, "global", &e)?;
673        let id2 = upsert_entity(&conn, "global", &e)?;
674        assert_eq!(id1, id2);
675        Ok(())
676    }
677
678    #[test]
679    fn test_upsert_entity_updates_description() -> TestResult {
680        let (_tmp, conn) = setup_db()?;
681        let e1 = new_entity_helper("projeto-gamma");
682        let id1 = upsert_entity(&conn, "global", &e1)?;
683
684        let e2 = NewEntity {
685            name: "projeto-gamma".to_string(),
686            entity_type: EntityType::Tool,
687            description: Some("nova desc".to_string()),
688        };
689        let id2 = upsert_entity(&conn, "global", &e2)?;
690        assert_eq!(id1, id2);
691
692        let desc: Option<String> = conn.query_row(
693            "SELECT description FROM entities WHERE id = ?1",
694            params![id1],
695            |r| r.get(0),
696        )?;
697        assert_eq!(desc.as_deref(), Some("nova desc"));
698        Ok(())
699    }
700
701    #[test]
702    fn test_upsert_entity_different_namespaces_create_distinct_records() -> TestResult {
703        let (_tmp, conn) = setup_db()?;
704        let e = new_entity_helper("compartilhada");
705        let id1 = upsert_entity(&conn, "ns1", &e)?;
706        let id2 = upsert_entity(&conn, "ns2", &e)?;
707        assert_ne!(id1, id2);
708        Ok(())
709    }
710
711    // ------------------------------------------------------------------ //
712    // upsert_entity_vec — covers DELETE+INSERT (new branch after the OOM fix)
713    // ------------------------------------------------------------------ //
714
715    #[test]
716    fn test_upsert_entity_vec_first_time_without_conflict() -> TestResult {
717        let (_tmp, conn) = setup_db()?;
718        let e = new_entity_helper("vec-nova");
719        let entity_id = upsert_entity(&conn, "global", &e)?;
720        let emb = embedding_zero();
721
722        let result = upsert_entity_vec(
723            &conn,
724            entity_id,
725            "global",
726            EntityType::Project,
727            &emb,
728            "vec-nova",
729        );
730        assert!(result.is_ok(), "first insertion must succeed");
731
732        let count: i64 = conn.query_row(
733            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
734            params![entity_id],
735            |r| r.get(0),
736        )?;
737        assert_eq!(count, 1, "must have exactly one row after insertion");
738        Ok(())
739    }
740
741    #[test]
742    fn test_upsert_entity_vec_second_time_replaces_without_error() -> TestResult {
743        // Covers the branch where DELETE removes the existing row before INSERT.
744        let (_tmp, conn) = setup_db()?;
745        let e = new_entity_helper("vec-existente");
746        let entity_id = upsert_entity(&conn, "global", &e)?;
747        let emb = embedding_zero();
748
749        upsert_entity_vec(
750            &conn,
751            entity_id,
752            "global",
753            EntityType::Project,
754            &emb,
755            "vec-existente",
756        )?;
757
758        // Second call: DELETE returns 1 removed row, INSERT must succeed.
759        let result = upsert_entity_vec(
760            &conn,
761            entity_id,
762            "global",
763            EntityType::Tool,
764            &emb,
765            "vec-existente",
766        );
767        assert!(
768            result.is_ok(),
769            "second insertion (replace) must succeed: {result:?}"
770        );
771
772        let count: i64 = conn.query_row(
773            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
774            params![entity_id],
775            |r| r.get(0),
776        )?;
777        assert_eq!(count, 1, "must have exactly one row after replacement");
778        Ok(())
779    }
780
781    #[test]
782    fn test_upsert_entity_vec_multiple_independent_entities() -> TestResult {
783        let (_tmp, conn) = setup_db()?;
784        let emb = embedding_zero();
785
786        for i in 0..3i64 {
787            let nome = format!("ent-{i}");
788            let e = new_entity_helper(&nome);
789            let entity_id = upsert_entity(&conn, "global", &e)?;
790            upsert_entity_vec(&conn, entity_id, "global", EntityType::Project, &emb, &nome)?;
791        }
792
793        let count: i64 = conn.query_row("SELECT COUNT(*) FROM vec_entities", [], |r| r.get(0))?;
794        assert_eq!(count, 3, "must have three distinct rows in vec_entities");
795        Ok(())
796    }
797
798    // ------------------------------------------------------------------ //
799    // find_entity_id
800    // ------------------------------------------------------------------ //
801
802    #[test]
803    fn test_find_entity_id_existing_returns_some() -> TestResult {
804        let (_tmp, conn) = setup_db()?;
805        let e = new_entity_helper("entidade-busca");
806        let id_inserido = upsert_entity(&conn, "global", &e)?;
807        let id_encontrado = find_entity_id(&conn, "global", "entidade-busca")?;
808        assert_eq!(id_encontrado, Some(id_inserido));
809        Ok(())
810    }
811
812    #[test]
813    fn test_find_entity_id_missing_returns_none() -> TestResult {
814        let (_tmp, conn) = setup_db()?;
815        let id = find_entity_id(&conn, "global", "nao-existe")?;
816        assert_eq!(id, None);
817        Ok(())
818    }
819
820    // ------------------------------------------------------------------ //
821    // delete_entities_by_ids
822    // ------------------------------------------------------------------ //
823
824    #[test]
825    fn test_delete_entities_by_ids_empty_list_returns_zero() -> TestResult {
826        let (_tmp, conn) = setup_db()?;
827        let removed = delete_entities_by_ids(&conn, &[])?;
828        assert_eq!(removed, 0);
829        Ok(())
830    }
831
832    #[test]
833    fn test_delete_entities_by_ids_removes_valid_entity() -> TestResult {
834        let (_tmp, conn) = setup_db()?;
835        let e = new_entity_helper("to-delete");
836        let entity_id = upsert_entity(&conn, "global", &e)?;
837
838        let removed = delete_entities_by_ids(&conn, &[entity_id])?;
839        assert_eq!(removed, 1);
840
841        let id = find_entity_id(&conn, "global", "to-delete")?;
842        assert_eq!(id, None, "entity must have been removed");
843        Ok(())
844    }
845
846    #[test]
847    fn test_delete_entities_by_ids_missing_id_returns_zero() -> TestResult {
848        let (_tmp, conn) = setup_db()?;
849        let removed = delete_entities_by_ids(&conn, &[9999])?;
850        assert_eq!(removed, 0);
851        Ok(())
852    }
853
854    #[test]
855    fn test_delete_entities_by_ids_removes_multiple() -> TestResult {
856        let (_tmp, conn) = setup_db()?;
857        let id1 = upsert_entity(&conn, "global", &new_entity_helper("del-a"))?;
858        let id2 = upsert_entity(&conn, "global", &new_entity_helper("del-b"))?;
859        let id3 = upsert_entity(&conn, "global", &new_entity_helper("del-c"))?;
860
861        let removed = delete_entities_by_ids(&conn, &[id1, id2])?;
862        assert_eq!(removed, 2);
863
864        assert!(find_entity_id(&conn, "global", "del-a")?.is_none());
865        assert!(find_entity_id(&conn, "global", "del-b")?.is_none());
866        assert!(find_entity_id(&conn, "global", "del-c")?.is_some());
867        let _ = id3;
868        Ok(())
869    }
870
871    #[test]
872    fn test_delete_entities_by_ids_also_removes_vec() -> TestResult {
873        let (_tmp, conn) = setup_db()?;
874        let e = new_entity_helper("del-com-vec");
875        let entity_id = upsert_entity(&conn, "global", &e)?;
876        let emb = embedding_zero();
877        upsert_entity_vec(
878            &conn,
879            entity_id,
880            "global",
881            EntityType::Project,
882            &emb,
883            "del-com-vec",
884        )?;
885
886        let count_antes: i64 = conn.query_row(
887            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
888            params![entity_id],
889            |r| r.get(0),
890        )?;
891        assert_eq!(count_antes, 1);
892
893        delete_entities_by_ids(&conn, &[entity_id])?;
894
895        let count_depois: i64 = conn.query_row(
896            "SELECT COUNT(*) FROM vec_entities WHERE entity_id = ?1",
897            params![entity_id],
898            |r| r.get(0),
899        )?;
900        assert_eq!(
901            count_depois, 0,
902            "vec_entities deve ser limpo junto com entities"
903        );
904        Ok(())
905    }
906
907    // ------------------------------------------------------------------ //
908    // upsert_relationship / find_relationship
909    // ------------------------------------------------------------------ //
910
911    #[test]
912    fn test_upsert_relationship_creates_new() -> TestResult {
913        let (_tmp, conn) = setup_db()?;
914        let id_a = upsert_entity(&conn, "global", &new_entity_helper("rel-a"))?;
915        let id_b = upsert_entity(&conn, "global", &new_entity_helper("rel-b"))?;
916
917        let rel = NewRelationship {
918            source: "rel-a".to_string(),
919            target: "rel-b".to_string(),
920            relation: "uses".to_string(),
921            strength: 0.8,
922            description: None,
923        };
924        let rel_id = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
925        assert!(rel_id > 0);
926        Ok(())
927    }
928
929    #[test]
930    fn test_upsert_relationship_idempotent() -> TestResult {
931        let (_tmp, conn) = setup_db()?;
932        let id_a = upsert_entity(&conn, "global", &new_entity_helper("idem-a"))?;
933        let id_b = upsert_entity(&conn, "global", &new_entity_helper("idem-b"))?;
934
935        let rel = NewRelationship {
936            source: "idem-a".to_string(),
937            target: "idem-b".to_string(),
938            relation: "uses".to_string(),
939            strength: 0.5,
940            description: None,
941        };
942        let id1 = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
943        let id2 = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
944        assert_eq!(id1, id2);
945        Ok(())
946    }
947
948    #[test]
949    fn test_find_relationship_existing() -> TestResult {
950        let (_tmp, conn) = setup_db()?;
951        let id_a = upsert_entity(&conn, "global", &new_entity_helper("fr-a"))?;
952        let id_b = upsert_entity(&conn, "global", &new_entity_helper("fr-b"))?;
953
954        let rel = NewRelationship {
955            source: "fr-a".to_string(),
956            target: "fr-b".to_string(),
957            relation: "depends_on".to_string(),
958            strength: 0.7,
959            description: None,
960        };
961        upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
962
963        let encontrada = find_relationship(&conn, id_a, id_b, "depends_on")?;
964        let row = encontrada.ok_or("relationship should exist")?;
965        assert_eq!(row.source_id, id_a);
966        assert_eq!(row.target_id, id_b);
967        assert!((row.weight - 0.7).abs() < 1e-9);
968        Ok(())
969    }
970
971    #[test]
972    fn test_find_relationship_missing_returns_none() -> TestResult {
973        let (_tmp, conn) = setup_db()?;
974        let resultado = find_relationship(&conn, 9999, 8888, "uses")?;
975        assert!(resultado.is_none());
976        Ok(())
977    }
978
979    // ------------------------------------------------------------------ //
980    // link_memory_entity / link_memory_relationship
981    // ------------------------------------------------------------------ //
982
983    #[test]
984    fn test_link_memory_entity_idempotent() -> TestResult {
985        let (_tmp, conn) = setup_db()?;
986        let memory_id = insert_memory(&conn)?;
987        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("me-ent"))?;
988
989        link_memory_entity(&conn, memory_id, entity_id)?;
990        let resultado = link_memory_entity(&conn, memory_id, entity_id);
991        assert!(
992            resultado.is_ok(),
993            "INSERT OR IGNORE must not fail on duplicate"
994        );
995        Ok(())
996    }
997
998    #[test]
999    fn test_link_memory_relationship_idempotent() -> TestResult {
1000        let (_tmp, conn) = setup_db()?;
1001        let memory_id = insert_memory(&conn)?;
1002        let id_a = upsert_entity(&conn, "global", &new_entity_helper("mr-a"))?;
1003        let id_b = upsert_entity(&conn, "global", &new_entity_helper("mr-b"))?;
1004
1005        let rel = NewRelationship {
1006            source: "mr-a".to_string(),
1007            target: "mr-b".to_string(),
1008            relation: "uses".to_string(),
1009            strength: 0.5,
1010            description: None,
1011        };
1012        let rel_id = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
1013
1014        link_memory_relationship(&conn, memory_id, rel_id)?;
1015        let resultado = link_memory_relationship(&conn, memory_id, rel_id);
1016        assert!(
1017            resultado.is_ok(),
1018            "INSERT OR IGNORE must not fail on duplicate"
1019        );
1020        Ok(())
1021    }
1022
1023    // ------------------------------------------------------------------ //
1024    // increment_degree / recalculate_degree
1025    // ------------------------------------------------------------------ //
1026
1027    #[test]
1028    fn test_increment_degree_increases_counter() -> TestResult {
1029        let (_tmp, conn) = setup_db()?;
1030        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("grau-ent"))?;
1031
1032        increment_degree(&conn, entity_id)?;
1033        increment_degree(&conn, entity_id)?;
1034
1035        let degree: i64 = conn.query_row(
1036            "SELECT degree FROM entities WHERE id = ?1",
1037            params![entity_id],
1038            |r| r.get(0),
1039        )?;
1040        assert_eq!(degree, 2);
1041        Ok(())
1042    }
1043
1044    #[test]
1045    fn test_recalculate_degree_reflects_actual_relations() -> TestResult {
1046        let (_tmp, conn) = setup_db()?;
1047        let id_a = upsert_entity(&conn, "global", &new_entity_helper("rc-a"))?;
1048        let id_b = upsert_entity(&conn, "global", &new_entity_helper("rc-b"))?;
1049        let id_c = upsert_entity(&conn, "global", &new_entity_helper("rc-c"))?;
1050
1051        let rel1 = NewRelationship {
1052            source: "rc-a".to_string(),
1053            target: "rc-b".to_string(),
1054            relation: "uses".to_string(),
1055            strength: 0.5,
1056            description: None,
1057        };
1058        let rel2 = NewRelationship {
1059            source: "rc-c".to_string(),
1060            target: "rc-a".to_string(),
1061            relation: "depends_on".to_string(),
1062            strength: 0.5,
1063            description: None,
1064        };
1065        upsert_relationship(&conn, "global", id_a, id_b, &rel1)?;
1066        upsert_relationship(&conn, "global", id_c, id_a, &rel2)?;
1067
1068        recalculate_degree(&conn, id_a)?;
1069
1070        let degree: i64 = conn.query_row(
1071            "SELECT degree FROM entities WHERE id = ?1",
1072            params![id_a],
1073            |r| r.get(0),
1074        )?;
1075        assert_eq!(
1076            degree, 2,
1077            "rc-a appears in two relationships (source+target)"
1078        );
1079        Ok(())
1080    }
1081
1082    // ------------------------------------------------------------------ //
1083    // find_orphan_entity_ids
1084    // ------------------------------------------------------------------ //
1085
1086    #[test]
1087    fn test_find_orphan_entity_ids_without_orphans() -> TestResult {
1088        let (_tmp, conn) = setup_db()?;
1089        let memory_id = insert_memory(&conn)?;
1090        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("nao-orfa"))?;
1091        link_memory_entity(&conn, memory_id, entity_id)?;
1092
1093        let orfas = find_orphan_entity_ids(&conn, Some("global"))?;
1094        assert!(!orfas.contains(&entity_id));
1095        Ok(())
1096    }
1097
1098    #[test]
1099    fn test_find_orphan_entity_ids_detects_orphans() -> TestResult {
1100        let (_tmp, conn) = setup_db()?;
1101        let entity_id = upsert_entity(&conn, "global", &new_entity_helper("sim-orfa"))?;
1102
1103        let orfas = find_orphan_entity_ids(&conn, Some("global"))?;
1104        assert!(orfas.contains(&entity_id));
1105        Ok(())
1106    }
1107
1108    #[test]
1109    fn test_find_orphan_entity_ids_without_namespace_returns_all() -> TestResult {
1110        let (_tmp, conn) = setup_db()?;
1111        let id1 = upsert_entity(&conn, "ns-a", &new_entity_helper("orfa-a"))?;
1112        let id2 = upsert_entity(&conn, "ns-b", &new_entity_helper("orfa-b"))?;
1113
1114        let orfas = find_orphan_entity_ids(&conn, None)?;
1115        assert!(orfas.contains(&id1));
1116        assert!(orfas.contains(&id2));
1117        Ok(())
1118    }
1119
1120    // ------------------------------------------------------------------ //
1121    // list_entities / list_relationships_by_namespace
1122    // ------------------------------------------------------------------ //
1123
1124    #[test]
1125    fn test_list_entities_with_namespace() -> TestResult {
1126        let (_tmp, conn) = setup_db()?;
1127        upsert_entity(&conn, "le-ns", &new_entity_helper("le-ent-1"))?;
1128        upsert_entity(&conn, "le-ns", &new_entity_helper("le-ent-2"))?;
1129        upsert_entity(&conn, "outro-ns", &new_entity_helper("le-ent-3"))?;
1130
1131        let lista = list_entities(&conn, Some("le-ns"))?;
1132        assert_eq!(lista.len(), 2);
1133        assert!(lista.iter().all(|e| e.namespace == "le-ns"));
1134        Ok(())
1135    }
1136
1137    #[test]
1138    fn test_list_entities_without_namespace_returns_all() -> TestResult {
1139        let (_tmp, conn) = setup_db()?;
1140        upsert_entity(&conn, "ns1", &new_entity_helper("all-ent-1"))?;
1141        upsert_entity(&conn, "ns2", &new_entity_helper("all-ent-2"))?;
1142
1143        let lista = list_entities(&conn, None)?;
1144        assert!(lista.len() >= 2);
1145        Ok(())
1146    }
1147
1148    #[test]
1149    fn test_list_relationships_by_namespace_filters_correctly() -> TestResult {
1150        let (_tmp, conn) = setup_db()?;
1151        let id_a = upsert_entity(&conn, "rel-ns", &new_entity_helper("lr-a"))?;
1152        let id_b = upsert_entity(&conn, "rel-ns", &new_entity_helper("lr-b"))?;
1153
1154        let rel = NewRelationship {
1155            source: "lr-a".to_string(),
1156            target: "lr-b".to_string(),
1157            relation: "uses".to_string(),
1158            strength: 0.5,
1159            description: None,
1160        };
1161        upsert_relationship(&conn, "rel-ns", id_a, id_b, &rel)?;
1162
1163        let lista = list_relationships_by_namespace(&conn, Some("rel-ns"))?;
1164        assert!(!lista.is_empty());
1165        assert!(lista.iter().all(|r| r.namespace == "rel-ns"));
1166        Ok(())
1167    }
1168
1169    // ------------------------------------------------------------------ //
1170    // delete_relationship_by_id / create_or_fetch_relationship
1171    // ------------------------------------------------------------------ //
1172
1173    #[test]
1174    fn test_delete_relationship_by_id_removes_relation() -> TestResult {
1175        let (_tmp, conn) = setup_db()?;
1176        let id_a = upsert_entity(&conn, "global", &new_entity_helper("dr-a"))?;
1177        let id_b = upsert_entity(&conn, "global", &new_entity_helper("dr-b"))?;
1178
1179        let rel = NewRelationship {
1180            source: "dr-a".to_string(),
1181            target: "dr-b".to_string(),
1182            relation: "uses".to_string(),
1183            strength: 0.5,
1184            description: None,
1185        };
1186        let rel_id = upsert_relationship(&conn, "global", id_a, id_b, &rel)?;
1187
1188        delete_relationship_by_id(&conn, rel_id)?;
1189
1190        let encontrada = find_relationship(&conn, id_a, id_b, "uses")?;
1191        assert!(encontrada.is_none(), "relationship must have been removed");
1192        Ok(())
1193    }
1194
1195    #[test]
1196    fn test_create_or_fetch_relationship_creates_new() -> TestResult {
1197        let (_tmp, conn) = setup_db()?;
1198        let id_a = upsert_entity(&conn, "global", &new_entity_helper("cf-a"))?;
1199        let id_b = upsert_entity(&conn, "global", &new_entity_helper("cf-b"))?;
1200
1201        let (rel_id, created) =
1202            create_or_fetch_relationship(&conn, "global", id_a, id_b, "uses", 0.5, None)?;
1203        assert!(rel_id > 0);
1204        assert!(created);
1205        Ok(())
1206    }
1207
1208    #[test]
1209    fn test_create_or_fetch_relationship_returns_existing() -> TestResult {
1210        let (_tmp, conn) = setup_db()?;
1211        let id_a = upsert_entity(&conn, "global", &new_entity_helper("cf2-a"))?;
1212        let id_b = upsert_entity(&conn, "global", &new_entity_helper("cf2-b"))?;
1213
1214        create_or_fetch_relationship(&conn, "global", id_a, id_b, "uses", 0.5, None)?;
1215        let (_, created) =
1216            create_or_fetch_relationship(&conn, "global", id_a, id_b, "uses", 0.5, None)?;
1217        assert!(
1218            !created,
1219            "second call must return the existing relationship"
1220        );
1221        Ok(())
1222    }
1223
1224    // ------------------------------------------------------------------ //
1225    // serde alias: field "type" accepted as a synonym for "entity_type"
1226    // ------------------------------------------------------------------ //
1227
1228    #[test]
1229    fn accepts_type_field_as_alias() -> TestResult {
1230        let json = r#"{"name": "X", "type": "concept"}"#;
1231        let ent: NewEntity = serde_json::from_str(json)?;
1232        assert_eq!(ent.entity_type, EntityType::Concept);
1233        Ok(())
1234    }
1235
1236    #[test]
1237    fn accepts_canonical_entity_type_field() -> TestResult {
1238        let json = r#"{"name": "X", "entity_type": "concept"}"#;
1239        let ent: NewEntity = serde_json::from_str(json)?;
1240        assert_eq!(ent.entity_type, EntityType::Concept);
1241        Ok(())
1242    }
1243
1244    #[test]
1245    fn both_fields_present_yields_duplicate_error() {
1246        // having both entity_type and type in the same JSON is a duplicate and must fail
1247        let json = r#"{"name": "X", "entity_type": "concept", "type": "person"}"#;
1248        let resultado: Result<NewEntity, _> = serde_json::from_str(json);
1249        assert!(
1250            resultado.is_err(),
1251            "both fields in the same JSON are a duplicate"
1252        );
1253    }
1254
1255    #[test]
1256    fn validate_entity_name_accepts_valid() {
1257        assert!(validate_entity_name("rust-lang").is_ok());
1258        assert!(validate_entity_name("sqlite-graphrag").is_ok());
1259        assert!(validate_entity_name("ab").is_ok());
1260    }
1261
1262    #[test]
1263    fn validate_entity_name_rejects_short() {
1264        assert!(validate_entity_name("a").is_err());
1265        assert!(validate_entity_name("").is_err());
1266    }
1267
1268    #[test]
1269    fn validate_entity_name_rejects_newlines() {
1270        assert!(validate_entity_name("foo\nbar").is_err());
1271        assert!(validate_entity_name("foo\rbar").is_err());
1272    }
1273
1274    #[test]
1275    fn validate_entity_name_rejects_short_allcaps() {
1276        assert!(validate_entity_name("RAM").is_err());
1277        assert!(validate_entity_name("NAO").is_err());
1278        assert!(validate_entity_name("OK").is_err());
1279    }
1280
1281    #[test]
1282    fn validate_entity_name_accepts_long_allcaps() {
1283        assert!(validate_entity_name("SQLITE").is_ok());
1284        assert!(validate_entity_name("GRAPHRAG").is_ok());
1285    }
1286
1287    #[test]
1288    fn validate_entity_name_accepts_mixed_case() {
1289        assert!(validate_entity_name("FTS5").is_ok()); // 4 chars but has digit
1290        assert!(validate_entity_name("WAL").is_err()); // 3 chars ALL_CAPS
1291    }
1292}