Skip to main content

sqlite_graphrag/storage/
urls.rs

1use crate::errors::AppError;
2use rusqlite::Connection;
3
/// URL extracted from the body of a memory.
///
/// Values of this type are written to and read back from the `memory_urls` table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryUrl {
    /// The URL text, persisted in the `url` column.
    pub url: String,
    /// Value persisted in the `url_offset` column; `None` is stored as SQL NULL.
    ///
    /// NOTE(review): presumably the position of the URL inside the memory body —
    /// confirm the unit (bytes vs. chars) against the extractor.
    pub offset: Option<i64>,
}
9
10/// Insere uma URL na tabela `memory_urls`. Ignora duplicatas silenciosamente.
11pub fn insert_url(conn: &Connection, memory_id: i64, entry: &MemoryUrl) -> Result<(), AppError> {
12    conn.execute(
13        "INSERT OR IGNORE INTO memory_urls (memory_id, url, url_offset) VALUES (?1, ?2, ?3)",
14        rusqlite::params![memory_id, entry.url, entry.offset],
15    )?;
16    Ok(())
17}
18
19/// Insere múltiplas URLs para uma memória. Retorna a quantidade inserida (duplicatas ignoradas).
20/// Erros individuais são logados como warn e não propagados — caminho não crítico.
21pub fn insert_urls(conn: &Connection, memory_id: i64, urls: &[MemoryUrl]) -> usize {
22    let mut inserted = 0usize;
23    for entry in urls {
24        match insert_url(conn, memory_id, entry) {
25            Ok(()) => {
26                let changed = conn.changes();
27                if changed > 0 {
28                    inserted += 1;
29                }
30            }
31            Err(e) => {
32                tracing::warn!("falha ao persistir url '{}': {e:#}", entry.url);
33            }
34        }
35    }
36    inserted
37}
38
39/// Lista todas as URLs associadas a uma memória.
40pub fn list_by_memory(conn: &Connection, memory_id: i64) -> Result<Vec<MemoryUrl>, AppError> {
41    let mut stmt =
42        conn.prepare("SELECT url, url_offset FROM memory_urls WHERE memory_id = ?1 ORDER BY id")?;
43    let rows = stmt.query_map(rusqlite::params![memory_id], |row| {
44        Ok(MemoryUrl {
45            url: row.get(0)?,
46            offset: row.get(1)?,
47        })
48    })?;
49    let mut result = Vec::new();
50    for row in rows {
51        result.push(row?);
52    }
53    Ok(result)
54}
55
56/// Remove todas as URLs de uma memória.
57pub fn delete_by_memory(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
58    conn.execute(
59        "DELETE FROM memory_urls WHERE memory_id = ?1",
60        rusqlite::params![memory_id],
61    )?;
62    Ok(())
63}
64
#[cfg(test)]
mod testes {
    use super::*;
    use rusqlite::Connection;
    use tempfile::TempDir;

    type Resultado = Result<(), Box<dyn std::error::Error>>;

    /// Opens a fresh database file inside a temporary directory and runs the migrations.
    ///
    /// The `TempDir` is handed back with the connection so the directory stays
    /// alive for as long as the test holds it.
    fn setup_db() -> Result<(TempDir, Connection), Box<dyn std::error::Error>> {
        crate::storage::connection::register_vec_extension();
        let dir = TempDir::new()?;
        let mut conn = Connection::open(dir.path().join("test.db"))?;
        crate::migrations::runner().run(&mut conn)?;
        Ok((dir, conn))
    }

    /// Inserts a placeholder row into `memories` and returns its rowid.
    fn insert_test_memory(conn: &Connection) -> Result<i64, Box<dyn std::error::Error>> {
        conn.execute(
            "INSERT INTO memories (name, type, description, body, body_hash) VALUES ('mem', 'user', 'desc', 'body', 'hash')",
            [],
        )?;
        let rowid = conn.last_insert_rowid();
        Ok(rowid)
    }

    /// Builds a `MemoryUrl` fixture from a string slice and an optional offset.
    fn url_entry(url: &str, offset: Option<i64>) -> MemoryUrl {
        MemoryUrl {
            url: url.to_string(),
            offset,
        }
    }

    /// A single inserted URL is returned by `list_by_memory` with its offset intact.
    #[test]
    fn insert_url_persiste_e_list_retorna() -> Resultado {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        let entry = url_entry("https://example.com/page", Some(5));
        insert_url(&conn, mem_id, &entry)?;

        let stored = list_by_memory(&conn, mem_id)?;
        assert_eq!(stored.len(), 1);
        let first = &stored[0];
        assert_eq!(first.url, "https://example.com/page");
        assert_eq!(first.offset, Some(5));
        Ok(())
    }

    /// Inserting the same URL twice leaves exactly one row behind.
    #[test]
    fn insert_url_duplicata_ignorada() -> Resultado {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        let entry = url_entry("https://example.com/dup", None);
        for _ in 0..2 {
            insert_url(&conn, mem_id, &entry)?;
        }

        let stored = list_by_memory(&conn, mem_id)?;
        assert_eq!(stored.len(), 1, "duplicata deve ser ignorada");
        Ok(())
    }

    /// `insert_urls` counts only the rows that were really written.
    #[test]
    fn insert_urls_retorna_contagem_inseridas() -> Resultado {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        // The third entry repeats the first one and must not be counted.
        let batch = [
            url_entry("https://alpha.example.com", Some(0)),
            url_entry("https://beta.example.com", Some(10)),
            url_entry("https://alpha.example.com", Some(0)),
        ];
        let count = insert_urls(&conn, mem_id, &batch);
        assert_eq!(count, 2, "apenas 2 únicas devem ser inseridas");
        Ok(())
    }

    /// `delete_by_memory` removes every URL previously stored for the memory.
    #[test]
    fn delete_by_memory_remove_todas_urls() -> Resultado {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        let entry = url_entry("https://to-delete.example.com", None);
        insert_url(&conn, mem_id, &entry)?;
        let before = list_by_memory(&conn, mem_id)?;
        assert_eq!(before.len(), 1);

        delete_by_memory(&conn, mem_id)?;
        let after = list_by_memory(&conn, mem_id)?;
        assert_eq!(after.len(), 0);
        Ok(())
    }
}