Skip to main content

sqlite_graphrag/storage/
urls.rs

1//! Persistence for URLs extracted from memory bodies.
2//!
3//! Manages the `memory_urls` table: insert, deduplicate, and query URLs
4//! linked to a specific memory record.
5
6use crate::errors::AppError;
7use rusqlite::Connection;
8
/// URL extracted from a memory body.
///
/// Rows of the `memory_urls` table are read into and written from this type:
/// `url` maps to the `url` column and `offset` to the `url_offset` column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryUrl {
    /// The URL text exactly as stored in the `url` column.
    pub url: String,
    /// Value of the `url_offset` column; `None` when no offset was recorded.
    /// NOTE(review): presumably the position of the URL inside the memory
    /// body — confirm the unit (bytes vs. chars) with the extractor.
    pub offset: Option<i64>,
}
14
15/// Insere uma URL na tabela `memory_urls`. Ignora duplicatas silenciosamente.
16pub fn insert_url(conn: &Connection, memory_id: i64, entry: &MemoryUrl) -> Result<(), AppError> {
17    conn.execute(
18        "INSERT OR IGNORE INTO memory_urls (memory_id, url, url_offset) VALUES (?1, ?2, ?3)",
19        rusqlite::params![memory_id, entry.url, entry.offset],
20    )?;
21    Ok(())
22}
23
24/// Inserts multiple URLs for a memory. Returns the count inserted (duplicates ignored).
25/// Individual errors are logged as warn and not propagated — non-critical path.
26pub fn insert_urls(conn: &Connection, memory_id: i64, urls: &[MemoryUrl]) -> usize {
27    let mut inserted = 0usize;
28    for entry in urls {
29        match insert_url(conn, memory_id, entry) {
30            Ok(()) => {
31                let changed = conn.changes();
32                if changed > 0 {
33                    inserted += 1;
34                }
35            }
36            Err(e) => {
37                tracing::warn!("falha ao persistir url '{}': {e:#}", entry.url);
38            }
39        }
40    }
41    inserted
42}
43
44/// Lists all URLs associated with a memory.
45pub fn list_by_memory(conn: &Connection, memory_id: i64) -> Result<Vec<MemoryUrl>, AppError> {
46    let mut stmt =
47        conn.prepare("SELECT url, url_offset FROM memory_urls WHERE memory_id = ?1 ORDER BY id")?;
48    let rows = stmt.query_map(rusqlite::params![memory_id], |row| {
49        Ok(MemoryUrl {
50            url: row.get(0)?,
51            offset: row.get(1)?,
52        })
53    })?;
54    let mut result = Vec::new();
55    for row in rows {
56        result.push(row?);
57    }
58    Ok(result)
59}
60
61/// Removes all URLs for a memory.
62pub fn delete_by_memory(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
63    conn.execute(
64        "DELETE FROM memory_urls WHERE memory_id = ?1",
65        rusqlite::params![memory_id],
66    )?;
67    Ok(())
68}
69
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;
    use tempfile::TempDir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Opens a database file inside a fresh temporary directory and runs the
    /// crate migrations on it.
    ///
    /// The `TempDir` is returned next to the connection so the caller keeps it
    /// alive for as long as the connection is used.
    fn setup_db() -> Result<(TempDir, Connection), Box<dyn std::error::Error>> {
        crate::storage::connection::register_vec_extension();
        let dir = TempDir::new()?;
        let mut conn = Connection::open(dir.path().join("test.db"))?;
        crate::migrations::runner().run(&mut conn)?;
        Ok((dir, conn))
    }

    /// Inserts a fixed placeholder row into `memories` and returns its rowid.
    fn insert_test_memory(conn: &Connection) -> Result<i64, Box<dyn std::error::Error>> {
        conn.execute(
            "INSERT INTO memories (name, type, description, body, body_hash) VALUES ('mem', 'user', 'desc', 'body', 'hash')",
            [],
        )?;
        Ok(conn.last_insert_rowid())
    }

    /// Shorthand constructor for a [`MemoryUrl`] used by the tests below.
    fn entry(url: &str, offset: Option<i64>) -> MemoryUrl {
        MemoryUrl {
            url: url.to_owned(),
            offset,
        }
    }

    #[test]
    fn insert_url_persists_and_list_returns() -> TestResult {
        let (_dir, conn) = setup_db()?;
        let memory_id = insert_test_memory(&conn)?;

        insert_url(&conn, memory_id, &entry("https://example.com/page", Some(5)))?;

        let stored = list_by_memory(&conn, memory_id)?;
        assert_eq!(stored.len(), 1);
        assert_eq!(stored[0].url, "https://example.com/page");
        assert_eq!(stored[0].offset, Some(5));
        Ok(())
    }

    #[test]
    fn insert_url_duplicate_ignored() -> TestResult {
        let (_dir, conn) = setup_db()?;
        let memory_id = insert_test_memory(&conn)?;

        // Same URL for the same memory, inserted twice.
        let duplicate = entry("https://example.com/dup", None);
        for _ in 0..2 {
            insert_url(&conn, memory_id, &duplicate)?;
        }

        let stored = list_by_memory(&conn, memory_id)?;
        assert_eq!(stored.len(), 1, "duplicata deve ser ignorada");
        Ok(())
    }

    #[test]
    fn insert_urls_returns_inserted_count() -> TestResult {
        let (_dir, conn) = setup_db()?;
        let memory_id = insert_test_memory(&conn)?;

        // The third entry repeats the first, so only two rows are expected.
        let batch = [
            entry("https://alpha.example.com", Some(0)),
            entry("https://beta.example.com", Some(10)),
            entry("https://alpha.example.com", Some(0)),
        ];

        let inserted = insert_urls(&conn, memory_id, &batch);
        assert_eq!(inserted, 2, "apenas 2 únicas devem ser inseridas");
        Ok(())
    }

    #[test]
    fn delete_by_memory_removes_all_urls() -> TestResult {
        let (_dir, conn) = setup_db()?;
        let memory_id = insert_test_memory(&conn)?;

        insert_url(
            &conn,
            memory_id,
            &entry("https://to-delete.example.com", None),
        )?;
        let before = list_by_memory(&conn, memory_id)?;
        assert_eq!(before.len(), 1);

        delete_by_memory(&conn, memory_id)?;
        let after = list_by_memory(&conn, memory_id)?;
        assert_eq!(after.len(), 0);
        Ok(())
    }
}