Skip to main content

sqlite_graphrag/storage/
urls.rs

1//! Persistence for URLs extracted from memory bodies.
2//!
3//! Manages the `memory_urls` table: insert, deduplicate, and query URLs
4//! linked to a specific memory record.
5
6use crate::errors::AppError;
7use rusqlite::Connection;
8
/// A URL extracted from a memory body, as persisted in the `memory_urls` table.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged,
/// duplicated and compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryUrl {
    /// The URL text, stored in the `url` column.
    pub url: String,
    /// Value of the `url_offset` column; `None` is stored as SQL `NULL`.
    ///
    /// NOTE(review): presumably the position of the URL inside the memory body;
    /// the unit (bytes vs. characters) is not visible here — confirm with the extractor.
    pub offset: Option<i64>,
}
14
15/// Insere uma URL na tabela `memory_urls`. Ignora duplicatas silenciosamente.
16pub fn insert_url(conn: &Connection, memory_id: i64, entry: &MemoryUrl) -> Result<(), AppError> {
17    conn.execute(
18        "INSERT OR IGNORE INTO memory_urls (memory_id, url, url_offset) VALUES (?1, ?2, ?3)",
19        rusqlite::params![memory_id, entry.url, entry.offset],
20    )?;
21    Ok(())
22}
23
24/// Inserts multiple URLs for a memory. Returns the count inserted (duplicates ignored).
25/// Individual errors are logged as warn and not propagated — non-critical path.
26pub fn insert_urls(conn: &Connection, memory_id: i64, urls: &[MemoryUrl]) -> usize {
27    let mut inserted = 0usize;
28    for entry in urls {
29        match insert_url(conn, memory_id, entry) {
30            Ok(()) => {
31                let changed = conn.changes();
32                if changed > 0 {
33                    inserted += 1;
34                }
35            }
36            Err(e) => {
37                tracing::warn!(target: "storage", url = %entry.url, error = %e, "url persistence failed");
38            }
39        }
40    }
41    inserted
42}
43
44/// Lists all URLs associated with a memory.
45pub fn list_by_memory(conn: &Connection, memory_id: i64) -> Result<Vec<MemoryUrl>, AppError> {
46    let mut stmt = conn.prepare_cached(
47        "SELECT url, url_offset FROM memory_urls WHERE memory_id = ?1 ORDER BY id",
48    )?;
49    let rows = stmt.query_map(rusqlite::params![memory_id], |row| {
50        Ok(MemoryUrl {
51            url: row.get(0)?,
52            offset: row.get(1)?,
53        })
54    })?;
55    let mut result = Vec::with_capacity(8);
56    for row in rows {
57        result.push(row?);
58    }
59    Ok(result)
60}
61
62/// Removes all URLs for a memory.
63pub fn delete_by_memory(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
64    conn.execute(
65        "DELETE FROM memory_urls WHERE memory_id = ?1",
66        rusqlite::params![memory_id],
67    )?;
68    Ok(())
69}
70
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;
    use tempfile::TempDir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Opens a database file inside a fresh temporary directory and runs the
    /// crate's migration runner against it.
    ///
    /// The `TempDir` is handed back with the connection so each test keeps the
    /// directory alive for as long as it uses the database.
    fn setup_db() -> Result<(TempDir, Connection), Box<dyn std::error::Error>> {
        crate::storage::connection::register_vec_extension();
        let dir = TempDir::new()?;
        let mut conn = Connection::open(dir.path().join("test.db"))?;
        crate::migrations::runner().run(&mut conn)?;
        Ok((dir, conn))
    }

    /// Inserts a fixed row into `memories` and returns its rowid, giving the
    /// URL rows a memory to attach to.
    fn insert_test_memory(conn: &Connection) -> Result<i64, Box<dyn std::error::Error>> {
        let sql = "INSERT INTO memories (name, type, description, body, body_hash) VALUES ('mem', 'user', 'desc', 'body', 'hash')";
        conn.execute(sql, [])?;
        Ok(conn.last_insert_rowid())
    }

    /// Builds a `MemoryUrl` from a string slice and an optional offset.
    fn url_entry(url: &str, offset: Option<i64>) -> MemoryUrl {
        MemoryUrl {
            url: url.to_string(),
            offset,
        }
    }

    /// A single inserted URL is returned by `list_by_memory` with its offset intact.
    #[test]
    fn insert_url_persists_and_list_returns() -> TestResult {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        let page = url_entry("https://example.com/page", Some(5));
        insert_url(&conn, mem_id, &page)?;

        let stored = list_by_memory(&conn, mem_id)?;
        assert_eq!(stored.len(), 1);
        assert_eq!(stored[0].url, "https://example.com/page");
        assert_eq!(stored[0].offset, Some(5));
        Ok(())
    }

    /// Inserting the same entry twice leaves exactly one row.
    #[test]
    fn insert_url_duplicate_ignored() -> TestResult {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        let dup = url_entry("https://example.com/dup", None);
        for _ in 0..2 {
            insert_url(&conn, mem_id, &dup)?;
        }

        let stored = list_by_memory(&conn, mem_id)?;
        // Assertion message is Portuguese for "duplicate must be ignored".
        assert_eq!(stored.len(), 1, "duplicata deve ser ignorada");
        Ok(())
    }

    /// The batch insert counts only the rows that were actually added.
    #[test]
    fn insert_urls_returns_inserted_count() -> TestResult {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        // The third entry repeats the first, so it must not be counted.
        let batch = [
            url_entry("https://alpha.example.com", Some(0)),
            url_entry("https://beta.example.com", Some(10)),
            url_entry("https://alpha.example.com", Some(0)),
        ];
        let inserted = insert_urls(&conn, mem_id, &batch);
        assert_eq!(inserted, 2, "only 2 unique entries must be inserted");
        Ok(())
    }

    /// `delete_by_memory` leaves no URLs behind for the memory.
    #[test]
    fn delete_by_memory_removes_all_urls() -> TestResult {
        let (_tmp, conn) = setup_db()?;
        let mem_id = insert_test_memory(&conn)?;

        let doomed = url_entry("https://to-delete.example.com", None);
        insert_url(&conn, mem_id, &doomed)?;
        let before = list_by_memory(&conn, mem_id)?;
        assert_eq!(before.len(), 1);

        delete_by_memory(&conn, mem_id)?;
        let after = list_by_memory(&conn, mem_id)?;
        assert_eq!(after.len(), 0);
        Ok(())
    }
}