1use crate::embedder::f32_to_bytes;
10use crate::errors::AppError;
11use rusqlite::{params, Connection};
12
/// One row of the `memory_chunks` table: a slice of a memory's body.
///
/// `PartialEq`/`Eq` are derived so rows can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Chunk {
    /// Id of the memory this chunk belongs to (`memory_chunks.memory_id`).
    pub memory_id: i64,
    /// Position of the chunk within its memory; reads are ordered by this column.
    pub chunk_idx: i32,
    /// Text content of the chunk.
    pub chunk_text: String,
    /// Start offset of the chunk in the memory body.
    /// NOTE(review): presumably a byte offset, end-exclusive pair with `end_offset` — confirm
    /// against the chunker.
    pub start_offset: i32,
    /// End offset of the chunk in the memory body (see `start_offset`).
    pub end_offset: i32,
    /// Token count recorded for the chunk.
    pub token_count: i32,
}
22
23pub fn insert_chunks(conn: &Connection, chunks: &[Chunk]) -> Result<(), AppError> {
24 for chunk in chunks {
25 conn.execute(
26 "INSERT INTO memory_chunks (memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count)
27 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
28 params![
29 chunk.memory_id,
30 chunk.chunk_idx,
31 chunk.chunk_text,
32 chunk.start_offset,
33 chunk.end_offset,
34 chunk.token_count,
35 ],
36 )?;
37 }
38 Ok(())
39}
40
41pub fn insert_chunk_slices(
42 conn: &Connection,
43 memory_id: i64,
44 body: &str,
45 chunks: &[crate::chunking::Chunk],
46) -> Result<(), AppError> {
47 for (chunk_idx, chunk) in chunks.iter().enumerate() {
48 conn.execute(
49 "INSERT INTO memory_chunks (memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count)
50 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
51 params![
52 memory_id,
53 chunk_idx as i32,
54 crate::chunking::chunk_text(body, chunk),
55 chunk.start_offset as i32,
56 chunk.end_offset as i32,
57 chunk.token_count_approx as i32,
58 ],
59 )?;
60 }
61 Ok(())
62}
63
64pub fn upsert_chunk_vec(
65 conn: &Connection,
66 _rowid: i64,
67 memory_id: i64,
68 chunk_idx: i32,
69 embedding: &[f32],
70) -> Result<(), AppError> {
71 conn.execute(
72 "INSERT OR REPLACE INTO vec_chunks(rowid, memory_id, chunk_idx, embedding)
73 VALUES (
74 (SELECT id FROM memory_chunks WHERE memory_id = ?1 AND chunk_idx = ?2),
75 ?1, ?2, ?3
76 )",
77 params![memory_id, chunk_idx, f32_to_bytes(embedding)],
78 )?;
79 Ok(())
80}
81
82pub fn delete_chunks(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
83 conn.execute(
84 "DELETE FROM memory_chunks WHERE memory_id = ?1",
85 params![memory_id],
86 )?;
87 Ok(())
88}
89
90pub fn knn_search_chunks(
91 conn: &Connection,
92 embedding: &[f32],
93 k: usize,
94) -> Result<Vec<(i64, i32, f32)>, AppError> {
95 let bytes = f32_to_bytes(embedding);
96 let mut stmt = conn.prepare(
97 "SELECT memory_id, chunk_idx, distance FROM vec_chunks
98 WHERE embedding MATCH ?1
99 ORDER BY distance LIMIT ?2",
100 )?;
101 let rows = stmt
102 .query_map(params![bytes, k as i64], |r| {
103 Ok((
104 r.get::<_, i64>(0)?,
105 r.get::<_, i32>(1)?,
106 r.get::<_, f32>(2)?,
107 ))
108 })?
109 .collect::<Result<Vec<_>, _>>()?;
110 Ok(rows)
111}
112
113pub fn get_chunks_by_memory(conn: &Connection, memory_id: i64) -> Result<Vec<Chunk>, AppError> {
114 let mut stmt = conn.prepare(
115 "SELECT memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count
116 FROM memory_chunks WHERE memory_id = ?1 ORDER BY chunk_idx",
117 )?;
118 let rows = stmt
119 .query_map(params![memory_id], |r| {
120 Ok(Chunk {
121 memory_id: r.get(0)?,
122 chunk_idx: r.get(1)?,
123 chunk_text: r.get(2)?,
124 start_offset: r.get(3)?,
125 end_offset: r.get(4)?,
126 token_count: r.get(5)?,
127 })
128 })?
129 .collect::<Result<Vec<_>, _>>()?;
130 Ok(rows)
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::EMBEDDING_DIM;
    use crate::storage::connection::register_vec_extension;
    use rusqlite::Connection;
    use tempfile::TempDir;

    /// Creates a migrated database file inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned together with the connection so the directory stays alive
    /// for the duration of the test.
    fn setup_db() -> (TempDir, Connection) {
        register_vec_extension();
        let dir = TempDir::new().unwrap();
        let mut db = Connection::open(dir.path().join("test.db")).unwrap();
        crate::migrations::runner().run(&mut db).unwrap();
        (dir, db)
    }

    /// Inserts a fixed parent row into `memories` and returns its rowid.
    fn insert_memory(conn: &Connection) -> i64 {
        let inserted = conn.execute(
            "INSERT INTO memories (namespace, name, type, description, body, body_hash)
             VALUES ('global', 'test-mem', 'user', 'desc', 'body', 'hash1')",
            [],
        );
        inserted.unwrap();
        conn.last_insert_rowid()
    }

    /// Builds a `Chunk` fixture from its six column values.
    fn make_chunk(
        memory_id: i64,
        chunk_idx: i32,
        text: &str,
        start_offset: i32,
        end_offset: i32,
        token_count: i32,
    ) -> Chunk {
        Chunk {
            memory_id,
            chunk_idx,
            chunk_text: text.to_string(),
            start_offset,
            end_offset,
            token_count,
        }
    }

    #[test]
    fn test_insert_chunks_vazia_ok() {
        let (_dir, conn) = setup_db();
        assert!(insert_chunks(&conn, &[]).is_ok());
    }

    #[test]
    fn test_insert_chunks_e_get_por_memory() {
        let (_dir, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let fixtures = vec![
            make_chunk(memory_id, 0, "primeiro chunk", 0, 14, 3),
            make_chunk(memory_id, 1, "segundo chunk", 15, 28, 3),
        ];
        insert_chunks(&conn, &fixtures).unwrap();

        let fetched = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert_eq!(fetched.len(), 2);

        let first = &fetched[0];
        assert_eq!(first.chunk_idx, 0);
        assert_eq!(first.chunk_text, "primeiro chunk");
        assert_eq!(first.start_offset, 0);
        assert_eq!(first.end_offset, 14);
        assert_eq!(first.token_count, 3);

        let second = &fetched[1];
        assert_eq!(second.chunk_idx, 1);
        assert_eq!(second.chunk_text, "segundo chunk");
    }

    #[test]
    fn test_get_chunks_memory_inexistente_retorna_vazio() {
        let (_dir, conn) = setup_db();
        let fetched = get_chunks_by_memory(&conn, 9999).unwrap();
        assert!(fetched.is_empty());
    }

    #[test]
    fn test_delete_chunks_remove_todos() {
        let (_dir, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let fixtures = vec![
            make_chunk(memory_id, 0, "chunk a", 0, 7, 2),
            make_chunk(memory_id, 1, "chunk b", 8, 15, 2),
        ];
        insert_chunks(&conn, &fixtures).unwrap();

        delete_chunks(&conn, memory_id).unwrap();

        let remaining = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert!(remaining.is_empty());
    }

    #[test]
    fn test_delete_chunks_memory_sem_chunks_ok() {
        let (_dir, conn) = setup_db();
        assert!(delete_chunks(&conn, 9999).is_ok());
    }

    #[test]
    fn test_get_chunks_ordenados_por_chunk_idx() {
        let (_dir, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        // Deliberately inserted out of order to exercise the ORDER BY clause.
        let fixtures = vec![
            make_chunk(memory_id, 2, "terceiro", 20, 28, 1),
            make_chunk(memory_id, 0, "primeiro", 0, 8, 1),
            make_chunk(memory_id, 1, "segundo", 9, 16, 1),
        ];
        insert_chunks(&conn, &fixtures).unwrap();

        let fetched = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert_eq!(fetched.len(), 3);
        for (position, expected_idx) in [0, 1, 2].iter().enumerate() {
            assert_eq!(fetched[position].chunk_idx, *expected_idx);
        }
    }

    #[test]
    fn test_upsert_chunk_vec_e_knn_search() {
        let (_dir, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        insert_chunks(&conn, &[make_chunk(memory_id, 0, "embedding test", 0, 14, 2)]).unwrap();

        // Unit vector along the first axis.
        let mut embedding = vec![0.0f32; EMBEDDING_DIM];
        embedding[0] = 1.0;

        let chunk_id: i64 = conn
            .query_row(
                "SELECT id FROM memory_chunks WHERE memory_id = ?1 AND chunk_idx = 0",
                params![memory_id],
                |row| row.get(0),
            )
            .unwrap();

        upsert_chunk_vec(&conn, chunk_id, memory_id, 0, &embedding).unwrap();

        let hits = knn_search_chunks(&conn, &embedding, 1).unwrap();
        assert_eq!(hits.len(), 1);
        let (hit_memory_id, hit_chunk_idx, _distance) = hits[0];
        assert_eq!(hit_memory_id, memory_id);
        assert_eq!(hit_chunk_idx, 0);
    }

    #[test]
    fn test_knn_search_chunks_sem_dados_retorna_vazio() {
        let (_dir, conn) = setup_db();
        let query = vec![0.0f32; EMBEDDING_DIM];
        let hits = knn_search_chunks(&conn, &query, 5).unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn test_insert_chunks_fk_invalida_falha() {
        let (_dir, conn) = setup_db();
        // No `memories` row with this id exists, so the insert is expected to be rejected.
        let orphan = make_chunk(99999, 0, "sem pai", 0, 7, 1);
        assert!(insert_chunks(&conn, &[orphan]).is_err());
    }
}