1use crate::embedder::f32_to_bytes;
5use crate::errors::AppError;
6use rusqlite::{params, Connection};
7
/// One row of the `memory_chunks` table, as written by `insert_chunks` and
/// read back by `get_chunks_by_memory`.
///
/// Each field maps to the column of the same name.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// Value of the `memory_id` column: the memory this chunk belongs to.
    pub memory_id: i64,
    /// Position of the chunk within its memory; `get_chunks_by_memory` orders by it.
    pub chunk_idx: i32,
    /// Text content stored for the chunk.
    pub chunk_text: String,
    /// Start of the chunk — presumably an offset into the memory body.
    /// NOTE(review): the unit (bytes vs. chars) is not visible here; confirm with the chunker.
    pub start_offset: i32,
    /// End of the chunk, in the same unit as `start_offset`.
    /// NOTE(review): whether this bound is inclusive or exclusive is not visible here.
    pub end_offset: i32,
    /// Number of tokens recorded for the chunk.
    /// NOTE(review): how tokens are counted is decided by the caller; confirm.
    pub token_count: i32,
}
17
18pub fn insert_chunks(conn: &Connection, chunks: &[Chunk]) -> Result<(), AppError> {
19 for chunk in chunks {
20 conn.execute(
21 "INSERT INTO memory_chunks (memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count)
22 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
23 params![
24 chunk.memory_id,
25 chunk.chunk_idx,
26 chunk.chunk_text,
27 chunk.start_offset,
28 chunk.end_offset,
29 chunk.token_count,
30 ],
31 )?;
32 }
33 Ok(())
34}
35
36pub fn insert_chunk_slices(
37 conn: &Connection,
38 memory_id: i64,
39 body: &str,
40 chunks: &[crate::chunking::Chunk],
41) -> Result<(), AppError> {
42 for (chunk_idx, chunk) in chunks.iter().enumerate() {
43 conn.execute(
44 "INSERT INTO memory_chunks (memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count)
45 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
46 params![
47 memory_id,
48 chunk_idx as i32,
49 crate::chunking::chunk_text(body, chunk),
50 chunk.start_offset as i32,
51 chunk.end_offset as i32,
52 chunk.token_count_approx as i32,
53 ],
54 )?;
55 }
56 Ok(())
57}
58
59pub fn upsert_chunk_vec(
60 conn: &Connection,
61 _rowid: i64,
62 memory_id: i64,
63 chunk_idx: i32,
64 embedding: &[f32],
65) -> Result<(), AppError> {
66 conn.execute(
67 "INSERT OR REPLACE INTO vec_chunks(rowid, memory_id, chunk_idx, embedding)
68 VALUES (
69 (SELECT id FROM memory_chunks WHERE memory_id = ?1 AND chunk_idx = ?2),
70 ?1, ?2, ?3
71 )",
72 params![memory_id, chunk_idx, f32_to_bytes(embedding)],
73 )?;
74 Ok(())
75}
76
77pub fn delete_chunks(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
78 conn.execute(
79 "DELETE FROM memory_chunks WHERE memory_id = ?1",
80 params![memory_id],
81 )?;
82 Ok(())
83}
84
85pub fn knn_search_chunks(
86 conn: &Connection,
87 embedding: &[f32],
88 k: usize,
89) -> Result<Vec<(i64, i32, f32)>, AppError> {
90 let bytes = f32_to_bytes(embedding);
91 let mut stmt = conn.prepare(
92 "SELECT memory_id, chunk_idx, distance FROM vec_chunks
93 WHERE embedding MATCH ?1
94 ORDER BY distance LIMIT ?2",
95 )?;
96 let rows = stmt
97 .query_map(params![bytes, k as i64], |r| {
98 Ok((
99 r.get::<_, i64>(0)?,
100 r.get::<_, i32>(1)?,
101 r.get::<_, f32>(2)?,
102 ))
103 })?
104 .collect::<Result<Vec<_>, _>>()?;
105 Ok(rows)
106}
107
108pub fn get_chunks_by_memory(conn: &Connection, memory_id: i64) -> Result<Vec<Chunk>, AppError> {
109 let mut stmt = conn.prepare(
110 "SELECT memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count
111 FROM memory_chunks WHERE memory_id = ?1 ORDER BY chunk_idx",
112 )?;
113 let rows = stmt
114 .query_map(params![memory_id], |r| {
115 Ok(Chunk {
116 memory_id: r.get(0)?,
117 chunk_idx: r.get(1)?,
118 chunk_text: r.get(2)?,
119 start_offset: r.get(3)?,
120 end_offset: r.get(4)?,
121 token_count: r.get(5)?,
122 })
123 })?
124 .collect::<Result<Vec<_>, _>>()?;
125 Ok(rows)
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::EMBEDDING_DIM;
    use crate::storage::connection::register_vec_extension;
    use rusqlite::Connection;
    use tempfile::TempDir;

    /// Opens a fresh database file in a temporary directory and runs the migrations.
    ///
    /// The `TempDir` is handed back so the directory lives as long as the test.
    fn setup_db() -> (TempDir, Connection) {
        register_vec_extension();
        let tmp = TempDir::new().unwrap();
        let mut conn = Connection::open(tmp.path().join("test.db")).unwrap();
        crate::migrations::runner().run(&mut conn).unwrap();
        (tmp, conn)
    }

    /// Inserts one fixed memory row and returns its rowid.
    fn insert_memory(conn: &Connection) -> i64 {
        conn.execute(
            "INSERT INTO memories (namespace, name, type, description, body, body_hash)
             VALUES ('global', 'test-mem', 'user', 'desc', 'body', 'hash1')",
            [],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Builds a `Chunk` fixture from its six field values.
    fn make_chunk(
        memory_id: i64,
        chunk_idx: i32,
        text: &str,
        start_offset: i32,
        end_offset: i32,
        token_count: i32,
    ) -> Chunk {
        Chunk {
            memory_id,
            chunk_idx,
            chunk_text: text.to_string(),
            start_offset,
            end_offset,
            token_count,
        }
    }

    #[test]
    fn test_insert_chunks_vazia_ok() {
        let (_tmp, conn) = setup_db();
        assert!(insert_chunks(&conn, &[]).is_ok());
    }

    #[test]
    fn test_insert_chunks_e_get_por_memory() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let chunks = [
            make_chunk(memory_id, 0, "primeiro chunk", 0, 14, 3),
            make_chunk(memory_id, 1, "segundo chunk", 15, 28, 3),
        ];
        insert_chunks(&conn, &chunks).unwrap();

        let loaded = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert_eq!(loaded.len(), 2);

        let first = &loaded[0];
        assert_eq!(first.chunk_idx, 0);
        assert_eq!(first.chunk_text, "primeiro chunk");
        assert_eq!(first.start_offset, 0);
        assert_eq!(first.end_offset, 14);
        assert_eq!(first.token_count, 3);

        let second = &loaded[1];
        assert_eq!(second.chunk_idx, 1);
        assert_eq!(second.chunk_text, "segundo chunk");
    }

    #[test]
    fn test_get_chunks_memory_inexistente_retorna_vazio() {
        let (_tmp, conn) = setup_db();
        let loaded = get_chunks_by_memory(&conn, 9999).unwrap();
        assert!(loaded.is_empty());
    }

    #[test]
    fn test_delete_chunks_remove_todos() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let chunks = [
            make_chunk(memory_id, 0, "chunk a", 0, 7, 2),
            make_chunk(memory_id, 1, "chunk b", 8, 15, 2),
        ];
        insert_chunks(&conn, &chunks).unwrap();

        delete_chunks(&conn, memory_id).unwrap();

        let remaining = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert!(remaining.is_empty());
    }

    #[test]
    fn test_delete_chunks_memory_sem_chunks_ok() {
        let (_tmp, conn) = setup_db();
        assert!(delete_chunks(&conn, 9999).is_ok());
    }

    #[test]
    fn test_get_chunks_ordenados_por_chunk_idx() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        // Inserted out of order on purpose: the read must sort by `chunk_idx`.
        let chunks = [
            make_chunk(memory_id, 2, "terceiro", 20, 28, 1),
            make_chunk(memory_id, 0, "primeiro", 0, 8, 1),
            make_chunk(memory_id, 1, "segundo", 9, 16, 1),
        ];
        insert_chunks(&conn, &chunks).unwrap();

        let loaded = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert_eq!(loaded.len(), 3);
        assert_eq!(loaded[0].chunk_idx, 0);
        assert_eq!(loaded[1].chunk_idx, 1);
        assert_eq!(loaded[2].chunk_idx, 2);
    }

    #[test]
    fn test_upsert_chunk_vec_e_knn_search() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        insert_chunks(&conn, &[make_chunk(memory_id, 0, "embedding test", 0, 14, 2)]).unwrap();

        // Vector with a single non-zero component.
        let mut embedding = vec![0.0f32; EMBEDDING_DIM];
        embedding[0] = 1.0;

        let chunk_id: i64 = conn
            .query_row(
                "SELECT id FROM memory_chunks WHERE memory_id = ?1 AND chunk_idx = 0",
                params![memory_id],
                |row| row.get(0),
            )
            .unwrap();

        upsert_chunk_vec(&conn, chunk_id, memory_id, 0, &embedding).unwrap();

        let hits = knn_search_chunks(&conn, &embedding, 1).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].0, memory_id);
        assert_eq!(hits[0].1, 0);
    }

    #[test]
    fn test_knn_search_chunks_sem_dados_retorna_vazio() {
        let (_tmp, conn) = setup_db();
        let embedding = vec![0.0f32; EMBEDDING_DIM];
        let hits = knn_search_chunks(&conn, &embedding, 5).unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn test_insert_chunks_fk_invalida_falha() {
        let (_tmp, conn) = setup_db();
        // No memory row with this id exists, so the insert is expected to fail.
        let orphan = make_chunk(99999, 0, "sem pai", 0, 7, 1);
        assert!(insert_chunks(&conn, &[orphan]).is_err());
    }
}