1use crate::embedder::f32_to_bytes;
5use crate::errors::AppError;
6use rusqlite::{params, Connection};
7
/// One row of the `memory_chunks` table: a slice of a memory's text plus its bookkeeping data.
#[derive(Clone, Debug)]
pub struct Chunk {
    /// Id of the memory this chunk belongs to (stored in the `memory_id` column).
    pub memory_id: i64,
    /// Position of the chunk within its memory; reads are ordered by this value.
    pub chunk_idx: i32,
    /// Text content of the chunk.
    pub chunk_text: String,
    /// Start offset of the chunk (presumably into the memory body — unit not visible here).
    pub start_offset: i32,
    /// End offset of the chunk (presumably into the memory body — unit not visible here).
    pub end_offset: i32,
    /// Token count recorded for the chunk.
    pub token_count: i32,
}
17
18pub fn insert_chunks(conn: &Connection, chunks: &[Chunk]) -> Result<(), AppError> {
19 for chunk in chunks {
20 conn.execute(
21 "INSERT INTO memory_chunks (memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count)
22 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
23 params![
24 chunk.memory_id,
25 chunk.chunk_idx,
26 chunk.chunk_text,
27 chunk.start_offset,
28 chunk.end_offset,
29 chunk.token_count,
30 ],
31 )?;
32 }
33 Ok(())
34}
35
36pub fn insert_chunk_slices(
37 conn: &Connection,
38 memory_id: i64,
39 chunks: &[crate::chunking::Chunk],
40) -> Result<(), AppError> {
41 for (chunk_idx, chunk) in chunks.iter().enumerate() {
42 conn.execute(
43 "INSERT INTO memory_chunks (memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count)
44 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
45 params![
46 memory_id,
47 chunk_idx as i32,
48 chunk.text,
49 chunk.start_offset as i32,
50 chunk.end_offset as i32,
51 chunk.token_count_approx as i32,
52 ],
53 )?;
54 }
55 Ok(())
56}
57
58pub fn upsert_chunk_vec(
59 conn: &Connection,
60 _rowid: i64,
61 memory_id: i64,
62 chunk_idx: i32,
63 embedding: &[f32],
64) -> Result<(), AppError> {
65 conn.execute(
66 "INSERT OR REPLACE INTO vec_chunks(rowid, memory_id, chunk_idx, embedding)
67 VALUES (
68 (SELECT id FROM memory_chunks WHERE memory_id = ?1 AND chunk_idx = ?2),
69 ?1, ?2, ?3
70 )",
71 params![memory_id, chunk_idx, f32_to_bytes(embedding)],
72 )?;
73 Ok(())
74}
75
76pub fn delete_chunks(conn: &Connection, memory_id: i64) -> Result<(), AppError> {
77 conn.execute(
78 "DELETE FROM memory_chunks WHERE memory_id = ?1",
79 params![memory_id],
80 )?;
81 Ok(())
82}
83
84pub fn knn_search_chunks(
85 conn: &Connection,
86 embedding: &[f32],
87 k: usize,
88) -> Result<Vec<(i64, i32, f32)>, AppError> {
89 let bytes = f32_to_bytes(embedding);
90 let mut stmt = conn.prepare(
91 "SELECT memory_id, chunk_idx, distance FROM vec_chunks
92 WHERE embedding MATCH ?1
93 ORDER BY distance LIMIT ?2",
94 )?;
95 let rows = stmt
96 .query_map(params![bytes, k as i64], |r| {
97 Ok((
98 r.get::<_, i64>(0)?,
99 r.get::<_, i32>(1)?,
100 r.get::<_, f32>(2)?,
101 ))
102 })?
103 .collect::<Result<Vec<_>, _>>()?;
104 Ok(rows)
105}
106
107pub fn get_chunks_by_memory(conn: &Connection, memory_id: i64) -> Result<Vec<Chunk>, AppError> {
108 let mut stmt = conn.prepare(
109 "SELECT memory_id, chunk_idx, chunk_text, start_offset, end_offset, token_count
110 FROM memory_chunks WHERE memory_id = ?1 ORDER BY chunk_idx",
111 )?;
112 let rows = stmt
113 .query_map(params![memory_id], |r| {
114 Ok(Chunk {
115 memory_id: r.get(0)?,
116 chunk_idx: r.get(1)?,
117 chunk_text: r.get(2)?,
118 start_offset: r.get(3)?,
119 end_offset: r.get(4)?,
120 token_count: r.get(5)?,
121 })
122 })?
123 .collect::<Result<Vec<_>, _>>()?;
124 Ok(rows)
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::EMBEDDING_DIM;
    use crate::storage::connection::register_vec_extension;
    use rusqlite::Connection;
    use tempfile::TempDir;

    /// Opens a fresh database file inside a temporary directory and runs the migrations on it.
    ///
    /// The `TempDir` is returned together with the connection so callers keep it alive for the
    /// duration of the test.
    fn setup_db() -> (TempDir, Connection) {
        register_vec_extension();
        let dir = TempDir::new().unwrap();
        let mut conn = Connection::open(dir.path().join("test.db")).unwrap();
        crate::migrations::runner().run(&mut conn).unwrap();
        (dir, conn)
    }

    /// Inserts a fixed row into `memories` and returns its rowid.
    fn insert_memory(conn: &Connection) -> i64 {
        let inserted = conn.execute(
            "INSERT INTO memories (namespace, name, type, description, body, body_hash)
             VALUES ('global', 'test-mem', 'user', 'desc', 'body', 'hash1')",
            [],
        );
        inserted.unwrap();
        conn.last_insert_rowid()
    }

    /// Builds a `Chunk` fixture from its field values.
    fn make_chunk(
        memory_id: i64,
        chunk_idx: i32,
        text: &str,
        start_offset: i32,
        end_offset: i32,
        token_count: i32,
    ) -> Chunk {
        Chunk {
            memory_id,
            chunk_idx,
            chunk_text: text.to_string(),
            start_offset,
            end_offset,
            token_count,
        }
    }

    #[test]
    fn test_insert_chunks_vazia_ok() {
        let (_tmp, conn) = setup_db();
        // Inserting an empty slice must succeed.
        let outcome = insert_chunks(&conn, &[]);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_insert_chunks_e_get_por_memory() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let chunks = vec![
            make_chunk(memory_id, 0, "primeiro chunk", 0, 14, 3),
            make_chunk(memory_id, 1, "segundo chunk", 15, 28, 3),
        ];
        insert_chunks(&conn, &chunks).unwrap();

        // Everything that was written must come back unchanged.
        let fetched = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert_eq!(fetched.len(), 2);

        let first = &fetched[0];
        assert_eq!(first.chunk_idx, 0);
        assert_eq!(first.chunk_text, "primeiro chunk");
        assert_eq!(first.start_offset, 0);
        assert_eq!(first.end_offset, 14);
        assert_eq!(first.token_count, 3);

        let second = &fetched[1];
        assert_eq!(second.chunk_idx, 1);
        assert_eq!(second.chunk_text, "segundo chunk");
    }

    #[test]
    fn test_get_chunks_memory_inexistente_retorna_vazio() {
        let (_tmp, conn) = setup_db();
        // An unknown memory id yields an empty list, not an error.
        let fetched = get_chunks_by_memory(&conn, 9999).unwrap();
        assert!(fetched.is_empty());
    }

    #[test]
    fn test_delete_chunks_remove_todos() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let chunks = vec![
            make_chunk(memory_id, 0, "chunk a", 0, 7, 2),
            make_chunk(memory_id, 1, "chunk b", 8, 15, 2),
        ];
        insert_chunks(&conn, &chunks).unwrap();

        delete_chunks(&conn, memory_id).unwrap();

        let remaining = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert!(remaining.is_empty());
    }

    #[test]
    fn test_delete_chunks_memory_sem_chunks_ok() {
        let (_tmp, conn) = setup_db();
        // Deleting for a memory without chunks is not an error.
        let outcome = delete_chunks(&conn, 9999);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_get_chunks_ordenados_por_chunk_idx() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        // Inserted deliberately out of order: 2, 0, 1.
        let chunks = vec![
            make_chunk(memory_id, 2, "terceiro", 20, 28, 1),
            make_chunk(memory_id, 0, "primeiro", 0, 8, 1),
            make_chunk(memory_id, 1, "segundo", 9, 16, 1),
        ];
        insert_chunks(&conn, &chunks).unwrap();

        let fetched = get_chunks_by_memory(&conn, memory_id).unwrap();
        assert_eq!(fetched.len(), 3);
        assert_eq!(fetched[0].chunk_idx, 0);
        assert_eq!(fetched[1].chunk_idx, 1);
        assert_eq!(fetched[2].chunk_idx, 2);
    }

    #[test]
    fn test_upsert_chunk_vec_e_knn_search() {
        let (_tmp, conn) = setup_db();
        let memory_id = insert_memory(&conn);

        let chunk = make_chunk(memory_id, 0, "embedding test", 0, 14, 2);
        insert_chunks(&conn, &[chunk]).unwrap();

        // Unit vector along the first axis.
        let mut embedding = vec![0.0f32; EMBEDDING_DIM];
        embedding[0] = 1.0;

        let chunk_id: i64 = conn
            .query_row(
                "SELECT id FROM memory_chunks WHERE memory_id = ?1 AND chunk_idx = 0",
                params![memory_id],
                |row| row.get(0),
            )
            .unwrap();

        upsert_chunk_vec(&conn, chunk_id, memory_id, 0, &embedding).unwrap();

        // Searching with the same vector must return the chunk that was just stored.
        let hits = knn_search_chunks(&conn, &embedding, 1).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].0, memory_id);
        assert_eq!(hits[0].1, 0);
    }

    #[test]
    fn test_knn_search_chunks_sem_dados_retorna_vazio() {
        let (_tmp, conn) = setup_db();
        let embedding = vec![0.0f32; EMBEDDING_DIM];
        // With no stored vectors the search returns no hits.
        let hits = knn_search_chunks(&conn, &embedding, 5).unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn test_insert_chunks_fk_invalida_falha() {
        let (_tmp, conn) = setup_db();
        // No memory with this id exists, so the insert is expected to be rejected.
        let orphan = make_chunk(99999, 0, "sem pai", 0, 7, 1);
        let outcome = insert_chunks(&conn, &[orphan]);
        assert!(outcome.is_err());
    }
}