1use rusqlite::{Connection, Result};
2use uuid::Uuid;
3
/// One chunk row as read back from the `chunks` table.
#[derive(Debug)]
pub struct Chunk {
    /// Identifier of the chunk row.
    pub id: String,
    /// Identifier of the event this chunk was cut from.
    pub event_id: String,
    /// Position of the chunk within its event's sequence of chunks.
    pub chunk_index: i64,
    /// Text carried by the chunk.
    pub content: String,
    /// Offset into the source content at which the chunk begins.
    pub start_offset: i64,
    /// Offset into the source content at which the chunk ends.
    pub end_offset: i64,
}
13
/// Splits `content` into chunks along blank-line (`"\n\n"`) paragraph boundaries.
///
/// Consecutive paragraphs are packed into one chunk until adding the next
/// paragraph (plus its separator) would push the chunk past `max_chunk_size`
/// bytes. A paragraph that is longer than `max_chunk_size` on its own is
/// hard-split into pieces of at most `max_chunk_size` bytes, cut on UTF-8
/// character boundaries (a piece may exceed the limit only when a single
/// character is wider than the limit).
///
/// # Returns
///
/// `(start, end, text)` tuples in document order. `start` and `end` are byte
/// offsets into `content` and never exceed `content.len()`.
///
/// * For packed chunks the range covers the packed paragraphs together with
///   the separator that follows them, while `text` is whitespace-trimmed.
/// * For hard-split pieces `text` is exactly `content[start..end]`.
///
/// Empty `content` yields no chunks. Content consisting solely of paragraph
/// separators is returned as raw, untrimmed pieces so that non-empty input
/// always produces at least one chunk.
pub fn chunk_content(content: &str, max_chunk_size: usize) -> Vec<(usize, usize, String)> {
    const SEPARATOR: &str = "\n\n";

    // A zero limit would never advance a hard-split cursor, so hard splits
    // always move forward by at least one byte.
    let hard_limit = max_chunk_size.max(1);

    let mut chunks: Vec<(usize, usize, String)> = Vec::new();
    let mut current_chunk = String::new();
    let mut chunk_start: usize = 0;
    let mut current_pos: usize = 0;

    for para in content.split(SEPARATOR) {
        let para_len = para.len() + SEPARATOR.len();
        // The last paragraph has no separator after it; clamp so offsets never
        // run past the end of `content`.
        let para_end = (current_pos + para_len).min(content.len());

        // Flush the pending chunk if this paragraph would not fit into it.
        if !current_chunk.is_empty() && current_chunk.len() + para_len > max_chunk_size {
            chunks.push((chunk_start, current_pos, current_chunk.trim().to_string()));
            current_chunk.clear();
            chunk_start = current_pos;
        }

        if para.len() > max_chunk_size {
            // The paragraph alone exceeds the limit. The pending chunk is
            // necessarily empty here (the flush above fired if it was not),
            // so the paragraph is emitted directly as size-bounded pieces.
            let mut piece_start: usize = 0;
            while piece_start < para.len() {
                let mut piece_end = (piece_start + hard_limit).min(para.len());
                // Back up to a character boundary so slicing cannot panic on
                // multi-byte UTF-8 sequences.
                while !para.is_char_boundary(piece_end) {
                    piece_end -= 1;
                }
                if piece_end == piece_start {
                    // The limit is narrower than the next character: emit that
                    // character whole rather than stalling.
                    piece_end += 1;
                    while !para.is_char_boundary(piece_end) {
                        piece_end += 1;
                    }
                }
                chunks.push((
                    current_pos + piece_start,
                    current_pos + piece_end,
                    para[piece_start..piece_end].to_string(),
                ));
                piece_start = piece_end;
            }
            chunk_start = para_end;
            current_pos = para_end;
            continue;
        }

        if !current_chunk.is_empty() {
            current_chunk.push_str(SEPARATOR);
        }
        current_chunk.push_str(para);
        current_pos = para_end;
    }

    if !current_chunk.is_empty() {
        chunks.push((chunk_start, current_pos, current_chunk.trim().to_string()));
    }

    // Only reachable when every paragraph was empty, i.e. `content` is made
    // up purely of separators (ASCII newlines, so byte slicing is safe).
    if chunks.is_empty() && !content.is_empty() {
        let mut start: usize = 0;
        while start < content.len() {
            let end = (start + hard_limit).min(content.len());
            chunks.push((start, end, content[start..end].to_string()));
            start = end;
        }
    }

    chunks
}
60
61pub fn create_chunks(
63 conn: &Connection,
64 event_id: &str,
65 content: &str,
66 timestamp: i64,
67 max_chunk_size: usize,
68) -> Result<usize> {
69 let chunks = chunk_content(content, max_chunk_size);
70
71 let mut count = 0;
72 for (idx, (start, end, chunk_content)) in chunks.iter().enumerate() {
73 let chunk_id = Uuid::new_v4().to_string();
74
75 conn.execute(
76 "INSERT INTO chunks (id, event_id, chunk_index, content, start_offset, end_offset, timestamp)
77 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
78 (
79 &chunk_id,
80 event_id,
81 idx as i64,
82 chunk_content,
83 *start as i64,
84 *end as i64,
85 timestamp,
86 ),
87 )?;
88 count += 1;
89 }
90
91 Ok(count)
92}
93
94pub fn search_chunks(conn: &Connection, term: &str, limit: Option<i64>) -> Result<Vec<Chunk>> {
96 let like = format!("%{}%", term);
97
98 let query = if let Some(lim) = limit {
99 format!(
100 "SELECT id, event_id, chunk_index, content, start_offset, end_offset
101 FROM chunks
102 WHERE content LIKE ?1
103 ORDER BY timestamp DESC
104 LIMIT {}",
105 lim
106 )
107 } else {
108 "SELECT id, event_id, chunk_index, content, start_offset, end_offset
109 FROM chunks
110 WHERE content LIKE ?1
111 ORDER BY timestamp DESC"
112 .to_string()
113 };
114
115 let mut stmt = conn.prepare(&query)?;
116
117 let rows = stmt.query_map([like], |row| {
118 Ok(Chunk {
119 id: row.get(0)?,
120 event_id: row.get(1)?,
121 chunk_index: row.get(2)?,
122 content: row.get(3)?,
123 start_offset: row.get(4)?,
124 end_offset: row.get(5)?,
125 })
126 })?;
127
128 Ok(rows.filter_map(Result::ok).collect())
129}