1use rusqlite::{Connection, Result};
2use uuid::Uuid;
3
/// One stored slice of an event's content, as read back from the `chunks` table.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers can copy and compare
/// rows directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Chunk {
    /// Value of the `id` column.
    pub id: String,
    /// Value of the `event_id` column.
    pub event_id: String,
    /// Value of the `chunk_index` column.
    pub chunk_index: i64,
    /// Text of this chunk.
    pub content: String,
    /// Value of the `start_offset` column.
    pub start_offset: i64,
    /// Value of the `end_offset` column.
    pub end_offset: i64,
}
13
/// Cuts `content` into consecutive pieces and returns them as
/// `(start, end, text)` tuples, where `start..end` is the byte range of `text`
/// inside `content`.
///
/// * Returns an empty vector when `content` is empty or `max_chunk_size` is 0.
/// * Pieces are contiguous: each one starts where the previous one ended, and
///   the last one ends at `content.len()`.
/// * A piece never exceeds `max_chunk_size` bytes, except when a single
///   character is wider than that budget; such a character becomes a piece on
///   its own.
/// * When the budgeted window contains whitespace after its first character,
///   the cut is made just before the last such whitespace, so the following
///   piece begins with that whitespace character.
pub fn chunk_content(content: &str, max_chunk_size: usize) -> Vec<(usize, usize, String)> {
    let total = content.len();
    let mut pieces = Vec::new();

    if max_chunk_size == 0 || total == 0 {
        return pieces;
    }

    let mut cursor = 0;
    // Keep cutting while more than one budget's worth of bytes is left.
    while total - cursor > max_chunk_size {
        // Back off from the byte budget to the nearest char boundary so the
        // slice below never lands inside a multi-byte character.
        let mut limit = cursor + max_chunk_size;
        while !content.is_char_boundary(limit) {
            limit -= 1;
        }
        if limit == cursor {
            // The very first character is wider than the budget: take it whole
            // so the loop always advances.
            limit += content[cursor..].chars().next().map_or(0, char::len_utf8);
        }

        // Prefer the last whitespace inside the window (ignoring position 0,
        // which would produce an empty piece); otherwise cut at the limit.
        let cut = content[cursor..limit]
            .char_indices()
            .rfind(|&(pos, ch)| pos > 0 && ch.is_whitespace())
            .map_or(limit, |(pos, _)| cursor + pos);

        pieces.push((cursor, cut, content[cursor..cut].to_string()));
        cursor = cut;
    }

    // Whatever is left fits in a single piece.
    if cursor < total {
        pieces.push((cursor, total, content[cursor..].to_string()));
    }

    pieces
}
65
66pub fn create_chunks(
68 conn: &Connection,
69 event_id: &str,
70 content: &str,
71 timestamp: i64,
72 max_chunk_size: usize,
73) -> Result<usize> {
74 let chunks = chunk_content(content, max_chunk_size);
75
76 let mut count = 0;
77 for (idx, (start, end, chunk_content)) in chunks.iter().enumerate() {
78 let chunk_id = Uuid::new_v4().to_string();
79
80 conn.execute(
81 "INSERT INTO chunks (id, event_id, chunk_index, content, start_offset, end_offset, timestamp)
82 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
83 (
84 &chunk_id,
85 event_id,
86 idx as i64,
87 chunk_content,
88 *start as i64,
89 *end as i64,
90 timestamp,
91 ),
92 )?;
93 count += 1;
94 }
95
96 Ok(count)
97}
98
99pub fn search_chunks(conn: &Connection, term: &str, limit: Option<i64>) -> Result<Vec<Chunk>> {
101 let like = format!("%{}%", term);
102
103 let query = if let Some(lim) = limit {
104 format!(
105 "SELECT id, event_id, chunk_index, content, start_offset, end_offset
106 FROM chunks
107 WHERE content LIKE ?1
108 AND NOT EXISTS (
109 SELECT 1 FROM shadow_state s WHERE s.event_id = chunks.event_id
110 )
111 ORDER BY timestamp DESC
112 LIMIT {}",
113 lim
114 )
115 } else {
116 "SELECT id, event_id, chunk_index, content, start_offset, end_offset
117 FROM chunks
118 WHERE content LIKE ?1
119 AND NOT EXISTS (
120 SELECT 1 FROM shadow_state s WHERE s.event_id = chunks.event_id
121 )
122 ORDER BY timestamp DESC"
123 .to_string()
124 };
125
126 let mut stmt = conn.prepare(&query)?;
127
128 let rows = stmt.query_map([like], |row| {
129 Ok(Chunk {
130 id: row.get(0)?,
131 event_id: row.get(1)?,
132 chunk_index: row.get(2)?,
133 content: row.get(3)?,
134 start_offset: row.get(4)?,
135 end_offset: row.get(5)?,
136 })
137 })?;
138
139 rows.collect()
140}
141
142pub fn list_chunks(conn: &Connection, event_id: &str) -> Result<Vec<Chunk>> {
144 let mut stmt = conn.prepare(
145 "SELECT id, event_id, chunk_index, content, start_offset, end_offset
146 FROM chunks
147 WHERE event_id = ?1
148 AND NOT EXISTS (
149 SELECT 1 FROM shadow_state s WHERE s.event_id = chunks.event_id
150 )
151 ORDER BY chunk_index ASC",
152 )?;
153
154 let rows = stmt.query_map([event_id], |row| {
155 Ok(Chunk {
156 id: row.get(0)?,
157 event_id: row.get(1)?,
158 chunk_index: row.get(2)?,
159 content: row.get(3)?,
160 start_offset: row.get(4)?,
161 end_offset: row.get(5)?,
162 })
163 })?;
164
165 rows.collect()
166}