synapse_core/processor.rs
/// Simple semantic chunker for text processing.
///
/// The type carries no state; it only groups the chunking routines.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TextProcessor;

impl TextProcessor {
    /// Creates a new processor.
    pub fn new() -> Self {
        Self
    }

    /// Splits `text` into chunks of at most `max_chars` bytes.
    ///
    /// The text is split in three stages, falling through to the next stage only
    /// when a piece is still too large:
    ///
    /// 1. Paragraphs (separated by a blank line, i.e. `"\n\n"`). A paragraph that
    ///    fits is emitted as-is; whitespace-only paragraphs are skipped.
    /// 2. Lines of an oversized paragraph are greedily packed together, joined
    ///    with `'\n'`, for as long as the packed chunk stays within the limit.
    /// 3. A single line that is itself over the limit is cut into consecutive
    ///    pieces by length. No text is dropped.
    ///
    /// Despite its name, `max_chars` is compared against `str::len`, so the limit
    /// is measured in UTF-8 bytes. Cuts are only made on character boundaries; if
    /// a single character is wider than `max_chars` bytes it is emitted on its own
    /// and that one chunk exceeds the limit.
    ///
    /// Returns an empty vector when `max_chars` is `0`, since no content can fit.
    pub fn chunk_text(&self, text: &str, max_chars: usize) -> Vec<String> {
        let mut chunks = Vec::new();
        if max_chars == 0 {
            return chunks;
        }

        // 1. Split by paragraphs.
        for paragraph in text.split("\n\n") {
            if paragraph.len() <= max_chars {
                if !paragraph.trim().is_empty() {
                    chunks.push(paragraph.to_string());
                }
                continue;
            }

            // 2. Paragraph is too large: pack whole lines into chunks.
            let mut current = String::new();
            for line in paragraph.split('\n') {
                // The `+ 1` budgets for the '\n' that joins the line to `current`.
                if current.len() + line.len() + 1 <= max_chars {
                    if !current.is_empty() {
                        current.push('\n');
                    }
                    current.push_str(line);
                    continue;
                }

                // The line does not fit: flush what has been packed so far.
                if !current.is_empty() {
                    chunks.push(std::mem::take(&mut current));
                }

                if line.len() > max_chars {
                    // 3. The line alone is over the limit: cut it by length.
                    chunks.extend(
                        Self::split_by_length(line, max_chars)
                            .into_iter()
                            .map(str::to_string),
                    );
                } else {
                    // `current` is empty here, so the line starts a new chunk.
                    current.push_str(line);
                }
            }
            if !current.is_empty() {
                chunks.push(current);
            }
        }

        chunks
    }

    /// Cuts `line` into consecutive pieces of at most `max_len` bytes each,
    /// never splitting inside a UTF-8 character.
    ///
    /// `max_len` must be non-zero. A character wider than `max_len` bytes is
    /// returned as its own piece so that the loop always makes progress.
    fn split_by_length(line: &str, max_len: usize) -> Vec<&str> {
        let mut pieces = Vec::new();
        let mut rest = line;

        while !rest.is_empty() {
            // Start at the byte limit and back off to the nearest char boundary.
            let mut end = rest.len().min(max_len);
            while !rest.is_char_boundary(end) {
                end -= 1;
            }
            if end == 0 {
                // The first character alone is wider than `max_len`; take it whole.
                end = rest.chars().next().map_or(rest.len(), char::len_utf8);
            }

            let (head, tail) = rest.split_at(end);
            pieces.push(head);
            rest = tail;
        }

        pieces
    }
}
55}