normalize_semantic/
/// One unit of text together with metadata about where it came from.
///
/// NOTE(review): nothing in this file constructs or reads a `Chunk`, so the
/// field descriptions below are hedged where the meaning is not visible here.
#[derive(Debug, Clone, PartialEq)]
pub struct Chunk {
    /// Label for the kind of source the chunk was derived from.
    /// NOTE(review): the set of valid values is not visible here — confirm with the producer.
    pub source_type: String,
    /// Location of the source; presumably a repository-relative path — TODO confirm.
    pub source_path: String,
    /// Optional numeric identifier of the source; presumably a database row id
    /// (compare `SymbolRow::rowid`) — TODO confirm.
    pub source_id: Option<i64>,
    /// The chunk's text content.
    pub text: String,
    /// Optional commit reference; presumably the hash of the last commit touching
    /// the source — TODO confirm.
    pub last_commit: Option<String>,
    /// Staleness score. NOTE(review): range and direction are not visible here.
    pub staleness: f32,
}
38
/// A single symbol record, as consumed by [`build_symbol_chunk`].
///
/// Only `kind`, `parent`, `name`, `file` and `start_line` are read in this
/// file; `rowid` and `end_line` are carried along for other users.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolRow {
    /// Numeric row identifier; presumably the database rowid — TODO confirm.
    pub rowid: i64,
    /// File the symbol lives in; rendered as the `file` part of `file:line`.
    pub file: String,
    /// Symbol name; rendered after the `[kind]` tag.
    pub name: String,
    /// Symbol kind; rendered verbatim inside square brackets.
    pub kind: String,
    /// Line on which the symbol starts; rendered as the `line` part of `file:line`.
    pub start_line: i64,
    /// Line on which the symbol ends.
    pub end_line: i64,
    /// Name of the enclosing symbol, if any; rendered as `parent.name`.
    pub parent: Option<String>,
}
50
51pub fn build_symbol_chunk(
64 symbol: &SymbolRow,
65 doc_comment: Option<&str>,
66 callers: &[String],
67 callees: &[String],
68 co_change_files: &[String],
69) -> String {
70 let mut parts: Vec<String> = Vec::new();
71
72 let sig = if let Some(parent) = &symbol.parent {
74 format!(
75 "[{}] {}.{} -- {}:{}",
76 symbol.kind, parent, symbol.name, symbol.file, symbol.start_line
77 )
78 } else {
79 format!(
80 "[{}] {} -- {}:{}",
81 symbol.kind, symbol.name, symbol.file, symbol.start_line
82 )
83 };
84 parts.push(sig);
85
86 if let Some(doc) = doc_comment {
88 let prose = collapse_line_wraps(doc.trim());
89 if !prose.is_empty() {
90 parts.push(prose);
91 }
92 }
93
94 if !callers.is_empty() {
96 let top: Vec<&String> = callers.iter().take(5).collect();
97 parts.push(format!(
98 "Callers: {}",
99 top.iter()
100 .map(|s| s.as_str())
101 .collect::<Vec<_>>()
102 .join(", ")
103 ));
104 }
105
106 if !callees.is_empty() {
108 let top: Vec<&String> = callees.iter().take(5).collect();
109 parts.push(format!(
110 "Callees: {}",
111 top.iter()
112 .map(|s| s.as_str())
113 .collect::<Vec<_>>()
114 .join(", ")
115 ));
116 }
117
118 if !co_change_files.is_empty() {
120 let top: Vec<&String> = co_change_files.iter().take(5).collect();
121 parts.push(format!(
122 "Co-changes with: {}",
123 top.iter()
124 .map(|s| s.as_str())
125 .collect::<Vec<_>>()
126 .join(", ")
127 ));
128 }
129
130 parts.join("\n")
131}
132
/// Renders the text chunk for one markdown section.
///
/// Layout:
///
/// ```text
/// [doc] <path>
/// <heading_breadcrumb>      (only when non-empty)
///
/// <body, trimmed>           (only when non-empty after trimming)
/// ```
///
/// The breadcrumb is separated from the header by a single newline and the
/// body from whatever precedes it by a blank line.
pub fn build_markdown_chunk(path: &str, heading_breadcrumb: &str, body: &str) -> String {
    let mut chunk = format!("[doc] {path}");

    // Each optional piece is paired with the separator that precedes it.
    let pieces = [("\n", heading_breadcrumb), ("\n\n", body.trim())];
    for (separator, piece) in pieces.iter() {
        if piece.is_empty() {
            continue;
        }
        chunk.push_str(separator);
        chunk.push_str(piece);
    }

    chunk
}
155
/// Splits markdown text into `(breadcrumb, body)` sections, one per heading.
///
/// * A line that starts with `#` in the first column opens a new section. Its
///   level is the number of leading `#` characters and its title is the rest
///   of the line, trimmed.
/// * The breadcrumb is the chain of enclosing heading titles joined with
///   `" > "` (e.g. `"Title > Section A"`). A new heading replaces every
///   heading of the same or a deeper level on the chain.
/// * The body keeps the section's lines, each terminated by `'\n'`; blank
///   lines before the first non-blank line of a section are dropped.
/// * Text before the first heading is returned with an empty breadcrumb.
///   A heading without any body still yields a section with an empty body.
/// * Lines inside fenced code blocks (delimited by three or more backticks or
///   tildes) are always body text, so `# comment` lines in code samples do not
///   start a section. An unclosed fence extends to the end of the input.
///
/// Returns an empty vector for empty (or all-blank) input.
pub fn split_markdown_sections(content: &str) -> Vec<(String, String)> {
    // Recognises a fence opener: optional indentation followed by a run of at
    // least three backticks or tildes. Returns the fence character and run length.
    fn fence_run(line: &str) -> Option<(char, usize)> {
        let stripped = line.trim_start();
        let marker = stripped
            .chars()
            .next()
            .filter(|c| *c == '`' || *c == '~')?;
        let run = stripped.chars().take_while(|&c| c == marker).count();
        // The marker is ASCII, so the char count doubles as a byte offset.
        let rest = &stripped[run..];
        // A backtick run followed by more backticks is inline code, not a fence.
        if run < 3 || (marker == '`' && rest.contains('`')) {
            None
        } else {
            Some((marker, run))
        }
    }

    // A closing fence consists solely of the opening character, repeated at
    // least as many times as in the opener.
    fn closes_fence(line: &str, open: (char, usize)) -> bool {
        let stripped = line.trim();
        stripped.chars().all(|c| c == open.0) && stripped.chars().count() >= open.1
    }

    let mut sections: Vec<(String, String)> = Vec::new();
    let mut heading_stack: Vec<(usize, String)> = Vec::new();
    let mut current_body = String::new();
    let mut current_breadcrumb = String::new();
    let mut open_fence: Option<(char, usize)> = None;

    for line in content.lines() {
        // Fence delimiters and everything between them count as code.
        let in_code = match open_fence {
            Some(open) => {
                if closes_fence(line, open) {
                    open_fence = None;
                }
                true
            }
            None => {
                open_fence = fence_run(line);
                open_fence.is_some()
            }
        };

        if !in_code && line.starts_with('#') {
            let level = line.chars().take_while(|&c| c == '#').count();
            let title = line.trim_start_matches('#').trim().to_string();

            // Flush the section that this heading terminates.
            if !current_body.trim().is_empty() || !current_breadcrumb.is_empty() {
                sections.push((
                    std::mem::take(&mut current_breadcrumb),
                    std::mem::take(&mut current_body),
                ));
            }
            current_body.clear();

            // Drop siblings and deeper headings, then descend into the new one.
            heading_stack.retain(|(l, _)| *l < level);
            heading_stack.push((level, title));

            current_breadcrumb = heading_stack
                .iter()
                .map(|(_, t)| t.as_str())
                .collect::<Vec<_>>()
                .join(" > ");
        } else if !current_body.is_empty() || !line.trim().is_empty() {
            // Skip blank lines until the section has some content.
            current_body.push_str(line);
            current_body.push('\n');
        }
    }

    if !current_body.trim().is_empty() || !current_breadcrumb.is_empty() {
        sections.push((current_breadcrumb, current_body));
    }

    sections
}
203
/// Renders the text chunk for one commit.
///
/// Layout:
///
/// ```text
/// [commit] <hash>
/// Date: <date_str>
/// <subject>
///
/// <body, trimmed>           (only when non-empty after trimming)
/// ```
pub fn build_commit_chunk(hash: &str, date_str: &str, subject: &str, body: &str) -> String {
    let header = format!("[commit] {hash}\nDate: {date_str}\n{subject}");

    match body.trim() {
        // No message body: the header is the whole chunk.
        "" => header,
        message => header + "\n\n" + message,
    }
}
223
/// Re-flows hard-wrapped prose into one line per paragraph.
///
/// Every line is trimmed. Consecutive non-blank lines are joined with a single
/// space; any run of blank (or whitespace-only) lines between two paragraphs
/// becomes exactly one empty line (`"\n\n"`).
///
/// Blank lines before the first paragraph or after the last one are dropped,
/// so the result never starts or ends with a newline and input without any
/// text yields an empty string.
fn collapse_line_wraps(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    // Set when a blank line has been seen since the last emitted text.
    let mut pending_break = false;

    for line in text.lines().map(str::trim) {
        if line.is_empty() {
            // A break only matters once there is a paragraph to separate from.
            pending_break = !result.is_empty();
            continue;
        }

        if pending_break {
            result.push_str("\n\n");
        } else if !result.is_empty() {
            result.push(' ');
        }
        result.push_str(line);
        pending_break = false;
    }

    result
}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrapped lines are joined and the second paragraph survives.
    #[test]
    fn test_collapse_line_wraps() {
        let collapsed = collapse_line_wraps(
            "This is a long\nsentence that wraps.\n\nNew paragraph here.",
        );
        let expected_fragments = [
            "This is a long sentence that wraps.",
            "New paragraph here.",
        ];
        for fragment in expected_fragments.iter() {
            assert!(collapsed.contains(fragment));
        }
    }

    /// A parentless symbol renders its header, doc text and relation lists.
    #[test]
    fn test_build_symbol_chunk() {
        let symbol = SymbolRow {
            rowid: 1,
            file: String::from("src/lib.rs"),
            name: String::from("open"),
            kind: String::from("function"),
            start_line: 42,
            end_line: 60,
            parent: None,
        };
        let callers = [String::from("main")];
        let callees = [String::from("connect")];

        let chunk = build_symbol_chunk(
            &symbol,
            Some("Opens the database connection."),
            &callers,
            &callees,
            &[],
        );

        let expected_fragments = [
            "[function] open",
            "Opens the database connection.",
            "Callers: main",
            "Callees: connect",
        ];
        for fragment in expected_fragments.iter() {
            assert!(chunk.contains(fragment));
        }
    }

    /// Nested headings produce breadcrumbs and keep their bodies.
    #[test]
    fn test_split_markdown_sections_basic() {
        let md = "# Title\n\nIntro text.\n\n## Section A\n\nBody A.\n\n## Section B\n\nBody B.\n";
        let sections = split_markdown_sections(md);

        // (expected breadcrumb, text the body must contain)
        let expected = [
            ("Title", "Intro text."),
            ("Title > Section A", "Body A."),
            ("Title > Section B", "Body B."),
        ];
        assert_eq!(sections.len(), 3);
        for (section, want) in sections.iter().zip(expected.iter()) {
            assert_eq!(section.0, want.0);
            assert!(section.1.contains(want.1));
        }
    }

    /// Empty input yields no sections.
    #[test]
    fn test_split_markdown_sections_empty() {
        assert!(split_markdown_sections("").is_empty());
    }

    /// Path tag, breadcrumb and body all appear in a markdown chunk.
    #[test]
    fn test_build_markdown_chunk() {
        let chunk = build_markdown_chunk("docs/README.md", "Title > Section A", "Body text.");
        let expected_fragments = ["[doc] docs/README.md", "Title > Section A", "Body text."];
        for fragment in expected_fragments.iter() {
            assert!(chunk.contains(fragment));
        }
    }

    /// Hash tag, date line, subject and body all appear in a commit chunk.
    #[test]
    fn test_build_commit_chunk() {
        let chunk = build_commit_chunk(
            "abc1234",
            "2026-01-15",
            "feat: add semantic search",
            "Longer description here.",
        );
        let expected_fragments = [
            "[commit] abc1234",
            "Date: 2026-01-15",
            "feat: add semantic search",
            "Longer description here.",
        ];
        for fragment in expected_fragments.iter() {
            assert!(chunk.contains(fragment));
        }
    }
}