/// A piece of text together with metadata describing where it came from.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// a `Chunk`, so the per-field notes below are inferred from names and types
/// only — confirm them against the code that produces these values.
#[derive(Debug, Clone)]
pub struct Chunk {
/// Category of the originating source — presumably something like a symbol,
/// a document or a commit; TODO confirm the allowed values.
pub source_type: String,
/// Location of the originating source — presumably a file path; TODO confirm.
pub source_path: String,
/// Optional numeric identifier of the source — possibly a database row id;
/// TODO confirm.
pub source_id: Option<i64>,
/// The chunk's text content.
pub text: String,
/// Optional commit associated with the source — format (full/short hash) is
/// not established here; TODO confirm.
pub last_commit: Option<String>,
/// Staleness score — scale and direction are not established here;
/// TODO confirm.
pub staleness: f32,
}
/// Description of one code symbol, as consumed by `build_symbol_chunk`.
///
/// NOTE(review): the name and the `rowid` field suggest this mirrors a
/// database row — confirm against the code that loads it.
#[derive(Debug, Clone)]
pub struct SymbolRow {
/// Row identifier; not read by anything in this file.
pub rowid: i64,
/// File the symbol belongs to; rendered as `file:start_line` in symbol chunks.
pub file: String,
/// Symbol name; rendered after the parent (if any) in symbol chunks.
pub name: String,
/// Symbol kind label (e.g. `"function"`); rendered in square brackets at the
/// start of a symbol chunk.
pub kind: String,
/// Line where the symbol starts; rendered after the file name in symbol
/// chunks. Presumably 1-based — TODO confirm.
pub start_line: i64,
/// Line where the symbol ends; not read by anything in this file.
pub end_line: i64,
/// Name of the enclosing symbol, if any; when present the symbol is rendered
/// as `parent.name`.
pub parent: Option<String>,
}
/// Renders the text chunk that describes a single symbol.
///
/// The result is a newline-separated sequence of:
///
/// 1. a signature line `[kind] name -- file:start_line`, where the name is
///    written as `parent.name` when the symbol has a parent;
/// 2. the doc comment, trimmed and passed through `collapse_line_wraps`,
///    if any text remains;
/// 3. `Callers: …`, `Callees: …` and `Co-changes with: …` lines, each listing
///    at most the first five entries of the corresponding slice separated by
///    `", "`. An empty slice produces no line.
pub fn build_symbol_chunk(
    symbol: &SymbolRow,
    doc_comment: Option<&str>,
    callers: &[String],
    callees: &[String],
    co_change_files: &[String],
) -> String {
    let signature = match &symbol.parent {
        Some(parent) => format!(
            "[{}] {}.{} -- {}:{}",
            symbol.kind, parent, symbol.name, symbol.file, symbol.start_line
        ),
        None => format!(
            "[{}] {} -- {}:{}",
            symbol.kind, symbol.name, symbol.file, symbol.start_line
        ),
    };
    let mut lines = vec![signature];

    let prose = doc_comment
        .map(|doc| collapse_line_wraps(doc.trim()))
        .filter(|text| !text.is_empty());
    if let Some(text) = prose {
        lines.push(text);
    }

    // The three relationship lists share one layout: a fixed prefix followed
    // by the first five names, comma separated.
    let related: [(&str, &[String]); 3] = [
        ("Callers: ", callers),
        ("Callees: ", callees),
        ("Co-changes with: ", co_change_files),
    ];
    for &(prefix, names) in &related {
        if names.is_empty() {
            continue;
        }
        let shown = names
            .iter()
            .take(5)
            .map(String::as_str)
            .collect::<Vec<_>>()
            .join(", ");
        let mut line = String::from(prefix);
        line.push_str(&shown);
        lines.push(line);
    }

    lines.join("\n")
}
/// Renders the text chunk for one Markdown section.
///
/// The chunk starts with `[doc] {path}`. A non-empty `heading_breadcrumb`
/// follows on its own line, and the trimmed `body`, when non-empty, follows
/// after a blank line.
pub fn build_markdown_chunk(path: &str, heading_breadcrumb: &str, body: &str) -> String {
    let header = if heading_breadcrumb.is_empty() {
        format!("[doc] {path}")
    } else {
        format!("[doc] {path}\n{heading_breadcrumb}")
    };

    match body.trim() {
        "" => header,
        content => format!("{header}\n\n{content}"),
    }
}
/// Splits Markdown `content` into `(breadcrumb, body)` sections, one per heading.
///
/// A heading is an ATX heading that starts in the first column: one to six
/// `#` characters followed by whitespace or the end of the line. Each heading
/// closes the section in progress and opens a new one whose breadcrumb is the
/// titles of the enclosing headings joined with `" > "` (for example
/// `"Title > Section A"`). A heading replaces any earlier heading of the same
/// or a deeper level in that chain.
///
/// Lines that start with `#` but are not headings stay in the body: `#42 fixed`,
/// `#hashtag`, runs of seven or more `#`, and anything inside a fenced code
/// block (triple-backtick or `~~~` fences), such as a shell comment.
///
/// Body lines are each terminated by `\n`, and blank lines before the first
/// non-blank line of a section are dropped. Text before the first heading is
/// returned with an empty breadcrumb. A section whose breadcrumb is non-empty
/// is returned even when its body is empty. Empty input yields no sections.
pub fn split_markdown_sections(content: &str) -> Vec<(String, String)> {
    let mut sections: Vec<(String, String)> = Vec::new();
    let mut heading_stack: Vec<(usize, String)> = Vec::new();
    let mut current_body = String::new();
    let mut current_breadcrumb = String::new();
    // Fence character and run length of the code block we are inside, if any.
    let mut open_fence: Option<(char, usize)> = None;

    for line in content.lines() {
        let heading = match (open_fence, parse_fence_marker(line)) {
            (Some((open_char, open_len)), marker) => {
                // Inside a fence nothing is a heading. The fence closes on a
                // run of the same character, at least as long as the opener,
                // with nothing but whitespace after it.
                if let Some((ch, len, tail)) = marker {
                    if ch == open_char && len >= open_len && tail.trim().is_empty() {
                        open_fence = None;
                    }
                }
                None
            }
            // A backtick fence may not have backticks in its info string;
            // such a line is inline code, not a fence opener.
            (None, Some((ch, len, tail))) if ch != '`' || !tail.contains('`') => {
                open_fence = Some((ch, len));
                None
            }
            (None, _) => parse_atx_heading(line),
        };

        match heading {
            Some((level, title)) => {
                if !current_body.trim().is_empty() || !current_breadcrumb.is_empty() {
                    // Move the finished section out instead of cloning it.
                    sections.push((
                        std::mem::take(&mut current_breadcrumb),
                        std::mem::take(&mut current_body),
                    ));
                } else {
                    current_body.clear();
                }
                // Drop headings at the same or a deeper level before nesting
                // the new one under what remains.
                heading_stack.retain(|(l, _)| *l < level);
                heading_stack.push((level, title.to_string()));
                current_breadcrumb = heading_stack
                    .iter()
                    .map(|(_, t)| t.as_str())
                    .collect::<Vec<_>>()
                    .join(" > ");
            }
            None => {
                // Skip blank lines until the section has some content.
                if !current_body.is_empty() || !line.trim().is_empty() {
                    current_body.push_str(line);
                    current_body.push('\n');
                }
            }
        }
    }

    if !current_body.trim().is_empty() || !current_breadcrumb.is_empty() {
        sections.push((current_breadcrumb, current_body));
    }
    sections
}

/// Parses `line` as an ATX heading, returning its level and trimmed title.
///
/// Returns `None` unless the line starts with one to six `#` characters that
/// are followed by whitespace or the end of the line.
fn parse_atx_heading(line: &str) -> Option<(usize, &str)> {
    let rest = line.trim_start_matches('#');
    // `#` is a single byte, so the byte difference is the number of hashes.
    let level = line.len() - rest.len();
    let well_formed = (1..=6).contains(&level)
        && (rest.is_empty() || rest.starts_with(char::is_whitespace));
    if well_formed {
        Some((level, rest.trim()))
    } else {
        None
    }
}

/// Recognizes a code-fence marker: optional indentation followed by a run of
/// at least three backticks or three tildes.
///
/// Returns the fence character, the run length, and the text after the run.
/// Indentation is deliberately not limited, so fences nested in list items
/// are recognized as well.
fn parse_fence_marker(line: &str) -> Option<(char, usize, &str)> {
    let rest = line.trim_start();
    let fence_char = match rest.chars().next() {
        Some(c @ '`') | Some(c @ '~') => c,
        _ => return None,
    };
    let tail = rest.trim_start_matches(fence_char);
    // Both fence characters are single-byte, so this is the run length.
    let run = rest.len() - tail.len();
    if run >= 3 {
        Some((fence_char, run, tail))
    } else {
        None
    }
}
/// Renders the text chunk for one commit.
///
/// The chunk consists of `[commit] {hash}`, a `Date: {date_str}` line and the
/// subject line. The trimmed `body`, when non-empty, follows after a blank
/// line.
pub fn build_commit_chunk(hash: &str, date_str: &str, subject: &str, body: &str) -> String {
    let header = format!("[commit] {hash}\nDate: {date_str}\n{subject}");

    let details = body.trim();
    if details.is_empty() {
        return header;
    }
    [header.as_str(), details].join("\n\n")
}
/// Re-flows hard-wrapped prose into one line per paragraph.
///
/// Every line is trimmed. Consecutive non-blank lines are joined with a
/// single space, and each run of one or more blank lines between them becomes
/// one paragraph break (`"\n\n"`). Blank lines before the first or after the
/// last non-blank line contribute nothing, so the result never starts or ends
/// with a newline; input with no non-blank line yields an empty string.
fn collapse_line_wraps(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    // Set when a blank line has been seen since the last emitted text; the
    // break is written only once more text actually follows.
    let mut pending_break = false;

    for line in text.lines().map(str::trim) {
        if line.is_empty() {
            // Blank lines ahead of any text must not open a paragraph break.
            pending_break = !result.is_empty();
            continue;
        }
        if pending_break {
            result.push_str("\n\n");
        } else if !result.is_empty() {
            result.push(' ');
        }
        result.push_str(line);
        pending_break = false;
    }
    result
}
/// Unit tests for the chunk builders.
///
/// Expected values are compared in full rather than with `contains`, so
/// regressions in ordering or separators fail the tests.
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a symbol named `open` at `src/lib.rs:42` with the given kind
    /// and optional parent.
    fn open_symbol(kind: &str, parent: Option<&str>) -> SymbolRow {
        SymbolRow {
            rowid: 1,
            file: "src/lib.rs".to_string(),
            name: "open".to_string(),
            kind: kind.to_string(),
            start_line: 42,
            end_line: 60,
            parent: parent.map(String::from),
        }
    }

    #[test]
    fn test_collapse_line_wraps() {
        let text = "This is a long\nsentence that wraps.\n\nNew paragraph here.";
        assert_eq!(
            collapse_line_wraps(text),
            "This is a long sentence that wraps.\n\nNew paragraph here."
        );
    }

    #[test]
    fn test_build_symbol_chunk() {
        let sym = open_symbol("function", None);
        let chunk = build_symbol_chunk(
            &sym,
            Some("Opens the database connection."),
            &["main".to_string()],
            &["connect".to_string()],
            &[],
        );
        assert_eq!(
            chunk,
            "[function] open -- src/lib.rs:42\n\
             Opens the database connection.\n\
             Callers: main\n\
             Callees: connect"
        );
    }

    #[test]
    fn test_build_symbol_chunk_with_parent() {
        let sym = open_symbol("method", Some("Db"));
        let chunk = build_symbol_chunk(&sym, None, &[], &[], &[]);
        assert_eq!(chunk, "[method] Db.open -- src/lib.rs:42");
    }

    #[test]
    fn test_build_symbol_chunk_limits_lists_to_five() {
        let sym = open_symbol("function", None);
        let names: Vec<String> = (0..7).map(|i| format!("c{}", i)).collect();
        let chunk = build_symbol_chunk(&sym, None, &names, &[], &names);
        assert_eq!(
            chunk,
            "[function] open -- src/lib.rs:42\n\
             Callers: c0, c1, c2, c3, c4\n\
             Co-changes with: c0, c1, c2, c3, c4"
        );
    }

    #[test]
    fn test_split_markdown_sections_basic() {
        let md = "# Title\n\nIntro text.\n\n## Section A\n\nBody A.\n\n## Section B\n\nBody B.\n";
        let sections = split_markdown_sections(md);
        let expected = vec![
            ("Title".to_string(), "Intro text.\n\n".to_string()),
            ("Title > Section A".to_string(), "Body A.\n\n".to_string()),
            ("Title > Section B".to_string(), "Body B.\n".to_string()),
        ];
        assert_eq!(sections, expected);
    }

    #[test]
    fn test_split_markdown_sections_empty() {
        let sections = split_markdown_sections("");
        assert!(sections.is_empty());
    }

    #[test]
    fn test_build_markdown_chunk() {
        let chunk = build_markdown_chunk("docs/README.md", "Title > Section A", "Body text.");
        assert_eq!(
            chunk,
            "[doc] docs/README.md\nTitle > Section A\n\nBody text."
        );
    }

    #[test]
    fn test_build_commit_chunk() {
        let chunk = build_commit_chunk(
            "abc1234",
            "2026-01-15",
            "feat: add semantic search",
            "Longer description here.",
        );
        assert_eq!(
            chunk,
            "[commit] abc1234\n\
             Date: 2026-01-15\n\
             feat: add semantic search\n\
             \n\
             Longer description here."
        );
    }
}