Skip to main content

seekr_code/parser/
summary.rs

1//! Code summary generator.
2//!
3//! Generates structured summaries for code chunks, including function
4//! signatures, documentation comments, parameter types, and return types.
5//! These summaries improve embedding quality for semantic search.
6
7use crate::parser::CodeChunk;
8
9/// Generate a text summary for a code chunk, suitable for embedding.
10///
11/// The summary combines:
12/// - The chunk kind and name
13/// - The function/method signature
14/// - Documentation comments
15/// - Key structural information
16///
17/// This produces a more semantically meaningful text than just the raw
18/// source code, improving embedding quality for semantic search.
19pub fn generate_summary(chunk: &CodeChunk) -> String {
20    let mut parts = Vec::new();
21
22    // Add kind + name
23    if let Some(ref name) = chunk.name {
24        parts.push(format!("{} {}", chunk.kind, name));
25    }
26
27    // Add language context
28    parts.push(format!("language: {}", chunk.language));
29
30    // Add signature if different from name
31    if let Some(ref sig) = chunk.signature {
32        let sig_trimmed = sig.trim();
33        if !sig_trimmed.is_empty() {
34            parts.push(format!("signature: {}", sig_trimmed));
35        }
36    }
37
38    // Add documentation
39    if let Some(ref doc) = chunk.doc_comment {
40        let doc_trimmed = doc.trim();
41        if !doc_trimmed.is_empty() {
42            // Limit doc comment length to avoid overwhelming the embedding
43            let truncated = if doc_trimmed.len() > 500 {
44                format!("{}...", &doc_trimmed[..500])
45            } else {
46                doc_trimmed.to_string()
47            };
48            parts.push(truncated);
49        }
50    }
51
52    // Add a snippet of the body (first few meaningful lines)
53    let body_snippet = extract_body_snippet(&chunk.body, 5);
54    if !body_snippet.is_empty() {
55        parts.push(body_snippet);
56    }
57
58    parts.join("\n")
59}
60
/// Collect the first `max_lines` meaningful lines of `body`.
///
/// A line is dropped when, after trimming whitespace, it is empty, starts
/// with `//`, `#`, `/*` or `*`, or consists solely of one of `{`, `}`, `(`
/// or `)`. Kept lines retain their original indentation and are joined with
/// `\n`.
fn extract_body_snippet(body: &str, max_lines: usize) -> String {
    // Prefixes treated as comment/attribute noise; `//` also covers `///`.
    const NOISE_PREFIXES: [&str; 4] = ["//", "#", "/*", "*"];

    let mut kept: Vec<&str> = Vec::new();
    for line in body.lines() {
        if kept.len() == max_lines {
            break;
        }

        let text = line.trim();
        let is_boilerplate = text.is_empty()
            || matches!(text, "{" | "}" | "(" | ")")
            || NOISE_PREFIXES.iter().any(|prefix| text.starts_with(*prefix));

        if !is_boilerplate {
            kept.push(line);
        }
    }

    kept.join("\n")
}
83
84/// Generate summaries for a batch of chunks, returning (chunk_id, summary) pairs.
85pub fn generate_summaries(chunks: &[CodeChunk]) -> Vec<(u64, String)> {
86    chunks
87        .iter()
88        .map(|chunk| (chunk.id, generate_summary(chunk)))
89        .collect()
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ChunkKind;
    use std::path::PathBuf;

    /// Build a named Rust-language chunk fixture with id 1 whose byte and
    /// line ranges span the whole of `body`.
    fn make_chunk(
        kind: ChunkKind,
        name: &str,
        signature: Option<&str>,
        doc: Option<&str>,
        body: &str,
    ) -> CodeChunk {
        let line_count = body.lines().count();
        CodeChunk {
            id: 1,
            file_path: PathBuf::from("test.rs"),
            language: String::from("rust"),
            kind,
            name: Some(name.to_owned()),
            signature: signature.map(str::to_owned),
            doc_comment: doc.map(str::to_owned),
            body: body.to_owned(),
            byte_range: 0..body.len(),
            line_range: 0..line_count,
        }
    }

    /// A fully populated function chunk yields header, language, doc and
    /// signature entries in its summary.
    #[test]
    fn test_generate_summary() {
        let signature =
            "pub fn authenticate_user(username: &str, password: &str) -> Result<String, AuthError>";
        let body = "pub fn authenticate_user(username: &str, password: &str) -> Result<String, AuthError> {\n    let user = find_user(username)?;\n    verify(password, &user.hash)\n}";
        let chunk = make_chunk(
            ChunkKind::Function,
            "authenticate_user",
            Some(signature),
            Some("Validates the provided credentials against the database."),
            body,
        );

        let summary = generate_summary(&chunk);

        let expected_fragments = [
            "function authenticate_user",
            "language: rust",
            "Validates",
            "signature:",
        ];
        for fragment in expected_fragments.iter() {
            assert!(
                summary.contains(fragment),
                "summary is missing {:?}",
                fragment
            );
        }
    }

    /// Batch generation returns one entry per chunk and carries the chunk id.
    #[test]
    fn test_generate_summaries_batch() {
        let function_chunk = make_chunk(ChunkKind::Function, "foo", None, None, "fn foo() {}");
        let struct_chunk = make_chunk(ChunkKind::Struct, "Bar", None, None, "struct Bar {}");

        let summaries = generate_summaries(&[function_chunk, struct_chunk]);

        assert_eq!(summaries.len(), 2);
        let (first_id, _) = &summaries[0];
        assert_eq!(*first_id, 1); // chunk id
    }
}