Skip to main content

phago_agents/
code_digester.rs

1//! Code-aware digester agent for source code analysis.
2//!
3//! Extracts function names, type definitions, imports, and structural
4//! patterns from Rust source code. Builds a code knowledge graph
5//! where concepts are identifiers and edges are co-occurrence relations.
6
7/// A code element extracted from source files.
8#[derive(Debug, Clone)]
9pub struct CodeElement {
10    pub name: String,
11    pub kind: CodeElementKind,
12    pub file: String,
13    pub line: usize,
14}
15
/// Types of code elements we extract.
///
/// The enum is a plain tag with no payload, so it is `Copy` and can be used
/// as a key in hashed collections (`Eq` + `Hash`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CodeElementKind {
    /// A `fn` item.
    Function,
    /// A `struct` definition.
    Struct,
    /// An `enum` definition.
    Enum,
    /// A `trait` definition.
    Trait,
    /// An `impl` block.
    Impl,
    /// A `use` declaration.
    Use,
    /// A `const` item.
    Const,
    /// A `mod` declaration.
    Module,
}
28
29impl CodeElementKind {
30    pub fn as_str(&self) -> &'static str {
31        match self {
32            Self::Function => "fn",
33            Self::Struct => "struct",
34            Self::Enum => "enum",
35            Self::Trait => "trait",
36            Self::Impl => "impl",
37            Self::Use => "use",
38            Self::Const => "const",
39            Self::Module => "mod",
40        }
41    }
42}
43
44/// Extract code elements from Rust source code.
45pub fn extract_code_elements(source: &str, filename: &str) -> Vec<CodeElement> {
46    let mut elements = Vec::new();
47
48    for (line_num, line) in source.lines().enumerate() {
49        let trimmed = line.trim();
50
51        // Function definitions
52        if trimmed.starts_with("pub fn ") || trimmed.starts_with("fn ") || trimmed.starts_with("pub(crate) fn ") {
53            if let Some(name) = extract_identifier(trimmed, "fn ") {
54                elements.push(CodeElement {
55                    name,
56                    kind: CodeElementKind::Function,
57                    file: filename.to_string(),
58                    line: line_num + 1,
59                });
60            }
61        }
62
63        // Struct definitions
64        if trimmed.starts_with("pub struct ") || trimmed.starts_with("struct ") {
65            if let Some(name) = extract_identifier(trimmed, "struct ") {
66                elements.push(CodeElement {
67                    name,
68                    kind: CodeElementKind::Struct,
69                    file: filename.to_string(),
70                    line: line_num + 1,
71                });
72            }
73        }
74
75        // Enum definitions
76        if trimmed.starts_with("pub enum ") || trimmed.starts_with("enum ") {
77            if let Some(name) = extract_identifier(trimmed, "enum ") {
78                elements.push(CodeElement {
79                    name,
80                    kind: CodeElementKind::Enum,
81                    file: filename.to_string(),
82                    line: line_num + 1,
83                });
84            }
85        }
86
87        // Trait definitions
88        if trimmed.starts_with("pub trait ") || trimmed.starts_with("trait ") {
89            if let Some(name) = extract_identifier(trimmed, "trait ") {
90                elements.push(CodeElement {
91                    name,
92                    kind: CodeElementKind::Trait,
93                    file: filename.to_string(),
94                    line: line_num + 1,
95                });
96            }
97        }
98
99        // Impl blocks
100        if trimmed.starts_with("impl ") || trimmed.starts_with("impl<") {
101            if let Some(name) = extract_impl_name(trimmed) {
102                elements.push(CodeElement {
103                    name,
104                    kind: CodeElementKind::Impl,
105                    file: filename.to_string(),
106                    line: line_num + 1,
107                });
108            }
109        }
110
111        // Use statements
112        if trimmed.starts_with("use ") || trimmed.starts_with("pub use ") {
113            if let Some(name) = extract_use_path(trimmed) {
114                elements.push(CodeElement {
115                    name,
116                    kind: CodeElementKind::Use,
117                    file: filename.to_string(),
118                    line: line_num + 1,
119                });
120            }
121        }
122
123        // Module declarations
124        if trimmed.starts_with("pub mod ") || trimmed.starts_with("mod ") {
125            if let Some(name) = extract_identifier(trimmed, "mod ") {
126                elements.push(CodeElement {
127                    name,
128                    kind: CodeElementKind::Module,
129                    file: filename.to_string(),
130                    line: line_num + 1,
131                });
132            }
133        }
134    }
135
136    elements
137}
138
/// Extract identifier after a keyword like "fn ", "struct ", etc.
///
/// Finds the first occurrence of `keyword` in `line`, skips any additional
/// whitespace, and collects the following run of alphanumeric / `_`
/// characters.
///
/// Returns `None` when `keyword` does not occur in `line` or when no
/// identifier character follows it.
fn extract_identifier(line: &str, keyword: &str) -> Option<String> {
    // Split only at the FIRST occurrence: splitting at every occurrence
    // would truncate names that themselves end in the keyword text
    // (`mod test_mod {` must give `test_mod`, not `test_`).
    let (_, rest) = line.split_once(keyword)?;
    let name: String = rest
        .trim_start()
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
147
/// Extract type name from an impl block.
///
/// Handles `impl Foo`, `impl<T> Foo<T>`, `impl Trait for Foo` and generic
/// parameter lists that contain bounds, several parameters or lifetimes
/// (`impl<'a, T: Clone> Foo<'a, T>`). For a trait impl the implementing type
/// (the token after `for`) is returned. For a path-qualified type the last
/// path segment is returned (`crate::model::Id` gives `Id`).
///
/// Returns `None` when the line does not start with `impl`, when the generic
/// parameter list is not closed on this line, or when the target does not
/// start with an identifier (e.g. `&T`).
fn extract_impl_name(line: &str) -> Option<String> {
    let after_impl = line.trim_start().strip_prefix("impl")?;
    // Reject identifiers that merely begin with "impl" (e.g. `implement`).
    if !after_impl.starts_with(|c: char| c == '<' || c.is_whitespace()) {
        return None;
    }

    // Generic parameters are skipped as a balanced unit; tokenising them by
    // whitespace would mistake a bound or a second parameter for the type.
    let header = skip_generic_params(after_impl.trim_start())?;
    let tokens: Vec<&str> = header.split_whitespace().collect();

    // Trait impl: the type follows `for`. Otherwise (or when `for` is the
    // last token on the line) fall back to the first token.
    let target = tokens
        .iter()
        .position(|&token| token == "for")
        .and_then(|for_idx| tokens.get(for_idx + 1))
        .or_else(|| tokens.first())
        .copied()?;

    // Keep the leading path (`a::b::Type`), dropping generic arguments and braces.
    let path_len = target
        .find(|c: char| !(c.is_alphanumeric() || c == '_' || c == ':'))
        .unwrap_or(target.len());
    let name = target[..path_len].rsplit("::").next()?.trim_matches(':');
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}

/// Skips a leading `<...>` generic parameter list, honouring nested angle
/// brackets, and returns what follows it. Input without a leading `<` is
/// returned unchanged; an unclosed list yields `None`.
fn skip_generic_params(header: &str) -> Option<&str> {
    let params = match header.strip_prefix('<') {
        Some(params) => params,
        None => return Some(header),
    };
    let mut depth = 1usize;
    let mut prev = '<';
    for (idx, ch) in params.char_indices() {
        match ch {
            '<' => depth += 1,
            // The `>` of a `->` return arrow does not close a bracket.
            '>' if prev != '-' => {
                depth -= 1;
                if depth == 0 {
                    return Some(&params[idx + 1..]);
                }
            }
            _ => {}
        }
        prev = ch;
    }
    None
}
172
/// Extract the last segment of a use path.
///
/// `use std::collections::HashMap;` yields `HashMap`; for a renamed import
/// (`use a::B as C;`) the original name `B` is returned.
///
/// Returns `None` when the line contains no `use `, or when the final
/// segment does not start with an identifier character, which is the case
/// for glob imports (`::*`) and grouped imports (`::{A, B}`).
fn extract_use_path(line: &str) -> Option<String> {
    // Split only at the first `use `, so a path segment that happens to end
    // in "use" (`crate::reuse as r`) is not cut apart.
    let (_, rest) = line.split_once("use ")?;
    // The path ends at the first `;`; anything after it (such as a trailing
    // comment mentioning `a::b`) is not part of the import.
    let path = rest.split(';').next().unwrap_or(rest).trim();
    let last = path.rsplit("::").next()?;
    let name: String = last
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
183
184/// Generate a document string from code elements for colony ingestion.
185pub fn elements_to_document(elements: &[CodeElement], filename: &str) -> String {
186    let mut doc = format!("Source file: {}. ", filename);
187    for elem in elements {
188        doc.push_str(&format!("{} {} defined at line {}. ", elem.kind.as_str(), elem.name, elem.line));
189    }
190    doc
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects `(name, line)` for every extracted element of the given kind,
    /// in extraction order.
    fn names_of(elements: &[CodeElement], kind: CodeElementKind) -> Vec<(&str, usize)> {
        elements
            .iter()
            .filter(|e| e.kind == kind)
            .map(|e| (e.name.as_str(), e.line))
            .collect()
    }

    #[test]
    fn extract_functions() {
        let source = "pub fn hello_world() {\n}\nfn private_fn() {}";
        let elements = extract_code_elements(source, "test.rs");
        assert_eq!(
            names_of(&elements, CodeElementKind::Function),
            vec![("hello_world", 1), ("private_fn", 3)]
        );
        // The file name is carried through unchanged.
        assert!(elements.iter().all(|e| e.file == "test.rs"));
    }

    #[test]
    fn extract_structs_and_enums() {
        let source = "pub struct Foo {}\nenum Bar {}";
        let elements = extract_code_elements(source, "test.rs");
        assert_eq!(elements.len(), 2);
        assert_eq!(names_of(&elements, CodeElementKind::Struct), vec![("Foo", 1)]);
        assert_eq!(names_of(&elements, CodeElementKind::Enum), vec![("Bar", 2)]);
    }

    #[test]
    fn extract_impl_blocks() {
        let source = "impl Foo {\n}\nimpl Display for Bar {}\nimpl<T> Clone for Baz<T> {}";
        let elements = extract_code_elements(source, "test.rs");
        // Trait impls report the implementing type, not the trait.
        assert_eq!(
            names_of(&elements, CodeElementKind::Impl),
            vec![("Foo", 1), ("Bar", 3), ("Baz", 4)]
        );
    }

    #[test]
    fn extract_traits_uses_and_modules() {
        let source = "pub trait Agent {}\nuse std::collections::HashMap;\npub mod digest;";
        let elements = extract_code_elements(source, "test.rs");
        assert_eq!(elements.len(), 3);
        assert_eq!(names_of(&elements, CodeElementKind::Trait), vec![("Agent", 1)]);
        assert_eq!(names_of(&elements, CodeElementKind::Use), vec![("HashMap", 2)]);
        assert_eq!(names_of(&elements, CodeElementKind::Module), vec![("digest", 3)]);
    }

    #[test]
    fn ignores_lines_without_declarations() {
        let elements = extract_code_elements("let x = 1;\n// fn commented_out()\n", "test.rs");
        assert!(elements.is_empty());
    }

    #[test]
    fn document_lists_every_element() {
        let elements = extract_code_elements("fn run() {}\nstruct State;", "agent.rs");
        assert_eq!(
            elements_to_document(&elements, "agent.rs"),
            "Source file: agent.rs. fn run defined at line 1. struct State defined at line 2. "
        );
    }
}