1mod enhanced_parser;
7pub mod project_parser;
8
9use async_trait::async_trait;
10use enhanced_parser::EnhancedParser;
11use portalis_core::{Agent, AgentCapability, AgentId, ArtifactMetadata, Error, Result};
12use serde::{Deserialize, Serialize};
13use std::path::PathBuf;
14
15pub use project_parser::{ProjectParser, PythonProject, PythonModule};
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct IngestInput {
21 pub source_path: PathBuf,
22 pub source_code: String,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct IngestOutput {
28 pub ast: PythonAst,
29 pub metadata: ArtifactMetadata,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct PythonAst {
35 pub functions: Vec<PythonFunction>,
36 pub classes: Vec<PythonClass>,
37 pub imports: Vec<PythonImport>,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct PythonFunction {
42 pub name: String,
43 pub params: Vec<PythonParameter>,
44 pub return_type: Option<String>,
45 pub body: String,
46 pub decorators: Vec<String>,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct PythonParameter {
51 pub name: String,
52 pub type_hint: Option<String>,
53 pub default: Option<String>,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct PythonClass {
58 pub name: String,
59 pub bases: Vec<String>,
60 pub methods: Vec<PythonFunction>,
61 pub attributes: Vec<PythonAttribute>,
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct PythonAttribute {
66 pub name: String,
67 pub type_hint: Option<String>,
68}
69
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct PythonImport {
72 pub module: String,
73 pub items: Vec<String>,
74 pub alias: Option<String>,
75}
76
77pub struct IngestAgent {
79 id: AgentId,
80 parser: EnhancedParser,
81 fallback_regex: bool, }
83
84impl IngestAgent {
85 pub fn new() -> Self {
86 Self {
87 id: AgentId::new(),
88 parser: EnhancedParser::new(),
89 fallback_regex: false,
90 }
91 }
92
93 pub fn with_regex_fallback(mut self) -> Self {
95 self.fallback_regex = true;
96 self
97 }
98
99 fn parse_python(&self, source: &str) -> Result<PythonAst> {
101 match self.parser.parse(source) {
103 Ok(ast) => Ok(ast),
104 Err(e) if self.fallback_regex => {
105 tracing::warn!("Enhanced parser failed, falling back to regex: {}", e);
106 self.parse_python_regex(source)
107 }
108 Err(e) => Err(e),
109 }
110 }
111
112 fn parse_python_regex(&self, source: &str) -> Result<PythonAst> {
115 let mut ast = PythonAst {
116 functions: Vec::new(),
117 classes: Vec::new(),
118 imports: Vec::new(),
119 };
120
121 for line in source.lines() {
123 let trimmed = line.trim();
124 if trimmed.starts_with("import ") || trimmed.starts_with("from ") {
125 ast.imports.push(self.parse_import(trimmed)?);
126 }
127 }
128
129 let func_regex = regex::Regex::new(r"def\s+(\w+)\s*\((.*?)\)\s*(?:->\s*(\w+))?\s*:")
131 .map_err(|e| Error::Parse(e.to_string()))?;
132
133 for cap in func_regex.captures_iter(source) {
134 let name = cap.get(1).map(|m| m.as_str().to_string()).unwrap_or_default();
135 let params_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
136 let return_type = cap.get(3).map(|m| m.as_str().to_string());
137
138 let params = self.parse_parameters(params_str)?;
139
140 ast.functions.push(PythonFunction {
141 name,
142 params,
143 return_type,
144 body: String::new(), decorators: Vec::new(),
146 });
147 }
148
149 Ok(ast)
150 }
151
152 fn parse_import(&self, line: &str) -> Result<PythonImport> {
153 if line.starts_with("import ") {
154 let module = line.strip_prefix("import ")
155 .unwrap_or("")
156 .split_whitespace()
157 .next()
158 .unwrap_or("")
159 .to_string();
160
161 Ok(PythonImport {
162 module,
163 items: Vec::new(),
164 alias: None,
165 })
166 } else {
167 Ok(PythonImport {
169 module: "unknown".to_string(),
170 items: Vec::new(),
171 alias: None,
172 })
173 }
174 }
175
176 fn parse_parameters(&self, params_str: &str) -> Result<Vec<PythonParameter>> {
177 let mut params = Vec::new();
178
179 for param in params_str.split(',') {
180 let trimmed = param.trim();
181 if trimmed.is_empty() {
182 continue;
183 }
184
185 let parts: Vec<&str> = trimmed.split(':').collect();
187 let name = parts[0].trim().to_string();
188 let type_hint = parts.get(1).map(|t| t.trim().to_string());
189
190 params.push(PythonParameter {
191 name,
192 type_hint,
193 default: None,
194 });
195 }
196
197 Ok(params)
198 }
199}
200
201impl Default for IngestAgent {
202 fn default() -> Self {
203 Self::new()
204 }
205}
206
207#[async_trait]
208impl Agent for IngestAgent {
209 type Input = IngestInput;
210 type Output = IngestOutput;
211
212 async fn execute(&self, input: Self::Input) -> Result<Self::Output> {
213 tracing::info!("Ingesting Python source from {:?}", input.source_path);
214
215 let ast = self.parse_python(&input.source_code)?;
216
217 let metadata = ArtifactMetadata::new(self.name())
218 .with_tag("source", input.source_path.display().to_string())
219 .with_tag("functions", ast.functions.len().to_string())
220 .with_tag("classes", ast.classes.len().to_string());
221
222 Ok(IngestOutput { ast, metadata })
223 }
224
225 fn id(&self) -> AgentId {
226 self.id
227 }
228
229 fn name(&self) -> &str {
230 "IngestAgent"
231 }
232
233 fn capabilities(&self) -> Vec<AgentCapability> {
234 vec![AgentCapability::Parsing]
235 }
236}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a default `IngestAgent` over `source` and returns the resulting
    /// AST, panicking if ingestion fails.
    async fn ingest(file_name: &str, source: &str) -> PythonAst {
        let input = IngestInput {
            source_path: PathBuf::from(file_name),
            source_code: source.to_string(),
        };

        IngestAgent::new().execute(input).await.unwrap().ast
    }

    #[tokio::test]
    async fn test_parse_simple_function() {
        let ast = ingest(
            "test.py",
            r#"
def add(a: int, b: int) -> int:
    return a + b
"#,
        )
        .await;

        assert_eq!(ast.functions.len(), 1);
        let function = &ast.functions[0];
        assert_eq!(function.name, "add");
        assert_eq!(function.params.len(), 2);
        assert_eq!(function.return_type, Some("int".to_string()));
    }

    #[tokio::test]
    async fn test_parse_function_without_types() {
        let ast = ingest(
            "test.py",
            r#"
def multiply(x, y):
    return x * y
"#,
        )
        .await;

        assert_eq!(ast.functions.len(), 1);
        let function = &ast.functions[0];
        assert_eq!(function.name, "multiply");
        assert_eq!(function.return_type, None);
    }

    #[tokio::test]
    async fn test_parse_imports() {
        let ast = ingest(
            "test.py",
            r#"
import sys
import os

def main():
    pass
"#,
        )
        .await;

        assert_eq!(ast.imports.len(), 2);
    }

    #[tokio::test]
    async fn test_parse_empty_file() {
        let ast = ingest("empty.py", "").await;

        assert_eq!(ast.functions.len(), 0);
        assert_eq!(ast.classes.len(), 0);
        assert_eq!(ast.imports.len(), 0);
    }

    #[tokio::test]
    async fn test_multiple_parameters() {
        let ast = ingest(
            "test.py",
            r#"
def process(a: int, b: str, c: float, d: bool) -> bool:
    return True
"#,
        )
        .await;

        let params = &ast.functions[0].params;
        assert_eq!(params.len(), 4);
        assert_eq!(params[0].name, "a");
        assert_eq!(params[1].name, "b");
    }

    #[test]
    fn test_agent_capabilities() {
        let capabilities = IngestAgent::new().capabilities();
        assert_eq!(capabilities, vec![AgentCapability::Parsing]);
    }
}