Skip to main content

portalis_ingest/
lib.rs

1//! Ingest Agent - Python AST Parser
2//!
3//! Parses Python source code into a Portalis AST representation.
4//! Following London School TDD with outside-in development.
5
6mod enhanced_parser;
7pub mod project_parser;
8
9use async_trait::async_trait;
10use enhanced_parser::EnhancedParser;
11use portalis_core::{Agent, AgentCapability, AgentId, ArtifactMetadata, Error, Result};
12use serde::{Deserialize, Serialize};
13use std::path::PathBuf;
14
15// Re-export for external use
16pub use project_parser::{ProjectParser, PythonProject, PythonModule};
17
/// Input for the Ingest Agent
///
/// `IngestAgent::execute` parses `source_code`; `source_path` is only used for
/// the log line and for the `source` tag on the output metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IngestInput {
    /// Path associated with the source. Nothing in this file reads from it.
    pub source_path: PathBuf,
    /// Python source text handed to the parser.
    pub source_code: String,
}
24
/// Output from the Ingest Agent
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IngestOutput {
    /// Parsed representation of the input source.
    pub ast: PythonAst,
    /// Artifact metadata; `IngestAgent::execute` tags it with `source`,
    /// `functions` (count) and `classes` (count).
    pub metadata: ArtifactMetadata,
}
31
/// Simplified Python AST representation
///
/// The regex fallback parser in this file fills only `functions` and
/// `imports`; it always leaves `classes` empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonAst {
    /// Function definitions found in the source.
    pub functions: Vec<PythonFunction>,
    /// Class definitions found in the source.
    pub classes: Vec<PythonClass>,
    /// Import statements found in the source.
    pub imports: Vec<PythonImport>,
}
39
/// A Python function definition.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonFunction {
    /// Function name as written after `def`.
    pub name: String,
    /// Parameters in declaration order.
    pub params: Vec<PythonParameter>,
    /// Return annotation text, or `None` when the signature has no `->` part.
    pub return_type: Option<String>,
    /// Function body text. The regex fallback parser leaves this empty.
    pub body: String,
    /// Decorators attached to the function. The regex fallback parser leaves
    /// this empty.
    pub decorators: Vec<String>,
}
48
/// A single parameter of a Python function.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonParameter {
    /// Parameter name.
    pub name: String,
    /// Type annotation text, when one is present.
    pub type_hint: Option<String>,
    /// Default value text, when one is present.
    /// NOTE(review): the regex fallback parser in this file always stores `None`.
    pub default: Option<String>,
}
55
/// A Python class definition.
///
/// NOTE(review): nothing in this file constructs this type; it is presumably
/// filled in by `EnhancedParser` — confirm there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonClass {
    /// Class name.
    pub name: String,
    /// Presumably the base classes as written in the class header — TODO confirm.
    pub bases: Vec<String>,
    /// Methods, represented with the same type as free functions.
    pub methods: Vec<PythonFunction>,
    /// Attributes recorded for the class.
    pub attributes: Vec<PythonAttribute>,
}
63
/// An attribute recorded on a [`PythonClass`].
///
/// NOTE(review): not constructed anywhere in this file; confirm with the
/// producer whether class-level and/or instance attributes are meant.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonAttribute {
    /// Attribute name.
    pub name: String,
    /// Type annotation text, when one is present.
    pub type_hint: Option<String>,
}
69
/// A Python import statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonImport {
    /// Module being imported.
    /// NOTE(review): the regex fallback parser stores the placeholder
    /// `"unknown"` here for every `from ... import ...` line.
    pub module: String,
    /// Presumably the names listed after `import` in a `from` import — TODO
    /// confirm; the regex fallback parser always leaves this empty.
    pub items: Vec<String>,
    /// Presumably the `as` alias — TODO confirm; the regex fallback parser
    /// always stores `None`.
    pub alias: Option<String>,
}
76
/// Ingest Agent implementation
///
/// Parses with `EnhancedParser` first. When `fallback_regex` is set, a failed
/// parse is retried with the regex-based parser instead of returning the error.
pub struct IngestAgent {
    /// Identifier returned by `Agent::id`.
    id: AgentId,
    /// Primary parser, tried first for every input.
    parser: EnhancedParser,
    fallback_regex: bool, // Use regex parser as fallback
}
83
84impl IngestAgent {
85    pub fn new() -> Self {
86        Self {
87            id: AgentId::new(),
88            parser: EnhancedParser::new(),
89            fallback_regex: false,
90        }
91    }
92
93    /// Enable regex fallback mode for testing
94    pub fn with_regex_fallback(mut self) -> Self {
95        self.fallback_regex = true;
96        self
97    }
98
99    /// Parse Python source using enhanced rustpython-parser
100    fn parse_python(&self, source: &str) -> Result<PythonAst> {
101        // Try enhanced parser first
102        match self.parser.parse(source) {
103            Ok(ast) => Ok(ast),
104            Err(e) if self.fallback_regex => {
105                tracing::warn!("Enhanced parser failed, falling back to regex: {}", e);
106                self.parse_python_regex(source)
107            }
108            Err(e) => Err(e),
109        }
110    }
111
112    /// Simple Python parser (proof-of-concept fallback)
113    /// For MVP, we'll use regex-based parsing for simple functions
114    fn parse_python_regex(&self, source: &str) -> Result<PythonAst> {
115        let mut ast = PythonAst {
116            functions: Vec::new(),
117            classes: Vec::new(),
118            imports: Vec::new(),
119        };
120
121        // Parse imports
122        for line in source.lines() {
123            let trimmed = line.trim();
124            if trimmed.starts_with("import ") || trimmed.starts_with("from ") {
125                ast.imports.push(self.parse_import(trimmed)?);
126            }
127        }
128
129        // Parse functions (simple regex-based for POC)
130        let func_regex = regex::Regex::new(r"def\s+(\w+)\s*\((.*?)\)\s*(?:->\s*(\w+))?\s*:")
131            .map_err(|e| Error::Parse(e.to_string()))?;
132
133        for cap in func_regex.captures_iter(source) {
134            let name = cap.get(1).map(|m| m.as_str().to_string()).unwrap_or_default();
135            let params_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
136            let return_type = cap.get(3).map(|m| m.as_str().to_string());
137
138            let params = self.parse_parameters(params_str)?;
139
140            ast.functions.push(PythonFunction {
141                name,
142                params,
143                return_type,
144                body: String::new(), // Simplified for POC
145                decorators: Vec::new(),
146            });
147        }
148
149        Ok(ast)
150    }
151
152    fn parse_import(&self, line: &str) -> Result<PythonImport> {
153        if line.starts_with("import ") {
154            let module = line.strip_prefix("import ")
155                .unwrap_or("")
156                .split_whitespace()
157                .next()
158                .unwrap_or("")
159                .to_string();
160
161            Ok(PythonImport {
162                module,
163                items: Vec::new(),
164                alias: None,
165            })
166        } else {
167            // from X import Y
168            Ok(PythonImport {
169                module: "unknown".to_string(),
170                items: Vec::new(),
171                alias: None,
172            })
173        }
174    }
175
176    fn parse_parameters(&self, params_str: &str) -> Result<Vec<PythonParameter>> {
177        let mut params = Vec::new();
178
179        for param in params_str.split(',') {
180            let trimmed = param.trim();
181            if trimmed.is_empty() {
182                continue;
183            }
184
185            // Simple parsing: name or name: type
186            let parts: Vec<&str> = trimmed.split(':').collect();
187            let name = parts[0].trim().to_string();
188            let type_hint = parts.get(1).map(|t| t.trim().to_string());
189
190            params.push(PythonParameter {
191                name,
192                type_hint,
193                default: None,
194            });
195        }
196
197        Ok(params)
198    }
199}
200
201impl Default for IngestAgent {
202    fn default() -> Self {
203        Self::new()
204    }
205}
206
207#[async_trait]
208impl Agent for IngestAgent {
209    type Input = IngestInput;
210    type Output = IngestOutput;
211
212    async fn execute(&self, input: Self::Input) -> Result<Self::Output> {
213        tracing::info!("Ingesting Python source from {:?}", input.source_path);
214
215        let ast = self.parse_python(&input.source_code)?;
216
217        let metadata = ArtifactMetadata::new(self.name())
218            .with_tag("source", input.source_path.display().to_string())
219            .with_tag("functions", ast.functions.len().to_string())
220            .with_tag("classes", ast.classes.len().to_string());
221
222        Ok(IngestOutput { ast, metadata })
223    }
224
225    fn id(&self) -> AgentId {
226        self.id
227    }
228
229    fn name(&self) -> &str {
230        "IngestAgent"
231    }
232
233    fn capabilities(&self) -> Vec<AgentCapability> {
234        vec![AgentCapability::Parsing]
235    }
236}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a default `IngestAgent` over `source`, labelled with `path`, and
    /// unwraps the result so each test only states its expectations.
    async fn ingest(path: &str, source: &str) -> IngestOutput {
        let agent = IngestAgent::new();
        let input = IngestInput {
            source_path: PathBuf::from(path),
            source_code: source.to_string(),
        };

        agent.execute(input).await.unwrap()
    }

    #[tokio::test]
    async fn test_parse_simple_function() {
        let output = ingest(
            "test.py",
            "\ndef add(a: int, b: int) -> int:\n    return a + b\n",
        )
        .await;

        let functions = &output.ast.functions;
        assert_eq!(functions.len(), 1);
        assert_eq!(functions[0].name, "add");
        assert_eq!(functions[0].params.len(), 2);
        assert_eq!(functions[0].return_type, Some("int".to_string()));
    }

    #[tokio::test]
    async fn test_parse_function_without_types() {
        let output = ingest("test.py", "\ndef multiply(x, y):\n    return x * y\n").await;

        let functions = &output.ast.functions;
        assert_eq!(functions.len(), 1);
        assert_eq!(functions[0].name, "multiply");
        assert_eq!(functions[0].return_type, None);
    }

    #[tokio::test]
    async fn test_parse_imports() {
        let output = ingest(
            "test.py",
            "\nimport sys\nimport os\n\ndef main():\n    pass\n",
        )
        .await;

        assert_eq!(output.ast.imports.len(), 2);
    }

    #[tokio::test]
    async fn test_parse_empty_file() {
        let output = ingest("empty.py", "").await;

        assert_eq!(output.ast.functions.len(), 0);
        assert_eq!(output.ast.classes.len(), 0);
        assert_eq!(output.ast.imports.len(), 0);
    }

    #[tokio::test]
    async fn test_multiple_parameters() {
        let output = ingest(
            "test.py",
            "\ndef process(a: int, b: str, c: float, d: bool) -> bool:\n    return True\n",
        )
        .await;

        let params = &output.ast.functions[0].params;
        assert_eq!(params.len(), 4);
        assert_eq!(params[0].name, "a");
        assert_eq!(params[1].name, "b");
    }

    #[test]
    fn test_agent_capabilities() {
        let agent = IngestAgent::new();
        assert_eq!(agent.capabilities(), vec![AgentCapability::Parsing]);
    }
}
340}