amalgam_parser/
go_ast.rs

1//! Go AST parsing for precise type extraction
2
3use crate::{imports::TypeReference, ParserError};
4use amalgam_core::types::{Field, Type};
5use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
6use reqwest;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, HashMap};
9use std::process::Command;
10use std::sync::Arc;
11use std::time::Duration;
12
13/// Go AST parser that uses go/ast to extract precise type information
14pub struct GoASTParser {
15    client: reqwest::Client,
16    /// Cache of parsed Go types by fully qualified name
17    type_cache: HashMap<String, GoTypeInfo>,
18    multi_progress: Arc<MultiProgress>,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct GoTypeInfo {
23    pub name: String,
24    pub package_path: String,
25    pub fields: Vec<GoField>,
26    pub documentation: Option<String>,
27    pub type_kind: GoTypeKind,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct GoField {
32    pub name: String,
33    pub json_name: Option<String>, // From json tags
34    pub go_type: String,           // Fully qualified Go type
35    pub documentation: Option<String>,
36    pub tags: HashMap<String, String>,
37    pub is_pointer: bool,
38    pub is_optional: bool, // Based on omitempty tag
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub enum GoTypeKind {
43    Struct,
44    Interface,
45    Alias,
46    Basic,
47}
48
49impl Default for GoASTParser {
50    fn default() -> Self {
51        Self::new()
52    }
53}
54
55impl GoASTParser {
56    pub fn new() -> Self {
57        Self {
58            client: reqwest::Client::builder()
59                .timeout(Duration::from_secs(30))
60                .user_agent("amalgam")
61                .build()
62                .unwrap(),
63            type_cache: HashMap::new(),
64            multi_progress: Arc::new(MultiProgress::new()),
65        }
66    }
67
68    /// Fetch and parse Go source files from a repository
69    pub async fn fetch_and_parse_repository(
70        &mut self,
71        repo_url: &str,
72        paths: &[&str],
73    ) -> Result<(), ParserError> {
74        let is_tty = atty::is(atty::Stream::Stdout);
75
76        let main_spinner = if is_tty {
77            let pb = self.multi_progress.add(ProgressBar::new_spinner());
78            pb.set_style(
79                ProgressStyle::default_spinner()
80                    .template("{spinner:.cyan} {msg}")
81                    .unwrap()
82                    .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]),
83            );
84            pb.enable_steady_tick(Duration::from_millis(100));
85            pb.set_message(format!("Parsing Go repository: {}", repo_url));
86            Some(pb)
87        } else {
88            println!("Parsing Go repository: {}", repo_url);
89            None
90        };
91
92        for path in paths {
93            if let Some(ref pb) = main_spinner {
94                pb.set_message(format!("Fetching Go files from {}", path));
95            }
96
97            let go_files = self.fetch_go_files(repo_url, path).await?;
98
99            if let Some(ref pb) = main_spinner {
100                pb.set_message(format!("Parsing {} Go files", go_files.len()));
101            }
102
103            self.parse_go_files(&go_files).await?;
104        }
105
106        if let Some(pb) = main_spinner {
107            pb.finish_with_message(format!("✓ Parsed {} types", self.type_cache.len()));
108        } else {
109            println!("Parsed {} types", self.type_cache.len());
110        }
111
112        Ok(())
113    }
114
115    /// Fetch Go files from a specific path in a repository
116    async fn fetch_go_files(
117        &self,
118        repo_url: &str,
119        path: &str,
120    ) -> Result<Vec<GoSourceFile>, ParserError> {
121        // Convert GitHub URL to API format
122        let api_url = self.github_url_to_api(repo_url, path)?;
123
124        let response = self
125            .client
126            .get(&api_url)
127            .header("User-Agent", "amalgam")
128            .send()
129            .await
130            .map_err(|e| ParserError::Network(e.to_string()))?;
131
132        if !response.status().is_success() {
133            return Err(ParserError::Network(format!(
134                "Failed to fetch Go files: {}",
135                response.status()
136            )));
137        }
138
139        let files: Vec<GitHubFile> = response
140            .json()
141            .await
142            .map_err(|e| ParserError::Parse(e.to_string()))?;
143
144        let mut go_files = Vec::new();
145        for file in files {
146            if file.name.ends_with(".go") && file.file_type == "file" {
147                let content = self.fetch_file_content(&file.download_url).await?;
148                go_files.push(GoSourceFile {
149                    name: file.name,
150                    _path: file.path,
151                    content,
152                });
153            }
154        }
155
156        Ok(go_files)
157    }
158
159    fn github_url_to_api(&self, repo_url: &str, path: &str) -> Result<String, ParserError> {
160        // Convert https://github.com/kubernetes/api/tree/master/core/v1
161        // to https://api.github.com/repos/kubernetes/api/contents/core/v1
162
163        if let Some(github_part) = repo_url.strip_prefix("https://github.com/") {
164            let parts: Vec<&str> = github_part.split("/tree/").collect();
165            if parts.len() == 2 {
166                let repo = parts[0];
167                let branch_and_path = parts[1];
168                let path_parts: Vec<&str> = branch_and_path.splitn(2, '/').collect();
169
170                let base_path = if path_parts.len() > 1 {
171                    format!("{}/{}", path_parts[1], path)
172                } else {
173                    path.to_string()
174                };
175
176                return Ok(format!(
177                    "https://api.github.com/repos/{}/contents/{}",
178                    repo, base_path
179                ));
180            }
181        }
182
183        Err(ParserError::Parse(format!(
184            "Invalid GitHub URL: {}",
185            repo_url
186        )))
187    }
188
189    async fn fetch_file_content(&self, url: &str) -> Result<String, ParserError> {
190        let response = self
191            .client
192            .get(url)
193            .header("User-Agent", "amalgam")
194            .send()
195            .await
196            .map_err(|e| ParserError::Network(e.to_string()))?;
197
198        response
199            .text()
200            .await
201            .map_err(|e| ParserError::Parse(e.to_string()))
202    }
203
204    /// Parse Go source files using a Go script
205    async fn parse_go_files(&mut self, files: &[GoSourceFile]) -> Result<(), ParserError> {
206        // Create a temporary directory with the Go files
207        let temp_dir = tempfile::tempdir().map_err(ParserError::Io)?;
208
209        // Write files to temp directory
210        for file in files {
211            let file_path = temp_dir.path().join(&file.name);
212            tokio::fs::write(&file_path, &file.content)
213                .await
214                .map_err(ParserError::Io)?;
215        }
216
217        // Create a Go parser script
218        let parser_script = self.create_go_parser_script()?;
219        let script_path = temp_dir.path().join("parser.go");
220        tokio::fs::write(&script_path, parser_script)
221            .await
222            .map_err(ParserError::Io)?;
223
224        // Run the Go parser (still synchronous since it's a subprocess)
225        let output = tokio::task::spawn_blocking({
226            let dir = temp_dir.path().to_path_buf();
227            move || {
228                Command::new("go")
229                    .args(["run", "parser.go"])
230                    .current_dir(dir)
231                    .output()
232            }
233        })
234        .await
235        .map_err(|e| ParserError::Parse(format!("Failed to spawn go parser: {}", e)))?
236        .map_err(|e| ParserError::Parse(format!("Failed to run go parser: {}", e)))?;
237
238        if !output.status.success() {
239            return Err(ParserError::Parse(format!(
240                "Go parser failed: {}",
241                String::from_utf8_lossy(&output.stderr)
242            )));
243        }
244
245        // Parse the JSON output
246        let json_output = String::from_utf8_lossy(&output.stdout);
247        let type_infos: Vec<GoTypeInfo> = serde_json::from_str(&json_output)
248            .map_err(|e| ParserError::Parse(format!("Failed to parse JSON: {}", e)))?;
249
250        // Cache the type information
251        for type_info in type_infos {
252            let qualified_name = format!("{}.{}", type_info.package_path, type_info.name);
253            self.type_cache.insert(qualified_name, type_info);
254        }
255
256        Ok(())
257    }
258
259    /// Create a Go script that uses go/ast to extract type information
260    fn create_go_parser_script(&self) -> Result<String, ParserError> {
261        Ok(r#"
262package main
263
264import (
265    "encoding/json"
266    "fmt"
267    "go/ast"
268    "go/parser"
269    "go/token"
270    "os"
271    "path/filepath"
272    "reflect"
273    "strings"
274)
275
276type GoTypeInfo struct {
277    Name        string            `json:"name"`
278    PackagePath string            `json:"package_path"`
279    Fields      []GoField         `json:"fields"`
280    Documentation *string         `json:"documentation"`
281    TypeKind    string            `json:"type_kind"`
282}
283
284type GoField struct {
285    Name         string            `json:"name"`
286    JsonName     *string           `json:"json_name"`
287    GoType       string            `json:"go_type"`
288    Documentation *string          `json:"documentation"`
289    Tags         map[string]string `json:"tags"`
290    IsPointer    bool              `json:"is_pointer"`
291    IsOptional   bool              `json:"is_optional"`
292}
293
294func main() {
295    fset := token.NewFileSet()
296    var allTypes []GoTypeInfo
297    
298    err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error {
299        if err != nil {
300            return err
301        }
302        
303        if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "parser.go") {
304            return nil
305        }
306        
307        node, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
308        if err != nil {
309            return err
310        }
311        
312        packagePath := node.Name.Name // This would need proper module resolution
313        
314        ast.Inspect(node, func(n ast.Node) bool {
315            switch x := n.(type) {
316            case *ast.TypeSpec:
317                if structType, ok := x.Type.(*ast.StructType); ok {
318                    typeInfo := extractStructInfo(x, structType, packagePath, node)
319                    allTypes = append(allTypes, typeInfo)
320                }
321            }
322            return true
323        })
324        
325        return nil
326    })
327    
328    if err != nil {
329        fmt.Fprintf(os.Stderr, "Error: %v\n", err)
330        os.Exit(1)
331    }
332    
333    jsonData, err := json.MarshalIndent(allTypes, "", "  ")
334    if err != nil {
335        fmt.Fprintf(os.Stderr, "JSON error: %v\n", err)
336        os.Exit(1)
337    }
338    
339    fmt.Print(string(jsonData))
340}
341
342func extractStructInfo(typeSpec *ast.TypeSpec, structType *ast.StructType, packagePath string, file *ast.File) GoTypeInfo {
343    var fields []GoField
344    
345    for _, field := range structType.Fields.List {
346        for _, name := range field.Names {
347            fieldInfo := GoField{
348                Name:      name.Name,
349                GoType:    typeToString(field.Type),
350                Tags:      make(map[string]string),
351                IsPointer: isPointerType(field.Type),
352            }
353            
354            // Extract tags
355            if field.Tag != nil {
356                tagStr := strings.Trim(field.Tag.Value, "`")
357                tags := reflect.StructTag(tagStr)
358                
359                if jsonTag := tags.Get("json"); jsonTag != "" {
360                    parts := strings.Split(jsonTag, ",")
361                    if len(parts) > 0 && parts[0] != "" && parts[0] != "-" {
362                        fieldInfo.JsonName = &parts[0]
363                    }
364                    
365                    // Check for omitempty
366                    for _, part := range parts[1:] {
367                        if part == "omitempty" {
368                            fieldInfo.IsOptional = true
369                        }
370                    }
371                }
372                
373                fieldInfo.Tags["json"] = tags.Get("json")
374                fieldInfo.Tags["yaml"] = tags.Get("yaml")
375            }
376            
377            // Extract documentation
378            if field.Doc != nil {
379                doc := strings.TrimSpace(field.Doc.Text())
380                if doc != "" {
381                    fieldInfo.Documentation = &doc
382                }
383            }
384            
385            fields = append(fields, fieldInfo)
386        }
387    }
388    
389    var doc *string
390    if typeSpec.Doc != nil {
391        docText := strings.TrimSpace(typeSpec.Doc.Text())
392        if docText != "" {
393            doc = &docText
394        }
395    }
396    
397    return GoTypeInfo{
398        Name:          typeSpec.Name.Name,
399        PackagePath:   packagePath,
400        Fields:        fields,
401        Documentation: doc,
402        TypeKind:      "Struct",
403    }
404}
405
406func typeToString(expr ast.Expr) string {
407    switch t := expr.(type) {
408    case *ast.Ident:
409        return t.Name
410    case *ast.StarExpr:
411        return "*" + typeToString(t.X)
412    case *ast.ArrayType:
413        return "[]" + typeToString(t.Elt)
414    case *ast.MapType:
415        return "map[" + typeToString(t.Key) + "]" + typeToString(t.Value)
416    case *ast.SelectorExpr:
417        return typeToString(t.X) + "." + t.Sel.Name
418    case *ast.InterfaceType:
419        return "interface{}"
420    default:
421        return "unknown"
422    }
423}
424
425func isPointerType(expr ast.Expr) bool {
426    _, ok := expr.(*ast.StarExpr)
427    return ok
428}
429"#.to_string())
430    }
431
432    /// Get type information for a fully qualified Go type
433    pub fn get_type_info(&self, qualified_name: &str) -> Option<&GoTypeInfo> {
434        self.type_cache.get(qualified_name)
435    }
436
437    /// Convert a Go type to Nickel type using precise AST information
438    pub fn go_type_to_nickel(&self, go_type_info: &GoTypeInfo) -> Result<Type, ParserError> {
439        let mut fields = BTreeMap::new();
440
441        for field in &go_type_info.fields {
442            let field_name = field.json_name.as_ref().unwrap_or(&field.name).to_string();
443
444            let field_type = self.go_type_string_to_nickel(&field.go_type)?;
445
446            // Apply pointer and optional semantics
447            let final_type = if field.is_pointer || field.is_optional {
448                Type::Optional(Box::new(field_type))
449            } else {
450                field_type
451            };
452
453            fields.insert(
454                field_name,
455                Field {
456                    ty: final_type,
457                    required: !field.is_optional && !field.is_pointer,
458                    description: field.documentation.clone(),
459                    default: None,
460                },
461            );
462        }
463
464        Ok(Type::Record {
465            fields,
466            open: false, // Go structs are closed by default
467        })
468    }
469
470    /// Convert a Go type string to Nickel type
471    #[allow(clippy::only_used_in_recursion)]
472    fn go_type_string_to_nickel(&self, go_type: &str) -> Result<Type, ParserError> {
473        match go_type {
474            "string" => Ok(Type::String),
475            "int" | "int8" | "int16" | "int32" | "int64" | "uint" | "uint8" | "uint16"
476            | "uint32" | "uint64" => Ok(Type::Integer),
477            "float32" | "float64" => Ok(Type::Number),
478            "bool" => Ok(Type::Bool),
479            "interface{}" => Ok(Type::Any),
480            s if s.starts_with("[]") => {
481                let elem_type = &s[2..];
482                let elem = self.go_type_string_to_nickel(elem_type)?;
483                Ok(Type::Array(Box::new(elem)))
484            }
485            s if s.starts_with("map[") => {
486                // Simple map handling - could be more sophisticated
487                Ok(Type::Map {
488                    key: Box::new(Type::String), // Most k8s maps are string-keyed
489                    value: Box::new(Type::Any),
490                })
491            }
492            s if s.starts_with("*") => {
493                // Pointer type - make it optional
494                let inner_type = &s[1..];
495                let inner = self.go_type_string_to_nickel(inner_type)?;
496                Ok(Type::Optional(Box::new(inner)))
497            }
498            // Handle qualified types (e.g., metav1.ObjectMeta)
499            s => Ok(Type::Reference(s.to_string())),
500        }
501    }
502
503    /// Parse specific Kubernetes types
504    pub async fn parse_k8s_core_types(
505        &mut self,
506    ) -> Result<HashMap<String, GoTypeInfo>, ParserError> {
507        // Parse core Kubernetes types from k8s.io/api and k8s.io/apimachinery
508        let repos_and_paths = vec![
509            (
510                "https://github.com/kubernetes/api/tree/master",
511                vec!["core/v1", "apps/v1", "networking/v1"],
512            ),
513            (
514                "https://github.com/kubernetes/apimachinery/tree/master",
515                vec!["pkg/apis/meta/v1", "pkg/util/intstr", "pkg/api/resource"],
516            ),
517        ];
518
519        for (repo, paths) in repos_and_paths {
520            self.fetch_and_parse_repository(repo, &paths).await?;
521        }
522
523        Ok(self.type_cache.clone())
524    }
525
526    /// Clear progress bars
527    pub fn finish(&self) {
528        self.multi_progress.clear().ok();
529    }
530}
531
532#[derive(Debug, Clone, Deserialize)]
533struct GitHubFile {
534    name: String,
535    path: String,
536    #[serde(rename = "type")]
537    file_type: String,
538    download_url: String,
539}
540
/// A downloaded Go source file.
#[derive(Clone, Debug)]
struct GoSourceFile {
    /// File name, used as the name of the temporary file it is written to.
    name: String,
    /// Path of the file inside the repository.
    _path: String,
    /// Full text of the file.
    content: String,
}
547
548/// Known Kubernetes type mappings based on Go AST analysis
549pub fn create_k8s_type_registry() -> HashMap<String, TypeReference> {
550    let mut registry = HashMap::new();
551
552    // Core v1 types
553    let core_types = vec![
554        ("ObjectMeta", "k8s.io", "v1"),
555        ("TypeMeta", "k8s.io", "v1"),
556        ("ListMeta", "k8s.io", "v1"),
557        ("LabelSelector", "k8s.io", "v1"),
558        ("Volume", "k8s.io", "v1"),
559        ("VolumeMount", "k8s.io", "v1"),
560        ("Container", "k8s.io", "v1"),
561        ("PodSpec", "k8s.io", "v1"),
562        ("ResourceRequirements", "k8s.io", "v1"),
563        ("EnvVar", "k8s.io", "v1"),
564        ("ConfigMapKeySelector", "k8s.io", "v1"),
565        ("SecretKeySelector", "k8s.io", "v1"),
566    ];
567
568    for (kind, group, version) in core_types {
569        let go_name = format!("k8s.io/api/core/{}.{}", version, kind);
570        let type_ref = TypeReference::new(group.to_string(), version.to_string(), kind.to_string());
571        registry.insert(go_name, type_ref);
572    }
573
574    // Meta v1 types
575    let meta_types = vec![
576        ("ObjectMeta", "k8s.io", "v1"),
577        ("TypeMeta", "k8s.io", "v1"),
578        ("ListMeta", "k8s.io", "v1"),
579        ("LabelSelector", "k8s.io", "v1"),
580    ];
581
582    for (kind, group, version) in meta_types {
583        let go_name = format!("k8s.io/apimachinery/pkg/apis/meta/{}.{}", version, kind);
584        let type_ref = TypeReference::new(group.to_string(), version.to_string(), kind.to_string());
585        registry.insert(go_name, type_ref);
586    }
587
588    registry
589}