Skip to main content

project_map_cli_rust/core/
parser.rs

1use tree_sitter::{Parser, Query, QueryCursor};
2use streaming_iterator::StreamingIterator;
3use std::fs;
4use std::path::Path;
5use crate::error::{AppError, Result};
6use serde::{Serialize, Deserialize};
7
8#[derive(Debug, Serialize, Deserialize, Clone)]
9pub struct Symbol {
10    pub name: String,
11    pub kind: String,
12    pub line: usize,
13    pub start_byte: usize,
14    pub end_byte: usize,
15    pub docstring: Option<String>,
16}
17
18#[derive(Debug, Serialize, Deserialize)]
19pub struct FileOutline {
20    pub path: String,
21    pub language: String,
22    pub symbols: Vec<Symbol>,
23    pub imports: Vec<String>,
24}
25
26pub struct CodeParser {
27    parser: Parser,
28}
29
30impl CodeParser {
31    pub fn new() -> Self {
32        Self {
33            parser: Parser::new(),
34        }
35    }
36
37    pub fn parse_file(&mut self, path: &Path) -> Result<FileOutline> {
38        let extension = path.extension()
39            .and_then(|s| s.to_str())
40            .unwrap_or("");
41
42        let (language, ts_language) = match extension {
43            "py" => ("python", tree_sitter_python::LANGUAGE.into()),
44            "rs" => ("rust", tree_sitter_rust::LANGUAGE.into()),
45            "ts" => ("typescript", tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
46            "tsx" => ("typescript", tree_sitter_typescript::LANGUAGE_TSX.into()),
47            "kt" => ("kotlin", tree_sitter_kotlin_ng::LANGUAGE.into()),
48            "sql" => ("sql", tree_sitter_sequel::LANGUAGE.into()),
49            "vue" => ("vue", tree_sitter_vue_updated::language().into()),
50            "md" | "json" | "toml" | "yaml" | "yml" => {
51                let lang = match extension {
52                    "md" => "markdown",
53                    "json" => "json",
54                    "toml" => "toml",
55                    "yaml" | "yml" => "yaml",
56                    _ => "text",
57                };
58                return Ok(FileOutline {
59                    path: path.to_string_lossy().to_string(),
60                    language: lang.to_string(),
61                    symbols: Vec::new(),
62                    imports: Vec::new(),
63                });
64            }
65            _ => return Err(AppError::Parser(format!("Unsupported extension: {}", extension))),
66        };
67
68        self.parser.set_language(&ts_language)
69            .map_err(|e| AppError::Parser(format!("Failed to set language: {}", e)))?;
70
71        let content = fs::read_to_string(path)?;
72        let tree = self.parser.parse(&content, None)
73            .ok_or_else(|| AppError::Parser("Failed to parse file".to_string()))?;
74
75        let query_str = match language {
76            "python" => "((class_definition name: (identifier) @name) @class)
77                         ((function_definition name: (identifier) @name) @function)
78                         (import_statement (dotted_name) @import)
79                         (import_from_statement module_name: (dotted_name) @import)
80                         (expression_statement (string) @doc)",
81            "rust" => "((struct_item name: (type_identifier) @name) @struct)
82                       ((enum_item name: (type_identifier) @name) @enum)
83                       ((function_item name: (identifier) @name) @function)
84                       ((trait_item name: (type_identifier) @name) @trait)
85                       ((impl_item type: (_) @name) @impl)
86                       ((line_comment) @doc (#match? @doc \"^///\"))
87                       ((block_comment) @doc (#match? @doc \"^/\\\\*\\\\*\"))",
88            "typescript" => "((class_declaration name: (type_identifier) @name) @class)
89                             ((function_declaration name: (identifier) @name) @function)
90                             ((generator_function_declaration name: (identifier) @name) @function)
91                             ((interface_declaration name: (type_identifier) @name) @interface)
92                             ((type_alias_declaration name: (type_identifier) @name) @type)
93                             ((enum_declaration name: (identifier) @name) @enum)
94                             ((method_definition name: (property_identifier) @name) @function)
95                             ((variable_declarator name: (identifier) @name value: (arrow_function)) @function)
96                             ((variable_declarator name: (identifier) @name value: (function_expression)) @function)
97                             ((variable_declarator name: (identifier) @name) @variable)
98                             (internal_module name: (identifier) @name) @module
99                             (import_statement source: (string (string_fragment) @import))
100                             (export_statement source: (string (string_fragment) @import))
101                             (export_statement (export_clause (export_specifier name: (identifier) @name)) @export)
102                             (comment) @doc",
103            "kotlin" => "((class_declaration name: (identifier) @name) @class)
104                         ((object_declaration name: (identifier) @name) @class)
105                         ((companion_object name: (identifier) @name) @class)
106                         ((function_declaration name: (identifier) @name) @function)
107                         (import (qualified_identifier) @import)
108                         (line_comment) @doc
109                         (block_comment) @doc",
110            "sql" => "((identifier) @name) @symbol",
111            "vue" => "((tag_name) @name) @component",
112            _ => unreachable!(),
113        };
114
115        let query = Query::new(&ts_language, query_str)
116            .map_err(|e| AppError::Parser(format!("Failed to create query: {}", e)))?;
117        
118        let mut cursor = QueryCursor::new();
119        let mut matches = cursor.matches(&query, tree.root_node(), content.as_bytes());
120
121        let mut symbols = Vec::new();
122        let mut imports = Vec::new();
123        let mut raw_docs = Vec::new();
124
125        while let Some(m) = matches.next() {
126            let mut name = String::new();
127            let mut kind = String::new();
128            let mut line = 0;
129            let mut start_byte = 0;
130            let mut end_byte = 0;
131            let mut is_import = false;
132            let mut is_doc = false;
133
134            for capture in m.captures {
135                let capture_name = query.capture_names()[capture.index as usize].to_string();
136                if capture_name == "import" {
137                    let imp = capture.node.utf8_text(content.as_bytes())
138                        .unwrap_or("")
139                        .to_string();
140                    if !imp.is_empty() {
141                        imports.push(imp);
142                    }
143                    is_import = true;
144                    break;
145                } else if capture_name == "doc" {
146                    let text = capture.node.utf8_text(content.as_bytes()).unwrap_or("");
147                    // For Python, only keep if it's a docstring (this is a heuristic)
148                    if language == "python" && !(text.starts_with("\"\"\"") || text.starts_with("'''")) {
149                        continue;
150                    }
151                    
152                    raw_docs.push((capture.node.start_position().row + 1, capture.node.start_byte(), capture.node.end_byte(), text.to_string()));
153                    is_doc = true;
154                    break;
155                } else if capture_name == "name" {
156                    name = capture.node.utf8_text(content.as_bytes())
157                        .unwrap_or("unknown")
158                        .to_string();
159                } else {
160                    kind = capture_name;
161                    line = capture.node.start_position().row + 1;
162                    start_byte = capture.node.start_byte();
163                    end_byte = capture.node.end_byte();
164                }
165            }
166            
167            if !is_import && !is_doc && !name.is_empty() && !kind.is_empty() {
168                let mut clean_name = name.replace("\n", " ")
169                    .split_whitespace()
170                    .collect::<Vec<_>>()
171                    .join(" ");
172                
173                if clean_name.chars().count() > 100 {
174                    clean_name = format!("{}...", clean_name.chars().take(97).collect::<String>());
175                }
176
177                symbols.push(Symbol {
178                    name: clean_name,
179                    kind,
180                    line,
181                    start_byte,
182                    end_byte,
183                    docstring: None,
184                });
185            }
186        }
187
188        // Second pass: Associate docstrings with symbols
189        for symbol in &mut symbols {
190            let mut attached_docs = Vec::new();
191            for (doc_line, doc_start, doc_end, doc_text) in &raw_docs {
192                // Case 1: Docstring is immediately before the symbol (within 2 lines)
193                if *doc_line < symbol.line && *doc_line >= symbol.line.saturating_sub(2) {
194                    attached_docs.push(doc_text.clone());
195                }
196                // Case 2: Docstring is inside the symbol's byte range
197                else if *doc_start >= symbol.start_byte && *doc_end <= symbol.end_byte {
198                    attached_docs.push(doc_text.clone());
199                }
200            }
201            if !attached_docs.is_empty() {
202                symbol.docstring = Some(attached_docs.join("\n\n"));
203            }
204        }
205
206        // Final filtering: remove noisy variables
207        symbols.retain(|s| s.kind != "variable" || s.docstring.is_some());
208
209        // For Vue, always add a component symbol based on the filename
210        if language == "vue" {
211            let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("Component");
212            symbols.push(Symbol {
213                name: file_name.trim_end_matches(".vue").to_string(),
214                kind: "component".to_string(),
215                line: 1,
216                start_byte: 0,
217                end_byte: content.len(),
218                docstring: None,
219            });
220        }
221
222        Ok(FileOutline {
223            path: path.to_string_lossy().to_string(),
224            language: language.to_string(),
225            symbols,
226            imports,
227        })
228    }
229}