// project_map_cli_rust/core/parser.rs
use tree_sitter::{Parser, Query, QueryCursor};
use streaming_iterator::StreamingIterator;
use std::fs;
use std::path::Path;
use crate::error::{AppError, Result};
use serde::{Serialize, Deserialize};

8#[derive(Debug, Serialize, Deserialize, Clone)]
9pub struct Symbol {
10 pub name: String,
11 pub kind: String,
12 pub line: usize,
13 pub start_byte: usize,
14 pub end_byte: usize,
15 pub docstring: Option<String>,
16}
17
18#[derive(Debug, Serialize, Deserialize)]
19pub struct FileOutline {
20 pub path: String,
21 pub language: String,
22 pub symbols: Vec<Symbol>,
23 pub imports: Vec<String>,
24}
25
26pub struct CodeParser {
27 parser: Parser,
28}
29
30impl CodeParser {
31 pub fn new() -> Self {
32 Self {
33 parser: Parser::new(),
34 }
35 }
36
37 pub fn parse_file(&mut self, path: &Path) -> Result<FileOutline> {
38 let extension = path.extension()
39 .and_then(|s| s.to_str())
40 .unwrap_or("");
41
42 let (language, ts_language) = match extension {
43 "py" => ("python", tree_sitter_python::LANGUAGE.into()),
44 "rs" => ("rust", tree_sitter_rust::LANGUAGE.into()),
45 "ts" => ("typescript", tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
46 "tsx" => ("typescript", tree_sitter_typescript::LANGUAGE_TSX.into()),
47 "kt" => ("kotlin", tree_sitter_kotlin_ng::LANGUAGE.into()),
48 "sql" => ("sql", tree_sitter_sequel::LANGUAGE.into()),
49 "vue" => ("vue", tree_sitter_vue_updated::language().into()),
50 "md" | "json" | "toml" | "yaml" | "yml" => {
51 let lang = match extension {
52 "md" => "markdown",
53 "json" => "json",
54 "toml" => "toml",
55 "yaml" | "yml" => "yaml",
56 _ => "text",
57 };
58 return Ok(FileOutline {
59 path: path.to_string_lossy().to_string(),
60 language: lang.to_string(),
61 symbols: Vec::new(),
62 imports: Vec::new(),
63 });
64 }
65 _ => return Err(AppError::Parser(format!("Unsupported extension: {}", extension))),
66 };
67
68 self.parser.set_language(&ts_language)
69 .map_err(|e| AppError::Parser(format!("Failed to set language: {}", e)))?;
70
71 let content = fs::read_to_string(path)?;
72 let tree = self.parser.parse(&content, None)
73 .ok_or_else(|| AppError::Parser("Failed to parse file".to_string()))?;
74
75 let query_str = match language {
76 "python" => "((class_definition name: (identifier) @name) @class)
77 ((function_definition name: (identifier) @name) @function)
78 (import_statement (dotted_name) @import)
79 (import_from_statement module_name: (dotted_name) @import)
80 (expression_statement (string) @doc)",
81 "rust" => "((struct_item name: (type_identifier) @name) @struct)
82 ((enum_item name: (type_identifier) @name) @enum)
83 ((function_item name: (identifier) @name) @function)
84 ((trait_item name: (type_identifier) @name) @trait)
85 ((impl_item type: (_) @name) @impl)
86 ((line_comment) @doc (#match? @doc \"^///\"))
87 ((block_comment) @doc (#match? @doc \"^/\\\\*\\\\*\"))",
88 "typescript" => "((class_declaration name: (type_identifier) @name) @class)
89 ((function_declaration name: (identifier) @name) @function)
90 ((generator_function_declaration name: (identifier) @name) @function)
91 ((interface_declaration name: (type_identifier) @name) @interface)
92 ((type_alias_declaration name: (type_identifier) @name) @type)
93 ((enum_declaration name: (identifier) @name) @enum)
94 ((method_definition name: (property_identifier) @name) @function)
95 ((variable_declarator name: (identifier) @name value: (arrow_function)) @function)
96 ((variable_declarator name: (identifier) @name value: (function_expression)) @function)
97 ((variable_declarator name: (identifier) @name) @variable)
98 (internal_module name: (identifier) @name) @module
99 (import_statement source: (string (string_fragment) @import))
100 (export_statement source: (string (string_fragment) @import))
101 (export_statement (export_clause (export_specifier name: (identifier) @name)) @export)
102 (comment) @doc",
103 "kotlin" => "((class_declaration name: (identifier) @name) @class)
104 ((object_declaration name: (identifier) @name) @class)
105 ((companion_object name: (identifier) @name) @class)
106 ((function_declaration name: (identifier) @name) @function)
107 (import (qualified_identifier) @import)
108 (line_comment) @doc
109 (block_comment) @doc",
110 "sql" => "((identifier) @name) @symbol",
111 "vue" => "((tag_name) @name) @component",
112 _ => unreachable!(),
113 };
114
115 let query = Query::new(&ts_language, query_str)
116 .map_err(|e| AppError::Parser(format!("Failed to create query: {}", e)))?;
117
118 let mut cursor = QueryCursor::new();
119 let mut matches = cursor.matches(&query, tree.root_node(), content.as_bytes());
120
121 let mut symbols = Vec::new();
122 let mut imports = Vec::new();
123 let mut raw_docs = Vec::new();
124
125 while let Some(m) = matches.next() {
126 let mut name = String::new();
127 let mut kind = String::new();
128 let mut line = 0;
129 let mut start_byte = 0;
130 let mut end_byte = 0;
131 let mut is_import = false;
132 let mut is_doc = false;
133
134 for capture in m.captures {
135 let capture_name = query.capture_names()[capture.index as usize].to_string();
136 if capture_name == "import" {
137 let imp = capture.node.utf8_text(content.as_bytes())
138 .unwrap_or("")
139 .to_string();
140 if !imp.is_empty() {
141 imports.push(imp);
142 }
143 is_import = true;
144 break;
145 } else if capture_name == "doc" {
146 let text = capture.node.utf8_text(content.as_bytes()).unwrap_or("");
147 if language == "python" && !(text.starts_with("\"\"\"") || text.starts_with("'''")) {
149 continue;
150 }
151
152 raw_docs.push((capture.node.start_position().row + 1, capture.node.start_byte(), capture.node.end_byte(), text.to_string()));
153 is_doc = true;
154 break;
155 } else if capture_name == "name" {
156 name = capture.node.utf8_text(content.as_bytes())
157 .unwrap_or("unknown")
158 .to_string();
159 } else {
160 kind = capture_name;
161 line = capture.node.start_position().row + 1;
162 start_byte = capture.node.start_byte();
163 end_byte = capture.node.end_byte();
164 }
165 }
166
167 if !is_import && !is_doc && !name.is_empty() && !kind.is_empty() {
168 let mut clean_name = name.replace("\n", " ")
169 .split_whitespace()
170 .collect::<Vec<_>>()
171 .join(" ");
172
173 if clean_name.chars().count() > 100 {
174 clean_name = format!("{}...", clean_name.chars().take(97).collect::<String>());
175 }
176
177 symbols.push(Symbol {
178 name: clean_name,
179 kind,
180 line,
181 start_byte,
182 end_byte,
183 docstring: None,
184 });
185 }
186 }
187
188 for symbol in &mut symbols {
190 let mut attached_docs = Vec::new();
191 for (doc_line, doc_start, doc_end, doc_text) in &raw_docs {
192 if *doc_line < symbol.line && *doc_line >= symbol.line.saturating_sub(2) {
194 attached_docs.push(doc_text.clone());
195 }
196 else if *doc_start >= symbol.start_byte && *doc_end <= symbol.end_byte {
198 attached_docs.push(doc_text.clone());
199 }
200 }
201 if !attached_docs.is_empty() {
202 symbol.docstring = Some(attached_docs.join("\n\n"));
203 }
204 }
205
206 symbols.retain(|s| s.kind != "variable" || s.docstring.is_some());
208
209 if language == "vue" {
211 let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("Component");
212 symbols.push(Symbol {
213 name: file_name.trim_end_matches(".vue").to_string(),
214 kind: "component".to_string(),
215 line: 1,
216 start_byte: 0,
217 end_byte: content.len(),
218 docstring: None,
219 });
220 }
221
222 Ok(FileOutline {
223 path: path.to_string_lossy().to_string(),
224 language: language.to_string(),
225 symbols,
226 imports,
227 })
228 }
229}