project_map_cli_rust/core/
parser.rs1use tree_sitter::{Parser, Query, QueryCursor};
2use streaming_iterator::StreamingIterator;
3use std::fs;
4use std::path::Path;
5use crate::error::{AppError, Result};
6use serde::{Serialize, Deserialize};
7
8#[derive(Debug, Serialize, Deserialize, Clone)]
9pub struct Symbol {
10 pub name: String,
11 pub kind: String,
12 pub line: usize,
13 pub start_byte: usize,
14 pub end_byte: usize,
15 pub docstring: Option<String>,
16}
17
18#[derive(Debug, Serialize, Deserialize)]
19pub struct FileOutline {
20 pub path: String,
21 pub language: String,
22 pub symbols: Vec<Symbol>,
23 pub imports: Vec<String>,
24}
25
26pub struct CodeParser {
27 parser: Parser,
28}
29
30impl CodeParser {
31 pub fn new() -> Self {
32 Self {
33 parser: Parser::new(),
34 }
35 }
36
37 pub fn parse_file(&mut self, path: &Path) -> Result<FileOutline> {
38 let extension = path.extension()
39 .and_then(|s| s.to_str())
40 .unwrap_or("");
41
42 let (language, ts_language) = match extension {
43 "py" => ("python", tree_sitter_python::LANGUAGE.into()),
44 "rs" => ("rust", tree_sitter_rust::LANGUAGE.into()),
45 "ts" => ("typescript", tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
46 "tsx" => ("typescript", tree_sitter_typescript::LANGUAGE_TSX.into()),
47 "kt" => ("kotlin", tree_sitter_kotlin_ng::LANGUAGE.into()),
48 "sql" => ("sql", tree_sitter_sequel::LANGUAGE.into()),
49 "vue" => ("vue", tree_sitter_vue_updated::language().into()),
50 "md" => {
51 return Ok(FileOutline {
52 path: path.to_string_lossy().to_string(),
53 language: "markdown".to_string(),
54 symbols: Vec::new(),
55 imports: Vec::new(),
56 });
57 }
58 _ => return Err(AppError::Parser(format!("Unsupported extension: {}", extension))),
59 };
60
61 self.parser.set_language(&ts_language)
62 .map_err(|e| AppError::Parser(format!("Failed to set language: {}", e)))?;
63
64 let content = fs::read_to_string(path)?;
65 let tree = self.parser.parse(&content, None)
66 .ok_or_else(|| AppError::Parser("Failed to parse file".to_string()))?;
67
68 let query_str = match language {
69 "python" => "((class_definition name: (identifier) @name) @class)
70 ((function_definition name: (identifier) @name) @function)
71 (import_statement (dotted_name) @import)
72 (import_from_statement module_name: (dotted_name) @import)
73 (expression_statement (string) @doc)",
74 "rust" => "((struct_item name: (type_identifier) @name) @struct)
75 ((enum_item name: (type_identifier) @name) @enum)
76 ((function_item name: (identifier) @name) @function)
77 ((trait_item name: (type_identifier) @name) @trait)
78 ((impl_item type: (_) @name) @impl)
79 (line_doc_comment) @doc
80 (block_doc_comment) @doc",
81 "typescript" => "((class_declaration name: (type_identifier) @name) @class)
82 ((function_declaration name: (identifier) @name) @function)
83 ((generator_function_declaration name: (identifier) @name) @function)
84 ((interface_declaration name: (type_identifier) @name) @interface)
85 ((type_alias_declaration name: (type_identifier) @name) @type)
86 ((enum_declaration name: (identifier) @name) @enum)
87 ((method_definition name: (property_identifier) @name) @function)
88 ((variable_declarator name: (identifier) @name value: (arrow_function)) @function)
89 ((variable_declarator name: (identifier) @name value: (function_expression)) @function)
90 ((variable_declarator name: (identifier) @name) @variable)
91 (internal_module name: (identifier) @name) @module
92 (import_statement source: (string (string_fragment) @import))
93 (export_statement source: (string (string_fragment) @import))
94 (export_statement (export_clause (export_specifier name: (identifier) @name)) @export)
95 (comment) @doc",
96 "kotlin" => "((class_declaration name: (identifier) @name) @class)
97 ((object_declaration name: (identifier) @name) @class)
98 ((companion_object name: (identifier) @name) @class)
99 ((function_declaration name: (identifier) @name) @function)
100 (import (qualified_identifier) @import)
101 (line_comment) @doc
102 (block_comment) @doc",
103 "sql" => "((identifier) @name) @symbol",
104 "vue" => "((tag_name) @name) @component",
105 _ => unreachable!(),
106 };
107
108 let query = Query::new(&ts_language, query_str)
109 .map_err(|e| AppError::Parser(format!("Failed to create query: {}", e)))?;
110
111 let mut cursor = QueryCursor::new();
112 let mut matches = cursor.matches(&query, tree.root_node(), content.as_bytes());
113
114 let mut symbols = Vec::new();
115 let mut imports = Vec::new();
116 let mut raw_docs = Vec::new();
117
118 while let Some(m) = matches.next() {
119 let mut name = String::new();
120 let mut kind = String::new();
121 let mut line = 0;
122 let mut start_byte = 0;
123 let mut end_byte = 0;
124 let mut is_import = false;
125 let mut is_doc = false;
126
127 for capture in m.captures {
128 let capture_name = query.capture_names()[capture.index as usize].to_string();
129 if capture_name == "import" {
130 let imp = capture.node.utf8_text(content.as_bytes())
131 .unwrap_or("")
132 .to_string();
133 if !imp.is_empty() {
134 imports.push(imp);
135 }
136 is_import = true;
137 break;
138 } else if capture_name == "doc" {
139 let text = capture.node.utf8_text(content.as_bytes()).unwrap_or("");
140 if language == "python" && !(text.starts_with("\"\"\"") || text.starts_with("'''")) {
142 continue;
143 }
144
145 raw_docs.push((capture.node.start_position().row + 1, capture.node.start_byte(), capture.node.end_byte(), text.to_string()));
146 is_doc = true;
147 break;
148 } else if capture_name == "name" {
149 name = capture.node.utf8_text(content.as_bytes())
150 .unwrap_or("unknown")
151 .to_string();
152 } else {
153 kind = capture_name;
154 line = capture.node.start_position().row + 1;
155 start_byte = capture.node.start_byte();
156 end_byte = capture.node.end_byte();
157 }
158 }
159
160 if !is_import && !is_doc && !name.is_empty() && !kind.is_empty() {
161 let mut clean_name = name.replace("\n", " ")
162 .split_whitespace()
163 .collect::<Vec<_>>()
164 .join(" ");
165
166 if clean_name.chars().count() > 100 {
167 clean_name = format!("{}...", clean_name.chars().take(97).collect::<String>());
168 }
169
170 symbols.push(Symbol {
171 name: clean_name,
172 kind,
173 line,
174 start_byte,
175 end_byte,
176 docstring: None,
177 });
178 }
179 }
180
181 for symbol in &mut symbols {
183 let mut attached_docs = Vec::new();
184 for (doc_line, doc_start, doc_end, doc_text) in &raw_docs {
185 if *doc_line < symbol.line && *doc_line >= symbol.line.saturating_sub(2) {
187 attached_docs.push(doc_text.clone());
188 }
189 else if *doc_start >= symbol.start_byte && *doc_end <= symbol.end_byte {
191 attached_docs.push(doc_text.clone());
192 }
193 }
194 if !attached_docs.is_empty() {
195 symbol.docstring = Some(attached_docs.join("\n\n"));
196 }
197 }
198
199 symbols.retain(|s| s.kind != "variable" || s.docstring.is_some());
201
202 if language == "vue" {
204 let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("Component");
205 symbols.push(Symbol {
206 name: file_name.trim_end_matches(".vue").to_string(),
207 kind: "component".to_string(),
208 line: 1,
209 start_byte: 0,
210 end_byte: content.len(),
211 docstring: None,
212 });
213 }
214
215 Ok(FileOutline {
216 path: path.to_string_lossy().to_string(),
217 language: language.to_string(),
218 symbols,
219 imports,
220 })
221 }
222}