lynx_parser/symbol_extraction/
python.rs1use anyhow::Result;
2use lynx_protocol::{CodeChunk, SymbolRecord};
3use std::path::Path;
4use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator};
5use tree_sitter_python::LANGUAGE;
6
7pub fn extract(path: &Path, content: &str) -> Result<(Vec<CodeChunk>, Vec<SymbolRecord>)> {
8 let mut parser = Parser::new();
9 parser.set_language(&LANGUAGE.into())?;
10
11 let tree = parser
12 .parse(content, None)
13 .ok_or_else(|| anyhow::anyhow!("Failed to parse Python file"))?;
14 let root_node = tree.root_node();
15
16 let mut chunks = Vec::new();
17 let mut symbols = Vec::new();
18
19 let query_str = r#"
20 (function_definition name: (identifier) @func_name) @func
21 (class_definition name: (identifier) @class_name) @class
22 "#;
23
24 let query = Query::new(&LANGUAGE.into(), query_str)?;
25 let mut cursor = QueryCursor::new();
26 let mut captures = cursor.captures(&query, root_node, content.as_bytes());
27
28 while let Some(&(ref mat, capture_index)) = captures.next() {
29 let capture = mat.captures[capture_index];
30 let capture_name = query.capture_names()[capture.index as usize];
31
32 if !["func", "class"].contains(&capture_name) {
33 continue;
34 }
35
36 let node = capture.node;
37 let start_line = node.start_position().row + 1;
38 let end_line = node.end_position().row + 1;
39 let raw_content = node.utf8_text(content.as_bytes())?.to_string();
40
41 let symbol_name = match resolve_symbol_name(mat, node, &query, content.as_bytes()) {
42 Some(name) => name,
43 None => continue,
44 };
45
46 let file_path = path.to_string_lossy().replace('\\', "/");
47 let symbol_id = format!("{}:{}:{}", capture_name, file_path, symbol_name);
48
49 symbols.push(SymbolRecord {
50 symbol_id: symbol_id.clone(),
51 symbol_name: symbol_name.clone(),
52 file_path: file_path.clone(),
53 start_line,
54 end_line,
55 });
56
57 chunks.push(CodeChunk {
58 id: blake3::hash(raw_content.as_bytes()).to_string(),
59 file_path: file_path.clone(),
60 start_line,
61 end_line,
62 raw_content,
63 symbols_defined: vec![symbol_id],
64 });
65 }
66
67 Ok((chunks, symbols))
68}
69
70fn resolve_symbol_name(
71 mat: &tree_sitter::QueryMatch,
72 node: tree_sitter::Node,
73 query: &Query,
74 content: &[u8],
75) -> Option<String> {
76 if let Some(capture) = mat.captures.iter().find(|c| {
77 let name = query.capture_names()[c.index as usize];
78 name.ends_with("_name")
79 }) {
80 if let Ok(text) = capture.node.utf8_text(content) {
81 return Some(text.to_string());
82 }
83 }
84
85 if let Some(name_node) = node
86 .child_by_field_name("name")
87 .or_else(|| node.child_by_field_name("type"))
88 {
89 if let Ok(text) = name_node.utf8_text(content) {
90 return Some(text.to_string());
91 }
92 }
93
94 find_identifier_in_node(node, content)
95}
96
97fn find_identifier_in_node(node: tree_sitter::Node, content: &[u8]) -> Option<String> {
98 let mut cursor = node.walk();
99 for child in node.named_children(&mut cursor) {
100 if matches!(
101 child.kind(),
102 "identifier" | "type_identifier" | "field_identifier" | "property_identifier"
103 ) {
104 if let Ok(text) = child.utf8_text(content) {
105 return Some(text.to_string());
106 }
107 }
108 if let Some(name) = find_identifier_in_node(child, content) {
109 return Some(name);
110 }
111 }
112 None
113}