agentroot_core/index/ast_chunker/strategies/
mod.rs1mod go;
4mod javascript;
5mod python;
6mod rust;
7
8pub use go::GoStrategy;
9pub use javascript::JavaScriptStrategy;
10pub use python::PythonStrategy;
11pub use rust::RustStrategy;
12
13use super::language::Language;
14use super::types::{ChunkType, SemanticChunk};
15use crate::error::Result;
16use tree_sitter::Node;
17
18pub trait ChunkingStrategy: Send + Sync {
20 fn semantic_node_types(&self) -> &[&str];
22
23 fn extract_chunks(&self, source: &str, root: Node) -> Result<Vec<SemanticChunk>>;
25
26 fn chunk_type_for_node(&self, node: Node) -> ChunkType;
28
29 fn extract_leading_trivia(&self, source: &str, node: Node) -> String {
31 extract_leading_comments(source, node)
32 }
33
34 fn extract_trailing_trivia(&self, source: &str, node: Node) -> String {
36 extract_trailing_comment(source, node)
37 }
38}
39
40pub enum LanguageStrategy {
42 Rust(RustStrategy),
43 Python(PythonStrategy),
44 JavaScript(JavaScriptStrategy),
45 Go(GoStrategy),
46}
47
48impl LanguageStrategy {
49 pub fn for_language(language: Language) -> Self {
50 match language {
51 Language::Rust => Self::Rust(RustStrategy),
52 Language::Python => Self::Python(PythonStrategy),
53 Language::JavaScript => Self::JavaScript(JavaScriptStrategy::javascript()),
54 Language::TypeScript | Language::TypeScriptTsx => {
55 Self::JavaScript(JavaScriptStrategy::typescript())
56 }
57 Language::Go => Self::Go(GoStrategy),
58 }
59 }
60
61 pub fn extract_chunks(&self, source: &str, root: Node) -> Result<Vec<SemanticChunk>> {
62 match self {
63 Self::Rust(s) => s.extract_chunks(source, root),
64 Self::Python(s) => s.extract_chunks(source, root),
65 Self::JavaScript(s) => s.extract_chunks(source, root),
66 Self::Go(s) => s.extract_chunks(source, root),
67 }
68 }
69}
70
71pub fn extract_leading_comments(source: &str, node: Node) -> String {
73 let start_byte = node.start_byte();
74 if start_byte == 0 {
75 return String::new();
76 }
77
78 let preceding = &source[..start_byte];
79 let lines: Vec<&str> = preceding.lines().rev().collect();
80 let mut trivia_lines = Vec::new();
81
82 for line in lines {
83 let trimmed = line.trim();
84 if trimmed.is_empty() {
85 if !trivia_lines.is_empty() {
86 break;
87 }
88 continue;
89 }
90 if is_comment_line(trimmed) {
91 trivia_lines.push(line);
92 } else {
93 break;
94 }
95 }
96
97 trivia_lines.reverse();
98 if trivia_lines.is_empty() {
99 String::new()
100 } else {
101 trivia_lines.join("\n")
102 }
103}
104
/// Reports whether `line` begins with one of the recognised comment markers.
///
/// The check is a plain prefix test on `line` exactly as given: callers are
/// expected to pass an already-trimmed line, since leading whitespace is not
/// skipped here. Note that the `#` and `*` markers match any line starting
/// with those characters.
fn is_comment_line(line: &str) -> bool {
    // Same marker set as before, kept in full even where one marker is a
    // prefix of another (e.g. `//` and `///`).
    const COMMENT_MARKERS: [&str; 9] = [
        "//", "#", "/*", "*", "*/", "///", "//!", "\"\"\"", "'''",
    ];

    COMMENT_MARKERS
        .iter()
        .any(|marker| line.starts_with(*marker))
}
117
118pub fn extract_trailing_comment(source: &str, node: Node) -> String {
120 let end_byte = node.end_byte();
121 if end_byte >= source.len() {
122 return String::new();
123 }
124
125 let following = &source[end_byte..];
126 if let Some(line_end) = following.find('\n') {
127 let same_line = following[..line_end].trim();
128 if same_line.starts_with("//") || same_line.starts_with('#') {
129 return same_line.to_string();
130 }
131 }
132 String::new()
133}
134
/// Converts a byte range into 1-based `(start_line, end_line)` numbers.
///
/// A line number is one plus the count of `\n` bytes that occur before the
/// given offset, so an offset sitting on a newline still belongs to the line
/// that newline terminates.
///
/// Offsets beyond the end of `source` are clamped to its length, and offsets
/// that fall inside a multi-byte character are accepted: counting is done on
/// raw bytes, which is safe because `\n` never appears inside a UTF-8
/// multi-byte sequence. The function therefore never panics.
pub fn line_numbers(source: &str, start_byte: usize, end_byte: usize) -> (usize, usize) {
    let bytes = source.as_bytes();
    let line_at = |offset: usize| -> usize {
        let upto = offset.min(bytes.len());
        bytes[..upto].iter().filter(|&&b| b == b'\n').count() + 1
    };

    (line_at(start_byte), line_at(end_byte))
}
141
142pub fn get_breadcrumb(source: &str, node: Node) -> Option<String> {
144 let mut parts = Vec::new();
145 let mut current = Some(node);
146
147 while let Some(n) = current {
148 if let Some(name) = extract_name_from_node(source, n) {
149 parts.push(name);
150 }
151 current = n.parent();
152 }
153
154 if parts.is_empty() {
155 None
156 } else {
157 parts.reverse();
158 Some(parts.join("::"))
159 }
160}
161
162fn extract_name_from_node(source: &str, node: Node) -> Option<String> {
164 let kind = node.kind();
165 let name_field = match kind {
166 "function_item"
167 | "function_definition"
168 | "function_declaration"
169 | "method_definition"
170 | "method_declaration" => "name",
171 "impl_item" => "type",
172 "struct_item" | "class_definition" | "class_declaration" => "name",
173 "enum_item" | "type_declaration" => "name",
174 "trait_item" | "interface_declaration" => "name",
175 _ => return None,
176 };
177
178 node.child_by_field_name(name_field)
179 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
180}