agentroot_core/index/ast_chunker/strategies/
javascript.rs

1//! JavaScript/TypeScript-specific chunking strategy
2
3use super::{get_breadcrumb, line_numbers, ChunkingStrategy};
4use crate::error::Result;
5use crate::index::ast_chunker::types::{
6    compute_chunk_hash, ChunkMetadata, ChunkType, SemanticChunk,
7};
8use tree_sitter::Node;
9
/// Syntax-tree node kinds that are emitted as standalone chunks.
///
/// Returned by `semantic_node_types` and consulted by `extract_js_chunks`
/// for every visited node. The TypeScript-only kinds (`interface_declaration`,
/// `type_alias_declaration`, `enum_declaration`) live in the same table
/// because one strategy type serves both languages.
const JS_SEMANTIC_NODES: &[&str] = &[
    "function_declaration",
    "class_declaration",
    "method_definition",
    "arrow_function",
    "function_expression",
    "export_statement",
    "interface_declaration",
    "type_alias_declaration",
    "enum_declaration",
];
21
/// Chunking strategy shared by JavaScript and TypeScript sources.
///
/// The common traits are derived so the strategy can be logged, copied and
/// compared like any other small configuration value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct JavaScriptStrategy {
    /// `true` when produced chunks are labelled `"typescript"`, `false` for
    /// `"javascript"`.
    pub is_typescript: bool,
}
25
26impl JavaScriptStrategy {
27    pub fn javascript() -> Self {
28        Self {
29            is_typescript: false,
30        }
31    }
32
33    pub fn typescript() -> Self {
34        Self {
35            is_typescript: true,
36        }
37    }
38}
39
40impl ChunkingStrategy for JavaScriptStrategy {
41    fn semantic_node_types(&self) -> &[&str] {
42        JS_SEMANTIC_NODES
43    }
44
45    fn extract_chunks(&self, source: &str, root: Node) -> Result<Vec<SemanticChunk>> {
46        let mut chunks = Vec::new();
47        let mut cursor = root.walk();
48        extract_js_chunks(source, &mut cursor, &mut chunks, self, None);
49
50        if chunks.is_empty() {
51            chunks.push(SemanticChunk::new(source.to_string(), ChunkType::Text, 0));
52        }
53
54        Ok(chunks)
55    }
56
57    fn chunk_type_for_node(&self, node: Node) -> ChunkType {
58        match node.kind() {
59            "function_declaration" | "function_expression" | "arrow_function" => {
60                ChunkType::Function
61            }
62            "class_declaration" => ChunkType::Class,
63            "method_definition" => ChunkType::Method,
64            "interface_declaration" => ChunkType::Interface,
65            "type_alias_declaration" => ChunkType::Struct,
66            "enum_declaration" => ChunkType::Enum,
67            "export_statement" => {
68                if let Some(decl) = get_exported_declaration(node) {
69                    self.chunk_type_for_node(decl)
70                } else {
71                    ChunkType::Module
72                }
73            }
74            _ => ChunkType::Text,
75        }
76    }
77}
78
79fn extract_js_chunks(
80    source: &str,
81    cursor: &mut tree_sitter::TreeCursor,
82    chunks: &mut Vec<SemanticChunk>,
83    strategy: &JavaScriptStrategy,
84    parent_class: Option<&str>,
85) {
86    loop {
87        let node = cursor.node();
88        let kind = node.kind();
89
90        let is_semantic = JS_SEMANTIC_NODES.contains(&kind);
91        let is_var_with_fn = is_variable_with_function(node);
92
93        if is_semantic || is_var_with_fn {
94            let actual_node = if kind == "export_statement" {
95                get_exported_declaration(node).unwrap_or(node)
96            } else {
97                node
98            };
99
100            let leading = strategy.extract_leading_trivia(source, node);
101            let trailing = strategy.extract_trailing_trivia(source, node);
102            let text = source[node.start_byte()..node.end_byte()].to_string();
103            let (start_line, end_line) = line_numbers(source, node.start_byte(), node.end_byte());
104
105            let name = get_js_name(source, actual_node);
106            let breadcrumb = match (parent_class, &name) {
107                (Some(cls), Some(n)) => Some(format!("{}::{}", cls, n)),
108                (None, Some(n)) => Some(n.clone()),
109                _ => get_breadcrumb(source, node),
110            };
111
112            let chunk_type = if parent_class.is_some() {
113                ChunkType::Method
114            } else if is_var_with_fn {
115                ChunkType::Function
116            } else {
117                strategy.chunk_type_for_node(node)
118            };
119
120            let chunk_hash = compute_chunk_hash(&text, &leading, &trailing);
121            let lang: &'static str = if strategy.is_typescript {
122                "typescript"
123            } else {
124                "javascript"
125            };
126
127            let chunk = SemanticChunk {
128                text,
129                chunk_type,
130                chunk_hash,
131                position: node.start_byte(),
132                token_count: None,
133                metadata: ChunkMetadata {
134                    leading_trivia: leading,
135                    trailing_trivia: trailing,
136                    breadcrumb,
137                    language: Some(lang),
138                    start_line,
139                    end_line,
140                },
141            };
142            chunks.push(chunk);
143
144            if actual_node.kind() == "class_declaration" || actual_node.kind() == "class" {
145                let class_name = name.as_deref();
146                if cursor.goto_first_child() {
147                    extract_js_chunks(source, cursor, chunks, strategy, class_name);
148                    cursor.goto_parent();
149                }
150            }
151        } else if cursor.goto_first_child() {
152            extract_js_chunks(source, cursor, chunks, strategy, parent_class);
153            cursor.goto_parent();
154        }
155
156        if !cursor.goto_next_sibling() {
157            break;
158        }
159    }
160}
161
162fn get_exported_declaration(node: Node) -> Option<Node> {
163    let mut cursor = node.walk();
164    if cursor.goto_first_child() {
165        loop {
166            let child = cursor.node();
167            let k = child.kind();
168            if k != "export" && k != "default" && !k.contains("comment") {
169                return Some(child);
170            }
171            if !cursor.goto_next_sibling() {
172                break;
173            }
174        }
175    }
176    None
177}
178
179fn get_js_name(source: &str, node: Node) -> Option<String> {
180    if let Some(name_node) = node.child_by_field_name("name") {
181        return Some(source[name_node.start_byte()..name_node.end_byte()].to_string());
182    }
183
184    if node.kind() == "lexical_declaration" || node.kind() == "variable_declaration" {
185        let mut cursor = node.walk();
186        if cursor.goto_first_child() {
187            loop {
188                let child = cursor.node();
189                if child.kind() == "variable_declarator" {
190                    if let Some(name) = child.child_by_field_name("name") {
191                        return Some(source[name.start_byte()..name.end_byte()].to_string());
192                    }
193                }
194                if !cursor.goto_next_sibling() {
195                    break;
196                }
197            }
198        }
199    }
200
201    None
202}
203
204fn is_variable_with_function(node: Node) -> bool {
205    let kind = node.kind();
206    if kind != "lexical_declaration" && kind != "variable_declaration" {
207        return false;
208    }
209
210    let mut cursor = node.walk();
211    if cursor.goto_first_child() {
212        loop {
213            let child = cursor.node();
214            if child.kind() == "variable_declarator" {
215                if let Some(value) = child.child_by_field_name("value") {
216                    let vk = value.kind();
217                    if vk == "arrow_function" || vk == "function_expression" || vk == "function" {
218                        return true;
219                    }
220                }
221            }
222            if !cursor.goto_next_sibling() {
223                break;
224            }
225        }
226    }
227    false
228}
229
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::ast_chunker::language::Language;
    use crate::index::ast_chunker::parser::parse;

    // `extract_chunks` always returns at least the whole-file text fallback,
    // so a bare "not empty" check proves nothing. Each test therefore asserts
    // on the type and breadcrumb of the chunk it expects.

    /// True when `chunks` holds a chunk of `chunk_type` whose breadcrumb is `breadcrumb`.
    fn has_chunk(chunks: &[SemanticChunk], chunk_type: ChunkType, breadcrumb: &str) -> bool {
        chunks.iter().any(|c| {
            c.chunk_type == chunk_type && c.metadata.breadcrumb.as_deref() == Some(breadcrumb)
        })
    }

    #[test]
    fn test_extract_function() {
        let source = r#"
function hello() {
    console.log("hello");
}
"#;
        let tree = parse(source, Language::JavaScript).unwrap();
        let strategy = JavaScriptStrategy::javascript();
        let chunks = strategy.extract_chunks(source, tree.root_node()).unwrap();

        assert!(has_chunk(&chunks, ChunkType::Function, "hello"));
        assert!(chunks
            .iter()
            .all(|c| c.metadata.language == Some("javascript")));
    }

    #[test]
    fn test_extract_class() {
        let source = r#"
class MyClass {
    constructor() {}
    method() {}
}
"#;
        let tree = parse(source, Language::JavaScript).unwrap();
        let strategy = JavaScriptStrategy::javascript();
        let chunks = strategy.extract_chunks(source, tree.root_node()).unwrap();

        assert!(has_chunk(&chunks, ChunkType::Class, "MyClass"));
        assert!(has_chunk(&chunks, ChunkType::Method, "MyClass::constructor"));
        assert!(has_chunk(&chunks, ChunkType::Method, "MyClass::method"));
    }

    #[test]
    fn test_extract_arrow_function() {
        let source = r#"
const myFunc = () => {
    return 42;
};
"#;
        let tree = parse(source, Language::JavaScript).unwrap();
        let strategy = JavaScriptStrategy::javascript();
        let chunks = strategy.extract_chunks(source, tree.root_node()).unwrap();

        assert!(has_chunk(&chunks, ChunkType::Function, "myFunc"));
    }

    #[test]
    fn test_extract_typescript_interface() {
        let source = r#"
interface User {
    name: string;
    age: number;
}
"#;
        let tree = parse(source, Language::TypeScript).unwrap();
        let strategy = JavaScriptStrategy::typescript();
        let chunks = strategy.extract_chunks(source, tree.root_node()).unwrap();

        assert!(has_chunk(&chunks, ChunkType::Interface, "User"));
        assert!(chunks
            .iter()
            .all(|c| c.metadata.language == Some("typescript")));
    }
}