Skip to main content

argyph_parse/languages/
typescript.rs

1use crate::chunker::ast_chunks;
2use crate::error::{ParseError, Result};
3use crate::types::{ByteRange, ChunkKind, Import, ParsedFile, Symbol, SymbolId, SymbolKind};
4use argyph_fs::{FileEntry, Language};
5use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator};
6
/// Tree-sitter query text for TypeScript, embedded at compile time from
/// `queries/typescript.scm` (path relative to this source file).
static QUERY_SRC: &str = include_str!("../../queries/typescript.scm");
8
9pub fn parse_typescript(
10    file: &FileEntry,
11    source: &str,
12    max_chunk_size: usize,
13) -> Result<ParsedFile> {
14    let lang: tree_sitter::Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into();
15
16    let mut parser = Parser::new();
17    parser.set_language(&lang)?;
18
19    let tree = parser
20        .parse(source, None)
21        .ok_or_else(|| ParseError::Parse("tree-sitter returned None".into()))?;
22
23    let root = tree.root_node();
24    let source_bytes = source.as_bytes();
25
26    let symbols = extract_symbols(file, &lang, &root, source_bytes)?;
27    let imports = extract_imports(&root, source_bytes);
28    let chunks = ast_chunks(
29        &file.path,
30        &root,
31        source,
32        Language::TypeScript,
33        max_chunk_size,
34        chunk_kind_for_node,
35        is_chunk_boundary_ts,
36    )?;
37
38    Ok(ParsedFile {
39        symbols,
40        chunks,
41        imports,
42    })
43}
44
45fn extract_symbols(
46    file: &FileEntry,
47    lang: &tree_sitter::Language,
48    root: &tree_sitter::Node,
49    source: &[u8],
50) -> Result<Vec<Symbol>> {
51    let query = Query::new(lang, QUERY_SRC)?;
52    let mut cursor = QueryCursor::new();
53    let mut matches_iter = cursor.matches(&query, *root, source);
54    let mut symbols = Vec::new();
55
56    loop {
57        matches_iter.advance();
58        let Some(m) = matches_iter.get() else { break };
59
60        let mut def_node: Option<tree_sitter::Node> = None;
61        let mut name_node: Option<tree_sitter::Node> = None;
62
63        for cap in m.captures {
64            let cap_name = query.capture_names()[cap.index as usize];
65            match cap_name {
66                "def" => def_node = Some(cap.node),
67                "name" => name_node = Some(cap.node),
68                _ => {}
69            }
70        }
71
72        let Some(def) = def_node else { continue };
73        let name = name_node
74            .and_then(|n| n.utf8_text(source).ok())
75            .unwrap_or("");
76        if name.is_empty() {
77            continue;
78        }
79
80        let kind = match def.kind() {
81            "function_declaration" | "generator_function_declaration" => {
82                if is_method_ts(&def) {
83                    SymbolKind::Method
84                } else {
85                    SymbolKind::Function
86                }
87            }
88            "method_definition" => SymbolKind::Method,
89            "class_declaration" => SymbolKind::Class,
90            "interface_declaration" => SymbolKind::Interface,
91            "type_alias_declaration" => SymbolKind::TypeAlias,
92            "enum_declaration" => SymbolKind::Enum,
93            "lexical_declaration" | "export_statement" | "variable_declarator" => {
94                SymbolKind::Variable
95            }
96            _ => continue,
97        };
98
99        let sig = signature_node(&def, source);
100        let id = SymbolId::new(&file.path, name, def.start_byte());
101
102        symbols.push(Symbol {
103            id,
104            name: name.to_string(),
105            kind,
106            file: file.path.clone(),
107            range: ByteRange::new(def.start_byte(), def.end_byte()),
108            signature: sig,
109            parent: None,
110        });
111    }
112
113    Ok(symbols)
114}
115
116fn is_method_ts(node: &tree_sitter::Node) -> bool {
117    node.parent().is_some_and(|p| p.kind() == "class_body")
118}
119
120fn extract_imports(root: &tree_sitter::Node, source: &[u8]) -> Vec<Import> {
121    let mut imports = Vec::new();
122    collect_imports(*root, source, &mut imports);
123    imports
124}
125
126fn collect_imports(node: tree_sitter::Node, source: &[u8], out: &mut Vec<Import>) {
127    match node.kind() {
128        "import_statement" => {
129            if let Ok(raw) = node.utf8_text(source) {
130                let (mod_path, items) = parse_ts_import(raw);
131                out.push(Import {
132                    raw: raw.to_string(),
133                    module_path: mod_path,
134                    items,
135                    range: ByteRange::new(node.start_byte(), node.end_byte()),
136                });
137            }
138            return;
139        }
140        "export_statement" => {
141            if let Ok(raw) = node.utf8_text(source) {
142                if raw.contains("from") {
143                    let (mod_path, items) = parse_ts_import(raw);
144                    out.push(Import {
145                        raw: raw.to_string(),
146                        module_path: mod_path,
147                        items,
148                        range: ByteRange::new(node.start_byte(), node.end_byte()),
149                    });
150                }
151            }
152            return;
153        }
154        _ => {}
155    }
156    for i in 0..node.child_count() {
157        if let Some(child) = node.child(i as u32) {
158            collect_imports(child, source, out);
159        }
160    }
161}
162
/// Splits the text of an `import` / re-export statement into
/// `(module_path, items)`.
///
/// * `module_path` – for statements with a `from` clause, the module specifier
///   with surrounding quotes removed, split on `/` with empty segments dropped
///   (`'./math'` becomes `[".", "math"]`). For a side-effect import
///   (`import './x';`) the unquoted remainder is split on `,` instead.
/// * `items` – the imported names. Named imports inside `{ ... }` have a
///   leading `type ` and any ` as alias` suffix removed; anything outside the
///   braces (default import, `* as ns`, `*`) is kept verbatim, one entry per
///   comma-separated part, and listed before the named imports.
///
/// Input that matches neither form yields two empty vectors.
fn parse_ts_import(raw: &str) -> (Vec<String>, Vec<String>) {
    const FROM_KEYWORD: &str = " from ";

    fn is_quote(c: char) -> bool {
        c == '\'' || c == '"'
    }

    let trimmed = raw.trim_end_matches(';').trim();
    let mut mod_path = Vec::new();
    let mut items = Vec::new();

    // The real `from` keyword is the one directly followed by the quoted module
    // specifier. Taking the first textual " from " breaks on statements such as
    // `import { from } from 'rxjs'`, where a binding is itself called `from`.
    // If no occurrence is followed by a quote, fall back to the first one.
    let from_idx = trimmed
        .match_indices(FROM_KEYWORD)
        .map(|(idx, _)| idx)
        .find(|&idx| {
            trimmed[idx + FROM_KEYWORD.len()..]
                .trim_start()
                .starts_with(is_quote)
        })
        .or_else(|| trimmed.find(FROM_KEYWORD));

    if let Some(from_idx) = from_idx {
        let before_from = &trimmed[..from_idx];
        let after_from = &trimmed[from_idx + FROM_KEYWORD.len()..];

        let module_str = after_from.trim_matches(is_quote).trim();
        mod_path.extend(
            module_str
                .split('/')
                .map(str::trim)
                .filter(|segment| !segment.is_empty())
                .map(str::to_string),
        );

        // Drop the leading `import` / `import type` / `export` keyword(s).
        let specifier = if let Some(rest) = before_from.strip_prefix("import type ") {
            rest
        } else if let Some(rest) = before_from.strip_prefix("import ") {
            rest
        } else if before_from.starts_with("export") {
            before_from.trim_start_matches("export")
        } else {
            before_from
        };
        let specifier = specifier.trim();
        // Handles `export type { ... } from`.
        let specifier = specifier.strip_prefix("type ").unwrap_or(specifier);

        // Separate what precedes the braces (default / namespace import) from
        // the named-import list, so `React, { useState }` yields two items
        // instead of one unparsed string.
        let (bare, named) = match specifier.split_once('{') {
            Some((head, tail)) => (head, Some(tail)),
            None => (specifier, None),
        };

        items.extend(
            bare.split(',')
                .map(str::trim)
                .filter(|part| !part.is_empty())
                .map(str::to_string),
        );

        if let Some(named) = named {
            let inner = named.trim().trim_end_matches('}');
            for item in inner.split(',') {
                let item = item.trim();
                let item = item.strip_prefix("type ").unwrap_or(item);
                // Record the exported name, not the local alias.
                let item = match item.split_once(" as ") {
                    Some((original, _alias)) => original.trim(),
                    None => item,
                };
                if !item.is_empty() {
                    items.push(item.to_string());
                }
            }
        }
    } else if let Some(rest) = trimmed.strip_prefix("import ") {
        // Side-effect import: `import './styles.css'`.
        for item in rest.split(',') {
            let item = item.trim().trim_matches(is_quote);
            if !item.is_empty() {
                mod_path.push(item.to_string());
            }
        }
    }

    (mod_path, items)
}
226
227fn signature_node(node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
228    let sig_end = node
229        .child_by_field_name("body")
230        .map(|b| b.start_byte())
231        .unwrap_or(node.end_byte());
232
233    let sig_bytes = &source[node.start_byte()..sig_end];
234    let sig = std::str::from_utf8(sig_bytes).unwrap_or("").to_string();
235    let sig = sig.trim().to_string();
236    if sig.is_empty() {
237        None
238    } else {
239        Some(sig)
240    }
241}
242
243fn chunk_kind_for_node(kind: &str) -> ChunkKind {
244    match kind {
245        "function_declaration" | "generator_function_declaration" | "method_definition" => {
246            ChunkKind::FunctionBody
247        }
248        "class_declaration"
249        | "interface_declaration"
250        | "type_alias_declaration"
251        | "enum_declaration" => ChunkKind::TypeDef,
252        _ => ChunkKind::TopLevel,
253    }
254}
255
/// Returns `true` when `kind` is one of the node kinds listed as a chunk
/// boundary for TypeScript.
fn is_chunk_boundary_ts(kind: &str) -> bool {
    const BOUNDARY_KINDS: [&str; 9] = [
        "function_declaration",
        "generator_function_declaration",
        "method_definition",
        "class_declaration",
        "interface_declaration",
        "type_alias_declaration",
        "enum_declaration",
        "lexical_declaration",
        "export_statement",
    ];

    BOUNDARY_KINDS.contains(&kind)
}
270
#[cfg(test)]
// Tests unwrap on purpose: a panic is the intended failure mode here.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use camino::Utf8PathBuf;
    use std::time::UNIX_EPOCH;

    /// Builds a `FileEntry` for `path` with an all-zero hash, the TypeScript
    /// language tag, size 0 and the Unix epoch as modification time.
    fn make_file(path: &str) -> FileEntry {
        FileEntry {
            path: Utf8PathBuf::from(path),
            hash: argyph_fs::Blake3Hash::from([0u8; 32]),
            language: Some(Language::TypeScript),
            size: 0,
            modified: UNIX_EPOCH,
        }
    }

    /// Returns `true` when every name in `names` appears among `symbols`.
    fn symbols_contain(symbols: &[Symbol], names: &[&str]) -> bool {
        let got: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        names.iter().all(|n| got.contains(n))
    }

    #[test]
    fn parse_ts_function() {
        let source = "export function add(a: number, b: number): number {\n  return a + b;\n}\n";
        let file = make_file("src/math.ts");
        let result = parse_typescript(&file, source, 4096).unwrap();
        assert_eq!(result.symbols.len(), 1);
        assert_eq!(result.symbols[0].name, "add");
        assert_eq!(result.symbols[0].kind, SymbolKind::Function);
    }

    #[test]
    fn parse_ts_class_and_method() {
        let source = r#"export class Greeter {
    greeting: string;
    greet(user: string): string {
        return `${this.greeting}, ${user}`;
    }
}
"#;
        let file = make_file("src/greeter.ts");
        let result = parse_typescript(&file, source, 4096).unwrap();
        assert!(symbols_contain(&result.symbols, &["Greeter", "greet"]));
    }

    #[test]
    fn parse_ts_interface_and_type() {
        let source = r#"export interface User {
    name: string;
    age: number;
}

export type Role = "admin" | "user";
"#;
        let file = make_file("src/types.ts");
        let result = parse_typescript(&file, source, 4096).unwrap();
        assert!(symbols_contain(&result.symbols, &["User", "Role"]));
    }

    #[test]
    fn parse_ts_import() {
        let source = "import { add, multiply } from './math';\n\nfunction f() {}\n";
        let file = make_file("src/index.ts");
        let result = parse_typescript(&file, source, 4096).unwrap();
        assert_eq!(result.imports.len(), 1);

        // Check what was parsed, not just that something was recorded.
        let import = &result.imports[0];
        assert_eq!(import.module_path, vec![".", "math"]);
        assert_eq!(import.items, vec!["add", "multiply"]);
    }

    #[test]
    fn parse_ts_chunks_produced() {
        let source = "function a() {}\nfunction b() {}\nclass C {}\n";
        let file = make_file("src/app.ts");
        let result = parse_typescript(&file, source, 4096).unwrap();
        assert!(!result.chunks.is_empty());
    }

    #[test]
    fn parse_ts_enum() {
        let source = "export enum Status { Active, Inactive }\n";
        let file = make_file("src/status.ts");
        let result = parse_typescript(&file, source, 4096).unwrap();
        assert_eq!(result.symbols.len(), 1);
        assert_eq!(result.symbols[0].name, "Status");
    }
}
355}