Skip to main content

cha_parser/
lib.rs

1mod c_imports;
2mod c_lang;
3mod cpp;
4mod golang;
5mod golang_imports;
6mod python;
7mod python_imports;
8mod rust_imports;
9mod rust_lang;
10mod switch_arms;
11mod type_aliases;
12mod type_ref;
13mod typescript;
14mod typescript_imports;
15
16pub use c_lang::{CParser, CppParser};
17pub use cha_core::{ClassInfo, CommentInfo, FunctionInfo, ImportInfo, SourceModel};
18pub use golang::GolangParser;
19pub use python::PythonParser;
20pub use rust_lang::RustParser;
21pub use typescript::{TsxParser, TypeScriptParser};
22
23use cha_core::SourceFile;
24
25/// Result of parsing a file, including the tree-sitter tree for downstream AST queries.
26pub struct ParseResult {
27    pub model: SourceModel,
28    pub tree: tree_sitter::Tree,
29    pub ts_language: tree_sitter::Language,
30}
31
32/// Trait for language-specific parsers.
33pub trait LanguageParser: Send + Sync {
34    fn language_name(&self) -> &str;
35    fn parse(&self, file: &SourceFile) -> Option<SourceModel>;
36    fn ts_language(&self) -> tree_sitter::Language;
37    fn parse_tree(&self, content: &str) -> Option<tree_sitter::Tree> {
38        let mut parser = tree_sitter::Parser::new();
39        parser.set_language(&self.ts_language()).ok()?;
40        parser.parse(content, None)
41    }
42}
43
44/// Detect language from file extension and parse, returning model + tree.
45pub fn parse_file_full(file: &SourceFile) -> Option<ParseResult> {
46    let ext = file.path.extension()?.to_str()?;
47    // cha:ignore switch_statement
48    let parser: Box<dyn LanguageParser> = match ext {
49        "tsx" => Box::new(TsxParser),
50        "ts" | "mts" | "cts" => Box::new(TypeScriptParser),
51        "rs" => Box::new(RustParser),
52        "py" => Box::new(PythonParser),
53        "go" => Box::new(GolangParser),
54        "h" if looks_like_cpp(&file.content) => Box::new(CppParser),
55        "c" | "h" => Box::new(CParser),
56        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Box::new(CppParser),
57        _ => return None,
58    };
59    let model = parser.parse(file)?;
60    let tree = parser.parse_tree(&file.content)?;
61    let ts_language = parser.ts_language();
62    Some(ParseResult {
63        model,
64        tree,
65        ts_language,
66    })
67}
68
69/// Detect language from file extension and parse (legacy API, no tree returned).
70pub fn parse_file(file: &SourceFile) -> Option<SourceModel> {
71    let ext = file.path.extension()?.to_str()?;
72    // cha:ignore switch_statement
73    let parser: Box<dyn LanguageParser> = match ext {
74        "tsx" => Box::new(TsxParser),
75        "ts" | "mts" | "cts" => Box::new(TypeScriptParser),
76        "rs" => Box::new(RustParser),
77        "py" => Box::new(PythonParser),
78        "go" => Box::new(GolangParser),
79        "h" if looks_like_cpp(&file.content) => Box::new(CppParser),
80        "c" | "h" => Box::new(CParser),
81        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Box::new(CppParser),
82        _ => return None,
83    };
84    parser.parse(file)
85}
86
/// Heuristic used for `.h` files: reports whether any line of `content`
/// contains a construct that marks the header as C++.
///
/// A line counts as C++ when, after trimming surrounding whitespace, it starts
/// with one of a few C++ keywords, or when it contains an access specifier
/// (`public:`, `private:`, `protected:`) anywhere.
fn looks_like_cpp(content: &str) -> bool {
    // Matched only at the start of the trimmed line.
    const LINE_PREFIXES: [&str; 4] = ["class ", "namespace ", "template", "using "];
    // Matched anywhere within the line.
    const ACCESS_SPECIFIERS: [&str; 3] = ["public:", "private:", "protected:"];

    for raw in content.lines() {
        let stripped = raw.trim();
        if LINE_PREFIXES.iter().any(|&prefix| stripped.starts_with(prefix)) {
            return true;
        }
        if ACCESS_SPECIFIERS.iter().any(|&marker| stripped.contains(marker)) {
            return true;
        }
    }
    false
}
100
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use std::path::PathBuf;

    /// Reports whether `node` or any of its descendants has a kind containing "jsx".
    fn subtree_has_jsx(node: tree_sitter::Node) -> bool {
        if node.kind().contains("jsx") {
            return true;
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if subtree_has_jsx(child) {
                return true;
            }
        }
        false
    }

    #[test]
    fn tsx_grammar_produces_jsx_nodes() {
        let file = SourceFile::new(
            PathBuf::from("foo.tsx"),
            String::from("function App() { return <div>hi</div>; }"),
        );
        let parsed = parse_file_full(&file).expect("tsx parse");
        // A `.tsx` file containing JSX must yield at least one jsx-kinded node.
        assert!(
            subtree_has_jsx(parsed.tree.root_node()),
            "TsxParser should produce jsx_* nodes via LANGUAGE_TSX"
        );
    }

    #[test]
    fn ts_grammar_does_not_produce_jsx_nodes() {
        // A plain `.ts` file is routed to TypeScriptParser. The source here
        // contains no JSX at all, so this checks that ordinary TypeScript
        // parsed through that route yields no jsx-kinded nodes.
        let file = SourceFile::new(
            PathBuf::from("foo.ts"),
            String::from("type X = number; function f(): X { return 1; }"),
        );
        let parsed = parse_file_full(&file).expect("ts parse");
        assert!(
            !subtree_has_jsx(parsed.tree.root_node()),
            "TypeScriptParser should not produce jsx_* nodes"
        );
    }

    proptest! {
        // Arbitrary text fed through the Rust route must not panic.
        #[test]
        fn parse_rust_never_panics(content in ".*") {
            let input = SourceFile::new(PathBuf::from("test.rs"), content);
            let _ = parse_file(&input);
        }

        // Arbitrary text fed through the TypeScript route must not panic.
        #[test]
        fn parse_ts_never_panics(content in ".*") {
            let input = SourceFile::new(PathBuf::from("test.ts"), content);
            let _ = parse_file(&input);
        }

        // Extensions outside the dispatch table are always rejected.
        #[test]
        fn parse_unknown_ext_returns_none(content in ".*") {
            let input = SourceFile::new(PathBuf::from("test.txt"), content);
            prop_assert!(parse_file(&input).is_none());
        }

        // Whenever a model comes back for Rust input, its basic invariants hold.
        #[test]
        fn parse_model_invariants(content in ".{0,500}") {
            let input = SourceFile::new(PathBuf::from("test.rs"), content.clone());
            if let Some(model) = parse_file(&input) {
                prop_assert_eq!(model.language, "rust");
                prop_assert!(content.is_empty() || model.total_lines > 0);
                for func in &model.functions {
                    prop_assert!(func.start_line <= func.end_line);
                    prop_assert!(func.line_count > 0);
                    prop_assert!(!func.name.is_empty());
                }
                for cls in &model.classes {
                    prop_assert!(cls.start_line <= cls.end_line);
                    prop_assert!(!cls.name.is_empty());
                }
            }
        }
    }
}