Skip to main content

cha_parser/
lib.rs

1mod c_imports;
2mod c_lang;
3mod cpp;
4mod golang;
5mod golang_imports;
6mod python;
7mod python_imports;
8mod rust_imports;
9mod rust_lang;
10mod switch_arms;
11mod type_aliases;
12mod type_ref;
13mod typescript;
14mod typescript_imports;
15
16pub use c_lang::{CParser, CppParser};
17pub use cha_core::{ClassInfo, CommentInfo, FunctionInfo, ImportInfo, SourceModel};
18pub use golang::GolangParser;
19pub use python::PythonParser;
20pub use rust_lang::RustParser;
21pub use typescript::TypeScriptParser;
22
23use cha_core::SourceFile;
24
25/// Result of parsing a file, including the tree-sitter tree for downstream AST queries.
26pub struct ParseResult {
27    pub model: SourceModel,
28    pub tree: tree_sitter::Tree,
29    pub ts_language: tree_sitter::Language,
30}
31
32/// Trait for language-specific parsers.
33pub trait LanguageParser: Send + Sync {
34    fn language_name(&self) -> &str;
35    fn parse(&self, file: &SourceFile) -> Option<SourceModel>;
36    fn ts_language(&self) -> tree_sitter::Language;
37    fn parse_tree(&self, content: &str) -> Option<tree_sitter::Tree> {
38        let mut parser = tree_sitter::Parser::new();
39        parser.set_language(&self.ts_language()).ok()?;
40        parser.parse(content, None)
41    }
42}
43
44/// Detect language from file extension and parse, returning model + tree.
45pub fn parse_file_full(file: &SourceFile) -> Option<ParseResult> {
46    let ext = file.path.extension()?.to_str()?;
47    let parser: Box<dyn LanguageParser> = match ext {
48        "ts" | "tsx" => Box::new(TypeScriptParser),
49        "rs" => Box::new(RustParser),
50        "py" => Box::new(PythonParser),
51        "go" => Box::new(GolangParser),
52        "h" if looks_like_cpp(&file.content) => Box::new(CppParser),
53        "c" | "h" => Box::new(CParser),
54        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Box::new(CppParser),
55        _ => return None,
56    };
57    let model = parser.parse(file)?;
58    let tree = parser.parse_tree(&file.content)?;
59    let ts_language = parser.ts_language();
60    Some(ParseResult {
61        model,
62        tree,
63        ts_language,
64    })
65}
66
67/// Detect language from file extension and parse (legacy API, no tree returned).
68pub fn parse_file(file: &SourceFile) -> Option<SourceModel> {
69    let ext = file.path.extension()?.to_str()?;
70    let parser: Box<dyn LanguageParser> = match ext {
71        "ts" | "tsx" => Box::new(TypeScriptParser),
72        "rs" => Box::new(RustParser),
73        "py" => Box::new(PythonParser),
74        "go" => Box::new(GolangParser),
75        "h" if looks_like_cpp(&file.content) => Box::new(CppParser),
76        "c" | "h" => Box::new(CParser),
77        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Box::new(CppParser),
78        _ => return None,
79    };
80    parser.parse(file)
81}
82
/// Sniff whether a `.h` file contains C++ constructs.
///
/// Returns `true` if any line, after same-line comments are removed and the
/// remainder is trimmed, either
/// - starts with `class `, `namespace `, `using `, or the keyword `template`, or
/// - contains an access specifier (`public:`, `private:`, `protected:`).
///
/// Comment text is ignored so that C headers carrying markers such as
/// `/* private: */` are not mistaken for C++. Only comments that open on the
/// line being examined are removed; the interior lines of a block comment that
/// spans several lines are still inspected as if they were code.
fn looks_like_cpp(content: &str) -> bool {
    content.lines().any(|line| {
        let code = strip_comments(line);
        let t = code.trim();
        t.starts_with("class ")
            || t.starts_with("namespace ")
            // Require a word boundary so C identifiers such as `template_t`
            // do not count as the C++ keyword.
            || starts_with_keyword(t, "template")
            || t.starts_with("using ")
            || t.contains("public:")
            || t.contains("private:")
            || t.contains("protected:")
    })
}

/// Returns `true` when `text` begins with `keyword` and the keyword is not
/// merely the prefix of a longer identifier.
fn starts_with_keyword(text: &str, keyword: &str) -> bool {
    match text.strip_prefix(keyword) {
        Some(rest) => !rest.starts_with(|c: char| c.is_alphanumeric() || c == '_'),
        None => false,
    }
}

/// Removes `// ...` and `/* ... */` comment text from a single line.
///
/// A closed block comment is replaced by one space so the tokens around it
/// stay separated. An unterminated `/*` drops the rest of the line. String
/// literals are not interpreted, so comment markers inside them are treated
/// as comments too.
fn strip_comments(line: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    // Fast path: without a slash there cannot be a comment on this line.
    if !line.contains('/') {
        return Cow::Borrowed(line);
    }

    let mut code = String::with_capacity(line.len());
    let mut rest = line;
    while let Some(slash) = rest.find('/') {
        let (before, tail) = rest.split_at(slash);
        code.push_str(before);
        if tail.starts_with("//") {
            // Line comment: everything after it is ignored.
            return Cow::Owned(code);
        }
        match tail.strip_prefix("/*") {
            Some(comment) => match comment.find("*/") {
                Some(end) => {
                    code.push(' ');
                    rest = &comment[end + 2..];
                }
                // Block comment continues past this line.
                None => return Cow::Owned(code),
            },
            None => {
                // A lone slash (division, include path, ...) is ordinary code.
                code.push('/');
                rest = &tail[1..];
            }
        }
    }
    code.push_str(rest);
    Cow::Owned(code)
}
96
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use std::path::PathBuf;

    // Wraps generated text in a `SourceFile` carrying the given file name.
    fn source_named(name: &str, content: String) -> SourceFile {
        SourceFile::new(PathBuf::from(name), content)
    }

    proptest! {
        // Arbitrary text routed to the Rust parser must not panic.
        #[test]
        fn parse_rust_never_panics(content in ".*") {
            let source = source_named("test.rs", content);
            let _ = parse_file(&source);
        }

        // Arbitrary text routed to the TypeScript parser must not panic.
        #[test]
        fn parse_ts_never_panics(content in ".*") {
            let source = source_named("test.ts", content);
            let _ = parse_file(&source);
        }

        // An unsupported extension never yields a model, whatever the content.
        #[test]
        fn parse_unknown_ext_returns_none(content in ".*") {
            let source = source_named("test.txt", content);
            prop_assert!(parse_file(&source).is_none());
        }

        // Whenever a model is produced for a `.rs` file, its basic invariants hold.
        #[test]
        fn parse_model_invariants(content in ".{0,500}") {
            let input_is_empty = content.is_empty();
            let source = source_named("test.rs", content);
            if let Some(model) = parse_file(&source) {
                prop_assert_eq!(model.language, "rust");
                prop_assert!(input_is_empty || model.total_lines > 0);
                for func in model.functions.iter() {
                    prop_assert!(func.start_line <= func.end_line);
                    prop_assert!(func.line_count > 0);
                    prop_assert!(!func.name.is_empty());
                }
                for cls in model.classes.iter() {
                    prop_assert!(cls.start_line <= cls.end_line);
                    prop_assert!(!cls.name.is_empty());
                }
            }
        }
    }
}