similarity_core/
language_parser.rs

1use crate::tree::TreeNode;
2use std::error::Error;
3use std::rc::Rc;
4
/// Supported programming languages.
///
/// The detection helpers on this type report an unrecognized input as `None`;
/// they never produce [`Language::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    JavaScript,
    TypeScript,
    Python,
    Php,
    Rust,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Unknown,
}

impl Language {
    /// Maps a file extension (given without the leading dot) to a language.
    ///
    /// Matching is case-insensitive: the extension is lowercased before it is
    /// compared. Returns `None` when the extension is not recognized.
    pub fn from_extension(ext: &str) -> Option<Self> {
        match ext.to_lowercase().as_str() {
            "js" | "mjs" | "cjs" => Some(Language::JavaScript),
            "ts" | "tsx" => Some(Language::TypeScript),
            "py" => Some(Language::Python),
            "php" => Some(Language::Php),
            "rs" => Some(Language::Rust),
            "go" => Some(Language::Go),
            "java" => Some(Language::Java),
            "c" | "h" => Some(Language::C),
            "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "c++" => Some(Language::Cpp),
            "cs" => Some(Language::CSharp),
            "rb" => Some(Language::Ruby),
            _ => None,
        }
    }

    /// Detects the language from the extension of `filename`.
    ///
    /// The extension is whatever follows the last `.` in `filename`. A name
    /// that contains no `.` has no extension and yields `None`; without this
    /// rule a bare file called `rs`, `go` or `c` would be treated as source
    /// code of that language.
    pub fn from_filename(filename: &str) -> Option<Self> {
        // `rsplit_once` is `None` when there is no dot at all, which is exactly
        // the "no extension" case we must not feed to `from_extension`.
        let (_, ext) = filename.rsplit_once('.')?;
        Self::from_extension(ext)
    }
}
44
/// Language-agnostic record describing one function definition.
///
/// All fields are plain owned data, so a value can be cloned and
/// debug-printed without touching any parser state.
#[derive(Clone, Debug)]
pub struct GenericFunctionDef {
    /// Name of the function.
    pub name: String,
    /// Line on which the definition starts.
    /// NOTE(review): whether lines are 0- or 1-based is not visible here — confirm with the parsers.
    pub start_line: u32,
    /// Line on which the definition ends (same numbering as `start_line`).
    pub end_line: u32,
    /// Line on which the function body starts.
    pub body_start_line: u32,
    /// Line on which the function body ends.
    pub body_end_line: u32,
    /// Parameters, one string per parameter.
    /// NOTE(review): presumably parameter names only; confirm whether types are included.
    pub parameters: Vec<String>,
    /// Whether the function is a method.
    pub is_method: bool,
    /// Name of the owning class, if any.
    pub class_name: Option<String>,
    /// Whether the function is async.
    pub is_async: bool,
    /// Whether the function is a generator.
    pub is_generator: bool,
    /// Decorators attached to the function, one string each.
    pub decorators: Vec<String>,
}
60
/// Language-agnostic record describing one type definition.
#[derive(Clone, Debug)]
pub struct GenericTypeDef {
    /// Name of the type.
    pub name: String,
    /// Free-form kind label: "struct", "enum", "type_alias", etc.
    pub kind: String,
    /// Line on which the definition starts.
    /// NOTE(review): whether lines are 0- or 1-based is not visible here — confirm with the parsers.
    pub start_line: u32,
    /// Line on which the definition ends (same numbering as `start_line`).
    pub end_line: u32,
    /// Members of the type: fields for structs, variants for enums, etc.
    pub fields: Vec<String>,
}
70
/// Categories of type definition.
///
/// NOTE(review): nothing in the visible part of this file uses this enum;
/// `GenericTypeDef::kind` is a `String`. Confirm where it is consumed.
#[derive(Clone, Debug, PartialEq)]
pub enum TypeDefKind {
    /// A class.
    Class,
    /// An interface.
    Interface,
    /// A type alias.
    TypeAlias,
    /// An enum.
    Enum,
    /// A struct.
    Struct,
}
79
80/// Trait for language-specific parsers
81pub trait LanguageParser: Send + Sync {
82    /// Parse source code into a TreeNode structure
83    fn parse(
84        &mut self,
85        source: &str,
86        filename: &str,
87    ) -> Result<Rc<TreeNode>, Box<dyn Error + Send + Sync>>;
88
89    /// Extract function definitions from source code
90    fn extract_functions(
91        &mut self,
92        source: &str,
93        filename: &str,
94    ) -> Result<Vec<GenericFunctionDef>, Box<dyn Error + Send + Sync>>;
95
96    /// Extract type definitions from source code
97    fn extract_types(
98        &mut self,
99        source: &str,
100        filename: &str,
101    ) -> Result<Vec<GenericTypeDef>, Box<dyn Error + Send + Sync>>;
102
103    /// Get the language this parser handles
104    fn language(&self) -> Language;
105}
106
107// ParserFactory is removed - each language CLI now manages its own parser
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Filenames with a recognized extension resolve to their language;
    /// an unrecognized extension resolves to `None`.
    #[test]
    fn test_language_detection() {
        let cases = [
            ("test.js", Some(Language::JavaScript)),
            ("test.ts", Some(Language::TypeScript)),
            ("test.py", Some(Language::Python)),
            ("test.php", Some(Language::Php)),
            ("test.rs", Some(Language::Rust)),
            ("test.go", Some(Language::Go)),
            ("test.txt", None),
        ];

        for &(filename, expected) in cases.iter() {
            assert_eq!(Language::from_filename(filename), expected);
        }
    }

    /// Extension lookup ignores letter case.
    #[test]
    fn test_case_insensitive_extension() {
        let cases = [("JS", Language::JavaScript), ("Py", Language::Python)];

        for &(ext, expected) in cases.iter() {
            assert_eq!(Language::from_extension(ext), Some(expected));
        }
    }
}