gitai/file_analyzers/
mod.rs

1use regex::Regex;
2use std::path::Path;
3
4use crate::{
5    core::context::{ProjectMetadata, StagedFile},
6    debug,
7};
8
/// Trait for analyzing files and extracting relevant information.
///
/// Implementors must be `Send + Sync`; `get_analyzer` hands them out as boxed
/// trait objects selected from the file name.
pub trait FileAnalyzer: Send + Sync {
    /// Analyzes `file` together with its staged state and returns the findings
    /// as a list of strings.
    ///
    /// NOTE(review): `file` is presumably the path of `staged_file` — confirm
    /// against callers.
    fn analyze(&self, file: &str, staged_file: &StagedFile) -> Vec<String>;
    /// Returns a static label naming the kind of file this analyzer handles
    /// (the fallback analyzer, for instance, returns `"Unknown or binary file"`).
    fn get_file_type(&self) -> &'static str;
    /// Builds a [`ProjectMetadata`] from `file` and its textual `content`.
    fn extract_metadata(&self, file: &str, content: &str) -> ProjectMetadata;
}
15
16/// Module for analyzing C files
17mod c;
18/// Module for analyzing C++ files
19mod cpp;
20/// Module for analyzing Gradle files
21mod gradle;
22/// Module for analyzing Java files
23mod java;
24/// Module for analyzing JavaScript files
25mod javascript;
26/// Module for analyzing JSON files
27mod json;
28/// Module for analyzing Kotlin files
29mod kotlin;
30/// Module for analyzing Markdown files
31mod markdown;
32/// Module for analyzing Python files
33mod python;
34/// Module for analyzing Rust files
35mod rust;
36/// Module for analyzing TOML files
37mod toml;
38/// Module for analyzing YAML files
39mod yaml;
40
41/// Module for analyzing generic text files
42mod text;
43
44/// Get the appropriate file analyzer based on the file extension
45pub fn get_analyzer(file: &str) -> Box<dyn FileAnalyzer + Send + Sync> {
46    let file_lower = file.to_lowercase();
47    let path = std::path::Path::new(&file_lower);
48
49    // Special cases for files with specific names
50    if file == "Makefile" {
51        return Box::new(c::CAnalyzer);
52    } else if file == "CMakeLists.txt" {
53        return Box::new(cpp::CppAnalyzer);
54    }
55
56    // Special cases for compound extensions
57    if file_lower.ends_with(".gradle") || file_lower.ends_with(".gradle.kts") {
58        return Box::new(gradle::GradleAnalyzer);
59    }
60
61    // Standard extension-based matching
62    if let Some(ext) = path.extension() {
63        if let Some(ext_str) = ext.to_str() {
64            let ext_lower = ext_str.to_lowercase();
65            match ext_lower.as_str() {
66                "c" => return Box::new(c::CAnalyzer),
67                "cpp" | "cc" | "cxx" => return Box::new(cpp::CppAnalyzer),
68                "rs" => return Box::new(rust::RustAnalyzer),
69                "py" => return Box::new(python::PythonAnalyzer),
70                "js" | "jsx" | "ts" | "tsx" => return Box::new(javascript::JavaScriptAnalyzer),
71                "java" => return Box::new(java::JavaAnalyzer),
72                "kt" | "kts" => return Box::new(kotlin::KotlinAnalyzer),
73                "json" => return Box::new(json::JsonAnalyzer),
74                "md" | "markdown" => return Box::new(markdown::MarkdownAnalyzer),
75                "yaml" | "yml" => return Box::new(yaml::YamlAnalyzer),
76                "toml" => return Box::new(toml::TomlAnalyzer),
77                // Text-like extensions should use the generic text analyzer
78                "txt" | "cfg" | "ini" | "properties" | "env" | "conf" | "config" | "xml"
79                | "htm" | "html" | "css" | "scss" | "sass" | "less" | "sql" | "sh" | "bash"
80                | "zsh" | "bat" | "cmd" | "ps1" | "dockerfile" | "editorconfig" | "gitignore"
81                | "gitattributes" | "nginx" | "service" => {
82                    return Box::new(text::GenericTextAnalyzer);
83                }
84                _ => {
85                    // Try to determine if this is likely a text file
86                    if is_likely_text_file(file) {
87                        return Box::new(text::GenericTextAnalyzer);
88                    }
89                }
90            }
91        }
92    } else {
93        // Files without extension - check if they're likely text files
94        if is_likely_text_file(file) {
95            return Box::new(text::GenericTextAnalyzer);
96        }
97    }
98
99    // Fall back to default analyzer for binary or unknown formats
100    Box::new(DefaultAnalyzer)
101}
102
/// Heuristic to determine if a file is likely text-based.
///
/// Only the final path component is inspected: it is lowercased and compared
/// against a fixed list of well-known file names that carry no extension
/// (`Dockerfile`, `README`, `.gitignore`, ...). The file content is never read.
///
/// # Arguments
///
/// * `file` - The path (or bare name) of the file to check.
///
/// # Returns
///
/// `true` if the file name matches one of the known names, ignoring case;
/// `false` otherwise, including when the path has no file name component.
fn is_likely_text_file(file: &str) -> bool {
    // Common configuration files without extensions. Entries must stay
    // lowercase because the candidate name is lowercased before the lookup.
    const TEXT_FILE_NAMES: &[&str] = &[
        "dockerfile",
        ".gitignore",
        ".gitattributes",
        ".env",
        "makefile",
        "readme",
        "license",
        "authors",
        "contributors",
        "changelog",
        "config",
        "codeowners",
        ".dockerignore",
        ".npmrc",
        ".yarnrc",
        ".eslintrc",
        ".prettierrc",
        ".babelrc",
        ".stylelintrc",
    ];

    Path::new(file)
        .file_name()
        .and_then(|name| name.to_str())
        // Lowercase the candidate once instead of once per table entry.
        .is_some_and(|name| TEXT_FILE_NAMES.contains(&name.to_lowercase().as_str()))
}
141
142/// Default analyzer for unsupported file types (likely binary)
143struct DefaultAnalyzer;
144
145impl FileAnalyzer for DefaultAnalyzer {
146    fn analyze(&self, _file: &str, _staged_file: &StagedFile) -> Vec<String> {
147        vec!["Unable to analyze non-text or binary file".to_string()]
148    }
149
150    fn get_file_type(&self) -> &'static str {
151        "Unknown or binary file"
152    }
153
154    fn extract_metadata(&self, _file: &str, _content: &str) -> ProjectMetadata {
155        ProjectMetadata {
156            language: Some("Binary/Unknown".to_string()),
157            ..Default::default()
158        }
159    }
160}
161
162/// Checks if a file should be excluded from analysis.
163///
164/// # Arguments
165///
166/// * `path` - The path of the file to check.
167///
168/// # Returns
169///
170/// A boolean indicating whether the file should be excluded.
171pub fn should_exclude_file(path: &str) -> bool {
172    debug!("Checking if file should be excluded: {}", path);
173    let exclude_patterns = vec![
174        (String::from(r"(^|/)\.git(/|$)"), false), // Only exclude .git directory, not .github
175        (String::from(r"\.svn"), false),
176        (String::from(r"\.hg"), false),
177        (String::from(r"\.DS_Store"), false),
178        (String::from(r"node_modules"), false),
179        (String::from(r"target"), false),
180        (String::from(r"build"), false),
181        (String::from(r"dist"), false),
182        (String::from(r"\.vscode"), false),
183        (String::from(r"\.idea"), false),
184        (String::from(r"\.vs"), false),
185        (String::from(r"package-lock\.json$"), true),
186        (String::from(r"\.lock$"), true),
187        (String::from(r"\.log$"), true),
188        (String::from(r"\.tmp$"), true),
189        (String::from(r"\.temp$"), true),
190        (String::from(r"\.swp$"), true),
191        (String::from(r"\.min\.js$"), true),
192    ];
193
194    let path = Path::new(path);
195
196    for (pattern, is_extension) in exclude_patterns {
197        let re = match Regex::new(&pattern) {
198            Ok(re) => re,
199            Err(e) => {
200                debug!("Failed to compile regex '{}': {}", pattern, e);
201                continue;
202            }
203        };
204
205        if is_extension {
206            if let Some(file_name) = path.file_name()
207                && let Some(file_name_str) = file_name.to_str()
208                && re.is_match(file_name_str)
209            {
210                debug!("File excluded: {}", path.display());
211                return true;
212            }
213        } else if let Some(path_str) = path.to_str()
214            && re.is_match(path_str)
215        {
216            debug!("File excluded: {}", path.display());
217            return true;
218        }
219    }
220    debug!("File not excluded: {}", path.display());
221    false
222}