git_iris/file_analyzers/
mod.rs

1use regex::Regex;
2use std::path::Path;
3
4use crate::{
5    context::{ProjectMetadata, StagedFile},
6    log_debug,
7};
8
/// Trait for analyzing files and extracting relevant information
///
/// Implementors must be `Send + Sync`; `get_analyzer` hands them out as
/// boxed trait objects chosen from the file's name and extension.
pub trait FileAnalyzer: Send + Sync {
    /// Analyzes `file` using the data in `staged_file` and returns the
    /// findings as a list of strings.
    ///
    /// NOTE(review): presumably `file` is the path of the staged file and the
    /// strings are human-readable observations — confirm against callers.
    fn analyze(&self, file: &str, staged_file: &StagedFile) -> Vec<String>;
    /// Returns a static label naming the kind of file this analyzer handles
    /// (the fallback analyzer in this module returns `"Unknown or binary file"`).
    fn get_file_type(&self) -> &'static str;
    /// Builds a `ProjectMetadata` value from the file path and its content.
    fn extract_metadata(&self, file: &str, content: &str) -> ProjectMetadata;
}
15
16/// Module for analyzing C files
17mod c;
18/// Module for analyzing C++ files
19mod cpp;
20/// Module for analyzing Gradle files
21mod gradle;
22/// Module for analyzing Java files
23mod java;
24/// Module for analyzing JavaScript files
25mod javascript;
26/// Module for analyzing JSON files
27mod json;
28/// Module for analyzing Kotlin files
29mod kotlin;
30/// Module for analyzing Markdown files
31mod markdown;
32/// Module for analyzing Python files
33mod python;
34/// Module for analyzing Rust files
35mod rust;
36/// Module for analyzing TOML files
37mod toml;
38/// Module for analyzing YAML files
39mod yaml;
40
41/// Module for analyzing generic text files
42mod text;
43
44/// Get the appropriate file analyzer based on the file extension
45pub fn get_analyzer(file: &str) -> Box<dyn FileAnalyzer + Send + Sync> {
46    let file_lower = file.to_lowercase();
47    let path = std::path::Path::new(&file_lower);
48
49    // Special cases for files with specific names
50    if file == "Makefile" {
51        return Box::new(c::CAnalyzer);
52    } else if file == "CMakeLists.txt" {
53        return Box::new(cpp::CppAnalyzer);
54    }
55
56    // Special cases for compound extensions
57    if file_lower.ends_with(".gradle") || file_lower.ends_with(".gradle.kts") {
58        return Box::new(gradle::GradleAnalyzer);
59    }
60
61    // Standard extension-based matching
62    if let Some(ext) = path.extension() {
63        if let Some(ext_str) = ext.to_str() {
64            let ext_lower = ext_str.to_lowercase();
65            match ext_lower.as_str() {
66                "c" => return Box::new(c::CAnalyzer),
67                "cpp" | "cc" | "cxx" => return Box::new(cpp::CppAnalyzer),
68                "rs" => return Box::new(rust::RustAnalyzer),
69                "py" => return Box::new(python::PythonAnalyzer),
70                "js" | "jsx" | "ts" | "tsx" => return Box::new(javascript::JavaScriptAnalyzer),
71                "java" => return Box::new(java::JavaAnalyzer),
72                "kt" | "kts" => return Box::new(kotlin::KotlinAnalyzer),
73                "json" => return Box::new(json::JsonAnalyzer),
74                "md" | "markdown" => return Box::new(markdown::MarkdownAnalyzer),
75                "yaml" | "yml" => return Box::new(yaml::YamlAnalyzer),
76                "toml" => return Box::new(toml::TomlAnalyzer),
77                // Text-like extensions should use the generic text analyzer
78                "txt" | "cfg" | "ini" | "properties" | "env" | "conf" | "config" | "xml"
79                | "htm" | "html" | "css" | "scss" | "sass" | "less" | "sql" | "sh" | "bash"
80                | "zsh" | "bat" | "cmd" | "ps1" | "dockerfile" | "editorconfig" | "gitignore"
81                | "gitattributes" | "nginx" | "service" => {
82                    return Box::new(text::GenericTextAnalyzer);
83                }
84                _ => {
85                    // Try to determine if this is likely a text file
86                    if is_likely_text_file(file) {
87                        return Box::new(text::GenericTextAnalyzer);
88                    }
89                }
90            }
91        }
92    } else {
93        // Files without extension - check if they're likely text files
94        if is_likely_text_file(file) {
95            return Box::new(text::GenericTextAnalyzer);
96        }
97    }
98
99    // Fall back to default analyzer for binary or unknown formats
100    Box::new(DefaultAnalyzer)
101}
102
/// Heuristic to determine if a file is likely text-based.
///
/// Only the final path component is considered. It is lowercased and compared
/// against a fixed list of well-known text/configuration file names; paths
/// without a final component (for example an empty string) yield `false`.
fn is_likely_text_file(file: &str) -> bool {
    // Common configuration files without extensions (all lowercase)
    const KNOWN_TEXT_FILE_NAMES: &[&str] = &[
        "dockerfile",
        ".gitignore",
        ".gitattributes",
        ".env",
        "makefile",
        "readme",
        "license",
        "authors",
        "contributors",
        "changelog",
        "config",
        "codeowners",
        ".dockerignore",
        ".npmrc",
        ".yarnrc",
        ".eslintrc",
        ".prettierrc",
        ".babelrc",
        ".stylelintrc",
    ];

    match Path::new(file).file_name().and_then(|os_name| os_name.to_str()) {
        Some(base_name) => {
            let lowered = base_name.to_lowercase();
            KNOWN_TEXT_FILE_NAMES.contains(&lowered.as_str())
        }
        None => false,
    }
}
141
142/// Default analyzer for unsupported file types (likely binary)
143struct DefaultAnalyzer;
144
145impl FileAnalyzer for DefaultAnalyzer {
146    fn analyze(&self, _file: &str, _staged_file: &StagedFile) -> Vec<String> {
147        vec!["Unable to analyze non-text or binary file".to_string()]
148    }
149
150    fn get_file_type(&self) -> &'static str {
151        "Unknown or binary file"
152    }
153
154    fn extract_metadata(&self, _file: &str, _content: &str) -> ProjectMetadata {
155        ProjectMetadata {
156            language: Some("Binary/Unknown".to_string()),
157            ..Default::default()
158        }
159    }
160}
161
162/// Checks if a file should be excluded from analysis.
163///
164/// # Arguments
165///
166/// * `path` - The path of the file to check.
167///
168/// # Returns
169///
170/// A boolean indicating whether the file should be excluded.
171pub fn should_exclude_file(path: &str) -> bool {
172    log_debug!("Checking if file should be excluded: {}", path);
173    let exclude_patterns = vec![
174        (String::from(r"\.git"), false),
175        (String::from(r"\.svn"), false),
176        (String::from(r"\.hg"), false),
177        (String::from(r"\.DS_Store"), false),
178        (String::from(r"node_modules"), false),
179        (String::from(r"target"), false),
180        (String::from(r"build"), false),
181        (String::from(r"dist"), false),
182        (String::from(r"\.vscode"), false),
183        (String::from(r"\.idea"), false),
184        (String::from(r"\.vs"), false),
185        (String::from(r"package-lock\.json$"), true),
186        (String::from(r"\.lock$"), true),
187        (String::from(r"\.log$"), true),
188        (String::from(r"\.tmp$"), true),
189        (String::from(r"\.temp$"), true),
190        (String::from(r"\.swp$"), true),
191        (String::from(r"\.min\.js$"), true),
192    ];
193
194    let path = Path::new(path);
195
196    for (pattern, is_extension) in exclude_patterns {
197        let re = match Regex::new(&pattern) {
198            Ok(re) => re,
199            Err(e) => {
200                log_debug!("Failed to compile regex '{}': {}", pattern, e);
201                continue;
202            }
203        };
204
205        if is_extension {
206            if let Some(file_name) = path.file_name() {
207                if let Some(file_name_str) = file_name.to_str() {
208                    if re.is_match(file_name_str) {
209                        log_debug!("File excluded: {}", path.display());
210                        return true;
211                    }
212                }
213            }
214        } else if let Some(path_str) = path.to_str() {
215            if re.is_match(path_str) {
216                log_debug!("File excluded: {}", path.display());
217                return true;
218            }
219        }
220    }
221    log_debug!("File not excluded: {}", path.display());
222    false
223}