ricecoder_research/
standards_detector.rs

1//! Standards detector for extracting naming conventions and coding standards
2
3use crate::error::ResearchError;
4use crate::models::{
5    CaseStyle, DocFormat, DocumentationStyle, FormattingStyle, ImportGroup, ImportOrganization,
6    IndentType, NamingConventions, StandardsProfile,
7};
8use regex::Regex;
9use std::collections::HashMap;
10use std::path::Path;
11
12/// Detects naming conventions and coding standards from a codebase
13#[derive(Debug)]
14pub struct StandardsDetector;
15
16impl StandardsDetector {
17    /// Create a new StandardsDetector
18    pub fn new() -> Self {
19        StandardsDetector
20    }
21
22    /// Detect standards and conventions from code files
23    ///
24    /// Analyzes code files to extract naming conventions, formatting styles,
25    /// import organization, and documentation styles.
26    ///
27    /// # Arguments
28    ///
29    /// * `files` - Vector of file paths to analyze
30    ///
31    /// # Returns
32    ///
33    /// A `StandardsProfile` containing detected standards, or a `ResearchError`
34    pub fn detect(&self, files: &[&Path]) -> Result<StandardsProfile, ResearchError> {
35        if files.is_empty() {
36            return Ok(StandardsProfile::default());
37        }
38
39        // Read and analyze all files
40        let mut file_contents = Vec::new();
41        for file_path in files {
42            match std::fs::read_to_string(file_path) {
43                Ok(content) => file_contents.push(content),
44                Err(_) => {
45                    // Skip files that can't be read
46                    continue;
47                }
48            }
49        }
50
51        if file_contents.is_empty() {
52            return Ok(StandardsProfile::default());
53        }
54
55        let combined_content = file_contents.join("\n");
56
57        // Detect each aspect of standards
58        let naming_conventions = self.detect_naming_conventions(&combined_content)?;
59        let formatting_style = self.detect_formatting_style(&combined_content)?;
60        let import_organization = self.detect_import_organization(&combined_content)?;
61        let documentation_style = self.detect_documentation_style(&combined_content)?;
62
63        Ok(StandardsProfile {
64            naming_conventions,
65            formatting_style,
66            import_organization,
67            documentation_style,
68        })
69    }
70
71    /// Detect naming conventions from code
72    fn detect_naming_conventions(&self, content: &str) -> Result<NamingConventions, ResearchError> {
73        let naming_analyzer = NamingAnalyzer::new();
74        naming_analyzer.analyze(content)
75    }
76
77    /// Detect formatting style from code
78    fn detect_formatting_style(&self, content: &str) -> Result<FormattingStyle, ResearchError> {
79        let formatting_analyzer = FormattingAnalyzer::new();
80        formatting_analyzer.analyze(content)
81    }
82
83    /// Detect import organization from code
84    fn detect_import_organization(
85        &self,
86        content: &str,
87    ) -> Result<ImportOrganization, ResearchError> {
88        let import_analyzer = ImportAnalyzer::new();
89        import_analyzer.analyze(content)
90    }
91
92    /// Detect documentation style from code
93    fn detect_documentation_style(
94        &self,
95        content: &str,
96    ) -> Result<DocumentationStyle, ResearchError> {
97        let doc_analyzer = DocumentationAnalyzer::new();
98        doc_analyzer.analyze(content)
99    }
100}
101
102impl Default for StandardsDetector {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108// ============================================================================
109// Naming Analyzer
110// ============================================================================
111
112/// Analyzes naming conventions in code
113#[derive(Debug)]
114struct NamingAnalyzer;
115
116impl NamingAnalyzer {
117    fn new() -> Self {
118        NamingAnalyzer
119    }
120
121    fn analyze(&self, content: &str) -> Result<NamingConventions, ResearchError> {
122        let function_case = self.detect_function_naming(content);
123        let variable_case = self.detect_variable_naming(content);
124        let class_case = self.detect_class_naming(content);
125        let constant_case = self.detect_constant_naming(content);
126
127        Ok(NamingConventions {
128            function_case,
129            variable_case,
130            class_case,
131            constant_case,
132        })
133    }
134
135    fn detect_function_naming(&self, content: &str) -> CaseStyle {
136        // Detect function naming patterns
137        let snake_case_count = self.count_pattern(content, r"fn\s+([a-z_][a-z0-9_]*)\s*\(");
138        let camel_case_count = self.count_pattern(content, r"function\s+([a-z][a-zA-Z0-9]*)\s*\(");
139        let pascal_case_count = self.count_pattern(content, r"def\s+([A-Z][a-zA-Z0-9]*)\s*\(");
140
141        self.determine_dominant_case(snake_case_count, camel_case_count, pascal_case_count)
142    }
143
144    fn detect_variable_naming(&self, content: &str) -> CaseStyle {
145        // Detect variable naming patterns
146        let snake_case_count = self.count_pattern(content, r"let\s+([a-z_][a-z0-9_]*)\s*=");
147        let camel_case_count = self.count_pattern(content, r"const\s+([a-z][a-zA-Z0-9]*)\s*=");
148        let pascal_case_count = self.count_pattern(content, r"var\s+([A-Z][a-zA-Z0-9]*)\s*=");
149
150        self.determine_dominant_case(snake_case_count, camel_case_count, pascal_case_count)
151    }
152
153    fn detect_class_naming(&self, content: &str) -> CaseStyle {
154        // Detect class/struct naming patterns
155        let pascal_case_count = self.count_pattern(
156            content,
157            r"(?:struct|class|interface)\s+([A-Z][a-zA-Z0-9]*)\s*[{<]",
158        );
159        let snake_case_count = self.count_pattern(
160            content,
161            r"(?:struct|class|interface)\s+([a-z_][a-z0-9_]*)\s*[{<]",
162        );
163
164        if pascal_case_count > snake_case_count {
165            CaseStyle::PascalCase
166        } else if snake_case_count > 0 {
167            CaseStyle::SnakeCase
168        } else {
169            CaseStyle::PascalCase // Default for classes
170        }
171    }
172
173    fn detect_constant_naming(&self, content: &str) -> CaseStyle {
174        // Detect constant naming patterns
175        let upper_case_count = self.count_pattern(content, r"const\s+([A-Z_][A-Z0-9_]*)\s*=");
176        let camel_case_count = self.count_pattern(content, r"const\s+([a-z][a-zA-Z0-9]*)\s*=");
177
178        if upper_case_count > camel_case_count {
179            CaseStyle::UpperCase
180        } else if camel_case_count > 0 {
181            CaseStyle::CamelCase
182        } else {
183            CaseStyle::UpperCase // Default for constants
184        }
185    }
186
187    fn count_pattern(&self, content: &str, pattern: &str) -> usize {
188        if let Ok(re) = Regex::new(pattern) {
189            re.find_iter(content).count()
190        } else {
191            0
192        }
193    }
194
195    fn determine_dominant_case(
196        &self,
197        snake_case: usize,
198        camel_case: usize,
199        pascal_case: usize,
200    ) -> CaseStyle {
201        if snake_case > camel_case && snake_case > pascal_case && snake_case > 0 {
202            CaseStyle::SnakeCase
203        } else if camel_case > pascal_case && camel_case > 0 {
204            CaseStyle::CamelCase
205        } else if pascal_case > 0 {
206            CaseStyle::PascalCase
207        } else {
208            CaseStyle::Mixed
209        }
210    }
211}
212
213// ============================================================================
214// Formatting Analyzer
215// ============================================================================
216
217/// Analyzes formatting style in code
218#[derive(Debug)]
219struct FormattingAnalyzer;
220
221impl FormattingAnalyzer {
222    fn new() -> Self {
223        FormattingAnalyzer
224    }
225
226    fn analyze(&self, content: &str) -> Result<FormattingStyle, ResearchError> {
227        let indent_type = self.detect_indent_type(content);
228        let indent_size = self.detect_indent_size(content, indent_type);
229        let line_length = self.detect_line_length(content);
230
231        Ok(FormattingStyle {
232            indent_size,
233            indent_type,
234            line_length,
235        })
236    }
237
238    fn detect_indent_type(&self, content: &str) -> IndentType {
239        let mut tab_count = 0;
240        let mut space_count = 0;
241
242        for line in content.lines() {
243            if line.starts_with('\t') {
244                tab_count += 1;
245            } else if line.starts_with(' ') {
246                space_count += 1;
247            }
248        }
249
250        if tab_count > space_count {
251            IndentType::Tabs
252        } else {
253            IndentType::Spaces
254        }
255    }
256
257    fn detect_indent_size(&self, content: &str, indent_type: IndentType) -> usize {
258        let mut indent_sizes = HashMap::new();
259
260        for line in content.lines() {
261            if indent_type == IndentType::Spaces {
262                if let Some(spaces) = self.count_leading_spaces(line) {
263                    if spaces > 0 && spaces <= 16 {
264                        *indent_sizes.entry(spaces).or_insert(0) += 1;
265                    }
266                }
267            }
268        }
269
270        // Find the most common indent size
271        indent_sizes
272            .iter()
273            .max_by_key(|&(_, count)| count)
274            .map(|(&size, _)| size)
275            .unwrap_or(4)
276    }
277
278    fn count_leading_spaces(&self, line: &str) -> Option<usize> {
279        let mut count = 0;
280        for ch in line.chars() {
281            if ch == ' ' {
282                count += 1;
283            } else {
284                break;
285            }
286        }
287        if count > 0 {
288            Some(count)
289        } else {
290            None
291        }
292    }
293
294    fn detect_line_length(&self, content: &str) -> usize {
295        let mut line_lengths = Vec::new();
296
297        for line in content.lines() {
298            line_lengths.push(line.len());
299        }
300
301        if line_lengths.is_empty() {
302            100 // Default
303        } else {
304            line_lengths.sort_unstable();
305            // Use 75th percentile as the preferred line length
306            let index = (line_lengths.len() * 75) / 100;
307            line_lengths[index].clamp(80, 120)
308        }
309    }
310}
311
312// ============================================================================
313// Import Analyzer
314// ============================================================================
315
316/// Analyzes import organization in code
317#[derive(Debug)]
318struct ImportAnalyzer;
319
320impl ImportAnalyzer {
321    fn new() -> Self {
322        ImportAnalyzer
323    }
324
325    fn analyze(&self, content: &str) -> Result<ImportOrganization, ResearchError> {
326        let order = self.detect_import_order(content);
327        let sort_within_group = self.detect_sort_within_group(content);
328
329        Ok(ImportOrganization {
330            order,
331            sort_within_group,
332        })
333    }
334
335    fn detect_import_order(&self, content: &str) -> Vec<ImportGroup> {
336        let mut groups_seen = Vec::new();
337
338        for line in content.lines() {
339            let line = line.trim();
340
341            if line.starts_with("use std::") || line.starts_with("import java.") {
342                if !groups_seen.contains(&ImportGroup::Standard) {
343                    groups_seen.push(ImportGroup::Standard);
344                }
345            } else if line.starts_with("use ") || line.starts_with("import ") {
346                // Check if it's external or internal
347                if self.is_external_import(line) {
348                    if !groups_seen.contains(&ImportGroup::External) {
349                        groups_seen.push(ImportGroup::External);
350                    }
351                } else if !groups_seen.contains(&ImportGroup::Internal) {
352                    groups_seen.push(ImportGroup::Internal);
353                }
354            }
355        }
356
357        if groups_seen.is_empty() {
358            vec![
359                ImportGroup::Standard,
360                ImportGroup::External,
361                ImportGroup::Internal,
362            ]
363        } else {
364            groups_seen
365        }
366    }
367
368    fn is_external_import(&self, line: &str) -> bool {
369        // Simple heuristic: external imports typically don't start with relative paths
370        !line.contains("./") && !line.contains("../") && !line.contains("crate::")
371    }
372
373    fn detect_sort_within_group(&self, content: &str) -> bool {
374        let mut import_groups = Vec::new();
375        let mut current_group = Vec::new();
376
377        for line in content.lines() {
378            let line = line.trim();
379            if line.starts_with("use ") || line.starts_with("import ") {
380                current_group.push(line.to_string());
381            } else if !current_group.is_empty() {
382                import_groups.push(current_group.clone());
383                current_group.clear();
384            }
385        }
386
387        if !current_group.is_empty() {
388            import_groups.push(current_group);
389        }
390
391        // Check if imports within groups are sorted
392        for group in import_groups {
393            if group.len() > 1 {
394                let mut sorted = group.clone();
395                sorted.sort();
396                if sorted == group {
397                    return true;
398                }
399            }
400        }
401
402        false
403    }
404}
405
406// ============================================================================
407// Documentation Analyzer
408// ============================================================================
409
410/// Analyzes documentation style in code
411#[derive(Debug)]
412struct DocumentationAnalyzer;
413
414impl DocumentationAnalyzer {
415    fn new() -> Self {
416        DocumentationAnalyzer
417    }
418
419    fn analyze(&self, content: &str) -> Result<DocumentationStyle, ResearchError> {
420        let format = self.detect_doc_format(content);
421        let required_for_public = self.detect_required_for_public(content);
422
423        Ok(DocumentationStyle {
424            format,
425            required_for_public,
426        })
427    }
428
429    fn detect_doc_format(&self, content: &str) -> DocFormat {
430        let rustdoc_count = content.matches("///").count();
431        let javadoc_count = content.matches("/**").count();
432        let jsdoc_count = content.matches("/**").count();
433        let python_doc_count = content.matches("\"\"\"").count();
434
435        if rustdoc_count > javadoc_count
436            && rustdoc_count > jsdoc_count
437            && rustdoc_count > python_doc_count
438        {
439            DocFormat::RustDoc
440        } else if javadoc_count > jsdoc_count && javadoc_count > python_doc_count {
441            DocFormat::JavaDoc
442        } else if jsdoc_count > python_doc_count {
443            DocFormat::JSDoc
444        } else if python_doc_count > 0 {
445            DocFormat::PythonDoc
446        } else {
447            DocFormat::RustDoc // Default
448        }
449    }
450
451    fn detect_required_for_public(&self, content: &str) -> bool {
452        // Count public items with documentation
453        let public_items = self.count_public_items(content);
454        let documented_items = self.count_documented_items(content);
455
456        if public_items == 0 {
457            false
458        } else {
459            // If more than 50% of public items are documented, assume it's required
460            documented_items as f32 / public_items as f32 > 0.5
461        }
462    }
463
464    fn count_public_items(&self, content: &str) -> usize {
465        let public_fn = content.matches("pub fn").count();
466        let public_struct = content.matches("pub struct").count();
467        let public_enum = content.matches("pub enum").count();
468        let public_trait = content.matches("pub trait").count();
469
470        public_fn + public_struct + public_enum + public_trait
471    }
472
473    fn count_documented_items(&self, content: &str) -> usize {
474        let mut count = 0;
475        let lines: Vec<&str> = content.lines().collect();
476
477        for i in 0..lines.len() {
478            let line = lines[i];
479            if (line.contains("///") || line.contains("/**")) && i + 1 < lines.len() {
480                let next_line = lines[i + 1];
481                if next_line.contains("pub ") {
482                    count += 1;
483                }
484            }
485        }
486
487        count
488    }
489}
490
#[cfg(test)]
mod tests {
    //! Unit tests for each analyzer, plus end-to-end tests that run
    //! `StandardsDetector::detect` against temporary files.

    use super::*;

    #[test]
    fn test_standards_detector_creation() {
        let detector = StandardsDetector::new();
        assert_eq!(format!("{:?}", detector), "StandardsDetector");
    }

    #[test]
    fn test_empty_files_returns_default() {
        let detector = StandardsDetector::new();
        let result = detector.detect(&[]);
        assert!(result.is_ok());
    }

    // ========================================================================
    // Naming Convention Tests
    // ========================================================================

    #[test]
    fn test_naming_analyzer_snake_case_detection() {
        let analyzer = NamingAnalyzer::new();
        let content = "fn my_function() {}\nfn another_function() {}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.function_case, CaseStyle::SnakeCase);
    }

    #[test]
    fn test_naming_analyzer_pascal_case_class_detection() {
        let analyzer = NamingAnalyzer::new();
        let content = "struct MyStruct {}\nstruct AnotherStruct {}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.class_case, CaseStyle::PascalCase);
    }

    #[test]
    fn test_naming_analyzer_upper_case_constant_detection() {
        let analyzer = NamingAnalyzer::new();
        let content = "const MY_CONSTANT: i32 = 42;\nconst ANOTHER_CONSTANT: i32 = 100;";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.constant_case, CaseStyle::UpperCase);
    }

    #[test]
    fn test_naming_analyzer_mixed_case_fallback() {
        let analyzer = NamingAnalyzer::new();
        let content = "// No clear naming patterns";
        let result = analyzer.analyze(content).unwrap();
        // No function declaration matches, so every tally is zero.
        assert_eq!(result.function_case, CaseStyle::Mixed);
    }

    // ========================================================================
    // Formatting Style Tests
    // ========================================================================

    #[test]
    fn test_formatting_analyzer_indent_detection() {
        let analyzer = FormattingAnalyzer::new();
        let content = "fn main() {\n    println!(\"hello\");\n}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.indent_type, IndentType::Spaces);
        assert_eq!(result.indent_size, 4);
    }

    #[test]
    fn test_formatting_analyzer_tab_detection() {
        let analyzer = FormattingAnalyzer::new();
        let content = "fn main() {\n\tprintln!(\"hello\");\n}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.indent_type, IndentType::Tabs);
    }

    #[test]
    fn test_formatting_analyzer_line_length_detection() {
        let analyzer = FormattingAnalyzer::new();
        let content = "fn main() {\n    let x = 1;\n}\n";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.line_length >= 80);
        assert!(result.line_length <= 120);
    }

    #[test]
    fn test_formatting_analyzer_default_values() {
        let analyzer = FormattingAnalyzer::new();
        let content = "// Empty code";
        let result = analyzer.analyze(content).unwrap();
        // No indented line, so the default indent size applies.
        assert_eq!(result.indent_size, 4);
        // The detected line length is always clamped to this range.
        assert!(result.line_length >= 80 && result.line_length <= 120);
    }

    // ========================================================================
    // Import Organization Tests
    // ========================================================================

    #[test]
    fn test_import_analyzer_order_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use std::io;\nuse external_crate;\nuse crate::module;";
        let result = analyzer.analyze(content).unwrap();
        assert!(!result.order.is_empty());
    }

    #[test]
    fn test_import_analyzer_standard_library_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use std::io;\nuse std::fs;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.order.contains(&ImportGroup::Standard));
    }

    #[test]
    fn test_import_analyzer_external_import_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use external_crate;\nuse another_external;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.order.contains(&ImportGroup::External));
    }

    #[test]
    fn test_import_analyzer_internal_import_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use crate::module;\nuse crate::other;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.order.contains(&ImportGroup::Internal));
    }

    #[test]
    fn test_import_analyzer_sort_detection() {
        let analyzer = ImportAnalyzer::new();
        // `std::io` before `std::fs` is not in ascending order.
        let content = "use std::io;\nuse std::fs;";
        let result = analyzer.analyze(content).unwrap();
        assert!(!result.sort_within_group);
    }

    #[test]
    fn test_import_analyzer_sorted_group_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use std::fs;\nuse std::io;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.sort_within_group);
    }

    // ========================================================================
    // Documentation Style Tests
    // ========================================================================

    #[test]
    fn test_documentation_analyzer_format_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "/// This is a doc comment\npub fn my_function() {}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.format, DocFormat::RustDoc);
    }

    #[test]
    fn test_documentation_analyzer_javadoc_detection() {
        let analyzer = DocumentationAnalyzer::new();
        // JavaDoc and JSDoc both open with `/**`, so either is acceptable here.
        let content = "/** This is a doc comment */\npub fn my_function() {}";
        let result = analyzer.analyze(content).unwrap();
        assert!(matches!(
            result.format,
            DocFormat::JavaDoc | DocFormat::JSDoc
        ));
    }

    #[test]
    fn test_documentation_analyzer_required_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "/// Doc\npub fn func1() {}\n/// Doc\npub fn func2() {}";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.required_for_public);
    }

    #[test]
    fn test_documentation_analyzer_not_required_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "pub fn func1() {}\npub fn func2() {}";
        let result = analyzer.analyze(content).unwrap();
        assert!(!result.required_for_public);
    }

    // ========================================================================
    // Integration Tests
    // ========================================================================

    #[test]
    fn test_standards_detector_full_analysis() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let detector = StandardsDetector::new();
        let code = "/// Doc\nfn my_function() {\n    let x = 1;\n}";

        let mut file = NamedTempFile::new().unwrap();
        file.write_all(code.as_bytes()).unwrap();

        let result = detector.detect(&[file.path()]).unwrap();

        // Verify all components are present
        assert_eq!(
            result.naming_conventions.function_case,
            CaseStyle::SnakeCase
        );
        assert_eq!(result.formatting_style.indent_type, IndentType::Spaces);
        assert_eq!(result.documentation_style.format, DocFormat::RustDoc);
    }

    #[test]
    fn test_standards_detector_multiple_files() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let detector = StandardsDetector::new();

        let mut file1 = NamedTempFile::new().unwrap();
        let mut file2 = NamedTempFile::new().unwrap();

        file1.write_all(b"fn func1() {}").unwrap();
        file2.write_all(b"fn func2() {}").unwrap();

        let result = detector.detect(&[file1.path(), file2.path()]).unwrap();

        // Should analyze both files
        assert_eq!(
            result.naming_conventions.function_case,
            CaseStyle::SnakeCase
        );
    }

    #[test]
    fn test_standards_detector_default_instance() {
        let detector1 = StandardsDetector::new();
        let detector2 = StandardsDetector::default();

        // Both should work identically
        let result1 = detector1.detect(&[]);
        let result2 = detector2.detect(&[]);

        assert!(result1.is_ok());
        assert!(result2.is_ok());
    }
}