1use crate::error::ResearchError;
4use crate::models::{
5 CaseStyle, DocFormat, DocumentationStyle, FormattingStyle, ImportGroup, ImportOrganization,
6 IndentType, NamingConventions, StandardsProfile,
7};
8use regex::Regex;
9use std::collections::HashMap;
10use std::path::Path;
11
12#[derive(Debug)]
14pub struct StandardsDetector;
15
16impl StandardsDetector {
17 pub fn new() -> Self {
19 StandardsDetector
20 }
21
22 pub fn detect(&self, files: &[&Path]) -> Result<StandardsProfile, ResearchError> {
35 if files.is_empty() {
36 return Ok(StandardsProfile::default());
37 }
38
39 let mut file_contents = Vec::new();
41 for file_path in files {
42 match std::fs::read_to_string(file_path) {
43 Ok(content) => file_contents.push(content),
44 Err(_) => {
45 continue;
47 }
48 }
49 }
50
51 if file_contents.is_empty() {
52 return Ok(StandardsProfile::default());
53 }
54
55 let combined_content = file_contents.join("\n");
56
57 let naming_conventions = self.detect_naming_conventions(&combined_content)?;
59 let formatting_style = self.detect_formatting_style(&combined_content)?;
60 let import_organization = self.detect_import_organization(&combined_content)?;
61 let documentation_style = self.detect_documentation_style(&combined_content)?;
62
63 Ok(StandardsProfile {
64 naming_conventions,
65 formatting_style,
66 import_organization,
67 documentation_style,
68 })
69 }
70
71 fn detect_naming_conventions(&self, content: &str) -> Result<NamingConventions, ResearchError> {
73 let naming_analyzer = NamingAnalyzer::new();
74 naming_analyzer.analyze(content)
75 }
76
77 fn detect_formatting_style(&self, content: &str) -> Result<FormattingStyle, ResearchError> {
79 let formatting_analyzer = FormattingAnalyzer::new();
80 formatting_analyzer.analyze(content)
81 }
82
83 fn detect_import_organization(
85 &self,
86 content: &str,
87 ) -> Result<ImportOrganization, ResearchError> {
88 let import_analyzer = ImportAnalyzer::new();
89 import_analyzer.analyze(content)
90 }
91
92 fn detect_documentation_style(
94 &self,
95 content: &str,
96 ) -> Result<DocumentationStyle, ResearchError> {
97 let doc_analyzer = DocumentationAnalyzer::new();
98 doc_analyzer.analyze(content)
99 }
100}
101
102impl Default for StandardsDetector {
103 fn default() -> Self {
104 Self::new()
105 }
106}
107
108#[derive(Debug)]
114struct NamingAnalyzer;
115
116impl NamingAnalyzer {
117 fn new() -> Self {
118 NamingAnalyzer
119 }
120
121 fn analyze(&self, content: &str) -> Result<NamingConventions, ResearchError> {
122 let function_case = self.detect_function_naming(content);
123 let variable_case = self.detect_variable_naming(content);
124 let class_case = self.detect_class_naming(content);
125 let constant_case = self.detect_constant_naming(content);
126
127 Ok(NamingConventions {
128 function_case,
129 variable_case,
130 class_case,
131 constant_case,
132 })
133 }
134
135 fn detect_function_naming(&self, content: &str) -> CaseStyle {
136 let snake_case_count = self.count_pattern(content, r"fn\s+([a-z_][a-z0-9_]*)\s*\(");
138 let camel_case_count = self.count_pattern(content, r"function\s+([a-z][a-zA-Z0-9]*)\s*\(");
139 let pascal_case_count = self.count_pattern(content, r"def\s+([A-Z][a-zA-Z0-9]*)\s*\(");
140
141 self.determine_dominant_case(snake_case_count, camel_case_count, pascal_case_count)
142 }
143
144 fn detect_variable_naming(&self, content: &str) -> CaseStyle {
145 let snake_case_count = self.count_pattern(content, r"let\s+([a-z_][a-z0-9_]*)\s*=");
147 let camel_case_count = self.count_pattern(content, r"const\s+([a-z][a-zA-Z0-9]*)\s*=");
148 let pascal_case_count = self.count_pattern(content, r"var\s+([A-Z][a-zA-Z0-9]*)\s*=");
149
150 self.determine_dominant_case(snake_case_count, camel_case_count, pascal_case_count)
151 }
152
153 fn detect_class_naming(&self, content: &str) -> CaseStyle {
154 let pascal_case_count = self.count_pattern(
156 content,
157 r"(?:struct|class|interface)\s+([A-Z][a-zA-Z0-9]*)\s*[{<]",
158 );
159 let snake_case_count = self.count_pattern(
160 content,
161 r"(?:struct|class|interface)\s+([a-z_][a-z0-9_]*)\s*[{<]",
162 );
163
164 if pascal_case_count > snake_case_count {
165 CaseStyle::PascalCase
166 } else if snake_case_count > 0 {
167 CaseStyle::SnakeCase
168 } else {
169 CaseStyle::PascalCase }
171 }
172
173 fn detect_constant_naming(&self, content: &str) -> CaseStyle {
174 let upper_case_count = self.count_pattern(content, r"const\s+([A-Z_][A-Z0-9_]*)\s*=");
176 let camel_case_count = self.count_pattern(content, r"const\s+([a-z][a-zA-Z0-9]*)\s*=");
177
178 if upper_case_count > camel_case_count {
179 CaseStyle::UpperCase
180 } else if camel_case_count > 0 {
181 CaseStyle::CamelCase
182 } else {
183 CaseStyle::UpperCase }
185 }
186
187 fn count_pattern(&self, content: &str, pattern: &str) -> usize {
188 if let Ok(re) = Regex::new(pattern) {
189 re.find_iter(content).count()
190 } else {
191 0
192 }
193 }
194
195 fn determine_dominant_case(
196 &self,
197 snake_case: usize,
198 camel_case: usize,
199 pascal_case: usize,
200 ) -> CaseStyle {
201 if snake_case > camel_case && snake_case > pascal_case && snake_case > 0 {
202 CaseStyle::SnakeCase
203 } else if camel_case > pascal_case && camel_case > 0 {
204 CaseStyle::CamelCase
205 } else if pascal_case > 0 {
206 CaseStyle::PascalCase
207 } else {
208 CaseStyle::Mixed
209 }
210 }
211}
212
213#[derive(Debug)]
219struct FormattingAnalyzer;
220
221impl FormattingAnalyzer {
222 fn new() -> Self {
223 FormattingAnalyzer
224 }
225
226 fn analyze(&self, content: &str) -> Result<FormattingStyle, ResearchError> {
227 let indent_type = self.detect_indent_type(content);
228 let indent_size = self.detect_indent_size(content, indent_type);
229 let line_length = self.detect_line_length(content);
230
231 Ok(FormattingStyle {
232 indent_size,
233 indent_type,
234 line_length,
235 })
236 }
237
238 fn detect_indent_type(&self, content: &str) -> IndentType {
239 let mut tab_count = 0;
240 let mut space_count = 0;
241
242 for line in content.lines() {
243 if line.starts_with('\t') {
244 tab_count += 1;
245 } else if line.starts_with(' ') {
246 space_count += 1;
247 }
248 }
249
250 if tab_count > space_count {
251 IndentType::Tabs
252 } else {
253 IndentType::Spaces
254 }
255 }
256
257 fn detect_indent_size(&self, content: &str, indent_type: IndentType) -> usize {
258 let mut indent_sizes = HashMap::new();
259
260 for line in content.lines() {
261 if indent_type == IndentType::Spaces {
262 if let Some(spaces) = self.count_leading_spaces(line) {
263 if spaces > 0 && spaces <= 16 {
264 *indent_sizes.entry(spaces).or_insert(0) += 1;
265 }
266 }
267 }
268 }
269
270 indent_sizes
272 .iter()
273 .max_by_key(|&(_, count)| count)
274 .map(|(&size, _)| size)
275 .unwrap_or(4)
276 }
277
278 fn count_leading_spaces(&self, line: &str) -> Option<usize> {
279 let mut count = 0;
280 for ch in line.chars() {
281 if ch == ' ' {
282 count += 1;
283 } else {
284 break;
285 }
286 }
287 if count > 0 {
288 Some(count)
289 } else {
290 None
291 }
292 }
293
294 fn detect_line_length(&self, content: &str) -> usize {
295 let mut line_lengths = Vec::new();
296
297 for line in content.lines() {
298 line_lengths.push(line.len());
299 }
300
301 if line_lengths.is_empty() {
302 100 } else {
304 line_lengths.sort_unstable();
305 let index = (line_lengths.len() * 75) / 100;
307 line_lengths[index].clamp(80, 120)
308 }
309 }
310}
311
312#[derive(Debug)]
318struct ImportAnalyzer;
319
320impl ImportAnalyzer {
321 fn new() -> Self {
322 ImportAnalyzer
323 }
324
325 fn analyze(&self, content: &str) -> Result<ImportOrganization, ResearchError> {
326 let order = self.detect_import_order(content);
327 let sort_within_group = self.detect_sort_within_group(content);
328
329 Ok(ImportOrganization {
330 order,
331 sort_within_group,
332 })
333 }
334
335 fn detect_import_order(&self, content: &str) -> Vec<ImportGroup> {
336 let mut groups_seen = Vec::new();
337
338 for line in content.lines() {
339 let line = line.trim();
340
341 if line.starts_with("use std::") || line.starts_with("import java.") {
342 if !groups_seen.contains(&ImportGroup::Standard) {
343 groups_seen.push(ImportGroup::Standard);
344 }
345 } else if line.starts_with("use ") || line.starts_with("import ") {
346 if self.is_external_import(line) {
348 if !groups_seen.contains(&ImportGroup::External) {
349 groups_seen.push(ImportGroup::External);
350 }
351 } else if !groups_seen.contains(&ImportGroup::Internal) {
352 groups_seen.push(ImportGroup::Internal);
353 }
354 }
355 }
356
357 if groups_seen.is_empty() {
358 vec![
359 ImportGroup::Standard,
360 ImportGroup::External,
361 ImportGroup::Internal,
362 ]
363 } else {
364 groups_seen
365 }
366 }
367
368 fn is_external_import(&self, line: &str) -> bool {
369 !line.contains("./") && !line.contains("../") && !line.contains("crate::")
371 }
372
373 fn detect_sort_within_group(&self, content: &str) -> bool {
374 let mut import_groups = Vec::new();
375 let mut current_group = Vec::new();
376
377 for line in content.lines() {
378 let line = line.trim();
379 if line.starts_with("use ") || line.starts_with("import ") {
380 current_group.push(line.to_string());
381 } else if !current_group.is_empty() {
382 import_groups.push(current_group.clone());
383 current_group.clear();
384 }
385 }
386
387 if !current_group.is_empty() {
388 import_groups.push(current_group);
389 }
390
391 for group in import_groups {
393 if group.len() > 1 {
394 let mut sorted = group.clone();
395 sorted.sort();
396 if sorted == group {
397 return true;
398 }
399 }
400 }
401
402 false
403 }
404}
405
406#[derive(Debug)]
412struct DocumentationAnalyzer;
413
414impl DocumentationAnalyzer {
415 fn new() -> Self {
416 DocumentationAnalyzer
417 }
418
419 fn analyze(&self, content: &str) -> Result<DocumentationStyle, ResearchError> {
420 let format = self.detect_doc_format(content);
421 let required_for_public = self.detect_required_for_public(content);
422
423 Ok(DocumentationStyle {
424 format,
425 required_for_public,
426 })
427 }
428
429 fn detect_doc_format(&self, content: &str) -> DocFormat {
430 let rustdoc_count = content.matches("///").count();
431 let javadoc_count = content.matches("/**").count();
432 let jsdoc_count = content.matches("/**").count();
433 let python_doc_count = content.matches("\"\"\"").count();
434
435 if rustdoc_count > javadoc_count
436 && rustdoc_count > jsdoc_count
437 && rustdoc_count > python_doc_count
438 {
439 DocFormat::RustDoc
440 } else if javadoc_count > jsdoc_count && javadoc_count > python_doc_count {
441 DocFormat::JavaDoc
442 } else if jsdoc_count > python_doc_count {
443 DocFormat::JSDoc
444 } else if python_doc_count > 0 {
445 DocFormat::PythonDoc
446 } else {
447 DocFormat::RustDoc }
449 }
450
451 fn detect_required_for_public(&self, content: &str) -> bool {
452 let public_items = self.count_public_items(content);
454 let documented_items = self.count_documented_items(content);
455
456 if public_items == 0 {
457 false
458 } else {
459 documented_items as f32 / public_items as f32 > 0.5
461 }
462 }
463
464 fn count_public_items(&self, content: &str) -> usize {
465 let public_fn = content.matches("pub fn").count();
466 let public_struct = content.matches("pub struct").count();
467 let public_enum = content.matches("pub enum").count();
468 let public_trait = content.matches("pub trait").count();
469
470 public_fn + public_struct + public_enum + public_trait
471 }
472
473 fn count_documented_items(&self, content: &str) -> usize {
474 let mut count = 0;
475 let lines: Vec<&str> = content.lines().collect();
476
477 for i in 0..lines.len() {
478 let line = lines[i];
479 if (line.contains("///") || line.contains("/**")) && i + 1 < lines.len() {
480 let next_line = lines[i + 1];
481 if next_line.contains("pub ") {
482 count += 1;
483 }
484 }
485 }
486
487 count
488 }
489}
490
#[cfg(test)]
mod tests {
    use super::*;

    // --- StandardsDetector construction ---

    #[test]
    fn test_standards_detector_creation() {
        let detector = StandardsDetector::new();
        assert_eq!(format!("{:?}", detector), "StandardsDetector");
    }

    #[test]
    fn test_empty_files_returns_default() {
        let detector = StandardsDetector::new();
        let result = detector.detect(&[]);
        assert!(result.is_ok());
    }

    // --- NamingAnalyzer ---

    #[test]
    fn test_naming_analyzer_snake_case_detection() {
        let analyzer = NamingAnalyzer::new();
        let content = "fn my_function() {}\nfn another_function() {}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.function_case, CaseStyle::SnakeCase);
    }

    #[test]
    fn test_naming_analyzer_pascal_case_class_detection() {
        let analyzer = NamingAnalyzer::new();
        let content = "struct MyStruct {}\nstruct AnotherStruct {}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.class_case, CaseStyle::PascalCase);
    }

    #[test]
    fn test_naming_analyzer_upper_case_constant_detection() {
        let analyzer = NamingAnalyzer::new();
        let content = "const MY_CONSTANT: i32 = 42;\nconst ANOTHER_CONSTANT: i32 = 100;";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.constant_case, CaseStyle::UpperCase);
    }

    #[test]
    fn test_naming_analyzer_mixed_case_fallback() {
        let analyzer = NamingAnalyzer::new();
        let content = "// No clear naming patterns";
        let result = analyzer.analyze(content).unwrap();
        // No function declaration matches, so the fallback variant is expected.
        assert_eq!(result.function_case, CaseStyle::Mixed);
    }

    // --- FormattingAnalyzer ---

    #[test]
    fn test_formatting_analyzer_indent_detection() {
        let analyzer = FormattingAnalyzer::new();
        // The body line must be indented by four spaces for the size assertion to hold.
        let content = "fn main() {\n    println!(\"hello\");\n}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.indent_type, IndentType::Spaces);
        assert_eq!(result.indent_size, 4);
    }

    #[test]
    fn test_formatting_analyzer_tab_detection() {
        let analyzer = FormattingAnalyzer::new();
        let content = "fn main() {\n\tprintln!(\"hello\");\n}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.indent_type, IndentType::Tabs);
    }

    #[test]
    fn test_formatting_analyzer_line_length_detection() {
        let analyzer = FormattingAnalyzer::new();
        let content = "fn main() {\n let x = 1;\n}\n";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.line_length >= 80);
        assert!(result.line_length <= 120);
    }

    #[test]
    fn test_formatting_analyzer_default_values() {
        let analyzer = FormattingAnalyzer::new();
        let content = "// Empty code";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.indent_size, 4);
        assert!(result.line_length >= 80 && result.line_length <= 120);
    }

    // --- ImportAnalyzer ---

    #[test]
    fn test_import_analyzer_order_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use std::io;\nuse external_crate;\nuse crate::module;";
        let result = analyzer.analyze(content).unwrap();
        // Groups are reported in order of first appearance.
        assert!(
            result.order
                == vec![
                    ImportGroup::Standard,
                    ImportGroup::External,
                    ImportGroup::Internal,
                ]
        );
    }

    #[test]
    fn test_import_analyzer_standard_library_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use std::io;\nuse std::fs;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.order.contains(&ImportGroup::Standard));
    }

    #[test]
    fn test_import_analyzer_external_import_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use external_crate;\nuse another_external;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.order.contains(&ImportGroup::External));
    }

    #[test]
    fn test_import_analyzer_internal_import_detection() {
        let analyzer = ImportAnalyzer::new();
        let content = "use crate::module;\nuse crate::other;";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.order.contains(&ImportGroup::Internal));
    }

    #[test]
    fn test_import_analyzer_sort_detection() {
        let analyzer = ImportAnalyzer::new();

        // `io` before `fs` is not lexicographic order.
        let unsorted = analyzer.analyze("use std::io;\nuse std::fs;").unwrap();
        assert!(!unsorted.sort_within_group);

        let sorted = analyzer.analyze("use std::fs;\nuse std::io;").unwrap();
        assert!(sorted.sort_within_group);
    }

    // --- DocumentationAnalyzer ---

    #[test]
    fn test_documentation_analyzer_format_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "/// This is a doc comment\npub fn my_function() {}";
        let result = analyzer.analyze(content).unwrap();
        assert_eq!(result.format, DocFormat::RustDoc);
    }

    #[test]
    fn test_documentation_analyzer_javadoc_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "/** This is a doc comment */\npub fn my_function() {}";
        let result = analyzer.analyze(content).unwrap();
        // JavaDoc and JSDoc share the `/**` opener, so either is acceptable here.
        assert!(matches!(
            result.format,
            DocFormat::JavaDoc | DocFormat::JSDoc
        ));
    }

    #[test]
    fn test_documentation_analyzer_required_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "/// Doc\npub fn func1() {}\n/// Doc\npub fn func2() {}";
        let result = analyzer.analyze(content).unwrap();
        assert!(result.required_for_public);
    }

    #[test]
    fn test_documentation_analyzer_not_required_detection() {
        let analyzer = DocumentationAnalyzer::new();
        let content = "pub fn func1() {}\npub fn func2() {}";
        let result = analyzer.analyze(content).unwrap();
        assert!(!result.required_for_public);
    }

    // --- End-to-end detection on temporary files ---

    #[test]
    fn test_standards_detector_full_analysis() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let detector = StandardsDetector::new();
        let code = "/// Doc\nfn my_function() {\n let x = 1;\n}";

        let mut file = NamedTempFile::new().unwrap();
        file.write_all(code.as_bytes()).unwrap();

        let result = detector.detect(&[file.path()]).unwrap();

        assert_eq!(
            result.naming_conventions.function_case,
            CaseStyle::SnakeCase
        );
        assert_eq!(result.formatting_style.indent_type, IndentType::Spaces);
        assert_eq!(result.documentation_style.format, DocFormat::RustDoc);
    }

    #[test]
    fn test_standards_detector_multiple_files() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let detector = StandardsDetector::new();

        let mut file1 = NamedTempFile::new().unwrap();
        let mut file2 = NamedTempFile::new().unwrap();

        file1.write_all(b"fn func1() {}").unwrap();
        file2.write_all(b"fn func2() {}").unwrap();

        let result = detector.detect(&[file1.path(), file2.path()]).unwrap();

        assert_eq!(
            result.naming_conventions.function_case,
            CaseStyle::SnakeCase
        );
    }

    #[test]
    fn test_standards_detector_default_instance() {
        let detector1 = StandardsDetector::new();
        let detector2 = StandardsDetector::default();

        let result1 = detector1.detect(&[]);
        let result2 = detector2.detect(&[]);

        assert!(result1.is_ok());
        assert!(result2.is_ok());
    }
}