/// Regex patterns paired with human-readable messages for detecting
/// hardcoded credentials in source files.
fn get_security_patterns() -> Vec<(&'static str, &'static str)> {
    vec![
        (
            r#"(?i)password\s*=\s*["'][^"']+["']"#,
            "Hardcoded password detected",
        ),
        (
            r#"(?i)api_key\s*=\s*["'][^"']+["']"#,
            "Hardcoded API key detected",
        ),
        (
            r#"(?i)secret\s*=\s*["'][^"']+["']"#,
            "Hardcoded secret detected",
        ),
    ]
}
/// Scans a single file against the security patterns, recording one
/// violation per matching line.
async fn check_file_security(
    path: &std::path::Path,
    patterns: &[(&str, &str)],
    violations: &mut Vec<QualityViolation>,
) -> Result<()> {
    use regex::Regex;
    use tokio::fs;
    if let Ok(content) = fs::read_to_string(path).await {
        for (pattern_str, message) in patterns {
            if let Ok(regex) = Regex::new(pattern_str) {
                scan_content_for_pattern(&content, &regex, message, path, violations);
            }
        }
    }
    Ok(())
}
fn scan_content_for_pattern(
    content: &str,
    regex: &regex::Regex,
    message: &str,
    path: &std::path::Path,
    violations: &mut Vec<QualityViolation>,
) {
    for (line_no, line) in content.lines().enumerate() {
        if regex.is_match(line) {
            violations.push(QualityViolation {
                check_type: "security".to_string(),
                severity: "error".to_string(),
                file: path.to_string_lossy().to_string(),
                // `enumerate` is zero-based; report one-based line numbers.
                line: Some(line_no + 1),
                message: message.to_string(),
                details: None,
            });
        }
    }
}
/// Detects duplicate files by hashing their normalized contents and
/// reporting every group of paths that share the same hash.
pub async fn check_duplicates(project_path: &Path) -> Result<Vec<QualityViolation>> {
    use std::collections::HashMap;
    let mut violations = Vec::new();
    let mut file_hashes: HashMap<u64, Vec<PathBuf>> = HashMap::new();
    collect_file_hashes(project_path, &mut file_hashes).await?;
    generate_duplicate_violations(&file_hashes, &mut violations);
    Ok(violations)
}
async fn collect_file_hashes(
    project_path: &Path,
    file_hashes: &mut std::collections::HashMap<u64, Vec<PathBuf>>,
) -> Result<()> {
    use walkdir::WalkDir;
    for entry in WalkDir::new(project_path) {
        let entry = entry?;
        let path = entry.path();
        let path_str = path.to_string_lossy();
        if is_excluded_directory(&path_str) || path_str.contains("/target/") {
            continue;
        }
        if should_process_file_for_duplicates(path) {
            // Already inside an async fn, so await the hash directly instead of
            // blocking the runtime with `block_in_place`/`block_on`.
            if let Some(hash) = process_file_for_hash(path).await {
                file_hashes
                    .entry(hash)
                    .or_default()
                    .push(path.to_path_buf());
            }
        }
    }
    Ok(())
}
fn should_process_file_for_duplicates(path: &Path) -> bool {
    path.is_file() && is_source_file(path) && !is_build_artifact(path)
}
async fn process_file_for_hash(path: &Path) -> Option<u64> {
    let content = tokio::fs::read_to_string(path).await.ok()?;
    let normalized = normalize_code_content(&content);
    if is_file_large_enough(&normalized) {
        Some(calculate_content_hash(&normalized))
    } else {
        None
    }
}
/// Files whose normalized content is 50 characters or fewer are skipped,
/// since such small files are unlikely to be meaningful duplicates.
fn is_file_large_enough(normalized_content: &str) -> bool {
    normalized_content.len() > 50
}
fn generate_duplicate_violations(
    file_hashes: &std::collections::HashMap<u64, Vec<PathBuf>>,
    violations: &mut Vec<QualityViolation>,
) {
    for paths in file_hashes.values() {
        if paths.len() > 1 {
            create_violations_for_duplicate_group(paths, violations);
        }
    }
}
fn create_violations_for_duplicate_group(
    paths: &[PathBuf],
    violations: &mut Vec<QualityViolation>,
) {
    let files_str = format_file_list(paths);
    for path in paths {
        violations.push(QualityViolation {
            check_type: "duplicate".to_string(),
            severity: "warning".to_string(),
            file: path.to_string_lossy().to_string(),
            line: None,
            message: format!("Duplicate code found in: {files_str}"),
            details: None,
        });
    }
}
fn format_file_list(paths: &[PathBuf]) -> String {
    paths
        .iter()
        .map(|p| p.to_string_lossy().to_string())
        .collect::<Vec<_>>()
        .join(", ")
}
/// Strips blank lines and comment-only lines, and trims whitespace, so that
/// formatting differences do not affect the content hash.
pub fn normalize_code_content(content: &str) -> String {
    content
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            !trimmed.is_empty() && !trimmed.starts_with("//") && !trimmed.starts_with("/*")
        })
        .map(str::trim)
        .collect::<Vec<_>>()
        .join("\n")
}
/// Hashes normalized content with the standard library's `DefaultHasher`.
pub fn calculate_content_hash(content: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut hasher = DefaultHasher::new();
    content.hash(&mut hasher);
    hasher.finish()
}
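
// Illustrative tests only: a minimal sketch of how the helpers above behave,
// assuming the `regex` crate is available as a dependency (it is already used
// by `check_file_security`). The module name and input strings are made up
// for illustration.
#[cfg(test)]
mod quality_check_tests {
    use super::*;

    #[test]
    fn security_pattern_matches_hardcoded_password() {
        let (pattern, _message) = get_security_patterns()[0];
        let regex = regex::Regex::new(pattern).expect("pattern should compile");
        // A quoted literal assignment should match; reading from the
        // environment should not.
        assert!(regex.is_match(r#"let password = "hunter2";"#));
        assert!(!regex.is_match("let password = read_from_env();"));
    }

    #[test]
    fn identical_code_modulo_comments_hashes_equally() {
        // Same code, differing only in comments and indentation, should
        // normalize to the same string and therefore the same hash.
        let a = "fn main() {\n    // comment\n    println!(\"hi\");\n}\n";
        let b = "fn main() {\nprintln!(\"hi\");\n}\n";
        let ha = calculate_content_hash(&normalize_code_content(a));
        let hb = calculate_content_hash(&normalize_code_content(b));
        assert_eq!(ha, hb);
    }
}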