/// Regex/message pairs used to flag hardcoded credentials (passwords,
/// API keys, and secrets) in source files. All patterns are
/// case-insensitive and match `name = "value"` / `name = 'value'` forms.
fn get_security_patterns() -> Vec<(&'static str, &'static str)> {
    const PATTERNS: [(&'static str, &'static str); 3] = [
        (
            r#"(?i)password\s*=\s*["'][^"']+["']"#,
            "Hardcoded password detected",
        ),
        (
            r#"(?i)api_key\s*=\s*["'][^"']+["']"#,
            "Hardcoded API key detected",
        ),
        (
            r#"(?i)secret\s*=\s*["'][^"']+["']"#,
            "Hardcoded secret detected",
        ),
    ];
    PATTERNS.to_vec()
}
/// Scan one file against the security patterns, appending a violation per
/// matching line to `violations`.
///
/// Best-effort by design: unreadable (non-UTF-8 or inaccessible) files are
/// skipped silently, as are patterns that fail to compile.
async fn check_file_security(
    path: &std::path::Path,
    patterns: &[(&str, &str)],
    violations: &mut Vec<QualityViolation>,
) -> Result<()> {
    use regex::Regex;
    use tokio::fs;
    if let Ok(content) = fs::read_to_string(path).await {
        for (pattern_str, message) in patterns {
            if let Ok(regex) = Regex::new(pattern_str) {
                // Fix: `®ex` was an encoding-corrupted `&regex` (invalid
                // identifier, would not compile); pass the reference as intended.
                scan_content_for_pattern(&content, &regex, message, path, violations);
            }
        }
    }
    Ok(())
}
fn scan_content_for_pattern(
content: &str,
regex: ®ex::Regex,
message: &str,
path: &std::path::Path,
violations: &mut Vec<QualityViolation>,
) {
for (line_no, line) in content.lines().enumerate() {
if regex.is_match(line) {
violations.push(QualityViolation {
check_type: "security".to_string(),
severity: "error".to_string(),
file: path.to_string_lossy().to_string(),
line: Some(line_no + 1),
message: message.to_string(),
details: None,
});
}
}
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
/// Detect byte-identical (after normalization) source files under
/// `project_path` and return one "duplicate" violation per file in each
/// duplicate group.
pub async fn check_duplicates(project_path: &Path) -> Result<Vec<QualityViolation>> {
    use std::collections::HashMap;
    // Bucket files by normalized-content hash, then report any bucket
    // holding more than one file.
    let mut file_hashes: HashMap<u64, Vec<PathBuf>> = HashMap::new();
    collect_file_hashes(project_path, &mut file_hashes).await?;
    let mut violations = Vec::new();
    generate_duplicate_violations(&file_hashes, &mut violations);
    Ok(violations)
}
/// Walk `project_path` and record a normalized-content hash for every
/// eligible source file into `file_hashes` (hash -> paths with that hash).
///
/// # Errors
/// Propagates any directory-walk error (e.g. permission denied).
async fn collect_file_hashes(
    project_path: &Path,
    file_hashes: &mut std::collections::HashMap<u64, Vec<PathBuf>>,
) -> Result<()> {
    use walkdir::WalkDir;
    for entry in WalkDir::new(project_path) {
        let entry = entry?;
        let path = entry.path();
        let path_str = path.to_string_lossy();
        // Skip excluded directories and build output. The explicit
        // "/target/" check is kept in case is_excluded_directory does not
        // cover it — TODO confirm and deduplicate.
        if is_excluded_directory(&path_str) || path_str.contains("/target/") {
            continue;
        }
        if should_process_file_for_duplicates(path) {
            // Fix: this fn is already async, so await directly. The old
            // block_in_place + Handle::block_on dance needlessly tied up a
            // worker thread and panics on a current-thread runtime.
            if let Some(hash) = process_file_for_hash(path).await {
                file_hashes
                    .entry(hash)
                    .or_default()
                    .push(path.to_path_buf());
            }
        }
    }
    Ok(())
}
/// A path participates in duplicate detection only if it is a regular file,
/// looks like source code, and is not a build artifact.
fn should_process_file_for_duplicates(path: &Path) -> bool {
    if !path.is_file() {
        return false;
    }
    is_source_file(path) && !is_build_artifact(path)
}
/// Read, normalize, and hash one file's content.
///
/// Returns `None` when the file cannot be read as UTF-8 text, or when its
/// normalized content is too small to be worth comparing.
async fn process_file_for_hash(path: &Path) -> Option<u64> {
    let content = tokio::fs::read_to_string(path).await.ok()?;
    let normalized = normalize_code_content(&content);
    is_file_large_enough(&normalized).then(|| calculate_content_hash(&normalized))
}
/// True when the normalized content exceeds the minimum byte length for
/// duplicate detection; tiny files would produce noisy false positives.
fn is_file_large_enough(normalized_content: &str) -> bool {
    // Strictly more than 50 bytes, matching the original threshold.
    const MIN_BYTES_EXCLUSIVE: usize = 50;
    normalized_content.len() > MIN_BYTES_EXCLUSIVE
}
/// Emit violations for every hash bucket containing two or more files;
/// single-file buckets are unique content and produce nothing.
fn generate_duplicate_violations(
    file_hashes: &std::collections::HashMap<u64, Vec<PathBuf>>,
    violations: &mut Vec<QualityViolation>,
) {
    file_hashes
        .values()
        .filter(|paths| paths.len() > 1)
        .for_each(|paths| create_violations_for_duplicate_group(paths, violations));
}
/// Append one "duplicate" warning per file in the group; each violation's
/// message lists every file sharing the same normalized content.
fn create_violations_for_duplicate_group(
    paths: &[PathBuf],
    violations: &mut Vec<QualityViolation>,
) {
    // Format the group listing once and reuse it in every message.
    let files_str = format_file_list(paths);
    violations.extend(paths.iter().map(|path| QualityViolation {
        check_type: "duplicate".to_string(),
        severity: "warning".to_string(),
        file: path.to_string_lossy().to_string(),
        line: None,
        message: format!("Duplicate code found in: {files_str}"),
        details: None,
    }));
}
/// Render a path list as a single comma-separated string
/// (e.g. "a.rs, b.rs"); an empty slice yields an empty string.
fn format_file_list(paths: &[PathBuf]) -> String {
    let names: Vec<String> = paths
        .iter()
        .map(|p| p.to_string_lossy().into_owned())
        .collect();
    names.join(", ")
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
/// Normalize source text for duplicate comparison: drop blank lines and
/// lines starting (after trimming) with `//` or `/*`, trim surrounding
/// whitespace from the survivors, and rejoin them with `\n`.
///
/// Note: only the opening line of a `/* ... */` block is filtered; the
/// continuation lines are kept, matching the original behavior.
pub fn normalize_code_content(content: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();
    for raw in content.lines() {
        let line = raw.trim();
        if line.is_empty() || line.starts_with("//") || line.starts_with("/*") {
            continue;
        }
        kept.push(line);
    }
    kept.join("\n")
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "score_range")]
/// Hash normalized content with the standard library's default hasher.
/// Deterministic within a process; suitable for in-run duplicate grouping
/// but not for persistence across runs or std versions.
pub fn calculate_content_hash(content: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut hasher = DefaultHasher::default();
    content.hash(&mut hasher);
    hasher.finish()
}
#[cfg(test)]
// Unit tests for the security-pattern and duplicate-detection helpers.
// Kept byte-exact; comments only describe what each case pins down.
mod security_duplicates_tests {
use super::*;
// The pattern table must cover exactly the three credential kinds.
#[test]
fn test_get_security_patterns_contains_password_api_key_secret() {
let patterns = get_security_patterns();
assert_eq!(patterns.len(), 3);
let messages: Vec<&str> = patterns.iter().map(|(_, m)| *m).collect();
assert!(messages.iter().any(|m| m.contains("password")));
assert!(messages.iter().any(|m| m.contains("API key")));
assert!(messages.iter().any(|m| m.contains("secret")));
}
// Every shipped pattern must be a valid regex (scan silently skips
// invalid ones, so a typo here would otherwise go unnoticed).
#[test]
fn test_get_security_patterns_regexes_compile() {
for (pat, _msg) in get_security_patterns() {
regex::Regex::new(pat).expect("pattern must be a valid regex");
}
}
// A match must yield one violation with a 1-based line number and the
// caller-supplied message.
#[test]
fn test_scan_content_for_pattern_match_adds_violation_with_1based_line() {
let re = regex::Regex::new(r#"(?i)password\s*=\s*["'][^"']+["']"#).unwrap();
let content = "// comment\nlet password = \"hunter2\"\nok";
let mut v: Vec<QualityViolation> = Vec::new();
scan_content_for_pattern(
content,
&re,
"password!",
std::path::Path::new("src/a.rs"),
&mut v,
);
assert_eq!(v.len(), 1);
assert_eq!(v[0].line, Some(2));
assert_eq!(v[0].check_type, "security");
assert_eq!(v[0].message, "password!");
}
// No match means the violations vec is left untouched.
#[test]
fn test_scan_content_for_pattern_miss_adds_nothing() {
let re = regex::Regex::new(r#"NEVER_MATCHES"#).unwrap();
let mut v = Vec::new();
scan_content_for_pattern(
"plain code",
&re,
"unused",
std::path::Path::new("f.rs"),
&mut v,
);
assert!(v.is_empty());
}
// The 50-byte threshold is exclusive: exactly 50 bytes is too small.
#[test]
fn test_is_file_large_enough_at_or_below_50_is_false() {
assert!(!is_file_large_enough(""));
assert!(!is_file_large_enough(&"x".repeat(50)));
}
#[test]
fn test_is_file_large_enough_above_50_is_true() {
assert!(is_file_large_enough(&"x".repeat(51)));
assert!(is_file_large_enough(&"x".repeat(1000)));
}
// Two files sharing a hash produce one violation each; a singleton
// bucket produces none.
#[test]
fn test_generate_duplicate_violations_emits_one_per_file_in_dup_group() {
let mut map: std::collections::HashMap<u64, Vec<PathBuf>> =
std::collections::HashMap::new();
map.insert(
42,
vec![PathBuf::from("src/a.rs"), PathBuf::from("src/b.rs")],
);
map.insert(99, vec![PathBuf::from("src/solo.rs")]);
let mut v: Vec<QualityViolation> = Vec::new();
generate_duplicate_violations(&map, &mut v);
assert_eq!(v.len(), 2);
assert!(v.iter().all(|x| x.check_type == "duplicate"));
assert!(v.iter().all(|x| x.severity == "warning"));
}
#[test]
fn test_generate_duplicate_violations_empty_map_produces_nothing() {
let map: std::collections::HashMap<u64, Vec<PathBuf>> = std::collections::HashMap::new();
let mut v: Vec<QualityViolation> = Vec::new();
generate_duplicate_violations(&map, &mut v);
assert!(v.is_empty());
}
// Joining uses ", " between entries, with no leading/trailing separator.
#[test]
fn test_format_file_list_joins_with_comma_space() {
let out = format_file_list(&[PathBuf::from("a.rs"), PathBuf::from("b.rs")]);
assert_eq!(out, "a.rs, b.rs");
}
#[test]
fn test_format_file_list_single_has_no_separator() {
let out = format_file_list(&[PathBuf::from("only.rs")]);
assert_eq!(out, "only.rs");
}
#[test]
fn test_format_file_list_empty_produces_empty_string() {
let out = format_file_list(&[]);
assert_eq!(out, "");
}
// Normalization drops blank/comment lines and trims the rest.
#[test]
fn test_normalize_code_content_strips_blank_and_comment_lines() {
let src = "\n// comment\n/* block-start\nreal line\n indented line \n\n";
let norm = normalize_code_content(src);
assert!(norm.contains("real line"));
assert!(norm.contains("indented line"));
assert!(!norm.contains("// comment"));
assert!(!norm.contains("/* block-start"));
}
#[test]
fn test_normalize_code_content_empty_input_is_empty_output() {
assert_eq!(normalize_code_content(""), "");
}
// Hashing must be stable for equal input within a run...
#[test]
fn test_calculate_content_hash_deterministic() {
let a = calculate_content_hash("pmat");
let b = calculate_content_hash("pmat");
assert_eq!(a, b);
}
// ...and sensitive to even a one-byte difference.
#[test]
fn test_calculate_content_hash_distinct_for_distinct_input() {
let a = calculate_content_hash("pmat");
let b = calculate_content_hash("pmat ");
assert_ne!(a, b);
}
// A directory (not a regular file) must be rejected.
#[test]
fn test_should_process_file_for_duplicates_non_file_path_rejected() {
let tmp = tempfile::tempdir().unwrap();
assert!(!should_process_file_for_duplicates(tmp.path()));
}
// A real .rs file on disk must be accepted.
#[test]
fn test_should_process_file_for_duplicates_accepts_rust_source() {
let tmp = tempfile::tempdir().unwrap();
let src = tmp.path().join("a.rs");
std::fs::write(&src, "fn x() {}").unwrap();
assert!(should_process_file_for_duplicates(&src));
}
}