use crate::normalize::NormalizedComponent;
use crate::parse::UrlLike;
use crate::util::levenshtein;
use crate::verdict::{Evidence, Finding, RuleId, Severity};
/// Runs the path-level rules and collects whatever findings they produce.
///
/// The non-ASCII and homoglyph rules inspect `raw_path` when it is supplied;
/// only when it is absent do they fall back to the `normalized` text of
/// `normalized_path`. Independently of that choice, the double-encoding rule
/// runs when `normalized_path` is present and its `double_encoded` flag is
/// set, reporting the component's `raw` text.
///
/// `_url` is accepted but not read by this function.
pub fn check(
    _url: &UrlLike,
    normalized_path: Option<&NormalizedComponent>,
    raw_path: Option<&str>,
) -> Vec<Finding> {
    let mut findings = Vec::new();

    // The raw path always wins; the normalized text is only a fallback.
    match (raw_path, normalized_path) {
        (Some(raw), _) => {
            check_non_ascii_path(raw, &mut findings);
            check_homoglyph_in_path(raw, &mut findings);
        }
        (None, Some(component)) => {
            check_non_ascii_path(&component.normalized, &mut findings);
            check_homoglyph_in_path(&component.normalized, &mut findings);
        }
        (None, None) => {}
    }

    // Double encoding is judged from the normalized component alone.
    if let Some(component) = normalized_path.filter(|c| c.double_encoded) {
        check_double_encoding(&component.raw, &mut findings);
    }

    findings
}
/// Appends a `NonAsciiPath` finding when the given path text contains any
/// byte outside the ASCII range.
///
/// Nothing is pushed for pure-ASCII (including empty) input. The finding
/// carries the whole inspected text as its `Evidence::Url` payload.
fn check_non_ascii_path(normalized: &str, findings: &mut Vec<Finding>) {
    // A string is ASCII exactly when none of its bytes exceed 0x7F.
    if normalized.is_ascii() {
        return;
    }

    let title = String::from("Non-ASCII characters in URL path");
    let description = String::from(
        "URL path contains non-ASCII characters which may indicate homoglyph substitution",
    );
    let evidence = vec![Evidence::Url {
        raw: normalized.to_owned(),
    }];

    findings.push(Finding {
        rule_id: RuleId::NonAsciiPath,
        severity: Severity::Medium,
        title,
        description,
        evidence,
        human_view: None,
        agent_view: None,
        mitre_id: None,
        custom_rule_id: None,
    });
}
/// Appends at most one `HomoglyphInPath` finding for the given path text.
///
/// The text is split on `/`. A segment is a candidate only when it contains
/// both an ASCII letter and a non-ASCII byte. Each candidate is lowercased and
/// compared against a fixed list of sensitive path words; a `levenshtein`
/// result of 2 or less counts as a look-alike. The first look-alike found
/// (segments in path order, words in list order) is reported and the search
/// stops there, so later segments are not examined.
fn check_homoglyph_in_path(normalized: &str, findings: &mut Vec<Finding>) {
    const KNOWN_PATHS: [&str; 12] = [
        "install", "setup", "init", "config", "login", "auth", "admin", "api", "token", "key",
        "secret", "password",
    ];

    let first_hit = normalized
        .split('/')
        // Empty segments have no ASCII letter, so this filter drops them too.
        .filter(|segment| {
            segment.bytes().any(|b| b.is_ascii_alphabetic()) && !segment.is_ascii()
        })
        .find_map(|segment| {
            let folded = segment.to_lowercase();
            KNOWN_PATHS
                .iter()
                .find(|&known| levenshtein(&folded, known) <= 2)
                .map(|known| (segment, known))
        });

    if let Some((segment, known)) = first_hit {
        let description = format!(
            "Path segment '{segment}' looks similar to '{known}' but contains non-ASCII characters"
        );
        findings.push(Finding {
            rule_id: RuleId::HomoglyphInPath,
            severity: Severity::Medium,
            title: "Potential homoglyph in URL path".to_string(),
            description,
            evidence: vec![Evidence::Url {
                raw: segment.to_string(),
            }],
            human_view: None,
            agent_view: None,
            mitre_id: None,
            custom_rule_id: None,
        });
    }
}
/// Unconditionally appends a `DoubleEncoding` finding whose evidence is the
/// given raw path text.
///
/// This function performs no detection itself; the visible caller only
/// invokes it after seeing the component's `double_encoded` flag set.
fn check_double_encoding(raw_path: &str, findings: &mut Vec<Finding>) {
    let title = String::from("Double-encoded URL path detected");
    let description = String::from(
        "URL path contains percent-encoded percent signs (%25XX) indicating double encoding, which may be used to bypass security filters",
    );
    let evidence = vec![Evidence::Url {
        raw: raw_path.to_owned(),
    }];

    findings.push(Finding {
        rule_id: RuleId::DoubleEncoding,
        severity: Severity::Medium,
        title,
        description,
        evidence,
        human_view: None,
        agent_view: None,
        mitre_id: None,
        custom_rule_id: None,
    });
}