Skip to main content

tirith_core/rules/
path.rs

1use crate::normalize::NormalizedComponent;
2use crate::parse::UrlLike;
3use crate::util::levenshtein;
4use crate::verdict::{Evidence, Finding, RuleId, Severity};
5
6/// Run path rules against a parsed URL.
7/// `raw_path` is the path from the original URL string (pre-percent-encoding by url crate).
8pub fn check(
9    _url: &UrlLike,
10    normalized_path: Option<&NormalizedComponent>,
11    raw_path: Option<&str>,
12) -> Vec<Finding> {
13    let mut findings = Vec::new();
14
15    // Use raw_path for non-ASCII detection — the url crate percent-encodes non-ASCII before we see it.
16    if let Some(rp) = raw_path {
17        check_non_ascii_path(rp, &mut findings);
18        check_homoglyph_in_path(rp, &mut findings);
19    } else if let Some(np) = normalized_path {
20        check_non_ascii_path(&np.normalized, &mut findings);
21        check_homoglyph_in_path(&np.normalized, &mut findings);
22    }
23
24    if let Some(np) = normalized_path {
25        if np.double_encoded {
26            check_double_encoding(&np.raw, &mut findings);
27        }
28    }
29
30    findings
31}
32
33fn check_non_ascii_path(normalized: &str, findings: &mut Vec<Finding>) {
34    if normalized.bytes().any(|b| b > 0x7F) {
35        findings.push(Finding {
36            rule_id: RuleId::NonAsciiPath,
37            severity: Severity::Medium,
38            title: "Non-ASCII characters in URL path".to_string(),
39            description:
40                "URL path contains non-ASCII characters which may indicate homoglyph substitution"
41                    .to_string(),
42            evidence: vec![Evidence::Url {
43                raw: normalized.to_string(),
44            }],
45            human_view: None,
46            agent_view: None,
47            mitre_id: None,
48            custom_rule_id: None,
49        });
50    }
51}
52
53fn check_homoglyph_in_path(normalized: &str, findings: &mut Vec<Finding>) {
54    let known_paths = [
55        "install", "setup", "init", "config", "login", "auth", "admin", "api", "token", "key",
56        "secret", "password",
57    ];
58
59    for segment in normalized.split('/') {
60        if segment.is_empty() {
61            continue;
62        }
63        let lower = segment.to_lowercase();
64
65        // Mixed ASCII + non-ASCII in one segment is the homoglyph shape we care about.
66        let has_ascii = segment.bytes().any(|b| b.is_ascii_alphabetic());
67        let has_non_ascii = segment.bytes().any(|b| b > 0x7F);
68        if has_ascii && has_non_ascii {
69            for known in &known_paths {
70                if levenshtein(&lower, known) <= 2 {
71                    findings.push(Finding {
72                        rule_id: RuleId::HomoglyphInPath,
73                        severity: Severity::Medium,
74                        title: "Potential homoglyph in URL path".to_string(),
75                        description: format!(
76                            "Path segment '{segment}' looks similar to '{known}' but contains non-ASCII characters"
77                        ),
78                        evidence: vec![Evidence::Url { raw: segment.to_string() }],
79                        human_view: None,
80                        agent_view: None,
81                mitre_id: None,
82                custom_rule_id: None,
83                    });
84                    return;
85                }
86            }
87        }
88    }
89}
90
91fn check_double_encoding(raw_path: &str, findings: &mut Vec<Finding>) {
92    findings.push(Finding {
93        rule_id: RuleId::DoubleEncoding,
94        severity: Severity::Medium,
95        title: "Double-encoded URL path detected".to_string(),
96        description: "URL path contains percent-encoded percent signs (%25XX) indicating double encoding, which may be used to bypass security filters".to_string(),
97        evidence: vec![Evidence::Url { raw: raw_path.to_string() }],
98        human_view: None,
99        agent_view: None,
100                mitre_id: None,
101                custom_rule_id: None,
102    });
103}