Skip to main content

tirith_core/rules/
path.rs

1use crate::normalize::NormalizedComponent;
2use crate::parse::UrlLike;
3use crate::util::levenshtein;
4use crate::verdict::{Evidence, Finding, RuleId, Severity};
5
6/// Run path rules against a parsed URL.
7/// `raw_path` is the path from the original URL string (pre-percent-encoding by url crate).
8pub fn check(
9    _url: &UrlLike,
10    normalized_path: Option<&NormalizedComponent>,
11    raw_path: Option<&str>,
12) -> Vec<Finding> {
13    let mut findings = Vec::new();
14
15    // Use raw_path for non-ASCII detection (url crate percent-encodes non-ASCII)
16    if let Some(rp) = raw_path {
17        check_non_ascii_path(rp, &mut findings);
18        check_homoglyph_in_path(rp, &mut findings);
19    } else if let Some(np) = normalized_path {
20        check_non_ascii_path(&np.normalized, &mut findings);
21        check_homoglyph_in_path(&np.normalized, &mut findings);
22    }
23
24    if let Some(np) = normalized_path {
25        if np.double_encoded {
26            check_double_encoding(&np.raw, &mut findings);
27        }
28    }
29
30    findings
31}
32
33fn check_non_ascii_path(normalized: &str, findings: &mut Vec<Finding>) {
34    if normalized.bytes().any(|b| b > 0x7F) {
35        findings.push(Finding {
36            rule_id: RuleId::NonAsciiPath,
37            severity: Severity::Medium,
38            title: "Non-ASCII characters in URL path".to_string(),
39            description:
40                "URL path contains non-ASCII characters which may indicate homoglyph substitution"
41                    .to_string(),
42            evidence: vec![Evidence::Url {
43                raw: normalized.to_string(),
44            }],
45            human_view: None,
46            agent_view: None,
47            mitre_id: None,
48            custom_rule_id: None,
49        });
50    }
51}
52
53fn check_homoglyph_in_path(normalized: &str, findings: &mut Vec<Finding>) {
54    // Check for confusable characters near known path patterns
55    let known_paths = [
56        "install", "setup", "init", "config", "login", "auth", "admin", "api", "token", "key",
57        "secret", "password",
58    ];
59
60    for segment in normalized.split('/') {
61        if segment.is_empty() {
62            continue;
63        }
64        let lower = segment.to_lowercase();
65
66        // Check if segment has mixed ASCII and non-ASCII suggesting homoglyphs
67        let has_ascii = segment.bytes().any(|b| b.is_ascii_alphabetic());
68        let has_non_ascii = segment.bytes().any(|b| b > 0x7F);
69        if has_ascii && has_non_ascii {
70            // Check proximity to known paths
71            for known in &known_paths {
72                if levenshtein(&lower, known) <= 2 {
73                    findings.push(Finding {
74                        rule_id: RuleId::HomoglyphInPath,
75                        severity: Severity::Medium,
76                        title: "Potential homoglyph in URL path".to_string(),
77                        description: format!(
78                            "Path segment '{segment}' looks similar to '{known}' but contains non-ASCII characters"
79                        ),
80                        evidence: vec![Evidence::Url { raw: segment.to_string() }],
81                        human_view: None,
82                        agent_view: None,
83                mitre_id: None,
84                custom_rule_id: None,
85                    });
86                    return;
87                }
88            }
89        }
90    }
91}
92
93fn check_double_encoding(raw_path: &str, findings: &mut Vec<Finding>) {
94    findings.push(Finding {
95        rule_id: RuleId::DoubleEncoding,
96        severity: Severity::Medium,
97        title: "Double-encoded URL path detected".to_string(),
98        description: "URL path contains percent-encoded percent signs (%25XX) indicating double encoding, which may be used to bypass security filters".to_string(),
99        evidence: vec![Evidence::Url { raw: raw_path.to_string() }],
100        human_view: None,
101        agent_view: None,
102                mitre_id: None,
103                custom_rule_id: None,
104    });
105}