path_security/
encoding.rs1use anyhow::{bail, Result};
4
5use crate::constants::*;
6
7pub fn detect_url_encoding(path: &str) -> Result<()> {
10 for pattern in SUSPICIOUS_ENCODED_PATTERNS.iter() {
11 if path.contains(pattern) {
12 bail!("URL-encoded characters detected in path: {}", pattern);
13 }
14 }
15
16 if path.contains("%25") {
18 bail!("Double URL encoding detected in path");
19 }
20
21 Ok(())
22}
23
24pub fn detect_overlong_utf8(path: &str) -> Result<()> {
27 let path_lower = path.to_lowercase();
28 for pattern in OVERLONG_UTF8_PATTERNS.iter() {
29 if path_lower.contains(pattern) {
30 bail!("UTF-8 overlong encoding detected: {}", pattern);
31 }
32 }
33
34 Ok(())
35}
36
37pub fn detect_unicode_encoding(path: &str) -> Result<()> {
39 if path.contains("%u") {
41 bail!("Unicode percent encoding (%u) detected in path");
42 }
43
44 if path.contains("&#") {
46 bail!("HTML entity encoding detected in path");
47 }
48
49 Ok(())
50}
51
52pub fn detect_dangerous_unicode(path: &str) -> Result<()> {
54 for ch in path.chars() {
55 match ch {
56 '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}' => {
58 bail!("Zero-width Unicode character detected in path");
59 }
60 '\u{202E}' => {
62 bail!("Right-to-left override character detected in path");
63 }
64 '\u{2024}' | '\u{2025}' | '\u{2026}' => {
66 bail!("Unicode dot homoglyph detected in path");
67 }
68 '\u{2044}' | '\u{2215}' | '\u{2571}' | '\u{29F8}' | '\u{FF0F}' => {
71 bail!("Unicode slash homoglyph detected in path");
72 }
73 '\u{2216}' | '\u{FF3C}' => {
76 bail!("Unicode backslash homoglyph detected in path");
77 }
78 '\u{00A5}' | '\u{20A9}' | '\u{00B4}' => {
83 bail!("Code page specific path separator homoglyph detected in path");
84 }
85 '\u{FF01}'..='\u{FF5E}' => {
87 bail!("Full-width Unicode character detected in path");
88 }
89 '?' | '*' => {
91 bail!("Wildcard character detected in path: {}", ch);
92 }
93 _ => {}
94 }
95 }
96
97 Ok(())
98}
99
/// Heuristically reports whether `path` shows signs of a mixed or foreign
/// text encoding.
///
/// Returns `true` when any of the following holds:
///
/// * the path starts with U+FEFF or U+FFFE;
/// * the path contains `&#` (this also covers the hexadecimal form `&#x`);
/// * the path is at least 4 bytes long and the number of NUL bytes found at
///   odd byte offsets exceeds one eighth of the byte length (integer
///   division).
///
/// Returns `false` otherwise, including for the empty string.
pub fn detect_mixed_encoding(path: &str) -> bool {
    if matches!(path.chars().next(), Some('\u{FEFF}' | '\u{FFFE}')) {
        return true;
    }

    // "&#x…" always contains "&#", so a single search covers both forms.
    if path.contains("&#") {
        return true;
    }

    let bytes = path.as_bytes();
    if bytes.len() < 4 {
        return false;
    }

    // Count NUL bytes at offsets 1, 3, 5, … (the second byte of each pair).
    let odd_nulls = bytes
        .iter()
        .skip(1)
        .step_by(2)
        .filter(|&&byte| byte == 0)
        .count();

    odd_nulls > bytes.len() / 8
}
129
130pub fn normalize_and_check(path: &str) -> Result<String> {
132 let mut normalized = path.to_string();
133
134 normalized = normalized.trim().to_string();
136
137 if normalized != path {
139 bail!("Leading or trailing whitespace detected in path");
140 }
141
142 if normalized.contains(" ") {
144 bail!("Multiple consecutive spaces detected in path");
145 }
146
147 Ok(normalized)
148}