use anyhow::{bail, Result};
use crate::constants::*;
/// Rejects paths that contain percent-encoded sequences.
///
/// The path is searched for every entry of `SUSPICIOUS_ENCODED_PATTERNS`
/// (declared in `crate::constants`), in table order. If none matches, the
/// path is then searched for the literal `%25`, which is reported as double
/// URL encoding.
///
/// Matching is a plain substring search, so it is exactly as case-sensitive
/// as the entries of the pattern table.
///
/// # Errors
///
/// Returns an error naming the first table entry found in `path`, or a
/// double-encoding error when `path` contains `%25`.
pub fn detect_url_encoding(path: &str) -> Result<()> {
    let offending = SUSPICIOUS_ENCODED_PATTERNS
        .iter()
        .find(|candidate| path.contains(*candidate));
    if let Some(pattern) = offending {
        bail!("URL-encoded characters detected in path: {}", pattern);
    }

    // Checked only after the table so the table's error takes precedence.
    if path.contains("%25") {
        bail!("Double URL encoding detected in path");
    }

    Ok(())
}
/// Rejects paths that contain an overlong UTF-8 encoding sequence.
///
/// The path is lowercased once and then searched for every entry of
/// `OVERLONG_UTF8_PATTERNS` (declared in `crate::constants`), in table order.
/// NOTE(review): this only yields case-insensitive matching if the table
/// entries are themselves lowercase — confirm against `crate::constants`.
///
/// # Errors
///
/// Returns an error naming the first table entry found in the lowercased path.
pub fn detect_overlong_utf8(path: &str) -> Result<()> {
    let lowered = path.to_lowercase();
    let offending = OVERLONG_UTF8_PATTERNS
        .iter()
        .find(|candidate| lowered.contains(*candidate));

    match offending {
        Some(pattern) => bail!("UTF-8 overlong encoding detected: {}", pattern),
        None => Ok(()),
    }
}
/// Rejects paths that carry non-standard Unicode escapes.
///
/// Two notations are looked for, both by plain substring search:
///
/// * the `%uXXXX` percent notation, with the `u` marker in either case;
/// * the `&#` prefix shared by decimal (`&#46;`) and hexadecimal (`&#x2e;`)
///   HTML character references.
///
/// # Errors
///
/// Returns an error when `path` contains `%u`, `%U`, or `&#`.
pub fn detect_unicode_encoding(path: &str) -> Result<()> {
    // Decoders that understand this escape (e.g. ModSecurity's urlDecodeUni)
    // accept the marker as either `u` or `U`, so both spellings are rejected;
    // checking only the lowercase form would let `%U002e` through.
    if path.contains("%u") || path.contains("%U") {
        bail!("Unicode percent encoding (%u) detected in path");
    }

    if path.contains("&#") {
        bail!("HTML entity encoding detected in path");
    }

    Ok(())
}
/// Rejects paths containing characters that can disguise or alter a path.
///
/// Every character of `path` is examined in order and the first offending one
/// decides the error. The categories are tested in the order listed below;
/// that order matters because the full-width range also covers the full-width
/// slash (U+FF0F) and backslash (U+FF3C), which must be reported under their
/// more specific category.
///
/// # Errors
///
/// Returns an error for the first character that is:
///
/// * zero-width: U+200B, U+200C, U+200D, U+FEFF;
/// * the right-to-left override U+202E;
/// * a dot look-alike: U+2024, U+2025, U+2026;
/// * a slash look-alike: U+2044, U+2215, U+2571, U+29F8, U+FF0F;
/// * a backslash look-alike: U+2216, U+FF3C;
/// * one of U+00A5, U+20A9, U+00B4;
/// * any other character in U+FF01..=U+FF5E;
/// * an ASCII wildcard, `?` or `*`.
pub fn detect_dangerous_unicode(path: &str) -> Result<()> {
    for ch in path.chars() {
        // U+200B, U+200C and U+200D are contiguous, hence the range.
        if matches!(ch, '\u{200B}'..='\u{200D}' | '\u{FEFF}') {
            bail!("Zero-width Unicode character detected in path");
        }
        if ch == '\u{202E}' {
            bail!("Right-to-left override character detected in path");
        }
        // U+2024, U+2025 and U+2026 are contiguous as well.
        if matches!(ch, '\u{2024}'..='\u{2026}') {
            bail!("Unicode dot homoglyph detected in path");
        }
        if matches!(
            ch,
            '\u{2044}' | '\u{2215}' | '\u{2571}' | '\u{29F8}' | '\u{FF0F}'
        ) {
            bail!("Unicode slash homoglyph detected in path");
        }
        if matches!(ch, '\u{2216}' | '\u{FF3C}') {
            bail!("Unicode backslash homoglyph detected in path");
        }
        if matches!(ch, '\u{00A5}' | '\u{20A9}' | '\u{00B4}') {
            bail!("Code page specific path separator homoglyph detected in path");
        }
        // Must stay after the slash/backslash checks: this range contains
        // U+FF0F and U+FF3C, which are reported above.
        if matches!(ch, '\u{FF01}'..='\u{FF5E}') {
            bail!("Full-width Unicode character detected in path");
        }
        if ch == '?' || ch == '*' {
            bail!("Wildcard character detected in path: {}", ch);
        }
    }

    Ok(())
}
/// Heuristically reports whether `path` looks like it mixes text encodings.
///
/// Returns `true` when any of the following holds:
///
/// * `path` starts with U+FEFF or U+FFFE;
/// * `path` contains `&#`, the prefix of both decimal (`&#46;`) and
///   hexadecimal (`&#x2e;`) HTML character references;
/// * `path` is at least 4 bytes long and, looking at every second byte, the
///   number of NUL bytes exceeds one eighth of the byte length (integer
///   division). Both alignments are examined: NULs at odd offsets (the layout
///   of ASCII text stored as little-endian 16-bit units) and NULs at even
///   offsets (the big-endian layout).
///
/// Returns `false` otherwise, including for the empty string.
pub fn detect_mixed_encoding(path: &str) -> bool {
    if path.starts_with('\u{FEFF}') || path.starts_with('\u{FFFE}') {
        return true;
    }

    // "&#x" always contains "&#", so one search covers both reference forms.
    if path.contains("&#") {
        return true;
    }

    let bytes = path.as_bytes();
    if bytes.len() < 4 {
        return false;
    }

    let threshold = bytes.len() / 8;
    // Counts NUL bytes at `offset`, `offset + 2`, `offset + 4`, ...
    let nulls_from = |offset: usize| {
        bytes
            .iter()
            .skip(offset)
            .step_by(2)
            .filter(|&&byte| byte == 0)
            .count()
    };

    // Offset 1 is the little-endian layout, offset 0 the big-endian one.
    nulls_from(1) > threshold || nulls_from(0) > threshold
}
/// Validates the whitespace in `path` and returns it as an owned `String`.
///
/// The path is accepted only if trimming it (Unicode whitespace, as defined
/// by `str::trim`) would change nothing and it contains no run of two or more
/// ASCII spaces. A single interior space is allowed. On success the returned
/// string is equal to `path`.
///
/// # Errors
///
/// Returns an error when `path` has leading or trailing whitespace, or when
/// it contains two adjacent spaces.
pub fn normalize_and_check(path: &str) -> Result<String> {
    // Comparing against the trimmed slice avoids allocating just to detect
    // surrounding whitespace.
    if path.trim() != path {
        bail!("Leading or trailing whitespace detected in path");
    }

    // Two adjacent spaces: this is what the error message describes. Searching
    // for a single space would reject every path that contains one.
    if path.contains("  ") {
        bail!("Multiple consecutive spaces detected in path");
    }

    Ok(path.to_string())
}