pub fn analyze_text(text: &str) -> SecurityAnalysis
Analyze text for Unicode security issues
This function performs a comprehensive analysis of the input text to detect various Unicode-based security vulnerabilities.
§Arguments
`text` - The text to analyze
§Examples
use unicode_rs::security::*;
// Safe text
let safe = analyze_text("Hello World");
assert_eq!(safe.risk_level, RiskLevel::Low);
// Text with invisible characters
let suspicious = analyze_text("Hello\u{200B}World");
assert_eq!(suspicious.risk_level, RiskLevel::High);
Examples found in repository:
examples/security_analysis.rs (line 49)
/// Entry point of the example program.
///
/// Prints a banner and then walks through ten numbered scenarios. Scenarios
/// 1-6 hand a sample string to `analyze_and_report`; scenario 7 sanitizes a
/// string and re-analyzes it; scenario 8 inspects a string character by
/// character; scenario 9 lists the scripts found in a multi-script string;
/// scenario 10 prints a fixed list of recommendations.
9fn main() {
10 println!("Unicode Security Analysis Example");
11 println!("=================================\n");
12
13 // Example 1: Safe text
14 println!("1. Analyzing safe text:");
15 let safe_text = "Hello World! This is normal text.";
16 analyze_and_report(safe_text);
17
18 // Example 2: Text with invisible characters
19 println!("\n2. Analyzing text with invisible characters:");
20 let invisible_text = "Hello\u{200B}World\u{200C}Test"; // Zero-width space and non-joiner
21 analyze_and_report(invisible_text);
22
23 // Example 3: Bidirectional override attack
24 println!("\n3. Analyzing bidirectional override attack:");
25 let bidi_attack = "filename\u{202E}gpj.exe"; // Right-to-left override
26 analyze_and_report(bidi_attack);
27
28 // Example 4: Homograph attack (Cyrillic characters that look like Latin)
29 println!("\n4. Analyzing potential homograph attack:");
30 let homograph = "раураӏ.com"; // Cyrillic characters that look like "paypal.com"
31 analyze_and_report(homograph);
32
33 // Example 5: Mixed script attack
34 println!("\n5. Analyzing mixed script text:");
35 let mixed_script = "Secure Bank αccount Login"; // Greek alpha instead of 'a'
36 analyze_and_report(mixed_script);
37
38 // Example 6: Complex attack with multiple vectors
39 println!("\n6. Analyzing complex multi-vector attack:");
40 let complex_attack = "bank\u{200B}login\u{202E}moc.evil"; // Invisible char + bidi override
41 analyze_and_report(complex_attack);
42
43 // Example 7: Demonstrate sanitization
44 println!("\n7. Text sanitization example:");
45 let dangerous = "Hello\u{200B}World\u{202E}Dangerous\u{200C}Text";
46 println!("Original: {:?}", dangerous);
47 let sanitized = sanitize_text(dangerous);
48 println!("Sanitized: {:?}", sanitized);
   // Re-run the analysis on the sanitized output and print whether its risk
   // level compares equal to `RiskLevel::Low`.
49 println!("Safe to use: {}", analyze_text(&sanitized).risk_level == RiskLevel::Low);
50
51 // Example 8: Character-by-character analysis
52 println!("\n8. Character-by-character analysis:");
53 let test_chars = "a\u{200B}b\u{202E}c";
   // `char_indices` yields the byte offset of each char within the string, so
   // the printed "Position" counts UTF-8 bytes rather than characters.
54 for (i, ch) in test_chars.char_indices() {
55 println!(" Position {}: '{}' (U+{:04X})", i, ch, ch as u32);
   // The three checks below are independent `if`s, so a single character can
   // produce more than one warning line.
56 if is_invisible_char(ch) {
57 println!(" ⚠️ Invisible character: {}", get_char_description(ch));
58 }
59 if is_bidi_char(ch) {
60 println!(" ⚠️ Bidirectional character: {}", get_char_description(ch));
61 }
62 if is_confusable_char(ch) {
63 println!(" ⚠️ Potentially confusable character");
64 }
65 }
66
67 // Example 9: Script detection
68 println!("\n9. Script detection example:");
69 let multi_script = "Hello мир 世界 שלום";
70 let analysis = analyze_text(multi_script);
71 println!("Text: {}", multi_script);
72 println!("Detected scripts:");
   // Each entry of `analysis.scripts` is printed with its `Debug` representation.
73 for script in &analysis.scripts {
74 println!(" - {:?}", script);
75 }
76
77 // Example 10: Security recommendations
   // Fixed advice text only; no analysis is performed in this section.
78 println!("\n10. Security recommendations:");
79 println!("✅ Always validate user input for invisible characters");
80 println!("✅ Check for bidirectional override attacks in filenames");
81 println!("✅ Be aware of homograph attacks in domain names");
82 println!("✅ Consider normalizing Unicode text before processing");
83 println!("✅ Use allowlists for acceptable character ranges when possible");
84}
85
/// Analyzes `text` with `analyze_text` and prints a summary to stdout.
///
/// The output consists of the text itself, a one-line risk level, one warning
/// line per finding flag that is set on the analysis result, a confirmation
/// line when the risk level is `Low`, and the output of
/// `generate_security_report` when the risk level is `High` or above.
86fn analyze_and_report(text: &str) {
   // `{:?}` (Debug) formatting is used for the input.
   // NOTE(review): presumably so that otherwise invisible characters show up
   // as escapes in the output - confirm.
87 println!("Text: {:?}", text);
88
89 let analysis = analyze_text(text);
90
91 // Quick summary
   // `print!` (no newline) so the level emitted by the match arm below lands
   // on the same line as the "Risk Level: " label.
92 print!("Risk Level: ");
93 match analysis.risk_level {
94 RiskLevel::Low => println!("🟢 LOW"),
95 RiskLevel::Medium => println!("🟡 MEDIUM"),
96 RiskLevel::High => println!("🟠 HIGH"),
97 RiskLevel::Critical => println!("🔴 CRITICAL"),
98 }
99
100 // Detailed findings
   // Each flag is checked independently, so several warnings may be printed
   // for the same text.
101 if analysis.has_invisible_chars {
102 println!("⚠️ {} invisible character(s) detected", analysis.invisible_chars.len());
103 }
104 if analysis.has_bidi_overrides {
105 println!("⚠️ {} bidirectional override(s) detected", analysis.bidi_chars.len());
106 }
107 if analysis.has_mixed_scripts {
108 println!("⚠️ Mixed scripts detected ({} different scripts)", analysis.scripts.len());
109 }
110 if analysis.has_confusables {
111 println!("⚠️ Confusable characters detected");
112 }
113
114 if analysis.risk_level == RiskLevel::Low {
115 println!("✅ No security concerns detected");
116 }
117
118 // Show detailed report for high-risk items
   // NOTE(review): the `>=` comparison relies on `RiskLevel` being ordered,
   // presumably Low < Medium < High < Critical - confirm against the enum
   // definition.
   // NOTE(review): the report is generated from `text`, not from `analysis`,
   // so `generate_security_report` presumably analyzes the text again - confirm.
119 if analysis.risk_level >= RiskLevel::High {
120 println!("\nDetailed Security Report:");
121 println!("{}", generate_security_report(text));
122 }
123}