keyhog-scanner 0.5.38

keyhog-scanner: high-performance SIMD-accelerated secret detection engine
Documentation
//! Homoglyph detection: finds secrets obfuscated with lookalike Unicode characters.
//!
//! Attackers may replace 'a' with Cyrillic 'а' to bypass simple regexes.
//! This module provides a way to match patterns against homoglyph-expanded forms.

use std::collections::HashMap;
use std::sync::OnceLock;

/// Returns the shared table mapping ASCII letters to their common Unicode homoglyphs.
///
/// The table is built on first use and cached in a `OnceLock`, so every caller gets a
/// reference to the same `HashMap`. Keys are ASCII letters; each value lists the
/// lookalike characters (mostly Cyrillic and Greek) registered for that letter.
///
/// TODO(review): every entry previously ended in an empty character literal (`''`),
/// which is a hard compile error in Rust. Those slots most likely held a further
/// lookalike glyph that was lost in transit; restore them from the upstream source if
/// it is available. The `'L'` entry held nothing but such a slot and is therefore
/// omitted until its glyph is recovered.
fn homoglyph_map() -> &'static HashMap<char, Vec<char>> {
    static MAP: OnceLock<HashMap<char, Vec<char>>> = OnceLock::new();
    MAP.get_or_init(|| {
        let mut m = HashMap::new();

        // Lowercase letters.
        m.insert('a', vec!['а', 'α']);
        m.insert('b', vec!['Ь', 'β']);
        m.insert('c', vec!['с']);
        m.insert('e', vec!['е', 'ε']);
        m.insert('g', vec!['ɡ']); // U+0261
        m.insert('h', vec!['н', 'һ']); // U+04BB for h
        m.insert('i', vec!['і', 'ι']);
        m.insert('j', vec!['ј']);
        m.insert('k', vec!['к', 'κ']);
        m.insert('m', vec!['м']);
        m.insert('n', vec!['п', 'ν']);
        m.insert('o', vec!['о', 'ο']);
        m.insert('p', vec!['р', 'ρ']);
        m.insert('s', vec!['ѕ']);
        m.insert('t', vec!['т', 'τ']);
        m.insert('u', vec!['υ']);
        // NOTE(review): the trailing O-shaped glyphs (including what look like ASCII 'O'
        // and 'o') do not resemble 'l'; they may have been meant for a different key.
        // Kept as-is to avoid changing the matching data — confirm the intent.
        m.insert('l', vec!['і', 'І', 'ι', 'Ι', 'Ο', 'ο', 'о', 'O', 'o']);
        m.insert('x', vec!['х', 'χ']);
        m.insert('y', vec!['у']);

        // Uppercase letters.
        m.insert('A', vec!['А', 'Α']);
        m.insert('B', vec!['В', 'Β']);
        m.insert('E', vec!['Е', 'Ε']);
        m.insert('H', vec!['Н', 'Η']);
        m.insert('I', vec!['І', 'Ι']);
        m.insert('J', vec!['Ј']);
        m.insert('K', vec!['К', 'Κ']);
        m.insert('M', vec!['М']);
        m.insert('N', vec!['Ν']);
        m.insert('O', vec!['О', 'Ο']);
        m.insert('P', vec!['Р', 'Ρ']);
        m.insert('S', vec!['С']);
        m.insert('T', vec!['Т', 'Τ']);
        m.insert('X', vec!['Х', 'Χ']);
        m.insert('Y', vec!['Υ']);
        m
    })
}

/// Expand a regex pattern so that mapped literal characters also match their homoglyphs.
///
/// Every literal character with an entry in `homoglyph_map` is replaced by a character
/// class holding the character itself followed by its lookalikes, e.g. `ghp_` becomes
/// `[g…][h…][p…]_`, where `…` stands for the glyphs registered for that letter.
/// Characters without an entry are copied through unchanged.
///
/// Regex syntax is copied verbatim rather than expanded, because rewriting a letter
/// there changes the meaning of the pattern or makes it invalid:
///
/// * escape sequences (`\s`, `\b`, `\n`, `\x41`, `\u{..}`, `\p{Greek}`, ...),
/// * bracketed character classes (`[a-z]`, `[^\]]`, `[[:alpha:]]`), including nested ones,
/// * group prefixes (`(?i)`, `(?:`, `(?P<name>`).
///
/// The syntax handling assumes the dialect of the Rust `regex` crate (nested classes,
/// a leading `]` inside a class being literal) — NOTE(review): confirm that this is the
/// engine the expanded patterns are compiled with.
///
/// Malformed input (unterminated class, trailing backslash, ...) is passed through as
/// is, so the regex compiler still reports the error.
pub fn expand_homoglyphs(pattern: &str) -> String {
    let map = homoglyph_map();
    // The output is never shorter than the input, so this is a safe lower bound.
    let mut expanded = String::with_capacity(pattern.len());
    let mut chars = pattern.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                expanded.push(ch);
                copy_escape_verbatim(&mut chars, &mut expanded);
            }
            '[' => {
                expanded.push(ch);
                copy_class_verbatim(&mut chars, &mut expanded);
            }
            '(' if chars.peek() == Some(&'?') => {
                expanded.push(ch);
                copy_group_prefix_verbatim(&mut chars, &mut expanded);
            }
            _ => match map.get(&ch) {
                Some(glyphs) => {
                    expanded.push('[');
                    expanded.push(ch);
                    expanded.extend(glyphs.iter());
                    expanded.push(']');
                }
                None => expanded.push(ch),
            },
        }
    }

    expanded
}

/// Copies the remainder of an escape sequence (the backslash is already written).
fn copy_escape_verbatim(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, out: &mut String) {
    let Some(escaped) = chars.next() else {
        return; // Trailing backslash: nothing left to copy.
    };
    out.push(escaped);

    if !matches!(escaped, 'p' | 'P' | 'x' | 'u' | 'U') {
        return;
    }

    // Braced payload such as `\p{Greek}` or `\x{1F600}`: copy through the closing brace.
    if chars.peek() == Some(&'{') {
        for c in chars.by_ref() {
            out.push(c);
            if c == '}' {
                break;
            }
        }
        return;
    }

    // Unbraced payload: fixed-width hex digits, or a one-letter class name for `\pL`.
    let (max_len, hex_only) = match escaped {
        'x' => (2, true),
        'u' => (4, true),
        'U' => (8, true),
        _ => (1, false),
    };
    for _ in 0..max_len {
        let Some(c) = chars.next_if(|c| !hex_only || c.is_ascii_hexdigit()) else {
            break;
        };
        out.push(c);
    }
}

/// Copies a bracketed character class up to its matching `]` (the `[` is already written).
fn copy_class_verbatim(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, out: &mut String) {
    let mut depth = 1usize;
    // Directly after `[` or `[^`, a `]` is a literal member rather than the terminator.
    let mut at_start = true;
    let mut may_negate = true;

    while depth > 0 {
        let Some(c) = chars.next() else {
            return; // Unterminated class: leave the error to the regex compiler.
        };
        out.push(c);
        match c {
            '\\' => {
                if let Some(escaped) = chars.next() {
                    out.push(escaped);
                }
                at_start = false;
            }
            '^' if at_start && may_negate => may_negate = false,
            ']' if at_start => at_start = false,
            ']' => depth -= 1,
            '[' => {
                depth += 1;
                at_start = true;
                may_negate = true;
            }
            _ => at_start = false,
        }
    }
}

/// Copies a group prefix such as `?i)`, `?:` or `?P<name>` (the `(` is already written).
fn copy_group_prefix_verbatim(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
    out: &mut String,
) {
    for c in chars.by_ref() {
        out.push(c);
        if matches!(c, ':' | ')' | '>' | '=' | '!') {
            break;
        }
    }
}