securegit 0.8.5

Zero-trust git replacement with 12 built-in security scanners, LLM redteam bridge, universal undo, durable backups, and a 50-tool MCP server
Documentation
use crate::core::{Finding, Severity};
use crate::plugins::traits::{PluginError, PluginReport, ScanContext, ScanPhase, SecurityPlugin};
use async_trait::async_trait;
use std::path::Path;
use std::time::Instant;

/// Unicode codepoint ranges used in homoglyph attacks — characters that look like
/// ASCII Latin letters but are from different Unicode blocks (Cyrillic, Greek, etc.)
///
/// Each entry is `(first, last, block_name)`. Both bounds are inclusive, and
/// `block_name` is the label that appears in the finding's title and evidence.
/// Matching is by range only: every character inside a range is reported,
/// whether or not that particular character resembles an ASCII letter.
const HOMOGLYPH_RANGES: &[(char, char, &str)] = &[
    // Cyrillic block (U+0400–U+04FF); contains many Latin lookalikes such as U+0430 'а'
    ('\u{0400}', '\u{04FF}', "Cyrillic"),
    // Greek and Coptic block (U+0370–U+03FF)
    ('\u{0370}', '\u{03FF}', "Greek"),
    // Fullwidth forms of printable ASCII, U+FF01–U+FF5E (letters, digits and
    // punctuation; looks identical to ASCII in some fonts)
    ('\u{FF01}', '\u{FF5E}', "Fullwidth"),
    // Mathematical Alphanumeric Symbols block (U+1D400–U+1D7FF)
    ('\u{1D400}', '\u{1D7FF}', "Mathematical"),
    // Latin Extended-B (less common lookalikes)
    ('\u{0180}', '\u{024F}', "Latin Extended-B"),
];

/// Bidirectional override characters that can make code appear different than it executes.
/// CVE-2021-42574 (Trojan Source attack).
///
/// Each entry is `(character, display_name)`; the display name is embedded in
/// the finding's title and evidence. The table covers the explicit embedding,
/// override and isolate controls together with their terminators (PDF, PDI),
/// plus the two implicit directional marks (LRM, RLM). All entries are
/// reported the same way.
const BIDI_CHARS: &[(char, &str)] = &[
    ('\u{202A}', "Left-to-Right Embedding (LRE)"),
    ('\u{202B}', "Right-to-Left Embedding (RLE)"),
    ('\u{202C}', "Pop Directional Formatting (PDF)"),
    ('\u{202D}', "Left-to-Right Override (LRO)"),
    ('\u{202E}', "Right-to-Left Override (RLO)"),
    ('\u{2066}', "Left-to-Right Isolate (LRI)"),
    ('\u{2067}', "Right-to-Left Isolate (RLI)"),
    ('\u{2068}', "First Strong Isolate (FSI)"),
    ('\u{2069}', "Pop Directional Isolate (PDI)"),
    ('\u{200F}', "Right-to-Left Mark (RLM)"),
    ('\u{200E}', "Left-to-Right Mark (LRM)"),
];

/// Zero-width and invisible characters used for steganography or evasion.
///
/// Each entry is `(character, display_name)`; the display name is embedded in
/// the finding's title and evidence. U+FEFF doubles as the byte order mark, so
/// `scan_for_invisible` tolerates it at the very start of a file.
const INVISIBLE_CHARS: &[(char, &str)] = &[
    ('\u{200B}', "Zero Width Space"),
    ('\u{200C}', "Zero Width Non-Joiner"),
    ('\u{200D}', "Zero Width Joiner"),
    ('\u{FEFF}', "Zero Width No-Break Space (BOM)"),
    ('\u{00AD}', "Soft Hyphen"),
    ('\u{034F}', "Combining Grapheme Joiner"),
    ('\u{2060}', "Word Joiner"),
    ('\u{2061}', "Function Application"),
    ('\u{2062}', "Invisible Times"),
    ('\u{2063}', "Invisible Separator"),
    ('\u{2064}', "Invisible Plus"),
];

/// File extensions to scan for encoding attacks (source code, configs).
///
/// Entries are written without the leading dot and are compared against
/// `Path::extension()` in `is_source_file`. The `Makefile` and `Dockerfile`
/// entries therefore only match names that carry them after the last dot
/// (e.g. `base.Dockerfile`); the bare file names `Makefile` and `Dockerfile`
/// are recognised separately, by file name, in `is_source_file`.
const SOURCE_EXTENSIONS: &[&str] = &[
    "py",
    "js",
    "ts",
    "jsx",
    "tsx",
    "rs",
    "go",
    "c",
    "cpp",
    "h",
    "hpp",
    "java",
    "kt",
    "scala",
    "rb",
    "php",
    "cs",
    "vb",
    "swift",
    "sh",
    "bash",
    "zsh",
    "ps1",
    "bat",
    "cmd",
    "pl",
    "pm",
    "lua",
    "r",
    "jl",
    "hs",
    "html",
    "htm",
    "css",
    "scss",
    "less",
    "yaml",
    "yml",
    "toml",
    "json",
    "xml",
    "sql",
    "tf",
    "hcl",
    "Makefile",
    "Dockerfile",
    "gradle",
];

/// Security plugin that inspects source and configuration files for deceptive
/// Unicode: bidirectional controls (Trojan Source), characters from
/// homoglyph-prone blocks, and invisible / zero-width characters.
///
/// The type is a unit struct and holds no state.
pub struct EncodingScanner;

/// `Default` simply forwards to [`EncodingScanner::new`], so both construction
/// paths yield the same value.
impl Default for EncodingScanner {
    fn default() -> Self {
        Self::new()
    }
}

impl EncodingScanner {
    /// Creates a scanner. `EncodingScanner` is a unit struct, so there is
    /// nothing to configure.
    pub fn new() -> Self {
        Self
    }

    /// Returns `true` when `path` names a file this scanner should inspect.
    ///
    /// A path qualifies when its extension is listed in `SOURCE_EXTENSIONS`, or
    /// when its file name is one of a few well-known extensionless build files.
    /// Both comparisons ignore ASCII case, so names such as `analysis.R`,
    /// `AUTH.PY` or `makefile` cannot slip past the scanner merely by using a
    /// different letter case. Paths whose extension / file name is missing or
    /// not valid UTF-8 do not match on that criterion.
    fn is_source_file(path: &Path) -> bool {
        // Build files recognised by name because they carry no extension.
        const SPECIAL_FILENAMES: &[&str] =
            &["Makefile", "Dockerfile", "Jenkinsfile", "Gemfile", "Rakefile"];

        let extension_matches = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                SOURCE_EXTENSIONS
                    .iter()
                    .any(|known| known.eq_ignore_ascii_case(ext))
            });

        let filename_matches = path
            .file_name()
            .and_then(|name| name.to_str())
            .map_or(false, |name| {
                SPECIAL_FILENAMES
                    .iter()
                    .any(|known| known.eq_ignore_ascii_case(name))
            });

        extension_matches || filename_matches
    }

    /// Reports bidirectional control characters (Trojan Source, CVE-2021-42574).
    ///
    /// `content` is walked line by line. For every character in `BIDI_CHARS`
    /// that occurs on a line, one `Critical` finding is appended to `findings`
    /// carrying `path`, the 1-based line number and the occurrence count.
    /// The numeric suffix of the finding id is derived from the current length
    /// of `findings`, so it continues from whatever the vector already holds.
    fn scan_for_bidi(path: &Path, content: &str, findings: &mut Vec<Finding>) {
        for (idx, text) in content.lines().enumerate() {
            let line_no = (idx + 1) as u32;

            for &(marker, label) in BIDI_CHARS {
                // One counting pass answers both "is it present" and "how often".
                let hits = text.chars().filter(|&c| c == marker).count();
                if hits == 0 {
                    continue;
                }

                let id = format!("ENC-BIDI-{:03}", findings.len() + 1);
                let title = format!("Bidirectional text override: {} (Trojan Source)", label);
                let evidence = format!(
                    "Found {} occurrence(s) of {} (U+{:04X}) — CVE-2021-42574",
                    hits, label, marker as u32
                );
                let description = "Bidirectional override characters make source code appear \
                                   different than it actually executes. This is the Trojan Source \
                                   attack (CVE-2021-42574). CWE-451."
                    .to_string();

                findings.push(
                    Finding::new(id, title, Severity::Critical)
                        .with_file(path.to_path_buf())
                        .with_line(line_no)
                        .with_evidence(evidence)
                        .with_description(description),
                );
            }
        }
    }

    /// Reports characters that fall inside any range of `HOMOGLYPH_RANGES`.
    ///
    /// Lines whose trimmed text starts with `#`, `//` or `*` are treated as
    /// comments and skipped. For every remaining line and every range with at
    /// least one hit, one `High` finding is appended to `findings`; its
    /// evidence lists the total hit count and the code points of the first five
    /// matching characters.
    fn scan_for_homoglyphs(path: &Path, content: &str, findings: &mut Vec<Finding>) {
        for (idx, text) in content.lines().enumerate() {
            // Rough heuristic: only whole-line comments are skipped. Trailing
            // comments and string literals are still scanned.
            let stripped = text.trim();
            let looks_like_comment = stripped.starts_with('#')
                || stripped.starts_with("//")
                || stripped.starts_with('*');
            if looks_like_comment {
                continue;
            }

            for &(lo, hi, block) in HOMOGLYPH_RANGES {
                let hits: Vec<char> = text.chars().filter(|c| (lo..=hi).contains(c)).collect();
                if hits.is_empty() {
                    continue;
                }

                // Show at most five code points to keep the evidence readable.
                let sample = hits
                    .iter()
                    .take(5)
                    .map(|&c| format!("U+{:04X}", c as u32))
                    .collect::<Vec<String>>()
                    .join(", ");

                let id = format!("ENC-HOMO-{:03}", findings.len() + 1);
                let title = format!("Unicode homoglyph from {} block in source code", block);
                let evidence = format!(
                    "Found {} {} character(s): {}",
                    hits.len(),
                    block,
                    sample
                );
                let description = format!(
                    "{} characters in source code may be homoglyph attacks — \
                     visually identical to ASCII but semantically different. \
                     For example, Cyrillic 'а' (U+0430) looks identical to Latin 'a' (U+0061). CWE-1007.",
                    block
                );

                findings.push(
                    Finding::new(id, title, Severity::High)
                        .with_file(path.to_path_buf())
                        .with_line((idx + 1) as u32)
                        .with_evidence(evidence)
                        .with_description(description),
                );
            }
        }
    }

    /// Reports invisible / zero-width characters listed in `INVISIBLE_CHARS`.
    ///
    /// `content` is walked line by line. For every listed character that occurs
    /// on a line, one `High` finding is appended to `findings` carrying `path`,
    /// the 1-based line number and the occurrence count.
    ///
    /// A single U+FEFF at the very start of the file is a legitimate byte order
    /// mark and is not reported. Only that one leading character is exempt:
    /// any further U+FEFF on the first line is still counted, so a BOM cannot
    /// be used to hide additional zero-width characters behind it.
    fn scan_for_invisible(path: &Path, content: &str, findings: &mut Vec<Finding>) {
        const BOM: char = '\u{FEFF}';

        for (line_num, line) in content.lines().enumerate() {
            // Drop the leading BOM (if any) before counting, rather than
            // skipping the U+FEFF check for the whole first line.
            let scanned = if line_num == 0 {
                line.strip_prefix(BOM).unwrap_or(line)
            } else {
                line
            };

            for &(ch, name) in INVISIBLE_CHARS {
                let count = scanned.chars().filter(|&c| c == ch).count();
                if count == 0 {
                    continue;
                }

                findings.push(
                    Finding::new(
                        format!("ENC-INVIS-{:03}", findings.len() + 1),
                        format!("Invisible character: {} in source code", name),
                        Severity::High,
                    )
                    .with_file(path.to_path_buf())
                    .with_line((line_num + 1) as u32)
                    .with_evidence(format!(
                        "Found {} occurrence(s) of {} (U+{:04X})",
                        count, name, ch as u32
                    ))
                    .with_description(
                        "Invisible/zero-width characters in source code can be used \
                         for steganography, identifier confusion, or to bypass string \
                         comparison checks. CWE-1007."
                            .to_string(),
                    ),
                );
            }
        }
    }
}

#[async_trait]
impl SecurityPlugin for EncodingScanner {
    /// Plugin identifier; `scan` also uses it as the name of the report it returns.
    fn name(&self) -> &str {
        "encoding"
    }

    /// Version string reported for this plugin.
    fn version(&self) -> &str {
        "0.1.0"
    }

    /// One-line human-readable summary of what the plugin detects.
    fn description(&self) -> &str {
        "Detect Unicode homoglyphs, BiDi overrides, and invisible characters (Trojan Source)"
    }

    /// Declares `ScanPhase::All` (presumably: run in every scan phase — confirm
    /// against the `ScanPhase` definition).
    fn scan_phase(&self) -> ScanPhase {
        ScanPhase::All
    }

    /// No setup is required; always succeeds.
    async fn initialize(&mut self) -> Result<(), PluginError> {
        Ok(())
    }

    /// Scans a single file for bidi controls, homoglyphs and invisible characters.
    ///
    /// The three checks run only when `context.path` passes `is_source_file`,
    /// `context.file_content` is present, and the bytes are valid UTF-8. In every
    /// other case an empty report is returned. `duration_ms` is always filled in,
    /// and this method never returns `Err`.
    async fn scan(&self, context: &ScanContext<'_>) -> Result<PluginReport, PluginError> {
        let started = Instant::now();
        let mut report = PluginReport::new(self.name().to_string());

        if Self::is_source_file(context.path) {
            // NOTE(review): content that is not valid UTF-8 is skipped without any
            // finding — confirm this is acceptable for a security scanner.
            let decoded = context
                .file_content
                .and_then(|raw| std::str::from_utf8(raw).ok());

            if let Some(text) = decoded {
                let target = context.path;
                Self::scan_for_bidi(target, text, &mut report.findings);
                Self::scan_for_homoglyphs(target, text, &mut report.findings);
                Self::scan_for_invisible(target, text, &mut report.findings);

                // NOTE(review): `scanned_files` is only set when something was
                // found; a clean file leaves it untouched — confirm intended.
                if !report.findings.is_empty() {
                    report.scanned_files = 1;
                }
            }
        }

        report.duration_ms = started.elapsed().as_millis() as u64;
        Ok(report)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::plugins::traits::ScanContext;
    use std::collections::HashMap;

    /// Runs a fresh scanner over `content`, presented as the file `file_name`
    /// in the `PostExtract` phase, and returns the resulting report.
    async fn scan_as(file_name: &str, content: &[u8]) -> PluginReport {
        let context = ScanContext {
            path: Path::new(file_name),
            scan_phase: ScanPhase::PostExtract,
            file_content: Some(content),
            metadata: HashMap::new(),
        };
        EncodingScanner::new().scan(&context).await.unwrap()
    }

    /// True when any finding's title contains `needle`.
    fn any_title_contains(report: &PluginReport, needle: &str) -> bool {
        report.findings.iter().any(|f| f.title.contains(needle))
    }

    #[tokio::test]
    async fn test_bidi_override() {
        // Contains Right-to-Left Override (U+202E)
        let source = "fn check() { if \u{202E}access_level != \"user\" { grant(); } }";
        let report = scan_as("auth.py", source.as_bytes()).await;
        assert!(any_title_contains(&report, "Bidirectional"));
    }

    #[tokio::test]
    async fn test_cyrillic_homoglyph() {
        // Uses Cyrillic 'а' (U+0430) instead of Latin 'a'
        let source = "def \u{0430}dmin_check(): pass";
        let report = scan_as("auth.py", source.as_bytes()).await;
        assert!(any_title_contains(&report, "homoglyph"));
    }

    #[tokio::test]
    async fn test_zero_width_space() {
        // Contains Zero Width Space (U+200B)
        let source = "const password\u{200B} = 'secret';";
        let report = scan_as("config.js", source.as_bytes()).await;
        assert!(any_title_contains(&report, "Invisible"));
    }

    #[tokio::test]
    async fn test_clean_ascii_file() {
        let report = scan_as("main.rs", b"fn main() { println!(\"hello\"); }").await;
        assert!(report.findings.is_empty());
    }

    #[tokio::test]
    async fn test_non_source_file_skipped() {
        // The bidi character must be ignored because `.png` is not a source extension.
        let source = "Contains \u{202E} bidi but in a .png file";
        let report = scan_as("image.png", source.as_bytes()).await;
        assert!(report.findings.is_empty());
    }
}