libinjectionrs 0.1.1

Rust port of libinjection - SQL/XSS injection detection library
Documentation
#![allow(clippy::unwrap_used)]
#![allow(clippy::expect_used)]
#![allow(clippy::indexing_slicing)]
#![allow(clippy::disallowed_methods)]
#![allow(clippy::panic)]
#![allow(clippy::uninlined_format_args)]

use std::env;
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;

/// Build-script entry point.
///
/// Declares the custom `build_generated` cfg to rustc and, when the
/// libinjection data file `../libinjection-c/src/sqlparse_data.json` exists,
/// generates `sqli_data.rs` into `OUT_DIR` and enables the `build_generated`
/// cfg. When the data file is absent nothing is generated and the cfg stays
/// unset.
///
/// # Errors
///
/// Returns an error if `OUT_DIR` cannot be read from the environment or if
/// generating the data file fails.
fn main() -> io::Result<()> {
    // Tell rustc about our custom cfg
    println!("cargo:rustc-check-cfg=cfg(build_generated)");

    let submodule_data = "../libinjection-c/src/sqlparse_data.json";

    // Surface a readable error through the existing `io::Result` return
    // instead of panicking when the variable is missing or not valid Unicode.
    let out_dir = env::var("OUT_DIR").map_err(|err| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("OUT_DIR is not available: {}", err),
        )
    })?;

    if Path::new(submodule_data).exists() {
        // Tell cargo to rerun if the data files change
        println!("cargo:rerun-if-changed={}", submodule_data);
        println!("cargo:rerun-if-changed=../libinjection-c/src/fingerprints.txt");

        // Process sqlparse_data.json (includes fingerprints)
        process_sqlparse_data(&out_dir)?;

        // Set cfg flag to indicate we generated data
        println!("cargo:rustc-cfg=build_generated");
    }

    Ok(())
}

/// Generates `sqli_data.rs` inside `out_dir` from
/// `../libinjection-c/src/sqlparse_data.json`.
///
/// The generated file contains, in this order:
/// * `CHAR_MAP` — emitted only when the JSON has a `charmap` array;
/// * the `Keyword` struct, the sorted `SQL_KEYWORDS` table and the
///   `lookup_word` helper — emitted only when the JSON has a `keywords`
///   object (entries of the optional `fingerprints` array are merged into the
///   table with a `0` prefix and type `F`);
/// * the `CharType` enum — always emitted.
///
/// # Errors
///
/// Returns an error if the JSON file cannot be read or parsed, if `charmap`
/// does not contain exactly 256 entries, or if writing the output file fails.
fn process_sqlparse_data(out_dir: &str) -> io::Result<()> {
    use serde_json::Value;

    let json_path = "../libinjection-c/src/sqlparse_data.json";
    let json_str = std::fs::read_to_string(json_path)?;
    let data: Value = serde_json::from_str(&json_str)?;

    let dest_path = Path::new(out_dir).join("sqli_data.rs");
    // Buffer the output: the generator issues one small write per line.
    let mut f = io::BufWriter::new(File::create(dest_path)?);

    writeln!(f, "// This file is auto-generated by build.rs")?;
    writeln!(f, "// DO NOT EDIT MANUALLY\n")?;

    // Process character map
    if let Some(charmap) = data["charmap"].as_array() {
        write_char_map(&mut f, charmap)?;
    }

    // Process keywords and fingerprints together (like the C version)
    if let Some(keywords) = data["keywords"].as_object() {
        let fingerprints: &[Value] = match data["fingerprints"].as_array() {
            Some(list) => list.as_slice(),
            None => &[],
        };
        write_keyword_table(&mut f, keywords, fingerprints)?;
        write_lookup_word(&mut f)?;
    }

    // Add CharType enum definition
    write_char_type_enum(&mut f)?;

    // Flush explicitly: dropping a `BufWriter` discards any write error.
    f.flush()?;
    Ok(())
}

/// JSON character-class names paired with the `CharType` variant they map to.
/// The order of this table is the declaration order of the generated enum.
const CHAR_TYPE_VARIANTS: &[(&str, &str)] = &[
    ("CHAR_WHITE", "White"),
    ("CHAR_BANG", "Bang"),
    ("CHAR_STR", "String"),
    ("CHAR_HASH", "Hash"),
    ("CHAR_MONEY", "Money"),
    ("CHAR_OP1", "Op1"),
    ("CHAR_OP2", "Op2"),
    ("CHAR_LEFTPARENS", "LeftParens"),
    ("CHAR_RIGHTPARENS", "RightParens"),
    ("CHAR_UNARY", "Unary"),
    ("CHAR_COMMA", "Comma"),
    ("CHAR_DASH", "Dash"),
    ("CHAR_NUM", "Number"),
    ("CHAR_SLASH", "Slash"),
    ("CHAR_SEMICOLON", "Semicolon"),
    ("CHAR_VAR", "Variable"),
    ("CHAR_WORD", "Word"),
    ("CHAR_BSTRING", "BString"),
    ("CHAR_ESTRING", "EString"),
    ("CHAR_NQSTRING", "NQString"),
    ("CHAR_QSTRING", "QString"),
    ("CHAR_USTRING", "UString"),
    ("CHAR_XSTRING", "XString"),
    ("CHAR_BWORD", "BWord"),
    ("CHAR_BACK", "Backslash"),
    ("CHAR_LEFTBRACE", "LeftBrace"),
    ("CHAR_RIGHTBRACE", "RightBrace"),
    ("CHAR_TICK", "Tick"),
    ("CHAR_OTHER", "Other"),
];

/// Returns the `CharType` variant name for a JSON character-class name,
/// falling back to `Other` for names that are not in the table.
fn char_type_variant(name: &str) -> &'static str {
    CHAR_TYPE_VARIANTS
        .iter()
        .find(|&&(json_name, _)| json_name == name)
        .map_or("Other", |&(_, variant)| variant)
}

/// Returns the byte-literal source text for a keyword type character,
/// falling back to `b'n'` (bareword) for unrecognised characters.
fn token_type_literal(type_char: char) -> &'static str {
    match type_char {
        'k' => "b'k'", // Keyword
        'f' => "b'f'", // Function
        'U' => "b'U'", // Union
        'E' => "b'E'", // Expression (SELECT, etc)
        'T' => "b'T'", // TSQL
        't' => "b't'", // Type
        'o' => "b'o'", // Operator
        '&' => "b'&'", // Logic operator
        'n' => "b'n'", // None/bareword
        'v' => "b'v'", // Variable
        '1' => "b'1'", // Number/constant
        'A' => "b'A'", // Collate
        'B' => "b'B'", // Group/Order by
        'F' => "b'F'", // Fingerprint
        _ => "b'n'",
    }
}

/// Writes the `CHAR_MAP` constant, one `CharType` entry per `charmap` item.
/// Non-string items are treated as `CHAR_OTHER`.
fn write_char_map(f: &mut impl Write, charmap: &[serde_json::Value]) -> io::Result<()> {
    // The generated array type is fixed at 256 entries; reject other lengths
    // here rather than emitting code that cannot compile.
    if charmap.len() != 256 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "charmap must contain exactly 256 entries, found {}",
                charmap.len()
            ),
        ));
    }

    writeln!(f, "// Character classification map")?;
    writeln!(f, "pub const CHAR_MAP: [CharType; 256] = [")?;
    for (i, item) in charmap.iter().enumerate() {
        let variant = char_type_variant(item.as_str().unwrap_or("CHAR_OTHER"));
        writeln!(f, "    CharType::{}, // {} (0x{:02X})", variant, i, i)?;
    }
    writeln!(f, "];\n")
}

/// Writes the `Keyword` struct and the sorted `SQL_KEYWORDS` table built from
/// the keyword map plus the `0`-prefixed fingerprints.
fn write_keyword_table(
    f: &mut impl Write,
    keywords: &serde_json::Map<String, serde_json::Value>,
    fingerprints: &[serde_json::Value],
) -> io::Result<()> {
    writeln!(f, "// SQL keywords and their types")?;
    writeln!(f, "pub struct Keyword {{")?;
    writeln!(f, "    pub word: &'static str,")?;
    writeln!(f, "    pub token_type: u8,")?;
    writeln!(f, "}}\n")?;

    // A BTreeMap de-duplicates the entries and iterates in byte-wise string
    // order, which is the order the generated binary search compares with.
    let mut all_keywords = std::collections::BTreeMap::new();

    // Add regular keywords. Keys are upper-cased with the ASCII-only rule
    // because the generated `lookup_word` normalises its input the same way.
    for (keyword, type_val) in keywords {
        let type_char = type_val
            .as_str()
            .and_then(|s| s.chars().next())
            .unwrap_or('n');
        all_keywords.insert(keyword.to_ascii_uppercase(), type_char);
    }

    // Add fingerprints with '0' prefix and type 'F' (like C version);
    // a fingerprint overrides a keyword with the same key.
    for fp_str in fingerprints.iter().filter_map(|fp| fp.as_str()) {
        all_keywords.insert(format!("0{}", fp_str.to_ascii_uppercase()), 'F');
    }

    writeln!(f, "pub const SQL_KEYWORDS: &[Keyword] = &[")?;
    for (keyword, type_char) in &all_keywords {
        // Escape the word so quotes or backslashes in the data cannot break
        // out of the generated string literal.
        writeln!(
            f,
            "    Keyword {{ word: \"{}\", token_type: {} }},",
            keyword.escape_default(),
            token_type_literal(*type_char)
        )?;
    }
    writeln!(f, "];\n")
}

/// Writes the generated `lookup_word` helper, which binary-searches
/// `SQL_KEYWORDS` and maps the stored type byte to a `TokenType`.
fn write_lookup_word(f: &mut impl Write) -> io::Result<()> {
    writeln!(f, "use crate::sqli::TokenType;\n")?;
    writeln!(f, "pub fn lookup_word(word: &str) -> TokenType {{")?;
    writeln!(f, "    let upper = word.to_ascii_uppercase();")?;
    writeln!(f, "    ")?;
    writeln!(f, "    // Binary search through sorted keywords")?;
    writeln!(f, "    let result = SQL_KEYWORDS.binary_search_by(|k| k.word.cmp(&upper));")?;
    writeln!(f, "    ")?;
    writeln!(f, "    if let Ok(idx) = result {{")?;
    writeln!(f, "        match SQL_KEYWORDS[idx].token_type {{")?;
    writeln!(f, "            b'k' => TokenType::Keyword,")?;
    writeln!(f, "            b'f' => TokenType::Function,")?;
    writeln!(f, "            b'U' => TokenType::Union,")?;
    writeln!(f, "            b'E' => TokenType::Expression,")?;
    writeln!(f, "            b'T' => TokenType::Tsql,")?;
    writeln!(f, "            b't' => TokenType::SqlType,")?;
    writeln!(f, "            b'o' => TokenType::Operator,")?;
    writeln!(f, "            b'&' => TokenType::LogicOperator,")?;
    writeln!(f, "            b'v' => TokenType::Variable,")?;
    writeln!(f, "            b'1' => TokenType::Number,")?;
    writeln!(f, "            b'A' => TokenType::Collate,")?;
    writeln!(f, "            b'B' => TokenType::Group,")?;
    writeln!(f, "            b'F' => TokenType::Fingerprint,")?;
    writeln!(f, "            _ => TokenType::Bareword,")?;
    writeln!(f, "        }}")?;
    writeln!(f, "    }} else {{")?;
    writeln!(f, "        TokenType::Bareword")?;
    writeln!(f, "    }}")?;
    writeln!(f, "}}\n")
}

/// Writes the `CharType` enum definition, one variant per table entry.
fn write_char_type_enum(f: &mut impl Write) -> io::Result<()> {
    writeln!(f, "#[derive(Debug, Clone, Copy, PartialEq, Eq)]")?;
    writeln!(f, "pub enum CharType {{")?;
    for &(_, variant) in CHAR_TYPE_VARIANTS {
        writeln!(f, "    {},", variant)?;
    }
    writeln!(f, "}}")
}