use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashSet;
lazy_static! {
pub static ref PROMPT_INJECTION_PATTERNS: Vec<Regex> = vec![
Regex::new(r"(?i)(ignore|disregard|forget)\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|commands?|rules?)").unwrap(),
Regex::new(r"(?i)(you\s+are\s+now|act\s+as|pretend\s+(you\s+are|to\s+be)|from\s+now\s+on)[,\s]").unwrap(),
Regex::new(r"(?i)(DAN|STAN|DUDE|AIM|SWITCH|developer\s+mode)").unwrap(),
Regex::new(r"(?i)in\s+alternate\s+universe|hypothetical|imaginary\s+scenario|pretend|simulation").unwrap(),
Regex::new(r"(?i)(ignore|skip|bypass)\s+(?:the\s+)?(json|output|format|structure)").unwrap(),
Regex::new(r"```[\s\S]*?(</system>|<\|im_end\|>|<\|endoftext\|>)").unwrap(),
Regex::new(r"#{10,}|={10,}|\*{10,}|-{10,}").unwrap(),
Regex::new(r"(?i)///\s*ATTENTION\s+(ANY\s+)?(LLM|AI|GPT|CLAUDE|MODEL)").unwrap(),
Regex::new(r"(?i)//\s*@(LLM|AI|ASSISTANT|SYSTEM)").unwrap(),
Regex::new(r"(?i)/\*[\s\S]*?(IGNORE|OVERRIDE|BYPASS)[\s\S]*?\*/").unwrap(),
Regex::new(r"[\u{200B}-\u{200D}\u{FEFF}]").unwrap(),
Regex::new(r"(?i)(base64|hex|rot13|decode):\s*[A-Za-z0-9+/=]{20,}").unwrap(),
Regex::new(r"(?i)(NEW|UPDATED|REAL)\s+(SYSTEM|INSTRUCTION|RULE|GUIDELINE|POLICY)").unwrap(),
Regex::new(r"(?i)(administrator|developer|owner|creator|god\s+mode)\s+(says?|commands?|requires?)").unwrap(),
Regex::new(r"(?i)ONLY\s+(?:SAY|RESPOND|OUTPUT|RETURN|PRINT)").unwrap(),
Regex::new(r"(?i)execute|eval|run\s+(?:this|the\s+following)\s+code").unwrap(),
Regex::new(r"(?i)(dear|hello|hi)\s+(ai|assistant|colleague|friend|llm|model)").unwrap(),
Regex::new(r"(?i)(recommend|suggest|advise|kindly|please)\s+(focus|ignore|skip|overlook)").unwrap(),
Regex::new(r"(?i)let'?s\s+think\s+step\s+by\s+step").unwrap(),
Regex::new(r"(?i)first.*second.*third.*therefore").unwrap(),
Regex::new(r"(?i)example\s+\d+:.*result:\s*(safe|ok|pass|good)").unwrap(),
Regex::new(r"(?i)for\s+context|background\s+information|important\s+note").unwrap(),
Regex::new(r"#{1,6}\s*(SYSTEM|IMPORTANT|CRITICAL|ATTENTION|WARNING|NOTICE)").unwrap(),
Regex::new(r"\*{1,2}(SYSTEM|IMPORTANT|AI|ASSISTANT)\*{1,2}").unwrap(),
Regex::new(r"(?i)(1gn0r3|d1sr3gard|byp4ss|0v3rr1d3)").unwrap(),
Regex::new(r"(?i)(overlook|discard|skip\s+over|omit|exclude|dismiss)\s+(vulnerabilities|issues|problems|warnings)").unwrap(),
Regex::new(r"(?i)(confidential|proprietary|classified|secret|private|restricted)").unwrap(),
Regex::new(r"(?i)(not\s+authorized|unauthorized|no\s+permission|forbidden\s+to\s+(view|access|analyze))").unwrap(),
Regex::new(r"(?i)(copyright|trademark|patent|NDA|non-disclosure)").unwrap(),
Regex::new(r"(?i)(legal\s+disclaimer|legal\s+notice|legal\s+warning)").unwrap(),
Regex::new(r"(?i)(violation\s+of\s+law|illegal\s+to\s+(view|analyze|review))").unwrap(),
Regex::new(r"(?i)(must\s+execute|need\s+to\s+run|require.*execution|compile.*and.*run)").unwrap(),
Regex::new(r"(?i)(dynamic\s+analysis\s+requires|runtime\s+analysis\s+needs|execute\s+to\s+verify)").unwrap(),
];
pub static ref DANGEROUS_KEYWORDS: HashSet<&'static str> = {
let mut set = HashSet::new();
set.insert("ignore instructions");
set.insert("disregard prompt");
set.insert("forget previous");
set.insert("you are now");
set.insert("act as");
set.insert("pretend to be");
set.insert("DAN mode");
set.insert("developer mode");
set.insert("jailbreak");
set.insert("system override");
set.insert("bypass filter");
set.insert("ignore rules");
set.insert("no restrictions");
set.insert("unlimited mode");
set.insert("god mode");
set.insert("dear ai");
set.insert("dear assistant");
set.insert("kindly ignore");
set.insert("please skip");
set.insert("overlook vulnerabilities");
set.insert("dismiss issues");
set.insert("focus on performance");
set.insert("not important");
set.insert("false positive");
set.insert("test environment");
set.insert("development only");
set.insert("confidential code");
set.insert("proprietary information");
set.insert("not authorized");
set.insert("no permission");
set.insert("copyright protected");
set.insert("nda required");
set.insert("legal disclaimer");
set.insert("must execute");
set.insert("need to run");
set.insert("requires execution");
set
};
pub static ref RTL_OVERRIDE_CHARS: Vec<char> = vec![
'\u{202E}', '\u{202D}', '\u{202A}', '\u{202B}', '\u{202C}', ];
pub static ref SUSPICIOUS_OUTPUT_PATTERNS: Vec<Regex> = vec![
Regex::new(r"(?i)as\s+(?:requested|instructed|commanded),?\s+I\s+will\s+(?:ignore|bypass|disable)").unwrap(),
Regex::new(r"(?i)I\s+am\s+now\s+(?:acting|operating|functioning)\s+as").unwrap(),
Regex::new(r"(?i)(?:sure|okay|yes),?\s+I\s+(?:can|will)\s+ignore").unwrap(),
Regex::new(r"(?i)I\s+(?:will|can|shall)\s+(?:overlook|dismiss|skip|omit)").unwrap(),
Regex::new(r"(?i)(?:focusing|concentrating)\s+on\s+(?:performance|functionality|features)\s+rather\s+than\s+security").unwrap(),
];
}
pub fn get_prompt_injection_patterns() -> &'static Vec<Regex> {
&PROMPT_INJECTION_PATTERNS
}
pub fn get_dangerous_keywords() -> &'static HashSet<&'static str> {
&DANGEROUS_KEYWORDS
}
pub fn get_rtl_override_chars() -> &'static Vec<char> {
&RTL_OVERRIDE_CHARS
}
pub fn get_suspicious_output_patterns() -> &'static Vec<Regex> {
&SUSPICIOUS_OUTPUT_PATTERNS
}