/// A named bundle of reference text together with the phrases associated
/// with it.
///
/// Every field borrows `'static` data, so a pack can be declared as a
/// `static` or `const` item. The derived traits make packs printable with
/// `{:?}`, comparable with `==`, and trivially copyable (the struct holds
/// only references).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ContextPack {
    /// Short identifier for the pack (e.g. `"prompt_injection"`).
    pub name: &'static str,
    /// Descriptive text carried by the pack.
    pub content: &'static str,
    /// Phrases associated with this pack.
    ///
    /// NOTE(review): presumably these are matched against incoming text to
    /// decide when `content` is relevant — confirm against the caller.
    pub triggers: &'static [&'static str],
}
6
7pub static PROMPT_INJECTION: ContextPack = ContextPack {
8 name: "prompt_injection",
9 content: "\
10Known prompt injection and jailbreak signatures: \
11override or ignore previous instructions, \"developer mode\", jailbreak framings, \
12base64-encoded instruction blocks, nested system prompt injection, \
13\"pretend you are\" / \"you are now\" / \"you are no longer\" role-replacement, \
14instruction leakage requests (asking the model to repeat its prompt), \
15adversarial roleplay designed to suppress model constraints.",
16 triggers: &[
17 "ignore previous",
18 "ignore all previous",
19 "override",
20 "jailbreak",
21 "developer mode",
22 "base64",
23 "system prompt",
24 "pretend you are",
25 "you are now",
26 "you are no longer",
27 "disregard",
28 "forget your",
29 "new instructions",
30 "repeat your",
31 ],
32};
33
34pub static SOCIAL_ENGINEERING: ContextPack = ContextPack {
35 name: "social_engineering",
36 content: "\
37Social engineering manipulation tactics: \
38authority impersonation (CEO, IT, legal, government, security team), \
39manufactured urgency (\"act now\", \"time-sensitive\", \"immediately\"), \
40artificial scarcity or threat of account suspension, \
41credential or wire-transfer harvesting disguised as routine verification, \
42pretexting (false backstory to establish trust before a demand), \
43fear/uncertainty/doubt (FUD) amplification, \
44quid pro quo offers (something for compliance).",
45 triggers: &[
46 "ceo",
47 "executive",
48 "wire transfer",
49 "invoice",
50 "suspended",
51 "penalty",
52 "lawsuit",
53 "immediately",
54 "act now",
55 "time sensitive",
56 "time-sensitive",
57 "confidential",
58 "your account",
59 "verify your",
60 "update your",
61 "click here",
62 ],
63};
64
65pub static EMOTIONAL_MANIPULATION: ContextPack = ContextPack {
66 name: "emotional_manipulation",
67 content: "\
68Emotional manipulation patterns: \
69guilt induction (making the target feel responsible for negative consequences), \
70flattery bombing followed by high-demand requests, \
71victimhood leveraging to lower critical defenses, \
72manufactured crisis or catastrophizing to overwhelm rational evaluation, \
73intermittent reinforcement (alternating reward and punishment), \
74gaslighting (undermining the target's perception of events), \
75isolation framing (\"you're the only one who can help\").",
76 triggers: &[
77 "desperate",
78 "abandoned",
79 "your fault",
80 "blame you",
81 "disappointed in",
82 "you made me",
83 "you don't care",
84 "nobody cares",
85 "you're the only",
86 "you are the only",
87 "if you don't",
88 "you'll regret",
89 "you will regret",
90 "i'm suffering",
91 "i am suffering",
92 "never forgive",
93 ],
94};
95
96pub static ADVERSARIAL_PROBING: ContextPack = ContextPack {
97 name: "adversarial_probing",
98 content: "\
99Adversarial system-probing patterns: \
100capability elicitation (asking what the system can or cannot do), \
101boundary testing (probing refusal and override conditions), \
102prompt/instruction extraction (asking the model to reveal its instructions), \
103confusion injection (contradictory inputs to cause errors), \
104meta-level instruction injection (treating model output as executable), \
105multi-turn escalation (building context across messages to gradually shift behavior).",
106 triggers: &[
107 "your instructions",
108 "your system prompt",
109 "print your",
110 "output your",
111 "show me your",
112 "what are your rules",
113 "what are your limitations",
114 "your limitations",
115 "bypass",
116 "forbidden",
117 "what can you not",
118 "what are you not",
119 "reveal your",
120 "expose your",
121 ],
122};
123
124pub fn all_packs() -> &'static [&'static ContextPack] {
125 static PACKS: [&ContextPack; 4] = [
126 &PROMPT_INJECTION,
127 &SOCIAL_ENGINEERING,
128 &EMOTIONAL_MANIPULATION,
129 &ADVERSARIAL_PROBING,
130 ];
131 &PACKS
132}