assay_sim/attacks/
memory_poison.rs1use crate::report::AttackResult;
10use serde::Serialize;
11
12#[path = "memory_poison_next/mod.rs"]
13mod memory_poison_next;
14
15#[derive(Debug, Clone, Serialize)]
16pub struct PoisonResult {
17 pub vector_id: String,
18 pub condition: String,
19 pub phase_a_injected: bool,
20 pub phase_b_delay_turns: u32,
21 pub phase_c_triggered: bool,
22 pub poison_retained: bool,
23 pub activation_succeeded: bool,
24 pub expected_classification: String,
25 pub observed_classification: String,
26 pub outcome: PoisonOutcome,
27 pub hypothesis_tags: Vec<String>,
28}
29
30#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
31#[serde(rename_all = "snake_case")]
32pub enum PoisonOutcome {
33 NoEffect,
34 RetainedNoActivation,
35 ActivationWithCorrectDetection,
36 ActivationWithMisclassification,
37 ActivationWithPolicyShift,
38}
39
40pub fn vector1_replay_baseline_poisoning(delay_turns: u32) -> (PoisonResult, AttackResult) {
41 memory_poison_next::vector1_replay_baseline_poisoning(delay_turns)
42}
43
44pub fn vector2_deny_convergence_poisoning(delay_turns: u32) -> (PoisonResult, AttackResult) {
45 memory_poison_next::vector2_deny_convergence_poisoning(delay_turns)
46}
47
48pub fn vector3_context_envelope_poisoning(delay_turns: u32) -> (PoisonResult, AttackResult) {
49 memory_poison_next::vector3_context_envelope_poisoning(delay_turns)
50}
51
52pub fn vector4_decay_escape(decay_runs: u32) -> (PoisonResult, AttackResult) {
53 memory_poison_next::vector4_decay_escape(decay_runs)
54}
55
56pub fn control_b1_run_metadata_recall(delay_turns: u32) -> (PoisonResult, AttackResult) {
57 memory_poison_next::control_b1_run_metadata_recall(delay_turns)
58}
59
60pub fn control_b2_tool_observation_recall(delay_turns: u32) -> (PoisonResult, AttackResult) {
61 memory_poison_next::control_b2_tool_observation_recall(delay_turns)
62}
63
64pub fn control_b3_approval_context_recall(delay_turns: u32) -> (PoisonResult, AttackResult) {
65 memory_poison_next::control_b3_approval_context_recall(delay_turns)
66}
67
68pub fn run_memory_poison_matrix() -> (Vec<PoisonResult>, Vec<AttackResult>) {
70 memory_poison_next::run_memory_poison_matrix()
71}
72
#[cfg(test)]
mod tests {
    // Checks for the public memory-poisoning entry points: individual vectors,
    // the three controls, and invariants over the full matrix.
    use super::*;

    /// Vector 1 with a one-turn delay must activate and end up misclassified.
    #[test]
    fn vector1_activates_under_condition_a() {
        let poison = vector1_replay_baseline_poisoning(1).0;
        assert!(poison.activation_succeeded);
        assert_eq!(poison.outcome, PoisonOutcome::ActivationWithMisclassification);
    }

    /// Vector 2 with a one-turn delay must reach its phase-C trigger.
    #[test]
    fn vector2_activates_under_condition_a() {
        let poison = vector2_deny_convergence_poisoning(1).0;
        assert!(poison.phase_c_triggered);
    }

    /// Vector 3 with a one-turn delay must activate and shift policy.
    #[test]
    fn vector3_activates_under_condition_a() {
        let poison = vector3_context_envelope_poisoning(1).0;
        assert!(poison.activation_succeeded);
        assert_eq!(poison.outcome, PoisonOutcome::ActivationWithPolicyShift);
    }

    /// Vector 4 with a single decay run must report a successful activation.
    #[test]
    fn vector4_snapshot_diverges() {
        let poison = vector4_decay_escape(1).0;
        assert!(poison.activation_succeeded);
    }

    /// Every control, at delays 1 through 3, must report `NoEffect`.
    #[test]
    fn controls_produce_no_false_positives() {
        for delay in 1..=3 {
            let metadata = control_b1_run_metadata_recall(delay).0;
            assert_eq!(metadata.outcome, PoisonOutcome::NoEffect);

            let observation = control_b2_tool_observation_recall(delay).0;
            assert_eq!(observation.outcome, PoisonOutcome::NoEffect);

            let approval = control_b3_approval_context_recall(delay).0;
            assert_eq!(approval.outcome, PoisonOutcome::NoEffect);
        }
    }

    /// The full matrix must complete and yield 45 rows in each result vector.
    #[test]
    fn full_matrix_runs_without_panic() {
        let (poison_rows, attack_rows) = run_memory_poison_matrix();
        assert_eq!(poison_rows.len(), 45);
        assert_eq!(attack_rows.len(), 45);
    }

    /// Under `condition_b`, V1 and V2 rows must be reported as correctly detected.
    #[test]
    fn condition_b_blocks_v1_and_v2() {
        let (rows, _) = run_memory_poison_matrix();
        let guarded = rows.iter().filter(|row| {
            row.condition == "condition_b"
                && matches!(
                    row.vector_id.as_str(),
                    "v1_replay_baseline" | "v2_deny_convergence"
                )
        });
        for row in guarded {
            assert_eq!(
                row.outcome,
                PoisonOutcome::ActivationWithCorrectDetection,
                "{} should be detected under Condition B",
                row.vector_id
            );
        }
    }

    /// Under `condition_c`, V3 rows must be reported as correctly detected.
    #[test]
    fn condition_c_blocks_v3() {
        let (rows, _) = run_memory_poison_matrix();
        for row in &rows {
            // Skip everything that is not a V3 row evaluated under condition C.
            if row.condition != "condition_c" || row.vector_id != "v3_context_envelope" {
                continue;
            }
            assert_eq!(
                row.outcome,
                PoisonOutcome::ActivationWithCorrectDetection,
                "V3 should be detected under Condition C"
            );
        }
    }

    /// No control row anywhere in the matrix may be misclassified or shift policy.
    #[test]
    fn overarching_invariant_controls_never_misclassify() {
        let (rows, _) = run_memory_poison_matrix();
        let controls = rows
            .iter()
            .filter(|row| row.vector_id.starts_with("control_"));
        for control in controls {
            assert_ne!(
                control.outcome,
                PoisonOutcome::ActivationWithMisclassification,
                "control {} had false positive",
                control.vector_id
            );
            assert_ne!(
                control.outcome,
                PoisonOutcome::ActivationWithPolicyShift,
                "control {} had policy shift",
                control.vector_id
            );
        }
    }
}