split_brain_harness/types.rs
1use serde::{Deserialize, Serialize};
2
3// ---------------------------------------------------------------------------
4// Backend selection
5// ---------------------------------------------------------------------------
6
7#[derive(Debug, Deserialize, Clone)]
8pub enum BackendType {
9 #[serde(rename = "openai-compat")]
10 OpenAiCompat,
11 #[serde(rename = "ollama-native")]
12 OllamaNative,
13 #[serde(rename = "local-embedded")]
14 LocalEmbedded,
15 #[serde(rename = "anthropic")]
16 Anthropic,
17}
18
19// ---------------------------------------------------------------------------
20// Verification mode
21// ---------------------------------------------------------------------------
22
23#[derive(Debug, Deserialize, Clone, Default)]
24pub enum VerifyMode {
25 /// Deterministic consistency checks only — no extra LLM call (default).
26 #[serde(rename = "deterministic")]
27 #[default]
28 Deterministic,
29 /// Deterministic checks + a second LLM call against the verifier soul prompt.
30 #[serde(rename = "llm")]
31 Llm,
32 /// Deterministic checks + LLM verifier + a third adjudicator LLM call when the
33 /// disagreement structure matches a high-risk injection fingerprint.
34 /// Inspired by ReConcile (ACL) multi-model consensus and DiscoUQ structured
35 /// disagreement scoring.
36 #[serde(rename = "reconcile")]
37 Reconcile,
38 /// Skip verification entirely.
39 #[serde(rename = "none")]
40 None,
41}
42
43// ---------------------------------------------------------------------------
44// Runtime configuration
45// ---------------------------------------------------------------------------
46
47#[derive(Debug, Deserialize, Clone)]
48pub struct Config {
49 pub backend: BackendType,
50 pub endpoint: String,
51 pub model_name: String,
52 pub soul_path: String,
53 pub api_key: Option<String>,
54 pub verify_mode: VerifyMode,
55 pub timeout_secs: u64,
56 /// Print system prompt + payload to stderr before the model call.
57 pub dump_prompt: bool,
58 /// Print raw model output to stderr before extraction.
59 pub dump_raw: bool,
60 /// Path to the capability memory JSON file for forge persistence.
61 /// None = in-memory only (no cross-session reputation).
62 pub memory_path: Option<String>,
63 /// Path to the append-only forge audit log (JSONL).
64 /// None = no audit logging.
65 pub audit_path: Option<String>,
66 /// If set, `sbh serve` requires `Authorization: Bearer <serve_key>`.
67 /// The serve key is NOT forwarded as the upstream API key.
68 pub serve_key: Option<String>,
69 /// Max requests per minute per IP for `sbh serve`. Default 60.
70 pub serve_rate_limit: u32,
71 /// Max request body size in bytes for `sbh serve`. Default 1 MiB.
72 pub serve_max_body_bytes: usize,
73 /// Path to the append-only session escalation log (JSONL).
74 /// Written on every slow-boil escalation event detected by `sbh serve`.
75 /// None = events are not persisted.
76 pub session_log_path: Option<String>,
77 /// Path to operator-supplied context docs (TOML file or directory of TOML files).
78 /// Merged with the embedded default corpus and injected into the system prompt.
79 /// None = embedded default corpus only.
80 pub context_path: Option<String>,
81}
82
83impl std::fmt::Display for BackendType {
84 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85 match self {
86 BackendType::OpenAiCompat => write!(f, "openai-compat"),
87 BackendType::OllamaNative => write!(f, "ollama-native"),
88 BackendType::LocalEmbedded => write!(f, "local-embedded"),
89 BackendType::Anthropic => write!(f, "anthropic"),
90 }
91 }
92}
93
94impl std::fmt::Display for VerifyMode {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 match self {
97 VerifyMode::Deterministic => write!(f, "deterministic"),
98 VerifyMode::Llm => write!(f, "llm"),
99 VerifyMode::Reconcile => write!(f, "reconcile"),
100 VerifyMode::None => write!(f, "none"),
101 }
102 }
103}
104
105// ---------------------------------------------------------------------------
106// Soul container
107// ---------------------------------------------------------------------------
108
/// Bundle of the four system-prompt strings used by the harness.
///
/// NOTE(review): presumably populated from the file at `Config::soul_path` —
/// confirm against the loader, which is not part of this file.
#[derive(Clone, Debug)]
pub struct Soul {
    /// System prompt for the logic role.
    pub logic_system_prompt: String,
    /// System prompt for the creative role.
    pub creative_system_prompt: String,
    /// System prompt for the verifier role.
    pub verifier_system_prompt: String,
    /// System prompt for the code-generation role.
    pub code_gen_system_prompt: String,
}
116
117// ---------------------------------------------------------------------------
118// Telemetry output schema
119// ---------------------------------------------------------------------------
120
121#[derive(Debug, Serialize, Deserialize, Clone)]
122#[serde(deny_unknown_fields)]
123pub struct AfferentTelemetry {
124 pub primary_emotion: String,
125 pub emotional_intensity: f32,
126 pub structural_tone: Vec<String>,
127}
128
129#[derive(Debug, Serialize, Deserialize, Clone)]
130#[serde(deny_unknown_fields)]
131pub struct IntentMatrix {
132 pub stated_objective: String,
133 pub subtextual_motive: String,
134 pub manipulation_risk: String,
135}
136
137#[derive(Debug, Serialize, Deserialize, Clone)]
138#[serde(deny_unknown_fields)]
139pub struct CognitiveState {
140 pub urgency_vector: f32,
141 pub coherence_rating: f32,
142}
143
144#[derive(Debug, Serialize, Deserialize, Clone)]
145pub struct TelemetryResult {
146 pub affective_telemetry: AfferentTelemetry,
147 pub intent_matrix: IntentMatrix,
148 pub cognitive_state: CognitiveState,
149}
150
151// ---------------------------------------------------------------------------
152// Verification layer
153// ---------------------------------------------------------------------------
154
155/// One step in the analysis pipeline — propose, deterministic check, or LLM verify.
156#[derive(Debug, Serialize, Deserialize, Clone)]
157pub struct TraceEntry {
158 pub stage: String,
159 pub claim: String,
160 #[serde(skip_serializing_if = "Option::is_none")]
161 pub evidence: Option<String>,
162 pub passed: bool,
163 #[serde(skip_serializing_if = "Option::is_none")]
164 pub note: Option<String>,
165}
166
167/// Structured analysis of how the verification layer disagrees with the proposer.
168///
169/// Inspired by DiscoUQ (structured inter-agent disagreement scoring): not all flag
170/// counts are equal. Two flags from the same analytical domain suggest a single
171/// root cause; flags spread across domains suggest a broader attack surface. The
172/// injection fingerprint fires when the flag combination matches the canonical
173/// manipulation-evasion pattern (adversarial tone + urgency both present while
174/// manipulation_risk is asserted low).
175#[derive(Debug, Serialize, Deserialize, Clone, Default)]
176pub struct DisagreementScore {
177 /// Number of deterministic consistency checks that fired.
178 pub flag_count: usize,
179 /// Fraction of total checks that fired (0.0–1.0).
180 pub flag_density: f32,
181 /// Number of distinct analytical dimensions with at least one flag.
182 /// Dimensions: affective, tone, urgency, coherence, risk-value, risk-signal.
183 pub dimension_spread: usize,
184 /// True when the flag set matches the canonical injection-evasion fingerprint:
185 /// adversarial/coercive tone + high urgency both flagging against a low
186 /// manipulation_risk assertion. This pattern indicates the proposer was deceived
187 /// by a payload designed to appear benign while exerting coercive pressure.
188 pub injection_fingerprint: bool,
189 /// Confidence derived from disagreement structure (replaces flat flag-count penalty).
190 /// Uses density and fingerprint match instead of a simple per-flag discount.
191 pub adjusted_confidence: f32,
192 /// Present when Reconcile mode ran — summary of the adjudicator's verdict.
193 #[serde(skip_serializing_if = "Option::is_none")]
194 pub reconcile_verdict: Option<String>,
195}
196
197/// Result of the verification stage.
198#[derive(Debug, Serialize, Deserialize, Clone)]
199pub struct VerificationReport {
200 pub passed: bool,
201 pub consistency_flags: Vec<String>,
202 pub unsupported_claims: Vec<String>,
203 pub assumptions: Vec<String>,
204 pub unresolved: Vec<String>,
205 pub confidence: f32,
206 /// Structured disagreement analysis (DiscoUQ-inspired). Always populated.
207 pub disagreement: DisagreementScore,
208 /// When true, confidence is below threshold — caller should pause and ask
209 /// for clarification rather than acting on the result.
210 pub stop_and_ask: bool,
211}
212
213/// Summary of pre-Stage-1 obfuscation detections from the normalizer pass.
214#[derive(Debug, Serialize, Deserialize, Clone, Default)]
215pub struct ObfuscationReport {
216 /// 0.0 = clean input, 1.0 = heavily obfuscated. Threshold ~0.25 for action.
217 pub score: f32,
218 /// Human-readable list of detected obfuscation events, e.g. ["homoglyph (3)", "base64"].
219 pub detections: Vec<String>,
220 /// The normalized (deobfuscated) text that was passed to Stage 1.
221 pub normalized_input: String,
222}
223
224/// Full pipeline output: telemetry + verification + step-level trace.
225/// `capability_request` is `None` unless the model emitted one alongside
226/// its telemetry (Phase 1 schema — no execution in this release).
227#[derive(Debug, Serialize, Deserialize, Clone)]
228pub struct HarnessResult {
229 pub telemetry: TelemetryResult,
230 pub verification: VerificationReport,
231 pub trace: Vec<TraceEntry>,
232 #[serde(skip_serializing_if = "Option::is_none", default)]
233 pub capability_request: Option<crate::capability::CapabilityRequest>,
234 /// Present when the input required deobfuscation before Stage 1.
235 #[serde(skip_serializing_if = "Option::is_none", default)]
236 pub obfuscation: Option<ObfuscationReport>,
237}