Skip to main content

split_brain_harness/
types.rs

1use serde::{Deserialize, Serialize};
2
// ---------------------------------------------------------------------------
// Backend selection
// ---------------------------------------------------------------------------
6
7#[derive(Debug, Deserialize, Clone)]
8pub enum BackendType {
9    #[serde(rename = "openai-compat")]
10    OpenAiCompat,
11    #[serde(rename = "ollama-native")]
12    OllamaNative,
13    #[serde(rename = "local-embedded")]
14    LocalEmbedded,
15    #[serde(rename = "anthropic")]
16    Anthropic,
17}
18
// ---------------------------------------------------------------------------
// Verification mode
// ---------------------------------------------------------------------------
22
23#[derive(Debug, Deserialize, Clone, Default)]
24pub enum VerifyMode {
25    /// Deterministic consistency checks only — no extra LLM call (default).
26    #[serde(rename = "deterministic")]
27    #[default]
28    Deterministic,
29    /// Deterministic checks + a second LLM call against the verifier soul prompt.
30    #[serde(rename = "llm")]
31    Llm,
32    /// Deterministic checks + LLM verifier + a third adjudicator LLM call when the
33    /// disagreement structure matches a high-risk injection fingerprint.
34    /// Inspired by ReConcile (ACL) multi-model consensus and DiscoUQ structured
35    /// disagreement scoring.
36    #[serde(rename = "reconcile")]
37    Reconcile,
38    /// Skip verification entirely.
39    #[serde(rename = "none")]
40    None,
41}
42
// ---------------------------------------------------------------------------
// Runtime configuration
// ---------------------------------------------------------------------------
46
47#[derive(Debug, Deserialize, Clone)]
48pub struct Config {
49    pub backend: BackendType,
50    pub endpoint: String,
51    pub model_name: String,
52    pub soul_path: String,
53    pub api_key: Option<String>,
54    pub verify_mode: VerifyMode,
55    pub timeout_secs: u64,
56    /// Print system prompt + payload to stderr before the model call.
57    pub dump_prompt: bool,
58    /// Print raw model output to stderr before extraction.
59    pub dump_raw: bool,
60    /// Path to the capability memory JSON file for forge persistence.
61    /// None = in-memory only (no cross-session reputation).
62    pub memory_path: Option<String>,
63    /// Path to the append-only forge audit log (JSONL).
64    /// None = no audit logging.
65    pub audit_path: Option<String>,
66    /// If set, `sbh serve` requires `Authorization: Bearer <serve_key>`.
67    /// The serve key is NOT forwarded as the upstream API key.
68    pub serve_key: Option<String>,
69    /// Max requests per minute per IP for `sbh serve`. Default 60.
70    pub serve_rate_limit: u32,
71    /// Max request body size in bytes for `sbh serve`. Default 1 MiB.
72    pub serve_max_body_bytes: usize,
73    /// Path to the append-only session escalation log (JSONL).
74    /// Written on every slow-boil escalation event detected by `sbh serve`.
75    /// None = events are not persisted.
76    pub session_log_path: Option<String>,
77    /// Path to operator-supplied context docs (TOML file or directory of TOML files).
78    /// Merged with the embedded default corpus and injected into the system prompt.
79    /// None = embedded default corpus only.
80    pub context_path: Option<String>,
81}
82
83impl std::fmt::Display for BackendType {
84    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85        match self {
86            BackendType::OpenAiCompat => write!(f, "openai-compat"),
87            BackendType::OllamaNative => write!(f, "ollama-native"),
88            BackendType::LocalEmbedded => write!(f, "local-embedded"),
89            BackendType::Anthropic => write!(f, "anthropic"),
90        }
91    }
92}
93
94impl std::fmt::Display for VerifyMode {
95    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96        match self {
97            VerifyMode::Deterministic => write!(f, "deterministic"),
98            VerifyMode::Llm => write!(f, "llm"),
99            VerifyMode::Reconcile => write!(f, "reconcile"),
100            VerifyMode::None => write!(f, "none"),
101        }
102    }
103}
104
// ---------------------------------------------------------------------------
// Soul container
// ---------------------------------------------------------------------------
108
/// The set of system prompts that make up a "soul".
///
/// Plain owned strings, one per role. `PartialEq`/`Eq` are derived so two souls
/// can be compared directly, e.g. with `assert_eq!` when testing whatever loads
/// them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Soul {
    /// System prompt for the logic role (usage not visible in this file — confirm).
    pub logic_system_prompt: String,
    /// System prompt for the creative role (usage not visible in this file — confirm).
    pub creative_system_prompt: String,
    /// System prompt for the verifier; `VerifyMode::Llm` is documented as making
    /// its second LLM call against the verifier soul prompt.
    pub verifier_system_prompt: String,
    /// System prompt for code generation (usage not visible in this file — confirm).
    pub code_gen_system_prompt: String,
}
116
// ---------------------------------------------------------------------------
// Telemetry output schema
// ---------------------------------------------------------------------------
120
121#[derive(Debug, Serialize, Deserialize, Clone)]
122#[serde(deny_unknown_fields)]
123pub struct AfferentTelemetry {
124    pub primary_emotion: String,
125    pub emotional_intensity: f32,
126    pub structural_tone: Vec<String>,
127}
128
129#[derive(Debug, Serialize, Deserialize, Clone)]
130#[serde(deny_unknown_fields)]
131pub struct IntentMatrix {
132    pub stated_objective: String,
133    pub subtextual_motive: String,
134    pub manipulation_risk: String,
135}
136
137#[derive(Debug, Serialize, Deserialize, Clone)]
138#[serde(deny_unknown_fields)]
139pub struct CognitiveState {
140    pub urgency_vector: f32,
141    pub coherence_rating: f32,
142}
143
144#[derive(Debug, Serialize, Deserialize, Clone)]
145pub struct TelemetryResult {
146    pub affective_telemetry: AfferentTelemetry,
147    pub intent_matrix: IntentMatrix,
148    pub cognitive_state: CognitiveState,
149}
150
// ---------------------------------------------------------------------------
// Verification layer
// ---------------------------------------------------------------------------
154
155/// One step in the analysis pipeline — propose, deterministic check, or LLM verify.
156#[derive(Debug, Serialize, Deserialize, Clone)]
157pub struct TraceEntry {
158    pub stage: String,
159    pub claim: String,
160    #[serde(skip_serializing_if = "Option::is_none")]
161    pub evidence: Option<String>,
162    pub passed: bool,
163    #[serde(skip_serializing_if = "Option::is_none")]
164    pub note: Option<String>,
165}
166
167/// Structured analysis of how the verification layer disagrees with the proposer.
168///
169/// Inspired by DiscoUQ (structured inter-agent disagreement scoring): not all flag
170/// counts are equal. Two flags from the same analytical domain suggest a single
171/// root cause; flags spread across domains suggest a broader attack surface. The
172/// injection fingerprint fires when the flag combination matches the canonical
173/// manipulation-evasion pattern (adversarial tone + urgency both present while
174/// manipulation_risk is asserted low).
175#[derive(Debug, Serialize, Deserialize, Clone, Default)]
176pub struct DisagreementScore {
177    /// Number of deterministic consistency checks that fired.
178    pub flag_count: usize,
179    /// Fraction of total checks that fired (0.0–1.0).
180    pub flag_density: f32,
181    /// Number of distinct analytical dimensions with at least one flag.
182    /// Dimensions: affective, tone, urgency, coherence, risk-value, risk-signal.
183    pub dimension_spread: usize,
184    /// True when the flag set matches the canonical injection-evasion fingerprint:
185    /// adversarial/coercive tone + high urgency both flagging against a low
186    /// manipulation_risk assertion. This pattern indicates the proposer was deceived
187    /// by a payload designed to appear benign while exerting coercive pressure.
188    pub injection_fingerprint: bool,
189    /// Confidence derived from disagreement structure (replaces flat flag-count penalty).
190    /// Uses density and fingerprint match instead of a simple per-flag discount.
191    pub adjusted_confidence: f32,
192    /// Present when Reconcile mode ran — summary of the adjudicator's verdict.
193    #[serde(skip_serializing_if = "Option::is_none")]
194    pub reconcile_verdict: Option<String>,
195}
196
197/// Result of the verification stage.
198#[derive(Debug, Serialize, Deserialize, Clone)]
199pub struct VerificationReport {
200    pub passed: bool,
201    pub consistency_flags: Vec<String>,
202    pub unsupported_claims: Vec<String>,
203    pub assumptions: Vec<String>,
204    pub unresolved: Vec<String>,
205    pub confidence: f32,
206    /// Structured disagreement analysis (DiscoUQ-inspired). Always populated.
207    pub disagreement: DisagreementScore,
208    /// When true, confidence is below threshold — caller should pause and ask
209    /// for clarification rather than acting on the result.
210    pub stop_and_ask: bool,
211}
212
213/// Summary of pre-Stage-1 obfuscation detections from the normalizer pass.
214#[derive(Debug, Serialize, Deserialize, Clone, Default)]
215pub struct ObfuscationReport {
216    /// 0.0 = clean input, 1.0 = heavily obfuscated. Threshold ~0.25 for action.
217    pub score: f32,
218    /// Human-readable list of detected obfuscation events, e.g. ["homoglyph (3)", "base64"].
219    pub detections: Vec<String>,
220    /// The normalized (deobfuscated) text that was passed to Stage 1.
221    pub normalized_input: String,
222}
223
224/// Full pipeline output: telemetry + verification + step-level trace.
225/// `capability_request` is `None` unless the model emitted one alongside
226/// its telemetry (Phase 1 schema — no execution in this release).
227#[derive(Debug, Serialize, Deserialize, Clone)]
228pub struct HarnessResult {
229    pub telemetry: TelemetryResult,
230    pub verification: VerificationReport,
231    pub trace: Vec<TraceEntry>,
232    #[serde(skip_serializing_if = "Option::is_none", default)]
233    pub capability_request: Option<crate::capability::CapabilityRequest>,
234    /// Present when the input required deobfuscation before Stage 1.
235    #[serde(skip_serializing_if = "Option::is_none", default)]
236    pub obfuscation: Option<ObfuscationReport>,
237}