Skip to main content

split_brain_harness/
transformer.rs

//! Transformer / RAG layer — canonical prompt construction for the split-brain pipeline.
//!
//! `SplitBrainTransformer` is the single place that assembles the system prompt and
//! payload sent to the inference backend. It combines three sources:
//!   1. The soul (identity + operating constraints, from soul.md)
//!   2. The RAG context corpus (operator-configurable threat doctrine / schema reference)
//!   3. Trigger-matched context packs (conditionally injected for specific threat signals)
//!
//! This makes the prompt construction testable, reproducible, and independent of the
//! inference backend. Any backend (Ollama, Anthropic, OpenAI-compat, future embedded)
//! uses the same transformer output.
12use crate::capability::ModelProposalOutput;
13use crate::context_packs::ContextPack;
14use crate::extractor;
15use crate::rag::ContextCorpus;
16use crate::soul;
17use crate::types::Soul;
18use anyhow::Result;
19
/// Controls transformer behaviour.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so that two policies
/// can be compared directly (e.g. with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TransformPolicy {
    /// Maximum characters of RAG context injected into the system prompt.
    /// Whole docs are dropped when the budget is exceeded (never split mid-doc).
    pub max_context_chars: usize,
}
27
28impl Default for TransformPolicy {
29    fn default() -> Self {
30        Self {
31            max_context_chars: 6000,
32        }
33    }
34}
35
36/// Assembles system prompts and payloads for the split-brain inference pipeline.
37pub struct SplitBrainTransformer {
38    pub soul: Soul,
39    pub corpus: ContextCorpus,
40    pub policy: TransformPolicy,
41}
42
43impl SplitBrainTransformer {
44    /// Create with embedded default corpus and default policy.
45    pub fn new(soul: Soul) -> Self {
46        Self {
47            soul,
48            corpus: ContextCorpus::embedded(),
49            policy: TransformPolicy::default(),
50        }
51    }
52
53    /// Create with a custom corpus and policy.
54    pub fn with_corpus(soul: Soul, corpus: ContextCorpus, policy: TransformPolicy) -> Self {
55        Self {
56            soul,
57            corpus,
58            policy,
59        }
60    }
61
62    /// Build the augmented system prompt.
63    ///
64    /// Order of injection:
65    ///   1. Soul logic system prompt (always present)
66    ///   2. RAG context pack (embedded + operator docs, up to max_context_chars)
67    ///   3. Trigger-matched context packs (only when input matched threat signals)
68    pub fn transform_system(&self, trigger_packs: &[&'static ContextPack]) -> String {
69        let mut buf = self.soul.logic_system_prompt.clone();
70
71        // RAG context injection
72        let rendered = self.corpus.render(self.policy.max_context_chars);
73        if !rendered.is_empty() {
74            buf.push_str("\n\n--- CONTEXT REFERENCE ---\n");
75            buf.push_str(
76                "Use the following doctrine reference when calibrating telemetry scores.\n",
77            );
78            buf.push('\n');
79            buf.push_str(&rendered);
80            buf.push_str("\n--- END CONTEXT REFERENCE ---");
81        }
82
83        // Trigger-matched pack injection (existing adaptor path, preserved)
84        if !trigger_packs.is_empty() {
85            buf.push_str("\n\n--- CONTEXT REFERENCE PACKS ---\n");
86            buf.push_str(
87                "Use the following threat-pattern reference when scoring \
88                 manipulation_risk and structural_tone.\n",
89            );
90            for pack in trigger_packs {
91                buf.push('\n');
92                buf.push_str(pack.content);
93                buf.push('\n');
94            }
95            buf.push_str("\n--- END CONTEXT REFERENCE PACKS ---");
96        }
97
98        buf
99    }
100
101    /// Wrap `input` in payload tags for the model.
102    pub fn transform_payload(&self, input: &str) -> String {
103        soul::wrap_payload(input)
104    }
105
106    /// Parse raw model output into a `ModelProposalOutput` (telemetry + optional capability request).
107    pub fn postprocess(&self, raw: &str) -> Result<ModelProposalOutput> {
108        extractor::extract(raw).map_err(|e| anyhow::anyhow!("postprocess failed: {e}"))
109    }
110}
111
112// ---------------------------------------------------------------------------
113// Tests
114// ---------------------------------------------------------------------------
115
#[cfg(test)]
mod tests {
    use super::*;
    use crate::soul;

    /// Loads the soul with `soul::load(None)`; a load failure aborts the test.
    fn default_soul() -> Soul {
        soul::load(None).unwrap()
    }

    /// Transformer built with `SplitBrainTransformer::new` (embedded corpus, default policy).
    fn make_transformer() -> SplitBrainTransformer {
        SplitBrainTransformer::new(default_soul())
    }

    #[test]
    fn transform_system_contains_soul_prompt() {
        let t = make_transformer();
        let system = t.transform_system(&[]);
        assert!(
            system.contains("telemetry engine"),
            "soul logic prompt must appear in system output"
        );
    }

    #[test]
    fn transform_system_injects_rag_context() {
        let t = make_transformer();
        let system = t.transform_system(&[]);
        assert!(
            system.contains("<context_pack>"),
            "RAG context block must be injected"
        );
        assert!(
            system.contains("TelemetryResult Field Reference"),
            "schema doc must appear in context"
        );
    }

    #[test]
    fn transform_system_is_deterministic() {
        let soul = default_soul();
        let t1 = SplitBrainTransformer::new(soul.clone());
        let t2 = SplitBrainTransformer::new(soul);
        assert_eq!(
            t1.transform_system(&[]),
            t2.transform_system(&[]),
            "same soul + corpus must always produce the same system prompt"
        );
    }

    #[test]
    fn transform_system_no_packs_excludes_pack_section() {
        let t = make_transformer();
        let system = t.transform_system(&[]);
        assert!(
            !system.contains("CONTEXT REFERENCE PACKS"),
            "pack section must be absent when no packs are active"
        );
    }

    #[test]
    fn transform_payload_wraps_in_tags() {
        let t = make_transformer();
        let payload = t.transform_payload("hello world");
        assert!(payload.contains("<payload>"), "must open payload tag");
        assert!(payload.contains("hello world"), "must contain input");
    }

    #[test]
    fn transform_system_with_empty_corpus_omits_context_block() {
        let t = SplitBrainTransformer::with_corpus(
            default_soul(),
            ContextCorpus::default(),
            TransformPolicy::default(),
        );
        let system = t.transform_system(&[]);
        assert!(
            !system.contains("<context_pack>"),
            "no context block when corpus is empty"
        );
    }

    #[test]
    fn policy_max_context_chars_limits_injection() {
        let soul = default_soul();
        let policy = TransformPolicy {
            max_context_chars: 100,
        };
        let tight = SplitBrainTransformer::with_corpus(
            soul.clone(),
            ContextCorpus::embedded(),
            policy,
        )
        .transform_system(&[]);
        let roomy = SplitBrainTransformer::new(soul.clone()).transform_system(&[]);

        // The budget applies to the injected context only; the soul prompt that
        // opens the system prompt must survive intact.
        assert!(
            tight.starts_with(soul.logic_system_prompt.as_str()),
            "context budget must never cut into the soul prompt"
        );
        // A smaller budget may drop whole docs but can never inject more than a
        // larger budget does for the same corpus.
        assert!(
            tight.len() <= roomy.len(),
            "100-char budget produced a longer prompt ({}) than the default budget ({})",
            tight.len(),
            roomy.len()
        );
        // Whatever survives the budget must still sit inside the reference block
        // (the renderer may emit only its wrapper tags when no doc fits).
        assert!(
            tight.contains("CONTEXT REFERENCE") || !tight.contains("<context_pack>"),
            "context pack markup must only appear inside the CONTEXT REFERENCE block"
        );
    }

    #[test]
    fn with_corpus_uses_provided_corpus() {
        let custom_doc = crate::rag::ContextDoc {
            id: "custom.test".into(),
            title: "Custom Test Doc".into(),
            text: "custom content for test".into(),
            tags: vec![],
        };
        let corpus = ContextCorpus {
            docs: vec![custom_doc],
        };
        let t =
            SplitBrainTransformer::with_corpus(default_soul(), corpus, TransformPolicy::default());
        let system = t.transform_system(&[]);
        assert!(
            system.contains("Custom Test Doc"),
            "custom doc must appear in system prompt"
        );
        assert!(system.contains("custom content for test"));
    }

    #[test]
    fn soul_field_is_accessible() {
        let t = make_transformer();
        assert!(!t.soul.logic_system_prompt.is_empty());
        assert!(!t.soul.verifier_system_prompt.is_empty());
    }
}
235}