Skip to main content

split_brain_harness/
transformer.rs

//! Transformer / RAG layer — canonical prompt construction for the split-brain pipeline.
//!
//! `SplitBrainTransformer` is the single place that assembles the system prompt and
//! payload sent to the inference backend. It combines three sources:
//!   1. The soul (identity + operating constraints, from soul.md)
//!   2. The RAG context corpus (operator-configurable threat doctrine / schema reference)
//!   3. Trigger-matched context packs (conditionally injected for specific threat signals)
//!
//! This makes the prompt construction testable, reproducible, and independent of the
//! inference backend. Any backend (Ollama, Anthropic, OpenAI-compat, future embedded)
//! uses the same transformer output.
12use crate::capability::ModelProposalOutput;
13use crate::context_packs::ContextPack;
14use crate::extractor;
15use crate::rag::ContextCorpus;
16use crate::soul;
17use crate::types::Soul;
18use anyhow::Result;
19
/// Controls transformer behaviour.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so that policies can be
/// compared directly, e.g. with `assert_eq!` or when checking for configuration changes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TransformPolicy {
    /// Maximum characters of RAG context injected into the system prompt.
    /// Whole docs are dropped when the budget is exceeded (never split mid-doc).
    pub max_context_chars: usize,
}
27
28impl Default for TransformPolicy {
29    fn default() -> Self {
30        Self {
31            max_context_chars: 6000,
32        }
33    }
34}
35
36/// Assembles system prompts and payloads for the split-brain inference pipeline.
37pub struct SplitBrainTransformer {
38    pub soul: Soul,
39    pub corpus: ContextCorpus,
40    pub policy: TransformPolicy,
41}
42
43impl SplitBrainTransformer {
44    /// Create with embedded default corpus and default policy.
45    pub fn new(soul: Soul) -> Self {
46        Self {
47            soul,
48            corpus: ContextCorpus::embedded(),
49            policy: TransformPolicy::default(),
50        }
51    }
52
53    /// Create with a custom corpus and policy.
54    pub fn with_corpus(soul: Soul, corpus: ContextCorpus, policy: TransformPolicy) -> Self {
55        Self { soul, corpus, policy }
56    }
57
58    /// Build the augmented system prompt.
59    ///
60    /// Order of injection:
61    ///   1. Soul logic system prompt (always present)
62    ///   2. RAG context pack (embedded + operator docs, up to max_context_chars)
63    ///   3. Trigger-matched context packs (only when input matched threat signals)
64    pub fn transform_system(&self, trigger_packs: &[&'static ContextPack]) -> String {
65        let mut buf = self.soul.logic_system_prompt.clone();
66
67        // RAG context injection
68        let rendered = self.corpus.render(self.policy.max_context_chars);
69        if !rendered.is_empty() {
70            buf.push_str("\n\n--- CONTEXT REFERENCE ---\n");
71            buf.push_str(
72                "Use the following doctrine reference when calibrating telemetry scores.\n",
73            );
74            buf.push('\n');
75            buf.push_str(&rendered);
76            buf.push_str("\n--- END CONTEXT REFERENCE ---");
77        }
78
79        // Trigger-matched pack injection (existing adaptor path, preserved)
80        if !trigger_packs.is_empty() {
81            buf.push_str("\n\n--- CONTEXT REFERENCE PACKS ---\n");
82            buf.push_str(
83                "Use the following threat-pattern reference when scoring \
84                 manipulation_risk and structural_tone.\n",
85            );
86            for pack in trigger_packs {
87                buf.push('\n');
88                buf.push_str(pack.content);
89                buf.push('\n');
90            }
91            buf.push_str("\n--- END CONTEXT REFERENCE PACKS ---");
92        }
93
94        buf
95    }
96
97    /// Wrap `input` in payload tags for the model.
98    pub fn transform_payload(&self, input: &str) -> String {
99        soul::wrap_payload(input)
100    }
101
102    /// Parse raw model output into a `ModelProposalOutput` (telemetry + optional capability request).
103    pub fn postprocess(&self, raw: &str) -> Result<ModelProposalOutput> {
104        extractor::extract(raw)
105            .map_err(|e| anyhow::anyhow!("postprocess failed: {e}"))
106    }
107}
108
109// ---------------------------------------------------------------------------
110// Tests
111// ---------------------------------------------------------------------------
112
#[cfg(test)]
mod tests {
    use super::*;
    use crate::soul;

    /// Loads the soul with no explicit source; panics if loading fails.
    fn default_soul() -> Soul {
        soul::load(None).unwrap()
    }

    /// Transformer over the default soul, embedded corpus and default policy.
    fn make_transformer() -> SplitBrainTransformer {
        SplitBrainTransformer::new(default_soul())
    }

    /// System prompt produced by the default transformer with no trigger packs.
    fn default_system() -> String {
        make_transformer().transform_system(&[])
    }

    #[test]
    fn transform_system_contains_soul_prompt() {
        let system = default_system();
        assert!(
            system.contains("telemetry engine"),
            "soul logic prompt must appear in system output"
        );
    }

    #[test]
    fn transform_system_injects_rag_context() {
        let system = default_system();
        assert!(
            system.contains("<context_pack>"),
            "RAG context block must be injected"
        );
        assert!(
            system.contains("TelemetryResult Field Reference"),
            "schema doc must appear in context"
        );
    }

    #[test]
    fn transform_system_is_deterministic() {
        // Two transformers built from the same soul must agree on the prompt.
        let soul = default_soul();
        let first = SplitBrainTransformer::new(soul.clone());
        let second = SplitBrainTransformer::new(soul);
        assert_eq!(
            first.transform_system(&[]),
            second.transform_system(&[]),
            "same soul + corpus must always produce the same system prompt"
        );
    }

    #[test]
    fn transform_system_no_packs_excludes_pack_section() {
        let system = default_system();
        assert!(
            !system.contains("CONTEXT REFERENCE PACKS"),
            "pack section must be absent when no packs are active"
        );
    }

    #[test]
    fn transform_payload_wraps_in_tags() {
        let wrapped = make_transformer().transform_payload("hello world");
        assert!(wrapped.contains("<payload>"), "must open payload tag");
        assert!(wrapped.contains("hello world"), "must contain input");
    }

    #[test]
    fn transform_system_with_empty_corpus_omits_context_block() {
        let transformer = SplitBrainTransformer::with_corpus(
            default_soul(),
            ContextCorpus::default(),
            TransformPolicy::default(),
        );
        let system = transformer.transform_system(&[]);
        assert!(
            !system.contains("<context_pack>"),
            "no context block when corpus is empty"
        );
    }

    #[test]
    fn policy_max_context_chars_limits_injection() {
        let tight_policy = TransformPolicy {
            max_context_chars: 100,
        };
        let transformer = SplitBrainTransformer::with_corpus(
            default_soul(),
            ContextCorpus::embedded(),
            tight_policy,
        );
        let system = transformer.transform_system(&[]);
        // With a 100-char budget the context section is either present (reduced) or
        // the corpus rendered nothing and no context pack markup leaked through.
        assert!(system.contains("CONTEXT REFERENCE") || !system.contains("<context_pack>"));
    }

    #[test]
    fn with_corpus_uses_provided_corpus() {
        let corpus = ContextCorpus {
            docs: vec![crate::rag::ContextDoc {
                id: "custom.test".into(),
                title: "Custom Test Doc".into(),
                text: "custom content for test".into(),
                tags: vec![],
            }],
        };
        let transformer =
            SplitBrainTransformer::with_corpus(default_soul(), corpus, TransformPolicy::default());
        let system = transformer.transform_system(&[]);
        assert!(
            system.contains("Custom Test Doc"),
            "custom doc must appear in system prompt"
        );
        assert!(system.contains("custom content for test"));
    }

    #[test]
    fn soul_field_is_accessible() {
        let transformer = make_transformer();
        assert!(!transformer.soul.logic_system_prompt.is_empty());
        assert!(!transformer.soul.verifier_system_prompt.is_empty());
    }
}
229}