1use crate::backends::InferenceEngine;
2use crate::extractor;
3use crate::soul;
4use crate::types::{Soul, TelemetryResult, TraceEntry, VerificationReport, VerifyMode};
5use serde::Deserialize;
6
/// Confidence cutoff for `stop_and_ask`: `verify` sets the flag when the derived
/// confidence is strictly below this value (three or more consistency flags set
/// it regardless of confidence).
const STOP_AND_ASK_THRESHOLD: f32 = 0.4;
8
/// Boxed deterministic check: inspects a telemetry result and returns
/// `Some(message)` describing an inconsistency, or `None` when the check passes.
type CheckFn = Box<dyn Fn(&TelemetryResult) -> Option<String>>;
10
/// Structured verdict obtained by running `extractor::extract` over the verifier
/// engine's raw response.
#[derive(Deserialize, Default)]
struct VerifierLLMOutput {
    /// Overall verdict reported by the model; combined with `unsupported_claims`
    /// to decide whether the `verify-llm` trace entry passes.
    supported: bool,
    /// Claims the model reported as unsupported; copied into the verification
    /// report, where a non-empty list prevents the report from passing.
    unsupported_claims: Vec<String>,
    /// Copied verbatim into the report's `assumptions`.
    assumptions: Vec<String>,
    /// Copied verbatim into the report's `unresolved`.
    unresolved: Vec<String>,
    /// Model-reported confidence, averaged with the deterministic score.
    /// NOTE(review): presumably expected in [0, 1]; this struct does not
    /// validate the range — confirm against the verifier prompt.
    confidence: f32,
}
20
21pub async fn verify(
25 input: &str,
26 telemetry: &TelemetryResult,
27 soul: &Soul,
28 engine: &dyn InferenceEngine,
29 mode: &VerifyMode,
30) -> (VerificationReport, Vec<TraceEntry>) {
31 let mut traces = vec![];
32
33 let (consistency_flags, det_traces) = match mode {
34 VerifyMode::None => (vec![], vec![]),
35 _ => check_consistency(telemetry),
36 };
37 traces.extend(det_traces);
38
39 let (unsupported_claims, assumptions, unresolved, llm_confidence) = match mode {
40 VerifyMode::Llm => match run_llm_verify(input, telemetry, soul, engine).await {
41 Ok((out, t)) => {
42 traces.push(t);
43 (
44 out.unsupported_claims,
45 out.assumptions,
46 out.unresolved,
47 Some(out.confidence),
48 )
49 }
50 Err(e) => {
51 traces.push(TraceEntry {
55 stage: "verify-llm".into(),
56 claim: "LLM verifier unavailable — result unverified".into(),
57 evidence: None,
58 passed: false,
59 note: Some(e.to_string()),
60 });
61 let report = VerificationReport {
62 passed: false,
63 consistency_flags,
64 unsupported_claims: vec![],
65 assumptions: vec![],
66 unresolved: vec![format!("verifier unavailable: {e}")],
67 confidence: 0.0,
68 stop_and_ask: true,
69 };
70 return (report, traces);
71 }
72 },
73 _ => (vec![], vec![], vec![], None),
74 };
75
76 let confidence = derive_confidence(telemetry, &consistency_flags, llm_confidence);
77 let stop_and_ask = confidence < STOP_AND_ASK_THRESHOLD || consistency_flags.len() >= 3;
78 let passed = consistency_flags.is_empty() && unsupported_claims.is_empty();
79
80 let report = VerificationReport {
81 passed,
82 consistency_flags,
83 unsupported_claims,
84 assumptions,
85 unresolved,
86 confidence,
87 stop_and_ask,
88 };
89
90 (report, traces)
91}
92
93fn check_consistency(t: &TelemetryResult) -> (Vec<String>, Vec<TraceEntry>) {
98 let mut flags = vec![];
99 let mut traces = vec![];
100
101 let checks: &[(&str, CheckFn)] = &[
102 (
103 "emotion-intensity vs manipulation-risk",
104 Box::new(|t| {
105 let hostile = ["anger", "fear", "contempt", "hostility"];
106 if t.affective_telemetry.emotional_intensity >= 0.7
107 && t.intent_matrix.manipulation_risk == "low"
108 && hostile.contains(
109 &t.affective_telemetry
110 .primary_emotion
111 .to_lowercase()
112 .as_str(),
113 )
114 {
115 Some(format!(
116 "high emotional_intensity ({:.2}) with hostile emotion '{}' but manipulation_risk=low",
117 t.affective_telemetry.emotional_intensity,
118 t.affective_telemetry.primary_emotion
119 ))
120 } else {
121 None
122 }
123 }),
124 ),
125 (
126 "adversarial tone vs manipulation-risk",
127 Box::new(|t| {
128 let adversarial = ["adversarial", "manipulative", "coercive", "threatening"];
129 let found: Vec<&str> = t
130 .affective_telemetry
131 .structural_tone
132 .iter()
133 .filter(|s| adversarial.contains(&s.to_lowercase().as_str()))
134 .map(|s| s.as_str())
135 .collect();
136 if !found.is_empty() && t.intent_matrix.manipulation_risk == "low" {
137 Some(format!(
138 "structural_tone {:?} conflicts with manipulation_risk=low",
139 found
140 ))
141 } else {
142 None
143 }
144 }),
145 ),
146 (
147 "urgency vs manipulation-risk",
148 Box::new(|t| {
149 if t.cognitive_state.urgency_vector >= 0.7
150 && t.intent_matrix.manipulation_risk == "low"
151 {
152 Some(format!(
153 "high urgency_vector ({:.2}) with manipulation_risk=low — urgency may be manufactured",
154 t.cognitive_state.urgency_vector
155 ))
156 } else {
157 None
158 }
159 }),
160 ),
161 (
162 "input coherence",
163 Box::new(|t| {
164 if t.cognitive_state.coherence_rating < 0.3 {
165 Some(format!(
166 "coherence_rating={:.2} — input may be too incoherent for reliable analysis",
167 t.cognitive_state.coherence_rating
168 ))
169 } else {
170 None
171 }
172 }),
173 ),
174 (
175 "manipulation-risk-value",
176 Box::new(|t| {
177 const VALID: &[&str] = &["low", "medium", "high"];
178 let risk = t.intent_matrix.manipulation_risk.to_lowercase();
179 if !VALID.contains(&risk.as_str()) {
180 Some(format!(
181 "manipulation_risk {:?} is not a recognized value (expected: low, medium, high) — treating as unknown",
182 t.intent_matrix.manipulation_risk
183 ))
184 } else {
185 None
186 }
187 }),
188 ),
189 (
190 "high-risk vs non-coercive signals",
191 Box::new(|t| {
192 let coercive_tones = [
197 "adversarial", "coercive", "threatening", "manipulative",
198 "demanding", "directive", "authority-invoking", "hostile",
199 ];
200 let has_coercive_tone = t.affective_telemetry.structural_tone.iter().any(|s| {
201 coercive_tones.contains(&s.to_lowercase().as_str())
202 });
203 if t.intent_matrix.manipulation_risk == "high"
204 && t.cognitive_state.urgency_vector < 0.4
205 && !has_coercive_tone
206 {
207 Some(format!(
208 "manipulation_risk=high but urgency_vector={:.2} and no coercive structural_tone — \
209 high risk requires coercive signals directed at the system",
210 t.cognitive_state.urgency_vector
211 ))
212 } else {
213 None
214 }
215 }),
216 ),
217 ];
218
219 for (name, check) in checks {
220 match check(t) {
221 Some(flag) => {
222 flags.push(flag.clone());
223 traces.push(TraceEntry {
224 stage: "verify-deterministic".into(),
225 claim: name.to_string(),
226 evidence: None,
227 passed: false,
228 note: Some(flag),
229 });
230 }
231 None => {
232 traces.push(TraceEntry {
233 stage: "verify-deterministic".into(),
234 claim: name.to_string(),
235 evidence: None,
236 passed: true,
237 note: None,
238 });
239 }
240 }
241 }
242
243 (flags, traces)
244}
245
246async fn run_llm_verify(
251 input: &str,
252 telemetry: &TelemetryResult,
253 soul: &Soul,
254 engine: &dyn InferenceEngine,
255) -> anyhow::Result<(VerifierLLMOutput, TraceEntry)> {
256 if soul.verifier_system_prompt.is_empty() {
257 return Err(anyhow::anyhow!(
258 "verifier soul prompt is empty — add a [VERIFIER_SYSTEM_PROMPT] section to soul.md"
259 ));
260 }
261
262 let proposed_json = serde_json::to_string_pretty(telemetry)?;
263 let payload = soul::wrap_verifier_payload(input, &proposed_json);
264
265 let raw = engine
266 .generate(&soul.verifier_system_prompt, &payload)
267 .await
268 .map_err(|e| anyhow::anyhow!("verifier inference error: {e}"))?;
269
270 let out: VerifierLLMOutput = extractor::extract(&raw).map_err(|e| {
271 let preview: String = raw.chars().take(200).collect();
272 anyhow::anyhow!("verifier output parse failed: {e}\n raw (first 200 chars): {preview}")
273 })?;
274
275 let note = if out.unsupported_claims.is_empty() {
276 None
277 } else {
278 Some(out.unsupported_claims.join("; "))
279 };
280
281 let trace = TraceEntry {
282 stage: "verify-llm".into(),
283 claim: format!("confidence={:.2}", out.confidence),
284 evidence: if out.unsupported_claims.is_empty() {
285 None
286 } else {
287 Some(format!("unsupported: {:?}", out.unsupported_claims))
288 },
289 passed: out.supported && out.unsupported_claims.is_empty(),
290 note,
291 };
292
293 Ok((out, trace))
294}
295
296fn derive_confidence(t: &TelemetryResult, flags: &[String], llm_confidence: Option<f32>) -> f32 {
301 let base = t.cognitive_state.coherence_rating;
302 let penalty = (flags.len() as f32) * 0.15;
303 let score = (base - penalty).clamp(0.0, 1.0);
304 match llm_confidence {
305 Some(llm) => ((score + llm) / 2.0).clamp(0.0, 1.0),
306 None => score,
307 }
308}
309
/// Unit tests for the deterministic consistency checks and confidence scoring.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{AfferentTelemetry, CognitiveState, IntentMatrix, TelemetryResult};

    /// Builds a telemetry fixture from the fields the checks actually read.
    fn make_telemetry(
        emotion: &str,
        intensity: f32,
        tone: Vec<&str>,
        risk: &str,
        urgency: f32,
        coherence: f32,
    ) -> TelemetryResult {
        TelemetryResult {
            affective_telemetry: AfferentTelemetry {
                primary_emotion: emotion.into(),
                emotional_intensity: intensity,
                structural_tone: tone.into_iter().map(String::from).collect(),
            },
            intent_matrix: IntentMatrix {
                stated_objective: "test objective".into(),
                subtextual_motive: "test motive".into(),
                manipulation_risk: risk.into(),
            },
            cognitive_state: CognitiveState {
                urgency_vector: urgency,
                coherence_rating: coherence,
            },
        }
    }

    /// True when any flag message contains `needle`.
    fn has_flag(flags: &[String], needle: &str) -> bool {
        flags.iter().any(|f| f.contains(needle))
    }

    /// Mirrors the stop-and-ask rule applied by `verify`.
    fn should_stop(confidence: f32, flags: &[String]) -> bool {
        confidence < STOP_AND_ASK_THRESHOLD || flags.len() >= 3
    }

    /// Hostile emotion at high intensity contradicts a low risk rating.
    #[test]
    fn flags_hostile_high_intensity_low_risk() {
        let t = make_telemetry("anger", 0.85, vec!["demanding"], "low", 0.3, 0.9);
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "emotional_intensity"),
            "should flag hostile emotion + high intensity vs low risk"
        );
    }

    /// Adversarial structural tone contradicts a low risk rating.
    #[test]
    fn flags_adversarial_tone_low_risk() {
        let t = make_telemetry(
            "neutral",
            0.2,
            vec!["adversarial", "coercive"],
            "low",
            0.1,
            0.9,
        );
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "structural_tone"),
            "should flag adversarial tone vs low risk"
        );
    }

    /// High urgency contradicts a low risk rating.
    #[test]
    fn flags_high_urgency_low_risk() {
        let t = make_telemetry("neutral", 0.2, vec!["cooperative"], "low", 0.8, 0.9);
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "urgency_vector"),
            "should flag high urgency vs low risk"
        );
    }

    /// Coherence below 0.3 is flagged on its own.
    #[test]
    fn flags_low_coherence() {
        let t = make_telemetry("neutral", 0.2, vec!["incoherent"], "low", 0.1, 0.2);
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "coherence_rating"),
            "should flag low coherence"
        );
    }

    /// A benign input produces no flags and only passing traces.
    #[test]
    fn clean_benign_passes_all_checks() {
        let t = make_telemetry(
            "neutral",
            0.05,
            vec!["cooperative", "inquisitive"],
            "low",
            0.05,
            0.98,
        );
        let (flags, traces) = check_consistency(&t);
        assert!(
            flags.is_empty(),
            "clean benign input should pass all checks"
        );
        assert!(
            traces.iter().all(|t| t.passed),
            "all traces should be passed"
        );
    }

    /// Adversarial tone is consistent with a high risk rating.
    #[test]
    fn high_risk_high_intensity_passes() {
        let t = make_telemetry(
            "anger",
            0.9,
            vec!["adversarial", "threatening"],
            "high",
            0.8,
            0.85,
        );
        let (flags, _) = check_consistency(&t);
        assert!(
            !has_flag(&flags, "structural_tone"),
            "adversarial tone with high risk should not flag"
        );
    }

    /// With no flags and no model confidence, confidence is the coherence rating.
    #[test]
    fn confidence_equals_coherence_when_no_flags() {
        let t = make_telemetry("neutral", 0.1, vec!["analytical"], "low", 0.0, 0.95);
        let (flags, _) = check_consistency(&t);
        let confidence = derive_confidence(&t, &flags, None);
        assert!((confidence - 0.95).abs() < 0.01);
    }

    /// Each flag lowers the derived confidence.
    #[test]
    fn confidence_penalized_per_flag() {
        let t = make_telemetry("anger", 0.85, vec!["adversarial"], "low", 0.8, 0.9);
        let (flags, _) = check_consistency(&t);
        let confidence = derive_confidence(&t, &flags, None);
        assert!(confidence < 0.9, "each flag should reduce confidence");
    }

    /// Three flags trigger stop-and-ask regardless of confidence.
    #[test]
    fn stop_and_ask_triggers_at_threshold() {
        let flags: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
        let t = make_telemetry("neutral", 0.5, vec![], "medium", 0.5, 0.9);
        let confidence = derive_confidence(&t, &flags, None);
        assert!(
            should_stop(confidence, &flags),
            "3 flags should always trigger stop_and_ask"
        );
    }

    /// Manipulative tone with low risk yields at least one flag.
    #[test]
    fn contradictory_risk_vs_tone_flagged() {
        let t = make_telemetry("enthusiasm", 0.3, vec!["manipulative"], "low", 0.2, 0.85);
        let (flags, _) = check_consistency(&t);
        assert!(
            !flags.is_empty(),
            "manipulative tone vs low risk should flag"
        );
    }

    /// Very low coherence drives confidence under the stop threshold.
    #[test]
    fn missing_context_low_coherence_stops() {
        let t = make_telemetry("confusion", 0.4, vec!["scattered"], "medium", 0.3, 0.18);
        let (flags, _) = check_consistency(&t);
        let confidence = derive_confidence(&t, &flags, None);
        assert!(
            should_stop(confidence, &flags),
            "low coherence should trigger stop_and_ask"
        );
    }

    /// An empty risk value is reported as unrecognized.
    #[test]
    fn unknown_manipulation_risk_is_flagged() {
        let t = make_telemetry("neutral", 0.1, vec!["cooperative"], "", 0.1, 0.9);
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "manipulation_risk"),
            "empty manipulation_risk should fire the unknown-value check"
        );
    }

    /// An arbitrary risk value is reported as unrecognized.
    #[test]
    fn garbage_manipulation_risk_is_flagged() {
        let t = make_telemetry("neutral", 0.1, vec!["cooperative"], "HACKED", 0.1, 0.9);
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "manipulation_risk"),
            "unrecognized manipulation_risk value should be flagged"
        );
    }

    /// The three recognized risk values never fire the unknown-value check.
    #[test]
    fn valid_manipulation_risk_values_not_flagged() {
        for risk in &["low", "medium"] {
            let t = make_telemetry("neutral", 0.1, vec!["cooperative"], risk, 0.1, 0.9);
            let (flags, _) = check_consistency(&t);
            assert!(
                !has_flag(&flags, "is not a recognized value"),
                "valid risk '{}' should not fire the unknown-value check",
                risk
            );
        }
        // "high" needs coercive signals alongside it to stay internally consistent.
        let t_high = make_telemetry("commanding", 0.8, vec!["coercive"], "high", 0.8, 0.8);
        let (flags, _) = check_consistency(&t_high);
        assert!(
            !has_flag(&flags, "is not a recognized value"),
            "valid risk 'high' should not fire the unknown-value check"
        );
    }

    /// Hostile emotion, adversarial tone and high urgency against low risk give
    /// three flags, which stop on count alone (confidence is still 0.45).
    #[test]
    fn three_consistency_flags_trigger_stop() {
        let t = make_telemetry("anger", 0.85, vec!["adversarial"], "low", 0.8, 0.9);
        let (flags, _) = check_consistency(&t);
        assert!(flags.len() >= 3, "fixture should raise three flags");
        let confidence = derive_confidence(&t, &flags, None);
        assert!(
            should_stop(confidence, &flags),
            "multiple flags should trigger stop"
        );
    }

    /// Two flags with high coherence leave confidence above the threshold, so
    /// they do not stop by themselves.
    #[test]
    fn two_consistency_flags_do_not_alone_stop() {
        let t = make_telemetry("anger", 0.85, vec!["adversarial"], "low", 0.3, 0.9);
        let (flags, _) = check_consistency(&t);
        assert_eq!(flags.len(), 2, "fixture should raise exactly two flags");
        let confidence = derive_confidence(&t, &flags, None);
        assert!(
            !should_stop(confidence, &flags),
            "two flags with high coherence should not stop"
        );
    }

    /// A clean, coherent input never stops.
    #[test]
    fn no_flags_high_coherence_does_not_stop() {
        let t = make_telemetry("neutral", 0.1, vec!["inquisitive"], "low", 0.05, 0.95);
        let (flags, _) = check_consistency(&t);
        assert!(flags.is_empty());
        let confidence = derive_confidence(&t, &flags, None);
        assert!(
            !should_stop(confidence, &flags),
            "clean benign input should not stop"
        );
    }

    /// Hostile signals paired with high risk are internally consistent.
    #[test]
    fn contradictory_high_risk_passes_consistency_as_internally_consistent() {
        let t = make_telemetry("hostility", 0.9, vec!["adversarial"], "high", 0.9, 0.8);
        let (flags, _) = check_consistency(&t);
        assert!(
            !has_flag(&flags, "structural_tone"),
            "adversarial tone + high risk is internally consistent"
        );
        assert!(
            !has_flag(&flags, "emotional_intensity"),
            "hostile emotion + high risk is internally consistent"
        );
    }

    /// High risk without urgency or coercive tone is flagged.
    #[test]
    fn high_risk_low_urgency_no_coercive_tone_flagged() {
        let t = make_telemetry(
            "sorrow",
            0.6,
            vec!["analytical", "persuasive"],
            "high",
            0.2,
            0.8,
        );
        let (flags, _) = check_consistency(&t);
        assert!(
            has_flag(&flags, "coercive signals"),
            "high risk + low urgency + no coercive tone should be flagged"
        );
    }

    /// High urgency alone is enough to justify a high risk rating.
    #[test]
    fn high_risk_high_urgency_no_coercive_tone_not_flagged_by_new_check() {
        let t = make_telemetry("urgency", 0.9, vec!["analytical"], "high", 0.8, 0.7);
        let (flags, _) = check_consistency(&t);
        assert!(
            !has_flag(&flags, "coercive signals"),
            "high risk + high urgency should not trigger the new check"
        );
    }

    /// Coercive tone alone is enough to justify a high risk rating.
    #[test]
    fn high_risk_coercive_tone_low_urgency_not_flagged_by_new_check() {
        let t = make_telemetry(
            "commanding",
            0.7,
            vec!["coercive", "directive"],
            "high",
            0.2,
            0.7,
        );
        let (flags, _) = check_consistency(&t);
        assert!(
            !has_flag(&flags, "coercive signals"),
            "high risk + coercive tone should not trigger the new check"
        );
    }
}