truthlens 0.6.0

AI hallucination detector — formally verified trust scoring for LLM outputs
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
use serde::{Deserialize, Serialize};

/// A contradiction between two claims from different responses.
///
/// # How the consistency checker works
///
/// Multi-response consistency checker — inspired by measurement-induced branching.
///
/// Given N responses to the same prompt, detects contradictions between them.
/// Like quantum measurement paths: different "branches" of the generative
/// process should agree on facts. When they don't, the outlier is likely
/// a hallucination.
///
/// ## Method
///
/// 1. Extract claims (subject/value pairs) from each response
/// 2. Group claims by topic (fuzzy match on the claim subject)
/// 3. Detect contradictions: claims from different responses whose subjects
///    match but whose values differ
/// 4. Score: claims that appear consistently across responses are trusted;
///    claims unique to one response are flagged
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Contradiction {
    /// Full sentence text of the claim taken from response A
    pub claim_a: String,
    /// Zero-based index of response A in the input slice (always lower than `response_b`)
    pub response_a: usize,
    /// Full sentence text of the claim taken from response B
    pub claim_b: String,
    /// Zero-based index of response B in the input slice
    pub response_b: usize,
    /// What specifically conflicts, formatted as `"<value a>" vs "<value b>" (subject: <subject>)`
    pub conflict: String,
    /// Confidence that this is a real contradiction (0.0–1.0).
    /// The detector assigns 0.9 to numeric disagreements, 0.8 when both values
    /// start with an uppercase letter, and 0.5 otherwise.
    pub confidence: f64,
}

/// Result of consistency analysis across multiple responses.
///
/// Produced by `check_consistency`. When fewer than two responses are
/// supplied no analysis is performed: every list is empty, `total_claims`
/// is 0 and `consistency_score` is 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsistencyReport {
    /// Number of responses analyzed
    pub n_responses: usize,
    /// Total claims extracted across all responses (sum over every response)
    pub total_claims: usize,
    /// Claims that appear consistently, i.e. were found in more than one response
    pub consistent_claims: Vec<ConsistentClaim>,
    /// Detected contradictions between pairs of responses
    pub contradictions: Vec<Contradiction>,
    /// Claims unique to a single response (potential hallucination)
    pub unique_claims: Vec<UniqueClaim>,
    /// Overall consistency score (0.0 = highly inconsistent, 1.0 = fully consistent)
    pub consistency_score: f64,
}

/// A claim that appears consistently across responses.
///
/// Only claims found in at least two responses are reported as consistent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsistentClaim {
    /// Representative text of the claim, formatted as `<subject>: <value>`
    /// using the subject and value of the first occurrence encountered
    pub text: String,
    /// How many distinct responses contain this claim
    pub agreement_count: usize,
    /// Agreement ratio (agreement_count / n_responses)
    pub agreement_ratio: f64,
}

/// A claim that appears in only one response.
///
/// Such claims are treated as potential hallucinations and slightly lower
/// the overall consistency score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UniqueClaim {
    /// The claim text, formatted as `<subject>: <value>`
    pub text: String,
    /// Which response it came from (zero-based index into the input slice)
    pub response_idx: usize,
}

/// Check consistency across multiple responses to the same prompt.
///
/// Claims are extracted from every response and grouped: two claims belong to
/// the same group only when their subjects fuzzy-match **and** their values
/// agree (ignoring case and trailing punctuation, and treating one value that
/// contains the other as agreement). A response that states a different value
/// for the same subject therefore does not count as agreeing with the claim;
/// it forms its own group instead.
///
/// # Arguments
/// * `responses` — Two or more text responses to the same prompt.
///
/// # Returns
/// A `ConsistencyReport` with contradictions, agreements, and unique claims.
/// * Groups seen in more than one response become `consistent_claims`.
/// * Groups seen in exactly one response become `unique_claims`.
/// * Every cross-response pair of claims is checked for a contradiction;
///   contradictions with an identical `conflict` description are reported once.
///
/// With fewer than two responses nothing can be compared, so an empty report
/// with a score of 1.0 is returned.
pub fn check_consistency(responses: &[&str]) -> ConsistencyReport {
    // One group of equivalent claims plus the responses that stated it.
    struct ClaimGroup {
        subject: String,
        value: String,
        responses: Vec<usize>,
    }

    // Whether two claim values state the same thing. Mirrors the leniency the
    // contradiction detector applies to formatting differences and subsets.
    fn values_agree(a: &str, b: &str) -> bool {
        fn canonical(v: &str) -> String {
            v.trim()
                .trim_end_matches(|c: char| c.is_ascii_punctuation())
                .to_lowercase()
        }
        let a = canonical(a);
        let b = canonical(b);
        if a.is_empty() || b.is_empty() {
            // An empty string is contained in everything; only accept an exact match.
            return a == b;
        }
        a.contains(b.as_str()) || b.contains(a.as_str())
    }

    let n = responses.len();
    if n < 2 {
        return ConsistencyReport {
            n_responses: n,
            total_claims: 0,
            consistent_claims: vec![],
            contradictions: vec![],
            unique_claims: vec![],
            consistency_score: 1.0,
        };
    }

    // Extract claims from each response
    let all_claims: Vec<Vec<ClaimEntity>> = responses.iter().map(|r| extract_entities(r)).collect();
    let total_claims: usize = all_claims.iter().map(|claims| claims.len()).sum();

    // Group claims that say the same thing about the same subject.
    let mut groups: Vec<ClaimGroup> = Vec::new();
    for (resp_idx, claims) in all_claims.iter().enumerate() {
        for claim in claims {
            let slot = groups.iter().position(|g| {
                fuzzy_match_subject(&g.subject, &claim.subject)
                    && values_agree(&g.value, &claim.value)
            });
            match slot {
                Some(i) => {
                    // A response repeating the same claim still counts once.
                    let group = &mut groups[i];
                    if !group.responses.contains(&resp_idx) {
                        group.responses.push(resp_idx);
                    }
                }
                None => groups.push(ClaimGroup {
                    subject: claim.subject.clone(),
                    value: claim.value.clone(),
                    responses: vec![resp_idx],
                }),
            }
        }
    }

    // Split the groups into consistent (several responses) and unique (one response).
    let mut consistent_claims = Vec::new();
    let mut unique_claims = Vec::new();
    for group in &groups {
        let text = format!("{}: {}", group.subject, group.value);
        if group.responses.len() > 1 {
            consistent_claims.push(ConsistentClaim {
                text,
                agreement_count: group.responses.len(),
                agreement_ratio: group.responses.len() as f64 / n as f64,
            });
        } else {
            unique_claims.push(UniqueClaim {
                text,
                response_idx: group.responses[0],
            });
        }
    }

    // Detect contradictions: same subject, different values across responses.
    // Each unordered pair of responses is visited once (resp_a < resp_b).
    let mut contradictions: Vec<Contradiction> = Vec::new();
    for (resp_a, claims_a) in all_claims.iter().enumerate() {
        for (resp_b, claims_b) in all_claims.iter().enumerate().skip(resp_a + 1) {
            for claim_a in claims_a {
                for claim_b in claims_b {
                    if let Some(contradiction) =
                        detect_contradiction(claim_a, claim_b, resp_a, resp_b)
                    {
                        // Avoid duplicate contradictions
                        let already_found = contradictions
                            .iter()
                            .any(|c: &Contradiction| c.conflict == contradiction.conflict);
                        if !already_found {
                            contradictions.push(contradiction);
                        }
                    }
                }
            }
        }
    }

    // Compute consistency score
    let consistency_score = compute_consistency_score(
        &consistent_claims,
        &contradictions,
        &unique_claims,
        total_claims,
        n,
    );

    ConsistencyReport {
        n_responses: n,
        total_claims,
        consistent_claims,
        contradictions,
        unique_claims,
        consistency_score,
    }
}

// ── Internal types and helpers ──────────────────────────────────────

/// A single subject/value claim extracted from one sentence of a response.
#[derive(Debug, Clone)]
struct ClaimEntity {
    // Normalized subject: lowercased, with a leading article removed.
    subject: String,
    // The claimed value: either a digit-bearing token exactly as it appeared
    // in the sentence, or a place phrase with trailing punctuation removed.
    value: String,
    // The trimmed sentence the claim was extracted from.
    full_text: String,
}

/// Pull subject/value claims out of free text using two simple heuristics.
///
/// The text is cut into sentences at `.`, `!` and `?`; sentences of 10 bytes
/// or fewer (after trimming) or with fewer than three words are ignored.
/// For each remaining sentence, in this order:
///
/// 1. Every token of at most 10 bytes that contains an ASCII digit becomes a
///    value. Its subject is all preceding words, or just the first word of
///    the sentence when fewer than two words precede it.
/// 2. Every lowercase `in` / `at` / `from` followed by text that starts with
///    an uppercase letter yields a place claim: the rest of the sentence is
///    the value and the words before the preposition are the subject.
fn extract_entities(text: &str) -> Vec<ClaimEntity> {
    let mut found = Vec::new();

    let candidates = text
        .split(|c: char| matches!(c, '.' | '!' | '?'))
        .map(str::trim)
        .filter(|sentence| sentence.len() > 10);

    for sentence in candidates {
        let tokens: Vec<&str> = sentence.split_whitespace().collect();
        if tokens.len() < 3 {
            continue;
        }

        // Pass 1: digit-bearing tokens are treated as claimed values.
        for (pos, token) in tokens.iter().enumerate() {
            let looks_numeric = token.len() <= 10 && token.chars().any(|c| c.is_ascii_digit());
            if !looks_numeric {
                continue;
            }
            let raw_subject = if pos < 2 {
                tokens[0].to_string()
            } else {
                tokens[..pos].join(" ")
            };
            found.push(ClaimEntity {
                subject: normalize_subject(&raw_subject),
                value: token.to_string(),
                full_text: sentence.to_string(),
            });
        }

        // Pass 2: "<subject> in/at/from <Place ...>" phrases.
        for (pos, token) in tokens.iter().enumerate() {
            if !matches!(*token, "in" | "at" | "from") {
                continue;
            }
            let tail = &tokens[pos + 1..];
            if tail.is_empty() {
                continue;
            }
            let place = tail.join(" ");
            if !place.starts_with(char::is_uppercase) {
                continue;
            }
            let raw_subject = tokens[..pos].join(" ");
            found.push(ClaimEntity {
                subject: normalize_subject(&raw_subject),
                value: normalize_value(&place),
                full_text: sentence.to_string(),
            });
        }
    }

    found
}

/// Canonicalize a claim subject for comparison.
///
/// The input is trimmed and lowercased, then the leading articles `"the "`,
/// `"a "` and `"an "` are removed — each tried once, in that order, against
/// whatever remains after the previous step.
fn normalize_subject(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut rest = lowered.as_str();
    for article in ["the ", "a ", "an "] {
        if let Some(stripped) = rest.strip_prefix(article) {
            rest = stripped;
        }
    }
    rest.to_owned()
}

/// Canonicalize a claim value: trim surrounding whitespace, then drop any
/// run of ASCII punctuation at the end. Leading punctuation is kept.
fn normalize_value(s: &str) -> String {
    let mut core = s.trim();
    while let Some(last) = core.chars().next_back() {
        if !last.is_ascii_punctuation() {
            break;
        }
        core = &core[..core.len() - last.len_utf8()];
    }
    core.to_owned()
}

/// Decide whether two normalized subjects refer to the same topic.
///
/// Subjects match when they are identical, when one contains the other as a
/// substring, or when they share at least one "significant" word (longer than
/// three bytes) and the number of shared significant words is at least half
/// (rounded down) of the smaller significant-word count.
///
/// An empty or whitespace-only subject never matches anything. Every string
/// contains the empty string, so without this guard a claim with no subject
/// would be treated as being about the same topic as every other claim.
fn fuzzy_match_subject(a: &str, b: &str) -> bool {
    // A subject with no text carries no topic information.
    if a.trim().is_empty() || b.trim().is_empty() {
        return false;
    }
    if a == b {
        return true;
    }
    // One contains the other
    if a.contains(b) || b.contains(a) {
        return true;
    }
    // Share significant words
    let a_words: Vec<&str> = a.split_whitespace().filter(|w| w.len() > 3).collect();
    let b_words: Vec<&str> = b.split_whitespace().filter(|w| w.len() > 3).collect();
    let shared = a_words.iter().filter(|w| b_words.contains(w)).count();
    shared > 0 && shared >= a_words.len().min(b_words.len()) / 2
}

/// Decide whether two claims taken from different responses contradict.
///
/// Returns `None` when the claims are about different subjects, or when the
/// values are compatible:
/// * equal after trailing punctuation is removed,
/// * one contains the other (case-insensitive), e.g. "Ulm" vs "Ulm, Germany",
/// * every word of the shorter value also appears in the other value once
///   case and punctuation around each word are ignored, e.g.
///   "Ulm, Germany" vs "Ulm Germany",
/// * of different kinds (one contains a digit, the other does not).
///
/// Otherwise a `Contradiction` is returned whose confidence is 0.9 for two
/// numeric values, 0.8 when both values start with an uppercase letter, and
/// 0.5 in every other case.
///
/// `resp_a` / `resp_b` are the indices of the responses the claims came from
/// and are copied into the result unchanged.
fn detect_contradiction(
    a: &ClaimEntity,
    b: &ClaimEntity,
    resp_a: usize,
    resp_b: usize,
) -> Option<Contradiction> {
    // Split a value into lowercase words with punctuation stripped from both
    // ends of each word, so "Ulm," and "Ulm" compare equal.
    fn comparable_words(value: &str) -> Vec<String> {
        value
            .split_whitespace()
            .map(|w| {
                w.trim_matches(|c: char| c.is_ascii_punctuation())
                    .to_lowercase()
            })
            .filter(|w| !w.is_empty())
            .collect()
    }

    // Same subject, different value
    if !fuzzy_match_subject(&a.subject, &b.subject) {
        return None;
    }

    let a_val = normalize_value(&a.value);
    let b_val = normalize_value(&b.value);

    if a_val == b_val {
        return None; // Same value, no contradiction
    }

    // Check if values are actually different (not just formatting/subsets)
    let a_lower = a_val.to_lowercase();
    let b_lower = b_val.to_lowercase();
    if a_lower.contains(&b_lower) || b_lower.contains(&a_lower) {
        return None;
    }

    // Skip if the shorter value's words all occur in the other value
    // (e.g., "Ulm, Germany" vs "Ulm Germany").
    let a_words = comparable_words(&a_val);
    let b_words = comparable_words(&b_val);
    let shared = a_words.iter().filter(|w| b_words.contains(w)).count();
    if shared > 0 && shared >= a_words.len().min(b_words.len()) {
        return None;
    }

    // Skip if values are different types (number vs place name)
    let a_is_numeric = a_val.chars().any(|c| c.is_ascii_digit());
    let b_is_numeric = b_val.chars().any(|c| c.is_ascii_digit());
    if a_is_numeric != b_is_numeric {
        return None;
    }

    // Both have values of same type → contradiction
    let confidence = if a_is_numeric && b_is_numeric {
        0.9 // Numeric disagreement is high-confidence contradiction
    } else if a_val.chars().next().is_some_and(|c| c.is_uppercase())
        && b_val.chars().next().is_some_and(|c| c.is_uppercase())
    {
        0.8 // Proper noun disagreement
    } else {
        0.5 // General disagreement
    };

    Some(Contradiction {
        claim_a: a.full_text.clone(),
        response_a: resp_a,
        claim_b: b.full_text.clone(),
        response_b: resp_b,
        conflict: format!("\"{}\" vs \"{}\" (subject: {})", a_val, b_val, a.subject),
        confidence,
    })
}

/// Fold the analysis results into a single score in `[0.0, 1.0]`.
///
/// The score starts from the mean agreement ratio of the consistent claims
/// (or a neutral 0.5 when there are none). It is then reduced by 0.15 per
/// contradiction (capped at 0.5) and by 0.05 per unique claim (capped at 0.3),
/// and finally clamped to the unit interval.
///
/// When no claims were extracted, or fewer than two responses were given,
/// there is nothing to disagree about and the score is 1.0.
fn compute_consistency_score(
    consistent: &[ConsistentClaim],
    contradictions: &[Contradiction],
    unique: &[UniqueClaim],
    total_claims: usize,
    n_responses: usize,
) -> f64 {
    if n_responses < 2 || total_claims == 0 {
        return 1.0;
    }

    // Baseline: how strongly the responses agree on the shared claims.
    let baseline: f64 = match consistent.len() {
        0 => 0.5,
        count => {
            let ratio_sum = consistent.iter().map(|c| c.agreement_ratio).sum::<f64>();
            ratio_sum / count as f64
        }
    };

    // Each contradiction costs 0.15, up to a ceiling of 0.5.
    let contradiction_cost = f64::min(contradictions.len() as f64 * 0.15, 0.5);

    // Each single-response claim costs 0.05, up to a ceiling of 0.3.
    let unique_cost = f64::min(unique.len() as f64 * 0.05, 0.3);

    (baseline - contradiction_cost - unique_cost).clamp(0.0, 1.0)
}

// Unit tests for the consistency checker; all of them go through the public
// `check_consistency` entry point.
#[cfg(test)]
mod tests {
    use super::*;

    // Identical responses must never contradict each other and must score
    // above the neutral 0.5 baseline.
    #[test]
    fn consistent_responses() {
        let report = check_consistency(&[
            "Einstein was born in 1879 in Ulm, Germany.",
            "Einstein was born in 1879 in Ulm, Germany.",
            "Einstein was born in 1879 in Ulm, Germany.",
        ]);
        // Print any unexpected contradictions so a failure is easy to diagnose.
        for c in &report.contradictions {
            eprintln!(
                "CONTRADICTION: {} (resp {}) vs {} (resp {}) — {}",
                c.claim_a, c.response_a, c.claim_b, c.response_b, c.conflict
            );
        }
        assert!(report.consistency_score > 0.5);
        assert!(
            report.contradictions.is_empty(),
            "Found {} contradictions in identical responses",
            report.contradictions.len()
        );
    }

    // One response names a different birthplace; that must surface as a
    // contradiction and pull the score below 0.9.
    #[test]
    fn contradicting_responses() {
        let report = check_consistency(&[
            "Einstein was born in 1879 in Ulm, Germany.",
            "Einstein was born in 1879 in Munich, Germany.",
            "Einstein was born in 1879 in Ulm, Germany.",
        ]);
        assert!(
            !report.contradictions.is_empty(),
            "Expected contradictions for Ulm vs Munich"
        );
        assert!(report.consistency_score < 0.9);
    }

    // Two different numbers for the same subject must be noticed, either as a
    // contradiction or as claims unique to one response.
    #[test]
    fn numeric_contradiction() {
        let report = check_consistency(&[
            "The population is 126,000 people.",
            "The population is 250,000 people.",
        ]);
        // Should detect numeric disagreement
        assert!(
            !report.contradictions.is_empty() || !report.unique_claims.is_empty(),
            "Should detect disagreement in numbers"
        );
    }

    // Fewer than two responses short-circuits to a perfect score.
    #[test]
    fn single_response() {
        let report = check_consistency(&["Einstein was born in 1879."]);
        assert_eq!(report.consistency_score, 1.0);
        assert_eq!(report.n_responses, 1);
    }

    // An empty input slice takes the same short-circuit path.
    #[test]
    fn empty_responses() {
        let report = check_consistency(&[]);
        assert_eq!(report.consistency_score, 1.0);
    }

    // Heavily disagreeing responses must still yield a score inside [0, 1].
    #[test]
    fn consistency_score_bounded() {
        let report = check_consistency(&[
            "Einstein was born in 1879 in Ulm. He had 3 children.",
            "Einstein was born in 1880 in Munich. He had 5 children.",
            "Einstein was born in 1879 in Berlin. He had 3 kids.",
        ]);
        assert!(report.consistency_score >= 0.0);
        assert!(report.consistency_score <= 1.0);
    }

    // Smoke test: claims are extracted from the input at all.
    #[test]
    fn unique_claims_detected() {
        let report = check_consistency(&[
            "Einstein was born in 1879. He invented the laser.",
            "Einstein was born in 1879. He played violin.",
        ]);
        // NOTE(review): the "laser" and "violin" sentences contain neither a
        // digit nor an in/at/from phrase, so the extractor produces no claim
        // for them; only the shared "1879" claim is extracted. This test
        // therefore checks extraction only, not unique-claim detection.
        assert!(report.total_claims > 0);
    }
}