// integrity_calc/lib.rs

1#![doc = "Text integrity detection pipeline"]
2//! Integrity detection pipeline — full orchestration
3//!
4//! Tier: T3 | Primitives: σ Sequence, → Causality, ∂ Boundary
5
6#![deny(clippy::unwrap_used)]
7#![deny(clippy::expect_used)]
8#![deny(clippy::panic)]
9#![warn(missing_docs)]
10pub mod aggregation;
11pub mod bloom;
12pub mod burstiness;
13pub mod chemistry;
14pub mod classify;
15pub mod entropy;
16pub mod perplexity;
17pub mod tokenize;
18pub mod zipf;
19
20use crate::aggregation::RawFeatures;
21use crate::bloom::BloomThresholds;
22use crate::classify::Verdict;
23use serde::{Deserialize, Serialize};
24
/// Minimum token count for reliable analysis.
///
/// `run_pipeline` returns an error result for inputs that tokenize to fewer
/// tokens than this.
pub const MIN_TOKENS: usize = 50;
/// Default entropy window size, as passed to `entropy::entropy_profile` by
/// `run_pipeline`.
pub const DEFAULT_WINDOW_SIZE: usize = 50;
/// Default entropy window step, as passed to `entropy::entropy_profile` by
/// `run_pipeline`.
pub const DEFAULT_WINDOW_STEP: usize = 25;
31
32/// Flat pipeline result for server→client transfer.
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct PipelineResult {
35    // Verdict
36    pub verdict: Verdict,
37    pub probability: f64,
38    pub confidence: f64,
39    pub threshold: f64,
40
41    // Config
42    pub bloom_level: u8,
43    pub bloom_name: String,
44    pub preset_name: String,
45
46    // Token stats
47    pub total_tokens: usize,
48    pub unique_tokens: usize,
49    pub ttr: f64,
50
51    // Raw features
52    pub zipf_deviation: f64,
53    pub zipf_alpha: f64,
54    pub zipf_r_squared: f64,
55    pub entropy_mean: f64,
56    pub entropy_std: f64,
57    pub entropy_window_count: usize,
58    pub burstiness_coeff: f64,
59    pub burstiness_tokens_analyzed: usize,
60    pub perplexity_mean: f64,
61    pub perplexity_var: f64,
62    pub perplexity_sentence_count: usize,
63    pub ttr_deviation: f64,
64
65    // Normalized features [0,1]
66    pub normalized: [f64; 5],
67    pub weights: [f64; 5],
68
69    // Aggregation
70    pub beer_lambert_score: f64,
71    pub composite: f64,
72    pub hill_score: f64,
73
74    // Error (empty if success)
75    pub error: String,
76}
77
78impl PipelineResult {
79    /// Create an error result.
80    fn error(msg: String) -> Self {
81        Self {
82            verdict: Verdict::Human,
83            probability: 0.0,
84            confidence: 0.0,
85            threshold: 0.0,
86            bloom_level: 0,
87            bloom_name: String::new(),
88            preset_name: String::new(),
89            total_tokens: 0,
90            unique_tokens: 0,
91            ttr: 0.0,
92            zipf_deviation: 0.0,
93            zipf_alpha: 0.0,
94            zipf_r_squared: 0.0,
95            entropy_mean: 0.0,
96            entropy_std: 0.0,
97            entropy_window_count: 0,
98            burstiness_coeff: 0.0,
99            burstiness_tokens_analyzed: 0,
100            perplexity_mean: 0.0,
101            perplexity_var: 0.0,
102            perplexity_sentence_count: 0,
103            ttr_deviation: 0.0,
104            normalized: [0.0; 5],
105            weights: aggregation::WEIGHTS,
106            beer_lambert_score: 0.0,
107            composite: 0.0,
108            hill_score: 0.0,
109            error: msg,
110        }
111    }
112}
113
114/// Run the full integrity detection pipeline.
115pub fn run_pipeline(text: &str, bloom_level: u8, preset: &str) -> PipelineResult {
116    // Stage 1: Tokenize
117    let stats = tokenize::tokenize(text);
118
119    if stats.total_tokens < MIN_TOKENS {
120        return PipelineResult::error(format!(
121            "Need at least {} tokens, got {}",
122            MIN_TOKENS, stats.total_tokens
123        ));
124    }
125
126    // Resolve threshold
127    let bloom_thresholds = BloomThresholds::from_name(preset);
128    let threshold = bloom_thresholds
129        .threshold_for_level(bloom_level)
130        .unwrap_or(0.64);
131    let bloom_name = BloomThresholds::level_name(bloom_level)
132        .unwrap_or("Unknown")
133        .to_string();
134
135    // Stage 2: Feature extraction
136    let zipf_result = zipf::zipf_analysis(&stats.frequencies);
137    let entropy_profile =
138        entropy::entropy_profile(&stats.tokens, DEFAULT_WINDOW_SIZE, DEFAULT_WINDOW_STEP);
139    let burst_result = burstiness::burstiness_analysis(&stats.tokens, &stats.frequencies);
140    let perp_result = perplexity::perplexity_variance(text);
141    let ttr_dev = tokenize::ttr_deviation(stats.ttr);
142
143    // Stage 3: Aggregate
144    let raw = RawFeatures {
145        zipf_deviation: zipf_result.deviation,
146        entropy_std: entropy_profile.std_dev,
147        burstiness: burst_result.coefficient,
148        perplexity_var: perp_result.variance,
149        ttr_deviation: ttr_dev,
150    };
151    let agg = aggregation::aggregate(&raw);
152
153    // Stage 4: Classify
154    let classification = classify::classify_with_threshold(agg.hill_score, threshold);
155
156    PipelineResult {
157        verdict: classification.verdict,
158        probability: classification.probability,
159        confidence: classification.confidence,
160        threshold,
161        bloom_level,
162        bloom_name,
163        preset_name: bloom_thresholds.name,
164        total_tokens: stats.total_tokens,
165        unique_tokens: stats.unique_tokens,
166        ttr: stats.ttr,
167        zipf_deviation: zipf_result.deviation,
168        zipf_alpha: zipf_result.alpha,
169        zipf_r_squared: zipf_result.r_squared,
170        entropy_mean: entropy_profile.mean,
171        entropy_std: entropy_profile.std_dev,
172        entropy_window_count: entropy_profile.window_count,
173        burstiness_coeff: burst_result.coefficient,
174        burstiness_tokens_analyzed: burst_result.tokens_analyzed,
175        perplexity_mean: perp_result.mean_entropy,
176        perplexity_var: perp_result.variance,
177        perplexity_sentence_count: perp_result.sentence_count,
178        ttr_deviation: ttr_dev,
179        normalized: agg.normalized,
180        weights: aggregation::WEIGHTS,
181        beer_lambert_score: agg.beer_lambert_score,
182        composite: agg.composite,
183        hill_score: agg.hill_score,
184        error: String::new(),
185    }
186}