// every_other_token/lib.rs
1//! # every-other-token
2//!
3//! A real-time LLM token stream interceptor for token-level interaction research.
4//!
5//! This crate sits between the caller and the model, intercepts the token stream
6//! as it arrives over SSE, applies one of five transform strategies to tokens at
7//! configurable positions, scores model confidence at each position using the
8//! OpenAI logprob API, and routes the enriched events to a terminal renderer, a
9//! zero-dependency web UI, and an optional WebSocket collaboration room.
10//!
11//! ## Feature flags
12//!
13//! | Flag | Description |
14//! |------|-------------|
15//! | `sqlite-log` | Persist experiment runs to a local SQLite database via `store::ExperimentStore`. |
16//! | `self-tune` | Enable the self-improvement telemetry bus and tuning controller. |
17//! | `self-modify` | Enable snapshot-based parameter mutation (requires `self-tune`). |
18//! | `intelligence` | Reserved namespace for future interpretability features. |
19//! | `evolution` | Reserved namespace for future evolutionary optimisation. |
20//! | `helix-bridge` | HTTP bridge that polls `/api/stats` and pushes config patches. |
21//! | `redis-backing` | Write-through Redis persistence for agent memory and snapshots. |
22//! | `wasm` | WASM target bindings via `wasm-bindgen`. |
23//!
24//! ## Quickstart
25//!
26//! ```bash
27//! export OPENAI_API_KEY="sk-..."
28//! cargo run -- "What is consciousness?" --visual
29//! cargo run -- "What is consciousness?" --web
30//! cargo run -- "Explain recursion" --research --runs 20 --output results.json
31//! ```
32
33pub mod cli;
34pub mod collab;
35pub mod config;
36pub mod error;
37pub mod heatmap;
38pub mod providers;
39pub mod render;
40pub mod replay;
41pub mod research;
42pub mod store;
43pub mod transforms;
44pub mod web;
45
46#[cfg(feature = "self-tune")]
47pub mod self_tune;
48
49#[cfg(feature = "self-modify")]
50pub mod self_modify;
51
52#[cfg(feature = "self-modify")]
53pub mod semantic_dedup;
54
55#[cfg(feature = "helix-bridge")]
56pub mod helix_bridge;
57
58#[cfg(feature = "sqlite-log")]
59pub mod experiment_log;
60
#[cfg(feature = "intelligence")]
pub mod intelligence {
    //! Placeholder module behind the `intelligence` feature flag.
    //! Reserved namespace for future interpretability and reasoning features.

    #[cfg(test)]
    mod tests {
        /// Smoke test: the stub must keep compiling when the flag is enabled.
        #[test]
        fn stub_compiles() {}
    }
}
72
#[cfg(feature = "evolution")]
pub mod evolution {
    //! Placeholder module behind the `evolution` feature flag.
    //! Reserved namespace for future evolutionary/genetic optimization features.

    #[cfg(test)]
    mod tests {
        /// Smoke test: the stub must keep compiling when the flag is enabled.
        #[test]
        fn stub_compiles() {}
    }
}
84
85use colored::*;
86use rand::rngs::StdRng;
87use rand::SeedableRng;
88use reqwest::Client;
89use serde::{Deserialize, Serialize};
90use std::env;
91use std::io::{self, Write};
92use tokio::sync::mpsc;
93use tokio_stream::StreamExt;
94
95use providers::*;
96use transforms::{apply_heatmap_color, calculate_token_importance, tokenize, Transform};
97
98// ---------------------------------------------------------------------------
99// Token probability types
100// ---------------------------------------------------------------------------
101
102/// One alternative token and its probability (for top-K logprob display).
103///
104/// Returned in the `alternatives` field of a [`TokenEvent`] when the provider
105/// supports `top_logprobs` (currently OpenAI only).  Probabilities have already
106/// been converted from log-space via `exp(logprob)` and clamped to `[0.0, 1.0]`.
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct TokenAlternative {
109    /// The alternative token string (may include leading whitespace, e.g. `" world"`).
110    pub token: String,
111    /// Linear probability in `[0.0, 1.0]`, computed as `exp(logprob)`.
112    pub probability: f32,
113}
114
115// ---------------------------------------------------------------------------
116// Token event (for web UI streaming)
117// ---------------------------------------------------------------------------
118
/// A single processed token emitted by the streaming pipeline.
///
/// Every token the interceptor produces — whether transformed or not — is
/// represented as a `TokenEvent`.  Events are sent over the `web_tx` channel
/// for SSE fan-out to the web UI, written as JSON lines in `--json-stream`
/// mode, or recorded to a replay file.
///
/// Optional fields are omitted from the serialized JSON when absent/empty
/// (see the per-field `skip_serializing_if` attributes) to keep stream
/// payloads small.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenEvent {
    /// The (possibly transformed) token text shown to the user.
    pub text: String,
    /// The original token text before any transform was applied.
    pub original: String,
    /// Zero-based position of this token in the full response.
    pub index: usize,
    /// Whether the active transform was applied to this token.
    pub transformed: bool,
    /// Scalar token importance in `[0.0, 1.0]` — derived from API confidence
    /// when available, otherwise computed by the heuristic importance scorer.
    pub importance: f64,
    /// For Chaos transform: which sub-transform was applied. None for other transforms.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chaos_label: Option<String>,
    /// For diff mode: which provider produced this token ("openai" or "anthropic").
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,
    /// Model confidence 0.0–1.0, derived from API logprob. None when unavailable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
    /// Per-token perplexity (exp(-log_prob)). None when logprobs unavailable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub perplexity: Option<f32>,
    /// Top alternative tokens with their probabilities (from top_logprobs).
    /// Defaults to empty on deserialization; omitted from JSON when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub alternatives: Vec<TokenAlternative>,
    /// When true, this event represents an error notification rather than a real token.
    /// Defaults to `false` when absent in deserialized input.
    #[serde(default)]
    pub is_error: bool,
    /// Milliseconds elapsed since stream start when this token arrived (for latency tracking).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arrival_ms: Option<u64>,
}
160
161// ---------------------------------------------------------------------------
162// TokenInterceptor — multi-provider streaming engine
163// ---------------------------------------------------------------------------
164
/// The core streaming engine that sits between the caller and the LLM.
///
/// `TokenInterceptor` manages the HTTP connection to the configured provider,
/// iterates the server-sent-event (SSE) stream, applies the active [`Transform`]
/// to every N-th token (controlled by `rate`), attaches per-token confidence and
/// perplexity from API logprobs, and routes enriched [`TokenEvent`]s to one of
/// three output sinks:
///
/// - **Terminal** — ANSI-colored text written to stdout.
/// - **Web UI** — events sent over the `web_tx` unbounded channel for SSE fan-out.
/// - **JSON stream** — one JSON line per token written to stdout (`json_stream = true`).
///
/// Construct with [`TokenInterceptor::new`] then call [`TokenInterceptor::intercept_stream`].
pub struct TokenInterceptor {
    /// Shared HTTP client reused for all provider requests.
    client: Client,
    /// Provider API key read from the environment in `new` (empty for the Mock provider).
    api_key: String,
    /// Which LLM backend to stream from (OpenAI, Anthropic, or Mock).
    pub provider: Provider,
    /// The active token transform strategy.
    pub transform: Transform,
    /// Model identifier sent verbatim to the provider API.
    pub model: String,
    /// Number of tokens processed so far (starts at 0).
    pub token_count: usize,
    /// Number of tokens the transform was applied to (starts at 0).
    pub transformed_count: usize,
    /// Enables the enhanced terminal rendering mode — set from the `--visual` CLI flag.
    pub visual_mode: bool,
    /// Enables importance/confidence heatmap coloring of terminal output.
    pub heatmap_mode: bool,
    /// When true, prompts are pre-processed through the MCP orchestrator pipeline.
    pub orchestrator: bool,
    /// Base URL of the MCP orchestrator (default `http://localhost:3000`,
    /// overridable via `with_orchestrator_url`).
    pub orchestrator_url: String,
    /// When set, token events are sent here instead of printed to stdout.
    pub web_tx: Option<mpsc::UnboundedSender<TokenEvent>>,
    /// When set, each emitted TokenEvent carries this provider label (for diff mode).
    pub web_provider_label: Option<String>,
    /// Optional system prompt prepended to the conversation.
    pub system_prompt: Option<String>,
    /// When set, token processing metrics are recorded into the self-improvement bus.
    #[cfg(feature = "self-tune")]
    pub telemetry_bus: Option<std::sync::Arc<crate::self_tune::telemetry_bus::TelemetryBus>>,
    /// Optional in-session prompt deduplication cache.
    ///
    /// When set, `intercept_stream` checks whether the incoming prompt has been
    /// seen recently (within the configured TTL).  If a live hit is found the
    /// API call is skipped and a cache-hit notice is printed or sent to the web
    /// channel, avoiding redundant spend on repeated prompts (common in
    /// research mode).
    ///
    /// Enabled by setting `dedup` after construction (see [`TokenInterceptor::enable_dedup`]).
    #[cfg(feature = "self-modify")]
    pub dedup: Option<std::sync::Arc<std::sync::Mutex<crate::semantic_dedup::SemanticDedup>>>,
    /// Fraction of tokens to transform (0.0–1.0).  Bresenham-spread so the
    /// distribution is deterministic and uniform rather than probabilistic.
    pub rate: f64,
    /// Number of top alternative tokens to request per position (OpenAI only, 0–20).
    pub top_logprobs: u8,
    /// Per-session RNG used for Noise/Chaos transforms.  Seeded from entropy
    /// unless a fixed seed is provided via `with_seed()`.
    rng: StdRng,
    /// Optional replay recorder — records each emitted TokenEvent.
    pub recorder: Option<crate::replay::Recorder>,
    /// When true, print one JSON line per token instead of colored text.
    pub json_stream: bool,
    /// Pending async delay in ms to be awaited after process_content_logprob returns.
    pending_delay_ms: u64,
    /// Minimum confidence threshold for transform gating. When set, only tokens
    /// with confidence at or below this value are transformed.
    pub min_confidence: Option<f64>,
    /// Timestamp of the last received token, used for timing-based confidence proxy.
    last_token_instant: Option<std::time::Instant>,
    /// Maximum retry attempts for API calls on 429/5xx (configurable via --max-retries).
    pub max_retries: u32,
    /// Maximum tokens in the Anthropic response (configurable via --anthropic-max-tokens).
    pub anthropic_max_tokens: u32,
    /// Instant recorded at stream start for per-token arrival latency measurement.
    stream_start_instant: Option<std::time::Instant>,
    /// Optional stream timeout in seconds. When set, `intercept_stream` will fail
    /// with a timeout error if the entire stream does not complete within this duration.
    pub timeout_secs: Option<u64>,
}
239
240// ---------------------------------------------------------------------------
241// HTTP retry helper (#5) + circuit breaker (#12)
242// ---------------------------------------------------------------------------
243
244/// Per-provider circuit breaker state stored in a global registry.
245///
246/// The breaker has three states:
247/// - **Closed** (normal) — requests pass through.
248/// - **Open** — consecutive failures exceeded `TRIP_THRESHOLD`; requests are
249///   rejected immediately for `RECOVERY_MS` milliseconds.
250/// - **Half-open** — a single probe request is allowed through after recovery;
251///   success resets the counter, failure re-opens for another `RECOVERY_MS`.
252static CIRCUIT_BREAKER: std::sync::OnceLock<
253    std::sync::Mutex<CircuitBreakerState>,
254> = std::sync::OnceLock::new();
255
256struct CircuitBreakerState {
257    consecutive_failures: u32,
258    open_until_ms: u64,
259}
260
261/// Trip after this many consecutive failures.
262const CB_TRIP_THRESHOLD: u32 = 5;
263/// Duration the breaker stays open after tripping (30 seconds).
264const CB_RECOVERY_MS: u64 = 30_000;
265
266fn now_unix_ms() -> u64 {
267    std::time::SystemTime::now()
268        .duration_since(std::time::UNIX_EPOCH)
269        .map(|d| d.as_millis() as u64)
270        .unwrap_or(0)
271}
272
273/// Returns `true` if the circuit breaker is currently open (requests should
274/// be short-circuited), `false` if the request should be attempted.
275fn circuit_is_open() -> bool {
276    let state = CIRCUIT_BREAKER.get_or_init(|| {
277        std::sync::Mutex::new(CircuitBreakerState {
278            consecutive_failures: 0,
279            open_until_ms: 0,
280        })
281    });
282    if let Ok(s) = state.lock() {
283        s.open_until_ms > now_unix_ms()
284    } else {
285        false
286    }
287}
288
289fn circuit_record_success() {
290    let state = CIRCUIT_BREAKER.get_or_init(|| {
291        std::sync::Mutex::new(CircuitBreakerState {
292            consecutive_failures: 0,
293            open_until_ms: 0,
294        })
295    });
296    if let Ok(mut s) = state.lock() {
297        s.consecutive_failures = 0;
298        s.open_until_ms = 0;
299    }
300}
301
302fn circuit_record_failure() {
303    let state = CIRCUIT_BREAKER.get_or_init(|| {
304        std::sync::Mutex::new(CircuitBreakerState {
305            consecutive_failures: 0,
306            open_until_ms: 0,
307        })
308    });
309    if let Ok(mut s) = state.lock() {
310        s.consecutive_failures += 1;
311        if s.consecutive_failures >= CB_TRIP_THRESHOLD {
312            s.open_until_ms = now_unix_ms() + CB_RECOVERY_MS;
313            tracing::warn!(
314                consecutive_failures = s.consecutive_failures,
315                recovery_ms = CB_RECOVERY_MS,
316                "circuit breaker tripped — blocking requests for recovery period"
317            );
318        }
319    }
320}
321
322/// Execute a pre-built `reqwest::Request`, retrying up to `max_attempts`
323/// times on 429 / 5xx responses and network errors with exponential back-off.
324///
325/// Integrates with a process-wide circuit breaker: after `CB_TRIP_THRESHOLD`
326/// consecutive failures the breaker opens for `CB_RECOVERY_MS` ms, rejecting
327/// all requests immediately.  A single successful response resets the counter.
328///
329/// Returns the first successful (or non-retryable) response.
330async fn execute_with_retry(
331    client: &reqwest::Client,
332    req: reqwest::Request,
333    max_attempts: u32,
334) -> Result<reqwest::Response, Box<dyn std::error::Error + Send + Sync>> {
335    if circuit_is_open() {
336        return Err("circuit breaker open — provider unavailable, try again shortly".into());
337    }
338
339    let mut last_err: Option<String> = None;
340    for attempt in 0..max_attempts {
341        if attempt > 0 {
342            let delay_ms = 400u64 * (1u64 << attempt.min(4));
343            tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
344            tracing::warn!(attempt, "retrying API request after transient error");
345        }
346        let to_send = match req.try_clone() {
347            Some(r) => r,
348            None => {
349                // Body is a stream — cannot retry; just execute once.
350                return client.execute(req).await.map_err(|e| e.into());
351            }
352        };
353        match client.execute(to_send).await {
354            Ok(resp) => {
355                let status = resp.status().as_u16();
356                if attempt + 1 < max_attempts
357                    && (status == 429 || status == 500 || status == 502 || status == 503)
358                {
359                    tracing::warn!(status, attempt, "got retryable HTTP status");
360                    last_err = Some(format!("HTTP {status}"));
361                    // HTTP 429 is a rate-limit — do NOT trip the circuit breaker.
362                    // Only 5xx server errors count as service failures.
363                    if status != 429 {
364                        circuit_record_failure();
365                    }
366                    continue;
367                }
368                circuit_record_success();
369                return Ok(resp);
370            }
371            Err(e) => {
372                circuit_record_failure();
373                if attempt + 1 < max_attempts {
374                    tracing::warn!(error = %e, attempt, "network error, will retry");
375                    last_err = Some(e.to_string());
376                } else {
377                    return Err(Box::new(e));
378                }
379            }
380        }
381    }
382    Err(last_err
383        .unwrap_or_else(|| "max retries exceeded".to_string())
384        .into())
385}
386
387impl TokenInterceptor {
388    /// Construct a new `TokenInterceptor`.
389    ///
390    /// Reads the API key from the environment (`OPENAI_API_KEY` or `ANTHROPIC_API_KEY`)
391    /// and validates its format.  The `Mock` provider does not require a key.
392    ///
393    /// # Errors
394    /// Returns an error if the required API key environment variable is not set.
395    pub fn new(
396        provider: Provider,
397        transform: Transform,
398        model: String,
399        visual_mode: bool,
400        heatmap_mode: bool,
401        orchestrator: bool,
402    ) -> Result<Self, Box<dyn std::error::Error>> {
403        let api_key = match provider {
404            Provider::Openai => {
405                let key = env::var("OPENAI_API_KEY")
406                    .map_err(|_| "OPENAI_API_KEY not set. Export it or pass via environment.")?;
407                // Basic format validation (#9): OpenAI keys start with "sk-"
408                if !key.starts_with("sk-") {
409                    eprintln!(
410                        "[warn] OPENAI_API_KEY does not start with 'sk-' — verify it is correct"
411                    );
412                }
413                key
414            }
415            Provider::Anthropic => {
416                let key = env::var("ANTHROPIC_API_KEY")
417                    .map_err(|_| "ANTHROPIC_API_KEY not set. Export it or pass via environment.")?;
418                // Anthropic keys start with "sk-ant-"
419                if !key.starts_with("sk-ant-") {
420                    eprintln!("[warn] ANTHROPIC_API_KEY does not start with 'sk-ant-' — verify it is correct");
421                }
422                key
423            }
424            Provider::Mock => String::new(),
425        };
426
427        Ok(TokenInterceptor {
428            client: Client::new(),
429            api_key,
430            provider,
431            transform,
432            model,
433            token_count: 0,
434            transformed_count: 0,
435            visual_mode,
436            heatmap_mode,
437            orchestrator,
438            orchestrator_url: "http://localhost:3000".to_string(),
439            web_tx: None,
440            web_provider_label: None,
441            system_prompt: None,
442            #[cfg(feature = "self-tune")]
443            telemetry_bus: None,
444            #[cfg(feature = "self-modify")]
445            dedup: None,
446            rate: 0.5,
447            top_logprobs: 5,
448            rng: StdRng::from_entropy(),
449            recorder: None,
450            json_stream: false,
451            pending_delay_ms: 0,
452            min_confidence: None,
453            last_token_instant: None,
454            max_retries: 3,
455            anthropic_max_tokens: 4096,
456            stream_start_instant: None,
457            timeout_secs: None,
458        })
459    }
460
    /// Set the intercept rate (0.0–1.0).  Clamped to [0.0, 1.0].
    ///
    /// Non-finite rates are a caller bug, caught by `debug_assert!` in debug
    /// builds; in release builds `clamp` still bounds finite values (NaN would
    /// propagate per `f64::clamp` semantics — TODO confirm desired behavior).
    pub fn with_rate(mut self, rate: f64) -> Self {
        debug_assert!(rate.is_finite(), "with_rate: rate must be finite, got {}", rate);
        self.rate = rate.clamp(0.0, 1.0);
        self
    }

    /// Seed the internal RNG for reproducible Noise/Chaos output.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.rng = StdRng::seed_from_u64(seed);
        self
    }

    /// Set the channel used to fan out token events to the web UI.
    ///
    /// Calling this completes the builder chain for web-mode construction
    /// so callers do not need to set `web_tx` as a bare field assignment.
    pub fn with_web_tx(mut self, tx: mpsc::UnboundedSender<TokenEvent>) -> Self {
        self.web_tx = Some(tx);
        self
    }

    /// Set an optional provider label attached to every emitted [`TokenEvent`].
    /// Used in diff mode to tag events with `"openai"` or `"anthropic"`.
    pub fn with_provider_label(mut self, label: impl Into<String>) -> Self {
        self.web_provider_label = Some(label.into());
        self
    }

    /// Prepend a system prompt to the conversation.
    pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
        self.system_prompt = Some(prompt.into());
        self
    }

    /// Number of top alternative tokens to request per position (OpenAI only, 0–20).
    ///
    /// NOTE(review): the documented 0–20 range is not enforced here — consider
    /// clamping like `with_rate` does; the OpenAI API rejects out-of-range values.
    pub fn with_top_logprobs(mut self, n: u8) -> Self {
        self.top_logprobs = n;
        self
    }

    /// Enable JSON-stream mode: emit one JSON line per token instead of ANSI text.
    pub fn with_json_stream(mut self, enabled: bool) -> Self {
        self.json_stream = enabled;
        self
    }

    /// Override the MCP orchestrator base URL (default: `http://localhost:3000`).
    pub fn with_orchestrator_url(mut self, url: impl Into<String>) -> Self {
        self.orchestrator_url = url.into();
        self
    }

    /// Maximum retry attempts on 429/5xx errors.
    pub fn with_max_retries(mut self, n: u32) -> Self {
        self.max_retries = n;
        self
    }

    /// Set a stream timeout in seconds. If the entire stream does not complete within
    /// this duration, `intercept_stream` returns a timeout error.
    pub fn with_timeout(mut self, secs: u64) -> Self {
        self.timeout_secs = Some(secs);
        self
    }

    /// Only transform tokens whose API confidence is at or below this threshold.
    pub fn with_min_confidence(mut self, threshold: f64) -> Self {
        self.min_confidence = Some(threshold);
        self
    }
532
533    /// Enable in-session prompt deduplication with the given TTL and capacity.
534    ///
535    /// After calling this, `intercept_stream` will check whether an incoming
536    /// prompt has been seen recently and skip the API call on a cache hit.
537    ///
538    /// # Panics
539    /// This function never panics.
540    #[cfg(feature = "self-modify")]
541    pub fn enable_dedup(&mut self, ttl_ms: u64, capacity: usize) {
542        use crate::semantic_dedup::{DedupConfig, SemanticDedup};
543        let sd = SemanticDedup::new(DedupConfig { ttl_ms, capacity });
544        self.dedup = Some(std::sync::Arc::new(std::sync::Mutex::new(sd)));
545    }
546
547    // -----------------------------------------------------------------------
548    // Public entry point
549    // -----------------------------------------------------------------------
550
551    /// Stream the given `prompt` through the configured provider, applying
552    /// the active transform to every other token.
553    ///
554    /// In terminal mode the tokens are printed to stdout; in web mode they are
555    /// sent over the `web_tx` channel for SSE fan-out.
556    ///
557    /// # Errors
558    /// Returns an error if the prompt is empty, exceeds 512 KB, the API key is
559    /// missing, the HTTP request fails after all retries, or JSON parsing fails.
560    pub async fn intercept_stream(
561        &mut self,
562        prompt: &str,
563    ) -> Result<(), Box<dyn std::error::Error>> {
564        let timeout_duration = self.timeout_secs.map(std::time::Duration::from_secs);
565        if let Some(duration) = timeout_duration {
566            return match tokio::time::timeout(duration, self.intercept_stream_inner(prompt)).await {
567                Ok(result) => result,
568                Err(_) => Err(format!(
569                    "stream timed out after {} seconds",
570                    duration.as_secs()
571                )
572                .into()),
573            };
574        }
575        self.intercept_stream_inner(prompt).await
576    }
577
578    async fn intercept_stream_inner(
579        &mut self,
580        prompt: &str,
581    ) -> Result<(), Box<dyn std::error::Error>> {
582        // Record stream start for per-token arrival latency measurement (item 8).
583        self.stream_start_instant = Some(std::time::Instant::now());
584        // Note: we log diagnostics here but do not hold an entered span across
585        // await points -- EnteredSpan is !Send and would prevent tokio::spawn.
586        tracing::info!(
587            provider = %self.provider,
588            model = %self.model,
589            prompt_len = prompt.len(),
590            "starting token stream interception",
591        );
592
593        // ── Input validation (#11) ───────────────────────────────────────────
594        if prompt.trim().is_empty() {
595            tracing::error!("prompt is empty — aborting");
596            return Err("Prompt must not be empty".into());
597        }
598        // Rough guard against prompts that would exceed typical API limits.
599        // 512 KB ≈ ~128K tokens at 4 bytes/token; APIs will reject anyway but
600        // failing fast gives a clearer error message.
601        if prompt.len() > 512_000 {
602            return Err(format!(
603                "Prompt is too long ({} bytes; max 512 KB). Use a shorter prompt.",
604                prompt.len()
605            )
606            .into());
607        }
608
609        // ── Prompt deduplication gate ─────────────────────────────────────────
610        // Check before printing the header so skipped prompts are silent.
611        #[cfg(feature = "self-modify")]
612        {
613            use std::time::{SystemTime, UNIX_EPOCH};
614            let now_ms = SystemTime::now()
615                .duration_since(UNIX_EPOCH)
616                .map(|d| d.as_millis() as u64)
617                .unwrap_or(0);
618
619            if let Some(dedup_arc) = &self.dedup {
620                if let Ok(mut guard) = dedup_arc.lock() {
621                    if let Some(entry) = guard.check(prompt, now_ms) {
622                        let hits = entry.hit_count;
623                        drop(guard); // release lock before any I/O
624
625                        let msg = format!(
626                            "[dedup] Skipping duplicate prompt (seen {} time{} recently, TTL active)",
627                            hits,
628                            if hits == 1 { "" } else { "s" },
629                        );
630                        if let Some(tx) = &self.web_tx {
631                            let evt = TokenEvent {
632                                text: msg.clone(),
633                                original: prompt.to_string(),
634                                index: 0,
635                                transformed: false,
636                                importance: 0.0,
637                                chaos_label: None,
638                                provider: self.web_provider_label.clone(),
639                                confidence: None,
640                                perplexity: None,
641                                alternatives: vec![],
642                                is_error: false,
643                                arrival_ms: None,
644                            };
645                            let _ = tx.send(evt);
646                        } else {
647                            eprintln!("{}", msg);
648                        }
649                        return Ok(());
650                    } else {
651                        // Register prompt so the next identical call is caught.
652                        // Value is empty — we use this purely as a seen-prompt gate.
653                        guard.register(prompt, String::new(), now_ms);
654                    }
655                }
656            }
657        }
658
659        if self.web_tx.is_none() {
660            self.print_header(prompt);
661        }
662
663        // If --orchestrator is active, pre-process through MCP pipeline
664        let effective_prompt = if self.orchestrator {
665            eprintln!(
666                "{}",
667                "[orchestrator] routing through MCP pipeline at localhost:3000".bright_magenta()
668            );
669            match self.orchestrator_infer(prompt).await {
670                Ok(enriched) => enriched,
671                Err(e) => {
672                    eprintln!(
673                        "{} {}",
674                        "[orchestrator] pipeline unavailable, using raw prompt:".bright_red(),
675                        e
676                    );
677                    if let Some(tx) = &self.web_tx {
678                        let evt = TokenEvent {
679                            text: format!("[orchestrator error] {}", e),
680                            original: String::new(),
681                            index: 0,
682                            transformed: false,
683                            importance: 0.0,
684                            chaos_label: None,
685                            provider: self.web_provider_label.clone(),
686                            confidence: None,
687                            perplexity: None,
688                            alternatives: vec![],
689                            is_error: true,
690                            arrival_ms: None,
691                        };
692                        let _ = tx.send(evt);
693                    }
694                    prompt.to_string()
695                }
696            }
697        } else {
698            prompt.to_string()
699        };
700
701        match self.provider {
702            Provider::Openai => self.stream_openai(&effective_prompt).await?,
703            Provider::Anthropic => self.stream_anthropic(&effective_prompt).await?,
704            Provider::Mock => self.stream_mock(&effective_prompt).await?,
705        }
706
707        if self.web_tx.is_none() {
708            self.print_footer();
709        }
710        Ok(())
711    }
712
713    // -----------------------------------------------------------------------
714    // OpenAI streaming
715    // -----------------------------------------------------------------------
716
    /// Stream a chat completion from the OpenAI API, forwarding each content
    /// delta (with its logprob data, when present) to
    /// `process_content_logprob`.
    ///
    /// Sends a streaming `/v1/chat/completions` request with `logprobs`
    /// enabled, then incrementally parses the SSE body line-by-line. Chunks
    /// that are not valid UTF-8 or that fail JSON parsing are skipped and
    /// counted, never fatal.
    ///
    /// # Errors
    ///
    /// Returns an error if the request cannot be built or sent (after
    /// retries), if the API responds with a non-success status, or if the
    /// byte stream yields a transport error.
    async fn stream_openai(&mut self, prompt: &str) -> Result<(), Box<dyn std::error::Error>> {
        // Optional system message first, then the user prompt.
        let mut messages = Vec::new();
        if let Some(sys) = &self.system_prompt {
            messages.push(OpenAIChatMessage {
                role: "system".to_string(),
                content: sys.clone(),
            });
        }
        messages.push(OpenAIChatMessage {
            role: "user".to_string(),
            content: prompt.to_string(),
        });
        let request = OpenAIChatRequest {
            model: self.model.clone(),
            messages,
            stream: true,
            temperature: 0.7,
            logprobs: true, // required for per-token confidence/perplexity
            top_logprobs: self.top_logprobs,
        };

        let req = self
            .client
            .post("https://api.openai.com/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", self.api_key))
            .header("Content-Type", "application/json")
            .json(&request)
            .build()?;

        // Retry on 429 / 5xx with exponential back-off (#5).
        let response = execute_with_retry(&self.client, req, self.max_retries)
            .await
            .map_err(|e| -> Box<dyn std::error::Error> { e.to_string().into() })?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(format!("OpenAI API error: {}", error_text).into());
        }

        let mut stream = response.bytes_stream();
        // SSE frames can be split across network chunks, so accumulate bytes
        // in `buffer` and only consume complete '\n'-terminated lines.
        let mut buffer = String::new();
        let mut dropped_chunks: usize = 0;

        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            // Reject invalid UTF-8 rather than silently replacing bytes (#4).
            let chunk_str = match std::str::from_utf8(&chunk) {
                Ok(s) => s.to_string(),
                Err(e) => {
                    tracing::warn!(error = %e, "invalid UTF-8 in OpenAI stream chunk — skipping");
                    continue;
                }
            };
            buffer.push_str(&chunk_str);

            // Drain every complete line currently buffered; a trailing
            // partial line stays in `buffer` until the next chunk arrives.
            while let Some(line_end) = buffer.find('\n') {
                let line = buffer[..line_end].trim().to_string();
                buffer.drain(..=line_end);

                // Only `data: ` lines carry JSON; `data: [DONE]` is the
                // end-of-stream sentinel and is deliberately ignored.
                if line.starts_with("data: ") && line != "data: [DONE]" {
                    let json_str = line.strip_prefix("data: ").unwrap_or(&line);
                    match serde_json::from_str::<OpenAIChunk>(json_str) {
                        Ok(parsed) => {
                            if let Some(choice) = parsed.choices.first() {
                                if let Some(content) = &choice.delta.content {
                                    // Extract logprob data from the first API token in this chunk
                                    let (log_prob, top_alts) = choice
                                        .logprobs
                                        .as_ref()
                                        .and_then(|lp| lp.content.first())
                                        .map(|lc| {
                                            let alts = lc
                                                .top_logprobs
                                                .iter()
                                                .map(|t| TokenAlternative {
                                                    token: t.token.clone(),
                                                    probability: t.logprob.exp().clamp(0.0, 1.0),
                                                })
                                                .collect::<Vec<_>>();
                                            (Some(lc.logprob), alts)
                                        })
                                        .unwrap_or((None, vec![]));
                                    self.process_content_logprob(content, log_prob, top_alts);
                                    // A Delay transform records its delay in
                                    // `pending_delay_ms` (set inside the
                                    // non-async processing call); honor it
                                    // here where we can await.
                                    if self.pending_delay_ms > 0 {
                                        tokio::time::sleep(std::time::Duration::from_millis(
                                            self.pending_delay_ms,
                                        ))
                                        .await;
                                        self.pending_delay_ms = 0;
                                    }
                                }
                            }
                        }
                        Err(_) => {
                            tracing::warn!(line = %json_str, "failed to parse SSE chunk; skipping");
                            dropped_chunks += 1;
                        }
                    }
                }
            }
        }

        if dropped_chunks > 0 {
            tracing::warn!(dropped_chunks, "SSE chunks were dropped during stream");
        }

        Ok(())
    }
825
826    // -----------------------------------------------------------------------
827    // Anthropic streaming
828    // -----------------------------------------------------------------------
829
    /// Stream a message completion from the Anthropic API, forwarding each
    /// text delta to `process_content_logprob`.
    ///
    /// Anthropic's streaming API exposes no logprobs, so real model
    /// confidence is unavailable. As a rough proxy, inter-token arrival
    /// latency is mapped to a pseudo-confidence (fast arrival ≈ high
    /// confidence) and converted back into a synthetic log-prob before being
    /// handed to the shared processing path.
    ///
    /// # Errors
    ///
    /// Returns an error if the request cannot be built or sent (after
    /// retries), if the API responds with a non-success status, or if the
    /// byte stream yields a transport error.
    async fn stream_anthropic(&mut self, prompt: &str) -> Result<(), Box<dyn std::error::Error>> {
        // Anthropic's streaming API does not expose logprobs (#8).
        // confidence/perplexity fields will be None for every token in this
        // stream. Cross-provider perplexity comparisons require normalisation
        // because the models operate over different vocabulary sizes (#20).
        tracing::debug!(
            "Anthropic stream: logprobs unavailable; confidence/perplexity will be None"
        );
        if self.web_tx.is_none() {
            eprintln!("[info] Anthropic does not provide logprobs — confidence metrics will be unavailable for this run");
        }

        let request = AnthropicRequest {
            model: self.model.clone(),
            messages: vec![AnthropicMessage {
                role: "user".to_string(),
                content: prompt.to_string(),
            }],
            max_tokens: self.anthropic_max_tokens,
            stream: true,
            temperature: 0.7,
            system: self.system_prompt.clone(),
        };

        let req = self
            .client
            .post("https://api.anthropic.com/v1/messages")
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", providers::ANTHROPIC_API_VERSION)
            .header("Content-Type", "application/json")
            .json(&request)
            .build()?;

        // Retry on 429 / 5xx with exponential back-off (#5).
        let response = execute_with_retry(&self.client, req, self.max_retries)
            .await
            .map_err(|e| -> Box<dyn std::error::Error> { e.to_string().into() })?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(format!("Anthropic API error: {}", error_text).into());
        }

        let mut stream = response.bytes_stream();
        // SSE frames can be split across network chunks; buffer bytes and
        // only consume complete '\n'-terminated lines.
        let mut buffer = String::new();
        let mut dropped_chunks: usize = 0;

        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            // Reject invalid UTF-8 rather than silently replacing bytes (#4).
            let chunk_str = match std::str::from_utf8(&chunk) {
                Ok(s) => s.to_string(),
                Err(e) => {
                    tracing::warn!(error = %e, "invalid UTF-8 in Anthropic stream chunk — skipping");
                    continue;
                }
            };
            buffer.push_str(&chunk_str);

            while let Some(line_end) = buffer.find('\n') {
                let line = buffer[..line_end].trim().to_string();
                buffer.drain(..=line_end);

                if line.starts_with("data: ") {
                    let json_str = line.strip_prefix("data: ").unwrap_or(&line);
                    match serde_json::from_str::<AnthropicStreamEvent>(json_str) {
                        Ok(event) => {
                            // Only content_block_delta events carry text;
                            // other event types (message_start, ping, …) are
                            // intentionally ignored.
                            if event.event_type == "content_block_delta" {
                                if let Some(delta) = &event.delta {
                                    if let Some(text) = &delta.text {
                                        // Estimate confidence from inter-token latency for Anthropic
                                        // Fast tokens (< 50ms) → high confidence proxy; slow tokens → lower
                                        let now = std::time::Instant::now();
                                        let timing_confidence = if let Some(last) =
                                            self.last_token_instant
                                        {
                                            let delta_ms = now.duration_since(last).as_millis() as f64;
                                            // Normalize: tokens arriving in < 50ms get confidence ~0.9, > 500ms → ~0.1
                                            let conf = (1.0 - (delta_ms / 500.0).min(1.0)) * 0.8 + 0.1;
                                            Some(conf as f32)
                                        } else {
                                            // First token of the stream has no
                                            // previous arrival to compare with.
                                            None
                                        };
                                        self.last_token_instant = Some(now);
                                        // Convert timing_confidence to a log_prob approximation if available
                                        // (ln(conf), floored at -10 to avoid -inf).
                                        let timing_logprob =
                                            timing_confidence.map(|c| c.ln().max(-10.0));
                                        self.process_content_logprob(text, timing_logprob, vec![]);
                                        // Honor a pending Delay transform here,
                                        // where we are in async context.
                                        if self.pending_delay_ms > 0 {
                                            tokio::time::sleep(std::time::Duration::from_millis(
                                                self.pending_delay_ms,
                                            ))
                                            .await;
                                            self.pending_delay_ms = 0;
                                        }
                                    }
                                }
                            }
                        }
                        Err(_) => {
                            tracing::warn!(line = %json_str, "failed to parse SSE chunk; skipping");
                            dropped_chunks += 1;
                        }
                    }
                }
            }
        }

        if dropped_chunks > 0 {
            tracing::warn!(dropped_chunks, "SSE chunks were dropped during stream");
        }

        Ok(())
    }
944
945    // -----------------------------------------------------------------------
946    // Mock streaming (no network call — replays a canned fixture)
947    // -----------------------------------------------------------------------
948
949    async fn stream_mock(&mut self, prompt: &str) -> Result<(), Box<dyn std::error::Error>> {
950        // Canned fixture: realistic token stream with logprob data.
951        // Simulates a response to any prompt without hitting any API.
952        let prompt_prefix = prompt[..prompt.len().min(20)].to_string();
953        let fixture: Vec<(String, f32)> = vec![
954            ("The".to_string(), -0.12),
955            (" quick".to_string(), -0.45),
956            (" brown".to_string(), -0.78),
957            (" fox".to_string(), -0.23),
958            (" jumps".to_string(), -0.56),
959            (" over".to_string(), -0.34),
960            (" the".to_string(), -0.11),
961            (" lazy".to_string(), -0.89),
962            (" dog".to_string(), -0.19),
963            (".".to_string(), -0.07),
964            (" This".to_string(), -0.62),
965            (" is".to_string(), -0.15),
966            (" a".to_string(), -0.08),
967            (" mock".to_string(), -0.31),
968            (" response".to_string(), -0.44),
969            (" for".to_string(), -0.27),
970            (" prompt".to_string(), -0.53),
971            (":".to_string(), -0.18),
972            (" \"".to_string(), -0.39),
973            (prompt_prefix, -0.71),
974        ];
975
976        // Vary fixture starting position based on prompt content for more realistic tests
977        let prompt_hash: usize = prompt
978            .bytes()
979            .fold(0usize, |acc, b| acc.wrapping_add(b as usize));
980        let offset = prompt_hash % fixture.len();
981
982        for idx in 0..fixture.len() {
983            let (token_text, logprob) = &fixture[(idx + offset) % fixture.len()];
984            let token_text = token_text.clone();
985            let confidence = logprob.exp().clamp(0.0_f32, 1.0_f32);
986            let perplexity = (-logprob).exp();
987            let importance = calculate_token_importance(&token_text, idx);
988            let should_transform = idx % 2 == 1;
989
990            let (display_text, chaos_label) = if should_transform {
991                let (t, label) = self.transform.apply_with_label(&token_text);
992                let cl = if matches!(self.transform, Transform::Chaos) {
993                    Some(label.to_string())
994                } else {
995                    None
996                };
997                (t, cl)
998            } else {
999                (token_text.clone(), None)
1000            };
1001
1002            if should_transform {
1003                self.transformed_count += 1;
1004            }
1005            self.token_count += 1;
1006
1007            if let Some(tx) = &self.web_tx {
1008                let evt = TokenEvent {
1009                    text: display_text.clone(),
1010                    original: token_text.clone(),
1011                    index: idx,
1012                    transformed: should_transform,
1013                    importance,
1014                    chaos_label,
1015                    provider: self.web_provider_label.clone(),
1016                    confidence: Some(confidence),
1017                    perplexity: Some(perplexity),
1018                    alternatives: vec![
1019                        TokenAlternative {
1020                            token: "a".to_string(),
1021                            probability: 0.15,
1022                        },
1023                        TokenAlternative {
1024                            token: "the".to_string(),
1025                            probability: 0.10,
1026                        },
1027                    ],
1028                    is_error: false,
1029                    arrival_ms: None,
1030                };
1031                let _ = tx.send(evt);
1032            } else {
1033                self.process_content_logprob(&token_text, Some(*logprob), vec![]);
1034            }
1035        }
1036        Ok(())
1037    }
1038
1039    // -----------------------------------------------------------------------
1040    // Orchestrator MCP infer call
1041    // -----------------------------------------------------------------------
1042
1043    async fn orchestrator_infer(&self, prompt: &str) -> Result<String, Box<dyn std::error::Error>> {
1044        let mcp_request = McpInferRequest {
1045            jsonrpc: "2.0".to_string(),
1046            method: "tools/call".to_string(),
1047            id: 1,
1048            params: McpInferParams {
1049                name: "infer".to_string(),
1050                arguments: McpInferArguments {
1051                    prompt: prompt.to_string(),
1052                    worker: "llama_cpp".to_string(),
1053                },
1054            },
1055        };
1056
1057        let response = self
1058            .client
1059            .post(&self.orchestrator_url)
1060            .header("Content-Type", "application/json")
1061            .json(&mcp_request)
1062            .send()
1063            .await?;
1064
1065        if !response.status().is_success() {
1066            return Err(format!("Orchestrator returned HTTP {}", response.status()).into());
1067        }
1068
1069        let mcp_resp: McpInferResponse = response.json().await?;
1070
1071        if let Some(err) = mcp_resp.error {
1072            return Err(format!("Orchestrator MCP error: {}", err.message).into());
1073        }
1074
1075        if let Some(result) = mcp_resp.result {
1076            if let Some(content) = result.content.first() {
1077                if let Some(text) = &content.text {
1078                    return Ok(text.clone());
1079                }
1080            }
1081        }
1082
1083        Err("Orchestrator returned empty result".into())
1084    }
1085
1086    // -----------------------------------------------------------------------
1087    // Token processing (shared by both providers)
1088    // -----------------------------------------------------------------------
1089
1090    /// Process a content chunk without logprob data.
1091    pub fn process_content(&mut self, content: &str) {
1092        self.process_content_logprob(content, None, vec![]);
1093    }
1094    /// Process a content chunk with optional logprob data (research mode API).
1095    pub fn process_content_with_logprob(
1096        &mut self,
1097        content: &str,
1098        lp: Option<providers::OpenAILogprobContent>,
1099    ) {
1100        let (log_prob, top_alts) = if let Some(ref entry) = lp {
1101            let alts: Vec<TokenAlternative> = entry
1102                .top_logprobs
1103                .iter()
1104                .map(|t| TokenAlternative {
1105                    token: t.token.clone(),
1106                    probability: t.logprob.exp().clamp(0.0, 1.0),
1107                })
1108                .collect();
1109            // Pass the raw log-prob so process_content_logprob can derive
1110            // confidence and perplexity via exp(lp) and exp(-lp) respectively.
1111            // Previously this incorrectly passed exp(entry.logprob), causing
1112            // process_content_logprob to double-exponentiate.
1113            (Some(entry.logprob), alts)
1114        } else {
1115            (None, vec![])
1116        };
1117        self.process_content_logprob(content, log_prob, top_alts);
1118    }
1119
    /// Process a content chunk, optionally attaching logprob-derived fields to
    /// the first non-whitespace token produced.
    ///
    /// This is the shared hot path for all providers: it tokenizes `content`,
    /// decides per token whether to transform (Bresenham spread at
    /// `self.rate`, optionally replaced by confidence gating), and routes the
    /// resulting `TokenEvent` to the web channel, the JSON stream, or the
    /// terminal renderer. Side effects: increments `token_count` /
    /// `transformed_count`, may set `pending_delay_ms` (the caller awaits the
    /// delay afterwards, since this method is not async), and feeds the
    /// telemetry bus when the `self-tune` feature is enabled.
    ///
    /// * `log_prob` — natural-log probability of the leading API token, if known.
    /// * `top_alts` — alternative tokens from `top_logprobs`, already converted
    ///   to probabilities (`exp(logprob)`).
    pub fn process_content_logprob(
        &mut self,
        content: &str,
        log_prob: Option<f32>,
        top_alts: Vec<TokenAlternative>,
    ) {
        let tokens = tokenize(content);
        let mut first_real = true; // attach logprob data to first non-whitespace token

        for token in tokens {
            // Whitespace-only tokens are passed through untouched by skipping
            // the whole pipeline (they are not counted or emitted).
            if !token.trim().is_empty() {
                let i = self.token_count;

                // Bresenham-style spread: transform token i when
                // floor((i+1)*rate) > floor(i*rate), giving a uniform
                // distribution at any rate without probabilistic sampling.
                let rate = self.rate;
                let should_transform = ((i + 1) as f64 * rate).floor() > (i as f64 * rate).floor();

                // Logprob data only goes on the first real token of each API chunk.
                // Compute before the transform so confidence can drive importance.
                let (token_confidence, token_perplexity, token_alts) = if first_real {
                    first_real = false;
                    // confidence = exp(lp) ∈ (0, 1]; perplexity = exp(-lp) ≥ 1.
                    let conf = log_prob.map(|lp| lp.exp().clamp(0.0, 1.0));
                    let perp = log_prob.map(|lp| (-lp).exp());
                    (conf, perp, top_alts.clone())
                } else {
                    (None, None, vec![])
                };

                // Confidence gating: if min_confidence is set and token has API confidence,
                // only transform tokens whose confidence is BELOW the threshold.
                // NOTE: for such tokens this REPLACES the Bresenham decision
                // above (the shadowed binding discards it).
                let should_transform =
                    if let (Some(min_conf), Some(conf)) = (self.min_confidence, token_confidence) {
                        conf as f64 <= min_conf
                    } else {
                        should_transform
                    };

                // Use real API confidence as importance when available; fall back
                // to the heuristic scorer for tokens without logprob data.
                let importance = token_confidence.map(|c| c as f64).unwrap_or_else(|| {
                    transforms::calculate_token_importance_rng(&token, i, &mut self.rng)
                });

                let (display_text, chaos_label) = if should_transform {
                    self.transformed_count += 1;
                    let (text, label) = self.transform.apply_with_label_rng(&token, &mut self.rng);
                    let cl = if matches!(self.transform, Transform::Chaos) || text.is_empty() {
                        // Chaos: use sub-transform label; Delete: mark explicitly as "deleted"
                        Some(if text.is_empty() {
                            "deleted".to_string()
                        } else {
                            label.to_string()
                        })
                    } else {
                        None
                    };
                    (text, cl)
                } else {
                    (token.clone(), None)
                };

                // Delay transform: record the desired delay so the caller can
                // await it asynchronously after this (non-async) method returns.
                if should_transform {
                    if let Transform::Delay(ms) = self.transform {
                        self.pending_delay_ms = ms;
                    }
                }

                // Delete transform: the result is an empty string (chaos_label="deleted").
                let is_deleted = should_transform && display_text.is_empty();

                // Web / terminal / json output — skip deleted tokens for display.
                // (Deleted tokens are still counted below.)
                if !is_deleted {
                    // Record per-token arrival latency relative to stream start.
                    let arrival_ms = self.stream_start_instant
                        .map(|start| start.elapsed().as_millis() as u64);
                    if let Some(tx) = &self.web_tx {
                        let event = TokenEvent {
                            text: display_text.clone(),
                            original: token.clone(),
                            index: i,
                            transformed: should_transform,
                            importance,
                            chaos_label,
                            provider: self.web_provider_label.clone(),
                            confidence: token_confidence,
                            perplexity: token_perplexity,
                            alternatives: token_alts,
                            is_error: false,
                            arrival_ms,
                        };
                        // Persist the event before sending; send failure
                        // (dropped receiver) is deliberately ignored.
                        if let Some(rec) = &mut self.recorder {
                            rec.record(&event);
                        }
                        let _ = tx.send(event);
                    } else if self.json_stream {
                        // JSON stream mode: one line per token
                        let event = TokenEvent {
                            text: display_text.clone(),
                            original: token.clone(),
                            index: i,
                            transformed: should_transform,
                            importance,
                            chaos_label: chaos_label.clone(),
                            provider: self.web_provider_label.clone(),
                            confidence: token_confidence,
                            perplexity: token_perplexity,
                            alternatives: token_alts.clone(),
                            is_error: false,
                            arrival_ms,
                        };
                        if let Ok(line) = serde_json::to_string(&event) {
                            println!("{}", line);
                        }
                    } else {
                        // Terminal mode: print with colors
                        if self.heatmap_mode {
                            print!("{}", apply_heatmap_color(&display_text, importance));
                        } else if self.visual_mode && should_transform {
                            print!("{}", display_text.bright_cyan().bold());
                        } else if self.visual_mode {
                            print!("{}", display_text.normal());
                        } else {
                            print!("{}", display_text);
                        }
                        // Flush so tokens appear as they stream, not on buffer fill.
                        let _ = io::stdout().flush();
                    }
                }

                self.token_count += 1;
            }
        }

        // Feed token-processing metrics into the self-improvement telemetry bus.
        #[cfg(feature = "self-tune")]
        if let Some(bus) = &self.telemetry_bus {
            use crate::self_tune::telemetry_bus::PipelineStage;
            // Record latency as synthetic 1ms per token (real timing requires instrumentation)
            // NOTE(review): this records once per *chunk*, not per token — confirm intent.
            bus.record_latency(PipelineStage::Inference, 1_000);
            // Record confidence as quality proxy (if available), smuggled
            // through the latency channel on the Other stage.
            if let Some(lp) = log_prob {
                let confidence_pct = (lp.exp().clamp(0.0, 1.0) * 100.0) as u64;
                bus.record_latency(PipelineStage::Other, confidence_pct.max(1));
            }
        }
    }
1275
    /// Print a formatted session header to stdout.
    ///
    /// Displays provider, transform, model, and prompt. When `visual_mode` or
    /// `heatmap_mode` is active, additional legend lines are printed.
    ///
    /// NOTE(review): this method prints unconditionally — it does NOT check
    /// `web_tx`. Callers are expected to skip it in web mode (web mode
    /// renders its own header); the earlier claim that it was "a no-op when
    /// `web_tx` is set" did not match the code.
    pub fn print_header(&self, prompt: &str) {
        println!("{}", "EVERY OTHER TOKEN INTERCEPTOR".bright_cyan().bold());
        println!(
            "{}: {}",
            "Provider".bright_yellow(),
            self.provider.to_string().bright_white()
        );
        println!("{}: {:?}", "Transform".bright_yellow(), self.transform);
        println!("{}: {}", "Model".bright_yellow(), self.model);
        println!("{}: {}", "Prompt".bright_yellow(), prompt);
        if self.orchestrator {
            println!(
                "{}: {}",
                "Orchestrator".bright_magenta(),
                "ON (MCP pipeline at localhost:3000)".bright_magenta()
            );
        }
        if self.visual_mode {
            println!(
                "{}: {}",
                "Visual Mode".bright_green(),
                "ON (even=normal, odd=cyan+bold)".bright_green()
            );
        }
        if self.heatmap_mode {
            println!(
                "{}: {}",
                "Heatmap Mode".bright_magenta(),
                "ON (color intensity = token importance)".bright_magenta()
            );
            // Legend maps background colors to importance buckets.
            println!(
                "{}: {} {} {} {}",
                "Legend".bright_white(),
                "Low".on_blue(),
                "Medium".on_yellow(),
                "High".on_red(),
                "Critical".on_bright_red().bright_white()
            );
        }
        println!("{}", "=".repeat(50).bright_blue());
        println!("{}", "Response (with transformations):".bright_green());
        println!();
    }
1324
1325    /// Print a summary footer to stdout after a streaming session completes.
1326    ///
1327    /// Reports total token count and how many tokens were transformed.
1328    pub fn print_footer(&self) {
1329        println!("\n{}", "=".repeat(50).bright_blue());
1330        println!("Complete! Processed {} tokens.", self.token_count);
1331        println!("Transform applied to {} tokens.", self.transformed_count);
1332    }
1333}
1334
1335// ---------------------------------------------------------------------------
1336// Headless research session
1337// ---------------------------------------------------------------------------
1338
/// Aggregated statistics from one or more headless inference runs.
///
/// Produced by [`run_research_headless`].  Fields summarise token-level metrics
/// across all runs; fields that require logprob data are `Option` because not
/// all providers expose logprobs (Anthropic does not).
///
/// Serializes via `serde` (e.g. for `--output results.json` in research mode).
#[derive(Debug, Clone, serde::Serialize)]
pub struct ResearchSession {
    /// The prompt submitted to the provider for all runs in this session.
    pub prompt: String,
    /// Provider identifier (`"openai"`, `"anthropic"`, or `"mock"`).
    pub provider: String,
    /// Model identifier used for all runs (e.g. `"gpt-4"`).
    pub model: String,
    /// Transform applied to intercepted tokens (e.g. `"reverse"`).
    pub transform: String,
    /// Number of inference runs executed.
    pub runs: u32,
    /// Total tokens streamed across all runs.
    pub total_tokens: usize,
    /// Total tokens that had a transform applied across all runs.
    pub total_transformed: usize,
    /// Unique-token fraction: `unique_tokens / total_tokens` (case-insensitive).
    pub vocabulary_diversity: f64,
    /// Mean character length of all original (pre-transform) tokens.
    pub mean_token_length: f64,
    /// Mean per-token perplexity across all runs, or `None` when unavailable.
    pub mean_perplexity: Option<f64>,
    /// Mean per-token model confidence across all runs, or `None` when unavailable.
    pub mean_confidence: Option<f64>,
    /// The 10 tokens with the highest perplexity values (most uncertain positions).
    pub top_perplexity_tokens: Vec<String>,
    /// Rough cost estimate in USD based on token count and GPT-3.5 pricing.
    pub estimated_cost_usd: f64,
    /// Human-readable citation string recording key run parameters for reproducibility.
    pub citation: String,
}
1375
1376/// Run `runs` headless inference calls, collect all `TokenEvent`s, and return
1377/// an aggregated `ResearchSession`.  Call sites must provide a constructed
1378/// interceptor (no web_tx set — events are returned via the mpsc channel).
1379pub async fn run_research_headless(
1380    prompt: &str,
1381    provider: providers::Provider,
1382    transform: transforms::Transform,
1383    model: String,
1384    runs: u32,
1385) -> Result<ResearchSession, Box<dyn std::error::Error>> {
1386    let mut all_tokens: Vec<TokenEvent> = Vec::new();
1387
1388    for _ in 0..runs {
1389        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1390        let mut interceptor = TokenInterceptor::new(
1391            provider.clone(),
1392            transform.clone(),
1393            model.clone(),
1394            false,
1395            false,
1396            false,
1397        )?;
1398        interceptor.web_tx = Some(tx);
1399        interceptor.intercept_stream(prompt).await?;
1400        // Drain channel
1401        while let Ok(ev) = rx.try_recv() {
1402            all_tokens.push(ev);
1403        }
1404    }
1405
1406    let total = all_tokens.len();
1407    let total_transformed = all_tokens.iter().filter(|t| t.transformed).count();
1408
1409    let unique: std::collections::HashSet<String> = all_tokens
1410        .iter()
1411        .map(|t| t.original.to_lowercase())
1412        .collect();
1413    let vocab_diversity = if total > 0 {
1414        unique.len() as f64 / total as f64
1415    } else {
1416        0.0
1417    };
1418
1419    let mean_token_length = if total > 0 {
1420        all_tokens
1421            .iter()
1422            .map(|t| t.original.len() as f64)
1423            .sum::<f64>()
1424            / total as f64
1425    } else {
1426        0.0
1427    };
1428
1429    let perp_tokens: Vec<f64> = all_tokens
1430        .iter()
1431        .filter_map(|t| t.perplexity.map(|p| p as f64))
1432        .collect();
1433    let mean_perplexity = if perp_tokens.is_empty() {
1434        None
1435    } else {
1436        Some(perp_tokens.iter().sum::<f64>() / perp_tokens.len() as f64)
1437    };
1438
1439    let conf_tokens: Vec<f64> = all_tokens
1440        .iter()
1441        .filter_map(|t| t.confidence.map(|c| c as f64))
1442        .collect();
1443    let mean_confidence = if conf_tokens.is_empty() {
1444        None
1445    } else {
1446        Some(conf_tokens.iter().sum::<f64>() / conf_tokens.len() as f64)
1447    };
1448
1449    // Top 10 highest-perplexity original tokens
1450    let mut by_perp: Vec<&TokenEvent> = all_tokens
1451        .iter()
1452        .filter(|t| t.perplexity.is_some())
1453        .collect();
1454    by_perp.sort_by(|a, b| {
1455        b.perplexity
1456            .partial_cmp(&a.perplexity)
1457            .unwrap_or(std::cmp::Ordering::Equal)
1458    });
1459    let top_perplexity_tokens: Vec<String> = by_perp
1460        .iter()
1461        .take(10)
1462        .map(|t| t.original.clone())
1463        .collect();
1464
1465    // Cost estimate: GPT-3.5 rate $0.002 / 1K tokens
1466    let estimated_cost_usd = total as f64 / 1000.0 * 0.002;
1467
1468    let citation = format!(
1469        "Every Other Token v4.0.0 | prompt=\"{}\" | provider={} | model={} | transform={:?} | runs={} | tokens={}",
1470        prompt, provider, model, transform, runs, total
1471    );
1472
1473    Ok(ResearchSession {
1474        prompt: prompt.to_string(),
1475        provider: provider.to_string(),
1476        model,
1477        transform: format!("{:?}", transform),
1478        runs,
1479        total_tokens: total,
1480        total_transformed,
1481        vocabulary_diversity: vocab_diversity,
1482        mean_token_length,
1483        mean_perplexity,
1484        mean_confidence,
1485        top_perplexity_tokens,
1486        estimated_cost_usd,
1487        citation,
1488    })
1489}
1490
1491#[cfg(test)]
1492mod tests {
1493    use super::*;
1494    use tokio::sync::mpsc;
1495
1496    fn make_test_interceptor() -> TokenInterceptor {
1497        TokenInterceptor {
1498            client: Client::new(),
1499            api_key: "test-key".to_string(),
1500            provider: Provider::Openai,
1501            transform: Transform::Reverse,
1502            model: "test-model".to_string(),
1503            token_count: 0,
1504            transformed_count: 0,
1505            visual_mode: false,
1506            heatmap_mode: false,
1507            orchestrator: false,
1508            orchestrator_url: "http://localhost:3000".to_string(),
1509            web_tx: None,
1510            web_provider_label: None,
1511            system_prompt: None,
1512            #[cfg(feature = "self-tune")]
1513            telemetry_bus: None,
1514            #[cfg(feature = "self-modify")]
1515            dedup: None,
1516            rate: 0.5,
1517            rng: StdRng::seed_from_u64(42),
1518            top_logprobs: 5,
1519            recorder: None,
1520            json_stream: false,
1521            pending_delay_ms: 0,
1522            min_confidence: None,
1523            last_token_instant: None,
1524            max_retries: 3,
1525            anthropic_max_tokens: 4096,
1526            stream_start_instant: None,
1527            timeout_secs: None,
1528        }
1529    }
1530
1531    // -- TokenInterceptor construction --
1532
1533    #[test]
1534    fn test_new_openai_requires_api_key() {
1535        std::env::remove_var("OPENAI_API_KEY");
1536        let result = TokenInterceptor::new(
1537            Provider::Openai,
1538            Transform::Reverse,
1539            "gpt-4".to_string(),
1540            false,
1541            false,
1542            false,
1543        );
1544        assert!(result.is_err());
1545    }
1546
1547    #[test]
1548    fn test_new_anthropic_requires_api_key() {
1549        std::env::remove_var("ANTHROPIC_API_KEY");
1550        let result = TokenInterceptor::new(
1551            Provider::Anthropic,
1552            Transform::Reverse,
1553            "claude".to_string(),
1554            false,
1555            false,
1556            false,
1557        );
1558        assert!(result.is_err());
1559    }
1560
1561    #[test]
1562    fn test_interceptor_initial_counts_zero() {
1563        let interceptor = make_test_interceptor();
1564        assert_eq!(interceptor.token_count, 0);
1565        assert_eq!(interceptor.transformed_count, 0);
1566    }
1567
1568    #[test]
1569    fn test_interceptor_fields_match_construction() {
1570        let interceptor = make_test_interceptor();
1571        assert_eq!(interceptor.provider, Provider::Openai);
1572        assert_eq!(interceptor.model, "test-model");
1573        assert!(!interceptor.visual_mode);
1574        assert!(!interceptor.heatmap_mode);
1575        assert!(!interceptor.orchestrator);
1576        assert!(interceptor.web_tx.is_none());
1577    }
1578
1579    // -- process_content tests --
1580
1581    #[test]
1582    fn test_process_content_two_tokens() {
1583        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1584        let mut interceptor = make_test_interceptor();
1585        interceptor.web_tx = Some(tx);
1586
1587        interceptor.process_content("hello world");
1588        assert_eq!(interceptor.token_count, 2);
1589
1590        let mut events = Vec::new();
1591        while let Ok(e) = rx.try_recv() {
1592            events.push(e);
1593        }
1594        assert_eq!(events.len(), 2);
1595        assert_eq!(events[0].text, "hello");
1596        assert_eq!(events[0].original, "hello");
1597        assert!(!events[0].transformed);
1598        assert_eq!(events[0].index, 0);
1599        assert_eq!(events[1].original, "world");
1600        assert!(events[1].transformed);
1601        assert_eq!(events[1].index, 1);
1602    }
1603
1604    #[test]
1605    fn test_process_content_transforms_odd_tokens() {
1606        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1607        let mut interceptor = make_test_interceptor();
1608        interceptor.web_tx = Some(tx);
1609
1610        interceptor.process_content("hello world");
1611
1612        let mut events = Vec::new();
1613        while let Ok(e) = rx.try_recv() {
1614            events.push(e);
1615        }
1616        // "world" reversed = "dlrow"
1617        assert_eq!(events[1].text, "dlrow");
1618        assert_eq!(events[1].original, "world");
1619    }
1620
1621    #[test]
1622    fn test_process_content_empty_string() {
1623        let (tx, _rx) = mpsc::unbounded_channel::<TokenEvent>();
1624        let mut interceptor = make_test_interceptor();
1625        interceptor.web_tx = Some(tx);
1626
1627        interceptor.process_content("");
1628        assert_eq!(interceptor.token_count, 0);
1629        assert_eq!(interceptor.transformed_count, 0);
1630    }
1631
1632    #[test]
1633    fn test_process_content_whitespace_only() {
1634        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1635        let mut interceptor = make_test_interceptor();
1636        interceptor.web_tx = Some(tx);
1637
1638        interceptor.process_content("   ");
1639        let mut events = Vec::new();
1640        while let Ok(e) = rx.try_recv() {
1641            events.push(e);
1642        }
1643        assert!(events.is_empty());
1644    }
1645
1646    #[test]
1647    fn test_process_content_single_token() {
1648        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1649        let mut interceptor = make_test_interceptor();
1650        interceptor.web_tx = Some(tx);
1651
1652        interceptor.process_content("hello");
1653
1654        let mut events = Vec::new();
1655        while let Ok(e) = rx.try_recv() {
1656            events.push(e);
1657        }
1658        assert_eq!(events.len(), 1);
1659        assert_eq!(events[0].text, "hello");
1660        assert!(!events[0].transformed);
1661    }
1662
1663    #[test]
1664    fn test_process_content_cross_call_continuity() {
1665        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1666        let mut interceptor = make_test_interceptor();
1667        interceptor.web_tx = Some(tx);
1668
1669        interceptor.process_content("hello");
1670        interceptor.process_content("world");
1671
1672        let mut events = Vec::new();
1673        while let Ok(e) = rx.try_recv() {
1674            events.push(e);
1675        }
1676        assert_eq!(events.len(), 2);
1677        assert_eq!(events[0].index, 0);
1678        assert_eq!(events[1].index, 1);
1679        assert!(events[1].transformed);
1680    }
1681
1682    #[test]
1683    fn test_process_content_increments_transformed_count() {
1684        let (tx, _rx) = mpsc::unbounded_channel::<TokenEvent>();
1685        let mut interceptor = make_test_interceptor();
1686        interceptor.web_tx = Some(tx);
1687
1688        interceptor.process_content("hello world foo bar");
1689        assert_eq!(interceptor.transformed_count, 2);
1690    }
1691
1692    #[test]
1693    fn test_process_content_six_tokens_three_transformed() {
1694        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1695        let mut interceptor = make_test_interceptor();
1696        interceptor.web_tx = Some(tx);
1697
1698        interceptor.process_content("one two three four five six");
1699
1700        let mut events = Vec::new();
1701        while let Ok(e) = rx.try_recv() {
1702            events.push(e);
1703        }
1704        assert_eq!(events.len(), 6);
1705        let xformed: Vec<_> = events.iter().filter(|e| e.transformed).collect();
1706        assert_eq!(xformed.len(), 3);
1707    }
1708
1709    // -- original field preservation --
1710
1711    #[test]
1712    fn test_original_field_preserved_for_all_tokens() {
1713        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1714        let mut interceptor = make_test_interceptor();
1715        interceptor.web_tx = Some(tx);
1716
1717        interceptor.process_content("the quick brown fox");
1718
1719        let mut events = Vec::new();
1720        while let Ok(e) = rx.try_recv() {
1721            events.push(e);
1722        }
1723
1724        for event in &events {
1725            assert!(!event.original.is_empty());
1726            if event.transformed {
1727                assert_ne!(event.text, event.original);
1728            } else {
1729                assert_eq!(event.text, event.original);
1730            }
1731        }
1732    }
1733
1734    #[test]
1735    fn test_sidebyside_original_is_raw_token() {
1736        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1737        let mut interceptor = make_test_interceptor();
1738        interceptor.web_tx = Some(tx);
1739
1740        interceptor.process_content("quick brown fox");
1741
1742        let mut events = Vec::new();
1743        while let Ok(e) = rx.try_recv() {
1744            events.push(e);
1745        }
1746
1747        let originals: Vec<&str> = events.iter().map(|e| e.original.as_str()).collect();
1748        assert!(originals.contains(&"quick"));
1749        assert!(originals.contains(&"brown"));
1750        assert!(originals.contains(&"fox"));
1751    }
1752
1753    // -- even/odd alternation for graph --
1754
1755    #[test]
1756    fn test_even_odd_alternation() {
1757        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1758        let mut interceptor = make_test_interceptor();
1759        interceptor.web_tx = Some(tx);
1760
1761        interceptor.process_content("a b c d");
1762
1763        let mut events = Vec::new();
1764        while let Ok(e) = rx.try_recv() {
1765            events.push(e);
1766        }
1767
1768        for event in &events {
1769            if event.index % 2 == 0 {
1770                assert!(!event.transformed);
1771            } else {
1772                assert!(event.transformed);
1773            }
1774        }
1775    }
1776
1777    #[test]
1778    fn test_graph_pairs_alternate() {
1779        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1780        let mut interceptor = make_test_interceptor();
1781        interceptor.web_tx = Some(tx);
1782
1783        interceptor.process_content("alpha beta gamma delta epsilon zeta");
1784
1785        let mut events = Vec::new();
1786        while let Ok(e) = rx.try_recv() {
1787            events.push(e);
1788        }
1789
1790        for pair in events.chunks(2) {
1791            assert!(!pair[0].transformed);
1792            if pair.len() > 1 {
1793                assert!(pair[1].transformed);
1794            }
1795        }
1796    }
1797
1798    #[test]
1799    fn test_graph_indices_sequential() {
1800        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1801        let mut interceptor = make_test_interceptor();
1802        interceptor.web_tx = Some(tx);
1803
1804        interceptor.process_content("one two three four");
1805
1806        let mut events = Vec::new();
1807        while let Ok(e) = rx.try_recv() {
1808            events.push(e);
1809        }
1810
1811        for (i, event) in events.iter().enumerate() {
1812            assert_eq!(event.index, i);
1813        }
1814    }
1815
1816    // -- export structure tests --
1817
1818    #[test]
1819    fn test_export_array_sequential_indices() {
1820        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1821        let mut interceptor = make_test_interceptor();
1822        interceptor.web_tx = Some(tx);
1823
1824        interceptor.process_content("the quick brown fox jumps over");
1825
1826        let mut events = Vec::new();
1827        while let Ok(e) = rx.try_recv() {
1828            events.push(e);
1829        }
1830
1831        for (i, event) in events.iter().enumerate() {
1832            assert_eq!(event.index, i);
1833        }
1834    }
1835
1836    #[test]
1837    fn test_export_all_tokens_have_valid_importance() {
1838        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1839        let mut interceptor = make_test_interceptor();
1840        interceptor.web_tx = Some(tx);
1841
1842        interceptor.process_content("testing export importance values");
1843
1844        let mut events = Vec::new();
1845        while let Ok(e) = rx.try_recv() {
1846            events.push(e);
1847        }
1848
1849        for event in &events {
1850            assert!(event.importance >= 0.0 && event.importance <= 1.0);
1851        }
1852    }
1853
1854    #[test]
1855    fn test_export_large_set_serializes() {
1856        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1857        let mut interceptor = make_test_interceptor();
1858        interceptor.web_tx = Some(tx);
1859
1860        interceptor.process_content("the quick brown fox jumps over the lazy dog and runs around");
1861
1862        let mut events = Vec::new();
1863        while let Ok(e) = rx.try_recv() {
1864            events.push(e);
1865        }
1866
1867        let json = serde_json::to_string(&events).expect("serialize");
1868        let parsed: Vec<serde_json::Value> = serde_json::from_str(&json).expect("parse");
1869        assert_eq!(parsed.len(), events.len());
1870        assert!(parsed.len() > 5);
1871    }
1872
1873    #[test]
1874    fn test_multiple_tokens_form_valid_export_array() {
1875        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1876        let mut interceptor = make_test_interceptor();
1877        interceptor.web_tx = Some(tx);
1878
1879        interceptor.process_content("hello world foo bar");
1880
1881        let mut events = Vec::new();
1882        while let Ok(e) = rx.try_recv() {
1883            events.push(e);
1884        }
1885
1886        let json = serde_json::to_string(&events).expect("serialize");
1887        let parsed: Vec<serde_json::Value> = serde_json::from_str(&json).expect("parse");
1888        assert_eq!(parsed.len(), events.len());
1889        for (i, entry) in parsed.iter().enumerate() {
1890            assert_eq!(entry["index"].as_u64().expect("index"), i as u64);
1891        }
1892    }
1893
1894    // -- print header/footer (no crash) --
1895
1896    #[test]
1897    fn test_print_header_all_modes() {
1898        let interceptor = make_test_interceptor();
1899        interceptor.print_header("test prompt");
1900    }
1901
1902    #[test]
1903    fn test_print_header_with_orchestrator() {
1904        let mut interceptor = make_test_interceptor();
1905        interceptor.orchestrator = true;
1906        interceptor.print_header("test");
1907    }
1908
1909    #[test]
1910    fn test_print_header_with_visual_mode() {
1911        let mut interceptor = make_test_interceptor();
1912        interceptor.visual_mode = true;
1913        interceptor.print_header("test");
1914    }
1915
1916    #[test]
1917    fn test_print_header_with_heatmap_mode() {
1918        let mut interceptor = make_test_interceptor();
1919        interceptor.heatmap_mode = true;
1920        interceptor.print_header("test");
1921    }
1922
1923    #[test]
1924    fn test_print_footer() {
1925        let interceptor = make_test_interceptor();
1926        interceptor.print_footer();
1927    }
1928
1929    #[test]
1930    fn test_print_footer_after_processing() {
1931        let mut interceptor = make_test_interceptor();
1932        interceptor.token_count = 42;
1933        interceptor.transformed_count = 21;
1934        interceptor.print_footer();
1935    }
1936
1937    // -- different transform types --
1938
1939    #[test]
1940    fn test_process_content_uppercase_transform() {
1941        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1942        let mut interceptor = make_test_interceptor();
1943        interceptor.transform = Transform::Uppercase;
1944        interceptor.web_tx = Some(tx);
1945
1946        interceptor.process_content("hello world");
1947
1948        let mut events = Vec::new();
1949        while let Ok(e) = rx.try_recv() {
1950            events.push(e);
1951        }
1952        assert_eq!(events[1].text, "WORLD");
1953        assert_eq!(events[1].original, "world");
1954    }
1955
1956    #[test]
1957    fn test_process_content_mock_transform() {
1958        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1959        let mut interceptor = make_test_interceptor();
1960        interceptor.transform = Transform::Mock;
1961        interceptor.web_tx = Some(tx);
1962
1963        interceptor.process_content("hello world");
1964
1965        let mut events = Vec::new();
1966        while let Ok(e) = rx.try_recv() {
1967            events.push(e);
1968        }
1969        assert_eq!(events[1].text, "wOrLd");
1970    }
1971
1972    #[test]
1973    fn test_process_content_noise_transform() {
1974        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
1975        let mut interceptor = make_test_interceptor();
1976        interceptor.transform = Transform::Noise;
1977        interceptor.web_tx = Some(tx);
1978
1979        interceptor.process_content("hello world");
1980
1981        let mut events = Vec::new();
1982        while let Ok(e) = rx.try_recv() {
1983            events.push(e);
1984        }
1985        assert!(events[1].text.starts_with("world"));
1986        assert_eq!(events[1].text.len(), 6); // "world" + 1 noise char
1987    }
1988
1989    // -- web_tx none falls back to terminal mode --
1990
1991    #[test]
1992    fn test_process_content_terminal_mode_no_crash() {
1993        let mut interceptor = make_test_interceptor();
1994        // web_tx is None, so this prints to stdout (terminal mode)
1995        interceptor.process_content("hello world");
1996        assert_eq!(interceptor.token_count, 2);
1997    }
1998
1999    #[test]
2000    fn test_process_content_visual_mode_no_crash() {
2001        let mut interceptor = make_test_interceptor();
2002        interceptor.visual_mode = true;
2003        interceptor.process_content("hello world");
2004        assert_eq!(interceptor.token_count, 2);
2005    }
2006
2007    #[test]
2008    fn test_process_content_heatmap_mode_no_crash() {
2009        let mut interceptor = make_test_interceptor();
2010        interceptor.heatmap_mode = true;
2011        interceptor.process_content("hello world");
2012        assert_eq!(interceptor.token_count, 2);
2013    }
2014
2015    // -- chaos_label field tests --
2016
2017    #[test]
2018    fn test_chaos_label_set_for_chaos_transform() {
2019        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2020        let mut interceptor = make_test_interceptor();
2021        interceptor.transform = Transform::Chaos;
2022        interceptor.web_tx = Some(tx);
2023
2024        interceptor.process_content("hello world");
2025
2026        let mut events = Vec::new();
2027        while let Ok(e) = rx.try_recv() {
2028            events.push(e);
2029        }
2030        // "world" is the odd token — should have chaos_label
2031        let known = ["reverse", "uppercase", "mock", "noise"];
2032        let odd = events
2033            .iter()
2034            .find(|e| e.transformed)
2035            .expect("should have odd token");
2036        let label = odd
2037            .chaos_label
2038            .as_ref()
2039            .expect("chaos_label should be Some for Chaos transform");
2040        assert!(
2041            known.contains(&label.as_str()),
2042            "unexpected label: {}",
2043            label
2044        );
2045    }
2046
2047    #[test]
2048    fn test_chaos_label_none_for_reverse_transform() {
2049        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2050        let mut interceptor = make_test_interceptor();
2051        interceptor.transform = Transform::Reverse;
2052        interceptor.web_tx = Some(tx);
2053
2054        interceptor.process_content("hello world");
2055
2056        let mut events = Vec::new();
2057        while let Ok(e) = rx.try_recv() {
2058            events.push(e);
2059        }
2060        for event in &events {
2061            assert!(
2062                event.chaos_label.is_none(),
2063                "Reverse should not set chaos_label"
2064            );
2065        }
2066    }
2067
2068    #[test]
2069    fn test_chaos_label_none_for_even_tokens() {
2070        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2071        let mut interceptor = make_test_interceptor();
2072        interceptor.transform = Transform::Chaos;
2073        interceptor.web_tx = Some(tx);
2074
2075        interceptor.process_content("hello world foo bar");
2076
2077        let mut events = Vec::new();
2078        while let Ok(e) = rx.try_recv() {
2079            events.push(e);
2080        }
2081        for event in events.iter().filter(|e| !e.transformed) {
2082            assert!(
2083                event.chaos_label.is_none(),
2084                "Even tokens should not have chaos_label"
2085            );
2086        }
2087    }
2088
2089    #[test]
2090    fn test_chaos_label_serialization() {
2091        let event = TokenEvent {
2092            text: "dlrow".to_string(),
2093            original: "world".to_string(),
2094            index: 1,
2095            transformed: true,
2096            importance: 0.5,
2097            chaos_label: Some("reverse".to_string()),
2098            provider: None,
2099            confidence: None,
2100            perplexity: None,
2101            alternatives: vec![],
2102            is_error: false,
2103            arrival_ms: None,
2104        };
2105        let json = serde_json::to_string(&event).expect("serialize");
2106        assert!(json.contains("chaos_label"));
2107        assert!(json.contains("reverse"));
2108    }
2109
2110    #[test]
2111    fn test_chaos_label_skipped_when_none() {
2112        let event = TokenEvent {
2113            text: "hello".to_string(),
2114            original: "hello".to_string(),
2115            index: 0,
2116            transformed: false,
2117            importance: 0.3,
2118            chaos_label: None,
2119            provider: None,
2120            confidence: None,
2121            perplexity: None,
2122            alternatives: vec![],
2123            is_error: false,
2124            arrival_ms: None,
2125        };
2126        let json = serde_json::to_string(&event).expect("serialize");
2127        assert!(
2128            !json.contains("chaos_label"),
2129            "None chaos_label should be skipped in JSON"
2130        );
2131    }
2132
2133    // -- provider field tests --
2134
2135    #[test]
2136    fn test_provider_label_none_by_default() {
2137        let interceptor = make_test_interceptor();
2138        assert!(interceptor.web_provider_label.is_none());
2139    }
2140
2141    #[test]
2142    fn test_provider_label_propagates_to_event() {
2143        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2144        let mut interceptor = make_test_interceptor();
2145        interceptor.web_tx = Some(tx);
2146        interceptor.web_provider_label = Some("openai".to_string());
2147
2148        interceptor.process_content("hello world");
2149
2150        let mut events = Vec::new();
2151        while let Ok(e) = rx.try_recv() {
2152            events.push(e);
2153        }
2154        for event in &events {
2155            assert_eq!(
2156                event.provider.as_deref(),
2157                Some("openai"),
2158                "provider label should propagate to all events"
2159            );
2160        }
2161    }
2162
2163    #[test]
2164    fn test_provider_label_none_means_skipped_in_json() {
2165        let event = TokenEvent {
2166            text: "hello".to_string(),
2167            original: "hello".to_string(),
2168            index: 0,
2169            transformed: false,
2170            importance: 0.5,
2171            chaos_label: None,
2172            provider: None,
2173            confidence: None,
2174            perplexity: None,
2175            alternatives: vec![],
2176            is_error: false,
2177            arrival_ms: None,
2178        };
2179        let json = serde_json::to_string(&event).expect("serialize");
2180        assert!(
2181            !json.contains("\"provider\""),
2182            "None provider should be skipped in JSON"
2183        );
2184    }
2185
2186    #[test]
2187    fn test_provider_label_some_appears_in_json() {
2188        let event = TokenEvent {
2189            text: "hello".to_string(),
2190            original: "hello".to_string(),
2191            index: 0,
2192            transformed: false,
2193            importance: 0.5,
2194            chaos_label: None,
2195            provider: Some("anthropic".to_string()),
2196            confidence: None,
2197            perplexity: None,
2198            alternatives: vec![],
2199            is_error: false,
2200            arrival_ms: None,
2201        };
2202        let json = serde_json::to_string(&event).expect("serialize");
2203        assert!(json.contains("\"provider\""));
2204        assert!(json.contains("anthropic"));
2205    }
2206
2207    #[test]
2208    fn test_provider_label_openai_and_anthropic_distinct() {
2209        let (tx1, mut rx1) = mpsc::unbounded_channel::<TokenEvent>();
2210        let mut openai_i = make_test_interceptor();
2211        openai_i.web_tx = Some(tx1);
2212        openai_i.web_provider_label = Some("openai".to_string());
2213
2214        let (tx2, mut rx2) = mpsc::unbounded_channel::<TokenEvent>();
2215        let mut anthropic_i = make_test_interceptor();
2216        anthropic_i.web_tx = Some(tx2);
2217        anthropic_i.web_provider_label = Some("anthropic".to_string());
2218
2219        openai_i.process_content("hello");
2220        anthropic_i.process_content("hello");
2221
2222        let e1 = rx1.try_recv().expect("openai event");
2223        let e2 = rx2.try_recv().expect("anthropic event");
2224        assert_eq!(e1.provider.as_deref(), Some("openai"));
2225        assert_eq!(e2.provider.as_deref(), Some("anthropic"));
2226        assert_ne!(e1.provider, e2.provider);
2227    }
2228
2229    // -- TokenAlternative tests --
2230
2231    #[test]
2232    fn test_token_alternative_serializes() {
2233        let alt = TokenAlternative {
2234            token: "hello".to_string(),
2235            probability: 0.75,
2236        };
2237        let json = serde_json::to_string(&alt).expect("serialize");
2238        assert!(json.contains("\"token\":\"hello\""));
2239        assert!(json.contains("\"probability\":0.75") || json.contains("probability"));
2240    }
2241
2242    #[test]
2243    fn test_token_alternative_clone() {
2244        let alt = TokenAlternative {
2245            token: "world".to_string(),
2246            probability: 0.5,
2247        };
2248        let alt2 = alt.clone();
2249        assert_eq!(alt2.token, alt.token);
2250    }
2251
2252    // -- process_content_logprob tests --
2253
2254    #[test]
2255    fn test_process_content_logprob_attaches_confidence() {
2256        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2257        let mut i = make_test_interceptor();
2258        i.web_tx = Some(tx);
2259        // logprob of 0.0 → probability = 1.0 (max confidence)
2260        i.process_content_logprob("hello world", Some(0.0_f32), vec![]);
2261        let ev = rx.try_recv().expect("event");
2262        assert_eq!(ev.confidence, Some(1.0_f32));
2263    }
2264
2265    #[test]
2266    fn test_process_content_logprob_none_gives_none_confidence() {
2267        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2268        let mut i = make_test_interceptor();
2269        i.web_tx = Some(tx);
2270        i.process_content_logprob("hello", None, vec![]);
2271        let ev = rx.try_recv().expect("event");
2272        assert!(ev.confidence.is_none());
2273        assert!(ev.perplexity.is_none());
2274    }
2275
2276    #[test]
2277    fn test_process_content_logprob_computes_perplexity() {
2278        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2279        let mut i = make_test_interceptor();
2280        i.web_tx = Some(tx);
2281        // logprob = -1.0 → perplexity = exp(1.0) ≈ 2.718
2282        i.process_content_logprob("word", Some(-1.0_f32), vec![]);
2283        let ev = rx.try_recv().expect("event");
2284        let perp = ev.perplexity.expect("perplexity present");
2285        assert!(
2286            (perp - std::f32::consts::E).abs() < 0.01,
2287            "expected ~e, got {}",
2288            perp
2289        );
2290    }
2291
2292    #[test]
2293    fn test_process_content_logprob_attaches_alternatives_to_first_token() {
2294        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2295        let mut i = make_test_interceptor();
2296        i.web_tx = Some(tx);
2297        let alts = vec![
2298            TokenAlternative {
2299                token: "hi".to_string(),
2300                probability: 0.9,
2301            },
2302            TokenAlternative {
2303                token: "hey".to_string(),
2304                probability: 0.05,
2305            },
2306        ];
2307        i.process_content_logprob("hello world", Some(-0.1_f32), alts);
2308        let first = rx.try_recv().expect("first token");
2309        assert_eq!(first.alternatives.len(), 2);
2310        // second token gets no alternatives
2311        let second = rx.try_recv().expect("second token");
2312        assert!(second.alternatives.is_empty());
2313    }
2314
2315    #[test]
2316    fn test_process_content_delegates_to_logprob() {
2317        // process_content is a thin wrapper around process_content_logprob
2318        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2319        let mut i = make_test_interceptor();
2320        i.web_tx = Some(tx);
2321        i.process_content("hello");
2322        let ev = rx.try_recv().expect("event");
2323        assert!(ev.confidence.is_none());
2324        assert!(ev.alternatives.is_empty());
2325    }
2326
2327    #[test]
2328    fn test_confidence_serialized_when_some() {
2329        let event = TokenEvent {
2330            text: "hi".to_string(),
2331            original: "hi".to_string(),
2332            index: 0,
2333            transformed: false,
2334            importance: 0.5,
2335            chaos_label: None,
2336            provider: None,
2337            confidence: Some(0.92),
2338            perplexity: Some(1.08),
2339            alternatives: vec![TokenAlternative {
2340                token: "hey".to_string(),
2341                probability: 0.05,
2342            }],
2343            is_error: false,
2344            arrival_ms: None,
2345        };
2346        let json = serde_json::to_string(&event).expect("serialize");
2347        assert!(json.contains("confidence"));
2348        assert!(json.contains("perplexity"));
2349        assert!(json.contains("alternatives"));
2350        assert!(json.contains("hey"));
2351    }
2352
2353    #[test]
2354    fn test_confidence_omitted_when_none() {
2355        let event = TokenEvent {
2356            text: "hi".to_string(),
2357            original: "hi".to_string(),
2358            index: 0,
2359            transformed: false,
2360            importance: 0.5,
2361            chaos_label: None,
2362            provider: None,
2363            confidence: None,
2364            perplexity: None,
2365            alternatives: vec![],
2366            is_error: false,
2367            arrival_ms: None,
2368        };
2369        let json = serde_json::to_string(&event).expect("serialize");
2370        assert!(!json.contains("confidence"));
2371        assert!(!json.contains("perplexity"));
2372        assert!(!json.contains("alternatives"));
2373    }
2374
2375    #[test]
2376    fn test_system_prompt_field_initializes_none() {
2377        let i = make_test_interceptor();
2378        assert!(i.system_prompt.is_none());
2379    }
2380
2381    #[test]
2382    fn test_system_prompt_can_be_set() {
2383        let mut i = make_test_interceptor();
2384        i.system_prompt = Some("Be concise.".to_string());
2385        assert_eq!(i.system_prompt.as_deref(), Some("Be concise."));
2386    }
2387
2388    #[test]
2389    fn test_logprob_confidence_clamps_at_one() {
2390        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2391        let mut i = make_test_interceptor();
2392        i.web_tx = Some(tx);
2393        // logprob > 0 is theoretically invalid but clamp should protect us
2394        i.process_content_logprob("token", Some(2.0_f32), vec![]);
2395        let ev = rx.try_recv().expect("event");
2396        let conf = ev.confidence.expect("confidence");
2397        assert!(conf <= 1.0, "confidence should not exceed 1.0");
2398    }
2399
2400    #[test]
2401    fn test_process_content_logprob_multiple_tokens_only_first_gets_logprob() {
2402        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2403        let mut i = make_test_interceptor();
2404        i.web_tx = Some(tx);
2405        i.process_content_logprob("the quick brown fox", Some(-0.5_f32), vec![]);
2406        let mut events: Vec<TokenEvent> = Vec::new();
2407        while let Ok(ev) = rx.try_recv() {
2408            events.push(ev);
2409        }
2410        assert!(events.len() >= 2);
2411        assert!(
2412            events[0].confidence.is_some(),
2413            "first token should have confidence"
2414        );
2415        assert!(
2416            events[1].confidence.is_none(),
2417            "subsequent tokens should not"
2418        );
2419    }
2420}
2421
#[cfg(test)]
mod research_tests {
    //! Tests for the research pipeline: `ResearchSession` serialization,
    //! interceptor builder methods, headless research runs against the Mock
    //! provider, SSE chunk-drop accounting, and the global circuit breaker.

    use super::*;

    /// Build a [`ResearchSession`] fixture.
    ///
    /// `tokens` drives `total_tokens`, `total_transformed` (half), and the
    /// cost estimate (`tokens / 1000 * $0.002`); `confidence` / `perplexity`
    /// populate the optional aggregate scores. Everything else is fixed.
    fn make_session(
        tokens: usize,
        confidence: Option<f32>,
        perplexity: Option<f32>,
    ) -> ResearchSession {
        ResearchSession {
            prompt: "test prompt".to_string(),
            provider: "openai".to_string(),
            model: "gpt-3.5-turbo".to_string(),
            transform: "Reverse".to_string(),
            runs: 1,
            total_tokens: tokens,
            total_transformed: tokens / 2,
            vocabulary_diversity: 0.8,
            mean_token_length: 4.5,
            mean_perplexity: perplexity.map(|p| p as f64),
            mean_confidence: confidence.map(|c| c as f64),
            top_perplexity_tokens: vec!["word".to_string()],
            estimated_cost_usd: tokens as f64 / 1000.0 * 0.002,
            citation: format!("Every Other Token v4.0.0 | tokens={}", tokens),
        }
    }

    /// Construct a minimal [`TokenInterceptor`] suitable for unit tests.
    ///
    /// Uses a fixed RNG seed, a no-op web channel, and the `Reverse` transform.
    fn make_test_interceptor() -> TokenInterceptor {
        TokenInterceptor {
            client: reqwest::Client::new(),
            api_key: "test-key".to_string(),
            provider: Provider::Openai,
            transform: Transform::Reverse,
            model: "test-model".to_string(),
            token_count: 0,
            transformed_count: 0,
            visual_mode: false,
            heatmap_mode: false,
            orchestrator: false,
            orchestrator_url: "http://localhost:3000".to_string(),
            web_tx: None,
            web_provider_label: None,
            system_prompt: None,
            #[cfg(feature = "self-tune")]
            telemetry_bus: None,
            #[cfg(feature = "self-modify")]
            dedup: None,
            rate: 0.5,
            rng: StdRng::seed_from_u64(42),
            top_logprobs: 5,
            recorder: None,
            json_stream: false,
            pending_delay_ms: 0,
            min_confidence: None,
            last_token_instant: None,
            max_retries: 3,
            anthropic_max_tokens: 4096,
            stream_start_instant: None,
            timeout_secs: None,
        }
    }

    #[test]
    fn test_research_session_serializes_basic_fields() {
        // Round-trip through serde_json and spot-check core fields.
        let s = make_session(10, Some(0.85), Some(2.3));
        let json = serde_json::to_string(&s).expect("serialize");
        let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
        assert_eq!(v["prompt"], "test prompt");
        assert_eq!(v["total_tokens"], 10);
        assert_eq!(v["runs"], 1);
        assert_eq!(v["provider"], "openai");
    }

    #[test]
    fn test_research_session_none_fields_serialize_as_null() {
        // Unlike TokenEvent, ResearchSession serializes None as explicit null.
        let s = make_session(5, None, None);
        let json = serde_json::to_string(&s).expect("serialize");
        let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
        assert!(v["mean_perplexity"].is_null());
        assert!(v["mean_confidence"].is_null());
    }

    #[test]
    fn test_research_session_estimated_cost_scales_with_tokens() {
        // Cost is linear in token count: tokens / 1000 * $0.002.
        let s100 = make_session(100, None, None);
        let s1000 = make_session(1000, None, None);
        assert!(s1000.estimated_cost_usd > s100.estimated_cost_usd);
        assert!((s100.estimated_cost_usd - 0.0002).abs() < 1e-10);
        assert!((s1000.estimated_cost_usd - 0.002).abs() < 1e-10);
    }

    #[test]
    fn test_research_session_vocab_diversity_in_bounds() {
        // Vocabulary diversity is a ratio and must stay within [0, 1].
        let s = make_session(20, None, None);
        assert!(s.vocabulary_diversity >= 0.0 && s.vocabulary_diversity <= 1.0);
    }

    #[test]
    fn test_research_session_top_tokens_at_most_ten() {
        // A session built with a full ten-entry list keeps exactly ten.
        let s = ResearchSession {
            top_perplexity_tokens: (0..10).map(|i| format!("t{}", i)).collect(),
            ..make_session(100, None, None)
        };
        assert_eq!(s.top_perplexity_tokens.len(), 10);
    }

    #[test]
    fn test_research_session_citation_contains_tool_name() {
        // NOTE(review): renamed from `..._contains_prompt` — the fixture
        // citation ("Every Other Token v4.0.0 | tokens=N") never contains the
        // prompt; the assertion checks the tool name, so the name now matches.
        let s = make_session(5, None, None);
        assert!(s.citation.contains("Every Other Token"));
    }

    #[test]
    fn test_research_session_runs_field_roundtrips() {
        // `runs` survives a serialize/deserialize cycle unchanged.
        let s = ResearchSession {
            runs: 42,
            ..make_session(10, None, None)
        };
        let json = serde_json::to_string(&s).expect("serialize");
        let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
        assert_eq!(v["runs"], 42);
    }

    #[test]
    fn test_research_session_transform_field() {
        let s = make_session(10, None, None);
        assert_eq!(s.transform, "Reverse");
    }

    // -- with_rate tests --

    #[test]
    fn test_with_rate_sets_rate() {
        let mut i = make_test_interceptor();
        i = i.with_rate(0.3);
        assert!((i.rate - 0.3).abs() < 1e-9);
    }

    #[test]
    fn test_with_rate_clamps_above_one() {
        // Out-of-range rates are clamped into [0, 1].
        let mut i = make_test_interceptor();
        i = i.with_rate(1.5);
        assert_eq!(i.rate, 1.0);
    }

    #[test]
    fn test_with_rate_clamps_below_zero() {
        let mut i = make_test_interceptor();
        i = i.with_rate(-0.5);
        assert_eq!(i.rate, 0.0);
    }

    #[test]
    fn test_with_rate_zero_transforms_no_tokens() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i = i.with_rate(0.0);
        i.web_tx = Some(tx);
        i.process_content("hello world foo bar");
        let mut transformed = 0usize;
        while let Ok(ev) = rx.try_recv() {
            if ev.transformed {
                transformed += 1;
            }
        }
        assert_eq!(transformed, 0, "rate=0 should transform no tokens");
    }

    #[test]
    fn test_with_rate_one_transforms_all_tokens() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i = i.with_rate(1.0);
        i.web_tx = Some(tx);
        i.process_content("hello world foo bar baz");
        let mut total = 0usize;
        let mut transformed = 0usize;
        while let Ok(ev) = rx.try_recv() {
            total += 1;
            if ev.transformed {
                transformed += 1;
            }
        }
        assert!(total > 0);
        assert_eq!(transformed, total, "rate=1.0 should transform every token");
    }

    // -- with_seed tests --

    #[test]
    fn test_with_seed_produces_deterministic_noise_output() {
        // Two interceptors with the same seed and Noise transform should produce
        // the same transformed tokens.
        let (tx1, mut rx1) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i1 = make_test_interceptor();
        i1.transform = Transform::Noise;
        i1 = i1.with_seed(12345);
        i1.web_tx = Some(tx1);
        i1.process_content("hello world");

        let (tx2, mut rx2) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i2 = make_test_interceptor();
        i2.transform = Transform::Noise;
        i2 = i2.with_seed(12345);
        i2.web_tx = Some(tx2);
        i2.process_content("hello world");

        let events1: Vec<TokenEvent> = std::iter::from_fn(|| rx1.try_recv().ok()).collect();
        let events2: Vec<TokenEvent> = std::iter::from_fn(|| rx2.try_recv().ok()).collect();

        assert_eq!(events1.len(), events2.len());
        for (e1, e2) in events1.iter().zip(events2.iter()) {
            assert_eq!(
                e1.text, e2.text,
                "seeded runs should produce identical output"
            );
        }
    }

    #[test]
    fn test_with_seed_different_seeds_may_differ() {
        // Different seeds should (in practice) produce at least one different token
        // for the Noise transform over a sufficiently long sequence.
        // NOTE(review): asserting texts1 != texts2 would be stronger but is not
        // guaranteed by the transform's contract, so this test only checks that
        // both seeded runs produce output.
        let (tx1, mut rx1) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i1 = make_test_interceptor();
        i1.transform = Transform::Noise;
        i1 = i1.with_seed(1);
        i1.web_tx = Some(tx1);
        i1.process_content("alpha beta gamma delta epsilon zeta eta theta iota kappa");

        let (tx2, mut rx2) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i2 = make_test_interceptor();
        i2.transform = Transform::Noise;
        i2 = i2.with_seed(999999);
        i2.web_tx = Some(tx2);
        i2.process_content("alpha beta gamma delta epsilon zeta eta theta iota kappa");

        let texts1: Vec<String> = std::iter::from_fn(|| rx1.try_recv().ok())
            .map(|e| e.text)
            .collect();
        let texts2: Vec<String> = std::iter::from_fn(|| rx2.try_recv().ok())
            .map(|e| e.text)
            .collect();

        // At minimum, both should produce some output
        assert!(!texts1.is_empty());
        assert!(!texts2.is_empty());
    }

    // -- run_research_headless tests (Mock provider, no API key required) --

    #[tokio::test]
    async fn test_run_research_headless_mock_returns_session() {
        let session = run_research_headless(
            "test prompt",
            Provider::Mock,
            Transform::Reverse,
            "mock-fixture-v1".to_string(),
            1,
        )
        .await
        .expect("run_research_headless with Mock should not fail");
        assert_eq!(session.runs, 1);
        assert_eq!(session.prompt, "test prompt");
        assert_eq!(session.provider, "mock");
    }

    #[tokio::test]
    async fn test_run_research_headless_mock_token_count_positive() {
        let session = run_research_headless(
            "hello",
            Provider::Mock,
            Transform::Uppercase,
            "mock-fixture-v1".to_string(),
            1,
        )
        .await
        .expect("should succeed");
        assert!(session.total_tokens > 0, "mock provider should emit tokens");
    }

    #[tokio::test]
    async fn test_run_research_headless_mock_multiple_runs_accumulate() {
        let session = run_research_headless(
            "hello",
            Provider::Mock,
            Transform::Reverse,
            "mock-fixture-v1".to_string(),
            3,
        )
        .await
        .expect("should succeed");
        assert_eq!(session.runs, 3);
    }

    #[tokio::test]
    async fn test_run_research_headless_mock_vocab_diversity_in_bounds() {
        let session = run_research_headless(
            "test",
            Provider::Mock,
            Transform::Reverse,
            "mock-fixture-v1".to_string(),
            1,
        )
        .await
        .expect("should succeed");
        assert!(session.vocabulary_diversity >= 0.0);
        assert!(session.vocabulary_diversity <= 1.0);
    }

    #[tokio::test]
    async fn test_run_research_headless_mock_transform_label_in_citation() {
        let session = run_research_headless(
            "sample",
            Provider::Mock,
            Transform::Uppercase,
            "mock-fixture-v1".to_string(),
            1,
        )
        .await
        .expect("should succeed");
        assert!(session.citation.contains("Every Other Token"));
    }

    #[tokio::test]
    async fn test_run_research_headless_empty_prompt_returns_error() {
        let result = run_research_headless(
            "",
            Provider::Mock,
            Transform::Reverse,
            "mock-fixture-v1".to_string(),
            1,
        )
        .await;
        assert!(result.is_err(), "empty prompt should produce an error");
    }

    // -- Item 1: timeout_secs field --

    #[test]
    fn test_timeout_field_default() {
        // timeout_secs defaults to None and is set by the with_timeout builder.
        let interceptor = TokenInterceptor::new(
            Provider::Mock,
            Transform::Reverse,
            "mock-fixture-v1".to_string(),
            false,
            false,
            false,
        )
        .unwrap();
        assert_eq!(interceptor.timeout_secs, None);
        let with_timeout = interceptor.with_timeout(120);
        assert_eq!(with_timeout.timeout_secs, Some(120));
    }

    // -- Item 2: dropped SSE chunk counter --

    /// Count SSE `data:` lines (excluding the `[DONE]` sentinel) whose payload
    /// is not valid JSON — i.e. chunks the stream parser would drop.
    fn count_dropped_sse_chunks_test(lines: &[&str]) -> usize {
        lines
            .iter()
            .filter_map(|line| line.strip_prefix("data: "))
            .filter(|payload| *payload != "[DONE]")
            .filter(|payload| serde_json::from_str::<serde_json::Value>(payload).is_err())
            .count()
    }

    #[test]
    fn test_dropped_chunk_counter_increments() {
        // Two malformed payloads among valid chunks and the DONE sentinel.
        let lines = vec![
            "data: {\"valid\": true}",
            "data: not-valid-json",
            "data: also-bad",
            "data: {\"ok\": 1}",
            "data: [DONE]",
        ];
        let dropped = count_dropped_sse_chunks_test(&lines);
        assert_eq!(dropped, 2);
    }

    // -- Item 3 & 19: circuit breaker helpers --

    /// Serializes the circuit-breaker tests below. `CIRCUIT_BREAKER` is
    /// process-global state and the default test harness runs tests on
    /// multiple threads, so without this lock the two tests could interleave
    /// their failure recording and flake.
    static CB_TEST_GUARD: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Return the global circuit-breaker cell, initialising it to the closed
    /// state (no failures, not open) on first use.
    fn cb_state() -> &'static std::sync::Mutex<CircuitBreakerState> {
        CIRCUIT_BREAKER.get_or_init(|| {
            std::sync::Mutex::new(CircuitBreakerState {
                consecutive_failures: 0,
                open_until_ms: 0,
            })
        })
    }

    /// Force the breaker back to a closed state with no recorded failures.
    fn reset_circuit_breaker_for_test() {
        if let Ok(mut s) = cb_state().lock() {
            s.consecutive_failures = 0;
            s.open_until_ms = 0;
        }
    }

    #[test]
    fn test_circuit_breaker_429_does_not_trip() {
        // Tolerate poisoning so a failed sibling test cannot cascade here.
        let _guard = CB_TEST_GUARD.lock().unwrap_or_else(|p| p.into_inner());
        reset_circuit_breaker_for_test();
        // Record failures up to threshold-1 — still not tripped
        for _ in 0..(CB_TRIP_THRESHOLD - 1) {
            circuit_record_failure();
        }
        assert!(!circuit_is_open(), "should not be open before threshold");
        // Simulating a 429: the retry logic skips circuit_record_failure for 429,
        // so no additional failure is recorded — breaker remains closed.
        assert!(!circuit_is_open(), "429 should not trip the breaker");
    }

    #[test]
    fn test_circuit_breaker_closes_after_recovery_timeout() {
        // NOTE(review): renamed from `..._reopens_after_timeout` — the test
        // verifies the breaker CLOSES again once the recovery window elapses.
        let _guard = CB_TEST_GUARD.lock().unwrap_or_else(|p| p.into_inner());
        reset_circuit_breaker_for_test();
        for _ in 0..CB_TRIP_THRESHOLD {
            circuit_record_failure();
        }
        assert!(circuit_is_open(), "breaker should be open after threshold");
        // Fast-forward recovery by setting open_until_ms to the past
        if let Ok(mut s) = cb_state().lock() {
            s.open_until_ms = 1; // epoch 1ms — definitely in the past
        }
        assert!(!circuit_is_open(), "breaker should close after recovery timeout passes");
    }
}
2851
2852// ---------------------------------------------------------------------------
2853// WASM stub
2854// ---------------------------------------------------------------------------
2855
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
mod wasm_support {
    //! Minimal `wasm-bindgen` surface for the `wasm` feature flag.
    use wasm_bindgen::prelude::*;

    /// Placeholder entry point exported to JavaScript.
    ///
    /// Currently returns a fixed status string; the streaming pipeline is not
    /// yet wired up for the WASM target.
    #[wasm_bindgen]
    pub fn wasm_run() -> JsValue {
        JsValue::from_str("wasm not yet fully implemented")
    }
}
2865
2866#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
2867pub use wasm_support::wasm_run;