1pub mod cli;
34pub mod collab;
35pub mod config;
36pub mod error;
37pub mod heatmap;
38pub mod providers;
39pub mod render;
40pub mod replay;
41pub mod research;
42pub mod store;
43pub mod transforms;
44pub mod web;
45
46#[cfg(feature = "self-tune")]
47pub mod self_tune;
48
49#[cfg(feature = "self-modify")]
50pub mod self_modify;
51
52#[cfg(feature = "self-modify")]
53pub mod semantic_dedup;
54
55#[cfg(feature = "helix-bridge")]
56pub mod helix_bridge;
57
58#[cfg(feature = "sqlite-log")]
59pub mod experiment_log;
60
#[cfg(feature = "intelligence")]
pub mod intelligence {
    //! Placeholder for the `intelligence` feature.
    //!
    //! Currently empty; the test below only asserts that the stub compiles
    //! when the feature is enabled.
    #[cfg(test)]
    mod tests {
        #[test]
        fn stub_compiles() {}
    }
}
72
#[cfg(feature = "evolution")]
pub mod evolution {
    //! Placeholder for the `evolution` feature.
    //!
    //! Currently empty; the test below only asserts that the stub compiles
    //! when the feature is enabled.
    #[cfg(test)]
    mod tests {
        #[test]
        fn stub_compiles() {}
    }
}
84
85use colored::*;
86use rand::rngs::StdRng;
87use rand::SeedableRng;
88use reqwest::Client;
89use serde::{Deserialize, Serialize};
90use std::env;
91use std::io::{self, Write};
92use tokio::sync::mpsc;
93use tokio_stream::StreamExt;
94
95use providers::*;
96use transforms::{apply_heatmap_color, calculate_token_importance, tokenize, Transform};
97
/// One alternative token the model considered at a stream position,
/// paired with its linear probability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenAlternative {
    /// The candidate token text.
    pub token: String,
    /// Linear probability (exp of the logprob), clamped to [0.0, 1.0]
    /// where it is produced in this crate.
    pub probability: f32,
}
114
/// A single token observation, emitted to the web channel, the recorder,
/// or the JSON stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenEvent {
    /// Token text after any transform was applied (what gets displayed).
    pub text: String,
    /// The untransformed token as received from the provider.
    pub original: String,
    /// Zero-based position of the token in the stream.
    pub index: usize,
    /// Whether a transform was applied to this token.
    pub transformed: bool,
    /// Importance score used for heatmap coloring (the confidence value
    /// when known, otherwise a heuristic estimate).
    pub importance: f64,
    /// Label of the chaos sub-transform applied (set for `Transform::Chaos`
    /// and for deleted tokens).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chaos_label: Option<String>,
    /// Optional provider label, used by multi-provider web sessions.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,
    /// Linear token probability (exp of the logprob), when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
    /// Per-token perplexity (exp of the negated logprob), when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub perplexity: Option<f32>,
    /// Alternative tokens the model considered at this position.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub alternatives: Vec<TokenAlternative>,
    /// True when this event reports an error condition rather than a token.
    #[serde(default)]
    pub is_error: bool,
    /// Milliseconds elapsed since stream start when this token arrived.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arrival_ms: Option<u64>,
}
160
/// Streams tokens from an LLM provider, optionally transforming a fraction
/// of them, and fans the results out to stdout, a web channel, a recorder,
/// or a JSON line stream.
pub struct TokenInterceptor {
    /// HTTP client reused across requests.
    client: Client,
    /// Provider API key (empty for `Provider::Mock`).
    api_key: String,
    /// Which backend to stream from.
    pub provider: Provider,
    /// Transform applied to selected tokens.
    pub transform: Transform,
    /// Model identifier sent to the provider.
    pub model: String,
    /// Running count of processed tokens.
    pub token_count: usize,
    /// Running count of tokens that were transformed.
    pub transformed_count: usize,
    /// Highlight transformed tokens in terminal output.
    pub visual_mode: bool,
    /// Color tokens by importance in terminal output.
    pub heatmap_mode: bool,
    /// Route the prompt through the MCP orchestrator first.
    pub orchestrator: bool,
    /// Orchestrator endpoint (defaults to http://localhost:3000).
    pub orchestrator_url: String,
    /// When set, events are sent here instead of being printed.
    pub web_tx: Option<mpsc::UnboundedSender<TokenEvent>>,
    /// Label attached to events for multi-provider web sessions.
    pub web_provider_label: Option<String>,
    /// Optional system prompt prepended to the conversation.
    pub system_prompt: Option<String>,
    /// Telemetry sink for the self-tune feature.
    #[cfg(feature = "self-tune")]
    pub telemetry_bus: Option<std::sync::Arc<crate::self_tune::telemetry_bus::TelemetryBus>>,
    /// Prompt dedup cache for the self-modify feature.
    #[cfg(feature = "self-modify")]
    pub dedup: Option<std::sync::Arc<std::sync::Mutex<crate::semantic_dedup::SemanticDedup>>>,
    /// Fraction of tokens to transform, clamped to [0.0, 1.0].
    pub rate: f64,
    /// Number of top logprobs requested from OpenAI.
    pub top_logprobs: u8,
    /// Seedable RNG so runs can be made reproducible.
    rng: StdRng,
    /// Optional session recorder for replay.
    pub recorder: Option<crate::replay::Recorder>,
    /// Emit events as JSON lines on stdout instead of styled text.
    pub json_stream: bool,
    /// Delay (ms) queued by `Transform::Delay`, consumed before the next chunk.
    pending_delay_ms: u64,
    /// When set, transform tokens whose confidence is at or below this value.
    pub min_confidence: Option<f64>,
    /// Arrival time of the previous token (used for timing-based confidence).
    last_token_instant: Option<std::time::Instant>,
    /// Max attempts for retryable HTTP failures.
    pub max_retries: u32,
    /// `max_tokens` sent in Anthropic requests.
    pub anthropic_max_tokens: u32,
    /// When the current stream started (for `arrival_ms`).
    stream_start_instant: Option<std::time::Instant>,
    /// Optional overall stream timeout in seconds.
    pub timeout_secs: Option<u64>,
}
239
/// Process-wide circuit breaker shared by all interceptors; lazily
/// initialized on first use.
static CIRCUIT_BREAKER: std::sync::OnceLock<
    std::sync::Mutex<CircuitBreakerState>,
> = std::sync::OnceLock::new();

/// Mutable breaker state, guarded by the mutex in `CIRCUIT_BREAKER`.
struct CircuitBreakerState {
    /// Back-to-back failure count; reset to 0 on any success.
    consecutive_failures: u32,
    /// Unix time (ms) until which the breaker stays open; 0 means closed.
    open_until_ms: u64,
}

/// Consecutive failures required to trip the breaker open.
const CB_TRIP_THRESHOLD: u32 = 5;
/// How long the breaker stays open once tripped (30 s).
const CB_RECOVERY_MS: u64 = 30_000;
265
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch, so the
/// breaker logic never has to handle a clock error.
fn now_unix_ms() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
272
273fn circuit_is_open() -> bool {
276 let state = CIRCUIT_BREAKER.get_or_init(|| {
277 std::sync::Mutex::new(CircuitBreakerState {
278 consecutive_failures: 0,
279 open_until_ms: 0,
280 })
281 });
282 if let Ok(s) = state.lock() {
283 s.open_until_ms > now_unix_ms()
284 } else {
285 false
286 }
287}
288
289fn circuit_record_success() {
290 let state = CIRCUIT_BREAKER.get_or_init(|| {
291 std::sync::Mutex::new(CircuitBreakerState {
292 consecutive_failures: 0,
293 open_until_ms: 0,
294 })
295 });
296 if let Ok(mut s) = state.lock() {
297 s.consecutive_failures = 0;
298 s.open_until_ms = 0;
299 }
300}
301
302fn circuit_record_failure() {
303 let state = CIRCUIT_BREAKER.get_or_init(|| {
304 std::sync::Mutex::new(CircuitBreakerState {
305 consecutive_failures: 0,
306 open_until_ms: 0,
307 })
308 });
309 if let Ok(mut s) = state.lock() {
310 s.consecutive_failures += 1;
311 if s.consecutive_failures >= CB_TRIP_THRESHOLD {
312 s.open_until_ms = now_unix_ms() + CB_RECOVERY_MS;
313 tracing::warn!(
314 consecutive_failures = s.consecutive_failures,
315 recovery_ms = CB_RECOVERY_MS,
316 "circuit breaker tripped — blocking requests for recovery period"
317 );
318 }
319 }
320}
321
322async fn execute_with_retry(
331 client: &reqwest::Client,
332 req: reqwest::Request,
333 max_attempts: u32,
334) -> Result<reqwest::Response, Box<dyn std::error::Error + Send + Sync>> {
335 if circuit_is_open() {
336 return Err("circuit breaker open — provider unavailable, try again shortly".into());
337 }
338
339 let mut last_err: Option<String> = None;
340 for attempt in 0..max_attempts {
341 if attempt > 0 {
342 let delay_ms = 400u64 * (1u64 << attempt.min(4));
343 tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
344 tracing::warn!(attempt, "retrying API request after transient error");
345 }
346 let to_send = match req.try_clone() {
347 Some(r) => r,
348 None => {
349 return client.execute(req).await.map_err(|e| e.into());
351 }
352 };
353 match client.execute(to_send).await {
354 Ok(resp) => {
355 let status = resp.status().as_u16();
356 if attempt + 1 < max_attempts
357 && (status == 429 || status == 500 || status == 502 || status == 503)
358 {
359 tracing::warn!(status, attempt, "got retryable HTTP status");
360 last_err = Some(format!("HTTP {status}"));
361 if status != 429 {
364 circuit_record_failure();
365 }
366 continue;
367 }
368 circuit_record_success();
369 return Ok(resp);
370 }
371 Err(e) => {
372 circuit_record_failure();
373 if attempt + 1 < max_attempts {
374 tracing::warn!(error = %e, attempt, "network error, will retry");
375 last_err = Some(e.to_string());
376 } else {
377 return Err(Box::new(e));
378 }
379 }
380 }
381 }
382 Err(last_err
383 .unwrap_or_else(|| "max retries exceeded".to_string())
384 .into())
385}
386
387impl TokenInterceptor {
    /// Creates an interceptor with defaults (rate 0.5, 3 retries, entropy-seeded RNG).
    ///
    /// Reads the provider API key from `OPENAI_API_KEY` or `ANTHROPIC_API_KEY`
    /// and fails if the required variable is unset; `Provider::Mock` needs no
    /// key. Keys with an unexpected prefix produce a warning but are accepted.
    pub fn new(
        provider: Provider,
        transform: Transform,
        model: String,
        visual_mode: bool,
        heatmap_mode: bool,
        orchestrator: bool,
    ) -> Result<Self, Box<dyn std::error::Error>> {
        let api_key = match provider {
            Provider::Openai => {
                let key = env::var("OPENAI_API_KEY")
                    .map_err(|_| "OPENAI_API_KEY not set. Export it or pass via environment.")?;
                // Sanity check only — some valid keys may not match, so warn
                // rather than fail.
                if !key.starts_with("sk-") {
                    eprintln!(
                        "[warn] OPENAI_API_KEY does not start with 'sk-' — verify it is correct"
                    );
                }
                key
            }
            Provider::Anthropic => {
                let key = env::var("ANTHROPIC_API_KEY")
                    .map_err(|_| "ANTHROPIC_API_KEY not set. Export it or pass via environment.")?;
                if !key.starts_with("sk-ant-") {
                    eprintln!("[warn] ANTHROPIC_API_KEY does not start with 'sk-ant-' — verify it is correct");
                }
                key
            }
            Provider::Mock => String::new(),
        };

        Ok(TokenInterceptor {
            client: Client::new(),
            api_key,
            provider,
            transform,
            model,
            token_count: 0,
            transformed_count: 0,
            visual_mode,
            heatmap_mode,
            orchestrator,
            orchestrator_url: "http://localhost:3000".to_string(),
            web_tx: None,
            web_provider_label: None,
            system_prompt: None,
            #[cfg(feature = "self-tune")]
            telemetry_bus: None,
            #[cfg(feature = "self-modify")]
            dedup: None,
            rate: 0.5,
            top_logprobs: 5,
            rng: StdRng::from_entropy(),
            recorder: None,
            json_stream: false,
            pending_delay_ms: 0,
            min_confidence: None,
            last_token_instant: None,
            max_retries: 3,
            anthropic_max_tokens: 4096,
            stream_start_instant: None,
            timeout_secs: None,
        })
    }
460
    /// Sets the fraction of tokens to transform, clamped to `[0.0, 1.0]`.
    pub fn with_rate(mut self, rate: f64) -> Self {
        debug_assert!(rate.is_finite(), "with_rate: rate must be finite, got {}", rate);
        self.rate = rate.clamp(0.0, 1.0);
        self
    }

    /// Seeds the internal RNG for reproducible runs.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.rng = StdRng::seed_from_u64(seed);
        self
    }

    /// Routes token events to `tx` instead of printing them.
    pub fn with_web_tx(mut self, tx: mpsc::UnboundedSender<TokenEvent>) -> Self {
        self.web_tx = Some(tx);
        self
    }

    /// Labels emitted events with a provider name (for multi-provider sessions).
    pub fn with_provider_label(mut self, label: impl Into<String>) -> Self {
        self.web_provider_label = Some(label.into());
        self
    }

    /// Sets a system prompt sent ahead of the user prompt.
    pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
        self.system_prompt = Some(prompt.into());
        self
    }

    /// Sets how many top logprobs to request per token (OpenAI only).
    pub fn with_top_logprobs(mut self, n: u8) -> Self {
        self.top_logprobs = n;
        self
    }

    /// Enables or disables JSON-lines output on stdout.
    pub fn with_json_stream(mut self, enabled: bool) -> Self {
        self.json_stream = enabled;
        self
    }

    /// Overrides the MCP orchestrator endpoint URL.
    pub fn with_orchestrator_url(mut self, url: impl Into<String>) -> Self {
        self.orchestrator_url = url.into();
        self
    }

    /// Sets the maximum number of attempts for retryable request failures.
    pub fn with_max_retries(mut self, n: u32) -> Self {
        self.max_retries = n;
        self
    }

    /// Sets an overall wall-clock timeout (seconds) for the whole stream.
    pub fn with_timeout(mut self, secs: u64) -> Self {
        self.timeout_secs = Some(secs);
        self
    }

    /// Only transform tokens whose confidence is at or below `threshold`
    /// (overrides the rate-based selection when confidence is known).
    pub fn with_min_confidence(mut self, threshold: f64) -> Self {
        self.min_confidence = Some(threshold);
        self
    }
532
    /// Enables semantic prompt deduplication (self-modify feature).
    ///
    /// Repeated prompts seen within `ttl_ms` are skipped by
    /// `intercept_stream_inner`; the cache holds at most `capacity` entries.
    #[cfg(feature = "self-modify")]
    pub fn enable_dedup(&mut self, ttl_ms: u64, capacity: usize) {
        use crate::semantic_dedup::{DedupConfig, SemanticDedup};
        let sd = SemanticDedup::new(DedupConfig { ttl_ms, capacity });
        self.dedup = Some(std::sync::Arc::new(std::sync::Mutex::new(sd)));
    }
546
547 pub async fn intercept_stream(
561 &mut self,
562 prompt: &str,
563 ) -> Result<(), Box<dyn std::error::Error>> {
564 let timeout_duration = self.timeout_secs.map(std::time::Duration::from_secs);
565 if let Some(duration) = timeout_duration {
566 return match tokio::time::timeout(duration, self.intercept_stream_inner(prompt)).await {
567 Ok(result) => result,
568 Err(_) => Err(format!(
569 "stream timed out after {} seconds",
570 duration.as_secs()
571 )
572 .into()),
573 };
574 }
575 self.intercept_stream_inner(prompt).await
576 }
577
    /// Core pipeline: validates the prompt, applies dedup (feature-gated),
    /// optionally enriches the prompt via the orchestrator, then dispatches
    /// to the provider-specific streaming method.
    async fn intercept_stream_inner(
        &mut self,
        prompt: &str,
    ) -> Result<(), Box<dyn std::error::Error>> {
        // Anchor for per-token arrival_ms timestamps.
        self.stream_start_instant = Some(std::time::Instant::now());
        tracing::info!(
            provider = %self.provider,
            model = %self.model,
            prompt_len = prompt.len(),
            "starting token stream interception",
        );

        if prompt.trim().is_empty() {
            tracing::error!("prompt is empty — aborting");
            return Err("Prompt must not be empty".into());
        }
        // Hard byte-length cap to avoid pathological provider requests.
        if prompt.len() > 512_000 {
            return Err(format!(
                "Prompt is too long ({} bytes; max 512 KB). Use a shorter prompt.",
                prompt.len()
            )
            .into());
        }

        // Prompt deduplication: skip the whole stream if this prompt was seen
        // recently, otherwise register it for future checks.
        #[cfg(feature = "self-modify")]
        {
            use std::time::{SystemTime, UNIX_EPOCH};
            let now_ms = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .map(|d| d.as_millis() as u64)
                .unwrap_or(0);

            if let Some(dedup_arc) = &self.dedup {
                if let Ok(mut guard) = dedup_arc.lock() {
                    if let Some(entry) = guard.check(prompt, now_ms) {
                        let hits = entry.hit_count;
                        // Release the lock before doing any channel I/O.
                        drop(guard);
                        let msg = format!(
                            "[dedup] Skipping duplicate prompt (seen {} time{} recently, TTL active)",
                            hits,
                            if hits == 1 { "" } else { "s" },
                        );
                        if let Some(tx) = &self.web_tx {
                            let evt = TokenEvent {
                                text: msg.clone(),
                                original: prompt.to_string(),
                                index: 0,
                                transformed: false,
                                importance: 0.0,
                                chaos_label: None,
                                provider: self.web_provider_label.clone(),
                                confidence: None,
                                perplexity: None,
                                alternatives: vec![],
                                is_error: false,
                                arrival_ms: None,
                            };
                            let _ = tx.send(evt);
                        } else {
                            eprintln!("{}", msg);
                        }
                        return Ok(());
                    } else {
                        guard.register(prompt, String::new(), now_ms);
                    }
                }
            }
        }

        if self.web_tx.is_none() {
            self.print_header(prompt);
        }

        // Orchestrator enrichment is best-effort: on failure we report the
        // error (to the web channel if attached) and fall back to the raw prompt.
        let effective_prompt = if self.orchestrator {
            eprintln!(
                "{}",
                "[orchestrator] routing through MCP pipeline at localhost:3000".bright_magenta()
            );
            match self.orchestrator_infer(prompt).await {
                Ok(enriched) => enriched,
                Err(e) => {
                    eprintln!(
                        "{} {}",
                        "[orchestrator] pipeline unavailable, using raw prompt:".bright_red(),
                        e
                    );
                    if let Some(tx) = &self.web_tx {
                        let evt = TokenEvent {
                            text: format!("[orchestrator error] {}", e),
                            original: String::new(),
                            index: 0,
                            transformed: false,
                            importance: 0.0,
                            chaos_label: None,
                            provider: self.web_provider_label.clone(),
                            confidence: None,
                            perplexity: None,
                            alternatives: vec![],
                            is_error: true,
                            arrival_ms: None,
                        };
                        let _ = tx.send(evt);
                    }
                    prompt.to_string()
                }
            }
        } else {
            prompt.to_string()
        };

        match self.provider {
            Provider::Openai => self.stream_openai(&effective_prompt).await?,
            Provider::Anthropic => self.stream_anthropic(&effective_prompt).await?,
            Provider::Mock => self.stream_mock(&effective_prompt).await?,
        }

        if self.web_tx.is_none() {
            self.print_footer();
        }
        Ok(())
    }
712
    /// Streams a chat completion from OpenAI over SSE, forwarding each content
    /// delta (with its logprob and top alternatives) to the token pipeline.
    async fn stream_openai(&mut self, prompt: &str) -> Result<(), Box<dyn std::error::Error>> {
        let mut messages = Vec::new();
        if let Some(sys) = &self.system_prompt {
            messages.push(OpenAIChatMessage {
                role: "system".to_string(),
                content: sys.clone(),
            });
        }
        messages.push(OpenAIChatMessage {
            role: "user".to_string(),
            content: prompt.to_string(),
        });
        let request = OpenAIChatRequest {
            model: self.model.clone(),
            messages,
            stream: true,
            temperature: 0.7,
            logprobs: true,
            top_logprobs: self.top_logprobs,
        };

        let req = self
            .client
            .post("https://api.openai.com/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", self.api_key))
            .header("Content-Type", "application/json")
            .json(&request)
            .build()?;

        let response = execute_with_retry(&self.client, req, self.max_retries)
            .await
            .map_err(|e| -> Box<dyn std::error::Error> { e.to_string().into() })?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(format!("OpenAI API error: {}", error_text).into());
        }

        let mut stream = response.bytes_stream();
        // Byte chunks may split SSE lines arbitrarily, so accumulate into a
        // buffer and only consume complete newline-terminated lines.
        let mut buffer = String::new();
        let mut dropped_chunks: usize = 0;

        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            let chunk_str = match std::str::from_utf8(&chunk) {
                Ok(s) => s.to_string(),
                Err(e) => {
                    tracing::warn!(error = %e, "invalid UTF-8 in OpenAI stream chunk — skipping");
                    continue;
                }
            };
            buffer.push_str(&chunk_str);

            while let Some(line_end) = buffer.find('\n') {
                let line = buffer[..line_end].trim().to_string();
                buffer.drain(..=line_end);

                if line.starts_with("data: ") && line != "data: [DONE]" {
                    let json_str = line.strip_prefix("data: ").unwrap_or(&line);
                    match serde_json::from_str::<OpenAIChunk>(json_str) {
                        Ok(parsed) => {
                            if let Some(choice) = parsed.choices.first() {
                                if let Some(content) = &choice.delta.content {
                                    // Convert the first logprob entry (if any)
                                    // into (logprob, linear-probability alts).
                                    let (log_prob, top_alts) = choice
                                        .logprobs
                                        .as_ref()
                                        .and_then(|lp| lp.content.first())
                                        .map(|lc| {
                                            let alts = lc
                                                .top_logprobs
                                                .iter()
                                                .map(|t| TokenAlternative {
                                                    token: t.token.clone(),
                                                    probability: t.logprob.exp().clamp(0.0, 1.0),
                                                })
                                                .collect::<Vec<_>>();
                                            (Some(lc.logprob), alts)
                                        })
                                        .unwrap_or((None, vec![]));
                                    self.process_content_logprob(content, log_prob, top_alts);
                                    // Honor a delay queued by Transform::Delay,
                                    // then clear it so it fires only once.
                                    if self.pending_delay_ms > 0 {
                                        tokio::time::sleep(std::time::Duration::from_millis(
                                            self.pending_delay_ms,
                                        ))
                                        .await;
                                        self.pending_delay_ms = 0;
                                    }
                                }
                            }
                        }
                        Err(_) => {
                            tracing::warn!(line = %json_str, "failed to parse SSE chunk; skipping");
                            dropped_chunks += 1;
                        }
                    }
                }
            }
        }

        if dropped_chunks > 0 {
            tracing::warn!(dropped_chunks, "SSE chunks were dropped during stream");
        }

        Ok(())
    }
825
    /// Streams a message from Anthropic over SSE.
    ///
    /// Anthropic exposes no logprobs, so a synthetic confidence is derived
    /// from inter-token arrival timing instead (faster arrival => higher
    /// confidence), then converted back to a pseudo-logprob for the pipeline.
    async fn stream_anthropic(&mut self, prompt: &str) -> Result<(), Box<dyn std::error::Error>> {
        tracing::debug!(
            "Anthropic stream: logprobs unavailable; confidence/perplexity will be None"
        );
        if self.web_tx.is_none() {
            eprintln!("[info] Anthropic does not provide logprobs — confidence metrics will be unavailable for this run");
        }

        let request = AnthropicRequest {
            model: self.model.clone(),
            messages: vec![AnthropicMessage {
                role: "user".to_string(),
                content: prompt.to_string(),
            }],
            max_tokens: self.anthropic_max_tokens,
            stream: true,
            temperature: 0.7,
            system: self.system_prompt.clone(),
        };

        let req = self
            .client
            .post("https://api.anthropic.com/v1/messages")
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", providers::ANTHROPIC_API_VERSION)
            .header("Content-Type", "application/json")
            .json(&request)
            .build()?;

        let response = execute_with_retry(&self.client, req, self.max_retries)
            .await
            .map_err(|e| -> Box<dyn std::error::Error> { e.to_string().into() })?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(format!("Anthropic API error: {}", error_text).into());
        }

        let mut stream = response.bytes_stream();
        // Accumulate byte chunks and consume only complete SSE lines.
        let mut buffer = String::new();
        let mut dropped_chunks: usize = 0;

        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            let chunk_str = match std::str::from_utf8(&chunk) {
                Ok(s) => s.to_string(),
                Err(e) => {
                    tracing::warn!(error = %e, "invalid UTF-8 in Anthropic stream chunk — skipping");
                    continue;
                }
            };
            buffer.push_str(&chunk_str);

            while let Some(line_end) = buffer.find('\n') {
                let line = buffer[..line_end].trim().to_string();
                buffer.drain(..=line_end);

                if line.starts_with("data: ") {
                    let json_str = line.strip_prefix("data: ").unwrap_or(&line);
                    match serde_json::from_str::<AnthropicStreamEvent>(json_str) {
                        Ok(event) => {
                            if event.event_type == "content_block_delta" {
                                if let Some(delta) = &event.delta {
                                    if let Some(text) = &delta.text {
                                        let now = std::time::Instant::now();
                                        // Timing heuristic: map inter-token gap
                                        // [0ms, 500ms+] linearly onto [0.9, 0.1].
                                        let timing_confidence = if let Some(last) =
                                            self.last_token_instant
                                        {
                                            let delta_ms = now.duration_since(last).as_millis() as f64;
                                            let conf = (1.0 - (delta_ms / 500.0).min(1.0)) * 0.8 + 0.1;
                                            Some(conf as f32)
                                        } else {
                                            None
                                        };
                                        self.last_token_instant = Some(now);
                                        // Pseudo-logprob (floored at -10) so the
                                        // shared pipeline can derive metrics.
                                        let timing_logprob =
                                            timing_confidence.map(|c| c.ln().max(-10.0));
                                        self.process_content_logprob(text, timing_logprob, vec![]);
                                        if self.pending_delay_ms > 0 {
                                            tokio::time::sleep(std::time::Duration::from_millis(
                                                self.pending_delay_ms,
                                            ))
                                            .await;
                                            self.pending_delay_ms = 0;
                                        }
                                    }
                                }
                            }
                        }
                        Err(_) => {
                            tracing::warn!(line = %json_str, "failed to parse SSE chunk; skipping");
                            dropped_chunks += 1;
                        }
                    }
                }
            }
        }

        if dropped_chunks > 0 {
            tracing::warn!(dropped_chunks, "SSE chunks were dropped during stream");
        }

        Ok(())
    }
944
945 async fn stream_mock(&mut self, prompt: &str) -> Result<(), Box<dyn std::error::Error>> {
950 let prompt_prefix = prompt[..prompt.len().min(20)].to_string();
953 let fixture: Vec<(String, f32)> = vec![
954 ("The".to_string(), -0.12),
955 (" quick".to_string(), -0.45),
956 (" brown".to_string(), -0.78),
957 (" fox".to_string(), -0.23),
958 (" jumps".to_string(), -0.56),
959 (" over".to_string(), -0.34),
960 (" the".to_string(), -0.11),
961 (" lazy".to_string(), -0.89),
962 (" dog".to_string(), -0.19),
963 (".".to_string(), -0.07),
964 (" This".to_string(), -0.62),
965 (" is".to_string(), -0.15),
966 (" a".to_string(), -0.08),
967 (" mock".to_string(), -0.31),
968 (" response".to_string(), -0.44),
969 (" for".to_string(), -0.27),
970 (" prompt".to_string(), -0.53),
971 (":".to_string(), -0.18),
972 (" \"".to_string(), -0.39),
973 (prompt_prefix, -0.71),
974 ];
975
976 let prompt_hash: usize = prompt
978 .bytes()
979 .fold(0usize, |acc, b| acc.wrapping_add(b as usize));
980 let offset = prompt_hash % fixture.len();
981
982 for idx in 0..fixture.len() {
983 let (token_text, logprob) = &fixture[(idx + offset) % fixture.len()];
984 let token_text = token_text.clone();
985 let confidence = logprob.exp().clamp(0.0_f32, 1.0_f32);
986 let perplexity = (-logprob).exp();
987 let importance = calculate_token_importance(&token_text, idx);
988 let should_transform = idx % 2 == 1;
989
990 let (display_text, chaos_label) = if should_transform {
991 let (t, label) = self.transform.apply_with_label(&token_text);
992 let cl = if matches!(self.transform, Transform::Chaos) {
993 Some(label.to_string())
994 } else {
995 None
996 };
997 (t, cl)
998 } else {
999 (token_text.clone(), None)
1000 };
1001
1002 if should_transform {
1003 self.transformed_count += 1;
1004 }
1005 self.token_count += 1;
1006
1007 if let Some(tx) = &self.web_tx {
1008 let evt = TokenEvent {
1009 text: display_text.clone(),
1010 original: token_text.clone(),
1011 index: idx,
1012 transformed: should_transform,
1013 importance,
1014 chaos_label,
1015 provider: self.web_provider_label.clone(),
1016 confidence: Some(confidence),
1017 perplexity: Some(perplexity),
1018 alternatives: vec![
1019 TokenAlternative {
1020 token: "a".to_string(),
1021 probability: 0.15,
1022 },
1023 TokenAlternative {
1024 token: "the".to_string(),
1025 probability: 0.10,
1026 },
1027 ],
1028 is_error: false,
1029 arrival_ms: None,
1030 };
1031 let _ = tx.send(evt);
1032 } else {
1033 self.process_content_logprob(&token_text, Some(*logprob), vec![]);
1034 }
1035 }
1036 Ok(())
1037 }
1038
    /// Sends the prompt to the MCP orchestrator (`tools/call` → `infer` on the
    /// `llama_cpp` worker) and returns the enriched prompt text, or an error
    /// for HTTP failures, MCP-level errors, or an empty result.
    async fn orchestrator_infer(&self, prompt: &str) -> Result<String, Box<dyn std::error::Error>> {
        let mcp_request = McpInferRequest {
            jsonrpc: "2.0".to_string(),
            method: "tools/call".to_string(),
            id: 1,
            params: McpInferParams {
                name: "infer".to_string(),
                arguments: McpInferArguments {
                    prompt: prompt.to_string(),
                    worker: "llama_cpp".to_string(),
                },
            },
        };

        let response = self
            .client
            .post(&self.orchestrator_url)
            .header("Content-Type", "application/json")
            .json(&mcp_request)
            .send()
            .await?;

        if !response.status().is_success() {
            return Err(format!("Orchestrator returned HTTP {}", response.status()).into());
        }

        let mcp_resp: McpInferResponse = response.json().await?;

        // JSON-RPC error object takes precedence over any result payload.
        if let Some(err) = mcp_resp.error {
            return Err(format!("Orchestrator MCP error: {}", err.message).into());
        }

        if let Some(result) = mcp_resp.result {
            if let Some(content) = result.content.first() {
                if let Some(text) = &content.text {
                    return Ok(text.clone());
                }
            }
        }

        Err("Orchestrator returned empty result".into())
    }
1085
1086 pub fn process_content(&mut self, content: &str) {
1092 self.process_content_logprob(content, None, vec![]);
1093 }
1094 pub fn process_content_with_logprob(
1096 &mut self,
1097 content: &str,
1098 lp: Option<providers::OpenAILogprobContent>,
1099 ) {
1100 let (log_prob, top_alts) = if let Some(ref entry) = lp {
1101 let alts: Vec<TokenAlternative> = entry
1102 .top_logprobs
1103 .iter()
1104 .map(|t| TokenAlternative {
1105 token: t.token.clone(),
1106 probability: t.logprob.exp().clamp(0.0, 1.0),
1107 })
1108 .collect();
1109 (Some(entry.logprob), alts)
1114 } else {
1115 (None, vec![])
1116 };
1117 self.process_content_logprob(content, log_prob, top_alts);
1118 }
1119
    /// Tokenizes `content`, decides per token whether to transform it, and
    /// emits the result to the web channel, JSON stream, or terminal.
    ///
    /// The chunk-level `log_prob`/`top_alts` are attached only to the first
    /// non-whitespace token of the chunk. Transform selection uses an even
    /// rate-based spread, unless `min_confidence` is set and a confidence is
    /// known, in which case low-confidence tokens are transformed instead.
    pub fn process_content_logprob(
        &mut self,
        content: &str,
        log_prob: Option<f32>,
        top_alts: Vec<TokenAlternative>,
    ) {
        let tokens = tokenize(content);
        // Only the first non-whitespace token of this chunk carries the
        // chunk-level logprob and alternatives.
        let mut first_real = true;
        for token in tokens {
            if !token.trim().is_empty() {
                let i = self.token_count;

                // Evenly spread transforms at `rate`: transform when the
                // floor of the cumulative count advances at this index.
                let rate = self.rate;
                let should_transform = ((i + 1) as f64 * rate).floor() > (i as f64 * rate).floor();

                let (token_confidence, token_perplexity, token_alts) = if first_real {
                    first_real = false;
                    let conf = log_prob.map(|lp| lp.exp().clamp(0.0, 1.0));
                    let perp = log_prob.map(|lp| (-lp).exp());
                    (conf, perp, top_alts.clone())
                } else {
                    (None, None, vec![])
                };

                // min_confidence overrides rate selection when a confidence
                // value exists for this token.
                let should_transform =
                    if let (Some(min_conf), Some(conf)) = (self.min_confidence, token_confidence) {
                        conf as f64 <= min_conf
                    } else {
                        should_transform
                    };

                // Importance: real confidence when known, otherwise a seeded
                // heuristic so heatmaps stay reproducible.
                let importance = token_confidence.map(|c| c as f64).unwrap_or_else(|| {
                    transforms::calculate_token_importance_rng(&token, i, &mut self.rng)
                });

                let (display_text, chaos_label) = if should_transform {
                    self.transformed_count += 1;
                    let (text, label) = self.transform.apply_with_label_rng(&token, &mut self.rng);
                    // An empty result means the token was deleted; label it so
                    // consumers can tell deletion apart from pass-through.
                    let cl = if matches!(self.transform, Transform::Chaos) || text.is_empty() {
                        Some(if text.is_empty() {
                            "deleted".to_string()
                        } else {
                            label.to_string()
                        })
                    } else {
                        None
                    };
                    (text, cl)
                } else {
                    (token.clone(), None)
                };

                // Delay transforms queue their pause; the streaming loop sleeps
                // and clears it after this chunk is emitted.
                if should_transform {
                    if let Transform::Delay(ms) = self.transform {
                        self.pending_delay_ms = ms;
                    }
                }

                let is_deleted = should_transform && display_text.is_empty();

                // Deleted tokens still advance the counter but emit nothing.
                if !is_deleted {
                    let arrival_ms = self.stream_start_instant
                        .map(|start| start.elapsed().as_millis() as u64);
                    if let Some(tx) = &self.web_tx {
                        let event = TokenEvent {
                            text: display_text.clone(),
                            original: token.clone(),
                            index: i,
                            transformed: should_transform,
                            importance,
                            chaos_label,
                            provider: self.web_provider_label.clone(),
                            confidence: token_confidence,
                            perplexity: token_perplexity,
                            alternatives: token_alts,
                            is_error: false,
                            arrival_ms,
                        };
                        // Record before sending so the replay file sees every
                        // event even if the receiver has gone away.
                        if let Some(rec) = &mut self.recorder {
                            rec.record(&event);
                        }
                        let _ = tx.send(event);
                    } else if self.json_stream {
                        let event = TokenEvent {
                            text: display_text.clone(),
                            original: token.clone(),
                            index: i,
                            transformed: should_transform,
                            importance,
                            chaos_label: chaos_label.clone(),
                            provider: self.web_provider_label.clone(),
                            confidence: token_confidence,
                            perplexity: token_perplexity,
                            alternatives: token_alts.clone(),
                            is_error: false,
                            arrival_ms,
                        };
                        if let Ok(line) = serde_json::to_string(&event) {
                            println!("{}", line);
                        }
                    } else {
                        // Terminal output: heatmap takes precedence over
                        // visual-mode highlighting.
                        if self.heatmap_mode {
                            print!("{}", apply_heatmap_color(&display_text, importance));
                        } else if self.visual_mode && should_transform {
                            print!("{}", display_text.bright_cyan().bold());
                        } else if self.visual_mode {
                            print!("{}", display_text.normal());
                        } else {
                            print!("{}", display_text);
                        }
                        let _ = io::stdout().flush();
                    }
                }

                self.token_count += 1;
            }
        }

        // NOTE(review): the recorded latency here is a fixed placeholder
        // (1_000), and confidence is folded into a latency channel — confirm
        // this matches the telemetry bus's intended semantics.
        #[cfg(feature = "self-tune")]
        if let Some(bus) = &self.telemetry_bus {
            use crate::self_tune::telemetry_bus::PipelineStage;
            bus.record_latency(PipelineStage::Inference, 1_000);
            if let Some(lp) = log_prob {
                let confidence_pct = (lp.exp().clamp(0.0, 1.0) * 100.0) as u64;
                bus.record_latency(PipelineStage::Other, confidence_pct.max(1));
            }
        }
    }
1275
    /// Prints the run banner (provider, transform, model, prompt, and any
    /// active modes) to stdout before streaming begins.
    pub fn print_header(&self, prompt: &str) {
        println!("{}", "EVERY OTHER TOKEN INTERCEPTOR".bright_cyan().bold());
        println!(
            "{}: {}",
            "Provider".bright_yellow(),
            self.provider.to_string().bright_white()
        );
        println!("{}: {:?}", "Transform".bright_yellow(), self.transform);
        println!("{}: {}", "Model".bright_yellow(), self.model);
        println!("{}: {}", "Prompt".bright_yellow(), prompt);
        if self.orchestrator {
            println!(
                "{}: {}",
                "Orchestrator".bright_magenta(),
                "ON (MCP pipeline at localhost:3000)".bright_magenta()
            );
        }
        if self.visual_mode {
            println!(
                "{}: {}",
                "Visual Mode".bright_green(),
                "ON (even=normal, odd=cyan+bold)".bright_green()
            );
        }
        if self.heatmap_mode {
            println!(
                "{}: {}",
                "Heatmap Mode".bright_magenta(),
                "ON (color intensity = token importance)".bright_magenta()
            );
            println!(
                "{}: {} {} {} {}",
                "Legend".bright_white(),
                "Low".on_blue(),
                "Medium".on_yellow(),
                "High".on_red(),
                "Critical".on_bright_red().bright_white()
            );
        }
        println!("{}", "=".repeat(50).bright_blue());
        println!("{}", "Response (with transformations):".bright_green());
        println!();
    }
1324
    /// Prints the end-of-stream summary (token and transform counts) to stdout.
    pub fn print_footer(&self) {
        println!("\n{}", "=".repeat(50).bright_blue());
        println!("Complete! Processed {} tokens.", self.token_count);
        println!("Transform applied to {} tokens.", self.transformed_count);
    }
1333}
1334
/// Aggregated statistics from a headless multi-run research session
/// (see [`run_research_headless`]).
#[derive(Debug, Clone, serde::Serialize)]
pub struct ResearchSession {
    /// The prompt used for every run.
    pub prompt: String,
    /// Provider name (Display form).
    pub provider: String,
    /// Model identifier.
    pub model: String,
    /// Transform in Debug form.
    pub transform: String,
    /// Number of repeated runs aggregated.
    pub runs: u32,
    /// Total tokens observed across all runs.
    pub total_tokens: usize,
    /// Tokens that had a transform applied.
    pub total_transformed: usize,
    /// Unique lowercased tokens divided by total tokens (0.0 when empty).
    pub vocabulary_diversity: f64,
    /// Mean token length in bytes.
    pub mean_token_length: f64,
    /// Mean perplexity over tokens that reported one, if any.
    pub mean_perplexity: Option<f64>,
    /// Mean confidence over tokens that reported one, if any.
    pub mean_confidence: Option<f64>,
    /// Up to 10 original tokens with the highest perplexity.
    pub top_perplexity_tokens: Vec<String>,
    /// Rough cost estimate at a flat per-1k-token rate.
    pub estimated_cost_usd: f64,
    /// Human-readable citation string describing the session parameters.
    pub citation: String,
}
1375
/// Runs the interceptor `runs` times against the same prompt without any UI
/// and aggregates token statistics into a [`ResearchSession`].
pub async fn run_research_headless(
    prompt: &str,
    provider: providers::Provider,
    transform: transforms::Transform,
    model: String,
    runs: u32,
) -> Result<ResearchSession, Box<dyn std::error::Error>> {
    let mut all_tokens: Vec<TokenEvent> = Vec::new();

    for _ in 0..runs {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = TokenInterceptor::new(
            provider.clone(),
            transform.clone(),
            model.clone(),
            false,
            false,
            false,
        )?;
        interceptor.web_tx = Some(tx);
        interceptor.intercept_stream(prompt).await?;
        // The stream has finished, so everything is already buffered in the
        // channel; drain it synchronously.
        while let Ok(ev) = rx.try_recv() {
            all_tokens.push(ev);
        }
    }

    let total = all_tokens.len();
    let total_transformed = all_tokens.iter().filter(|t| t.transformed).count();

    // Type-token ratio over lowercased originals.
    let unique: std::collections::HashSet<String> = all_tokens
        .iter()
        .map(|t| t.original.to_lowercase())
        .collect();
    let vocab_diversity = if total > 0 {
        unique.len() as f64 / total as f64
    } else {
        0.0
    };

    let mean_token_length = if total > 0 {
        all_tokens
            .iter()
            .map(|t| t.original.len() as f64)
            .sum::<f64>()
            / total as f64
    } else {
        0.0
    };

    // Perplexity/confidence means are computed only over tokens that
    // actually reported a value (e.g. Anthropic tokens report none).
    let perp_tokens: Vec<f64> = all_tokens
        .iter()
        .filter_map(|t| t.perplexity.map(|p| p as f64))
        .collect();
    let mean_perplexity = if perp_tokens.is_empty() {
        None
    } else {
        Some(perp_tokens.iter().sum::<f64>() / perp_tokens.len() as f64)
    };

    let conf_tokens: Vec<f64> = all_tokens
        .iter()
        .filter_map(|t| t.confidence.map(|c| c as f64))
        .collect();
    let mean_confidence = if conf_tokens.is_empty() {
        None
    } else {
        Some(conf_tokens.iter().sum::<f64>() / conf_tokens.len() as f64)
    };

    // Sort descending by perplexity and keep the top 10 originals.
    let mut by_perp: Vec<&TokenEvent> = all_tokens
        .iter()
        .filter(|t| t.perplexity.is_some())
        .collect();
    by_perp.sort_by(|a, b| {
        b.perplexity
            .partial_cmp(&a.perplexity)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let top_perplexity_tokens: Vec<String> = by_perp
        .iter()
        .take(10)
        .map(|t| t.original.clone())
        .collect();

    // Flat $0.002 per 1k tokens — a rough placeholder, not per-model pricing.
    let estimated_cost_usd = total as f64 / 1000.0 * 0.002;

    let citation = format!(
        "Every Other Token v4.0.0 | prompt=\"{}\" | provider={} | model={} | transform={:?} | runs={} | tokens={}",
        prompt, provider, model, transform, runs, total
    );

    Ok(ResearchSession {
        prompt: prompt.to_string(),
        provider: provider.to_string(),
        model,
        transform: format!("{:?}", transform),
        runs,
        total_tokens: total,
        total_transformed,
        vocabulary_diversity: vocab_diversity,
        mean_token_length,
        mean_perplexity,
        mean_confidence,
        top_perplexity_tokens,
        estimated_cost_usd,
        citation,
    })
}
1490
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;
    use tokio::sync::mpsc;

    // Serializes tests that mutate process-global environment variables.
    // `cargo test` runs tests on multiple threads by default, so two tests
    // calling `std::env::remove_var` concurrently would otherwise race on
    // shared process state.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Builds a deterministic, network-free interceptor: OpenAI provider,
    /// `Reverse` transform, fixed RNG seed, every output mode disabled.
    fn make_test_interceptor() -> TokenInterceptor {
        TokenInterceptor {
            client: Client::new(),
            api_key: "test-key".to_string(),
            provider: Provider::Openai,
            transform: Transform::Reverse,
            model: "test-model".to_string(),
            token_count: 0,
            transformed_count: 0,
            visual_mode: false,
            heatmap_mode: false,
            orchestrator: false,
            orchestrator_url: "http://localhost:3000".to_string(),
            web_tx: None,
            web_provider_label: None,
            system_prompt: None,
            #[cfg(feature = "self-tune")]
            telemetry_bus: None,
            #[cfg(feature = "self-modify")]
            dedup: None,
            rate: 0.5,
            rng: StdRng::seed_from_u64(42),
            top_logprobs: 5,
            recorder: None,
            json_stream: false,
            pending_delay_ms: 0,
            min_confidence: None,
            last_token_instant: None,
            max_retries: 3,
            anthropic_max_tokens: 4096,
            stream_start_instant: None,
            timeout_secs: None,
        }
    }

    /// Drains every event currently buffered in `rx` into a `Vec`.
    /// `try_recv` never blocks, so this collects exactly what has already
    /// been sent.
    fn drain(rx: &mut mpsc::UnboundedReceiver<TokenEvent>) -> Vec<TokenEvent> {
        let mut events = Vec::new();
        while let Ok(e) = rx.try_recv() {
            events.push(e);
        }
        events
    }

    /// A minimal untransformed `TokenEvent`; serialization tests override
    /// individual fields via struct-update syntax instead of repeating the
    /// full literal.
    fn plain_event() -> TokenEvent {
        TokenEvent {
            text: "hello".to_string(),
            original: "hello".to_string(),
            index: 0,
            transformed: false,
            importance: 0.5,
            chaos_label: None,
            provider: None,
            confidence: None,
            perplexity: None,
            alternatives: vec![],
            is_error: false,
            arrival_ms: None,
        }
    }

    #[test]
    fn test_new_openai_requires_api_key() {
        // Hold the env lock so this cannot race the Anthropic test below.
        // `into_inner` recovers the guard if a previous holder panicked.
        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        std::env::remove_var("OPENAI_API_KEY");
        let result = TokenInterceptor::new(
            Provider::Openai,
            Transform::Reverse,
            "gpt-4".to_string(),
            false,
            false,
            false,
        );
        assert!(result.is_err());
    }

    #[test]
    fn test_new_anthropic_requires_api_key() {
        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        std::env::remove_var("ANTHROPIC_API_KEY");
        let result = TokenInterceptor::new(
            Provider::Anthropic,
            Transform::Reverse,
            "claude".to_string(),
            false,
            false,
            false,
        );
        assert!(result.is_err());
    }

    #[test]
    fn test_interceptor_initial_counts_zero() {
        let interceptor = make_test_interceptor();
        assert_eq!(interceptor.token_count, 0);
        assert_eq!(interceptor.transformed_count, 0);
    }

    #[test]
    fn test_interceptor_fields_match_construction() {
        let interceptor = make_test_interceptor();
        assert_eq!(interceptor.provider, Provider::Openai);
        assert_eq!(interceptor.model, "test-model");
        assert!(!interceptor.visual_mode);
        assert!(!interceptor.heatmap_mode);
        assert!(!interceptor.orchestrator);
        assert!(interceptor.web_tx.is_none());
    }

    #[test]
    fn test_process_content_two_tokens() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");
        assert_eq!(interceptor.token_count, 2);

        let events = drain(&mut rx);
        assert_eq!(events.len(), 2);
        assert_eq!(events[0].text, "hello");
        assert_eq!(events[0].original, "hello");
        assert!(!events[0].transformed);
        assert_eq!(events[0].index, 0);
        assert_eq!(events[1].original, "world");
        assert!(events[1].transformed);
        assert_eq!(events[1].index, 1);
    }

    #[test]
    fn test_process_content_transforms_odd_tokens() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");

        let events = drain(&mut rx);
        // Reverse transform: the odd (second) token is reversed in `text`
        // while `original` keeps the raw token.
        assert_eq!(events[1].text, "dlrow");
        assert_eq!(events[1].original, "world");
    }

    #[test]
    fn test_process_content_empty_string() {
        let (tx, _rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("");
        assert_eq!(interceptor.token_count, 0);
        assert_eq!(interceptor.transformed_count, 0);
    }

    #[test]
    fn test_process_content_whitespace_only() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("   ");
        assert!(drain(&mut rx).is_empty());
    }

    #[test]
    fn test_process_content_single_token() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello");

        let events = drain(&mut rx);
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].text, "hello");
        assert!(!events[0].transformed);
    }

    #[test]
    fn test_process_content_cross_call_continuity() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        // Token indices must continue across calls so alternation is
        // stream-wide, not per-chunk.
        interceptor.process_content("hello");
        interceptor.process_content("world");

        let events = drain(&mut rx);
        assert_eq!(events.len(), 2);
        assert_eq!(events[0].index, 0);
        assert_eq!(events[1].index, 1);
        assert!(events[1].transformed);
    }

    #[test]
    fn test_process_content_increments_transformed_count() {
        let (tx, _rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        // Four tokens: the two odd-indexed ones get transformed.
        interceptor.process_content("hello world foo bar");
        assert_eq!(interceptor.transformed_count, 2);
    }

    #[test]
    fn test_process_content_six_tokens_three_transformed() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("one two three four five six");

        let events = drain(&mut rx);
        assert_eq!(events.len(), 6);
        let xformed = events.iter().filter(|e| e.transformed).count();
        assert_eq!(xformed, 3);
    }

    #[test]
    fn test_original_field_preserved_for_all_tokens() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("the quick brown fox");

        for event in &drain(&mut rx) {
            assert!(!event.original.is_empty());
            if event.transformed {
                assert_ne!(event.text, event.original);
            } else {
                assert_eq!(event.text, event.original);
            }
        }
    }

    #[test]
    fn test_sidebyside_original_is_raw_token() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("quick brown fox");

        let events = drain(&mut rx);
        let originals: Vec<&str> = events.iter().map(|e| e.original.as_str()).collect();
        assert!(originals.contains(&"quick"));
        assert!(originals.contains(&"brown"));
        assert!(originals.contains(&"fox"));
    }

    #[test]
    fn test_even_odd_alternation() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("a b c d");

        for event in &drain(&mut rx) {
            // Even indices pass through untouched; odd indices are
            // transformed ("every other token").
            if event.index % 2 == 0 {
                assert!(!event.transformed);
            } else {
                assert!(event.transformed);
            }
        }
    }

    #[test]
    fn test_graph_pairs_alternate() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("alpha beta gamma delta epsilon zeta");

        for pair in drain(&mut rx).chunks(2) {
            assert!(!pair[0].transformed);
            if let Some(second) = pair.get(1) {
                assert!(second.transformed);
            }
        }
    }

    #[test]
    fn test_graph_indices_sequential() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("one two three four");

        for (i, event) in drain(&mut rx).iter().enumerate() {
            assert_eq!(event.index, i);
        }
    }

    #[test]
    fn test_export_array_sequential_indices() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("the quick brown fox jumps over");

        for (i, event) in drain(&mut rx).iter().enumerate() {
            assert_eq!(event.index, i);
        }
    }

    #[test]
    fn test_export_all_tokens_have_valid_importance() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("testing export importance values");

        for event in &drain(&mut rx) {
            // Importance is normalized to [0, 1].
            assert!(event.importance >= 0.0 && event.importance <= 1.0);
        }
    }

    #[test]
    fn test_export_large_set_serializes() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("the quick brown fox jumps over the lazy dog and runs around");

        let events = drain(&mut rx);
        let json = serde_json::to_string(&events).expect("serialize");
        let parsed: Vec<serde_json::Value> = serde_json::from_str(&json).expect("parse");
        assert_eq!(parsed.len(), events.len());
        assert!(parsed.len() > 5);
    }

    #[test]
    fn test_multiple_tokens_form_valid_export_array() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world foo bar");

        let events = drain(&mut rx);
        let json = serde_json::to_string(&events).expect("serialize");
        let parsed: Vec<serde_json::Value> = serde_json::from_str(&json).expect("parse");
        assert_eq!(parsed.len(), events.len());
        for (i, entry) in parsed.iter().enumerate() {
            assert_eq!(entry["index"].as_u64().expect("index"), i as u64);
        }
    }

    // The print_* tests are smoke tests: they only verify the formatting
    // paths do not panic under each mode combination.

    #[test]
    fn test_print_header_all_modes() {
        let interceptor = make_test_interceptor();
        interceptor.print_header("test prompt");
    }

    #[test]
    fn test_print_header_with_orchestrator() {
        let mut interceptor = make_test_interceptor();
        interceptor.orchestrator = true;
        interceptor.print_header("test");
    }

    #[test]
    fn test_print_header_with_visual_mode() {
        let mut interceptor = make_test_interceptor();
        interceptor.visual_mode = true;
        interceptor.print_header("test");
    }

    #[test]
    fn test_print_header_with_heatmap_mode() {
        let mut interceptor = make_test_interceptor();
        interceptor.heatmap_mode = true;
        interceptor.print_header("test");
    }

    #[test]
    fn test_print_footer() {
        let interceptor = make_test_interceptor();
        interceptor.print_footer();
    }

    #[test]
    fn test_print_footer_after_processing() {
        let mut interceptor = make_test_interceptor();
        interceptor.token_count = 42;
        interceptor.transformed_count = 21;
        interceptor.print_footer();
    }

    #[test]
    fn test_process_content_uppercase_transform() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.transform = Transform::Uppercase;
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");

        let events = drain(&mut rx);
        assert_eq!(events[1].text, "WORLD");
        assert_eq!(events[1].original, "world");
    }

    #[test]
    fn test_process_content_mock_transform() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.transform = Transform::Mock;
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");

        let events = drain(&mut rx);
        // Mock alternates the case of each character ("spongebob case").
        assert_eq!(events[1].text, "wOrLd");
    }

    #[test]
    fn test_process_content_noise_transform() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.transform = Transform::Noise;
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");

        let events = drain(&mut rx);
        // Noise keeps the original token as a prefix and the result is one
        // byte longer — presumably a single ASCII noise character is
        // appended (TODO confirm against the transform implementation).
        assert!(events[1].text.starts_with("world"));
        assert_eq!(events[1].text.len(), 6);
    }

    #[test]
    fn test_process_content_terminal_mode_no_crash() {
        let mut interceptor = make_test_interceptor();
        interceptor.process_content("hello world");
        assert_eq!(interceptor.token_count, 2);
    }

    #[test]
    fn test_process_content_visual_mode_no_crash() {
        let mut interceptor = make_test_interceptor();
        interceptor.visual_mode = true;
        interceptor.process_content("hello world");
        assert_eq!(interceptor.token_count, 2);
    }

    #[test]
    fn test_process_content_heatmap_mode_no_crash() {
        let mut interceptor = make_test_interceptor();
        interceptor.heatmap_mode = true;
        interceptor.process_content("hello world");
        assert_eq!(interceptor.token_count, 2);
    }

    #[test]
    fn test_chaos_label_set_for_chaos_transform() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.transform = Transform::Chaos;
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");

        let events = drain(&mut rx);
        // Chaos picks one of the concrete transforms per token and records
        // which one it chose in `chaos_label`.
        let known = ["reverse", "uppercase", "mock", "noise"];
        let odd = events
            .iter()
            .find(|e| e.transformed)
            .expect("should have odd token");
        let label = odd
            .chaos_label
            .as_ref()
            .expect("chaos_label should be Some for Chaos transform");
        assert!(
            known.contains(&label.as_str()),
            "unexpected label: {}",
            label
        );
    }

    #[test]
    fn test_chaos_label_none_for_reverse_transform() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.transform = Transform::Reverse;
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world");

        for event in &drain(&mut rx) {
            assert!(
                event.chaos_label.is_none(),
                "Reverse should not set chaos_label"
            );
        }
    }

    #[test]
    fn test_chaos_label_none_for_even_tokens() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.transform = Transform::Chaos;
        interceptor.web_tx = Some(tx);

        interceptor.process_content("hello world foo bar");

        let events = drain(&mut rx);
        for event in events.iter().filter(|e| !e.transformed) {
            assert!(
                event.chaos_label.is_none(),
                "Even tokens should not have chaos_label"
            );
        }
    }

    #[test]
    fn test_chaos_label_serialization() {
        let event = TokenEvent {
            text: "dlrow".to_string(),
            original: "world".to_string(),
            index: 1,
            transformed: true,
            chaos_label: Some("reverse".to_string()),
            ..plain_event()
        };
        let json = serde_json::to_string(&event).expect("serialize");
        assert!(json.contains("chaos_label"));
        assert!(json.contains("reverse"));
    }

    #[test]
    fn test_chaos_label_skipped_when_none() {
        let event = plain_event();
        let json = serde_json::to_string(&event).expect("serialize");
        assert!(
            !json.contains("chaos_label"),
            "None chaos_label should be skipped in JSON"
        );
    }

    #[test]
    fn test_provider_label_none_by_default() {
        let interceptor = make_test_interceptor();
        assert!(interceptor.web_provider_label.is_none());
    }

    #[test]
    fn test_provider_label_propagates_to_event() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut interceptor = make_test_interceptor();
        interceptor.web_tx = Some(tx);
        interceptor.web_provider_label = Some("openai".to_string());

        interceptor.process_content("hello world");

        for event in &drain(&mut rx) {
            assert_eq!(
                event.provider.as_deref(),
                Some("openai"),
                "provider label should propagate to all events"
            );
        }
    }

    #[test]
    fn test_provider_label_none_means_skipped_in_json() {
        let event = plain_event();
        let json = serde_json::to_string(&event).expect("serialize");
        assert!(
            !json.contains("\"provider\""),
            "None provider should be skipped in JSON"
        );
    }

    #[test]
    fn test_provider_label_some_appears_in_json() {
        let event = TokenEvent {
            provider: Some("anthropic".to_string()),
            ..plain_event()
        };
        let json = serde_json::to_string(&event).expect("serialize");
        assert!(json.contains("\"provider\""));
        assert!(json.contains("anthropic"));
    }

    #[test]
    fn test_provider_label_openai_and_anthropic_distinct() {
        let (tx1, mut rx1) = mpsc::unbounded_channel::<TokenEvent>();
        let mut openai_i = make_test_interceptor();
        openai_i.web_tx = Some(tx1);
        openai_i.web_provider_label = Some("openai".to_string());

        let (tx2, mut rx2) = mpsc::unbounded_channel::<TokenEvent>();
        let mut anthropic_i = make_test_interceptor();
        anthropic_i.web_tx = Some(tx2);
        anthropic_i.web_provider_label = Some("anthropic".to_string());

        openai_i.process_content("hello");
        anthropic_i.process_content("hello");

        let e1 = rx1.try_recv().expect("openai event");
        let e2 = rx2.try_recv().expect("anthropic event");
        assert_eq!(e1.provider.as_deref(), Some("openai"));
        assert_eq!(e2.provider.as_deref(), Some("anthropic"));
        assert_ne!(e1.provider, e2.provider);
    }

    #[test]
    fn test_token_alternative_serializes() {
        let alt = TokenAlternative {
            token: "hello".to_string(),
            probability: 0.75,
        };
        let json = serde_json::to_string(&alt).expect("serialize");
        assert!(json.contains("\"token\":\"hello\""));
        assert!(json.contains("\"probability\":0.75") || json.contains("probability"));
    }

    #[test]
    fn test_token_alternative_clone() {
        let alt = TokenAlternative {
            token: "world".to_string(),
            probability: 0.5,
        };
        let alt2 = alt.clone();
        assert_eq!(alt2.token, alt.token);
    }

    #[test]
    fn test_process_content_logprob_attaches_confidence() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        // logprob 0.0 => probability e^0 = 1.0.
        i.process_content_logprob("hello world", Some(0.0_f32), vec![]);
        let ev = rx.try_recv().expect("event");
        assert_eq!(ev.confidence, Some(1.0_f32));
    }

    #[test]
    fn test_process_content_logprob_none_gives_none_confidence() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        i.process_content_logprob("hello", None, vec![]);
        let ev = rx.try_recv().expect("event");
        assert!(ev.confidence.is_none());
        assert!(ev.perplexity.is_none());
    }

    #[test]
    fn test_process_content_logprob_computes_perplexity() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        // perplexity = e^(-logprob), so logprob -1 gives e.
        i.process_content_logprob("word", Some(-1.0_f32), vec![]);
        let ev = rx.try_recv().expect("event");
        let perp = ev.perplexity.expect("perplexity present");
        assert!(
            (perp - std::f32::consts::E).abs() < 0.01,
            "expected ~e, got {}",
            perp
        );
    }

    #[test]
    fn test_process_content_logprob_attaches_alternatives_to_first_token() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        let alts = vec![
            TokenAlternative {
                token: "hi".to_string(),
                probability: 0.9,
            },
            TokenAlternative {
                token: "hey".to_string(),
                probability: 0.05,
            },
        ];
        i.process_content_logprob("hello world", Some(-0.1_f32), alts);
        // The logprob/alternatives describe the chunk, which maps to the
        // first whitespace token; later tokens carry none.
        let first = rx.try_recv().expect("first token");
        assert_eq!(first.alternatives.len(), 2);
        let second = rx.try_recv().expect("second token");
        assert!(second.alternatives.is_empty());
    }

    #[test]
    fn test_process_content_delegates_to_logprob() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        i.process_content("hello");
        let ev = rx.try_recv().expect("event");
        assert!(ev.confidence.is_none());
        assert!(ev.alternatives.is_empty());
    }

    #[test]
    fn test_confidence_serialized_when_some() {
        let event = TokenEvent {
            text: "hi".to_string(),
            original: "hi".to_string(),
            confidence: Some(0.92),
            perplexity: Some(1.08),
            alternatives: vec![TokenAlternative {
                token: "hey".to_string(),
                probability: 0.05,
            }],
            ..plain_event()
        };
        let json = serde_json::to_string(&event).expect("serialize");
        assert!(json.contains("confidence"));
        assert!(json.contains("perplexity"));
        assert!(json.contains("alternatives"));
        assert!(json.contains("hey"));
    }

    #[test]
    fn test_confidence_omitted_when_none() {
        let event = TokenEvent {
            text: "hi".to_string(),
            original: "hi".to_string(),
            ..plain_event()
        };
        let json = serde_json::to_string(&event).expect("serialize");
        assert!(!json.contains("confidence"));
        assert!(!json.contains("perplexity"));
        assert!(!json.contains("alternatives"));
    }

    #[test]
    fn test_system_prompt_field_initializes_none() {
        let i = make_test_interceptor();
        assert!(i.system_prompt.is_none());
    }

    #[test]
    fn test_system_prompt_can_be_set() {
        let mut i = make_test_interceptor();
        i.system_prompt = Some("Be concise.".to_string());
        assert_eq!(i.system_prompt.as_deref(), Some("Be concise."));
    }

    #[test]
    fn test_logprob_confidence_clamps_at_one() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        // A positive logprob would naively give probability > 1; the
        // implementation must clamp.
        i.process_content_logprob("token", Some(2.0_f32), vec![]);
        let ev = rx.try_recv().expect("event");
        let conf = ev.confidence.expect("confidence");
        assert!(conf <= 1.0, "confidence should not exceed 1.0");
    }

    #[test]
    fn test_process_content_logprob_multiple_tokens_only_first_gets_logprob() {
        let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
        let mut i = make_test_interceptor();
        i.web_tx = Some(tx);
        i.process_content_logprob("the quick brown fox", Some(-0.5_f32), vec![]);
        let events = drain(&mut rx);
        assert!(events.len() >= 2);
        assert!(
            events[0].confidence.is_some(),
            "first token should have confidence"
        );
        assert!(
            events[1].confidence.is_none(),
            "subsequent tokens should not"
        );
    }
}
2421
2422#[cfg(test)]
2423mod research_tests {
2424 use super::*;
2425
2426 fn make_session(
2427 tokens: usize,
2428 confidence: Option<f32>,
2429 perplexity: Option<f32>,
2430 ) -> ResearchSession {
2431 ResearchSession {
2432 prompt: "test prompt".to_string(),
2433 provider: "openai".to_string(),
2434 model: "gpt-3.5-turbo".to_string(),
2435 transform: "Reverse".to_string(),
2436 runs: 1,
2437 total_tokens: tokens,
2438 total_transformed: tokens / 2,
2439 vocabulary_diversity: 0.8,
2440 mean_token_length: 4.5,
2441 mean_perplexity: perplexity.map(|p| p as f64),
2442 mean_confidence: confidence.map(|c| c as f64),
2443 top_perplexity_tokens: vec!["word".to_string()],
2444 estimated_cost_usd: tokens as f64 / 1000.0 * 0.002,
2445 citation: format!("Every Other Token v4.0.0 | tokens={}", tokens),
2446 }
2447 }
2448
2449 fn make_test_interceptor() -> TokenInterceptor {
2453 TokenInterceptor {
2454 client: reqwest::Client::new(),
2455 api_key: "test-key".to_string(),
2456 provider: Provider::Openai,
2457 transform: Transform::Reverse,
2458 model: "test-model".to_string(),
2459 token_count: 0,
2460 transformed_count: 0,
2461 visual_mode: false,
2462 heatmap_mode: false,
2463 orchestrator: false,
2464 orchestrator_url: "http://localhost:3000".to_string(),
2465 web_tx: None,
2466 web_provider_label: None,
2467 system_prompt: None,
2468 #[cfg(feature = "self-tune")]
2469 telemetry_bus: None,
2470 #[cfg(feature = "self-modify")]
2471 dedup: None,
2472 rate: 0.5,
2473 rng: StdRng::seed_from_u64(42),
2474 top_logprobs: 5,
2475 recorder: None,
2476 json_stream: false,
2477 pending_delay_ms: 0,
2478 min_confidence: None,
2479 last_token_instant: None,
2480 max_retries: 3,
2481 anthropic_max_tokens: 4096,
2482 stream_start_instant: None,
2483 timeout_secs: None,
2484 }
2485 }
2486
2487 #[test]
2488 fn test_research_session_serializes_basic_fields() {
2489 let s = make_session(10, Some(0.85), Some(2.3));
2490 let json = serde_json::to_string(&s).expect("serialize");
2491 let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
2492 assert_eq!(v["prompt"], "test prompt");
2493 assert_eq!(v["total_tokens"], 10);
2494 assert_eq!(v["runs"], 1);
2495 assert_eq!(v["provider"], "openai");
2496 }
2497
2498 #[test]
2499 fn test_research_session_none_fields_serialize_as_null() {
2500 let s = make_session(5, None, None);
2501 let json = serde_json::to_string(&s).expect("serialize");
2502 let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
2503 assert!(v["mean_perplexity"].is_null());
2504 assert!(v["mean_confidence"].is_null());
2505 }
2506
2507 #[test]
2508 fn test_research_session_estimated_cost_scales_with_tokens() {
2509 let s100 = make_session(100, None, None);
2510 let s1000 = make_session(1000, None, None);
2511 assert!(s1000.estimated_cost_usd > s100.estimated_cost_usd);
2512 assert!((s100.estimated_cost_usd - 0.0002).abs() < 1e-10);
2513 assert!((s1000.estimated_cost_usd - 0.002).abs() < 1e-10);
2514 }
2515
2516 #[test]
2517 fn test_research_session_vocab_diversity_in_bounds() {
2518 let s = make_session(20, None, None);
2519 assert!(s.vocabulary_diversity >= 0.0 && s.vocabulary_diversity <= 1.0);
2520 }
2521
2522 #[test]
2523 fn test_research_session_top_tokens_at_most_ten() {
2524 let s = ResearchSession {
2525 top_perplexity_tokens: (0..10).map(|i| format!("t{}", i)).collect(),
2526 ..make_session(100, None, None)
2527 };
2528 assert_eq!(s.top_perplexity_tokens.len(), 10);
2529 }
2530
2531 #[test]
2532 fn test_research_session_citation_contains_prompt() {
2533 let s = make_session(5, None, None);
2534 assert!(s.citation.contains("Every Other Token"));
2535 }
2536
2537 #[test]
2538 fn test_research_session_runs_field_roundtrips() {
2539 let s = ResearchSession {
2540 runs: 42,
2541 ..make_session(10, None, None)
2542 };
2543 let json = serde_json::to_string(&s).expect("serialize");
2544 let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
2545 assert_eq!(v["runs"], 42);
2546 }
2547
2548 #[test]
2549 fn test_research_session_transform_field() {
2550 let s = make_session(10, None, None);
2551 assert_eq!(s.transform, "Reverse");
2552 }
2553
2554 #[test]
2557 fn test_with_rate_sets_rate() {
2558 let mut i = make_test_interceptor();
2559 i = i.with_rate(0.3);
2560 assert!((i.rate - 0.3).abs() < 1e-9);
2561 }
2562
2563 #[test]
2564 fn test_with_rate_clamps_above_one() {
2565 let mut i = make_test_interceptor();
2566 i = i.with_rate(1.5);
2567 assert_eq!(i.rate, 1.0);
2568 }
2569
2570 #[test]
2571 fn test_with_rate_clamps_below_zero() {
2572 let mut i = make_test_interceptor();
2573 i = i.with_rate(-0.5);
2574 assert_eq!(i.rate, 0.0);
2575 }
2576
2577 #[test]
2578 fn test_with_rate_zero_transforms_no_tokens() {
2579 let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2580 let mut i = make_test_interceptor();
2581 i = i.with_rate(0.0);
2582 i.web_tx = Some(tx);
2583 i.process_content("hello world foo bar");
2584 let mut transformed = 0usize;
2585 while let Ok(ev) = rx.try_recv() {
2586 if ev.transformed {
2587 transformed += 1;
2588 }
2589 }
2590 assert_eq!(transformed, 0, "rate=0 should transform no tokens");
2591 }
2592
2593 #[test]
2594 fn test_with_rate_one_transforms_all_tokens() {
2595 let (tx, mut rx) = mpsc::unbounded_channel::<TokenEvent>();
2596 let mut i = make_test_interceptor();
2597 i = i.with_rate(1.0);
2598 i.web_tx = Some(tx);
2599 i.process_content("hello world foo bar baz");
2600 let mut total = 0usize;
2601 let mut transformed = 0usize;
2602 while let Ok(ev) = rx.try_recv() {
2603 total += 1;
2604 if ev.transformed {
2605 transformed += 1;
2606 }
2607 }
2608 assert!(total > 0);
2609 assert_eq!(transformed, total, "rate=1.0 should transform every token");
2610 }
2611
2612 #[test]
2615 fn test_with_seed_produces_deterministic_noise_output() {
2616 let (tx1, mut rx1) = mpsc::unbounded_channel::<TokenEvent>();
2619 let mut i1 = make_test_interceptor();
2620 i1.transform = Transform::Noise;
2621 i1 = i1.with_seed(12345);
2622 i1.web_tx = Some(tx1);
2623 i1.process_content("hello world");
2624
2625 let (tx2, mut rx2) = mpsc::unbounded_channel::<TokenEvent>();
2626 let mut i2 = make_test_interceptor();
2627 i2.transform = Transform::Noise;
2628 i2 = i2.with_seed(12345);
2629 i2.web_tx = Some(tx2);
2630 i2.process_content("hello world");
2631
2632 let events1: Vec<TokenEvent> = std::iter::from_fn(|| rx1.try_recv().ok()).collect();
2633 let events2: Vec<TokenEvent> = std::iter::from_fn(|| rx2.try_recv().ok()).collect();
2634
2635 assert_eq!(events1.len(), events2.len());
2636 for (e1, e2) in events1.iter().zip(events2.iter()) {
2637 assert_eq!(
2638 e1.text, e2.text,
2639 "seeded runs should produce identical output"
2640 );
2641 }
2642 }
2643
2644 #[test]
2645 fn test_with_seed_different_seeds_may_differ() {
2646 let (tx1, mut rx1) = mpsc::unbounded_channel::<TokenEvent>();
2649 let mut i1 = make_test_interceptor();
2650 i1.transform = Transform::Noise;
2651 i1 = i1.with_seed(1);
2652 i1.web_tx = Some(tx1);
2653 i1.process_content("alpha beta gamma delta epsilon zeta eta theta iota kappa");
2654
2655 let (tx2, mut rx2) = mpsc::unbounded_channel::<TokenEvent>();
2656 let mut i2 = make_test_interceptor();
2657 i2.transform = Transform::Noise;
2658 i2 = i2.with_seed(999999);
2659 i2.web_tx = Some(tx2);
2660 i2.process_content("alpha beta gamma delta epsilon zeta eta theta iota kappa");
2661
2662 let texts1: Vec<String> = std::iter::from_fn(|| rx1.try_recv().ok())
2663 .map(|e| e.text)
2664 .collect();
2665 let texts2: Vec<String> = std::iter::from_fn(|| rx2.try_recv().ok())
2666 .map(|e| e.text)
2667 .collect();
2668
2669 assert!(!texts1.is_empty());
2671 assert!(!texts2.is_empty());
2672 }
2673
2674 #[tokio::test]
2677 async fn test_run_research_headless_mock_returns_session() {
2678 let session = run_research_headless(
2679 "test prompt",
2680 Provider::Mock,
2681 Transform::Reverse,
2682 "mock-fixture-v1".to_string(),
2683 1,
2684 )
2685 .await
2686 .expect("run_research_headless with Mock should not fail");
2687 assert_eq!(session.runs, 1);
2688 assert_eq!(session.prompt, "test prompt");
2689 assert_eq!(session.provider, "mock");
2690 }
2691
2692 #[tokio::test]
2693 async fn test_run_research_headless_mock_token_count_positive() {
2694 let session = run_research_headless(
2695 "hello",
2696 Provider::Mock,
2697 Transform::Uppercase,
2698 "mock-fixture-v1".to_string(),
2699 1,
2700 )
2701 .await
2702 .expect("should succeed");
2703 assert!(session.total_tokens > 0, "mock provider should emit tokens");
2704 }
2705
2706 #[tokio::test]
2707 async fn test_run_research_headless_mock_multiple_runs_accumulate() {
2708 let session = run_research_headless(
2709 "hello",
2710 Provider::Mock,
2711 Transform::Reverse,
2712 "mock-fixture-v1".to_string(),
2713 3,
2714 )
2715 .await
2716 .expect("should succeed");
2717 assert_eq!(session.runs, 3);
2718 }
2719
2720 #[tokio::test]
2721 async fn test_run_research_headless_mock_vocab_diversity_in_bounds() {
2722 let session = run_research_headless(
2723 "test",
2724 Provider::Mock,
2725 Transform::Reverse,
2726 "mock-fixture-v1".to_string(),
2727 1,
2728 )
2729 .await
2730 .expect("should succeed");
2731 assert!(session.vocabulary_diversity >= 0.0);
2732 assert!(session.vocabulary_diversity <= 1.0);
2733 }
2734
2735 #[tokio::test]
2736 async fn test_run_research_headless_mock_transform_label_in_citation() {
2737 let session = run_research_headless(
2738 "sample",
2739 Provider::Mock,
2740 Transform::Uppercase,
2741 "mock-fixture-v1".to_string(),
2742 1,
2743 )
2744 .await
2745 .expect("should succeed");
2746 assert!(session.citation.contains("Every Other Token"));
2747 }
2748
2749 #[tokio::test]
2750 async fn test_run_research_headless_empty_prompt_returns_error() {
2751 let result = run_research_headless(
2752 "",
2753 Provider::Mock,
2754 Transform::Reverse,
2755 "mock-fixture-v1".to_string(),
2756 1,
2757 )
2758 .await;
2759 assert!(result.is_err(), "empty prompt should produce an error");
2760 }
2761
2762 #[test]
2764 fn test_timeout_field_default() {
2765 let interceptor = TokenInterceptor::new(
2766 Provider::Mock,
2767 Transform::Reverse,
2768 "mock-fixture-v1".to_string(),
2769 false,
2770 false,
2771 false,
2772 )
2773 .unwrap();
2774 assert_eq!(interceptor.timeout_secs, None);
2775 let with_timeout = interceptor.with_timeout(120);
2776 assert_eq!(with_timeout.timeout_secs, Some(120));
2777 }
2778
2779 fn count_dropped_sse_chunks_test(lines: &[&str]) -> usize {
2781 lines.iter().filter(|line| {
2782 if line.starts_with("data: ") && **line != "data: [DONE]" {
2783 let json_str = line.strip_prefix("data: ").unwrap_or(line);
2784 serde_json::from_str::<serde_json::Value>(json_str).is_err()
2785 } else {
2786 false
2787 }
2788 }).count()
2789 }
2790
2791 #[test]
2792 fn test_dropped_chunk_counter_increments() {
2793 let lines = vec![
2794 "data: {\"valid\": true}",
2795 "data: not-valid-json",
2796 "data: also-bad",
2797 "data: {\"ok\": 1}",
2798 "data: [DONE]",
2799 ];
2800 let dropped = count_dropped_sse_chunks_test(&lines);
2801 assert_eq!(dropped, 2);
2802 }
2803
2804 fn reset_circuit_breaker_for_test() {
2806 let state = CIRCUIT_BREAKER.get_or_init(|| {
2807 std::sync::Mutex::new(CircuitBreakerState {
2808 consecutive_failures: 0,
2809 open_until_ms: 0,
2810 })
2811 });
2812 if let Ok(mut s) = state.lock() {
2813 s.consecutive_failures = 0;
2814 s.open_until_ms = 0;
2815 }
2816 }
2817
2818 #[test]
2819 fn test_circuit_breaker_429_does_not_trip() {
2820 reset_circuit_breaker_for_test();
2821 for _ in 0..(CB_TRIP_THRESHOLD - 1) {
2823 circuit_record_failure();
2824 }
2825 assert!(!circuit_is_open(), "should not be open before threshold");
2826 assert!(!circuit_is_open(), "429 should not trip the breaker");
2829 }
2830
2831 #[test]
2832 fn test_circuit_breaker_reopens_after_timeout() {
2833 reset_circuit_breaker_for_test();
2834 for _ in 0..CB_TRIP_THRESHOLD {
2835 circuit_record_failure();
2836 }
2837 assert!(circuit_is_open(), "breaker should be open after threshold");
2838 let state = CIRCUIT_BREAKER.get_or_init(|| {
2840 std::sync::Mutex::new(CircuitBreakerState {
2841 consecutive_failures: 0,
2842 open_until_ms: 0,
2843 })
2844 });
2845 if let Ok(mut s) = state.lock() {
2846 s.open_until_ms = 1; }
2848 assert!(!circuit_is_open(), "breaker should close after recovery timeout passes");
2849 }
2850}
2851
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
mod wasm_support {
    use wasm_bindgen::prelude::*;

    /// Stub entry point exported to JavaScript; returns a placeholder
    /// message until the wasm build is fully implemented.
    #[wasm_bindgen]
    pub fn wasm_run() -> JsValue {
        let message = "wasm not yet fully implemented";
        JsValue::from_str(message)
    }
}
2865
// Re-export the wasm entry point at the crate root for wasm-bindgen consumers.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
pub use wasm_support::wasm_run;