llmtrace 0.1.2 - Docs.rs

//! Server-Sent Events (SSE) streaming support.
//!
//! Parses OpenAI-compatible SSE chunks as they arrive, extracts token data
//! incrementally, and tracks streaming metrics (TTFT, completion tokens).
//!
//! # Streaming Security Monitor
//!
//! [`StreamingSecurityMonitor`] provides lightweight, incremental regex-based
//! security analysis that runs *during* SSE streaming. Every N tokens
//! (configurable via [`StreamingAnalysisConfig`](llmtrace_core::StreamingAnalysisConfig))
//! the monitor scans the new content accumulated since the last check for
//! injection patterns, PII, and data-leakage indicators. Any findings are
//! tagged with `"detection": "streaming"` metadata so downstream consumers
//! can distinguish them from the full post-stream analysis.

use llmtrace_core::{
    OutputSafetyConfig, SecurityFinding, SecuritySeverity, StreamingAnalysisConfig,
};
use llmtrace_security::RegexSecurityAnalyzer;
use serde::Deserialize;

// ---------------------------------------------------------------------------
// SSE line parsing
// ---------------------------------------------------------------------------

/// Extract the JSON payload from an SSE `data:` line.
///
/// Returns `None` for blank lines, comment lines, non-data fields,
/// and the terminal `data: [DONE]` sentinel.
pub fn extract_sse_data(line: &str) -> Option<&str> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with(':') {
        return None;
    }
    let payload = trimmed.strip_prefix("data:")?;
    let payload = payload.trim_start();
    if payload == "[DONE]" {
        return None;
    }
    Some(payload)
}

// ---------------------------------------------------------------------------
// OpenAI-compatible SSE chunk types
// ---------------------------------------------------------------------------

/// Minimal representation of an OpenAI streaming chunk.
#[derive(Debug, Deserialize)]
pub struct SseChunk {
    /// Choices returned in this chunk.
    #[serde(default)]
    pub choices: Vec<SseChoice>,
    /// Usage data (only present in the final chunk for some providers).
    #[serde(default)]
    pub usage: Option<SseUsage>,
}

/// A single choice within an SSE chunk.
#[derive(Debug, Deserialize)]
pub struct SseChoice {
    /// The delta content for this chunk.
    #[serde(default)]
    pub delta: Option<SseDelta>,
    /// Finish reason (e.g. `"stop"`). `None` while streaming.
    /// Present in the SSE protocol but not read directly — the accumulator
    /// detects stream completion via the `[DONE]` sentinel instead.
    #[allow(dead_code)]
    pub finish_reason: Option<String>,
}

/// Delta content within a streaming choice.
#[derive(Debug, Deserialize)]
pub struct SseDelta {
    /// The token text (may be absent for role-only or empty deltas).
    pub content: Option<String>,
}

/// Token usage data that some providers include in the final chunk.
#[derive(Debug, Deserialize)]
pub struct SseUsage {
    pub prompt_tokens: Option<u32>,
    pub completion_tokens: Option<u32>,
    pub total_tokens: Option<u32>,
}

// ---------------------------------------------------------------------------
// Streaming accumulator
// ---------------------------------------------------------------------------

/// Accumulates data from a streaming SSE response.
///
/// Feed it raw bytes from the upstream response; it splits on newlines,
/// parses SSE data lines, extracts token content, and tracks metrics.
pub struct StreamingAccumulator {
    /// Buffer for incomplete lines across chunk boundaries.
    line_buffer: String,
    /// All response content tokens concatenated.
    pub content: String,
    /// Number of completion tokens observed (each non-empty delta counts as 1).
    pub completion_token_count: u32,
    /// Whether the first content token has been received.
    pub first_token_received: bool,
    /// Provider-reported usage (if present in the final chunk).
    pub reported_usage: Option<SseUsage>,
    /// Whether the stream has ended (`data: [DONE]` received).
    pub done: bool,
}

impl StreamingAccumulator {
    /// Create a new empty accumulator.
    pub fn new() -> Self {
        Self {
            line_buffer: String::new(),
            content: String::new(),
            completion_token_count: 0,
            first_token_received: false,
            reported_usage: None,
            done: false,
        }
    }

    /// Process a raw byte chunk from the upstream response.
    ///
    /// Returns `true` if this chunk contained the first content token
    /// (useful for recording TTFT).
    pub fn process_chunk(&mut self, bytes: &[u8]) -> bool {
        let text = String::from_utf8_lossy(bytes);
        self.line_buffer.push_str(&text);

        let mut first_token_in_this_chunk = false;

        // Process all complete lines (terminated by '\n')
        while let Some(newline_pos) = self.line_buffer.find('\n') {
            let line: String = self.line_buffer[..newline_pos].to_string();
            self.line_buffer = self.line_buffer[newline_pos + 1..].to_string();

            // Check for [DONE]
            let trimmed = line.trim();
            if trimmed.strip_prefix("data:").map(|s| s.trim()) == Some("[DONE]") {
                self.done = true;
                continue;
            }

            if let Some(json_str) = extract_sse_data(&line) {
                if let Ok(chunk) = serde_json::from_str::<SseChunk>(json_str) {
                    // Extract content tokens
                    for choice in &chunk.choices {
                        if let Some(delta) = &choice.delta {
                            if let Some(ref token_text) = delta.content {
                                if !token_text.is_empty() {
                                    if !self.first_token_received {
                                        self.first_token_received = true;
                                        first_token_in_this_chunk = true;
                                    }
                                    self.content.push_str(token_text);
                                    self.completion_token_count += 1;
                                }
                            }
                        }
                    }

                    // Capture provider-reported usage from final chunk
                    if let Some(usage) = chunk.usage {
                        self.reported_usage = Some(usage);
                    }
                }
            }
        }

        first_token_in_this_chunk
    }

    /// Get the final completion token count.
    ///
    /// Prefers the provider-reported count if available, otherwise uses
    /// the observed delta count.
    pub fn final_completion_tokens(&self) -> u32 {
        self.reported_usage
            .as_ref()
            .and_then(|u| u.completion_tokens)
            .unwrap_or(self.completion_token_count)
    }

    /// Get the provider-reported prompt token count, if available.
    pub fn prompt_tokens(&self) -> Option<u32> {
        self.reported_usage.as_ref().and_then(|u| u.prompt_tokens)
    }

    /// Get the provider-reported total token count, if available.
    pub fn total_tokens(&self) -> Option<u32> {
        self.reported_usage.as_ref().and_then(|u| u.total_tokens)
    }
}

impl Default for StreamingAccumulator {
    fn default() -> Self {
        Self::new()
    }
}

// ---------------------------------------------------------------------------
// Streaming security monitor
// ---------------------------------------------------------------------------

/// Incremental security monitor that runs regex pattern checks during SSE
/// streaming.
///
/// Feed it the accumulated content buffer and the current token count after
/// each SSE chunk. When the token count crosses the configured interval
/// boundary it runs the regex analyzer on new content since the last check,
/// producing interim [`SecurityFinding`]s tagged with `"detection": "streaming"`.
///
/// # Design
///
/// * **Lightweight** — only regex patterns, no ML or ensemble overhead.
/// * **Non-blocking** — runs synchronously on the accumulated buffer between
///   chunk forwards so it does not slow down SSE passthrough.
/// * **Early warning** — the full `SecurityAnalyzer` still runs after stream
///   completion; this is an additive detection layer.
pub struct StreamingSecurityMonitor {
    /// Compiled regex analyzer shared for fast pattern checks.
    analyzer: RegexSecurityAnalyzer,
    /// Byte offset into the accumulated content buffer that was already checked.
    last_checked_offset: usize,
    /// Token count at which the last analysis was triggered.
    last_analyzed_token_count: u32,
    /// Token interval between incremental analyses.
    token_interval: u32,
    /// Whether the monitor is active.
    enabled: bool,
    /// All findings produced so far during streaming.
    findings: Vec<SecurityFinding>,
}

impl StreamingSecurityMonitor {
    /// Create a new monitor from configuration.
    ///
    /// Returns `None` if streaming analysis is disabled or the regex analyzer
    /// fails to initialise (should never happen in practice).
    pub fn new(config: &StreamingAnalysisConfig) -> Option<Self> {
        if !config.enabled {
            return None;
        }
        let analyzer = RegexSecurityAnalyzer::new().ok()?;
        Some(Self {
            analyzer,
            last_checked_offset: 0,
            last_analyzed_token_count: 0,
            token_interval: config.token_interval.max(1),
            enabled: true,
            findings: Vec::new(),
        })
    }

    /// Check whether an incremental analysis should be triggered based on
    /// the current completion token count.
    pub fn should_analyze(&self, current_token_count: u32) -> bool {
        if !self.enabled {
            return false;
        }
        current_token_count >= self.last_analyzed_token_count + self.token_interval
    }

    /// Run an incremental analysis on new content accumulated since the last
    /// check.
    ///
    /// * `accumulated_content` — the full accumulated response content so far.
    /// * `current_token_count` — the current completion token count.
    ///
    /// Returns any *new* findings detected in the delta since the last check.
    /// Findings are also appended to the internal findings list.
    pub fn analyze_incremental(
        &mut self,
        accumulated_content: &str,
        current_token_count: u32,
    ) -> Vec<SecurityFinding> {
        if !self.enabled || accumulated_content.len() <= self.last_checked_offset {
            return Vec::new();
        }

        // Extract the new content delta since the last check.
        let delta = &accumulated_content[self.last_checked_offset..];

        // Run lightweight regex checks on the delta.
        let mut new_findings = self.analyzer.detect_injection_patterns(delta);
        new_findings.extend(self.analyzer.detect_pii_patterns(delta));
        new_findings.extend(self.analyzer.detect_leakage_patterns(delta));

        // Tag each finding with streaming detection metadata.
        for finding in &mut new_findings {
            finding
                .metadata
                .insert("detection".to_string(), "streaming".to_string());
            if finding.location.is_none() {
                finding.location = Some("response.content.streaming".to_string());
            }
        }

        // Update bookkeeping.
        self.last_checked_offset = accumulated_content.len();
        self.last_analyzed_token_count = current_token_count;

        // Stash findings for later retrieval.
        self.findings.extend(new_findings.clone());

        new_findings
    }

    /// Drain and return all findings accumulated during the stream.
    pub fn take_findings(&mut self) -> Vec<SecurityFinding> {
        std::mem::take(&mut self.findings)
    }

    /// Return a reference to all findings accumulated so far.
    pub fn findings(&self) -> &[SecurityFinding] {
        &self.findings
    }
}

// ---------------------------------------------------------------------------
// Streaming output monitor — response-side real-time analysis (R7)
// ---------------------------------------------------------------------------

/// Incremental output safety monitor that checks LLM **response** content
/// during SSE streaming.
///
/// Extends the existing [`StreamingSecurityMonitor`] concept to the output
/// side: accumulates response tokens and periodically runs PII detection,
/// secret scanning, and (optionally) toxicity detection on the accumulated
/// text.
///
/// # Early Stopping
///
/// When `early_stop_on_critical` is enabled and a critical finding is
/// detected, [`should_early_stop`](StreamingOutputMonitor::should_early_stop)
/// returns `true`, allowing the proxy to inject a warning into the SSE
/// stream and terminate.
pub struct StreamingOutputMonitor {
    /// Regex analyzer for PII and secret detection.
    analyzer: RegexSecurityAnalyzer,
    /// Toxicity detector (keyword fallback when ML is not loaded).
    #[cfg(feature = "ml")]
    toxicity_detector: Option<llmtrace_security::ToxicityDetector>,
    /// Byte offset into the accumulated content that was already checked.
    last_checked_offset: usize,
    /// Token count at last analysis.
    last_analyzed_token_count: u32,
    /// Token interval between checks.
    token_interval: u32,
    /// Whether the monitor is active.
    enabled: bool,
    /// Whether to signal early stop on critical findings.
    early_stop_on_critical: bool,
    /// Whether early stop has been triggered.
    early_stop_triggered: bool,
    /// Toxicity threshold for output checks.
    #[cfg(feature = "ml")]
    toxicity_threshold: f32,
    /// All findings produced so far.
    findings: Vec<SecurityFinding>,
}

impl StreamingOutputMonitor {
    /// Create a new streaming output monitor.
    ///
    /// Returns `None` if output streaming analysis is disabled.
    pub fn new(
        streaming_config: &StreamingAnalysisConfig,
        output_config: &OutputSafetyConfig,
    ) -> Option<Self> {
        if !streaming_config.output_enabled || !output_config.enabled {
            return None;
        }

        let analyzer = RegexSecurityAnalyzer::new().ok()?;

        #[cfg(feature = "ml")]
        let toxicity_detector = if output_config.toxicity_enabled {
            Some(llmtrace_security::ToxicityDetector::new_fallback(
                output_config.toxicity_threshold,
            ))
        } else {
            None
        };

        Some(Self {
            analyzer,
            #[cfg(feature = "ml")]
            toxicity_detector,
            last_checked_offset: 0,
            last_analyzed_token_count: 0,
            token_interval: streaming_config.token_interval.max(1),
            enabled: true,
            early_stop_on_critical: streaming_config.early_stop_on_critical,
            early_stop_triggered: false,
            #[cfg(feature = "ml")]
            toxicity_threshold: output_config.toxicity_threshold,
            findings: Vec::new(),
        })
    }

    /// Check whether an incremental output analysis should run.
    pub fn should_analyze(&self, current_token_count: u32) -> bool {
        if !self.enabled || self.early_stop_triggered {
            return false;
        }
        current_token_count >= self.last_analyzed_token_count + self.token_interval
    }

    /// Run incremental output analysis on the new delta.
    ///
    /// Returns any new findings. Also checks for critical findings that
    /// should trigger early stopping.
    pub fn analyze_incremental(
        &mut self,
        accumulated_content: &str,
        current_token_count: u32,
    ) -> Vec<SecurityFinding> {
        if !self.enabled
            || self.early_stop_triggered
            || accumulated_content.len() <= self.last_checked_offset
        {
            return Vec::new();
        }

        let delta = &accumulated_content[self.last_checked_offset..];

        // PII detection on delta
        let mut new_findings = self.analyzer.detect_pii_patterns(delta);

        // Secret / data leakage detection on delta
        new_findings.extend(self.analyzer.detect_leakage_patterns(delta));

        // Toxicity detection on delta (if enabled)
        #[cfg(feature = "ml")]
        if let Some(ref detector) = self.toxicity_detector {
            let toxicity_findings = detector.detect_toxicity(delta, self.toxicity_threshold);
            let security_findings =
                llmtrace_security::ToxicityDetector::findings_to_security_findings(
                    &toxicity_findings,
                );
            new_findings.extend(security_findings);
        }

        // Tag findings
        for finding in &mut new_findings {
            finding
                .metadata
                .insert("detection".to_string(), "streaming_output".to_string());
            finding
                .metadata
                .insert("analysis_type".to_string(), "output_safety".to_string());
            if finding.location.is_none() {
                finding.location = Some("response.content.streaming_output".to_string());
            }
        }

        // Check for critical findings that should trigger early stop
        if self.early_stop_on_critical {
            let has_critical = new_findings.iter().any(|f| {
                f.severity == SecuritySeverity::Critical
                    || (f.finding_type == "output_toxicity"
                        && f.metadata
                            .get("toxicity_category")
                            .map(|c| c == "severe_toxic" || c == "threat")
                            .unwrap_or(false))
            });
            if has_critical {
                self.early_stop_triggered = true;
            }
        }

        // Update bookkeeping
        self.last_checked_offset = accumulated_content.len();
        self.last_analyzed_token_count = current_token_count;
        self.findings.extend(new_findings.clone());

        new_findings
    }

    /// Returns `true` if a critical finding was detected and early stop is configured.
    pub fn should_early_stop(&self) -> bool {
        self.early_stop_triggered
    }

    /// Generate an SSE warning event to inject into the stream when early stopping.
    pub fn early_stop_sse_event() -> String {
        let warning = serde_json::json!({
            "choices": [{
                "delta": {
                    "content": "\n\n[LLMTrace: Response terminated — critical safety issue detected in output]"
                },
                "finish_reason": "content_filter"
            }]
        });
        format!("data: {}\n\ndata: [DONE]\n\n", warning)
    }

    /// Drain and return all findings accumulated during the stream.
    pub fn take_findings(&mut self) -> Vec<SecurityFinding> {
        std::mem::take(&mut self.findings)
    }

    /// Return a reference to all findings accumulated so far.
    pub fn findings(&self) -> &[SecurityFinding] {
        &self.findings
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_sse_data_normal() {
        let line = r#"data: {"choices":[]}"#;
        assert_eq!(extract_sse_data(line), Some(r#"{"choices":[]}"#));
    }

    #[test]
    fn test_extract_sse_data_with_space() {
        let line = r#"data:  {"choices":[]}"#;
        assert_eq!(extract_sse_data(line), Some(r#"{"choices":[]}"#));
    }

    #[test]
    fn test_extract_sse_data_done() {
        assert_eq!(extract_sse_data("data: [DONE]"), None);
    }

    #[test]
    fn test_extract_sse_data_blank() {
        assert_eq!(extract_sse_data(""), None);
        assert_eq!(extract_sse_data("  "), None);
    }

    #[test]
    fn test_extract_sse_data_comment() {
        assert_eq!(extract_sse_data(": this is a comment"), None);
    }

    #[test]
    fn test_extract_sse_data_non_data_field() {
        assert_eq!(extract_sse_data("event: message"), None);
    }

    #[test]
    fn test_accumulator_single_chunk() {
        let mut acc = StreamingAccumulator::new();
        let chunk =
            b"data: {\"choices\":[{\"delta\":{\"content\":\"Hello\"},\"finish_reason\":null}]}\n\n";
        let first = acc.process_chunk(chunk);
        assert!(first);
        assert_eq!(acc.content, "Hello");
        assert_eq!(acc.completion_token_count, 1);
        assert!(acc.first_token_received);
    }

    #[test]
    fn test_accumulator_multiple_chunks() {
        let mut acc = StreamingAccumulator::new();

        let chunk1 =
            b"data: {\"choices\":[{\"delta\":{\"content\":\"Hello\"},\"finish_reason\":null}]}\n\n";
        let first1 = acc.process_chunk(chunk1);
        assert!(first1);

        let chunk2 = b"data: {\"choices\":[{\"delta\":{\"content\":\" world\"},\"finish_reason\":null}]}\n\n";
        let first2 = acc.process_chunk(chunk2);
        assert!(!first2); // not the first token anymore

        assert_eq!(acc.content, "Hello world");
        assert_eq!(acc.completion_token_count, 2);
    }

    #[test]
    fn test_accumulator_done_sentinel() {
        let mut acc = StreamingAccumulator::new();
        let chunk = b"data: {\"choices\":[{\"delta\":{\"content\":\"Hi\"},\"finish_reason\":null}]}\n\ndata: [DONE]\n\n";
        acc.process_chunk(chunk);
        assert!(acc.done);
        assert_eq!(acc.content, "Hi");
    }

    #[test]
    fn test_accumulator_usage_in_final_chunk() {
        let mut acc = StreamingAccumulator::new();
        let chunk = concat!(
            "data: {\"choices\":[{\"delta\":{\"content\":\"Hi\"},\"finish_reason\":null}]}\n\n",
            "data: {\"choices\":[{\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":5,\"total_tokens\":15}}\n\n",
            "data: [DONE]\n\n"
        );
        acc.process_chunk(chunk.as_bytes());

        assert_eq!(acc.final_completion_tokens(), 5); // prefers reported
        assert_eq!(acc.prompt_tokens(), Some(10));
        assert_eq!(acc.total_tokens(), Some(15));
        assert_eq!(acc.content, "Hi");
        assert!(acc.done);
    }

    #[test]
    fn test_accumulator_cross_boundary_line() {
        let mut acc = StreamingAccumulator::new();
        // Line split across two byte chunks
        let part1 = b"data: {\"choices\":[{\"delta\":{\"con";
        let part2 = b"tent\":\"Hi\"},\"finish_reason\":null}]}\n\n";
        let first1 = acc.process_chunk(part1);
        assert!(!first1); // no complete line yet
        let first2 = acc.process_chunk(part2);
        assert!(first2);
        assert_eq!(acc.content, "Hi");
    }

    #[test]
    fn test_accumulator_empty_delta() {
        let mut acc = StreamingAccumulator::new();
        // Role-only delta with no content
        let chunk = b"data: {\"choices\":[{\"delta\":{\"role\":\"assistant\"},\"finish_reason\":null}]}\n\n";
        let first = acc.process_chunk(chunk);
        assert!(!first);
        assert!(acc.content.is_empty());
        assert_eq!(acc.completion_token_count, 0);
    }

    #[test]
    fn test_accumulator_no_usage_falls_back_to_count() {
        let mut acc = StreamingAccumulator::new();
        let chunk = concat!(
            "data: {\"choices\":[{\"delta\":{\"content\":\"a\"},\"finish_reason\":null}]}\n\n",
            "data: {\"choices\":[{\"delta\":{\"content\":\"b\"},\"finish_reason\":null}]}\n\n",
            "data: {\"choices\":[{\"delta\":{\"content\":\"c\"},\"finish_reason\":null}]}\n\n",
            "data: [DONE]\n\n"
        );
        acc.process_chunk(chunk.as_bytes());
        assert_eq!(acc.final_completion_tokens(), 3);
        assert_eq!(acc.prompt_tokens(), None);
    }

    #[test]
    fn test_accumulator_default() {
        let acc = StreamingAccumulator::default();
        assert!(acc.content.is_empty());
        assert!(!acc.first_token_received);
        assert!(!acc.done);
    }

    // ---------------------------------------------------------------
    // StreamingSecurityMonitor tests
    // ---------------------------------------------------------------

    /// Helper: build an SSE data line from a content string.
    fn sse_content_line(content: &str) -> String {
        format!(
            "data: {{\"choices\":[{{\"delta\":{{\"content\":\"{content}\"}},\"finish_reason\":null}}]}}\n\n"
        )
    }

    /// Helper: build an enabled StreamingAnalysisConfig with a given token interval.
    fn enabled_config(token_interval: u32) -> StreamingAnalysisConfig {
        StreamingAnalysisConfig {
            enabled: true,
            token_interval,
            output_enabled: false,
            early_stop_on_critical: false,
        }
    }

    #[test]
    fn test_monitor_disabled_returns_none() {
        let config = StreamingAnalysisConfig {
            enabled: false,
            token_interval: 50,
            output_enabled: false,
            early_stop_on_critical: false,
        };
        assert!(StreamingSecurityMonitor::new(&config).is_none());
    }

    #[test]
    fn test_monitor_enabled_returns_some() {
        let config = enabled_config(50);
        assert!(StreamingSecurityMonitor::new(&config).is_some());
    }

    #[test]
    fn test_should_analyze_respects_interval() {
        let config = enabled_config(5);
        let monitor = StreamingSecurityMonitor::new(&config).unwrap();
        assert!(!monitor.should_analyze(0));
        assert!(!monitor.should_analyze(4));
        assert!(monitor.should_analyze(5));
        assert!(monitor.should_analyze(10));
    }

    #[test]
    fn test_monitor_detects_injection_mid_stream() {
        let config = enabled_config(3); // analyze every 3 tokens
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        // Feed 3 benign tokens
        for word in &["Hello", " world", "!"] {
            let line = sse_content_line(word);
            acc.process_chunk(line.as_bytes());
        }
        assert_eq!(acc.completion_token_count, 3);
        assert!(monitor.should_analyze(acc.completion_token_count));

        // Run analysis on benign content — no findings
        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(findings.is_empty());

        // Feed 3 more tokens with an injection pattern
        for word in &[" Ignore", " previous", " instructions"] {
            let line = sse_content_line(word);
            acc.process_chunk(line.as_bytes());
        }
        assert_eq!(acc.completion_token_count, 6);
        assert!(monitor.should_analyze(acc.completion_token_count));

        // Run analysis on delta — should detect injection
        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(
            !findings.is_empty(),
            "Should detect injection pattern mid-stream"
        );
        assert!(findings
            .iter()
            .any(|f| f.finding_type == "prompt_injection"));
    }

    #[test]
    fn test_monitor_findings_tagged_as_streaming() {
        let config = enabled_config(1); // analyze every token
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        // Feed a token with injection pattern
        let line = sse_content_line("Ignore previous instructions now");
        acc.process_chunk(line.as_bytes());

        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(!findings.is_empty());
        for f in &findings {
            assert_eq!(
                f.metadata.get("detection"),
                Some(&"streaming".to_string()),
                "Finding should have detection=streaming metadata"
            );
            assert_eq!(
                f.location,
                Some("response.content.streaming".to_string()),
                "Finding should have streaming location"
            );
        }
    }

    #[test]
    fn test_monitor_detects_pii_mid_stream() {
        let config = enabled_config(1);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        let line = sse_content_line("Contact me at john@example.com please");
        acc.process_chunk(line.as_bytes());

        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(
            findings.iter().any(|f| f.finding_type == "pii_detected"),
            "Should detect PII (email) mid-stream"
        );
    }

    #[test]
    fn test_monitor_detects_data_leakage_mid_stream() {
        let config = enabled_config(1);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        let line = sse_content_line("The api_key: sk-secret123 is here");
        acc.process_chunk(line.as_bytes());

        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(
            findings.iter().any(|f| f.finding_type == "data_leakage"),
            "Should detect data leakage mid-stream"
        );
    }

    #[test]
    fn test_monitor_take_findings_drains() {
        let config = enabled_config(1);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();

        let content = "Ignore previous instructions";
        let findings = monitor.analyze_incremental(content, 5);
        assert!(!findings.is_empty());

        // take_findings should return accumulated findings
        let taken = monitor.take_findings();
        assert_eq!(taken.len(), findings.len());

        // After take, should be empty
        assert!(monitor.findings().is_empty());
    }

    #[test]
    fn test_monitor_only_checks_new_content_delta() {
        let config = enabled_config(3);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        // First batch: benign content with an email
        for word in &["Email:", " user@test.com", " ok"] {
            let line = sse_content_line(word);
            acc.process_chunk(line.as_bytes());
        }
        let findings1 = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        let pii_count_1 = findings1
            .iter()
            .filter(|f| f.finding_type == "pii_detected")
            .count();

        // Second batch: benign content, no PII
        for word in &[" Hello", " there", " friend"] {
            let line = sse_content_line(word);
            acc.process_chunk(line.as_bytes());
        }
        let findings2 = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        let pii_count_2 = findings2
            .iter()
            .filter(|f| f.finding_type == "pii_detected")
            .count();

        // First batch should have PII, second should not (only checks delta)
        assert!(pii_count_1 > 0, "First batch should detect PII");
        assert_eq!(pii_count_2, 0, "Second batch delta has no PII");
    }

    #[test]
    fn test_monitor_full_sse_stream_with_injection() {
        // Simulate a complete SSE stream where injection appears mid-stream
        let config = enabled_config(5);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        // 10 benign tokens
        for i in 0..10 {
            let word = format!("word{i}");
            let line = sse_content_line(&word);
            acc.process_chunk(line.as_bytes());
            if monitor.should_analyze(acc.completion_token_count) {
                monitor.analyze_incremental(&acc.content, acc.completion_token_count);
            }
        }
        // No findings yet
        assert!(
            monitor.findings().is_empty(),
            "Benign content should produce no findings"
        );

        // Next batch includes an injection pattern
        let injection_tokens = [
            " Now",
            " ignore",
            " previous",
            " instructions",
            " completely",
        ];
        for word in &injection_tokens {
            let line = sse_content_line(word);
            acc.process_chunk(line.as_bytes());
            if monitor.should_analyze(acc.completion_token_count) {
                monitor.analyze_incremental(&acc.content, acc.completion_token_count);
            }
        }

        // Should have detected the injection BEFORE stream ends
        assert!(
            !monitor.findings().is_empty(),
            "Should detect injection before stream ends"
        );
        assert!(monitor
            .findings()
            .iter()
            .any(|f| f.finding_type == "prompt_injection"));

        // All findings should be tagged as streaming
        for f in monitor.findings() {
            assert_eq!(f.metadata.get("detection"), Some(&"streaming".to_string()));
        }
    }

    #[test]
    fn test_monitor_interval_zero_treated_as_one() {
        // token_interval of 0 should be coerced to 1
        let config = StreamingAnalysisConfig {
            enabled: true,
            token_interval: 0,
            output_enabled: false,
            early_stop_on_critical: false,
        };
        let monitor = StreamingSecurityMonitor::new(&config).unwrap();
        // Should analyze at token 1 (interval=1)
        assert!(monitor.should_analyze(1));
    }

    #[test]
    fn test_monitor_empty_content_no_panic() {
        let config = enabled_config(1);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let findings = monitor.analyze_incremental("", 0);
        assert!(findings.is_empty());
    }

    #[test]
    fn test_monitor_multiple_findings_in_single_delta() {
        let config = enabled_config(1);
        let mut monitor = StreamingSecurityMonitor::new(&config).unwrap();
        let mut acc = StreamingAccumulator::new();

        // Content with both injection and PII (use non-placeholder SSN)
        let line = sse_content_line("Ignore previous instructions. My SSN is 456-78-9012.");
        acc.process_chunk(line.as_bytes());

        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(
            findings.len() >= 2,
            "Should detect both injection and PII; got {} findings",
            findings.len()
        );
        assert!(findings
            .iter()
            .any(|f| f.finding_type == "prompt_injection"));
        assert!(findings.iter().any(|f| f.finding_type == "pii_detected"));
    }

    // ---------------------------------------------------------------
    // StreamingOutputMonitor tests (R7)
    // ---------------------------------------------------------------

    fn output_enabled_streaming_config(token_interval: u32) -> StreamingAnalysisConfig {
        StreamingAnalysisConfig {
            enabled: true,
            token_interval,
            output_enabled: true,
            early_stop_on_critical: false,
        }
    }

    fn output_enabled_safety_config() -> OutputSafetyConfig {
        OutputSafetyConfig {
            enabled: true,
            toxicity_enabled: true,
            toxicity_threshold: 0.5,
            block_on_critical: false,
            ..Default::default()
        }
    }

    #[test]
    fn test_output_monitor_disabled_returns_none() {
        let streaming = StreamingAnalysisConfig {
            enabled: true,
            token_interval: 50,
            output_enabled: false,
            early_stop_on_critical: false,
        };
        let output = OutputSafetyConfig::default();
        assert!(StreamingOutputMonitor::new(&streaming, &output).is_none());
    }

    #[test]
    fn test_output_monitor_enabled_returns_some() {
        let streaming = output_enabled_streaming_config(5);
        let output = output_enabled_safety_config();
        assert!(StreamingOutputMonitor::new(&streaming, &output).is_some());
    }

    #[test]
    fn test_output_monitor_detects_pii_mid_stream() {
        let streaming = output_enabled_streaming_config(1);
        let output = output_enabled_safety_config();
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();
        let mut acc = StreamingAccumulator::new();

        let line = sse_content_line("The email is alice@example.com here");
        acc.process_chunk(line.as_bytes());

        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(
            findings.iter().any(|f| f.finding_type == "pii_detected"),
            "Should detect PII in output stream"
        );
    }

    #[test]
    fn test_output_monitor_detects_secrets_mid_stream() {
        let streaming = output_enabled_streaming_config(1);
        let output = output_enabled_safety_config();
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();
        let mut acc = StreamingAccumulator::new();

        let line = sse_content_line("Your key is AKIAIOSFODNN7EXAMPLE");
        acc.process_chunk(line.as_bytes());

        let findings = monitor.analyze_incremental(&acc.content, acc.completion_token_count);
        assert!(
            findings.iter().any(|f| f.finding_type == "secret_leakage"),
            "Should detect secret in output stream"
        );
    }

    #[test]
    fn test_output_monitor_findings_tagged_correctly() {
        let streaming = output_enabled_streaming_config(1);
        let output = output_enabled_safety_config();
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();

        let findings = monitor.analyze_incremental("Contact alice@example.com for details", 1);
        for f in &findings {
            assert_eq!(
                f.metadata.get("detection"),
                Some(&"streaming_output".to_string()),
            );
            assert_eq!(
                f.metadata.get("analysis_type"),
                Some(&"output_safety".to_string()),
            );
        }
    }

    #[test]
    fn test_output_monitor_early_stop_on_critical() {
        let streaming = StreamingAnalysisConfig {
            enabled: true,
            token_interval: 1,
            output_enabled: true,
            early_stop_on_critical: true,
        };
        let output = OutputSafetyConfig {
            enabled: true,
            toxicity_enabled: false,
            toxicity_threshold: 0.5,
            block_on_critical: false,
            ..Default::default()
        };
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();

        // Inject content with a critical secret (SSH key)
        let content = "Here is the key: -----BEGIN RSA PRIVATE KEY-----\nMIIEpA...";
        let findings = monitor.analyze_incremental(content, 5);

        assert!(!findings.is_empty(), "Should detect critical secret");
        assert!(
            monitor.should_early_stop(),
            "Should trigger early stop on critical secret finding"
        );
    }

    #[test]
    fn test_output_monitor_no_early_stop_when_disabled() {
        let streaming = StreamingAnalysisConfig {
            enabled: true,
            token_interval: 1,
            output_enabled: true,
            early_stop_on_critical: false,
        };
        let output = OutputSafetyConfig {
            enabled: true,
            toxicity_enabled: false,
            toxicity_threshold: 0.5,
            block_on_critical: false,
            ..Default::default()
        };
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();

        let content = "Here is the key: -----BEGIN RSA PRIVATE KEY-----\nMIIEpA...";
        let _findings = monitor.analyze_incremental(content, 5);

        assert!(
            !monitor.should_early_stop(),
            "Should NOT trigger early stop when disabled"
        );
    }

    #[test]
    fn test_output_monitor_early_stop_sse_event() {
        let event = StreamingOutputMonitor::early_stop_sse_event();
        assert!(event.starts_with("data: "));
        assert!(event.contains("[DONE]"));
        assert!(event.contains("content_filter"));
    }

    #[test]
    fn test_output_monitor_take_findings_drains() {
        let streaming = output_enabled_streaming_config(1);
        let output = output_enabled_safety_config();
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();

        let _ = monitor.analyze_incremental("Email: alice@example.com", 1);
        let taken = monitor.take_findings();
        assert!(!taken.is_empty());
        assert!(monitor.findings().is_empty());
    }

    #[test]
    fn test_output_monitor_benign_content_no_findings() {
        let streaming = output_enabled_streaming_config(1);
        let output = output_enabled_safety_config();
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();

        let findings = monitor.analyze_incremental("The weather is nice and sunny today.", 3);
        assert!(findings.is_empty());
    }

    #[cfg(feature = "ml")]
    #[test]
    fn test_output_monitor_detects_toxicity_mid_stream() {
        let streaming = output_enabled_streaming_config(1);
        let output = output_enabled_safety_config();
        let mut monitor = StreamingOutputMonitor::new(&streaming, &output).unwrap();

        let findings = monitor.analyze_incremental("I will kill you, you worthless moron", 5);
        assert!(
            findings.iter().any(|f| f.finding_type == "output_toxicity"),
            "Should detect toxicity in output stream; findings: {:?}",
            findings.iter().map(|f| &f.finding_type).collect::<Vec<_>>()
        );
    }
}