crtx-llm 0.1.1

Claude, Ollama, and replay adapters behind a shared trait.
Documentation
//! Remote-prompt data-classification sensitivity gate.
//!
//! Any memory or context item that exceeds the operator-configured
//! [`MaxSensitivity`] level is excluded from remote prompts before they are
//! assembled. This prevents inadvertent data exfiltration to external hosted
//! models (Anthropic API, remote Ollama, etc.) when the operator has not
//! explicitly opted in to sending high-sensitivity data off-machine.
//!
//! ## Architecture (ADR 0048 §3 follow-on)
//!
//! The primary enforcement point is now a real per-memory domain-tag query:
//! before a prompt is dispatched to a remote endpoint, `cortex-cli`'s run
//! pipeline calls `MemoryRepo::max_sensitivity_for_active_memories`, parses
//! the result as a [`MaxSensitivity`], and refuses with
//! [`LlmError::InvalidRequest`] when active memories exceed the configured
//! threshold. See `crates/cortex-cli/src/cmd/run.rs` for the call site.
//!
//! [`check_remote_prompt_sensitivity`] remains as the adapter-layer fallback
//! for inline `[SENSITIVITY:HIGH]` and `[sens:high]` markers (matched
//! case-insensitively). It is called inside `ClaudeHttpAdapter::complete` as a
//! defense-in-depth guard after the store-query check in the run pipeline.

use std::str::FromStr;

use crate::adapter::LlmError;

/// Maximum data-classification level permitted in a remote prompt.
///
/// Variants are ordered from most restrictive to least restrictive so that
/// `PartialOrd` / `Ord` comparisons work naturally: a memory at level `"low"`
/// is always allowed when the gate is `Low`, `Medium`, or `High`; a memory at
/// level `"high"` is only allowed when the gate is `High`.
///
/// The ordering comes from the derived `PartialOrd` / `Ord`, which follow the
/// declaration order of the variants (`Low < Medium < High`). Reordering the
/// variants therefore changes which levels each gate admits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MaxSensitivity {
    /// Only low-sensitivity memories may appear in remote prompts.
    Low,
    /// Low- and medium-sensitivity memories are permitted (operator default).
    Medium,
    /// All memories are permitted, including high-sensitivity data.
    /// This is an explicit operator opt-in and risks data exfiltration to
    /// external hosted models.
    High,
}

impl FromStr for MaxSensitivity {
    type Err = String;

    /// Convert a textual sensitivity token into a [`MaxSensitivity`].
    ///
    /// The token is ASCII-lowercased first, so `"low"`, `"Medium"` and
    /// `"HIGH"` are all accepted. No trimming is performed: surrounding
    /// whitespace makes the token unrecognised.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message quoting the (lowercased) token when
    /// it is not one of `low`, `medium`, `high`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalised = s.to_ascii_lowercase();

        let parsed = match normalised.as_str() {
            "low" => Some(Self::Low),
            "medium" => Some(Self::Medium),
            "high" => Some(Self::High),
            _ => None,
        };

        // The message is only built on the failure path.
        parsed.ok_or_else(|| {
            let other = normalised.as_str();
            format!("unrecognised sensitivity level {other:?}; expected one of: low, medium, high")
        })
    }
}

impl MaxSensitivity {
    /// Decide whether a memory classified as `level` may pass this gate.
    ///
    /// `level` is compared ASCII-case-insensitively against `"low"` and
    /// `"medium"`; every other value — `"high"` as well as any label this
    /// type does not know — is ranked as [`MaxSensitivity::High`], so an
    /// unknown classification is only admitted by a `High` gate.
    #[must_use]
    pub fn allows(&self, level: &str) -> bool {
        let rank = if level.eq_ignore_ascii_case("low") {
            Self::Low
        } else if level.eq_ignore_ascii_case("medium") {
            Self::Medium
        } else {
            // "high" and anything unrecognised: treat as maximally sensitive.
            Self::High
        };

        // The gate admits every rank up to and including its own.
        *self >= rank
    }
}

/// Result of a domain-tag sensitivity gate evaluation (ADR 0048 §3).
///
/// Produced by the run pipeline after calling
/// `MemoryRepo::max_sensitivity_for_active_memories` and comparing against the
/// configured [`MaxSensitivity`] ceiling. The `allowed` field is the decisive
/// outcome; the other fields are preserved for audit logging.
///
/// Values are normally built through [`SensitivityGateResult::evaluate`],
/// which fills all three fields from its two arguments.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SensitivityGateResult {
    /// Maximum sensitivity level found across all active memories.
    ///
    /// One of `"high"`, `"medium"`, `"low"`, or `"none"`. Sourced directly
    /// from `MemoryRepo::max_sensitivity_for_active_memories`.
    ///
    /// `evaluate` stores the string exactly as it was passed in; no case
    /// normalisation or validation is applied to the recorded value.
    pub max_memory_sensitivity: String,
    /// Operator-configured ceiling from the cortex.toml `[llm.claude]`
    /// `max_sensitivity` field.
    pub configured_max: MaxSensitivity,
    /// `true` when the active memories' maximum sensitivity is at or below
    /// the configured ceiling; `false` when the gate blocks dispatch.
    pub allowed: bool,
}

impl SensitivityGateResult {
    /// Evaluate the domain-tag gate given the memory's maximum sensitivity
    /// level (as returned by `MemoryRepo::max_sensitivity_for_active_memories`)
    /// and the operator-configured ceiling.
    ///
    /// `memory_max_str` is one of `"high"`, `"medium"`, `"low"`, or `"none"`
    /// (compared ASCII-case-insensitively):
    ///
    /// - `"none"` is always allowed, whatever the ceiling — no sensitivity
    ///   level was found, so nothing can exceed the configured maximum.
    /// - `"low"` / `"medium"` / `"high"` are allowed when they are at or below
    ///   `configured_max`.
    /// - Any other string is conservatively treated as `"high"` so that an
    ///   unknown tag value is never silently permitted.
    ///
    /// The returned value records `memory_max_str` verbatim together with the
    /// ceiling, for audit logging.
    #[must_use]
    pub fn evaluate(memory_max_str: &str, configured_max: MaxSensitivity) -> Self {
        // `MaxSensitivity::allows` has no notion of "none" and would rank it as
        // an unknown (i.e. high) label, blocking every run without classified
        // memories at the Low/Medium gates. Handle the sentinel here instead.
        // NOTE(review): assumes "none" means "no sensitivity tag found" —
        // confirm against `MemoryRepo::max_sensitivity_for_active_memories`.
        let nothing_classified = memory_max_str.eq_ignore_ascii_case("none");
        let allowed = nothing_classified || configured_max.allows(memory_max_str);

        Self {
            max_memory_sensitivity: memory_max_str.to_owned(),
            configured_max,
            allowed,
        }
    }
}

/// Adapter-layer check that a fully assembled prompt may be sent to a remote
/// endpoint under the configured `max` sensitivity.
///
/// This is a defense-in-depth guard: it only looks for the inline markers
/// `[SENSITIVITY:HIGH]` and `[sens:high]` in the prompt text. The primary
/// enforcement path is the domain-tag store query performed by the run
/// pipeline before the `LlmRequest` is dispatched (see
/// `cortex-cli/src/cmd/run.rs`).
///
/// # Behaviour
///
/// - `MaxSensitivity::High`: returns `Ok(())` without inspecting the prompt —
///   the operator has explicitly opted in to sending every sensitivity class.
/// - `MaxSensitivity::Medium` / `MaxSensitivity::Low`: the prompt is
///   ASCII-lowercased and searched for either marker, so matching is
///   case-insensitive. A hit is rejected; a prompt without markers passes.
///
/// Each call emits `tracing` events describing the evaluation and its outcome.
///
/// # Errors
///
/// Returns [`LlmError::InvalidRequest`] whose message starts with
/// `sensitivity_exceeds_remote_threshold` when a high-sensitivity marker is
/// present and `max` is below `High`.
pub fn check_remote_prompt_sensitivity(prompt: &str, max: MaxSensitivity) -> Result<(), LlmError> {
    // Lowercase spellings of the inline markers; compared against a
    // lowercased copy of the prompt.
    const HIGH_MARKERS: [&str; 2] = ["[sensitivity:high]", "[sens:high]"];

    tracing::info!(
        max = ?max,
        prompt_len = prompt.len(),
        "remote prompt sensitivity gate: evaluating"
    );

    // Explicit operator opt-in: nothing to scan for.
    if matches!(max, MaxSensitivity::High) {
        tracing::debug!("remote prompt sensitivity gate: max=High, unconditional pass");
        return Ok(());
    }

    // Plain substring search on a lowercased copy keeps this regex-free.
    let folded = prompt.to_ascii_lowercase();
    let marker_present = HIGH_MARKERS.iter().any(|marker| folded.contains(*marker));

    if !marker_present {
        tracing::debug!(max = ?max, "remote prompt sensitivity gate: pass (no high-sensitivity markers)");
        return Ok(());
    }

    tracing::info!(
        max = ?max,
        "remote prompt sensitivity gate: high-sensitivity marker found; excluding prompt"
    );
    Err(LlmError::InvalidRequest(
        "sensitivity_exceeds_remote_threshold: prompt contains high-sensitivity content \
         above the configured max_sensitivity level; memory excluded from remote dispatch"
            .to_owned(),
    ))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// All gate settings, from most to least restrictive.
    const GATES: [MaxSensitivity; 3] = [
        MaxSensitivity::Low,
        MaxSensitivity::Medium,
        MaxSensitivity::High,
    ];

    /// The three recognised classification labels, in ascending order.
    const LEVELS: [&str; 3] = ["low", "medium", "high"];

    #[test]
    fn from_str_parses_all_variants() {
        let table = [
            ("low", MaxSensitivity::Low),
            ("medium", MaxSensitivity::Medium),
            ("high", MaxSensitivity::High),
        ];
        for &(token, expected) in &table {
            assert_eq!(token.parse::<MaxSensitivity>().unwrap(), expected);
        }
    }

    #[test]
    fn from_str_is_case_insensitive() {
        let table = [
            ("LOW", MaxSensitivity::Low),
            ("Medium", MaxSensitivity::Medium),
            ("HIGH", MaxSensitivity::High),
        ];
        for &(token, expected) in &table {
            assert_eq!(token.parse::<MaxSensitivity>().unwrap(), expected);
        }
    }

    #[test]
    fn from_str_rejects_unknown() {
        for token in &["critical", ""] {
            assert!(token.parse::<MaxSensitivity>().is_err());
        }
    }

    #[test]
    fn allows_low_gate_permits_only_low() {
        let gate = MaxSensitivity::Low;
        let verdicts = LEVELS.map(|level| gate.allows(level));
        assert_eq!(verdicts, [true, false, false]);
    }

    #[test]
    fn allows_medium_gate_permits_low_and_medium() {
        let gate = MaxSensitivity::Medium;
        let verdicts = LEVELS.map(|level| gate.allows(level));
        assert_eq!(verdicts, [true, true, false]);
    }

    #[test]
    fn allows_high_gate_permits_all() {
        let gate = MaxSensitivity::High;
        let verdicts = LEVELS.map(|level| gate.allows(level));
        assert_eq!(verdicts, [true, true, true]);
    }

    #[test]
    fn allows_unknown_level_treated_as_high_sensitivity() {
        // A label the gate does not know is ranked as high: only the High
        // gate lets it through.
        let verdicts = GATES.map(|gate| gate.allows("classified"));
        assert_eq!(verdicts, [false, false, true]);
    }

    #[test]
    fn check_remote_prompt_sensitivity_passes_unmarked_prompt_at_all_gates() {
        // Without any high-sensitivity marker every gate setting passes.
        for &gate in &GATES {
            assert!(check_remote_prompt_sensitivity("some prompt text", gate).is_ok());
        }
    }

    #[test]
    fn check_remote_prompt_sensitivity_high_gate_allows_marked_prompt() {
        // The High gate is an operator opt-in and never inspects the prompt.
        let marked = "Context: [SENSITIVITY:HIGH] — user medical history.";
        let outcome = check_remote_prompt_sensitivity(marked, MaxSensitivity::High);
        assert!(
            outcome.is_ok(),
            "High gate must pass even when high-sensitivity marker is present"
        );
    }

    #[test]
    fn check_remote_prompt_sensitivity_medium_gate_rejects_marked_prompt() {
        let marked = "Context: [SENSITIVITY:HIGH] — confidential data.";
        let err = check_remote_prompt_sensitivity(marked, MaxSensitivity::Medium)
            .expect_err("Medium gate must reject a prompt with a high-sensitivity marker");

        let names_invariant = match &err {
            LlmError::InvalidRequest(msg) => msg.contains("sensitivity_exceeds_remote_threshold"),
            _ => false,
        };
        assert!(
            names_invariant,
            "error must name the stable invariant: {err:?}"
        );
    }

    #[test]
    fn check_remote_prompt_sensitivity_low_gate_rejects_sens_high_marker() {
        let marked = "User profile: [sens:high] present.";
        let err = check_remote_prompt_sensitivity(marked, MaxSensitivity::Low)
            .expect_err("Low gate must reject [sens:high] marker");
        let is_invalid_request = matches!(err, LlmError::InvalidRequest(_));
        assert!(is_invalid_request, "expected InvalidRequest: {err:?}");
    }

    #[test]
    fn check_remote_prompt_sensitivity_marker_matching_is_case_insensitive() {
        // Every casing of either marker has to trip the gate.
        let variants = ["[Sensitivity:High]", "[SENSITIVITY:HIGH]", "[sens:HIGH]"];
        for marker in variants.iter() {
            let prompt = format!("data: {marker} info");
            let err = check_remote_prompt_sensitivity(&prompt, MaxSensitivity::Medium)
                .expect_err("case-variant marker must be rejected");
            let is_invalid_request = matches!(err, LlmError::InvalidRequest(_));
            assert!(is_invalid_request, "marker {marker} not caught: {err:?}");
        }
    }

    #[test]
    fn max_sensitivity_ordering() {
        // Every earlier gate must compare strictly below every later one:
        // Low < Medium, Low < High, Medium < High.
        for (idx, lower) in GATES.iter().enumerate() {
            for higher in &GATES[idx + 1..] {
                assert!(lower < higher);
            }
        }
    }
}