// exochain-gatekeeper 0.2.0-beta

// Copyright 2026 Exochain Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at:
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0

//! MCP rule enforcement audit trail.
//!
//! Records every MCP rule enforcement outcome in a BLAKE3 hash-chained log
//! that is independent of the governance AuditLog. This keeps the judicial
//! branch (exo-gatekeeper) self-contained — no exo-governance dependency —
//! while providing a tamper-evident record of AI boundary enforcement.
//!
//! The rule ID type is [`McpRule`], an enum defined in this crate. Using a
//! typed enum rather than a plain `String` prevents injection of fabricated
//! rule identifiers into the tamper-evident chain.

use exo_core::{Did, Timestamp, hash::hash_structured};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use crate::{error::GatekeeperError, mcp::McpRule};

/// Domain-separation tag placed in the `domain` field of every hashed record
/// payload.
const MCP_AUDIT_RECORD_HASH_DOMAIN: &str = "exo.gatekeeper.mcp_audit_record.v1";
/// Schema version placed in the `schema_version` field of every hashed record
/// payload.
const MCP_AUDIT_RECORD_HASH_SCHEMA_VERSION: u16 = 1;
/// Maximum number of records an [`McpAuditLog`] may hold; [`append`] rejects
/// further records once `records.len()` has reached this value.
pub const MAX_MCP_AUDIT_RECORDS: usize = 10_000;

/// Serializable view of an [`McpAuditRecord`] that is handed to
/// `hash_structured` to produce the record hash.
///
/// Carries every field of the record plus a fixed `domain` tag and a
/// `schema_version`.
///
/// NOTE(review): field order and names presumably influence the encoded bytes
/// and therefore every hash already stored in a chain — confirm against
/// `hash_structured` before reordering or renaming anything here.
#[derive(Debug, Clone, Serialize)]
struct McpAuditRecordHashPayload {
    /// Always `MCP_AUDIT_RECORD_HASH_DOMAIN`.
    domain: &'static str,
    /// Always `MCP_AUDIT_RECORD_HASH_SCHEMA_VERSION`.
    schema_version: u16,
    /// Copy of [`McpAuditRecord::id`].
    record_id: Uuid,
    /// Copy of [`McpAuditRecord::timestamp`].
    timestamp: Timestamp,
    /// Copy of [`McpAuditRecord::rule`].
    rule: McpRule,
    /// Clone of [`McpAuditRecord::actor`].
    actor: Did,
    /// Copy of [`McpAuditRecord::outcome`].
    outcome: McpEnforcementOutcome,
    /// Clone of [`McpAuditRecord::data_residency_region`].
    data_residency_region: Option<String>,
    /// Copy of [`McpAuditRecord::chain_hash`], so each record hash also
    /// covers the link to its predecessor.
    chain_hash: [u8; 32],
}

// ---------------------------------------------------------------------------
// Enforcement outcome
// ---------------------------------------------------------------------------

/// The outcome of evaluating an MCP rule against an AI actor.
///
/// Stored on every [`McpAuditRecord`] and copied into the payload that is
/// hashed for the chain.
///
/// NOTE(review): renaming or reordering variants presumably changes the
/// serialized form and therefore existing chain hashes — confirm before
/// editing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum McpEnforcementOutcome {
    /// The rule was satisfied; the action is permitted.
    Allowed,
    /// The rule was violated; the action is blocked.
    Blocked,
    /// The rule triggered escalation to a human authority.
    Escalated,
}

// ---------------------------------------------------------------------------
// MCP audit record
// ---------------------------------------------------------------------------

/// A single enforcement event appended to [`McpAuditLog`].
///
/// `rule` is typed as [`McpRule`] (a registered enum), NOT a free-form
/// `String`. This prevents MCP rule ID injection attacks where a malicious
/// or misconfigured MCP server inserts rule identifiers that pattern-match
/// compliant rules without actually being enforced.
///
/// All fields, including `chain_hash`, are part of the payload that is hashed
/// to link the next record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpAuditRecord {
    /// Caller-supplied record identifier. [`create_record`] rejects the nil
    /// UUID.
    pub id: Uuid,
    /// Caller-supplied event time. [`create_record`] rejects
    /// `Timestamp::ZERO`.
    pub timestamp: Timestamp,
    /// The MCP rule that was evaluated. Typed — not a free-form string.
    pub rule: McpRule,
    /// DID of the actor the rule was evaluated against.
    pub actor: Did,
    /// Result of the rule evaluation.
    pub outcome: McpEnforcementOutcome,
    /// Optional data residency region for cross-border transfer impact
    /// assessments (GDPR Chapter V). `None` is valid for intra-jurisdiction
    /// deployments but must be set for cross-border processing.
    pub data_residency_region: Option<String>,
    /// BLAKE3 hash of the previous record; `[0u8; 32]` for the first entry.
    ///
    /// [`append`] compares this against [`McpAuditLog::head_hash`] and
    /// refuses the record when they differ.
    pub chain_hash: [u8; 32],
}

// ---------------------------------------------------------------------------
// Hash function
// ---------------------------------------------------------------------------

fn mcp_audit_record_hash_payload(r: &McpAuditRecord) -> McpAuditRecordHashPayload {
    McpAuditRecordHashPayload {
        domain: MCP_AUDIT_RECORD_HASH_DOMAIN,
        schema_version: MCP_AUDIT_RECORD_HASH_SCHEMA_VERSION,
        record_id: r.id,
        timestamp: r.timestamp,
        rule: r.rule,
        actor: r.actor.clone(),
        outcome: r.outcome,
        data_residency_region: r.data_residency_region.clone(),
        chain_hash: r.chain_hash,
    }
}

/// Compute the 32-byte chain hash of a single record.
///
/// The record is first converted to its domain-tagged payload and then passed
/// to `hash_structured`.
///
/// # Errors
///
/// Returns [`GatekeeperError::McpAuditHashEncodingFailed`] when
/// `hash_structured` reports an error; the underlying error text is embedded
/// in `reason`.
fn hash_record(r: &McpAuditRecord) -> Result<[u8; 32], GatekeeperError> {
    let payload = mcp_audit_record_hash_payload(r);
    match hash_structured(&payload) {
        Ok(digest) => Ok(*digest.as_bytes()),
        Err(e) => Err(GatekeeperError::McpAuditHashEncodingFailed {
            reason: format!("MCP audit record canonical CBOR hash failed: {e}"),
        }),
    }
}

// ---------------------------------------------------------------------------
// MCP audit log
// ---------------------------------------------------------------------------

/// Append-only, BLAKE3 hash-chained log of MCP enforcement events.
///
/// Structurally mirrors `exo_governance::audit::AuditLog` but is
/// self-contained within exo-gatekeeper to preserve branch separation.
///
/// The `Default` value is an empty log.
#[derive(Debug, Clone, Default)]
pub struct McpAuditLog {
    /// Records in append order. Each record's `chain_hash` is expected to
    /// equal the hash of the record before it (`[0u8; 32]` for the first).
    ///
    /// The field is public, so that linkage is only checked for records added
    /// through [`append`]; call [`verify_chain`] to validate a log whose
    /// records may have been modified directly.
    pub records: Vec<McpAuditRecord>,
}

impl McpAuditLog {
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Hash of the last record; `[0u8; 32]` for an empty log.
    ///
    /// # Errors
    ///
    /// Returns [`GatekeeperError::McpAuditHashEncodingFailed`] if canonical
    /// CBOR hashing of the latest record fails.
    pub fn head_hash(&self) -> Result<[u8; 32], GatekeeperError> {
        match self.records.last() {
            Some(record) => hash_record(record),
            None => Ok([0u8; 32]),
        }
    }

    #[must_use]
    pub fn len(&self) -> usize {
        self.records.len()
    }

    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.records.is_empty()
    }
}

/// Append a pre-built record to the end of the log.
///
/// The capacity check runs before the chain-link check, and the log is left
/// untouched whenever an error is returned. Only the capacity and the chain
/// link are checked here; the id/timestamp validation done by
/// [`create_record`] is not repeated.
///
/// # Errors
///
/// - [`GatekeeperError::McpAuditInvalidRecord`] if the log already holds
///   [`MAX_MCP_AUDIT_RECORDS`] records.
/// - [`GatekeeperError::McpAuditHashEncodingFailed`] if hashing the current
///   head record fails.
/// - [`GatekeeperError::McpAuditChainBroken`] if `record.chain_hash` does not
///   match the current log head — indicating either an ordering error or
///   tampering. `index` is the position the record would have taken.
pub fn append(log: &mut McpAuditLog, record: McpAuditRecord) -> Result<(), GatekeeperError> {
    let next_index = log.records.len();
    if next_index >= MAX_MCP_AUDIT_RECORDS {
        return Err(GatekeeperError::McpAuditInvalidRecord {
            reason: format!(
                "MCP audit log capacity exceeded: {} >= {}",
                next_index, MAX_MCP_AUDIT_RECORDS
            ),
        });
    }

    let expected_link = log.head_hash()?;
    if record.chain_hash == expected_link {
        log.records.push(record);
        Ok(())
    } else {
        Err(GatekeeperError::McpAuditChainBroken { index: next_index })
    }
}

/// Verify the integrity of the entire log chain.
///
/// Walks the records in order, starting from an expected link of
/// `[0u8; 32]`. Each record's `chain_hash` must equal the expected link, and
/// the record's own hash becomes the expected link for the next one. An empty
/// log verifies successfully.
///
/// # Errors
///
/// - [`GatekeeperError::McpAuditChainBroken`] carrying the index of the first
///   record whose `chain_hash` does not match.
/// - [`GatekeeperError::McpAuditHashEncodingFailed`] if hashing any record
///   fails.
pub fn verify_chain(log: &McpAuditLog) -> Result<(), GatekeeperError> {
    log.records
        .iter()
        .enumerate()
        .try_fold([0u8; 32], |expected_link, (index, record)| {
            if record.chain_hash == expected_link {
                hash_record(record)
            } else {
                Err(GatekeeperError::McpAuditChainBroken { index })
            }
        })
        .map(|_| ())
}

/// Build a new record linked to the current log head.
///
/// The record is only constructed, not stored: pass the result to [`append`]
/// to add it to the log. `id` and `timestamp` must be provided by the caller;
/// nothing is generated inside this function. The remaining arguments are
/// moved into the record unchanged, and `chain_hash` is set to
/// [`McpAuditLog::head_hash`] of `log`.
///
/// # Errors
///
/// - [`GatekeeperError::McpAuditInvalidRecord`] if `id` is the nil UUID or
///   `timestamp` equals `Timestamp::ZERO` (the id is checked first).
/// - [`GatekeeperError::McpAuditHashEncodingFailed`] if hashing the current
///   head record fails.
pub fn create_record(
    log: &McpAuditLog,
    id: Uuid,
    timestamp: Timestamp,
    rule: McpRule,
    actor: Did,
    outcome: McpEnforcementOutcome,
    data_residency_region: Option<String>,
) -> Result<McpAuditRecord, GatekeeperError> {
    // Pick the first applicable rejection reason, if any, before touching the log.
    let rejection = if id.is_nil() {
        Some("record id must be caller-supplied and non-nil")
    } else if timestamp == Timestamp::ZERO {
        Some("timestamp must be caller-supplied and non-zero")
    } else {
        None
    };
    if let Some(reason) = rejection {
        return Err(GatekeeperError::McpAuditInvalidRecord {
            reason: reason.into(),
        });
    }

    let chain_hash = log.head_hash()?;
    Ok(McpAuditRecord {
        id,
        timestamp,
        rule,
        actor,
        outcome,
        data_residency_region,
        chain_hash,
    })
}

// ===========================================================================
// Tests
// ===========================================================================

#[cfg(test)]
mod tests {
    use exo_core::Did;

    use super::*;
    use crate::mcp::McpRule;

    /// Build a `did:exo:<s>` DID, panicking if `Did::new` rejects it.
    fn did(s: &str) -> Did {
        Did::new(&format!("did:exo:{s}")).expect("valid DID")
    }

    /// Deterministic UUID derived from an integer, so tests never need
    /// random ids.
    fn record_id(n: u128) -> Uuid {
        Uuid::from_u128(n)
    }

    /// Deterministic timestamp built from `ms` with `0` as the second
    /// component (presumably the logical counter — see the
    /// `physical_ms` / `logical` fields used further down).
    fn ts(ms: u64) -> Timestamp {
        Timestamp::new(ms, 0)
    }

    /// Text of this file from the `create_record` signature up to (not
    /// including) the tests banner comment.
    fn create_record_source() -> &'static str {
        let source = include_str!("mcp_audit.rs");
        let start = source
            .find("pub fn create_record(")
            .expect("create_record source must exist");
        let end = source[start..]
            .find("// ===========================================================================")
            .expect("tests section marker must exist");
        &source[start..start + end]
    }

    /// Text of this file from its first byte up to (not including) the tests
    /// banner comment, i.e. everything that is not test code.
    fn production_source() -> &'static str {
        let source = include_str!("mcp_audit.rs");
        let end = source
            .find("// ===========================================================================")
            .expect("tests section marker must exist");
        &source[..end]
    }

    /// Fixed record with every field populated. Its `chain_hash` is an
    /// arbitrary constant, so it is not a valid link in any real log.
    fn sample_record() -> McpAuditRecord {
        McpAuditRecord {
            id: record_id(0xD001),
            timestamp: Timestamp::new(1000, 4),
            rule: McpRule::Mcp003ProvenanceRequired,
            actor: did("agent"),
            outcome: McpEnforcementOutcome::Blocked,
            data_residency_region: Some("EU-WEST-1".into()),
            chain_hash: [0x22u8; 32],
        }
    }

    /// The hash payload carries the domain tag, schema version 1, and an
    /// unchanged copy of every record field.
    #[test]
    fn mcp_audit_record_hash_payload_is_domain_separated_cbor() {
        let record = sample_record();
        let payload = mcp_audit_record_hash_payload(&record);
        assert_eq!(payload.domain, MCP_AUDIT_RECORD_HASH_DOMAIN);
        assert_eq!(payload.schema_version, 1);
        assert_eq!(payload.record_id, record.id);
        assert_eq!(payload.timestamp, record.timestamp);
        assert_eq!(payload.rule, record.rule);
        assert_eq!(payload.actor, record.actor);
        assert_eq!(payload.outcome, record.outcome);
        assert_eq!(payload.data_residency_region, record.data_residency_region);
        assert_eq!(payload.chain_hash, record.chain_hash);
    }

    /// `hash_record` must not equal a hash built by feeding raw field bytes
    /// and `Debug` strings straight into a BLAKE3 hasher.
    #[test]
    fn mcp_audit_record_hash_rejects_legacy_debug_concat_hash() {
        let record = sample_record();
        // Reconstruct the concatenation-style hash by hand.
        let mut h = blake3::Hasher::new();
        h.update(record.id.as_bytes());
        h.update(&record.timestamp.physical_ms.to_le_bytes());
        h.update(&record.timestamp.logical.to_le_bytes());
        h.update(format!("{:?}", record.rule).as_bytes());
        h.update(record.actor.as_str().as_bytes());
        h.update(format!("{:?}", record.outcome).as_bytes());
        if let Some(region) = &record.data_residency_region {
            h.update(region.as_bytes());
        }
        h.update(&record.chain_hash);
        let legacy = *h.finalize().as_bytes();

        assert_ne!(
            hash_record(&record).expect("canonical MCP audit hash"),
            legacy
        );
    }

    /// Source-level guard: the non-test part of this file must not contain
    /// any of the three forbidden snippets checked below. Note that comments
    /// count too, since the check is a plain substring search.
    #[test]
    fn mcp_audit_production_source_has_no_raw_hash_loop_or_debug_string_hashing() {
        let production = production_source();
        assert!(
            !production.contains("blake3::Hasher"),
            "MCP audit hashes must use domain-separated canonical CBOR"
        );
        assert!(
            !production.contains("format!(\"{:?}\""),
            "MCP audit hashes must not bind rule/outcome through debug strings"
        );
        assert!(
            !production.contains("unwrap_or([0u8; 32])"),
            "MCP audit hashing must not hide hash failures behind a zero hash"
        );
    }

    /// Source-level guard: the text of `create_record` must not mention
    /// random UUID generation or a wall-clock timestamp constructor.
    #[test]
    fn create_record_has_no_internal_entropy_or_wall_clock() {
        let source = create_record_source();
        assert!(
            !source.contains("Uuid::new_v4"),
            "MCP audit records must not fabricate nondeterministic UUIDs internally"
        );
        // Assembled from two pieces so the full needle is not spelled out
        // as one literal here.
        let forbidden_timestamp = ["Timestamp::", "now_utc"].concat();
        assert!(
            !source.contains(&forbidden_timestamp),
            "MCP audit records must not read wall-clock time internally"
        );
    }

    /// Create a record linked to the current head and append it, panicking on
    /// any failure. The id and timestamp are offset by the current log length
    /// so successive calls produce distinct values.
    fn append_ok(log: &mut McpAuditLog, rule: McpRule, outcome: McpEnforcementOutcome) {
        let offset = u128::try_from(log.len()).expect("log length fits u128");
        let timestamp_offset = u64::try_from(log.len()).expect("log length fits u64");
        let r = create_record(
            log,
            record_id(0xA000 + offset),
            ts(10_000 + timestamp_offset),
            rule,
            did("agent"),
            outcome,
            None,
        )
        .expect("deterministic MCP audit record");
        append(log, r).expect("append failed");
    }

    /// A log with no records passes verification.
    #[test]
    fn empty_log_verifies() {
        assert!(verify_chain(&McpAuditLog::new()).is_ok());
    }

    /// After one append the log reports length 1, is non-empty, and verifies.
    #[test]
    fn single_record_appended() {
        let mut log = McpAuditLog::new();
        append_ok(
            &mut log,
            McpRule::Mcp001BctsScope,
            McpEnforcementOutcome::Allowed,
        );
        assert_eq!(log.len(), 1);
        assert!(!log.is_empty());
        assert!(verify_chain(&log).is_ok());
    }

    /// One record per rule from `McpRule::all()` forms a valid chain. The
    /// expected length of 6 is hard-coded and must track the number of rules.
    #[test]
    fn chain_of_records_verifies() {
        let mut log = McpAuditLog::new();
        for rule in McpRule::all() {
            append_ok(&mut log, rule, McpEnforcementOutcome::Allowed);
        }
        assert_eq!(log.len(), 6);
        assert!(verify_chain(&log).is_ok());
    }

    /// Overwriting the `chain_hash` of a record in the middle of the log makes
    /// verification fail.
    #[test]
    fn tamper_detected() {
        let mut log = McpAuditLog::new();
        for rule in McpRule::all() {
            append_ok(&mut log, rule, McpEnforcementOutcome::Allowed);
        }
        log.records[2].chain_hash = [0xffu8; 32];
        assert!(verify_chain(&log).is_err());
    }

    /// `append` refuses a hand-built record whose `chain_hash` does not match
    /// the current head.
    #[test]
    fn wrong_chain_hash_rejected() {
        let mut log = McpAuditLog::new();
        append_ok(
            &mut log,
            McpRule::Mcp001BctsScope,
            McpEnforcementOutcome::Allowed,
        );
        let bad = McpAuditRecord {
            id: record_id(0xB001),
            timestamp: ts(9000),
            rule: McpRule::Mcp002NoSelfEscalation,
            actor: did("agent"),
            outcome: McpEnforcementOutcome::Blocked,
            data_residency_region: None,
            chain_hash: [0xffu8; 32], // wrong
        };
        assert!(append(&mut log, bad).is_err());
    }

    /// The head hash is all zeros for an empty log and differs after the
    /// first append.
    #[test]
    fn head_hash_changes_on_append() {
        let mut log = McpAuditLog::new();
        let h0 = log.head_hash().expect("empty MCP audit head hash");
        assert_eq!(h0, [0u8; 32]);
        append_ok(
            &mut log,
            McpRule::Mcp003ProvenanceRequired,
            McpEnforcementOutcome::Allowed,
        );
        assert_ne!(log.head_hash().expect("MCP audit head hash"), h0);
    }

    /// Hashing the same record twice yields the same bytes. The record is
    /// built directly (nil id included), so `create_record` validation is not
    /// involved.
    #[test]
    fn deterministic_hash() {
        let r = McpAuditRecord {
            id: Uuid::nil(),
            timestamp: Timestamp::new(1000, 0),
            rule: McpRule::Mcp001BctsScope,
            actor: did("test"),
            outcome: McpEnforcementOutcome::Allowed,
            data_residency_region: None,
            chain_hash: [0u8; 32],
        };
        assert_eq!(
            hash_record(&r).expect("first MCP audit record hash"),
            hash_record(&r).expect("second MCP audit record hash")
        );
    }

    /// A supplied residency region is kept on the record, and the record
    /// still appends and verifies.
    #[test]
    fn data_residency_region_stored() {
        let mut log = McpAuditLog::new();
        let r = create_record(
            &log,
            record_id(0xC001),
            ts(20_000),
            McpRule::Mcp001BctsScope,
            did("agent"),
            McpEnforcementOutcome::Allowed,
            Some("EU-WEST-1".into()),
        )
        .expect("deterministic MCP audit record");
        assert_eq!(r.data_residency_region, Some("EU-WEST-1".into()));
        append(&mut log, r).expect("append ok");
        assert!(verify_chain(&log).is_ok());
    }

    /// `create_record` stores exactly the id and timestamp it was given.
    #[test]
    fn create_record_preserves_caller_supplied_metadata() {
        let log = McpAuditLog::new();
        let id = record_id(0xC002);
        let timestamp = ts(21_000);
        let record = create_record(
            &log,
            id,
            timestamp,
            McpRule::Mcp001BctsScope,
            did("agent"),
            McpEnforcementOutcome::Allowed,
            None,
        )
        .expect("deterministic MCP audit record");

        assert_eq!(record.id, id);
        assert_eq!(record.timestamp, timestamp);
    }

    /// A nil UUID is rejected with `McpAuditInvalidRecord`.
    #[test]
    fn create_record_rejects_nil_id() {
        let err = create_record(
            &McpAuditLog::new(),
            Uuid::nil(),
            ts(21_001),
            McpRule::Mcp001BctsScope,
            did("agent"),
            McpEnforcementOutcome::Allowed,
            None,
        )
        .expect_err("nil record IDs must be rejected");

        assert!(matches!(err, GatekeeperError::McpAuditInvalidRecord { .. }));
    }

    /// `Timestamp::ZERO` is rejected with `McpAuditInvalidRecord`.
    #[test]
    fn create_record_rejects_zero_timestamp() {
        let err = create_record(
            &McpAuditLog::new(),
            record_id(0xC003),
            Timestamp::ZERO,
            McpRule::Mcp001BctsScope,
            did("agent"),
            McpEnforcementOutcome::Allowed,
            None,
        )
        .expect_err("zero timestamps must be rejected");

        assert!(matches!(err, GatekeeperError::McpAuditInvalidRecord { .. }));
    }

    /// A `Blocked` outcome is stored on the appended record as given.
    #[test]
    fn blocked_outcome_recorded() {
        let mut log = McpAuditLog::new();
        append_ok(
            &mut log,
            McpRule::Mcp002NoSelfEscalation,
            McpEnforcementOutcome::Blocked,
        );
        assert_eq!(log.records[0].outcome, McpEnforcementOutcome::Blocked);
    }

    /// A log that already holds `MAX_MCP_AUDIT_RECORDS` entries rejects a
    /// further append and keeps its length.
    ///
    /// The log is filled directly with copies of `sample_record`, so its
    /// chain links are not valid; that does not matter here because `append`
    /// checks capacity before the chain link.
    #[test]
    fn append_rejects_log_at_capacity_without_growing_records() {
        let mut log = McpAuditLog {
            records: vec![sample_record(); MAX_MCP_AUDIT_RECORDS],
        };
        let record = create_record(
            &log,
            record_id(0xF001),
            ts(30_000),
            McpRule::Mcp001BctsScope,
            did("agent"),
            McpEnforcementOutcome::Allowed,
            None,
        )
        .expect("capacity regression record has valid deterministic metadata");

        let err = append(&mut log, record).expect_err("full MCP audit log must reject append");

        assert_eq!(log.len(), MAX_MCP_AUDIT_RECORDS);
        assert!(
            err.to_string().contains("capacity"),
            "capacity rejection should be explicit: {err}"
        );
    }
}