//! crtx-llm 0.1.1
//!
//! Claude, Ollama, and replay adapters behind a shared trait.
//!
//! [`ClaudeSummaryBackend`] — Anthropic-backed implementation of
//! [`SummaryBackend`] for the Phase 4.D decay LLM-summary path.
//!
//! Uses [`ClaudeHttpAdapter`] for the actual HTTP call (ureq inside
//! `spawn_blocking`) and bridges the synchronous [`SummaryBackend`] contract
//! by spinning up a `new_current_thread` Tokio runtime for each call. The
//! decay runner is called from synchronous transactional context so the
//! runtime-per-call pattern is correct here.
//!
//! ## Construction
//!
//! Construction requires `CORTEX_CLAUDE_API_KEY` to be set. Absence returns
//! [`SummaryError::BackendNotConfigured`] immediately so the caller can
//! decide whether to fall back to [`NoopSummaryBackend`] or surface the error.
//!
//! ## Prompt template
//!
//! The prompt template used for every call is:
//!
//! ```text
//! Summarize the following memory claims into a single concise claim. Claims:
//! - <claim 1>
//! - <claim 2>
//! ...
//!
//! Summary:
//! ```
//!
//! The BLAKE3 of this template is checked against
//! [`SummaryRequest::prompt_template_blake3`] before any call is made.
//!
//! ## Output validation
//!
//! After a successful call the backend checks:
//! 1. The response text is non-empty.
//! 2. The byte length does not exceed the effective byte budget: the
//!    `SummaryRequest::max_output_bytes` override when present, clamped to the
//!    backend's configured maximum (4096 by default).
//! 3. The model name echoed by Anthropic byte-equals `request.model_name`.

use crate::adapter::{blake3_hex, LlmAdapter, LlmMessage, LlmRequest, LlmRole};
use crate::claude_http::ClaudeHttpAdapter;
use crate::sensitivity::MaxSensitivity;
use crate::summary::{SummaryBackend, SummaryError, SummaryRequest, SummaryResponse};
use crate::TokenUsage;

/// Prompt template fed to Claude for every summary call.
///
/// `{claims}` is replaced at call time with a newline-separated list of the
/// request's source claims, each prefixed with `- `.
///
/// The BLAKE3 digest of this string (as UTF-8 bytes) is the canonical
/// `prompt_template_blake3` pin that operator attestation envelopes must carry
/// when targeting this backend.
const SUMMARY_PROMPT_TEMPLATE: &str =
    "Summarize the following memory claims into a single concise claim. Claims:\n{claims}\n\nSummary:";

/// Default byte budget for the produced summary text. A per-request
/// `SummaryRequest::max_output_bytes` may lower this but is clamped so it can
/// never raise it.
const DEFAULT_MAX_OUTPUT_BYTES: usize = 4096;

/// Maximum tokens requested from the Anthropic API per summary call. Generous
/// ceiling — the actual output is bounded by `max_output_bytes` on the
/// response side.
const SUMMARY_MAX_TOKENS: u32 = 1024;

/// Timeout applied to every summary call, in milliseconds.
const SUMMARY_TIMEOUT_MS: u64 = 60_000;

/// Returns the canonical BLAKE3 digest of [`SUMMARY_PROMPT_TEMPLATE`].
///
/// Operator attestation envelopes that target `ClaudeSummaryBackend` must
/// carry exactly this value as their `prompt_template_blake3` pin.
#[must_use]
pub fn canonical_prompt_template_blake3() -> String {
    let digest = blake3_hex(SUMMARY_PROMPT_TEMPLATE.as_bytes());
    format!("blake3:{digest}")
}

/// Anthropic-backed [`SummaryBackend`] for the Phase 4.D decay path.
///
/// See the module documentation for construction, prompt shape, and output
/// validation rules.
#[derive(Debug, Clone)]
pub struct ClaudeSummaryBackend {
    // Performs the actual Anthropic Messages API call over HTTP.
    adapter: ClaudeHttpAdapter,
    // Byte ceiling for the produced summary; also clamps any per-request
    // `SummaryRequest::max_output_bytes` override.
    max_output_bytes: usize,
}

impl ClaudeSummaryBackend {
    /// Construct a `ClaudeSummaryBackend` for `model` with `max_sensitivity`.
    ///
    /// Returns [`SummaryError::BackendNotConfigured`] when
    /// `CORTEX_CLAUDE_API_KEY` is absent or empty. Returns
    /// [`SummaryError::CallFailed`] for model validation errors (empty,
    /// contains `"latest"`, etc.).
    pub fn new(
        model: String,
        max_sensitivity: Option<MaxSensitivity>,
    ) -> Result<Self, SummaryError> {
        match ClaudeHttpAdapter::new(model, max_sensitivity) {
            Ok(adapter) => Ok(Self {
                adapter,
                max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
            }),
            Err(e) => {
                let rendered = e.to_string();
                // An error mentioning the API-key env var means the key is
                // simply not wired up (recoverable: set the env var). Any
                // other construction failure is a model-shape problem the
                // operator must fix in the model string itself.
                if rendered.contains(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV) {
                    Err(SummaryError::BackendNotConfigured)
                } else {
                    Err(SummaryError::CallFailed(rendered))
                }
            }
        }
    }
}

impl SummaryBackend for ClaudeSummaryBackend {
    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
        // Refuse to call out unless the caller's attestation pin matches the
        // template compiled into this backend.
        let canonical_pin = canonical_prompt_template_blake3();
        if request.prompt_template_blake3 != canonical_pin {
            return Err(SummaryError::PromptTemplateMismatch(format!(
                "request pin `{}` != backend template `{}`",
                request.prompt_template_blake3, canonical_pin,
            )));
        }

        // Render the source claims as a `- `-bulleted list and splice them
        // into the template.
        let mut bullet_lines = Vec::with_capacity(request.source_claims.len());
        for claim in &request.source_claims {
            bullet_lines.push(format!("- {claim}"));
        }
        let rendered_prompt =
            SUMMARY_PROMPT_TEMPLATE.replace("{claims}", &bullet_lines.join("\n"));

        // A per-request override may shrink the byte budget but can never
        // exceed the backend's configured ceiling.
        let effective_budget = match request.max_output_bytes {
            Some(requested) => requested.min(self.max_output_bytes),
            None => self.max_output_bytes,
        };

        let llm_request = LlmRequest {
            model: request.model_name.clone(),
            system: String::new(),
            messages: vec![LlmMessage {
                role: LlmRole::User,
                content: rendered_prompt,
            }],
            temperature: 0.0,
            max_tokens: SUMMARY_MAX_TOKENS,
            json_schema: None,
            timeout_ms: SUMMARY_TIMEOUT_MS,
        };

        // The SummaryBackend contract is synchronous while the adapter is
        // async; bridge with a throwaway current-thread runtime. See the
        // module docs for why runtime-per-call is acceptable here.
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .map_err(|e| {
                SummaryError::CallFailed(format!("tokio runtime construction failed: {e}"))
            })?;
        let completion = runtime
            .block_on(self.adapter.complete(llm_request))
            .map_err(|e| SummaryError::CallFailed(e.to_string()))?;

        // The provider must echo exactly the model the request pinned.
        if completion.model != request.model_name {
            return Err(SummaryError::OutputValidationFailed(format!(
                "provider echoed model `{}` but request pinned `{}`",
                completion.model, request.model_name,
            )));
        }

        // Reject empty output and output exceeding the byte budget.
        if completion.text.is_empty() {
            return Err(SummaryError::OutputValidationFailed(
                "provider returned an empty summary".to_string(),
            ));
        }
        if completion.text.len() > effective_budget {
            return Err(SummaryError::OutputValidationFailed(format!(
                "summary byte length {} exceeds budget {}",
                completion.text.len(),
                effective_budget,
            )));
        }

        let token_usage = completion.usage.map(|u| TokenUsage {
            prompt_tokens: u.prompt_tokens,
            completion_tokens: u.completion_tokens,
        });

        Ok(SummaryResponse {
            claim: completion.text,
            token_usage,
            model_name_echoed: completion.model,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{BufRead, BufReader, Write};
    use std::net::TcpListener;
    use std::sync::Mutex;

    /// Serializes mutation of `CORTEX_CLAUDE_API_KEY` across tests.
    ///
    /// The default test harness runs `#[test]` fns on multiple threads, and
    /// env vars are process-global: without this lock, the `remove_var` in
    /// one test can race the `set_var`/adapter construction in the other,
    /// producing flaky failures. `lock()` tolerates poisoning so one failed
    /// test cannot cascade into the other.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Verify that construction fails closed with `BackendNotConfigured` when
    /// `CORTEX_CLAUDE_API_KEY` is absent.
    #[test]
    fn claude_summary_backend_fails_without_api_key() {
        let _env_guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());

        // Temporarily unset the env var for this test. We restore it after.
        let saved = std::env::var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV).ok();
        std::env::remove_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV);

        let result = ClaudeSummaryBackend::new("claude-3-5-sonnet-20241022".into(), None);

        // Restore before any assertion (test hygiene).
        if let Some(key) = saved {
            std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, key);
        }

        match result {
            Err(SummaryError::BackendNotConfigured) => {}
            other => panic!("expected BackendNotConfigured, got {other:?}"),
        }
    }

    /// Verify end-to-end summarise via a mock TCP server that returns a minimal
    /// Anthropic Messages API response.
    #[test]
    fn claude_summary_backend_summarizes_via_mock_adapter() {
        // Hold the env lock for the whole test: we set the API key below and
        // the sibling test removes it.
        let _env_guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());

        // Bind on loopback before spawning the server thread so the port is
        // known before the client thread starts.
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
        let addr = listener.local_addr().expect("local addr");

        // Minimal Anthropic response body.
        let summary_text = "Alpha and beta are combined into a single claim.";
        let response_body = format!(
            r#"{{"id":"msg_01","type":"message","role":"assistant","content":[{{"type":"text","text":"{summary_text}"}}],"model":"claude-3-5-sonnet-20241022","stop_reason":"end_turn","usage":{{"input_tokens":30,"output_tokens":12}}}}"#
        );
        let http_response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            response_body.len(),
            response_body
        );

        // Spawn a single-request mock server.
        let server_thread = std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("accept");
            // Drain the request headers.
            let mut reader = BufReader::new(stream.try_clone().expect("clone stream"));
            let mut line = String::new();
            loop {
                line.clear();
                reader.read_line(&mut line).expect("read line");
                if line == "\r\n" || line.is_empty() {
                    break;
                }
            }
            // Read the body (Content-Length is set by ureq; we just drain).
            // For simplicity consume up to 4096 bytes.
            let mut buf = vec![0u8; 4096];
            let _ = std::io::Read::read(&mut reader, &mut buf);

            stream
                .write_all(http_response.as_bytes())
                .expect("write response");
        });

        // Construct the backend pointed at our mock server.
        std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, "test-key-mock");
        let backend = ClaudeSummaryBackend {
            adapter: ClaudeHttpAdapter::new_with_base_url(
                "claude-3-5-sonnet-20241022".into(),
                format!("http://{addr}"),
                None,
            )
            .expect("build adapter"),
            max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
        };

        let request = SummaryRequest {
            model_name: "claude-3-5-sonnet-20241022".into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["alpha".to_string(), "beta".to_string()],
            max_output_bytes: None,
            decay_job_id: Some("dcy_test".into()),
        };

        let resp = backend.summarize(&request).expect("summarize");

        server_thread.join().expect("server thread joined");

        assert_eq!(resp.claim, summary_text);
        assert_eq!(resp.model_name_echoed, "claude-3-5-sonnet-20241022");
        assert!(resp.token_usage.is_some());
    }
}