erebyx-sdk 0.1.1

// SPDX-License-Identifier: MIT OR Apache-2.0
//! LLM middleware -- intercept any LLM call to add memory automatically.
//!
//! This is the "one line change" layer. Wraps any function that takes messages
//! and returns a response, adding memory retrieval before and storage after.
//!
//! Design principle: memory failure NEVER breaks the LLM call.
//! The `before`/`after` hooks fail open — on any error `before` returns an
//! empty context and `after` only logs a warning (the client's circuit breaker
//! handles backoff underneath).
//!
//! ## Usage
//!
//! ```rust,no_run
//! use erebyx_sdk::{Memory, middleware::MemoryMiddleware};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), erebyx_sdk::Error> {
//!     let memory = Memory::new("erebyx_your_key")?;
//!     let mw = MemoryMiddleware::new(memory);
//!
//!     // Before sending to LLM: retrieve relevant memories.
//!     // context.memories holds the relevant past context;
//!     // context.system_prompt_addition is pre-formatted text to prepend.
//!     let context = mw.before("What encryption do we use?").await;
//!     let _ = context;
//!
//!     // After receiving LLM response: optionally store it as a memory.
//!     mw.after("What encryption do we use?", "You decided on AES-256-GCM...").await;
//!
//!     Ok(())
//! }
//! ```

use tracing::{debug, warn};

use crate::client::Memory;

/// Memory middleware for wrapping LLM calls.
///
/// Holds a [`Memory`] client plus the tuning knobs used by the `before`
/// (retrieval) and `after` (optional storage) hooks. Construct it with
/// `MemoryMiddleware::new` and adjust it with the `with_*` builder methods.
#[derive(Clone)]
pub struct MemoryMiddleware {
    /// Client used to search for and save memories.
    memory: Memory,
    /// Upper bound on the size of the text injected into the system prompt.
    /// Compared against `str::len()`, i.e. measured in UTF-8 bytes, which
    /// equals the character count only for ASCII text. Defaults to 1500.
    max_context_chars: usize,
    /// Whether `after` stores the exchange as a memory. Defaults to `false`.
    auto_save: bool,
    /// Minimum user-message length (in bytes) below which retrieval and
    /// auto-save are skipped. Defaults to 15.
    min_message_length: usize,
}

/// Result of the `before` phase -- memories to inject into the LLM call.
///
/// Always produced, even when retrieval is skipped or fails, so callers can
/// use it unconditionally.
#[derive(Debug, Clone)]
pub struct BeforeResult {
    /// Every memory returned by the search, including any that did not fit
    /// into `system_prompt_addition`. Empty when retrieval was skipped,
    /// returned nothing, or failed.
    pub memories: Vec<crate::types::MemoryRecord>,
    /// Pre-formatted text to inject into the system prompt.
    /// Empty string when retrieval was skipped, returned no memories, or failed.
    pub system_prompt_addition: String,
    /// `false` only when the memory search returned an error; `true` when it
    /// succeeded or was deliberately skipped (short message or greeting).
    pub success: bool,
}

impl MemoryMiddleware {
    /// Create middleware with default settings.
    ///
    /// Defaults: 1500-byte context budget, auto-save disabled, and a minimum
    /// message length of 15 bytes.
    #[must_use]
    pub fn new(memory: Memory) -> Self {
        Self {
            memory,
            max_context_chars: 1500,
            auto_save: false,
            min_message_length: 15,
        }
    }

    /// Enable auto-saving of AI responses in the `after` phase.
    #[must_use]
    pub fn with_auto_save(mut self) -> Self {
        self.auto_save = true;
        self
    }

    /// Set maximum context injection size.
    ///
    /// The budget is compared against UTF-8 byte lengths, so it equals a
    /// character count only for ASCII text. It covers the header line as well
    /// as the memory bullets.
    #[must_use]
    pub fn with_max_context(mut self, chars: usize) -> Self {
        self.max_context_chars = chars;
        self
    }

    /// Set minimum message length (in bytes) to trigger memory retrieval.
    #[must_use]
    pub fn with_min_message_length(mut self, len: usize) -> Self {
        self.min_message_length = len;
        self
    }

    /// Build a `BeforeResult` that carries no memories and no prompt text.
    fn empty_result(success: bool) -> BeforeResult {
        BeforeResult {
            memories: Vec::new(),
            system_prompt_addition: String::new(),
            success,
        }
    }

    /// **Before phase**: Retrieve relevant memories for the user's message.
    ///
    /// Call this before sending the message to the LLM. Inject
    /// `result.system_prompt_addition` into the system prompt.
    ///
    /// Retrieval is skipped (empty result, `success: true`) for messages
    /// shorter than the configured minimum length and for greetings.
    ///
    /// Always returns a result -- never errors. On failure, returns
    /// empty memories with `success: false`.
    ///
    /// `system_prompt_addition` is either empty or a header line followed by
    /// at least one memory bullet. If not even the first memory fits within
    /// the context budget, the addition is empty rather than a bare header;
    /// `memories` still contains every search hit.
    pub async fn before(&self, user_message: &str) -> BeforeResult {
        const HEADER: &str = "[EREBYX Memory Context]\n";
        /// Per-memory cap (bytes) applied before rendering a bullet line.
        const MAX_MEMORY_BYTES: usize = 300;

        // Smart gate: skip short messages and greetings.
        if user_message.len() < self.min_message_length || is_greeting(user_message) {
            return Self::empty_result(true);
        }

        // Search for relevant memories.
        let result = match self.memory.search(user_message).limit(5).send().await {
            Ok(result) => result,
            Err(e) => {
                // Fail open -- memory error never blocks the LLM call.
                warn!(error = %e, "erebyx middleware: memory retrieval failed (proceeding without)");
                return Self::empty_result(false);
            }
        };

        if result.memories.is_empty() {
            return Self::empty_result(true);
        }

        // Format memories for system prompt injection, in the order the
        // search returned them, stopping at the first one that would exceed
        // the budget.
        let mut addition = String::from(HEADER);
        for m in &result.memories {
            let snippet = truncate_safe(&m.content, MAX_MEMORY_BYTES);
            let line_len = "- ".len() + snippet.len() + "\n".len();
            if addition.len() + line_len > self.max_context_chars {
                break;
            }
            addition.push_str("- ");
            addition.push_str(snippet);
            addition.push('\n');
        }

        if addition.len() == HEADER.len() {
            // Nothing fit: a header with no bullets is pure noise in the
            // prompt (and may itself exceed the budget), so inject nothing.
            addition.clear();
            debug!(
                memories = result.memories.len(),
                "erebyx middleware: no memory fit within the context budget"
            );
        } else {
            debug!(
                memories = result.memories.len(),
                chars = addition.len(),
                "erebyx middleware: injecting context"
            );
        }

        BeforeResult {
            memories: result.memories,
            system_prompt_addition: addition,
            success: true,
        }
    }

    /// **After phase**: Optionally store the AI's response as a memory.
    ///
    /// Only fires if `auto_save` is enabled, and skips trivial exchanges
    /// (user message below the minimum length, or a response under 50 bytes).
    ///
    /// Best-effort: the save request is awaited before this returns, but its
    /// errors are logged and never propagated.
    pub async fn after(&self, user_message: &str, ai_response: &str) {
        /// Responses shorter than this (bytes) are not worth remembering.
        const MIN_RESPONSE_BYTES: usize = 50;
        /// Caps (bytes) on how much of each side of the exchange is stored.
        const MAX_SAVED_QUESTION_BYTES: usize = 200;
        const MAX_SAVED_RESPONSE_BYTES: usize = 500;

        if !self.auto_save {
            return;
        }

        // Skip trivial exchanges.
        if user_message.len() < self.min_message_length || ai_response.len() < MIN_RESPONSE_BYTES {
            return;
        }

        let content = format!(
            "User asked: {}\n\nResponse: {}",
            truncate_safe(user_message, MAX_SAVED_QUESTION_BYTES),
            truncate_safe(ai_response, MAX_SAVED_RESPONSE_BYTES)
        );

        match self.memory.save(&content, "episodic").send().await {
            Ok(_) => debug!("erebyx middleware: auto-saved response"),
            Err(e) => warn!(error = %e, "erebyx middleware: auto-save failed (non-critical)"),
        }
    }
}

/// Check if a message is a greeting/trivial exchange not worth searching memory for.
///
/// A message counts as a greeting when, after trimming and lowercasing, it is
/// shorter than 30 bytes and *begins with a greeting word or phrase as a whole
/// word* -- i.e. the greeting is followed by end-of-input or a character that
/// is neither alphanumeric nor `_`. The word-boundary requirement keeps real
/// queries such as "notes on encryption" or "history of the api" from being
/// mistaken for "no" / "hi".
fn is_greeting(msg: &str) -> bool {
    const GREETINGS: [&str; 18] = [
        "hey",
        "hi",
        "hello",
        "thanks",
        "thank you",
        "bye",
        "ok",
        "yes",
        "no",
        "sure",
        "cool",
        "nice",
        "got it",
        "sounds good",
        "okay",
        "yep",
        "nope",
        "alright",
    ];
    /// Messages at or above this length (bytes) are never treated as greetings.
    const MAX_GREETING_BYTES: usize = 30;

    let lower = msg.trim().to_lowercase();
    if lower.len() >= MAX_GREETING_BYTES {
        return false;
    }

    GREETINGS.iter().any(|greeting| {
        lower.strip_prefix(greeting).map_or(false, |rest| {
            // Accept only if the greeting ends at a word boundary.
            rest.chars()
                .next()
                .map_or(true, |c| !(c.is_alphanumeric() || c == '_'))
        })
    })
}

/// Return the longest prefix of `s` that is at most `max` bytes long and ends
/// on a UTF-8 character boundary.
///
/// Strings that already fit are returned unchanged. A cut point that would
/// land inside a multi-byte character (emoji, CJK, etc.) is moved back to the
/// start of that character, so slicing never panics.
fn truncate_safe(s: &str, max: usize) -> &str {
    if max >= s.len() {
        return s;
    }
    // Walk back from `max` to the nearest boundary; index 0 is always one,
    // so the search cannot fail.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}