// lha 1.0.2 - Docs.rs

use std::ops::Deref;

use crate::product::agent::codex::TurnContext;
use crate::product::agent::compact::is_active_goal_plan_reminder;
use crate::product::agent::compact::is_backfilled_proposed_plan_reminder;
use crate::product::agent::context_manager::normalize;
use crate::product::agent::instructions::SkillInstructionSource;
use crate::product::agent::instructions::SkillInstructions;
use crate::product::agent::instructions::UserInstructions;
use crate::product::agent::session_prefix::is_session_prefix;
use crate::product::agent::truncate::TruncationPolicy;
use crate::product::agent::truncate::approx_token_count;
use crate::product::agent::truncate::approx_tokens_from_byte_count;
use crate::product::agent::truncate::truncate_function_output_items_with_policy;
use crate::product::agent::truncate::truncate_text;
use crate::product::agent::user_shell_command::is_user_shell_command_text;
use crate::product::protocol::models::ContentItem;
use crate::product::protocol::models::TranscriptItem;
use crate::product::protocol::protocol::TokenUsage;
use crate::product::protocol::protocol::TokenUsageInfo;
use lha_llm::ToolResultContentItem;
use lha_llm::ToolResultPayload;

/// Transcript of thread history, together with the token usage info tracked for it.
#[derive(Debug, Clone, Default)]
pub(crate) struct ContextManager {
    /// Recorded transcript items. The oldest items are at the beginning of the vector.
    items: Vec<TranscriptItem>,
    /// Token usage info associated with this history, if any.
    token_info: Option<TokenUsageInfo>,
}

impl ContextManager {
    /// Creates an empty history.
    ///
    /// The initial token info is whatever `TokenUsageInfo::new_or_append` yields when it
    /// is given no previous info, no usage and no context window.
    pub(crate) fn new() -> Self {
        let token_info = TokenUsageInfo::new_or_append(&None, &None, None);
        Self {
            items: Vec::new(),
            token_info,
        }
    }

    /// Returns a clone of the currently stored token usage info, if any.
    pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
        self.token_info.clone()
    }

    /// Overwrites the stored token usage info with `info` (passing `None` clears it).
    pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
        self.token_info = info;
    }

    /// Records that the context window of size `context_window` is fully used.
    ///
    /// Existing token info is updated through `fill_to_context_window`; when no info is
    /// stored yet, a new one is created with `TokenUsageInfo::full_context_window`.
    pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
        if let Some(existing) = self.token_info.as_mut() {
            existing.fill_to_context_window(context_window);
        } else {
            let filled = TokenUsageInfo::full_context_window(context_window);
            self.token_info = Some(filled);
        }
    }

    /// Appends `items` to the history after filtering and per-item processing.
    ///
    /// `items` is ordered from oldest to newest. Each element is cloned and converted
    /// into a `TranscriptItem`; items rejected by `is_api_message` are skipped, and the
    /// remaining ones go through `process_item` with `policy` before being stored.
    pub(crate) fn record_items<I>(&mut self, items: I, policy: TruncationPolicy)
    where
        I: IntoIterator,
        I::Item: Deref,
        <I::Item as Deref>::Target: Clone + Into<TranscriptItem>,
    {
        for incoming in items {
            let candidate: TranscriptItem = (*incoming).clone().into();
            if is_api_message(&candidate) {
                let recorded = self.process_item(&candidate, policy);
                self.items.push(recorded);
            }
        }
    }

    /// Returns the history prepared for sending to the model.
    ///
    /// Consumes the manager, runs `normalize_history` (which pairs up tool calls and
    /// results) and hands back the resulting items.
    pub(crate) fn for_prompt(mut self) -> Vec<TranscriptItem> {
        self.normalize_history();
        self.items
    }

    /// Returns the history prepared for sending to the model during compaction.
    ///
    /// Synthetic compact-backfilled skills are preserved in history for follow-up
    /// turns, but excluded from the next compaction prompt to avoid re-summarizing
    /// their contents. Concretely: after normalization, every `"user"` message whose
    /// skill instructions report `SkillInstructionSource::CompactBackfill` is dropped.
    pub(crate) fn for_compaction_prompt(mut self) -> Vec<TranscriptItem> {
        self.normalize_history();
        self.items.retain(|item| {
            // Only user messages can carry backfilled skill instructions.
            let TranscriptItem::Message { role, content, .. } = item else {
                return true;
            };
            if role != "user" {
                return true;
            }
            let is_compact_backfill = matches!(
                SkillInstructions::from_message_with_source(content),
                Some((_, SkillInstructionSource::CompactBackfill))
            );
            !is_compact_backfill
        });
        self.items
    }

    /// Returns the raw items in the history, oldest first, without running any
    /// normalization pass.
    pub(crate) fn raw_items(&self) -> &[TranscriptItem] {
        &self.items
    }

    /// Estimates token usage using byte-based heuristics from the truncation helpers.
    ///
    /// This is a coarse lower bound, not a tokenizer-accurate count. The estimate is the
    /// sum of:
    /// - the model instructions resolved for the turn's personality (falling back to the
    ///   configured personality when the turn has none), and
    /// - every recorded item: reasoning items with encrypted content are sized through
    ///   `estimate_reasoning_length`, every other item by its JSON serialization (an
    ///   item that fails to serialize counts as an empty string).
    ///
    /// All additions saturate at `i64::MAX` instead of overflowing. The current
    /// implementation always returns `Some`.
    pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
        let model_info = turn_context.runtime.get_model_info();
        let personality = turn_context
            .personality
            .or(turn_context.runtime.config().personality);
        let base_instructions = model_info.get_model_instructions(personality);
        let base_tokens = i64::try_from(approx_token_count(&base_instructions)).unwrap_or(i64::MAX);

        let items_tokens = self.items.iter().fold(0i64, |acc, item| {
            let item_tokens = match item {
                TranscriptItem::Reasoning {
                    encrypted_content: Some(content),
                    ..
                } => {
                    let reasoning_bytes = estimate_reasoning_length(content.len());
                    i64::try_from(approx_tokens_from_byte_count(reasoning_bytes))
                        .unwrap_or(i64::MAX)
                }
                item => {
                    let serialized = serde_json::to_string(item).unwrap_or_default();
                    i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
                }
            };
            // Per-item counts may already be clamped to `i64::MAX`, so a plain `+`
            // could overflow; saturate instead.
            acc.saturating_add(item_tokens)
        });

        Some(base_tokens.saturating_add(items_tokens))
    }

    /// Removes the oldest item from the history; does nothing when the history is empty.
    ///
    /// The counterpart of the removed item (if it is part of a call/output pair) is
    /// removed as well via `normalize::remove_corresponding_for`.
    pub(crate) fn remove_first_item(&mut self) {
        if self.items.is_empty() {
            return;
        }

        // Items are ordered oldest → newest, so the front entry is the first one recorded.
        let oldest = self.items.remove(0);
        // Dropping the matching counterpart here keeps the call/output invariants intact
        // without running a full normalization pass.
        normalize::remove_corresponding_for(&mut self.items, &oldest);
    }

    /// Replaces the entire history with `items`, converting each element into a
    /// `TranscriptItem`. Token info is left untouched.
    pub(crate) fn replace<T>(&mut self, items: Vec<T>)
    where
        T: Into<TranscriptItem>,
    {
        let converted: Vec<TranscriptItem> = items.into_iter().map(|item| item.into()).collect();
        self.items = converted;
    }

    /// Replace image content in the last turn if it originated from a tool output.
    ///
    /// Looks at the most recent item that is either a tool result or a user turn
    /// boundary. If that item is a structured tool result with content items, every
    /// `InputImage` entry is swapped for an `InputText` carrying `placeholder`.
    ///
    /// Returns true when a tool image was replaced, false otherwise.
    pub(crate) fn replace_last_turn_images(&mut self, placeholder: &str) -> bool {
        let last_relevant = self.items.iter().rposition(|item| {
            matches!(item, TranscriptItem::ToolResult { .. }) || is_user_turn_boundary(item)
        });
        let Some(index) = last_relevant else {
            return false;
        };

        // Anything other than a structured tool result with content items (a user
        // message, a text-only result, ...) has no images to replace.
        let TranscriptItem::ToolResult {
            payload:
                ToolResultPayload::Structured {
                    content_items: Some(content_items),
                    ..
                },
            ..
        } = &mut self.items[index]
        else {
            return false;
        };

        let mut replaced_any = false;
        for slot in content_items.iter_mut() {
            if !matches!(slot, ToolResultContentItem::InputImage { .. }) {
                continue;
            }
            *slot = ToolResultContentItem::InputText {
                text: placeholder.to_string(),
            };
            replaced_any = true;
        }
        replaced_any
    }

    /// Drop the last `num_turns` user turns from this history.
    ///
    /// "User turns" are identified as real user messages, excluding synthetic context messages
    /// (see `is_user_turn_boundary`).
    ///
    /// This mirrors thread-rollback semantics:
    /// - `num_turns == 0` is a no-op
    /// - if there are no user turns, this is a no-op
    /// - if `num_turns` exceeds the number of user turns, all user turns are dropped while
    ///   preserving any items that occurred before the first user message.
    ///
    /// The history is truncated in place; no copy of the items is made.
    pub(crate) fn drop_last_n_user_turns(&mut self, num_turns: u32) {
        if num_turns == 0 {
            return;
        }

        let user_positions = user_message_positions(&self.items);
        let n_from_end = usize::try_from(num_turns).unwrap_or(usize::MAX);

        // Slot of the first user turn to drop. Saturating at 0 means "drop from the
        // very first user turn" when more turns are requested than exist.
        let first_dropped_slot = user_positions.len().saturating_sub(n_from_end);

        // `get` yields `None` only when there are no user turns at all: no-op.
        let Some(&cut_idx) = user_positions.get(first_dropped_slot) else {
            return;
        };

        // Keep everything strictly before the first dropped user message.
        self.items.truncate(cut_idx);
    }

    /// Folds a new `usage` report into the stored token info.
    ///
    /// The previous info, a clone of `usage` and `model_context_window` are passed to
    /// `TokenUsageInfo::new_or_append`, whose result becomes the new stored info.
    pub(crate) fn update_token_info(
        &mut self,
        usage: &TokenUsage,
        model_context_window: Option<i64>,
    ) {
        let latest_usage = Some(usage.clone());
        let updated =
            TokenUsageInfo::new_or_append(&self.token_info, &latest_usage, model_context_window);
        self.token_info = updated;
    }

    fn get_non_last_reasoning_items_tokens(&self) -> usize {
        // get reasoning items excluding all the ones after the last user message
        let Some(last_user_index) = self.items.iter().rposition(is_user_turn_boundary) else {
            return 0usize;
        };

        let total_reasoning_bytes = self
            .items
            .iter()
            .take(last_user_index)
            .filter_map(|item| {
                if let TranscriptItem::Reasoning {
                    encrypted_content: Some(content),
                    ..
                } = item
                {
                    Some(content.len())
                } else {
                    None
                }
            })
            .map(estimate_reasoning_length)
            .fold(0usize, usize::saturating_add);

        let token_estimate = approx_tokens_from_byte_count(total_reasoning_bytes);
        token_estimate as usize
    }

    /// Returns the total token usage for the current history.
    ///
    /// The base value is `last_token_usage.total_tokens` from the stored token info
    /// (0 when no info is stored).
    ///
    /// `server_reasoning_included`: when true, the server already accounted for past
    /// reasoning tokens and the client should not re-estimate them, so the base value
    /// is returned as is. When false, the client-side estimate of reasoning items
    /// recorded before the last user turn is added, saturating at `i64::MAX`.
    pub(crate) fn get_total_token_usage(&self, server_reasoning_included: bool) -> i64 {
        let last_tokens = self
            .token_info
            .as_ref()
            .map(|info| info.last_token_usage.total_tokens)
            .unwrap_or(0);
        if server_reasoning_included {
            return last_tokens;
        }

        // Checked conversion: an `as` cast could wrap a huge `usize` into a negative
        // `i64`; clamp to `i64::MAX` instead.
        let reasoning_tokens =
            i64::try_from(self.get_non_last_reasoning_items_tokens()).unwrap_or(i64::MAX);
        last_tokens.saturating_add(reasoning_tokens)
    }

    /// Enforces a couple of invariants on the in-memory history:
    /// 1. every tool call has a corresponding result entry
    /// 2. every result has a corresponding call entry
    ///
    /// The two passes run in this order: missing results are ensured first, then
    /// results without a matching call are removed.
    fn normalize_history(&mut self) {
        // all tool calls must have a corresponding result
        normalize::ensure_call_outputs_present(&mut self.items);

        // all results must have a corresponding tool call
        normalize::remove_orphan_outputs(&mut self.items);
    }

    /// Produces the copy of `item` that is stored in the history.
    ///
    /// Tool results have their payload truncated with `policy` scaled by 1.2 (the
    /// extra headroom is the serialization budget): structured payloads truncate both
    /// the text content and the optional content items, text payloads truncate the
    /// output string. Every other item kind is cloned unchanged.
    fn process_item(&self, item: &TranscriptItem, policy: TruncationPolicy) -> TranscriptItem {
        let budget = policy * 1.2;
        match item {
            TranscriptItem::ToolResult {
                call_id,
                tool_name,
                payload,
            } => {
                let payload = match payload {
                    ToolResultPayload::Structured {
                        content,
                        content_items,
                        success,
                    } => ToolResultPayload::Structured {
                        content: truncate_text(content.as_str(), budget),
                        content_items: content_items.as_ref().map(|entries| {
                            truncate_function_output_items_with_policy(entries, budget)
                        }),
                        success: *success,
                    },
                    ToolResultPayload::Text { output } => ToolResultPayload::Text {
                        output: truncate_text(output, budget),
                    },
                };
                TranscriptItem::ToolResult {
                    call_id: call_id.clone(),
                    tool_name: tool_name.clone(),
                    payload,
                }
            }
            // Listed explicitly (no wildcard) so that adding a variant forces a decision.
            TranscriptItem::Message { .. }
            | TranscriptItem::Reasoning { .. }
            | TranscriptItem::HostedActivity { .. }
            | TranscriptItem::ToolCall { .. }
            | TranscriptItem::Unknown { .. } => item.clone(),
        }
    }
}

/// Tells whether `message` should be recorded in the history.
///
/// API messages include every non-system semantic transcript item: messages whose role
/// is not `"system"`, plus reasoning, hosted activity, tool calls and tool results.
/// `Unknown` items are rejected.
fn is_api_message(message: &TranscriptItem) -> bool {
    match message {
        TranscriptItem::Unknown { .. } => false,
        TranscriptItem::Message { role, .. } => role != "system",
        TranscriptItem::ToolCall { .. }
        | TranscriptItem::ToolResult { .. }
        | TranscriptItem::Reasoning { .. }
        | TranscriptItem::HostedActivity { .. } => true,
    }
}

/// Estimates the byte length of reasoning content from the length of its encoded form.
///
/// Computes `encoded_len * 3 / 4` and then subtracts a fixed 650 bytes, with both the
/// multiplication and the subtraction saturating.
///
/// NOTE(review): the 3/4 ratio presumably reflects base64 decoding and the 650 a fixed
/// envelope overhead — confirm against the producer of `encrypted_content`.
fn estimate_reasoning_length(encoded_len: usize) -> usize {
    const FIXED_OVERHEAD_BYTES: usize = 650;

    let scaled_len = encoded_len.saturating_mul(3) / 4;
    scaled_len.saturating_sub(FIXED_OVERHEAD_BYTES)
}

pub(crate) fn is_user_turn_boundary(item: &TranscriptItem) -> bool {
    let TranscriptItem::Message { role, content, .. } = item else {
        return false;
    };

    if role != "user" {
        return false;
    }

    if UserInstructions::is_user_instructions(content)
        || SkillInstructions::is_skill_instructions(content)
    {
        return false;
    }

    for content_item in content {
        match content_item {
            ContentItem::InputText { text } => {
                if is_session_prefix(text)
                    || is_user_shell_command_text(text)
                    || is_backfilled_proposed_plan_reminder(text)
                    || is_active_goal_plan_reminder(text)
                {
                    return false;
                }
            }
            ContentItem::OutputText { text } => {
                if is_session_prefix(text) || is_active_goal_plan_reminder(text) {
                    return false;
                }
            }
            ContentItem::InputImage { .. } => {}
        }
    }

    true
}

/// Returns, in ascending order, the indices of all items in `items` that are user turn
/// boundaries according to `is_user_turn_boundary`.
fn user_message_positions(items: &[TranscriptItem]) -> Vec<usize> {
    items
        .iter()
        .enumerate()
        .filter_map(|(idx, item)| is_user_turn_boundary(item).then_some(idx))
        .collect()
}

// Unit tests for this module live in the sibling file `history_tests.rs` and are only
// compiled for test builds.
#[cfg(test)]
#[path = "history_tests.rs"]
mod tests;