
stakpak_api/local/hooks/task_board_context/mod.rs

use stakpak_shared::define_hook;
use stakpak_shared::hooks::{Hook, HookAction, HookContext, HookError, LifecycleEvent};
use stakpak_shared::models::integrations::openai::Role;
use stakpak_shared::models::llm::{LLMInput, LLMMessage, LLMMessageContent};

use crate::local::context_managers::task_board_context_manager::{
    TaskBoardContextManager, TaskBoardContextManagerOptions,
};
use crate::models::AgentState;

const SYSTEM_PROMPT: &str = include_str!("./system_prompt.txt");

pub struct TaskBoardContextHook {
    pub context_manager: TaskBoardContextManager,
}

pub struct TaskBoardContextHookOptions {
    /// How many recent assistant messages to keep untrimmed when context
    /// trimming is triggered. Only assistant (and tool) messages are trimmed;
    /// user and system messages are always preserved in full.
    pub keep_last_n_assistant_messages: Option<usize>,
    /// Fraction of the context window at which trimming triggers (e.g. 0.8 = 80%).
    pub context_budget_threshold: Option<f32>,
}

impl TaskBoardContextHook {
    pub fn new(options: TaskBoardContextHookOptions) -> Self {
        let context_manager = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
            keep_last_n_assistant_messages: options.keep_last_n_assistant_messages.unwrap_or(50),
            context_budget_threshold: options.context_budget_threshold.unwrap_or(0.8),
        });

        Self { context_manager }
    }
}

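// Before each inference, this hook trims the conversation history to fit the
// model's available context budget, prepends the task-board system prompt,
// and stages the resulting LLMInput on the agent state.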
define_hook!(
    TaskBoardContextHook,
    "task_board_context",
    async |&self, ctx: &mut HookContext<AgentState>, event: &LifecycleEvent| {
        if *event != LifecycleEvent::BeforeInference {
            return Ok(HookAction::Continue);
        }

        let model = ctx.state.active_model.clone();
        let max_output_tokens: u64 = 16000;

        // Subtract fixed overhead from the context window so the trimmer budgets
        // only the space actually available for chat messages:
        // - system prompt: prepended after trimming (below), so it is not in the message list
        // - max_output_tokens: reserved for the model's response
        let system_prompt_tokens = TaskBoardContextManager::estimate_tokens(&[LLMMessage {
            role: Role::System.to_string(),
            content: LLMMessageContent::String(SYSTEM_PROMPT.to_string()),
        }]);
        let context_window = model
            .limit
            .context
            .saturating_sub(system_prompt_tokens + max_output_tokens);
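        // Illustrative arithmetic (assumed numbers): a 200_000-token context
        // window, a system prompt estimated at 2_000 tokens, and 16_000 reserved
        // output tokens leave 200_000 - 2_000 - 16_000 = 182_000 tokens of budget
        // for the chat messages themselves.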

        let llm_tools: Option<Vec<_>> = ctx
            .state
            .tools
            .clone()
            .map(|t| t.into_iter().map(Into::into).collect());

        // Use budget-aware trimming with metadata from the checkpoint.
        // Tool definitions are passed in so the context manager can account
        // for their token overhead internally.
        let (reduced_messages, updated_metadata) = self.context_manager.reduce_context_with_budget(
            ctx.state.messages.clone(),
            context_window,
            ctx.state.metadata.clone(),
            llm_tools.as_deref(),
        );

        // Write updated metadata back to state for checkpoint persistence
        ctx.state.metadata = updated_metadata;

        let mut messages = Vec::new();
        messages.push(LLMMessage {
            role: Role::System.to_string(),
            content: LLMMessageContent::String(SYSTEM_PROMPT.to_string()),
        });
        messages.extend(reduced_messages);

        ctx.state.llm_input = Some(LLMInput {
            model,
            messages,
            max_tokens: max_output_tokens as u32,
            tools: llm_tools,
            provider_options: None,
            headers: None,
        });

        Ok(HookAction::Continue)
    }
);
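
A minimal construction sketch, not part of the file above: the module path in the use statement is inferred from the file location, the function name build_hooks and the option values are illustrative, and registering the hook with the agent's hook machinery is not shown.

use crate::local::hooks::task_board_context::{TaskBoardContextHook, TaskBoardContextHookOptions};

fn build_hooks() -> (TaskBoardContextHook, TaskBoardContextHook) {
    // Passing None for both options falls back to the defaults in
    // TaskBoardContextHook::new: keep the last 50 assistant messages and start
    // trimming at 80% of the available context budget.
    let default_hook = TaskBoardContextHook::new(TaskBoardContextHookOptions {
        keep_last_n_assistant_messages: None,
        context_budget_threshold: None,
    });

    // A more aggressive configuration: keep fewer recent assistant messages and
    // trigger trimming earlier, at 60% of the budget.
    let aggressive_hook = TaskBoardContextHook::new(TaskBoardContextHookOptions {
        keep_last_n_assistant_messages: Some(20),
        context_budget_threshold: Some(0.6),
    });

    (default_hook, aggressive_hook)
}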