Skip to main content

offline_intelligence/context_engine/
context_builder.rs

1//! Builds optimal context from multiple memory sources
2
3use crate::memory::Message;
4use crate::memory_db::{StoredMessage, Summary as DbSummary};
5use tracing::{info, debug};
6
7/// Builds context from multiple memory sources
8pub struct ContextBuilder {
9    config: ContextBuilderConfig,
10}
11
/// Configuration for context building.
///
/// All fields are public; the `Default` implementation supplies baseline
/// values.
#[derive(Debug, Clone)]
pub struct ContextBuilderConfig {
    /// Token budget for the assembled context. Token counts in this file are
    /// estimated as content byte length divided by four.
    pub max_total_tokens: usize,
    /// Fraction of the current messages (taken from the end of the list)
    /// kept when no tier 1 content is supplied.
    pub min_current_context_ratio: f32,
    /// Fraction of `max_total_tokens` that the selected summaries may use.
    pub max_summary_ratio: f32,
    /// When true, system messages from the current conversation are copied
    /// into the context ahead of the other current messages.
    pub preserve_system_messages: bool,
    /// Enables injection of details from stored messages; the bridging
    /// message is also skipped when this is false.
    pub enable_detail_injection: bool,
    /// Not read by the `ContextBuilder` methods in this file.
    /// NOTE(review): presumably a relevance cut-off for detail injection;
    /// confirm before relying on it.
    pub detail_injection_threshold: f32,
}
22
23impl Default for ContextBuilderConfig {
24    fn default() -> Self {
25        Self {
26            max_total_tokens: 4000,
27            min_current_context_ratio: 0.4,
28            max_summary_ratio: 0.4,
29            preserve_system_messages: true,
30            enable_detail_injection: true,
31            detail_injection_threshold: 0.7,
32        }
33    }
34}
35
36impl ContextBuilder {
37    /// Create a new context builder
38    pub fn new(config: ContextBuilderConfig) -> Self {
39        Self {
40            config,
41        }
42    }
43    
44    /// Build optimal context from multiple sources
45    pub async fn build_context(
46        &mut self,
47        current_messages: &[Message],
48        tier1_content: Option<Vec<Message>>,
49        tier2_summaries: Option<Vec<DbSummary>>,
50        tier3_messages: Option<Vec<StoredMessage>>,
51        cross_session_messages: Option<Vec<StoredMessage>>, // NEW parameter
52        user_query: Option<&str>,
53    ) -> anyhow::Result<Vec<Message>> {
54        info!("Building context from {} current messages", current_messages.len());
55        
56        // Start with current messages (incorporates tier1 if provided)
57        let mut context = self.prepare_context_with_tier1(current_messages, tier1_content);
58        
59        // Add cross-session messages if available
60        if let Some(ref cross_messages) = cross_session_messages {
61            self.add_cross_session_context(&mut context, cross_messages, user_query)
62                .await?;
63        }
64        
65        // Add summaries if available
66        if let Some(ref summaries) = tier2_summaries {
67            self.add_summaries_to_context(&mut context, summaries, current_messages, user_query)
68                .await?;
69        }
70        
71        // Add specific details from full messages if needed
72        if let Some(ref full_messages) = tier3_messages {
73            self.add_specific_details(&mut context, full_messages, user_query)
74                .await?;
75        }
76        
77        // Ensure we don't exceed token limits
78        self.trim_to_token_limit(&mut context);
79        
80        // Add bridging between summarized and current content
81        self.add_bridging(&mut context, current_messages, tier2_summaries.as_ref())
82            .await?;
83        
84        debug!("Built context with {} messages", context.len());
85        
86        Ok(context)
87    }
88
89    /// Add historical messages from other sessions to the current context
90    async fn add_cross_session_context(
91        &mut self,
92        context: &mut Vec<Message>,
93        cross_messages: &[StoredMessage],
94        _user_query: Option<&str>,
95    ) -> anyhow::Result<()> {
96        if cross_messages.is_empty() {
97            return Ok(());
98        }
99        
100        // Create a bridging message to inform the model of the source
101        let bridge = Message {
102            role: "system".to_string(),
103            content: "[Context from previous conversations]".to_string(),
104        };
105        context.insert(0, bridge);
106        
107        // Add relevant cross-session messages (limit to 3 to avoid context bloat)
108        for message in cross_messages.iter().take(3) {
109            let cross_msg = Message {
110                role: message.role.clone(),
111                content: format!("[From earlier: {}]", message.content),
112            };
113            context.insert(1, cross_msg); // Insert after bridge
114        }
115        
116        Ok(())
117    }
118    
119    /// Prepare context incorporating Tier 1 content if available
120    fn prepare_context_with_tier1(
121        &self, 
122        current_messages: &[Message], 
123        tier1_content: Option<Vec<Message>>
124    ) -> Vec<Message> {
125        let mut context = Vec::new();
126        
127        // Always preserve system messages from current
128        if self.config.preserve_system_messages {
129            for message in current_messages.iter().filter(|m| m.role == "system") {
130                context.push(message.clone());
131            }
132        }
133        
134        // Use Tier 1 content if available, otherwise use recent current messages
135        if let Some(tier1_messages) = tier1_content {
136            context.extend(tier1_messages);
137        } else {
138            let recent_messages = self.select_recent_messages(current_messages);
139            context.extend(recent_messages);
140        }
141        
142        context
143    }
144    
145    /// Select recent messages to keep
146    fn select_recent_messages(&self, messages: &[Message]) -> Vec<Message> {
147        if messages.is_empty() {
148            return Vec::new();
149        }
150        
151        let target_count = (messages.len() as f32 * self.config.min_current_context_ratio).ceil() as usize;
152        let target_count = target_count.max(1).min(messages.len());
153        
154        messages.iter()
155            .rev()
156            .take(target_count)
157            .rev()
158            .cloned()
159            .collect()
160    }
161    
162    /// Add summaries to context
163    async fn add_summaries_to_context(
164        &mut self,
165        context: &mut Vec<Message>,
166        summaries: &[DbSummary],
167        current_messages: &[Message],
168        user_query: Option<&str>,
169    ) -> anyhow::Result<()> {
170        if summaries.is_empty() {
171            return Ok(());
172        }
173        
174        let relevant_summaries = self.select_relevant_summaries(summaries, current_messages, user_query);
175        
176        for summary in &relevant_summaries {
177            let summary_message = self.summary_to_message(summary, current_messages);
178            context.insert(0, summary_message);
179        }
180        
181        Ok(())
182    }
183    
184    fn select_relevant_summaries<'a>(
185        &self,
186        summaries: &'a [DbSummary],
187        current_messages: &[Message],
188        user_query: Option<&str>,
189    ) -> Vec<&'a DbSummary> {
190        let mut relevant = Vec::new();
191        let current_topics = self.extract_topics(current_messages);
192        
193        let mut scored: Vec<(&DbSummary, f32)> = summaries.iter()
194            .map(|summary| {
195                let score = self.score_summary_relevance(summary, &current_topics, user_query);
196                (summary, score)
197            })
198            .collect();
199        
200        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
201        
202        let mut total_tokens = 0;
203        let max_summary_tokens = (self.config.max_total_tokens as f32 * self.config.max_summary_ratio) as usize;
204        
205        for (summary, score) in scored {
206            if score < 0.3 { continue; }
207            
208            let summary_tokens = summary.summary_text.len() / 4;
209            
210            if total_tokens + summary_tokens > max_summary_tokens { break; }
211            
212            relevant.push(summary);
213            total_tokens += summary_tokens;
214        }
215        
216        relevant
217    }
218    
219    fn score_summary_relevance(&self, summary: &DbSummary, current_topics: &[String], user_query: Option<&str>) -> f32 {
220        let mut score = 0.0;
221        
222        // Topic matching
223        for topic in current_topics {
224            if summary.key_topics.iter().any(|t| t.to_lowercase().contains(&topic.to_lowercase())) {
225                score += 0.4;
226            }
227        }
228        
229        // Query matching
230        if let Some(query) = user_query {
231            let query_lower = query.to_lowercase();
232            for topic in &summary.key_topics {
233                if query_lower.contains(&topic.to_lowercase()) {
234                    score += 0.5;
235                }
236            }
237        }
238        
239        // Recency scoring
240        let age_hours = chrono::Utc::now().signed_duration_since(summary.generated_at).num_hours();
241        let recency_score = 1.0 / (1.0 + age_hours as f32 / 24.0);
242        score += recency_score * 0.3;
243        
244        // Compression ratio (more compressed = potentially more relevant for context)
245        score += summary.compression_ratio.min(1.0) * 0.2;
246        
247        score.min(1.0)
248    }
249    
250    fn summary_to_message(&self, summary: &DbSummary, current_messages: &[Message]) -> Message {
251        let content = if current_messages.len() > 5 {
252            format!("[Summary of earlier conversation: {}]", summary.summary_text)
253        } else {
254            format!("[Earlier: {}]", summary.summary_text)
255        };
256        Message { role: "system".to_string(), content }
257    }
258
259    async fn add_specific_details(
260        &mut self, 
261        context: &mut Vec<Message>, 
262        full_messages: &[StoredMessage], 
263        user_query: Option<&str>
264    ) -> anyhow::Result<()> {
265        if !self.config.enable_detail_injection || full_messages.is_empty() {
266            return Ok(());
267        }
268        
269        let detail_requests = self.extract_detail_requests(user_query);
270        if detail_requests.is_empty() { 
271            return Ok(()); 
272        }
273        
274        let relevant_messages = self.find_relevant_details(full_messages, &detail_requests);
275        for message in &relevant_messages {
276            let detail_message = Message {
277                role: message.role.clone(),
278                content: format!("[Earlier detail: {}]", message.content),
279            };
280            
281            // Insert details before the last user message if possible
282            if let Some(pos) = context.iter().rposition(|m| m.role == "user") {
283                context.insert(pos, detail_message);
284            } else {
285                context.insert(0, detail_message);
286            }
287        }
288        
289        Ok(())
290    }
291
292    fn extract_detail_requests(&self, user_query: Option<&str>) -> Vec<String> {
293        let mut requests = Vec::new();
294        if let Some(query) = user_query {
295            let query_lower = query.to_lowercase();
296            let words: Vec<&str> = query_lower.split_whitespace().collect();
297            
298            for i in 0..words.len().saturating_sub(1) {
299                if ["the", "that", "those", "specific", "exact"].contains(&words[i]) {
300                    let potential = words[i + 1..].iter()
301                        .take(3)
302                        .copied()
303                        .collect::<Vec<&str>>()
304                        .join(" ");
305                    
306                    if !potential.is_empty() { 
307                        requests.push(potential); 
308                    }
309                }
310            }
311        }
312        
313        requests.dedup();
314        requests
315    }
316
317    fn find_relevant_details<'a>(
318        &self, 
319        messages: &'a [StoredMessage], 
320        detail_requests: &[String]
321    ) -> Vec<&'a StoredMessage> {
322        let mut relevant = Vec::new();
323        
324        for message in messages {
325            let content_lower = message.content.to_lowercase();
326            
327            for request in detail_requests {
328                if content_lower.contains(&request.to_lowercase()) {
329                    relevant.push(message);
330                    break;
331                }
332            }
333            
334            if relevant.len() >= 3 { 
335                break; 
336            }
337        }
338        
339        relevant
340    }
341
342    fn trim_to_token_limit(&self, context: &mut Vec<Message>) {
343        let mut total_tokens = 0;
344        let mut to_remove = Vec::new();
345        
346        for (idx, message) in context.iter().enumerate() {
347            let message_tokens = message.content.len() / 4;
348            
349            if total_tokens + message_tokens > self.config.max_total_tokens {
350                to_remove.push(idx);
351            } else {
352                total_tokens += message_tokens;
353            }
354        }
355        
356        // Remove from end to preserve order
357        for idx in to_remove.iter().rev() {
358            context.remove(*idx);
359        }
360    }
361
362    /// Add bridging between summarized and current content
363    async fn add_bridging(
364        &mut self,
365        context: &mut Vec<Message>,
366        _current_messages: &[Message],
367        summaries: Option<&Vec<DbSummary>>,
368    ) -> anyhow::Result<()> {
369        if !self.config.enable_detail_injection || context.len() < 2 || summaries.is_none() {
370            return Ok(());
371        }
372        
373        let transition_idx = self.find_transition_point(context);
374        
375        if transition_idx > 0 && transition_idx < context.len() {
376            let summary_count = context[..transition_idx].iter()
377                .filter(|m| m.role == "system" && 
378                        (m.content.starts_with("[Summary") || m.content.starts_with("[Earlier:")))
379                .count();
380            
381            if summary_count > 0 {
382                let bridge_message = Message {
383                    role: "system".to_string(),
384                    content: format!("[Continuing from earlier conversation with {} summary{}]",
385                        summary_count, if summary_count > 1 { "s" } else { "" }),
386                };
387                
388                context.insert(transition_idx, bridge_message);
389            }
390        }
391        
392        Ok(())
393    }
394    
395    fn find_transition_point(&self, context: &[Message]) -> usize {
396        for (idx, message) in context.iter().enumerate() {
397            if !(message.role == "system" && 
398                 (message.content.starts_with("[Summary") || message.content.starts_with("[Earlier:") || message.content.starts_with("[Context"))) {
399                return idx;
400            }
401        }
402        context.len()
403    }
404    
405    fn extract_topics(&self, messages: &[Message]) -> Vec<String> {
406        let mut topics = Vec::new();
407        
408        for message in messages.iter().rev().take(5) {
409            let words: Vec<&str> = message.content.split_whitespace().collect();
410            
411            for i in 0..words.len().saturating_sub(2) {
412                let word_lower = words[i].to_lowercase();
413                
414                if word_lower == "about" || word_lower == "regarding" {
415                    let topic = words[i + 1..].iter()
416                        .take(3)
417                        .copied()
418                        .collect::<Vec<&str>>()
419                        .join(" ");
420                    
421                    if !topic.is_empty() { 
422                        topics.push(topic); 
423                    }
424                }
425                
426                if ["what", "how", "why", "when", "where", "who", "which"].contains(&word_lower.as_str()) {
427                    let topic = words[i + 1..].iter()
428                        .take(4)
429                        .copied()
430                        .collect::<Vec<&str>>()
431                        .join(" ");
432                    
433                    if !topic.is_empty() { 
434                        topics.push(topic); 
435                    }
436                }
437            }
438        }
439        
440        topics.dedup();
441        topics.truncate(3);
442        topics
443    }
444}
445
446impl Clone for ContextBuilder {
447    fn clone(&self) -> Self {
448        Self {
449            config: self.config.clone(),
450        }
451    }
452}