Skip to main content

post_cortex_memory/
context_assembly.rs

1// Copyright (c) 2026 Julius ML
2//
3// Graph-aware context assembly for PCX.
4//
5// Given a query/hint and a session, assembles the most relevant context by:
6// 1. Extracting entities from the query using NER
7// 2. Traversing the entity graph to find related entities (typed edges)
8// 3. Boosting semantic search results that mention graph-connected entities
9// 4. Impact analysis: which entities depend on the query entities
10//
11// Used by Axon to build LLM context that is structurally relevant,
12// not just keyword-similar.
13
14use chrono::Utc;
15use post_cortex_core::core::context_update::{EntityRelationship, RelationType};
16use post_cortex_core::graph::entity_graph::SimpleEntityGraph;
17use serde::{Deserialize, Serialize};
18use std::collections::{HashMap, HashSet};
19use tracing::{debug, info};
20
/// A single piece of assembled context with its relevance score.
///
/// `assemble_context` builds one of these for every `ContextUpdate` it is given;
/// only the ones that fit the token budget are returned.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextItem {
    /// The text content. `assemble_context` renders it as `"<title>: <description>"`.
    pub text: String,
    /// Combined relevance score (0.0 - 1.0). `assemble_context` caps it at 1.0.
    pub score: f32,
    /// Why this item was included
    pub source: ContextSource,
    /// Entities mentioned in this content (names as stored in the graph).
    pub entities: Vec<String>,
    /// Approximate token count of `text`
    pub token_estimate: usize,
    /// Stable ID of the ContextUpdate this item was sourced from.
    /// Consumers can use this to track which entries were materialised
    /// into a session's context window and later check their freshness.
    pub entry_id: String,
}
39
/// How a context item was found
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ContextSource {
    /// Direct semantic search match.
    ///
    /// `assemble_context` uses this variant when the item's text mentions an
    /// entity that was named directly in the query.
    SemanticMatch,
    /// Found via entity graph traversal (entity → related content).
    ///
    /// `assemble_context` uses this variant when the text mentions only
    /// graph-connected entities, none of which were named in the query.
    GraphTraversal {
        /// Entity name that was traversed.
        via_entity: String,
    },
    /// Recent update in the session.
    ///
    /// `assemble_context` uses this variant when no relevant entity is
    /// mentioned in the text at all.
    RecentUpdate,
}
53
/// Result of graph-aware context assembly
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledContext {
    /// Context items sorted by relevance (highest first)
    pub items: Vec<ContextItem>,
    /// Entities relevant to the query, with their graph connections
    pub entity_context: Vec<EntityContext>,
    /// Impact analysis: entities that depend on query entities
    pub impact: Vec<ImpactEntry>,
    /// Total estimated tokens: the selected items' estimates plus the
    /// per-entity reservation `assemble_context` makes for the entity summary.
    pub total_tokens: usize,
}
66
/// An entity and its graph neighborhood relevant to the query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityContext {
    /// Entity name as stored in the graph.
    pub name: String,
    /// How this entity relates to the query (direct mention, or via graph)
    pub relevance: EntityRelevance,
    /// Typed relationships from the graph in which this entity is either
    /// endpoint (outgoing and incoming edges).
    pub relationships: Vec<EntityRelationship>,
}
77
/// How an entity relates to the original query.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum EntityRelevance {
    /// Directly mentioned in the query
    DirectMention,
    /// Connected via typed edge in the graph
    GraphNeighbor {
        /// Entity through which this neighbor was discovered.
        via: String,
        /// Type of the graph edge connecting the entities, as the `Debug`
        /// rendering of the relation type. `build_entity_context` appends
        /// `" (reverse)"` when the edge points from the neighbor back to `via`.
        relation: String,
    },
}
91
/// An entity that would be impacted by changes to a query entity
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImpactEntry {
    /// The entity that depends on the query entity
    pub entity: String,
    /// The query entity it depends on
    pub depends_on: String,
    /// The relationship type
    pub relation_type: RelationType,
    /// How the dependency was found. `analyze_impact` copies this from the
    /// `context` field of the graph relationship.
    pub context: String,
}
104
/// Approximate the number of LLM tokens in `text`.
///
/// Heuristic: one token per four bytes of UTF-8 (roughly four characters of
/// English), rounded up, so any non-empty string counts as at least one token.
fn estimate_tokens(text: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;

    let byte_len = text.len();
    let whole_tokens = byte_len / BYTES_PER_TOKEN;
    if byte_len % BYTES_PER_TOKEN == 0 {
        whole_tokens
    } else {
        whole_tokens + 1
    }
}
109
110/// Extract entity names that appear in the query text.
111///
112/// Uses two strategies:
113/// 1. **Exact substring** — entity name appears verbatim in query (case-insensitive)
114/// 2. **Token overlap** — split entity names on camelCase/snake_case/hyphens and match
115///    individual tokens against query words. Requires ≥50% of entity tokens to match
116///    (or all tokens if entity has ≤2 tokens) to avoid false positives.
117///
118/// Returns entities from the graph that are mentioned in the query, sorted by match quality.
119pub fn find_query_entities(query: &str, graph: &SimpleEntityGraph) -> Vec<String> {
120    let query_lower = query.to_lowercase();
121    let query_tokens: std::collections::HashSet<&str> = query_lower
122        .split(|c: char| !c.is_alphanumeric() && c != '_')
123        .filter(|t| t.len() >= 3)
124        .collect();
125
126    let all_entities = graph.get_all_entities();
127    let mut found: Vec<(String, usize, bool)> = Vec::new(); // (name, score, is_exact)
128
129    for entity_data in &all_entities {
130        let name_lower = entity_data.name.to_lowercase();
131
132        // Skip very short entity names (< 2 chars) to avoid false matches
133        if name_lower.len() < 2 {
134            continue;
135        }
136
137        // Strategy 1: Exact substring match (highest confidence)
138        if query_lower.contains(&name_lower) {
139            found.push((entity_data.name.clone(), name_lower.len() * 10, true));
140            continue;
141        }
142
143        // Strategy 2: Token overlap — split entity name into tokens
144        let entity_tokens: Vec<String> = split_entity_tokens(&name_lower);
145        if entity_tokens.is_empty() {
146            continue;
147        }
148
149        let matched = entity_tokens
150            .iter()
151            .filter(|et| {
152                if et.len() < 3 {
153                    return false;
154                }
155                query_tokens.iter().any(|qt| {
156                    // Exact match or prefix match (stem-like):
157                    // "stream" matches "streaming", "chat" matches "chat"
158                    let (shorter, longer) = if et.len() <= qt.len() {
159                        (et.as_str(), *qt)
160                    } else {
161                        (*qt, et.as_str())
162                    };
163                    shorter.len() >= 3 && longer.starts_with(shorter)
164                })
165            })
166            .count();
167
168        if matched == 0 {
169            continue;
170        }
171
172        // Require sufficient overlap to avoid false positives.
173        // For short names (1-2 tokens), require all to match.
174        // For medium (3 tokens), at least 1.
175        // For longer names, at least 40%.
176        let threshold = if entity_tokens.len() <= 2 {
177            entity_tokens.len()
178        } else {
179            1.max((entity_tokens.len() * 2).div_ceil(5)) // ceil(40%)
180        };
181
182        if matched >= threshold {
183            let score = matched * 5 + name_lower.len();
184            found.push((entity_data.name.clone(), score, false));
185        }
186    }
187
188    // Sort: exact matches first, then by score descending
189    found.sort_by(|a, b| b.2.cmp(&a.2).then_with(|| b.1.cmp(&a.1)));
190    found.into_iter().map(|(name, _, _)| name).collect()
191}
192
193/// Split an entity name into searchable tokens.
194///
195/// Handles camelCase, PascalCase, snake_case, kebab-case:
196///   "ChatRepositoryImpl" → ["chat", "repository", "impl"]
197///   "svc-social" → ["svc", "social"]
198///   "PG LISTEN/NOTIFY" → ["pg", "listen", "notify"]
199fn split_entity_tokens(name: &str) -> Vec<String> {
200    let mut tokens = Vec::new();
201    let mut current = String::new();
202
203    for c in name.chars() {
204        if c == '_' || c == '-' || c == '/' || c == ' ' || c == '.' {
205            if !current.is_empty() {
206                tokens.push(std::mem::take(&mut current));
207            }
208        } else if c.is_uppercase() && !current.is_empty() {
209            // camelCase boundary
210            tokens.push(std::mem::take(&mut current));
211            current.push(c.to_ascii_lowercase());
212        } else {
213            current.push(c.to_ascii_lowercase());
214        }
215    }
216    if !current.is_empty() {
217        tokens.push(current);
218    }
219    tokens
220}
221
222/// Build entity context: for each query entity, traverse the graph to find
223/// related entities and their typed relationships.
224pub fn build_entity_context(
225    query_entities: &[String],
226    graph: &SimpleEntityGraph,
227    max_depth: usize,
228) -> Vec<EntityContext> {
229    let mut result: Vec<EntityContext> = Vec::new();
230    let mut seen: HashSet<String> = HashSet::new();
231
232    // First: direct mentions
233    for entity in query_entities {
234        if seen.contains(entity) {
235            continue;
236        }
237        seen.insert(entity.clone());
238
239        let rels = get_entity_relationships(entity, graph);
240        result.push(EntityContext {
241            name: entity.clone(),
242            relevance: EntityRelevance::DirectMention,
243            relationships: rels,
244        });
245    }
246
247    // Then: graph neighbors (depth 1 and optionally 2)
248    for depth in 0..max_depth {
249        let current_entities: Vec<String> = result
250            .iter()
251            .filter(|ec| {
252                if depth == 0 {
253                    ec.relevance == EntityRelevance::DirectMention
254                } else {
255                    true
256                }
257            })
258            .map(|ec| ec.name.clone())
259            .collect();
260
261        for entity in &current_entities {
262            let neighbors = graph.find_related_entities(entity);
263            for neighbor in neighbors {
264                if seen.contains(&neighbor) {
265                    continue;
266                }
267                seen.insert(neighbor.clone());
268
269                // Find the relationship type between entity and neighbor
270                let rel_desc = get_relationship_description(entity, &neighbor, graph);
271                let rels = get_entity_relationships(&neighbor, graph);
272
273                result.push(EntityContext {
274                    name: neighbor.clone(),
275                    relevance: EntityRelevance::GraphNeighbor {
276                        via: entity.clone(),
277                        relation: rel_desc,
278                    },
279                    relationships: rels,
280                });
281            }
282        }
283    }
284
285    result
286}
287
288/// Get all relationships for an entity (both outgoing and incoming)
289fn get_entity_relationships(entity: &str, graph: &SimpleEntityGraph) -> Vec<EntityRelationship> {
290    graph
291        .get_all_relationships()
292        .into_iter()
293        .filter(|r| r.from_entity == entity || r.to_entity == entity)
294        .collect()
295}
296
297/// Get a human-readable description of the relationship between two entities
298fn get_relationship_description(from: &str, to: &str, graph: &SimpleEntityGraph) -> String {
299    for rel in graph.get_all_relationships() {
300        if rel.from_entity == from && rel.to_entity == to {
301            return format!("{:?}", rel.relation_type);
302        }
303        if rel.from_entity == to && rel.to_entity == from {
304            return format!("{:?} (reverse)", rel.relation_type);
305        }
306    }
307    "RelatedTo".to_string()
308}
309
310/// Perform impact analysis: find all entities that depend on any of the query entities.
311/// Traverses DependsOn, RequiredBy, and Implements edges in reverse.
312pub fn analyze_impact(query_entities: &[String], graph: &SimpleEntityGraph) -> Vec<ImpactEntry> {
313    let dependency_types = [
314        RelationType::DependsOn,
315        RelationType::RequiredBy,
316        RelationType::Implements,
317    ];
318
319    let all_rels = graph.get_all_relationships();
320    let mut impacts: Vec<ImpactEntry> = Vec::new();
321    let query_set: HashSet<&String> = query_entities.iter().collect();
322
323    for rel in &all_rels {
324        // For DependsOn: if B depends on A, and A is a query entity → B is impacted
325        // The edge is: from=B, to=A, type=DependsOn
326        if dependency_types.contains(&rel.relation_type) && query_set.contains(&rel.to_entity) {
327            // Skip self-references
328            if rel.from_entity == rel.to_entity {
329                continue;
330            }
331            impacts.push(ImpactEntry {
332                entity: rel.from_entity.clone(),
333                depends_on: rel.to_entity.clone(),
334                relation_type: rel.relation_type.clone(),
335                context: rel.context.clone(),
336            });
337        }
338
339        // For RequiredBy: if A is required by B, and A is a query entity → B is impacted
340        // The edge is: from=A, to=B, type=RequiredBy
341        if rel.relation_type == RelationType::RequiredBy && query_set.contains(&rel.from_entity) {
342            if rel.from_entity == rel.to_entity {
343                continue;
344            }
345            impacts.push(ImpactEntry {
346                entity: rel.to_entity.clone(),
347                depends_on: rel.from_entity.clone(),
348                relation_type: rel.relation_type.clone(),
349                context: rel.context.clone(),
350            });
351        }
352    }
353
354    // Deduplicate by (entity, depends_on)
355    let mut seen: HashSet<(String, String)> = HashSet::new();
356    impacts.retain(|i| seen.insert((i.entity.clone(), i.depends_on.clone())));
357    impacts
358}
359
360/// Score and boost semantic search results based on entity graph connections.
361///
362/// Results that mention graph-connected entities get a score boost.
363/// This makes structurally related content rank higher than
364/// keyword-similar but structurally unrelated content.
365pub fn boost_by_graph(
366    results: &mut Vec<(String, f32)>, // (text, score)
367    entity_context: &[EntityContext],
368) {
369    // Build a set of all relevant entity names (direct + neighbors)
370    let relevant_entities: HashMap<String, f32> = entity_context
371        .iter()
372        .map(|ec| {
373            let boost = match &ec.relevance {
374                EntityRelevance::DirectMention => 0.15,
375                EntityRelevance::GraphNeighbor { .. } => 0.08,
376            };
377            (ec.name.to_lowercase(), boost)
378        })
379        .collect();
380
381    for (text, score) in results.iter_mut() {
382        let text_lower = text.to_lowercase();
383        let mut total_boost: f32 = 0.0;
384
385        for (entity, boost) in &relevant_entities {
386            if text_lower.contains(entity) {
387                total_boost += boost;
388            }
389        }
390
391        // Cap the boost at 0.25 to prevent over-weighting
392        *score += total_boost.min(0.25);
393        // Clamp to [0, 1]
394        *score = score.min(1.0);
395    }
396}
397
398/// Assemble context from a session's entity graph and context updates.
399///
400/// This is the main entry point for graph-aware context assembly.
401/// It combines entity graph traversal with content scoring to produce
402/// a ranked list of context items within a token budget.
403pub fn assemble_context(
404    query: &str,
405    graph: &SimpleEntityGraph,
406    updates: &[post_cortex_core::core::context_update::ContextUpdate],
407    token_budget: usize,
408) -> AssembledContext {
409    info!(
410        "Assembling context for query: '{}' (budget: {} tokens)",
411        query, token_budget
412    );
413
414    // Step 1: Find entities mentioned in the query
415    let query_entities = find_query_entities(query, graph);
416    debug!("Query entities: {:?}", query_entities);
417
418    // Step 2: Build entity context (graph traversal)
419    let entity_context = build_entity_context(&query_entities, graph, 1);
420    debug!(
421        "Entity context: {} entities (direct + neighbors)",
422        entity_context.len()
423    );
424
425    // Step 3: Impact analysis
426    let impact = analyze_impact(&query_entities, graph);
427    if !impact.is_empty() {
428        debug!("Impact analysis: {} dependent entities", impact.len());
429    }
430
431    // Step 4: Score all updates
432    let _relevant_entity_names: HashSet<String> = entity_context
433        .iter()
434        .map(|ec| ec.name.to_lowercase())
435        .collect();
436
437    let mut scored_items: Vec<ContextItem> = Vec::new();
438
439    for update in updates {
440        let text = format!("{}: {}", update.content.title, update.content.description);
441        let tokens = estimate_tokens(&text);
442
443        // Base score: recency (newer updates score higher)
444        let age_hours = (Utc::now() - update.timestamp).num_hours().max(0) as f32;
445        let recency_score = 1.0 / (1.0 + age_hours / 24.0); // Decays over days
446
447        // Entity match boost
448        let text_lower = text.to_lowercase();
449        let mut entity_boost: f32 = 0.0;
450        let mut matched_entities: Vec<String> = Vec::new();
451
452        for ec in &entity_context {
453            let name_lower = ec.name.to_lowercase();
454            if text_lower.contains(&name_lower) {
455                matched_entities.push(ec.name.clone());
456                entity_boost += match &ec.relevance {
457                    EntityRelevance::DirectMention => 0.4,
458                    EntityRelevance::GraphNeighbor { .. } => 0.2,
459                };
460            }
461        }
462
463        // Importance boost
464        let importance_boost = if update.user_marked_important {
465            0.2
466        } else {
467            0.0
468        };
469
470        let score = (recency_score * 0.3 + entity_boost + importance_boost).min(1.0);
471
472        // Determine source
473        let source = if !matched_entities.is_empty() {
474            if query_entities.iter().any(|qe| {
475                matched_entities
476                    .iter()
477                    .any(|me| me.eq_ignore_ascii_case(qe))
478            }) {
479                ContextSource::SemanticMatch
480            } else {
481                ContextSource::GraphTraversal {
482                    via_entity: matched_entities[0].clone(),
483                }
484            }
485        } else {
486            ContextSource::RecentUpdate
487        };
488
489        scored_items.push(ContextItem {
490            text,
491            score,
492            source,
493            entities: matched_entities,
494            token_estimate: tokens,
495            entry_id: update.id.to_string(),
496        });
497    }
498
499    // Step 5: Greedy knapsack — sort by score, pack within budget
500    scored_items.sort_by(|a, b| {
501        b.score
502            .partial_cmp(&a.score)
503            .unwrap_or(std::cmp::Ordering::Equal)
504    });
505
506    let mut selected: Vec<ContextItem> = Vec::new();
507    let mut used_tokens = 0;
508
509    // Reserve tokens for entity context summary (~50 tokens per entity)
510    let entity_summary_tokens = entity_context.len() * 50;
511    let content_budget = token_budget.saturating_sub(entity_summary_tokens);
512
513    for item in scored_items {
514        if used_tokens + item.token_estimate > content_budget {
515            // Try to fit — skip items that are too large
516            continue;
517        }
518        used_tokens += item.token_estimate;
519        selected.push(item);
520    }
521
522    let total_tokens = used_tokens + entity_summary_tokens;
523    info!(
524        "Assembled {} items ({} tokens), {} entity contexts, {} impact entries",
525        selected.len(),
526        total_tokens,
527        entity_context.len(),
528        impact.len()
529    );
530
531    AssembledContext {
532        items: selected,
533        entity_context,
534        impact,
535        total_tokens,
536    }
537}
538
539/// Format assembled context as a text block suitable for LLM injection.
540pub fn format_for_llm(ctx: &AssembledContext) -> String {
541    let mut parts: Vec<String> = Vec::new();
542
543    // Entity graph summary
544    if !ctx.entity_context.is_empty() {
545        let mut graph_lines: Vec<String> = Vec::new();
546        for ec in &ctx.entity_context {
547            if ec.relationships.is_empty() {
548                continue;
549            }
550            for rel in &ec.relationships {
551                graph_lines.push(format!(
552                    "  {} --[{:?}]--> {}",
553                    rel.from_entity, rel.relation_type, rel.to_entity
554                ));
555            }
556        }
557        if !graph_lines.is_empty() {
558            // Deduplicate relationship lines
559            graph_lines.sort();
560            graph_lines.dedup();
561            parts.push(format!("Entity relationships:\n{}", graph_lines.join("\n")));
562        }
563    }
564
565    // Impact warnings
566    if !ctx.impact.is_empty() {
567        let impact_lines: Vec<String> = ctx
568            .impact
569            .iter()
570            .map(|i| {
571                format!(
572                    "  {} depends on {} ({:?})",
573                    i.entity, i.depends_on, i.relation_type
574                )
575            })
576            .collect();
577        parts.push(format!(
578            "Impact analysis — these entities depend on what you're working with:\n{}",
579            impact_lines.join("\n")
580        ));
581    }
582
583    // Context items
584    if !ctx.items.is_empty() {
585        let content_lines: Vec<String> = ctx.items.iter().map(|item| item.text.clone()).collect();
586        parts.push(format!(
587            "Relevant context:\n{}",
588            content_lines.join("\n---\n")
589        ));
590    }
591
592    parts.join("\n\n")
593}
594
// Unit tests for graph-aware context assembly. All tests share the small
// fixture graph built by `make_graph`.
#[cfg(test)]
mod tests {
    use super::*;
    use post_cortex_core::core::context_update::*;
    use post_cortex_core::graph::entity_graph::SimpleEntityGraph;
    // Fixture: six technology entities connected only by DependsOn edges:
    //   Axon → Post-Cortex, Axon → gRPC, gRPC → tonic,
    //   Post-Cortex → RocksDB, Post-Cortex → Rust
    fn make_graph() -> SimpleEntityGraph {
        let mut graph = SimpleEntityGraph::new();
        let now = Utc::now();

        // Add entities
        graph.add_or_update_entity("Axon".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("Post-Cortex".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("gRPC".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("tonic".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("RocksDB".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("Rust".into(), EntityType::Technology, now, "");

        // Add typed relationships
        graph.add_relationship(EntityRelationship {
            from_entity: "Axon".to_string(),
            to_entity: "Post-Cortex".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Axon connects to Post-Cortex".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "Axon".to_string(),
            to_entity: "gRPC".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Axon uses gRPC".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "gRPC".to_string(),
            to_entity: "tonic".to_string(),
            relation_type: RelationType::DependsOn,
            context: "gRPC implemented via tonic".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "Post-Cortex".to_string(),
            to_entity: "RocksDB".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Post-Cortex uses RocksDB for storage".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "Post-Cortex".to_string(),
            to_entity: "Rust".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Post-Cortex built with Rust".to_string(),
        });

        graph
    }

    // Entity names that appear verbatim in the query are found regardless of
    // the query's surrounding words.
    #[test]
    fn test_find_query_entities() {
        let graph = make_graph();
        let entities = find_query_entities("I'm working on the gRPC service in Axon", &graph);
        assert!(entities.contains(&"gRPC".to_string()));
        assert!(entities.contains(&"Axon".to_string()));
    }

    // With depth 1 the result holds the direct mention plus at least one of
    // its graph neighbors.
    #[test]
    fn test_build_entity_context_includes_neighbors() {
        let graph = make_graph();
        let query_entities = vec!["gRPC".to_string()];
        let ctx = build_entity_context(&query_entities, &graph, 1);

        let names: Vec<&str> = ctx.iter().map(|ec| ec.name.as_str()).collect();
        // gRPC is direct, tonic and Axon are neighbors
        assert!(names.contains(&"gRPC"));
        // Only one neighbor is required here, so the test does not depend on
        // whether `find_related_entities` follows incoming edges as well.
        assert!(names.contains(&"tonic") || names.contains(&"Axon"));
    }

    // The source of a DependsOn edge is reported as impacted when the edge's
    // target is the queried entity.
    #[test]
    fn test_impact_analysis() {
        let graph = make_graph();

        // If RocksDB changes, Post-Cortex is impacted (depends on RocksDB)
        let impact = analyze_impact(&["RocksDB".to_string()], &graph);
        let impacted: Vec<&str> = impact.iter().map(|i| i.entity.as_str()).collect();
        assert!(
            impacted.contains(&"Post-Cortex"),
            "Post-Cortex should be impacted by RocksDB change, got: {:?}",
            impacted
        );

        // If gRPC changes, Axon is impacted
        let impact = analyze_impact(&["gRPC".to_string()], &graph);
        let impacted: Vec<&str> = impact.iter().map(|i| i.entity.as_str()).collect();
        assert!(
            impacted.contains(&"Axon"),
            "Axon should be impacted by gRPC change, got: {:?}",
            impacted
        );
    }

    // End-to-end: ranking, entity context, impact entries, and `entry_id`
    // round-tripping for two updates with a budget large enough for both.
    #[test]
    fn test_assemble_context_with_budget() {
        let graph = make_graph();
        let updates = vec![
            ContextUpdate {
                id: uuid::Uuid::new_v4(),
                update_type: UpdateType::ConceptDefined,
                content: UpdateContent {
                    title: "gRPC Setup".to_string(),
                    description: "Added gRPC service using tonic for Axon communication"
                        .to_string(),
                    details: vec![],
                    examples: vec![],
                    implications: vec![],
                },
                timestamp: Utc::now(),
                related_code: None,
                parent_update: None,
                user_marked_important: false,
                creates_entities: vec![],
                creates_relationships: vec![],
                references_entities: vec![],
                typed_entities: vec![],
            },
            ContextUpdate {
                id: uuid::Uuid::new_v4(),
                update_type: UpdateType::ConceptDefined,
                content: UpdateContent {
                    title: "Unrelated Update".to_string(),
                    description: "Fixed a CSS bug in the landing page".to_string(),
                    details: vec![],
                    examples: vec![],
                    implications: vec![],
                },
                timestamp: Utc::now(),
                related_code: None,
                parent_update: None,
                user_marked_important: false,
                creates_entities: vec![],
                creates_relationships: vec![],
                references_entities: vec![],
                typed_entities: vec![],
            },
        ];

        // Capture IDs so we can verify `entry_id` round-trips below.
        let grpc_id = updates[0].id;
        let css_id = updates[1].id;

        let result = assemble_context("working on gRPC", &graph, &updates, 1000);

        // gRPC-related update should rank higher than CSS bug
        assert!(!result.items.is_empty());
        assert!(result.items[0].text.contains("gRPC"));

        // Entity context should include gRPC and neighbors
        let entity_names: Vec<&str> = result
            .entity_context
            .iter()
            .map(|ec| ec.name.as_str())
            .collect();
        assert!(entity_names.contains(&"gRPC"));

        // Impact: Axon depends on gRPC
        let impacted: Vec<&str> = result.impact.iter().map(|i| i.entity.as_str()).collect();
        assert!(impacted.contains(&"Axon"));

        // Every assembled item should carry the underlying ContextUpdate's
        // ID so consumers (e.g. Axon resume-freshness) can track which
        // entries were materialised into the context window.
        for item in &result.items {
            assert!(!item.entry_id.is_empty(), "entry_id should be populated");
            let id = uuid::Uuid::parse_str(&item.entry_id).expect("entry_id should parse as UUID");
            assert!(
                id == grpc_id || id == css_id,
                "entry_id must match one of the input updates"
            );
        }
    }

    // The rendered text contains the relationship and impact sections when
    // the queried entity has edges and dependents in the graph.
    #[test]
    fn test_format_for_llm() {
        let graph = make_graph();
        let updates = vec![ContextUpdate {
            id: uuid::Uuid::new_v4(),
            update_type: UpdateType::ConceptDefined,
            content: UpdateContent {
                title: "RocksDB Migration".to_string(),
                description: "Migrating from sled to RocksDB for better performance".to_string(),
                details: vec![],
                examples: vec![],
                implications: vec![],
            },
            timestamp: Utc::now(),
            related_code: None,
            parent_update: None,
            user_marked_important: false,
            creates_entities: vec![],
            creates_relationships: vec![],
            references_entities: vec![],
            typed_entities: vec![],
        }];

        let result = assemble_context("changing RocksDB", &graph, &updates, 2000);
        let formatted = format_for_llm(&result);

        assert!(formatted.contains("Entity relationships"));
        assert!(formatted.contains("Impact analysis"));
        assert!(formatted.contains("Post-Cortex depends on RocksDB"));
    }
}