Skip to main content

post_cortex_memory/
context_assembly.rs

1// Copyright (c) 2026 Julius ML
2//
3// Graph-aware context assembly for PCX.
4//
5// Given a query/hint and a session, assembles the most relevant context by:
6// 1. Extracting entities from the query using NER
7// 2. Traversing the entity graph to find related entities (typed edges)
8// 3. Boosting semantic search results that mention graph-connected entities
9// 4. Impact analysis: which entities depend on the query entities
10//
11// Used by Axon to build LLM context that is structurally relevant,
12// not just keyword-similar.
13
14use post_cortex_core::core::context_update::{EntityRelationship, RelationType};
15use post_cortex_core::graph::entity_graph::SimpleEntityGraph;
16use chrono::Utc;
17use serde::{Deserialize, Serialize};
18use std::collections::{HashMap, HashSet};
19use tracing::{debug, info};
20
/// A single piece of assembled context with its relevance score.
///
/// `assemble_context` builds one of these per input `ContextUpdate` and keeps
/// the ones that fit into the token budget.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextItem {
    /// The text content. `assemble_context` fills this with
    /// `"<title>: <description>"` of the source update.
    pub text: String,
    /// Combined relevance score (0.0 - 1.0). `assemble_context` sums a
    /// recency term, entity-match boosts and an importance boost, capped at 1.0.
    pub score: f32,
    /// Why this item was included
    pub source: ContextSource,
    /// Entities mentioned in this content (names of the entity-context
    /// entries whose lowercased name occurs in the lowercased `text`).
    pub entities: Vec<String>,
    /// Approximate token count of `text` (see `estimate_tokens`).
    pub token_estimate: usize,
    /// Stable ID of the ContextUpdate this item was sourced from.
    /// Consumers can use this to track which entries were materialised
    /// into a session's context window and later check their freshness.
    pub entry_id: String,
}
39
/// How a context item was found
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ContextSource {
    /// Direct semantic search match.
    ///
    /// `assemble_context` assigns this when the item's text mentions at least
    /// one entity that was matched directly in the query.
    SemanticMatch,
    /// Found via entity graph traversal (entity → related content).
    ///
    /// `assemble_context` assigns this when the text mentions only
    /// graph-connected entities, none of the query entities themselves.
    GraphTraversal {
        /// Entity name that was traversed (the first matched entity).
        via_entity: String,
    },
    /// Recent update in the session; assigned when no entity matched the text.
    RecentUpdate,
}
53
/// Result of graph-aware context assembly
///
/// Returned by `assemble_context` and rendered to text by `format_for_llm`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledContext {
    /// Context items sorted by relevance (highest first); only items that fit
    /// into the content budget are kept.
    pub items: Vec<ContextItem>,
    /// Entities relevant to the query, with their graph connections
    pub entity_context: Vec<EntityContext>,
    /// Impact analysis: entities that depend on query entities
    pub impact: Vec<ImpactEntry>,
    /// Total estimated tokens: the token estimates of `items` plus the
    /// per-entity reserve (50 tokens per `entity_context` entry).
    pub total_tokens: usize,
}
66
/// An entity and its graph neighborhood relevant to the query
///
/// Produced by `build_entity_context`: direct mentions come first, followed
/// by neighbors in the order they were discovered.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityContext {
    /// Entity name as stored in the graph.
    pub name: String,
    /// How this entity relates to the query (direct mention, or via graph)
    pub relevance: EntityRelevance,
    /// Typed relationships from the graph: every edge that has this entity as
    /// its source or its target.
    pub relationships: Vec<EntityRelationship>,
}
77
/// How an entity relates to the original query.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum EntityRelevance {
    /// Directly mentioned in the query
    DirectMention,
    /// Connected via typed edge in the graph
    GraphNeighbor {
        /// Entity through which this neighbor was discovered.
        via: String,
        /// Type of the graph edge connecting the entities.
        ///
        /// Filled by `get_relationship_description`: the `Debug` rendering of
        /// the edge's relation type, suffixed with `" (reverse)"` when the edge
        /// points from the neighbor back to `via`, or `"RelatedTo"` when no
        /// edge between the two was found.
        relation: String,
    },
}
91
/// An entity that would be impacted by changes to a query entity
///
/// Produced by `analyze_impact`; at most one entry is kept per
/// `(entity, depends_on)` pair.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImpactEntry {
    /// The entity that depends on the query entity
    pub entity: String,
    /// The query entity it depends on
    pub depends_on: String,
    /// The relationship type (copied from the graph edge)
    pub relation_type: RelationType,
    /// How the dependency was found (the `context` text of the graph edge)
    pub context: String,
}
104
/// Cheap token-count heuristic: one token per four bytes of UTF-8 text,
/// rounded up. An empty string costs zero tokens.
fn estimate_tokens(text: &str) -> usize {
    let byte_len = text.len();
    let full_groups = byte_len / 4;
    if byte_len % 4 == 0 {
        full_groups
    } else {
        full_groups + 1
    }
}
109
110/// Extract entity names that appear in the query text.
111///
112/// Uses two strategies:
113/// 1. **Exact substring** — entity name appears verbatim in query (case-insensitive)
114/// 2. **Token overlap** — split entity names on camelCase/snake_case/hyphens and match
115///    individual tokens against query words. Requires ≥50% of entity tokens to match
116///    (or all tokens if entity has ≤2 tokens) to avoid false positives.
117///
118/// Returns entities from the graph that are mentioned in the query, sorted by match quality.
119pub fn find_query_entities(query: &str, graph: &SimpleEntityGraph) -> Vec<String> {
120    let query_lower = query.to_lowercase();
121    let query_tokens: std::collections::HashSet<&str> = query_lower
122        .split(|c: char| !c.is_alphanumeric() && c != '_')
123        .filter(|t| t.len() >= 3)
124        .collect();
125
126    let all_entities = graph.get_all_entities();
127    let mut found: Vec<(String, usize, bool)> = Vec::new(); // (name, score, is_exact)
128
129    for entity_data in &all_entities {
130        let name_lower = entity_data.name.to_lowercase();
131
132        // Skip very short entity names (< 2 chars) to avoid false matches
133        if name_lower.len() < 2 {
134            continue;
135        }
136
137        // Strategy 1: Exact substring match (highest confidence)
138        if query_lower.contains(&name_lower) {
139            found.push((entity_data.name.clone(), name_lower.len() * 10, true));
140            continue;
141        }
142
143        // Strategy 2: Token overlap — split entity name into tokens
144        let entity_tokens: Vec<String> = split_entity_tokens(&name_lower);
145        if entity_tokens.is_empty() {
146            continue;
147        }
148
149        let matched = entity_tokens.iter()
150            .filter(|et| {
151                if et.len() < 3 { return false; }
152                query_tokens.iter().any(|qt| {
153                    // Exact match or prefix match (stem-like):
154                    // "stream" matches "streaming", "chat" matches "chat"
155                    let (shorter, longer) = if et.len() <= qt.len() {
156                        (et.as_str(), *qt)
157                    } else {
158                        (*qt, et.as_str())
159                    };
160                    shorter.len() >= 3 && longer.starts_with(shorter)
161                })
162            })
163            .count();
164
165        if matched == 0 {
166            continue;
167        }
168
169        // Require sufficient overlap to avoid false positives.
170        // For short names (1-2 tokens), require all to match.
171        // For medium (3 tokens), at least 1.
172        // For longer names, at least 40%.
173        let threshold = if entity_tokens.len() <= 2 {
174            entity_tokens.len()
175        } else {
176            1.max((entity_tokens.len() * 2).div_ceil(5)) // ceil(40%)
177        };
178
179        if matched >= threshold {
180            let score = matched * 5 + name_lower.len();
181            found.push((entity_data.name.clone(), score, false));
182        }
183    }
184
185    // Sort: exact matches first, then by score descending
186    found.sort_by(|a, b| {
187        b.2.cmp(&a.2).then_with(|| b.1.cmp(&a.1))
188    });
189    found.into_iter().map(|(name, _, _)| name).collect()
190}
191
/// Split an entity name into lowercase, searchable tokens.
///
/// Handles camelCase, PascalCase, snake_case, kebab-case and acronym runs:
///   "ChatRepositoryImpl" → ["chat", "repository", "impl"]
///   "svc-social" → ["svc", "social"]
///   "PG LISTEN/NOTIFY" → ["pg", "listen", "notify"]
///   "HTTPServer" → ["http", "server"]
///
/// `_`, `-`, `/`, space and `.` are separators and never appear in a token.
/// An uppercase letter starts a new token when it follows a non-uppercase
/// character, or when it ends an acronym run (it is followed by a lowercase
/// letter). Consecutive capitals therefore stay together instead of being
/// broken into single letters. Empty input yields an empty vector.
fn split_entity_tokens(name: &str) -> Vec<String> {
    let chars: Vec<char> = name.chars().collect();
    let mut tokens = Vec::new();
    let mut current = String::new();

    for (i, &c) in chars.iter().enumerate() {
        if matches!(c, '_' | '-' | '/' | ' ' | '.') {
            if !current.is_empty() {
                tokens.push(std::mem::take(&mut current));
            }
            continue;
        }

        if c.is_uppercase() && !current.is_empty() {
            // `current` is non-empty, so `i > 0` and the previous character is
            // the (non-separator) character that was pushed last.
            let prev_is_upper = chars[i - 1].is_uppercase();
            let next_is_lower = chars.get(i + 1).is_some_and(|n| n.is_lowercase());
            // "chatRepo": lower → upper boundary.
            // "HTTPServer": the 'S' closes the acronym run and starts "server".
            if !prev_is_upper || next_is_lower {
                tokens.push(std::mem::take(&mut current));
            }
        }

        // Unicode-aware lowercasing; a single char may lowercase to several.
        current.extend(c.to_lowercase());
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
220
221/// Build entity context: for each query entity, traverse the graph to find
222/// related entities and their typed relationships.
223pub fn build_entity_context(
224    query_entities: &[String],
225    graph: &SimpleEntityGraph,
226    max_depth: usize,
227) -> Vec<EntityContext> {
228    let mut result: Vec<EntityContext> = Vec::new();
229    let mut seen: HashSet<String> = HashSet::new();
230
231    // First: direct mentions
232    for entity in query_entities {
233        if seen.contains(entity) {
234            continue;
235        }
236        seen.insert(entity.clone());
237
238        let rels = get_entity_relationships(entity, graph);
239        result.push(EntityContext {
240            name: entity.clone(),
241            relevance: EntityRelevance::DirectMention,
242            relationships: rels,
243        });
244    }
245
246    // Then: graph neighbors (depth 1 and optionally 2)
247    for depth in 0..max_depth {
248        let current_entities: Vec<String> = result
249            .iter()
250            .filter(|ec| {
251                if depth == 0 {
252                    ec.relevance == EntityRelevance::DirectMention
253                } else {
254                    true
255                }
256            })
257            .map(|ec| ec.name.clone())
258            .collect();
259
260        for entity in &current_entities {
261            let neighbors = graph.find_related_entities(entity);
262            for neighbor in neighbors {
263                if seen.contains(&neighbor) {
264                    continue;
265                }
266                seen.insert(neighbor.clone());
267
268                // Find the relationship type between entity and neighbor
269                let rel_desc = get_relationship_description(entity, &neighbor, graph);
270                let rels = get_entity_relationships(&neighbor, graph);
271
272                result.push(EntityContext {
273                    name: neighbor.clone(),
274                    relevance: EntityRelevance::GraphNeighbor {
275                        via: entity.clone(),
276                        relation: rel_desc,
277                    },
278                    relationships: rels,
279                });
280            }
281        }
282    }
283
284    result
285}
286
287/// Get all relationships for an entity (both outgoing and incoming)
288fn get_entity_relationships(entity: &str, graph: &SimpleEntityGraph) -> Vec<EntityRelationship> {
289    graph
290        .get_all_relationships()
291        .into_iter()
292        .filter(|r| r.from_entity == entity || r.to_entity == entity)
293        .collect()
294}
295
296/// Get a human-readable description of the relationship between two entities
297fn get_relationship_description(from: &str, to: &str, graph: &SimpleEntityGraph) -> String {
298    for rel in graph.get_all_relationships() {
299        if rel.from_entity == from && rel.to_entity == to {
300            return format!("{:?}", rel.relation_type);
301        }
302        if rel.from_entity == to && rel.to_entity == from {
303            return format!("{:?} (reverse)", rel.relation_type);
304        }
305    }
306    "RelatedTo".to_string()
307}
308
309/// Perform impact analysis: find all entities that depend on any of the query entities.
310/// Traverses DependsOn, RequiredBy, and Implements edges in reverse.
311pub fn analyze_impact(
312    query_entities: &[String],
313    graph: &SimpleEntityGraph,
314) -> Vec<ImpactEntry> {
315    let dependency_types = [
316        RelationType::DependsOn,
317        RelationType::RequiredBy,
318        RelationType::Implements,
319    ];
320
321    let all_rels = graph.get_all_relationships();
322    let mut impacts: Vec<ImpactEntry> = Vec::new();
323    let query_set: HashSet<&String> = query_entities.iter().collect();
324
325    for rel in &all_rels {
326        // For DependsOn: if B depends on A, and A is a query entity → B is impacted
327        // The edge is: from=B, to=A, type=DependsOn
328        if dependency_types.contains(&rel.relation_type) && query_set.contains(&rel.to_entity) {
329            // Skip self-references
330            if rel.from_entity == rel.to_entity {
331                continue;
332            }
333            impacts.push(ImpactEntry {
334                entity: rel.from_entity.clone(),
335                depends_on: rel.to_entity.clone(),
336                relation_type: rel.relation_type.clone(),
337                context: rel.context.clone(),
338            });
339        }
340
341        // For RequiredBy: if A is required by B, and A is a query entity → B is impacted
342        // The edge is: from=A, to=B, type=RequiredBy
343        if rel.relation_type == RelationType::RequiredBy
344            && query_set.contains(&rel.from_entity)
345        {
346            if rel.from_entity == rel.to_entity {
347                continue;
348            }
349            impacts.push(ImpactEntry {
350                entity: rel.to_entity.clone(),
351                depends_on: rel.from_entity.clone(),
352                relation_type: rel.relation_type.clone(),
353                context: rel.context.clone(),
354            });
355        }
356    }
357
358    // Deduplicate by (entity, depends_on)
359    let mut seen: HashSet<(String, String)> = HashSet::new();
360    impacts.retain(|i| seen.insert((i.entity.clone(), i.depends_on.clone())));
361    impacts
362}
363
364/// Score and boost semantic search results based on entity graph connections.
365///
366/// Results that mention graph-connected entities get a score boost.
367/// This makes structurally related content rank higher than
368/// keyword-similar but structurally unrelated content.
369pub fn boost_by_graph(
370    results: &mut Vec<(String, f32)>, // (text, score)
371    entity_context: &[EntityContext],
372) {
373    // Build a set of all relevant entity names (direct + neighbors)
374    let relevant_entities: HashMap<String, f32> = entity_context
375        .iter()
376        .map(|ec| {
377            let boost = match &ec.relevance {
378                EntityRelevance::DirectMention => 0.15,
379                EntityRelevance::GraphNeighbor { .. } => 0.08,
380            };
381            (ec.name.to_lowercase(), boost)
382        })
383        .collect();
384
385    for (text, score) in results.iter_mut() {
386        let text_lower = text.to_lowercase();
387        let mut total_boost: f32 = 0.0;
388
389        for (entity, boost) in &relevant_entities {
390            if text_lower.contains(entity) {
391                total_boost += boost;
392            }
393        }
394
395        // Cap the boost at 0.25 to prevent over-weighting
396        *score += total_boost.min(0.25);
397        // Clamp to [0, 1]
398        *score = score.min(1.0);
399    }
400}
401
402/// Assemble context from a session's entity graph and context updates.
403///
404/// This is the main entry point for graph-aware context assembly.
405/// It combines entity graph traversal with content scoring to produce
406/// a ranked list of context items within a token budget.
407pub fn assemble_context(
408    query: &str,
409    graph: &SimpleEntityGraph,
410    updates: &[post_cortex_core::core::context_update::ContextUpdate],
411    token_budget: usize,
412) -> AssembledContext {
413    info!("Assembling context for query: '{}' (budget: {} tokens)", query, token_budget);
414
415    // Step 1: Find entities mentioned in the query
416    let query_entities = find_query_entities(query, graph);
417    debug!("Query entities: {:?}", query_entities);
418
419    // Step 2: Build entity context (graph traversal)
420    let entity_context = build_entity_context(&query_entities, graph, 1);
421    debug!(
422        "Entity context: {} entities (direct + neighbors)",
423        entity_context.len()
424    );
425
426    // Step 3: Impact analysis
427    let impact = analyze_impact(&query_entities, graph);
428    if !impact.is_empty() {
429        debug!("Impact analysis: {} dependent entities", impact.len());
430    }
431
432    // Step 4: Score all updates
433    let _relevant_entity_names: HashSet<String> = entity_context
434        .iter()
435        .map(|ec| ec.name.to_lowercase())
436        .collect();
437
438    let mut scored_items: Vec<ContextItem> = Vec::new();
439
440    for update in updates {
441        let text = format!(
442            "{}: {}",
443            update.content.title, update.content.description
444        );
445        let tokens = estimate_tokens(&text);
446
447        // Base score: recency (newer updates score higher)
448        let age_hours = (Utc::now() - update.timestamp).num_hours().max(0) as f32;
449        let recency_score = 1.0 / (1.0 + age_hours / 24.0); // Decays over days
450
451        // Entity match boost
452        let text_lower = text.to_lowercase();
453        let mut entity_boost: f32 = 0.0;
454        let mut matched_entities: Vec<String> = Vec::new();
455
456        for ec in &entity_context {
457            let name_lower = ec.name.to_lowercase();
458            if text_lower.contains(&name_lower) {
459                matched_entities.push(ec.name.clone());
460                entity_boost += match &ec.relevance {
461                    EntityRelevance::DirectMention => 0.4,
462                    EntityRelevance::GraphNeighbor { .. } => 0.2,
463                };
464            }
465        }
466
467        // Importance boost
468        let importance_boost = if update.user_marked_important {
469            0.2
470        } else {
471            0.0
472        };
473
474        let score = (recency_score * 0.3 + entity_boost + importance_boost).min(1.0);
475
476        // Determine source
477        let source = if !matched_entities.is_empty() {
478            if query_entities
479                .iter()
480                .any(|qe| matched_entities.iter().any(|me| me.eq_ignore_ascii_case(qe)))
481            {
482                ContextSource::SemanticMatch
483            } else {
484                ContextSource::GraphTraversal {
485                    via_entity: matched_entities[0].clone(),
486                }
487            }
488        } else {
489            ContextSource::RecentUpdate
490        };
491
492        scored_items.push(ContextItem {
493            text,
494            score,
495            source,
496            entities: matched_entities,
497            token_estimate: tokens,
498            entry_id: update.id.to_string(),
499        });
500    }
501
502    // Step 5: Greedy knapsack — sort by score, pack within budget
503    scored_items.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
504
505    let mut selected: Vec<ContextItem> = Vec::new();
506    let mut used_tokens = 0;
507
508    // Reserve tokens for entity context summary (~50 tokens per entity)
509    let entity_summary_tokens = entity_context.len() * 50;
510    let content_budget = token_budget.saturating_sub(entity_summary_tokens);
511
512    for item in scored_items {
513        if used_tokens + item.token_estimate > content_budget {
514            // Try to fit — skip items that are too large
515            continue;
516        }
517        used_tokens += item.token_estimate;
518        selected.push(item);
519    }
520
521    let total_tokens = used_tokens + entity_summary_tokens;
522    info!(
523        "Assembled {} items ({} tokens), {} entity contexts, {} impact entries",
524        selected.len(),
525        total_tokens,
526        entity_context.len(),
527        impact.len()
528    );
529
530    AssembledContext {
531        items: selected,
532        entity_context,
533        impact,
534        total_tokens,
535    }
536}
537
538/// Format assembled context as a text block suitable for LLM injection.
539pub fn format_for_llm(ctx: &AssembledContext) -> String {
540    let mut parts: Vec<String> = Vec::new();
541
542    // Entity graph summary
543    if !ctx.entity_context.is_empty() {
544        let mut graph_lines: Vec<String> = Vec::new();
545        for ec in &ctx.entity_context {
546            if ec.relationships.is_empty() {
547                continue;
548            }
549            for rel in &ec.relationships {
550                graph_lines.push(format!(
551                    "  {} --[{:?}]--> {}",
552                    rel.from_entity, rel.relation_type, rel.to_entity
553                ));
554            }
555        }
556        if !graph_lines.is_empty() {
557            // Deduplicate relationship lines
558            graph_lines.sort();
559            graph_lines.dedup();
560            parts.push(format!("Entity relationships:\n{}", graph_lines.join("\n")));
561        }
562    }
563
564    // Impact warnings
565    if !ctx.impact.is_empty() {
566        let impact_lines: Vec<String> = ctx
567            .impact
568            .iter()
569            .map(|i| format!("  {} depends on {} ({:?})", i.entity, i.depends_on, i.relation_type))
570            .collect();
571        parts.push(format!(
572            "Impact analysis — these entities depend on what you're working with:\n{}",
573            impact_lines.join("\n")
574        ));
575    }
576
577    // Context items
578    if !ctx.items.is_empty() {
579        let content_lines: Vec<String> = ctx
580            .items
581            .iter()
582            .map(|item| item.text.clone())
583            .collect();
584        parts.push(format!("Relevant context:\n{}", content_lines.join("\n---\n")));
585    }
586
587    parts.join("\n\n")
588}
589
// Unit tests for context assembly. All tests share the small technology
// graph built by `make_graph`.
#[cfg(test)]
mod tests {
    use super::*;
    use post_cortex_core::core::context_update::*;
    use post_cortex_core::graph::entity_graph::SimpleEntityGraph;
    /// Build the fixture graph used by every test:
    ///
    ///   Axon --DependsOn--> Post-Cortex --DependsOn--> RocksDB
    ///   Axon --DependsOn--> gRPC --DependsOn--> tonic
    ///   Post-Cortex --DependsOn--> Rust
    fn make_graph() -> SimpleEntityGraph {
        let mut graph = SimpleEntityGraph::new();
        let now = Utc::now();

        // Add entities
        graph.add_or_update_entity("Axon".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("Post-Cortex".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("gRPC".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("tonic".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("RocksDB".into(), EntityType::Technology, now, "");
        graph.add_or_update_entity("Rust".into(), EntityType::Technology, now, "");

        // Add typed relationships
        graph.add_relationship(EntityRelationship {
            from_entity: "Axon".to_string(),
            to_entity: "Post-Cortex".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Axon connects to Post-Cortex".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "Axon".to_string(),
            to_entity: "gRPC".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Axon uses gRPC".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "gRPC".to_string(),
            to_entity: "tonic".to_string(),
            relation_type: RelationType::DependsOn,
            context: "gRPC implemented via tonic".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "Post-Cortex".to_string(),
            to_entity: "RocksDB".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Post-Cortex uses RocksDB for storage".to_string(),
        });
        graph.add_relationship(EntityRelationship {
            from_entity: "Post-Cortex".to_string(),
            to_entity: "Rust".to_string(),
            relation_type: RelationType::DependsOn,
            context: "Post-Cortex built with Rust".to_string(),
        });

        graph
    }

    // Both names appear verbatim in the query, so they are found through the
    // case-insensitive exact-substring strategy.
    #[test]
    fn test_find_query_entities() {
        let graph = make_graph();
        let entities = find_query_entities("I'm working on the gRPC service in Axon", &graph);
        assert!(entities.contains(&"gRPC".to_string()));
        assert!(entities.contains(&"Axon".to_string()));
    }

    // With max_depth = 1 the direct mention is followed by its one-hop neighbors.
    #[test]
    fn test_build_entity_context_includes_neighbors() {
        let graph = make_graph();
        let query_entities = vec!["gRPC".to_string()];
        let ctx = build_entity_context(&query_entities, &graph, 1);

        let names: Vec<&str> = ctx.iter().map(|ec| ec.name.as_str()).collect();
        // gRPC is direct, tonic and Axon are neighbors
        assert!(names.contains(&"gRPC"));
        // Only one of the two neighbors is required here, so the test does not
        // pin down whether `find_related_entities` follows incoming edges.
        assert!(names.contains(&"tonic") || names.contains(&"Axon"));
    }

    // DependsOn edges are followed in reverse: the source of an edge is
    // impacted when its target changes.
    #[test]
    fn test_impact_analysis() {
        let graph = make_graph();

        // If RocksDB changes, Post-Cortex is impacted (depends on RocksDB)
        let impact = analyze_impact(&["RocksDB".to_string()], &graph);
        let impacted: Vec<&str> = impact.iter().map(|i| i.entity.as_str()).collect();
        assert!(
            impacted.contains(&"Post-Cortex"),
            "Post-Cortex should be impacted by RocksDB change, got: {:?}",
            impacted
        );

        // If gRPC changes, Axon is impacted
        let impact = analyze_impact(&["gRPC".to_string()], &graph);
        let impacted: Vec<&str> = impact.iter().map(|i| i.entity.as_str()).collect();
        assert!(
            impacted.contains(&"Axon"),
            "Axon should be impacted by gRPC change, got: {:?}",
            impacted
        );
    }

    // End-to-end: two equally recent updates, only one of which mentions the
    // query entity; the budget (1000 tokens) is large enough for both.
    #[test]
    fn test_assemble_context_with_budget() {
        let graph = make_graph();
        let updates = vec![
            ContextUpdate {
                id: uuid::Uuid::new_v4(),
                update_type: UpdateType::ConceptDefined,
                content: UpdateContent {
                    title: "gRPC Setup".to_string(),
                    description: "Added gRPC service using tonic for Axon communication".to_string(),
                    details: vec![],
                    examples: vec![],
                    implications: vec![],
                },
                timestamp: Utc::now(),
                related_code: None,
                parent_update: None,
                user_marked_important: false,
                creates_entities: vec![],
                creates_relationships: vec![],
                references_entities: vec![],
                typed_entities: vec![],
            },
            ContextUpdate {
                id: uuid::Uuid::new_v4(),
                update_type: UpdateType::ConceptDefined,
                content: UpdateContent {
                    title: "Unrelated Update".to_string(),
                    description: "Fixed a CSS bug in the landing page".to_string(),
                    details: vec![],
                    examples: vec![],
                    implications: vec![],
                },
                timestamp: Utc::now(),
                related_code: None,
                parent_update: None,
                user_marked_important: false,
                creates_entities: vec![],
                creates_relationships: vec![],
                references_entities: vec![],
                typed_entities: vec![],
            },
        ];

        // Capture IDs so we can verify `entry_id` round-trips below.
        let grpc_id = updates[0].id;
        let css_id = updates[1].id;

        let result = assemble_context("working on gRPC", &graph, &updates, 1000);

        // gRPC-related update should rank higher than CSS bug
        assert!(!result.items.is_empty());
        assert!(result.items[0].text.contains("gRPC"));

        // Entity context should include gRPC and neighbors
        let entity_names: Vec<&str> = result.entity_context.iter().map(|ec| ec.name.as_str()).collect();
        assert!(entity_names.contains(&"gRPC"));

        // Impact: Axon depends on gRPC
        let impacted: Vec<&str> = result.impact.iter().map(|i| i.entity.as_str()).collect();
        assert!(impacted.contains(&"Axon"));

        // Every assembled item should carry the underlying ContextUpdate's
        // ID so consumers (e.g. Axon resume-freshness) can track which
        // entries were materialised into the context window.
        for item in &result.items {
            assert!(!item.entry_id.is_empty(), "entry_id should be populated");
            let id = uuid::Uuid::parse_str(&item.entry_id)
                .expect("entry_id should parse as UUID");
            assert!(
                id == grpc_id || id == css_id,
                "entry_id must match one of the input updates"
            );
        }
    }

    // The formatted text must contain the relationship section, the impact
    // section and the concrete "X depends on Y" line for the queried entity.
    #[test]
    fn test_format_for_llm() {
        let graph = make_graph();
        let updates = vec![ContextUpdate {
            id: uuid::Uuid::new_v4(),
            update_type: UpdateType::ConceptDefined,
            content: UpdateContent {
                title: "RocksDB Migration".to_string(),
                description: "Migrating from sled to RocksDB for better performance".to_string(),
                details: vec![],
                examples: vec![],
                implications: vec![],
            },
            timestamp: Utc::now(),
            related_code: None,
            parent_update: None,
            user_marked_important: false,
            creates_entities: vec![],
            creates_relationships: vec![],
            references_entities: vec![],
            typed_entities: vec![],
        }];

        let result = assemble_context("changing RocksDB", &graph, &updates, 2000);
        let formatted = format_for_llm(&result);

        assert!(formatted.contains("Entity relationships"));
        assert!(formatted.contains("Impact analysis"));
        assert!(formatted.contains("Post-Cortex depends on RocksDB"));
    }
}