1use chrono::Utc;
15use post_cortex_core::core::context_update::{EntityRelationship, RelationType};
16use post_cortex_core::graph::entity_graph::SimpleEntityGraph;
17use serde::{Deserialize, Serialize};
18use std::collections::{HashMap, HashSet};
19use tracing::{debug, info};
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct ContextItem {
24 pub text: String,
26 pub score: f32,
28 pub source: ContextSource,
30 pub entities: Vec<String>,
32 pub token_estimate: usize,
34 pub entry_id: String,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
42pub enum ContextSource {
43 SemanticMatch,
45 GraphTraversal {
47 via_entity: String,
49 },
50 RecentUpdate,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct AssembledContext {
57 pub items: Vec<ContextItem>,
59 pub entity_context: Vec<EntityContext>,
61 pub impact: Vec<ImpactEntry>,
63 pub total_tokens: usize,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct EntityContext {
70 pub name: String,
72 pub relevance: EntityRelevance,
74 pub relationships: Vec<EntityRelationship>,
76}
77
78#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
80pub enum EntityRelevance {
81 DirectMention,
83 GraphNeighbor {
85 via: String,
87 relation: String,
89 },
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct ImpactEntry {
95 pub entity: String,
97 pub depends_on: String,
99 pub relation_type: RelationType,
101 pub context: String,
103}
104
/// Cheap token-count heuristic: the UTF-8 byte length of `text` divided by
/// four, rounded up. An empty string costs zero tokens.
fn estimate_tokens(text: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;

    let byte_len = text.len();
    byte_len.div_ceil(BYTES_PER_TOKEN)
}
109
110pub fn find_query_entities(query: &str, graph: &SimpleEntityGraph) -> Vec<String> {
120 let query_lower = query.to_lowercase();
121 let query_tokens: std::collections::HashSet<&str> = query_lower
122 .split(|c: char| !c.is_alphanumeric() && c != '_')
123 .filter(|t| t.len() >= 3)
124 .collect();
125
126 let all_entities = graph.get_all_entities();
127 let mut found: Vec<(String, usize, bool)> = Vec::new(); for entity_data in &all_entities {
130 let name_lower = entity_data.name.to_lowercase();
131
132 if name_lower.len() < 2 {
134 continue;
135 }
136
137 if query_lower.contains(&name_lower) {
139 found.push((entity_data.name.clone(), name_lower.len() * 10, true));
140 continue;
141 }
142
143 let entity_tokens: Vec<String> = split_entity_tokens(&name_lower);
145 if entity_tokens.is_empty() {
146 continue;
147 }
148
149 let matched = entity_tokens
150 .iter()
151 .filter(|et| {
152 if et.len() < 3 {
153 return false;
154 }
155 query_tokens.iter().any(|qt| {
156 let (shorter, longer) = if et.len() <= qt.len() {
159 (et.as_str(), *qt)
160 } else {
161 (*qt, et.as_str())
162 };
163 shorter.len() >= 3 && longer.starts_with(shorter)
164 })
165 })
166 .count();
167
168 if matched == 0 {
169 continue;
170 }
171
172 let threshold = if entity_tokens.len() <= 2 {
177 entity_tokens.len()
178 } else {
179 1.max((entity_tokens.len() * 2).div_ceil(5)) };
181
182 if matched >= threshold {
183 let score = matched * 5 + name_lower.len();
184 found.push((entity_data.name.clone(), score, false));
185 }
186 }
187
188 found.sort_by(|a, b| b.2.cmp(&a.2).then_with(|| b.1.cmp(&a.1)));
190 found.into_iter().map(|(name, _, _)| name).collect()
191}
192
/// Splits an entity name into lowercase word tokens.
///
/// Token boundaries are:
/// * the separator characters `_`, `-`, `/`, space and `.` (dropped), and
/// * camel-case humps: before an uppercase letter that follows a
///   non-uppercase character (`rocks|DB`), or before the last uppercase
///   letter of an acronym run when a lowercase letter follows (`XML|Parser`).
///
/// Runs of capitals therefore stay together (`"RocksDB"` → `rocks`, `db`)
/// instead of being shattered into one-letter tokens. Lowercasing is
/// Unicode-aware, matching the Unicode-aware uppercase test. Input that is
/// already all lowercase is split on separators only.
///
/// Returns an empty vector when `name` contains nothing but separators.
fn split_entity_tokens(name: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut prev_upper = false;
    // Peeking is needed to detect the end of an acronym run.
    let mut chars = name.chars().peekable();

    while let Some(c) = chars.next() {
        if matches!(c, '_' | '-' | '/' | ' ' | '.') {
            if !current.is_empty() {
                tokens.push(std::mem::take(&mut current));
            }
            prev_upper = false;
            continue;
        }

        let is_upper = c.is_uppercase();
        if is_upper && !current.is_empty() {
            let next_is_lower = chars.peek().is_some_and(|next| next.is_lowercase());
            // Start a new token at a lower→Upper hump, or at the capital that
            // begins the word following an acronym ("XMLParser" → xml|parser).
            if !prev_upper || next_is_lower {
                tokens.push(std::mem::take(&mut current));
            }
        }

        // `to_lowercase` may yield more than one char for some code points.
        current.extend(c.to_lowercase());
        prev_upper = is_upper;
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
221
222pub fn build_entity_context(
225 query_entities: &[String],
226 graph: &SimpleEntityGraph,
227 max_depth: usize,
228) -> Vec<EntityContext> {
229 let mut result: Vec<EntityContext> = Vec::new();
230 let mut seen: HashSet<String> = HashSet::new();
231
232 for entity in query_entities {
234 if seen.contains(entity) {
235 continue;
236 }
237 seen.insert(entity.clone());
238
239 let rels = get_entity_relationships(entity, graph);
240 result.push(EntityContext {
241 name: entity.clone(),
242 relevance: EntityRelevance::DirectMention,
243 relationships: rels,
244 });
245 }
246
247 for depth in 0..max_depth {
249 let current_entities: Vec<String> = result
250 .iter()
251 .filter(|ec| {
252 if depth == 0 {
253 ec.relevance == EntityRelevance::DirectMention
254 } else {
255 true
256 }
257 })
258 .map(|ec| ec.name.clone())
259 .collect();
260
261 for entity in ¤t_entities {
262 let neighbors = graph.find_related_entities(entity);
263 for neighbor in neighbors {
264 if seen.contains(&neighbor) {
265 continue;
266 }
267 seen.insert(neighbor.clone());
268
269 let rel_desc = get_relationship_description(entity, &neighbor, graph);
271 let rels = get_entity_relationships(&neighbor, graph);
272
273 result.push(EntityContext {
274 name: neighbor.clone(),
275 relevance: EntityRelevance::GraphNeighbor {
276 via: entity.clone(),
277 relation: rel_desc,
278 },
279 relationships: rels,
280 });
281 }
282 }
283 }
284
285 result
286}
287
288fn get_entity_relationships(entity: &str, graph: &SimpleEntityGraph) -> Vec<EntityRelationship> {
290 graph
291 .get_all_relationships()
292 .into_iter()
293 .filter(|r| r.from_entity == entity || r.to_entity == entity)
294 .collect()
295}
296
297fn get_relationship_description(from: &str, to: &str, graph: &SimpleEntityGraph) -> String {
299 for rel in graph.get_all_relationships() {
300 if rel.from_entity == from && rel.to_entity == to {
301 return format!("{:?}", rel.relation_type);
302 }
303 if rel.from_entity == to && rel.to_entity == from {
304 return format!("{:?} (reverse)", rel.relation_type);
305 }
306 }
307 "RelatedTo".to_string()
308}
309
310pub fn analyze_impact(query_entities: &[String], graph: &SimpleEntityGraph) -> Vec<ImpactEntry> {
313 let dependency_types = [
314 RelationType::DependsOn,
315 RelationType::RequiredBy,
316 RelationType::Implements,
317 ];
318
319 let all_rels = graph.get_all_relationships();
320 let mut impacts: Vec<ImpactEntry> = Vec::new();
321 let query_set: HashSet<&String> = query_entities.iter().collect();
322
323 for rel in &all_rels {
324 if dependency_types.contains(&rel.relation_type) && query_set.contains(&rel.to_entity) {
327 if rel.from_entity == rel.to_entity {
329 continue;
330 }
331 impacts.push(ImpactEntry {
332 entity: rel.from_entity.clone(),
333 depends_on: rel.to_entity.clone(),
334 relation_type: rel.relation_type.clone(),
335 context: rel.context.clone(),
336 });
337 }
338
339 if rel.relation_type == RelationType::RequiredBy && query_set.contains(&rel.from_entity) {
342 if rel.from_entity == rel.to_entity {
343 continue;
344 }
345 impacts.push(ImpactEntry {
346 entity: rel.to_entity.clone(),
347 depends_on: rel.from_entity.clone(),
348 relation_type: rel.relation_type.clone(),
349 context: rel.context.clone(),
350 });
351 }
352 }
353
354 let mut seen: HashSet<(String, String)> = HashSet::new();
356 impacts.retain(|i| seen.insert((i.entity.clone(), i.depends_on.clone())));
357 impacts
358}
359
360pub fn boost_by_graph(
366 results: &mut Vec<(String, f32)>, entity_context: &[EntityContext],
368) {
369 let relevant_entities: HashMap<String, f32> = entity_context
371 .iter()
372 .map(|ec| {
373 let boost = match &ec.relevance {
374 EntityRelevance::DirectMention => 0.15,
375 EntityRelevance::GraphNeighbor { .. } => 0.08,
376 };
377 (ec.name.to_lowercase(), boost)
378 })
379 .collect();
380
381 for (text, score) in results.iter_mut() {
382 let text_lower = text.to_lowercase();
383 let mut total_boost: f32 = 0.0;
384
385 for (entity, boost) in &relevant_entities {
386 if text_lower.contains(entity) {
387 total_boost += boost;
388 }
389 }
390
391 *score += total_boost.min(0.25);
393 *score = score.min(1.0);
395 }
396}
397
398pub fn assemble_context(
404 query: &str,
405 graph: &SimpleEntityGraph,
406 updates: &[post_cortex_core::core::context_update::ContextUpdate],
407 token_budget: usize,
408) -> AssembledContext {
409 info!(
410 "Assembling context for query: '{}' (budget: {} tokens)",
411 query, token_budget
412 );
413
414 let query_entities = find_query_entities(query, graph);
416 debug!("Query entities: {:?}", query_entities);
417
418 let entity_context = build_entity_context(&query_entities, graph, 1);
420 debug!(
421 "Entity context: {} entities (direct + neighbors)",
422 entity_context.len()
423 );
424
425 let impact = analyze_impact(&query_entities, graph);
427 if !impact.is_empty() {
428 debug!("Impact analysis: {} dependent entities", impact.len());
429 }
430
431 let _relevant_entity_names: HashSet<String> = entity_context
433 .iter()
434 .map(|ec| ec.name.to_lowercase())
435 .collect();
436
437 let mut scored_items: Vec<ContextItem> = Vec::new();
438
439 for update in updates {
440 let text = format!("{}: {}", update.content.title, update.content.description);
441 let tokens = estimate_tokens(&text);
442
443 let age_hours = (Utc::now() - update.timestamp).num_hours().max(0) as f32;
445 let recency_score = 1.0 / (1.0 + age_hours / 24.0); let text_lower = text.to_lowercase();
449 let mut entity_boost: f32 = 0.0;
450 let mut matched_entities: Vec<String> = Vec::new();
451
452 for ec in &entity_context {
453 let name_lower = ec.name.to_lowercase();
454 if text_lower.contains(&name_lower) {
455 matched_entities.push(ec.name.clone());
456 entity_boost += match &ec.relevance {
457 EntityRelevance::DirectMention => 0.4,
458 EntityRelevance::GraphNeighbor { .. } => 0.2,
459 };
460 }
461 }
462
463 let importance_boost = if update.user_marked_important {
465 0.2
466 } else {
467 0.0
468 };
469
470 let score = (recency_score * 0.3 + entity_boost + importance_boost).min(1.0);
471
472 let source = if !matched_entities.is_empty() {
474 if query_entities.iter().any(|qe| {
475 matched_entities
476 .iter()
477 .any(|me| me.eq_ignore_ascii_case(qe))
478 }) {
479 ContextSource::SemanticMatch
480 } else {
481 ContextSource::GraphTraversal {
482 via_entity: matched_entities[0].clone(),
483 }
484 }
485 } else {
486 ContextSource::RecentUpdate
487 };
488
489 scored_items.push(ContextItem {
490 text,
491 score,
492 source,
493 entities: matched_entities,
494 token_estimate: tokens,
495 entry_id: update.id.to_string(),
496 });
497 }
498
499 scored_items.sort_by(|a, b| {
501 b.score
502 .partial_cmp(&a.score)
503 .unwrap_or(std::cmp::Ordering::Equal)
504 });
505
506 let mut selected: Vec<ContextItem> = Vec::new();
507 let mut used_tokens = 0;
508
509 let entity_summary_tokens = entity_context.len() * 50;
511 let content_budget = token_budget.saturating_sub(entity_summary_tokens);
512
513 for item in scored_items {
514 if used_tokens + item.token_estimate > content_budget {
515 continue;
517 }
518 used_tokens += item.token_estimate;
519 selected.push(item);
520 }
521
522 let total_tokens = used_tokens + entity_summary_tokens;
523 info!(
524 "Assembled {} items ({} tokens), {} entity contexts, {} impact entries",
525 selected.len(),
526 total_tokens,
527 entity_context.len(),
528 impact.len()
529 );
530
531 AssembledContext {
532 items: selected,
533 entity_context,
534 impact,
535 total_tokens,
536 }
537}
538
539pub fn format_for_llm(ctx: &AssembledContext) -> String {
541 let mut parts: Vec<String> = Vec::new();
542
543 if !ctx.entity_context.is_empty() {
545 let mut graph_lines: Vec<String> = Vec::new();
546 for ec in &ctx.entity_context {
547 if ec.relationships.is_empty() {
548 continue;
549 }
550 for rel in &ec.relationships {
551 graph_lines.push(format!(
552 " {} --[{:?}]--> {}",
553 rel.from_entity, rel.relation_type, rel.to_entity
554 ));
555 }
556 }
557 if !graph_lines.is_empty() {
558 graph_lines.sort();
560 graph_lines.dedup();
561 parts.push(format!("Entity relationships:\n{}", graph_lines.join("\n")));
562 }
563 }
564
565 if !ctx.impact.is_empty() {
567 let impact_lines: Vec<String> = ctx
568 .impact
569 .iter()
570 .map(|i| {
571 format!(
572 " {} depends on {} ({:?})",
573 i.entity, i.depends_on, i.relation_type
574 )
575 })
576 .collect();
577 parts.push(format!(
578 "Impact analysis — these entities depend on what you're working with:\n{}",
579 impact_lines.join("\n")
580 ));
581 }
582
583 if !ctx.items.is_empty() {
585 let content_lines: Vec<String> = ctx.items.iter().map(|item| item.text.clone()).collect();
586 parts.push(format!(
587 "Relevant context:\n{}",
588 content_lines.join("\n---\n")
589 ));
590 }
591
592 parts.join("\n\n")
593}
594
#[cfg(test)]
mod tests {
    use super::*;
    use post_cortex_core::core::context_update::*;
    use post_cortex_core::graph::entity_graph::SimpleEntityGraph;

    /// Builds the fixture graph shared by all tests:
    ///
    /// ```text
    /// Axon --DependsOn--> Post-Cortex --DependsOn--> RocksDB
    ///   |                      \--------DependsOn--> Rust
    ///   \--DependsOn--> gRPC --DependsOn--> tonic
    /// ```
    fn make_graph() -> SimpleEntityGraph {
        let mut graph = SimpleEntityGraph::new();
        let now = Utc::now();

        for name in ["Axon", "Post-Cortex", "gRPC", "tonic", "RocksDB", "Rust"] {
            graph.add_or_update_entity(name.into(), EntityType::Technology, now, "");
        }

        // (from, to, context) — every edge is a `DependsOn`.
        let edges = [
            ("Axon", "Post-Cortex", "Axon connects to Post-Cortex"),
            ("Axon", "gRPC", "Axon uses gRPC"),
            ("gRPC", "tonic", "gRPC implemented via tonic"),
            (
                "Post-Cortex",
                "RocksDB",
                "Post-Cortex uses RocksDB for storage",
            ),
            ("Post-Cortex", "Rust", "Post-Cortex built with Rust"),
        ];
        for (from, to, context) in edges {
            graph.add_relationship(EntityRelationship {
                from_entity: from.to_string(),
                to_entity: to.to_string(),
                relation_type: RelationType::DependsOn,
                context: context.to_string(),
            });
        }

        graph
    }

    /// Creates a fresh, unimportant `ConceptDefined` update timestamped "now"
    /// with the given title and description and everything else empty.
    fn make_update(title: &str, description: &str) -> ContextUpdate {
        ContextUpdate {
            id: uuid::Uuid::new_v4(),
            update_type: UpdateType::ConceptDefined,
            content: UpdateContent {
                title: title.to_string(),
                description: description.to_string(),
                details: vec![],
                examples: vec![],
                implications: vec![],
            },
            timestamp: Utc::now(),
            related_code: None,
            parent_update: None,
            user_marked_important: false,
            creates_entities: vec![],
            creates_relationships: vec![],
            references_entities: vec![],
            typed_entities: vec![],
        }
    }

    /// Collects the dependent-entity names out of a list of impact entries.
    fn impacted_names(impact: &[ImpactEntry]) -> Vec<&str> {
        impact.iter().map(|i| i.entity.as_str()).collect()
    }

    #[test]
    fn test_find_query_entities() {
        let graph = make_graph();
        let entities = find_query_entities("I'm working on the gRPC service in Axon", &graph);
        assert!(entities.contains(&"gRPC".to_string()));
        assert!(entities.contains(&"Axon".to_string()));
    }

    #[test]
    fn test_build_entity_context_includes_neighbors() {
        let graph = make_graph();
        let query_entities = vec!["gRPC".to_string()];
        let ctx = build_entity_context(&query_entities, &graph, 1);

        let names: Vec<&str> = ctx.iter().map(|ec| ec.name.as_str()).collect();
        assert!(names.contains(&"gRPC"));
        // At least one of gRPC's direct neighbors must have been pulled in.
        assert!(names.contains(&"tonic") || names.contains(&"Axon"));
    }

    #[test]
    fn test_impact_analysis() {
        let graph = make_graph();

        let impact = analyze_impact(&["RocksDB".to_string()], &graph);
        let impacted = impacted_names(&impact);
        assert!(
            impacted.contains(&"Post-Cortex"),
            "Post-Cortex should be impacted by RocksDB change, got: {:?}",
            impacted
        );

        let impact = analyze_impact(&["gRPC".to_string()], &graph);
        let impacted = impacted_names(&impact);
        assert!(
            impacted.contains(&"Axon"),
            "Axon should be impacted by gRPC change, got: {:?}",
            impacted
        );
    }

    #[test]
    fn test_assemble_context_with_budget() {
        let graph = make_graph();
        let updates = vec![
            make_update(
                "gRPC Setup",
                "Added gRPC service using tonic for Axon communication",
            ),
            make_update("Unrelated Update", "Fixed a CSS bug in the landing page"),
        ];

        let grpc_id = updates[0].id;
        let css_id = updates[1].id;

        let result = assemble_context("working on gRPC", &graph, &updates, 1000);

        // The entity-matching update must rank first.
        assert!(!result.items.is_empty());
        assert!(result.items[0].text.contains("gRPC"));

        let entity_names: Vec<&str> = result
            .entity_context
            .iter()
            .map(|ec| ec.name.as_str())
            .collect();
        assert!(entity_names.contains(&"gRPC"));

        let impacted = impacted_names(&result.impact);
        assert!(impacted.contains(&"Axon"));

        // Every item must carry the id of the update it was built from.
        for item in &result.items {
            assert!(!item.entry_id.is_empty(), "entry_id should be populated");
            let id = uuid::Uuid::parse_str(&item.entry_id).expect("entry_id should parse as UUID");
            assert!(
                id == grpc_id || id == css_id,
                "entry_id must match one of the input updates"
            );
        }
    }

    #[test]
    fn test_format_for_llm() {
        let graph = make_graph();
        let updates = vec![make_update(
            "RocksDB Migration",
            "Migrating from sled to RocksDB for better performance",
        )];

        let result = assemble_context("changing RocksDB", &graph, &updates, 2000);
        let formatted = format_for_llm(&result);

        assert!(formatted.contains("Entity relationships"));
        assert!(formatted.contains("Impact analysis"));
        assert!(formatted.contains("Post-Cortex depends on RocksDB"));
    }
}