1use post_cortex_core::core::context_update::{EntityRelationship, RelationType};
15use post_cortex_core::graph::entity_graph::SimpleEntityGraph;
16use chrono::Utc;
17use serde::{Deserialize, Serialize};
18use std::collections::{HashMap, HashSet};
19use tracing::{debug, info};
20
/// A single piece of content selected (or considered) for the assembled context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextItem {
    /// Rendered text of the item; `assemble_context` builds it as `"<title>: <description>"`.
    pub text: String,
    /// Relevance score; `assemble_context` caps it at 1.0.
    pub score: f32,
    /// Why this item was picked up (direct match, graph neighbor, or recency only).
    pub source: ContextSource,
    /// Names of the entity-context entries whose name occurs in `text` (case-insensitive).
    pub entities: Vec<String>,
    /// Estimated token cost of `text`, used for budget accounting.
    pub token_estimate: usize,
    /// String form of the id of the update this item was built from.
    pub entry_id: String,
}
39
/// Describes how a [`ContextItem`] ended up in the candidate list.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ContextSource {
    /// The item text mentions at least one entity that was found directly in the query.
    SemanticMatch,
    /// The item text only mentions entities reached through the graph.
    GraphTraversal {
        /// The first entity-context entry that matched the item text.
        via_entity: String,
    },
    /// The item mentions no relevant entity; it is ranked without any entity boost.
    RecentUpdate,
}
53
/// Result of [`assemble_context`]: selected content plus graph-derived metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledContext {
    /// Content items that fit into the token budget, highest score first.
    pub items: Vec<ContextItem>,
    /// Entities mentioned in the query together with their graph neighbors.
    pub entity_context: Vec<EntityContext>,
    /// Entities that depend on the entities mentioned in the query.
    pub impact: Vec<ImpactEntry>,
    /// Tokens used by `items` plus a flat 50-token allowance per `entity_context` entry.
    pub total_tokens: usize,
}
66
/// An entity relevant to the query, with the relationships it takes part in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityContext {
    /// Entity name as stored in the graph.
    pub name: String,
    /// Whether the entity was named in the query or reached via a neighbor.
    pub relevance: EntityRelevance,
    /// Every relationship in which this entity is either the source or the target.
    pub relationships: Vec<EntityRelationship>,
}
77
/// How an entity became part of the entity context.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum EntityRelevance {
    /// The entity was matched directly against the query text.
    DirectMention,
    /// The entity was discovered by following a graph edge from another entity.
    GraphNeighbor {
        /// Name of the entity from which this neighbor was reached.
        via: String,
        /// Debug rendering of the connecting relation type; suffixed with
        /// `" (reverse)"` when the edge points from the neighbor back to `via`,
        /// or `"RelatedTo"` when no connecting edge was found.
        relation: String,
    },
}
91
/// One "X depends on Y" fact produced by [`analyze_impact`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImpactEntry {
    /// The dependent entity (the one that may be affected).
    pub entity: String,
    /// The queried entity that `entity` depends on.
    pub depends_on: String,
    /// Relation type of the underlying graph edge.
    pub relation_type: RelationType,
    /// Free-text context copied from the underlying relationship.
    pub context: String,
}
104
/// Rough token estimate for `text`: one token per four bytes, rounded up.
fn estimate_tokens(text: &str) -> usize {
    let bytes = text.len();
    let full_groups = bytes / 4;
    // A trailing partial group of 1-3 bytes still costs one token.
    if bytes % 4 == 0 {
        full_groups
    } else {
        full_groups + 1
    }
}
109
110pub fn find_query_entities(query: &str, graph: &SimpleEntityGraph) -> Vec<String> {
120 let query_lower = query.to_lowercase();
121 let query_tokens: std::collections::HashSet<&str> = query_lower
122 .split(|c: char| !c.is_alphanumeric() && c != '_')
123 .filter(|t| t.len() >= 3)
124 .collect();
125
126 let all_entities = graph.get_all_entities();
127 let mut found: Vec<(String, usize, bool)> = Vec::new(); for entity_data in &all_entities {
130 let name_lower = entity_data.name.to_lowercase();
131
132 if name_lower.len() < 2 {
134 continue;
135 }
136
137 if query_lower.contains(&name_lower) {
139 found.push((entity_data.name.clone(), name_lower.len() * 10, true));
140 continue;
141 }
142
143 let entity_tokens: Vec<String> = split_entity_tokens(&name_lower);
145 if entity_tokens.is_empty() {
146 continue;
147 }
148
149 let matched = entity_tokens.iter()
150 .filter(|et| {
151 if et.len() < 3 { return false; }
152 query_tokens.iter().any(|qt| {
153 let (shorter, longer) = if et.len() <= qt.len() {
156 (et.as_str(), *qt)
157 } else {
158 (*qt, et.as_str())
159 };
160 shorter.len() >= 3 && longer.starts_with(shorter)
161 })
162 })
163 .count();
164
165 if matched == 0 {
166 continue;
167 }
168
169 let threshold = if entity_tokens.len() <= 2 {
174 entity_tokens.len()
175 } else {
176 1.max((entity_tokens.len() * 2).div_ceil(5)) };
178
179 if matched >= threshold {
180 let score = matched * 5 + name_lower.len();
181 found.push((entity_data.name.clone(), score, false));
182 }
183 }
184
185 found.sort_by(|a, b| {
187 b.2.cmp(&a.2).then_with(|| b.1.cmp(&a.1))
188 });
189 found.into_iter().map(|(name, _, _)| name).collect()
190}
191
/// Splits an entity name into lowercase tokens.
///
/// A token ends at any of the separators `_`, `-`, `/`, space or `.` (the
/// separator itself is dropped) and immediately before every uppercase
/// character. Characters are lowercased with ASCII rules only. Empty tokens
/// are never emitted.
fn split_entity_tokens(name: &str) -> Vec<String> {
    /// Moves the pending token into `out` if it holds anything.
    fn flush(pending: &mut String, out: &mut Vec<String>) {
        if !pending.is_empty() {
            out.push(std::mem::take(pending));
        }
    }

    let mut out = Vec::new();
    let mut pending = String::new();

    for ch in name.chars() {
        match ch {
            '_' | '-' | '/' | ' ' | '.' => flush(&mut pending, &mut out),
            _ => {
                // An uppercase character starts a new token.
                if ch.is_uppercase() {
                    flush(&mut pending, &mut out);
                }
                pending.push(ch.to_ascii_lowercase());
            }
        }
    }
    flush(&mut pending, &mut out);

    out
}
220
221pub fn build_entity_context(
224 query_entities: &[String],
225 graph: &SimpleEntityGraph,
226 max_depth: usize,
227) -> Vec<EntityContext> {
228 let mut result: Vec<EntityContext> = Vec::new();
229 let mut seen: HashSet<String> = HashSet::new();
230
231 for entity in query_entities {
233 if seen.contains(entity) {
234 continue;
235 }
236 seen.insert(entity.clone());
237
238 let rels = get_entity_relationships(entity, graph);
239 result.push(EntityContext {
240 name: entity.clone(),
241 relevance: EntityRelevance::DirectMention,
242 relationships: rels,
243 });
244 }
245
246 for depth in 0..max_depth {
248 let current_entities: Vec<String> = result
249 .iter()
250 .filter(|ec| {
251 if depth == 0 {
252 ec.relevance == EntityRelevance::DirectMention
253 } else {
254 true
255 }
256 })
257 .map(|ec| ec.name.clone())
258 .collect();
259
260 for entity in ¤t_entities {
261 let neighbors = graph.find_related_entities(entity);
262 for neighbor in neighbors {
263 if seen.contains(&neighbor) {
264 continue;
265 }
266 seen.insert(neighbor.clone());
267
268 let rel_desc = get_relationship_description(entity, &neighbor, graph);
270 let rels = get_entity_relationships(&neighbor, graph);
271
272 result.push(EntityContext {
273 name: neighbor.clone(),
274 relevance: EntityRelevance::GraphNeighbor {
275 via: entity.clone(),
276 relation: rel_desc,
277 },
278 relationships: rels,
279 });
280 }
281 }
282 }
283
284 result
285}
286
287fn get_entity_relationships(entity: &str, graph: &SimpleEntityGraph) -> Vec<EntityRelationship> {
289 graph
290 .get_all_relationships()
291 .into_iter()
292 .filter(|r| r.from_entity == entity || r.to_entity == entity)
293 .collect()
294}
295
296fn get_relationship_description(from: &str, to: &str, graph: &SimpleEntityGraph) -> String {
298 for rel in graph.get_all_relationships() {
299 if rel.from_entity == from && rel.to_entity == to {
300 return format!("{:?}", rel.relation_type);
301 }
302 if rel.from_entity == to && rel.to_entity == from {
303 return format!("{:?} (reverse)", rel.relation_type);
304 }
305 }
306 "RelatedTo".to_string()
307}
308
309pub fn analyze_impact(
312 query_entities: &[String],
313 graph: &SimpleEntityGraph,
314) -> Vec<ImpactEntry> {
315 let dependency_types = [
316 RelationType::DependsOn,
317 RelationType::RequiredBy,
318 RelationType::Implements,
319 ];
320
321 let all_rels = graph.get_all_relationships();
322 let mut impacts: Vec<ImpactEntry> = Vec::new();
323 let query_set: HashSet<&String> = query_entities.iter().collect();
324
325 for rel in &all_rels {
326 if dependency_types.contains(&rel.relation_type) && query_set.contains(&rel.to_entity) {
329 if rel.from_entity == rel.to_entity {
331 continue;
332 }
333 impacts.push(ImpactEntry {
334 entity: rel.from_entity.clone(),
335 depends_on: rel.to_entity.clone(),
336 relation_type: rel.relation_type.clone(),
337 context: rel.context.clone(),
338 });
339 }
340
341 if rel.relation_type == RelationType::RequiredBy
344 && query_set.contains(&rel.from_entity)
345 {
346 if rel.from_entity == rel.to_entity {
347 continue;
348 }
349 impacts.push(ImpactEntry {
350 entity: rel.to_entity.clone(),
351 depends_on: rel.from_entity.clone(),
352 relation_type: rel.relation_type.clone(),
353 context: rel.context.clone(),
354 });
355 }
356 }
357
358 let mut seen: HashSet<(String, String)> = HashSet::new();
360 impacts.retain(|i| seen.insert((i.entity.clone(), i.depends_on.clone())));
361 impacts
362}
363
364pub fn boost_by_graph(
370 results: &mut Vec<(String, f32)>, entity_context: &[EntityContext],
372) {
373 let relevant_entities: HashMap<String, f32> = entity_context
375 .iter()
376 .map(|ec| {
377 let boost = match &ec.relevance {
378 EntityRelevance::DirectMention => 0.15,
379 EntityRelevance::GraphNeighbor { .. } => 0.08,
380 };
381 (ec.name.to_lowercase(), boost)
382 })
383 .collect();
384
385 for (text, score) in results.iter_mut() {
386 let text_lower = text.to_lowercase();
387 let mut total_boost: f32 = 0.0;
388
389 for (entity, boost) in &relevant_entities {
390 if text_lower.contains(entity) {
391 total_boost += boost;
392 }
393 }
394
395 *score += total_boost.min(0.25);
397 *score = score.min(1.0);
399 }
400}
401
402pub fn assemble_context(
408 query: &str,
409 graph: &SimpleEntityGraph,
410 updates: &[post_cortex_core::core::context_update::ContextUpdate],
411 token_budget: usize,
412) -> AssembledContext {
413 info!("Assembling context for query: '{}' (budget: {} tokens)", query, token_budget);
414
415 let query_entities = find_query_entities(query, graph);
417 debug!("Query entities: {:?}", query_entities);
418
419 let entity_context = build_entity_context(&query_entities, graph, 1);
421 debug!(
422 "Entity context: {} entities (direct + neighbors)",
423 entity_context.len()
424 );
425
426 let impact = analyze_impact(&query_entities, graph);
428 if !impact.is_empty() {
429 debug!("Impact analysis: {} dependent entities", impact.len());
430 }
431
432 let _relevant_entity_names: HashSet<String> = entity_context
434 .iter()
435 .map(|ec| ec.name.to_lowercase())
436 .collect();
437
438 let mut scored_items: Vec<ContextItem> = Vec::new();
439
440 for update in updates {
441 let text = format!(
442 "{}: {}",
443 update.content.title, update.content.description
444 );
445 let tokens = estimate_tokens(&text);
446
447 let age_hours = (Utc::now() - update.timestamp).num_hours().max(0) as f32;
449 let recency_score = 1.0 / (1.0 + age_hours / 24.0); let text_lower = text.to_lowercase();
453 let mut entity_boost: f32 = 0.0;
454 let mut matched_entities: Vec<String> = Vec::new();
455
456 for ec in &entity_context {
457 let name_lower = ec.name.to_lowercase();
458 if text_lower.contains(&name_lower) {
459 matched_entities.push(ec.name.clone());
460 entity_boost += match &ec.relevance {
461 EntityRelevance::DirectMention => 0.4,
462 EntityRelevance::GraphNeighbor { .. } => 0.2,
463 };
464 }
465 }
466
467 let importance_boost = if update.user_marked_important {
469 0.2
470 } else {
471 0.0
472 };
473
474 let score = (recency_score * 0.3 + entity_boost + importance_boost).min(1.0);
475
476 let source = if !matched_entities.is_empty() {
478 if query_entities
479 .iter()
480 .any(|qe| matched_entities.iter().any(|me| me.eq_ignore_ascii_case(qe)))
481 {
482 ContextSource::SemanticMatch
483 } else {
484 ContextSource::GraphTraversal {
485 via_entity: matched_entities[0].clone(),
486 }
487 }
488 } else {
489 ContextSource::RecentUpdate
490 };
491
492 scored_items.push(ContextItem {
493 text,
494 score,
495 source,
496 entities: matched_entities,
497 token_estimate: tokens,
498 entry_id: update.id.to_string(),
499 });
500 }
501
502 scored_items.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
504
505 let mut selected: Vec<ContextItem> = Vec::new();
506 let mut used_tokens = 0;
507
508 let entity_summary_tokens = entity_context.len() * 50;
510 let content_budget = token_budget.saturating_sub(entity_summary_tokens);
511
512 for item in scored_items {
513 if used_tokens + item.token_estimate > content_budget {
514 continue;
516 }
517 used_tokens += item.token_estimate;
518 selected.push(item);
519 }
520
521 let total_tokens = used_tokens + entity_summary_tokens;
522 info!(
523 "Assembled {} items ({} tokens), {} entity contexts, {} impact entries",
524 selected.len(),
525 total_tokens,
526 entity_context.len(),
527 impact.len()
528 );
529
530 AssembledContext {
531 items: selected,
532 entity_context,
533 impact,
534 total_tokens,
535 }
536}
537
538pub fn format_for_llm(ctx: &AssembledContext) -> String {
540 let mut parts: Vec<String> = Vec::new();
541
542 if !ctx.entity_context.is_empty() {
544 let mut graph_lines: Vec<String> = Vec::new();
545 for ec in &ctx.entity_context {
546 if ec.relationships.is_empty() {
547 continue;
548 }
549 for rel in &ec.relationships {
550 graph_lines.push(format!(
551 " {} --[{:?}]--> {}",
552 rel.from_entity, rel.relation_type, rel.to_entity
553 ));
554 }
555 }
556 if !graph_lines.is_empty() {
557 graph_lines.sort();
559 graph_lines.dedup();
560 parts.push(format!("Entity relationships:\n{}", graph_lines.join("\n")));
561 }
562 }
563
564 if !ctx.impact.is_empty() {
566 let impact_lines: Vec<String> = ctx
567 .impact
568 .iter()
569 .map(|i| format!(" {} depends on {} ({:?})", i.entity, i.depends_on, i.relation_type))
570 .collect();
571 parts.push(format!(
572 "Impact analysis — these entities depend on what you're working with:\n{}",
573 impact_lines.join("\n")
574 ));
575 }
576
577 if !ctx.items.is_empty() {
579 let content_lines: Vec<String> = ctx
580 .items
581 .iter()
582 .map(|item| item.text.clone())
583 .collect();
584 parts.push(format!("Relevant context:\n{}", content_lines.join("\n---\n")));
585 }
586
587 parts.join("\n\n")
588}
589
#[cfg(test)]
mod tests {
    use super::*;
    use post_cortex_core::core::context_update::*;
    use post_cortex_core::graph::entity_graph::SimpleEntityGraph;

    /// Builds a `DependsOn` edge `from -> to` with the given context text.
    fn depends_on(from: &str, to: &str, context: &str) -> EntityRelationship {
        EntityRelationship {
            from_entity: from.to_string(),
            to_entity: to.to_string(),
            relation_type: RelationType::DependsOn,
            context: context.to_string(),
        }
    }

    /// Builds a `ConceptDefined` update with a fresh id, the current
    /// timestamp and otherwise empty fields.
    fn concept_update(title: &str, description: &str) -> ContextUpdate {
        ContextUpdate {
            id: uuid::Uuid::new_v4(),
            update_type: UpdateType::ConceptDefined,
            content: UpdateContent {
                title: title.to_string(),
                description: description.to_string(),
                details: vec![],
                examples: vec![],
                implications: vec![],
            },
            timestamp: Utc::now(),
            related_code: None,
            parent_update: None,
            user_marked_important: false,
            creates_entities: vec![],
            creates_relationships: vec![],
            references_entities: vec![],
            typed_entities: vec![],
        }
    }

    /// Fixture graph:
    /// Axon -> {Post-Cortex, gRPC}, gRPC -> tonic, Post-Cortex -> {RocksDB, Rust}.
    fn make_graph() -> SimpleEntityGraph {
        let mut graph = SimpleEntityGraph::new();
        let now = Utc::now();

        for name in ["Axon", "Post-Cortex", "gRPC", "tonic", "RocksDB", "Rust"] {
            graph.add_or_update_entity(name.into(), EntityType::Technology, now, "");
        }

        let edges = [
            ("Axon", "Post-Cortex", "Axon connects to Post-Cortex"),
            ("Axon", "gRPC", "Axon uses gRPC"),
            ("gRPC", "tonic", "gRPC implemented via tonic"),
            ("Post-Cortex", "RocksDB", "Post-Cortex uses RocksDB for storage"),
            ("Post-Cortex", "Rust", "Post-Cortex built with Rust"),
        ];
        for (from, to, context) in edges {
            graph.add_relationship(depends_on(from, to, context));
        }

        graph
    }

    #[test]
    fn test_find_query_entities() {
        let graph = make_graph();
        let entities = find_query_entities("I'm working on the gRPC service in Axon", &graph);

        for expected in ["gRPC", "Axon"] {
            assert!(entities.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_build_entity_context_includes_neighbors() {
        let graph = make_graph();
        let ctx = build_entity_context(&["gRPC".to_string()], &graph, 1);

        let has = |wanted: &str| ctx.iter().any(|ec| ec.name == wanted);
        assert!(has("gRPC"));
        // At least one of gRPC's direct neighbors must have been pulled in.
        assert!(has("tonic") || has("Axon"));
    }

    #[test]
    fn test_impact_analysis() {
        let graph = make_graph();
        let impacted_by = |changed: &str| -> Vec<String> {
            analyze_impact(&[changed.to_string()], &graph)
                .into_iter()
                .map(|entry| entry.entity)
                .collect()
        };

        let impacted = impacted_by("RocksDB");
        assert!(
            impacted.iter().any(|name| name == "Post-Cortex"),
            "Post-Cortex should be impacted by RocksDB change, got: {:?}",
            impacted
        );

        let impacted = impacted_by("gRPC");
        assert!(
            impacted.iter().any(|name| name == "Axon"),
            "Axon should be impacted by gRPC change, got: {:?}",
            impacted
        );
    }

    #[test]
    fn test_assemble_context_with_budget() {
        let graph = make_graph();
        let updates = vec![
            concept_update(
                "gRPC Setup",
                "Added gRPC service using tonic for Axon communication",
            ),
            concept_update("Unrelated Update", "Fixed a CSS bug in the landing page"),
        ];
        let known_ids = [updates[0].id, updates[1].id];

        let result = assemble_context("working on gRPC", &graph, &updates, 1000);

        // The gRPC update must rank first.
        assert!(!result.items.is_empty());
        assert!(result.items[0].text.contains("gRPC"));

        assert!(result.entity_context.iter().any(|ec| ec.name == "gRPC"));
        assert!(result.impact.iter().any(|entry| entry.entity == "Axon"));

        // Every selected item must carry the id of the update it came from.
        for item in &result.items {
            assert!(!item.entry_id.is_empty(), "entry_id should be populated");
            let id = uuid::Uuid::parse_str(&item.entry_id)
                .expect("entry_id should parse as UUID");
            assert!(
                known_ids.contains(&id),
                "entry_id must match one of the input updates"
            );
        }
    }

    #[test]
    fn test_format_for_llm() {
        let graph = make_graph();
        let updates = vec![concept_update(
            "RocksDB Migration",
            "Migrating from sled to RocksDB for better performance",
        )];

        let result = assemble_context("changing RocksDB", &graph, &updates, 2000);
        let formatted = format_for_llm(&result);

        for needle in [
            "Entity relationships",
            "Impact analysis",
            "Post-Cortex depends on RocksDB",
        ] {
            assert!(formatted.contains(needle));
        }
    }
}