1use chrono::Utc;
2use open_kioku_core::{
3 AnalysisFact, ChangeBoundary, CodeChunk, Confidence, ConfidenceBreakdown,
4 ConfidenceSignalInput, ContextPack, Evidence, EvidenceId, EvidenceSourceType, File, FileRange,
5 GraphEdge, GraphEdgeType, GraphNodeType, NegativeEvidence, RiskReport, RuntimeSignal,
6 ScoreComponent, SearchResult, Symbol, ValidationPlan,
7};
8use open_kioku_errors::Result;
9use open_kioku_impact::ImpactEngine;
10use open_kioku_ranking::{rerank_with_options, RankingOptions};
11use open_kioku_search_regex::search_chunks;
12use open_kioku_storage::OkStore;
13use open_kioku_tests::TestSelector;
14
/// Output formats a [`ContextPack`] can be rendered to via [`ContextPackFormat::render`].
///
/// Derives `clap::ValueEnum`, so the variants are usable as command-line argument values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum ContextPackFormat {
    /// Pretty-printed JSON produced with `serde_json::to_string_pretty`.
    Json,
    /// Markdown report with headed sections (confidence, context, signals, validation).
    Markdown,
    /// Flat plain text using `TASK:` / `[FILE: ...]` / `IMPACT:` / `TEST:` markers.
    PromptText,
    /// Rendering delegated to `open_kioku_format::render_context_pack_toon`.
    Toon,
}
22
23impl ContextPackFormat {
24 pub fn render(&self, pack: &ContextPack) -> Result<String> {
25 match self {
26 Self::Json => Ok(serde_json::to_string_pretty(pack)?),
27 Self::Toon => Ok(open_kioku_format::render_context_pack_toon(pack)),
28 Self::Markdown => {
29 let mut out = String::new();
30 out.push_str(&format!("# Task: {}\n\n", pack.task));
31 out.push_str("## Confidence\n\n");
32 out.push_str(&format!(
33 "- Overall: `{:?}` (`{:.2}`)\n",
34 pack.confidence_breakdown.overall_enum, pack.confidence_breakdown.overall_score
35 ));
36 write_markdown_confidence_breakdown(&mut out, &pack.confidence_breakdown);
37 out.push('\n');
38 out.push_str("## Primary Context\n\n");
39 for result in &pack.primary_files {
40 out.push_str(&format!("### {}\n", result.path.display()));
41 if let Some(range) = &result.line_range {
42 out.push_str(&format!("Lines {}-{}\n", range.start, range.end));
43 }
44 out.push_str("```\n");
45 out.push_str(&result.snippet);
46 out.push_str("\n```\n\n");
47 }
48
49 out.push_str("## Supporting Impact\n\n");
50 for result in &pack.supporting_files {
51 out.push_str(&format!("- {}\n", result.path.display()));
52 }
53
54 out.push_str("\n## Runtime Signals\n\n");
55 if pack.runtime_signals.is_empty() {
56 out.push_str("- None found\n");
57 } else {
58 for signal in &pack.runtime_signals {
59 let location = signal
60 .file_range
61 .as_ref()
62 .map(|range| {
63 let lines = range
64 .line_range
65 .as_ref()
66 .map(|line_range| {
67 format!(":{}-{}", line_range.start, line_range.end)
68 })
69 .unwrap_or_default();
70 format!("{}{}", range.path.display(), lines)
71 })
72 .unwrap_or_else(|| "unknown location".into());
73 out.push_str(&format!(
74 "- `{}` at `{}` ({:?})\n",
75 signal.message, location, signal.confidence
76 ));
77 }
78 }
79
80 out.push_str("\n## Validation Plan\n\n");
81 for test in &pack.validation_plan.tests {
82 out.push_str(&format!("- {}\n", test.name));
83 }
84
85 Ok(out)
86 }
87 Self::PromptText => {
88 let mut out = String::new();
89 out.push_str(&format!("TASK: {}\n", pack.task));
90 for result in &pack.primary_files {
91 out.push_str(&format!("[FILE: {}]\n", result.path.display()));
92 if let Some(range) = &result.line_range {
93 out.push_str(&format!("SYM: lines {}-{}\n", range.start, range.end));
94 }
95 out.push_str(&result.snippet);
96 out.push_str("\n[END FILE]\n");
97 }
98 for result in &pack.supporting_files {
99 out.push_str(&format!("IMPACT: {}\n", result.path.display()));
100 }
101 for test in &pack.validation_plan.tests {
102 out.push_str(&format!("TEST: {}\n", test.name));
103 }
104 Ok(out)
105 }
106 }
107 }
108}
109
110fn write_markdown_confidence_breakdown(out: &mut String, breakdown: &ConfidenceBreakdown) {
111 if !breakdown.blockers.is_empty() {
112 out.push_str("- Blockers:\n");
113 for blocker in &breakdown.blockers {
114 out.push_str(&format!(" - {blocker}\n"));
115 }
116 }
117 if !breakdown.caveats.is_empty() {
118 out.push_str("- Caveats:\n");
119 for caveat in &breakdown.caveats {
120 out.push_str(&format!(" - {caveat}\n"));
121 }
122 }
123 out.push_str("- Components:\n");
124 for component in &breakdown.components {
125 out.push_str(&format!(
126 " - `{}` score `{:.2}`, weight `{:.2}`, contribution `{:.2}`\n",
127 component.signal, component.normalized_value, component.weight, component.contribution
128 ));
129 }
130}
131
/// Assembles a [`ContextPack`] for a task from data held in an [`OkStore`].
pub struct ContextPackBuilder<'a> {
    /// Borrowed store that every lookup (files, chunks, symbols, facts, graph neighbours) goes through.
    store: &'a dyn OkStore,
    /// Options handed to the ranking functions when results are re-ranked.
    ranking_options: RankingOptions,
}
136
137impl<'a> ContextPackBuilder<'a> {
138 pub fn new(store: &'a dyn OkStore) -> Self {
139 Self {
140 store,
141 ranking_options: RankingOptions::default(),
142 }
143 }
144
145 pub fn with_ranking_options(mut self, ranking_options: RankingOptions) -> Self {
146 self.ranking_options = ranking_options;
147 self
148 }
149
150 pub fn build(&self, task: &str, limit: usize) -> Result<ContextPack> {
151 let files = self.store.list_files(usize::MAX, 0)?;
152 let chunks = self.store.all_chunks()?;
153 let symbols = self.store.list_symbols(None, usize::MAX, 0)?;
154 let intent = TaskSearchIntent::parse(task);
155 let primary = rerank_for_task(
156 search_candidates(&chunks, &files, &symbols, task, limit, &intent)?,
157 &intent,
158 &self.ranking_options,
159 );
160 self.build_from_primary_with_impact(task, limit, primary, true)
161 }
162
163 pub fn build_from_primary(
164 &self,
165 task: &str,
166 limit: usize,
167 primary: Vec<SearchResult>,
168 ) -> Result<ContextPack> {
169 self.build_from_primary_with_impact(
170 task,
171 limit,
172 rerank_with_options(primary, &self.ranking_options),
173 false,
174 )
175 }
176
177 fn build_from_primary_with_impact(
178 &self,
179 task: &str,
180 limit: usize,
181 primary: Vec<SearchResult>,
182 expand_impact: bool,
183 ) -> Result<ContextPack> {
184 let mut primary = primary;
185 augment_primary_with_runtime(self.store, task, &mut primary, limit)?;
186 let primary_symbols = primary
187 .iter()
188 .filter_map(|result| result.symbol.clone())
189 .take(10)
190 .collect::<Vec<_>>();
191 let mut tests = Vec::new();
192 let selector = TestSelector::new(self.store as &dyn open_kioku_storage::MetadataStore);
193 for result in primary.iter().take(3) {
194 tests.extend(selector.for_changed_path_with_evidence(&result.path, 5)?);
195 }
196 tests.truncate(10);
197 let impact = if expand_impact {
198 if let Some(first) = primary.first() {
199 ImpactEngine::new(self.store as &dyn open_kioku_storage::MetadataStore)
200 .for_file(&first.path)?
201 } else {
202 empty_impact(task)
203 }
204 } else if primary.is_empty() {
205 empty_impact(task)
206 } else {
207 bounded_impact(task)
208 };
209
210 let mut dependency_edges: Vec<GraphEdge> = Vec::new();
211 for result in primary.iter().take(5) {
212 let node_id = format!("file:{}", result.path.display());
213 if let Ok((_nodes, edges)) = self.store.neighbors(&node_id, 20) {
214 dependency_edges.extend(edges);
215 }
216 }
217 dependency_edges.sort_by(|a, b| a.id.0.cmp(&b.id.0));
218 dependency_edges.dedup_by(|a, b| a.id == b.id);
219 dependency_edges.truncate(50);
220
221 let mut primary_files = primary.iter().take(limit).cloned().collect::<Vec<_>>();
222 let mut supporting_files = impact
223 .direct_impacts
224 .iter()
225 .take(10)
226 .cloned()
227 .collect::<Vec<_>>();
228 let runtime_signals =
229 runtime_signals_for_context(self.store, task, &primary_files, &supporting_files, 12)?;
230 annotate_results_with_runtime(&mut primary_files, &runtime_signals);
231 annotate_results_with_runtime(&mut supporting_files, &runtime_signals);
232 annotate_results_with_git_history(self.store, &mut primary_files)?;
233 annotate_results_with_git_history(self.store, &mut supporting_files)?;
234 let runtime_evidence = runtime_signals
235 .iter()
236 .map(runtime_signal_evidence)
237 .collect::<Vec<_>>();
238 let git_evidence = git_history_evidence_for_results(self.store, &primary_files)?;
239
240 let evidence = primary_files
241 .iter()
242 .take(20)
243 .flat_map(|result| {
244 result.evidence.iter().map(|msg| Evidence {
245 id: EvidenceId::new(format!("context:{}", result.path.display())),
246 source: "open-kioku-search".into(),
247 source_type: EvidenceSourceType::Lexical,
248 file_range: result
249 .line_range
250 .clone()
251 .map(|lr| open_kioku_core::FileRange {
252 path: result.path.clone(),
253 line_range: Some(lr),
254 }),
255 symbol_id: result.symbol.as_ref().map(|s| s.id.clone()),
256 confidence: Confidence::Medium,
257 message: msg.clone(),
258 indexed_at: Utc::now(),
259 })
260 })
261 .chain(impact.evidence.clone())
262 .chain(runtime_evidence.clone())
263 .chain(git_evidence)
264 .collect::<Vec<_>>();
265 let allowed_files = primary
266 .iter()
267 .take(8)
268 .map(|result| result.path.clone())
269 .collect::<Vec<_>>();
270 let confidence_breakdown = confidence_for_context(
271 &primary_files,
272 &supporting_files,
273 &tests,
274 &impact.risk_report,
275 allowed_files.len(),
276 evidence.len(),
277 runtime_signals.len(),
278 );
279 let negative_evidence = negative_evidence_for_context(
280 task,
281 &primary_files,
282 &supporting_files,
283 &tests,
284 &impact.risk_report,
285 &runtime_signals,
286 );
287 let boundary_evidence_refs = primary_files
288 .iter()
289 .flat_map(|result| result.derived_evidence_ids())
290 .collect::<Vec<_>>();
291 let confidence_summary = confidence_summary(&confidence_breakdown);
292 Ok(ContextPack {
293 task: task.into(),
294 intent: classify_intent(task).into(),
295 primary_files,
296 primary_symbols,
297 supporting_files,
298 dependency_edges,
299 runtime_signals,
300 test_candidates: tests.clone(),
301 risk_report: impact.risk_report,
302 recommended_change_boundary: ChangeBoundary {
303 allowed_files,
304 caution_files: impact
305 .direct_impacts
306 .iter()
307 .take(8)
308 .map(|result| result.path.clone())
309 .collect(),
310 forbidden_files: Vec::new(),
311 evidence_refs: boundary_evidence_refs,
312 ..Default::default()
313 },
314 validation_plan: ValidationPlan {
315 commands: tests
316 .iter()
317 .filter_map(|test| test.command.clone())
318 .collect(),
319 tests,
320 requires_approval: true,
321 evidence: evidence.clone(),
322 },
323 evidence,
324 negative_evidence,
325 confidence_summary,
326 confidence_breakdown,
327 })
328 }
329}
330
331fn negative_evidence_for_context(
332 task: &str,
333 primary_files: &[SearchResult],
334 supporting_files: &[SearchResult],
335 tests: &[open_kioku_core::TestTarget],
336 risk: &RiskReport,
337 runtime_signals: &[RuntimeSignal],
338) -> Vec<NegativeEvidence> {
339 let mut items = Vec::new();
340 if primary_files.is_empty() {
341 items.push(NegativeEvidence {
342 query: task.into(),
343 scope: "primary_context".into(),
344 inspected_sources: vec!["lexical_search".into(), "ranking_fusion".into()],
345 reason: "no primary context matched the task".into(),
346 confidence: 0.95,
347 suggested_next_probe: Some("Run `ok search <task> --explain-ranking` with named symbols or paths from the ticket.".into()),
348 });
349 }
350 if exact_reference_count(primary_files, supporting_files) == 0 {
351 items.push(NegativeEvidence {
352 query: task.into(),
353 scope: "exact_references".into(),
354 inspected_sources: vec![
355 "search_result.evidence".into(),
356 "search_result.match_reason".into(),
357 ],
358 reason: "no explicit exact symbol reference or SCIP evidence was found".into(),
359 confidence: 0.85,
360 suggested_next_probe: Some(
361 "Run `ok scip setup .` and re-index with `ok index . --with-scip auto`.".into(),
362 ),
363 });
364 }
365 if tests.is_empty() {
366 items.push(NegativeEvidence {
367 query: task.into(),
368 scope: "validation".into(),
369 inspected_sources: vec!["indexed_tests".into(), "test_selector".into()],
370 reason: "no nearby validation target was selected".into(),
371 confidence: 0.80,
372 suggested_next_probe: primary_files.first().map(|result| {
373 format!(
374 "Run `ok tests {}` to inspect validation candidates for the top file.",
375 result.path.display()
376 )
377 }),
378 });
379 }
380 if runtime_signals.is_empty() && runtime_signal_count(primary_files, supporting_files) == 0 {
381 items.push(NegativeEvidence {
382 query: task.into(),
383 scope: "runtime".into(),
384 inspected_sources: vec!["runtime_signals".into(), "search_result.evidence".into()],
385 reason:
386 "no runtime trace, incident, or error artifact corroborated the selected context"
387 .into(),
388 confidence: 0.75,
389 suggested_next_probe: Some(
390 "Import or configure runtime artifacts, then rerun `ok plan`.".into(),
391 ),
392 });
393 }
394 if docs_or_tests_only(primary_files) {
395 items.push(NegativeEvidence {
396 query: task.into(),
397 scope: "boundary".into(),
398 inspected_sources: vec!["primary_context.paths".into()],
399 reason: "task anchors only matched docs or test fixtures, not source edit targets"
400 .into(),
401 confidence: 0.90,
402 suggested_next_probe: Some(
403 "Search for the production symbol or source path named by the ticket.".into(),
404 ),
405 });
406 }
407 for reason in &risk.reasons {
408 let lower = reason.to_ascii_lowercase();
409 if lower.contains("low confidence") || lower.contains("no matching") {
410 items.push(NegativeEvidence {
411 query: task.into(),
412 scope: "risk".into(),
413 inspected_sources: vec!["risk_report.reasons".into()],
414 reason: reason.clone(),
415 confidence: 0.85,
416 suggested_next_probe: Some(
417 "Resolve the missing task anchor before editing.".into(),
418 ),
419 });
420 }
421 }
422 items
423}
424
425fn confidence_for_context(
426 primary_files: &[SearchResult],
427 supporting_files: &[SearchResult],
428 tests: &[open_kioku_core::TestTarget],
429 risk: &RiskReport,
430 allowed_file_count: usize,
431 evidence_count: usize,
432 runtime_signal_count_value: usize,
433) -> ConfidenceBreakdown {
434 ConfidenceBreakdown::from_signals(ConfidenceSignalInput {
435 primary_file_count: primary_files.len(),
436 evidence_count,
437 exact_reference_count: exact_reference_count(primary_files, supporting_files),
438 validation_count: tests.len(),
439 validation_with_command_count: tests.iter().filter(|test| test.command.is_some()).count(),
440 negative_evidence_count: negative_evidence_count(risk),
441 allowed_file_count,
442 runtime_signal_count: runtime_signal_count_value
443 + runtime_signal_count(primary_files, supporting_files),
444 })
445}
446
447fn confidence_summary(breakdown: &ConfidenceBreakdown) -> String {
448 let mut parts = vec![format!(
449 "overall {:?} ({:.2}) from explainable evidence signals",
450 breakdown.overall_enum, breakdown.overall_score
451 )];
452 if let Some(blocker) = breakdown.blockers.first() {
453 parts.push(format!("blocker: {blocker}"));
454 }
455 if let Some(caveat) = breakdown.caveats.first() {
456 parts.push(format!("caveat: {caveat}"));
457 }
458 parts.join("; ")
459}
460
461fn exact_reference_count(
462 primary_files: &[SearchResult],
463 supporting_files: &[SearchResult],
464) -> usize {
465 primary_files
466 .iter()
467 .chain(supporting_files.iter())
468 .filter(|result| has_exact_reference_signal(result))
469 .count()
470}
471
472fn has_exact_reference_signal(result: &SearchResult) -> bool {
473 result
474 .evidence
475 .iter()
476 .any(|evidence| contains_exact_reference(evidence))
477 || contains_exact_reference(&result.match_reason)
478}
479
/// Case-insensitively checks `value` for one of the phrases that mark an exact
/// reference: "exact reference", "exact symbol reference" or "scip".
fn contains_exact_reference(value: &str) -> bool {
    const MARKERS: [&str; 3] = ["exact reference", "exact symbol reference", "scip"];
    let lowered = value.to_ascii_lowercase();
    MARKERS.iter().any(|marker| lowered.contains(*marker))
}
486
487fn runtime_signal_count(
488 primary_files: &[SearchResult],
489 supporting_files: &[SearchResult],
490) -> usize {
491 primary_files
492 .iter()
493 .chain(supporting_files.iter())
494 .filter(|result| {
495 result.score_breakdown.iter().any(|component| {
496 component.signal == "runtime_corroboration" && component.contribution > 0.0
497 }) || result
498 .evidence
499 .iter()
500 .any(|evidence| evidence.to_ascii_lowercase().contains("runtime"))
501 })
502 .count()
503}
504
505fn runtime_signals_for_context(
506 store: &dyn OkStore,
507 task: &str,
508 primary_files: &[SearchResult],
509 supporting_files: &[SearchResult],
510 limit: usize,
511) -> Result<Vec<RuntimeSignal>> {
512 let facts = store.analysis_facts(Some(EvidenceSourceType::Runtime), 500)?;
513 if facts.is_empty() {
514 return Ok(Vec::new());
515 }
516 let files = store.list_files(usize::MAX, 0)?;
517 let files_by_id = files
518 .into_iter()
519 .map(|file| (file.id.clone(), file))
520 .collect::<std::collections::HashMap<_, _>>();
521 let selected_paths = primary_files
522 .iter()
523 .chain(supporting_files.iter())
524 .map(|result| normalize_path(&result.path))
525 .collect::<std::collections::HashSet<_>>();
526 let searchable_context = primary_files
527 .iter()
528 .chain(supporting_files.iter())
529 .flat_map(|result| {
530 [
531 result.path.display().to_string(),
532 result.snippet.clone(),
533 result.match_reason.clone(),
534 result.evidence.join(" "),
535 ]
536 })
537 .chain(std::iter::once(task.to_string()))
538 .collect::<Vec<_>>()
539 .join(" ")
540 .to_ascii_lowercase();
541 let mut signals = facts
542 .into_iter()
543 .filter_map(|fact| {
544 let file = files_by_id.get(&fact.file_id)?;
545 if selected_paths.contains(&normalize_path(&file.path))
546 || runtime_fact_matches_query(&fact, &searchable_context)
547 {
548 Some(runtime_signal_from_fact(&fact, file))
549 } else {
550 None
551 }
552 })
553 .collect::<Vec<_>>();
554 signals.sort_by(|a, b| a.id.cmp(&b.id));
555 signals.dedup_by(|a, b| a.id == b.id);
556 signals.truncate(limit);
557 Ok(signals)
558}
559
560fn augment_primary_with_runtime(
561 store: &dyn OkStore,
562 task: &str,
563 primary: &mut Vec<SearchResult>,
564 limit: usize,
565) -> Result<()> {
566 let facts = store.analysis_facts(Some(EvidenceSourceType::Runtime), 500)?;
567 if facts.is_empty() {
568 return Ok(());
569 }
570 let task = task.to_ascii_lowercase();
571 let files = store.list_files(usize::MAX, 0)?;
572 let files_by_id = files
573 .into_iter()
574 .map(|file| (file.id.clone(), file))
575 .collect::<std::collections::HashMap<_, _>>();
576 let mut existing_paths = primary
577 .iter()
578 .map(|result| normalize_path(&result.path))
579 .collect::<std::collections::HashSet<_>>();
580 let mut additions = Vec::new();
581 for fact in facts
582 .into_iter()
583 .filter(|fact| runtime_fact_matches_query(fact, &task))
584 {
585 let Some(file) = files_by_id.get(&fact.file_id) else {
586 continue;
587 };
588 let normalized_path = normalize_path(&file.path);
589 if !existing_paths.insert(normalized_path) {
590 continue;
591 }
592 if let Some(result) = runtime_seed_result(store, file, &fact)? {
593 additions.push(result);
594 }
595 if additions.len() >= limit {
596 break;
597 }
598 }
599 primary.extend(additions);
600 primary.sort_by(|a, b| {
601 b.score
602 .partial_cmp(&a.score)
603 .unwrap_or(std::cmp::Ordering::Equal)
604 .then_with(|| a.path.cmp(&b.path))
605 });
606 primary.truncate(limit.max(1));
607 Ok(())
608}
609
610fn runtime_seed_result(
611 store: &dyn OkStore,
612 file: &File,
613 fact: &AnalysisFact,
614) -> Result<Option<SearchResult>> {
615 let chunks = store.chunks_for_file(&file.id)?;
616 let snippet = chunks
617 .iter()
618 .find(|chunk| {
619 fact.range
620 .as_ref()
621 .map(|range| chunk.range.start <= range.start && range.start <= chunk.range.end)
622 .unwrap_or(false)
623 })
624 .or_else(|| chunks.first())
625 .map(|chunk| chunk.text.clone())
626 .unwrap_or_else(|| fact.target.clone());
627 let evidence = vec![format!(
628 "runtime corroboration from local artifact `{}` targeting `{}`",
629 fact.source, fact.target
630 )];
631 Ok(Some(SearchResult {
632 path: file.path.clone(),
633 line_range: fact.range.clone(),
634 snippet,
635 symbol: None,
636 score: 1.35,
637 match_reason: "runtime artifact matched task intent".into(),
638 evidence,
639 evidence_refs: vec![fact.id.clone()],
640 confidence: fact.confidence.score(),
641 score_breakdown: vec![ScoreComponent::single(
642 "runtime_corroboration",
643 1.35,
644 vec![fact.id.clone()],
645 "local runtime trace/log/incident artifact matched the task",
646 )],
647 }))
648}
649
650fn annotate_results_with_runtime(results: &mut [SearchResult], signals: &[RuntimeSignal]) {
651 if signals.is_empty() {
652 return;
653 }
654 for result in results {
655 let result_path = normalize_path(&result.path);
656 let searchable = format!(
657 "{} {} {}",
658 result.snippet,
659 result.match_reason,
660 result.evidence.join(" ")
661 )
662 .to_ascii_lowercase();
663 let matched = signals
664 .iter()
665 .filter(|signal| {
666 signal
667 .file_range
668 .as_ref()
669 .map(|range| normalize_path(&range.path) == result_path)
670 .unwrap_or(false)
671 || runtime_message_tokens(&signal.message)
672 .iter()
673 .any(|token| searchable.contains(token))
674 })
675 .take(3)
676 .collect::<Vec<_>>();
677 if matched.is_empty() {
678 continue;
679 }
680 let evidence_ids = matched
681 .iter()
682 .map(|signal| signal.id.clone())
683 .collect::<Vec<_>>();
684 let labels = matched
685 .iter()
686 .map(|signal| signal.kind.as_str())
687 .collect::<Vec<_>>()
688 .join(", ");
689 for signal in &matched {
690 let evidence = format!(
691 "runtime corroboration `{}`: {}",
692 signal.kind, signal.message
693 );
694 if !result.evidence.contains(&evidence) {
695 result.evidence.push(evidence);
696 }
697 }
698 for id in &evidence_ids {
699 if !result.evidence_refs.contains(id) {
700 result.evidence_refs.push(id.clone());
701 }
702 }
703 result.score += 0.15 * matched.len() as f32;
704 result.confidence = result.confidence.max(0.75);
705 result.score_breakdown.push(ScoreComponent::adjustment(
706 "runtime_corroboration",
707 0.15 * matched.len() as f32,
708 evidence_ids,
709 format!("local runtime artifact corroborates this context result: {labels}"),
710 ));
711 }
712}
713
714fn runtime_signal_from_fact(fact: &AnalysisFact, file: &File) -> RuntimeSignal {
715 RuntimeSignal {
716 id: fact.id.clone(),
717 kind: runtime_kind(fact),
718 message: format!("{}: {}", fact.message, fact.target),
719 file_range: Some(FileRange {
720 path: file.path.clone(),
721 line_range: fact.range.clone(),
722 }),
723 occurred_at: None,
724 confidence: fact.confidence,
725 }
726}
727
728fn runtime_signal_evidence(signal: &RuntimeSignal) -> Evidence {
729 Evidence {
730 id: EvidenceId::new(signal.id.clone()),
731 source: "open-kioku-runtime".into(),
732 source_type: EvidenceSourceType::Runtime,
733 file_range: signal.file_range.clone(),
734 symbol_id: None,
735 confidence: signal.confidence,
736 message: signal.message.clone(),
737 indexed_at: Utc::now(),
738 }
739}
740
741fn annotate_results_with_git_history(
742 store: &dyn OkStore,
743 results: &mut [SearchResult],
744) -> Result<()> {
745 if results.is_empty() {
746 return Ok(());
747 }
748 let facts = store.analysis_facts(Some(EvidenceSourceType::GitHistory), 10_000)?;
749 if facts.is_empty() {
750 return Ok(());
751 }
752 let files = store.list_files(usize::MAX, 0)?;
753 let files_by_path = files
754 .into_iter()
755 .map(|file| (normalize_path(&file.path), file))
756 .collect::<std::collections::HashMap<_, _>>();
757 for result in results {
758 let Some(file) = files_by_path.get(&normalize_path(&result.path)) else {
759 continue;
760 };
761 let matched = facts
762 .iter()
763 .filter(|fact| fact.file_id == file.id)
764 .take(3)
765 .collect::<Vec<_>>();
766 if matched.is_empty() {
767 continue;
768 }
769 let evidence_ids = matched
770 .iter()
771 .map(|fact| fact.id.clone())
772 .collect::<Vec<_>>();
773 let labels = matched
774 .iter()
775 .map(|fact| fact.target.as_str())
776 .collect::<Vec<_>>()
777 .join(", ");
778 for fact in &matched {
779 let evidence = format!(
780 "git co-change from local history: `{}` ({})",
781 fact.target, fact.message
782 );
783 if !result.evidence.contains(&evidence) {
784 result.evidence.push(evidence);
785 }
786 }
787 for id in &evidence_ids {
788 if !result.evidence_refs.contains(id) {
789 result.evidence_refs.push(id.clone());
790 }
791 }
792 result.score += 0.12 * matched.len() as f32;
793 result.confidence = result.confidence.max(0.70);
794 result.score_breakdown.push(ScoreComponent::adjustment(
795 "git_cochange",
796 0.12 * matched.len() as f32,
797 evidence_ids,
798 format!("local git history says this file co-changed with: {labels}"),
799 ));
800 }
801 Ok(())
802}
803
804fn git_history_evidence_for_results(
805 store: &dyn OkStore,
806 results: &[SearchResult],
807) -> Result<Vec<Evidence>> {
808 if results.is_empty() {
809 return Ok(Vec::new());
810 }
811 let facts = store.analysis_facts(Some(EvidenceSourceType::GitHistory), 10_000)?;
812 if facts.is_empty() {
813 return Ok(Vec::new());
814 }
815 let files = store.list_files(usize::MAX, 0)?;
816 let paths_by_id = files
817 .into_iter()
818 .map(|file| (file.id, file.path))
819 .collect::<std::collections::HashMap<_, _>>();
820 let selected_paths = results
821 .iter()
822 .map(|result| normalize_path(&result.path))
823 .collect::<std::collections::HashSet<_>>();
824 let mut evidence = Vec::new();
825 for fact in facts {
826 let Some(path) = paths_by_id.get(&fact.file_id) else {
827 continue;
828 };
829 if !selected_paths.contains(&normalize_path(path)) {
830 continue;
831 }
832 evidence.push(Evidence {
833 id: EvidenceId::new(fact.id.clone()),
834 source: fact.source.clone(),
835 source_type: EvidenceSourceType::GitHistory,
836 file_range: Some(FileRange {
837 path: path.clone(),
838 line_range: None,
839 }),
840 symbol_id: None,
841 confidence: fact.confidence,
842 message: format!("{}: {}", fact.message, fact.target),
843 indexed_at: Utc::now(),
844 });
845 if evidence.len() >= 20 {
846 break;
847 }
848 }
849 Ok(evidence)
850}
851
852fn runtime_kind(fact: &AnalysisFact) -> String {
853 match (&fact.target_kind, &fact.edge_type) {
854 (GraphNodeType::Endpoint, GraphEdgeType::ExposesEndpoint) => "endpoint".into(),
855 (GraphNodeType::DatabaseTable, GraphEdgeType::ReadsTable) => "sql_read".into(),
856 (GraphNodeType::DatabaseTable, GraphEdgeType::WritesTable) => "sql_write".into(),
857 (GraphNodeType::RuntimeError, _) => "incident".into(),
858 (_, edge) => format!("{edge:?}").to_ascii_lowercase(),
859 }
860}
861
862fn runtime_fact_matches_query(fact: &AnalysisFact, searchable_context: &str) -> bool {
863 runtime_message_tokens(&fact.target)
864 .iter()
865 .any(|token| searchable_context.contains(token))
866 || runtime_message_tokens(&fact.message)
867 .iter()
868 .any(|token| searchable_context.contains(token))
869}
870
/// Splits `value` into at most eight lower-cased tokens of four or more bytes.
///
/// Token characters are ASCII alphanumerics plus `_`, `/` and `.`; everything
/// else is a separator. Leading and trailing `/` are trimmed from each token
/// before its length is checked.
fn runtime_message_tokens(value: &str) -> Vec<String> {
    const MAX_TOKENS: usize = 8;
    const MIN_TOKEN_LEN: usize = 4;

    let is_token_char = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '/' | '.');

    let mut tokens = Vec::new();
    for piece in value.split(|ch: char| !is_token_char(ch)) {
        let trimmed = piece.trim_matches('/');
        // ASCII lower-casing keeps the byte length, so checking before is equivalent.
        if trimmed.len() < MIN_TOKEN_LEN {
            continue;
        }
        tokens.push(trimmed.to_ascii_lowercase());
        if tokens.len() == MAX_TOKENS {
            break;
        }
    }
    tokens
}
879
/// Turns `path` into a comparable string: lossy UTF-8, backslashes replaced by
/// `/`, and every leading `./` removed.
fn normalize_path(path: &std::path::Path) -> String {
    let unified = path.to_string_lossy().replace('\\', "/");
    let mut remainder = unified.as_str();
    // Strip repeated prefixes so "././a" and "a" compare equal.
    while let Some(stripped) = remainder.strip_prefix("./") {
        remainder = stripped;
    }
    remainder.to_owned()
}
886
887fn negative_evidence_count(risk: &RiskReport) -> usize {
888 risk.reasons
889 .iter()
890 .filter(|reason| {
891 let lower = reason.to_ascii_lowercase();
892 lower.contains("low confidence")
893 || lower.contains("no matching")
894 || lower.contains("missing")
895 || lower.contains("absent")
896 || lower.contains("unavailable")
897 || lower.contains("weak")
898 || lower.contains("unknown")
899 })
900 .count()
901}
902
903fn docs_or_tests_only(results: &[SearchResult]) -> bool {
904 !results.is_empty()
905 && results
906 .iter()
907 .all(|result| is_docs_or_test_path(&result.path.to_string_lossy()))
908}
909
/// Heuristically decides whether `path` points at documentation or test code
/// rather than a production source file.
///
/// The check is ASCII case-insensitive and treats `\` like `/`, so Windows-style
/// paths are classified the same way as their `/`-separated form. A path
/// qualifies when it:
///
/// * lies under a `docs/`, `test/` or `tests/` directory,
/// * ends in `.md` or `.mdx`,
/// * contains `_test.` (for example `foo_test.go`), or
/// * contains `test_` at the start of a name or directly after a
///   non-alphanumeric character (for example `test_utils.py`, `my_test_data.rs`).
///
/// The boundary requirement on `test_` keeps ordinary words that merely end in
/// "test" (`latest_release.rs`, `contest_entry.rs`) from being classified as tests.
fn is_docs_or_test_path(path: &str) -> bool {
    let path = path.replace('\\', "/").to_ascii_lowercase();

    // `idx` comes from `match_indices`, so it is always a char boundary.
    let has_test_prefix = path.match_indices("test_").any(|(idx, _)| {
        path[..idx]
            .chars()
            .next_back()
            .map_or(true, |previous| !previous.is_ascii_alphanumeric())
    });

    path.starts_with("docs/")
        || path.starts_with("test/")
        || path.starts_with("tests/")
        || path.contains("/docs/")
        || path.ends_with(".md")
        || path.ends_with(".mdx")
        || path.contains("/test/")
        || path.contains("/tests/")
        || path.contains("_test.")
        || has_test_prefix
}
923
/// Anchors extracted from a task description by `TaskSearchIntent::parse`,
/// used to widen the search and to boost matching results.
#[derive(Debug, Clone, Default)]
struct TaskSearchIntent {
    /// Identifiers found before the reference marker (or the first identifier of the task as a fallback).
    primary_anchors: Vec<String>,
    /// Identifiers found from the reference marker onwards, plus any task identifier not already listed.
    reference_anchors: Vec<String>,
    /// Whitespace-separated tokens accepted by `is_ticket_id`, de-duplicated.
    ticket_anchors: Vec<String>,
    /// Tokens accepted by `is_path_like`, with surrounding `/` trimmed, de-duplicated.
    path_anchors: Vec<String>,
}
931
932impl TaskSearchIntent {
933 fn parse(task: &str) -> Self {
934 let mut intent = Self::default();
935 let lower = task.to_ascii_lowercase();
936 let reference_start = reference_marker_start(&lower).unwrap_or(task.len());
937 let edit_side = task.get(..reference_start).unwrap_or(task);
938 let reference_side = task.get(reference_start..).unwrap_or_default();
939 let all_identifiers = identifiers(task);
940
941 intent.primary_anchors = identifiers(edit_side);
942 intent.reference_anchors = identifiers(reference_side);
943 if intent.primary_anchors.is_empty() {
944 if let Some(first) = all_identifiers.first() {
945 intent.primary_anchors.push(first.clone());
946 }
947 }
948 for value in all_identifiers {
949 if !intent.primary_anchors.contains(&value)
950 && !intent.reference_anchors.contains(&value)
951 {
952 intent.reference_anchors.push(value);
953 }
954 }
955
956 for token in task.split_whitespace() {
957 let cleaned = token.trim_matches(|ch: char| {
958 !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '/' || ch == '.')
959 });
960 if is_ticket_id(cleaned) && !intent.ticket_anchors.iter().any(|v| v == cleaned) {
961 intent.ticket_anchors.push(cleaned.to_string());
962 }
963 if is_path_like(cleaned) {
964 let normalized = cleaned.trim_matches('/');
965 if !normalized.is_empty() && !intent.path_anchors.iter().any(|v| v == normalized) {
966 intent.path_anchors.push(normalized.to_string());
967 }
968 }
969 }
970
971 intent
972 }
973
974 fn search_terms(&self, task: &str) -> Vec<String> {
975 let mut terms = vec![task.to_string()];
976 for term in self
977 .ticket_anchors
978 .iter()
979 .chain(self.path_anchors.iter())
980 .chain(self.primary_anchors.iter())
981 .chain(self.reference_anchors.iter())
982 {
983 if term.len() >= 3 && !terms.iter().any(|existing| existing == term) {
984 terms.push(term.clone());
985 }
986 }
987 terms
988 }
989}
990
991fn search_candidates(
992 chunks: &[CodeChunk],
993 files: &[File],
994 symbols: &[Symbol],
995 task: &str,
996 limit: usize,
997 intent: &TaskSearchIntent,
998) -> Result<Vec<SearchResult>> {
999 let mut merged = std::collections::BTreeMap::<String, SearchResult>::new();
1000 let per_anchor_limit = limit.clamp(8, 40);
1001 for term in intent.search_terms(task) {
1002 for mut result in search_chunks(chunks, files, symbols, &term, per_anchor_limit)? {
1003 if term != task {
1004 result
1005 .evidence
1006 .push(format!("task anchor `{term}` matched"));
1007 result.match_reason = format!("{}; task anchor `{term}`", result.match_reason);
1008 }
1009 let key = result_key(&result);
1010 match merged.get_mut(&key) {
1011 Some(existing) => {
1012 if result.score > existing.score {
1013 existing.score = result.score;
1014 existing.snippet = result.snippet;
1015 existing.line_range = result.line_range;
1016 existing.symbol = result.symbol;
1017 existing.score_breakdown = result.score_breakdown;
1018 }
1019 for evidence in result.evidence {
1020 if !existing.evidence.contains(&evidence) {
1021 existing.evidence.push(evidence);
1022 }
1023 }
1024 if !existing.match_reason.contains(&term) {
1025 existing.match_reason =
1026 format!("{}; task anchor `{term}`", existing.match_reason);
1027 }
1028 existing.reconcile_score_breakdown();
1029 }
1030 None => {
1031 merged.insert(key, result);
1032 }
1033 }
1034 }
1035 }
1036
1037 Ok(merged.into_values().collect())
1038}
1039
1040fn rerank_for_task(
1041 results: Vec<SearchResult>,
1042 intent: &TaskSearchIntent,
1043 ranking_options: &RankingOptions,
1044) -> Vec<SearchResult> {
1045 let mut results = rerank_with_options(results, ranking_options);
1046 for result in &mut results {
1047 let haystack = searchable_result_text(result);
1048 for anchor in &intent.primary_anchors {
1049 if contains_anchor(&haystack, anchor) {
1050 result.score += 0.65;
1051 result.confidence = result.confidence.max(0.85);
1052 result
1053 .evidence
1054 .push(format!("primary task anchor `{anchor}` matched"));
1055 result.add_score_component(ScoreComponent::adjustment(
1056 "primary_task_anchor_boost",
1057 0.65,
1058 result.derived_evidence_ids(),
1059 format!("primary task anchor `{anchor}` matched result text"),
1060 ));
1061 }
1062 }
1063 for anchor in &intent.reference_anchors {
1064 if contains_anchor(&haystack, anchor) {
1065 result.score += 0.25;
1066 result.confidence = result.confidence.max(0.65);
1067 result
1068 .evidence
1069 .push(format!("reference task anchor `{anchor}` matched"));
1070 result.add_score_component(ScoreComponent::adjustment(
1071 "reference_task_anchor_boost",
1072 0.25,
1073 result.derived_evidence_ids(),
1074 format!("reference task anchor `{anchor}` matched result text"),
1075 ));
1076 }
1077 }
1078 for anchor in intent
1079 .ticket_anchors
1080 .iter()
1081 .chain(intent.path_anchors.iter())
1082 {
1083 if contains_anchor(&haystack, anchor) {
1084 result.score += 0.35;
1085 result.confidence = result.confidence.max(0.75);
1086 result
1087 .evidence
1088 .push(format!("ticket/path task anchor `{anchor}` matched"));
1089 result.add_score_component(ScoreComponent::adjustment(
1090 "ticket_or_path_anchor_boost",
1091 0.35,
1092 result.derived_evidence_ids(),
1093 format!("ticket/path anchor `{anchor}` matched result text"),
1094 ));
1095 }
1096 }
1097 result.reconcile_score_breakdown();
1098 }
1099 results.sort_by(|a, b| {
1100 b.score
1101 .partial_cmp(&a.score)
1102 .unwrap_or(std::cmp::Ordering::Equal)
1103 .then_with(|| a.path.cmp(&b.path))
1104 });
1105 results
1106}
1107
1108fn result_key(result: &SearchResult) -> String {
1109 format!(
1110 "{}:{}-{}",
1111 result.path.display(),
1112 result
1113 .line_range
1114 .as_ref()
1115 .map(|range| range.start)
1116 .unwrap_or_default(),
1117 result
1118 .line_range
1119 .as_ref()
1120 .map(|range| range.end)
1121 .unwrap_or_default()
1122 )
1123}
1124
1125fn searchable_result_text(result: &SearchResult) -> String {
1126 format!(
1127 "{} {} {} {}",
1128 result.path.display(),
1129 result.snippet,
1130 result
1131 .symbol
1132 .as_ref()
1133 .map(|symbol| symbol.qualified_name.as_str())
1134 .unwrap_or_default(),
1135 result
1136 .symbol
1137 .as_ref()
1138 .map(|symbol| symbol.name.as_str())
1139 .unwrap_or_default()
1140 )
1141 .to_ascii_lowercase()
1142}
1143
1144fn contains_anchor(haystack: &str, anchor: &str) -> bool {
1145 haystack.contains(&anchor.to_ascii_lowercase())
1146 || haystack.contains(&normalize_identifier(anchor))
1147}
1148
/// Returns the byte offset of the earliest reference marker in `lower`.
///
/// A marker is one of the space-delimited phrases listed below. The offset
/// points at the marker's leading space. `None` means no marker occurs.
/// Matching is exact, so `lower` must already be lower-cased for mixed-case
/// input to match.
fn reference_marker_start(lower: &str) -> Option<usize> {
    const REFERENCE_MARKERS: [&str; 8] = [
        " similar to ",
        " like ",
        " copy from ",
        " copied from ",
        " mirror ",
        " mirrored from ",
        " based on ",
        " reference ",
    ];

    let mut earliest: Option<usize> = None;
    for marker in REFERENCE_MARKERS.iter() {
        if let Some(position) = lower.find(*marker) {
            earliest = match earliest {
                Some(best) if best <= position => Some(best),
                _ => Some(position),
            };
        }
    }
    earliest
}
1164
1165fn identifiers(value: &str) -> Vec<String> {
1166 let mut out = Vec::new();
1167 for token in value.split(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_' || ch == '-')) {
1168 let token = token.trim_matches('-');
1169 if is_named_identifier(token) && !out.iter().any(|existing| existing == token) {
1170 out.push(token.to_string());
1171 }
1172 }
1173 out
1174}
1175
1176fn is_named_identifier(value: &str) -> bool {
1177 if value.len() < 3 || is_ticket_id(value) {
1178 return false;
1179 }
1180 let has_lower = value.chars().any(|ch| ch.is_ascii_lowercase());
1181 let has_upper = value.chars().any(|ch| ch.is_ascii_uppercase());
1182 let has_digit = value.chars().any(|ch| ch.is_ascii_digit());
1183 let has_separator = value.contains('_') || value.contains('-');
1184 (has_lower && has_upper) || has_separator || (has_digit && has_upper)
1185}
1186
/// Recognizes ticket ids of the form `PREFIX-NUMBER`.
///
/// The value is split at its first `-`. The prefix must be at least two ASCII
/// uppercase letters and the remainder at least two ASCII digits.
fn is_ticket_id(value: &str) -> bool {
    match value.split_once('-') {
        Some((project, digits)) => {
            let project_ok =
                project.len() >= 2 && project.bytes().all(|byte| byte.is_ascii_uppercase());
            let digits_ok = digits.len() >= 2 && digits.bytes().all(|byte| byte.is_ascii_digit());
            project_ok && digits_ok
        }
        None => false,
    }
}
1196
/// Reports whether `value` looks like a file path.
///
/// A value qualifies when it contains `/` or ends with one of the listed
/// extensions. The extension check is case-sensitive.
fn is_path_like(value: &str) -> bool {
    const KNOWN_EXTENSIONS: [&str; 9] = [
        ".rs", ".ts", ".tsx", ".js", ".jsx", ".java", ".py", ".go", ".md",
    ];

    value.contains('/')
        || KNOWN_EXTENSIONS
            .iter()
            .any(|&extension| value.ends_with(extension))
}
1209
/// Turns an identifier or path into lower-case words separated by single
/// spaces.
///
/// `_`, `-`, `/` and `.` become word breaks. A break is also inserted before
/// an ASCII uppercase letter that directly follows an ASCII lowercase letter
/// or digit, so `fooBar` becomes `foo bar`. Runs of uppercase letters are not
/// split. Whitespace is collapsed and trimmed at the end.
fn normalize_identifier(value: &str) -> String {
    let mut spaced = String::new();
    let mut after_lower_or_digit = false;

    for ch in value.chars() {
        match ch {
            '_' | '-' | '/' | '.' => {
                spaced.push(' ');
                after_lower_or_digit = false;
            }
            _ => {
                if after_lower_or_digit && ch.is_ascii_uppercase() {
                    spaced.push(' ');
                }
                spaced.push(ch.to_ascii_lowercase());
                after_lower_or_digit = ch.is_ascii_lowercase() || ch.is_ascii_digit();
            }
        }
    }

    // Re-join the words so repeated or edge separators leave no extra spaces.
    let mut words = spaced.split_whitespace();
    let mut normalized = String::new();
    if let Some(first) = words.next() {
        normalized.push_str(first);
        for word in words {
            normalized.push(' ');
            normalized.push_str(word);
        }
    }
    normalized
}
1227
/// Classifies a task description by case-insensitive substring matching.
///
/// Returns `"code_change"` when the task contains `fix`, `add`, `change` or
/// `implement`, `"validation"` when it otherwise contains `test`, and
/// `"understanding"` in every other case. The keywords match anywhere in the
/// text, including inside longer words.
fn classify_intent(task: &str) -> &'static str {
    const CHANGE_KEYWORDS: [&str; 4] = ["fix", "add", "change", "implement"];

    let lowered = task.to_ascii_lowercase();
    if CHANGE_KEYWORDS
        .iter()
        .any(|&keyword| lowered.contains(keyword))
    {
        return "code_change";
    }
    if lowered.contains("test") {
        return "validation";
    }
    "understanding"
}
1242
1243fn empty_impact(task: &str) -> open_kioku_core::ImpactReport {
1244 open_kioku_core::ImpactReport {
1245 target: task.into(),
1246 direct_impacts: Vec::new(),
1247 indirect_impacts: Vec::new(),
1248 risk_report: RiskReport {
1249 level: "unknown".into(),
1250 score: 0.0,
1251 reasons: vec!["no matching indexed files found".into()],
1252 },
1253 evidence: vec![Evidence {
1254 id: EvidenceId::new("context:no-match"),
1255 source: "open-kioku-context".into(),
1256 source_type: EvidenceSourceType::Lexical,
1257 file_range: None,
1258 symbol_id: None,
1259 confidence: Confidence::Low,
1260 message: "context pack search did not find indexed evidence".into(),
1261 indexed_at: Utc::now(),
1262 }],
1263 score_breakdown: vec![ScoreComponent::single(
1264 "no_context_found",
1265 0.0,
1266 vec!["context:no-match".into()],
1267 "no indexed context matched the task",
1268 )],
1269 }
1270}
1271
1272fn bounded_impact(task: &str) -> open_kioku_core::ImpactReport {
1273 open_kioku_core::ImpactReport {
1274 target: task.into(),
1275 direct_impacts: Vec::new(),
1276 indirect_impacts: Vec::new(),
1277 risk_report: RiskReport {
1278 level: "low".into(),
1279 score: 0.1,
1280 reasons: vec!["bounded context built from persisted search results".into()],
1281 },
1282 evidence: vec![Evidence {
1283 id: EvidenceId::new("context:bounded-search"),
1284 source: "open-kioku-context".into(),
1285 source_type: EvidenceSourceType::Lexical,
1286 file_range: None,
1287 symbol_id: None,
1288 confidence: Confidence::Medium,
1289 message:
1290 "context pack used persisted search results without full-table impact expansion"
1291 .into(),
1292 indexed_at: Utc::now(),
1293 }],
1294 score_breakdown: vec![ScoreComponent::single(
1295 "bounded_context_risk",
1296 0.1,
1297 vec!["context:bounded-search".into()],
1298 "bounded context used persisted search results without full impact expansion",
1299 )],
1300 }
1301}
1302
#[cfg(test)]
mod tests {
    use super::*;
    use open_kioku_core::{FileId, Language, LineRange, RepositoryId, SymbolId, SymbolKind};
    use std::path::Path;

    /// Builds a 100-byte Java file fixture whose content hash equals its id.
    fn java_file(repository_id: &RepositoryId, id: &'static str, path: &'static str) -> File {
        File {
            id: FileId::new(id),
            repository_id: repository_id.clone(),
            path: path.into(),
            language: Language::Java,
            size_bytes: 100,
            content_hash: id.into(),
            is_generated: false,
            is_vendor: false,
        }
    }

    /// Builds a high-confidence Java class symbol spanning lines 1-20 of `file`.
    fn java_class(
        id: &'static str,
        name: &'static str,
        qualified_name: &'static str,
        file: &File,
    ) -> Symbol {
        Symbol {
            id: SymbolId::new(id),
            name: name.into(),
            qualified_name: qualified_name.into(),
            kind: SymbolKind::Class,
            file_id: file.id.clone(),
            range: Some(LineRange { start: 1, end: 20 }),
            language: Language::Java,
            confidence: Confidence::High,
            provenance: EvidenceSourceType::TreeSitter,
        }
    }

    /// Builds a Java chunk covering lines 1-10 of `file`, tied to `symbol`.
    fn class_chunk(
        id: &'static str,
        text: &'static str,
        file: &File,
        symbol: &Symbol,
    ) -> CodeChunk {
        CodeChunk {
            id: id.into(),
            file_id: file.id.clone(),
            range: LineRange { start: 1, end: 10 },
            language: Language::Java,
            text: text.into(),
            symbol_id: Some(symbol.id.clone()),
        }
    }

    #[test]
    fn primary_edit_anchor_outranks_reference_pattern_anchor() {
        let repo_id = RepositoryId::new("repo");

        // The file the task asks to edit, and the file it only cites as a pattern.
        let mutation_file = java_file(
            &repo_id,
            "mutation",
            "src/PublishRestrictionsMutation.java",
        );
        let validator_file = java_file(&repo_id, "validator", "src/EnterpriseRateValidator.java");

        let mutation_symbol = java_class(
            "mutation-symbol",
            "PublishRestrictionsMutation",
            "api.PublishRestrictionsMutation",
            &mutation_file,
        );
        let validator_symbol = java_class(
            "validator-symbol",
            "EnterpriseRateValidator",
            "api.EnterpriseRateValidator",
            &validator_file,
        );

        let chunks = vec![
            class_chunk(
                "mutation-chunk",
                "class PublishRestrictionsMutation { void mutate() {} }",
                &mutation_file,
                &mutation_symbol,
            ),
            class_chunk(
                "validator-chunk",
                "class EnterpriseRateValidator { boolean validate() { return true; } }",
                &validator_file,
                &validator_symbol,
            ),
        ];
        let files = vec![mutation_file, validator_file];
        let symbols = vec![mutation_symbol, validator_symbol];

        let task =
            "add validation in PublishRestrictionsMutation similar to EnterpriseRateValidator";
        let intent = TaskSearchIntent::parse(task);
        let candidates = search_candidates(&chunks, &files, &symbols, task, 10, &intent).unwrap();
        let ranked = rerank_for_task(candidates, &intent, &RankingOptions::default());

        let top = &ranked[0];
        assert_eq!(top.path, Path::new("src/PublishRestrictionsMutation.java"));
        assert!(top
            .evidence
            .iter()
            .any(|evidence| evidence.contains("primary task anchor")));
    }
}