open_kioku_context/
lib.rs

1use chrono::Utc;
2use open_kioku_core::{
3    AnalysisFact, ChangeBoundary, CodeChunk, Confidence, ConfidenceBreakdown,
4    ConfidenceSignalInput, ContextPack, Evidence, EvidenceId, EvidenceSourceType, File, FileRange,
5    GraphEdge, GraphEdgeType, GraphNodeType, NegativeEvidence, RiskReport, RuntimeSignal,
6    ScoreComponent, SearchResult, Symbol, ValidationPlan,
7};
8use open_kioku_errors::Result;
9use open_kioku_impact::ImpactEngine;
10use open_kioku_ranking::{rerank_with_options, RankingOptions};
11use open_kioku_search_regex::search_chunks;
12use open_kioku_storage::OkStore;
13use open_kioku_tests::TestSelector;
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
16pub enum ContextPackFormat {
17    Json,
18    Markdown,
19    PromptText,
20    Toon,
21}
22
23impl ContextPackFormat {
24    pub fn render(&self, pack: &ContextPack) -> Result<String> {
25        match self {
26            Self::Json => Ok(serde_json::to_string_pretty(pack)?),
27            Self::Toon => Ok(open_kioku_format::render_context_pack_toon(pack)),
28            Self::Markdown => {
29                let mut out = String::new();
30                out.push_str(&format!("# Task: {}\n\n", pack.task));
31                out.push_str("## Confidence\n\n");
32                out.push_str(&format!(
33                    "- Overall: `{:?}` (`{:.2}`)\n",
34                    pack.confidence_breakdown.overall_enum, pack.confidence_breakdown.overall_score
35                ));
36                write_markdown_confidence_breakdown(&mut out, &pack.confidence_breakdown);
37                out.push('\n');
38                out.push_str("## Primary Context\n\n");
39                for result in &pack.primary_files {
40                    out.push_str(&format!("### {}\n", result.path.display()));
41                    if let Some(range) = &result.line_range {
42                        out.push_str(&format!("Lines {}-{}\n", range.start, range.end));
43                    }
44                    out.push_str("```\n");
45                    out.push_str(&result.snippet);
46                    out.push_str("\n```\n\n");
47                }
48
49                out.push_str("## Supporting Impact\n\n");
50                for result in &pack.supporting_files {
51                    out.push_str(&format!("- {}\n", result.path.display()));
52                }
53
54                out.push_str("\n## Runtime Signals\n\n");
55                if pack.runtime_signals.is_empty() {
56                    out.push_str("- None found\n");
57                } else {
58                    for signal in &pack.runtime_signals {
59                        let location = signal
60                            .file_range
61                            .as_ref()
62                            .map(|range| {
63                                let lines = range
64                                    .line_range
65                                    .as_ref()
66                                    .map(|line_range| {
67                                        format!(":{}-{}", line_range.start, line_range.end)
68                                    })
69                                    .unwrap_or_default();
70                                format!("{}{}", range.path.display(), lines)
71                            })
72                            .unwrap_or_else(|| "unknown location".into());
73                        out.push_str(&format!(
74                            "- `{}` at `{}` ({:?})\n",
75                            signal.message, location, signal.confidence
76                        ));
77                    }
78                }
79
80                out.push_str("\n## Validation Plan\n\n");
81                for test in &pack.validation_plan.tests {
82                    out.push_str(&format!("- {}\n", test.name));
83                }
84
85                Ok(out)
86            }
87            Self::PromptText => {
88                let mut out = String::new();
89                out.push_str(&format!("TASK: {}\n", pack.task));
90                for result in &pack.primary_files {
91                    out.push_str(&format!("[FILE: {}]\n", result.path.display()));
92                    if let Some(range) = &result.line_range {
93                        out.push_str(&format!("SYM: lines {}-{}\n", range.start, range.end));
94                    }
95                    out.push_str(&result.snippet);
96                    out.push_str("\n[END FILE]\n");
97                }
98                for result in &pack.supporting_files {
99                    out.push_str(&format!("IMPACT: {}\n", result.path.display()));
100                }
101                for test in &pack.validation_plan.tests {
102                    out.push_str(&format!("TEST: {}\n", test.name));
103                }
104                Ok(out)
105            }
106        }
107    }
108}
109
110fn write_markdown_confidence_breakdown(out: &mut String, breakdown: &ConfidenceBreakdown) {
111    if !breakdown.blockers.is_empty() {
112        out.push_str("- Blockers:\n");
113        for blocker in &breakdown.blockers {
114            out.push_str(&format!("  - {blocker}\n"));
115        }
116    }
117    if !breakdown.caveats.is_empty() {
118        out.push_str("- Caveats:\n");
119        for caveat in &breakdown.caveats {
120            out.push_str(&format!("  - {caveat}\n"));
121        }
122    }
123    out.push_str("- Components:\n");
124    for component in &breakdown.components {
125        out.push_str(&format!(
126            "  - `{}` score `{:.2}`, weight `{:.2}`, contribution `{:.2}`\n",
127            component.signal, component.normalized_value, component.weight, component.contribution
128        ));
129    }
130}
131
132pub struct ContextPackBuilder<'a> {
133    store: &'a dyn OkStore,
134    ranking_options: RankingOptions,
135}
136
137impl<'a> ContextPackBuilder<'a> {
138    pub fn new(store: &'a dyn OkStore) -> Self {
139        Self {
140            store,
141            ranking_options: RankingOptions::default(),
142        }
143    }
144
145    pub fn with_ranking_options(mut self, ranking_options: RankingOptions) -> Self {
146        self.ranking_options = ranking_options;
147        self
148    }
149
150    pub fn build(&self, task: &str, limit: usize) -> Result<ContextPack> {
151        let files = self.store.list_files(usize::MAX, 0)?;
152        let chunks = self.store.all_chunks()?;
153        let symbols = self.store.list_symbols(None, usize::MAX, 0)?;
154        let intent = TaskSearchIntent::parse(task);
155        let primary = rerank_for_task(
156            search_candidates(&chunks, &files, &symbols, task, limit, &intent)?,
157            &intent,
158            &self.ranking_options,
159        );
160        self.build_from_primary_with_impact(task, limit, primary, true)
161    }
162
163    pub fn build_from_primary(
164        &self,
165        task: &str,
166        limit: usize,
167        primary: Vec<SearchResult>,
168    ) -> Result<ContextPack> {
169        self.build_from_primary_with_impact(
170            task,
171            limit,
172            rerank_with_options(primary, &self.ranking_options),
173            false,
174        )
175    }
176
177    fn build_from_primary_with_impact(
178        &self,
179        task: &str,
180        limit: usize,
181        primary: Vec<SearchResult>,
182        expand_impact: bool,
183    ) -> Result<ContextPack> {
184        let mut primary = primary;
185        augment_primary_with_runtime(self.store, task, &mut primary, limit)?;
186        let primary_symbols = primary
187            .iter()
188            .filter_map(|result| result.symbol.clone())
189            .take(10)
190            .collect::<Vec<_>>();
191        let mut tests = Vec::new();
192        let selector = TestSelector::new(self.store as &dyn open_kioku_storage::MetadataStore);
193        for result in primary.iter().take(3) {
194            tests.extend(selector.for_changed_path_with_evidence(&result.path, 5)?);
195        }
196        tests.truncate(10);
197        let impact = if expand_impact {
198            if let Some(first) = primary.first() {
199                ImpactEngine::new(self.store as &dyn open_kioku_storage::MetadataStore)
200                    .for_file(&first.path)?
201            } else {
202                empty_impact(task)
203            }
204        } else if primary.is_empty() {
205            empty_impact(task)
206        } else {
207            bounded_impact(task)
208        };
209
210        let mut dependency_edges: Vec<GraphEdge> = Vec::new();
211        for result in primary.iter().take(5) {
212            let node_id = format!("file:{}", result.path.display());
213            if let Ok((_nodes, edges)) = self.store.neighbors(&node_id, 20) {
214                dependency_edges.extend(edges);
215            }
216        }
217        dependency_edges.sort_by(|a, b| a.id.0.cmp(&b.id.0));
218        dependency_edges.dedup_by(|a, b| a.id == b.id);
219        dependency_edges.truncate(50);
220
221        let mut primary_files = primary.iter().take(limit).cloned().collect::<Vec<_>>();
222        let mut supporting_files = impact
223            .direct_impacts
224            .iter()
225            .take(10)
226            .cloned()
227            .collect::<Vec<_>>();
228        let runtime_signals =
229            runtime_signals_for_context(self.store, task, &primary_files, &supporting_files, 12)?;
230        annotate_results_with_runtime(&mut primary_files, &runtime_signals);
231        annotate_results_with_runtime(&mut supporting_files, &runtime_signals);
232        annotate_results_with_git_history(self.store, &mut primary_files)?;
233        annotate_results_with_git_history(self.store, &mut supporting_files)?;
234        let runtime_evidence = runtime_signals
235            .iter()
236            .map(runtime_signal_evidence)
237            .collect::<Vec<_>>();
238        let git_evidence = git_history_evidence_for_results(self.store, &primary_files)?;
239
240        let evidence = primary_files
241            .iter()
242            .take(20)
243            .flat_map(|result| {
244                result.evidence.iter().map(|msg| Evidence {
245                    id: EvidenceId::new(format!("context:{}", result.path.display())),
246                    source: "open-kioku-search".into(),
247                    source_type: EvidenceSourceType::Lexical,
248                    file_range: result
249                        .line_range
250                        .clone()
251                        .map(|lr| open_kioku_core::FileRange {
252                            path: result.path.clone(),
253                            line_range: Some(lr),
254                        }),
255                    symbol_id: result.symbol.as_ref().map(|s| s.id.clone()),
256                    confidence: Confidence::Medium,
257                    message: msg.clone(),
258                    indexed_at: Utc::now(),
259                })
260            })
261            .chain(impact.evidence.clone())
262            .chain(runtime_evidence.clone())
263            .chain(git_evidence)
264            .collect::<Vec<_>>();
265        let allowed_files = primary
266            .iter()
267            .take(8)
268            .map(|result| result.path.clone())
269            .collect::<Vec<_>>();
270        let confidence_breakdown = confidence_for_context(
271            &primary_files,
272            &supporting_files,
273            &tests,
274            &impact.risk_report,
275            allowed_files.len(),
276            evidence.len(),
277            runtime_signals.len(),
278        );
279        let negative_evidence = negative_evidence_for_context(
280            task,
281            &primary_files,
282            &supporting_files,
283            &tests,
284            &impact.risk_report,
285            &runtime_signals,
286        );
287        let boundary_evidence_refs = primary_files
288            .iter()
289            .flat_map(|result| result.derived_evidence_ids())
290            .collect::<Vec<_>>();
291        let confidence_summary = confidence_summary(&confidence_breakdown);
292        Ok(ContextPack {
293            task: task.into(),
294            intent: classify_intent(task).into(),
295            primary_files,
296            primary_symbols,
297            supporting_files,
298            dependency_edges,
299            runtime_signals,
300            test_candidates: tests.clone(),
301            risk_report: impact.risk_report,
302            recommended_change_boundary: ChangeBoundary {
303                allowed_files,
304                caution_files: impact
305                    .direct_impacts
306                    .iter()
307                    .take(8)
308                    .map(|result| result.path.clone())
309                    .collect(),
310                forbidden_files: Vec::new(),
311                evidence_refs: boundary_evidence_refs,
312                ..Default::default()
313            },
314            validation_plan: ValidationPlan {
315                commands: tests
316                    .iter()
317                    .filter_map(|test| test.command.clone())
318                    .collect(),
319                tests,
320                requires_approval: true,
321                evidence: evidence.clone(),
322            },
323            evidence,
324            negative_evidence,
325            confidence_summary,
326            confidence_breakdown,
327        })
328    }
329}
330
331fn negative_evidence_for_context(
332    task: &str,
333    primary_files: &[SearchResult],
334    supporting_files: &[SearchResult],
335    tests: &[open_kioku_core::TestTarget],
336    risk: &RiskReport,
337    runtime_signals: &[RuntimeSignal],
338) -> Vec<NegativeEvidence> {
339    let mut items = Vec::new();
340    if primary_files.is_empty() {
341        items.push(NegativeEvidence {
342            query: task.into(),
343            scope: "primary_context".into(),
344            inspected_sources: vec!["lexical_search".into(), "ranking_fusion".into()],
345            reason: "no primary context matched the task".into(),
346            confidence: 0.95,
347            suggested_next_probe: Some("Run `ok search <task> --explain-ranking` with named symbols or paths from the ticket.".into()),
348        });
349    }
350    if exact_reference_count(primary_files, supporting_files) == 0 {
351        items.push(NegativeEvidence {
352            query: task.into(),
353            scope: "exact_references".into(),
354            inspected_sources: vec![
355                "search_result.evidence".into(),
356                "search_result.match_reason".into(),
357            ],
358            reason: "no explicit exact symbol reference or SCIP evidence was found".into(),
359            confidence: 0.85,
360            suggested_next_probe: Some(
361                "Run `ok scip setup .` and re-index with `ok index . --with-scip auto`.".into(),
362            ),
363        });
364    }
365    if tests.is_empty() {
366        items.push(NegativeEvidence {
367            query: task.into(),
368            scope: "validation".into(),
369            inspected_sources: vec!["indexed_tests".into(), "test_selector".into()],
370            reason: "no nearby validation target was selected".into(),
371            confidence: 0.80,
372            suggested_next_probe: primary_files.first().map(|result| {
373                format!(
374                    "Run `ok tests {}` to inspect validation candidates for the top file.",
375                    result.path.display()
376                )
377            }),
378        });
379    }
380    if runtime_signals.is_empty() && runtime_signal_count(primary_files, supporting_files) == 0 {
381        items.push(NegativeEvidence {
382            query: task.into(),
383            scope: "runtime".into(),
384            inspected_sources: vec!["runtime_signals".into(), "search_result.evidence".into()],
385            reason:
386                "no runtime trace, incident, or error artifact corroborated the selected context"
387                    .into(),
388            confidence: 0.75,
389            suggested_next_probe: Some(
390                "Import or configure runtime artifacts, then rerun `ok plan`.".into(),
391            ),
392        });
393    }
394    if docs_or_tests_only(primary_files) {
395        items.push(NegativeEvidence {
396            query: task.into(),
397            scope: "boundary".into(),
398            inspected_sources: vec!["primary_context.paths".into()],
399            reason: "task anchors only matched docs or test fixtures, not source edit targets"
400                .into(),
401            confidence: 0.90,
402            suggested_next_probe: Some(
403                "Search for the production symbol or source path named by the ticket.".into(),
404            ),
405        });
406    }
407    for reason in &risk.reasons {
408        let lower = reason.to_ascii_lowercase();
409        if lower.contains("low confidence") || lower.contains("no matching") {
410            items.push(NegativeEvidence {
411                query: task.into(),
412                scope: "risk".into(),
413                inspected_sources: vec!["risk_report.reasons".into()],
414                reason: reason.clone(),
415                confidence: 0.85,
416                suggested_next_probe: Some(
417                    "Resolve the missing task anchor before editing.".into(),
418                ),
419            });
420        }
421    }
422    items
423}
424
425fn confidence_for_context(
426    primary_files: &[SearchResult],
427    supporting_files: &[SearchResult],
428    tests: &[open_kioku_core::TestTarget],
429    risk: &RiskReport,
430    allowed_file_count: usize,
431    evidence_count: usize,
432    runtime_signal_count_value: usize,
433) -> ConfidenceBreakdown {
434    ConfidenceBreakdown::from_signals(ConfidenceSignalInput {
435        primary_file_count: primary_files.len(),
436        evidence_count,
437        exact_reference_count: exact_reference_count(primary_files, supporting_files),
438        validation_count: tests.len(),
439        validation_with_command_count: tests.iter().filter(|test| test.command.is_some()).count(),
440        negative_evidence_count: negative_evidence_count(risk),
441        allowed_file_count,
442        runtime_signal_count: runtime_signal_count_value
443            + runtime_signal_count(primary_files, supporting_files),
444    })
445}
446
447fn confidence_summary(breakdown: &ConfidenceBreakdown) -> String {
448    let mut parts = vec![format!(
449        "overall {:?} ({:.2}) from explainable evidence signals",
450        breakdown.overall_enum, breakdown.overall_score
451    )];
452    if let Some(blocker) = breakdown.blockers.first() {
453        parts.push(format!("blocker: {blocker}"));
454    }
455    if let Some(caveat) = breakdown.caveats.first() {
456        parts.push(format!("caveat: {caveat}"));
457    }
458    parts.join("; ")
459}
460
461fn exact_reference_count(
462    primary_files: &[SearchResult],
463    supporting_files: &[SearchResult],
464) -> usize {
465    primary_files
466        .iter()
467        .chain(supporting_files.iter())
468        .filter(|result| has_exact_reference_signal(result))
469        .count()
470}
471
472fn has_exact_reference_signal(result: &SearchResult) -> bool {
473    result
474        .evidence
475        .iter()
476        .any(|evidence| contains_exact_reference(evidence))
477        || contains_exact_reference(&result.match_reason)
478}
479
/// Returns `true` when `value` contains, ignoring ASCII case, one of the
/// markers "exact reference", "exact symbol reference" or "scip".
fn contains_exact_reference(value: &str) -> bool {
    const MARKERS: [&str; 3] = ["exact reference", "exact symbol reference", "scip"];

    let haystack = value.to_ascii_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(*marker))
}
486
487fn runtime_signal_count(
488    primary_files: &[SearchResult],
489    supporting_files: &[SearchResult],
490) -> usize {
491    primary_files
492        .iter()
493        .chain(supporting_files.iter())
494        .filter(|result| {
495            result.score_breakdown.iter().any(|component| {
496                component.signal == "runtime_corroboration" && component.contribution > 0.0
497            }) || result
498                .evidence
499                .iter()
500                .any(|evidence| evidence.to_ascii_lowercase().contains("runtime"))
501        })
502        .count()
503}
504
505fn runtime_signals_for_context(
506    store: &dyn OkStore,
507    task: &str,
508    primary_files: &[SearchResult],
509    supporting_files: &[SearchResult],
510    limit: usize,
511) -> Result<Vec<RuntimeSignal>> {
512    let facts = store.analysis_facts(Some(EvidenceSourceType::Runtime), 500)?;
513    if facts.is_empty() {
514        return Ok(Vec::new());
515    }
516    let files = store.list_files(usize::MAX, 0)?;
517    let files_by_id = files
518        .into_iter()
519        .map(|file| (file.id.clone(), file))
520        .collect::<std::collections::HashMap<_, _>>();
521    let selected_paths = primary_files
522        .iter()
523        .chain(supporting_files.iter())
524        .map(|result| normalize_path(&result.path))
525        .collect::<std::collections::HashSet<_>>();
526    let searchable_context = primary_files
527        .iter()
528        .chain(supporting_files.iter())
529        .flat_map(|result| {
530            [
531                result.path.display().to_string(),
532                result.snippet.clone(),
533                result.match_reason.clone(),
534                result.evidence.join(" "),
535            ]
536        })
537        .chain(std::iter::once(task.to_string()))
538        .collect::<Vec<_>>()
539        .join(" ")
540        .to_ascii_lowercase();
541    let mut signals = facts
542        .into_iter()
543        .filter_map(|fact| {
544            let file = files_by_id.get(&fact.file_id)?;
545            if selected_paths.contains(&normalize_path(&file.path))
546                || runtime_fact_matches_query(&fact, &searchable_context)
547            {
548                Some(runtime_signal_from_fact(&fact, file))
549            } else {
550                None
551            }
552        })
553        .collect::<Vec<_>>();
554    signals.sort_by(|a, b| a.id.cmp(&b.id));
555    signals.dedup_by(|a, b| a.id == b.id);
556    signals.truncate(limit);
557    Ok(signals)
558}
559
560fn augment_primary_with_runtime(
561    store: &dyn OkStore,
562    task: &str,
563    primary: &mut Vec<SearchResult>,
564    limit: usize,
565) -> Result<()> {
566    let facts = store.analysis_facts(Some(EvidenceSourceType::Runtime), 500)?;
567    if facts.is_empty() {
568        return Ok(());
569    }
570    let task = task.to_ascii_lowercase();
571    let files = store.list_files(usize::MAX, 0)?;
572    let files_by_id = files
573        .into_iter()
574        .map(|file| (file.id.clone(), file))
575        .collect::<std::collections::HashMap<_, _>>();
576    let mut existing_paths = primary
577        .iter()
578        .map(|result| normalize_path(&result.path))
579        .collect::<std::collections::HashSet<_>>();
580    let mut additions = Vec::new();
581    for fact in facts
582        .into_iter()
583        .filter(|fact| runtime_fact_matches_query(fact, &task))
584    {
585        let Some(file) = files_by_id.get(&fact.file_id) else {
586            continue;
587        };
588        let normalized_path = normalize_path(&file.path);
589        if !existing_paths.insert(normalized_path) {
590            continue;
591        }
592        if let Some(result) = runtime_seed_result(store, file, &fact)? {
593            additions.push(result);
594        }
595        if additions.len() >= limit {
596            break;
597        }
598    }
599    primary.extend(additions);
600    primary.sort_by(|a, b| {
601        b.score
602            .partial_cmp(&a.score)
603            .unwrap_or(std::cmp::Ordering::Equal)
604            .then_with(|| a.path.cmp(&b.path))
605    });
606    primary.truncate(limit.max(1));
607    Ok(())
608}
609
610fn runtime_seed_result(
611    store: &dyn OkStore,
612    file: &File,
613    fact: &AnalysisFact,
614) -> Result<Option<SearchResult>> {
615    let chunks = store.chunks_for_file(&file.id)?;
616    let snippet = chunks
617        .iter()
618        .find(|chunk| {
619            fact.range
620                .as_ref()
621                .map(|range| chunk.range.start <= range.start && range.start <= chunk.range.end)
622                .unwrap_or(false)
623        })
624        .or_else(|| chunks.first())
625        .map(|chunk| chunk.text.clone())
626        .unwrap_or_else(|| fact.target.clone());
627    let evidence = vec![format!(
628        "runtime corroboration from local artifact `{}` targeting `{}`",
629        fact.source, fact.target
630    )];
631    Ok(Some(SearchResult {
632        path: file.path.clone(),
633        line_range: fact.range.clone(),
634        snippet,
635        symbol: None,
636        score: 1.35,
637        match_reason: "runtime artifact matched task intent".into(),
638        evidence,
639        evidence_refs: vec![fact.id.clone()],
640        confidence: fact.confidence.score(),
641        score_breakdown: vec![ScoreComponent::single(
642            "runtime_corroboration",
643            1.35,
644            vec![fact.id.clone()],
645            "local runtime trace/log/incident artifact matched the task",
646        )],
647    }))
648}
649
650fn annotate_results_with_runtime(results: &mut [SearchResult], signals: &[RuntimeSignal]) {
651    if signals.is_empty() {
652        return;
653    }
654    for result in results {
655        let result_path = normalize_path(&result.path);
656        let searchable = format!(
657            "{} {} {}",
658            result.snippet,
659            result.match_reason,
660            result.evidence.join(" ")
661        )
662        .to_ascii_lowercase();
663        let matched = signals
664            .iter()
665            .filter(|signal| {
666                signal
667                    .file_range
668                    .as_ref()
669                    .map(|range| normalize_path(&range.path) == result_path)
670                    .unwrap_or(false)
671                    || runtime_message_tokens(&signal.message)
672                        .iter()
673                        .any(|token| searchable.contains(token))
674            })
675            .take(3)
676            .collect::<Vec<_>>();
677        if matched.is_empty() {
678            continue;
679        }
680        let evidence_ids = matched
681            .iter()
682            .map(|signal| signal.id.clone())
683            .collect::<Vec<_>>();
684        let labels = matched
685            .iter()
686            .map(|signal| signal.kind.as_str())
687            .collect::<Vec<_>>()
688            .join(", ");
689        for signal in &matched {
690            let evidence = format!(
691                "runtime corroboration `{}`: {}",
692                signal.kind, signal.message
693            );
694            if !result.evidence.contains(&evidence) {
695                result.evidence.push(evidence);
696            }
697        }
698        for id in &evidence_ids {
699            if !result.evidence_refs.contains(id) {
700                result.evidence_refs.push(id.clone());
701            }
702        }
703        result.score += 0.15 * matched.len() as f32;
704        result.confidence = result.confidence.max(0.75);
705        result.score_breakdown.push(ScoreComponent::adjustment(
706            "runtime_corroboration",
707            0.15 * matched.len() as f32,
708            evidence_ids,
709            format!("local runtime artifact corroborates this context result: {labels}"),
710        ));
711    }
712}
713
714fn runtime_signal_from_fact(fact: &AnalysisFact, file: &File) -> RuntimeSignal {
715    RuntimeSignal {
716        id: fact.id.clone(),
717        kind: runtime_kind(fact),
718        message: format!("{}: {}", fact.message, fact.target),
719        file_range: Some(FileRange {
720            path: file.path.clone(),
721            line_range: fact.range.clone(),
722        }),
723        occurred_at: None,
724        confidence: fact.confidence,
725    }
726}
727
728fn runtime_signal_evidence(signal: &RuntimeSignal) -> Evidence {
729    Evidence {
730        id: EvidenceId::new(signal.id.clone()),
731        source: "open-kioku-runtime".into(),
732        source_type: EvidenceSourceType::Runtime,
733        file_range: signal.file_range.clone(),
734        symbol_id: None,
735        confidence: signal.confidence,
736        message: signal.message.clone(),
737        indexed_at: Utc::now(),
738    }
739}
740
741fn annotate_results_with_git_history(
742    store: &dyn OkStore,
743    results: &mut [SearchResult],
744) -> Result<()> {
745    if results.is_empty() {
746        return Ok(());
747    }
748    let facts = store.analysis_facts(Some(EvidenceSourceType::GitHistory), 10_000)?;
749    if facts.is_empty() {
750        return Ok(());
751    }
752    let files = store.list_files(usize::MAX, 0)?;
753    let files_by_path = files
754        .into_iter()
755        .map(|file| (normalize_path(&file.path), file))
756        .collect::<std::collections::HashMap<_, _>>();
757    for result in results {
758        let Some(file) = files_by_path.get(&normalize_path(&result.path)) else {
759            continue;
760        };
761        let matched = facts
762            .iter()
763            .filter(|fact| fact.file_id == file.id)
764            .take(3)
765            .collect::<Vec<_>>();
766        if matched.is_empty() {
767            continue;
768        }
769        let evidence_ids = matched
770            .iter()
771            .map(|fact| fact.id.clone())
772            .collect::<Vec<_>>();
773        let labels = matched
774            .iter()
775            .map(|fact| fact.target.as_str())
776            .collect::<Vec<_>>()
777            .join(", ");
778        for fact in &matched {
779            let evidence = format!(
780                "git co-change from local history: `{}` ({})",
781                fact.target, fact.message
782            );
783            if !result.evidence.contains(&evidence) {
784                result.evidence.push(evidence);
785            }
786        }
787        for id in &evidence_ids {
788            if !result.evidence_refs.contains(id) {
789                result.evidence_refs.push(id.clone());
790            }
791        }
792        result.score += 0.12 * matched.len() as f32;
793        result.confidence = result.confidence.max(0.70);
794        result.score_breakdown.push(ScoreComponent::adjustment(
795            "git_cochange",
796            0.12 * matched.len() as f32,
797            evidence_ids,
798            format!("local git history says this file co-changed with: {labels}"),
799        ));
800    }
801    Ok(())
802}
803
804fn git_history_evidence_for_results(
805    store: &dyn OkStore,
806    results: &[SearchResult],
807) -> Result<Vec<Evidence>> {
808    if results.is_empty() {
809        return Ok(Vec::new());
810    }
811    let facts = store.analysis_facts(Some(EvidenceSourceType::GitHistory), 10_000)?;
812    if facts.is_empty() {
813        return Ok(Vec::new());
814    }
815    let files = store.list_files(usize::MAX, 0)?;
816    let paths_by_id = files
817        .into_iter()
818        .map(|file| (file.id, file.path))
819        .collect::<std::collections::HashMap<_, _>>();
820    let selected_paths = results
821        .iter()
822        .map(|result| normalize_path(&result.path))
823        .collect::<std::collections::HashSet<_>>();
824    let mut evidence = Vec::new();
825    for fact in facts {
826        let Some(path) = paths_by_id.get(&fact.file_id) else {
827            continue;
828        };
829        if !selected_paths.contains(&normalize_path(path)) {
830            continue;
831        }
832        evidence.push(Evidence {
833            id: EvidenceId::new(fact.id.clone()),
834            source: fact.source.clone(),
835            source_type: EvidenceSourceType::GitHistory,
836            file_range: Some(FileRange {
837                path: path.clone(),
838                line_range: None,
839            }),
840            symbol_id: None,
841            confidence: fact.confidence,
842            message: format!("{}: {}", fact.message, fact.target),
843            indexed_at: Utc::now(),
844        });
845        if evidence.len() >= 20 {
846            break;
847        }
848    }
849    Ok(evidence)
850}
851
852fn runtime_kind(fact: &AnalysisFact) -> String {
853    match (&fact.target_kind, &fact.edge_type) {
854        (GraphNodeType::Endpoint, GraphEdgeType::ExposesEndpoint) => "endpoint".into(),
855        (GraphNodeType::DatabaseTable, GraphEdgeType::ReadsTable) => "sql_read".into(),
856        (GraphNodeType::DatabaseTable, GraphEdgeType::WritesTable) => "sql_write".into(),
857        (GraphNodeType::RuntimeError, _) => "incident".into(),
858        (_, edge) => format!("{edge:?}").to_ascii_lowercase(),
859    }
860}
861
862fn runtime_fact_matches_query(fact: &AnalysisFact, searchable_context: &str) -> bool {
863    runtime_message_tokens(&fact.target)
864        .iter()
865        .any(|token| searchable_context.contains(token))
866        || runtime_message_tokens(&fact.message)
867            .iter()
868            .any(|token| searchable_context.contains(token))
869}
870
/// Splits `value` into at most eight lowercase tokens of four or more bytes.
///
/// A token is a run of ASCII alphanumerics, `_`, `/` and `.`; leading and
/// trailing `/` are removed before the length check.
fn runtime_message_tokens(value: &str) -> Vec<String> {
    let is_separator =
        |ch: char| !(ch.is_ascii_alphanumeric() || matches!(ch, '_' | '/' | '.'));

    let mut tokens = Vec::new();
    for piece in value.split(is_separator) {
        if tokens.len() == 8 {
            break;
        }
        let trimmed = piece.trim_matches('/');
        // ASCII lowercasing never changes the byte length, so test it first.
        if trimmed.len() < 4 {
            continue;
        }
        tokens.push(trimmed.to_ascii_lowercase());
    }
    tokens
}
879
/// Renders `path` as a string with forward slashes and without leading `./`.
///
/// Backslashes are replaced by `/` first, then every leading `./` is dropped.
/// Non-UTF-8 parts of the path are converted lossily.
fn normalize_path(path: &std::path::Path) -> String {
    let unified = path.to_string_lossy().replace('\\', "/");
    let mut remainder = unified.as_str();
    while let Some(stripped) = remainder.strip_prefix("./") {
        remainder = stripped;
    }
    remainder.to_owned()
}
886
887fn negative_evidence_count(risk: &RiskReport) -> usize {
888    risk.reasons
889        .iter()
890        .filter(|reason| {
891            let lower = reason.to_ascii_lowercase();
892            lower.contains("low confidence")
893                || lower.contains("no matching")
894                || lower.contains("missing")
895                || lower.contains("absent")
896                || lower.contains("unavailable")
897                || lower.contains("weak")
898                || lower.contains("unknown")
899        })
900        .count()
901}
902
903fn docs_or_tests_only(results: &[SearchResult]) -> bool {
904    !results.is_empty()
905        && results
906            .iter()
907            .all(|result| is_docs_or_test_path(&result.path.to_string_lossy()))
908}
909
/// Heuristically decides whether `path` points at documentation or test code.
///
/// The comparison is ASCII case-insensitive and treats `\` as `/`, so
/// Windows-style paths are classified the same way as their `/` form.
/// A path matches when it:
/// - lives under a `docs/`, `test/` or `tests/` directory,
/// - ends in `.md` or `.mdx`,
/// - contains `_test.` (for example `foo_test.go`), or
/// - contains `test_` at the start of a word (for example `test_utils.py` or
///   `my_test_helpers.rs`).
///
/// The `test_` marker must not be preceded by a letter or digit, so ordinary
/// words that merely end in "test" (`latest_version.rs`, `contest_entry.rs`)
/// are not mistaken for test files.
fn is_docs_or_test_path(path: &str) -> bool {
    let path = path.to_ascii_lowercase().replace('\\', "/");

    // `test_` only counts when it starts a word: at the very beginning of the
    // path or right after a non-alphanumeric character such as `/`, `_` or `-`.
    let has_test_prefix = path.match_indices("test_").any(|(start, _)| {
        path[..start]
            .chars()
            .next_back()
            .map_or(true, |previous| !previous.is_alphanumeric())
    });

    path.starts_with("docs/")
        || path.starts_with("test/")
        || path.starts_with("tests/")
        || path.contains("/docs/")
        || path.ends_with(".md")
        || path.ends_with(".mdx")
        || path.contains("/test/")
        || path.contains("/tests/")
        || path.contains("_test.")
        || has_test_prefix
}
923
924#[derive(Debug, Clone, Default)]
925struct TaskSearchIntent {
926    primary_anchors: Vec<String>,
927    reference_anchors: Vec<String>,
928    ticket_anchors: Vec<String>,
929    path_anchors: Vec<String>,
930}
931
932impl TaskSearchIntent {
933    fn parse(task: &str) -> Self {
934        let mut intent = Self::default();
935        let lower = task.to_ascii_lowercase();
936        let reference_start = reference_marker_start(&lower).unwrap_or(task.len());
937        let edit_side = task.get(..reference_start).unwrap_or(task);
938        let reference_side = task.get(reference_start..).unwrap_or_default();
939        let all_identifiers = identifiers(task);
940
941        intent.primary_anchors = identifiers(edit_side);
942        intent.reference_anchors = identifiers(reference_side);
943        if intent.primary_anchors.is_empty() {
944            if let Some(first) = all_identifiers.first() {
945                intent.primary_anchors.push(first.clone());
946            }
947        }
948        for value in all_identifiers {
949            if !intent.primary_anchors.contains(&value)
950                && !intent.reference_anchors.contains(&value)
951            {
952                intent.reference_anchors.push(value);
953            }
954        }
955
956        for token in task.split_whitespace() {
957            let cleaned = token.trim_matches(|ch: char| {
958                !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '/' || ch == '.')
959            });
960            if is_ticket_id(cleaned) && !intent.ticket_anchors.iter().any(|v| v == cleaned) {
961                intent.ticket_anchors.push(cleaned.to_string());
962            }
963            if is_path_like(cleaned) {
964                let normalized = cleaned.trim_matches('/');
965                if !normalized.is_empty() && !intent.path_anchors.iter().any(|v| v == normalized) {
966                    intent.path_anchors.push(normalized.to_string());
967                }
968            }
969        }
970
971        intent
972    }
973
974    fn search_terms(&self, task: &str) -> Vec<String> {
975        let mut terms = vec![task.to_string()];
976        for term in self
977            .ticket_anchors
978            .iter()
979            .chain(self.path_anchors.iter())
980            .chain(self.primary_anchors.iter())
981            .chain(self.reference_anchors.iter())
982        {
983            if term.len() >= 3 && !terms.iter().any(|existing| existing == term) {
984                terms.push(term.clone());
985            }
986        }
987        terms
988    }
989}
990
991fn search_candidates(
992    chunks: &[CodeChunk],
993    files: &[File],
994    symbols: &[Symbol],
995    task: &str,
996    limit: usize,
997    intent: &TaskSearchIntent,
998) -> Result<Vec<SearchResult>> {
999    let mut merged = std::collections::BTreeMap::<String, SearchResult>::new();
1000    let per_anchor_limit = limit.clamp(8, 40);
1001    for term in intent.search_terms(task) {
1002        for mut result in search_chunks(chunks, files, symbols, &term, per_anchor_limit)? {
1003            if term != task {
1004                result
1005                    .evidence
1006                    .push(format!("task anchor `{term}` matched"));
1007                result.match_reason = format!("{}; task anchor `{term}`", result.match_reason);
1008            }
1009            let key = result_key(&result);
1010            match merged.get_mut(&key) {
1011                Some(existing) => {
1012                    if result.score > existing.score {
1013                        existing.score = result.score;
1014                        existing.snippet = result.snippet;
1015                        existing.line_range = result.line_range;
1016                        existing.symbol = result.symbol;
1017                        existing.score_breakdown = result.score_breakdown;
1018                    }
1019                    for evidence in result.evidence {
1020                        if !existing.evidence.contains(&evidence) {
1021                            existing.evidence.push(evidence);
1022                        }
1023                    }
1024                    if !existing.match_reason.contains(&term) {
1025                        existing.match_reason =
1026                            format!("{}; task anchor `{term}`", existing.match_reason);
1027                    }
1028                    existing.reconcile_score_breakdown();
1029                }
1030                None => {
1031                    merged.insert(key, result);
1032                }
1033            }
1034        }
1035    }
1036
1037    Ok(merged.into_values().collect())
1038}
1039
1040fn rerank_for_task(
1041    results: Vec<SearchResult>,
1042    intent: &TaskSearchIntent,
1043    ranking_options: &RankingOptions,
1044) -> Vec<SearchResult> {
1045    let mut results = rerank_with_options(results, ranking_options);
1046    for result in &mut results {
1047        let haystack = searchable_result_text(result);
1048        for anchor in &intent.primary_anchors {
1049            if contains_anchor(&haystack, anchor) {
1050                result.score += 0.65;
1051                result.confidence = result.confidence.max(0.85);
1052                result
1053                    .evidence
1054                    .push(format!("primary task anchor `{anchor}` matched"));
1055                result.add_score_component(ScoreComponent::adjustment(
1056                    "primary_task_anchor_boost",
1057                    0.65,
1058                    result.derived_evidence_ids(),
1059                    format!("primary task anchor `{anchor}` matched result text"),
1060                ));
1061            }
1062        }
1063        for anchor in &intent.reference_anchors {
1064            if contains_anchor(&haystack, anchor) {
1065                result.score += 0.25;
1066                result.confidence = result.confidence.max(0.65);
1067                result
1068                    .evidence
1069                    .push(format!("reference task anchor `{anchor}` matched"));
1070                result.add_score_component(ScoreComponent::adjustment(
1071                    "reference_task_anchor_boost",
1072                    0.25,
1073                    result.derived_evidence_ids(),
1074                    format!("reference task anchor `{anchor}` matched result text"),
1075                ));
1076            }
1077        }
1078        for anchor in intent
1079            .ticket_anchors
1080            .iter()
1081            .chain(intent.path_anchors.iter())
1082        {
1083            if contains_anchor(&haystack, anchor) {
1084                result.score += 0.35;
1085                result.confidence = result.confidence.max(0.75);
1086                result
1087                    .evidence
1088                    .push(format!("ticket/path task anchor `{anchor}` matched"));
1089                result.add_score_component(ScoreComponent::adjustment(
1090                    "ticket_or_path_anchor_boost",
1091                    0.35,
1092                    result.derived_evidence_ids(),
1093                    format!("ticket/path anchor `{anchor}` matched result text"),
1094                ));
1095            }
1096        }
1097        result.reconcile_score_breakdown();
1098    }
1099    results.sort_by(|a, b| {
1100        b.score
1101            .partial_cmp(&a.score)
1102            .unwrap_or(std::cmp::Ordering::Equal)
1103            .then_with(|| a.path.cmp(&b.path))
1104    });
1105    results
1106}
1107
1108fn result_key(result: &SearchResult) -> String {
1109    format!(
1110        "{}:{}-{}",
1111        result.path.display(),
1112        result
1113            .line_range
1114            .as_ref()
1115            .map(|range| range.start)
1116            .unwrap_or_default(),
1117        result
1118            .line_range
1119            .as_ref()
1120            .map(|range| range.end)
1121            .unwrap_or_default()
1122    )
1123}
1124
1125fn searchable_result_text(result: &SearchResult) -> String {
1126    format!(
1127        "{} {} {} {}",
1128        result.path.display(),
1129        result.snippet,
1130        result
1131            .symbol
1132            .as_ref()
1133            .map(|symbol| symbol.qualified_name.as_str())
1134            .unwrap_or_default(),
1135        result
1136            .symbol
1137            .as_ref()
1138            .map(|symbol| symbol.name.as_str())
1139            .unwrap_or_default()
1140    )
1141    .to_ascii_lowercase()
1142}
1143
1144fn contains_anchor(haystack: &str, anchor: &str) -> bool {
1145    haystack.contains(&anchor.to_ascii_lowercase())
1146        || haystack.contains(&normalize_identifier(anchor))
1147}
1148
/// Finds the byte offset of the earliest reference marker in `lower`.
///
/// Markers are space-delimited phrases such as " similar to " or " based on ".
/// The search is case-sensitive, so callers pass already-lowercased text.
/// Returns `None` when no marker occurs.
fn reference_marker_start(lower: &str) -> Option<usize> {
    const MARKERS: [&str; 8] = [
        " similar to ",
        " like ",
        " copy from ",
        " copied from ",
        " mirror ",
        " mirrored from ",
        " based on ",
        " reference ",
    ];

    let mut earliest: Option<usize> = None;
    for marker in &MARKERS {
        if let Some(position) = lower.find(*marker) {
            earliest = Some(earliest.map_or(position, |best| best.min(position)));
        }
    }
    earliest
}
1164
1165fn identifiers(value: &str) -> Vec<String> {
1166    let mut out = Vec::new();
1167    for token in value.split(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_' || ch == '-')) {
1168        let token = token.trim_matches('-');
1169        if is_named_identifier(token) && !out.iter().any(|existing| existing == token) {
1170            out.push(token.to_string());
1171        }
1172    }
1173    out
1174}
1175
1176fn is_named_identifier(value: &str) -> bool {
1177    if value.len() < 3 || is_ticket_id(value) {
1178        return false;
1179    }
1180    let has_lower = value.chars().any(|ch| ch.is_ascii_lowercase());
1181    let has_upper = value.chars().any(|ch| ch.is_ascii_uppercase());
1182    let has_digit = value.chars().any(|ch| ch.is_ascii_digit());
1183    let has_separator = value.contains('_') || value.contains('-');
1184    (has_lower && has_upper) || has_separator || (has_digit && has_upper)
1185}
1186
/// Recognises ticket ids of the form `PREFIX-NUMBER`.
///
/// The value is split at its first `-`; the prefix must be at least two ASCII
/// uppercase letters and the rest at least two ASCII digits.
fn is_ticket_id(value: &str) -> bool {
    match value.split_once('-') {
        Some((project, sequence)) => {
            project.len() >= 2
                && sequence.len() >= 2
                && project.bytes().all(|byte| byte.is_ascii_uppercase())
                && sequence.bytes().all(|byte| byte.is_ascii_digit())
        }
        None => false,
    }
}
1196
/// Reports whether `value` looks like a file path: it either contains a `/` or
/// ends with one of the recognised file extensions.
fn is_path_like(value: &str) -> bool {
    const KNOWN_EXTENSIONS: [&str; 9] = [
        ".rs", ".ts", ".tsx", ".js", ".jsx", ".java", ".py", ".go", ".md",
    ];

    if value.contains('/') {
        return true;
    }
    KNOWN_EXTENSIONS
        .iter()
        .any(|extension| value.ends_with(*extension))
}
1209
/// Splits an identifier or path into lowercase words joined by single spaces.
///
/// Word breaks happen at `_`, `-`, `/`, `.` and whitespace, and before an ASCII
/// uppercase letter that follows an ASCII lowercase letter or digit, so
/// `PublishRestrictionsMutation` becomes `publish restrictions mutation`.
/// Runs of uppercase letters stay together (`HTTPServer` becomes `httpserver`).
fn normalize_identifier(value: &str) -> String {
    let mut words: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut after_lower_or_digit = false;

    for ch in value.chars() {
        let is_separator = matches!(ch, '_' | '-' | '/' | '.') || ch.is_whitespace();
        let starts_camel_word = ch.is_ascii_uppercase() && after_lower_or_digit;

        if (is_separator || starts_camel_word) && !current.is_empty() {
            words.push(std::mem::take(&mut current));
        }
        if is_separator {
            after_lower_or_digit = false;
            continue;
        }
        current.push(ch.to_ascii_lowercase());
        after_lower_or_digit = ch.is_ascii_lowercase() || ch.is_ascii_digit();
    }
    if !current.is_empty() {
        words.push(current);
    }
    words.join(" ")
}
1227
/// Classifies a task description by case-insensitive substring checks.
///
/// Returns `"code_change"` when the task mentions fixing, adding, changing or
/// implementing something, `"validation"` when it otherwise mentions "test",
/// and `"understanding"` in every other case.
fn classify_intent(task: &str) -> &'static str {
    const CHANGE_VERBS: [&str; 4] = ["fix", "add", "change", "implement"];

    let lowered = task.to_ascii_lowercase();
    if CHANGE_VERBS.iter().any(|verb| lowered.contains(*verb)) {
        return "code_change";
    }
    if lowered.contains("test") {
        return "validation";
    }
    "understanding"
}
1242
1243fn empty_impact(task: &str) -> open_kioku_core::ImpactReport {
1244    open_kioku_core::ImpactReport {
1245        target: task.into(),
1246        direct_impacts: Vec::new(),
1247        indirect_impacts: Vec::new(),
1248        risk_report: RiskReport {
1249            level: "unknown".into(),
1250            score: 0.0,
1251            reasons: vec!["no matching indexed files found".into()],
1252        },
1253        evidence: vec![Evidence {
1254            id: EvidenceId::new("context:no-match"),
1255            source: "open-kioku-context".into(),
1256            source_type: EvidenceSourceType::Lexical,
1257            file_range: None,
1258            symbol_id: None,
1259            confidence: Confidence::Low,
1260            message: "context pack search did not find indexed evidence".into(),
1261            indexed_at: Utc::now(),
1262        }],
1263        score_breakdown: vec![ScoreComponent::single(
1264            "no_context_found",
1265            0.0,
1266            vec!["context:no-match".into()],
1267            "no indexed context matched the task",
1268        )],
1269    }
1270}
1271
1272fn bounded_impact(task: &str) -> open_kioku_core::ImpactReport {
1273    open_kioku_core::ImpactReport {
1274        target: task.into(),
1275        direct_impacts: Vec::new(),
1276        indirect_impacts: Vec::new(),
1277        risk_report: RiskReport {
1278            level: "low".into(),
1279            score: 0.1,
1280            reasons: vec!["bounded context built from persisted search results".into()],
1281        },
1282        evidence: vec![Evidence {
1283            id: EvidenceId::new("context:bounded-search"),
1284            source: "open-kioku-context".into(),
1285            source_type: EvidenceSourceType::Lexical,
1286            file_range: None,
1287            symbol_id: None,
1288            confidence: Confidence::Medium,
1289            message:
1290                "context pack used persisted search results without full-table impact expansion"
1291                    .into(),
1292            indexed_at: Utc::now(),
1293        }],
1294        score_breakdown: vec![ScoreComponent::single(
1295            "bounded_context_risk",
1296            0.1,
1297            vec!["context:bounded-search".into()],
1298            "bounded context used persisted search results without full impact expansion",
1299        )],
1300    }
1301}
1302
#[cfg(test)]
mod tests {
    use super::*;
    use open_kioku_core::{FileId, Language, LineRange, RepositoryId, SymbolId, SymbolKind};
    use std::path::Path;

    /// Builds a hand-written Java file record in the fixture repository `repo`.
    /// The content hash reuses the file id.
    fn java_file(id: &str, path: &str) -> File {
        File {
            id: FileId::new(id),
            repository_id: RepositoryId::new("repo"),
            path: path.into(),
            language: Language::Java,
            size_bytes: 100,
            content_hash: id.into(),
            is_generated: false,
            is_vendor: false,
        }
    }

    /// Builds a high-confidence Java class symbol spanning lines 1-20 of `file`.
    fn java_class(id: &str, name: &str, qualified_name: &str, file: &File) -> Symbol {
        Symbol {
            id: SymbolId::new(id),
            name: name.into(),
            qualified_name: qualified_name.into(),
            kind: SymbolKind::Class,
            file_id: file.id.clone(),
            range: Some(LineRange { start: 1, end: 20 }),
            language: Language::Java,
            confidence: Confidence::High,
            provenance: EvidenceSourceType::TreeSitter,
        }
    }

    /// Builds a Java chunk covering lines 1-10 of `file`, attached to `symbol`.
    fn java_chunk(id: &str, file: &File, symbol: &Symbol, text: &str) -> CodeChunk {
        CodeChunk {
            id: id.into(),
            file_id: file.id.clone(),
            range: LineRange { start: 1, end: 10 },
            language: Language::Java,
            text: text.into(),
            symbol_id: Some(symbol.id.clone()),
        }
    }

    #[test]
    fn primary_edit_anchor_outranks_reference_pattern_anchor() {
        let mutation_file = java_file("mutation", "src/PublishRestrictionsMutation.java");
        let validator_file = java_file("validator", "src/EnterpriseRateValidator.java");
        let mutation_symbol = java_class(
            "mutation-symbol",
            "PublishRestrictionsMutation",
            "api.PublishRestrictionsMutation",
            &mutation_file,
        );
        let validator_symbol = java_class(
            "validator-symbol",
            "EnterpriseRateValidator",
            "api.EnterpriseRateValidator",
            &validator_file,
        );
        let chunks = vec![
            java_chunk(
                "mutation-chunk",
                &mutation_file,
                &mutation_symbol,
                "class PublishRestrictionsMutation { void mutate() {} }",
            ),
            java_chunk(
                "validator-chunk",
                &validator_file,
                &validator_symbol,
                "class EnterpriseRateValidator { boolean validate() { return true; } }",
            ),
        ];
        let files = vec![mutation_file, validator_file];
        let symbols = vec![mutation_symbol, validator_symbol];

        // The file to edit is named before "similar to"; the pattern to copy after it.
        let task =
            "add validation in PublishRestrictionsMutation similar to EnterpriseRateValidator";
        let intent = TaskSearchIntent::parse(task);
        let candidates = search_candidates(&chunks, &files, &symbols, task, 10, &intent).unwrap();
        let results = rerank_for_task(candidates, &intent, &RankingOptions::default());

        let top = &results[0];
        assert_eq!(top.path, Path::new("src/PublishRestrictionsMutation.java"));
        assert!(top
            .evidence
            .iter()
            .any(|evidence| evidence.contains("primary task anchor")));
    }
}
open_kioku_context/lib.rs

open_kioku_context/
lib.rs