use chrono::Utc;
use open_kioku_core::{
AnalysisFact, ChangeBoundary, CodeChunk, Confidence, ConfidenceBreakdown,
ConfidenceSignalInput, ContextPack, Evidence, EvidenceId, EvidenceSourceType, File, FileRange,
GraphEdge, GraphEdgeType, GraphNodeType, NegativeEvidence, RiskReport, RuntimeSignal,
ScoreComponent, SearchResult, Symbol, ValidationPlan,
};
use open_kioku_errors::Result;
use open_kioku_impact::ImpactEngine;
use open_kioku_ranking::{rerank_with_options, RankingOptions};
use open_kioku_search_regex::search_chunks;
use open_kioku_storage::OkStore;
use open_kioku_tests::TestSelector;
// Output formats a context pack can be rendered in (see `ContextPackFormat::render`).
// NOTE(review): plain `//` comments are used on purpose here; `///` doc comments on a
// `clap::ValueEnum` may be surfaced as CLI help text - confirm before converting them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum ContextPackFormat {
// Pretty-printed JSON produced with `serde_json::to_string_pretty`.
Json,
// Markdown report: confidence, primary context, supporting impact, runtime signals
// and validation plan sections.
Markdown,
// Line-oriented text using `TASK:`, `[FILE: ...]`, `IMPACT:` and `TEST:` markers.
PromptText,
// Rendering delegated to `open_kioku_format::render_context_pack_toon`.
Toon,
}
impl ContextPackFormat {
    /// Serializes `pack` into this output format.
    ///
    /// JSON is pretty-printed through `serde_json` (the only branch that can fail),
    /// TOON is delegated to `open_kioku_format`, and the Markdown / prompt-text
    /// layouts are assembled by the private helpers below.
    pub fn render(&self, pack: &ContextPack) -> Result<String> {
        let rendered = match self {
            Self::Json => serde_json::to_string_pretty(pack)?,
            Self::Toon => open_kioku_format::render_context_pack_toon(pack),
            Self::Markdown => Self::render_markdown(pack),
            Self::PromptText => Self::render_prompt_text(pack),
        };
        Ok(rendered)
    }

    /// Builds the Markdown report: task heading, confidence, primary context with
    /// fenced snippets, supporting impact, runtime signals and validation plan.
    fn render_markdown(pack: &ContextPack) -> String {
        let breakdown = &pack.confidence_breakdown;
        let mut md = format!("# Task: {}\n\n", pack.task);

        md.push_str("## Confidence\n\n");
        md += &format!(
            "- Overall: `{:?}` (`{:.2}`)\n",
            breakdown.overall_enum, breakdown.overall_score
        );
        write_markdown_confidence_breakdown(&mut md, breakdown);
        md.push('\n');

        md.push_str("## Primary Context\n\n");
        for file in &pack.primary_files {
            md += &format!("### {}\n", file.path.display());
            if let Some(lines) = &file.line_range {
                md += &format!("Lines {}-{}\n", lines.start, lines.end);
            }
            md.push_str("```\n");
            md.push_str(&file.snippet);
            md.push_str("\n```\n\n");
        }

        md.push_str("## Supporting Impact\n\n");
        for file in &pack.supporting_files {
            md += &format!("- {}\n", file.path.display());
        }

        md.push_str("\n## Runtime Signals\n\n");
        if pack.runtime_signals.is_empty() {
            md.push_str("- None found\n");
        }
        for signal in &pack.runtime_signals {
            // A signal without a file range is reported at an "unknown location";
            // a file range without line information prints just the path.
            let location = match &signal.file_range {
                Some(range) => {
                    let lines = match &range.line_range {
                        Some(span) => format!(":{}-{}", span.start, span.end),
                        None => String::new(),
                    };
                    format!("{}{}", range.path.display(), lines)
                }
                None => String::from("unknown location"),
            };
            md += &format!(
                "- `{}` at `{}` ({:?})\n",
                signal.message, location, signal.confidence
            );
        }

        md.push_str("\n## Validation Plan\n\n");
        for test in &pack.validation_plan.tests {
            md += &format!("- {}\n", test.name);
        }
        md
    }

    /// Builds the compact prompt text: one `TASK:` line, a `[FILE: ...]` block per
    /// primary file, then `IMPACT:` and `TEST:` lines.
    fn render_prompt_text(pack: &ContextPack) -> String {
        let mut text = format!("TASK: {}\n", pack.task);
        for file in &pack.primary_files {
            text += &format!("[FILE: {}]\n", file.path.display());
            if let Some(lines) = &file.line_range {
                text += &format!("SYM: lines {}-{}\n", lines.start, lines.end);
            }
            text.push_str(&file.snippet);
            text.push_str("\n[END FILE]\n");
        }
        let impacts = pack
            .supporting_files
            .iter()
            .map(|file| format!("IMPACT: {}\n", file.path.display()));
        let tests = pack
            .validation_plan
            .tests
            .iter()
            .map(|test| format!("TEST: {}\n", test.name));
        text.extend(impacts.chain(tests));
        text
    }
}
/// Appends the confidence breakdown to `out` as nested Markdown bullets.
///
/// The blocker and caveat lists are written only when non-empty; the
/// `- Components:` heading is always written, followed by one bullet per
/// component with its score, weight and contribution to two decimals.
fn write_markdown_confidence_breakdown(out: &mut String, breakdown: &ConfidenceBreakdown) {
    let blockers = &breakdown.blockers;
    if !blockers.is_empty() {
        out.push_str("- Blockers:\n");
        let bullets: String = blockers
            .iter()
            .map(|blocker| format!(" - {blocker}\n"))
            .collect();
        out.push_str(&bullets);
    }

    let caveats = &breakdown.caveats;
    if !caveats.is_empty() {
        out.push_str("- Caveats:\n");
        let bullets: String = caveats
            .iter()
            .map(|caveat| format!(" - {caveat}\n"))
            .collect();
        out.push_str(&bullets);
    }

    out.push_str("- Components:\n");
    for part in &breakdown.components {
        let bullet = format!(
            " - `{}` score `{:.2}`, weight `{:.2}`, contribution `{:.2}`\n",
            part.signal, part.normalized_value, part.weight, part.contribution
        );
        out.push_str(&bullet);
    }
}
/// Builds [`ContextPack`]s for a task from the data held in an [`OkStore`].
pub struct ContextPackBuilder<'a> {
/// Store queried for files, chunks, symbols, graph neighbors and analysis facts.
store: &'a dyn OkStore,
/// Options handed to `rerank_with_options` when ranking primary results.
ranking_options: RankingOptions,
}
impl<'a> ContextPackBuilder<'a> {
/// Creates a builder over `store` using `RankingOptions::default()`.
pub fn new(store: &'a dyn OkStore) -> Self {
Self {
store,
ranking_options: RankingOptions::default(),
}
}
/// Replaces the ranking options (builder style; consumes and returns `self`).
pub fn with_ranking_options(mut self, ranking_options: RankingOptions) -> Self {
self.ranking_options = ranking_options;
self
}
/// Builds a context pack for `task` by searching the whole index.
///
/// Loads every file, chunk and symbol from the store, searches them with the
/// anchors parsed from `task`, reranks the hits, and assembles the pack with
/// impact expansion enabled.
pub fn build(&self, task: &str, limit: usize) -> Result<ContextPack> {
let files = self.store.list_files(usize::MAX, 0)?;
let chunks = self.store.all_chunks()?;
let symbols = self.store.list_symbols(None, usize::MAX, 0)?;
let intent = TaskSearchIntent::parse(task);
let primary = rerank_for_task(
search_candidates(&chunks, &files, &symbols, task, limit, &intent)?,
&intent,
&self.ranking_options,
);
self.build_from_primary_with_impact(task, limit, primary, true)
}
/// Builds a context pack from caller-supplied `primary` results.
///
/// The results are reranked with the configured options and impact expansion is
/// disabled, so the pack carries a bounded (or empty) impact report.
pub fn build_from_primary(
&self,
task: &str,
limit: usize,
primary: Vec<SearchResult>,
) -> Result<ContextPack> {
self.build_from_primary_with_impact(
task,
limit,
rerank_with_options(primary, &self.ranking_options),
false,
)
}
/// Shared assembly step behind `build` and `build_from_primary`.
///
/// When `expand_impact` is true the impact report is computed for the top primary
/// file; otherwise a bounded placeholder is used. An empty `primary` always yields
/// the "no match" impact report.
fn build_from_primary_with_impact(
&self,
task: &str,
limit: usize,
primary: Vec<SearchResult>,
expand_impact: bool,
) -> Result<ContextPack> {
let mut primary = primary;
// May add runtime-seeded results, then re-sorts and truncates `primary`.
augment_primary_with_runtime(self.store, task, &mut primary, limit)?;
let primary_symbols = primary
.iter()
.filter_map(|result| result.symbol.clone())
.take(10)
.collect::<Vec<_>>();
// Validation targets: up to 5 per file for the top 3 results, capped at 10 overall.
let mut tests = Vec::new();
let selector = TestSelector::new(self.store as &dyn open_kioku_storage::MetadataStore);
for result in primary.iter().take(3) {
tests.extend(selector.for_changed_path_with_evidence(&result.path, 5)?);
}
tests.truncate(10);
let impact = if expand_impact {
if let Some(first) = primary.first() {
ImpactEngine::new(self.store as &dyn open_kioku_storage::MetadataStore)
.for_file(&first.path)?
} else {
empty_impact(task)
}
} else if primary.is_empty() {
empty_impact(task)
} else {
bounded_impact(task)
};
// Graph edges around the top 5 files. A failing `neighbors` lookup is skipped
// rather than propagated, so edges are best-effort.
let mut dependency_edges: Vec<GraphEdge> = Vec::new();
for result in primary.iter().take(5) {
let node_id = format!("file:{}", result.path.display());
if let Ok((_nodes, edges)) = self.store.neighbors(&node_id, 20) {
dependency_edges.extend(edges);
}
}
// Sort by id so `dedup_by` (which only removes adjacent duplicates) removes all of them.
dependency_edges.sort_by(|a, b| a.id.0.cmp(&b.id.0));
dependency_edges.dedup_by(|a, b| a.id == b.id);
dependency_edges.truncate(50);
let mut primary_files = primary.iter().take(limit).cloned().collect::<Vec<_>>();
let mut supporting_files = impact
.direct_impacts
.iter()
.take(10)
.cloned()
.collect::<Vec<_>>();
let runtime_signals =
runtime_signals_for_context(self.store, task, &primary_files, &supporting_files, 12)?;
// Annotation mutates scores, evidence and evidence refs in place; it must run
// before the evidence list and confidence inputs below are derived.
annotate_results_with_runtime(&mut primary_files, &runtime_signals);
annotate_results_with_runtime(&mut supporting_files, &runtime_signals);
annotate_results_with_git_history(self.store, &mut primary_files)?;
annotate_results_with_git_history(self.store, &mut supporting_files)?;
let runtime_evidence = runtime_signals
.iter()
.map(runtime_signal_evidence)
.collect::<Vec<_>>();
let git_evidence = git_history_evidence_for_results(self.store, &primary_files)?;
// One `Evidence` per evidence message of the first 20 primary files, followed by
// impact, runtime and git-history evidence. Every message of one file shares the
// same `context:<path>` id.
let evidence = primary_files
.iter()
.take(20)
.flat_map(|result| {
result.evidence.iter().map(|msg| Evidence {
id: EvidenceId::new(format!("context:{}", result.path.display())),
source: "open-kioku-search".into(),
source_type: EvidenceSourceType::Lexical,
file_range: result
.line_range
.clone()
.map(|lr| open_kioku_core::FileRange {
path: result.path.clone(),
line_range: Some(lr),
}),
symbol_id: result.symbol.as_ref().map(|s| s.id.clone()),
confidence: Confidence::Medium,
message: msg.clone(),
indexed_at: Utc::now(),
})
})
.chain(impact.evidence.clone())
.chain(runtime_evidence.clone())
.chain(git_evidence)
.collect::<Vec<_>>();
// Allowed edit boundary: paths of the first 8 entries of `primary`.
let allowed_files = primary
.iter()
.take(8)
.map(|result| result.path.clone())
.collect::<Vec<_>>();
let confidence_breakdown = confidence_for_context(
&primary_files,
&supporting_files,
&tests,
&impact.risk_report,
allowed_files.len(),
evidence.len(),
runtime_signals.len(),
);
let negative_evidence = negative_evidence_for_context(
task,
&primary_files,
&supporting_files,
&tests,
&impact.risk_report,
&runtime_signals,
);
let boundary_evidence_refs = primary_files
.iter()
.flat_map(|result| result.derived_evidence_ids())
.collect::<Vec<_>>();
let confidence_summary = confidence_summary(&confidence_breakdown);
Ok(ContextPack {
task: task.into(),
intent: classify_intent(task).into(),
primary_files,
primary_symbols,
supporting_files,
dependency_edges,
runtime_signals,
test_candidates: tests.clone(),
risk_report: impact.risk_report,
recommended_change_boundary: ChangeBoundary {
allowed_files,
// Caution list: the first 8 directly impacted files.
caution_files: impact
.direct_impacts
.iter()
.take(8)
.map(|result| result.path.clone())
.collect(),
forbidden_files: Vec::new(),
evidence_refs: boundary_evidence_refs,
..Default::default()
},
validation_plan: ValidationPlan {
// Only tests that carry a command contribute to `commands`.
commands: tests
.iter()
.filter_map(|test| test.command.clone())
.collect(),
tests,
requires_approval: true,
evidence: evidence.clone(),
},
evidence,
negative_evidence,
confidence_summary,
confidence_breakdown,
})
}
}
/// Lists what the context search looked for and did *not* find.
///
/// Each check below contributes at most one entry, in this fixed order: missing
/// primary context, missing exact references, missing validation targets, missing
/// runtime corroboration, and a docs/tests-only primary set. After those, every
/// risk reason mentioning "low confidence" or "no matching" adds one entry.
fn negative_evidence_for_context(
    task: &str,
    primary_files: &[SearchResult],
    supporting_files: &[SearchResult],
    tests: &[open_kioku_core::TestTarget],
    risk: &RiskReport,
    runtime_signals: &[RuntimeSignal],
) -> Vec<NegativeEvidence> {
    let mut findings = Vec::new();

    findings.extend(primary_files.is_empty().then(|| NegativeEvidence {
        query: task.into(),
        scope: "primary_context".into(),
        inspected_sources: vec!["lexical_search".into(), "ranking_fusion".into()],
        reason: "no primary context matched the task".into(),
        confidence: 0.95,
        suggested_next_probe: Some("Run `ok search <task> --explain-ranking` with named symbols or paths from the ticket.".into()),
    }));

    let lacks_exact_reference = exact_reference_count(primary_files, supporting_files) == 0;
    findings.extend(lacks_exact_reference.then(|| NegativeEvidence {
        query: task.into(),
        scope: "exact_references".into(),
        inspected_sources: vec![
            "search_result.evidence".into(),
            "search_result.match_reason".into(),
        ],
        reason: "no explicit exact symbol reference or SCIP evidence was found".into(),
        confidence: 0.85,
        suggested_next_probe: Some(
            "Run `ok scip setup .` and re-index with `ok index . --with-scip auto`.".into(),
        ),
    }));

    findings.extend(tests.is_empty().then(|| NegativeEvidence {
        query: task.into(),
        scope: "validation".into(),
        inspected_sources: vec!["indexed_tests".into(), "test_selector".into()],
        reason: "no nearby validation target was selected".into(),
        confidence: 0.80,
        // The probe can only be suggested when there is a top file to point at.
        suggested_next_probe: primary_files.first().map(|top| {
            format!(
                "Run `ok tests {}` to inspect validation candidates for the top file.",
                top.path.display()
            )
        }),
    }));

    let lacks_runtime =
        runtime_signals.is_empty() && runtime_signal_count(primary_files, supporting_files) == 0;
    findings.extend(lacks_runtime.then(|| NegativeEvidence {
        query: task.into(),
        scope: "runtime".into(),
        inspected_sources: vec!["runtime_signals".into(), "search_result.evidence".into()],
        reason: "no runtime trace, incident, or error artifact corroborated the selected context"
            .into(),
        confidence: 0.75,
        suggested_next_probe: Some(
            "Import or configure runtime artifacts, then rerun `ok plan`.".into(),
        ),
    }));

    findings.extend(docs_or_tests_only(primary_files).then(|| NegativeEvidence {
        query: task.into(),
        scope: "boundary".into(),
        inspected_sources: vec!["primary_context.paths".into()],
        reason: "task anchors only matched docs or test fixtures, not source edit targets".into(),
        confidence: 0.90,
        suggested_next_probe: Some(
            "Search for the production symbol or source path named by the ticket.".into(),
        ),
    }));

    let flagged_reasons = risk.reasons.iter().filter(|reason| {
        let lowered = reason.to_ascii_lowercase();
        lowered.contains("low confidence") || lowered.contains("no matching")
    });
    findings.extend(flagged_reasons.map(|reason| NegativeEvidence {
        query: task.into(),
        scope: "risk".into(),
        inspected_sources: vec!["risk_report.reasons".into()],
        reason: reason.clone(),
        confidence: 0.85,
        suggested_next_probe: Some("Resolve the missing task anchor before editing.".into()),
    }));

    findings
}
/// Collects the raw counts feeding the confidence model and turns them into a
/// [`ConfidenceBreakdown`] via `ConfidenceBreakdown::from_signals`.
///
/// The runtime count is the number of runtime signals passed in plus the number
/// of results that are themselves runtime-corroborated.
fn confidence_for_context(
    primary_files: &[SearchResult],
    supporting_files: &[SearchResult],
    tests: &[open_kioku_core::TestTarget],
    risk: &RiskReport,
    allowed_file_count: usize,
    evidence_count: usize,
    runtime_signal_count_value: usize,
) -> ConfidenceBreakdown {
    let runnable_tests = tests.iter().filter(|test| test.command.is_some()).count();
    let exact_references = exact_reference_count(primary_files, supporting_files);
    let corroborated_results = runtime_signal_count(primary_files, supporting_files);

    let signals = ConfidenceSignalInput {
        primary_file_count: primary_files.len(),
        evidence_count,
        exact_reference_count: exact_references,
        validation_count: tests.len(),
        validation_with_command_count: runnable_tests,
        negative_evidence_count: negative_evidence_count(risk),
        allowed_file_count,
        runtime_signal_count: runtime_signal_count_value + corroborated_results,
    };
    ConfidenceBreakdown::from_signals(signals)
}
/// Produces a one-line summary: the overall rating, then the first blocker and
/// the first caveat when present, joined with `"; "`.
fn confidence_summary(breakdown: &ConfidenceBreakdown) -> String {
    let headline = format!(
        "overall {:?} ({:.2}) from explainable evidence signals",
        breakdown.overall_enum, breakdown.overall_score
    );
    let first_blocker = breakdown
        .blockers
        .first()
        .map(|blocker| format!("blocker: {blocker}"));
    let first_caveat = breakdown
        .caveats
        .first()
        .map(|caveat| format!("caveat: {caveat}"));

    std::iter::once(headline)
        .chain(first_blocker)
        .chain(first_caveat)
        .collect::<Vec<_>>()
        .join("; ")
}
/// Counts the results, across primary and supporting files, that carry an
/// exact-reference signal.
fn exact_reference_count(
    primary_files: &[SearchResult],
    supporting_files: &[SearchResult],
) -> usize {
    let mut total = 0;
    for result in primary_files.iter().chain(supporting_files) {
        if has_exact_reference_signal(result) {
            total += 1;
        }
    }
    total
}
/// Returns true when any evidence line, or the match reason, of `result`
/// mentions an exact reference.
fn has_exact_reference_signal(result: &SearchResult) -> bool {
    for line in &result.evidence {
        if contains_exact_reference(line) {
            return true;
        }
    }
    contains_exact_reference(&result.match_reason)
}
/// Case-insensitively checks `value` for one of the exact-reference markers:
/// "exact reference", "exact symbol reference" or "scip".
fn contains_exact_reference(value: &str) -> bool {
    const MARKERS: [&str; 3] = ["exact reference", "exact symbol reference", "scip"];
    let lowered = value.to_ascii_lowercase();
    MARKERS.iter().any(|marker| lowered.contains(*marker))
}
/// Counts results that are runtime-corroborated.
///
/// A result counts when it has a `runtime_corroboration` score component with a
/// positive contribution, or when any of its evidence lines mentions "runtime"
/// (case-insensitive).
fn runtime_signal_count(
    primary_files: &[SearchResult],
    supporting_files: &[SearchResult],
) -> usize {
    let mut corroborated = 0;
    for result in primary_files.iter().chain(supporting_files) {
        let scored = result.score_breakdown.iter().any(|component| {
            component.signal == "runtime_corroboration" && component.contribution > 0.0
        });
        let mentioned = || {
            result
                .evidence
                .iter()
                .any(|line| line.to_ascii_lowercase().contains("runtime"))
        };
        if scored || mentioned() {
            corroborated += 1;
        }
    }
    corroborated
}
/// Selects the runtime signals relevant to the chosen context.
///
/// Up to 500 runtime analysis facts are read from the store. A fact is kept when
/// its file is one of the selected result paths, or when one of its tokens occurs
/// in the lower-cased text of the task and the selected results. Facts whose file
/// is not in the store are dropped. The signals are sorted by id, de-duplicated
/// and truncated to `limit`.
fn runtime_signals_for_context(
    store: &dyn OkStore,
    task: &str,
    primary_files: &[SearchResult],
    supporting_files: &[SearchResult],
    limit: usize,
) -> Result<Vec<RuntimeSignal>> {
    let runtime_facts = store.analysis_facts(Some(EvidenceSourceType::Runtime), 500)?;
    if runtime_facts.is_empty() {
        return Ok(Vec::new());
    }

    let mut files_by_id = std::collections::HashMap::new();
    for file in store.list_files(usize::MAX, 0)? {
        files_by_id.insert(file.id.clone(), file);
    }

    let mut selected_paths = std::collections::HashSet::new();
    let mut context_parts = Vec::new();
    for result in primary_files.iter().chain(supporting_files) {
        selected_paths.insert(normalize_path(&result.path));
        context_parts.push(result.path.display().to_string());
        context_parts.push(result.snippet.clone());
        context_parts.push(result.match_reason.clone());
        context_parts.push(result.evidence.join(" "));
    }
    context_parts.push(task.to_string());
    let searchable_context = context_parts.join(" ").to_ascii_lowercase();

    let mut signals = Vec::new();
    for fact in &runtime_facts {
        let Some(file) = files_by_id.get(&fact.file_id) else {
            continue;
        };
        if selected_paths.contains(&normalize_path(&file.path))
            || runtime_fact_matches_query(fact, &searchable_context)
        {
            signals.push(runtime_signal_from_fact(fact, file));
        }
    }

    // Sorting first makes duplicates adjacent so `dedup_by` removes all of them.
    signals.sort_by(|left, right| left.id.cmp(&right.id));
    signals.dedup_by(|left, right| left.id == right.id);
    signals.truncate(limit);
    Ok(signals)
}
/// Seeds `primary` with results for files named by runtime facts matching the task.
///
/// Nothing happens when the store has no runtime facts. Otherwise each matching
/// fact whose file is not already in `primary` contributes one seeded result (at
/// most one per path), and seeding stops once `limit` additions were made.
/// `primary` is then re-sorted by descending score (ties broken by path) and
/// truncated to `limit.max(1)` entries.
fn augment_primary_with_runtime(
    store: &dyn OkStore,
    task: &str,
    primary: &mut Vec<SearchResult>,
    limit: usize,
) -> Result<()> {
    let runtime_facts = store.analysis_facts(Some(EvidenceSourceType::Runtime), 500)?;
    if runtime_facts.is_empty() {
        return Ok(());
    }

    let lowered_task = task.to_ascii_lowercase();
    let mut files_by_id = std::collections::HashMap::new();
    for file in store.list_files(usize::MAX, 0)? {
        files_by_id.insert(file.id.clone(), file);
    }
    let mut seen_paths: std::collections::HashSet<String> = primary
        .iter()
        .map(|result| normalize_path(&result.path))
        .collect();

    let mut seeded = Vec::new();
    for fact in &runtime_facts {
        if !runtime_fact_matches_query(fact, &lowered_task) {
            continue;
        }
        let Some(file) = files_by_id.get(&fact.file_id) else {
            continue;
        };
        // `insert` returns false when the path is already covered.
        if !seen_paths.insert(normalize_path(&file.path)) {
            continue;
        }
        if let Some(seed) = runtime_seed_result(store, file, fact)? {
            seeded.push(seed);
        }
        if seeded.len() >= limit {
            break;
        }
    }

    primary.append(&mut seeded);
    primary.sort_by(|left, right| match right.score.partial_cmp(&left.score) {
        // Incomparable (NaN) or equal scores fall back to path order.
        None | Some(std::cmp::Ordering::Equal) => left.path.cmp(&right.path),
        Some(order) => order,
    });
    primary.truncate(limit.max(1));
    Ok(())
}
/// Builds a synthetic search result for `file` from a runtime `fact`.
///
/// The snippet is the text of the chunk containing the start of the fact's range,
/// else the file's first chunk, else the fact's target. The result carries a fixed
/// score of 1.35 from a single `runtime_corroboration` component. As written this
/// always returns `Some`.
fn runtime_seed_result(
    store: &dyn OkStore,
    file: &File,
    fact: &AnalysisFact,
) -> Result<Option<SearchResult>> {
    let chunks = store.chunks_for_file(&file.id)?;
    let covering_chunk = chunks.iter().find(|chunk| match &fact.range {
        Some(range) => chunk.range.start <= range.start && range.start <= chunk.range.end,
        None => false,
    });
    let snippet = match covering_chunk.or_else(|| chunks.first()) {
        Some(chunk) => chunk.text.clone(),
        None => fact.target.clone(),
    };

    let corroboration_note = format!(
        "runtime corroboration from local artifact `{}` targeting `{}`",
        fact.source, fact.target
    );
    let score_component = ScoreComponent::single(
        "runtime_corroboration",
        1.35,
        vec![fact.id.clone()],
        "local runtime trace/log/incident artifact matched the task",
    );

    let seed = SearchResult {
        path: file.path.clone(),
        line_range: fact.range.clone(),
        snippet,
        symbol: None,
        score: 1.35,
        match_reason: "runtime artifact matched task intent".into(),
        evidence: vec![corroboration_note],
        evidence_refs: vec![fact.id.clone()],
        confidence: fact.confidence.score(),
        score_breakdown: vec![score_component],
    };
    Ok(Some(seed))
}
/// Boosts results that are corroborated by runtime signals.
///
/// A signal corroborates a result when it points at the same (normalized) path, or
/// when one of the signal's message tokens occurs in the result's lower-cased
/// snippet, match reason or evidence. At most the first three matching signals are
/// used per result; each adds an evidence line, an evidence ref and 0.15 to the
/// score, and the result's confidence is raised to at least 0.75.
fn annotate_results_with_runtime(results: &mut [SearchResult], signals: &[RuntimeSignal]) {
    if signals.is_empty() {
        return;
    }

    for result in results.iter_mut() {
        let own_path = normalize_path(&result.path);
        let joined_evidence = result.evidence.join(" ");
        let haystack = format!(
            "{} {} {}",
            result.snippet, result.match_reason, joined_evidence
        )
        .to_ascii_lowercase();

        let mut corroborating = Vec::new();
        for signal in signals {
            let same_file = matches!(
                &signal.file_range,
                Some(range) if normalize_path(&range.path) == own_path
            );
            let mentioned = || {
                runtime_message_tokens(&signal.message)
                    .iter()
                    .any(|token| haystack.contains(token.as_str()))
            };
            if same_file || mentioned() {
                corroborating.push(signal);
                if corroborating.len() == 3 {
                    break;
                }
            }
        }
        if corroborating.is_empty() {
            continue;
        }

        let mut signal_ids = Vec::with_capacity(corroborating.len());
        let mut kinds = Vec::with_capacity(corroborating.len());
        for signal in &corroborating {
            signal_ids.push(signal.id.clone());
            kinds.push(signal.kind.as_str());
        }
        let labels = kinds.join(", ");

        for signal in &corroborating {
            let note = format!(
                "runtime corroboration `{}`: {}",
                signal.kind, signal.message
            );
            if !result.evidence.contains(&note) {
                result.evidence.push(note);
            }
        }
        for id in &signal_ids {
            if !result.evidence_refs.contains(id) {
                result.evidence_refs.push(id.clone());
            }
        }

        let boost = 0.15 * corroborating.len() as f32;
        result.score += boost;
        result.confidence = result.confidence.max(0.75);
        result.score_breakdown.push(ScoreComponent::adjustment(
            "runtime_corroboration",
            boost,
            signal_ids,
            format!("local runtime artifact corroborates this context result: {labels}"),
        ));
    }
}
/// Converts a runtime analysis fact into a [`RuntimeSignal`] located in `file`.
///
/// The message is `"<fact message>: <fact target>"`; no occurrence time is set.
fn runtime_signal_from_fact(fact: &AnalysisFact, file: &File) -> RuntimeSignal {
    let location = FileRange {
        path: file.path.clone(),
        line_range: fact.range.clone(),
    };
    let message = format!("{}: {}", fact.message, fact.target);

    RuntimeSignal {
        id: fact.id.clone(),
        kind: runtime_kind(fact),
        message,
        file_range: Some(location),
        occurred_at: None,
        confidence: fact.confidence,
    }
}
fn runtime_signal_evidence(signal: &RuntimeSignal) -> Evidence {
Evidence {
id: EvidenceId::new(signal.id.clone()),
source: "open-kioku-runtime".into(),
source_type: EvidenceSourceType::Runtime,
file_range: signal.file_range.clone(),
symbol_id: None,
confidence: signal.confidence,
message: signal.message.clone(),
indexed_at: Utc::now(),
}
}
/// Boosts results whose file has git-history (co-change) facts in the store.
///
/// Up to 10 000 git-history facts are loaded. For each result, at most the first
/// three facts attached to its file are used: each adds an evidence line, an
/// evidence ref and 0.12 to the score, and the confidence is raised to at least
/// 0.70. Results whose path is unknown to the store are left untouched.
fn annotate_results_with_git_history(
    store: &dyn OkStore,
    results: &mut [SearchResult],
) -> Result<()> {
    if results.is_empty() {
        return Ok(());
    }
    let git_facts = store.analysis_facts(Some(EvidenceSourceType::GitHistory), 10_000)?;
    if git_facts.is_empty() {
        return Ok(());
    }

    let mut files_by_path = std::collections::HashMap::new();
    for file in store.list_files(usize::MAX, 0)? {
        files_by_path.insert(normalize_path(&file.path), file);
    }

    for result in results.iter_mut() {
        let Some(file) = files_by_path.get(&normalize_path(&result.path)) else {
            continue;
        };
        let related: Vec<_> = git_facts
            .iter()
            .filter(|fact| fact.file_id == file.id)
            .take(3)
            .collect();
        if related.is_empty() {
            continue;
        }

        let mut fact_ids = Vec::with_capacity(related.len());
        let mut targets = Vec::with_capacity(related.len());
        for fact in &related {
            fact_ids.push(fact.id.clone());
            targets.push(fact.target.as_str());
        }
        let labels = targets.join(", ");

        for fact in &related {
            let note = format!(
                "git co-change from local history: `{}` ({})",
                fact.target, fact.message
            );
            if !result.evidence.contains(&note) {
                result.evidence.push(note);
            }
        }
        for id in &fact_ids {
            if !result.evidence_refs.contains(id) {
                result.evidence_refs.push(id.clone());
            }
        }

        let boost = 0.12 * related.len() as f32;
        result.score += boost;
        result.confidence = result.confidence.max(0.70);
        result.score_breakdown.push(ScoreComponent::adjustment(
            "git_cochange",
            boost,
            fact_ids,
            format!("local git history says this file co-changed with: {labels}"),
        ));
    }
    Ok(())
}
/// Collects up to 20 git-history [`Evidence`] records for the files in `results`.
///
/// Facts are taken in store order; a fact is kept when its file is known to the
/// store and that file's normalized path is one of the result paths.
fn git_history_evidence_for_results(
    store: &dyn OkStore,
    results: &[SearchResult],
) -> Result<Vec<Evidence>> {
    if results.is_empty() {
        return Ok(Vec::new());
    }
    let git_facts = store.analysis_facts(Some(EvidenceSourceType::GitHistory), 10_000)?;
    if git_facts.is_empty() {
        return Ok(Vec::new());
    }

    let mut paths_by_id = std::collections::HashMap::new();
    for file in store.list_files(usize::MAX, 0)? {
        paths_by_id.insert(file.id, file.path);
    }
    let mut wanted_paths = std::collections::HashSet::new();
    for result in results {
        wanted_paths.insert(normalize_path(&result.path));
    }

    let collected = git_facts
        .into_iter()
        .filter_map(|fact| {
            let path = paths_by_id.get(&fact.file_id)?;
            if !wanted_paths.contains(&normalize_path(path)) {
                return None;
            }
            Some(Evidence {
                id: EvidenceId::new(fact.id.clone()),
                source: fact.source.clone(),
                source_type: EvidenceSourceType::GitHistory,
                file_range: Some(FileRange {
                    path: path.clone(),
                    line_range: None,
                }),
                symbol_id: None,
                confidence: fact.confidence,
                message: format!("{}: {}", fact.message, fact.target),
                indexed_at: Utc::now(),
            })
        })
        // The iterator is lazy, so no facts are inspected past the 20th hit.
        .take(20)
        .collect();
    Ok(collected)
}
/// Derives a short kind label for a runtime fact from its target kind and edge type.
///
/// Known combinations map to fixed labels; anything else falls back to the
/// lower-cased `Debug` name of the edge type.
fn runtime_kind(fact: &AnalysisFact) -> String {
    let label = match (&fact.target_kind, &fact.edge_type) {
        (GraphNodeType::Endpoint, GraphEdgeType::ExposesEndpoint) => "endpoint",
        (GraphNodeType::DatabaseTable, GraphEdgeType::ReadsTable) => "sql_read",
        (GraphNodeType::DatabaseTable, GraphEdgeType::WritesTable) => "sql_write",
        (GraphNodeType::RuntimeError, _) => "incident",
        (_, other) => return format!("{other:?}").to_ascii_lowercase(),
    };
    label.to_owned()
}
/// Returns true when any token of the fact's target, or failing that of its
/// message, occurs in `searchable_context`.
///
/// Tokens are lower-cased by `runtime_message_tokens`; callers pass a lower-cased
/// context for the match to be case-insensitive.
fn runtime_fact_matches_query(fact: &AnalysisFact, searchable_context: &str) -> bool {
    let mentions = |text: &str| {
        runtime_message_tokens(text)
            .iter()
            .any(|token| searchable_context.contains(token.as_str()))
    };
    mentions(&fact.target) || mentions(&fact.message)
}
/// Splits a runtime message or target into lower-cased match tokens.
///
/// Tokens are runs of ASCII alphanumerics, `_`, `/` and `.`. Leading and trailing
/// `/` and `.` are trimmed, so sentence punctuation (`"timeout."`) and relative
/// path prefixes (`"./src/x.rs"`) do not prevent a later substring match against
/// context text. Tokens shorter than four bytes after trimming are dropped, and at
/// most the first eight remaining tokens are returned.
fn runtime_message_tokens(value: &str) -> Vec<String> {
    value
        .split(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_' || ch == '/' || ch == '.'))
        .map(|token| {
            token
                .trim_matches(|ch: char| ch == '/' || ch == '.')
                .to_ascii_lowercase()
        })
        .filter(|token| token.len() >= 4)
        .take(8)
        .collect()
}
/// Normalizes a path for comparison: backslashes become `/` and every leading
/// `./` is removed.
fn normalize_path(path: &std::path::Path) -> String {
    let unified: String = path
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();

    let mut rest = unified.as_str();
    while let Some(stripped) = rest.strip_prefix("./") {
        rest = stripped;
    }
    rest.to_owned()
}
/// Counts the risk reasons that read as negative evidence, i.e. contain one of
/// the marker phrases below (case-insensitive).
fn negative_evidence_count(risk: &RiskReport) -> usize {
    const MARKERS: [&str; 7] = [
        "low confidence",
        "no matching",
        "missing",
        "absent",
        "unavailable",
        "weak",
        "unknown",
    ];

    let mut count = 0;
    for reason in &risk.reasons {
        let lowered = reason.to_ascii_lowercase();
        if MARKERS.iter().any(|marker| lowered.contains(*marker)) {
            count += 1;
        }
    }
    count
}
/// Returns true when `results` is non-empty and every result lives on a docs or
/// test path.
fn docs_or_tests_only(results: &[SearchResult]) -> bool {
    if results.is_empty() {
        return false;
    }
    for result in results {
        if !is_docs_or_test_path(&result.path.to_string_lossy()) {
            return false;
        }
    }
    true
}
/// Heuristically classifies `path` as documentation or test code.
///
/// The check is case-insensitive and treats `\` like `/`. A path matches when it:
/// - lies under a `docs`, `test` or `tests` directory,
/// - ends in `.md` or `.mdx`,
/// - contains `_test.` (e.g. `foo_test.go`), or
/// - contains `test_` at a word boundary (e.g. `test_foo.py`, `my_test_utils.rs`).
///
/// The word-boundary rule keeps ordinary names that merely embed the letters
/// (`latest_version.rs`, `contest_entry.rs`) from being treated as tests.
fn is_docs_or_test_path(path: &str) -> bool {
    let path = path.to_ascii_lowercase().replace('\\', "/");

    // `test_` only counts when it starts the path or follows a non-alphanumeric
    // character such as `/`, `_`, `-` or `.`.
    let has_test_prefix_word = path.match_indices("test_").any(|(start, _)| {
        path[..start]
            .chars()
            .next_back()
            .map_or(true, |previous| !previous.is_alphanumeric())
    });

    path.starts_with("docs/")
        || path.starts_with("test/")
        || path.starts_with("tests/")
        || path.contains("/docs/")
        || path.ends_with(".md")
        || path.ends_with(".mdx")
        || path.contains("/test/")
        || path.contains("/tests/")
        || path.contains("_test.")
        || has_test_prefix_word
}
/// Anchors extracted from a free-text task description by `TaskSearchIntent::parse`.
#[derive(Debug, Clone, Default)]
struct TaskSearchIntent {
/// Identifiers found before the first reference marker (such as " similar to ").
/// When that part has none, the first identifier of the whole task is used.
primary_anchors: Vec<String>,
/// Identifiers found from the reference marker onward, plus any remaining
/// identifier of the task that is not already a primary anchor.
reference_anchors: Vec<String>,
/// Whitespace-separated tokens accepted by `is_ticket_id`, de-duplicated.
ticket_anchors: Vec<String>,
/// Tokens accepted by `is_path_like`, with surrounding `/` trimmed, de-duplicated.
path_anchors: Vec<String>,
}
impl TaskSearchIntent {
    /// Extracts search anchors from a free-text task.
    ///
    /// The task is split at the earliest reference marker (e.g. " similar to ").
    /// Identifiers before the split are primary anchors, those after it are
    /// reference anchors. When the edit side has no identifier, the first identifier
    /// of the whole task becomes the primary anchor. Ticket ids and path-like tokens
    /// are gathered separately from the whitespace-separated words.
    fn parse(task: &str) -> Self {
        let lowered = task.to_ascii_lowercase();
        // ASCII lower-casing keeps byte offsets, so the index is valid for `task`.
        let split_at = reference_marker_start(&lowered).unwrap_or(task.len());
        let edit_side = task.get(..split_at).unwrap_or(task);
        let reference_side = task.get(split_at..).unwrap_or_default();

        let every_identifier = identifiers(task);
        let mut primary_anchors = identifiers(edit_side);
        let mut reference_anchors = identifiers(reference_side);

        if primary_anchors.is_empty() {
            primary_anchors.extend(every_identifier.first().cloned());
        }
        for candidate in every_identifier {
            let already_known =
                primary_anchors.contains(&candidate) || reference_anchors.contains(&candidate);
            if !already_known {
                reference_anchors.push(candidate);
            }
        }

        let mut ticket_anchors: Vec<String> = Vec::new();
        let mut path_anchors: Vec<String> = Vec::new();
        for raw in task.split_whitespace() {
            // Strip surrounding punctuation such as quotes, commas or brackets.
            let word = raw.trim_matches(|ch: char| {
                !ch.is_ascii_alphanumeric() && !matches!(ch, '-' | '_' | '/' | '.')
            });
            if is_ticket_id(word) && ticket_anchors.iter().all(|known| known != word) {
                ticket_anchors.push(word.to_string());
            }
            if is_path_like(word) {
                let trimmed = word.trim_matches('/');
                if !trimmed.is_empty() && path_anchors.iter().all(|known| known != trimmed) {
                    path_anchors.push(trimmed.to_string());
                }
            }
        }

        Self {
            primary_anchors,
            reference_anchors,
            ticket_anchors,
            path_anchors,
        }
    }

    /// Returns the search terms for `task`: the task text itself, followed by
    /// ticket, path, primary and reference anchors (in that order) that are at
    /// least three bytes long and not already listed.
    fn search_terms(&self, task: &str) -> Vec<String> {
        let anchors = self
            .ticket_anchors
            .iter()
            .chain(&self.path_anchors)
            .chain(&self.primary_anchors)
            .chain(&self.reference_anchors);

        let mut terms = vec![task.to_string()];
        for anchor in anchors {
            if anchor.len() < 3 || terms.contains(anchor) {
                continue;
            }
            terms.push(anchor.clone());
        }
        terms
    }
}
fn search_candidates(
chunks: &[CodeChunk],
files: &[File],
symbols: &[Symbol],
task: &str,
limit: usize,
intent: &TaskSearchIntent,
) -> Result<Vec<SearchResult>> {
let mut merged = std::collections::BTreeMap::<String, SearchResult>::new();
let per_anchor_limit = limit.clamp(8, 40);
for term in intent.search_terms(task) {
for mut result in search_chunks(chunks, files, symbols, &term, per_anchor_limit)? {
if term != task {
result
.evidence
.push(format!("task anchor `{term}` matched"));
result.match_reason = format!("{}; task anchor `{term}`", result.match_reason);
}
let key = result_key(&result);
match merged.get_mut(&key) {
Some(existing) => {
if result.score > existing.score {
existing.score = result.score;
existing.snippet = result.snippet;
existing.line_range = result.line_range;
existing.symbol = result.symbol;
existing.score_breakdown = result.score_breakdown;
}
for evidence in result.evidence {
if !existing.evidence.contains(&evidence) {
existing.evidence.push(evidence);
}
}
if !existing.match_reason.contains(&term) {
existing.match_reason =
format!("{}; task anchor `{term}`", existing.match_reason);
}
existing.reconcile_score_breakdown();
}
None => {
merged.insert(key, result);
}
}
}
}
Ok(merged.into_values().collect())
}
/// Reranks `results` and boosts those whose text mentions a task anchor.
///
/// After the generic rerank, each matching anchor adds to the score and raises
/// the confidence floor: primary anchors +0.65 / 0.85, reference anchors
/// +0.25 / 0.65, ticket and path anchors +0.35 / 0.75. Every boost is recorded as
/// an evidence line and a score component. The list is finally sorted by
/// descending score, ties broken by path.
fn rerank_for_task(
    results: Vec<SearchResult>,
    intent: &TaskSearchIntent,
    ranking_options: &RankingOptions,
) -> Vec<SearchResult> {
    let mut ranked = rerank_with_options(results, ranking_options);

    for hit in ranked.iter_mut() {
        // Computed once per hit; evidence added below does not feed back into matching.
        let text = searchable_result_text(hit);

        let primary_matches = intent
            .primary_anchors
            .iter()
            .filter(|anchor| contains_anchor(&text, anchor.as_str()));
        for anchor in primary_matches {
            hit.score += 0.65;
            hit.confidence = hit.confidence.max(0.85);
            hit.evidence
                .push(format!("primary task anchor `{anchor}` matched"));
            // Evidence ids are derived after the push above, as in the boost record.
            let evidence_ids = hit.derived_evidence_ids();
            hit.add_score_component(ScoreComponent::adjustment(
                "primary_task_anchor_boost",
                0.65,
                evidence_ids,
                format!("primary task anchor `{anchor}` matched result text"),
            ));
        }

        let reference_matches = intent
            .reference_anchors
            .iter()
            .filter(|anchor| contains_anchor(&text, anchor.as_str()));
        for anchor in reference_matches {
            hit.score += 0.25;
            hit.confidence = hit.confidence.max(0.65);
            hit.evidence
                .push(format!("reference task anchor `{anchor}` matched"));
            let evidence_ids = hit.derived_evidence_ids();
            hit.add_score_component(ScoreComponent::adjustment(
                "reference_task_anchor_boost",
                0.25,
                evidence_ids,
                format!("reference task anchor `{anchor}` matched result text"),
            ));
        }

        let ticket_or_path_matches = intent
            .ticket_anchors
            .iter()
            .chain(intent.path_anchors.iter())
            .filter(|anchor| contains_anchor(&text, anchor.as_str()));
        for anchor in ticket_or_path_matches {
            hit.score += 0.35;
            hit.confidence = hit.confidence.max(0.75);
            hit.evidence
                .push(format!("ticket/path task anchor `{anchor}` matched"));
            let evidence_ids = hit.derived_evidence_ids();
            hit.add_score_component(ScoreComponent::adjustment(
                "ticket_or_path_anchor_boost",
                0.35,
                evidence_ids,
                format!("ticket/path anchor `{anchor}` matched result text"),
            ));
        }

        hit.reconcile_score_breakdown();
    }

    ranked.sort_by(|left, right| match right.score.partial_cmp(&left.score) {
        // Incomparable (NaN) or equal scores fall back to path order.
        None | Some(std::cmp::Ordering::Equal) => left.path.cmp(&right.path),
        Some(order) => order,
    });
    ranked
}
/// Builds the merge key `path:start-end` for a result; a missing line range
/// contributes the default value for both bounds.
fn result_key(result: &SearchResult) -> String {
    let (start, end) = match &result.line_range {
        Some(range) => (range.start, range.end),
        None => Default::default(),
    };
    format!("{}:{}-{}", result.path.display(), start, end)
}
/// Returns the lower-cased text that anchors are matched against: path, snippet,
/// and the symbol's qualified and short names (empty when there is no symbol).
fn searchable_result_text(result: &SearchResult) -> String {
    let (qualified_name, short_name) = match &result.symbol {
        Some(symbol) => (symbol.qualified_name.as_str(), symbol.name.as_str()),
        None => ("", ""),
    };
    let mut text = format!(
        "{} {} {} {}",
        result.path.display(),
        result.snippet,
        qualified_name,
        short_name
    );
    text.make_ascii_lowercase();
    text
}
/// Checks whether `haystack` (expected to be lower-cased already) contains the
/// anchor, either verbatim in lower case or in its space-separated word form.
fn contains_anchor(haystack: &str, anchor: &str) -> bool {
    let lowered = anchor.to_ascii_lowercase();
    if haystack.contains(lowered.as_str()) {
        return true;
    }
    let spaced = normalize_identifier(anchor);
    haystack.contains(spaced.as_str())
}
/// Returns the byte offset of the earliest reference marker in `lower`, if any.
///
/// Markers are matched literally including their surrounding spaces, so the input
/// is expected to be lower-cased and a marker at the very start or end of the
/// text is not detected.
fn reference_marker_start(lower: &str) -> Option<usize> {
    const MARKERS: [&str; 8] = [
        " similar to ",
        " like ",
        " copy from ",
        " copied from ",
        " mirror ",
        " mirrored from ",
        " based on ",
        " reference ",
    ];

    let mut earliest: Option<usize> = None;
    for marker in MARKERS.iter() {
        if let Some(position) = lower.find(*marker) {
            earliest = Some(earliest.map_or(position, |best| best.min(position)));
        }
    }
    earliest
}
/// Extracts the distinct named identifiers from `value`, in order of first
/// appearance.
///
/// Candidates are runs of ASCII alphanumerics, `_` and `-` with surrounding `-`
/// trimmed; only those accepted by `is_named_identifier` are kept.
fn identifiers(value: &str) -> Vec<String> {
    let pieces = value
        .split(|ch: char| !ch.is_ascii_alphanumeric() && ch != '_' && ch != '-')
        .map(|piece| piece.trim_matches('-'));

    let mut found: Vec<String> = Vec::new();
    for piece in pieces {
        if !is_named_identifier(piece) || found.iter().any(|known| known == piece) {
            continue;
        }
        found.push(piece.to_owned());
    }
    found
}
/// Decides whether `value` looks like a code identifier rather than a plain word.
///
/// It must be at least three bytes long, must not be a ticket id, and must either
/// contain a `_` / `-` separator or combine an upper-case letter with a lower-case
/// letter or a digit.
fn is_named_identifier(value: &str) -> bool {
    if value.len() < 3 || is_ticket_id(value) {
        return false;
    }

    let (mut lower, mut upper, mut digit, mut separator) = (false, false, false, false);
    for ch in value.chars() {
        match ch {
            'a'..='z' => lower = true,
            'A'..='Z' => upper = true,
            '0'..='9' => digit = true,
            '_' | '-' => separator = true,
            _ => {}
        }
    }
    separator || (upper && (lower || digit))
}
/// Recognizes ticket ids of the form `PREFIX-123`: at least two ASCII upper-case
/// letters, a dash, then at least two ASCII digits and nothing else.
fn is_ticket_id(value: &str) -> bool {
    match value.split_once('-') {
        None => false,
        Some((prefix, number)) => {
            let prefix_ok = prefix.len() >= 2 && prefix.bytes().all(|b| b.is_ascii_uppercase());
            let number_ok = number.len() >= 2 && number.bytes().all(|b| b.is_ascii_digit());
            prefix_ok && number_ok
        }
    }
}
/// Returns true when `value` contains a `/` or ends with one of the recognized
/// source/document file extensions.
fn is_path_like(value: &str) -> bool {
    const EXTENSIONS: [&str; 9] = [
        ".rs", ".ts", ".tsx", ".js", ".jsx", ".java", ".py", ".go", ".md",
    ];
    if value.contains('/') {
        return true;
    }
    EXTENSIONS.iter().any(|extension| value.ends_with(*extension))
}
/// Converts an identifier into lower-cased, space-separated words.
///
/// `_`, `-`, `/` and `.` act as separators, and a space is inserted before an
/// upper-case letter that follows a lower-case letter or digit (camelCase
/// boundaries). Runs of whitespace collapse to a single space.
fn normalize_identifier(value: &str) -> String {
    let mut spaced = String::with_capacity(value.len() + 8);
    let mut after_lower_or_digit = false;

    for ch in value.chars() {
        match ch {
            '_' | '-' | '/' | '.' => {
                spaced.push(' ');
                after_lower_or_digit = false;
            }
            _ => {
                if after_lower_or_digit && ch.is_ascii_uppercase() {
                    spaced.push(' ');
                }
                spaced.push(ch.to_ascii_lowercase());
                after_lower_or_digit = ch.is_ascii_lowercase() || ch.is_ascii_digit();
            }
        }
    }

    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Classifies the task by case-insensitive substring matching.
///
/// Any of "fix", "add", "change" or "implement" yields `"code_change"`; otherwise
/// "test" yields `"validation"`; everything else is `"understanding"`.
fn classify_intent(task: &str) -> &'static str {
    const CHANGE_VERBS: [&str; 4] = ["fix", "add", "change", "implement"];

    let lowered = task.to_ascii_lowercase();
    if CHANGE_VERBS.iter().any(|verb| lowered.contains(*verb)) {
        return "code_change";
    }
    if lowered.contains("test") {
        "validation"
    } else {
        "understanding"
    }
}
/// Builds the placeholder impact report for a task with no matching indexed context.
///
/// The report targets `task`, lists no direct or indirect impacts, carries an
/// `"unknown"` risk level with a zero score, and contains a single
/// low-confidence lexical evidence entry (`context:no-match`) that the lone
/// score component refers back to. The evidence is timestamped with the
/// current UTC time.
fn empty_impact(task: &str) -> open_kioku_core::ImpactReport {
    // Evidence id shared by the evidence entry and the score component.
    const NO_MATCH_ID: &str = "context:no-match";

    let risk_report = RiskReport {
        level: "unknown".into(),
        score: 0.0,
        reasons: vec!["no matching indexed files found".into()],
    };

    let no_match_evidence = Evidence {
        id: EvidenceId::new(NO_MATCH_ID),
        source: "open-kioku-context".into(),
        source_type: EvidenceSourceType::Lexical,
        file_range: None,
        symbol_id: None,
        confidence: Confidence::Low,
        message: "context pack search did not find indexed evidence".into(),
        indexed_at: Utc::now(),
    };

    let no_context_component = ScoreComponent::single(
        "no_context_found",
        0.0,
        vec![NO_MATCH_ID.into()],
        "no indexed context matched the task",
    );

    open_kioku_core::ImpactReport {
        target: task.into(),
        direct_impacts: Vec::new(),
        indirect_impacts: Vec::new(),
        risk_report,
        evidence: vec![no_match_evidence],
        score_breakdown: vec![no_context_component],
    }
}
/// Builds the impact report used when context comes only from persisted search results.
///
/// The report targets `task`, lists no direct or indirect impacts, carries a
/// `"low"` risk level with score `0.1`, and contains a single
/// medium-confidence lexical evidence entry (`context:bounded-search`) that
/// the lone score component refers back to. The evidence is timestamped with
/// the current UTC time.
fn bounded_impact(task: &str) -> open_kioku_core::ImpactReport {
    // Evidence id shared by the evidence entry and the score component.
    const BOUNDED_SEARCH_ID: &str = "context:bounded-search";

    let risk_report = RiskReport {
        level: "low".into(),
        score: 0.1,
        reasons: vec!["bounded context built from persisted search results".into()],
    };

    let bounded_evidence = Evidence {
        id: EvidenceId::new(BOUNDED_SEARCH_ID),
        source: "open-kioku-context".into(),
        source_type: EvidenceSourceType::Lexical,
        file_range: None,
        symbol_id: None,
        confidence: Confidence::Medium,
        message: "context pack used persisted search results without full-table impact expansion"
            .into(),
        indexed_at: Utc::now(),
    };

    let bounded_component = ScoreComponent::single(
        "bounded_context_risk",
        0.1,
        vec![BOUNDED_SEARCH_ID.into()],
        "bounded context used persisted search results without full impact expansion",
    );

    open_kioku_core::ImpactReport {
        target: task.into(),
        direct_impacts: Vec::new(),
        indirect_impacts: Vec::new(),
        risk_report,
        evidence: vec![bounded_evidence],
        score_breakdown: vec![bounded_component],
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use open_kioku_core::{FileId, Language, LineRange, RepositoryId, SymbolId, SymbolKind};
    use std::path::Path;

    /// Builds a 100-byte, non-generated, non-vendor Java file fixture.
    /// The id string doubles as the content hash.
    fn java_file(id: &'static str, repository_id: RepositoryId, path: &'static str) -> File {
        File {
            id: FileId::new(id),
            repository_id,
            path: path.into(),
            language: Language::Java,
            size_bytes: 100,
            content_hash: id.into(),
            is_generated: false,
            is_vendor: false,
        }
    }

    /// Builds a high-confidence Java class symbol spanning lines 1-20 of `file`.
    fn java_class(
        id: &'static str,
        name: &'static str,
        qualified_name: &'static str,
        file: &File,
    ) -> Symbol {
        Symbol {
            id: SymbolId::new(id),
            name: name.into(),
            qualified_name: qualified_name.into(),
            kind: SymbolKind::Class,
            file_id: file.id.clone(),
            range: Some(LineRange { start: 1, end: 20 }),
            language: Language::Java,
            confidence: Confidence::High,
            provenance: EvidenceSourceType::TreeSitter,
        }
    }

    /// Builds a Java chunk covering lines 1-10 of `file`, attributed to `symbol`.
    fn java_chunk(
        id: &'static str,
        file: &File,
        symbol: &Symbol,
        text: &'static str,
    ) -> CodeChunk {
        CodeChunk {
            id: id.into(),
            file_id: file.id.clone(),
            range: LineRange { start: 1, end: 10 },
            language: Language::Java,
            text: text.into(),
            symbol_id: Some(symbol.id.clone()),
        }
    }

    /// The task names `PublishRestrictionsMutation` as the place to edit and
    /// `EnterpriseRateValidator` only as a pattern to imitate; the edit target
    /// must rank first and carry "primary task anchor" evidence.
    #[test]
    fn primary_edit_anchor_outranks_reference_pattern_anchor() {
        let repo_id = RepositoryId::new("repo");
        let mutation_file = java_file(
            "mutation",
            repo_id.clone(),
            "src/PublishRestrictionsMutation.java",
        );
        let validator_file = java_file("validator", repo_id, "src/EnterpriseRateValidator.java");

        let mutation_symbol = java_class(
            "mutation-symbol",
            "PublishRestrictionsMutation",
            "api.PublishRestrictionsMutation",
            &mutation_file,
        );
        let validator_symbol = java_class(
            "validator-symbol",
            "EnterpriseRateValidator",
            "api.EnterpriseRateValidator",
            &validator_file,
        );

        let chunks = vec![
            java_chunk(
                "mutation-chunk",
                &mutation_file,
                &mutation_symbol,
                "class PublishRestrictionsMutation { void mutate() {} }",
            ),
            java_chunk(
                "validator-chunk",
                &validator_file,
                &validator_symbol,
                "class EnterpriseRateValidator { boolean validate() { return true; } }",
            ),
        ];
        let files = vec![mutation_file, validator_file];
        let symbols = vec![mutation_symbol, validator_symbol];

        let task =
            "add validation in PublishRestrictionsMutation similar to EnterpriseRateValidator";
        let intent = TaskSearchIntent::parse(task);
        let candidates = search_candidates(&chunks, &files, &symbols, task, 10, &intent).unwrap();
        let results = rerank_for_task(candidates, &intent, &RankingOptions::default());

        let top = &results[0];
        assert_eq!(
            top.path,
            Path::new("src/PublishRestrictionsMutation.java")
        );
        assert!(top
            .evidence
            .iter()
            .any(|entry| entry.contains("primary task anchor")));
    }
}