use std::collections::BTreeSet;
use std::path::PathBuf;
use crate::domain::{
ChannelDiagnostic, ChannelHealth, ChannelHealthStatus, ChannelResult, ChannelStats,
EvidenceAnchor, EvidenceChannel, EvidenceDocumentRef, EvidenceHit, FriggError, FriggResult,
model::TextMatch,
};
use crate::indexer::PhpDeclarationRelation;
use crate::languages::{BladeSourceEvidence, PhpSourceEvidence, SymbolLanguage};
use super::attribution::SearchStageAttribution;
use super::policy::PostSelectionTrace;
/// Parameters describing a single text search request.
#[derive(Debug, Clone)]
pub struct SearchTextQuery {
/// The query text supplied by the caller.
pub query: String,
/// Optional compiled regular expression.
/// NOTE(review): presumably matched against candidate paths — confirm against the executor.
pub path_regex: Option<regex::Regex>,
/// Caller-supplied result limit.
/// NOTE(review): presumably the maximum number of matches to return — confirm against the executor.
pub limit: usize,
}
/// Optional, caller-facing search filters in their raw string form.
///
/// Both fields default to `None`. See [`NormalizedSearchFilters`] for the
/// variant that carries a typed language.
#[derive(Debug, Clone, Default)]
pub struct SearchFilters {
/// Optional repository identifier to filter on.
pub repository_id: Option<String>,
/// Optional language name to filter on, as an unparsed string.
pub language: Option<String>,
}
/// Category of a [`SearchDiagnostic`].
///
/// `search_diagnostics_to_channel_diagnostics` maps these variants to the
/// diagnostic codes `"walk"` and `"read"` respectively.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SearchDiagnosticKind {
/// Diagnostic of the "walk" category.
/// NOTE(review): presumably raised while walking the file tree — confirm at the producer.
Walk,
/// Diagnostic of the "read" category.
/// NOTE(review): presumably raised while reading a file — confirm at the producer.
Read,
}
/// A single diagnostic recorded during search execution.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchDiagnostic {
/// Identifier of the repository the diagnostic belongs to.
pub repository_id: String,
/// Optional path associated with the diagnostic; `None` when no path is attached.
pub path: Option<String>,
/// Category of the diagnostic.
pub kind: SearchDiagnosticKind,
/// Human-readable diagnostic message.
pub message: String,
}
/// Collection of diagnostics recorded during a search execution.
///
/// The default value holds no entries.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SearchExecutionDiagnostics {
    /// Recorded diagnostics, in the order they were stored.
    pub entries: Vec<SearchDiagnostic>,
}

impl SearchExecutionDiagnostics {
    /// Returns the number of recorded diagnostics, regardless of kind.
    pub fn total_count(&self) -> usize {
        let Self { entries } = self;
        entries.len()
    }

    /// Returns how many recorded diagnostics have exactly the given `kind`.
    pub fn count_by_kind(&self, kind: SearchDiagnosticKind) -> usize {
        self.entries
            .iter()
            .fold(0, |tally, entry| tally + usize::from(entry.kind == kind))
    }
}
/// Result of executing a text search.
///
/// The default value has zero matches, no diagnostics, and no backend information.
#[derive(Debug, Clone, Default)]
pub struct SearchExecutionOutput {
/// Total match count reported by the execution.
/// NOTE(review): may exceed `matches.len()` if results are truncated — confirm at the producer.
pub total_matches: usize,
/// The matches carried in this output.
pub matches: Vec<TextMatch>,
/// Diagnostics recorded while executing the search.
pub diagnostics: SearchExecutionDiagnostics,
/// Lexical backend recorded for this execution, if any.
pub lexical_backend: Option<SearchLexicalBackend>,
/// Optional free-form note about the lexical backend.
pub lexical_backend_note: Option<String>,
}
/// Identifies which lexical backend a search execution is attributed to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchLexicalBackend {
    /// Rendered as `"native"`.
    Native,
    /// Rendered as `"ripgrep"`.
    Ripgrep,
    /// Rendered as `"mixed"`.
    Mixed,
}

impl SearchLexicalBackend {
    /// Returns the stable lowercase label for this backend:
    /// `"native"`, `"ripgrep"`, or `"mixed"`.
    pub fn as_str(self) -> &'static str {
        match self {
            SearchLexicalBackend::Mixed => "mixed",
            SearchLexicalBackend::Ripgrep => "ripgrep",
            SearchLexicalBackend::Native => "native",
        }
    }
}
/// A candidate file, identified by both a relative and an absolute path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SearchCandidateFile {
/// Path of the file in relative form.
/// NOTE(review): presumably relative to the repository root — confirm at the producer.
pub(crate) relative_path: String,
/// Absolute filesystem path of the file.
pub(crate) absolute_path: PathBuf,
}
/// The candidate files of a single repository.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct RepositoryCandidateUniverse {
/// Identifier of the repository.
pub(crate) repository_id: String,
/// Root path of the repository.
pub(crate) root: PathBuf,
/// Optional snapshot identifier; `None` when no snapshot is associated.
pub(crate) snapshot_id: Option<String>,
/// Candidate files belonging to this repository.
pub(crate) candidates: Vec<SearchCandidateFile>,
}
/// Candidate files across repositories, plus diagnostics stored alongside them.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct SearchCandidateUniverse {
/// Per-repository candidate sets.
pub(crate) repositories: Vec<RepositoryCandidateUniverse>,
/// Diagnostics carried with the universe.
/// NOTE(review): presumably recorded while building it — confirm at the producer.
pub(crate) diagnostics: SearchExecutionDiagnostics,
}
/// A built [`SearchCandidateUniverse`] together with counters and timings
/// describing the build.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct SearchCandidateUniverseBuild {
/// The built candidate universe.
pub(crate) universe: SearchCandidateUniverse,
/// Number of repositories counted for this build.
pub(crate) repository_count: usize,
/// Number of candidates counted for this build.
pub(crate) candidate_count: usize,
/// Number of repositories counted as manifest-backed.
/// NOTE(review): the exact meaning of "manifest-backed" is defined by the builder — confirm there.
pub(crate) manifest_backed_repository_count: usize,
/// Elapsed time attributed to candidate intake; microseconds per the `_us` suffix.
pub(crate) candidate_intake_elapsed_us: u64,
/// Elapsed time attributed to freshness validation; microseconds per the `_us` suffix.
pub(crate) freshness_validation_elapsed_us: u64,
}
/// Candidate files associated with a snapshot identifier, plus build timings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ManifestCandidateFilesBuild {
/// Identifier of the snapshot the candidates are associated with.
pub(crate) snapshot_id: String,
/// Candidate files as `(String, PathBuf)` pairs.
/// NOTE(review): presumably `(relative path, absolute path)`, mirroring
/// `SearchCandidateFile` — confirm at the producer.
pub(crate) candidates: Vec<(String, PathBuf)>,
/// Elapsed time attributed to candidate intake; microseconds per the `_us` suffix.
pub(crate) candidate_intake_elapsed_us: u64,
/// Elapsed time attributed to freshness validation; microseconds per the `_us` suffix.
pub(crate) freshness_validation_elapsed_us: u64,
}
/// Hybrid-search name for [`EvidenceDocumentRef`]; used as the `document`
/// field type of [`HybridRankedEvidence`].
pub type HybridDocumentRef = EvidenceDocumentRef;
/// Hybrid-search name for [`EvidenceHit`]; the hit type accepted by
/// `match_count_for_hits`.
pub type HybridChannelHit = EvidenceHit;
/// Per-channel weights for hybrid search.
///
/// The default weights are `lexical = 0.5`, `graph = 0.3`, `semantic = 0.2`.
/// Use [`HybridChannelWeights::validate`] to reject unusable combinations.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HybridChannelWeights {
    /// Weight of the lexical channel.
    pub lexical: f32,
    /// Weight of the graph channel.
    pub graph: f32,
    /// Weight of the semantic channel.
    pub semantic: f32,
}

impl Default for HybridChannelWeights {
    /// Returns the default weights: lexical `0.5`, graph `0.3`, semantic `0.2`.
    fn default() -> Self {
        Self {
            lexical: 0.5,
            graph: 0.3,
            semantic: 0.2,
        }
    }
}

impl HybridChannelWeights {
    /// Checks that the weights are usable and returns them unchanged on success.
    ///
    /// # Errors
    ///
    /// Returns [`FriggError::InvalidInput`] when:
    /// - any weight is negative,
    /// - any weight is NaN or infinite,
    /// - all three weights are zero.
    pub fn validate(self) -> FriggResult<Self> {
        let weights = [self.lexical, self.graph, self.semantic];

        // Checked first so every input that was already rejected as negative
        // (including negative infinity) keeps reporting the same message.
        if weights.iter().any(|weight| *weight < 0.0) {
            return Err(FriggError::InvalidInput(
                "hybrid channel weights must be >= 0".to_owned(),
            ));
        }
        // NaN compares false against everything, so it slips past both the
        // negative check above and the all-zero check below; positive infinity
        // is likewise neither negative nor zero. Reject both explicitly.
        if weights.iter().any(|weight| !weight.is_finite()) {
            return Err(FriggError::InvalidInput(
                "hybrid channel weights must be finite".to_owned(),
            ));
        }
        if weights.iter().all(|weight| *weight == 0.0) {
            return Err(FriggError::InvalidInput(
                "hybrid channel weights must include at least one non-zero channel".to_owned(),
            ));
        }
        Ok(self)
    }
}
/// Parameters describing a single hybrid search request.
#[derive(Debug, Clone)]
pub struct SearchHybridQuery {
/// The query text supplied by the caller.
pub query: String,
/// Caller-supplied result limit.
/// NOTE(review): presumably the maximum number of results to return — confirm against the executor.
pub limit: usize,
/// Per-channel weights for this request.
pub weights: HybridChannelWeights,
/// Optional explicit semantic toggle. When `None`,
/// `hybrid_execution_note_from_channel_results` treats the runtime-enabled
/// flag as the requested value.
pub semantic: Option<bool>,
}
/// Hybrid-search name for [`ChannelHealthStatus`], used to report the status
/// of the semantic channel in [`HybridExecutionNote`].
pub type HybridSemanticStatus = ChannelHealthStatus;
/// Summary of how a hybrid search execution used the semantic channel and
/// which lexical backend it was attributed to.
///
/// `hybrid_execution_note_from_channel_results` builds this from the semantic
/// channel's health and stats.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HybridExecutionNote {
    /// Whether semantic search was requested for this execution.
    pub semantic_requested: bool,
    /// Whether the semantic channel is reported as enabled for this execution.
    pub semantic_enabled: bool,
    /// Health status reported for the semantic channel.
    pub semantic_status: HybridSemanticStatus,
    /// Optional reason accompanying `semantic_status`.
    pub semantic_reason: Option<String>,
    /// Candidate count reported by the semantic channel.
    pub semantic_candidate_count: usize,
    /// Hit count reported by the semantic channel.
    pub semantic_hit_count: usize,
    /// Match count reported by the semantic channel.
    pub semantic_match_count: usize,
    /// Whether the execution is reported as lexical-only.
    pub lexical_only_mode: bool,
    /// Lexical backend recorded for this execution, if any.
    pub lexical_backend: Option<SearchLexicalBackend>,
    /// Optional free-form note about the lexical backend.
    pub lexical_backend_note: Option<String>,
}

impl Default for HybridExecutionNote {
    /// Returns a note describing an execution with no semantic activity:
    /// semantic status `Disabled`, all counts zero, lexical-only mode on, and
    /// no lexical backend information.
    fn default() -> Self {
        // Every semantic counter starts from the same "nothing happened" value.
        let no_activity: usize = 0;
        HybridExecutionNote {
            lexical_only_mode: true,
            semantic_requested: false,
            semantic_enabled: false,
            semantic_status: HybridSemanticStatus::Disabled,
            semantic_reason: None,
            semantic_candidate_count: no_activity,
            semantic_hit_count: no_activity,
            semantic_match_count: no_activity,
            lexical_backend: None,
            lexical_backend_note: None,
        }
    }
}
/// Result of executing a hybrid search.
///
/// The default value has empty collections, a default [`HybridExecutionNote`],
/// and no attribution or trace data.
#[derive(Debug, Clone, Default)]
pub struct SearchHybridExecutionOutput {
/// Ranked evidence returned as the search matches.
pub matches: Vec<HybridRankedEvidence>,
/// Ranked evidence entries exposed as anchors.
/// NOTE(review): how this differs from `matches` is decided by the executor — confirm there.
pub ranked_anchors: Vec<HybridRankedEvidence>,
/// Crate-internal pool of ranked evidence.
/// NOTE(review): presumably grouped for coverage selection, per the name — confirm at the producer.
pub(crate) coverage_grouped_pool: Vec<HybridRankedEvidence>,
/// Diagnostics recorded while executing the search.
pub diagnostics: SearchExecutionDiagnostics,
/// Per-channel results produced by the execution.
pub channel_results: Vec<ChannelResult>,
/// Summary of semantic-channel usage and lexical backend.
pub note: HybridExecutionNote,
/// Optional stage attribution data, when recorded.
pub stage_attribution: Option<SearchStageAttribution>,
/// Optional crate-internal post-selection trace, when recorded.
pub(crate) post_selection_trace: Option<PostSelectionTrace>,
}
/// One ranked piece of evidence from a hybrid search: a document and anchor,
/// an excerpt, and the score and source labels for each channel.
#[derive(Debug, Clone, PartialEq)]
pub struct HybridRankedEvidence {
/// Document the evidence belongs to.
pub document: HybridDocumentRef,
/// Anchor locating the evidence.
pub anchor: EvidenceAnchor,
/// Excerpt text associated with the evidence.
pub excerpt: String,
/// Blended score for this entry.
/// NOTE(review): presumably combined from the per-channel scores using
/// `HybridChannelWeights` — confirm against the ranking code.
pub blended_score: f32,
/// Score attributed to the lexical channel.
pub lexical_score: f32,
/// Score attributed to the witness channel.
pub witness_score: f32,
/// Score attributed to the graph channel.
pub graph_score: f32,
/// Score attributed to the semantic channel.
pub semantic_score: f32,
/// Source labels recorded for the lexical score.
pub lexical_sources: Vec<String>,
/// Source labels recorded for the witness score.
pub witness_sources: Vec<String>,
/// Source labels recorded for the graph score.
pub graph_sources: Vec<String>,
/// Source labels recorded for the semantic score.
pub semantic_sources: Vec<String>,
}
/// Crate-internal form of the search filters in which the language is a typed
/// [`SymbolLanguage`] rather than a raw string (compare [`SearchFilters`]).
#[derive(Debug, Clone, Default)]
pub(crate) struct NormalizedSearchFilters {
/// Optional repository identifier to filter on.
pub(crate) repository_id: Option<String>,
/// Optional typed language to filter on.
pub(crate) language: Option<SymbolLanguage>,
}
/// Orderable key composed of a repository id, root path, and snapshot id.
///
/// The derived ordering compares fields in declaration order.
/// NOTE(review): presumably keys a cache of path-witness projections, per the
/// type name — confirm at the cache definition.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct HybridPathWitnessProjectionCacheKey {
/// Identifier of the repository.
pub(crate) repository_id: String,
/// Root path of the repository.
pub(crate) root: PathBuf,
/// Identifier of the snapshot.
pub(crate) snapshot_id: String,
}
/// Orderable key composed of a file path, a modification timestamp, and a size.
///
/// The derived ordering compares fields in declaration order.
/// NOTE(review): presumably keys a cache of `HybridGraphFileAnalysis` values so
/// that a changed file yields a different key — confirm at the cache definition.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct HybridGraphFileAnalysisCacheKey {
/// Path of the file.
pub(crate) path: PathBuf,
/// Modification time of the file; Unix-epoch nanoseconds per the field name.
pub(crate) modified_unix_nanos: u128,
/// Size of the file in bytes.
pub(crate) size_bytes: u64,
}
/// Analysis results held for a single file: symbol definitions plus optional
/// PHP and Blade data.
///
/// The default value has no symbols and all optional parts set to `None`.
#[derive(Debug, Clone, Default)]
pub(crate) struct HybridGraphFileAnalysis {
/// Symbol definitions found for the file.
pub(crate) symbols: Vec<crate::indexer::SymbolDefinition>,
/// Optional PHP declaration relations; `None` when not available.
/// NOTE(review): presumably only populated for PHP files — confirm at the producer.
pub(crate) php_declaration_relations: Option<Vec<PhpDeclarationRelation>>,
/// Optional PHP source evidence; `None` when not available.
pub(crate) php_evidence: Option<PhpSourceEvidence>,
/// Optional Blade source evidence; `None` when not available.
pub(crate) blade_evidence: Option<BladeSourceEvidence>,
}
/// Converts search diagnostics into channel diagnostics, preserving order.
///
/// Each entry's kind becomes the diagnostic code (`"walk"` or `"read"`) and its
/// message is copied verbatim. The entry's repository id and path are not
/// carried over.
pub(crate) fn search_diagnostics_to_channel_diagnostics(
    diagnostics: &SearchExecutionDiagnostics,
) -> Vec<ChannelDiagnostic> {
    let mut converted = Vec::with_capacity(diagnostics.entries.len());
    for entry in &diagnostics.entries {
        let code = match entry.kind {
            SearchDiagnosticKind::Read => "read",
            SearchDiagnosticKind::Walk => "walk",
        };
        converted.push(ChannelDiagnostic {
            code: code.to_owned(),
            message: entry.message.clone(),
        });
    }
    converted
}
/// Builds a [`ChannelResult`] for `channel` that carries no content: two empty
/// vectors, default stats, and a health built from `status` and `reason`.
pub(crate) fn empty_channel_result(
    channel: EvidenceChannel,
    status: ChannelHealthStatus,
    reason: Option<String>,
) -> ChannelResult {
    let health = ChannelHealth::new(status, reason);
    let stats = ChannelStats::default();
    ChannelResult::new(channel, Vec::new(), health, Vec::new(), stats)
}
/// Returns the first entry in `channel_results` whose channel equals `channel`,
/// or `None` when no entry matches.
fn channel_result_by_channel(
    channel_results: &[ChannelResult],
    channel: EvidenceChannel,
) -> Option<&ChannelResult> {
    for candidate in channel_results {
        if candidate.channel == channel {
            return Some(candidate);
        }
    }
    None
}
/// Maps a channel health status to the status reported for the semantic
/// channel: `Filtered` is reported as `Disabled`; every other status is
/// returned unchanged.
fn hybrid_semantic_status_from_channel_health(status: ChannelHealthStatus) -> HybridSemanticStatus {
    if matches!(status, ChannelHealthStatus::Filtered) {
        ChannelHealthStatus::Disabled
    } else {
        status
    }
}
/// Builds a [`HybridExecutionNote`] from the semantic entry in `channel_results`.
///
/// - `semantic_requested` is `query_semantic` when given, otherwise
///   `semantic_runtime_enabled`.
/// - Status, reason, and counts come from the semantic channel result. When
///   that result is absent, the status is `Disabled`, the reason is `None`,
///   and all counts are zero.
/// - `semantic_enabled` is true only when the semantic match count is non-zero.
/// - `lexical_only_mode` is true unless the status is `Ok` and the semantic
///   match count is non-zero.
///
/// The lexical backend fields are left as `None`.
pub(crate) fn hybrid_execution_note_from_channel_results(
    query_semantic: Option<bool>,
    semantic_runtime_enabled: bool,
    channel_results: &[ChannelResult],
) -> HybridExecutionNote {
    let semantic_requested = match query_semantic {
        Some(explicit) => explicit,
        None => semantic_runtime_enabled,
    };

    let (
        semantic_status,
        semantic_reason,
        semantic_candidate_count,
        semantic_hit_count,
        semantic_match_count,
    ) = match channel_result_by_channel(channel_results, EvidenceChannel::Semantic) {
        Some(result) => (
            hybrid_semantic_status_from_channel_health(result.health.status),
            result.health.reason.clone(),
            result.stats.candidate_count,
            result.stats.hit_count,
            result.stats.match_count,
        ),
        None => (HybridSemanticStatus::Disabled, None, 0, 0, 0),
    };

    let semantic_contributed = semantic_match_count > 0;
    let semantic_healthy = semantic_status == HybridSemanticStatus::Ok;

    HybridExecutionNote {
        semantic_requested,
        semantic_enabled: semantic_contributed,
        semantic_status,
        semantic_reason,
        semantic_candidate_count,
        semantic_hit_count,
        semantic_match_count,
        lexical_only_mode: !(semantic_healthy && semantic_contributed),
        lexical_backend: None,
        lexical_backend_note: None,
    }
}
/// Counts the distinct documents, keyed by `(repository_id, path)`, that appear
/// both in `hits` and in `matches`.
///
/// Several hits or matches on the same document count once. Returns `0` when
/// either slice is empty.
pub(crate) fn match_count_for_hits(
    matches: &[HybridRankedEvidence],
    hits: &[HybridChannelHit],
) -> usize {
    if matches.is_empty() || hits.is_empty() {
        return 0;
    }

    let matched_documents: BTreeSet<_> = matches
        .iter()
        .map(|entry| (&entry.document.repository_id, &entry.document.path))
        .collect();
    let hit_documents: BTreeSet<_> = hits
        .iter()
        .map(|hit| (&hit.document.repository_id, &hit.document.path))
        .collect();

    // Both sets are already de-duplicated, so the intersection size is the
    // number of distinct hit documents that were also matched.
    hit_documents.intersection(&matched_documents).count()
}