use std::path::Path;
use crate::chunk::{ChunkConfig, CodeChunk};
/// Batch size that [`SearchConfig::default`] assigns to `batch_size`.
pub const DEFAULT_BATCH_SIZE: usize = 32;
/// Search settings: batching, chunking, file filtering, and search mode.
///
/// [`SearchConfig::default`] supplies the baseline values noted on each field.
#[derive(Debug, Clone)]
pub struct SearchConfig {
/// Batch size. Default: [`DEFAULT_BATCH_SIZE`].
pub batch_size: usize,
/// Token limit. Default: `0`.
/// NOTE(review): `0` presumably means "no limit" — confirm against the code that reads it.
pub max_tokens: usize,
/// Chunking settings. Default: `ChunkConfig::default()`.
pub chunk: ChunkConfig,
/// Text-mode switch. Default: `false`.
/// NOTE(review): its exact effect is not visible in this file — document once confirmed.
pub text_mode: bool,
/// Optional cascade dimension. Default: `None`.
/// NOTE(review): presumably a reduced dimension for a first ranking pass — confirm.
pub cascade_dim: Option<usize>,
/// File-type filter, copied as-is into the walk options. Default: `None`.
pub file_type: Option<String>,
/// Extensions to skip. [`SearchConfig::walk_options`] appends
/// [`PROSE_EXTENSIONS`] to a copy of this list for [`Scope::Code`] when
/// `include_extensions` is empty. Default: empty.
pub exclude_extensions: Vec<String>,
/// Extensions to keep. When non-empty, `scope` has no effect on the walk
/// options. Default: empty.
pub include_extensions: Vec<String>,
/// Ignore patterns copied into the walk options;
/// [`SearchConfig::apply_repo_config`] can append more. Default: empty.
pub ignore_patterns: Vec<String>,
/// Corpus selector; see [`SearchConfig::walk_options`]. Default: [`Scope::All`].
pub scope: Scope,
/// Search mode. Default: `SearchMode::Hybrid`.
pub mode: crate::hybrid::SearchMode,
}
/// Selects which kind of files a search covers.
///
/// Serde reads and writes the variants in lowercase (`"code"`, `"docs"`,
/// `"all"`). The scope only influences [`SearchConfig::walk_options`] when
/// `include_extensions` is empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Scope {
/// Adds every entry of [`PROSE_EXTENSIONS`] to the excluded extensions.
Code,
/// Uses [`PROSE_EXTENSIONS`] as the included extensions.
Docs,
/// Adds no scope-based extension filtering. This is the default variant.
#[default]
All,
}
/// File extensions treated as prose rather than code.
///
/// Entries are lowercase and carry no leading dot.
/// [`SearchConfig::walk_options`] uses this list as the include list for
/// [`Scope::Docs`] and appends it to the exclude list for [`Scope::Code`].
pub const PROSE_EXTENSIONS: &[&str] = &[
"md", "markdown", "mdx", "rst", "txt", "text", "adoc", "asciidoc", "org",
];
impl SearchConfig {
    /// Builds the [`crate::walk::WalkOptions`] described by this configuration.
    ///
    /// `file_type`, both extension lists, and `ignore_patterns` are cloned from
    /// `self`. When `include_extensions` is empty, `scope` additionally shapes
    /// the extension lists:
    ///
    /// * [`Scope::Docs`] — the include list becomes [`PROSE_EXTENSIONS`].
    /// * [`Scope::Code`] — each entry of [`PROSE_EXTENSIONS`] that is not
    ///   already excluded (compared ASCII case-insensitively) is appended to
    ///   the exclude list.
    /// * [`Scope::All`] — nothing is added.
    ///
    /// A non-empty `include_extensions` disables the scope adjustment entirely.
    #[must_use]
    pub fn walk_options(&self) -> crate::walk::WalkOptions {
        let mut include_extensions = self.include_extensions.clone();
        let mut exclude_extensions = self.exclude_extensions.clone();

        // An explicit include list always wins over the scope.
        if include_extensions.is_empty() {
            match self.scope {
                Scope::All => {}
                Scope::Docs => {
                    // The list is known to be empty here, so collecting into it
                    // is the same as extending it.
                    include_extensions = PROSE_EXTENSIONS
                        .iter()
                        .map(|prose| String::from(*prose))
                        .collect();
                }
                Scope::Code => {
                    for &prose in PROSE_EXTENSIONS {
                        // Checked against the growing list so nothing is
                        // excluded twice, whatever its letter case.
                        let already_excluded = exclude_extensions
                            .iter()
                            .any(|existing| existing.eq_ignore_ascii_case(prose));
                        if !already_excluded {
                            exclude_extensions.push(prose.to_owned());
                        }
                    }
                }
            }
        }

        crate::walk::WalkOptions {
            file_type: self.file_type.clone(),
            include_extensions,
            exclude_extensions,
            ignore_patterns: self.ignore_patterns.clone(),
        }
    }

    /// Merges ignore patterns from the repository configuration for `root`.
    ///
    /// The configuration is looked up with `crate::cache::config::find_config`;
    /// when that returns `None` this method does nothing. Otherwise each of
    /// its ignore patterns is appended to `self.ignore_patterns`, in order,
    /// unless it is empty/whitespace-only or already present (exact string
    /// match). Patterns are stored exactly as found, without trimming.
    pub fn apply_repo_config(&mut self, root: &Path) {
        if let Some((_, config)) = crate::cache::config::find_config(root) {
            for pattern in config.ignore.patterns {
                let is_blank = pattern.trim().is_empty();
                if is_blank || self.ignore_patterns.contains(&pattern) {
                    continue;
                }
                self.ignore_patterns.push(pattern);
            }
        }
    }
}
impl Default for SearchConfig {
    /// Baseline configuration: [`DEFAULT_BATCH_SIZE`] batches, default
    /// chunking, no file filters, [`Scope::All`], and hybrid search mode.
    fn default() -> Self {
        Self {
            // Batching and chunking.
            chunk: ChunkConfig::default(),
            batch_size: DEFAULT_BATCH_SIZE,
            max_tokens: 0,
            cascade_dim: None,
            text_mode: false,

            // File selection: nothing filtered, every scope.
            scope: Scope::All,
            file_type: None,
            include_extensions: Vec::new(),
            exclude_extensions: Vec::new(),
            ignore_patterns: Vec::new(),

            // Search mode.
            mode: crate::hybrid::SearchMode::Hybrid,
        }
    }
}
/// A chunk paired with its score.
#[derive(Debug, Clone)]
pub struct SearchResult {
/// The chunk this result refers to.
pub chunk: CodeChunk,
/// Score for `chunk`. [`apply_structural_boost`] overwrites it with a
/// min-max normalized value plus a per-file boost.
pub similarity: f32,
}
/// Rescales result scores and adds a per-file boost, in place.
///
/// Each `similarity` is min-max normalized across `results` and then increased
/// by `alpha * rank`, where `rank` is the `file_ranks` entry keyed by the
/// result's `chunk.file_path` (`0.0` when the file has no entry).
///
/// The call is a no-op when `results` is empty or `alpha == 0.0`; in that case
/// the scores are left un-normalized as well. When every score is identical,
/// the normalized part is `0.0` for all results. The slice is not reordered.
pub fn apply_structural_boost<S: ::std::hash::BuildHasher>(
    results: &mut [SearchResult],
    file_ranks: &std::collections::HashMap<String, f32, S>,
    alpha: f32,
) {
    if alpha == 0.0 || results.is_empty() {
        return;
    }

    // One pass finds both extremes of the current score distribution.
    let (lowest, highest) = results.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(lo, hi), hit| (lo.min(hit.similarity), hi.max(hit.similarity)),
    );
    // Floor the span so identical scores do not divide by zero.
    let span = (highest - lowest).max(1e-12);

    for hit in results {
        let rank = file_ranks.get(&hit.chunk.file_path).map_or(0.0, |r| *r);
        hit.similarity = (hit.similarity - lowest) / span + alpha * rank;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Tolerance used when comparing boosted scores.
    const EPS: f32 = 1e-6;

    /// Builds a result for `file_path` with the given raw score; the remaining
    /// chunk fields are placeholders that the boost does not read.
    fn make_result(file_path: &str, similarity: f32) -> SearchResult {
        let chunk = CodeChunk {
            file_path: file_path.to_string(),
            name: "test".to_string(),
            kind: "function".to_string(),
            start_line: 1,
            end_line: 10,
            enriched_content: String::new(),
            content: String::new(),
        };
        SearchResult { chunk, similarity }
    }

    /// Fails unless `actual` is within [`EPS`] of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < EPS);
    }

    #[test]
    fn structural_boost_normalizes_and_applies() {
        let mut results: Vec<SearchResult> =
            [("src/a.rs", 0.8_f32), ("src/b.rs", 0.4), ("src/c.rs", 0.6)]
                .iter()
                .map(|&(path, score)| make_result(path, score))
                .collect();
        let ranks: HashMap<String, f32> =
            [("src/a.rs", 0.5_f32), ("src/b.rs", 1.0), ("src/c.rs", 0.0)]
                .iter()
                .map(|&(path, rank)| (path.to_string(), rank))
                .collect();

        apply_structural_boost(&mut results, &ranks, 0.2);

        // Normalized scores are 1.0 / 0.0 / 0.5; boosts are 0.1 / 0.2 / 0.0.
        assert_close(results[0].similarity, 1.1);
        assert_close(results[1].similarity, 0.2);
        assert_close(results[2].similarity, 0.5);
    }

    #[test]
    fn structural_boost_noop_on_empty() {
        let mut results: Vec<SearchResult> = Vec::new();
        let ranks: HashMap<String, f32> = HashMap::new();

        apply_structural_boost(&mut results, &ranks, 0.2);

        assert!(results.is_empty());
    }

    #[test]
    fn structural_boost_noop_on_zero_alpha() {
        let mut results = vec![make_result("src/a.rs", 0.8)];
        let mut ranks: HashMap<String, f32> = HashMap::new();
        ranks.insert("src/a.rs".to_string(), 1.0);

        apply_structural_boost(&mut results, &ranks, 0.0);

        // A zero alpha returns early, so the raw score survives untouched.
        assert_close(results[0].similarity, 0.8);
    }
}