use std::env;
/// Tunable settings for tree search.
///
/// `Default` supplies the baseline values quoted below, and `from_env`
/// overrides a subset of them from `TREESEARCH_*` environment variables.
///
/// NOTE(review): the code that consumes these fields is not part of this
/// file. Descriptions marked "presumably" are read off the field names only
/// and should be confirmed against the search implementation.
#[derive(Debug, Clone)]
pub struct TreeSearchConfig {
/// Presumably the maximum number of nodes kept per document. Default: 5.
/// Overridable via `TREESEARCH_MAX_NODES_PER_DOC`.
pub max_nodes_per_doc: usize,
/// Presumably the number of top-ranked documents to keep. Default: 3.
/// Overridable via `TREESEARCH_TOP_K_DOCS`.
pub top_k_docs: usize,
/// Presumably an upper bound on parallel work. Default: the value reported by
/// `std::thread::available_parallelism` (4 if unavailable), capped at 256.
pub max_concurrency: usize,
/// Presumably a cap on files read from one directory. Default: 10_000.
pub max_dir_files: usize,
/// Presumably a cap on characters per node. Default: 8_000.
pub max_node_chars: usize,
/// Presumably a cap on characters in a result. Default: 32_000.
pub max_result_chars: usize,
/// Weight for the title column; first entry of `fts_weights()`. Default: 5.0.
/// ("fts" presumably stands for full-text search — confirm.)
pub fts_title_weight: f64,
/// Weight for the summary column; second entry of `fts_weights()`. Default: 2.0.
pub fts_summary_weight: f64,
/// Weight for the body column; third entry of `fts_weights()`. Default: 10.0.
pub fts_body_weight: f64,
/// Weight for the code column; fourth entry of `fts_weights()`. Default: 1.0.
pub fts_code_weight: f64,
/// Weight for the front-matter column; fifth entry of `fts_weights()`. Default: 2.0.
pub fts_front_matter_weight: f64,
/// Selected search strategy. Default: `SearchMode::Auto`.
/// Overridable via `TREESEARCH_SEARCH_MODE`.
pub search_mode: SearchMode,
/// Presumably how many anchor nodes are selected overall. Default: 5.
pub anchor_top_k: usize,
/// Presumably how many anchors a single document may contribute. Default: 3.
pub max_anchor_per_doc: usize,
/// Presumably a budget of node expansions during the tree walk. Default: 40.
pub max_expansions: usize,
/// Presumably the maximum distance (in hops) walked from an anchor. Default: 3.
pub max_hops: usize,
/// Presumably how many sibling nodes are considered. Default: 2.
pub max_siblings: usize,
/// Presumably the score below which frontier nodes are dropped. Default: 0.1.
pub min_frontier_score: f64,
/// Presumably the score at which the walk stops early. Default: 0.95.
pub early_stop_score: f64,
/// Presumably the number of top-ranked paths kept. Default: 3.
pub path_top_k: usize,
/// Tokenizer selection for CJK text. Default: `CjkTokenizerMode::Auto`.
/// Overridable via `TREESEARCH_CJK_TOKENIZER`.
pub cjk_tokenizer: CjkTokenizerMode,
}
/// Search strategy selector stored in `TreeSearchConfig::search_mode`.
///
/// NOTE(review): what each strategy actually does is implemented outside this
/// file; the variant notes below only describe how the values are parsed and
/// printed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchMode {
    /// Keyword `"auto"`; also the result of parsing any unrecognised input.
    Auto,
    /// Keyword `"flat"`.
    Flat,
    /// Keyword `"tree"`.
    Tree,
}

impl SearchMode {
    /// Returns the lowercase keyword for this mode: `"auto"`, `"flat"` or `"tree"`.
    ///
    /// Every keyword is a string literal, so the returned slice is `'static`
    /// and is not tied to the lifetime of the `&self` borrow.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Auto => "auto",
            Self::Flat => "flat",
            Self::Tree => "tree",
        }
    }

    /// Parses a mode keyword, ignoring ASCII case.
    ///
    /// `"flat"` and `"tree"` map to their variants; every other input
    /// (including the empty string) yields [`SearchMode::Auto`]. The input is
    /// not trimmed, so surrounding whitespace makes a keyword unrecognised.
    ///
    /// This never fails, which is why it is an inherent method returning
    /// `Self` rather than an implementation of `std::str::FromStr`.
    // The name mirrors `FromStr::from_str`, but the infallible signature is
    // part of the existing public interface and must not change.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Self {
        // Keywords are pure ASCII, so an ASCII case-insensitive comparison
        // accepts the same inputs as lowercasing first, without allocating.
        if s.eq_ignore_ascii_case("flat") {
            Self::Flat
        } else if s.eq_ignore_ascii_case("tree") {
            Self::Tree
        } else {
            Self::Auto
        }
    }
}
/// Tokenizer selection for CJK text, stored in `TreeSearchConfig::cjk_tokenizer`.
///
/// NOTE(review): the tokenizers themselves live outside this file; the
/// variant notes below only describe how the values are parsed and printed
/// here. Presumably `Jieba` is word segmentation, `Bigram` two-character
/// windows and `Char` single characters — confirm against the tokenizer code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CjkTokenizerMode {
    /// Keyword `"auto"`; also the result of parsing any unrecognised input.
    Auto,
    /// Keyword `"jieba"`.
    Jieba,
    /// Keyword `"bigram"`.
    Bigram,
    /// Keyword `"char"`.
    Char,
}

impl CjkTokenizerMode {
    /// Returns the lowercase keyword for this mode: `"auto"`, `"jieba"`,
    /// `"bigram"` or `"char"`.
    ///
    /// This is the inverse of [`CjkTokenizerMode::from_str`] and parallels
    /// `SearchMode::as_str`.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Auto => "auto",
            Self::Jieba => "jieba",
            Self::Bigram => "bigram",
            Self::Char => "char",
        }
    }

    /// Parses a tokenizer keyword, ignoring ASCII case.
    ///
    /// `"jieba"`, `"bigram"` and `"char"` map to their variants; every other
    /// input (including the empty string) yields [`CjkTokenizerMode::Auto`].
    /// The input is not trimmed, so surrounding whitespace makes a keyword
    /// unrecognised.
    ///
    /// This never fails, which is why it is an inherent method returning
    /// `Self` rather than an implementation of `std::str::FromStr`.
    // The name mirrors `FromStr::from_str`, but the infallible signature is
    // part of the existing public interface and must not change.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Self {
        // Keywords are pure ASCII, so an ASCII case-insensitive comparison
        // accepts the same inputs as lowercasing first, without allocating.
        if s.eq_ignore_ascii_case("jieba") {
            Self::Jieba
        } else if s.eq_ignore_ascii_case("bigram") {
            Self::Bigram
        } else if s.eq_ignore_ascii_case("char") {
            Self::Char
        } else {
            Self::Auto
        }
    }
}
impl Default for TreeSearchConfig {
    /// Builds the baseline configuration used when nothing is overridden.
    ///
    /// All values are fixed constants except `max_concurrency`, which is
    /// taken from the host: the parallelism reported by the standard library,
    /// or 4 when that query fails, and never more than 256.
    fn default() -> Self {
        const FALLBACK_WORKERS: usize = 4;
        const WORKER_CEILING: usize = 256;

        let detected_workers = std::thread::available_parallelism()
            .map_or(FALLBACK_WORKERS, std::num::NonZeroUsize::get);
        let worker_limit = detected_workers.min(WORKER_CEILING);

        Self {
            // Result-size and resource limits.
            max_nodes_per_doc: 5,
            top_k_docs: 3,
            max_concurrency: worker_limit,
            max_dir_files: 10_000,
            max_node_chars: 8_000,
            max_result_chars: 32_000,

            // Per-column weights, exposed together through `fts_weights()`.
            fts_title_weight: 5.0,
            fts_summary_weight: 2.0,
            fts_body_weight: 10.0,
            fts_code_weight: 1.0,
            fts_front_matter_weight: 2.0,

            search_mode: SearchMode::Auto,

            // Anchor / expansion tuning.
            anchor_top_k: 5,
            max_anchor_per_doc: 3,
            max_expansions: 40,
            max_hops: 3,
            max_siblings: 2,
            min_frontier_score: 0.1,
            early_stop_score: 0.95,
            path_top_k: 3,

            cjk_tokenizer: CjkTokenizerMode::Auto,
        }
    }
}
impl TreeSearchConfig {
    /// Builds a configuration from the defaults plus environment overrides.
    ///
    /// Recognised variables:
    /// - `TREESEARCH_CJK_TOKENIZER` sets `cjk_tokenizer` via `CjkTokenizerMode::from_str`.
    /// - `TREESEARCH_SEARCH_MODE` sets `search_mode` via `SearchMode::from_str`.
    /// - `TREESEARCH_MAX_NODES_PER_DOC` sets `max_nodes_per_doc`.
    /// - `TREESEARCH_TOP_K_DOCS` sets `top_k_docs`.
    ///
    /// A variable that is unset or not valid Unicode leaves the default in
    /// place, and so does a numeric variable that does not parse as `usize`.
    /// No error is reported in either case.
    pub fn from_env() -> Self {
        // Best-effort numeric lookup: `None` means "keep the default".
        let read_count = |key: &str| -> Option<usize> {
            env::var(key).ok().and_then(|raw| raw.parse().ok())
        };

        let base = Self::default();
        Self {
            cjk_tokenizer: env::var("TREESEARCH_CJK_TOKENIZER")
                .map_or(base.cjk_tokenizer, |raw| CjkTokenizerMode::from_str(&raw)),
            search_mode: env::var("TREESEARCH_SEARCH_MODE")
                .map_or(base.search_mode, |raw| SearchMode::from_str(&raw)),
            max_nodes_per_doc: read_count("TREESEARCH_MAX_NODES_PER_DOC")
                .unwrap_or(base.max_nodes_per_doc),
            top_k_docs: read_count("TREESEARCH_TOP_K_DOCS").unwrap_or(base.top_k_docs),
            ..base
        }
    }

    /// Returns the five column weights as an array, in this fixed order:
    /// title, summary, body, code, front matter.
    pub fn fts_weights(&self) -> [f64; 5] {
        let Self {
            fts_title_weight: title,
            fts_summary_weight: summary,
            fts_body_weight: body,
            fts_code_weight: code,
            fts_front_matter_weight: front_matter,
            ..
        } = *self;
        [title, summary, body, code, front_matter]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the baseline values and the `max_concurrency` bounds.
    #[test]
    fn test_default_config() {
        let config = TreeSearchConfig::default();
        assert_eq!(config.max_nodes_per_doc, 5);
        assert_eq!(config.fts_body_weight, 10.0);
        assert_eq!(config.search_mode, SearchMode::Auto);
        assert_eq!(config.cjk_tokenizer, CjkTokenizerMode::Auto);
        // Detected parallelism is non-zero and the default clamps it to 256.
        assert!((1..=256).contains(&config.max_concurrency));
    }

    /// Known keywords parse case-insensitively; anything else is `Auto`.
    #[test]
    fn test_search_mode_parse() {
        assert_eq!(SearchMode::from_str("flat"), SearchMode::Flat);
        assert_eq!(SearchMode::from_str("TREE"), SearchMode::Tree);
        assert_eq!(SearchMode::from_str("blah"), SearchMode::Auto);
        assert_eq!(SearchMode::from_str(""), SearchMode::Auto);
    }

    /// `as_str` yields the keyword that `from_str` maps back to the same mode.
    #[test]
    fn test_search_mode_as_str() {
        assert_eq!(SearchMode::Auto.as_str(), "auto");
        assert_eq!(SearchMode::Flat.as_str(), "flat");
        assert_eq!(SearchMode::Tree.as_str(), "tree");
        for mode in [SearchMode::Auto, SearchMode::Flat, SearchMode::Tree] {
            assert_eq!(SearchMode::from_str(mode.as_str()), mode);
        }
    }

    /// Known tokenizer keywords parse case-insensitively; anything else is `Auto`.
    #[test]
    fn test_cjk_tokenizer_parse() {
        assert_eq!(CjkTokenizerMode::from_str("jieba"), CjkTokenizerMode::Jieba);
        assert_eq!(CjkTokenizerMode::from_str("BIGRAM"), CjkTokenizerMode::Bigram);
        assert_eq!(CjkTokenizerMode::from_str("Char"), CjkTokenizerMode::Char);
        assert_eq!(CjkTokenizerMode::from_str("blah"), CjkTokenizerMode::Auto);
        assert_eq!(CjkTokenizerMode::from_str(""), CjkTokenizerMode::Auto);
    }

    /// Weights come back in title, summary, body, code, front-matter order.
    #[test]
    fn test_fts_weights() {
        let config = TreeSearchConfig::default();
        let w = config.fts_weights();
        assert_eq!(w, [5.0, 2.0, 10.0, 1.0, 2.0]);
    }
}