use crate::history::Conversation;
use chrono::{DateTime, Duration, Local};
use rayon::prelude::*;
/// Pre-normalized search fields for one conversation.
///
/// Built by [`precompute_search_text`] so that scoring never has to lowercase
/// or re-normalize text per query.
#[derive(Debug, Clone)]
pub struct SearchableConversation {
    /// The conversation's `search_text_lower`, with the normalized project
    /// name appended (space-separated) when one is present. Used to decide
    /// whether a query matches at all.
    pub text_lower: String,
    /// Normalized custom title, or empty when the conversation has none.
    pub title_lower: String,
    /// Normalized summary, or empty when the conversation has none.
    pub summary_lower: String,
    /// Normalized project name, or empty when the conversation has none.
    pub project_lower: String,
    /// Position of the source conversation in the slice that was passed to
    /// [`precompute_search_text`].
    pub index: usize,
}
/// Returns `true` when `query`, ignoring surrounding whitespace, has the
/// canonical hyphenated UUID shape: five hex groups of 8-4-4-4-12 digits.
///
/// Both upper- and lowercase hex digits are accepted.
pub fn is_uuid(query: &str) -> bool {
    const GROUP_LENS: [usize; 5] = [8, 4, 4, 4, 12];

    let candidate = query.trim();
    // 32 hex digits plus 4 hyphens.
    if candidate.len() != 36 {
        return false;
    }

    let mut groups = candidate.split('-');
    let groups_well_formed = GROUP_LENS.iter().all(|&expected| match groups.next() {
        Some(group) => {
            group.len() == expected && group.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    });

    // Exactly five groups: nothing may follow the last one.
    groups_well_formed && groups.next().is_none()
}
/// Returns `true` for the punctuation characters that search treats as word
/// separators in CJK text.
fn is_cjk_punctuation(c: char) -> bool {
    matches!(
        c,
        // Ideographic space, comma and full stop.
        '\u{3000}'..='\u{3002}'
            // CJK angle, corner and lenticular brackets.
            | '\u{3008}'..='\u{3011}'
            // Tortoise-shell and white lenticular brackets.
            | '\u{3014}'..='\u{3017}'
            // Fullwidth ! ( ) , : ; ?
            | '\u{FF01}'
            | '\u{FF08}'..='\u{FF09}'
            | '\u{FF0C}'
            | '\u{FF1A}'..='\u{FF1B}'
            | '\u{FF1F}'
            // Curly single and double quotes.
            | '\u{2018}'..='\u{2019}'
            | '\u{201C}'..='\u{201D}'
            // Em dash, horizontal ellipsis, middle dot.
            | '\u{2014}'
            | '\u{2026}'
            | '\u{00B7}'
    )
}
/// Produces the canonical form used on both sides of a search comparison.
///
/// `_`, `-`, `/` and CJK punctuation each become a single ASCII space; every
/// other character is lowercased. Existing whitespace is left as it is.
pub fn normalize_for_search(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut normalized, ch| {
            match ch {
                '_' | '-' | '/' => normalized.push(' '),
                punct if is_cjk_punctuation(punct) => normalized.push(' '),
                // Lowercasing can expand to several characters.
                other => normalized.extend(other.to_lowercase()),
            }
            normalized
        })
}
/// Returns `true` when `c` ends a word for search purposes: any whitespace,
/// one of `_`, `-`, `/`, or a CJK punctuation character.
pub fn is_word_separator(c: char) -> bool {
    match c {
        '_' | '-' | '/' => true,
        other => other.is_whitespace() || is_cjk_punctuation(other),
    }
}
pub fn precompute_search_text(conversations: &[Conversation]) -> Vec<SearchableConversation> {
conversations
.par_iter()
.enumerate()
.map(|(idx, conv)| {
let title_lower = conv
.custom_title
.as_ref()
.map(|t| normalize_for_search(t))
.unwrap_or_default();
let summary_lower = conv
.summary
.as_ref()
.map(|s| normalize_for_search(s))
.unwrap_or_default();
let project_lower = conv
.project_name
.as_ref()
.map(|n| normalize_for_search(n))
.unwrap_or_default();
let text_lower = if project_lower.is_empty() {
conv.search_text_lower.clone()
} else {
format!("{} {}", conv.search_text_lower, project_lower)
};
SearchableConversation {
text_lower,
title_lower,
summary_lower,
project_lower,
index: idx,
}
})
.collect()
}
/// Ranks conversations against `query` and returns their indices, best first.
///
/// A query with no searchable words (empty, whitespace or separators only)
/// returns every index in `conversations` in its original order. Otherwise
/// only conversations with a positive score are returned, ordered by score
/// descending and then by timestamp, newest first.
///
/// `searchable` is indexed back into `conversations` through each entry's
/// `index`, so it must have been built from the same slice.
pub fn search(
    conversations: &[Conversation],
    searchable: &[SearchableConversation],
    query: &str,
    now: DateTime<Local>,
) -> Vec<usize> {
    // `split_whitespace` ignores leading and trailing whitespace, so one
    // emptiness check covers blank queries and separator-only queries alike.
    let normalized = normalize_for_search(query);
    let terms: Vec<&str> = normalized.split_whitespace().collect();
    if terms.is_empty() {
        return (0..conversations.len()).collect();
    }

    // Consecutive query words joined by a space; empty for one-word queries.
    let phrases: Vec<String> = terms.windows(2).map(|pair| pair.join(" ")).collect();

    let mut ranked: Vec<(usize, f64, DateTime<Local>)> = searchable
        .par_iter()
        .filter_map(|entry| {
            let conv = &conversations[entry.index];
            let score = score_text(
                entry,
                &conv.search_text_lower,
                &terms,
                &phrases,
                conv.timestamp,
                now,
            );
            (score > 0.0).then_some((entry.index, score, conv.timestamp))
        })
        .collect();

    ranked.sort_unstable_by(|lhs, rhs| match rhs.1.partial_cmp(&lhs.1) {
        // Equal or incomparable scores fall back to the newer timestamp.
        Some(std::cmp::Ordering::Equal) | None => rhs.2.cmp(&lhs.2),
        Some(by_score) => by_score,
    });

    ranked.into_iter().map(|(index, ..)| index).collect()
}
// Per-field multipliers used by `score_impl`: each field's term-frequency and
// adjacency scores are multiplied by the weight of the field they came from.

/// Weight for matches in the conversation's custom title.
const WEIGHT_TITLE: f64 = 5.0;
/// Weight for matches in the conversation's summary.
const WEIGHT_SUMMARY: f64 = 3.0;
/// Weight for matches in the project name.
const WEIGHT_PROJECT: f64 = 4.0;
/// Weight for matches in the body text.
const WEIGHT_BODY: f64 = 1.0;
/// Full breakdown of how one conversation was scored for a query.
#[derive(Debug, Clone)]
pub struct ScoreDebug {
    /// Final score: the relevance score plus `freshness`.
    pub total: f64,
    /// Recency bonus included in `total`.
    pub freshness: f64,
    /// One entry per non-empty field that was scored. Empty when the score
    /// came from the flat CJK substring fallback.
    pub fields: Vec<FieldDebug>,
}

/// Contribution of a single field (title, summary, project or body).
#[derive(Debug, Clone)]
pub struct FieldDebug {
    /// Field label: `"title"`, `"summary"`, `"project"` or `"body"`.
    pub name: &'static str,
    /// Multiplier applied to this field's scores.
    pub weight: f64,
    /// Weighted term-frequency score: `weight` times the sum of the per-word
    /// log scores in `word_details`.
    pub tf_score: f64,
    /// Weighted bonus for adjacent query words found in this field.
    pub adjacency_score: f64,
    /// Per query word: the word, its capped match count in this field, and
    /// the resulting log score (`ln(1 + count)`, or `0.0` when absent).
    pub word_details: Vec<(String, usize, f64)>,
}
/// Scores one conversation against the query and reports how the score was
/// built. Returns `None` when the conversation does not match.
///
/// Matching rules, in order:
/// 1. Every query word must occur somewhere in `s.text_lower`.
/// 2. Every query word must also start a whitespace-delimited word there.
///    If one does not, the conversation still matches with a flat score of
///    `0.5` per query word (plus freshness) provided *any* query word contains
///    a character in U+4E00..=U+9FFF; otherwise it is rejected.
///
/// For full matches, each non-empty field contributes
/// `weight * sum(ln(1 + tf))` over the query words (tf capped at 10) plus
/// `weight * 2.0 * adjacent_pair_hits` (hits capped at 3). The freshness
/// bonus is added last.
fn score_impl(
    s: &SearchableConversation,
    body_lower: &str,
    query_words: &[&str],
    adjacent_pairs: &[String],
    timestamp: DateTime<Local>,
    now: DateTime<Local>,
) -> Option<ScoreDebug> {
    if query_words.is_empty() {
        return None;
    }

    // Cheap substring gate before any boundary-aware scanning.
    if query_words.iter().any(|&word| !s.text_lower.contains(word)) {
        return None;
    }

    let lacks_prefix_hit = query_words
        .iter()
        .any(|&word| count_prefix_matches(&s.text_lower, word, 1) == 0);
    if lacks_prefix_hit {
        // CJK text has no spaces between words, so a substring hit is the
        // best available signal there.
        let query_has_cjk = query_words
            .iter()
            .flat_map(|word| word.chars())
            .any(|c| matches!(c, '\u{4E00}'..='\u{9FFF}'));
        if !query_has_cjk {
            return None;
        }
        let fresh = freshness_bonus(timestamp, now);
        let flat = (query_words.len() as f64) * 0.5;
        return Some(ScoreDebug {
            total: flat + fresh,
            freshness: fresh,
            fields: vec![],
        });
    }

    let weighted_fields: [(&str, f64, &'static str); 4] = [
        (s.title_lower.as_str(), WEIGHT_TITLE, "title"),
        (s.summary_lower.as_str(), WEIGHT_SUMMARY, "summary"),
        (s.project_lower.as_str(), WEIGHT_PROJECT, "project"),
        (body_lower, WEIGHT_BODY, "body"),
    ];

    let mut running_total = 0.0;
    let mut breakdown = Vec::with_capacity(weighted_fields.len());
    for &(haystack, weight, name) in weighted_fields.iter() {
        if haystack.is_empty() {
            continue;
        }

        let word_details: Vec<(String, usize, f64)> = query_words
            .iter()
            .map(|&word| {
                let hits = count_prefix_matches(haystack, word, 10);
                let log_tf = if hits == 0 {
                    0.0
                } else {
                    ((1 + hits) as f64).ln()
                };
                (word.to_string(), hits, log_tf)
            })
            .collect();
        let tf_sum = word_details
            .iter()
            .fold(0.0, |acc, detail| acc + detail.2);

        let tf_score = weight * tf_sum;
        running_total += tf_score;

        let pair_hits = if adjacent_pairs.is_empty() {
            0
        } else {
            count_adjacent_pairs(haystack, adjacent_pairs, 3)
        };
        let adjacency_score = weight * 2.0 * pair_hits as f64;
        running_total += adjacency_score;

        breakdown.push(FieldDebug {
            name,
            weight,
            tf_score,
            adjacency_score,
            word_details,
        });
    }

    let fresh = freshness_bonus(timestamp, now);
    Some(ScoreDebug {
        total: running_total + fresh,
        freshness: fresh,
        fields: breakdown,
    })
}
/// Returns only the final score for a conversation; `0.0` means "no match".
///
/// Takes the same arguments as `score_impl` and discards the breakdown.
fn score_text(
    s: &SearchableConversation,
    body_lower: &str,
    query_words: &[&str],
    adjacent_pairs: &[String],
    timestamp: DateTime<Local>,
    now: DateTime<Local>,
) -> f64 {
    match score_impl(s, body_lower, query_words, adjacent_pairs, timestamp, now) {
        Some(breakdown) => breakdown.total,
        None => 0.0,
    }
}
/// Public entry point to the scorer that keeps the full per-field breakdown.
///
/// Delegates directly to `score_impl` with the arguments unchanged and
/// returns its result: `None` when the conversation does not match the
/// query, otherwise the `ScoreDebug` describing how the score was built.
pub fn score_text_debug(
s: &SearchableConversation,
body_lower: &str,
query_words: &[&str],
adjacent_pairs: &[String],
timestamp: DateTime<Local>,
now: DateTime<Local>,
) -> Option<ScoreDebug> {
score_impl(s, body_lower, query_words, adjacent_pairs, timestamp, now)
}
/// Counts occurrences of `word` in `text` that begin a word, i.e. sit at the
/// very start of `text` or directly after a whitespace character.
///
/// Counting stops as soon as `max_count` hits have been found, so callers
/// that only need "at least one" can pass `1`. Counted hits do not overlap
/// each other. An empty `word` never matches and yields `0`.
fn count_prefix_matches(text: &str, word: &str, max_count: usize) -> usize {
    // `find("")` succeeds at every offset; stepping through those used to
    // slice past the end of `text` (or into the middle of a multi-byte
    // character) and panic.
    if word.is_empty() {
        return 0;
    }

    let mut start = 0;
    let mut count = 0;
    while let Some(pos) = text[start..].find(word) {
        let actual_pos = start + pos;
        let at_boundary = text[..actual_pos]
            .chars()
            .next_back()
            .map_or(true, char::is_whitespace);
        if at_boundary {
            count += 1;
            if count >= max_count {
                break;
            }
            start = actual_pos + word.len();
        } else {
            // An interior hit is not counted, so it must not consume the
            // text behind it: when `word` contains whitespace, a real
            // boundary hit can begin inside this span. Advance by one
            // character only (always a valid char boundary).
            let step = text[actual_pos..]
                .chars()
                .next()
                .map_or(1, char::len_utf8);
            start = actual_pos + step;
        }
    }
    count
}
/// Counts how often any of the `adjacent_pairs` phrases occurs in `text`
/// starting at a word boundary (start of `text` or right after whitespace).
///
/// The count is shared across all phrases and scanning stops as soon as it
/// reaches `max_count`. Counted hits of the same phrase do not overlap.
/// Empty phrases are ignored.
fn count_adjacent_pairs(text: &str, adjacent_pairs: &[String], max_count: usize) -> usize {
    let mut count = 0;
    for combined in adjacent_pairs {
        let phrase = combined.as_str();
        // `find("")` succeeds at every offset; stepping through those used
        // to slice past the end of `text` and panic.
        if phrase.is_empty() {
            continue;
        }

        let mut start = 0;
        while let Some(pos) = text[start..].find(phrase) {
            let actual_pos = start + pos;
            let at_boundary = text[..actual_pos]
                .chars()
                .next_back()
                .map_or(true, char::is_whitespace);
            if at_boundary {
                count += 1;
                if count >= max_count {
                    return count;
                }
                start = actual_pos + phrase.len();
            } else {
                // Phrases contain a space, so a genuine boundary hit can
                // begin inside an interior hit ("latest test test" holds
                // "test test" at offset 2 and, properly, at offset 7).
                // Skipping the whole phrase would jump over it; advance by
                // one character instead.
                let step = text[actual_pos..]
                    .chars()
                    .next()
                    .map_or(1, char::len_utf8);
                start = actual_pos + step;
            }
        }
    }
    count
}
/// Recency bonus that halves every seven days, starting from `2.0` for a
/// conversation stamped exactly `now`.
///
/// Timestamps in the future get the maximum bonus.
fn freshness_bonus(timestamp: DateTime<Local>, now: DateTime<Local>) -> f64 {
    const MAX_BONUS: f64 = 2.0;
    const HALF_LIFE_DAYS: f64 = 7.0;
    const SECONDS_PER_DAY: f64 = 86_400.0;

    let elapsed = now.signed_duration_since(timestamp);
    if elapsed >= Duration::zero() {
        // Whole seconds are converted to fractional days so the decay is
        // continuous rather than stepping at day boundaries.
        let elapsed_days = elapsed.num_seconds() as f64 / SECONDS_PER_DAY;
        MAX_BONUS * 2_f64.powf(-elapsed_days / HALF_LIFE_DAYS)
    } else {
        MAX_BONUS
    }
}
// Unit tests covering query normalization, UUID detection, the freshness
// bonus, and the relative ranking produced by `search`.
#[cfg(test)]
mod tests {
use super::*;
use crate::history::Conversation;
use std::path::PathBuf;
// Builds a `Conversation` fixture. Its searchable text is
// "<title> <summary> <text>" (title and summary are prepended only when
// given), normalized with `normalize_for_search`. All remaining fields get
// fixed placeholder values.
fn make_conv_full(
text: &str,
project: Option<&str>,
title: Option<&str>,
summary: Option<&str>,
timestamp: DateTime<Local>,
) -> Conversation {
let mut full_text = text.to_string();
if let Some(s) = summary {
full_text = format!("{} {}", s, full_text);
}
if let Some(t) = title {
full_text = format!("{} {}", t, full_text);
}
Conversation {
path: PathBuf::new(),
index: 0,
timestamp,
preview: text.to_string(),
preview_first: text.to_string(),
preview_last: text.to_string(),
full_text: full_text.clone(),
search_text_lower: normalize_for_search(&full_text),
project_name: project.map(|s| s.to_string()),
project_path: None,
cwd: None,
message_count: 1,
parse_errors: vec![],
summary: summary.map(|s| s.to_string()),
custom_title: title.map(|s| s.to_string()),
model: None,
total_tokens: 0,
duration_minutes: None,
}
}
// Fixture with body text only: no project, title or summary.
fn make_conv(text: &str, timestamp: DateTime<Local>) -> Conversation {
make_conv_full(text, None, None, None, timestamp)
}
// Fixture with body text and a project name.
fn make_conv_with_project(
text: &str,
project: &str,
timestamp: DateTime<Local>,
) -> Conversation {
make_conv_full(text, Some(project), None, None, timestamp)
}
// Underscores in the indexed text are normalized to spaces, so
// "HARDENED_RUNTIME" is found by the two-word query "harden runtime".
#[test]
fn search_matches_underscore_separated() {
let now = Local::now();
let convs = vec![make_conv("HARDENED_RUNTIME config", now)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "harden runtime", now);
assert_eq!(results.len(), 1);
}
// Matching is case-insensitive.
#[test]
fn search_matches_different_case() {
let now = Local::now();
let convs = vec![make_conv("Hardened Runtime enabled", now)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "harden runtime", now);
assert_eq!(results.len(), 1);
}
// A query word matches when it is a prefix of a word in the text.
#[test]
fn search_prefix_matches_words() {
let now = Local::now();
let convs = vec![make_conv("hardened security", now)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "harden", now);
assert_eq!(results.len(), 1);
}
// Every query word must be present; "runtime" is missing here.
#[test]
fn search_requires_all_words() {
let now = Local::now();
let convs = vec![make_conv("hardened security", now)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "harden runtime", now);
assert_eq!(results.len(), 0); }
// The query is normalized too, so an underscore in it acts as a space.
#[test]
fn search_with_underscore_in_query() {
let now = Local::now();
let convs = vec![make_conv("hardened runtime enabled", now)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "hardened_runtime", now);
assert_eq!(results.len(), 1);
}
// The bonus shrinks with age, never exceeds 2.0, and stays positive.
#[test]
fn freshness_decays_over_time() {
let now = Local::now();
let fresh = freshness_bonus(now - Duration::hours(1), now);
let week_old = freshness_bonus(now - Duration::days(7), now);
let month_old = freshness_bonus(now - Duration::days(30), now);
assert!(fresh > week_old, "fresh should score higher than week-old");
assert!(
week_old > month_old,
"week-old should score higher than month-old"
);
assert!(fresh <= 2.0, "freshness bonus should not exceed 2.0");
assert!(
month_old > 0.0,
"old conversations should still get some bonus"
);
}
// A timestamp later than `now` gets exactly the maximum bonus.
#[test]
fn future_timestamp_gets_max_freshness() {
let now = Local::now();
let timestamp = now + Duration::hours(1);
assert_eq!(freshness_bonus(timestamp, now), 2.0);
}
// The decay is continuous: scores just before and just after the 24-hour
// mark differ by less than 0.1.
#[test]
fn continuous_freshness_no_cliff() {
let now = Local::now();
let score_23h = freshness_bonus(now - Duration::hours(23), now);
let score_25h = freshness_bonus(now - Duration::hours(25), now);
let diff = (score_23h - score_25h).abs();
assert!(
diff < 0.1,
"no dramatic cliff at 24h boundary: 23h={:.3} 25h={:.3}",
score_23h,
score_25h
);
}
// The project name is searchable even though the body never mentions it,
// both with the original separators and as individual words.
#[test]
fn search_matches_project_name() {
let now = Local::now();
let convs = vec![make_conv_with_project(
"some conversation",
"workmux/main-worktree-fix",
now,
)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "main-worktree-fix", now);
assert_eq!(results.len(), 1);
let results = search(&convs, &searchable, "workmux", now);
assert_eq!(results.len(), 1);
let results = search(&convs, &searchable, "workmux main worktree", now);
assert_eq!(results.len(), 1);
}
// Hyphens split words, so the parts of "main-worktree-fix" match on their own.
#[test]
fn search_matches_hyphenated_words() {
let now = Local::now();
let convs = vec![make_conv("main-worktree-fix discussion", now)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "worktree fix", now);
assert_eq!(results.len(), 1);
}
// Canonical 8-4-4-4-12 hex strings are accepted in either letter case.
#[test]
fn is_uuid_valid() {
assert!(is_uuid("e7d318b1-4274-4ee2-a341-e94893b5df49"));
assert!(is_uuid("00000000-0000-0000-0000-000000000000"));
assert!(is_uuid("ABCDEF01-2345-6789-abcd-ef0123456789"));
}
// Rejected: empty input, arbitrary text, a missing group, a trailing extra
// character, misplaced hyphens, and a non-hex digit.
#[test]
fn is_uuid_invalid() {
assert!(!is_uuid(""));
assert!(!is_uuid("not-a-uuid"));
assert!(!is_uuid("e7d318b1-4274-4ee2-a341")); assert!(!is_uuid("e7d318b1-4274-4ee2-a341-e94893b5df49x")); assert!(!is_uuid("e7d318b14274-4ee2-a341-e94893b5df49-")); assert!(!is_uuid("g7d318b1-4274-4ee2-a341-e94893b5df49")); }
// Surrounding whitespace is trimmed before the check.
#[test]
fn is_uuid_with_whitespace() {
assert!(is_uuid(" e7d318b1-4274-4ee2-a341-e94893b5df49 "));
}
// Chinese text containing fullwidth punctuation: queries that follow a
// punctuation mark, start the text, or mix an ASCII word with a Chinese
// substring all find the conversation.
#[test]
fn search_matches_chinese_text_with_punctuation() {
let now = Local::now();
let convs = vec![make_conv(
"\u{9000}\u{51FA}\u{7801} 143 \u{5C31}\u{662F} SIGTERM\u{FF0C}\u{5C5E}\u{4E8E}\u{9884}\u{671F}\u{884C}\u{4E3A}\u{3002}\u{5F53}\u{524D}\u{65B0}\u{8FDB}",
now,
)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "\u{5C5E}\u{4E8E}\u{9884}\u{671F}", now);
assert_eq!(results.len(), 1);
let results = search(&convs, &searchable, "\u{9000}\u{51FA}\u{7801}", now);
assert_eq!(results.len(), 1);
let results = search(&convs, &searchable, "SIGTERM \u{9884}\u{671F}", now);
assert_eq!(results.len(), 1);
}
// A Chinese query that sits in the middle of an unbroken run of characters
// still matches, via the CJK substring fallback in the scorer.
#[test]
fn search_matches_chinese_substring_within_token() {
let now = Local::now();
let convs = vec![make_conv(
"\u{8FD9}\u{662F}\u{4E00}\u{4E2A}\u{6D4B}\u{8BD5}\u{4F1A}\u{8BDD}\u{5185}\u{5BB9}",
now,
)];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "\u{6D4B}\u{8BD5}\u{4F1A}\u{8BDD}", now);
assert_eq!(results.len(), 1);
}
// Fullwidth comma (U+FF0C) and ideographic full stop (U+3002) are each
// replaced by a single space during normalization.
#[test]
fn cjk_punctuation_treated_as_separator() {
assert_eq!(
normalize_for_search("SIGTERM\u{FF0C}\u{5C5E}\u{4E8E}\u{9884}\u{671F}"),
"sigterm \u{5C5E}\u{4E8E}\u{9884}\u{671F}"
);
assert_eq!(
normalize_for_search("\u{884C}\u{4E3A}\u{3002}\u{5F53}\u{524D}"),
"\u{884C}\u{4E3A} \u{5F53}\u{524D}"
);
}
// A 22-hour-old conversation whose project name matches the query must
// outrank a 1-hour-old one that only mentions the words in its body.
#[test]
fn exact_project_match_beats_recent_body_mention() {
let now = Local::now();
let old_exact = make_conv_full(
"discussion about agents config",
Some("workmux/agents-config"),
None,
None,
now - Duration::hours(22),
);
let new_incidental = make_conv_full(
"updated agents and changed config files",
Some("workmux/other-project"),
None,
None,
now - Duration::hours(1),
);
let convs = vec![old_exact, new_incidental];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "agents-config", now);
assert_eq!(results[0], 0, "exact project match should rank first");
}
// With equal timestamps, a title hit outranks a body that repeats the
// query words several times.
#[test]
fn title_match_beats_body_only() {
let now = Local::now();
let with_title = make_conv_full(
"some body text about agents and config",
None,
Some("agents config setup"),
None,
now,
);
let body_only = make_conv_full(
"discussed agents and config in detail agents config agents",
None,
None,
None,
now,
);
let convs = vec![with_title, body_only];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "agents config", now);
assert_eq!(results[0], 0, "title match should rank higher");
}
// With equal timestamps, more occurrences of the query word score higher.
#[test]
fn repeated_term_beats_single_mention() {
let now = Local::now();
let repeated = make_conv_full(
"config config config setup config again",
None,
None,
None,
now,
);
let single = make_conv_full("config was mentioned once here", None, None, None, now);
let convs = vec![repeated, single];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "config", now);
assert_eq!(results[0], 0, "repeated mentions should score higher");
}
// With equal timestamps, query words appearing next to each other earn the
// adjacency bonus and outrank the same words spread apart.
#[test]
fn adjacent_terms_beat_separated() {
let now = Local::now();
let adjacent = make_conv_full("the agents config is important", None, None, None, now);
let separated = make_conv_full(
"the agents did something and later we changed config",
None,
None,
None,
now,
);
let convs = vec![adjacent, separated];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "agents config", now);
assert_eq!(results[0], 0, "adjacent terms should score higher");
}
// A week-old conversation matching in title, project and body must come
// first for "agents config".
// NOTE(review): `new_weak` never contains "agents" (body or project), so
// the all-words filter appears to drop it from the results entirely rather
// than letting it compete on freshness. Confirm this fixture exercises the
// intended trade-off.
#[test]
fn freshness_does_not_overpower_relevance() {
let now = Local::now();
let old_relevant = make_conv_full(
"agents config agents config agents config",
Some("workmux/agents-config"),
Some("agents config"),
None,
now - Duration::days(7),
);
let new_weak = make_conv_full(
"something about config in passing",
Some("workmux/unrelated"),
None,
None,
now - Duration::minutes(5),
);
let convs = vec![old_relevant, new_weak];
let searchable = precompute_search_text(&convs);
let results = search(&convs, &searchable, "agents config", now);
assert_eq!(results[0], 0, "strong relevance should beat freshness");
}
}