1use crate::config::SessionFile;
2use crate::display;
3use crate::models::Record;
4use anyhow::Result;
5use indicatif::{ProgressBar, ProgressStyle};
6use rayon::prelude::*;
7use regex::Regex;
8use std::io::BufRead;
9use std::sync::atomic::{AtomicUsize, Ordering};
10
/// Options controlling a search: what to look for, which records qualify,
/// and how the results are reported.
///
/// `Default` produces an empty query list, every filter unset, every flag
/// `false` and `max_results == 0` (which the search code treats as "no limit").
#[derive(Debug, Clone, Default)]
pub struct SearchOpts {
    /// Search terms. Compiled as regular expressions when `is_regex` is set,
    /// otherwise matched as lowercased substrings.
    pub queries: Vec<String>,
    /// Interpret `queries` as regular expressions.
    pub is_regex: bool,
    /// Require every query to match a record instead of any single one.
    pub and_mode: bool,
    /// Keep only records whose role string equals this value.
    pub role: Option<String>,
    /// Keep only records with a tool call containing this text (case-insensitive).
    pub tool: Option<String>,
    /// Keep only session files whose project name contains this text (case-insensitive).
    pub project: Option<String>,
    /// Drop records whose timestamp string compares less than this value.
    /// Records without a timestamp are kept.
    pub after: Option<String>,
    /// Drop records whose timestamp string compares greater than this value.
    /// Records without a timestamp are kept.
    pub before: Option<String>,
    /// Keep only records whose git branch contains this text (case-insensitive).
    /// Records without a branch are dropped.
    pub branch: Option<String>,
    /// Keep only records for which the message's `touches_file` check accepts this path.
    pub file: Option<String>,
    /// Search the tool-input content. Consulted only when neither
    /// `thinking_only` nor `no_thinking` is set.
    pub tool_input: bool,
    /// Search only the thinking content. Takes precedence over the other
    /// content selectors.
    pub thinking_only: bool,
    /// Search the text content with thinking excluded.
    pub no_thinking: bool,
    /// Maximum number of hits to collect; `0` disables the limit.
    pub max_results: usize,
    /// Write the Markdown report to stdout and suppress the per-hit display.
    pub stdout_md: bool,
    /// Write the Markdown report to this path.
    pub md_file: Option<String>,
    /// Print per-project match counts instead of the hits.
    pub count_mode: bool,
    /// Print an aggregate summary instead of the hits.
    pub summary_mode: bool,
    /// Print each hit as one JSON object per line.
    pub json_mode: bool,
    /// When `false`, records whose searched text contains `SMC_TAG_OPEN` are skipped.
    pub include_smc: bool,
    /// Skip session files whose id starts with this prefix.
    pub exclude_session: Option<String>,
}
35
/// Opening marker tag. `search_file` skips any record whose searched text
/// contains it unless `SearchOpts::include_smc` is set.
pub const SMC_TAG_OPEN: &str = "<smc-cc-cli>";
/// Closing counterpart of [`SMC_TAG_OPEN`].
// NOTE(review): not referenced by the code visible in this file; presumably
// used by whatever wraps output in the tag pair — confirm against callers.
pub const SMC_TAG_CLOSE: &str = "</smc-cc-cli>";
38
39impl SearchOpts {
40 pub fn query_display(&self) -> String {
41 self.queries.join(", ")
42 }
43}
44
45struct Matcher {
46 regexes: Vec<Regex>,
47 plains: Vec<String>,
48 and_mode: bool,
49}
50
51impl Matcher {
52 fn new(queries: &[String], is_regex: bool, and_mode: bool) -> Result<Self> {
53 if is_regex {
54 let regexes = queries
55 .iter()
56 .map(|q| Regex::new(q))
57 .collect::<std::result::Result<Vec<_>, _>>()?;
58 Ok(Matcher {
59 regexes,
60 plains: vec![],
61 and_mode,
62 })
63 } else {
64 Ok(Matcher {
65 regexes: vec![],
66 plains: queries.iter().map(|q| q.to_lowercase()).collect(),
67 and_mode,
68 })
69 }
70 }
71
72 fn first_matching_query(&self, text: &str) -> Option<String> {
73 if self.and_mode {
74 return self.all_match(text);
75 }
76 if !self.regexes.is_empty() {
77 for re in &self.regexes {
78 if let Some(m) = re.find(text) {
79 return Some(m.as_str().to_string());
80 }
81 }
82 } else {
83 let lower = text.to_lowercase();
84 for q in &self.plains {
85 if lower.contains(q.as_str()) {
86 return Some(q.clone());
87 }
88 }
89 }
90 None
91 }
92
93 fn all_match(&self, text: &str) -> Option<String> {
94 if !self.regexes.is_empty() {
95 let mut matches = Vec::new();
96 for re in &self.regexes {
97 if let Some(m) = re.find(text) {
98 matches.push(m.as_str().to_string());
99 } else {
100 return None;
101 }
102 }
103 Some(matches.join(" + "))
104 } else {
105 let lower = text.to_lowercase();
106 for q in &self.plains {
107 if !lower.contains(q.as_str()) {
108 return None;
109 }
110 }
111 Some(self.plains.join(" + "))
112 }
113 }
114}
115
116struct SearchHit {
117 project: String,
118 session_id: String,
119 record: Record,
120 line_num: usize,
121 matched_query: String,
122}
123
124pub fn search(files: &[SessionFile], opts: &SearchOpts) -> Result<()> {
125 anyhow::ensure!(!opts.queries.is_empty(), "Search query cannot be empty");
126 let matcher = Matcher::new(&opts.queries, opts.is_regex, opts.and_mode)?;
127
128 let filtered_files: Vec<&SessionFile> = files
130 .iter()
131 .filter(|f| {
132 if let Some(proj) = &opts.project {
133 if !f.project_name
134 .to_lowercase()
135 .contains(&proj.to_lowercase())
136 {
137 return false;
138 }
139 }
140 if let Some(exc) = &opts.exclude_session {
141 if f.session_id.starts_with(exc.as_str()) {
142 return false;
143 }
144 }
145 true
146 })
147 .collect();
148
149 let pb = ProgressBar::new(filtered_files.len() as u64);
150 pb.set_style(
151 ProgressStyle::default_bar()
152 .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} files ({msg})")
153 .unwrap()
154 .progress_chars("█▓░"),
155 );
156
157 let hit_count = AtomicUsize::new(0);
158 let max = opts.max_results;
159
160 let results: Vec<Vec<SearchHit>> = filtered_files
161 .par_iter()
162 .map(|file| {
163 if max > 0 && hit_count.load(Ordering::Relaxed) >= max {
164 pb.inc(1);
165 return vec![];
166 }
167
168 let hits = search_file(file, &matcher, opts, &hit_count, max);
169 pb.inc(1);
170 hits
171 })
172 .collect();
173
174 pb.finish_and_clear();
175
176 if opts.count_mode {
178 use std::collections::HashMap;
179 let mut counts: HashMap<String, usize> = HashMap::new();
180 for hits in &results {
181 for hit in hits {
182 *counts.entry(hit.project.clone()).or_default() += 1;
183 }
184 }
185 let mut sorted: Vec<_> = counts.into_iter().collect();
186 sorted.sort_by(|a, b| b.1.cmp(&a.1));
187 let total: usize = sorted.iter().map(|(_, c)| c).sum();
188
189 println!("Match counts for '{}'\n", opts.query_display());
190 for (project, count) in &sorted {
191 println!(" {:40} {:>5}", project, count);
192 }
193 println!("\n{} total matches across {} projects", total, sorted.len());
194 return Ok(());
195 }
196
197 if opts.summary_mode {
199 use std::collections::{HashMap, HashSet};
200
201 let mut project_counts: HashMap<String, usize> = HashMap::new();
202 let mut role_counts: HashMap<String, usize> = HashMap::new();
203 let mut sessions: HashSet<String> = HashSet::new();
204 let mut earliest: Option<String> = None;
205 let mut latest: Option<String> = None;
206 let mut word_counts: HashMap<String, usize> = HashMap::new();
207
208 let stop_words: HashSet<&str> = [
210 "the", "and", "for", "that", "this", "with", "from", "are", "was",
211 "were", "been", "have", "has", "had", "not", "but", "what", "all",
212 "can", "her", "his", "one", "our", "out", "you", "your", "which",
213 "their", "them", "then", "than", "into", "could", "would", "there",
214 "about", "just", "like", "some", "also", "more", "when", "will",
215 "each", "make", "way", "she", "how", "its", "may", "use", "used",
216 "using", "let", "get", "got", "did", "does", "done", "any", "very",
217 "here", "where", "should", "need", "don", "doesn", "isn", "it's",
218 "i'll", "i'm", "we're", "they", "it's", "that's", "file", "line",
219 "code", "run", "set", "new", "see", "now", "try", "want",
220 ].iter().copied().collect();
221
222 for hits in &results {
223 for hit in hits {
224 *project_counts.entry(hit.project.clone()).or_default() += 1;
225 *role_counts.entry(hit.record.role_str().to_string()).or_default() += 1;
226 sessions.insert(format!("{}:{}", hit.project, hit.session_id));
227
228 if let Some(msg) = hit.record.as_message_record() {
229 if let Some(ts) = &msg.timestamp {
230 let ts_date = ts.get(..10).unwrap_or(ts).to_string();
231 if earliest.as_ref().map_or(true, |e| ts_date < *e) {
232 earliest = Some(ts_date.clone());
233 }
234 if latest.as_ref().map_or(true, |l| ts_date > *l) {
235 latest = Some(ts_date);
236 }
237 }
238
239 let text = msg.text_content();
241 for word in text.split(|c: char| !c.is_alphanumeric() && c != '_') {
242 let w = word.to_lowercase();
243 if w.len() >= 4 && !stop_words.contains(w.as_str()) {
244 *word_counts.entry(w).or_default() += 1;
245 }
246 }
247 }
248 }
249 }
250
251 let query_lower: Vec<String> = opts.queries.iter().map(|q| q.to_lowercase()).collect();
253
254 let mut top_words: Vec<_> = word_counts.into_iter()
255 .filter(|(w, _)| !query_lower.iter().any(|q| w.contains(q.as_str())))
256 .collect();
257 top_words.sort_by(|a, b| b.1.cmp(&a.1));
258
259 let total: usize = project_counts.values().sum();
260
261 println!("Summary for '{}'\n", opts.query_display());
262
263 let mut proj_sorted: Vec<_> = project_counts.into_iter().collect();
265 proj_sorted.sort_by(|a, b| b.1.cmp(&a.1));
266 println!(" Projects:");
267 for (project, count) in &proj_sorted {
268 println!(" {:38} {:>5} matches", project, count);
269 }
270
271 println!("\n Roles:");
273 let mut role_sorted: Vec<_> = role_counts.into_iter().collect();
274 role_sorted.sort_by(|a, b| b.1.cmp(&a.1));
275 for (role, count) in &role_sorted {
276 println!(" {:38} {:>5}", role, count);
277 }
278
279 if let (Some(e), Some(l)) = (&earliest, &latest) {
281 if e == l {
282 println!("\n Date: {}", e);
283 } else {
284 println!("\n Dates: {} → {}", e, l);
285 }
286 }
287
288 println!(" Sessions: {}", sessions.len());
290
291 let topics: Vec<&str> = top_words.iter().take(10).map(|(w, _)| w.as_str()).collect();
293 if !topics.is_empty() {
294 println!("\n Topics: {}", topics.join(", "));
295 }
296
297 println!("\n{} total matches", total);
298 return Ok(());
299 }
300
301 let mut total = 0;
302 let needs_md = opts.stdout_md || opts.md_file.is_some();
303 let mut md_lines: Vec<String> = Vec::new();
304
305 for hits in &results {
306 for hit in hits {
307 if opts.json_mode {
308 print_hit_json(hit);
310 } else if !opts.stdout_md {
311 display::print_search_hit(
312 &hit.project,
313 &hit.session_id,
314 &hit.record,
315 hit.line_num,
316 &hit.matched_query,
317 );
318 }
319
320 if needs_md {
321 md_lines.push(format_hit_markdown(hit));
322 }
323
324 total += 1;
325 }
326 }
327
328 if !opts.json_mode && !opts.stdout_md {
329 if total == 0 {
330 println!("No results found for '{}'", opts.query_display());
331 } else {
332 println!("\n{} results found", total);
333 }
334 }
335
336 if opts.stdout_md {
337 write_markdown_to(&mut std::io::stdout().lock(), opts, &md_lines, total)?;
338 }
339
340 if let Some(path) = &opts.md_file {
341 let mut f = std::fs::File::create(path)?;
342 write_markdown_to(&mut f, opts, &md_lines, total)?;
343 eprintln!("Saved to {}", path);
344 }
345
346 Ok(())
347}
348
349fn format_hit_markdown(hit: &SearchHit) -> String {
350 let Some(msg) = hit.record.as_message_record() else {
351 return String::new();
352 };
353
354 let role = hit.record.role_str();
355 let timestamp = msg.timestamp.as_deref().unwrap_or("unknown");
356 let ts_short = if timestamp.len() >= 19 {
357 ×tamp[..19]
358 } else {
359 timestamp
360 };
361
362 let text = msg.text_content();
363 let preview: String = text.chars().take(500).collect();
364 let truncated = if text.chars().count() > 500 {
365 format!("{}...", preview)
366 } else {
367 preview
368 };
369
370 format!(
371 "### {project} — {role} ({ts})\n\n> Session: `{session}` Line: {line}\n\n{content}\n",
372 project = hit.project,
373 role = role,
374 ts = ts_short,
375 session = hit.session_id,
376 line = hit.line_num,
377 content = truncated,
378 )
379}
380
381fn write_markdown_to(w: &mut dyn std::io::Write, opts: &SearchOpts, hits: &[String], total: usize) -> Result<()> {
382 writeln!(w, "# smc Search Results\n")?;
383 writeln!(w, "**Query:** `{}`", opts.query_display())?;
384
385 let mut filters = Vec::new();
386 if let Some(r) = &opts.role {
387 filters.push(format!("role={}", r));
388 }
389 if let Some(t) = &opts.tool {
390 filters.push(format!("tool={}", t));
391 }
392 if let Some(p) = &opts.project {
393 filters.push(format!("project={}", p));
394 }
395 if let Some(a) = &opts.after {
396 filters.push(format!("after={}", a));
397 }
398 if let Some(b) = &opts.before {
399 filters.push(format!("before={}", b));
400 }
401 if let Some(br) = &opts.branch {
402 filters.push(format!("branch={}", br));
403 }
404 if !filters.is_empty() {
405 writeln!(w, "**Filters:** {}", filters.join(", "))?;
406 }
407
408 writeln!(w, "**Results:** {}\n", total)?;
409 writeln!(w, "---\n")?;
410
411 for hit in hits {
412 writeln!(w, "{}", hit)?;
413 writeln!(w, "---\n")?;
414 }
415
416 Ok(())
417}
418
419fn print_hit_json(hit: &SearchHit) {
420 let msg = hit.record.as_message_record();
421 let text = msg.map(|m| m.text_content()).unwrap_or_default();
422 let timestamp = msg
423 .and_then(|m| m.timestamp.as_deref())
424 .unwrap_or("unknown");
425 let role = hit.record.role_str();
426
427 let obj = serde_json::json!({
428 "project": hit.project,
429 "session_id": hit.session_id,
430 "line": hit.line_num,
431 "role": role,
432 "timestamp": timestamp,
433 "matched_query": hit.matched_query,
434 "text": text,
435 });
436 println!("{}", serde_json::to_string(&obj).unwrap_or_default());
437}
438
439fn search_file(
440 file: &SessionFile,
441 matcher: &Matcher,
442 opts: &SearchOpts,
443 hit_count: &AtomicUsize,
444 max: usize,
445) -> Vec<SearchHit> {
446 let mut hits = Vec::new();
447
448 let Ok(f) = std::fs::File::open(&file.path) else {
449 return hits;
450 };
451 let reader = std::io::BufReader::with_capacity(256 * 1024, f);
452
453 for (line_num, line) in reader.lines().enumerate() {
454 if max > 0 && hit_count.load(Ordering::Relaxed) >= max {
455 break;
456 }
457
458 let Ok(line) = line else { continue };
459 if line.trim().is_empty() {
460 continue;
461 }
462
463 let Ok(record) = serde_json::from_str::<Record>(&line) else {
464 continue;
465 };
466
467 let Some(msg) = record.as_message_record() else {
468 continue;
469 };
470
471 if let Some(role) = &opts.role {
473 if record.role_str() != role.as_str() {
474 continue;
475 }
476 }
477
478 if let Some(tool_name) = &opts.tool {
480 let tools = msg.tool_calls();
481 if !tools.iter().any(|t| {
482 t.to_lowercase()
483 .contains(&tool_name.to_lowercase())
484 }) {
485 continue;
486 }
487 }
488
489 if let Some(after) = &opts.after {
491 if let Some(ts) = &msg.timestamp {
492 if ts.as_str() < after.as_str() {
493 continue;
494 }
495 }
496 }
497 if let Some(before) = &opts.before {
498 if let Some(ts) = &msg.timestamp {
499 if ts.as_str() > before.as_str() {
500 continue;
501 }
502 }
503 }
504
505 if let Some(branch) = &opts.branch {
507 if let Some(gb) = &msg.git_branch {
508 if !gb.to_lowercase().contains(&branch.to_lowercase()) {
509 continue;
510 }
511 } else {
512 continue;
513 }
514 }
515
516 if let Some(file_path) = &opts.file {
518 if !msg.touches_file(file_path) {
519 continue;
520 }
521 }
522
523 let text = if opts.thinking_only {
525 msg.thinking_content()
526 } else if opts.no_thinking {
527 msg.text_content_no_thinking()
528 } else if opts.tool_input {
529 msg.tool_input_content()
530 } else {
531 msg.text_content()
532 };
533
534 if text.is_empty() {
536 continue;
537 }
538
539 if !opts.include_smc && text.contains(SMC_TAG_OPEN) {
541 continue;
542 }
543
544 if let Some(matched) = matcher.first_matching_query(&text) {
546 hit_count.fetch_add(1, Ordering::Relaxed);
547 hits.push(SearchHit {
548 project: file.project_name.clone(),
549 session_id: file.session_id.clone(),
550 record,
551 line_num: line_num + 1,
552 matched_query: matched,
553 });
554 }
555 }
556
557 hits
558}