Skip to main content

smc_cli_cc/
search.rs

1use crate::config::SessionFile;
2use crate::display;
3use crate::models::Record;
4use anyhow::Result;
5use indicatif::{ProgressBar, ProgressStyle};
6use rayon::prelude::*;
7use regex::Regex;
8use std::io::BufRead;
9use std::sync::atomic::{AtomicUsize, Ordering};
10
/// Options controlling what [`search`] looks for, which records it considers,
/// and how the results are reported.
///
/// `Default` yields an unrestricted, empty query; [`search`] rejects an empty
/// `queries` list.
#[derive(Debug, Clone, Default)]
pub struct SearchOpts {
    /// Search terms. Must be non-empty for [`search`] to run.
    pub queries: Vec<String>,
    /// Treat each query as a regular expression instead of a
    /// case-insensitive substring.
    pub is_regex: bool,
    /// Require every query to match the same text (otherwise any one suffices).
    pub and_mode: bool,
    /// Keep only records whose role string equals this value exactly.
    pub role: Option<String>,
    /// Keep only records with a tool call whose name contains this value
    /// (case-insensitive).
    pub tool: Option<String>,
    /// Keep only session files whose project name contains this value
    /// (case-insensitive).
    pub project: Option<String>,
    /// Drop records whose timestamp string sorts before this value.
    /// Records without a timestamp are not filtered.
    pub after: Option<String>,
    /// Drop records whose timestamp string sorts after this value.
    /// Records without a timestamp are not filtered.
    pub before: Option<String>,
    /// Keep only records whose git branch contains this value
    /// (case-insensitive); records without a branch are dropped.
    pub branch: Option<String>,
    /// Keep only records that touch this file path.
    pub file: Option<String>,
    /// Search tool input content. Ignored when `thinking_only` or
    /// `no_thinking` is set, as those are checked first.
    pub tool_input: bool,
    /// Search only thinking content. Takes precedence over `no_thinking`
    /// and `tool_input`.
    pub thinking_only: bool,
    /// Search text content with thinking excluded. Takes precedence over
    /// `tool_input`.
    pub no_thinking: bool,
    /// Maximum number of hits to collect; `0` means no limit.
    pub max_results: usize,
    /// Write the markdown report to stdout instead of the regular hit display.
    pub stdout_md: bool,
    /// Path of a file to write the markdown report to.
    pub md_file: Option<String>,
    /// Print per-project match counts only. Checked before `summary_mode`.
    pub count_mode: bool,
    /// Print a condensed summary (projects, roles, dates, sessions, topics).
    pub summary_mode: bool,
    /// Print each hit as a single-line JSON object.
    pub json_mode: bool,
    /// Also search text that contains [`SMC_TAG_OPEN`]; such text is skipped
    /// by default.
    pub include_smc: bool,
    /// Skip session files whose session id starts with this prefix.
    pub exclude_session: Option<String>,
}

/// Opening tag; text containing it is skipped by the search unless
/// `SearchOpts::include_smc` is set.
pub const SMC_TAG_OPEN: &str = "<smc-cc-cli>";
/// Closing counterpart of [`SMC_TAG_OPEN`].
pub const SMC_TAG_CLOSE: &str = "</smc-cc-cli>";

impl SearchOpts {
    /// Returns all queries joined with `", "` for use in user-facing output.
    pub fn query_display(&self) -> String {
        self.queries.join(", ")
    }
}
44
45struct Matcher {
46    regexes: Vec<Regex>,
47    plains: Vec<String>,
48    and_mode: bool,
49}
50
51impl Matcher {
52    fn new(queries: &[String], is_regex: bool, and_mode: bool) -> Result<Self> {
53        if is_regex {
54            let regexes = queries
55                .iter()
56                .map(|q| Regex::new(q))
57                .collect::<std::result::Result<Vec<_>, _>>()?;
58            Ok(Matcher {
59                regexes,
60                plains: vec![],
61                and_mode,
62            })
63        } else {
64            Ok(Matcher {
65                regexes: vec![],
66                plains: queries.iter().map(|q| q.to_lowercase()).collect(),
67                and_mode,
68            })
69        }
70    }
71
72    fn first_matching_query(&self, text: &str) -> Option<String> {
73        if self.and_mode {
74            return self.all_match(text);
75        }
76        if !self.regexes.is_empty() {
77            for re in &self.regexes {
78                if let Some(m) = re.find(text) {
79                    return Some(m.as_str().to_string());
80                }
81            }
82        } else {
83            let lower = text.to_lowercase();
84            for q in &self.plains {
85                if lower.contains(q.as_str()) {
86                    return Some(q.clone());
87                }
88            }
89        }
90        None
91    }
92
93    fn all_match(&self, text: &str) -> Option<String> {
94        if !self.regexes.is_empty() {
95            let mut matches = Vec::new();
96            for re in &self.regexes {
97                if let Some(m) = re.find(text) {
98                    matches.push(m.as_str().to_string());
99                } else {
100                    return None;
101                }
102            }
103            Some(matches.join(" + "))
104        } else {
105            let lower = text.to_lowercase();
106            for q in &self.plains {
107                if !lower.contains(q.as_str()) {
108                    return None;
109                }
110            }
111            Some(self.plains.join(" + "))
112        }
113    }
114}
115
116struct SearchHit {
117    project: String,
118    session_id: String,
119    record: Record,
120    line_num: usize,
121    matched_query: String,
122}
123
124pub fn search(files: &[SessionFile], opts: &SearchOpts) -> Result<()> {
125    anyhow::ensure!(!opts.queries.is_empty(), "Search query cannot be empty");
126    let matcher = Matcher::new(&opts.queries, opts.is_regex, opts.and_mode)?;
127
128    // Filter files by project and exclude specific sessions
129    let filtered_files: Vec<&SessionFile> = files
130        .iter()
131        .filter(|f| {
132            if let Some(proj) = &opts.project {
133                if !f.project_name
134                    .to_lowercase()
135                    .contains(&proj.to_lowercase())
136                {
137                    return false;
138                }
139            }
140            if let Some(exc) = &opts.exclude_session {
141                if f.session_id.starts_with(exc.as_str()) {
142                    return false;
143                }
144            }
145            true
146        })
147        .collect();
148
149    let pb = ProgressBar::new(filtered_files.len() as u64);
150    pb.set_style(
151        ProgressStyle::default_bar()
152            .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} files ({msg})")
153            .unwrap()
154            .progress_chars("█▓░"),
155    );
156
157    let hit_count = AtomicUsize::new(0);
158    let max = opts.max_results;
159
160    let results: Vec<Vec<SearchHit>> = filtered_files
161        .par_iter()
162        .map(|file| {
163            if max > 0 && hit_count.load(Ordering::Relaxed) >= max {
164                pb.inc(1);
165                return vec![];
166            }
167
168            let hits = search_file(file, &matcher, opts, &hit_count, max);
169            pb.inc(1);
170            hits
171        })
172        .collect();
173
174    pb.finish_and_clear();
175
176    // Count mode: aggregate by project
177    if opts.count_mode {
178        use std::collections::HashMap;
179        let mut counts: HashMap<String, usize> = HashMap::new();
180        for hits in &results {
181            for hit in hits {
182                *counts.entry(hit.project.clone()).or_default() += 1;
183            }
184        }
185        let mut sorted: Vec<_> = counts.into_iter().collect();
186        sorted.sort_by(|a, b| b.1.cmp(&a.1));
187        let total: usize = sorted.iter().map(|(_, c)| c).sum();
188
189        println!("Match counts for '{}'\n", opts.query_display());
190        for (project, count) in &sorted {
191            println!("  {:40} {:>5}", project, count);
192        }
193        println!("\n{} total matches across {} projects", total, sorted.len());
194        return Ok(());
195    }
196
197    // Summary mode: condensed overview
198    if opts.summary_mode {
199        use std::collections::{HashMap, HashSet};
200
201        let mut project_counts: HashMap<String, usize> = HashMap::new();
202        let mut role_counts: HashMap<String, usize> = HashMap::new();
203        let mut sessions: HashSet<String> = HashSet::new();
204        let mut earliest: Option<String> = None;
205        let mut latest: Option<String> = None;
206        let mut word_counts: HashMap<String, usize> = HashMap::new();
207
208        // Stop words to skip in topic extraction
209        let stop_words: HashSet<&str> = [
210            "the", "and", "for", "that", "this", "with", "from", "are", "was",
211            "were", "been", "have", "has", "had", "not", "but", "what", "all",
212            "can", "her", "his", "one", "our", "out", "you", "your", "which",
213            "their", "them", "then", "than", "into", "could", "would", "there",
214            "about", "just", "like", "some", "also", "more", "when", "will",
215            "each", "make", "way", "she", "how", "its", "may", "use", "used",
216            "using", "let", "get", "got", "did", "does", "done", "any", "very",
217            "here", "where", "should", "need", "don", "doesn", "isn", "it's",
218            "i'll", "i'm", "we're", "they", "it's", "that's", "file", "line",
219            "code", "run", "set", "new", "see", "now", "try", "want",
220        ].iter().copied().collect();
221
222        for hits in &results {
223            for hit in hits {
224                *project_counts.entry(hit.project.clone()).or_default() += 1;
225                *role_counts.entry(hit.record.role_str().to_string()).or_default() += 1;
226                sessions.insert(format!("{}:{}", hit.project, hit.session_id));
227
228                if let Some(msg) = hit.record.as_message_record() {
229                    if let Some(ts) = &msg.timestamp {
230                        let ts_date = ts.get(..10).unwrap_or(ts).to_string();
231                        if earliest.as_ref().map_or(true, |e| ts_date < *e) {
232                            earliest = Some(ts_date.clone());
233                        }
234                        if latest.as_ref().map_or(true, |l| ts_date > *l) {
235                            latest = Some(ts_date);
236                        }
237                    }
238
239                    // Extract topic words
240                    let text = msg.text_content();
241                    for word in text.split(|c: char| !c.is_alphanumeric() && c != '_') {
242                        let w = word.to_lowercase();
243                        if w.len() >= 4 && !stop_words.contains(w.as_str()) {
244                            *word_counts.entry(w).or_default() += 1;
245                        }
246                    }
247                }
248            }
249        }
250
251        // Also skip the query terms themselves from topics
252        let query_lower: Vec<String> = opts.queries.iter().map(|q| q.to_lowercase()).collect();
253
254        let mut top_words: Vec<_> = word_counts.into_iter()
255            .filter(|(w, _)| !query_lower.iter().any(|q| w.contains(q.as_str())))
256            .collect();
257        top_words.sort_by(|a, b| b.1.cmp(&a.1));
258
259        let total: usize = project_counts.values().sum();
260
261        println!("Summary for '{}'\n", opts.query_display());
262
263        // Projects
264        let mut proj_sorted: Vec<_> = project_counts.into_iter().collect();
265        proj_sorted.sort_by(|a, b| b.1.cmp(&a.1));
266        println!("  Projects:");
267        for (project, count) in &proj_sorted {
268            println!("    {:38} {:>5} matches", project, count);
269        }
270
271        // Roles
272        println!("\n  Roles:");
273        let mut role_sorted: Vec<_> = role_counts.into_iter().collect();
274        role_sorted.sort_by(|a, b| b.1.cmp(&a.1));
275        for (role, count) in &role_sorted {
276            println!("    {:38} {:>5}", role, count);
277        }
278
279        // Date range
280        if let (Some(e), Some(l)) = (&earliest, &latest) {
281            if e == l {
282                println!("\n  Date:     {}", e);
283            } else {
284                println!("\n  Dates:    {} → {}", e, l);
285            }
286        }
287
288        // Sessions
289        println!("  Sessions: {}", sessions.len());
290
291        // Topics
292        let topics: Vec<&str> = top_words.iter().take(10).map(|(w, _)| w.as_str()).collect();
293        if !topics.is_empty() {
294            println!("\n  Topics:   {}", topics.join(", "));
295        }
296
297        println!("\n{} total matches", total);
298        return Ok(());
299    }
300
301    let mut total = 0;
302    let needs_md = opts.stdout_md || opts.md_file.is_some();
303    let mut md_lines: Vec<String> = Vec::new();
304
305    for hits in &results {
306        for hit in hits {
307            if opts.json_mode {
308                // Output as JSON line
309                print_hit_json(hit);
310            } else if !opts.stdout_md {
311                display::print_search_hit(
312                    &hit.project,
313                    &hit.session_id,
314                    &hit.record,
315                    hit.line_num,
316                    &hit.matched_query,
317                );
318            }
319
320            if needs_md {
321                md_lines.push(format_hit_markdown(hit));
322            }
323
324            total += 1;
325        }
326    }
327
328    if !opts.json_mode && !opts.stdout_md {
329        if total == 0 {
330            println!("No results found for '{}'", opts.query_display());
331        } else {
332            println!("\n{} results found", total);
333        }
334    }
335
336    if opts.stdout_md {
337        write_markdown_to(&mut std::io::stdout().lock(), opts, &md_lines, total)?;
338    }
339
340    if let Some(path) = &opts.md_file {
341        let mut f = std::fs::File::create(path)?;
342        write_markdown_to(&mut f, opts, &md_lines, total)?;
343        eprintln!("Saved to {}", path);
344    }
345
346    Ok(())
347}
348
349fn format_hit_markdown(hit: &SearchHit) -> String {
350    let Some(msg) = hit.record.as_message_record() else {
351        return String::new();
352    };
353
354    let role = hit.record.role_str();
355    let timestamp = msg.timestamp.as_deref().unwrap_or("unknown");
356    let ts_short = if timestamp.len() >= 19 {
357        &timestamp[..19]
358    } else {
359        timestamp
360    };
361
362    let text = msg.text_content();
363    let preview: String = text.chars().take(500).collect();
364    let truncated = if text.chars().count() > 500 {
365        format!("{}...", preview)
366    } else {
367        preview
368    };
369
370    format!(
371        "### {project} — {role} ({ts})\n\n> Session: `{session}` Line: {line}\n\n{content}\n",
372        project = hit.project,
373        role = role,
374        ts = ts_short,
375        session = hit.session_id,
376        line = hit.line_num,
377        content = truncated,
378    )
379}
380
381fn write_markdown_to(w: &mut dyn std::io::Write, opts: &SearchOpts, hits: &[String], total: usize) -> Result<()> {
382    writeln!(w, "# smc Search Results\n")?;
383    writeln!(w, "**Query:** `{}`", opts.query_display())?;
384
385    let mut filters = Vec::new();
386    if let Some(r) = &opts.role {
387        filters.push(format!("role={}", r));
388    }
389    if let Some(t) = &opts.tool {
390        filters.push(format!("tool={}", t));
391    }
392    if let Some(p) = &opts.project {
393        filters.push(format!("project={}", p));
394    }
395    if let Some(a) = &opts.after {
396        filters.push(format!("after={}", a));
397    }
398    if let Some(b) = &opts.before {
399        filters.push(format!("before={}", b));
400    }
401    if let Some(br) = &opts.branch {
402        filters.push(format!("branch={}", br));
403    }
404    if !filters.is_empty() {
405        writeln!(w, "**Filters:** {}", filters.join(", "))?;
406    }
407
408    writeln!(w, "**Results:** {}\n", total)?;
409    writeln!(w, "---\n")?;
410
411    for hit in hits {
412        writeln!(w, "{}", hit)?;
413        writeln!(w, "---\n")?;
414    }
415
416    Ok(())
417}
418
419fn print_hit_json(hit: &SearchHit) {
420    let msg = hit.record.as_message_record();
421    let text = msg.map(|m| m.text_content()).unwrap_or_default();
422    let timestamp = msg
423        .and_then(|m| m.timestamp.as_deref())
424        .unwrap_or("unknown");
425    let role = hit.record.role_str();
426
427    let obj = serde_json::json!({
428        "project": hit.project,
429        "session_id": hit.session_id,
430        "line": hit.line_num,
431        "role": role,
432        "timestamp": timestamp,
433        "matched_query": hit.matched_query,
434        "text": text,
435    });
436    println!("{}", serde_json::to_string(&obj).unwrap_or_default());
437}
438
439fn search_file(
440    file: &SessionFile,
441    matcher: &Matcher,
442    opts: &SearchOpts,
443    hit_count: &AtomicUsize,
444    max: usize,
445) -> Vec<SearchHit> {
446    let mut hits = Vec::new();
447
448    let Ok(f) = std::fs::File::open(&file.path) else {
449        return hits;
450    };
451    let reader = std::io::BufReader::with_capacity(256 * 1024, f);
452
453    for (line_num, line) in reader.lines().enumerate() {
454        if max > 0 && hit_count.load(Ordering::Relaxed) >= max {
455            break;
456        }
457
458        let Ok(line) = line else { continue };
459        if line.trim().is_empty() {
460            continue;
461        }
462
463        let Ok(record) = serde_json::from_str::<Record>(&line) else {
464            continue;
465        };
466
467        let Some(msg) = record.as_message_record() else {
468            continue;
469        };
470
471        // Role filter
472        if let Some(role) = &opts.role {
473            if record.role_str() != role.as_str() {
474                continue;
475            }
476        }
477
478        // Tool filter
479        if let Some(tool_name) = &opts.tool {
480            let tools = msg.tool_calls();
481            if !tools.iter().any(|t| {
482                t.to_lowercase()
483                    .contains(&tool_name.to_lowercase())
484            }) {
485                continue;
486            }
487        }
488
489        // Date filters
490        if let Some(after) = &opts.after {
491            if let Some(ts) = &msg.timestamp {
492                if ts.as_str() < after.as_str() {
493                    continue;
494                }
495            }
496        }
497        if let Some(before) = &opts.before {
498            if let Some(ts) = &msg.timestamp {
499                if ts.as_str() > before.as_str() {
500                    continue;
501                }
502            }
503        }
504
505        // Branch filter
506        if let Some(branch) = &opts.branch {
507            if let Some(gb) = &msg.git_branch {
508                if !gb.to_lowercase().contains(&branch.to_lowercase()) {
509                    continue;
510                }
511            } else {
512                continue;
513            }
514        }
515
516        // File path filter — record must touch this file in tool inputs/results
517        if let Some(file_path) = &opts.file {
518            if !msg.touches_file(file_path) {
519                continue;
520            }
521        }
522
523        // Choose what text to search based on mode
524        let text = if opts.thinking_only {
525            msg.thinking_content()
526        } else if opts.no_thinking {
527            msg.text_content_no_thinking()
528        } else if opts.tool_input {
529            msg.tool_input_content()
530        } else {
531            msg.text_content()
532        };
533
534        // Skip empty content (e.g., thinking_only on a message with no thinking)
535        if text.is_empty() {
536            continue;
537        }
538
539        // Skip smc output unless --include-smc
540        if !opts.include_smc && text.contains(SMC_TAG_OPEN) {
541            continue;
542        }
543
544        // Text match
545        if let Some(matched) = matcher.first_matching_query(&text) {
546            hit_count.fetch_add(1, Ordering::Relaxed);
547            hits.push(SearchHit {
548                project: file.project_name.clone(),
549                session_id: file.session_id.clone(),
550                record,
551                line_num: line_num + 1,
552                matched_query: matched,
553            });
554        }
555    }
556
557    hits
558}