Skip to main content

zag_agent/
search.rs

1use crate::session_log::{
2    AgentLogEvent, LogEventKind, SessionLogIndex, SessionLogIndexEntry, ToolKind,
3};
4use anyhow::{Context, Result, bail};
5use chrono::{DateTime, Duration, NaiveDate, Utc};
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::io::{BufRead, BufReader};
9use std::path::{Path, PathBuf};
10
11/// Query parameters for searching session logs.
12#[derive(Debug, Default)]
13pub struct SearchQuery {
14    /// Full-text pattern (literal substring or regex). None matches all events.
15    pub text: Option<String>,
16    /// Case-insensitive match (default: true).
17    pub case_insensitive: bool,
18    /// Treat `text` as a regular expression (default: false → literal substring).
19    pub use_regex: bool,
20    /// Filter by provider name (case-insensitive).
21    pub provider: Option<String>,
22    /// Filter by message role — only applies to `UserMessage` events.
23    pub role: Option<String>,
24    /// Filter by tool name (case-insensitive substring) — only applies to tool events.
25    pub tool: Option<String>,
26    /// Filter by tool kind — only applies to `ToolCall`/`ToolResult` events.
27    pub tool_kind: Option<ToolKind>,
28    /// Show only events at or after this timestamp.
29    pub from: Option<DateTime<Utc>>,
30    /// Show only events at or before this timestamp.
31    pub to: Option<DateTime<Utc>>,
32    /// Restrict search to a specific session ID (prefix match).
33    pub session_id: Option<String>,
34    /// Filter by session tag (exact match, case-insensitive).
35    pub tag: Option<String>,
36    /// Search all sessions across all projects (default: current project and sub-projects).
37    pub global: bool,
38    /// Stop after collecting this many matches.
39    pub limit: Option<usize>,
40}
41
42impl SearchQuery {
43    pub fn new() -> Self {
44        Self {
45            case_insensitive: true,
46            ..Default::default()
47        }
48    }
49}
50
51/// A single event that matched the search query.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct SearchMatch {
54    pub session_id: String,
55    pub provider: String,
56    pub started_at: String,
57    pub ended_at: Option<String>,
58    pub workspace_path: Option<String>,
59    pub command: Option<String>,
60    pub event: AgentLogEvent,
61    /// Short excerpt (~200 chars) of the matched text.
62    pub snippet: String,
63}
64
65/// Aggregate results from a search.
66#[derive(Debug, Default)]
67pub struct SearchResults {
68    pub total_sessions_scanned: usize,
69    pub total_events_scanned: usize,
70    pub total_files_missing: usize,
71    pub matches: Vec<SearchMatch>,
72}
73
74// ---------------------------------------------------------------------------
75// Date parsing
76// ---------------------------------------------------------------------------
77
78/// Parse a date/time string for `--from` / `--to` filters.
79///
80/// Accepted formats:
81/// - RFC 3339 (e.g. `2024-01-15T10:30:00Z`)
82/// - Date only (e.g. `2024-01-15`) — interpreted as start of day UTC
83/// - Relative offset from now: `1h`, `2d`, `3w`, `1m` (hours/days/weeks/months)
84pub fn parse_date_arg(s: &str) -> Result<DateTime<Utc>> {
85    // Try RFC 3339 first.
86    if let Ok(dt) = DateTime::parse_from_rfc3339(s) {
87        return Ok(dt.with_timezone(&Utc));
88    }
89
90    // Try date-only (YYYY-MM-DD).
91    if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
92        let dt = date
93            .and_hms_opt(0, 0, 0)
94            .expect("midnight is always valid")
95            .and_utc();
96        return Ok(dt);
97    }
98
99    // Try relative offset: leading digits followed by a unit character.
100    let s_trimmed = s.trim();
101    if !s_trimmed.is_empty() {
102        let unit = s_trimmed.chars().last().unwrap();
103        let digits = &s_trimmed[..s_trimmed.len() - unit.len_utf8()];
104        if let Ok(n) = digits.parse::<i64>() {
105            let delta = match unit {
106                'h' => Duration::hours(n),
107                'd' => Duration::days(n),
108                'w' => Duration::weeks(n),
109                'm' => Duration::days(n * 30),
110                _ => bail!(
111                    "Unknown time unit '{unit}'. Use h (hours), d (days), w (weeks), or m (months)."
112                ),
113            };
114            return Ok(Utc::now() - delta);
115        }
116    }
117
118    bail!(
119        "Cannot parse date '{s}'. Use RFC 3339 (2024-01-15T10:30:00Z), date only (2024-01-15), or relative (1h, 2d, 3w, 1m)."
120    )
121}
122
123// ---------------------------------------------------------------------------
124// Text matcher
125// ---------------------------------------------------------------------------
126
127enum TextMatcher {
128    /// No text filter — everything matches.
129    None,
130    /// Case-insensitive literal substring.
131    Literal(String),
132    /// Compiled regex.
133    Pattern(Regex),
134}
135
136impl TextMatcher {
137    fn build(query: &SearchQuery) -> Result<Self> {
138        let Some(ref text) = query.text else {
139            return Ok(Self::None);
140        };
141        if query.use_regex {
142            let pattern = if query.case_insensitive {
143                format!("(?i){text}")
144            } else {
145                text.clone()
146            };
147            let re =
148                Regex::new(&pattern).with_context(|| format!("Invalid regex pattern: '{text}'"))?;
149            Ok(Self::Pattern(re))
150        } else if query.case_insensitive {
151            Ok(Self::Literal(text.to_lowercase()))
152        } else {
153            Ok(Self::Literal(text.clone()))
154        }
155    }
156
157    fn is_match(&self, haystack: &str) -> bool {
158        match self {
159            Self::None => true,
160            Self::Literal(needle) => haystack.to_lowercase().contains(needle.as_str()),
161            Self::Pattern(re) => re.is_match(haystack),
162        }
163    }
164
165    fn find_offset(&self, haystack: &str) -> Option<usize> {
166        match self {
167            Self::None => Some(0),
168            Self::Literal(needle) => haystack.to_lowercase().find(needle.as_str()),
169            Self::Pattern(re) => re.find(haystack).map(|m| m.start()),
170        }
171    }
172
173    fn has_filter(&self) -> bool {
174        !matches!(self, Self::None)
175    }
176}
177
178// ---------------------------------------------------------------------------
179// Content extraction
180// ---------------------------------------------------------------------------
181
182fn extract_searchable_text(event: &AgentLogEvent) -> String {
183    let mut parts: Vec<String> = Vec::new();
184
185    match &event.kind {
186        LogEventKind::SessionStarted {
187            command,
188            model,
189            cwd,
190            ..
191        } => {
192            parts.push(command.clone());
193            if let Some(m) = model {
194                parts.push(m.clone());
195            }
196            if let Some(c) = cwd {
197                parts.push(c.clone());
198            }
199        }
200        LogEventKind::UserMessage { role, content, .. } => {
201            parts.push(role.clone());
202            parts.push(content.clone());
203        }
204        LogEventKind::AssistantMessage { content, .. } => {
205            parts.push(content.clone());
206        }
207        LogEventKind::Reasoning { content, .. } => {
208            parts.push(content.clone());
209        }
210        LogEventKind::ToolCall {
211            tool_name, input, ..
212        } => {
213            parts.push(tool_name.clone());
214            if let Some(v) = input {
215                parts.push(v.to_string());
216            }
217        }
218        LogEventKind::ToolResult {
219            tool_name,
220            output,
221            error,
222            data,
223            ..
224        } => {
225            if let Some(n) = tool_name {
226                parts.push(n.clone());
227            }
228            if let Some(o) = output {
229                parts.push(o.clone());
230            }
231            if let Some(e) = error {
232                parts.push(e.clone());
233            }
234            if let Some(d) = data {
235                parts.push(d.to_string());
236            }
237        }
238        LogEventKind::Permission {
239            tool_name,
240            description,
241            ..
242        } => {
243            parts.push(tool_name.clone());
244            parts.push(description.clone());
245        }
246        LogEventKind::ProviderStatus { message, .. } => {
247            parts.push(message.clone());
248        }
249        LogEventKind::Stderr { message } => {
250            parts.push(message.clone());
251        }
252        LogEventKind::ParseWarning { message, raw } => {
253            parts.push(message.clone());
254            if let Some(r) = raw {
255                parts.push(r.clone());
256            }
257        }
258        LogEventKind::SessionEnded { error, .. } => {
259            if let Some(e) = error {
260                parts.push(e.clone());
261            }
262        }
263        LogEventKind::SessionCleared { .. } => {}
264        LogEventKind::Heartbeat { .. } => {}
265        LogEventKind::Usage { .. } => {}
266        LogEventKind::UserEvent { message, .. } => {
267            parts.push(message.clone());
268        }
269        LogEventKind::SessionResult { result } => {
270            parts.push(result.clone());
271        }
272        LogEventKind::UsageLimitHit {
273            provider,
274            scope,
275            raw,
276            ..
277        } => {
278            parts.push(provider.clone());
279            parts.push(scope.clone());
280            if let Some(r) = raw {
281                parts.push(r.clone());
282            }
283        }
284        LogEventKind::UsageLimitResumed { resume_message, .. } => {
285            parts.push(resume_message.clone());
286        }
287        LogEventKind::UsageLimitResumeFailed { error, .. } => {
288            parts.push(error.clone());
289        }
290    }
291
292    parts.join(" ")
293}
294
295// ---------------------------------------------------------------------------
296// Snippet builder
297// ---------------------------------------------------------------------------
298
299fn make_snippet(text: &str, matcher: &TextMatcher, max_len: usize) -> String {
300    let offset = matcher.find_offset(text).unwrap_or(0);
301
302    let start = offset.saturating_sub(max_len / 4);
303    let end = (start + max_len).min(text.len());
304
305    // Clamp to char boundaries.
306    let start = text
307        .char_indices()
308        .map(|(i, _)| i)
309        .rfind(|&i| i <= start)
310        .unwrap_or(0);
311    let end = text
312        .char_indices()
313        .map(|(i, _)| i)
314        .find(|&i| i >= end)
315        .unwrap_or(text.len());
316
317    let mut snippet = String::new();
318    if start > 0 {
319        snippet.push_str("[...] ");
320    }
321    snippet.push_str(&text[start..end]);
322    if end < text.len() {
323        snippet.push_str(" [...]");
324    }
325    snippet
326}
327
328// ---------------------------------------------------------------------------
329// Metadata pre-filter
330// ---------------------------------------------------------------------------
331
332fn session_matches_query(entry: &SessionLogIndexEntry, query: &SearchQuery) -> bool {
333    // Provider filter
334    if let Some(ref p) = query.provider
335        && !entry.provider.eq_ignore_ascii_case(p)
336    {
337        return false;
338    }
339
340    // Session ID prefix filter
341    if let Some(ref sid) = query.session_id
342        && !entry.wrapper_session_id.starts_with(sid.as_str())
343    {
344        return false;
345    }
346
347    // Date range: skip sessions that definitely ended before `from`
348    if let Some(from) = query.from
349        && let Some(ref ended) = entry.ended_at
350        && let Ok(ended_dt) = DateTime::parse_from_rfc3339(ended)
351        && ended_dt.with_timezone(&Utc) < from
352    {
353        return false;
354    }
355
356    // Date range: skip sessions that started after `to`
357    if let Some(to) = query.to
358        && let Ok(started_dt) = DateTime::parse_from_rfc3339(&entry.started_at)
359        && started_dt.with_timezone(&Utc) > to
360    {
361        return false;
362    }
363
364    true
365}
366
367// ---------------------------------------------------------------------------
368// Event filter
369// ---------------------------------------------------------------------------
370
371fn event_matches_query(event: &AgentLogEvent, query: &SearchQuery, matcher: &TextMatcher) -> bool {
372    // Provider filter at event level
373    if let Some(ref p) = query.provider
374        && !event.provider.eq_ignore_ascii_case(p)
375    {
376        return false;
377    }
378
379    // Date range filters
380    if (query.from.is_some() || query.to.is_some())
381        && let Ok(event_dt) = DateTime::parse_from_rfc3339(&event.ts)
382    {
383        let event_utc = event_dt.with_timezone(&Utc);
384        if let Some(from) = query.from
385            && event_utc < from
386        {
387            return false;
388        }
389        if let Some(to) = query.to
390            && event_utc > to
391        {
392            return false;
393        }
394    }
395
396    // Tool kind / tool name / role filters
397    let has_tool_filter = query.tool.is_some() || query.tool_kind.is_some();
398    let has_role_filter = query.role.is_some();
399
400    if has_tool_filter {
401        match &event.kind {
402            LogEventKind::ToolCall {
403                tool_name,
404                tool_kind,
405                ..
406            } => {
407                if let Some(ref t) = query.tool
408                    && !tool_name.to_lowercase().contains(&t.to_lowercase())
409                {
410                    return false;
411                }
412                if let Some(ref tk) = query.tool_kind {
413                    let kind = tool_kind.unwrap_or_else(|| ToolKind::infer(tool_name));
414                    if kind != *tk {
415                        return false;
416                    }
417                }
418            }
419            LogEventKind::ToolResult {
420                tool_name,
421                tool_kind,
422                ..
423            } => {
424                if let Some(ref t) = query.tool {
425                    let name = tool_name.as_deref().unwrap_or("");
426                    if !name.to_lowercase().contains(&t.to_lowercase()) {
427                        return false;
428                    }
429                }
430                if let Some(ref tk) = query.tool_kind {
431                    let kind = tool_kind.unwrap_or_else(|| {
432                        tool_name
433                            .as_deref()
434                            .map(ToolKind::infer)
435                            .unwrap_or(ToolKind::Other)
436                    });
437                    if kind != *tk {
438                        return false;
439                    }
440                }
441            }
442            // Non-tool events are excluded when a tool filter is active
443            _ => return false,
444        }
445    }
446
447    if has_role_filter {
448        match &event.kind {
449            LogEventKind::UserMessage { role, .. } => {
450                if let Some(ref r) = query.role
451                    && !role.eq_ignore_ascii_case(r)
452                {
453                    return false;
454                }
455            }
456            // Non-message events are excluded when a role filter is active
457            // (unless combined with a tool filter, which we already handled above)
458            _ if !has_tool_filter => return false,
459            _ => {}
460        }
461    }
462
463    // Text filter
464    if matcher.has_filter() {
465        let text = extract_searchable_text(event);
466        if !matcher.is_match(&text) {
467            return false;
468        }
469    }
470
471    true
472}
473
474// ---------------------------------------------------------------------------
475// JSONL scanner
476// ---------------------------------------------------------------------------
477
478struct ScanResult {
479    events_scanned: usize,
480    matching_events: Vec<AgentLogEvent>,
481}
482
483fn scan_session(log_path: &Path, query: &SearchQuery, matcher: &TextMatcher) -> Result<ScanResult> {
484    let file = std::fs::File::open(log_path)
485        .with_context(|| format!("Failed to open log file: {}", log_path.display()))?;
486    let reader = BufReader::new(file);
487
488    let mut result = ScanResult {
489        events_scanned: 0,
490        matching_events: Vec::new(),
491    };
492
493    for line in reader.lines() {
494        let line =
495            line.with_context(|| format!("Failed to read line in {}", log_path.display()))?;
496        let line = line.trim();
497        if line.is_empty() {
498            continue;
499        }
500
501        let event: AgentLogEvent = match serde_json::from_str(line) {
502            Ok(e) => e,
503            Err(e) => {
504                log::debug!(
505                    "Skipping malformed JSONL line in {}: {}",
506                    log_path.display(),
507                    e
508                );
509                continue;
510            }
511        };
512
513        result.events_scanned += 1;
514
515        if event_matches_query(&event, query, matcher) {
516            result.matching_events.push(event);
517        }
518    }
519
520    Ok(result)
521}
522
523// ---------------------------------------------------------------------------
524// Session discovery
525// ---------------------------------------------------------------------------
526
527fn collect_candidate_sessions(
528    query: &SearchQuery,
529    zag_home: &Path,
530    cwd: &Path,
531) -> Result<Vec<(SessionLogIndexEntry, PathBuf)>> {
532    let projects_dir = zag_home.join("projects");
533    if !projects_dir.exists() {
534        return Ok(Vec::new());
535    }
536
537    // If tag filter is set, collect matching session IDs from session stores.
538    let tag_session_ids: Option<std::collections::HashSet<String>> = if query.tag.is_some() {
539        let store = if query.global {
540            crate::session::SessionStore::load_all().unwrap_or_default()
541        } else {
542            crate::session::SessionStore::load(Some(&cwd.to_string_lossy())).unwrap_or_default()
543        };
544        let tag = query.tag.as_deref().unwrap();
545        let matching = store.find_by_tag(tag);
546        Some(matching.into_iter().map(|e| e.session_id.clone()).collect())
547    } else {
548        None
549    };
550
551    let cwd_str = cwd.to_string_lossy().to_string();
552    let mut candidates: Vec<(SessionLogIndexEntry, PathBuf)> = Vec::new();
553    let mut seen_ids = std::collections::HashSet::new();
554
555    let read_dir = std::fs::read_dir(&projects_dir)
556        .with_context(|| format!("Failed to read {}", projects_dir.display()))?;
557
558    for entry in read_dir {
559        let project_dir = match entry {
560            Ok(e) => e.path(),
561            Err(_) => continue,
562        };
563        if !project_dir.is_dir() {
564            continue;
565        }
566
567        let index_path = project_dir.join("logs").join("index.json");
568        if !index_path.exists() {
569            continue;
570        }
571
572        let content = match std::fs::read_to_string(&index_path) {
573            Ok(c) => c,
574            Err(e) => {
575                log::warn!("Failed to read index {}: {}", index_path.display(), e);
576                continue;
577            }
578        };
579
580        let index: SessionLogIndex = match serde_json::from_str(&content) {
581            Ok(i) => i,
582            Err(e) => {
583                log::warn!("Malformed index {}: {}", index_path.display(), e);
584                continue;
585            }
586        };
587
588        for session_entry in index.sessions {
589            // Scope filter: in non-global mode, only include sessions whose workspace_path
590            // is within the current directory tree.
591            if !query.global {
592                let in_scope = match &session_entry.workspace_path {
593                    Some(wp) => {
594                        // Match if workspace is the cwd or a subdirectory of cwd
595                        wp == &cwd_str
596                            || wp.starts_with(&format!("{cwd_str}/"))
597                            || wp.starts_with(&format!("{cwd_str}\\"))
598                    }
599                    None => false,
600                };
601                if !in_scope {
602                    continue;
603                }
604            }
605
606            // Metadata pre-filter (provider, session ID, dates)
607            if !session_matches_query(&session_entry, query) {
608                continue;
609            }
610
611            // Tag filter: only include sessions matching the tag
612            if let Some(ref allowed) = tag_session_ids {
613                if !allowed.contains(&session_entry.wrapper_session_id) {
614                    continue;
615                }
616            }
617
618            // Deduplicate by session ID
619            if !seen_ids.insert(session_entry.wrapper_session_id.clone()) {
620                continue;
621            }
622
623            let log_path = PathBuf::from(&session_entry.log_path);
624            candidates.push((session_entry, log_path));
625        }
626    }
627
628    // Sort by started_at so results are in chronological order
629    candidates.sort_by(|a, b| a.0.started_at.cmp(&b.0.started_at));
630
631    Ok(candidates)
632}
633
634// ---------------------------------------------------------------------------
635// Main entry point
636// ---------------------------------------------------------------------------
637
638/// Search through session logs matching the given query.
639pub fn search(query: &SearchQuery, zag_home: &Path, cwd: &Path) -> Result<SearchResults> {
640    let matcher = TextMatcher::build(query)?;
641
642    let candidates = collect_candidate_sessions(query, zag_home, cwd)?;
643
644    let mut results = SearchResults::default();
645
646    'outer: for (entry, log_path) in candidates {
647        results.total_sessions_scanned += 1;
648
649        if !log_path.exists() {
650            results.total_files_missing += 1;
651            log::debug!("Log file missing: {}", log_path.display());
652            continue;
653        }
654
655        let scan = match scan_session(&log_path, query, &matcher) {
656            Ok(s) => s,
657            Err(e) => {
658                log::warn!("Failed to scan {}: {}", log_path.display(), e);
659                continue;
660            }
661        };
662
663        results.total_events_scanned += scan.events_scanned;
664
665        for event in scan.matching_events {
666            let text = extract_searchable_text(&event);
667            let snippet = make_snippet(&text, &matcher, 200);
668
669            results.matches.push(SearchMatch {
670                session_id: entry.wrapper_session_id.clone(),
671                provider: entry.provider.clone(),
672                started_at: entry.started_at.clone(),
673                ended_at: entry.ended_at.clone(),
674                workspace_path: entry.workspace_path.clone(),
675                command: entry.command.clone(),
676                event,
677                snippet,
678            });
679
680            if let Some(limit) = query.limit
681                && results.matches.len() >= limit
682            {
683                break 'outer;
684            }
685        }
686    }
687
688    Ok(results)
689}
690
691#[cfg(test)]
692#[path = "search_tests.rs"]
693mod tests;