Skip to main content

zag_agent/
search.rs

1use crate::session_log::{
2    AgentLogEvent, LogEventKind, SessionLogIndex, SessionLogIndexEntry, ToolKind,
3};
4use anyhow::{Context, Result, bail};
5use chrono::{DateTime, Duration, NaiveDate, Utc};
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::io::{BufRead, BufReader};
9use std::path::{Path, PathBuf};
10
11/// Query parameters for searching session logs.
12#[derive(Debug, Default)]
13pub struct SearchQuery {
14    /// Full-text pattern (literal substring or regex). None matches all events.
15    pub text: Option<String>,
16    /// Case-insensitive match (default: true).
17    pub case_insensitive: bool,
18    /// Treat `text` as a regular expression (default: false → literal substring).
19    pub use_regex: bool,
20    /// Filter by provider name (case-insensitive).
21    pub provider: Option<String>,
22    /// Filter by message role — only applies to `UserMessage` events.
23    pub role: Option<String>,
24    /// Filter by tool name (case-insensitive substring) — only applies to tool events.
25    pub tool: Option<String>,
26    /// Filter by tool kind — only applies to `ToolCall`/`ToolResult` events.
27    pub tool_kind: Option<ToolKind>,
28    /// Show only events at or after this timestamp.
29    pub from: Option<DateTime<Utc>>,
30    /// Show only events at or before this timestamp.
31    pub to: Option<DateTime<Utc>>,
32    /// Restrict search to a specific session ID (prefix match).
33    pub session_id: Option<String>,
34    /// Filter by session tag (exact match, case-insensitive).
35    pub tag: Option<String>,
36    /// Search all sessions across all projects (default: current project and sub-projects).
37    pub global: bool,
38    /// Stop after collecting this many matches.
39    pub limit: Option<usize>,
40}
41
42impl SearchQuery {
43    pub fn new() -> Self {
44        Self {
45            case_insensitive: true,
46            ..Default::default()
47        }
48    }
49}
50
51/// A single event that matched the search query.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct SearchMatch {
54    pub session_id: String,
55    pub provider: String,
56    pub started_at: String,
57    pub ended_at: Option<String>,
58    pub workspace_path: Option<String>,
59    pub command: Option<String>,
60    pub event: AgentLogEvent,
61    /// Short excerpt (~200 chars) of the matched text.
62    pub snippet: String,
63}
64
65/// Aggregate results from a search.
66#[derive(Debug, Default)]
67pub struct SearchResults {
68    pub total_sessions_scanned: usize,
69    pub total_events_scanned: usize,
70    pub total_files_missing: usize,
71    pub matches: Vec<SearchMatch>,
72}
73
74// ---------------------------------------------------------------------------
75// Date parsing
76// ---------------------------------------------------------------------------
77
78/// Parse a date/time string for `--from` / `--to` filters.
79///
80/// Accepted formats:
81/// - RFC 3339 (e.g. `2024-01-15T10:30:00Z`)
82/// - Date only (e.g. `2024-01-15`) — interpreted as start of day UTC
83/// - Relative offset from now: `1h`, `2d`, `3w`, `1m` (hours/days/weeks/months)
84pub fn parse_date_arg(s: &str) -> Result<DateTime<Utc>> {
85    // Try RFC 3339 first.
86    if let Ok(dt) = DateTime::parse_from_rfc3339(s) {
87        return Ok(dt.with_timezone(&Utc));
88    }
89
90    // Try date-only (YYYY-MM-DD).
91    if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
92        let dt = date
93            .and_hms_opt(0, 0, 0)
94            .expect("midnight is always valid")
95            .and_utc();
96        return Ok(dt);
97    }
98
99    // Try relative offset: leading digits followed by a unit character.
100    let s_trimmed = s.trim();
101    if !s_trimmed.is_empty() {
102        let unit = s_trimmed.chars().last().unwrap();
103        let digits = &s_trimmed[..s_trimmed.len() - unit.len_utf8()];
104        if let Ok(n) = digits.parse::<i64>() {
105            let delta = match unit {
106                'h' => Duration::hours(n),
107                'd' => Duration::days(n),
108                'w' => Duration::weeks(n),
109                'm' => Duration::days(n * 30),
110                _ => bail!(
111                    "Unknown time unit '{}'. Use h (hours), d (days), w (weeks), or m (months).",
112                    unit
113                ),
114            };
115            return Ok(Utc::now() - delta);
116        }
117    }
118
119    bail!(
120        "Cannot parse date '{}'. Use RFC 3339 (2024-01-15T10:30:00Z), date only (2024-01-15), or relative (1h, 2d, 3w, 1m).",
121        s
122    )
123}
124
125// ---------------------------------------------------------------------------
126// Text matcher
127// ---------------------------------------------------------------------------
128
129enum TextMatcher {
130    /// No text filter — everything matches.
131    None,
132    /// Case-insensitive literal substring.
133    Literal(String),
134    /// Compiled regex.
135    Pattern(Regex),
136}
137
138impl TextMatcher {
139    fn build(query: &SearchQuery) -> Result<Self> {
140        let Some(ref text) = query.text else {
141            return Ok(Self::None);
142        };
143        if query.use_regex {
144            let pattern = if query.case_insensitive {
145                format!("(?i){}", text)
146            } else {
147                text.clone()
148            };
149            let re = Regex::new(&pattern)
150                .with_context(|| format!("Invalid regex pattern: '{}'", text))?;
151            Ok(Self::Pattern(re))
152        } else if query.case_insensitive {
153            Ok(Self::Literal(text.to_lowercase()))
154        } else {
155            Ok(Self::Literal(text.clone()))
156        }
157    }
158
159    fn is_match(&self, haystack: &str) -> bool {
160        match self {
161            Self::None => true,
162            Self::Literal(needle) => haystack.to_lowercase().contains(needle.as_str()),
163            Self::Pattern(re) => re.is_match(haystack),
164        }
165    }
166
167    fn find_offset(&self, haystack: &str) -> Option<usize> {
168        match self {
169            Self::None => Some(0),
170            Self::Literal(needle) => haystack.to_lowercase().find(needle.as_str()),
171            Self::Pattern(re) => re.find(haystack).map(|m| m.start()),
172        }
173    }
174
175    fn has_filter(&self) -> bool {
176        !matches!(self, Self::None)
177    }
178}
179
180// ---------------------------------------------------------------------------
181// Content extraction
182// ---------------------------------------------------------------------------
183
184fn extract_searchable_text(event: &AgentLogEvent) -> String {
185    let mut parts: Vec<String> = Vec::new();
186
187    match &event.kind {
188        LogEventKind::SessionStarted {
189            command,
190            model,
191            cwd,
192            ..
193        } => {
194            parts.push(command.clone());
195            if let Some(m) = model {
196                parts.push(m.clone());
197            }
198            if let Some(c) = cwd {
199                parts.push(c.clone());
200            }
201        }
202        LogEventKind::UserMessage { role, content, .. } => {
203            parts.push(role.clone());
204            parts.push(content.clone());
205        }
206        LogEventKind::AssistantMessage { content, .. } => {
207            parts.push(content.clone());
208        }
209        LogEventKind::Reasoning { content, .. } => {
210            parts.push(content.clone());
211        }
212        LogEventKind::ToolCall {
213            tool_name, input, ..
214        } => {
215            parts.push(tool_name.clone());
216            if let Some(v) = input {
217                parts.push(v.to_string());
218            }
219        }
220        LogEventKind::ToolResult {
221            tool_name,
222            output,
223            error,
224            data,
225            ..
226        } => {
227            if let Some(n) = tool_name {
228                parts.push(n.clone());
229            }
230            if let Some(o) = output {
231                parts.push(o.clone());
232            }
233            if let Some(e) = error {
234                parts.push(e.clone());
235            }
236            if let Some(d) = data {
237                parts.push(d.to_string());
238            }
239        }
240        LogEventKind::Permission {
241            tool_name,
242            description,
243            ..
244        } => {
245            parts.push(tool_name.clone());
246            parts.push(description.clone());
247        }
248        LogEventKind::ProviderStatus { message, .. } => {
249            parts.push(message.clone());
250        }
251        LogEventKind::Stderr { message } => {
252            parts.push(message.clone());
253        }
254        LogEventKind::ParseWarning { message, raw } => {
255            parts.push(message.clone());
256            if let Some(r) = raw {
257                parts.push(r.clone());
258            }
259        }
260        LogEventKind::SessionEnded { error, .. } => {
261            if let Some(e) = error {
262                parts.push(e.clone());
263            }
264        }
265        LogEventKind::SessionCleared { .. } => {}
266        LogEventKind::Heartbeat { .. } => {}
267        LogEventKind::Usage { .. } => {}
268        LogEventKind::UserEvent { message, .. } => {
269            parts.push(message.clone());
270        }
271    }
272
273    parts.join(" ")
274}
275
276// ---------------------------------------------------------------------------
277// Snippet builder
278// ---------------------------------------------------------------------------
279
280fn make_snippet(text: &str, matcher: &TextMatcher, max_len: usize) -> String {
281    let offset = matcher.find_offset(text).unwrap_or(0);
282
283    let start = offset.saturating_sub(max_len / 4);
284    let end = (start + max_len).min(text.len());
285
286    // Clamp to char boundaries.
287    let start = text
288        .char_indices()
289        .map(|(i, _)| i)
290        .rfind(|&i| i <= start)
291        .unwrap_or(0);
292    let end = text
293        .char_indices()
294        .map(|(i, _)| i)
295        .find(|&i| i >= end)
296        .unwrap_or(text.len());
297
298    let mut snippet = String::new();
299    if start > 0 {
300        snippet.push_str("[...] ");
301    }
302    snippet.push_str(&text[start..end]);
303    if end < text.len() {
304        snippet.push_str(" [...]");
305    }
306    snippet
307}
308
309// ---------------------------------------------------------------------------
310// Metadata pre-filter
311// ---------------------------------------------------------------------------
312
313fn session_matches_query(entry: &SessionLogIndexEntry, query: &SearchQuery) -> bool {
314    // Provider filter
315    if let Some(ref p) = query.provider
316        && !entry.provider.eq_ignore_ascii_case(p)
317    {
318        return false;
319    }
320
321    // Session ID prefix filter
322    if let Some(ref sid) = query.session_id
323        && !entry.wrapper_session_id.starts_with(sid.as_str())
324    {
325        return false;
326    }
327
328    // Date range: skip sessions that definitely ended before `from`
329    if let Some(from) = query.from
330        && let Some(ref ended) = entry.ended_at
331        && let Ok(ended_dt) = DateTime::parse_from_rfc3339(ended)
332        && ended_dt.with_timezone(&Utc) < from
333    {
334        return false;
335    }
336
337    // Date range: skip sessions that started after `to`
338    if let Some(to) = query.to
339        && let Ok(started_dt) = DateTime::parse_from_rfc3339(&entry.started_at)
340        && started_dt.with_timezone(&Utc) > to
341    {
342        return false;
343    }
344
345    true
346}
347
348// ---------------------------------------------------------------------------
349// Event filter
350// ---------------------------------------------------------------------------
351
352fn event_matches_query(event: &AgentLogEvent, query: &SearchQuery, matcher: &TextMatcher) -> bool {
353    // Provider filter at event level
354    if let Some(ref p) = query.provider
355        && !event.provider.eq_ignore_ascii_case(p)
356    {
357        return false;
358    }
359
360    // Date range filters
361    if (query.from.is_some() || query.to.is_some())
362        && let Ok(event_dt) = DateTime::parse_from_rfc3339(&event.ts)
363    {
364        let event_utc = event_dt.with_timezone(&Utc);
365        if let Some(from) = query.from
366            && event_utc < from
367        {
368            return false;
369        }
370        if let Some(to) = query.to
371            && event_utc > to
372        {
373            return false;
374        }
375    }
376
377    // Tool kind / tool name / role filters
378    let has_tool_filter = query.tool.is_some() || query.tool_kind.is_some();
379    let has_role_filter = query.role.is_some();
380
381    if has_tool_filter {
382        match &event.kind {
383            LogEventKind::ToolCall {
384                tool_name,
385                tool_kind,
386                ..
387            } => {
388                if let Some(ref t) = query.tool
389                    && !tool_name.to_lowercase().contains(&t.to_lowercase())
390                {
391                    return false;
392                }
393                if let Some(ref tk) = query.tool_kind {
394                    let kind = tool_kind.unwrap_or_else(|| ToolKind::infer(tool_name));
395                    if kind != *tk {
396                        return false;
397                    }
398                }
399            }
400            LogEventKind::ToolResult {
401                tool_name,
402                tool_kind,
403                ..
404            } => {
405                if let Some(ref t) = query.tool {
406                    let name = tool_name.as_deref().unwrap_or("");
407                    if !name.to_lowercase().contains(&t.to_lowercase()) {
408                        return false;
409                    }
410                }
411                if let Some(ref tk) = query.tool_kind {
412                    let kind = tool_kind.unwrap_or_else(|| {
413                        tool_name
414                            .as_deref()
415                            .map(ToolKind::infer)
416                            .unwrap_or(ToolKind::Other)
417                    });
418                    if kind != *tk {
419                        return false;
420                    }
421                }
422            }
423            // Non-tool events are excluded when a tool filter is active
424            _ => return false,
425        }
426    }
427
428    if has_role_filter {
429        match &event.kind {
430            LogEventKind::UserMessage { role, .. } => {
431                if let Some(ref r) = query.role
432                    && !role.eq_ignore_ascii_case(r)
433                {
434                    return false;
435                }
436            }
437            // Non-message events are excluded when a role filter is active
438            // (unless combined with a tool filter, which we already handled above)
439            _ if !has_tool_filter => return false,
440            _ => {}
441        }
442    }
443
444    // Text filter
445    if matcher.has_filter() {
446        let text = extract_searchable_text(event);
447        if !matcher.is_match(&text) {
448            return false;
449        }
450    }
451
452    true
453}
454
455// ---------------------------------------------------------------------------
456// JSONL scanner
457// ---------------------------------------------------------------------------
458
459struct ScanResult {
460    events_scanned: usize,
461    matching_events: Vec<AgentLogEvent>,
462}
463
464fn scan_session(log_path: &Path, query: &SearchQuery, matcher: &TextMatcher) -> Result<ScanResult> {
465    let file = std::fs::File::open(log_path)
466        .with_context(|| format!("Failed to open log file: {}", log_path.display()))?;
467    let reader = BufReader::new(file);
468
469    let mut result = ScanResult {
470        events_scanned: 0,
471        matching_events: Vec::new(),
472    };
473
474    for line in reader.lines() {
475        let line =
476            line.with_context(|| format!("Failed to read line in {}", log_path.display()))?;
477        let line = line.trim();
478        if line.is_empty() {
479            continue;
480        }
481
482        let event: AgentLogEvent = match serde_json::from_str(line) {
483            Ok(e) => e,
484            Err(e) => {
485                log::debug!(
486                    "Skipping malformed JSONL line in {}: {}",
487                    log_path.display(),
488                    e
489                );
490                continue;
491            }
492        };
493
494        result.events_scanned += 1;
495
496        if event_matches_query(&event, query, matcher) {
497            result.matching_events.push(event);
498        }
499    }
500
501    Ok(result)
502}
503
504// ---------------------------------------------------------------------------
505// Session discovery
506// ---------------------------------------------------------------------------
507
508fn collect_candidate_sessions(
509    query: &SearchQuery,
510    zag_home: &Path,
511    cwd: &Path,
512) -> Result<Vec<(SessionLogIndexEntry, PathBuf)>> {
513    let projects_dir = zag_home.join("projects");
514    if !projects_dir.exists() {
515        return Ok(Vec::new());
516    }
517
518    // If tag filter is set, collect matching session IDs from session stores.
519    let tag_session_ids: Option<std::collections::HashSet<String>> = if query.tag.is_some() {
520        let store = if query.global {
521            crate::session::SessionStore::load_all().unwrap_or_default()
522        } else {
523            crate::session::SessionStore::load(Some(&cwd.to_string_lossy())).unwrap_or_default()
524        };
525        let tag = query.tag.as_deref().unwrap();
526        let matching = store.find_by_tag(tag);
527        Some(matching.into_iter().map(|e| e.session_id.clone()).collect())
528    } else {
529        None
530    };
531
532    let cwd_str = cwd.to_string_lossy().to_string();
533    let mut candidates: Vec<(SessionLogIndexEntry, PathBuf)> = Vec::new();
534    let mut seen_ids = std::collections::HashSet::new();
535
536    let read_dir = std::fs::read_dir(&projects_dir)
537        .with_context(|| format!("Failed to read {}", projects_dir.display()))?;
538
539    for entry in read_dir {
540        let project_dir = match entry {
541            Ok(e) => e.path(),
542            Err(_) => continue,
543        };
544        if !project_dir.is_dir() {
545            continue;
546        }
547
548        let index_path = project_dir.join("logs").join("index.json");
549        if !index_path.exists() {
550            continue;
551        }
552
553        let content = match std::fs::read_to_string(&index_path) {
554            Ok(c) => c,
555            Err(e) => {
556                log::warn!("Failed to read index {}: {}", index_path.display(), e);
557                continue;
558            }
559        };
560
561        let index: SessionLogIndex = match serde_json::from_str(&content) {
562            Ok(i) => i,
563            Err(e) => {
564                log::warn!("Malformed index {}: {}", index_path.display(), e);
565                continue;
566            }
567        };
568
569        for session_entry in index.sessions {
570            // Scope filter: in non-global mode, only include sessions whose workspace_path
571            // is within the current directory tree.
572            if !query.global {
573                let in_scope = match &session_entry.workspace_path {
574                    Some(wp) => {
575                        // Match if workspace is the cwd or a subdirectory of cwd
576                        wp == &cwd_str
577                            || wp.starts_with(&format!("{}/", cwd_str))
578                            || wp.starts_with(&format!("{}\\", cwd_str))
579                    }
580                    None => false,
581                };
582                if !in_scope {
583                    continue;
584                }
585            }
586
587            // Metadata pre-filter (provider, session ID, dates)
588            if !session_matches_query(&session_entry, query) {
589                continue;
590            }
591
592            // Tag filter: only include sessions matching the tag
593            if let Some(ref allowed) = tag_session_ids {
594                if !allowed.contains(&session_entry.wrapper_session_id) {
595                    continue;
596                }
597            }
598
599            // Deduplicate by session ID
600            if !seen_ids.insert(session_entry.wrapper_session_id.clone()) {
601                continue;
602            }
603
604            let log_path = PathBuf::from(&session_entry.log_path);
605            candidates.push((session_entry, log_path));
606        }
607    }
608
609    // Sort by started_at so results are in chronological order
610    candidates.sort_by(|a, b| a.0.started_at.cmp(&b.0.started_at));
611
612    Ok(candidates)
613}
614
615// ---------------------------------------------------------------------------
616// Main entry point
617// ---------------------------------------------------------------------------
618
619/// Search through session logs matching the given query.
620pub fn search(query: &SearchQuery, zag_home: &Path, cwd: &Path) -> Result<SearchResults> {
621    let matcher = TextMatcher::build(query)?;
622
623    let candidates = collect_candidate_sessions(query, zag_home, cwd)?;
624
625    let mut results = SearchResults::default();
626
627    'outer: for (entry, log_path) in candidates {
628        results.total_sessions_scanned += 1;
629
630        if !log_path.exists() {
631            results.total_files_missing += 1;
632            log::debug!("Log file missing: {}", log_path.display());
633            continue;
634        }
635
636        let scan = match scan_session(&log_path, query, &matcher) {
637            Ok(s) => s,
638            Err(e) => {
639                log::warn!("Failed to scan {}: {}", log_path.display(), e);
640                continue;
641            }
642        };
643
644        results.total_events_scanned += scan.events_scanned;
645
646        for event in scan.matching_events {
647            let text = extract_searchable_text(&event);
648            let snippet = make_snippet(&text, &matcher, 200);
649
650            results.matches.push(SearchMatch {
651                session_id: entry.wrapper_session_id.clone(),
652                provider: entry.provider.clone(),
653                started_at: entry.started_at.clone(),
654                ended_at: entry.ended_at.clone(),
655                workspace_path: entry.workspace_path.clone(),
656                command: entry.command.clone(),
657                event,
658                snippet,
659            });
660
661            if let Some(limit) = query.limit
662                && results.matches.len() >= limit
663            {
664                break 'outer;
665            }
666        }
667    }
668
669    Ok(results)
670}
671
672#[cfg(test)]
673#[path = "search_tests.rs"]
674mod tests;