mi6_core/context/
github.rs

1//! GitHub context extraction from Bash tool events.
2//!
3//! This module parses Bash commands and their output to extract GitHub issue/PR
4//! references and worktree information. This provides more accurate context than
5//! branch name parsing alone, capturing:
6//!
7//! - Explicit `gh issue/pr` commands
8//! - GitHub URLs in command output (e.g., from `gh pr create`)
9//! - `Fixes #N` references in PR bodies
10//! - Git worktree operations
11//!
12//! # Usage
13//!
14//! ```rust,ignore
15//! use mi6_core::github::{extract_context, SessionContextUpdate};
16//! use std::path::Path;
17//!
18//! let cwd = Path::new("/home/user/project");
19//! let update = extract_context(
20//!     "gh issue view 52 --repo paradigmxyz/mi6",
21//!     None,
22//!     cwd,
23//! );
24//!
25//! assert_eq!(update.github_repo, Some("paradigmxyz/mi6".to_string()));
26//! assert_eq!(update.github_issue, Some(52));
27//! ```
28
29use std::path::Path;
30use std::sync::LazyLock;
31
32use regex::Regex;
33
/// Partial session-context update produced from a single Bash tool event.
///
/// A field is `Some` only when parsing yielded a value for it; `None` means
/// "nothing new here". Apply it with COALESCE semantics so that session fields
/// this update is silent about are left as they were.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct SessionContextUpdate {
    /// Repository slug in `owner/repo` form.
    pub github_repo: Option<String>,
    /// Number of the GitHub issue.
    pub github_issue: Option<i32>,
    /// Number of the GitHub pull request.
    pub github_pr: Option<i32>,
    /// Absolute filesystem path of the git worktree.
    pub worktree_path: Option<String>,
    /// Name of the branch in the worktree.
    pub worktree_branch: Option<String>,
}

impl SessionContextUpdate {
    /// Reports whether at least one field carries a value.
    pub fn has_values(&self) -> bool {
        let nothing_set = self.github_repo.is_none()
            && self.github_issue.is_none()
            && self.github_pr.is_none()
            && self.worktree_path.is_none()
            && self.worktree_branch.is_none();
        !nothing_set
    }

    /// Folds `other` into `self`, field by field.
    ///
    /// A value present in `other` replaces the one in `self`; a field that is
    /// `None` in `other` leaves the corresponding field of `self` untouched.
    pub fn merge(&mut self, other: SessionContextUpdate) {
        let SessionContextUpdate {
            github_repo,
            github_issue,
            github_pr,
            worktree_path,
            worktree_branch,
        } = other;

        self.github_repo = github_repo.or(self.github_repo.take());
        self.github_issue = github_issue.or(self.github_issue);
        self.github_pr = github_pr.or(self.github_pr);
        self.worktree_path = worktree_path.or(self.worktree_path.take());
        self.worktree_branch = worktree_branch.or(self.worktree_branch.take());
    }
}
81
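// Lazily compiled regex patterns.
// Each pattern is a fixed string literal, so `Regex::new` can only fail if the
// literal itself is malformed. Note that the regex crate validates a pattern at
// runtime (on first use of the static), not at compile time; the unit tests in
// this file exercise the patterns, which is what makes `expect()` acceptable.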
85
86#[expect(
87    clippy::expect_used,
88    reason = "static regex pattern verified at compile time"
89)]
90static GH_ISSUE_PR_RE: LazyLock<Regex> = LazyLock::new(|| {
91    // gh issue/pr {view,checks,diff,merge,close,edit,reopen,comment} N [--repo R]
92    Regex::new(
93        r"gh\s+(issue|pr)\s+(view|checks|diff|merge|close|edit|reopen|comment)\s+(\d+)(?:.*--repo\s+([^\s]+))?"
94    ).expect("invalid regex")
95});
96
97#[expect(
98    clippy::expect_used,
99    reason = "static regex pattern verified at compile time"
100)]
101static GITHUB_URL_RE: LazyLock<Regex> = LazyLock::new(|| {
102    // https://github.com/owner/repo/issues/N or /pull/N
103    Regex::new(r"https://github\.com/([^/]+/[^/]+)/(issues|pull)/(\d+)").expect("invalid regex")
104});
105
106#[expect(
107    clippy::expect_used,
108    reason = "static regex pattern verified at compile time"
109)]
110static CLOSES_RE: LazyLock<Regex> = LazyLock::new(|| {
111    // Closes/Fixes/Resolves #N (case-insensitive)
112    Regex::new(r"(?i)\b(closes?|fixes?|resolves?)\s*#(\d+)").expect("invalid regex")
113});
114
115#[expect(
116    clippy::expect_used,
117    reason = "static regex pattern verified at compile time"
118)]
119static GH_PR_CREATE_BODY_RE: LazyLock<Regex> = LazyLock::new(|| {
120    // gh pr create ... --body "..." or --body '...'
121    // Capture the body content for scanning
122    Regex::new(r#"gh\s+pr\s+create\s+.*--body\s+(?:"([^"]*)"|'([^']*)')"#).expect("invalid regex")
123});
124
125#[expect(
126    clippy::expect_used,
127    reason = "static regex pattern verified at compile time"
128)]
129static GIT_WORKTREE_ADD_RE: LazyLock<Regex> = LazyLock::new(|| {
130    // git worktree add [-f] <path> [-b <branch>] [<commit>]
131    Regex::new(r"git\s+worktree\s+add\s+(?:-f\s+)?([^\s]+)(?:.*-b\s+([^\s]+))?")
132        .expect("invalid regex")
133});
134
135#[expect(
136    clippy::expect_used,
137    reason = "static regex pattern verified at compile time"
138)]
139static GIT_C_PATH_RE: LazyLock<Regex> = LazyLock::new(|| {
140    // git -C <path> ...
141    Regex::new(r"git\s+-C\s+([^\s]+)").expect("invalid regex")
142});
143
144#[expect(
145    clippy::expect_used,
146    reason = "static regex pattern verified at compile time"
147)]
148static CD_PATH_RE: LazyLock<Regex> = LazyLock::new(|| {
149    // cd <path> with optional quotes
150    Regex::new(r#"cd\s+(?:"([^"]+)"|'([^']+)'|([^\s;&|]+))"#).expect("invalid regex")
151});
152
153#[expect(
154    clippy::expect_used,
155    reason = "static regex pattern verified at compile time"
156)]
157static GH_CREATE_RE: LazyLock<Regex> = LazyLock::new(|| {
158    // gh issue create or gh pr create - commands that output URLs we want to capture
159    Regex::new(r"gh\s+(issue|pr)\s+create").expect("invalid regex")
160});
161
162/// Parse a Bash command for gh issue/pr references.
163///
164/// Handles commands like:
165/// - `gh issue view 52`
166/// - `gh pr checks 86 --repo paradigmxyz/mi6`
167/// - `gh issue close 42`
168pub fn parse_gh_command(command: &str) -> SessionContextUpdate {
169    let mut update = SessionContextUpdate::default();
170
171    // Split command on && || ; to handle chained commands
172    for part in split_chained_commands(command) {
173        if let Some(caps) = GH_ISSUE_PR_RE.captures(part) {
174            let cmd_type = caps.get(1).map(|m| m.as_str());
175            let number_str = caps.get(3).map(|m| m.as_str());
176            let repo = caps.get(4).map(|m| m.as_str().to_string());
177
178            if let Some(num) = number_str.and_then(|s| s.parse::<i32>().ok()) {
179                match cmd_type {
180                    Some("issue") => update.github_issue = Some(num),
181                    Some("pr") => update.github_pr = Some(num),
182                    _ => {}
183                }
184            }
185
186            if let Some(r) = repo {
187                update.github_repo = Some(r);
188            }
189        }
190
191        // Also check for gh pr create --body "... Fixes #N ..."
192        if let Some(caps) = GH_PR_CREATE_BODY_RE.captures(part) {
193            let body = caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str());
194            if let Some(body_text) = body {
195                let issues = parse_closes_references(body_text);
196                if let Some(&first_issue) = issues.first() {
197                    update.github_issue = Some(first_issue);
198                }
199            }
200        }
201    }
202
203    update
204}
205
206/// Parse tool output for GitHub URLs.
207///
208/// Extracts repo, issue, and PR from URLs like:
209/// - `https://github.com/paradigmxyz/mi6/issues/236`
210/// - `https://github.com/paradigmxyz/mi6/pull/237`
211pub fn parse_github_urls(output: &str) -> SessionContextUpdate {
212    let mut update = SessionContextUpdate::default();
213
214    for caps in GITHUB_URL_RE.captures_iter(output) {
215        let repo = caps.get(1).map(|m| m.as_str().to_string());
216        let url_type = caps.get(2).map(|m| m.as_str());
217        let number_str = caps.get(3).map(|m| m.as_str());
218
219        if let Some(num) = number_str.and_then(|s| s.parse::<i32>().ok()) {
220            match url_type {
221                Some("issues") => update.github_issue = Some(num),
222                Some("pull") => update.github_pr = Some(num),
223                _ => {}
224            }
225        }
226
227        if let Some(r) = repo {
228            update.github_repo = Some(r);
229        }
230    }
231
232    update
233}
234
235/// Parse PR body text for "Closes #N" references.
236///
237/// Returns a list of issue numbers found.
238pub fn parse_closes_references(body: &str) -> Vec<i32> {
239    CLOSES_RE
240        .captures_iter(body)
241        .filter_map(|caps| caps.get(2).and_then(|m| m.as_str().parse::<i32>().ok()))
242        .collect()
243}
244
245/// Parse git worktree add command.
246///
247/// Extracts path and optional branch from commands like:
248/// - `git worktree add ../foo-issue-42`
249/// - `git worktree add ../foo -b fix/issue-42`
250/// - `git worktree add -f /path/to/worktree`
251pub fn parse_worktree_add(command: &str, cwd: &Path) -> Option<(String, Option<String>)> {
252    for part in split_chained_commands(command) {
253        if let Some(caps) = GIT_WORKTREE_ADD_RE.captures(part) {
254            let path_str = caps.get(1)?.as_str();
255            let branch = caps.get(2).map(|m| m.as_str().to_string());
256
257            // Resolve path relative to cwd
258            let resolved = resolve_path(path_str, cwd);
259            return Some((resolved, branch));
260        }
261    }
262    None
263}
264
265/// Parse `git -C <path>` from a command.
266///
267/// Returns the path argument if found.
268pub fn parse_git_c_path(command: &str, cwd: &Path) -> Option<String> {
269    for part in split_chained_commands(command) {
270        if let Some(caps) = GIT_C_PATH_RE.captures(part) {
271            let path_str = caps.get(1)?.as_str();
272            return Some(resolve_path(path_str, cwd));
273        }
274    }
275    None
276}
277
278/// Parse cd command for directory changes.
279///
280/// Handles quoted and unquoted paths:
281/// - `cd /path/to/dir`
282/// - `cd "/path with spaces/dir"`
283/// - `cd '/path/dir'`
284pub fn parse_cd_path(command: &str, cwd: &Path) -> Option<String> {
285    for part in split_chained_commands(command) {
286        if let Some(caps) = CD_PATH_RE.captures(part) {
287            // Match groups: 1=double-quoted, 2=single-quoted, 3=unquoted
288            let path_str = caps
289                .get(1)
290                .or_else(|| caps.get(2))
291                .or_else(|| caps.get(3))?
292                .as_str();
293            return Some(resolve_path(path_str, cwd));
294        }
295    }
296    None
297}
298
299/// Check if the command is one that creates GitHub resources and outputs URLs.
300///
301/// Only these commands should have their output parsed for GitHub URLs:
302/// - `gh issue create` - outputs the created issue URL
303/// - `gh pr create` - outputs the created PR URL
304///
305/// Other commands (like `gh run view`, `cat`, etc.) may have GitHub URLs in their
306/// output, but these are incidental and shouldn't be treated as session context.
307fn should_parse_output_urls(command: &str) -> bool {
308    for part in split_chained_commands(command) {
309        if GH_CREATE_RE.is_match(part) {
310            return true;
311        }
312    }
313    false
314}
315
316/// Extract all context from a Bash tool event.
317///
318/// Combines command parsing and output parsing to extract:
319/// - GitHub repo, issue, and PR references
320/// - Worktree path and branch
321///
322/// # Arguments
323/// * `command` - The Bash command (from event payload)
324/// * `output` - The tool output (from event tool_output), if any
325/// * `cwd` - Current working directory for resolving relative paths
326pub fn extract_context(command: &str, output: Option<&str>, cwd: &Path) -> SessionContextUpdate {
327    let mut update = SessionContextUpdate::default();
328
329    // Parse command for gh commands
330    update.merge(parse_gh_command(command));
331
332    // Parse output for GitHub URLs, but only for creation commands.
333    // This prevents incidental URLs (e.g., in CI logs, code comments) from
334    // being treated as session context.
335    if let Some(out) = output
336        && should_parse_output_urls(command)
337    {
338        update.merge(parse_github_urls(out));
339    }
340
341    // Parse worktree operations
342    if let Some((path, branch)) = parse_worktree_add(command, cwd) {
343        update.worktree_path = Some(path);
344        update.worktree_branch = branch;
345    }
346
347    // Parse git -C path (updates worktree_path)
348    if let Some(path) = parse_git_c_path(command, cwd) {
349        update.worktree_path = Some(path);
350    }
351
352    // Parse cd commands
353    if let Some(path) = parse_cd_path(command, cwd) {
354        update.worktree_path = Some(path);
355    }
356
357    update
358}
359
/// Split a shell command line on the chaining operators `&&`, `||` and `;`.
///
/// Operators are honoured in the order they occur, and are ignored inside
/// single or double quotes and when preceded by a backslash. Each returned
/// part is trimmed; empty parts are dropped. A single `|` or `&` does not split.
///
/// This is a lightweight scanner, not a shell parser: an unterminated quote
/// makes the remainder of the line one part.
fn split_chained_commands(command: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut part_start = 0;
    let mut open_quote: Option<char> = None;
    let mut chars = command.char_indices().peekable();

    while let Some((idx, ch)) = chars.next() {
        // Length in bytes of the delimiter starting at `idx`, or 0 if none.
        let delim_len = match (open_quote, ch) {
            // Closing quote.
            (Some(quote), c) if c == quote => {
                open_quote = None;
                0
            }
            // A backslash escapes the next character, except inside single quotes.
            (Some('"') | None, '\\') => {
                chars.next();
                0
            }
            // Anything else inside quotes is literal text.
            (Some(_), _) => 0,
            // Opening quote.
            (None, '"' | '\'') => {
                open_quote = Some(ch);
                0
            }
            (None, ';') => 1,
            // `&&` / `||`: the same operator character twice in a row.
            (None, '&' | '|') if chars.peek().is_some_and(|&(_, next)| next == ch) => {
                chars.next();
                2
            }
            (None, _) => 0,
        };

        if delim_len > 0 {
            // All delimiters are ASCII, so both slice bounds are char boundaries.
            let part = command[part_start..idx].trim();
            if !part.is_empty() {
                parts.push(part);
            }
            part_start = idx + delim_len;
        }
    }

    let tail = command[part_start..].trim();
    if !tail.is_empty() {
        parts.push(tail);
    }

    parts
}
393
/// Resolve `path` against `cwd`, expanding a leading `~`.
///
/// - `~` and `~/...` are expanded using the `HOME` environment variable; if
///   `HOME` is unavailable the path is left unexpanded.
/// - `~name/...` (another user's home) cannot be resolved here and is left
///   unexpanded rather than being glued onto `HOME`.
/// - An absolute result is returned as is; a relative one is joined onto `cwd`.
///
/// The result is not normalised: `..` components are kept verbatim.
fn resolve_path(path: &str, cwd: &Path) -> String {
    let expanded = match path.strip_prefix('~') {
        // Only a bare `~` or `~/...` refers to the current user's home.
        Some(rest) if rest.is_empty() || rest.starts_with('/') => match std::env::var("HOME") {
            Ok(home) => format!("{home}{rest}"),
            Err(_) => path.to_string(),
        },
        _ => path.to_string(),
    };

    let expanded = Path::new(&expanded);
    if expanded.is_absolute() {
        expanded.to_string_lossy().into_owned()
    } else {
        cwd.join(expanded).to_string_lossy().into_owned()
    }
}
414
#[cfg(test)]
#[expect(clippy::unwrap_used, reason = "test code uses unwrap for clarity")]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Repository slug that most fixtures refer to, in the shape the parsers return it.
    fn mi6_repo() -> Option<String> {
        Some("paradigmxyz/mi6".to_string())
    }

    /// Working directory used by the worktree and context tests.
    fn mi6_dir() -> PathBuf {
        PathBuf::from("/test/repos/mi6")
    }

    // --- gh command parsing -------------------------------------------------

    #[test]
    fn test_parse_gh_issue_view() {
        let parsed = parse_gh_command("gh issue view 52");
        assert_eq!(parsed.github_issue, Some(52));
        assert_eq!(parsed.github_pr, None);
        assert_eq!(parsed.github_repo, None);
    }

    #[test]
    fn test_parse_gh_issue_with_repo() {
        let parsed = parse_gh_command("gh issue view 52 --repo paradigmxyz/mi6");
        assert_eq!(parsed.github_issue, Some(52));
        assert_eq!(parsed.github_repo, mi6_repo());
    }

    #[test]
    fn test_parse_gh_pr_checks() {
        let parsed = parse_gh_command("gh pr checks 86");
        assert_eq!(parsed.github_pr, Some(86));
        assert_eq!(parsed.github_issue, None);
    }

    #[test]
    fn test_parse_gh_pr_with_repo() {
        let parsed = parse_gh_command("gh pr view 64 --repo paradigmxyz/mi6");
        assert_eq!(parsed.github_pr, Some(64));
        assert_eq!(parsed.github_repo, mi6_repo());
    }

    // --- URL parsing --------------------------------------------------------

    #[test]
    fn test_parse_github_url_issue() {
        let parsed = parse_github_urls("Created https://github.com/paradigmxyz/mi6/issues/236");
        assert_eq!(parsed.github_issue, Some(236));
        assert_eq!(parsed.github_repo, mi6_repo());
    }

    #[test]
    fn test_parse_github_url_pr() {
        let parsed = parse_github_urls("Created https://github.com/paradigmxyz/mi6/pull/237");
        assert_eq!(parsed.github_pr, Some(237));
        assert_eq!(parsed.github_repo, mi6_repo());
    }

    // --- closing keywords ---------------------------------------------------

    #[test]
    fn test_parse_closes_references() {
        assert_eq!(
            parse_closes_references("This PR fixes #52 and closes #53"),
            vec![52, 53]
        );
    }

    #[test]
    fn test_parse_closes_references_case_insensitive() {
        assert_eq!(
            parse_closes_references("FIXES #42\nResolves #43\ncloses #44"),
            vec![42, 43, 44]
        );
    }

    #[test]
    fn test_parse_gh_pr_create_with_fixes() {
        let parsed = parse_gh_command(r#"gh pr create --title "Fix bug" --body "Fixes #52""#);
        assert_eq!(parsed.github_issue, Some(52));
    }

    // --- worktree / directory parsing ---------------------------------------

    #[test]
    fn test_parse_worktree_add_simple() {
        let parsed = parse_worktree_add("git worktree add ../mi6-issue-42", &mi6_dir());
        assert!(parsed.is_some());
        let (path, branch) = parsed.unwrap();
        assert_eq!(path, "/test/repos/mi6/../mi6-issue-42");
        assert_eq!(branch, None);
    }

    #[test]
    fn test_parse_worktree_add_with_branch() {
        let parsed = parse_worktree_add(
            "git worktree add ../mi6-fix -b fix/issue-42 origin/main",
            &mi6_dir(),
        );
        assert!(parsed.is_some());
        let (path, branch) = parsed.unwrap();
        assert_eq!(path, "/test/repos/mi6/../mi6-fix");
        assert_eq!(branch, Some("fix/issue-42".to_string()));
    }

    #[test]
    fn test_parse_git_c_path() {
        let base = PathBuf::from("/test/repos");
        assert_eq!(
            parse_git_c_path("git -C /test/worktree status", &base),
            Some("/test/worktree".to_string())
        );
    }

    #[test]
    fn test_parse_cd_path_unquoted() {
        let base = PathBuf::from("/test");
        assert_eq!(
            parse_cd_path("cd /test/path/to/dir", &base),
            Some("/test/path/to/dir".to_string())
        );
    }

    #[test]
    fn test_parse_cd_path_quoted() {
        let base = PathBuf::from("/test");
        assert_eq!(
            parse_cd_path(r#"cd "/test/path with spaces/dir""#, &base),
            Some("/test/path with spaces/dir".to_string())
        );
    }

    #[test]
    fn test_parse_cd_path_relative() {
        let base = PathBuf::from("/test/repos");
        assert_eq!(
            parse_cd_path("cd mi6-issue-42", &base),
            Some("/test/repos/mi6-issue-42".to_string())
        );
    }

    // --- chained commands ---------------------------------------------------

    #[test]
    fn test_chained_commands() {
        let parsed =
            parse_gh_command("cargo build && gh pr create --title 'Fix' --body 'Fixes #52'");
        assert_eq!(parsed.github_issue, Some(52));
    }

    #[test]
    fn test_chained_commands_semicolon() {
        let parsed = parse_gh_command("echo hello; gh issue view 42");
        assert_eq!(parsed.github_issue, Some(42));
    }

    // --- extract_context ----------------------------------------------------

    #[test]
    fn test_extract_context_combined() {
        let ctx = extract_context(
            "gh pr view 86 --repo paradigmxyz/mi6",
            Some("Some output here"),
            &mi6_dir(),
        );
        assert_eq!(ctx.github_pr, Some(86));
        assert_eq!(ctx.github_repo, mi6_repo());
    }

    #[test]
    fn test_extract_context_from_output() {
        let ctx = extract_context(
            "gh pr create --title 'Fix' --body 'Some fix'",
            Some("Created https://github.com/paradigmxyz/mi6/pull/237"),
            &mi6_dir(),
        );
        assert_eq!(ctx.github_pr, Some(237));
        assert_eq!(ctx.github_repo, mi6_repo());
    }

    // --- SessionContextUpdate -----------------------------------------------

    #[test]
    fn test_session_context_update_has_values() {
        assert!(!SessionContextUpdate::default().has_values());

        let with_issue = SessionContextUpdate {
            github_issue: Some(42),
            ..Default::default()
        };
        assert!(with_issue.has_values());
    }

    #[test]
    fn test_session_context_update_merge() {
        let mut target = SessionContextUpdate {
            github_repo: Some("foo/bar".to_string()),
            github_issue: Some(1),
            ..Default::default()
        };

        target.merge(SessionContextUpdate {
            github_issue: Some(2),
            github_pr: Some(42),
            ..Default::default()
        });

        assert_eq!(target.github_repo, Some("foo/bar".to_string())); // untouched
        assert_eq!(target.github_issue, Some(2)); // replaced
        assert_eq!(target.github_pr, Some(42)); // newly set
    }

    // --- output URL gating --------------------------------------------------

    #[test]
    fn test_should_parse_output_urls() {
        // Creation commands: output is scanned.
        for creating in [
            "gh pr create --title 'Fix'",
            "gh issue create --title 'Bug'",
            "cargo build && gh pr create --title 'Fix'",
        ] {
            assert!(should_parse_output_urls(creating));
        }

        // Everything else: output is NOT scanned.
        for other in [
            "gh run view 12345",
            "gh pr view 42",
            "gh issue view 42",
            "cat README.md",
            "gh run download 12345 --name logs",
        ] {
            assert!(!should_parse_output_urls(other));
        }
    }

    #[test]
    fn test_extract_context_ignores_urls_in_non_creation_output() {
        // Regression test: GitHub URLs found in CI logs used to be picked up as
        // session context. The URL below comes from a comment in the shell
        // script of the dtolnay/rust-toolchain action.
        let ci_log_output = r#"
            # GitHub does not enforce `required: true` inputs itself. https://github.com/actions/runner/issues/1070
            echo "toolchain=$toolchain" >> $GITHUB_OUTPUT
        "#;

        let ctx = extract_context("gh run view 12345 --log", Some(ci_log_output), &mi6_dir());

        // The incidental URL in the log must be ignored.
        assert_eq!(ctx.github_issue, None);
        assert_eq!(ctx.github_repo, None);
        assert_eq!(ctx.github_pr, None);
    }

    #[test]
    fn test_extract_context_issue_create_output() {
        let ctx = extract_context(
            "gh issue create --title 'Bug report'",
            Some("https://github.com/paradigmxyz/mi6/issues/42"),
            &mi6_dir(),
        );
        assert_eq!(ctx.github_issue, Some(42));
        assert_eq!(ctx.github_repo, mi6_repo());
    }
}