Skip to main content

wtg_cli/
parse_input.rs

1use std::path::{Path, PathBuf};
2
3use percent_encoding::percent_decode_str;
4use url::Url;
5
6use crate::{
7    error::{WtgError, WtgResult},
8    github::GhRepoInfo,
9};
10
/// A query whose kind has been fully determined by parsing alone.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Query {
    /// Hash identifying a Git commit.
    GitCommit(String),
    /// A number that refers to either a GitHub issue or a pull request.
    IssueOrPr(u64),
    /// Number of a GitHub issue.
    Issue(u64),
    /// Number of a GitHub pull request.
    Pr(u64),
    /// A path inside the repository, together with the branch it was named on.
    FilePath { branch: String, path: PathBuf },
    /// Name of a Git tag.
    Tag(String),
}
26
27#[derive(Debug, Clone, PartialEq, Eq)]
28pub enum ParsedQuery {
29    Resolved(Query),
30    Unknown(String),
31    UnknownPath { segments: Vec<String> },
32}
33
34/// Parsed input that can come from either the input argument or a GitHub URL
35#[derive(Debug, Clone)]
36pub struct ParsedInput {
37    gh_repo_info: Option<GhRepoInfo>,
38    query: ParsedQuery,
39}
40
41impl ParsedInput {
42    const fn new_with_remote(gh_repo_info: GhRepoInfo, query: ParsedQuery) -> Self {
43        Self {
44            gh_repo_info: Some(gh_repo_info),
45            query,
46        }
47    }
48
49    /// Create a `ParsedInput` for a local query (no remote repo info).
50    #[must_use]
51    pub const fn new_local_query(query: ParsedQuery) -> Self {
52        Self {
53            gh_repo_info: None,
54            query,
55        }
56    }
57
58    #[must_use]
59    pub const fn gh_repo_info(&self) -> Option<&GhRepoInfo> {
60        self.gh_repo_info.as_ref()
61    }
62
63    #[must_use]
64    pub const fn query(&self) -> &ParsedQuery {
65        &self.query
66    }
67
68    #[cfg(test)]
69    #[must_use]
70    fn owner(&self) -> Option<&str> {
71        self.gh_repo_info.as_ref().map(GhRepoInfo::owner)
72    }
73
74    #[cfg(test)]
75    #[must_use]
76    fn repo(&self) -> Option<&str> {
77        self.gh_repo_info.as_ref().map(GhRepoInfo::repo)
78    }
79}
80
81/// Parse a GitHub URL to extract owner, repo, and optional query
82/// Supports:
83/// - <https://github.com/owner/repo/commit/hash>
84/// - <https://github.com/owner/repo/issues/123>
85/// - <https://github.com/owner/repo/pull/123>
86/// - <https://github.com/owner/repo/blob/branch/path/to/file>
87/// - <`git@github.com:owner/repo/pull/9#discussion_r123`>
88///
89/// Note: assumes input is already sanitized, trimmed & non-empty
90///
91/// Returns:
92/// - `Ok(ParsedInput)` if it's a valid GitHub URL
93/// - `Err(NotGitHubUrl)` if it's a valid URL but not GitHub
94/// - `Err(MalformedGitHubUrl)` if it's a GitHub URL but malformed
95fn try_parse_input_from_github_url(url: &str) -> Result<ParsedInput, WtgError> {
96    debug_assert!(
97        reject_control_chars(url).is_ok(),
98        "URL should be validated before parsing"
99    );
100
101    // Try SSH format first
102    if let Some(segments) = parse_git_ssh_segments(url) {
103        return parsed_input_from_segments(&segments, false, url);
104    }
105
106    // Try HTTP/HTTPS format
107    match parse_http_github_segments(url) {
108        Ok((segments, is_api)) => parsed_input_from_segments(&segments, is_api, url),
109        Err(e) => Err(e),
110    }
111}
112
113/// Parse a pre-validated input string into a `ParsedQuery`
114/// Assumes input is already trimmed, non-empty, and has no control characters
115fn parse_query(input: &str) -> ParsedQuery {
116    // If it starts with a '#', try to parse as issue or PR number
117    if let Some(stripped) = input.strip_prefix('#')
118        && let Ok(number) = stripped.parse()
119    {
120        return ParsedQuery::Resolved(Query::IssueOrPr(number));
121    }
122
123    // Otherwise we have to treat as unknown, since path & branches
124    // may look the same, and other git refs may be indistinguishable
125    // from commit hashes without querying the repo
126    ParsedQuery::Unknown(input.to_string())
127}
128
129/// Parse user input into a structured query, optionally with an explicit repo URL.
130///
131/// This is the main entry point for parsing CLI input. It handles:
132/// - GitHub URLs (issues, PRs, commits, files)
133/// - Local queries (commit hashes, tags, file paths)
134/// - Explicit repo + query combinations via the `-r` flag
135pub fn try_parse_input(raw_input: &str, repo_url: Option<&str>) -> Result<ParsedInput, WtgError> {
136    // Trim and validate input upfront
137    let input = raw_input.trim();
138    if input.is_empty() {
139        return Err(WtgError::EmptyInput);
140    }
141    let input = reject_control_chars(input)?;
142
143    // If repo url is explicitly provided, use it as the repo and input as the query
144    if let Some(repo_url) = repo_url {
145        let repo_info = parse_github_repo_url(repo_url)
146            .ok_or_else(|| WtgError::MalformedGitHubUrl(repo_url.to_string()))?;
147        return Ok(ParsedInput::new_with_remote(repo_info, parse_query(input)));
148    }
149
150    // Try to parse input as a GitHub URL
151    match try_parse_input_from_github_url(input) {
152        Ok(parsed) => Ok(parsed),
153        Err(WtgError::NotGitHubUrl(_) | WtgError::MalformedGitHubUrl(_)) => {
154            // If it looks like a URL attempt but failed, propagate the error
155            if is_url_like(input) {
156                Err(try_parse_input_from_github_url(input).unwrap_err())
157            } else {
158                // Not a URL, treat as a local query
159                Ok(ParsedInput::new_local_query(parse_query(input)))
160            }
161        }
162        Err(e) => Err(e),
163    }
164}
165
/// Reports whether the input looks like an attempt at a URL.
///
/// The check is case-insensitive and ignores surrounding whitespace. Input
/// counts as URL-like when it starts with one of a few known prefixes or
/// contains a `://` scheme separator anywhere.
fn is_url_like(input: &str) -> bool {
    const URL_PREFIXES: [&str; 4] = ["http://", "https://", "//", "git@"];

    let candidate = input.trim().to_ascii_lowercase();
    let has_known_prefix = URL_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(prefix));

    has_known_prefix || candidate.contains("://")
}
175
176/// Parse a simple GitHub repo URL or just owner/repo format
177/// Supports:
178/// - owner/repo
179/// - <https://github.com/owner/repo.git>
180/// - <https://github.com/owner/repo>
181/// - <https://www.github.com/owner/repo>
182/// - <https://api.github.com/repos/owner/repo>
183/// - <git@github.com:owner/repo.git>
184#[must_use]
185pub(crate) fn parse_github_repo_url(url: &str) -> Option<GhRepoInfo> {
186    let trimmed = url.trim();
187    if trimmed.is_empty() {
188        return None;
189    }
190
191    if let Some(segments) = parse_git_ssh_segments(trimmed) {
192        return owner_repo_from_segments(&segments, false);
193    }
194
195    if let Ok((segments, is_api)) = parse_http_github_segments(trimmed)
196        && let Some(owner_repo) = owner_repo_from_segments(&segments, is_api)
197    {
198        return Some(owner_repo);
199    }
200
201    // Handle simple owner/repo format
202    let parts: Vec<&str> = trimmed.split('/').collect();
203    if parts.len() == 2
204        && let (Some(owner), Some(repo)) = (
205            sanitize_owner_repo_segment(parts[0]),
206            sanitize_owner_repo_segment(parts[1].trim_end_matches(".git")),
207        )
208    {
209        return Some(GhRepoInfo::new(owner, repo));
210    }
211
212    None
213}
214
215fn parse_http_github_segments(url: &str) -> Result<(Vec<String>, bool), WtgError> {
216    let mut parsed =
217        parse_with_https_fallback(url).ok_or_else(|| WtgError::NotGitHubUrl(url.to_string()))?;
218
219    let host = parsed
220        .host_str()
221        .ok_or_else(|| WtgError::NotGitHubUrl(url.to_string()))?;
222
223    let is_api = match is_allowed_github_host(host) {
224        GhUrlHostType::Github => false,
225        GhUrlHostType::GithubApi => true,
226        GhUrlHostType::Other => return Err(WtgError::NotGitHubUrl(url.to_string())),
227    };
228
229    parsed.set_fragment(None);
230    parsed.set_query(None);
231    Ok((collect_segments(parsed.path()), is_api))
232}
233
234/// Parse Git SSH URL format:
235/// - `git@github.com:owner/repo/pull/9#discussion_r123`
236fn parse_git_ssh_segments(url: &str) -> Option<Vec<String>> {
237    let normalized = url.trim();
238    if !normalized.starts_with("git@github.com:") {
239        return None;
240    }
241    let path = normalized.split(':').nth(1)?;
242    let path = path.split('#').next().unwrap_or(path);
243    let path = path.split('?').next().unwrap_or(path);
244    Some(collect_segments(path))
245}
246
247fn parse_with_https_fallback(input: &str) -> Option<Url> {
248    Url::parse(input).map_or_else(
249        |_| {
250            let lower = input.to_ascii_lowercase();
251            if lower.starts_with("github.com/") || lower.starts_with("www.github.com/") {
252                Url::parse(&format!("https://{input}")).ok()
253            } else if lower.starts_with("//github.com/") {
254                Url::parse(&format!("https:{input}")).ok()
255            } else {
256                None
257            }
258        },
259        Some,
260    )
261}
262
/// Classification of a URL host with respect to GitHub.
enum GhUrlHostType {
    /// The main GitHub site (`github.com`, optionally prefixed with `www.`).
    Github,
    /// The GitHub API host (`api.github.com`).
    GithubApi,
    /// Any other host.
    Other,
}

/// Classifies `host` as the GitHub site, the GitHub API, or something else.
///
/// The comparison is case-insensitive. At most one leading `www.` label is
/// ignored, so `www.github.com` is accepted while `www.www.github.com` is
/// not.
fn is_allowed_github_host(host: &str) -> GhUrlHostType {
    let lowered = host.to_ascii_lowercase();
    // `strip_prefix` removes a single occurrence only, unlike
    // `trim_start_matches`, which would strip the prefix repeatedly.
    let bare = lowered.strip_prefix("www.").unwrap_or(lowered.as_str());

    match bare {
        "github.com" => GhUrlHostType::Github,
        "api.github.com" => GhUrlHostType::GithubApi,
        _ => GhUrlHostType::Other,
    }
}
282
283fn collect_segments(path: &str) -> Vec<String> {
284    path.trim_matches('/')
285        .split('/')
286        .filter(|segment| !segment.is_empty())
287        .map(|s| percent_decode_str(s).decode_utf8_lossy().into_owned())
288        .collect()
289}
290
291fn parse_github_blob_path(segments: &[String], url: &str) -> WtgResult<ParsedQuery> {
292    if segments.len() < 3 {
293        return Err(WtgError::MalformedGitHubUrl(format!(
294            "Missing file path in URL: {url}"
295        )));
296    }
297
298    let tail = &segments[1..];
299    let path_segments = &tail[1..];
300
301    // If only two segments after blob/tree, first must be the branch name
302    // and second a file in root
303    if tail.len() == 2 {
304        let branch = segments[1].clone();
305        let mut path = PathBuf::new();
306        let file = path_segments.first().ok_or_else(|| {
307            WtgError::MalformedGitHubUrl(format!("Missing file path in URL: {url}"))
308        })?;
309        reject_control_chars(file).map_err(|_| {
310            WtgError::MalformedGitHubUrl(format!("Invalid characters in URL: {url}"))
311        })?;
312        path.push(file);
313        check_path(&path).map_err(|_| {
314            WtgError::MalformedGitHubUrl(format!("Invalid file path in URL: {url}"))
315        })?;
316        return Ok(ParsedQuery::Resolved(Query::FilePath { branch, path }));
317    }
318
319    let mut sanitized = Vec::with_capacity(tail.len());
320    for seg in tail {
321        reject_control_chars(seg).map_err(|_| {
322            WtgError::MalformedGitHubUrl(format!("Invalid characters in URL: {url}"))
323        })?;
324        sanitized.push(seg.clone());
325    }
326
327    check_path(&PathBuf::from_iter(&sanitized))
328        .map_err(|_| WtgError::MalformedGitHubUrl(format!("Invalid file path in URL: {url}")))?;
329
330    if sanitized.is_empty() {
331        return Err(WtgError::MalformedGitHubUrl(format!(
332            "Missing file path in URL: {url}"
333        )));
334    }
335
336    Ok(ParsedQuery::UnknownPath {
337        segments: sanitized,
338    })
339}
340
341fn owner_repo_from_segments(segments: &[String], is_api: bool) -> Option<GhRepoInfo> {
342    split_url_segments(segments, is_api).map(|(repo_info, _)| repo_info)
343}
344
345fn split_url_segments(segments: &[String], is_api: bool) -> Option<(GhRepoInfo, &[String])> {
346    let min_segments = if is_api { 3 } else { 2 };
347
348    if segments.len() < min_segments {
349        return None;
350    }
351
352    let owner_segment_index = usize::from(is_api);
353
354    let owner = sanitize_owner_repo_segment(segments[owner_segment_index].as_str())?;
355    let repo =
356        sanitize_owner_repo_segment(segments[owner_segment_index + 1].trim_end_matches(".git"))?;
357    Some((
358        GhRepoInfo::new(owner, repo),
359        &segments[owner_segment_index + 2..],
360    ))
361}
362
363fn parsed_input_from_segments(
364    segments: &[String],
365    is_api: bool,
366    url: &str,
367) -> WtgResult<ParsedInput> {
368    let (repo_info, segments) = split_url_segments(segments, is_api).ok_or_else(|| {
369        WtgError::MalformedGitHubUrl("Where's the repo, where's the owner?".to_string())
370    })?;
371
372    let route = segments
373        .first()
374        .ok_or_else(|| WtgError::MalformedGitHubUrl("No route found in GitHub URL".to_string()))?
375        .as_str();
376
377    let query = match route {
378        "commit" => {
379            let hash = segments.get(1).ok_or_else(|| {
380                WtgError::MalformedGitHubUrl(format!("Missing commit hash in URL: {url}"))
381            })?;
382            // URL segments may contain percent-decoded control chars, validate them
383            reject_control_chars(hash).map_err(|_| {
384                WtgError::MalformedGitHubUrl(format!("Invalid characters in URL: {url}"))
385            })?;
386            ParsedQuery::Resolved(Query::GitCommit(hash.clone()))
387        }
388        "issues" => {
389            let num_str = segments.get(1).ok_or_else(|| {
390                WtgError::MalformedGitHubUrl(format!("Missing issue number in URL: {url}"))
391            })?;
392            let num = num_str.parse().map_err(|_| {
393                WtgError::MalformedGitHubUrl(format!("Invalid issue number in URL: {url}"))
394            })?;
395            ParsedQuery::Resolved(Query::Issue(num))
396        }
397        "pull" => {
398            let num_str = segments.get(1).ok_or_else(|| {
399                WtgError::MalformedGitHubUrl(format!("Missing PR number in URL: {url}"))
400            })?;
401            let num = num_str.parse().map_err(|_| {
402                WtgError::MalformedGitHubUrl(format!("Invalid PR number in URL: {url}"))
403            })?;
404            ParsedQuery::Resolved(Query::Pr(num))
405        }
406        // File path will start from segment index 2, e.g., /blob/branch/path/to/file
407        "blob" | "tree" if segments.len() >= 2 => parse_github_blob_path(segments, url)?,
408        _ => {
409            return Err(WtgError::MalformedGitHubUrl(format!(
410                "Unrecognized GitHub URL route: {url}"
411            )));
412        }
413    };
414
415    Ok(ParsedInput::new_with_remote(repo_info, query))
416}
417
/// Validates an owner or repository name.
///
/// Surrounding whitespace is trimmed. The remaining text is returned only
/// if it is non-empty and consists solely of ASCII letters, ASCII digits,
/// `-`, `_` and `.`; otherwise the result is `None`.
fn sanitize_owner_repo_segment(raw: &str) -> Option<String> {
    let is_allowed = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.';

    let candidate = raw.trim();
    if candidate.is_empty() || !candidate.chars().all(is_allowed) {
        return None;
    }

    Some(candidate.to_string())
}
434
435/// Check for control characters (security check only, no empty check)
436/// Returns the input unchanged if valid, avoiding allocation
437fn reject_control_chars(input: &str) -> WtgResult<&str> {
438    if input.chars().any(char::is_control) {
439        return Err(WtgError::SecurityRejection(
440            "Input contains control characters (null bytes, newlines, etc.)".to_string(),
441        ));
442    }
443    Ok(input)
444}
445
446/// Checks whether the given path is valid & safe to use
447pub(crate) fn check_path(path: &Path) -> WtgResult<()> {
448    // We may have an empty path after sanitation, or it may be
449    // absolute, or contain parent components - reject those
450    if path.as_os_str().is_empty() {
451        return Err(WtgError::EmptyInput);
452    }
453
454    if path.is_absolute() {
455        return Err(WtgError::SecurityRejection(
456            "An absolute path snuck in".to_string(),
457        ));
458    }
459
460    if let Some(c) = path.components().find(|c| {
461        matches!(
462            c,
463            std::path::Component::ParentDir | std::path::Component::CurDir
464        )
465    }) {
466        return Err(WtgError::SecurityRejection(format!(
467            "Some fishy `{}` in the path",
468            c.as_os_str().to_string_lossy()
469        )));
470    }
471
472    Ok(())
473}
474
475#[cfg(test)]
476mod tests {
477    use super::*;
478    use rstest::rstest;
479    use std::path::PathBuf;
480
481    // ========================================================================
482    // Helper Types for Flexible Test Assertions
483    // ========================================================================
484
485    /// Helper enum to allow flexible query matching in tests
486    enum QueryMatcher {
487        Exact(Query),
488        Commit(String),
489    }
490
491    impl From<Query> for QueryMatcher {
492        fn from(q: Query) -> Self {
493            Self::Exact(q)
494        }
495    }
496
497    impl From<&str> for QueryMatcher {
498        fn from(s: &str) -> Self {
499            Self::Commit(s.to_string())
500        }
501    }
502
503    impl QueryMatcher {
504        fn assert_matches(&self, actual: &ParsedQuery) {
505            match self {
506                Self::Exact(expected) => {
507                    assert_eq!(actual, &ParsedQuery::Resolved(expected.clone()));
508                }
509                Self::Commit(hash) => {
510                    assert_eq!(
511                        actual,
512                        &ParsedQuery::Resolved(Query::GitCommit(hash.clone()))
513                    );
514                }
515            }
516        }
517    }
518
519    // ========================================================================
520    // Local & URL Parsing Tests
521    // ========================================================================
522
523    #[rstest]
524    #[case::basic_issue(
525        "https://github.com/owner/repo/issues/42",
526        "owner",
527        "repo",
528        Query::Issue(42)
529    )]
530    #[case::issue_with_comment(
531        "https://github.com/owner/repo/issues/42#issuecomment-123456",
532        "owner",
533        "repo",
534        Query::Issue(42)
535    )]
536    #[case::issue_with_query(
537        "https://github.com/owner/repo/issues/42?tab=comments",
538        "owner",
539        "repo",
540        Query::Issue(42)
541    )]
542    #[case::issue_large_number(
543        "https://github.com/owner/repo/issues/999999",
544        "owner",
545        "repo",
546        Query::Issue(999_999)
547    )]
548    fn parses_github_issue_urls(
549        #[case] url: &str,
550        #[case] expected_owner: &str,
551        #[case] expected_repo: &str,
552        #[case] expected_query: Query,
553    ) {
554        let parsed = try_parse_input(url, None).unwrap_or_else(|_| panic!("failed to parse {url}"));
555        assert_eq!(parsed.owner(), Some(expected_owner));
556        assert_eq!(parsed.repo(), Some(expected_repo));
557        assert_eq!(parsed.query, ParsedQuery::Resolved(expected_query));
558    }
559
560    #[rstest]
561    #[case::basic_pr("https://github.com/owner/repo/pull/7", "owner", "repo", Query::Pr(7))]
562    #[case::pr_files(
563        "https://github.com/owner/repo/pull/7/files",
564        "owner",
565        "repo",
566        Query::Pr(7)
567    )]
568    #[case::pr_files_diff(
569        "https://github.com/owner/repo/pull/7/files?diff=split",
570        "owner",
571        "repo",
572        Query::Pr(7)
573    )]
574    #[case::pr_discussion(
575        "https://github.com/owner/repo/pull/7#discussion_r987654321",
576        "owner",
577        "repo",
578        Query::Pr(7)
579    )]
580    #[case::pr_comment(
581        "https://github.com/owner/repo/pull/7#issuecomment-abcdef",
582        "owner",
583        "repo",
584        Query::Pr(7)
585    )]
586    #[case::pr_large_number(
587        "https://github.com/owner/repo/pull/123456",
588        "owner",
589        "repo",
590        Query::Pr(123_456)
591    )]
592    fn parses_github_pr_urls(
593        #[case] url: &str,
594        #[case] expected_owner: &str,
595        #[case] expected_repo: &str,
596        #[case] expected_query: Query,
597    ) {
598        let parsed = try_parse_input(url, None).unwrap_or_else(|_| panic!("failed to parse {url}"));
599        assert_eq!(parsed.owner(), Some(expected_owner));
600        assert_eq!(parsed.repo(), Some(expected_repo));
601        assert_eq!(parsed.query, ParsedQuery::Resolved(expected_query));
602    }
603
604    #[rstest]
605    #[case::full_hash(
606        "https://github.com/owner/repo/commit/abc123def456",
607        "owner",
608        "repo",
609        "abc123def456"
610    )]
611    #[case::short_hash(
612        "https://github.com/owner/repo/commit/abc123d",
613        "owner",
614        "repo",
615        "abc123d"
616    )]
617    #[case::commit_with_fragment(
618        "https://github.com/owner/repo/commit/abc123#diff-1",
619        "owner",
620        "repo",
621        "abc123"
622    )]
623    fn parses_github_commit_urls(
624        #[case] url: &str,
625        #[case] expected_owner: &str,
626        #[case] expected_repo: &str,
627        #[case] expected_hash: &str,
628    ) {
629        let parsed = try_parse_input(url, None).unwrap_or_else(|_| panic!("failed to parse {url}"));
630        assert_eq!(parsed.owner(), Some(expected_owner));
631        assert_eq!(parsed.repo(), Some(expected_repo));
632        assert_eq!(
633            parsed.query,
634            ParsedQuery::Resolved(Query::GitCommit(expected_hash.to_string()))
635        );
636    }
637
638    #[rstest]
639    #[case::blob_single_file(
640        "https://github.com/owner/repo/blob/main/README.md",
641        "owner",
642        "repo",
643        "main",
644        "README.md"
645    )]
646    #[case::tree_directory(
647        "https://github.com/owner/repo/tree/main/src",
648        "owner",
649        "repo",
650        "main",
651        "src"
652    )]
653    fn parses_github_file_urls(
654        #[case] url: &str,
655        #[case] expected_owner: &str,
656        #[case] expected_repo: &str,
657        #[case] expected_branch: &str,
658        #[case] expected_path: &str,
659    ) {
660        let parsed = try_parse_input_from_github_url(url)
661            .unwrap_or_else(|_| panic!("failed to parse {url}"));
662        assert_eq!(parsed.owner(), Some(expected_owner));
663        assert_eq!(parsed.repo(), Some(expected_repo));
664        assert_eq!(
665            parsed.query,
666            ParsedQuery::Resolved(Query::FilePath {
667                branch: expected_branch.to_string(),
668                path: PathBuf::from(expected_path)
669            })
670        );
671    }
672
673    #[rstest]
674    #[case::tree_nested_branch(
675        "https://github.com/owner/repo/tree/feat/new-feature/docs/api",
676        vec!["feat", "new-feature", "docs", "api"]
677    )]
678    #[case::blob_nested_branch(
679        "https://github.com/owner/repo/blob/feat/new-feature/docs/api/readme.md",
680        vec!["feat", "new-feature", "docs", "api", "readme.md"]
681    )]
682    #[case::blob_deep_nesting(
683        "https://github.com/owner/repo/blob/main/a/b/c/d.txt",
684        vec!["main", "a", "b", "c", "d.txt"]
685    )]
686    #[case::percent_encoded_space(
687        "https://github.com/owner/repo/blob/main/path%20with%20spaces/file.txt",
688        vec!["main", "path with spaces", "file.txt"]
689    )]
690    fn parses_github_paths_with_ambiguous_branch(
691        #[case] url: &str,
692        #[case] expected_segments: Vec<&str>,
693    ) {
694        let parsed = try_parse_input_from_github_url(url)
695            .unwrap_or_else(|_| panic!("failed to parse {url}"));
696        assert_eq!(
697            parsed.query,
698            ParsedQuery::UnknownPath {
699                segments: expected_segments.iter().map(|s| (*s).to_string()).collect()
700            }
701        );
702    }
703
704    #[rstest]
705    #[case::no_scheme("github.com/owner/repo/issues/101", "owner", "repo", Query::Issue(101))]
706    #[case::no_scheme_with_comment(
707        "github.com/owner/repo/issues/101#issuecomment-1",
708        "owner",
709        "repo",
710        Query::Issue(101)
711    )]
712    #[case::scheme_only("//github.com/owner/repo/pull/15", "owner", "repo", Query::Pr(15))]
713    #[case::scheme_only_with_query(
714        "//github.com/owner/repo/pull/15?tab=commits",
715        "owner",
716        "repo",
717        Query::Pr(15)
718    )]
719    #[case::www_prefix(
720        "https://www.github.com/owner/repo/pull/7",
721        "owner",
722        "repo",
723        Query::Pr(7)
724    )]
725    #[case::www_with_fragment(
726        "https://www.github.com/owner/repo/pull/7#discussion_r42",
727        "owner",
728        "repo",
729        Query::Pr(7)
730    )]
731    fn parses_alternate_github_url_formats(
732        #[case] url: &str,
733        #[case] expected_owner: &str,
734        #[case] expected_repo: &str,
735        #[case] expected_query: Query,
736    ) {
737        let parsed = try_parse_input(url, None).unwrap_or_else(|_| panic!("failed to parse {url}"));
738        assert_eq!(parsed.owner(), Some(expected_owner));
739        assert_eq!(parsed.repo(), Some(expected_repo));
740        assert_eq!(parsed.query, ParsedQuery::Resolved(expected_query));
741    }
742
743    #[rstest]
744    #[case::basic_ssh("git@github.com:owner/repo/pull/9", "owner", "repo", Query::Pr(9))]
745    #[case::ssh_with_fragment(
746        "git@github.com:owner/repo/pull/9#discussion_r123",
747        "owner",
748        "repo",
749        Query::Pr(9)
750    )]
751    #[case::ssh_issue(
752        "git@github.com:owner/repo/issues/42",
753        "owner",
754        "repo",
755        Query::Issue(42)
756    )]
757    #[case::ssh_commit("git@github.com:owner/repo/commit/abc123", "owner", "repo", "abc123")]
758    fn parses_github_ssh_urls(
759        #[case] url: &str,
760        #[case] expected_owner: &str,
761        #[case] expected_repo: &str,
762        #[case] expected_query: impl Into<QueryMatcher>,
763    ) {
764        let parsed = try_parse_input(url, None).unwrap_or_else(|_| panic!("failed to parse {url}"));
765        assert_eq!(parsed.owner(), Some(expected_owner));
766        assert_eq!(parsed.repo(), Some(expected_repo));
767        expected_query.into().assert_matches(&parsed.query);
768    }
769
770    #[rstest]
771    #[case::api_issue(
772        "https://api.github.com/repos/owner/repo/issues/42",
773        "owner",
774        "repo",
775        Query::Issue(42)
776    )]
777    fn parses_github_api_urls(
778        #[case] url: &str,
779        #[case] expected_owner: &str,
780        #[case] expected_repo: &str,
781        #[case] expected_query: Query,
782    ) {
783        let parsed = try_parse_input(url, None).unwrap_or_else(|_| panic!("failed to parse {url}"));
784        assert_eq!(parsed.owner(), Some(expected_owner));
785        assert_eq!(parsed.repo(), Some(expected_repo));
786        assert_eq!(parsed.query, ParsedQuery::Resolved(expected_query));
787    }
788
789    #[rstest]
790    #[case::hash_with_prefix("#42", ParsedQuery::Resolved(Query::IssueOrPr(42)))]
791    #[case::hash_without_prefix("42", ParsedQuery::Unknown("42".to_string()))]
792    #[case::hash_with_whitespace("  #99  ", ParsedQuery::Resolved(Query::IssueOrPr(99)))]
793    #[case::short_hash("abc123d", ParsedQuery::Unknown("abc123d".to_string()))]
794    #[case::hash_with_whitespace("  abc123  ", ParsedQuery::Unknown("abc123".to_string()))]
795    #[case::simple_tag("v1.0.0", ParsedQuery::Unknown("v1.0.0".to_string()))]
796    #[case::simple_file("README.md", ParsedQuery::Unknown("README.md".to_string()))]
797    #[case::nested_file("src/lib.rs", ParsedQuery::Unknown("src/lib.rs".to_string()))]
798    #[case::unicode_path("src/файл.rs", ParsedQuery::Unknown("src/файл.rs".to_string()))]
799    #[case::unicode_tag("версия-1.0", ParsedQuery::Unknown("версия-1.0".to_string()))]
800    #[case::emoji_in_path("src/👍.md", ParsedQuery::Unknown("src/👍.md".to_string()))]
801    fn parses_local_inputs(#[case] input: &str, #[case] expected: ParsedQuery) {
802        let parsed = try_parse_input(input, None).expect("Should parse issue/PR number");
803        assert_eq!(parsed.query, expected);
804        assert!(parsed.gh_repo_info().is_none());
805    }
806
807    // ========================================================================
808    // Repository URL Parsing Tests
809    // ========================================================================
810
811    #[rstest]
812    #[case::simple_format("owner/repo", "owner", "repo")]
813    #[case::with_dash("my-org/my-repo", "my-org", "my-repo")]
814    #[case::with_underscore("my_org/my_repo", "my_org", "my_repo")]
815    #[case::with_dot("my.org/my.repo", "my.org", "my.repo")]
816    #[case::mixed_separators("my-org_test/repo.name-2", "my-org_test", "repo.name-2")]
817    fn parses_simple_owner_repo_format(
818        #[case] input: &str,
819        #[case] expected_owner: &str,
820        #[case] expected_repo: &str,
821    ) {
822        let parsed = try_parse_input("dummy", Some(input))
823            .unwrap_or_else(|_| panic!("failed to parse {input}"));
824        assert_eq!(parsed.owner(), Some(expected_owner));
825        assert_eq!(parsed.repo(), Some(expected_repo));
826        assert_eq!(parsed.query, ParsedQuery::Unknown("dummy".to_string()));
827    }
828
829    #[rstest]
830    #[case::https("https://github.com/owner/repo", "owner", "repo")]
831    #[case::https_with_git("https://github.com/owner/repo.git", "owner", "repo")]
832    #[case::https_www("https://www.github.com/owner/repo", "owner", "repo")]
833    #[case::api_repos("https://api.github.com/repos/owner/repo", "owner", "repo")]
834    #[case::ssh("git@github.com:owner/repo", "owner", "repo")]
835    #[case::ssh_with_git("git@github.com:owner/repo.git", "owner", "repo")]
836    fn parses_various_repo_url_formats(
837        #[case] url: &str,
838        #[case] expected_owner: &str,
839        #[case] expected_repo: &str,
840    ) {
841        let parsed =
842            try_parse_input("dummy", Some(url)).unwrap_or_else(|_| panic!("failed to parse {url}"));
843        assert_eq!(parsed.owner(), Some(expected_owner));
844        assert_eq!(parsed.repo(), Some(expected_repo));
845        assert_eq!(parsed.query, ParsedQuery::Unknown("dummy".to_string()));
846    }
847
848    // ========================================================================
849    // Combined Parsing Tests (try_parse_input)
850    // ========================================================================
851
852    #[rstest]
853    #[case::issue_with_repo(
854        "#42",
855        "owner/repo",
856        "owner",
857        "repo",
858        ParsedQuery::Resolved(Query::IssueOrPr(42))
859    )]
860    #[case::hash_with_repo(
861        "abc123",
862        "owner/repo",
863        "owner",
864        "repo",
865        ParsedQuery::Unknown("abc123".to_string())
866    )]
867    #[case::file_with_repo(
868        "src/lib.rs",
869        "https://github.com/owner/repo",
870        "owner",
871        "repo",
872        ParsedQuery::Unknown("src/lib.rs".to_string())
873    )]
874    fn parses_input_with_explicit_repo(
875        #[case] input: &str,
876        #[case] repo_url: &str,
877        #[case] expected_owner: &str,
878        #[case] expected_repo: &str,
879        #[case] expected_query: ParsedQuery,
880    ) {
881        let parsed = try_parse_input(input, Some(repo_url))
882            .unwrap_or_else(|_| panic!("failed to parse {input} with repo {repo_url}"));
883        assert_eq!(parsed.owner(), Some(expected_owner));
884        assert_eq!(parsed.repo(), Some(expected_repo));
885        assert_eq!(parsed.query, expected_query);
886    }
887
888    // ========================================================================
889    // Rejection Tests (Negative Cases)
890    // ========================================================================
891
892    #[rstest]
893    #[case::owner_with_space("https://github.com/owner space/repo/issues/1")]
894    #[case::repo_with_space("https://github.com/owner/repo space/issues/1")]
895    #[case::owner_with_tilde("https://github.com/owner~/repo/issues/1")]
896    #[case::repo_with_tilde("https://github.com/owner/repo~/issues/1")]
897    #[case::empty_owner("https://github.com//repo/issues/1")]
898    #[case::empty_repo("https://github.com/owner//issues/1")]
899    #[case::whitespace_owner("https://github.com/   /repo/issues/1")]
900    fn rejects_malformed_github_urls(#[case] url: &str) {
901        let parsed = try_parse_input(url, None);
902        assert!(
903            parsed.is_err() && parsed.unwrap_err().is_malformed_git_hub_url(),
904            "Should reject malformed URL: {url}"
905        );
906    }
907
908    #[rstest]
909    #[case::parent_traversal("https://github.com/owner/repo/blob/main/../../../etc/passwd")]
910    #[case::parent_in_middle("https://github.com/owner/repo/blob/main/src/../../../etc/passwd")]
911    fn rejects_unsafe_file_paths_in_github_urls(#[case] input: &str) {
912        let parsed = try_parse_input(input, None);
913        assert!(
914            parsed.is_err() && parsed.unwrap_err().is_malformed_git_hub_url(),
915            "Should reject unsafe path in GitHub URL: {input}"
916        );
917    }
918
919    #[rstest]
920    #[case::null_in_commit("https://github.com/owner/repo/commit/abc%00def")]
921    #[case::newline_in_path("https://github.com/owner/repo/blob/main/file%0Aname.txt")]
922    #[case::carriage_return_in_path("https://github.com/owner/repo/blob/main/file%0Dname.txt")]
923    #[case::tab_in_path("https://github.com/owner/repo/blob/main/file%09name.txt")]
924    fn rejects_percent_encoded_control_chars_in_urls(#[case] input: &str) {
925        let parsed = try_parse_input(input, None);
926        assert!(
927            parsed.is_err() && parsed.unwrap_err().is_malformed_git_hub_url(),
928            "Should reject percent-encoded control chars in URL: {input}"
929        );
930    }
931
932    #[rstest]
933    #[case::empty_string("")]
934    #[case::whitespace_only("   ")]
935    #[case::newlines_only("\n\n")]
936    #[case::tabs_only("\t\t")]
937    fn rejects_empty_inputs(#[case] url: &str) {
938        let parsed = try_parse_input(url, None);
939        assert!(
940            parsed.is_err() && parsed.unwrap_err().is_empty_input(),
941            "Should reject empty input: {url:?}"
942        );
943    }
944
945    #[rstest]
946    #[case::null_byte("test\0data")]
947    #[case::newline_in_middle("test\ndata")]
948    #[case::carriage_return("test\rdata")]
949    #[case::tab_in_middle("test\tdata")]
950    fn rejects_control_characters(#[case] input: &str) {
951        let parsed = try_parse_input(input, None);
952        assert!(
953            parsed.is_err() && parsed.unwrap_err().is_security_rejection(),
954            "Should reject input with control chars: {input:?}"
955        );
956    }
957
958    #[rstest]
959    #[case::owner_with_space("owner space/repo")]
960    #[case::repo_with_space("owner/repo space")]
961    #[case::owner_with_tilde("owner~/repo")]
962    #[case::repo_with_tilde("owner/repo~")]
963    #[case::owner_with_bang("owner!/repo")]
964    #[case::too_many_slashes("owner/repo/extra")]
965    #[case::single_segment("justowner")]
966    #[case::empty_owner("/repo")]
967    #[case::empty_repo("owner/")]
968    #[case::empty_string("")]
969    #[case::whitespace_only("   ")]
970    fn rejects_malformed_repo_urls(#[case] input: &str) {
971        let parsed = try_parse_input("dummy", Some(input));
972
973        assert!(parsed.is_err(), "Should reject malformed repo URL: {input}");
974    }
975}