wtg_cli/
cli.rs

1use clap::Parser;
2use url::Url;
3
4use crate::constants;
5
// Command-line definition for the `wtg` binary, built with clap's derive API.
// The automatic help flag is disabled (`disable_help_flag = true`) and declared
// again by hand through the private `help` field at the bottom of the struct.
// NOTE: the `///` comments on the fields are picked up by clap as help text, so
// they are user-facing strings rather than plain documentation.
#[derive(Parser, Debug)]
#[command(
    name = "wtg",
    version,
    about = constants::DESCRIPTION,
    disable_help_flag = true,
)]
pub struct Cli {
    // Optional positional argument; `parse_input` returns `None` when it is absent.
    /// The thing to identify: commit hash (c62bbcc), issue/PR (#123), file path (Cargo.toml), tag (v1.2.3), or a GitHub URL
    #[arg(value_name = "COMMIT|ISSUE|FILE|TAG|URL")]
    pub input: Option<String>,

    // Passed as `-r` / `--repo`; when present, `parse_input` takes the repository
    // from this value and treats `input` purely as the query.
    /// GitHub repository URL to operate on (e.g., <https://github.com/owner/repo>)
    #[arg(short = 'r', long, value_name = "URL")]
    pub repo: Option<String>,

    // Exposes `-h` / `--help` via `ArgAction::Help`. The field is not read by the
    // code shown here; it exists so the flag is registered with clap.
    /// Print help information
    #[arg(short, long, action = clap::ArgAction::Help)]
    help: Option<bool>,
}
26
/// Parsed input that can come from either the input argument or a GitHub URL.
///
/// `Cli::parse_input` fills `owner` and `repo` together: both are `Some` when the
/// repository was given via `-r` or extracted from a GitHub URL, and both are
/// `None` when the input is a plain query.
///
/// Equality is derived so parse results can be compared directly (for example
/// with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedInput {
    /// Repository owner (user or organisation), if a repository was identified.
    pub owner: Option<String>,
    /// Repository name, if a repository was identified.
    pub repo: Option<String>,
    /// The thing to look up: a commit hash, `#<number>` for an issue or pull
    /// request, a file path, or whatever plain text the user supplied.
    pub query: String,
}
34
35impl Cli {
36    /// Parse the input and -r flag to determine the repository and query
37    #[must_use]
38    pub fn parse_input(&self) -> Option<ParsedInput> {
39        let input = self.input.as_ref()?;
40
41        // If -r flag is provided, use it as the repo and input as the query
42        if let Some(repo_url) = &self.repo {
43            let (owner, repo) = parse_github_repo_url(repo_url)?;
44            let query = sanitize_query(input)?;
45            return Some(ParsedInput {
46                owner: Some(owner),
47                repo: Some(repo),
48                query,
49            });
50        }
51
52        // Try to parse input as a GitHub URL
53        if let Some(parsed) = parse_github_url(input) {
54            return Some(parsed);
55        }
56
57        // Otherwise, it's just a query (local repo)
58        sanitize_query(input).map(|query| ParsedInput {
59            owner: None,
60            repo: None,
61            query,
62        })
63    }
64}
65
66/// Parse a GitHub URL to extract owner, repo, and optional query
67/// Supports:
68/// - <https://github.com/owner/repo>
69/// - <https://github.com/owner/repo/commit/hash>
70/// - <https://github.com/owner/repo/issues/123>
71/// - <https://github.com/owner/repo/pull/123>
72/// - <https://github.com/owner/repo/blob/branch/path/to/file>
73fn parse_github_url(url: &str) -> Option<ParsedInput> {
74    let trimmed = url.trim();
75    if trimmed.is_empty() {
76        return None;
77    }
78
79    if let Some(segments) = parse_git_ssh_segments(trimmed) {
80        return parsed_input_from_segments(&segments);
81    }
82
83    let segments = parse_http_github_segments(trimmed)?;
84    parsed_input_from_segments(&segments)
85}
86
87/// Parse a simple GitHub repo URL (owner/repo or <https://github.com/owner/repo>)
88fn parse_github_repo_url(url: &str) -> Option<(String, String)> {
89    let trimmed = url.trim();
90    if trimmed.is_empty() {
91        return None;
92    }
93
94    if let Some(segments) = parse_git_ssh_segments(trimmed) {
95        return owner_repo_from_segments(&segments);
96    }
97
98    if let Some(mut parsed) = parse_with_https_fallback(trimmed) {
99        let host = parsed.host_str()?;
100        if !is_allowed_github_host(host) {
101            return None;
102        }
103        parsed.set_fragment(None);
104        parsed.set_query(None);
105        let segments = collect_segments(parsed.path());
106        if let Some(owner_repo) = owner_repo_from_segments(&segments) {
107            return Some(owner_repo);
108        }
109    }
110
111    // Handle simple owner/repo format
112    let parts: Vec<&str> = trimmed.split('/').collect();
113    if parts.len() == 2
114        && let (Some(owner), Some(repo)) = (
115            sanitize_owner_repo_segment(parts[0]),
116            sanitize_owner_repo_segment(parts[1].trim_end_matches(".git")),
117        )
118    {
119        return Some((owner, repo));
120    }
121
122    None
123}
124
125fn parse_http_github_segments(url: &str) -> Option<Vec<String>> {
126    let mut parsed = parse_with_https_fallback(url)?;
127    let host = parsed.host_str()?;
128    if !is_allowed_github_host(host) {
129        return None;
130    }
131    parsed.set_fragment(None);
132    parsed.set_query(None);
133    Some(collect_segments(parsed.path()))
134}
135
136fn parse_git_ssh_segments(url: &str) -> Option<Vec<String>> {
137    let normalized = url.trim();
138    if !normalized.starts_with("git@github.com:") {
139        return None;
140    }
141    let path = normalized.split(':').nth(1)?;
142    let path = path.split('#').next().unwrap_or(path);
143    let path = path.split('?').next().unwrap_or(path);
144    Some(collect_segments(path))
145}
146
147fn parse_with_https_fallback(input: &str) -> Option<Url> {
148    Url::parse(input).map_or_else(
149        |_| {
150            let lower = input.to_ascii_lowercase();
151            if lower.starts_with("github.com/") || lower.starts_with("www.github.com/") {
152                Url::parse(&format!("https://{input}")).ok()
153            } else if lower.starts_with("//github.com/") {
154                Url::parse(&format!("https:{input}")).ok()
155            } else {
156                None
157            }
158        },
159        Some,
160    )
161}
162
/// Report whether `host` is one of the GitHub hostnames accepted by the URL parsers.
///
/// Only `github.com` and `www.github.com` match; the comparison ignores ASCII case.
fn is_allowed_github_host(host: &str) -> bool {
    const ALLOWED_HOSTS: [&str; 2] = ["github.com", "www.github.com"];

    ALLOWED_HOSTS
        .iter()
        .any(|allowed| host.eq_ignore_ascii_case(allowed))
}
169
/// Break a `/`-separated path into owned, non-empty segments.
///
/// Leading, trailing and repeated slashes produce no segments of their own;
/// segment contents are copied unchanged (no trimming or decoding).
fn collect_segments(path: &str) -> Vec<String> {
    let mut segments = Vec::new();
    for piece in path.split('/') {
        // Empty pieces come from leading/trailing/doubled slashes; skip them.
        if !piece.is_empty() {
            segments.push(piece.to_owned());
        }
    }
    segments
}
177
178fn owner_repo_from_segments(segments: &[String]) -> Option<(String, String)> {
179    if segments.len() < 2 {
180        return None;
181    }
182    let owner = sanitize_owner_repo_segment(segments[0].as_str())?;
183    let repo = sanitize_owner_repo_segment(segments[1].trim_end_matches(".git"))?;
184    Some((owner, repo))
185}
186
187fn parsed_input_from_segments(segments: &[String]) -> Option<ParsedInput> {
188    if segments.len() < 3 {
189        return None;
190    }
191
192    let (owner, repo) = owner_repo_from_segments(segments)?;
193    let query = match segments.get(2)?.as_str() {
194        "commit" => segments.get(3)?.clone(),
195        "issues" | "pull" => format!("#{}", segments.get(3)?),
196        "blob" | "tree" => {
197            if segments.len() >= 5 {
198                segments[4..].join("/")
199            } else {
200                return None;
201            }
202        }
203        _ => return None,
204    };
205
206    let query = sanitize_query(&query)?;
207
208    Some(ParsedInput {
209        owner: Some(owner),
210        repo: Some(repo),
211        query,
212    })
213}
214
/// Validate a single owner or repository name.
///
/// Surrounding whitespace is trimmed. The remainder must be non-empty and made
/// up only of ASCII letters, ASCII digits, `-`, `_` or `.`; otherwise `None` is
/// returned. On success the trimmed text is returned as an owned `String`.
fn sanitize_owner_repo_segment(raw: &str) -> Option<String> {
    let candidate = raw.trim();
    let is_permitted = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.';

    let acceptable = !candidate.is_empty() && candidate.chars().all(is_permitted);
    acceptable.then(|| candidate.to_owned())
}
230
/// Normalise a user-supplied query string.
///
/// Surrounding whitespace is trimmed. Returns `None` when nothing remains or
/// when the remainder contains any control character; otherwise returns the
/// trimmed text as an owned `String`.
fn sanitize_query(raw: &str) -> Option<String> {
    let query = raw.trim();
    let usable = !query.is_empty() && !query.chars().any(char::is_control);
    usable.then(|| query.to_owned())
}
243
// Unit tests for the URL and query parsing helpers in this file.
#[cfg(test)]
mod tests {
    use super::{Cli, parse_github_repo_url, parse_github_url};

    // Shared assertion: `url` must parse to owner "owner", repo "repo" and the
    // given query. Panics with the offending URL when parsing fails.
    fn assert_issue_or_pr(url: &str, expected_query: &str) {
        let parsed = parse_github_url(url).unwrap_or_else(|| panic!("failed to parse {url}"));
        assert_eq!(parsed.owner.as_deref(), Some("owner"));
        assert_eq!(parsed.repo.as_deref(), Some("repo"));
        assert_eq!(parsed.query, expected_query);
    }

    // Fragments and query strings on issue URLs must not leak into the query.
    #[test]
    fn parses_issue_urls_with_fragments_and_queries() {
        let urls = [
            "https://github.com/owner/repo/issues/42",
            "https://github.com/owner/repo/issues/42#issuecomment-123456",
            "https://github.com/owner/repo/issues/42?tab=comments",
        ];

        for url in urls {
            assert_issue_or_pr(url, "#42");
        }
    }

    // Extra path segments (`/files`), query strings and fragments after the PR
    // number are ignored.
    #[test]
    fn parses_pr_urls_with_files_views_and_comments() {
        let urls = [
            "https://github.com/owner/repo/pull/7",
            "https://github.com/owner/repo/pull/7/files",
            "https://github.com/owner/repo/pull/7/files?diff=split",
            "https://github.com/owner/repo/pull/7#discussion_r987654321",
            "https://github.com/owner/repo/pull/7#issuecomment-abcdef",
        ];

        for url in urls {
            assert_issue_or_pr(url, "#7");
        }
    }

    // Scheme-less (`github.com/...`), protocol-relative (`//github.com/...`) and
    // `www.` URLs are all accepted.
    #[test]
    fn parses_www_and_scheme_less_urls() {
        let urls = [
            "github.com/owner/repo/issues/101#issuecomment-1",
            "//github.com/owner/repo/pull/15?tab=commits",
            "https://www.github.com/owner/repo/pull/7#discussion_r42",
        ];

        assert_issue_or_pr(urls[0], "#101");
        assert_issue_or_pr(urls[1], "#15");
        assert_issue_or_pr(urls[2], "#7");
    }

    // SCP-style SSH remotes work both for item URLs and for plain repo references.
    #[test]
    fn parses_git_ssh_urls() {
        let parsed = parse_github_url("git@github.com:owner/repo/pull/9#discussion_r123").unwrap();
        assert_eq!(parsed.owner.as_deref(), Some("owner"));
        assert_eq!(parsed.repo.as_deref(), Some("repo"));
        assert_eq!(parsed.query, "#9");

        let repo = parse_github_repo_url("git@github.com:owner/repo.git").unwrap();
        assert_eq!(repo.0, "owner");
        assert_eq!(repo.1, "repo");
    }

    // Owner/repo segments containing spaces or other disallowed characters are rejected.
    #[test]
    fn rejects_malformed_owner_repo_segments() {
        assert!(parse_github_repo_url("owner space/repo").is_none());
        assert!(parse_github_repo_url("owner/repo~").is_none());
        assert!(parse_github_url("https://github.com/owner space/repo/issues/1").is_none());
    }

    // With `-r` set, a whitespace-only input is rejected and a padded input is trimmed.
    #[test]
    fn sanitizes_plain_query_inputs() {
        let cli = Cli {
            input: Some("   \n".into()),
            repo: Some("owner/repo".into()),
            help: None,
        };
        assert!(cli.parse_input().is_none());

        let cli = Cli {
            input: Some("  #99  ".into()),
            repo: Some("owner/repo".into()),
            help: None,
        };
        let parsed = cli.parse_input().unwrap();
        assert_eq!(parsed.query, "#99");
    }
}
332}