Skip to main content

parse_git_url/
lib.rs

1//! Parse Git repository URLs into a stable, documented Rust data structure.
2//!
3//! `GitUrl::parse` supports the host-specific layouts currently covered by the
4//! test suite and README: GitHub, Bitbucket, and Azure DevOps over SSH,
5//! HTTP(S), and related URL schemes. Local Unix paths and relative Windows
6//! paths are also supported.
7//!
8//! Absolute Windows drive paths such as `C:\repo.git` and
9//! `file:///C:/repo.git` are intentionally unsupported.
10//!
11//! # Examples
12//!
13//! ```
14//! use parse_git_url::{GitUrl, Scheme};
15//!
16//! let parsed = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")
17//!     .expect("example URL should parse");
18//!
19//! assert_eq!(parsed.host.as_deref(), Some("github.com"));
20//! assert_eq!(parsed.owner.as_deref(), Some("tjtelan"));
21//! assert_eq!(parsed.name, "git-url-parse-rs");
22//! assert_eq!(parsed.scheme, Scheme::Ssh);
23//! ```
24//!
25use std::fmt::Display;
26use std::str::FromStr;
27use std::{error::Error, fmt};
28use tracing::debug;
29use url::Url;
30
31mod scheme;
32
33pub use crate::scheme::Scheme;
34
/// GitUrl represents an input URL used by git hosting tools and repositories.
///
/// Parsing normalizes the input first, then uses the [`url`] crate for the
/// generic URL handling before extracting Git-specific metadata.
///
/// Values are normally produced by [`GitUrl::parse`] (or the `FromStr`
/// implementation, which delegates to it) and can be rendered back to text
/// through the `Display` implementation.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct GitUrl {
    /// The fully qualified domain name (FQDN) or IP address of the repository.
    ///
    /// Always `None` for `Scheme::File` inputs.
    pub host: Option<String>,
    /// The repository name with any trailing `.git` suffix removed.
    pub name: String,
    /// The owner, account, or project segment directly associated with `name`.
    ///
    /// `None` for `Scheme::File` inputs, where no metadata is inferred from the path.
    pub owner: Option<String>,
    /// The organization segment when the host encodes one explicitly.
    ///
    /// Azure DevOps URLs currently populate this field.
    pub organization: Option<String>,
    /// The canonical repository path assembled from the extracted metadata.
    pub fullname: String,
    /// The parsed transport or file scheme.
    pub scheme: Scheme,
    /// The authentication username embedded in the input URL.
    ///
    /// `None` when the URL carries an empty username.
    pub user: Option<String>,
    /// The password or token component embedded in the input URL.
    pub token: Option<String>,
    /// The explicit port number when one is present.
    pub port: Option<u16>,
    /// The normalized path portion relative to the host and auth fields.
    pub path: String,
    /// Whether the original path ended with `.git`.
    pub git_suffix: bool,
    /// Whether the original input explicitly spelled out its URL scheme.
    ///
    /// Set when the input contains `://` or starts with `git:`.
    pub scheme_prefix: bool,
}
68
69/// Build the printable GitUrl from its components
70impl fmt::Display for GitUrl {
71    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
72        let scheme_prefix = match self.scheme_prefix {
73            true => format!("{}://", self.scheme),
74            false => String::new(),
75        };
76
77        let auth_info = match self.scheme {
78            Scheme::Ssh | Scheme::Git | Scheme::GitSsh => {
79                if let Some(user) = &self.user {
80                    format!("{}@", user)
81                } else {
82                    String::new()
83                }
84            }
85            Scheme::Http | Scheme::Https => match (&self.user, &self.token) {
86                (Some(user), Some(token)) => format!("{}:{}@", user, token),
87                (Some(user), None) => format!("{}@", user),
88                (None, Some(token)) => format!("{}@", token),
89                (None, None) => String::new(),
90            },
91            _ => String::new(),
92        };
93
94        let host = match &self.host {
95            Some(host) => host.to_string(),
96            None => String::new(),
97        };
98
99        let port = match &self.port {
100            Some(p) => format!(":{}", p),
101            None => String::new(),
102        };
103
104        let path = match &self.scheme {
105            Scheme::Ssh => {
106                if self.port.is_some() {
107                    format!("/{}", &self.path)
108                } else {
109                    format!(":{}", &self.path)
110                }
111            }
112            _ => self.path.to_string(),
113        };
114
115        let git_url_str = format!("{}{}{}{}{}", scheme_prefix, auth_info, host, port, path);
116
117        write!(f, "{}", git_url_str)
118    }
119}
120
121impl Default for GitUrl {
122    fn default() -> Self {
123        GitUrl {
124            host: None,
125            name: "".to_string(),
126            owner: None,
127            organization: None,
128            fullname: "".to_string(),
129            scheme: Scheme::Unspecified,
130            user: None,
131            token: None,
132            port: None,
133            path: "".to_string(),
134            git_suffix: false,
135            scheme_prefix: false,
136        }
137    }
138}
139
/// Error returned by [`GitUrl::parse`] and the `FromStr` implementation of
/// [`GitUrl`].
///
/// The `Display` output names the offending URL; for normalization failures
/// the underlying cause is exposed through `Error::source`.
#[derive(Debug)]
#[non_exhaustive]
pub struct FromStrError {
    /// The input exactly as it was handed to the parser.
    url: String,
    /// The step of the parse that failed.
    kind: FromStrErrorKind,
}
146
147impl Display for FromStrError {
148    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149        match &self.kind {
150            FromStrErrorKind::NormalizeUrl(_) => {
151                write!(f, "unable to normalize URL `{}`", self.url)
152            }
153            FromStrErrorKind::UrlHost => {
154                write!(f, "could not isolate host from URL `{}`", self.url)
155            }
156            FromStrErrorKind::UnsupportedScheme => {
157                write!(f, "unsupported scheme`",)
158            }
159            FromStrErrorKind::MalformedGitUrl => {
160                write!(f, "unknown format of git URL `{}`", self.url)
161            }
162        }
163    }
164}
165
166impl Error for FromStrError {
167    fn source(&self) -> Option<&(dyn Error + 'static)> {
168        match &self.kind {
169            FromStrErrorKind::NormalizeUrl(err) => Some(err),
170            FromStrErrorKind::UrlHost => None,
171            FromStrErrorKind::UnsupportedScheme => None,
172            FromStrErrorKind::MalformedGitUrl => None,
173        }
174    }
175}
176
/// The individual ways in which [`GitUrl::parse`] can fail.
#[derive(Debug)]
pub enum FromStrErrorKind {
    /// `normalize_url` rejected the input; the wrapped error is the cause.
    #[non_exhaustive]
    NormalizeUrl(NormalizeUrlError),
    /// The normalized URL has no host although its scheme is not a file scheme.
    #[non_exhaustive]
    UrlHost,
    /// The URL scheme could not be converted into a [`Scheme`], or it is not
    /// one of the schemes handled for a host that carries an organization in
    /// its path.
    #[non_exhaustive]
    UnsupportedScheme,
    /// The path does not have the segments needed to extract the repository
    /// metadata.
    #[non_exhaustive]
    MalformedGitUrl,
}
188
189impl FromStr for GitUrl {
190    type Err = FromStrError;
191
192    fn from_str(s: &str) -> Result<Self, Self::Err> {
193        GitUrl::parse(s)
194    }
195}
196
197impl GitUrl {
198    /// Returns `GitUrl` after removing `user` and `token` values
199    /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info
200    pub fn trim_auth(&self) -> GitUrl {
201        let mut new_giturl = self.clone();
202        new_giturl.user = None;
203        new_giturl.token = None;
204        new_giturl
205    }
206
207    /// Normalizes and parses `url` for metadata
208    pub fn parse(url: &str) -> Result<GitUrl, FromStrError> {
209        // Normalize the url so we can use Url crate to process ssh urls
210        let normalized = normalize_url(url).map_err(|err| FromStrError {
211            url: url.to_owned(),
212            kind: FromStrErrorKind::NormalizeUrl(err),
213        })?;
214
215        // Some pre-processing for paths
216        // REFACTOR: write Scheme::from_str explicitly and include that error in the chain
217        let scheme = Scheme::from_str(normalized.scheme()).map_err(|_err| FromStrError {
218            url: url.to_owned(),
219            kind: FromStrErrorKind::UnsupportedScheme,
220        })?;
221
222        // Normalized ssh urls can always have their first '/' removed
223        let urlpath = match &scheme {
224            Scheme::Ssh => {
225                // At the moment, we're relying on url::Url's parse() behavior to not duplicate
226                // the leading '/' when we normalize
227                normalized.path()[1..].to_string()
228            }
229            _ => normalized.path().to_string(),
230        };
231
232        let git_suffix_check = &urlpath.ends_with(".git");
233
234        // Parse through path for name,owner,organization
235        // Support organizations for Azure Devops
236        debug!("The urlpath: {:?}", &urlpath);
237
238        // Most git services use the path for metadata in the same way, so we're going to separate
239        // the metadata
240        // ex. github.com/accountname/reponame
241        // owner = accountname
242        // name = reponame
243        //
244        // organizations are going to be supported on a per-host basis
245        let splitpath = &urlpath.rsplit_terminator('/').collect::<Vec<&str>>();
246        debug!("rsplit results for metadata: {:?}", splitpath);
247
248        let name = splitpath[0].trim_end_matches(".git").to_string();
249
250        let (owner, organization, fullname) = match &scheme {
251            // We're not going to assume anything about metadata from a filepath
252            Scheme::File => (None::<String>, None::<String>, name.clone()),
253            _ => {
254                let mut fullname: Vec<&str> = Vec::new();
255
256                // TODO: Add support for parsing out orgs from these urls
257                let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"];
258                //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"];
259
260                let host_str = normalized.host_str().ok_or_else(|| FromStrError {
261                    url: url.to_owned(),
262                    kind: FromStrErrorKind::UrlHost,
263                })?;
264
265                match hosts_w_organization_in_path.contains(&host_str) {
266                    true => {
267                        debug!("Found a git provider with an org");
268
269                        // The path differs between git:// and https:// schemes
270
271                        match &scheme {
272                            // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName",
273                            Scheme::Ssh => {
274                                // Organization
275                                fullname.push(splitpath[2]);
276                                // Project/Owner name
277                                fullname.push(splitpath[1]);
278                                // Repo name
279                                fullname.push(splitpath[0]);
280
281                                (
282                                    Some(splitpath[1].to_string()),
283                                    Some(splitpath[2].to_string()),
284                                    fullname.join("/"),
285                                )
286                            }
287                            // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName",
288                            Scheme::Https => {
289                                // Organization
290                                fullname.push(splitpath[3]);
291                                // Project/Owner name
292                                fullname.push(splitpath[2]);
293                                // Repo name
294                                fullname.push(splitpath[0]);
295
296                                (
297                                    Some(splitpath[2].to_string()),
298                                    Some(splitpath[3].to_string()),
299                                    fullname.join("/"),
300                                )
301                            }
302                            _ => {
303                                return Err(FromStrError {
304                                    url: url.to_owned(),
305                                    kind: FromStrErrorKind::UnsupportedScheme,
306                                });
307                            }
308                        }
309                    }
310                    false => {
311                        if !url.starts_with("ssh") && splitpath.len() < 2 {
312                            return Err(FromStrError {
313                                url: url.to_owned(),
314                                kind: FromStrErrorKind::MalformedGitUrl,
315                            });
316                        }
317
318                        let position = match splitpath.len() {
319                            0 => {
320                                return Err(FromStrError {
321                                    url: url.to_owned(),
322                                    kind: FromStrErrorKind::MalformedGitUrl,
323                                })
324                            }
325                            1 => 0,
326                            _ => 1,
327                        };
328
329                        // push owner
330                        fullname.push(splitpath[position]);
331                        // push name
332                        fullname.push(name.as_str());
333
334                        (
335                            Some(splitpath[position].to_string()),
336                            None::<String>,
337                            fullname.join("/"),
338                        )
339                    }
340                }
341            }
342        };
343
344        let final_host = match scheme {
345            Scheme::File => None,
346            _ => normalized.host_str().map(|h| h.to_string()),
347        };
348
349        let final_path = match scheme {
350            Scheme::File => {
351                if let Some(host) = normalized.host_str() {
352                    format!("{}{}", host, urlpath)
353                } else {
354                    urlpath
355                }
356            }
357            _ => urlpath,
358        };
359
360        Ok(GitUrl {
361            host: final_host,
362            name,
363            owner,
364            organization,
365            fullname,
366            scheme,
367            user: match normalized.username().to_string().len() {
368                0 => None,
369                _ => Some(normalized.username().to_string()),
370            },
371            token: normalized.password().map(|p| p.to_string()),
372            port: normalized.port(),
373            path: final_path,
374            git_suffix: *git_suffix_check,
375            scheme_prefix: url.contains("://") || url.starts_with("git:"),
376        })
377    }
378}
379
380/// `normalize_ssh_url` takes in an ssh url that separates the login info
381/// from the path into with a `:` and replaces it with `/`.
382///
383/// Prepends `ssh://` to url
384///
385/// Supports absolute and relative paths
386fn normalize_ssh_url(url: &str) -> Result<Url, NormalizeUrlError> {
387    let u = url.split(':').collect::<Vec<&str>>();
388
389    match u.len() {
390        2 => {
391            debug!("Normalizing ssh url: {:?}", u);
392            normalize_url(&format!("ssh://{}/{}", u[0], u[1]))
393        }
394        3 => {
395            debug!("Normalizing ssh url with ports: {:?}", u);
396            normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2]))
397        }
398        _default => Err(NormalizeUrlError {
399            kind: NormalizeUrlErrorKind::UnsupportedSshPattern {
400                url: url.to_owned(),
401            },
402        }),
403    }
404}
405
406/// `normalize_file_path` takes in a filepath and uses `Url::from_file_path()` to parse
407///
408/// Prepends `file://` to url
409#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
410fn normalize_file_path(filepath: &str) -> Result<Url, NormalizeUrlError> {
411    let fp = Url::from_file_path(filepath);
412
413    match fp {
414        Ok(path) => Ok(path),
415        Err(_e) => normalize_url(&format!("file://{}", filepath)),
416    }
417}
418
/// Fallback `normalize_file_path` for targets outside the `cfg` list of the
/// primary implementation.
///
/// The `cfg` is the exact complement of the one on the primary
/// implementation, so exactly one definition exists on every target
/// (wasm32-wasi previously matched both). `normalize_url` calls this
/// function for any relative input without an `@` before a `:`, so instead
/// of panicking the path is prefixed with `file://` and parsed through
/// `normalize_url`, mirroring the fallback branch of the primary
/// implementation.
#[cfg(not(any(unix, windows, target_os = "redox", target_os = "wasi")))]
fn normalize_file_path(filepath: &str) -> Result<Url, NormalizeUrlError> {
    normalize_url(&format!("file://{}", filepath))
}
423
/// Error returned by [`normalize_url`] when an input cannot be turned into a
/// parseable URL.
///
/// For URL parse failures the underlying `url::ParseError` is exposed through
/// `Error::source`.
#[derive(Debug)]
#[non_exhaustive]
pub struct NormalizeUrlError {
    /// The reason normalization failed.
    kind: NormalizeUrlErrorKind,
}
429
430impl Display for NormalizeUrlError {
431    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
432        match &self.kind {
433            NormalizeUrlErrorKind::NullBytes => write!(f, "input URL contains null bytes"),
434            NormalizeUrlErrorKind::UrlParse(_) => write!(f, "unable to parse URL"),
435            NormalizeUrlErrorKind::UnsupportedSshPattern { url } => {
436                write!(f, "unsupported SSH pattern `{}`", url)
437            }
438            NormalizeUrlErrorKind::UnsupportedWindowsPath { path } => {
439                write!(f, "unsupported absolute Windows path `{}`", path)
440            }
441            NormalizeUrlErrorKind::UnsupportedScheme => write!(f, "unsupported URL scheme"),
442        }
443    }
444}
445
446impl Error for NormalizeUrlError {
447    fn source(&self) -> Option<&(dyn Error + 'static)> {
448        match &self.kind {
449            NormalizeUrlErrorKind::NullBytes => None,
450            NormalizeUrlErrorKind::UrlParse(err) => Some(err),
451            NormalizeUrlErrorKind::UnsupportedSshPattern { url: _ } => None,
452            NormalizeUrlErrorKind::UnsupportedWindowsPath { path: _ } => None,
453            NormalizeUrlErrorKind::UnsupportedScheme => None,
454        }
455    }
456}
457
/// The individual ways in which [`normalize_url`] can fail.
#[derive(Debug)]
pub enum NormalizeUrlErrorKind {
    /// The input contains a `\0` byte.
    #[non_exhaustive]
    NullBytes,
    /// `url::Url::parse` failed with an error other than "relative URL
    /// without a base"; the wrapped error is the cause.
    #[non_exhaustive]
    UrlParse(url::ParseError),
    /// An ssh-style input did not split on `:` into two or three parts.
    #[non_exhaustive]
    UnsupportedSshPattern { url: String },
    /// The input is an absolute Windows drive path, which is not supported.
    #[non_exhaustive]
    UnsupportedWindowsPath { path: String },
    /// NOTE(review): no code visible in this file constructs this variant;
    /// confirm whether it is still needed.
    #[non_exhaustive]
    UnsupportedScheme,
}
471
472/// `normalize_url` takes in url as `&str` and takes an opinionated approach to identify
473/// `ssh://` or `file://` urls that require more information to be added so that
474/// they can be parsed more effectively by `url::Url::parse()`
475pub fn normalize_url(url: &str) -> Result<Url, NormalizeUrlError> {
476    debug!("Processing: {:?}", &url);
477
478    // Error if there are null bytes within the url
479    // https://github.com/tjtelan/git-url-parse-rs/issues/16
480    if url.contains('\0') {
481        return Err(NormalizeUrlError {
482            kind: NormalizeUrlErrorKind::NullBytes,
483        });
484    }
485
486    // We're going to remove any trailing slash before running through Url::parse
487    let url = url.trim_end_matches('/');
488
489    if is_absolute_windows_path(url) {
490        return Err(NormalizeUrlError {
491            kind: NormalizeUrlErrorKind::UnsupportedWindowsPath {
492                path: url.to_owned(),
493            },
494        });
495    }
496
497    // Normalize short git url notation: git:host/path.
498    // This is the same as matching Regex::new(r"^git:[^/]")
499    let url_starts_with_git_but_no_slash = url.starts_with("git:") && url.get(4..5) != Some("/");
500    let url_to_parse = if url_starts_with_git_but_no_slash {
501        url.replace("git:", "git://")
502    } else {
503        url.to_string()
504    };
505
506    let url_parse = Url::parse(&url_to_parse);
507
508    Ok(match url_parse {
509        Ok(u) => match Scheme::from_str(u.scheme()) {
510            Ok(_) => u,
511            Err(_) => normalize_ssh_url(url)?,
512        },
513        Err(url::ParseError::RelativeUrlWithoutBase) => {
514            // If we're here, we're only looking for Scheme::Ssh or Scheme::File
515
516            // Assuming we have found Scheme::Ssh if we can find an "@" before ":"
517            // Otherwise we have Scheme::File
518            match string_contains_asperand_before_colon(url) {
519                true => {
520                    debug!("Scheme::SSH match for normalization");
521                    normalize_ssh_url(url)?
522                }
523                false => {
524                    debug!("Scheme::File match for normalization");
525                    normalize_file_path(url)?
526                }
527            }
528        }
529        Err(err) => {
530            return Err(NormalizeUrlError {
531                kind: NormalizeUrlErrorKind::UrlParse(err),
532            });
533        }
534    })
535}
536
/// Reports whether the first `@` in `str` comes before the first `:`.
///
/// Returns `false` when either character is missing. Used to tell scp-style
/// ssh urls (`user@host:path`) apart from plain file paths.
fn string_contains_asperand_before_colon(str: &str) -> bool {
    matches!(
        (str.find('@'), str.find(':')),
        (Some(asperand_at), Some(colon_at)) if asperand_at < colon_at
    )
}
547
548fn is_absolute_windows_path(url: &str) -> bool {
549    if let Some(path) = url.strip_prefix("file://") {
550        return is_windows_drive_path(path.trim_start_matches('/'));
551    }
552
553    is_windows_drive_path(url)
554}
555
/// Reports whether `path` starts with a drive specifier: an ASCII letter,
/// a `:`, and then either `/` or `\`.
fn is_windows_drive_path(path: &str) -> bool {
    match path.as_bytes() {
        [drive, b':', separator, ..] => {
            drive.is_ascii_alphabetic() && (*separator == b'/' || *separator == b'\\')
        }
        // Fewer than three bytes, or no `:` in second position.
        _ => false,
    }
}