parse_git_url/
lib.rs

1use std::fmt::Display;
2use std::str::FromStr;
3use std::{error::Error, fmt};
4use tracing::debug;
5use url::Url;
6
7mod scheme;
8
9pub use crate::scheme::Scheme;
10
11/// GitUrl represents an input url that is a url used by git
12/// Internally during parsing the url is sanitized and uses the `url` crate to perform
13/// the majority of the parsing effort, and with some extra handling to expose
14/// metadata used my many git hosting services
15#[derive(Debug, PartialEq, Eq, Clone)]
16pub struct GitUrl {
17    /// The fully qualified domain name (FQDN) or IP of the repo
18    pub host: Option<String>,
19    /// The name of the repo
20    pub name: String,
21    /// The owner/account/project name
22    pub owner: Option<String>,
23    /// The organization name. Supported by Azure DevOps
24    pub organization: Option<String>,
25    /// The full name of the repo, formatted as "owner/name"
26    pub fullname: String,
27    /// The git url scheme
28    pub scheme: Scheme,
29    /// The authentication user
30    pub user: Option<String>,
31    /// The oauth token (could appear in the https urls)
32    pub token: Option<String>,
33    /// The non-conventional port where git service is hosted
34    pub port: Option<u16>,
35    /// The path to repo w/ respect to user + hostname
36    pub path: String,
37    /// Indicate if url uses the .git suffix
38    pub git_suffix: bool,
39    /// Indicate if url explicitly uses its scheme
40    pub scheme_prefix: bool,
41}
42
43/// Build the printable GitUrl from its components
44impl fmt::Display for GitUrl {
45    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
46        let scheme_prefix = match self.scheme_prefix {
47            true => format!("{}://", self.scheme),
48            false => String::new(),
49        };
50
51        let auth_info = match self.scheme {
52            Scheme::Ssh | Scheme::Git | Scheme::GitSsh => {
53                if let Some(user) = &self.user {
54                    format!("{}@", user)
55                } else {
56                    String::new()
57                }
58            }
59            Scheme::Http | Scheme::Https => match (&self.user, &self.token) {
60                (Some(user), Some(token)) => format!("{}:{}@", user, token),
61                (Some(user), None) => format!("{}@", user),
62                (None, Some(token)) => format!("{}@", token),
63                (None, None) => String::new(),
64            },
65            _ => String::new(),
66        };
67
68        let host = match &self.host {
69            Some(host) => host.to_string(),
70            None => String::new(),
71        };
72
73        let port = match &self.port {
74            Some(p) => format!(":{}", p),
75            None => String::new(),
76        };
77
78        let path = match &self.scheme {
79            Scheme::Ssh => {
80                if self.port.is_some() {
81                    format!("/{}", &self.path)
82                } else {
83                    format!(":{}", &self.path)
84                }
85            }
86            _ => (&self.path).to_string(),
87        };
88
89        let git_url_str = format!("{}{}{}{}{}", scheme_prefix, auth_info, host, port, path);
90
91        write!(f, "{}", git_url_str)
92    }
93}
94
95impl Default for GitUrl {
96    fn default() -> Self {
97        GitUrl {
98            host: None,
99            name: "".to_string(),
100            owner: None,
101            organization: None,
102            fullname: "".to_string(),
103            scheme: Scheme::Unspecified,
104            user: None,
105            token: None,
106            port: None,
107            path: "".to_string(),
108            git_suffix: false,
109            scheme_prefix: false,
110        }
111    }
112}
113
114#[derive(Debug)]
115#[non_exhaustive]
116pub struct FromStrError {
117    url: String,
118    kind: FromStrErrorKind,
119}
120
121impl Display for FromStrError {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        match &self.kind {
124            FromStrErrorKind::NormalizeUrl(_) => {
125                write!(f, "unable to normalize URL `{}`", self.url)
126            }
127            FromStrErrorKind::UrlHost => {
128                write!(f, "could not isolate host from URL `{}`", self.url)
129            }
130            FromStrErrorKind::UnsupportedScheme => {
131                write!(f, "unsupported scheme`",)
132            }
133            FromStrErrorKind::MalformedGitUrl => {
134                write!(f, "unknown format of git URL `{}`", self.url)
135            }
136        }
137    }
138}
139
140impl Error for FromStrError {
141    fn source(&self) -> Option<&(dyn Error + 'static)> {
142        match &self.kind {
143            FromStrErrorKind::NormalizeUrl(err) => Some(err),
144            FromStrErrorKind::UrlHost => None,
145            FromStrErrorKind::UnsupportedScheme => None,
146            FromStrErrorKind::MalformedGitUrl => None,
147        }
148    }
149}
150
151#[derive(Debug)]
152pub enum FromStrErrorKind {
153    #[non_exhaustive]
154    NormalizeUrl(NormalizeUrlError),
155    #[non_exhaustive]
156    UrlHost,
157    #[non_exhaustive]
158    UnsupportedScheme,
159    #[non_exhaustive]
160    MalformedGitUrl,
161}
162
163impl FromStr for GitUrl {
164    type Err = FromStrError;
165
166    fn from_str(s: &str) -> Result<Self, Self::Err> {
167        GitUrl::parse(s)
168    }
169}
170
171impl GitUrl {
172    /// Returns `GitUrl` after removing `user` and `token` values
173    /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info
174    pub fn trim_auth(&self) -> GitUrl {
175        let mut new_giturl = self.clone();
176        new_giturl.user = None;
177        new_giturl.token = None;
178        new_giturl
179    }
180
181    /// Normalizes and parses `url` for metadata
182    pub fn parse(url: &str) -> Result<GitUrl, FromStrError> {
183        // Normalize the url so we can use Url crate to process ssh urls
184        let normalized = normalize_url(url).map_err(|err| FromStrError {
185            url: url.to_owned(),
186            kind: FromStrErrorKind::NormalizeUrl(err),
187        })?;
188
189        // Some pre-processing for paths
190        // REFACTOR: write Scheme::from_str explicitly and include that error in the chain
191        let scheme = Scheme::from_str(normalized.scheme()).map_err(|_err| FromStrError {
192            url: url.to_owned(),
193            kind: FromStrErrorKind::UnsupportedScheme,
194        })?;
195
196        // Normalized ssh urls can always have their first '/' removed
197        let urlpath = match &scheme {
198            Scheme::Ssh => {
199                // At the moment, we're relying on url::Url's parse() behavior to not duplicate
200                // the leading '/' when we normalize
201                normalized.path()[1..].to_string()
202            }
203            _ => normalized.path().to_string(),
204        };
205
206        let git_suffix_check = &urlpath.ends_with(".git");
207
208        // Parse through path for name,owner,organization
209        // Support organizations for Azure Devops
210        debug!("The urlpath: {:?}", &urlpath);
211
212        // Most git services use the path for metadata in the same way, so we're going to separate
213        // the metadata
214        // ex. github.com/accountname/reponame
215        // owner = accountname
216        // name = reponame
217        //
218        // organizations are going to be supported on a per-host basis
219        let splitpath = &urlpath.rsplit_terminator('/').collect::<Vec<&str>>();
220        debug!("rsplit results for metadata: {:?}", splitpath);
221
222        let name = splitpath[0].trim_end_matches(".git").to_string();
223
224        let (owner, organization, fullname) = match &scheme {
225            // We're not going to assume anything about metadata from a filepath
226            Scheme::File => (None::<String>, None::<String>, name.clone()),
227            _ => {
228                let mut fullname: Vec<&str> = Vec::new();
229
230                // TODO: Add support for parsing out orgs from these urls
231                let hosts_w_organization_in_path = vec!["dev.azure.com", "ssh.dev.azure.com"];
232                //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"];
233
234                let host_str = normalized.host_str().ok_or_else(|| FromStrError {
235                    url: url.to_owned(),
236                    kind: FromStrErrorKind::UrlHost,
237                })?;
238
239                match hosts_w_organization_in_path.contains(&host_str) {
240                    true => {
241                        debug!("Found a git provider with an org");
242
243                        // The path differs between git:// and https:// schemes
244
245                        match &scheme {
246                            // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName",
247                            Scheme::Ssh => {
248                                // Organization
249                                fullname.push(splitpath[2]);
250                                // Project/Owner name
251                                fullname.push(splitpath[1]);
252                                // Repo name
253                                fullname.push(splitpath[0]);
254
255                                (
256                                    Some(splitpath[1].to_string()),
257                                    Some(splitpath[2].to_string()),
258                                    fullname.join("/"),
259                                )
260                            }
261                            // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName",
262                            Scheme::Https => {
263                                // Organization
264                                fullname.push(splitpath[3]);
265                                // Project/Owner name
266                                fullname.push(splitpath[2]);
267                                // Repo name
268                                fullname.push(splitpath[0]);
269
270                                (
271                                    Some(splitpath[2].to_string()),
272                                    Some(splitpath[3].to_string()),
273                                    fullname.join("/"),
274                                )
275                            }
276                            _ => {
277                                return Err(FromStrError {
278                                    url: url.to_owned(),
279                                    kind: FromStrErrorKind::UnsupportedScheme,
280                                });
281                            }
282                        }
283                    }
284                    false => {
285                        if !url.starts_with("ssh") && splitpath.len() < 2 {
286                            return Err(FromStrError {
287                                url: url.to_owned(),
288                                kind: FromStrErrorKind::MalformedGitUrl,
289                            });
290                        }
291
292                        let position = match splitpath.len() {
293                            0 => {
294                                return Err(FromStrError {
295                                    url: url.to_owned(),
296                                    kind: FromStrErrorKind::MalformedGitUrl,
297                                })
298                            }
299                            1 => 0,
300                            _ => 1,
301                        };
302
303                        // push owner
304                        fullname.push(splitpath[position]);
305                        // push name
306                        fullname.push(name.as_str());
307
308                        (
309                            Some(splitpath[position].to_string()),
310                            None::<String>,
311                            fullname.join("/"),
312                        )
313                    }
314                }
315            }
316        };
317
318        let final_host = match scheme {
319            Scheme::File => None,
320            _ => normalized.host_str().map(|h| h.to_string()),
321        };
322
323        let final_path = match scheme {
324            Scheme::File => {
325                if let Some(host) = normalized.host_str() {
326                    format!("{}{}", host, urlpath)
327                } else {
328                    urlpath
329                }
330            }
331            _ => urlpath,
332        };
333
334        Ok(GitUrl {
335            host: final_host,
336            name,
337            owner,
338            organization,
339            fullname,
340            scheme,
341            user: match normalized.username().to_string().len() {
342                0 => None,
343                _ => Some(normalized.username().to_string()),
344            },
345            token: normalized.password().map(|p| p.to_string()),
346            port: normalized.port(),
347            path: final_path,
348            git_suffix: *git_suffix_check,
349            scheme_prefix: url.contains("://") || url.starts_with("git:"),
350        })
351    }
352}
353
354/// `normalize_ssh_url` takes in an ssh url that separates the login info
355/// from the path into with a `:` and replaces it with `/`.
356///
357/// Prepends `ssh://` to url
358///
359/// Supports absolute and relative paths
360fn normalize_ssh_url(url: &str) -> Result<Url, NormalizeUrlError> {
361    let u = url.split(':').collect::<Vec<&str>>();
362
363    match u.len() {
364        2 => {
365            debug!("Normalizing ssh url: {:?}", u);
366            normalize_url(&format!("ssh://{}/{}", u[0], u[1]))
367        }
368        3 => {
369            debug!("Normalizing ssh url with ports: {:?}", u);
370            normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2]))
371        }
372        _default => Err(NormalizeUrlError {
373            kind: NormalizeUrlErrorKind::UnsupportedSshPattern {
374                url: url.to_owned(),
375            },
376        }),
377    }
378}
379
380/// `normalize_file_path` takes in a filepath and uses `Url::from_file_path()` to parse
381///
382/// Prepends `file://` to url
383#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
384fn normalize_file_path(filepath: &str) -> Result<Url, NormalizeUrlError> {
385    let fp = Url::from_file_path(filepath);
386
387    match fp {
388        Ok(path) => Ok(path),
389        Err(_e) => normalize_url(&format!("file://{}", filepath)),
390    }
391}
392
/// Fallback `normalize_file_path` for targets not covered by the primary
/// implementation (e.g. `wasm32-unknown-unknown`).
///
/// File paths cannot be normalized on these targets, so the input is rejected with
/// `NormalizeUrlErrorKind::UnsupportedScheme`. `normalize_url` can reach this function
/// for any relative input, so it reports an error instead of panicking.
///
/// The `cfg` is the exact complement of the primary implementation's, so exactly one
/// definition exists on every target (the two used to overlap on wasm32-wasi).
#[cfg(not(any(unix, windows, target_os = "redox", target_os = "wasi")))]
fn normalize_file_path(_filepath: &str) -> Result<Url, NormalizeUrlError> {
    Err(NormalizeUrlError {
        kind: NormalizeUrlErrorKind::UnsupportedScheme,
    })
}
397
398#[derive(Debug)]
399#[non_exhaustive]
400pub struct NormalizeUrlError {
401    kind: NormalizeUrlErrorKind,
402}
403
404impl Display for NormalizeUrlError {
405    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
406        match &self.kind {
407            NormalizeUrlErrorKind::NullBytes => write!(f, "input URL contains null bytes"),
408            NormalizeUrlErrorKind::UrlParse(_) => write!(f, "unable to parse URL"),
409            NormalizeUrlErrorKind::UnsupportedSshPattern { url } => {
410                write!(f, "unsupported SSH pattern `{}`", url)
411            }
412            NormalizeUrlErrorKind::UnsupportedScheme => write!(f, "unsupported URL scheme"),
413        }
414    }
415}
416
417impl Error for NormalizeUrlError {
418    fn source(&self) -> Option<&(dyn Error + 'static)> {
419        match &self.kind {
420            NormalizeUrlErrorKind::NullBytes => None,
421            NormalizeUrlErrorKind::UrlParse(err) => Some(err),
422            NormalizeUrlErrorKind::UnsupportedSshPattern { url: _ } => None,
423            NormalizeUrlErrorKind::UnsupportedScheme => None,
424        }
425    }
426}
427
428#[derive(Debug)]
429pub enum NormalizeUrlErrorKind {
430    #[non_exhaustive]
431    NullBytes,
432    #[non_exhaustive]
433    UrlParse(url::ParseError),
434    #[non_exhaustive]
435    UnsupportedSshPattern { url: String },
436    #[non_exhaustive]
437    UnsupportedScheme,
438}
439
440/// `normalize_url` takes in url as `&str` and takes an opinionated approach to identify
441/// `ssh://` or `file://` urls that require more information to be added so that
442/// they can be parsed more effectively by `url::Url::parse()`
443pub fn normalize_url(url: &str) -> Result<Url, NormalizeUrlError> {
444    debug!("Processing: {:?}", &url);
445
446    // Error if there are null bytes within the url
447    // https://github.com/tjtelan/git-url-parse-rs/issues/16
448    if url.contains('\0') {
449        return Err(NormalizeUrlError {
450            kind: NormalizeUrlErrorKind::NullBytes,
451        });
452    }
453
454    // We're going to remove any trailing slash before running through Url::parse
455    let url = url.trim_end_matches('/');
456
457    // Normalize short git url notation: git:host/path.
458    // This is the same as matching Regex::new(r"^git:[^/]")
459    let url_starts_with_git_but_no_slash = url.starts_with("git:") && url.get(4..5) != Some("/");
460    let url_to_parse = if url_starts_with_git_but_no_slash {
461        url.replace("git:", "git://")
462    } else {
463        url.to_string()
464    };
465
466    let url_parse = Url::parse(&url_to_parse);
467
468    Ok(match url_parse {
469        Ok(u) => match Scheme::from_str(u.scheme()) {
470            Ok(_) => u,
471            Err(_) => normalize_ssh_url(url)?,
472        },
473        Err(url::ParseError::RelativeUrlWithoutBase) => {
474            // If we're here, we're only looking for Scheme::Ssh or Scheme::File
475
476            // Assuming we have found Scheme::Ssh if we can find an "@" before ":"
477            // Otherwise we have Scheme::File
478            match string_contains_asperand_before_colon(url) {
479                true => {
480                    debug!("Scheme::SSH match for normalization");
481                    normalize_ssh_url(url)?
482                }
483                false => {
484                    debug!("Scheme::File match for normalization");
485                    normalize_file_path(url)?
486                }
487            }
488        }
489        Err(err) => {
490            return Err(NormalizeUrlError {
491                kind: NormalizeUrlErrorKind::UrlParse(err),
492            });
493        }
494    })
495}
496
/// Returns `true` when `str` contains both an `@` and a `:` and the first `@` comes
/// before the first `:`.
///
/// This is a lightweight stand-in for matching Regex::new(r"^\S+(@)\S+(:).*$");
fn string_contains_asperand_before_colon(str: &str) -> bool {
    if let (Some(asperand_at), Some(colon_at)) = (str.find('@'), str.find(':')) {
        asperand_at < colon_at
    } else {
        false
    }
}