hosted_git_info/
lib.rs

1//! hosted-git-info is a [Rust] port of the original [hosted-git-info] project on [npm].
2//!
//! [Rust]: https://www.rust-lang.org/
4//! [hosted-git-info]: https://github.com/npm/hosted-git-info
5//! [npm]: https://www.npmjs.com
6//!
7//! It provides metadata and conversions from repository urls for [GitHub], [Bitbucket]
8//! and [GitLab].
9//!
10//! [GitHub]: https://github.com/
11//! [Bitbucket]: https://www.bitbucket.org/
12//! [GitLab]: https://www.gitlab.com/
13//!
//! It lets you identify URLs of various git hosts and transform them between
//! protocols. It can also tell you the URL of the raw path of a
//! particular file for direct access without git.
17//!
18//! # Usage
19//!
20//! First, URL parsing may fail for various reasons and therefore returns a `Result`:
21//!
22//! ```
23//! use hosted_git_info::{HostedGitInfo, ParseError};
24//!
25//! assert!(HostedGitInfo::from_url("https://www.rustlang.org/") == Err(ParseError::UnknownUrl));
26//! ```
27//!
//! Let's parse a valid URL and look at its components.
29//!
30//! ```
31//! use hosted_git_info::{HostedGitInfo, Provider};
32//!
33//! let url = "https://github.com/foo/bar.git#branch";
34//! let info = HostedGitInfo::from_url(url).unwrap();
35//! assert_eq!(info.provider(), Provider::GitHub);
36//! assert_eq!(info.user(), Some("foo"));
37//! assert_eq!(info.project(), "bar");
38//! assert_eq!(info.committish(), Some("branch"));
39//! assert_eq!(info.auth(), None);
40//! ```
41//!
42//! [HostedGitInfo] also implements the [str::FromStr] trait:
43//!
44//! ```
45//! use hosted_git_info::{HostedGitInfo, Provider};
46//!
47//! let url = "git+ssh://github.com:foo/bar.git";
48//! let info: HostedGitInfo = url.parse().unwrap();
49//! assert_eq!(info.provider(), Provider::GitHub);
50//! assert_eq!(info.user(), Some("foo"));
51//! assert_eq!(info.project(), "bar");
52//! assert_eq!(info.committish(), None);
53//! assert_eq!(info.auth(), None);
54//! ```
55
56#![deny(clippy::unwrap_used)]
57
58#[cfg(feature = "derive_builder")]
59use derive_builder::Builder;
60use percent_encoding::percent_decode_str;
61use std::str;
62use thiserror::Error;
63use url::Url;
64
65mod parser;
66
67#[cfg(test)]
68mod proptest;
69
/// URL schemes for which `HostedGitInfo::from_url` keeps embedded credentials
/// (`user[:password]@`) as the `auth` component.
static AUTH_SCHEMES: [&str; 5] = ["git", "https", "git+https", "http", "git+http"];

/// Schemes that `correct_protocol` leaves untouched.
static KNOWN_SCHEMES: [&str; 10] = [
    // regular transport schemes
    "http",
    "https",
    "git",
    "git+ssh",
    "git+https",
    "ssh",
    // provider-named schemes (as in `github:user/project`)
    "bitbucket",
    "gist",
    "github",
    "gitlab",
];
83
/// The git hosting providers this crate is able to recognize.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Provider {
    /// Bitbucket, see <https://www.bitbucket.org/>
    BitBucket,
    /// GitHub Gist, see <https://gist.github.com/>
    Gist,
    /// GitHub, see <https://github.com/>
    GitHub,
    /// GitLab, see <https://www.gitlab.com/>
    GitLab,
}
96
/// The form in which a URL was originally written (shortcut, https, ssh, ...).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DefaultRepresentation {
    /// Example: `Turbo87/hosted-git-info-rs`
    Shortcut,
    /// Example: `git://github.com/Turbo87/hosted-git-info-rs`
    Git,
    /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git`
    Https,
    /// Example: `git+ssh://git@github.com:Turbo87/hosted-git-info-rs.git`
    Ssh,
    /// anything else 🤷
    Other,
}

impl DefaultRepresentation {
    /// Maps a URL scheme (written without the trailing `:`) to the
    /// representation it stands for.
    ///
    /// Every scheme that is not listed falls back to
    /// [`DefaultRepresentation::Other`].
    fn from_scheme(scheme: &str) -> DefaultRepresentation {
        match scheme {
            "git" => Self::Git,
            "https" | "git+https" => Self::Https,
            "ssh" | "git+ssh" => Self::Ssh,
            _ => Self::Other,
        }
    }
}
126
127/// Errors that can occur during parsing.
128#[derive(PartialEq, Eq, Clone, Copy, Debug, Error)]
129pub enum ParseError {
130    /// Failed to parse the URL with the `url` crate.
131    #[error("Failed to parse URL")]
132    InvalidUrl(#[from] url::ParseError),
133
134    /// Failed to parse a part of the URL with the `percent_encoding` crate.
135    #[error("Failed to parse percent-encoded URI component")]
136    InvalidUriEncoding(#[from] str::Utf8Error),
137
138    /// The URL could not be recognized.
139    #[error("Failed to recognize URL")]
140    UnknownUrl,
141}
142
143/// The parsed information from a git hosting URL.
144#[derive(Debug, Eq, PartialEq, Clone)]
145#[cfg_attr(feature = "derive_builder", derive(Builder))]
146pub struct HostedGitInfo {
147    provider: Provider,
148
149    #[cfg_attr(
150        feature = "derive_builder",
151        builder(setter(into, strip_option), default)
152    )]
153    user: Option<String>,
154
155    #[cfg_attr(
156        feature = "derive_builder",
157        builder(setter(into, strip_option), default)
158    )]
159    auth: Option<String>,
160
161    #[cfg_attr(feature = "derive_builder", builder(setter(into)))]
162    project: String,
163
164    #[cfg_attr(
165        feature = "derive_builder",
166        builder(setter(into, strip_option), default)
167    )]
168    committish: Option<String>,
169
170    #[cfg_attr(feature = "derive_builder", builder(setter(name = "repr")))]
171    default_representation: DefaultRepresentation,
172}
173
174impl HostedGitInfo {
175    /// Parses a URL string and returns a [HostedGitInfo] struct, if successful.
176    /// If parsing fails, a [ParseError] will be returned.
177    pub fn from_url(giturl: &str) -> Result<Self, ParseError> {
178        // if (!giturl) {
179        //   return
180        // }
181
182        // const url = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)
183        let url = if is_github_shorthand(giturl) {
184            format!("github:{}", giturl)
185        } else {
186            // correctProtocol(giturl)
187            correct_protocol(giturl)
188        };
189
190        // const parsed = parseGitUrl(url)
191        // if (!parsed) {
192        //   return parsed
193        // }
194        let parsed = parse_git_url(&url)?;
195
196        // const gitHostShortcut = gitHosts.byShortcut[parsed.protocol]
197        let parser_from_shortcut = parser::parser_from_shortcut(parsed.scheme());
198
199        // const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.') ? parsed.hostname.slice(4) : parsed.hostname]
200        let simplified_domain = parsed
201            .domain()
202            .map(|domain| domain.strip_prefix("www.").unwrap_or(domain));
203        let parser_from_domain =
204            simplified_domain.and_then(|domain| parser::parser_from_domain(domain));
205
206        // const gitHostName = gitHostShortcut || gitHostDomain
207        let parser = parser_from_shortcut
208            .as_ref()
209            .or_else(|| parser_from_domain.as_ref());
210
211        // if (!gitHostName) {
212        //   return
213        // }
214        //
215        // const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain]
216        let parser = match parser {
217            Some(parser) => parser,
218            None => return Err(ParseError::UnknownUrl),
219        };
220
221        // let auth = null
222        // if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) {
223        //   auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}`
224        // }
225        let username = match parsed.username() {
226            username if !username.is_empty() => Some(username),
227            _ => None,
228        };
229        let password = parsed.password();
230        let auth = if AUTH_SCHEMES.contains(&parsed.scheme()) {
231            match (username, password) {
232                (Some(username), Some(password)) => Some(format!("{}:{}", username, password)),
233                (Some(username), None) => Some(username.to_string()),
234                (None, Some(password)) => Some(format!(":{}", password)),
235                (None, None) => None,
236            }
237        } else {
238            None
239        };
240
241        // let committish = null
242        // let user = null
243        // let project = null
244        // let defaultRepresentation = null
245        //
246        // try {
247        //   if (gitHostShortcut) {
248        if parser_from_shortcut.is_some() {
249            // let pathname = parsed.pathname.startsWith('/') ? parsed.pathname.slice(1) : parsed.pathname
250            let path = parsed.path();
251            let mut pathname = path.strip_prefix('/').unwrap_or(path);
252
253            // const firstAt = pathname.indexOf('@')
254            let first_at = pathname.find('@');
255            // we ignore auth for shortcuts, so just trim it out
256            // if (firstAt > -1) {
257            //   pathname = pathname.slice(firstAt + 1)
258            // }
259            if let Some(first_at) = first_at {
260                pathname = &pathname[first_at + 1..];
261            }
262
263            // const lastSlash = pathname.lastIndexOf('/')
264            let last_slash = pathname.rfind('/');
265            let (user, project) = if let Some(last_slash) = last_slash {
266                // user = decodeURIComponent(pathname.slice(0, lastSlash))
267                let user = percent_decode_str(&pathname[0..last_slash]).decode_utf8()?;
268
269                // we want nulls only, never empty strings
270                // if (!user) {
271                //   user = null
272                // }
273                let user = if user.is_empty() { None } else { Some(user) };
274
275                // project = decodeURIComponent(pathname.slice(lastSlash + 1))
276                let project = percent_decode_str(&pathname[last_slash + 1..]).decode_utf8()?;
277                (user, project)
278            } else {
279                // project = decodeURIComponent(pathname)
280                let project = percent_decode_str(&pathname).decode_utf8()?;
281                (None, project)
282            };
283
284            let project = project
285                .strip_suffix(".git")
286                .unwrap_or_else(|| project.as_ref());
287
288            // if (parsed.hash) {
289            //   committish = decodeURIComponent(parsed.hash.slice(1))
290            // }
291            let committish = parsed
292                .fragment()
293                .map(|committish| percent_decode_str(&committish).decode_utf8())
294                .transpose()?;
295
296            // defaultRepresentation = 'shortcut'
297            Ok(Self {
298                provider: parser.provider(),
299                user: user.map(|s| s.to_string()),
300                auth,
301                project: project.to_string(),
302                committish: committish.map(|s| s.to_string()),
303                default_representation: DefaultRepresentation::Shortcut,
304            })
305        } else {
306            // if (!gitHostInfo.protocols.includes(parsed.protocol)) {
307            //   return
308            // }
309            if !parser.supports_scheme(parsed.scheme()) {
310                return Err(ParseError::UnknownUrl);
311            }
312
313            // const segments = gitHostInfo.extract(parsed)
314            // if (!segments) {
315            //   return
316            // }
317            let segments = parser.extract(&parsed)?;
318
319            // user = segments.user && decodeURIComponent(segments.user)
320            let user = segments
321                .user
322                .map(|user| percent_decode_str(&user).decode_utf8())
323                .transpose()?;
324
325            // project = decodeURIComponent(segments.project)
326            let project = segments
327                .project
328                .map(|project| percent_decode_str(&project).decode_utf8())
329                .transpose()?
330                .ok_or(ParseError::UnknownUrl)?;
331
332            // committish = decodeURIComponent(segments.committish)
333            let committish = segments
334                .committish
335                .map(|committish| percent_decode_str(&committish).decode_utf8())
336                .transpose()?;
337
338            // defaultRepresentation = protocolToRepresentation(parsed.protocol)
339            Ok(Self {
340                provider: parser.provider(),
341                user: user.map(|s| s.to_string()),
342                auth,
343                project: project.to_string(),
344                committish: committish.map(|s| s.to_string()),
345                default_representation: DefaultRepresentation::from_scheme(parsed.scheme()),
346            })
347        }
348        //   }
349        // } catch (err) {
350        //   /* istanbul ignore else */
351        //   if (err instanceof URIError) {
352        //     return
353        //   } else {
354        //     throw err
355        //   }
356        // }
357        //
358        // return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts)
359    }
360
361    /// The type of hosting provider. (GitHub, Gitlab, Bitbucket, ...)
362    pub fn provider(&self) -> Provider {
363        self.provider
364    }
365
366    /// The name of the user or organization on the git host.
367    ///
368    /// This is using an [Option] because some hosting providers allow projects
369    /// that are not scoped to a particular user or organization.
370    ///
371    /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git` β†’ `Turbo87`
372    pub fn user(&self) -> Option<&str> {
373        self.user.as_deref()
374    }
375
376    /// The authentication part of the URL, if it exists.
377    ///
378    /// Format: `<USER>[:<PASSWORD>]`
379    ///
380    /// Example: `https://user:password@github.com/foo/bar.git` β†’ `user:password`
381    pub fn auth(&self) -> Option<&str> {
382        self.auth.as_deref()
383    }
384
385    /// The name of the project on the git host.
386    ///
387    /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git` β†’ `hosted-git-info-rs`
388    pub fn project(&self) -> &str {
389        &self.project
390    }
391
392    /// The [branch, tag, commit, ...](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefcommit-ishacommit-ishalsocommittish)
393    /// part of the URL, if it exists.
394    ///
395    /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git#rust-is-awesome` β†’ `rust-is-awesome`
396    pub fn committish(&self) -> Option<&str> {
397        self.committish.as_deref()
398    }
399
400    /// The original URL type (shortcut, https, ssh, ...).
401    ///
402    /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git` β†’ `Https`
403    pub fn default_representation(&self) -> DefaultRepresentation {
404        self.default_representation
405    }
406}
407
408impl str::FromStr for HostedGitInfo {
409    type Err = ParseError;
410
411    fn from_str(s: &str) -> Result<Self, Self::Err> {
412        HostedGitInfo::from_url(s)
413    }
414}
415
/// Looks for GitHub shorthand inputs, such as `npm/cli` or `npm/cli#v8.0.0`.
///
/// The rules mirror `isGitHubShorthand` of the upstream npm package:
///
/// - it cannot contain whitespace before the first `#`
/// - it cannot start with a `/` because that's probably an absolute file path,
///   but it must include a slash since repos are `username/repository`
/// - it cannot start with a `.` because that's probably a relative file path
/// - it cannot contain an `@` before a `#` because that's a scoped package
/// - it cannot contain a `:` before a `#` because that tells us that there's a protocol
/// - a second `/` may not exist before a `#`
/// - the character right before the `#` (or the last character, if there is
///   no `#`) must not be a `/`
fn is_github_shorthand(arg: &str) -> bool {
    let first_hash = arg.find('#');
    let first_slash = arg.find('/');
    // `find` on the remainder yields an index relative to that remainder, so
    // the offset is added back: every index compared against `first_hash`
    // below has to be an absolute index into `arg`.
    let second_slash = first_slash.and_then(|first_slash| {
        let rest_start = first_slash + 1;
        arg[rest_start..]
            .find('/')
            .map(|offset| rest_start + offset)
    });
    let first_colon = arg.find(':');
    let first_space = arg.find(char::is_whitespace);
    let first_at = arg.find('@');

    // A character is acceptable if it is absent, or if its first occurrence
    // lies behind the first `#`, i.e. inside the committish.
    let only_after_hash =
        |index: Option<usize>| index.is_none() || (first_hash.is_some() && index > first_hash);

    let space_only_after_hash = only_after_hash(first_space);
    let at_only_after_hash = only_after_hash(first_at);
    let colon_only_after_hash = only_after_hash(first_colon);
    let second_slash_only_after_hash = only_after_hash(second_slash);

    // a slash at index 0 would make this an absolute path
    let has_slash = matches!(first_slash, Some(first_slash) if first_slash > 0);

    // if a `#` is found, what matters is that the character immediately
    // before it is not a `/`
    let does_not_end_with_slash = match first_hash {
        Some(first_hash) => !arg[..first_hash].ends_with('/'),
        None => !arg.ends_with('/'),
    };
    let does_not_start_with_dot = !arg.starts_with('.');

    space_only_after_hash
        && has_slash
        && does_not_end_with_slash
        && does_not_start_with_dot
        && at_only_after_hash
        && colon_only_after_hash
        && second_slash_only_after_hash
}
473
474// accepts input like git:github.com:user/repo and inserts the // after the first :
475fn correct_protocol(arg: &str) -> String {
476    // const firstColon = arg.indexOf(':')
477    if let Some(first_colon) = arg.find(':') {
478        // const proto = arg.slice(0, firstColon + 1)
479        let proto = &arg[0..first_colon];
480
481        // if (knownProtocols.includes(proto)) {
482        //   return arg
483        // }
484        if KNOWN_SCHEMES.contains(&proto) {
485            return arg.to_string();
486        }
487
488        // const firstAt = arg.indexOf('@')
489        // if (firstAt > -1) {
490        if let Some(first_at) = arg.find('@') {
491            // if (firstAt > firstColon) {
492            return if first_at > first_colon {
493                // return `git+ssh://${arg}`
494                format!("git+ssh://{}", arg)
495            } else {
496                arg.to_string()
497            };
498        }
499
500        // const doubleSlash = arg.indexOf('//')
501        let double_slash = arg.find("//");
502        // if (doubleSlash === firstColon + 1) {
503        if double_slash == Some(first_colon + 1) {
504            return arg.to_string();
505        }
506
507        // return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1)
508        format!("{}//{}", &arg[0..first_colon + 1], &arg[first_colon + 1..])
509    } else {
510        arg.to_string()
511    }
512}
513
514// try to parse the url as its given to us, if that throws
515// then we try to clean the url and parse that result instead
516fn parse_git_url(giturl: &str) -> Result<Url, url::ParseError> {
517    // let result
518    // try {
519    //   result = new url.URL(giturl)
520    // } catch (err) {}
521    //
522    // if (result) {
523    //   return result
524    // }
525    Url::parse(giturl).or_else(|_error| {
526        // const correctedUrl = correctUrl(giturl)
527        let corrected_url = correct_url(giturl).ok_or(_error)?;
528
529        // try {
530        //   result = new url.URL(correctedUrl)
531        // } catch (err) {}
532        //
533        // return result
534        Url::parse(&corrected_url)
535    })
536}
537
/// Attempts to correct an scp-style URL so that it can be parsed as a
/// regular URL.
///
/// If the last `:` in front of the committish (the part behind the last `#`)
/// comes after the first `@`, or there is no `@` at all, as in
///
/// - `proto://hostname.com:user/repo`
/// - `username@hostname.com:user/repo`
/// - `username:password@hostname.com:user/repo`
/// - `proto://username:password@hostname.com:user/repo`
///
/// that `:` is replaced by a `/` to create a valid path. If the result has
/// no `:` left and the input contained no `//`, `git+ssh://` is prepended.
///
/// Returns `None` when there is nothing to correct.
fn correct_url(giturl: &str) -> Option<String> {
    let first_at = giturl.find('@');

    // colons inside the committish are never candidates for the replacement
    let before_hash = match giturl.rfind('#') {
        Some(last_hash) => &giturl[..last_hash],
        None => giturl,
    };
    let last_colon = before_hash.rfind(':')?;

    // a `:` in front of the first `@` belongs to the credentials
    if matches!(first_at, Some(first_at) if first_at >= last_colon) {
        return None;
    }

    let corrected = format!("{}/{}", &giturl[..last_colon], &giturl[last_colon + 1..]);

    // no `:` left and no `//` in the input means there is no scheme yet
    if !corrected.contains(':') && !giturl.contains("//") {
        Some(format!("git+ssh://{}", corrected))
    } else {
        Some(corrected)
    }
}