hosted_git_info/lib.rs
1//! hosted-git-info is a [Rust] port of the original [hosted-git-info] project on [npm].
2//!
3//! [Rust]: https://www.rust-lang.org/
4//! [hosted-git-info]: https://github.com/npm/hosted-git-info
5//! [npm]: https://www.npmjs.com
6//!
7//! It provides metadata and conversions from repository urls for [GitHub], [Bitbucket]
8//! and [GitLab].
9//!
10//! [GitHub]: https://github.com/
11//! [Bitbucket]: https://www.bitbucket.org/
12//! [GitLab]: https://www.gitlab.com/
13//!
14//! It lets you identify and transform the URLs of various git hosts between
15//! protocols. It can also tell you the URL of the raw path for a
16//! particular file, for direct access without git.
17//!
18//! # Usage
19//!
20//! First, URL parsing may fail for various reasons and therefore returns a `Result`:
21//!
22//! ```
23//! use hosted_git_info::{HostedGitInfo, ParseError};
24//!
25//! assert!(HostedGitInfo::from_url("https://www.rustlang.org/") == Err(ParseError::UnknownUrl));
26//! ```
27//!
28//! Let's parse a valid URL and look at its components.
29//!
30//! ```
31//! use hosted_git_info::{HostedGitInfo, Provider};
32//!
33//! let url = "https://github.com/foo/bar.git#branch";
34//! let info = HostedGitInfo::from_url(url).unwrap();
35//! assert_eq!(info.provider(), Provider::GitHub);
36//! assert_eq!(info.user(), Some("foo"));
37//! assert_eq!(info.project(), "bar");
38//! assert_eq!(info.committish(), Some("branch"));
39//! assert_eq!(info.auth(), None);
40//! ```
41//!
42//! [HostedGitInfo] also implements the [str::FromStr] trait:
43//!
44//! ```
45//! use hosted_git_info::{HostedGitInfo, Provider};
46//!
47//! let url = "git+ssh://github.com:foo/bar.git";
48//! let info: HostedGitInfo = url.parse().unwrap();
49//! assert_eq!(info.provider(), Provider::GitHub);
50//! assert_eq!(info.user(), Some("foo"));
51//! assert_eq!(info.project(), "bar");
52//! assert_eq!(info.committish(), None);
53//! assert_eq!(info.auth(), None);
54//! ```
55
56#![deny(clippy::unwrap_used)]
57
58#[cfg(feature = "derive_builder")]
59use derive_builder::Builder;
60use percent_encoding::percent_decode_str;
61use std::str;
62use thiserror::Error;
63use url::Url;
64
65mod parser;
66
67#[cfg(test)]
68mod proptest;
69
/// URL schemes for which a username/password embedded in the URL is reported
/// as the `auth` of the parsed result; credentials on any other scheme are
/// dropped by `HostedGitInfo::from_url`.
static AUTH_SCHEMES: [&str; 5] = ["git", "https", "git+https", "http", "git+http"];

/// Schemes that `correct_protocol` accepts verbatim, i.e. inputs starting with
/// `<scheme>:` for one of these entries are returned without modification.
static KNOWN_SCHEMES: [&str; 10] = [
    // generic transport schemes
    "http",
    "https",
    "git",
    "git+ssh",
    "git+https",
    "ssh",
    // provider shortcut schemes (e.g. `github:user/project`)
    "bitbucket",
    "gist",
    "github",
    "gitlab",
];
83
/// The git hosting providers this crate is able to recognize.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Provider {
    /// Bitbucket, see <https://www.bitbucket.org/>
    BitBucket,
    /// GitHub Gist, see <https://gist.github.com/>
    Gist,
    /// GitHub, see <https://github.com/>
    GitHub,
    /// GitLab, see <https://www.gitlab.com/>
    GitLab,
}
96
/// The kind of URL a [HostedGitInfo] was originally parsed from
/// (shortcut, https, ssh, ...).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DefaultRepresentation {
    /// Example: `Turbo87/hosted-git-info-rs`
    Shortcut,
    /// Example: `git://github.com/Turbo87/hosted-git-info-rs`
    Git,
    /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git`
    Https,
    /// Example: `git+ssh://git@github.com:Turbo87/hosted-git-info-rs.git`
    Ssh,
    /// Any scheme without a dedicated variant (for example `http`).
    Other,
}

impl DefaultRepresentation {
    /// Maps a URL scheme (without the trailing `:`) to its representation.
    ///
    /// The `git+` prefixed schemes share the variant of their plain
    /// counterpart; every unlisted scheme falls back to [`Self::Other`].
    fn from_scheme(scheme: &str) -> DefaultRepresentation {
        match scheme {
            "git" => Self::Git,
            "https" | "git+https" => Self::Https,
            "ssh" | "git+ssh" => Self::Ssh,
            _ => Self::Other,
        }
    }
}
126
127/// Errors that can occur during parsing.
128#[derive(PartialEq, Eq, Clone, Copy, Debug, Error)]
129pub enum ParseError {
130 /// Failed to parse the URL with the `url` crate.
131 #[error("Failed to parse URL")]
132 InvalidUrl(#[from] url::ParseError),
133
134 /// Failed to parse a part of the URL with the `percent_encoding` crate.
135 #[error("Failed to parse percent-encoded URI component")]
136 InvalidUriEncoding(#[from] str::Utf8Error),
137
138 /// The URL could not be recognized.
139 #[error("Failed to recognize URL")]
140 UnknownUrl,
141}
142
143/// The parsed information from a git hosting URL.
144#[derive(Debug, Eq, PartialEq, Clone)]
145#[cfg_attr(feature = "derive_builder", derive(Builder))]
146pub struct HostedGitInfo {
147 provider: Provider,
148
149 #[cfg_attr(
150 feature = "derive_builder",
151 builder(setter(into, strip_option), default)
152 )]
153 user: Option<String>,
154
155 #[cfg_attr(
156 feature = "derive_builder",
157 builder(setter(into, strip_option), default)
158 )]
159 auth: Option<String>,
160
161 #[cfg_attr(feature = "derive_builder", builder(setter(into)))]
162 project: String,
163
164 #[cfg_attr(
165 feature = "derive_builder",
166 builder(setter(into, strip_option), default)
167 )]
168 committish: Option<String>,
169
170 #[cfg_attr(feature = "derive_builder", builder(setter(name = "repr")))]
171 default_representation: DefaultRepresentation,
172}
173
174impl HostedGitInfo {
175 /// Parses a URL string and returns a [HostedGitInfo] struct, if successful.
176 /// If parsing fails, a [ParseError] will be returned.
177 pub fn from_url(giturl: &str) -> Result<Self, ParseError> {
178 // if (!giturl) {
179 // return
180 // }
181
182 // const url = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)
183 let url = if is_github_shorthand(giturl) {
184 format!("github:{}", giturl)
185 } else {
186 // correctProtocol(giturl)
187 correct_protocol(giturl)
188 };
189
190 // const parsed = parseGitUrl(url)
191 // if (!parsed) {
192 // return parsed
193 // }
194 let parsed = parse_git_url(&url)?;
195
196 // const gitHostShortcut = gitHosts.byShortcut[parsed.protocol]
197 let parser_from_shortcut = parser::parser_from_shortcut(parsed.scheme());
198
199 // const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.') ? parsed.hostname.slice(4) : parsed.hostname]
200 let simplified_domain = parsed
201 .domain()
202 .map(|domain| domain.strip_prefix("www.").unwrap_or(domain));
203 let parser_from_domain =
204 simplified_domain.and_then(|domain| parser::parser_from_domain(domain));
205
206 // const gitHostName = gitHostShortcut || gitHostDomain
207 let parser = parser_from_shortcut
208 .as_ref()
209 .or_else(|| parser_from_domain.as_ref());
210
211 // if (!gitHostName) {
212 // return
213 // }
214 //
215 // const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain]
216 let parser = match parser {
217 Some(parser) => parser,
218 None => return Err(ParseError::UnknownUrl),
219 };
220
221 // let auth = null
222 // if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) {
223 // auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}`
224 // }
225 let username = match parsed.username() {
226 username if !username.is_empty() => Some(username),
227 _ => None,
228 };
229 let password = parsed.password();
230 let auth = if AUTH_SCHEMES.contains(&parsed.scheme()) {
231 match (username, password) {
232 (Some(username), Some(password)) => Some(format!("{}:{}", username, password)),
233 (Some(username), None) => Some(username.to_string()),
234 (None, Some(password)) => Some(format!(":{}", password)),
235 (None, None) => None,
236 }
237 } else {
238 None
239 };
240
241 // let committish = null
242 // let user = null
243 // let project = null
244 // let defaultRepresentation = null
245 //
246 // try {
247 // if (gitHostShortcut) {
248 if parser_from_shortcut.is_some() {
249 // let pathname = parsed.pathname.startsWith('/') ? parsed.pathname.slice(1) : parsed.pathname
250 let path = parsed.path();
251 let mut pathname = path.strip_prefix('/').unwrap_or(path);
252
253 // const firstAt = pathname.indexOf('@')
254 let first_at = pathname.find('@');
255 // we ignore auth for shortcuts, so just trim it out
256 // if (firstAt > -1) {
257 // pathname = pathname.slice(firstAt + 1)
258 // }
259 if let Some(first_at) = first_at {
260 pathname = &pathname[first_at + 1..];
261 }
262
263 // const lastSlash = pathname.lastIndexOf('/')
264 let last_slash = pathname.rfind('/');
265 let (user, project) = if let Some(last_slash) = last_slash {
266 // user = decodeURIComponent(pathname.slice(0, lastSlash))
267 let user = percent_decode_str(&pathname[0..last_slash]).decode_utf8()?;
268
269 // we want nulls only, never empty strings
270 // if (!user) {
271 // user = null
272 // }
273 let user = if user.is_empty() { None } else { Some(user) };
274
275 // project = decodeURIComponent(pathname.slice(lastSlash + 1))
276 let project = percent_decode_str(&pathname[last_slash + 1..]).decode_utf8()?;
277 (user, project)
278 } else {
279 // project = decodeURIComponent(pathname)
280 let project = percent_decode_str(&pathname).decode_utf8()?;
281 (None, project)
282 };
283
284 let project = project
285 .strip_suffix(".git")
286 .unwrap_or_else(|| project.as_ref());
287
288 // if (parsed.hash) {
289 // committish = decodeURIComponent(parsed.hash.slice(1))
290 // }
291 let committish = parsed
292 .fragment()
293 .map(|committish| percent_decode_str(&committish).decode_utf8())
294 .transpose()?;
295
296 // defaultRepresentation = 'shortcut'
297 Ok(Self {
298 provider: parser.provider(),
299 user: user.map(|s| s.to_string()),
300 auth,
301 project: project.to_string(),
302 committish: committish.map(|s| s.to_string()),
303 default_representation: DefaultRepresentation::Shortcut,
304 })
305 } else {
306 // if (!gitHostInfo.protocols.includes(parsed.protocol)) {
307 // return
308 // }
309 if !parser.supports_scheme(parsed.scheme()) {
310 return Err(ParseError::UnknownUrl);
311 }
312
313 // const segments = gitHostInfo.extract(parsed)
314 // if (!segments) {
315 // return
316 // }
317 let segments = parser.extract(&parsed)?;
318
319 // user = segments.user && decodeURIComponent(segments.user)
320 let user = segments
321 .user
322 .map(|user| percent_decode_str(&user).decode_utf8())
323 .transpose()?;
324
325 // project = decodeURIComponent(segments.project)
326 let project = segments
327 .project
328 .map(|project| percent_decode_str(&project).decode_utf8())
329 .transpose()?
330 .ok_or(ParseError::UnknownUrl)?;
331
332 // committish = decodeURIComponent(segments.committish)
333 let committish = segments
334 .committish
335 .map(|committish| percent_decode_str(&committish).decode_utf8())
336 .transpose()?;
337
338 // defaultRepresentation = protocolToRepresentation(parsed.protocol)
339 Ok(Self {
340 provider: parser.provider(),
341 user: user.map(|s| s.to_string()),
342 auth,
343 project: project.to_string(),
344 committish: committish.map(|s| s.to_string()),
345 default_representation: DefaultRepresentation::from_scheme(parsed.scheme()),
346 })
347 }
348 // }
349 // } catch (err) {
350 // /* istanbul ignore else */
351 // if (err instanceof URIError) {
352 // return
353 // } else {
354 // throw err
355 // }
356 // }
357 //
358 // return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts)
359 }
360
361 /// The type of hosting provider. (GitHub, Gitlab, Bitbucket, ...)
362 pub fn provider(&self) -> Provider {
363 self.provider
364 }
365
366 /// The name of the user or organization on the git host.
367 ///
368 /// This is using an [Option] because some hosting providers allow projects
369 /// that are not scoped to a particular user or organization.
370 ///
371 /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git` β `Turbo87`
372 pub fn user(&self) -> Option<&str> {
373 self.user.as_deref()
374 }
375
376 /// The authentication part of the URL, if it exists.
377 ///
378 /// Format: `<USER>[:<PASSWORD>]`
379 ///
380 /// Example: `https://user:password@github.com/foo/bar.git` β `user:password`
381 pub fn auth(&self) -> Option<&str> {
382 self.auth.as_deref()
383 }
384
385 /// The name of the project on the git host.
386 ///
387 /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git` β `hosted-git-info-rs`
388 pub fn project(&self) -> &str {
389 &self.project
390 }
391
392 /// The [branch, tag, commit, ...](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefcommit-ishacommit-ishalsocommittish)
393 /// part of the URL, if it exists.
394 ///
395 /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git#rust-is-awesome` β `rust-is-awesome`
396 pub fn committish(&self) -> Option<&str> {
397 self.committish.as_deref()
398 }
399
400 /// The original URL type (shortcut, https, ssh, ...).
401 ///
402 /// Example: `https://github.com/Turbo87/hosted-git-info-rs.git` β `Https`
403 pub fn default_representation(&self) -> DefaultRepresentation {
404 self.default_representation
405 }
406}
407
408impl str::FromStr for HostedGitInfo {
409 type Err = ParseError;
410
411 fn from_str(s: &str) -> Result<Self, Self::Err> {
412 HostedGitInfo::from_url(s)
413 }
414}
415
/// Returns `true` if `arg` looks like a GitHub shorthand such as `npm/cli`,
/// optionally followed by `#<committish>`.
///
/// With "head" being the part of `arg` before the first `#` (or all of `arg`
/// if there is none), the rules are:
///
/// * the head contains no whitespace,
/// * the head contains no `@` (that would be auth or a scoped package),
/// * the head contains no `:` (that would indicate a protocol),
/// * the head contains at most one `/` (repos are `username/repository`),
/// * `arg` contains a `/` that is not its first character (a leading `/` is
///   probably an absolute file path),
/// * the head does not end with a `/` (probably a directory),
/// * `arg` does not start with a `.` (probably a relative file path).
///
/// Everything after the first `#` is the committish and is deliberately
/// unrestricted, so branch names containing slashes (`user/repo#feature/x`)
/// are accepted.
fn is_github_shorthand(arg: &str) -> bool {
    // All "only after the hash" rules are equivalent to "not in the head".
    // Checking the head directly also avoids mixing indices relative to a
    // substring with indices into `arg`, which previously made the
    // second-slash rule reject valid inputs such as `foo/bar#a/b`.
    let head = match arg.find('#') {
        Some(first_hash) => &arg[..first_hash],
        None => arg,
    };

    let space_only_after_hash = !head.contains(char::is_whitespace);
    let at_only_after_hash = !head.contains('@');
    let colon_only_after_hash = !head.contains(':');
    let second_slash_only_after_hash = head.matches('/').count() < 2;

    // As in the upstream implementation, the slash is looked up in the whole
    // argument, not just in the head.
    let has_slash = matches!(arg.find('/'), Some(first_slash) if first_slash > 0);

    // If a `#` is present this checks the character right before it,
    // otherwise the last character of the argument.
    let does_not_end_with_slash = !head.ends_with('/');
    let does_not_start_with_dot = !arg.starts_with('.');

    space_only_after_hash
        && has_slash
        && does_not_end_with_slash
        && does_not_start_with_dot
        && at_only_after_hash
        && colon_only_after_hash
        && second_slash_only_after_hash
}
473
474// accepts input like git:github.com:user/repo and inserts the // after the first :
475fn correct_protocol(arg: &str) -> String {
476 // const firstColon = arg.indexOf(':')
477 if let Some(first_colon) = arg.find(':') {
478 // const proto = arg.slice(0, firstColon + 1)
479 let proto = &arg[0..first_colon];
480
481 // if (knownProtocols.includes(proto)) {
482 // return arg
483 // }
484 if KNOWN_SCHEMES.contains(&proto) {
485 return arg.to_string();
486 }
487
488 // const firstAt = arg.indexOf('@')
489 // if (firstAt > -1) {
490 if let Some(first_at) = arg.find('@') {
491 // if (firstAt > firstColon) {
492 return if first_at > first_colon {
493 // return `git+ssh://${arg}`
494 format!("git+ssh://{}", arg)
495 } else {
496 arg.to_string()
497 };
498 }
499
500 // const doubleSlash = arg.indexOf('//')
501 let double_slash = arg.find("//");
502 // if (doubleSlash === firstColon + 1) {
503 if double_slash == Some(first_colon + 1) {
504 return arg.to_string();
505 }
506
507 // return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1)
508 format!("{}//{}", &arg[0..first_colon + 1], &arg[first_colon + 1..])
509 } else {
510 arg.to_string()
511 }
512}
513
514// try to parse the url as its given to us, if that throws
515// then we try to clean the url and parse that result instead
516fn parse_git_url(giturl: &str) -> Result<Url, url::ParseError> {
517 // let result
518 // try {
519 // result = new url.URL(giturl)
520 // } catch (err) {}
521 //
522 // if (result) {
523 // return result
524 // }
525 Url::parse(giturl).or_else(|_error| {
526 // const correctedUrl = correctUrl(giturl)
527 let corrected_url = correct_url(giturl).ok_or(_error)?;
528
529 // try {
530 // result = new url.URL(correctedUrl)
531 // } catch (err) {}
532 //
533 // return result
534 Url::parse(&corrected_url)
535 })
536}
537
/// Attempts to turn an scp-style URL into something the `url` crate can parse.
///
/// The last `:` located before the last `#` is replaced with a `/`, provided
/// that it comes after the first `@` (or there is no `@` at all), as in:
///
/// * `proto://hostname.com:user/repo`
/// * `username@hostname.com:user/repo`
/// * `username:password@hostname.com:user/repo`
/// * `proto://username:password@hostname.com:user/repo`
///
/// If the result no longer contains any `:` and the input contains no `//`,
/// a `git+ssh://` protocol is prepended.
///
/// Returns `None` when no `:` was replaced.
fn correct_url(giturl: &str) -> Option<String> {
    // Only colons in front of the committish are candidates for replacement.
    let before_hash = match giturl.rfind('#') {
        Some(last_hash) => &giturl[..last_hash],
        None => giturl,
    };
    let last_colon = before_hash.rfind(':')?;

    // A colon in front of the first `@` belongs to `user:password`.
    if let Some(first_at) = giturl.find('@') {
        if last_colon <= first_at {
            return None;
        }
    }

    let replaced = format!("{}/{}", &giturl[..last_colon], &giturl[last_colon + 1..]);

    // No protocol separator is left at all, so supply a protocol.
    let needs_protocol = !replaced.contains(':') && !giturl.contains("//");
    if needs_protocol {
        Some(format!("git+ssh://{}", replaced))
    } else {
        Some(replaced)
    }
}