git_url_parse/types/
mod.rs

1//! # GitUrl internal types
2//!
3//! Internal types and parsing logic for Git urls
4//!
5
6mod error;
7mod spec;
8use spec::*;
9pub mod provider;
10
11pub use error::GitUrlParseError;
12
13use core::str;
14use std::fmt;
15use url::Url;
16
17use getset::{CopyGetters, Getters, Setters};
18#[cfg(feature = "log")]
19use log::debug;
20use nom::Finish;
21#[cfg(feature = "serde")]
22use serde::{Deserialize, Serialize};
23
/// Label assigned during parsing that classifies the style of a Git URL.
/// Some printing or `GitProvider` parsing behavior is influenced by this type.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub(crate) enum GitUrlParseHint {
    /// The default status, kept when none of the other classifications apply
    #[default]
    Unknown,
    /// When `ssh` is in the scheme, or a `:` is used as initial path separator
    Sshlike,
    /// When `file` is the scheme, or the input consists of only a path
    /// (filesystem-like relative paths)
    Filelike,
    /// Any scheme that is neither ssh-like nor `file`, or an input without a scheme
    /// that carries both a user and a password in the userinfo
    Httplike,
}
39
/// Represents a parsed Git repository url
///
/// `GitUrl` holds the components of an input url used by git.
/// Parsing of the url is inspired by rfc3986, but does not strictly cover the spec.
/// Optionally (but by default) the `url` crate is used to perform a final validation
/// of the parsing effort.
#[derive(Clone, CopyGetters, Getters, Debug, Default, Setters, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[getset(set = "pub(crate)")]
pub struct GitUrl {
    /// Scheme name (i.e. the `scheme` in `scheme://`)
    scheme: Option<String>,
    /// User name part of the userinfo
    user: Option<String>,
    /// Password part of the userinfo, provided with `user` (i.e. `user`:`password`@...)
    password: Option<String>,
    /// The hostname or IP of the repo host
    host: Option<String>,
    /// The port number of the repo host, if specified
    #[getset(get_copy = "pub")]
    port: Option<u16>,
    /// File or network path to repo
    path: String,
    /// Whether `scheme://` is printed in the default output.
    /// Parsing sets this to `true` only when the input itself carried a scheme.
    #[getset(get_copy = "pub")]
    print_scheme: bool,
    /// Pattern style of url derived during parsing
    #[getset(get_copy = "pub")]
    hint: GitUrlParseHint,
}
69
70impl GitUrl {
71    /// scheme name (i.e. `scheme://`)
72    pub fn scheme(&self) -> Option<&str> {
73        if let Some(s) = &self.scheme {
74            Some(&s[..])
75        } else {
76            None
77        }
78    }
79
80    /// user name userinfo
81    pub fn user(&self) -> Option<&str> {
82        if let Some(u) = &self.user {
83            Some(&u[..])
84        } else {
85            None
86        }
87    }
88
89    /// password userinfo provided with `user` (i.e. `user`:`password`@...)
90    pub fn password(&self) -> Option<&str> {
91        if let Some(p) = &self.password {
92            Some(&p[..])
93        } else {
94            None
95        }
96    }
97
98    /// The hostname or IP of the repo host
99    pub fn host(&self) -> Option<&str> {
100        if let Some(h) = &self.host {
101            Some(&h[..])
102        } else {
103            None
104        }
105    }
106
107    /// File or network path to repo
108    pub fn path(&self) -> &str {
109        &self.path[..]
110    }
111
112    /// Wrapper function for the default output mode via [`Display`](std::fmt::Display) trait
113    fn display(&self) -> String {
114        self.build_string(false)
115    }
116
117    /// Wrapper function for printing a url for the [`url`](https://docs.rs/url/latest/url/) crate
118    #[cfg(feature = "url")]
119    fn url_compat_display(&self) -> String {
120        self.build_string(true)
121    }
122
123    /// This method rebuilds the printable GitUrl from its components.
124    /// `url_compat` results in output that can be parsed by the [`url`](https://docs.rs/url/latest/url/) crate
125    fn build_string(&self, url_compat: bool) -> String {
126        let scheme = if self.print_scheme() || url_compat {
127            if let Some(scheme) = self.scheme() {
128                format!("{scheme}://")
129            } else {
130                String::new()
131            }
132        } else {
133            String::new()
134        };
135
136        let auth_info = match (self.user(), self.password()) {
137            (Some(user), Some(password)) => format!("{user}:{password}@"),
138            (Some(user), None) => format!("{user}@",),
139            (None, Some(password)) => format!("{password}@"),
140            (None, None) => String::new(),
141        };
142
143        let host = match &self.host() {
144            Some(host) => host.to_string(),
145            None => String::new(),
146        };
147
148        let (port, path) = match (self.hint(), self.port(), self.path()) {
149            (GitUrlParseHint::Httplike, Some(port), path) => {
150                (format!(":{port}"), format!("/{path}"))
151            }
152            (GitUrlParseHint::Httplike, None, path) => (String::new(), path.to_string()),
153            (GitUrlParseHint::Sshlike, Some(port), path) => {
154                (format!(":{port}"), format!("/{path}"))
155            }
156            (GitUrlParseHint::Sshlike, None, path) => {
157                if url_compat {
158                    (String::new(), format!("/{path}"))
159                } else {
160                    (String::new(), format!(":{path}"))
161                }
162            }
163            (GitUrlParseHint::Filelike, None, path) => (String::new(), path.to_string()),
164            _ => (String::new(), String::new()),
165        };
166
167        let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}");
168        git_url_str
169    }
170
171    /// Returns `GitUrl` after removing all user info values
172    pub fn trim_auth(&self) -> GitUrl {
173        let mut new_giturl = self.clone();
174        new_giturl.set_user(None);
175        new_giturl.set_password(None);
176        #[cfg(feature = "log")]
177        debug!("{new_giturl:?}");
178        new_giturl
179    }
180
181    /// Returns a `Result<GitUrl>` after parsing `input` for metadata
182    ///
183    /// ```
184    /// # use git_url_parse::GitUrl;
185    /// # use git_url_parse::types::provider::GenericProvider;
186    /// # fn main() -> Result<(), git_url_parse::GitUrlParseError> {
187    /// let http_url = GitUrl::parse("https://github.com/tjtelan/git-url-parse-rs.git")?;
188    /// let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?;
189    /// # Ok(())
190    /// #  }
191    /// ```
192    pub fn parse(input: &str) -> Result<Self, GitUrlParseError> {
193        let git_url = Self::parse_to_git_url(input)?;
194
195        git_url.is_valid()?;
196
197        Ok(git_url)
198    }
199
200    /// Internal parse to `GitUrl` without validation steps
201    fn parse_to_git_url(input: &str) -> Result<Self, GitUrlParseError> {
202        let mut git_url_result = GitUrl::default();
203        // Error if there are null bytes within the url
204        // https://github.com/tjtelan/git-url-parse-rs/issues/16
205        if input.contains('\0') {
206            return Err(GitUrlParseError::FoundNullBytes);
207        }
208
209        let (_input, url_spec_parser) = UrlSpecParser::parse(input).finish().unwrap_or_default();
210
211        let scheme = url_spec_parser.scheme();
212        let user = url_spec_parser.hier_part().authority().userinfo().user();
213        let password = url_spec_parser.hier_part().authority().userinfo().token();
214        let host = url_spec_parser.hier_part().authority().host();
215        let port = url_spec_parser.hier_part().authority().port();
216        let path = url_spec_parser.hier_part().path();
217
218        git_url_result.set_scheme(scheme.clone());
219        git_url_result.set_user(user.clone());
220        git_url_result.set_password(password.clone());
221        git_url_result.set_host(host.clone());
222        git_url_result.set_port(*port);
223        git_url_result.set_path(path.clone());
224
225        // We will respect whether scheme was initially set
226        let print_scheme = scheme.is_some();
227
228        // Take a moment to identify the type of url we have
229        // We use the GitUrlParseHint to validate or adjust formatting path, if necessary
230        let hint = if let Some(scheme) = scheme.as_ref() {
231            if scheme.contains("ssh") {
232                GitUrlParseHint::Sshlike
233            } else {
234                match scheme.to_lowercase().as_str() {
235                    "file" => GitUrlParseHint::Filelike,
236                    _ => GitUrlParseHint::Httplike,
237                }
238            }
239        } else if user.is_none()
240            && password.is_none()
241            && host.is_none()
242            && port.is_none()
243            && !path.is_empty()
244        {
245            // if we only have a path => file
246            GitUrlParseHint::Filelike
247        } else if user.is_some() && password.is_some() {
248            // If we have a user and password => http
249            GitUrlParseHint::Httplike
250        } else if path.starts_with(':') {
251            // If path starts with a colon => ssh
252            GitUrlParseHint::Sshlike
253        } else {
254            GitUrlParseHint::Unknown
255        };
256
257        // If we found an ssh url, we should adjust the path.
258        // Skip the first character
259        if hint == GitUrlParseHint::Sshlike {
260            git_url_result.set_scheme(Some("ssh".to_string()));
261            git_url_result.set_path(path[1..].to_string());
262        }
263
264        if hint == GitUrlParseHint::Filelike {
265            git_url_result.set_scheme(Some("file".to_string()));
266        }
267
268        git_url_result.set_print_scheme(print_scheme);
269        git_url_result.set_hint(hint);
270
271        git_url_result.is_valid()?;
272
273        Ok(git_url_result)
274    }
275
276    /// Normalize input into form that can be used by [`Url::parse`](https://docs.rs/url/latest/url/struct.Url.html#method.parse)
277    ///
278    /// ```
279    /// use git_url_parse::GitUrl;
280    /// #[cfg(feature = "url")]
281    /// use url::Url;
282    ///
283    /// fn main() -> Result<(), git_url_parse::GitUrlParseError> {
284    ///     let ssh_url = GitUrl::parse_to_url("git@github.com:tjtelan/git-url-parse-rs.git")?;
285    ///
286    ///     assert_eq!(ssh_url.scheme(), "ssh");
287    ///     assert_eq!(ssh_url.username(), "git");
288    ///     assert_eq!(ssh_url.host_str(), Some("github.com"));
289    ///     assert_eq!(ssh_url.path(), "/tjtelan/git-url-parse-rs.git");
290    ///     Ok(())
291    /// }
292    /// ```
293    ///
294    #[cfg(feature = "url")]
295    pub fn parse_to_url(input: &str) -> Result<Url, GitUrlParseError> {
296        let git_url = Self::parse_to_git_url(input)?;
297
298        Ok(Url::try_from(git_url)?)
299    }
300
    /// Extracts provider-specific information from this url
    ///
    /// The work is delegated to `T::from_git_url`, so the result is determined by the
    /// chosen `GitProvider` implementation.
    ///
    /// ```
    /// use git_url_parse::GitUrl;
    /// use git_url_parse::types::provider::GenericProvider;
    ///
    /// # fn main() -> Result<(), git_url_parse::GitUrlParseError> {
    /// let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?;
    /// let provider : GenericProvider = ssh_url.provider_info()?;
    /// # assert_eq!(provider.owner(), "tjtelan");
    /// # assert_eq!(provider.repo(), "git-url-parse-rs");
    ///
    /// # Ok(())
    /// # }
    /// ```
    pub fn provider_info<T>(&self) -> Result<T, GitUrlParseError>
    where
        T: provider::GitProvider<GitUrl, GitUrlParseError>,
    {
        T::from_git_url(self)
    }
319
320    /// This is called as the last step before returning a `GitUrl` to the user
321    fn is_valid(&self) -> Result<(), GitUrlParseError> {
322        // Last chance validation
323
324        #[cfg(feature = "log")]
325        debug!("Validating parsing results {self:#?}");
326
327        if self.path().is_empty() {
328            return Err(GitUrlParseError::InvalidPathEmpty);
329        }
330
331        // There's an edge case we don't properly cover: ssh urls using ports + absolute paths
332        // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it
333
334        // only ssh paths start with ':'
335        if self.hint() != GitUrlParseHint::Sshlike && self.path.starts_with(':') {
336            #[cfg(feature = "log")]
337            {
338                debug!("{:?}", self.hint());
339                debug!("{:?}", self.path());
340                debug!("Only sshlike url path starts with ':'");
341                debug!("path starts with ':'? {}", self.path.starts_with(':'));
342            }
343
344            return Err(GitUrlParseError::InvalidPortNumber);
345        }
346
347        // if we are not httplike, we shouldn't have passwords
348        if self.hint() != GitUrlParseHint::Httplike && self.password().is_some() {
349            #[cfg(feature = "log")]
350            {
351                debug!("{:?}", self.hint());
352                debug!(
353                    "password support only for httplike url: {:?}",
354                    self.password()
355                );
356            }
357            return Err(GitUrlParseError::InvalidPasswordUnsupported);
358        }
359
360        // if we are filelike, we should only have paths
361        if self.hint() == GitUrlParseHint::Filelike
362            && (self.user().is_some()
363                || self.password().is_some()
364                || self.host().is_some()
365                || self.port().is_some()
366                || self.path().is_empty())
367        {
368            #[cfg(feature = "log")]
369            {
370                debug!(
371                    "Only scheme and path expected to have values set for filelike urls {:?}",
372                    self
373                );
374            }
375            return Err(GitUrlParseError::InvalidFilePattern);
376        }
377
378        #[cfg(feature = "url")]
379        {
380            // Since we don't fully implement any spec, we'll rely on the url crate
381            let _u: Url = self.try_into()?;
382        }
383
384        Ok(())
385    }
386}
387
388/// Build the printable GitUrl from its components
389impl fmt::Display for GitUrl {
390    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
391        let git_url_str = self.display();
392
393        write!(f, "{git_url_str}",)
394    }
395}
396
#[cfg(feature = "url")]
impl TryFrom<&GitUrl> for Url {
    type Error = url::ParseError;
    fn try_from(value: &GitUrl) -> Result<Self, Self::Error> {
        // We don't fully implement any spec, so the url crate gets the final say
        // on the url-compatible rendering
        let rendered = value.url_compat_display();
        Url::parse(rendered.as_str())
    }
}
405
#[cfg(feature = "url")]
impl TryFrom<GitUrl> for Url {
    type Error = url::ParseError;
    fn try_from(value: GitUrl) -> Result<Self, Self::Error> {
        // Same conversion as for a borrowed `GitUrl`: the url crate parses the
        // url-compatible rendering
        Url::try_from(&value)
    }
}
414
#[cfg(feature = "url")]
impl TryFrom<&Url> for GitUrl {
    type Error = GitUrlParseError;
    fn try_from(value: &Url) -> Result<Self, Self::Error> {
        // Re-parse the serialized form of the `Url`
        let serialized = value.as_str();
        Self::parse(serialized)
    }
}
422
#[cfg(feature = "url")]
impl TryFrom<Url> for GitUrl {
    type Error = GitUrlParseError;
    fn try_from(value: Url) -> Result<Self, Self::Error> {
        // Same conversion as for a borrowed `Url`
        Self::try_from(&value)
    }
}