// git_url_parse/types/mod.rs

//! # GitUrl internal types
//!
//! Internal types and parsing logic for Git urls
//!
5
6mod error;
7mod spec;
8use spec::*;
9pub mod provider;
10
11pub use error::GitUrlParseError;
12
13use core::str;
14use std::fmt;
15use url::Url;
16
17use getset::{CloneGetters, CopyGetters, Setters};
18#[cfg(feature = "log")]
19use log::debug;
20use nom::Finish;
21#[cfg(feature = "serde")]
22use serde::{Deserialize, Serialize};
23
/// Classification label attached to a Git URL while it is being parsed.
///
/// The label affects how a `GitUrl` is printed and how `GitProvider`
/// implementations interpret it.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub(crate) enum GitUrlParseHint {
    /// No pattern was recognized; this is the default value
    #[default]
    Unknown,
    /// The scheme contains `ssh`, or a `:` is used as the initial path separator
    Sshlike,
    /// The scheme is `file`, or the input is only a filesystem-like path
    Filelike,
    /// Any other explicit scheme, or a scheme-less url that carries both a user and a password
    Httplike,
}
39
40/// Represents a parsed Git repository url
41///
42/// GitUrl is an input url used by git.
43/// Parsing of the url inspired by rfc3986, but does not strictly cover the spec
44/// Optional, but by default, uses the `url` crate to perform a final validation of the parsing effort
45#[derive(Clone, CopyGetters, CloneGetters, Debug, Default, Setters, PartialEq, Eq)]
46#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
47pub struct GitUrl<'url> {
48    /// scheme name (i.e. `scheme://`)
49    #[getset(get_copy = "pub", set = "pub(crate)")]
50    scheme: Option<&'url str>,
51    /// user name userinfo
52    #[getset(get_copy = "pub", set = "pub(crate)")]
53    user: Option<&'url str>,
54    /// password userinfo provided with `user` (i.e. `user`:`password`@...)
55    #[getset(get_copy = "pub", set = "pub(crate)")]
56    password: Option<&'url str>,
57    /// The hostname or IP of the repo host
58    #[getset(get_copy = "pub")]
59    host: Option<&'url str>,
60    /// The port number of the repo host, if specified
61    #[getset(get_copy = "pub")]
62    port: Option<u16>,
63    /// File or network path to repo
64    #[getset(get_copy = "pub", set = "pub(crate)")]
65    path: &'url str,
66    /// If we should print `scheme://` from input or derived during parsing
67    #[getset(get_copy = "pub", set = "pub(crate)")]
68    print_scheme: bool,
69    /// Pattern style of url derived during parsing
70    #[getset(get_copy = "pub(crate)")]
71    hint: GitUrlParseHint,
72}
73
74/// Build the printable GitUrl from its components
75impl fmt::Display for GitUrl<'_> {
76    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77        let git_url_str = self.display();
78
79        write!(f, "{git_url_str}",)
80    }
81}
82
83impl<'url> GitUrl<'url> {
84    /// Wrapper function for the default output mode via [`Display`](std::fmt::Display) trait
85    fn display(&self) -> String {
86        self.build_string(false)
87    }
88
89    /// Wrapper function for printing a url for the [`url`](https://docs.rs/url/latest/url/) crate
90    #[cfg(feature = "url")]
91    fn url_compat_display(&self) -> String {
92        self.build_string(true)
93    }
94
95    /// This method rebuilds the printable GitUrl from its components.
96    /// `url_compat` results in output that can be parsed by the `url` crate
97    fn build_string(&self, url_compat: bool) -> String {
98        let scheme = if self.print_scheme() || url_compat {
99            if let Some(scheme) = self.scheme() {
100                format!("{scheme}://")
101            } else {
102                String::new()
103            }
104        } else {
105            String::new()
106        };
107
108        let auth_info = match (self.user(), self.password()) {
109            (Some(user), Some(password)) => format!("{user}:{password}@"),
110            (Some(user), None) => format!("{user}@",),
111            (None, Some(password)) => format!("{password}@"),
112            (None, None) => String::new(),
113        };
114
115        let host = match &self.host() {
116            Some(host) => host.to_string(),
117            None => String::new(),
118        };
119
120        let (port, path) = match (self.hint(), self.port(), self.path()) {
121            (GitUrlParseHint::Httplike, Some(port), path) => {
122                (format!(":{port}"), format!("/{path}"))
123            }
124            (GitUrlParseHint::Httplike, None, path) => (String::new(), path.to_string()),
125            (GitUrlParseHint::Sshlike, Some(port), path) => {
126                (format!(":{port}"), format!("/{path}"))
127            }
128            (GitUrlParseHint::Sshlike, None, path) => {
129                if url_compat {
130                    (String::new(), format!("/{path}"))
131                } else {
132                    (String::new(), format!(":{path}"))
133                }
134            }
135            (GitUrlParseHint::Filelike, None, path) => (String::new(), path.to_string()),
136            _ => (String::new(), String::new()),
137        };
138
139        let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}");
140        git_url_str
141    }
142}
143
/// Converts a borrowed `GitUrl` into a [`Url`] by parsing its url-compatible rendering
#[cfg(feature = "url")]
impl<'url> TryFrom<&GitUrl<'url>> for Url {
    type Error = url::ParseError;

    fn try_from(value: &GitUrl<'_>) -> Result<Self, Self::Error> {
        // No spec is fully implemented here, so the `url` crate has the final say
        let rendered = value.url_compat_display();
        Url::parse(&rendered)
    }
}
152
/// Converts an owned `GitUrl` into a [`Url`] by parsing its url-compatible rendering
#[cfg(feature = "url")]
impl<'url> TryFrom<GitUrl<'url>> for Url {
    type Error = url::ParseError;

    fn try_from(value: GitUrl<'_>) -> Result<Self, Self::Error> {
        // No spec is fully implemented here, so the `url` crate has the final say.
        // The by-reference conversion performs the actual rendering and parsing.
        Self::try_from(&value)
    }
}
161
162impl<'url> GitUrl<'url> {
163    /// Returns `GitUrl` after removing all user info values
164    pub fn trim_auth(&self) -> GitUrl {
165        let mut new_giturl = self.clone();
166        new_giturl.set_user(None);
167        new_giturl.set_password(None);
168        #[cfg(feature = "log")]
169        debug!("{new_giturl:?}");
170        new_giturl
171    }
172
173    /// Returns a `Result<GitUrl>` after parsing `input` for metadata
174    ///
175    /// ```
176    /// # use git_url_parse::GitUrl;
177    /// # use git_url_parse::types::provider::GenericProvider;
178    /// # fn main() -> Result<(), git_url_parse::GitUrlParseError> {
179    /// let http_url = GitUrl::parse("https://github.com/tjtelan/git-url-parse-rs.git")?;
180    /// let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?;
181    /// # Ok(())
182    /// #  }
183    /// ```
184    pub fn parse(input: &'url str) -> Result<Self, GitUrlParseError> {
185        // Error if there are null bytes within the url
186        // https://github.com/tjtelan/git-url-parse-rs/issues/16
187        if input.contains('\0') {
188            return Err(GitUrlParseError::FoundNullBytes);
189        }
190
191        let (_input, url_spec_parser) = UrlSpecParser::parse(input).finish().unwrap_or_default();
192
193        let mut scheme = url_spec_parser.scheme();
194        let user = url_spec_parser.hier_part().authority().userinfo().user();
195        let password = url_spec_parser.hier_part().authority().userinfo().token();
196        let host = url_spec_parser.hier_part().authority().host();
197        let port = url_spec_parser.hier_part().authority().port();
198        let mut path = url_spec_parser.hier_part().path();
199
200        // We will respect whether scheme was initially set
201        let print_scheme = scheme.is_some();
202
203        // Take a moment to identify the type of url we have
204        // We use the GitUrlParseHint to validate or adjust formatting path, if necessary
205        let hint = if let Some(scheme) = scheme {
206            if scheme.contains("ssh") {
207                GitUrlParseHint::Sshlike
208            } else {
209                match scheme.to_lowercase().as_str() {
210                    "file" => GitUrlParseHint::Filelike,
211                    _ => GitUrlParseHint::Httplike,
212                }
213            }
214        } else if user.is_none()
215            && password.is_none()
216            && host.is_none()
217            && port.is_none()
218            && !path.is_empty()
219        {
220            // if we only have a path => file
221            GitUrlParseHint::Filelike
222        } else if user.is_some() && password.is_some() {
223            // If we have a user and password => http
224            GitUrlParseHint::Httplike
225        } else if path.starts_with(':') {
226            // If path starts with a colon => ssh
227            GitUrlParseHint::Sshlike
228        } else {
229            GitUrlParseHint::Unknown
230        };
231
232        // If we found an ssh url, we should adjust the path.
233        // Skip the first character
234        if hint == GitUrlParseHint::Sshlike {
235            if let Some(scheme) = scheme.as_mut() {
236                *scheme = "ssh";
237            } else {
238                scheme = Some("ssh")
239            }
240            path = &path[1..];
241        }
242
243        if hint == GitUrlParseHint::Filelike {
244            if let Some(scheme) = scheme.as_mut() {
245                *scheme = "file";
246            } else {
247                scheme = Some("file")
248            }
249        }
250
251        let git_url = GitUrl {
252            scheme,
253            user,
254            password,
255            host,
256            port,
257            path,
258            print_scheme,
259            hint,
260        };
261
262        git_url.is_valid()?;
263
264        Ok(git_url)
265    }
266
267    /// ```
268    /// use git_url_parse::GitUrl;
269    /// use git_url_parse::types::provider::GenericProvider;
270    ///
271    /// # fn main() -> Result<(), git_url_parse::GitUrlParseError> {
272    /// let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?;
273    /// let provider : GenericProvider = ssh_url.provider_info()?;
274    /// # assert_eq!(provider.owner(), "tjtelan");
275    /// # assert_eq!(provider.repo(), "git-url-parse-rs");
276    ///
277    /// # Ok(())
278    /// # }
279    pub fn provider_info<T>(&self) -> Result<T, GitUrlParseError>
280    where
281        T: provider::GitProvider<GitUrl<'url>, GitUrlParseError>,
282    {
283        T::from_git_url(self)
284    }
285
286    /// This is called as the last step before returning a `GitUrl` to the user
287    fn is_valid(&self) -> Result<(), GitUrlParseError> {
288        // Last chance validation
289
290        #[cfg(feature = "log")]
291        debug!("Validating parsing results {self:#?}");
292
293        if self.path().is_empty() {
294            return Err(GitUrlParseError::InvalidPathEmpty);
295        }
296
297        // There's an edge case we don't properly cover: ssh urls using ports + absolute paths
298        // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it
299
300        // only ssh paths start with ':'
301        if self.hint() != GitUrlParseHint::Sshlike && self.path.starts_with(':') {
302            #[cfg(feature = "log")]
303            {
304                debug!("{:?}", self.hint());
305                debug!("{:?}", self.path());
306                debug!("Only sshlike url path starts with ':'");
307                debug!("path starts with ':'? {}", self.path.starts_with(':'));
308            }
309
310            return Err(GitUrlParseError::InvalidPortNumber);
311        }
312
313        // if we are not httplike, we shouldn't have passwords
314        if self.hint() != GitUrlParseHint::Httplike && self.password().is_some() {
315            #[cfg(feature = "log")]
316            {
317                debug!("{:?}", self.hint());
318                debug!(
319                    "password support only for httplike url: {:?}",
320                    self.password()
321                );
322            }
323            return Err(GitUrlParseError::InvalidPasswordUnsupported);
324        }
325
326        // if we are filelike, we should only have paths
327        if self.hint() == GitUrlParseHint::Filelike
328            && (self.user().is_some()
329                || self.password().is_some()
330                || self.host().is_some()
331                || self.port().is_some()
332                || self.path().is_empty())
333        {
334            #[cfg(feature = "log")]
335            {
336                debug!(
337                    "Only scheme and path expected to have values set for filelike urls {:?}",
338                    self
339                );
340            }
341            return Err(GitUrlParseError::InvalidFilePattern);
342        }
343
344        #[cfg(feature = "url")]
345        {
346            // Since we don't fully implement any spec, we'll rely on the url crate
347            let _u: Url = self.try_into()?;
348        }
349
350        Ok(())
351    }
352}