use getset::Getters;
#[cfg(feature = "log")]
use log::debug;
use nom::Finish;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_while};
use nom::character::complete::alpha1;
use nom::combinator::{map_opt, peek, recognize, verify};
use nom::error::context;
use nom::multi::{many0, many1};
use nom::sequence::{pair, preceded, separated_pair, terminated};
use nom::{IResult, Parser, combinator::opt};
/// Components extracted from a URL-like string by [`UrlSpecParser::parse`].
#[derive(Debug, Default, Clone, Getters)]
#[getset(get = "pub")]
pub(crate) struct UrlSpecParser {
/// Scheme name without its `://` (or short `git:`) separator, when one was recognized.
pub(crate) scheme: Option<String>,
/// Authority and path parsed from whatever follows the scheme.
pub(crate) hier_part: UrlHierPart,
}
impl UrlSpecParser {
/// Splits `input` into an optional scheme followed by a hier-part.
///
/// This function never returns `Err`: if either stage fails, its result is
/// replaced by the default value, which also resets the remaining input to
/// the empty string. Callers therefore have to inspect the returned fields
/// (for example an empty path) to detect input that could not be parsed.
pub(crate) fn parse(input: &str) -> IResult<&str, Self> {
    let (after_scheme, scheme) = Self::parse_scheme(input).finish().unwrap_or_default();
    let (remaining, hier_part) = Self::parse_hier_part(after_scheme)
        .finish()
        .unwrap_or_default();

    Ok((remaining, UrlSpecParser { scheme, hier_part }))
}
/// Parses an optional leading scheme.
///
/// Two forms are recognized:
/// * the short git form: when `short_git_scheme_check` accepts the input,
///   the `git:` prefix is consumed and `git` is returned;
/// * the regular form: a letter followed by letters, digits, `+`, `-` or
///   `.`, terminated by `://`. The separator is consumed but not returned.
///
/// When neither form is present the input is returned untouched with `None`.
///
/// NOTE(review): the short git form is tried before the `://` look-ahead, so
/// how `git://...` is handled depends entirely on what
/// `short_git_scheme_check` accepts.
fn parse_scheme(input: &str) -> IResult<&str, Option<String>> {
    // Characters permitted after the first letter of a scheme name.
    fn is_scheme_tail(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')
    }

    #[cfg(feature = "log")]
    debug!("Looking ahead before parsing for scheme");

    if Self::short_git_scheme_check(input) {
        if let Ok((rest, short)) = Self::short_git_scheme_parser().parse(input) {
            return Ok((rest, short.map(str::to_owned)));
        }
    }

    // Non-consuming probe: only commit to scheme parsing when `name://` is ahead.
    let mut lookahead = context(
        "scheme validate",
        peek(pair(
            pair(alpha1, take_while(is_scheme_tail)),
            tag::<&str, &str, nom::error::Error<&str>>("://"),
        )),
    );
    if lookahead.parse(input).is_err() {
        #[cfg(feature = "log")]
        debug!("Look ahead check for scheme failed");
        return Ok((input, None));
    }

    #[cfg(feature = "log")]
    debug!("Look ahead check passed, parsing for scheme");

    let scheme_name = recognize(pair(alpha1, take_while(is_scheme_tail)));
    let (input, scheme) = context(
        "Scheme parse",
        opt(verify(terminated(scheme_name, tag("://")), |s: &str| {
            !s.is_empty()
        })),
    )
    .parse(input)?;

    #[cfg(feature = "log")]
    {
        debug!("{input:?}");
        debug!("{scheme:?}");
    }

    Ok((input, scheme.map(str::to_owned)))
}
/// Parses the authority followed by a non-empty path.
///
/// The path alternatives are tried in the order abempty, rootless, ssh, and
/// the first one that succeeds wins. An empty match is rejected, so this
/// returns an error when no path text is present.
///
/// NOTE(review): the rootless form accepts `:` in its first segment and can
/// match the empty string, so it looks like it always succeeds and the ssh
/// alternative is never reached — confirm.
fn parse_hier_part(input: &str) -> IResult<&str, UrlHierPart> {
    #[cfg(feature = "log")]
    debug!("Parsing for heir-part");

    let (input, authority) = Self::parse_authority(input)?;

    let path_forms = alt((
        Self::path_abempty_parser(),
        Self::path_rootless_parser(),
        Self::path_ssh_parser(),
    ));
    let (input, path) = context(
        "Top of path parsers",
        verify(path_forms, |s: &str| !s.is_empty()),
    )
    .parse(input)?;

    let hier_part = UrlHierPart {
        authority,
        path: path.to_owned(),
    };

    #[cfg(feature = "log")]
    {
        debug!("{:?}", input);
        debug!("{:?}", hier_part);
    }

    Ok((input, hier_part))
}
fn parse_authority(input: &str) -> IResult<&str, UrlAuthority> {
#[cfg(feature = "log")]
{
debug!("Parsing for Authority");
}
let (input, userinfo) = Self::parse_userinfo(input)?;
#[cfg(feature = "log")]
{
debug!("Looking ahead for windows-style path vs host");
}
let check = context(
"Host check for windows path",
peek(preceded(
take_while(|c| reg_name_uri_chars(c) && c != '\\'),
tag::<&str, &str, nom::error::Error<&str>>(":\\"),
)),
)
.parse(input);
if check.is_ok() {
#[cfg(feature = "log")]
{
debug!(
"Host check failed. Found potential windows-style path while looking for host"
);
}
return Ok((input, UrlAuthority::default()));
}
#[cfg(feature = "log")]
{
debug!("Parsing for host");
}
let (input, host) = context(
"Host parser",
opt(verify(
recognize(take_while(|c: char| reg_name_uri_chars(c))),
|s: &str| {
let has_alphanum = s.chars().any(char::is_alphanumeric);
let starts_with_alphanum = s.chars().next().is_some_and(char::is_alphanumeric);
has_alphanum && starts_with_alphanum && !s.is_empty()
},
)),
)
.parse(input)?;
#[cfg(feature = "log")]
{
debug!("host found: {host:?}");
}
let (input, port) = Self::parse_port(input)?;
let authority = UrlAuthority {
userinfo,
host: host.map(|h| h.to_string()),
port,
};
#[cfg(feature = "log")]
{
debug!("{input:?}");
debug!("{authority:?}");
}
Ok((input, authority))
}
/// Parses an optional `user[:token]@` prefix.
///
/// A look-ahead first confirms that a run of userinfo characters is followed
/// by `@`; if not, the input is returned untouched with an empty
/// [`UrlUserInfo`]. Otherwise the userinfo and the `@` are consumed.
///
/// When the userinfo contains `:`, it is split at the first colon into
/// `user` and `token`. RFC 3986 allows further colons inside userinfo, so
/// everything after the first colon belongs to the token (previously the
/// token stopped at the second colon and the rest was silently dropped).
///
/// # Errors
///
/// Returns a parse error when the userinfo contains `:` but the user or the
/// token part is empty (for example `:token@` or `user:@`).
fn parse_userinfo(authority_input: &str) -> IResult<&str, UrlUserInfo> {
    // Userinfo characters other than the `user:token` separator.
    fn is_userinfo_char(c: char) -> bool {
        unreserved_uri_chars(c) || subdelims_uri_chars(c)
    }

    #[cfg(feature = "log")]
    {
        debug!("Checking for for Userinfo");
    }

    // Non-consuming probe so that inputs without `@` are left untouched.
    let mut check = context(
        "Userinfo validation",
        peek(pair(
            take_while(|c: char| is_userinfo_char(c) || c == ':'),
            tag::<&str, &str, nom::error::Error<&str>>("@"),
        )),
    );
    if check.parse(authority_input).is_err() {
        #[cfg(feature = "log")]
        {
            debug!("Userinfo check failed");
        }
        return Ok((authority_input, UrlUserInfo::default()));
    }

    let (authority_input, userinfo) = context(
        "Userinfo parser",
        opt(verify(
            recognize(take_while(|c: char| is_userinfo_char(c) || c == ':')),
            |s: &str| !s.is_empty(),
        )),
    )
    .parse(authority_input)?;

    let (authority_input, (user, token)) = match userinfo {
        Some(userinfo) => {
            #[cfg(feature = "log")]
            {
                debug!("Userinfo found. Parsing for '@'");
            }
            let (authority_input, _) =
                context("Userinfo '@' parser", tag("@")).parse(authority_input)?;

            let credentials = if userinfo.contains(':') {
                #[cfg(feature = "log")]
                {
                    debug!("Continue break down userinfo into user:token");
                }
                let (_, (user, token)) = context(
                    "Userinfo with colon parser",
                    separated_pair(
                        verify(take_while(is_userinfo_char), |s: &str| !s.is_empty()),
                        tag(":"),
                        // The token keeps any further ':' so nothing is truncated.
                        verify(
                            take_while(|c: char| is_userinfo_char(c) || c == ':'),
                            |s: &str| !s.is_empty(),
                        ),
                    ),
                )
                .parse(userinfo)?;
                (Some(user), Some(token))
            } else {
                (Some(userinfo), None)
            };

            (authority_input, credentials)
        }
        // An empty userinfo (input starting with `@`) consumes nothing.
        None => (authority_input, (None, None)),
    };

    let userinfo = UrlUserInfo {
        user: user.map(|u| u.to_string()),
        token: token.map(|t| t.to_string()),
    };

    #[cfg(feature = "log")]
    {
        debug!("{authority_input:?}");
        debug!("{userinfo:?}");
    }

    Ok((authority_input, userinfo))
}
/// Parses an optional `:port` suffix of the authority.
///
/// The whole run of reg-name characters after the colon is taken as the
/// candidate, and it is accepted only if it consists solely of ASCII digits
/// and fits in a `u16`. Taking the whole run (rather than just the leading
/// digits) keeps scp-style paths such as `:2fa/repo` from being mistaken for
/// a port. The explicit digit check is needed because `u16::from_str` also
/// accepts a leading `+`, which is a permitted character here.
///
/// When no valid port is present nothing is consumed and `None` is returned.
fn parse_port(authority_input: &str) -> IResult<&str, Option<u16>> {
    #[cfg(feature = "log")]
    {
        debug!("Parsing port");
    }

    let (input, port) = context(
        "Port parser",
        opt(map_opt(
            preceded(
                tag(":"),
                take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)),
            ),
            |candidate: &str| {
                // An empty candidate passes `all` but is rejected by `parse`.
                if candidate.bytes().all(|b| b.is_ascii_digit()) {
                    candidate.parse::<u16>().ok()
                } else {
                    None
                }
            },
        )),
    )
    .parse(authority_input)?;

    #[cfg(feature = "log")]
    {
        // Log the remaining input, matching the other parse functions.
        debug!("{input:?}");
        debug!("{port:?}");
    }

    Ok((input, port))
}
fn path_abempty_parser<'url>(
) -> impl Parser<
&'url str,
Output = <dyn Parser<&'url str, Output = &'url str, Error = nom::error::Error<&'url str>> as Parser<
&'url str,
>>::Output,
Error = nom::error::Error<&'url str>,
>{
#[cfg(feature = "log")]
{
debug!("parsing abempty path");
}
context(
"Path parser (abempty)",
recognize(many1(pair(
tag("/"),
take_while(|c: char| pchar_uri_chars(c)),
))),
)
}
fn path_ssh_parser<'url>(
) -> impl Parser<
&'url str,
Output = <dyn Parser<&'url str, Output = &'url str, Error = nom::error::Error<&'url str>> as Parser<
&'url str,
>>::Output,
Error = nom::error::Error<&'url str>,
>{
#[cfg(feature = "log")]
{
debug!("Parsing ssh path");
}
context(
"Path parser (ssh)",
recognize((
tag(":"),
take_while(|c: char| pchar_uri_chars(c)),
many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))),
)),
)
}
fn path_rootless_parser<'url>(
) -> impl Parser<
&'url str,
Output = <dyn Parser<&'url str, Output = &'url str, Error = nom::error::Error<&'url str>> as Parser<
&'url str,
>>::Output,
Error = nom::error::Error<&'url str>,
>{
#[cfg(feature = "log")]
{
debug!("Parsing rootless path");
}
context(
"Path parser (rootless)",
recognize(pair(
take_while(|c: char| pchar_uri_chars(c)),
many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))),
)),
)
}
/// Builds a parser for the short git prefix `git:`.
///
/// On a match the prefix is consumed and `Some("git")` is produced (the
/// colon is dropped). Because the match is optional, input without the
/// prefix yields `None` and is left untouched.
///
/// The debug message is emitted when the parser is built, not when it runs.
fn short_git_scheme_parser<'url>() -> impl Parser<
    &'url str,
    Output = <dyn Parser<
        &'url str,
        Output = Option<&'url str>,
        Error = nom::error::Error<&'url str>,
    > as Parser<&'url str>>::Output,
    Error = nom::error::Error<&'url str>,
> {
    #[cfg(feature = "log")]
    debug!("Parsing short git scheme");

    let git_prefix = terminated(
        tag::<&str, &str, nom::error::Error<&str>>("git"),
        tag::<&str, &str, nom::error::Error<&str>>(":"),
    );
    context("short git scheme parse", opt(git_prefix))
}
/// Reports whether `input` uses the short git form `git:<rest>`.
///
/// The regular `git://` scheme is deliberately excluded. The short form only
/// strips `git:`, so accepting `git://host/path` here would leave
/// `//host/path` behind and the host would end up inside the path instead of
/// the authority.
fn short_git_scheme_check(input: &str) -> bool {
    input
        .strip_prefix("git:")
        .is_some_and(|rest| !rest.starts_with("//"))
}
}
/// Credentials taken from the `user[:token]@` part of an authority.
#[derive(Debug, Default, Clone, Getters)]
#[getset(get = "pub")]
pub(crate) struct UrlUserInfo {
/// Text before the first `:` of the userinfo, or the whole userinfo when it has no `:`.
pub(crate) user: Option<String>,
/// Text after the `:` of the userinfo; `None` when the userinfo has no `:`.
pub(crate) token: Option<String>,
}
/// The `[userinfo@]host[:port]` portion of a URL; every part is optional.
#[derive(Debug, Default, Clone, Getters)]
#[getset(get = "pub")]
pub(crate) struct UrlAuthority {
/// Credentials that preceded `@`; both fields are `None` when absent.
pub(crate) userinfo: UrlUserInfo,
/// Host text, when one starting with an alphanumeric character was found.
pub(crate) host: Option<String>,
/// Port number, when a `:` followed by a valid `u16` was found.
pub(crate) port: Option<u16>,
}
/// Everything after the scheme: the authority followed by the path.
#[derive(Debug, Default, Clone, Getters)]
#[getset(get = "pub")]
pub(crate) struct UrlHierPart {
/// Parsed userinfo, host and port.
pub(crate) authority: UrlAuthority,
/// Path text exactly as matched by the path parsers (empty only in the default value).
pub(crate) path: String,
}
/// Returns `true` for characters allowed inside a path segment: unreserved
/// characters, sub-delimiters, `:` and `@`.
pub(crate) fn pchar_uri_chars(c: char) -> bool {
    matches!(c, ':' | '@') || unreserved_uri_chars(c) || subdelims_uri_chars(c)
}
/// Returns `true` for characters allowed in a registered host name:
/// unreserved characters and sub-delimiters.
pub(crate) fn reg_name_uri_chars(c: char) -> bool {
    let is_unreserved = unreserved_uri_chars(c);
    is_unreserved || subdelims_uri_chars(c)
}
/// Returns `true` for "unreserved" URI characters: any alphanumeric
/// character (Unicode-aware, not limited to ASCII) plus `-`, `.`, `_`, `~`.
pub(crate) fn unreserved_uri_chars(c: char) -> bool {
    matches!(c, '-' | '.' | '_' | '~') || c.is_alphanumeric()
}
/// Returns `true` for URI sub-delimiter characters
/// (`! $ & ' ( ) * + , ; =`) and additionally for the backslash.
///
/// NOTE(review): `\` is not an RFC 3986 sub-delimiter; it is presumably
/// accepted so that Windows-style paths can be matched — confirm.
pub(crate) fn subdelims_uri_chars(c: char) -> bool {
    matches!(
        c,
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '\\'
    )
}