use lazy_static::lazy_static;
use percent_encoding::percent_decode_str;
use regex::Regex;
use url::Url;
// Regular expressions shared by the parsing helpers in this file. `lazy_static!`
// compiles each pattern on first use and reuses it afterwards.
lazy_static! {
// Matches input that begins (after optional whitespace / NUL characters) with one of
// the listed schemes followed by `:`. Group 1 is the scheme including its colon,
// group 2 the bare scheme name, group 3 whatever follows (`.` stops at a line break).
// NOTE(review): the pattern has no `(?i)` flag, so `JavaScript:` does not match here
// even though `parse_url` lower-cases group 2 -- confirm whether case-insensitive
// matching was intended.
static ref SPECIAL_PROTO_RE: Regex = Regex::new(
r"^[[:space:]\x00]*((blob|data|javascript|vbscript|file|ftp|gopher|mailto|news|telnet|view-source):)(.*)"
).unwrap();
// Splits a path-like string: group 1 is everything before the first `?` or `#`,
// group 2 an optional `?`-prefixed query, group 3 an optional `#`-prefixed fragment.
static ref URL_COMPONENTS_RE: Regex = Regex::new(r"([^#?]*)(\?[^#]*)?(#.*)?").unwrap();
// Splits `host[:port]`: group 1 is everything up to the first `/` or `:`,
// group 2 the run of decimal digits after an optional `:`.
static ref URL_HOST_RE: Regex = Regex::new(r"([^/:]*):?(\d+)?").unwrap();
}
/// The components of a URL as produced by the parsing functions in this file and
/// consumed by `ParsedURL::stringify`.
#[derive(Clone, Debug)]
pub struct ParsedURL {
    /// Scheme name. The parsers in this file store it without a trailing `:` or `://`.
    pub protocol: Option<String>,
    /// Host including `:port` when the parser reported a port.
    pub host: Option<String>,
    /// Host without any port.
    pub hostname: Option<String>,
    /// Credentials in `user:password` form, without the trailing `@`.
    pub auth: Option<String>,
    /// Path component.
    pub pathname: String,
    /// Fragment including its leading `#`, or empty when absent.
    pub hash: String,
    /// Query string including its leading `?`, or empty when absent.
    pub search: String,
    /// Full URL text when the input was parsed as an absolute URL; `None` when the
    /// value was built from a bare path.
    pub href: Option<String>,
    /// When `true`, stringification emits `//` in place of the protocol.
    pub protocol_relative: bool,
}
/// Username and password extracted from the `user:password` part of a URL.
#[derive(Clone, Debug)]
pub struct ParsedAuth {
    /// Text before the separator; empty when absent.
    pub username: String,
    /// Text after the separator; empty when absent.
    pub password: String,
}
/// Host name and port extracted from the `host[:port]` part of a URL.
#[derive(Clone, Debug)]
pub struct ParsedHost {
    /// Host name without the port.
    pub hostname: String,
    /// Port kept as text; empty when no port was given.
    pub port: String,
}
/// Parses `input` into a [`ParsedURL`].
///
/// Three strategies are tried in order:
///
/// 1. If `input` matches `SPECIAL_PROTO_RE` (`blob:`, `data:`, `javascript:`, ...), it is
///    not parsed any further: the lower-cased scheme becomes `protocol`, everything
///    captured after the colon becomes `pathname`, and `auth`, `host` and `hostname`
///    are set to empty strings.
/// 2. Otherwise `input` is handed to [`Url::parse`] and the resulting components are
///    copied over. `search` and `hash` get their leading `?` / `#` re-attached.
/// 3. If that fails, `input` is treated as a bare path via [`parse_path`].
///
/// `protocol_relative` is always `false` in the value returned from the first two
/// strategies.
pub fn parse_url(input: &str) -> ParsedURL {
    if let Some(captures) = SPECIAL_PROTO_RE.captures(input) {
        let proto = captures
            .get(2)
            .map(|m| m.as_str().to_lowercase())
            .unwrap_or_default();
        let pathname = captures
            .get(3)
            .map(|m| m.as_str().to_string())
            .unwrap_or_default();
        // Group 2 excludes the `:` separator, so it has to be put back when rebuilding
        // the href; otherwise `blob:https://x` would turn into `blobhttps://x`.
        let href = format!("{}:{}", proto, pathname);
        return ParsedURL {
            protocol: Some(proto),
            pathname,
            href: Some(href),
            auth: Some(String::new()),
            host: Some(String::new()),
            hostname: Some(String::new()),
            search: String::new(),
            hash: String::new(),
            protocol_relative: false,
        };
    }

    let url = match Url::parse(input) {
        Ok(url) => url,
        // Not an absolute URL: fall back to splitting it as path / query / fragment.
        Err(_) => return parse_path(input),
    };

    // `host` carries the port when the parser reports one; `hostname` never does.
    let host = match (url.host_str(), url.port()) {
        (Some(name), Some(port)) => Some(format!("{}:{}", name, port)),
        (Some(name), None) => Some(name.to_string()),
        (None, _) => None,
    };
    // Credentials are only reported when a username is present.
    let auth = if url.username().is_empty() {
        None
    } else {
        Some(format!(
            "{}:{}",
            url.username(),
            url.password().unwrap_or("")
        ))
    };

    ParsedURL {
        protocol: Some(url.scheme().to_string()),
        host,
        hostname: url.host_str().map(String::from),
        auth,
        pathname: url.path().to_string(),
        hash: url
            .fragment()
            .map(|f| format!("#{}", f))
            .unwrap_or_default(),
        search: url.query().map(|q| format!("?{}", q)).unwrap_or_default(),
        href: Some(url.to_string()),
        protocol_relative: false,
    }
}
/// Parses `input`, prefixing it with `proto://` first when `has_protocol` reports that
/// it carries no `scheme://` prefix of its own.
///
/// Inputs that already have such a prefix are parsed unchanged, so their own scheme
/// wins over `proto`.
pub fn parse_url_with_protocol(input: &str, proto: &str) -> ParsedURL {
    if has_protocol(input) {
        return parse_url(input);
    }
    let with_scheme = format!("{}://{}", proto, input);
    parse_url(&with_scheme)
}
/// Splits a path-like string into `pathname`, `search` and `hash` using
/// `URL_COMPONENTS_RE`.
///
/// `search` keeps its leading `?` and `hash` its leading `#`; a missing part becomes an
/// empty string. All URL-level fields (`protocol`, `host`, `hostname`, `auth`, `href`)
/// are `None` and `protocol_relative` is `false`.
pub fn parse_path(input: &str) -> ParsedURL {
    let groups = match URL_COMPONENTS_RE.captures(input) {
        Some(found) => found,
        // Same fallback as a failed match on the input: capture against "" instead.
        None => URL_COMPONENTS_RE.captures("").unwrap(),
    };
    // Text of capture group `index`, or an empty string when the group did not take part.
    let text_of = |index: usize| {
        groups
            .get(index)
            .map(|m| m.as_str().to_string())
            .unwrap_or_default()
    };

    ParsedURL {
        protocol: None,
        host: None,
        hostname: None,
        auth: None,
        pathname: text_of(1),
        search: text_of(2),
        hash: text_of(3),
        href: None,
        protocol_relative: false,
    }
}
/// Splits a `user:password` string into a [`ParsedAuth`], percent-decoding both halves.
///
/// Only the first `:` acts as the separator, so a password that itself contains `:`
/// is kept whole (`"user:pa:ss"` yields the password `"pa:ss"`). When there is no `:`
/// the whole input is the username and the password is empty. Bytes that do not form
/// valid UTF-8 after decoding are replaced lossily.
pub fn parse_auth(input: &str) -> ParsedAuth {
    let (username, password) = input.split_once(':').unwrap_or((input, ""));
    ParsedAuth {
        username: percent_decode_str(username)
            .decode_utf8_lossy()
            .into_owned(),
        password: percent_decode_str(password)
            .decode_utf8_lossy()
            .into_owned(),
    }
}
/// Splits a `host[:port]` string into a [`ParsedHost`] using `URL_HOST_RE`.
///
/// The hostname is percent-decoded (lossily for invalid UTF-8); the port is returned
/// as the matched digits, or an empty string when none were found.
///
/// NOTE(review): the pattern ends the hostname at the first `:`, so a bracketed IPv6
/// literal such as `[::1]:8080` would not be split as intended -- confirm whether such
/// inputs can reach this function.
pub fn parse_host(input: &str) -> ParsedHost {
    let groups = match URL_HOST_RE.captures(input) {
        Some(found) => found,
        // Same fallback as a failed match on the input: capture against "" instead.
        None => URL_HOST_RE.captures("").unwrap(),
    };
    let raw_hostname = groups.get(1).map_or("", |m| m.as_str());
    let raw_port = groups.get(2).map_or("", |m| m.as_str());

    ParsedHost {
        hostname: percent_decode_str(raw_hostname)
            .decode_utf8_lossy()
            .into_owned(),
        port: raw_port.to_string(),
    }
}
fn has_protocol(input: &str) -> bool {
Regex::new(r"^[\s\w+.-]{2,}://").unwrap().is_match(input)
}
/// Options bundle built with a small builder-style API.
///
/// Derives `Debug` and `Clone` like the other public structs in this file, so values
/// can be logged and duplicated.
#[derive(Debug, Clone, Default)]
pub struct ParsedURLConfig {
    /// Trailing-slash flag; `false` by default.
    ///
    /// NOTE(review): nothing in this file reads this flag -- presumably it controls
    /// trailing-slash handling elsewhere; confirm against callers.
    pub trailing_slash: bool,
}

impl ParsedURLConfig {
    /// Creates a configuration with every option at its default value.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the configuration with `trailing_slash` replaced by the given value.
    pub fn with_trailing_slash(mut self, trailing_slash: bool) -> Self {
        self.trailing_slash = trailing_slash;
        self
    }
}
impl ParsedURL {
    /// Query part for output: empty stays empty, anything else is guaranteed to start
    /// with `?`.
    fn format_search(&self) -> String {
        match self.search.as_str() {
            "" => String::new(),
            query if query.starts_with('?') => query.to_string(),
            query => format!("?{}", query),
        }
    }

    /// Fragment part for output, emitted exactly as stored (no `#` is added).
    fn format_hash(&self) -> String {
        self.hash.clone()
    }

    /// Credentials followed by `@`, or an empty string when `auth` is absent or empty.
    fn format_auth(&self) -> String {
        match self.auth.as_deref() {
            Some(credentials) if !credentials.is_empty() => format!("{}@", credentials),
            _ => String::new(),
        }
    }

    /// The stored `host`, or an empty string when there is none.
    fn format_host(&self) -> String {
        self.host.clone().unwrap_or_default()
    }

    /// Scheme prefix for output.
    ///
    /// Protocol-relative URLs always yield `//`. Otherwise a non-empty protocol is
    /// emitted with a `://` suffix (added only if not already present), and a missing
    /// or empty protocol yields an empty string.
    fn format_protocol(&self) -> String {
        if self.protocol_relative {
            return "//".to_string();
        }
        match self.protocol.as_deref() {
            None | Some("") => String::new(),
            Some(scheme) if scheme.ends_with("://") => scheme.to_string(),
            Some(scheme) => format!("{}://", scheme),
        }
    }

    /// Path for output; a bare root path (`/`) is dropped entirely.
    fn format_pathname(&self) -> String {
        match self.pathname.as_str() {
            "/" => String::new(),
            path => path.to_string(),
        }
    }

    /// Reassembles the URL as protocol, auth, host, pathname, search and hash, in that
    /// order, each formatted by the helper of the same name.
    pub fn stringify(&self) -> String {
        [
            self.format_protocol(),
            self.format_auth(),
            self.format_host(),
            self.format_pathname(),
            self.format_search(),
            self.format_hash(),
        ]
        .concat()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for filling the `Option<String>` fields of a fixture.
    fn some(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    /// A `ParsedURL` with every field empty or unset, used as the base for
    /// struct-update fixtures.
    fn blank() -> ParsedURL {
        ParsedURL {
            protocol: None,
            host: None,
            hostname: None,
            auth: None,
            pathname: String::new(),
            search: String::new(),
            hash: String::new(),
            href: None,
            protocol_relative: false,
        }
    }

    #[test]
    fn test_parse_url() {
        // Regular absolute URL with every component present.
        let full = parse_url("https://example.com/path?query=value#fragment");
        assert_eq!(full.protocol.as_deref(), Some("https"));
        assert_eq!(full.host.as_deref(), Some("example.com"));
        assert_eq!(full.hostname.as_deref(), Some("example.com"));
        assert_eq!(full.pathname, "/path");
        assert_eq!(full.search, "?query=value");
        assert_eq!(full.hash, "#fragment");

        // Special scheme: everything after the colon is kept as the pathname.
        let blob = parse_url("blob:https://example.com/some-path");
        assert_eq!(blob.protocol.as_deref(), Some("blob"));
        assert_eq!(blob.pathname, "https://example.com/some-path");
    }

    #[test]
    fn test_parse_url_with_protocol() {
        // Scheme-less input gets the fallback protocol.
        let bare = parse_url_with_protocol("example.com/path", "https");
        assert_eq!(bare.protocol.as_deref(), Some("https"));
        assert_eq!(bare.host.as_deref(), Some("example.com"));
        assert_eq!(bare.hostname.as_deref(), Some("example.com"));
        assert_eq!(bare.pathname, "/path");

        // An explicit scheme wins over the fallback.
        let explicit = parse_url_with_protocol("http://example.com", "https");
        assert_eq!(explicit.protocol.as_deref(), Some("http"));

        // Credentials survive the prefixing.
        let with_auth = parse_url_with_protocol("user:pass@example.com", "https");
        assert_eq!(with_auth.protocol.as_deref(), Some("https"));
        assert_eq!(with_auth.auth.as_deref(), Some("user:pass"));

        // A port is kept in `host` but not in `hostname`.
        let with_port = parse_url_with_protocol("localhost:8080/api", "http");
        assert_eq!(with_port.protocol.as_deref(), Some("http"));
        assert_eq!(with_port.host.as_deref(), Some("localhost:8080"));
        assert_eq!(with_port.hostname.as_deref(), Some("localhost"));
    }

    #[test]
    fn test_parse_path() {
        let all_parts = parse_path("/path?query#hash");
        assert_eq!(all_parts.pathname, "/path");
        assert_eq!(all_parts.search, "?query");
        assert_eq!(all_parts.hash, "#hash");

        let path_only = parse_path("/users/123/posts");
        assert_eq!(path_only.pathname, "/users/123/posts");

        let long_query = parse_path("/search?q=test&page=1&sort=desc#top");
        assert_eq!(long_query.pathname, "/search");
        assert_eq!(long_query.search, "?q=test&page=1&sort=desc");
        assert_eq!(long_query.hash, "#top");
    }

    #[test]
    fn test_parse_auth() {
        // (input, expected username, expected password)
        let cases = [
            ("admin:secret123", "admin", "secret123"),
            ("user%40example.com:pass%21word", "user@example.com", "pass!word"),
            ("username", "username", ""),
        ];
        for &(input, username, password) in &cases {
            let auth = parse_auth(input);
            assert_eq!(auth.username, username);
            assert_eq!(auth.password, password);
        }
    }

    #[test]
    fn test_parse_host() {
        // (input, expected hostname, expected port)
        let cases = [
            ("example.com:8080", "example.com", "8080"),
            ("example.com", "example.com", ""),
            ("sub.example%2Ecom:9000", "sub.example.com", "9000"),
        ];
        for &(input, hostname, port) in &cases {
            let host = parse_host(input);
            assert_eq!(host.hostname, hostname);
            assert_eq!(host.port, port);
        }
    }

    #[test]
    fn test_has_protocol() {
        let with_scheme = [
            "http://example.com",
            "https://example.com",
            "ftp://files.example.com",
            "file:///path/to/file.txt",
            "myapp://open/resource",
        ];
        for &input in &with_scheme {
            assert!(has_protocol(input));
        }
        assert!(!has_protocol("example.com"));
    }

    #[test]
    fn test_parsed_url_stringify() {
        // Each fixture is paired with the exact string it must serialize to.
        let cases = vec![
            (
                ParsedURL {
                    protocol: some("http"),
                    host: some("example.com"),
                    hostname: some("example.com"),
                    pathname: "/path".to_string(),
                    search: "?query=1".to_string(),
                    hash: "#hash".to_string(),
                    ..blank()
                },
                "http://example.com/path?query=1#hash",
            ),
            (
                ParsedURL {
                    protocol: some("https"),
                    host: some("example.com"),
                    hostname: some("example.com"),
                    auth: some("user:pass"),
                    pathname: "/".to_string(),
                    ..blank()
                },
                "https://user:pass@example.com",
            ),
            (
                ParsedURL {
                    host: some("cdn.example.com"),
                    hostname: some("cdn.example.com"),
                    pathname: "/assets/img.png".to_string(),
                    protocol_relative: true,
                    ..blank()
                },
                "//cdn.example.com/assets/img.png",
            ),
            (
                ParsedURL {
                    protocol: some("http"),
                    host: some("localhost:8080"),
                    hostname: some("localhost"),
                    pathname: "/api".to_string(),
                    ..blank()
                },
                "http://localhost:8080/api",
            ),
            (
                ParsedURL {
                    protocol: some("https"),
                    host: some("api.example.com"),
                    hostname: some("api.example.com"),
                    pathname: "/search".to_string(),
                    search: "?q=test&page=1&sort=desc".to_string(),
                    hash: "#results".to_string(),
                    ..blank()
                },
                "https://api.example.com/search?q=test&page=1&sort=desc#results",
            ),
            (
                ParsedURL {
                    protocol: some("file"),
                    pathname: "/path/to/my file.txt".to_string(),
                    ..blank()
                },
                "file:///path/to/my file.txt",
            ),
        ];
        for (parsed, expected) in &cases {
            assert_eq!(parsed.stringify(), *expected);
        }
    }
}