use std::collections::HashMap;
use url::Url;
/// Configuration for a batch transformation of URL strings.
///
/// Every option starts out disabled (see `Default`); with all options off,
/// `transform` returns its input unchanged. The stages selected by the flags
/// always run in the order: normalize, merge endpoints, extract parts.
#[derive(Debug, Clone, Default)]
pub struct UrlTransformer {
    /// Merge URLs that share the same host and path into a single URL that
    /// carries the union of their query parameters.
    merge_endpoint: bool,
    /// Output only the host of each URL. Takes precedence over
    /// `show_only_path` and `show_only_param` when several are set.
    show_only_host: bool,
    /// Output only the path of each URL (the bare root path `/` is skipped).
    /// Takes precedence over `show_only_param`.
    show_only_path: bool,
    /// Output only the query string of each URL that has one.
    show_only_param: bool,
    /// Strip a trailing slash from the path, sort the query parameters by key,
    /// then sort and de-duplicate the resulting list.
    normalize_url: bool,
}
impl UrlTransformer {
pub fn new() -> Self {
UrlTransformer {
merge_endpoint: false,
show_only_host: false,
show_only_path: false,
show_only_param: false,
normalize_url: false,
}
}
pub fn with_merge_endpoint(&mut self, merge: bool) -> &mut Self {
self.merge_endpoint = merge;
self
}
pub fn with_show_only_host(&mut self, show: bool) -> &mut Self {
self.show_only_host = show;
self
}
pub fn with_show_only_path(&mut self, show: bool) -> &mut Self {
self.show_only_path = show;
self
}
pub fn with_show_only_param(&mut self, show: bool) -> &mut Self {
self.show_only_param = show;
self
}
pub fn with_normalize_url(&mut self, normalize: bool) -> &mut Self {
self.normalize_url = normalize;
self
}
pub fn transform(&self, urls: Vec<String>) -> Vec<String> {
let mut transformed_urls = urls;
if self.normalize_url {
transformed_urls = self.normalize_urls(transformed_urls);
}
if self.merge_endpoint {
transformed_urls = self.merge_endpoints(transformed_urls);
}
if self.show_only_host || self.show_only_path || self.show_only_param {
transformed_urls = self.extract_url_parts(transformed_urls);
}
transformed_urls
}
fn normalize_urls(&self, urls: Vec<String>) -> Vec<String> {
let mut normalized_urls = Vec::new();
for url_str in urls {
if let Ok(mut url) = Url::parse(&url_str) {
let path = url.path().to_string();
if path.len() > 1 && path.ends_with('/') {
let normalized_path = &path[..path.len() - 1];
url.set_path(normalized_path);
}
if url.query().is_some() {
let mut params: Vec<(String, String)> = url
.query_pairs()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
params.sort_by(|a, b| a.0.cmp(&b.0));
url.set_query(None);
if !params.is_empty() {
let query_string = params
.into_iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join("&");
url.set_query(Some(&query_string));
}
}
normalized_urls.push(url.to_string());
} else {
normalized_urls.push(url_str);
}
}
normalized_urls.sort();
normalized_urls.dedup();
normalized_urls
}
fn merge_endpoints(&self, urls: Vec<String>) -> Vec<String> {
let mut path_groups: HashMap<String, Vec<String>> = HashMap::new();
for url_str in urls {
if let Ok(url) = Url::parse(&url_str) {
let key = format!("{}{}", url.host_str().unwrap_or(""), url.path());
path_groups.entry(key).or_default().push(url_str);
} else {
path_groups
.entry(url_str.clone())
.or_default()
.push(url_str);
}
}
let mut merged_urls = Vec::new();
for (_, group_urls) in path_groups {
if group_urls.len() == 1 {
merged_urls.push(group_urls[0].clone());
} else {
if let Ok(base_url) = Url::parse(&group_urls[0]) {
let mut merged_url = base_url.clone();
let mut all_params = Vec::new();
for url_str in &group_urls {
if let Ok(url) = Url::parse(url_str) {
for (key, value) in url.query_pairs() {
if !all_params.iter().any(|(k, v)| k == &key && v == &value) {
all_params.push((key.to_string(), value.to_string()));
}
}
}
}
if !all_params.is_empty() {
let query_string = all_params
.into_iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join("&");
merged_url.set_query(None);
if !query_string.is_empty() {
merged_url.set_query(Some(&query_string));
}
}
merged_urls.push(merged_url.to_string());
} else {
merged_urls.push(group_urls[0].clone());
}
}
}
merged_urls.sort();
merged_urls
}
fn extract_url_parts(&self, urls: Vec<String>) -> Vec<String> {
let mut extracted_parts = Vec::new();
for url_str in urls {
if let Ok(url) = Url::parse(&url_str) {
if self.show_only_host {
if let Some(host) = url.host_str() {
extracted_parts.push(host.to_string());
}
} else if self.show_only_path {
if url.path() != "/" {
extracted_parts.push(url.path().to_string());
}
} else if self.show_only_param {
if let Some(query) = url.query() {
extracted_parts.push(query.to_string());
}
}
} else {
extracted_parts.push(url_str);
}
}
extracted_parts.sort();
extracted_parts.dedup();
extracted_parts
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned list that `transform` expects.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// URLs sharing host and path are merged into one URL with all parameters.
    #[test]
    fn test_url_transformer_merge_endpoints() {
        let mut transformer = UrlTransformer::new();
        transformer.with_merge_endpoint(true);
        let urls = owned(&[
            "https://example.com/api?param1=value1",
            "https://example.com/api?param2=value2",
            "https://example.com/api?param3=value3",
            "https://other.com/path",
        ]);
        let transformed = transformer.transform(urls);
        assert!(transformed.contains(
            &"https://example.com/api?param1=value1&param2=value2&param3=value3".to_string()
        ));
        assert!(transformed.contains(&"https://other.com/path".to_string()));
    }

    /// Host-only output yields each distinct host once.
    #[test]
    fn test_url_transformer_show_only_host() {
        let mut transformer = UrlTransformer::new();
        transformer.with_show_only_host(true);
        let urls = owned(&[
            "https://example.com/path1",
            "https://example.com/path2",
            "https://other.com/path",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 2);
        assert!(transformed.contains(&"example.com".to_string()));
        assert!(transformed.contains(&"other.com".to_string()));
    }

    /// Path-only output yields each distinct path once, regardless of host.
    #[test]
    fn test_url_transformer_show_only_path() {
        let mut transformer = UrlTransformer::new();
        transformer.with_show_only_path(true);
        let urls = owned(&[
            "https://example.com/path1",
            "https://example.com/path2",
            "https://other.com/path1",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 2);
        assert!(transformed.contains(&"/path1".to_string()));
        assert!(transformed.contains(&"/path2".to_string()));
    }

    /// Param-only output yields each distinct query string once.
    #[test]
    fn test_url_transformer_show_only_param() {
        let mut transformer = UrlTransformer::new();
        transformer.with_show_only_param(true);
        let urls = owned(&[
            "https://example.com/api?param1=value1",
            "https://example.com/api?param2=value2",
            "https://other.com/api?param1=value1",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 2);
        assert!(transformed.contains(&"param1=value1".to_string()));
        assert!(transformed.contains(&"param2=value2".to_string()));
    }

    /// Normalization sorts query parameters so reordered duplicates collapse.
    #[test]
    fn test_url_transformer_normalize_query_params() {
        let mut transformer = UrlTransformer::new();
        transformer.with_normalize_url(true);
        let urls = owned(&[
            "https://example.com/api?b=2&a=1",
            "https://example.com/api?a=1&b=2",
            "https://example.com/path?z=3&y=2&x=1",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 2);
        assert!(transformed.contains(&"https://example.com/api?a=1&b=2".to_string()));
        assert!(transformed.contains(&"https://example.com/path?x=1&y=2&z=3".to_string()));
    }

    /// Normalization strips trailing slashes but keeps the root path.
    #[test]
    fn test_url_transformer_normalize_trailing_slashes() {
        let mut transformer = UrlTransformer::new();
        transformer.with_normalize_url(true);
        let urls = owned(&[
            "https://example.com/api/",
            "https://example.com/api",
            "https://example.com/path/subpath/",
            "https://example.com/path/subpath",
            "https://example.com/",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 3);
        assert!(transformed.contains(&"https://example.com/".to_string()));
        assert!(transformed.contains(&"https://example.com/api".to_string()));
        assert!(transformed.contains(&"https://example.com/path/subpath".to_string()));
        assert!(!transformed.contains(&"https://example.com/api/".to_string()));
        assert!(!transformed.contains(&"https://example.com/path/subpath/".to_string()));
    }

    /// Slash stripping and parameter sorting combine into one canonical URL.
    #[test]
    fn test_url_transformer_normalize_complex() {
        let mut transformer = UrlTransformer::new();
        transformer.with_normalize_url(true);
        let urls = owned(&[
            "https://example.com/api/?c=3&b=2&a=1",
            "https://example.com/api?a=1&b=2&c=3",
            "https://example.com/api/?a=1&c=3&b=2",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 1);
        assert!(transformed.contains(&"https://example.com/api?a=1&b=2&c=3".to_string()));
    }

    /// Normalization followed by merging yields one URL with every parameter.
    #[test]
    fn test_url_transformer_normalize_with_merge_endpoint() {
        let mut transformer = UrlTransformer::new();
        transformer
            .with_normalize_url(true)
            .with_merge_endpoint(true);
        let urls = owned(&[
            "https://example.com/api/?param2=value2&param1=value1",
            "https://example.com/api?param3=value3",
            "https://example.com/api/?param1=value1&param2=value2",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 1);
        let result_url = &transformed[0];
        assert!(result_url.starts_with("https://example.com/api?"));
        assert!(result_url.contains("param1=value1"));
        assert!(result_url.contains("param2=value2"));
        assert!(result_url.contains("param3=value3"));
    }

    /// Unparsable strings survive normalization unchanged.
    #[test]
    fn test_url_transformer_normalize_invalid_urls() {
        let mut transformer = UrlTransformer::new();
        transformer.with_normalize_url(true);
        let urls = owned(&[
            "https://example.com/api?a=1&b=2",
            "not-a-valid-url",
            "https://example.com/api?b=2&a=1",
        ]);
        let transformed = transformer.transform(urls);
        assert_eq!(transformed.len(), 2);
        assert!(transformed.contains(&"https://example.com/api?a=1&b=2".to_string()));
        assert!(transformed.contains(&"not-a-valid-url".to_string()));
    }
}