1use crate::{Transform, TransformError, TransformerCategory};
2
/// Stateless transformer that splits a URL into its components and renders
/// them as a labelled, line-oriented report.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct UrlParser;
6
/// Sample URL containing every component the parser reports; the unit tests
/// in this file use it as the "full" fixture.
pub const DEFAULT_TEST_INPUT: &str = concat!(
    "https://",               // scheme
    "user:pass@",             // userinfo
    "example.com:8080",       // host and port
    "/path/to/resource",      // path
    "?key=value&key2=value2", // query
    "#fragment",              // fragment
);
10
/// Lower-case scheme names that the parser treats as non-hierarchical: they
/// are followed by a single `:` and everything after it is reported as the
/// path, with no authority component.
const NON_HIERARCHICAL_SCHEMES: &[&str] = &[
    "mailto",
    "urn",
    "tel",
    "sms",
    "news",
    "isbn",
];
13
14impl Transform for UrlParser {
15 fn name(&self) -> &'static str {
16 "URL Parser"
17 }
18
19 fn id(&self) -> &'static str {
20 "urlparser"
21 }
22
23 fn description(&self) -> &'static str {
24 "Parses a URL into its components (scheme, authority, path, query, fragment)"
25 }
26
27 fn category(&self) -> TransformerCategory {
28 TransformerCategory::Other
29 }
30
31 fn transform(&self, input: &str) -> Result<String, TransformError> {
33 let input = input.trim();
34 if input.is_empty() {
35 return Err(TransformError::InvalidArgument("Input URL is empty".into()));
36 }
37
38 let mut remainder = input;
39
40 let (scheme, is_hierarchical, remainder_after_scheme) = if let Some(pos) =
43 remainder.find("://")
44 {
45 let scheme_part = &remainder[..pos];
46 if scheme_part.is_empty()
48 || !scheme_part.starts_with(|c: char| c.is_ascii_alphabetic())
49 || !scheme_part
50 .chars()
51 .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.')
52 {
53 return Err(TransformError::InvalidArgument(
54 format!("Invalid characters in scheme before '://': {}", scheme_part).into(),
55 ));
56 }
57 (Some(scheme_part), true, &remainder[pos + 3..]) } else if let Some(pos) = remainder.find(':') {
59 let potential_scheme = &remainder[..pos];
60 if !potential_scheme.is_empty()
62 && potential_scheme.starts_with(|c: char| c.is_ascii_alphabetic())
63 && potential_scheme
64 .chars()
65 .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.')
66 {
67 let lower_scheme = potential_scheme.to_ascii_lowercase();
69 if NON_HIERARCHICAL_SCHEMES.contains(&lower_scheme.as_str()) {
70 (Some(potential_scheme), false, &remainder[pos + 1..])
72 } else {
73 (None, true, remainder) }
77 } else {
78 (None, true, remainder) }
81 } else {
82 (None, true, remainder) };
85
86 remainder = remainder_after_scheme;
88
89 let fragment = if let Some(pos) = remainder.find('#') {
91 let frag = &remainder[pos + 1..];
92 remainder = &remainder[..pos];
93 Some(frag)
94 } else {
95 None
96 };
97
98 let query = if let Some(pos) = remainder.find('?') {
100 let q = &remainder[pos + 1..];
101 remainder = &remainder[..pos];
102 Some(q)
103 } else {
104 None
105 };
106
107 let (authority, path_str) = if !is_hierarchical {
109 (None, remainder)
111 } else if remainder.starts_with("//") {
112 remainder = &remainder[2..];
114 if let Some(pos) = remainder.find('/') {
115 (Some(&remainder[..pos]), &remainder[pos..])
116 } else {
117 (Some(remainder), "")
118 }
119 } else if remainder.starts_with('/') {
120 (None, remainder)
122 } else if let Some(pos) = remainder.find('/') {
123 (Some(&remainder[..pos]), &remainder[pos..])
125 } else {
126 if scheme.is_some() {
128 (Some(remainder), "")
130 } else {
131 let is_likely_host_port = remainder.contains(':')
133 && remainder.chars().filter(|&c| c == ':').count() == 1
134 && remainder
135 .split(':')
136 .nth(1)
137 .unwrap_or("")
138 .chars()
139 .all(|c| c.is_ascii_digit())
140 && !remainder.contains('/')
141 && !remainder.contains('?')
142 && !remainder.contains('#');
143
144 if is_likely_host_port {
145 (Some(remainder), "")
147 } else {
148 (None, remainder)
150 }
151 }
152 };
153
154 let mut userinfo = None;
156 let mut host = None;
157 let mut port = None;
158
159 if let Some(auth_str) = authority {
160 let mut auth_rem = auth_str;
161 if let Some(pos) = auth_rem.rfind('@') {
162 userinfo = Some(&auth_rem[..pos]);
163 auth_rem = &auth_rem[pos + 1..];
164 }
165
166 if let Some(pos) = auth_rem.rfind(':') {
168 if !auth_rem[..pos].contains(':') {
170 host = Some(&auth_rem[..pos]); let port_str = &auth_rem[pos + 1..];
173 if port_str.chars().all(|c| c.is_ascii_digit()) {
174 port = Some(port_str); } } else {
177 host = Some(auth_rem);
179 }
181 } else {
182 host = Some(auth_rem);
184 }
186 }
187
188 let mut result = String::new();
189 result.push_str(&format!("Scheme: {}\n", scheme.unwrap_or("-")));
190 result.push_str(&format!("UserInfo: {}\n", userinfo.unwrap_or("-")));
191 result.push_str(&format!("Host: {}\n", host.unwrap_or("-")));
192 result.push_str(&format!("Port: {}\n", port.unwrap_or("-")));
193 result.push_str(&format!(
194 "Path: {}\n",
195 if path_str.is_empty() { "-" } else { path_str }
196 ));
197 result.push_str(&format!("Query: {}\n", query.unwrap_or("-")));
198 result.push_str(&format!("Fragment: {}", fragment.unwrap_or("-")));
199
200 Ok(result)
201 }
202
203 fn default_test_input(&self) -> &'static str {
204 "https://user:pass@example.com:8080/p/a/t/h?query=string&key=val#fragment"
205 }
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Labels of the report lines, in the order the transformer prints them.
    const LABELS: [&str; 7] = [
        "Scheme", "UserInfo", "Host", "Port", "Path", "Query", "Fragment",
    ];

    /// Returns the trimmed text after the first `:` on the first output line
    /// that starts with `label`, or an empty string when there is none.
    fn get_component(output: &str, label: &str) -> String {
        for line in output.lines() {
            if line.starts_with(label) {
                return match line.split_once(':') {
                    Some((_, value)) => value.trim().to_string(),
                    None => String::new(),
                };
            }
        }
        String::new()
    }

    /// Parses `url` and compares every component with `expected`, which is
    /// given in the same order as `LABELS`.
    fn assert_components(url: &str, expected: [&str; 7]) {
        let output = UrlParser.transform(url).unwrap();
        for (label, want) in LABELS.iter().zip(expected.iter()) {
            assert_eq!(
                get_component(&output, label),
                *want,
                "component {} of {:?}",
                label,
                url
            );
        }
    }

    /// Asserts that `url` is rejected with `InvalidArgument`.
    fn assert_invalid(url: &str) {
        assert!(matches!(
            UrlParser.transform(url),
            Err(TransformError::InvalidArgument(_))
        ));
    }

    #[test]
    fn test_url_parser_full() {
        assert_components(
            DEFAULT_TEST_INPUT,
            [
                "https",
                "user:pass",
                "example.com",
                "8080",
                "/path/to/resource",
                "key=value&key2=value2",
                "fragment",
            ],
        );
    }

    #[test]
    fn test_url_parser_simple_http() {
        assert_components(
            "http://example.com/home",
            ["http", "-", "example.com", "-", "/home", "-", "-"],
        );
    }

    #[test]
    fn test_url_parser_ftp() {
        assert_components(
            "ftp://user@ftp.example.org/",
            ["ftp", "user", "ftp.example.org", "-", "/", "-", "-"],
        );
    }

    #[test]
    fn test_url_parser_mailto() {
        assert_components(
            "mailto:user@example.com",
            ["mailto", "-", "-", "-", "user@example.com", "-", "-"],
        );
    }

    #[test]
    fn test_url_parser_urn() {
        assert_components(
            "urn:isbn:0451450523",
            ["urn", "-", "-", "-", "isbn:0451450523", "-", "-"],
        );
    }

    #[test]
    fn test_url_parser_path_only() {
        assert_components(
            "/path/only?query#frag",
            ["-", "-", "-", "-", "/path/only", "query", "frag"],
        );
    }

    #[test]
    fn test_url_parser_host_port_only() {
        assert_components(
            "example.com:8080",
            ["-", "-", "example.com", "8080", "-", "-", "-"],
        );
    }

    #[test]
    fn test_url_parser_empty() {
        assert_invalid("");
    }

    #[test]
    fn test_url_parser_invalid_scheme() {
        for url in ["1http://example.com", "://example.com"].iter() {
            assert_invalid(url);
        }
    }
}