urlpattern/
lib.rs

1// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
2//! rust-urlpattern is an implementation of the
3//! [URLPattern standard](https://wicg.github.io/urlpattern) for the Rust
4//! programming language.
5//!
6//! For a usage example, see the [UrlPattern] documentation.
7
8mod canonicalize_and_process;
9mod component;
10mod constructor_parser;
11mod error;
12mod matcher;
13mod parser;
14pub mod quirks;
15mod regexp;
16mod tokenizer;
17
18pub use error::Error;
19use serde::Deserialize;
20use serde::Serialize;
21use url::Url;
22
23use crate::canonicalize_and_process::is_special_scheme;
24use crate::canonicalize_and_process::process_base_url;
25use crate::canonicalize_and_process::special_scheme_default_port;
26use crate::canonicalize_and_process::ProcessType;
27use crate::component::Component;
28use crate::regexp::RegExp;
29
/// Options to create a URL pattern.
///
/// With serde, the fields are renamed to camelCase (`ignore_case` is
/// (de)serialized as `ignoreCase`).
#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UrlPatternOptions {
  /// Forwarded as the `ignore_case` parser option when the pathname, search
  /// and hash components are compiled. The derived default is `false`.
  pub ignore_case: bool,
}
36
/// The structured input used to create a URL pattern.
///
/// Every component is optional. A component left as `None` may be filled in
/// from `base_url` when the init is processed.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct UrlPatternInit {
  /// The protocol component, if specified.
  pub protocol: Option<String>,
  /// The username component, if specified.
  pub username: Option<String>,
  /// The password component, if specified.
  pub password: Option<String>,
  /// The hostname component, if specified.
  pub hostname: Option<String>,
  /// The port component, if specified.
  pub port: Option<String>,
  /// The pathname component, if specified. A relative pathname is resolved
  /// against the path of `base_url` during processing.
  pub pathname: Option<String>,
  /// The search component, if specified.
  pub search: Option<String>,
  /// The hash component, if specified.
  pub hash: Option<String>,
  /// Optional base URL that unspecified components are inherited from.
  pub base_url: Option<Url>,
}
50
51impl UrlPatternInit {
52  pub fn parse_constructor_string<R: RegExp>(
53    pattern: &str,
54    base_url: Option<Url>,
55  ) -> Result<UrlPatternInit, Error> {
56    let mut init = constructor_parser::parse_constructor_string::<R>(pattern)?;
57    if base_url.is_none() && init.protocol.is_none() {
58      return Err(Error::BaseUrlRequired);
59    }
60    init.base_url = base_url;
61    Ok(init)
62  }
63
64  // Ref: https://wicg.github.io/urlpattern/#process-a-urlpatterninit
65  // TODO: use UrlPatternInit for arguments?
66  #[allow(clippy::too_many_arguments)]
67  fn process(
68    &self,
69    kind: ProcessType,
70    protocol: Option<String>,
71    username: Option<String>,
72    password: Option<String>,
73    hostname: Option<String>,
74    port: Option<String>,
75    pathname: Option<String>,
76    search: Option<String>,
77    hash: Option<String>,
78  ) -> Result<UrlPatternInit, Error> {
79    let mut result = UrlPatternInit {
80      protocol,
81      username,
82      password,
83      hostname,
84      port,
85      pathname,
86      search,
87      hash,
88      base_url: None,
89    };
90
91    let base_url = if let Some(parsed_base_url) = &self.base_url {
92      if self.protocol.is_none() {
93        result.protocol =
94          Some(process_base_url(parsed_base_url.scheme(), &kind));
95      }
96
97      if kind != ProcessType::Pattern
98        && (self.protocol.is_none()
99          && self.hostname.is_none()
100          && self.port.is_none()
101          && self.username.is_none())
102      {
103        result.username =
104          Some(process_base_url(parsed_base_url.username(), &kind));
105      }
106
107      if kind != ProcessType::Pattern
108        && (self.protocol.is_none()
109          && self.hostname.is_none()
110          && self.port.is_none()
111          && self.username.is_none()
112          && self.password.is_none())
113      {
114        result.password = Some(process_base_url(
115          parsed_base_url.password().unwrap_or_default(),
116          &kind,
117        ));
118      }
119
120      if self.protocol.is_none() && self.hostname.is_none() {
121        result.hostname = Some(process_base_url(
122          parsed_base_url.host_str().unwrap_or_default(),
123          &kind,
124        ));
125      }
126
127      if self.protocol.is_none()
128        && self.hostname.is_none()
129        && self.port.is_none()
130      {
131        result.port =
132          Some(process_base_url(url::quirks::port(parsed_base_url), &kind));
133      }
134
135      if self.protocol.is_none()
136        && self.hostname.is_none()
137        && self.port.is_none()
138        && self.pathname.is_none()
139      {
140        result.pathname = Some(process_base_url(
141          url::quirks::pathname(parsed_base_url),
142          &kind,
143        ));
144      }
145
146      if self.protocol.is_none()
147        && self.hostname.is_none()
148        && self.port.is_none()
149        && self.pathname.is_none()
150        && self.search.is_none()
151      {
152        result.search = Some(process_base_url(
153          parsed_base_url.query().unwrap_or_default(),
154          &kind,
155        ));
156      }
157
158      if self.protocol.is_none()
159        && self.hostname.is_none()
160        && self.port.is_none()
161        && self.pathname.is_none()
162        && self.search.is_none()
163        && self.hash.is_none()
164      {
165        result.hash = Some(process_base_url(
166          parsed_base_url.fragment().unwrap_or_default(),
167          &kind,
168        ));
169      }
170
171      Some(parsed_base_url)
172    } else {
173      None
174    };
175
176    if let Some(protocol) = &self.protocol {
177      result.protocol = Some(canonicalize_and_process::process_protocol_init(
178        protocol, &kind,
179      )?);
180    }
181    if let Some(username) = &self.username {
182      result.username = Some(canonicalize_and_process::process_username_init(
183        username, &kind,
184      )?);
185    }
186    if let Some(password) = &self.password {
187      result.password = Some(canonicalize_and_process::process_password_init(
188        password, &kind,
189      )?);
190    }
191    if let Some(hostname) = &self.hostname {
192      result.hostname = Some(canonicalize_and_process::process_hostname_init(
193        hostname, &kind,
194      )?);
195    }
196    if let Some(port) = &self.port {
197      result.port = Some(canonicalize_and_process::process_port_init(
198        port,
199        result.protocol.as_deref(),
200        &kind,
201      )?);
202    }
203    if let Some(pathname) = &self.pathname {
204      result.pathname = Some(pathname.clone());
205
206      if let Some(base_url) = base_url {
207        if !base_url.cannot_be_a_base()
208          && !is_absolute_pathname(pathname, &kind)
209        {
210          let baseurl_path = url::quirks::pathname(base_url);
211          let slash_index = baseurl_path.rfind('/');
212          if let Some(slash_index) = slash_index {
213            let new_pathname = baseurl_path[..=slash_index].to_string();
214            result.pathname =
215              Some(format!("{}{}", new_pathname, result.pathname.unwrap()));
216          }
217        }
218      }
219
220      result.pathname = Some(canonicalize_and_process::process_pathname_init(
221        &result.pathname.unwrap(),
222        result.protocol.as_deref(),
223        &kind,
224      )?);
225    }
226    if let Some(search) = &self.search {
227      result.search = Some(canonicalize_and_process::process_search_init(
228        search, &kind,
229      )?);
230    }
231    if let Some(hash) = &self.hash {
232      result.hash =
233        Some(canonicalize_and_process::process_hash_init(hash, &kind)?);
234    }
235    Ok(result)
236  }
237}
238
239// Ref: https://wicg.github.io/urlpattern/#is-an-absolute-pathname
240fn is_absolute_pathname(
241  input: &str,
242  kind: &canonicalize_and_process::ProcessType,
243) -> bool {
244  if input.is_empty() {
245    return false;
246  }
247  if input.starts_with('/') {
248    return true;
249  }
250  if kind == &canonicalize_and_process::ProcessType::Url {
251    return false;
252  }
253  // TODO: input code point length
254  if input.len() < 2 {
255    return false;
256  }
257
258  input.starts_with("\\/") || input.starts_with("{/")
259}
260
// Ref: https://wicg.github.io/urlpattern/#urlpattern
/// A UrlPattern that can be matched against.
///
/// # Examples
///
/// ```
/// use urlpattern::UrlPattern;
/// use urlpattern::UrlPatternInit;
/// use urlpattern::UrlPatternMatchInput;
///
///# fn main() {
/// // Create the UrlPattern to match against.
/// let init = UrlPatternInit {
///   pathname: Some("/users/:id".to_owned()),
///   ..Default::default()
/// };
/// let pattern = <UrlPattern>::parse(init, Default::default()).unwrap();
///
/// // Match the pattern against a URL.
/// let url = "https://example.com/users/123".parse().unwrap();
/// let result = pattern.exec(UrlPatternMatchInput::Url(url)).unwrap().unwrap();
/// assert_eq!(result.pathname.groups.get("id").unwrap().as_ref().unwrap(), "123");
///# }
/// ```
// The type parameter `R` selects the regular expression implementation used
// by the components; it defaults to `regex::Regex`.
#[derive(Debug)]
pub struct UrlPattern<R: RegExp = regex::Regex> {
  // One compiled component per URL part. Each is matched independently, and a
  // URL only matches the pattern when all eight components match.
  protocol: Component<R>,
  username: Component<R>,
  password: Component<R>,
  hostname: Component<R>,
  port: Component<R>,
  pathname: Component<R>,
  search: Component<R>,
  hash: Component<R>,
}
296
/// The input that a [UrlPattern] is tested or executed against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UrlPatternMatchInput {
  /// A structured description of the URL components to match.
  Init(UrlPatternInit),
  /// An already parsed URL.
  Url(Url),
}
302
303impl<R: RegExp> UrlPattern<R> {
304  // Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-urlpattern
305  /// Parse a [UrlPatternInit] into a [UrlPattern].
306  pub fn parse(
307    init: UrlPatternInit,
308    options: UrlPatternOptions,
309  ) -> Result<Self, Error> {
310    Self::parse_internal(init, true, options)
311  }
312
313  pub(crate) fn parse_internal(
314    init: UrlPatternInit,
315    report_regex_errors: bool,
316    options: UrlPatternOptions,
317  ) -> Result<Self, Error> {
318    let mut processed_init = init.process(
319      ProcessType::Pattern,
320      None,
321      None,
322      None,
323      None,
324      None,
325      None,
326      None,
327      None,
328    )?;
329
330    //  If processedInit["protocol"] is a special scheme and processedInit["port"] is its corresponding default port
331    if let Some(protocol) = &processed_init.protocol {
332      if is_special_scheme(protocol) {
333        let default_port = special_scheme_default_port(protocol);
334        if default_port == processed_init.port.as_deref() {
335          processed_init.port = Some(String::new())
336        }
337      }
338    }
339
340    let protocol = Component::compile(
341      processed_init.protocol.as_deref(),
342      canonicalize_and_process::canonicalize_protocol,
343      parser::Options::default(),
344    )?
345    .optionally_transpose_regex_error(report_regex_errors)?;
346
347    let hostname_is_ipv6 = processed_init
348      .hostname
349      .as_deref()
350      .map(hostname_pattern_is_ipv6_address)
351      .unwrap_or(false);
352
353    let hostname = if hostname_is_ipv6 {
354      Component::compile(
355        processed_init.hostname.as_deref(),
356        canonicalize_and_process::canonicalize_ipv6_hostname,
357        parser::Options::hostname(),
358      )?
359      .optionally_transpose_regex_error(report_regex_errors)?
360    } else {
361      Component::compile(
362        processed_init.hostname.as_deref(),
363        canonicalize_and_process::canonicalize_hostname,
364        parser::Options::hostname(),
365      )?
366      .optionally_transpose_regex_error(report_regex_errors)?
367    };
368
369    let compile_options = parser::Options {
370      ignore_case: options.ignore_case,
371      ..Default::default()
372    };
373
374    let pathname = {
375      // Determine if path is non-opaque using the same criteria as process_pathname_init
376      let protocol_is_empty = processed_init
377        .protocol
378        .as_ref()
379        .is_some_and(|p| p.is_empty());
380      let has_leading_slash = processed_init
381        .pathname
382        .as_ref()
383        .is_some_and(|p| p.starts_with('/'));
384      let is_non_opaque = protocol_is_empty
385        || protocol.protocol_component_matches_special_scheme()
386        || has_leading_slash;
387
388      if is_non_opaque {
389        Component::compile(
390          processed_init.pathname.as_deref(),
391          canonicalize_and_process::canonicalize_pathname,
392          parser::Options {
393            ignore_case: options.ignore_case,
394            ..parser::Options::pathname()
395          },
396        )?
397        .optionally_transpose_regex_error(report_regex_errors)?
398      } else {
399        Component::compile(
400          processed_init.pathname.as_deref(),
401          canonicalize_and_process::canonicalize_an_opaque_pathname,
402          compile_options.clone(),
403        )?
404        .optionally_transpose_regex_error(report_regex_errors)?
405      }
406    };
407
408    Ok(UrlPattern {
409      protocol,
410      username: Component::compile(
411        processed_init.username.as_deref(),
412        canonicalize_and_process::canonicalize_username,
413        parser::Options::default(),
414      )?
415      .optionally_transpose_regex_error(report_regex_errors)?,
416      password: Component::compile(
417        processed_init.password.as_deref(),
418        canonicalize_and_process::canonicalize_password,
419        parser::Options::default(),
420      )?
421      .optionally_transpose_regex_error(report_regex_errors)?,
422      hostname,
423      port: Component::compile(
424        processed_init.port.as_deref(),
425        |port| canonicalize_and_process::canonicalize_port(port, None),
426        parser::Options::default(),
427      )?
428      .optionally_transpose_regex_error(report_regex_errors)?,
429      pathname,
430      search: Component::compile(
431        processed_init.search.as_deref(),
432        canonicalize_and_process::canonicalize_search,
433        compile_options.clone(),
434      )?
435      .optionally_transpose_regex_error(report_regex_errors)?,
436      hash: Component::compile(
437        processed_init.hash.as_deref(),
438        canonicalize_and_process::canonicalize_hash,
439        compile_options,
440      )?
441      .optionally_transpose_regex_error(report_regex_errors)?,
442    })
443  }
444
445  /// The pattern used to match against the protocol of the URL.
446  pub fn protocol(&self) -> &str {
447    &self.protocol.pattern_string
448  }
449
450  /// The pattern used to match against the username of the URL.
451  pub fn username(&self) -> &str {
452    &self.username.pattern_string
453  }
454
455  /// The pattern used to match against the password of the URL.
456  pub fn password(&self) -> &str {
457    &self.password.pattern_string
458  }
459
460  /// The pattern used to match against the hostname of the URL.
461  pub fn hostname(&self) -> &str {
462    &self.hostname.pattern_string
463  }
464
465  /// The pattern used to match against the port of the URL.
466  pub fn port(&self) -> &str {
467    &self.port.pattern_string
468  }
469
470  /// The pattern used to match against the pathname of the URL.
471  pub fn pathname(&self) -> &str {
472    &self.pathname.pattern_string
473  }
474
475  /// The pattern used to match against the search string of the URL.
476  pub fn search(&self) -> &str {
477    &self.search.pattern_string
478  }
479
480  /// The pattern used to match against the hash fragment of the URL.
481  pub fn hash(&self) -> &str {
482    &self.hash.pattern_string
483  }
484
485  /// Returns whether the URLPattern contains one or more groups which uses regular expression matching.
486  pub fn has_regexp_groups(&self) -> bool {
487    self.protocol.has_regexp_group
488      || self.username.has_regexp_group
489      || self.password.has_regexp_group
490      || self.hostname.has_regexp_group
491      || self.port.has_regexp_group
492      || self.pathname.has_regexp_group
493      || self.search.has_regexp_group
494      || self.hash.has_regexp_group
495  }
496
497  // Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-test
498  /// Test if a given [UrlPatternInput] (with optional base url), matches the
499  /// pattern.
500  pub fn test(&self, input: UrlPatternMatchInput) -> Result<bool, Error> {
501    self.matches(input).map(|res| res.is_some())
502  }
503
504  // Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-exec
505  /// Execute the pattern against a [UrlPatternInput] (with optional base url),
506  /// returning a [UrlPatternResult] if the pattern matches. If the pattern
507  /// doesn't match, returns `None`.
508  pub fn exec(
509    &self,
510    input: UrlPatternMatchInput,
511  ) -> Result<Option<UrlPatternResult>, Error> {
512    self.matches(input)
513  }
514
515  // Ref: https://wicg.github.io/urlpattern/#match
516  fn matches(
517    &self,
518    input: UrlPatternMatchInput,
519  ) -> Result<Option<UrlPatternResult>, Error> {
520    let input = match quirks::parse_match_input(input) {
521      Some(input) => input,
522      None => return Ok(None),
523    };
524
525    let protocol_exec_result = self.protocol.matcher.matches(&input.protocol);
526    let username_exec_result = self.username.matcher.matches(&input.username);
527    let password_exec_result = self.password.matcher.matches(&input.password);
528    let hostname_exec_result = self.hostname.matcher.matches(&input.hostname);
529    let port_exec_result = self.port.matcher.matches(&input.port);
530    let pathname_exec_result = self.pathname.matcher.matches(&input.pathname);
531    let search_exec_result = self.search.matcher.matches(&input.search);
532    let hash_exec_result = self.hash.matcher.matches(&input.hash);
533
534    match (
535      protocol_exec_result,
536      username_exec_result,
537      password_exec_result,
538      hostname_exec_result,
539      port_exec_result,
540      pathname_exec_result,
541      search_exec_result,
542      hash_exec_result,
543    ) {
544      (
545        Some(protocol_exec_result),
546        Some(username_exec_result),
547        Some(password_exec_result),
548        Some(hostname_exec_result),
549        Some(port_exec_result),
550        Some(pathname_exec_result),
551        Some(search_exec_result),
552        Some(hash_exec_result),
553      ) => Ok(Some(UrlPatternResult {
554        protocol: self
555          .protocol
556          .create_match_result(input.protocol.clone(), protocol_exec_result),
557        username: self
558          .username
559          .create_match_result(input.username.clone(), username_exec_result),
560        password: self
561          .password
562          .create_match_result(input.password.clone(), password_exec_result),
563        hostname: self
564          .hostname
565          .create_match_result(input.hostname.clone(), hostname_exec_result),
566        port: self
567          .port
568          .create_match_result(input.port.clone(), port_exec_result),
569        pathname: self
570          .pathname
571          .create_match_result(input.pathname.clone(), pathname_exec_result),
572        search: self
573          .search
574          .create_match_result(input.search.clone(), search_exec_result),
575        hash: self
576          .hash
577          .create_match_result(input.hash.clone(), hash_exec_result),
578      })),
579      _ => Ok(None),
580    }
581  }
582}
583
// Ref: https://wicg.github.io/urlpattern/#hostname-pattern-is-an-ipv6-address
/// Reports whether a hostname pattern looks like an IPv6 address.
///
/// That is the case when the pattern is at least two bytes long and starts
/// with `[`, `{[` or `\[`.
fn hostname_pattern_is_ipv6_address(input: &str) -> bool {
  // All characters of interest are ASCII, so inspecting the leading bytes is
  // equivalent to inspecting the leading code points, and a pattern shorter
  // than two bytes can never match either arm.
  matches!(
    input.as_bytes(),
    [b'[', _, ..] | [b'{' | b'\\', b'[', ..]
  )
}
593
// Ref: https://wicg.github.io/urlpattern/#dictdef-urlpatternresult
/// A result of a URL pattern match.
///
/// Holds one [UrlPatternComponentResult] per URL component.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlPatternResult {
  /// Match result for the protocol component.
  pub protocol: UrlPatternComponentResult,
  /// Match result for the username component.
  pub username: UrlPatternComponentResult,
  /// Match result for the password component.
  pub password: UrlPatternComponentResult,
  /// Match result for the hostname component.
  pub hostname: UrlPatternComponentResult,
  /// Match result for the port component.
  pub port: UrlPatternComponentResult,
  /// Match result for the pathname component.
  pub pathname: UrlPatternComponentResult,
  /// Match result for the search component.
  pub search: UrlPatternComponentResult,
  /// Match result for the hash component.
  pub hash: UrlPatternComponentResult,
}
607
// Ref: https://wicg.github.io/urlpattern/#dictdef-urlpatterncomponentresult
/// A result of a URL pattern match on a single component.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlPatternComponentResult {
  /// The matched input for this component.
  pub input: String,
  /// The values for all named groups in the pattern, keyed by group name.
  /// A group's value is an `Option`, so a key can be present with `None`.
  pub groups: std::collections::HashMap<String, Option<String>>,
}
617
618#[cfg(test)]
619mod tests {
620  use regex::Regex;
621  use std::collections::HashMap;
622
623  use serde::Deserialize;
624  use serde::Serialize;
625  use url::Url;
626
627  use crate::quirks;
628  use crate::quirks::StringOrInit;
629  use crate::UrlPatternComponentResult;
630  use crate::UrlPatternOptions;
631  use crate::UrlPatternResult;
632
633  use super::UrlPattern;
634  use super::UrlPatternInit;
635
  /// The `expected_match` entry of a test case: either a plain string (the
  /// harness only handles the literal `"error"`) or a full match result.
  #[derive(Debug, Deserialize)]
  #[serde(untagged)]
  #[allow(clippy::large_enum_variant)]
  enum ExpectedMatch {
    /// A bare string; `"error"` means matching is expected to fail.
    String(String),
    /// The component results the match is expected to produce.
    MatchResult(MatchResult),
  }
643
  /// Expected result for a single component as written in the test data.
  /// It is converted into a `UrlPatternComponentResult` before comparison.
  #[derive(Debug, Deserialize)]
  struct ComponentResult {
    /// The expected matched input of the component.
    input: String,
    /// The expected group values, keyed by group name.
    groups: HashMap<String, Option<String>>,
  }
649
  /// One element of a test case's `pattern` array: either an options object
  /// or a string / init value. The enum is untagged, so serde tries the
  /// variants in declaration order (`Options` first).
  #[allow(clippy::large_enum_variant)]
  #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
  #[serde(untagged)]
  pub enum StringOrInitOrOptions {
    /// Pattern options such as `ignoreCase`.
    Options(UrlPatternOptions),
    /// A constructor string or a structured init.
    StringOrInit(quirks::StringOrInit),
  }
657
  /// A single entry of the JSON test data consumed by `test_cases`.
  #[derive(Debug, Deserialize)]
  struct TestCase {
    /// When present, the case is skipped and this text is printed as the
    /// reason.
    skip: Option<String>,
    /// Arguments used to construct the pattern: the pattern input, an
    /// optional base URL string, and optional options.
    pattern: Vec<StringOrInitOrOptions>,
    /// Arguments used for matching: the match input and an optional base URL
    /// string. Defaults to an empty list.
    #[serde(default)]
    inputs: Vec<quirks::StringOrInit>,
    /// Expected component pattern strings, or the string `"error"` when
    /// constructing the pattern is expected to fail.
    expected_obj: Option<quirks::StringOrInit>,
    /// Expected match outcome; absent when no match is expected.
    expected_match: Option<ExpectedMatch>,
    /// Names of components whose pattern is expected to be exactly the empty
    /// string. Defaults to an empty list.
    #[serde(default)]
    exactly_empty_components: Vec<String>,
  }
669
  /// The expected outcome of a successful match as written in the test data.
  /// Components that are omitted fall back to a default result built by the
  /// harness.
  #[derive(Debug, Deserialize)]
  struct MatchResult {
    /// Expected echoed inputs (match input plus optional base URL string).
    /// Parsed by `deserialize_match_result_inputs`; `None` when absent.
    #[serde(deserialize_with = "deserialize_match_result_inputs")]
    #[serde(default)]
    inputs: Option<(quirks::StringOrInit, Option<String>)>,

    // Expected per-component results; `None` means "use the default".
    protocol: Option<ComponentResult>,
    username: Option<ComponentResult>,
    password: Option<ComponentResult>,
    hostname: Option<ComponentResult>,
    port: Option<ComponentResult>,
    pathname: Option<ComponentResult>,
    search: Option<ComponentResult>,
    hash: Option<ComponentResult>,
  }
685
686  fn deserialize_match_result_inputs<'de, D>(
687    deserializer: D,
688  ) -> Result<Option<(quirks::StringOrInit, Option<String>)>, D::Error>
689  where
690    D: serde::Deserializer<'de>,
691  {
692    #[derive(Debug, Deserialize)]
693    #[serde(untagged)]
694    enum MatchResultInputs {
695      OneArgument((quirks::StringOrInit,)),
696      TwoArguments(quirks::StringOrInit, String),
697    }
698
699    let res = Option::<MatchResultInputs>::deserialize(deserializer)?;
700    Ok(match res {
701      Some(MatchResultInputs::OneArgument((a,))) => Some((a, None)),
702      Some(MatchResultInputs::TwoArguments(a, b)) => Some((a, Some(b))),
703      None => None,
704    })
705  }
706
  /// Runs a single test case from the test data.
  ///
  /// The steps are, in order: collect the constructor arguments from
  /// `case.pattern`, honour `case.skip`, build the pattern, compare every
  /// component's pattern string with the expectation, and finally run
  /// `test` / `exec` against `case.inputs` and compare with
  /// `case.expected_match`. Panics (via assertions) on any mismatch.
  fn test_case(case: TestCase) {
    let mut input = quirks::StringOrInit::Init(Default::default());
    let mut base_url = None;
    let mut options = None;

    // Positional constructor arguments: index 0 is the pattern input, index 1
    // the base URL string. Options objects may appear at any position.
    for (i, pattern_input) in case.pattern.into_iter().enumerate() {
      match pattern_input {
        StringOrInitOrOptions::StringOrInit(str_or_init) => {
          if i == 0 {
            input = str_or_init;
          } else if i == 1 {
            base_url = match str_or_init {
              StringOrInit::String(str) => Some(str.clone()),
              StringOrInit::Init(_) => None,
            };
          } else if matches!(&case.expected_obj, Some(StringOrInit::String(s)) if s == "error")
          {
            // Too many string/init arguments, and the case expects an error.
            println!("Expected not to pass due to bad parameters");
            println!("✅ Passed");
            return;
          } else {
            panic!("Failed to parse testcase");
          }
        }
        StringOrInitOrOptions::Options(opts) => {
          options = Some(opts);
        }
      }
    }

    println!("\n=====");
    println!(
      "Pattern: {}, {}",
      serde_json::to_string(&input).unwrap(),
      serde_json::to_string(&base_url).unwrap()
    );
    if let Some(options) = &options {
      println!("Options: {}", serde_json::to_string(&options).unwrap(),);
    }

    if let Some(reason) = case.skip {
      println!("🟠 Skipping: {reason}");
      return;
    }

    let init_res = quirks::process_construct_pattern_input(
      input.clone(),
      base_url.as_deref(),
    );

    let res = init_res.and_then(|init_res| {
      UrlPattern::<Regex>::parse(init_res, options.unwrap_or_default())
    });
    // Either confirm the expected construction error, or turn the expected
    // object into a `UrlPatternInit` holding the expected pattern strings.
    let expected_obj = match case.expected_obj {
      Some(StringOrInit::String(s)) if s == "error" => {
        assert!(res.is_err());
        println!("✅ Passed");
        return;
      }
      Some(StringOrInit::String(_)) => unreachable!(),
      Some(StringOrInit::Init(init)) => {
        let base_url = init.base_url.map(|url| url.parse().unwrap());
        UrlPatternInit {
          protocol: init.protocol,
          username: init.username,
          password: init.password,
          hostname: init.hostname,
          port: init.port,
          pathname: init.pathname,
          search: init.search,
          hash: init.hash,
          base_url,
        }
      }
      None => UrlPatternInit::default(),
    };
    let pattern = res.expect("failed to parse pattern");

    // A base URL given inside the init takes the place of the positional one
    // for the expectation logic below.
    if let StringOrInit::Init(quirks::UrlPatternInit {
      base_url: Some(url),
      ..
    }) = &input
    {
      base_url = Some(url.clone())
    }

    // Asserts that the compiled pattern string of `$field` equals the
    // expectation. When the test data gives no explicit expectation, one is
    // derived, in this order: exactly-empty components, the value given in
    // the init, "*" when an earlier component was given in the init, the
    // corresponding base URL part (not for username/password), and "*".
    macro_rules! assert_field {
      ($field:ident) => {{
        let mut expected = expected_obj.$field;
        if expected == None {
          if case
            .exactly_empty_components
            .contains(&stringify!($field).to_owned())
          {
            expected = Some(String::new())
          } else if let StringOrInit::Init(quirks::UrlPatternInit {
            $field: Some($field),
            ..
          }) = &input
          {
            expected = Some($field.to_owned())
          } else if {
            if let StringOrInit::Init(init) = &input {
              match stringify!($field) {
                "protocol" => false,
                "hostname" => init.protocol.is_some(),
                "port" => init.protocol.is_some() || init.hostname.is_some(),
                "username" => false,
                "password" => false,
                "pathname" => {
                  init.protocol.is_some()
                    || init.hostname.is_some()
                    || init.port.is_some()
                }
                "search" => {
                  init.protocol.is_some()
                    || init.hostname.is_some()
                    || init.port.is_some()
                    || init.pathname.is_some()
                }
                "hash" => {
                  init.protocol.is_some()
                    || init.hostname.is_some()
                    || init.port.is_some()
                    || init.pathname.is_some()
                    || init.search.is_some()
                }
                _ => unreachable!(),
              }
            } else {
              false
            }
          } {
            expected = Some("*".to_owned())
          } else if let Some(base_url) =
            base_url.as_ref().and_then(|base_url| {
              if !matches!(stringify!($field), "username" | "password") {
                Some(base_url)
              } else {
                None
              }
            })
          {
            let base_url = Url::parse(base_url).unwrap();
            let field = url::quirks::$field(&base_url);
            // Drop the last character of a non-empty protocol and the first
            // character of a non-empty search / hash value.
            let field: String = match stringify!($field) {
              "protocol" if !field.is_empty() => {
                field[..field.len() - 1].to_owned()
              }
              "search" | "hash" if !field.is_empty() => field[1..].to_owned(),
              _ => field.to_owned(),
            };
            expected = Some(field)
          } else {
            expected = Some("*".to_owned())
          }
        }

        let expected = expected.unwrap();
        let pattern = &pattern.$field.pattern_string;

        assert_eq!(
          &expected,
          pattern,
          "pattern for {} does not match",
          stringify!($field)
        );
      }};
    }

    assert_field!(protocol);
    assert_field!(username);
    assert_field!(password);
    assert_field!(hostname);
    assert_field!(port);
    assert_field!(pathname);
    assert_field!(search);
    assert_field!(hash);

    // Match arguments: the first entry is the match input, the second (if
    // any) must be a base URL string.
    let input = case.inputs.first().cloned();
    let base_url = case.inputs.get(1).map(|input| match input {
      StringOrInit::String(str) => str.clone(),
      StringOrInit::Init(_) => unreachable!(),
    });

    println!(
      "Input: {}, {}",
      serde_json::to_string(&input).unwrap(),
      serde_json::to_string(&base_url).unwrap(),
    );

    let input = input.unwrap_or_else(|| StringOrInit::Init(Default::default()));

    let expected_input = (input.clone(), base_url.clone());

    let match_input = quirks::process_match_input(input, base_url.as_deref());

    // An expected "error" is satisfied if processing the match input fails...
    if let Some(ExpectedMatch::String(s)) = &case.expected_match {
      if s == "error" {
        assert!(match_input.is_err());
        println!("✅ Passed");
        return;
      }
    };

    let input = match_input.expect("failed to parse match input");

    if input.is_none() {
      assert!(case.expected_match.is_none());
      println!("✅ Passed");
      return;
    }
    let test_res = if let Some((input, _)) = input.clone() {
      pattern.test(input)
    } else {
      Ok(false)
    };
    let exec_res = if let Some((input, _)) = input.clone() {
      pattern.exec(input)
    } else {
      Ok(None)
    };
    // NOTE(review): this block can only be reached when processing the match
    // input succeeded, in which case the identical check above has already
    // failed its assertion — so this looks unreachable; confirm.
    if let Some(ExpectedMatch::String(s)) = &case.expected_match {
      if s == "error" {
        assert!(test_res.is_err());
        assert!(exec_res.is_err());
        println!("✅ Passed");
        return;
      }
    };

    let expected_match = case.expected_match.map(|x| match x {
      ExpectedMatch::String(_) => unreachable!(),
      ExpectedMatch::MatchResult(x) => x,
    });

    let test = test_res.unwrap();
    let actual_match = exec_res.unwrap();

    assert_eq!(
      expected_match.is_some(),
      test,
      "pattern.test result is not correct"
    );

    let expected_match = match expected_match {
      Some(x) => x,
      None => {
        assert!(actual_match.is_none(), "expected match to be None");
        println!("✅ Passed");
        return;
      }
    };

    let actual_match = actual_match.expect("expected match to be Some");

    let expected_inputs = expected_match.inputs.unwrap_or(expected_input);

    let (_, inputs) = input.unwrap();

    assert_eq!(inputs, expected_inputs, "expected inputs to be identical");

    let exactly_empty_components = case.exactly_empty_components;

    // Converts the expected result of `$component` into a
    // `UrlPatternComponentResult`. A missing expectation defaults to an empty
    // input with a single group "0" set to "", or with no groups at all for
    // components listed in `exactly_empty_components`.
    macro_rules! convert_result {
      ($component:ident) => {
        expected_match
          .$component
          .map(|c| UrlPatternComponentResult {
            input: c.input,
            groups: c.groups,
          })
          .unwrap_or_else(|| {
            let mut groups = HashMap::new();
            if !exactly_empty_components
              .contains(&stringify!($component).to_owned())
            {
              groups.insert("0".to_owned(), Some("".to_owned()));
            }
            UrlPatternComponentResult {
              input: "".to_owned(),
              groups,
            }
          })
      };
    }

    let expected_result = UrlPatternResult {
      protocol: convert_result!(protocol),
      username: convert_result!(username),
      password: convert_result!(password),
      hostname: convert_result!(hostname),
      port: convert_result!(port),
      pathname: convert_result!(pathname),
      search: convert_result!(search),
      hash: convert_result!(hash),
    };

    assert_eq!(
      actual_match, expected_result,
      "pattern.exec result is not correct"
    );

    println!("✅ Passed");
  }
1012
1013  #[test]
1014  fn test_cases() {
1015    let testdata = include_str!("./testdata/urlpatterntestdata.json");
1016    let cases: Vec<TestCase> = serde_json::from_str(testdata).unwrap();
1017    for case in cases {
1018      test_case(case);
1019    }
1020  }
1021
1022  #[test]
1023  fn issue26() {
1024    UrlPattern::<Regex>::parse(
1025      UrlPatternInit {
1026        pathname: Some("/:foo.".to_owned()),
1027        ..Default::default()
1028      },
1029      Default::default(),
1030    )
1031    .unwrap();
1032  }
1033
1034  #[test]
1035  fn issue46() {
1036    quirks::process_construct_pattern_input(
1037      quirks::StringOrInit::String(":café://:foo".to_owned()),
1038      None,
1039    )
1040    .unwrap();
1041  }
1042
1043  #[test]
1044  fn has_regexp_group() {
1045    let pattern = <UrlPattern>::parse(
1046      UrlPatternInit {
1047        pathname: Some("/:foo.".to_owned()),
1048        ..Default::default()
1049      },
1050      Default::default(),
1051    )
1052    .unwrap();
1053    assert!(!pattern.has_regexp_groups());
1054
1055    let pattern = <UrlPattern>::parse(
1056      UrlPatternInit {
1057        pathname: Some("/(.*?)".to_owned()),
1058        ..Default::default()
1059      },
1060      Default::default(),
1061    )
1062    .unwrap();
1063    assert!(pattern.has_regexp_groups());
1064  }
1065
1066  #[test]
1067  fn issue54() {
1068    let pattern = <UrlPattern>::parse(
1069      UrlPatternInit {
1070        pathname: Some("/:thereisa\u{30FB}middledot.".to_owned()),
1071        ..Default::default()
1072      },
1073      Default::default(),
1074    )
1075    .unwrap();
1076    assert_eq!(
1077      pattern.pathname.group_name_list,
1078      vec!["thereisa\u{30FB}middledot"]
1079    );
1080  }
1081
1082  #[test]
1083  fn issue61() {
1084    // Test case for https://github.com/denoland/deno/issues/29935
1085    // Custom protocols should not escape colons and slashes in pattern pathnames
1086
1087    // Test using init with pattern components
1088    let pattern = <UrlPattern>::parse(
1089      UrlPatternInit {
1090        protocol: Some("myhttp".to_string()),
1091        hostname: Some("example.com".to_string()),
1092        pathname: Some("/:directory/:file".to_string()),
1093        ..Default::default()
1094      },
1095      Default::default(),
1096    )
1097    .unwrap();
1098
1099    println!("Pattern: {pattern:?}");
1100    println!("Protocol: {}", pattern.protocol());
1101    println!("Hostname: {}", pattern.hostname());
1102    println!("Pathname: {}", pattern.pathname());
1103
1104    // The pathname should be "/:directory/:file", not "%2F:directory%2F:file"
1105    assert_eq!(pattern.pathname().to_string(), "/:directory/:file");
1106
1107    // Also test myfile:///test case - empty hostname with leading slash
1108    let myfile_pattern = <UrlPattern>::parse(
1109      UrlPatternInit {
1110        protocol: Some("myfile".to_string()),
1111        hostname: Some("".to_string()), // empty hostname
1112        pathname: Some("/test".to_string()),
1113        ..Default::default()
1114      },
1115      Default::default(),
1116    )
1117    .unwrap();
1118
1119    println!("\nMyfile pattern pathname: {}", myfile_pattern.pathname());
1120    // Should use non-opaque canonicalization because of leading slash
1121    assert_eq!(myfile_pattern.pathname().to_string(), "/test");
1122  }
1123
1124  #[test]
1125  fn issue72() {
1126    let _ = <UrlPattern>::parse(
1127      UrlPatternInit {
1128        pathname: Some("\\\n*\0".to_string()),
1129        ..Default::default()
1130      },
1131      Default::default(),
1132    )
1133    .unwrap();
1134  }
1135}