// url_normalize/options.rs

1/// Options for URL normalization.
2///
3/// All options have sensible defaults matching the behavior of the original
4/// `normalize-url` npm package.
5pub struct Options {
6    /// Default protocol to prepend if missing.
7    ///
8    /// Default: `Protocol::Http`
9    pub default_protocol: Protocol,
10
11    /// Additional protocols to normalize (beyond http, https, file, data).
12    /// Protocols should be specified without `:`.
13    ///
14    /// Default: `vec![]`
15    pub custom_protocols: Vec<String>,
16
17    /// Prepend `default_protocol` to protocol-relative URLs.
18    ///
19    /// Default: `true`
20    pub normalize_protocol: bool,
21
22    /// Normalize HTTPS to HTTP.
23    ///
24    /// Default: `false`
25    pub force_http: bool,
26
27    /// Normalize HTTP to HTTPS. Cannot be used with `force_http`.
28    ///
29    /// Default: `false`
30    pub force_https: bool,
31
32    /// Strip the authentication part of the URL.
33    ///
34    /// Default: `true`
35    pub strip_authentication: bool,
36
37    /// Strip the hash/fragment part of the URL.
38    ///
39    /// Default: `false`
40    pub strip_hash: bool,
41
42    /// Remove the protocol from the URL.
43    ///
44    /// Default: `false`
45    pub strip_protocol: bool,
46
47    /// Strip the text fragment part of the URL (`#:~:text=...`).
48    ///
49    /// Default: `true`
50    pub strip_text_fragment: bool,
51
52    /// Remove `www.` from the URL.
53    ///
54    /// Default: `true`
55    pub strip_www: bool,
56
57    /// Controls removal of query parameters.
58    ///
59    /// Default: `RemoveQueryParameters::List` with a single filter matching `utm_*`
60    pub remove_query_parameters: RemoveQueryParameters,
61
62    /// If set, only keep query parameters matching these filters.
63    /// Overrides `remove_query_parameters`.
64    ///
65    /// Default: `None`
66    pub keep_query_parameters: Option<Vec<QueryFilter>>,
67
68    /// Remove trailing slash from the path.
69    ///
70    /// Default: `true`
71    pub remove_trailing_slash: bool,
72
73    /// Remove a sole `/` pathname in the output.
74    ///
75    /// Default: `true`
76    pub remove_single_slash: bool,
77
78    /// Remove directory index files matching the given filters.
79    ///
80    /// Default: `RemoveDirectoryIndex::None`
81    pub remove_directory_index: RemoveDirectoryIndex,
82
83    /// Remove explicit port numbers.
84    ///
85    /// Default: `false`
86    pub remove_explicit_port: bool,
87
88    /// Sort query parameters alphabetically by key.
89    ///
90    /// Default: `true`
91    pub sort_query_parameters: bool,
92
93    /// Controls how empty query parameter values are formatted.
94    ///
95    /// Default: `EmptyQueryValue::Preserve`
96    pub empty_query_value: EmptyQueryValue,
97
98    /// Remove the entire URL path, leaving only the domain.
99    ///
100    /// Default: `false`
101    pub remove_path: bool,
102
103    /// Custom function to transform path components.
104    ///
105    /// Default: `None`
106    pub transform_path: Option<TransformPathFn>,
107}
108
/// Type alias for the `transform_path` callback.
///
/// The callback is a boxed closure that receives the path components as an
/// owned `Vec<String>` and returns a `Vec<String>` of components.
pub type TransformPathFn = Box<dyn Fn(Vec<String>) -> Vec<String>>;

/// Default protocol for URLs without a scheme.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Protocol {
    /// The `http` scheme.
    Http,
    /// The `https` scheme.
    Https,
}

/// A filter for matching query parameter keys or path components.
///
/// A filter is either an exact string comparison or a closure-based predicate.
pub enum QueryFilter {
    /// Match only when the tested name is equal to the stored string.
    Exact(String),
    /// Match when the stored predicate returns `true` for the tested name.
    ///
    /// For regex matching, users can bring their own regex crate and call it
    /// from inside the closure.
    Predicate(Box<dyn Fn(&str) -> bool>),
}

impl QueryFilter {
    /// Test whether a parameter name matches this filter.
    ///
    /// Returns `true` when `name` equals the stored string (`Exact`) or when
    /// the stored closure returns `true` for `name` (`Predicate`).
    pub fn matches(&self, name: &str) -> bool {
        match self {
            Self::Exact(expected) => expected == name,
            Self::Predicate(predicate) => predicate(name),
        }
    }
}

140/// Controls whether query parameters will be removed.
141pub enum RemoveQueryParameters {
142    /// No query parameters will be removed (disabled).
143    None,
144    /// All query parameters will be removed.
145    All,
146    /// Only query parameters matching any of the provided filters will be removed.
147    List(Vec<QueryFilter>),
148}
149
150/// Controls whether directory index files will be removed from the path.
151pub enum RemoveDirectoryIndex {
152    /// No directory indices will be removed.
153    None,
154    /// Use the default pattern: `index.*`
155    Default,
156    /// Only directory indices matching any of the provided filters will be removed.
157    List(Vec<QueryFilter>),
158}
159
/// Controls how query parameters with empty values are formatted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmptyQueryValue {
    /// Keep the original format (`?key` stays `?key`, `?key=` stays `?key=`).
    Preserve,
    /// Always include `=` for empty values (`?key` becomes `?key=`).
    Always,
    /// Never include `=` for empty values (`?key=` becomes `?key`).
    Never,
}

171impl Default for Options {
172    fn default() -> Self {
173        Options {
174            default_protocol: Protocol::Http,
175            custom_protocols: vec![],
176            normalize_protocol: true,
177            force_http: false,
178            force_https: false,
179            strip_authentication: true,
180            strip_hash: false,
181            strip_protocol: false,
182            strip_text_fragment: true,
183            strip_www: true,
184            remove_query_parameters: RemoveQueryParameters::List(vec![QueryFilter::Predicate(
185                Box::new(|key: &str| {
186                    key.len() >= 4
187                        && key.is_char_boundary(4)
188                        && key[..4].eq_ignore_ascii_case("utm_")
189                }),
190            )]),
191            keep_query_parameters: None,
192            remove_trailing_slash: true,
193            remove_single_slash: true,
194            remove_directory_index: RemoveDirectoryIndex::None,
195            remove_explicit_port: false,
196            sort_query_parameters: true,
197            empty_query_value: EmptyQueryValue::Preserve,
198            remove_path: false,
199            transform_path: None,
200        }
201    }
202}