url_normalize/options.rs
1/// Options for URL normalization.
2///
3/// All options have sensible defaults matching the behavior of the original
4/// `normalize-url` npm package.
5pub struct Options {
6 /// Default protocol to prepend if missing.
7 ///
8 /// Default: `Protocol::Http`
9 pub default_protocol: Protocol,
10
11 /// Additional protocols to normalize (beyond http, https, file, data).
12 /// Protocols should be specified without `:`.
13 ///
14 /// Default: `vec![]`
15 pub custom_protocols: Vec<String>,
16
17 /// Prepend `default_protocol` to protocol-relative URLs.
18 ///
19 /// Default: `true`
20 pub normalize_protocol: bool,
21
22 /// Normalize HTTPS to HTTP.
23 ///
24 /// Default: `false`
25 pub force_http: bool,
26
27 /// Normalize HTTP to HTTPS. Cannot be used with `force_http`.
28 ///
29 /// Default: `false`
30 pub force_https: bool,
31
32 /// Strip the authentication part of the URL.
33 ///
34 /// Default: `true`
35 pub strip_authentication: bool,
36
37 /// Strip the hash/fragment part of the URL.
38 ///
39 /// Default: `false`
40 pub strip_hash: bool,
41
42 /// Remove the protocol from the URL.
43 ///
44 /// Default: `false`
45 pub strip_protocol: bool,
46
47 /// Strip the text fragment part of the URL (`#:~:text=...`).
48 ///
49 /// Default: `true`
50 pub strip_text_fragment: bool,
51
52 /// Remove `www.` from the URL.
53 ///
54 /// Default: `true`
55 pub strip_www: bool,
56
57 /// Controls removal of query parameters.
58 ///
59 /// Default: `RemoveQueryParameters::List` with a single filter matching `utm_*`
60 pub remove_query_parameters: RemoveQueryParameters,
61
62 /// If set, only keep query parameters matching these filters.
63 /// Overrides `remove_query_parameters`.
64 ///
65 /// Default: `None`
66 pub keep_query_parameters: Option<Vec<QueryFilter>>,
67
68 /// Remove trailing slash from the path.
69 ///
70 /// Default: `true`
71 pub remove_trailing_slash: bool,
72
73 /// Remove a sole `/` pathname in the output.
74 ///
75 /// Default: `true`
76 pub remove_single_slash: bool,
77
78 /// Remove directory index files matching the given filters.
79 ///
80 /// Default: `RemoveDirectoryIndex::None`
81 pub remove_directory_index: RemoveDirectoryIndex,
82
83 /// Remove explicit port numbers.
84 ///
85 /// Default: `false`
86 pub remove_explicit_port: bool,
87
88 /// Sort query parameters alphabetically by key.
89 ///
90 /// Default: `true`
91 pub sort_query_parameters: bool,
92
93 /// Controls how empty query parameter values are formatted.
94 ///
95 /// Default: `EmptyQueryValue::Preserve`
96 pub empty_query_value: EmptyQueryValue,
97
98 /// Remove the entire URL path, leaving only the domain.
99 ///
100 /// Default: `false`
101 pub remove_path: bool,
102
103 /// Custom function to transform path components.
104 ///
105 /// Default: `None`
106 pub transform_path: Option<TransformPathFn>,
107}
108
/// Signature of the `transform_path` callback stored in `Options`.
///
/// The callback is a boxed closure that takes ownership of a list of
/// strings and hands back a (possibly different) list of strings.
pub type TransformPathFn = Box<dyn Fn(Vec<String>) -> Vec<String>>;
111
/// Protocol to fall back on when a URL comes without a scheme.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Protocol {
    /// Fall back to `http`.
    Http,
    /// Fall back to `https`.
    Https,
}
118
/// Matcher applied to query parameter keys or path components.
///
/// A filter is either a literal string to compare against or an arbitrary
/// caller-supplied predicate.
pub enum QueryFilter {
    /// Matches only when the candidate equals this string exactly.
    Exact(String),
    /// Matches whenever the wrapped predicate returns `true`. Callers who
    /// want regex matching can wrap a regex crate of their choice here.
    Predicate(Box<dyn Fn(&str) -> bool>),
}

impl QueryFilter {
    /// Reports whether `name` is accepted by this filter.
    ///
    /// `Exact` compares for string equality (case-sensitive); `Predicate`
    /// forwards `name` to the stored closure and returns its answer.
    pub fn matches(&self, name: &str) -> bool {
        match self {
            Self::Predicate(predicate) => predicate(name),
            Self::Exact(expected) => expected.as_str() == name,
        }
    }
}
139
140/// Controls whether query parameters will be removed.
141pub enum RemoveQueryParameters {
142 /// No query parameters will be removed (disabled).
143 None,
144 /// All query parameters will be removed.
145 All,
146 /// Only query parameters matching any of the provided filters will be removed.
147 List(Vec<QueryFilter>),
148}
149
150/// Controls whether directory index files will be removed from the path.
151pub enum RemoveDirectoryIndex {
152 /// No directory indices will be removed.
153 None,
154 /// Use the default pattern: `index.*`
155 Default,
156 /// Only directory indices matching any of the provided filters will be removed.
157 List(Vec<QueryFilter>),
158}
159
/// Output format for query parameters whose value is empty.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EmptyQueryValue {
    /// Leave the input form untouched (`?key` stays `?key`, `?key=` stays `?key=`).
    Preserve,
    /// Emit `=` for every empty value (`?key` becomes `?key=`).
    Always,
    /// Omit `=` for every empty value (`?key=` becomes `?key`).
    Never,
}
170
171impl Default for Options {
172 fn default() -> Self {
173 Options {
174 default_protocol: Protocol::Http,
175 custom_protocols: vec![],
176 normalize_protocol: true,
177 force_http: false,
178 force_https: false,
179 strip_authentication: true,
180 strip_hash: false,
181 strip_protocol: false,
182 strip_text_fragment: true,
183 strip_www: true,
184 remove_query_parameters: RemoveQueryParameters::List(vec![QueryFilter::Predicate(
185 Box::new(|key: &str| {
186 key.len() >= 4
187 && key.is_char_boundary(4)
188 && key[..4].eq_ignore_ascii_case("utm_")
189 }),
190 )]),
191 keep_query_parameters: None,
192 remove_trailing_slash: true,
193 remove_single_slash: true,
194 remove_directory_index: RemoveDirectoryIndex::None,
195 remove_explicit_port: false,
196 sort_query_parameters: true,
197 empty_query_value: EmptyQueryValue::Preserve,
198 remove_path: false,
199 transform_path: None,
200 }
201 }
202}