Skip to main content

lychee_lib/
remap.rs

//! Remap rules which allow mapping URLs that match a pattern to a different
//! URL.
3//!
4//! # Notes
5//! Use in moderation as there are no sanity or performance guarantees.
6//!
//! - There is no constraint on remap rules upon instantiation or during
//!   remapping. In particular, rules are checked sequentially so later rules
//!   might contradict earlier ones if they both match a URL.
10//! - A large rule set has a performance impact because the client needs to
11//!   match every link against all rules.
12
13// Notes on terminology:
14// The major difference between URI (Uniform Resource Identifier) and
15// URL (Uniform Resource Locator) is that the former is an identifier for
16// resources and the latter is a locator.
// We are not interested in differentiating resources by names and the purpose of
// remapping is to provide an alternative **location** in certain
// circumstances. Thus the documentation should be about remapping URLs
// (locations), not remapping URIs (identities).
21
22use std::{fmt::Display, ops::Index};
23
24use regex::Regex;
25use serde::Serialize;
26use url::Url;
27
28use crate::{ErrorKind, Result, Uri};
29
/// Records a single [`Uri`] remapping
///
/// Pairs the [`Uri`] that was looked up with the [`Uri`] it was rewritten to.
/// Values of this type are produced by [`Remaps::remap`] when a rule matches.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
pub struct Remap {
    /// The original [`Uri`] before remapping
    pub original: Uri,
    /// The new [`Uri`] after applying [`Remaps`]
    pub new: Uri,
}
38
39impl Display for Remap {
40    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41        write!(f, "{} --> {}", self.original, self.new)
42    }
43}
44
/// Rules that remap matching URL patterns.
///
/// Some use-cases are:
/// - Testing URLs prior to production deployment.
/// - Testing URLs behind a proxy.
///
/// # Notes
/// See the module-level documentation for usage notes.
#[derive(Debug, Clone)]
pub struct Remaps {
    // Ordered `(pattern, replacement)` pairs; the order is the order in which
    // the rules are tried.
    patterns: Vec<(Regex, String)>,
}
57
58impl Remaps {
59    /// Create a new remapper
60    #[must_use]
61    pub const fn new(patterns: Vec<(Regex, String)>) -> Self {
62        Self { patterns }
63    }
64
65    /// Returns an iterator over the rules.
66    // `iter_mut` is deliberately avoided.
67    pub fn iter(&self) -> std::slice::Iter<'_, (Regex, String)> {
68        self.patterns.iter()
69    }
70
71    /// Remap [`Uri`] as a side-effect, using the given patterns.
72    /// Return `None` if no rule matches.
73    /// Return `Some` if any rule applied.
74    ///
75    /// # Errors
76    ///
77    /// Returns an `Err` if the remap rule produces an invalid URL.
78    #[must_use = "Remapped URLs must be used"]
79    pub fn remap(&self, original: &Uri) -> Result<Option<Remap>> {
80        for (pattern, replacement) in self {
81            if pattern.is_match(original.as_str()) {
82                let new = pattern.replace_all(original.as_str(), replacement);
83                let new = Url::parse(&new).map_err(|_| {
84                    ErrorKind::InvalidUrlRemap(format!("the result `{new}` is not a valid URL"))
85                })?;
86
87                let remap = Remap {
88                    original: original.clone(),
89                    new: Uri { url: new },
90                };
91                return Ok(Some(remap));
92            }
93        }
94
95        Ok(None)
96    }
97
98    /// Returns `true` if there is no remap rule defined.
99    #[must_use]
100    pub const fn is_empty(&self) -> bool {
101        self.patterns.is_empty()
102    }
103
104    /// Get the number of remap rules.
105    #[must_use]
106    pub const fn len(&self) -> usize {
107        self.patterns.len()
108    }
109}
110
111impl Index<usize> for Remaps {
112    type Output = (Regex, String);
113
114    fn index(&self, index: usize) -> &(regex::Regex, String) {
115        &self.patterns[index]
116    }
117}
118
119impl TryFrom<&[String]> for Remaps {
120    type Error = ErrorKind;
121
122    /// Try to convert a slice of `String`s to remap rules.
123    ///
124    /// Each string should contain a Regex pattern and a URL, separated by
125    /// whitespaces.
126    ///
127    /// # Errors
128    ///
129    /// Returns an `Err` if:
130    /// - Any string in the slice is not of the form `REGEX URL`.
131    /// - REGEX is not a valid regular expression.
132    /// - URL is not a valid URL.
133    fn try_from(remaps: &[String]) -> std::result::Result<Self, Self::Error> {
134        let mut parsed = Vec::new();
135
136        for remap in remaps {
137            let params: Vec<_> = remap.split_whitespace().collect();
138            if params.len() != 2 {
139                return Err(ErrorKind::InvalidUrlRemap(format!(
140                    "Cannot parse into URI remap rule, must be a Regex pattern and a URL separated by whitespaces: {remap}"
141                )));
142            }
143
144            let pattern = Regex::new(params[0])?;
145            let replacement = params[1].to_string();
146            parsed.push((pattern, replacement));
147        }
148
149        Ok(Remaps::new(parsed))
150    }
151}
152
153// Implementation for mutable iterator and moving iterator are deliberately
154// avoided
155impl<'a> IntoIterator for &'a Remaps {
156    type Item = &'a (Regex, String);
157
158    type IntoIter = std::slice::Iter<'a, (Regex, String)>;
159
160    fn into_iter(self) -> Self::IntoIter {
161        self.patterns.iter()
162    }
163}
164
#[cfg(test)]
mod tests {
    use url::Url;

    use super::*;

    /// Wraps one `(pattern, replacement)` pair into a rule set.
    fn single_rule(pattern: Regex, replacement: String) -> Remaps {
        Remaps::new(vec![(pattern, replacement)])
    }

    #[test]
    fn test_remap() {
        let source = "https://example.com";
        let target = "http://127.0.0.1:8080";

        let original = Uri::try_from(source).unwrap();
        let remaps = single_rule(Regex::new(source).unwrap(), target.to_string());

        let result = remaps.remap(&original).unwrap();
        let expected = Remap {
            original,
            new: Uri::try_from(target).unwrap(),
        };

        assert_eq!(result, Some(expected));
    }

    #[test]
    fn test_remap_path() {
        let original = Uri::try_from("file://../../issues").unwrap();
        let target = Uri::try_from("https://example.com").unwrap();
        let remaps = single_rule(
            Regex::new(".*?../../issues").unwrap(),
            target.to_string(),
        );

        let result = remaps.remap(&original).unwrap();
        let expected = Remap {
            original,
            new: target,
        };

        assert_eq!(result, Some(expected));
    }

    #[test]
    fn test_remap_skip() {
        let original = Uri::try_from("https://unrelated.example.com").unwrap();
        let target = Uri::try_from("http://127.0.0.1:8080").unwrap();
        let remaps = single_rule(
            Regex::new("https://example.com").unwrap(),
            target.to_string(),
        );

        // The pattern does not match, so the URL must not be remapped.
        let result = remaps.remap(&original).unwrap();
        assert_eq!(result, None);
    }

    #[test]
    fn test_remap_url_to_file() {
        let remaps = single_rule(
            Regex::new("https://docs.example.org").unwrap(),
            "file:///Users/user/code/repo/docs/_site".to_string(),
        );

        // (URL to remap, expected location on disk)
        let cases = [
            (
                "https://docs.example.org/integrations/distcp.html",
                "file:///Users/user/code/repo/docs/_site/integrations/distcp.html",
            ),
            (
                "https://docs.example.org/howto/import.html#working-with-imported-data",
                "file:///Users/user/code/repo/docs/_site/howto/import.html#working-with-imported-data",
            ),
            (
                "https://docs.example.org/howto/garbage-collection-committed.html",
                "file:///Users/user/code/repo/docs/_site/howto/garbage-collection-committed.html",
            ),
        ];

        for (source, expected) in cases {
            let original = Uri::try_from(source).unwrap();
            let remapped = remaps.remap(&original).unwrap().unwrap();
            assert_eq!(remapped.new, Uri::try_from(expected).unwrap());
        }
    }

    /// This is a partial remap, i.e. the URL is not fully replaced but only
    /// part of it. The parts to be replaced are defined by the regex pattern
    /// using capture groups.
    #[test]
    fn test_remap_capture_group() {
        let original = Uri::try_from("https://example.com/1/2/3").unwrap();
        let template = Uri::try_from("https://example.com/foo/$1/bar").unwrap();
        let remaps = single_rule(
            Regex::new("https://example.com/.*?/(.*?)/.*").unwrap(),
            template.to_string(),
        );

        let result = remaps.remap(&original).unwrap();
        let expected = Remap {
            original,
            new: Uri::try_from("https://example.com/foo/2/bar").unwrap(),
        };

        assert_eq!(result, Some(expected));
    }

    #[test]
    fn test_remap_named_capture() {
        let original = Uri::try_from("https://example.com/1/2/3").unwrap();
        let template = Uri::try_from("https://example.com/foo/$foo/bar").unwrap();
        let remaps = single_rule(
            Regex::new("https://example.com/.*?/(?P<foo>.*?)/.*").unwrap(),
            template.to_string(),
        );

        let result = remaps.remap(&original).unwrap();
        let expected = Remap {
            original,
            new: Uri::try_from("https://example.com/foo/2/bar").unwrap(),
        };

        assert_eq!(result, Some(expected));
    }

    #[test]
    fn test_remap_named_capture_shorthand() {
        let original = Uri::try_from("https://example.com/1/2/3").unwrap();
        // Clippy acts up here, but this syntax is actually valid
        // See https://docs.rs/regex/latest/regex/index.html#grouping-and-flags
        #[allow(clippy::invalid_regex)]
        let pattern = Regex::new(r"https://example.com/.*?/(?<foo>.*?)/.*").unwrap();
        let template = Url::try_from("https://example.com/foo/$foo/bar").unwrap();
        let remaps = single_rule(pattern, template.to_string());

        let result = remaps.remap(&original).unwrap();
        let expected = Remap {
            original,
            new: Uri::try_from("https://example.com/foo/2/bar").unwrap(),
        };

        assert_eq!(result, Some(expected));
    }
}