parse_link_header/
lib.rs

1//! A library for parsing HTTP Link header.
2//!
3//! ## How to use
4//!
5//! ### Note for version 0.1.x
6//!
//! Version 0.1 can't correctly handle the `relative ref` that is described in <https://tools.ietf.org/html/rfc3986#section-4.1>
8//!
9//! The parsed value of version 0.1 refers to the return value of <https://github.com/thlorenz/parse-link-header>, which is a `HashMap` with the same structure.
10//!
11//! **So if you want to parse `relative ref`, please use version `>=0.2`.**
12//!
13//! **Or if you don't care about `relative ref` and want a simple `HashMap<String, HashMap<String, String>>` result, you can use version `0.1`.**
14//!
15//! ### Example
16//!
17//! In your `Cargo.toml`, add:
18//!
19//! ```toml
20//! [dependencies]
21//! parse_link_header = "0.4"
22//! ```
23//!
24//! Then:
25//!
26//! ```rust
27//! let link_header = r#"<https://api.github.com/repositories/41986369/contributors?page=2>; rel="next", <https://api.github.com/repositories/41986369/contributors?page=14>; rel="last""#;
28//!
29//! let res = parse_link_header::parse(link_header);
30//! assert!(res.is_ok());
31//!
32//! let val = res.unwrap();
33//! assert_eq!(val.len(), 2);
34//! assert_eq!(val.get(&Some("next".to_string())).unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=2");
35//! assert_eq!(val.get(&Some("last".to_string())).unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=14");
36//! ```
37//!
//! The parsed value is a `Result<HashMap<Option<Rel>, Link>, Error>` (whose `Ok` value is a
//! [`LinkMap`](type.LinkMap.html)), where `Rel` and `Link` are:
40//!
41//! ```rust
42//! use std::collections::HashMap;
43//!
44//! #[cfg(not(feature = "url"))]
45//! use http::Uri;
46//!
47//! #[cfg(feature = "url")]
48//! use url::Url as Uri;
49//!
50//! #[derive(Debug, PartialEq)]
51//! pub struct Link {
52//!     pub uri: Uri,
53//!     pub raw_uri: String,
54//!     pub queries: HashMap<String, String>,
55//!     pub params: HashMap<String, String>,
56//! }
57//!
58//! type Rel = String;
59//! ```
60//!
//! Note that according to <https://tools.ietf.org/html/rfc8288#section-3.3> (October 2017),
//! **the rel parameter must be present**. Because a header may still omit it, the key of
//! `HashMap<Option<Rel>, Link>` is `Option<Rel>`: a link without a `rel` parameter is stored under `None`.
//! So if you find that the key is `None`, check whether the `rel` type was specified in the header.
64//!
65//! ## parse_with_rel
66//!
67//! > Version >= 0.3.0
68//!
69//! Alternatively, use the `parse_with_rel()` function to get a `HashMap<String, Link>` (aka a [`RelLinkMap`](type.RelLinkMap.html)), as in:
70//!
71//! ```rust
72//! let link_header = r#"<https://api.github.com/repositories/41986369/contributors?page=2>; rel="next", <https://api.github.com/repositories/41986369/contributors?page=14>; rel="last""#;
73//!
74//! let res = parse_link_header::parse_with_rel(link_header);
75//! assert!(res.is_ok());
76//!
77//! let val = res.unwrap();
78//! assert_eq!(val.len(), 2);
79//! assert_eq!(val.get("next").unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=2");
80//! assert_eq!(val.get("last").unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=14");
81//! ```
82//!
//! Use this function when you can ensure that the `rel` parameter is present for every link in the header;
//! if a link has no `rel` parameter, an error is returned instead.
84//!
85//! ## Feature: `url`
86//!
87//! > Version >= 0.3.0
88//!
89//! If the `url` feature is enabled, the `uri` field in struct [`parse_link_header::Link`](struct.Link.html) will be
90//! of type `url::Url` from the [url crate](https://crates.io/crates/url), rather than the `http::uri::Uri` it normally is.
91//! This allows integration with other libraries that use the url crate, such as [reqwest](https://crates.io/crates/reqwest).
92//!
//! **NOTE:** This implicitly disables support for relative refs, as URLs do not support relative refs (whereas URIs do).
94
95#![allow(clippy::derive_partial_eq_without_eq)]
96use std::collections::HashMap;
97use std::fmt;
98
99#[cfg(not(feature = "url"))]
100use http::Uri;
101
102#[cfg(feature = "url")]
103use url::Url as Uri;
104
105/// A `Result` alias where the `Err` case is [`parse_link_header::Error`].
106///
107/// [`parse_link_header::Error`]: struct.Error.html
108pub type Result<T> = std::result::Result<T, Error>;
109
110/// An error encountered when attempting to parse a `Link:` HTTP header.
111#[derive(Copy, Clone, Debug, PartialEq)]
112pub struct Error(ErrorKind);
113
/// The category of failure that occurred while parsing a `Link:` header.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ErrorKind {
    /// An internal parser failure; not expected to occur in practice.
    InternalError,

    /// The link value could not be parsed into a URI.
    InvalidURI,

    /// A link parameter was malformed.
    MalformedParam,

    /// The query component of the URI was malformed.
    MalformedQuery,

    /// The `rel` parameter was required but absent.
    MissingRel,
}
132
133impl fmt::Display for Error {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        match self.0 {
136            ErrorKind::InternalError => write!(f, "internal parser error"),
137            ErrorKind::InvalidURI => write!(f, "unable to parse URI component"),
138            ErrorKind::MalformedParam => write!(f, "malformed parameter list"),
139            ErrorKind::MalformedQuery => write!(f, "malformed URI query"),
140            ErrorKind::MissingRel => write!(f, "missing 'rel' parameter"),
141        }
142    }
143}
144
145impl std::error::Error for Error {}
146
147impl From<&Error> for Error {
148    /// Create a new Error object from a borrowed immutable reference. This is required as part of
149    /// using `lazy_static!`, as that deals in references.
150    fn from(x: &Error) -> Self {
151        Error(x.0)
152    }
153}
154
155/// Struct to describe a single `Link:` header entry.
156///
157/// This stores the raw URI found in the header, as well as parsed forms of that URI (including the
158/// queries) and any parameters associated with this URI.
159#[derive(Debug, PartialEq)]
160pub struct Link {
161    /// A parsed form of the URI
162    pub uri: Uri,
163
164    /// The raw text string of the URI
165    pub raw_uri: String,
166
167    /// A `HashMap` of the query part of the URI (in the form of key=value)
168    pub queries: HashMap<String, String>,
169
170    /// A `HashMap` of the parameters associated with this URI. The most common is `rel`,
171    /// indicating the relationship between the current HTTP data being fetched and the URI in this
172    /// `Link:` header.
173    pub params: HashMap<String, String>,
174}
175
/// The value of a link's `rel` parameter, used as the key of the parsed maps.
type Rel = String;
177
178/// Type alias for the parsed data returned as a `HashMap` with a key of `Option<Rel>`.
179///
180/// This is different from [`RelLinkMap`](type.RelLinkMap.html) which has a key of `Rel`.
181pub type LinkMap = HashMap<Option<Rel>, Link>;
182
183/// Type alias for the parsed data returned as a `HashMap` where the `rel` parameter is required to
184/// be present.
185///
186/// This is different from the [`LinkMap`](type.LinkMap.html) which has a key of `Option<Rel>`.
187pub type RelLinkMap = HashMap<Rel, Link>;
188
189/// Parse link header into a [`RelLinkMap`](type.RelLinkMap.html).
190///
191/// Takes a `&str` which is the value of the HTTP `Link:` header, attempts to parse it, and returns
192/// a `Result<RelLinkMap>` which represents the mapping between the relationship and the link entry.
193pub fn parse_with_rel(link_header: &str) -> Result<RelLinkMap> {
194    parse_with(link_header, |x| x.ok_or(Error(ErrorKind::MissingRel)))
195}
196
197/// Parse link header into a [`LinkMap`](type.LinkMap.html).
198///
199/// Takes a `&str` which is the value of the HTTP `Link:` header, attempts to parse it, and returns
200/// a `Result<LinkMap>` which represents the mapping between the relationship and the link entry.
201pub fn parse(link_header: &str) -> Result<LinkMap> {
202    parse_with(link_header, Ok)
203}
204
205/// Generic parser function.
206///
207/// Does the actual parsing work, and then uses make_key() to proceses the HashMap key into the
208/// desired type.
209fn parse_with<K, F>(link_header: &str, make_key: F) -> Result<HashMap<K, Link>>
210where
211    K: Eq + std::hash::Hash,
212    F: Fn(Option<String>) -> Result<K>,
213{
214    use lazy_static::lazy_static;
215    use regex::Regex;
216
217    lazy_static! {
218        static ref RE: Result<Regex> =
219            Regex::new(r#"[<>"\s]"#).or(Err(Error(ErrorKind::InternalError)));
220    }
221    let mut result = HashMap::new();
222
223    // remove all quotes, angle brackets, and whitespace
224    let preprocessed = RE.as_ref()?.replace_all(link_header, "");
225
226    // split along comma into different entries
227    let splited = preprocessed.split(',');
228
229    for s in splited {
230        // split each entry into parts
231        let mut link_vec: Vec<_> = s.split(';').collect();
232        link_vec.reverse();
233
234        // pop off the link value; the split() guarantees at least one entry to pop()
235        let raw_uri = link_vec
236            .pop()
237            .ok_or(Error(ErrorKind::InternalError))?
238            .to_string();
239        let uri: Uri = raw_uri.parse().or(Err(Error(ErrorKind::InvalidURI)))?;
240
241        let mut queries = HashMap::new();
242        if let Some(query) = uri.query() {
243            let mut query = query.to_string();
244
245            // skip leading ampersand
246            if query.starts_with('&') {
247                query = query.chars().skip(1).collect();
248            }
249
250            // split each query and extract as (key, value) pairs
251            for q in query.split('&') {
252                let (key, val) = q.split_once('=').ok_or(Error(ErrorKind::MalformedQuery))?;
253
254                queries.insert(key.to_string(), val.to_string());
255            }
256        }
257
258        let mut params = HashMap::new();
259
260        // extract the parameter list as (key, value) pairs
261        for param in link_vec {
262            let (key, val) = param
263                .split_once('=')
264                .ok_or(Error(ErrorKind::MalformedParam))?;
265
266            params.insert(key.to_string(), val.to_string());
267        }
268
269        result.insert(
270            make_key(params.get("rel").cloned())?,
271            Link {
272                uri,
273                raw_uri,
274                queries,
275                params,
276            },
277        );
278    }
279
280    Ok(result)
281}
282
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;

    /// Header with two absolute links, shared by the `parse` and `parse_with_rel` tests.
    const GITHUB_HEADER: &str = r#"<https://api.github.com/repositories/41986369/contributors?page=2>; rel="next", <https://api.github.com/repositories/41986369/contributors?page=14>; rel="last""#;

    const GITHUB_NEXT: &str = "https://api.github.com/repositories/41986369/contributors?page=2";
    const GITHUB_LAST: &str = "https://api.github.com/repositories/41986369/contributors?page=14";

    /// Turn a slice of borrowed `(key, value)` pairs into an owned map.
    fn string_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, val)| (key.to_string(), val.to_string()))
            .collect()
    }

    /// Build the `Link` expected for `raw_uri` with the given query pairs and parameters.
    fn link(raw_uri: &str, queries: &[(&str, &str)], params: &[(&str, &str)]) -> Link {
        Link {
            uri: raw_uri.parse().unwrap(),
            raw_uri: raw_uri.to_string(),
            queries: string_map(queries),
            params: string_map(params),
        }
    }

    #[test]
    fn parse_link_header_works() {
        let mut expected = HashMap::new();
        expected.insert(
            Some("next".to_string()),
            link(GITHUB_NEXT, &[("page", "2")], &[("rel", "next")]),
        );
        expected.insert(
            Some("last".to_string()),
            link(GITHUB_LAST, &[("page", "14")], &[("rel", "last")]),
        );

        let parsed = parse(GITHUB_HEADER).unwrap();

        assert_eq!(expected, parsed);

        // Relative references are only checked when the `url` feature is disabled.
        #[cfg(not(feature = "url"))]
        {
            let mut rel_link_expected = HashMap::new();
            rel_link_expected.insert(
                Some("foo/bar".to_string()),
                link("/foo/bar", &[], &[("rel", "foo/bar")]),
            );

            let rel_link_parsed = parse(r#"</foo/bar>; rel="foo/bar""#).unwrap();

            assert_eq!(rel_link_expected, rel_link_parsed);
        }
    }

    #[test]
    fn parse_with_rel_works() {
        let mut expected = HashMap::new();
        expected.insert(
            "next".to_string(),
            link(GITHUB_NEXT, &[("page", "2")], &[("rel", "next")]),
        );
        expected.insert(
            "last".to_string(),
            link(GITHUB_LAST, &[("page", "14")], &[("rel", "last")]),
        );

        let parsed = parse_with_rel(GITHUB_HEADER).unwrap();

        assert_eq!(expected, parsed);

        // Relative references are only checked when the `url` feature is disabled.
        #[cfg(not(feature = "url"))]
        {
            let mut rel_link_expected = HashMap::new();
            rel_link_expected.insert(
                "foo/bar".to_string(),
                link("/foo/bar", &[], &[("rel", "foo/bar")]),
            );

            let rel_link_parsed = parse_with_rel(r#"</foo/bar>; rel="foo/bar""#).unwrap();

            assert_eq!(rel_link_expected, rel_link_parsed);
        }
    }

    #[test]
    fn parse_link_header_should_err() {
        assert_eq!(parse("<>"), Err(Error(ErrorKind::InvalidURI)));
    }

    #[test]
    fn parse_with_rel_should_err() {
        assert_eq!(
            parse_with_rel(r#"<http://local.host/foo/bar>; type="foo/bar""#),
            Err(Error(ErrorKind::MissingRel))
        );
    }

    #[test]
    fn sentry_paginating_results() {
        let link_header = r#"<https://sentry.io/api/0/projects/1/groups/?&cursor=1420837590:0:1>; rel="previous"; results="false", <https://sentry.io/api/0/projects/1/groups/?&cursor=1420837533:0:0>; rel="next"; results="true""#;

        let mut expected = HashMap::new();
        expected.insert(
            Some("previous".to_string()),
            link(
                "https://sentry.io/api/0/projects/1/groups/?&cursor=1420837590:0:1",
                &[("cursor", "1420837590:0:1")],
                &[("rel", "previous"), ("results", "false")],
            ),
        );
        expected.insert(
            Some("next".to_string()),
            link(
                "https://sentry.io/api/0/projects/1/groups/?&cursor=1420837533:0:0",
                &[("cursor", "1420837533:0:0")],
                &[("rel", "next"), ("results", "true")],
            ),
        );

        let parsed = parse(link_header).unwrap();

        assert_eq!(expected, parsed);
    }

    #[test]
    fn test_error_display() {
        // Each error kind paired with the exact text its `Display` impl must produce.
        let cases = [
            (ErrorKind::InternalError, "internal parser error"),
            (ErrorKind::InvalidURI, "unable to parse URI component"),
            (ErrorKind::MalformedParam, "malformed parameter list"),
            (ErrorKind::MalformedQuery, "malformed URI query"),
            (ErrorKind::MissingRel, "missing 'rel' parameter"),
        ];

        for (kind, message) in cases.iter() {
            assert_eq!(format!("{}", Error(*kind)), *message);
        }
    }

    #[test]
    fn test_error_from() {
        let original = Error(ErrorKind::InternalError);
        let converted = Error::from(&original);

        assert_eq!(original, converted);
    }
}