Skip to main content

parse_link_header/
lib.rs

1//! A library for parsing HTTP Link header.
2//!
3//! ## How to use
4//!
5//! ### Note for version 0.1.x
6//!
7//! Version 0.1 can't correctly handle the `relative ref` described in <https://tools.ietf.org/html/rfc3986#section-4.1>.
8//!
9//! The parsed value of version 0.1 refers to the return value of <https://github.com/thlorenz/parse-link-header>, which is a `HashMap` with the same structure.
10//!
11//! **So if you want to parse `relative ref`, please use version `>=0.2`.**
12//!
13//! **Or if you don't care about `relative ref` and want a simple `HashMap<String, HashMap<String, String>>` result, you can use version `0.1`.**
14//!
15//! ### Example
16//!
17//! In your `Cargo.toml`, add:
18//!
19//! ```toml
20//! [dependencies]
21//! parse_link_header = "0.4"
22//! ```
23//!
24//! Then:
25//!
26//! ```rust
27//! let link_header = r#"<https://api.github.com/repositories/41986369/contributors?page=2>; rel="next", <https://api.github.com/repositories/41986369/contributors?page=14>; rel="last""#;
28//!
29//! let res = parse_link_header::parse(link_header);
30//! assert!(res.is_ok());
31//!
32//! let val = res.unwrap();
33//! assert_eq!(val.len(), 2);
34//! assert_eq!(val.get(&Some("next".to_string())).unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=2");
35//! assert_eq!(val.get(&Some("last".to_string())).unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=14");
36//! ```
37//!
38//! The parsed value is a `Result<HashMap<Option<Rel>, Link>, Error>` (i.e. a `Result` wrapping a
39//! [`LinkMap`](type.LinkMap.html)), where `Rel` and `Link` are:
40//!
41//! ```rust
42//! use std::collections::HashMap;
43//!
44//! #[cfg(not(feature = "url"))]
45//! use http::Uri;
46//!
47//! #[cfg(feature = "url")]
48//! use url::Url as Uri;
49//!
50//! #[derive(Debug, PartialEq)]
51//! pub struct Link {
52//!     pub uri: Uri,
53//!     pub raw_uri: String,
54//!     pub queries: HashMap<String, String>,
55//!     pub params: HashMap<String, String>,
56//! }
57//!
58//! type Rel = String;
59//! ```
60//!
61//! Note that according to <https://tools.ietf.org/html/rfc8288#section-3.3> (October 2017),
62//! **the rel parameter must be present**. A header may still omit it, which is why the key of `HashMap<Option<Rel>, Link>` is `Option<Rel>`.
63//! So if you find that the key is `None`, check whether the header specifies the `rel` parameter.
64//!
65//! ## parse_with_rel
66//!
67//! > Version >= 0.3.0
68//!
69//! Alternatively, use the `parse_with_rel()` function to get a `HashMap<String, Link>` (aka a [`RelLinkMap`](type.RelLinkMap.html)), as in:
70//!
71//! ```rust
72//! let link_header = r#"<https://api.github.com/repositories/41986369/contributors?page=2>; rel="next", <https://api.github.com/repositories/41986369/contributors?page=14>; rel="last""#;
73//!
74//! let res = parse_link_header::parse_with_rel(link_header);
75//! assert!(res.is_ok());
76//!
77//! let val = res.unwrap();
78//! assert_eq!(val.len(), 2);
79//! assert_eq!(val.get("next").unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=2");
80//! assert_eq!(val.get("last").unwrap().raw_uri, "https://api.github.com/repositories/41986369/contributors?page=14");
81//! ```
82//!
83//! You can use this function if you ensure that the `rel` parameter is present in the header.
84//!
85//! ## Feature: `url`
86//!
87//! > Version >= 0.3.0
88//!
89//! If the `url` feature is enabled, the `uri` field in struct [`parse_link_header::Link`](struct.Link.html) will be
90//! of type `url::Url` from the [url crate](https://crates.io/crates/url), rather than the `http::uri::Uri` it normally is.
91//! This allows integration with other libraries that use the url crate, such as [reqwest](https://crates.io/crates/reqwest).
92//!
93//! **NOTE:** This implicitly disables support for relative refs, as URLs do not support relative refs (whereas URIs do).
94
95#![allow(clippy::derive_partial_eq_without_eq)]
96use std::collections::HashMap;
97use std::fmt;
98
99#[cfg(not(feature = "url"))]
100use http::Uri;
101
102#[cfg(feature = "url")]
103use url::Url as Uri;
104
105/// A `Result` alias where the `Err` case is [`parse_link_header::Error`].
106///
107/// [`parse_link_header::Error`]: struct.Error.html
108pub type Result<T> = std::result::Result<T, Error>;
109
110/// An error encountered when attempting to parse a `Link:` HTTP header.
111#[derive(Copy, Clone, Debug, PartialEq)]
112pub struct Error(ErrorKind);
113
/// The category of failure carried by an `Error`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ErrorKind {
    /// Something went wrong inside the parser itself; this is not expected to happen
    InternalError,

    /// The link value could not be parsed into a URI
    InvalidURI,

    /// A link parameter is malformed
    MalformedParam,

    /// The query part of the URI is malformed
    MalformedQuery,

    /// The `rel` parameter is required but was not found
    MissingRel,
}
132
133impl fmt::Display for Error {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        match self.0 {
136            ErrorKind::InternalError => write!(f, "internal parser error"),
137            ErrorKind::InvalidURI => write!(f, "unable to parse URI component"),
138            ErrorKind::MalformedParam => write!(f, "malformed parameter list"),
139            ErrorKind::MalformedQuery => write!(f, "malformed URI query"),
140            ErrorKind::MissingRel => write!(f, "missing 'rel' parameter"),
141        }
142    }
143}
144
145impl std::error::Error for Error {}
146
147impl From<&Error> for Error {
148    /// Create a new Error object from a borrowed immutable reference. This is required as part of
149    /// using `lazy_static!`, as that deals in references.
150    fn from(x: &Error) -> Self {
151        Error(x.0)
152    }
153}
154
155/// Struct to describe a single `Link:` header entry.
156///
157/// This stores the raw URI found in the header, as well as parsed forms of that URI (including the
158/// queries) and any parameters associated with this URI.
159#[derive(Debug, PartialEq)]
160pub struct Link {
161    /// A parsed form of the URI
162    pub uri: Uri,
163
164    /// The raw text string of the URI
165    pub raw_uri: String,
166
167    /// A `HashMap` of the query part of the URI (in the form of key=value)
168    pub queries: HashMap<String, String>,
169
170    /// A `HashMap` of the parameters associated with this URI. The most common is `rel`,
171    /// indicating the relationship between the current HTTP data being fetched and the URI in this
172    /// `Link:` header.
173    pub params: HashMap<String, String>,
174}
175
176type Rel = String;
177
178/// Type alias for the parsed data returned as a `HashMap` with a key of `Option<Rel>`.
179///
180/// This is different from [`RelLinkMap`](type.RelLinkMap.html) which has a key of `Rel`.
181pub type LinkMap = HashMap<Option<Rel>, Link>;
182
183/// Type alias for the parsed data returned as a `HashMap` where the `rel` parameter is required to
184/// be present.
185///
186/// This is different from the [`LinkMap`](type.LinkMap.html) which has a key of `Option<Rel>`.
187pub type RelLinkMap = HashMap<Rel, Link>;
188
189/// Parse link header into a [`RelLinkMap`](type.RelLinkMap.html).
190///
191/// Takes a `&str` which is the value of the HTTP `Link:` header, attempts to parse it, and returns
192/// a `Result<RelLinkMap>` which represents the mapping between the relationship and the link entry.
193pub fn parse_with_rel(link_header: &str) -> Result<RelLinkMap> {
194    parse_with(link_header, |x| x.ok_or(Error(ErrorKind::MissingRel)))
195}
196
197/// Parse link header into a [`LinkMap`](type.LinkMap.html).
198///
199/// Takes a `&str` which is the value of the HTTP `Link:` header, attempts to parse it, and returns
200/// a `Result<LinkMap>` which represents the mapping between the relationship and the link entry.
201pub fn parse(link_header: &str) -> Result<LinkMap> {
202    parse_with(link_header, Ok)
203}
204
205/// Generic parser function.
206///
207/// Does the actual parsing work, and then uses make_key() to proceses the HashMap key into the
208/// desired type.
209fn parse_with<K, F>(link_header: &str, make_key: F) -> Result<HashMap<K, Link>>
210where
211    K: Eq + std::hash::Hash,
212    F: Fn(Option<String>) -> Result<K>,
213{
214    use lazy_static::lazy_static;
215    use regex::Regex;
216
217    lazy_static! {
218        static ref RE: Result<Regex> =
219            Regex::new(r#"[<>"\s]"#).or(Err(Error(ErrorKind::InternalError)));
220    }
221    let mut result = HashMap::new();
222
223    // remove all quotes, angle brackets, and whitespace
224    let preprocessed = RE.as_ref()?.replace_all(link_header, "");
225
226    // split along comma into different entries
227    let splited = preprocessed.split(',');
228
229    for s in splited {
230        // split each entry into parts
231        let mut link_vec: Vec<_> = s.split(';').collect();
232        link_vec.reverse();
233
234        // pop off the link value; the split() guarantees at least one entry to pop()
235        let raw_uri = link_vec
236            .pop()
237            .ok_or(Error(ErrorKind::InternalError))?
238            .to_string();
239        let uri: Uri = raw_uri.parse().or(Err(Error(ErrorKind::InvalidURI)))?;
240
241        let mut queries = HashMap::new();
242        if let Some(query) = uri
243            .query()
244            // skip leading ampersand
245            .map(|query| query.trim_start_matches('&'))
246        {
247            // split each query and extract as (key, value) pairs
248            for q in query.split('&') {
249                let (key, val) = q.split_once('=').ok_or(Error(ErrorKind::MalformedQuery))?;
250
251                queries.insert(key.to_string(), val.to_string());
252            }
253        }
254
255        let mut params = HashMap::new();
256
257        // extract the parameter list as (key, value) pairs
258        for param in link_vec {
259            let (key, val) = param
260                .split_once('=')
261                .ok_or(Error(ErrorKind::MalformedParam))?;
262
263            params.insert(key.to_string(), val.to_string());
264        }
265
266        result.insert(
267            make_key(params.get("rel").cloned())?,
268            Link {
269                uri,
270                raw_uri,
271                queries,
272                params,
273            },
274        );
275    }
276
277    Ok(result)
278}
279
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;

    /// Header with a `next` and a `last` entry, shared by the two "works" tests.
    const GITHUB_HEADER: &str = r#"<https://api.github.com/repositories/41986369/contributors?page=2>; rel="next", <https://api.github.com/repositories/41986369/contributors?page=14>; rel="last""#;
    const GITHUB_NEXT: &str = "https://api.github.com/repositories/41986369/contributors?page=2";
    const GITHUB_LAST: &str = "https://api.github.com/repositories/41986369/contributors?page=14";

    /// Turns borrowed `(key, value)` pairs into the owned map type used by `Link`.
    fn string_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, val)| (key.to_string(), val.to_string()))
            .collect()
    }

    /// Builds the `Link` a test expects for `raw_uri` with the given queries and parameters.
    fn link(raw_uri: &str, queries: &[(&str, &str)], params: &[(&str, &str)]) -> Link {
        Link {
            uri: raw_uri.parse().unwrap(),
            raw_uri: raw_uri.to_string(),
            queries: string_map(queries),
            params: string_map(params),
        }
    }

    #[test]
    fn parse_link_header_works() {
        let mut expected = HashMap::new();
        expected.insert(
            Some("next".to_string()),
            link(GITHUB_NEXT, &[("page", "2")], &[("rel", "next")]),
        );
        expected.insert(
            Some("last".to_string()),
            link(GITHUB_LAST, &[("page", "14")], &[("rel", "last")]),
        );

        let parsed = parse(GITHUB_HEADER).unwrap();

        assert_eq!(expected, parsed);

        // relative refs are only exercised when `uri` is an `http::Uri`
        #[cfg(not(feature = "url"))]
        {
            let mut rel_link_expected = HashMap::new();
            rel_link_expected.insert(
                Some("foo/bar".to_string()),
                link("/foo/bar", &[], &[("rel", "foo/bar")]),
            );

            let rel_link_parsed = parse(r#"</foo/bar>; rel="foo/bar""#).unwrap();

            assert_eq!(rel_link_expected, rel_link_parsed);
        }
    }

    #[test]
    fn parse_with_rel_works() {
        let mut expected = HashMap::new();
        expected.insert(
            "next".to_string(),
            link(GITHUB_NEXT, &[("page", "2")], &[("rel", "next")]),
        );
        expected.insert(
            "last".to_string(),
            link(GITHUB_LAST, &[("page", "14")], &[("rel", "last")]),
        );

        let parsed = parse_with_rel(GITHUB_HEADER).unwrap();

        assert_eq!(expected, parsed);

        // relative refs are only exercised when `uri` is an `http::Uri`
        #[cfg(not(feature = "url"))]
        {
            let mut rel_link_expected = HashMap::new();
            rel_link_expected.insert(
                "foo/bar".to_string(),
                link("/foo/bar", &[], &[("rel", "foo/bar")]),
            );

            let rel_link_parsed = parse_with_rel(r#"</foo/bar>; rel="foo/bar""#).unwrap();

            assert_eq!(rel_link_expected, rel_link_parsed);
        }
    }

    #[test]
    fn parse_link_header_should_err() {
        assert_eq!(parse("<>"), Err(Error(ErrorKind::InvalidURI)));
    }

    #[test]
    fn parse_with_rel_should_err() {
        assert_eq!(
            parse_with_rel(r#"<http://local.host/foo/bar>; type="foo/bar""#),
            Err(Error(ErrorKind::MissingRel))
        );
    }

    #[test]
    fn sentry_paginating_results() {
        let link_header = r#"<https://sentry.io/api/0/projects/1/groups/?&cursor=1420837590:0:1>; rel="previous"; results="false", <https://sentry.io/api/0/projects/1/groups/?&cursor=1420837533:0:0>; rel="next"; results="true""#;

        let mut expected = HashMap::new();
        expected.insert(
            Some("previous".to_string()),
            link(
                "https://sentry.io/api/0/projects/1/groups/?&cursor=1420837590:0:1",
                &[("cursor", "1420837590:0:1")],
                &[("rel", "previous"), ("results", "false")],
            ),
        );
        expected.insert(
            Some("next".to_string()),
            link(
                "https://sentry.io/api/0/projects/1/groups/?&cursor=1420837533:0:0",
                &[("cursor", "1420837533:0:0")],
                &[("rel", "next"), ("results", "true")],
            ),
        );

        let parsed = parse(link_header).unwrap();

        assert_eq!(expected, parsed);
    }

    #[test]
    fn test_error_display() {
        let cases = [
            (ErrorKind::InternalError, "internal parser error"),
            (ErrorKind::InvalidURI, "unable to parse URI component"),
            (ErrorKind::MalformedParam, "malformed parameter list"),
            (ErrorKind::MalformedQuery, "malformed URI query"),
            (ErrorKind::MissingRel, "missing 'rel' parameter"),
        ];

        for (kind, message) in cases.iter() {
            assert_eq!(Error(*kind).to_string(), *message);
        }
    }

    #[test]
    fn test_error_from() {
        let e1 = Error(ErrorKind::InternalError);
        let e2 = Error::from(&e1);

        assert_eq!(e1, e2);
    }
}