http_link/
lib.rs

//! A simple crate for parsing the HTTP Link header.
//! Naively implements the algorithms described in [RFC8288](https://datatracker.ietf.org/doc/html/rfc8288#appendix-B).
3
4use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, CONTROLS};
5use std::fmt;
6
// A WHATWG URL is the equivalent of a W3C URI, with best-effort handling of non-ASCII characters.
8use url::Url;
9
10/// A parsed link object.
11#[derive(Debug, Eq, PartialEq, Clone, Hash)]
12pub struct Link {
13	pub target: Url,
14	pub rel: String,
15	pub context: Url,
16	pub attributes: Vec<Parameter>
17}
18
19impl fmt::Display for Link {
20	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21		let semicolon_separated = self.attributes.iter().fold("".to_string(), |acc, p| acc + "; " + &p.to_string());
22		write!(f, "<{}>; rel={}; anchor={}{}", self.target, self.rel, self.context, semicolon_separated)
23	}
24}
25
/// An attribute in a parsed link object.
///
/// A plain name/value pair; both parts are stored as owned strings.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Parameter {
	/// Attribute name.
	pub name: String,
	/// Attribute value.
	pub value: String,
}
32
33const VALUE: &AsciiSet = &CONTROLS.add(b'*').add(b'\'').add(b'%');
34
35impl fmt::Display for Parameter {
36	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37		if self.value.is_ascii() {
38			write!(f, "{}={}", self.name, self.value)
39		} else {
40			write!(f, "{}*=UTF-8''{}", self.name.clone(), utf8_percent_encode(&self.value, VALUE).to_string())
41		}
42	}
43}
44
45/// Errors that can occur during parsing.
46#[derive(Debug)]
47pub enum ParseLinkError {
48	SyntaxError(String),
49	InvalidUrl(url::ParseError),
50	BadEncoding(std::str::Utf8Error),
51	UnknownEncoding
52}
53
54use ParseLinkError::*;
55
56impl fmt::Display for ParseLinkError {
57	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58		match self {
59			SyntaxError(description) => write!(f, "Syntax error: {}", description),
60			InvalidUrl(e) => write!(f, "{}", e),
61			BadEncoding(e) => write!(f, "Bad charactor encoding in attributes: {}", e),
62			UnknownEncoding => write!(f, "Unknown charactor encoding in attributes")
63		}
64	}
65}
66
// Marker impl: all `std::error::Error` methods keep their defaults, so no
// `source()` is reported; wrapped errors are surfaced through `Display` instead.
impl std::error::Error for ParseLinkError {}
68
/// Returns `true` for the two characters treated as HTTP whitespace here:
/// space and horizontal tab.
fn is_http_whitespace(ch: char) -> bool {
	match ch {
		' ' | '\t' => true,
		_ => false
	}
}
72
/// Attribute names for which only the first occurrence within a link is kept;
/// later duplicates are dropped by `parse_link_header`.
static UNIQUE_ATTRIBUTES: [&str; 4] = [
	"media",
	"title",
	"title*",
	"type",
];
74
75/// Parse a Link header with given base URL.
76pub fn parse_link_header(mut s: &str, base: &Url) -> Result<Vec<Link>, ParseLinkError> {
77	let mut links = Vec::new(); // 1.
78
79	// 2.
80	loop {
81		s = s.trim_start_matches(is_http_whitespace); // 2.1.
82
83		// 2.2.
84		if !s.starts_with('<') {
85			return Err(SyntaxError("Expected \"<\"".to_string()));
86		}
87
88		s = &s[1..]; // 2.3.
89
90		// 2.4/2.5.
91		let (target_str, params_str) = s.split_at(s.find('>').ok_or(SyntaxError("Unclosed <".to_string()))?);
92
93		// 2.6/2.7.
94		let parse_result = parse_params(&params_str[1..])?;
95		let params = parse_result.0;
96		s = parse_result.1;
97
98		let target = Url::options().base_url(Some(base)).parse(target_str).map_err(|e| InvalidUrl(e))?; // 2.8.
99
100		// 2.9/2.10.
101		let relations = params
102			.iter()
103			.find(|p| p.name == "rel")
104			.map_or("", |p| &p.value)
105			.split(is_http_whitespace)
106			.map(|s| s.to_string())
107			.collect::<Vec<String>>();
108
109		// 2.11/2.12.
110		let anchor = params.iter().find(|p| p.name == "anchor");
111		let context = if let Some(anchor) = anchor {
112			Url::options().base_url(Some(base)).parse(&anchor.value).map_err(|e| InvalidUrl(e))?
113		} else {
114			base.to_owned()
115		};
116
117		let mut attributes_found = Vec::<String>::new();
118		let mut attributes = params
119			.into_iter()
120			.filter(|p| {
121				// 2.13/2.14.
122				if p.name == "rel" || p.name == "anchor" {
123					return false; // 2.14.1.
124				}
125				if UNIQUE_ATTRIBUTES.iter().any(|n| n == &p.name) {
126					if attributes_found.iter().any(|n| n == &p.name) {
127						return false; // 2.14.2.
128					} else {
129						attributes_found.push(p.name.to_string());
130					}
131				}
132				true
133			})
134			.collect::<Vec<Parameter>>();
135
136		// 2.15
137		let star_param_names = attributes
138			.iter()
139			.filter_map(|p| if p.name.ends_with('*') { Some(p.name.to_string()) } else { None })
140			.collect::<Vec<String>>();
141
142		// 2.16
143		for star_param_name in star_param_names {
144			let base_param_name = &star_param_name[..(star_param_name.len() - 1)]; // 2.16.1.
145			attributes = attributes
146				.into_iter()
147				.filter_map(|p| {
148					if p.name == base_param_name {
149						None // 2.16.3.
150					} else {
151						if p.name == star_param_name {
152							// 2.16.4.
153							Some(Parameter {
154								name: base_param_name.to_string(),
155								value: p.value
156							})
157						} else {
158							Some(p)
159						}
160					}
161				})
162				.collect();
163		}
164
165		// 2.17
166		for rel in relations.iter().map(|rel| rel.to_ascii_lowercase() /* 2.17.1. */) {
167			// 2.17.2.
168			links.push(Link {
169				target: target.to_owned(),
170				rel,
171				context: context.to_owned(),
172				attributes: attributes.to_owned()
173			});
174		}
175
176		s = s.trim_start_matches(is_http_whitespace);
177		if s.is_empty() {
178			break;
179		}
180		if !s.starts_with(',') {
181			return Err(SyntaxError("Expected \",\"".to_string())); // 2.2.
182		}
183
184		s = &s[1..];
185	}
186	Ok(links) // 3.
187}
188
189/// Parse parameters in a Link header.
190/// Returns parsed parameters and remainder of the input string.
191pub fn parse_params(mut s: &str) -> Result<(Vec<Parameter>, &str), ParseLinkError> {
192	let mut params = Vec::new(); // 1.
193
194	// 2.
195	while !(s.is_empty() || s.starts_with(',')) {
196		s = s.trim_start_matches(is_http_whitespace); // 2.1.
197
198		if !s.starts_with(';') {
199			return Err(SyntaxError("Expected parameter separator".to_string())); // 2.2.
200		}
201
202		s = s[1..].trim_start_matches(is_http_whitespace); // 2.3/2.4.
203
204		// 2.5/2.9.
205		let split = s.split_at(
206			s.find(|ch| is_http_whitespace(ch) || ch == '=' || ch == ';' || ch == ',')
207				.ok_or(SyntaxError("Expected \"=\" or parameter separator".to_string()))?
208		);
209		let name = split.0.to_ascii_lowercase();
210
211		s = split.1.trim_start_matches(is_http_whitespace); // 2.6.
212
213		let value = if s.starts_with('=') {
214			// 2.7.
215
216			s = s[1..].trim_start_matches(is_http_whitespace); // 2.7.1/2.7.2.
217			let value = if s.starts_with('"') {
218				// 2.7.3. https://tools.ietf.org/html/rfc8288#appendix-B.4
219				let mut v = String::new(); // 1.
220
221				// 2. is skipped
222
223				s = &s[1..]; // 3.
224				while !s.starts_with('"') {
225					// 4/4.2.
226
227					// 4.1.
228					if s.starts_with('\\') {
229						s = &s[1..];
230					}
231
232					v.push(s.chars().next().ok_or(SyntaxError("Unexpected end of input".to_string()))?); // 4.1.2.
233
234					// 4.3/4.1.3.
235					s = s.get(1..).ok_or(SyntaxError("Bad non-ASCII charactor detected".to_string()))?;
236				}
237				s = &s[1..];
238				v
239			} else {
240				// 2.7.4.
241				let split = s.split_at(s.find(|ch| ch == ';' || ch == ',').unwrap_or(s.len()));
242				s = split.1;
243				split.0.to_string()
244			};
245			if name.ends_with('*') {
246				// 2.7.5.
247				let mut iter = value.split('\'');
248				let (encoding, _lang, value) = (
249					iter.next().unwrap(),
250					iter.next().ok_or(SyntaxError("Expected \"'\"".to_string()))?,
251					iter.next().ok_or(SyntaxError("Expected \"'\"".to_string()))?
252				);
253				if iter.next().is_some() {
254					return Err(SyntaxError("Unexpected \"'\"".to_string()));
255				}
256				match encoding.to_ascii_uppercase().as_str() {
257					"UTF-8" => percent_decode_str(value).decode_utf8().map_err(|e| BadEncoding(e))?.to_string(),
258					_ => return Err(UnknownEncoding)
259				}
260			} else {
261				value
262			}
263		} else {
264			"".to_string() // 2.8.
265		};
266		params.push(Parameter { name, value }); // 2.10.
267		s = s.trim_start_matches(is_http_whitespace); // 2.11.
268	}
269	Ok((params, s))
270}
271
/// Test helper: parses `s` against `base`, checks the result against
/// `expected`, then checks that the parsed links, rendered with `Display` and
/// joined with `", "`, equal `expected_str`.
#[cfg(test)]
fn assert_parse_stringify(s: &str, base: &Url, expected: Vec<Link>, expected_str: &str) {
	let links = parse_link_header(s, base).unwrap();
	assert_eq!(links, expected);

	let rendered: Vec<String> = links.iter().map(|link| link.to_string()).collect();
	assert_eq!(rendered.join(", "), expected_str);
}
281
/// Runs the example header values through `assert_parse_stringify`, checking
/// both the parsed links and their string rendering.
#[test]
fn rfc8288_examples() -> Result<(), ParseLinkError> {
	let base = Url::parse("http://example.com").unwrap();

	// Small constructors that keep the table below readable.
	let url = |s: &str| Url::parse(s).unwrap();
	let title = |value: &str| {
		vec![Parameter {
			name: "title".to_string(),
			value: value.to_string()
		}]
	};
	let link = |target: Url, rel: &str, context: Url, attributes: Vec<Parameter>| Link {
		target,
		rel: rel.to_string(),
		context,
		attributes
	};

	// (header value, expected links, expected rendering)
	let cases: Vec<(&str, Vec<Link>, &str)> = vec![
		(
			r#"<http://example.com/TheBook/chapter2>; rel="previous"; title="previous chapter""#,
			vec![link(
				url("http://example.com/TheBook/chapter2"),
				"previous",
				base.clone(),
				title("previous chapter")
			)],
			"<http://example.com/TheBook/chapter2>; rel=previous; anchor=http://example.com/; title=previous chapter"
		),
		(
			r#"</>; rel="http://example.net/foo""#,
			vec![link(base.clone(), "http://example.net/foo", base.clone(), Vec::new())],
			"<http://example.com/>; rel=http://example.net/foo; anchor=http://example.com/"
		),
		(
			r##"</terms>; rel="copyright"; anchor="#foo""##,
			vec![link(
				url("http://example.com/terms"),
				"copyright",
				url("http://example.com#foo"),
				Vec::new()
			)],
			"<http://example.com/terms>; rel=copyright; anchor=http://example.com/#foo"
		),
		(
			"</TheBook/chapter2>; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, \
			</TheBook/chapter4>; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel",
			vec![
				link(
					url("http://example.com/TheBook/chapter2"),
					"previous",
					base.clone(),
					title("letztes Kapitel")
				),
				link(
					url("http://example.com/TheBook/chapter4"),
					"next",
					base.clone(),
					title("nächstes Kapitel")
				),
			],
			"<http://example.com/TheBook/chapter2>; rel=previous; anchor=http://example.com/; \
			title=letztes Kapitel, <http://example.com/TheBook/chapter4>; rel=next; \
			anchor=http://example.com/; title*=UTF-8''n%C3%A4chstes Kapitel"
		),
		(
			r#"<http://example.org/>; rel="start http://example.net/relation/other""#,
			vec![
				link(url("http://example.org/"), "start", base.clone(), Vec::new()),
				link(
					url("http://example.org/"),
					"http://example.net/relation/other",
					base.clone(),
					Vec::new()
				),
			],
			"<http://example.org/>; rel=start; anchor=http://example.com/, \
			<http://example.org/>; rel=http://example.net/relation/other; anchor=http://example.com/"
		),
	];

	for (input, expected, expected_str) in cases {
		assert_parse_stringify(input, &base, expected, expected_str);
	}

	Ok(())
}