1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
//! Contains structs and functions to parse Debian-styled RFC 822 files.
use core::iter::Iterator;
use std::collections::HashMap;

/// The error produced when a section or a TagFile fails to parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
	/// Human-readable description of what went wrong.
	pub msg: String,
	/// Line number associated with the error, when one is known.
	pub line: Option<usize>,
}

impl std::fmt::Display for ParserError {
	/// Writes the message, followed by the line number when one is available.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		match self.line {
			Some(line) => write!(f, "{} (line {})", self.msg, line),
			None => write!(f, "{}", self.msg),
		}
	}
}

// Lets callers propagate the error with `?` into `Box<dyn std::error::Error>`.
impl std::error::Error for ParserError {}

/// A section in a TagFile. A TagFile is made up of double-newline (`\n\n`)
/// separated paragraphs, each of which make up one of these sections.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagSection {
	/// Field names mapped to their parsed values.
	data: HashMap<String, String>,
}

impl TagSection {
	/// Build the [`Err`] result returned by the parser, prefixing `msg` with
	/// `E:`.
	fn error(msg: &str, line: Option<usize>) -> Result<Self, ParserError> {
		Err(ParserError {
			msg: "E:".to_owned() + msg,
			line,
		})
	}

	/// Whether `line` starts a new field, i.e. it is not indented with a space
	/// or a tab.
	fn line_is_key(line: &str) -> bool { !line.starts_with(' ') && !line.starts_with('\t') }

	/// Whether a line exists after index `current_line` and that line is an
	/// indented continuation line.
	fn next_line_extends_value(lines: &[&str], current_line: usize) -> bool {
		lines
			.get(current_line + 1)
			.map_or(false, |next_line| !Self::line_is_key(next_line))
	}

	/// Create a new [`TagSection`] instance from a single paragraph.
	///
	/// Lines starting with `#` are comments and are dropped before parsing, so a
	/// comment may sit between two continuation lines without ending the field.
	/// Continuation lines are kept verbatim (leading whitespace included) and are
	/// joined to the field's value with `\n`. A field whose first line carries no
	/// value is stored with a leading `\n`.
	///
	/// # Returns
	/// * A [`Result`]: The [`Ok`] variant if there was no issue parsing the
	///   section, and the [`Err`] variant if there was.
	///
	/// # Errors
	/// * `section` is empty or contains more than one paragraph (no line number
	///   is reported).
	/// * A non-indented line has no `:` separator.
	/// * An indented line appears before any field has been started.
	pub fn new(section: &str) -> Result<Self, ParserError> {
		// Make sure the string doesn't contain multiple sections.
		if section.contains("\n\n") {
			return Self::error("More than one section was found", None);
		}

		// Make sure the user didn't pass an empty string.
		if section.is_empty() {
			return Self::error("An empty string was passed", None);
		}

		// Drop comment lines up front so the look-ahead below can't mistake them for
		// the start of a new field. Every surviving line keeps its original 1-based
		// line number for error reporting.
		let (line_numbers, lines): (Vec<usize>, Vec<&str>) = section
			.lines()
			.enumerate()
			.filter(|(_, line)| !line.starts_with('#'))
			.map(|(index, line)| (index + 1, line))
			.unzip();

		let mut data = HashMap::new();

		// The field currently being assembled.
		let mut current_key: Option<String> = None;
		let mut current_value = String::new();

		for (index, (line, line_number)) in lines.iter().zip(line_numbers).enumerate() {
			let value_continues = Self::next_line_extends_value(&lines, index);

			if Self::line_is_key(line) {
				// A new field: split it into the key and the first line of its value.
				let (key, value) = match line.split_once(':') {
					Some((key, value)) => (key, value.strip_prefix(' ').unwrap_or(value)),
					None => {
						return Self::error(
							"Line doesn't contain a ':' separator",
							Some(line_number),
						)
					},
				};

				current_key = Some(key.to_string());

				// An empty first line is recorded as a lone newline: for a multiline field
				// it separates the (empty) first line from the continuation lines, and a
				// field with no value at all is stored as just that newline.
				current_value = if value.is_empty() {
					"\n".to_string()
				} else if value_continues {
					format!("{}\n", value)
				} else {
					value.to_string()
				};
			} else {
				// An indented line extends the value of the current field.
				current_value.push_str(line);

				if value_continues {
					current_value.push('\n');
				}
			}

			// The field is complete once the next line starts a new key or the section
			// ends.
			if !value_continues {
				match current_key.take() {
					Some(key) => {
						data.insert(key, std::mem::take(&mut current_value));
					},
					// Only indented lines have been seen so far, so there is no field
					// they could belong to.
					None => {
						return Self::error(
							"No key defined for the currently indented line",
							Some(line_number),
						)
					},
				}
			}
		}

		Ok(Self { data })
	}

	/// Get the underlying [`HashMap`] used in the generated [`TagSection`].
	pub fn hashmap(&self) -> &HashMap<String, String> { &self.data }

	/// Get the value of the specified key, or [`None`] if the section has no
	/// such field.
	pub fn get(&self, key: &str) -> Option<&String> { self.data.get(key) }
}

/// Parses a TagFile: these are files such as Debian `control` and `Packages`
/// files.
///
/// Sections are separated by one or more empty lines; surplus empty lines
/// before, between or after sections are ignored.
///
/// # Returns
/// * A [`Result`]: The [`Ok`] variant containing the vector of [`TagSection`]
///   objects if there was no issue parsing the file, and the [`Err`] variant if
///   there was. The error's `line` is counted from the start of `content`; if
///   the failing section reported no line, the section's first line is used.
pub fn parse_tagfile(content: &str) -> Result<Vec<TagSection>, ParserError> {
	let mut sections = Vec::new();

	// Number of lines in `content` that precede the chunk currently being handled.
	let mut lines_before = 0;

	for chunk in content.split("\n\n") {
		// A run of three or more newlines leaves its surplus newlines at the front of
		// the following chunk. Strip them, but keep counting them as lines.
		let section = chunk.trim_start_matches('\n');
		let section_offset = lines_before + (chunk.len() - section.len());

		// Lines consumed by this chunk plus the `\n\n` separator that follows it.
		lines_before += chunk.matches('\n').count() + 2;

		// Nothing but empty lines: skip it and keep going with the next chunk.
		if section.is_empty() {
			continue;
		}

		match TagSection::new(section) {
			Ok(parsed) => sections.push(parsed),
			Err(mut err) => {
				// Turn the section-relative line into a file-relative one.
				err.line = Some(section_offset + err.line.unwrap_or(1));
				return Err(err);
			},
		}
	}

	Ok(sections)
}