rust_apt/
tagfile.rs

//! Contains structs and functions to parse Debian-style RFC 822 files.
2use core::iter::Iterator;
3use std::collections::HashMap;
4use std::fmt;
5
/// The error produced when parsing a TagFile, or one of its sections, fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
	/// Description of what went wrong.
	pub msg: String,
	/// Line the problem was detected on, when one could be determined.
	pub line: Option<usize>,
}

impl fmt::Display for ParserError {
	/// Writes the message, followed by ` at line '<n>'` when the error carries
	/// a line number.
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self.line {
			Some(num) => write!(f, "{} at line '{num}'", self.msg),
			None => write!(f, "{}", self.msg),
		}
	}
}

// No methods are overridden: the defaults together with the `Debug` and
// `Display` impls above are all `std::error::Error` requires.
impl std::error::Error for ParserError {}
25
26/// A section in a TagFile. A TagFile is made up of double-newline (`\n\n`)
27/// separated paragraphs, each of which make up one of these sections.
28#[derive(Debug)]
29pub struct TagSection {
30	data: HashMap<String, String>,
31}
32
33impl TagSection {
34	fn error(msg: &str, line: Option<usize>) -> Result<Self, ParserError> {
35		Err(ParserError {
36			msg: "E:".to_owned() + msg,
37			line,
38		})
39	}
40
41	fn line_is_key(line: &str) -> bool { !line.starts_with(' ') && !line.starts_with('\t') }
42
43	fn next_line_extends_value(lines: &[&str], current_line: usize) -> bool {
44		if let Some(next_line) = lines.get(current_line + 1) {
45			!Self::line_is_key(next_line)
46		} else {
47			false
48		}
49	}
50
51	/// Create a new [`TagSection`] instance.
52	/// # Returns
53	/// * A [`Result`]: The [`Ok`] variant if there was no issue parsing the
54	///   section, and the [`Err`] variant if there was.
55	pub fn new(section: &str) -> Result<Self, ParserError> {
56		// Make sure the string doesn't contain multiple sections.
57		if section.contains("\n\n") {
58			return Self::error("More than one section was found", None);
59		}
60
61		// Make sure the user didn't pass an empty string.
62		if section.is_empty() {
63			return Self::error("An empty string was passed", None);
64		}
65
66		// Start building up the HashMap.
67		let mut data = HashMap::new();
68		let lines = section.lines().collect::<Vec<&str>>();
69
70		// Variables used while parsing.
71		let mut current_key: Option<String> = None;
72		let mut current_value = String::new();
73
74		for (index, line) in lines.iter().enumerate() {
75			// Indexes start at 0, so increase by 1 to get the line number.
76			let line_number = index + 1;
77
78			// If this line starts with a comment ignore it.
79			if line.starts_with('#') {
80				continue;
81			}
82
83			// If this line is a new key, split the line into the key and its value.
84			if Self::line_is_key(line) {
85				let (key, value) = match line.split_once(':') {
86					Some((key, value)) => {
87						(key.to_string(), value.strip_prefix(' ').unwrap_or(value))
88					},
89					None => {
90						return Self::error(
91							"Line doesn't contain a ':' separator",
92							Some(line_number),
93						);
94					},
95				};
96
97				// Set the current key and value.
98				// If the value is empty, then this is a multiline field, and it's going to be
99				// one of these things:
100				// 1. A multiline field, in which case we want to add a
101				// newline to reflect such.
102				// 2. A key with an empty value, in which case it will
103				// be removed post-processing.
104				current_key = Some(key);
105
106				if value.is_empty() {
107					current_value = "\n".to_string();
108				} else {
109					current_value = value.to_string();
110
111					// If the next extends the value, add the newline before it.
112					if Self::next_line_extends_value(&lines, index) {
113						current_value += "\n";
114					}
115				}
116			}
117
118			// If this line is indented with spaces or tabs, add it to the current value.
119			// This should never end up running in conjunction with the above `if` block.
120			if line.starts_with(' ') || line.starts_with('\t') {
121				current_value += line;
122
123				// If the next line extends the value, add the newline. `line_number`
124				// conveniently is the next index, so use that to our advantage.
125				if Self::next_line_extends_value(&lines, index) {
126					current_value += "\n";
127				}
128			}
129
130			// If the next line is a new key or this is the last line, add the current key
131			// and value to the HashMap. `line_number` conveniently is the next index, so
132			// use that to our advantage.
133			if !Self::next_line_extends_value(&lines, index) {
134				// If no key exists, we've defined a paragraph (at the beginning of the control
135				// file) with no key. This would be parsed at the very beginning, but the file
136				// may have an unknown amount of comment lines, so we just do this here as a
137				// normal step of the parsing stage.
138				if current_key.is_none() {
139					return Self::error(
140						"No key defined for the currently indented line",
141						Some(line_number),
142					);
143				}
144
145				// Add the key and reset the `current_key` and `current_value` counters.
146				data.insert(current_key.unwrap(), current_value);
147				current_key = None;
148				current_value = String::new();
149			}
150		}
151
152		Ok(Self { data })
153	}
154
155	/// Get the underlying [`HashMap`] used in the generated [`TagSection`].
156	pub fn hashmap(&self) -> &HashMap<String, String> { &self.data }
157
158	/// Get the value of the specified key.
159	pub fn get(&self, key: &str) -> Option<&String> { self.data.get(key) }
160
161	/// Get the value of the specified key,
162	///
163	/// Returns specified default on failure.
164	pub fn get_default<'a, 'b: 'a>(&'a self, key: &str, default: &'b str) -> &str {
165		if let Some(value) = self.data.get(key) {
166			return value;
167		}
168		default
169	}
170}
171
172/// Parses a TagFile: these are files such as Debian `control` and `Packages`
173/// files.
174///
175/// # Returns
176/// * A [`Result`]: The [`Ok`] variant containing the vector of [`TagSection`]
177///   objects if there was no issue parsing the file, and the [`Err`] variant if
178///   there was.
179pub fn parse_tagfile(content: &str) -> Result<Vec<TagSection>, ParserError> {
180	let mut sections = vec![];
181	let section_strings = content.split("\n\n");
182
183	for (iter, section) in section_strings.clone().enumerate() {
184		// If this section is empty (i.e. more than one empty line was placed between
185		// each section), then ignore this section.
186		if section.is_empty() || section.chars().all(|c| c == '\n') {
187			break;
188		}
189
190		match TagSection::new(section) {
191			Ok(section) => sections.push(section),
192			Err(mut err) => {
193				// If an error line was provided, add the number of lines in the sections before
194				// this one. Otherwise no line was specified, and we'll just specify the number
195				// of lines in the section before this one so we know which section the line is
196				// in.
197				let mut line_count = 0;
198
199				for _ in 0..iter {
200					// Add one for the line separation between each section.
201					line_count += 1;
202
203					// Add the line count in this section.
204					line_count += section_strings.clone().count();
205				}
206
207				if let Some(line) = err.line {
208					err.line = Some(line_count + line);
209				} else {
210					err.line = Some(line_count);
211				}
212			},
213		}
214	}
215
216	Ok(sections)
217}