rust_apt/
tagfile.rs

1//! Contains structs and functions to parse Debian-styled RFC 822 files.
2use core::iter::Iterator;
3use std::collections::HashMap;
4use std::fmt;
5
/// Error reported when a tag file, or one of its sections, fails to parse.
///
/// `msg` is the human-readable description of the problem. `line` is the line
/// number the problem was detected at, when one is known.
#[derive(Debug)]
pub struct ParserError {
	pub msg: String,
	pub line: Option<usize>,
}

impl fmt::Display for ParserError {
	/// Write the message, followed by the quoted line number when one is set.
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self.line {
			Some(num) => write!(f, "{} at line '{num}'", self.msg),
			None => write!(f, "{}", self.msg),
		}
	}
}

impl std::error::Error for ParserError {}
25
26/// A section in a TagFile. A TagFile is made up of double-newline (`\n\n`)
27/// separated paragraphs, each of which make up one of these sections.
28#[derive(Debug)]
29pub struct TagSection {
30	data: HashMap<String, String>,
31}
32
33impl From<TagSection> for HashMap<String, String> {
34	fn from(value: TagSection) -> Self { value.data }
35}
36
37impl TagSection {
38	fn error(msg: &str, line: Option<usize>) -> Result<Self, ParserError> {
39		Err(ParserError {
40			msg: "E:".to_owned() + msg,
41			line,
42		})
43	}
44
45	fn line_is_key(line: &str) -> bool { !line.starts_with(' ') && !line.starts_with('\t') }
46
47	fn next_line_extends_value(lines: &[&str], current_line: usize) -> bool {
48		if let Some(next_line) = lines.get(current_line + 1) {
49			!Self::line_is_key(next_line)
50		} else {
51			false
52		}
53	}
54
55	/// Create a new [`TagSection`] instance.
56	/// # Returns
57	/// * A [`Result`]: The [`Ok`] variant if there was no issue parsing the
58	///   section, and the [`Err`] variant if there was.
59	pub fn new(section: &str) -> Result<Self, ParserError> {
60		// Make sure the string doesn't contain multiple sections.
61		if section.contains("\n\n") {
62			return Self::error("More than one section was found", None);
63		}
64
65		// Make sure the user didn't pass an empty string.
66		if section.is_empty() {
67			return Self::error("An empty string was passed", None);
68		}
69
70		// Start building up the HashMap.
71		let mut data = HashMap::new();
72		let lines = section.lines().collect::<Vec<&str>>();
73
74		// Variables used while parsing.
75		let mut current_key: Option<String> = None;
76		let mut current_value = String::new();
77
78		for (index, line) in lines.iter().enumerate() {
79			// Indexes start at 0, so increase by 1 to get the line number.
80			let line_number = index + 1;
81
82			// If this line starts with a comment ignore it.
83			if line.starts_with('#') {
84				continue;
85			}
86
87			// If this line is a new key, split the line into the key and its value.
88			if Self::line_is_key(line) {
89				let (key, value) = match line.split_once(':') {
90					Some((key, value)) => {
91						(key.to_string(), value.strip_prefix(' ').unwrap_or(value))
92					},
93					None => {
94						return Self::error(
95							"Line doesn't contain a ':' separator",
96							Some(line_number),
97						);
98					},
99				};
100
101				// Set the current key and value.
102				// If the value is empty, then this is a multiline field, and it's going to be
103				// one of these things:
104				// 1. A multiline field, in which case we want to add a
105				// newline to reflect such.
106				// 2. A key with an empty value, in which case it will
107				// be removed post-processing.
108				current_key = Some(key);
109
110				if value.is_empty() {
111					current_value = "\n".to_string();
112				} else {
113					current_value = value.to_string();
114
115					// If the next extends the value, add the newline before it.
116					if Self::next_line_extends_value(&lines, index) {
117						current_value += "\n";
118					}
119				}
120			}
121
122			// If this line is indented with spaces or tabs, add it to the current value.
123			// This should never end up running in conjunction with the above `if` block.
124			if line.starts_with(' ') || line.starts_with('\t') {
125				current_value += line;
126
127				// If the next line extends the value, add the newline. `line_number`
128				// conveniently is the next index, so use that to our advantage.
129				if Self::next_line_extends_value(&lines, index) {
130					current_value += "\n";
131				}
132			}
133
134			// If the next line is a new key or this is the last line, add the current key
135			// and value to the HashMap. `line_number` conveniently is the next index, so
136			// use that to our advantage.
137			if !Self::next_line_extends_value(&lines, index) {
138				// If no key exists, we've defined a paragraph (at the beginning of the control
139				// file) with no key. This would be parsed at the very beginning, but the file
140				// may have an unknown amount of comment lines, so we just do this here as a
141				// normal step of the parsing stage.
142				if current_key.is_none() {
143					return Self::error(
144						"No key defined for the currently indented line",
145						Some(line_number),
146					);
147				}
148
149				// Add the key and reset the `current_key` and `current_value` counters.
150				data.insert(current_key.unwrap(), current_value);
151				current_key = None;
152				current_value = String::new();
153			}
154		}
155
156		Ok(Self { data })
157	}
158
159	/// Get the underlying [`HashMap`] used in the generated [`TagSection`].
160	pub fn hashmap(&self) -> &HashMap<String, String> { &self.data }
161
162	/// Get the value of the specified key.
163	pub fn get(&self, key: &str) -> Option<&String> { self.data.get(key) }
164
165	/// Get the value of the specified key,
166	///
167	/// Returns specified default on failure.
168	pub fn get_default<'a>(&'a self, key: &str, default: &'a str) -> &'a str {
169		if let Some(value) = self.data.get(key) {
170			return value;
171		}
172		default
173	}
174}
175
176/// Parses a TagFile: these are files such as Debian `control` and `Packages`
177/// files.
178///
179/// # Returns
180/// * A [`Result`]: The [`Ok`] variant containing the vector of [`TagSection`]
181///   objects if there was no issue parsing the file, and the [`Err`] variant if
182///   there was.
183pub fn parse_tagfile(content: &str) -> Result<Vec<TagSection>, ParserError> {
184	let mut sections = vec![];
185	let section_strings = content.split("\n\n");
186
187	for (iter, section) in section_strings.clone().enumerate() {
188		// If this section is empty (i.e. more than one empty line was placed between
189		// each section), then ignore this section.
190		if section.is_empty() || section.chars().all(|c| c == '\n') {
191			break;
192		}
193
194		match TagSection::new(section) {
195			Ok(section) => sections.push(section),
196			Err(mut err) => {
197				// If an error line was provided, add the number of lines in the sections before
198				// this one. Otherwise no line was specified, and we'll just specify the number
199				// of lines in the section before this one so we know which section the line is
200				// in.
201				let mut line_count = 0;
202
203				for _ in 0..iter {
204					// Add one for the line separation between each section.
205					line_count += 1;
206
207					// Add the line count in this section.
208					line_count += section_strings.clone().count();
209				}
210
211				if let Some(line) = err.line {
212					err.line = Some(line_count + line);
213				} else {
214					err.line = Some(line_count);
215				}
216			},
217		}
218	}
219
220	Ok(sections)
221}