1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
//! Contains structs and functions to parse Debian-styled RFC 822 files.
use core::iter::Iterator;
use std::collections::HashMap;
/// The result of a parsing error.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed with `{}` and propagated through `Box<dyn std::error::Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
    /// Human-readable description of what went wrong.
    pub msg: String,
    /// Line the error was detected on, when one could be determined.
    pub line: Option<usize>,
}

impl std::fmt::Display for ParserError {
    /// Writes the message, followed by the line number when one is known.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.line {
            Some(line) => write!(f, "{} (line {})", self.msg, line),
            None => f.write_str(&self.msg),
        }
    }
}

impl std::error::Error for ParserError {}
/// A section in a TagFile. A TagFile is made up of double-newline (`\n\n`)
/// separated paragraphs, each of which makes up one of these sections.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagSection {
    /// Field names mapped to their (possibly multi-line) values.
    data: HashMap<String, String>,
}
impl TagSection {
    /// Build the [`Err`] variant returned by [`TagSection::new`], prefixing
    /// the message with `E:`.
    fn error(msg: &str, line: Option<usize>) -> Result<Self, ParserError> {
        Err(ParserError {
            msg: format!("E:{}", msg),
            line,
        })
    }

    /// A line starts a new `Key: value` field unless it begins with a space
    /// or a tab, in which case it continues the previous field's value.
    fn line_is_key(line: &str) -> bool {
        !line.starts_with(' ') && !line.starts_with('\t')
    }

    /// Whether the entry following `current_line` in `lines` is a
    /// continuation line. Returns `false` when `current_line` is the last
    /// entry.
    ///
    /// `lines` holds `(line number, text)` pairs with comments already
    /// removed; `current_line` is an index into that slice.
    fn next_line_extends_value(lines: &[(usize, &str)], current_line: usize) -> bool {
        match lines.get(current_line + 1) {
            Some((_, next_line)) => !Self::line_is_key(next_line),
            None => false,
        }
    }

    /// Create a new [`TagSection`] instance.
    ///
    /// Lines starting with `#` are comments and are ignored entirely; in
    /// particular a comment placed between a field and its continuation
    /// lines does not end that field.
    ///
    /// The first line of a value has one leading space removed. Continuation
    /// lines are appended verbatim (including their leading whitespace),
    /// separated by `\n`. A field whose first line carries no value is stored
    /// starting with `\n`.
    ///
    /// # Returns
    /// * A [`Result`]: The [`Ok`] variant if there was no issue parsing the
    ///   section, and the [`Err`] variant if there was.
    ///
    /// # Errors
    /// * `section` contains more than one paragraph (a `\n\n` sequence).
    /// * `section` is empty.
    /// * A non-indented line has no `:` separator.
    /// * An indented line appears before any key was defined.
    pub fn new(section: &str) -> Result<Self, ParserError> {
        // Make sure the string doesn't contain multiple sections.
        if section.contains("\n\n") {
            return Self::error("More than one section was found", None);
        }

        // Make sure the user didn't pass an empty string.
        if section.is_empty() {
            return Self::error("An empty string was passed", None);
        }

        // Number the lines *before* dropping comments so that reported error
        // positions still refer to the text the caller passed in. Removing the
        // comments up front keeps them from being mistaken for the start of a
        // new field by the look-ahead below.
        let lines: Vec<(usize, &str)> = section
            .lines()
            .enumerate()
            .map(|(index, line)| (index + 1, line))
            .filter(|(_, line)| !line.starts_with('#'))
            .collect();

        let mut data = HashMap::new();

        // The field currently being accumulated.
        let mut current_key: Option<String> = None;
        let mut current_value = String::new();

        for (index, &(line_number, line)) in lines.iter().enumerate() {
            let next_extends = Self::next_line_extends_value(&lines, index);

            if Self::line_is_key(line) {
                let (key, value) = match line.split_once(':') {
                    Some((key, value)) => (key, value.strip_prefix(' ').unwrap_or(value)),
                    None => {
                        return Self::error(
                            "Line doesn't contain a ':' separator",
                            Some(line_number),
                        )
                    },
                };

                current_key = Some(key.to_string());
                current_value = value.to_string();

                // An empty first line always yields a leading newline, whether
                // or not continuation lines follow. Otherwise the newline is
                // only added as a separator before a continuation line.
                if value.is_empty() || next_extends {
                    current_value.push('\n');
                }
            } else {
                // Continuation line: keep it verbatim.
                current_value.push_str(line);

                if next_extends {
                    current_value.push('\n');
                }
            }

            // The field is complete once the next line starts a new key or the
            // section ends.
            if !next_extends {
                match current_key.take() {
                    Some(key) => {
                        data.insert(key, std::mem::take(&mut current_value));
                    },
                    // Indented lines appeared with no key before them. This is
                    // checked here rather than up front because any number of
                    // comment lines may precede the first field.
                    None => {
                        return Self::error(
                            "No key defined for the currently indented line",
                            Some(line_number),
                        )
                    },
                }
            }
        }

        Ok(Self { data })
    }

    /// Get the underlying [`HashMap`] used in the generated [`TagSection`].
    pub fn hashmap(&self) -> &HashMap<String, String> {
        &self.data
    }

    /// Get the value of the specified key, or [`None`] if the section has no
    /// such field.
    pub fn get(&self, key: &str) -> Option<&String> {
        self.data.get(key)
    }
}
/// Parses a TagFile: these are files such as Debian `control` and `Packages`
/// files.
///
/// Sections are separated by one or more blank lines; any number of extra
/// blank lines between (or after) sections is ignored.
///
/// # Returns
/// * A [`Result`]: The [`Ok`] variant containing the vector of [`TagSection`]
///   objects if there was no issue parsing the file, and the [`Err`] variant if
///   there was.
///
/// # Errors
/// Returns the [`ParserError`] of the first section that fails to parse. Its
/// `line` is rewritten to be relative to `content` as a whole: the section's
/// own line number plus the number of lines preceding the section. If the
/// section reported no line, `line` is just the number of lines preceding the
/// section.
pub fn parse_tagfile(content: &str) -> Result<Vec<TagSection>, ParserError> {
    let mut sections = Vec::new();

    // Number of lines of `content` that come before the chunk being handled.
    let mut lines_before = 0;

    for raw_section in content.split("\n\n") {
        // An odd-length run of newlines between two sections leaves stray
        // newlines at the start of the following chunk. Drop them, but remember
        // how many lines they occupied so error positions stay accurate.
        let section = raw_section.trim_start_matches('\n');
        let leading_blank_lines = raw_section.len() - section.len();

        // A chunk made up solely of newlines means extra blank lines were
        // placed between sections; skip it and keep going.
        if !section.is_empty() {
            match TagSection::new(section) {
                Ok(parsed) => sections.push(parsed),
                Err(mut err) => {
                    let section_start = lines_before + leading_blank_lines;
                    err.line = Some(section_start + err.line.unwrap_or(0));
                    return Err(err);
                },
            }
        }

        // The chunk's own lines, plus the blank line of the separator after it.
        lines_before += raw_section.split('\n').count() + 1;
    }

    Ok(sections)
}