use std::{
borrow::Cow,
io::{BufRead, Write},
};
/// Failure raised while reading or parsing control data.
#[derive(Debug)]
pub enum ControlError {
    /// An I/O operation on the underlying reader failed.
    IoError(std::io::Error),
    /// The input was malformed; the payload is a human-readable description.
    ParseError(String),
}

/// Allows `?` to lift a `std::io::Error` into [`ControlError::IoError`].
impl From<std::io::Error> for ControlError {
    fn from(source: std::io::Error) -> Self {
        ControlError::IoError(source)
    }
}

/// Renders the error as `<category>: <detail>`.
impl std::fmt::Display for ControlError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ControlError::IoError(io) => f.write_fmt(format_args!("I/O error: {}", io)),
            ControlError::ParseError(text) => f.write_fmt(format_args!("parse error: {}", text)),
        }
    }
}

// The wrapped I/O error is already part of the `Display` text, so the default
// `source()` (which returns `None`) is kept.
impl std::error::Error for ControlError {}
/// Text of a control field, tagged with a layout category.
///
/// Every variant stores the text unchanged; the `From<Cow<str>>` conversion
/// chooses the variant by inspecting the text.
#[derive(Clone, Debug)]
pub enum ControlFieldValue<'a> {
    /// Chosen by the conversion when the text contains no newline.
    Simple(Cow<'a, str>),
    /// Chosen by the conversion when the text contains a newline and does not
    /// begin with a space or tab.
    Folded(Cow<'a, str>),
    /// Chosen by the conversion when the text contains a newline and begins
    /// with a space or tab.
    Multiline(Cow<'a, str>),
}

impl<'a> ControlFieldValue<'a> {
    /// Writes the stored text to `writer` verbatim, regardless of variant.
    ///
    /// Returns any error reported by `writer`.
    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        match self {
            Self::Simple(text) | Self::Folded(text) | Self::Multiline(text) => {
                writer.write_all(text.as_bytes())
            }
        }
    }
}

/// Classifies raw text into a [`ControlFieldValue`] variant.
impl<'a> From<Cow<'a, str>> for ControlFieldValue<'a> {
    fn from(value: Cow<'a, str>) -> Self {
        // Single-line text is always `Simple`.
        if !value.contains('\n') {
            return Self::Simple(value);
        }

        // For text spanning several lines the first character decides.
        let begins_with_blank = value.starts_with(|c: char| c == ' ' || c == '\t');
        if begins_with_blank {
            Self::Multiline(value)
        } else {
            Self::Folded(value)
        }
    }
}
#[derive(Clone, Debug)]
pub struct ControlField<'a> {
name: Cow<'a, str>,
value: ControlFieldValue<'a>,
}
impl<'a> ControlField<'a> {
pub fn new(name: Cow<'a, str>, value: ControlFieldValue<'a>) -> Self {
Self { name, value }
}
pub fn from_string_value(key: Cow<'a, str>, value: Cow<'a, str>) -> Result<Self, ControlError> {
let value = ControlFieldValue::from(value);
Ok(Self { name: key, value })
}
pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
writer.write_all(self.name.as_bytes())?;
writer.write_all(b": ")?;
self.value.write(writer)?;
writer.write_all(b"\n")
}
}
/// An ordered list of control fields.
///
/// Fields are kept in insertion order and duplicate names are not rejected;
/// name-based lookups return the first match. Name comparison is exact
/// (case-sensitive).
#[derive(Clone, Debug, Default)]
pub struct ControlParagraph<'a> {
    fields: Vec<ControlField<'a>>,
}

impl<'a> ControlParagraph<'a> {
    /// Appends `field` to the end of the paragraph.
    pub fn add_field(&mut self, field: ControlField<'a>) {
        self.fields.push(field);
    }

    /// Builds a field from `name` and raw `value` text and appends it.
    ///
    /// # Errors
    ///
    /// Propagates any error from `ControlField::from_string_value`.
    pub fn add_field_from_string(
        &mut self,
        name: Cow<'a, str>,
        value: Cow<'a, str>,
    ) -> Result<(), ControlError> {
        let field = ControlField::from_string_value(name, value)?;
        self.fields.push(field);
        Ok(())
    }

    /// Returns `true` when at least one field is named exactly `name`.
    pub fn has_field(&self, name: &str) -> bool {
        self.fields.iter().any(|f| f.name == name)
    }

    /// Returns the first field named exactly `name`, if any.
    pub fn get_field(&self, name: &str) -> Option<&ControlField<'a>> {
        self.fields.iter().find(|f| f.name == name)
    }

    /// Returns a mutable reference to the first field named exactly `name`.
    ///
    /// The returned borrow lasts only as long as the `&mut self` borrow.
    /// Tying it to `'a` instead would keep the paragraph mutably borrowed for
    /// the rest of its life, because `&mut` is invariant over the data
    /// lifetime.
    pub fn get_field_mut(&mut self, name: &str) -> Option<&mut ControlField<'a>> {
        self.fields.iter_mut().find(|f| f.name == name)
    }

    /// Writes every field in order, followed by one empty line that
    /// terminates the paragraph.
    ///
    /// Returns the first error reported by `writer`.
    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        for field in &self.fields {
            field.write(writer)?;
        }
        writer.write_all(b"\n")
    }
}
/// A control file: an ordered sequence of paragraphs.
#[derive(Clone, Debug, Default)]
pub struct ControlFile<'a> {
    paragraphs: Vec<ControlParagraph<'a>>,
}

impl<'a> ControlFile<'a> {
    /// Parses a control file from `reader`.
    ///
    /// * A line that is empty or consists only of whitespace ends the current
    ///   paragraph; end of input does the same. Runs of such lines never
    ///   produce empty paragraphs.
    /// * A non-blank line starting with a space or tab continues the field
    ///   started on a previous line. If no field is pending it is treated
    ///   as the start of a field.
    /// * Any other line starts a new field of the form `name:value`; the
    ///   value is trimmed of surrounding whitespace, the name is kept as is.
    ///
    /// All parsed names and values are owned copies, so the result does not
    /// borrow from `reader`.
    ///
    /// # Errors
    ///
    /// Returns [`ControlError::IoError`] if reading fails and
    /// [`ControlError::ParseError`] if a field contains no `:` separator.
    pub fn parse_reader<R: BufRead>(reader: &mut R) -> Result<Self, ControlError> {
        let mut paragraphs = Vec::new();
        let mut current_paragraph = ControlParagraph::default();
        // Raw text of the field being accumulated: its first line plus any
        // continuation lines, line terminators included.
        let mut current_field: Option<String> = None;

        loop {
            let mut line = String::new();
            // `read_line` signals EOF with `Ok(0)` and leaves `line` empty, so
            // EOF is handled below exactly like a blank separator line.
            let bytes_read = reader.read_line(&mut line)?;

            let is_empty_line = line.trim().is_empty();
            let is_continuation =
                !is_empty_line && (line.starts_with(' ') || line.starts_with('\t'));

            if is_continuation {
                if let Some(pending) = current_field.as_mut() {
                    pending.push_str(&line);
                    // A continuation line is never empty, hence never EOF.
                    continue;
                }
            }

            // Anything else terminates the pending field (if any). This must
            // happen before the paragraph is closed so the field is not lost.
            if let Some(raw) = current_field.take() {
                Self::flush_field(&mut current_paragraph, &raw)?;
            }

            if is_empty_line {
                if !current_paragraph.fields.is_empty() {
                    paragraphs.push(current_paragraph);
                    current_paragraph = ControlParagraph::default();
                }
            } else {
                current_field = Some(line);
            }

            if bytes_read == 0 {
                break;
            }
        }

        Ok(Self { paragraphs })
    }

    /// Splits the accumulated raw field text at the first `:` and appends the
    /// resulting field to `paragraph`.
    fn flush_field(paragraph: &mut ControlParagraph<'a>, raw: &str) -> Result<(), ControlError> {
        let mut parts = raw.splitn(2, ':');
        let (name, value) = match (parts.next(), parts.next()) {
            (Some(name), Some(value)) => (name, value.trim()),
            _ => {
                return Err(ControlError::ParseError(format!(
                    "error parsing field '{}'; could not detect value",
                    raw
                )))
            }
        };

        paragraph.add_field_from_string(Cow::Owned(name.to_string()), Cow::Owned(value.to_string()))
    }

    /// Parses a control file held in a string. See [`Self::parse_reader`].
    pub fn parse_str(s: &str) -> Result<Self, ControlError> {
        // `&[u8]` implements `BufRead` directly; no extra buffering is needed.
        let mut bytes = s.as_bytes();
        Self::parse_reader(&mut bytes)
    }

    /// Appends a paragraph to the end of the file.
    pub fn add_paragraph(&mut self, p: ControlParagraph<'a>) {
        self.paragraphs.push(p);
    }

    /// Iterates over the paragraphs in file order.
    pub fn paragraphs(&self) -> impl Iterator<Item = &ControlParagraph<'a>> {
        self.paragraphs.iter()
    }

    /// Writes every paragraph in order using `ControlParagraph::write`.
    ///
    /// Returns the first error reported by `writer`.
    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        for p in &self.paragraphs {
            p.write(writer)?;
        }
        Ok(())
    }
}
/// A control file split into its first ("general") paragraph and the
/// remaining ("binary") paragraphs.
#[derive(Default)]
pub struct SourceControl<'a> {
    general: ControlParagraph<'a>,
    binaries: Vec<ControlParagraph<'a>>,
}

impl<'a> SourceControl<'a> {
    /// Parses a control file from `reader` and splits off its first paragraph.
    ///
    /// # Errors
    ///
    /// Propagates any error from `ControlFile::parse_reader` and returns
    /// [`ControlError::ParseError`] when the input contains no paragraph.
    pub fn parse_reader<R: BufRead>(reader: &mut R) -> Result<Self, ControlError> {
        let control = ControlFile::parse_reader(reader)?;

        // The parsed file is owned and not needed afterwards, so its
        // paragraphs are moved out instead of being cloned one by one.
        let mut paragraphs = control.paragraphs.into_iter();

        let general = paragraphs.next().ok_or_else(|| {
            ControlError::ParseError("no general paragraph in source control file".to_string())
        })?;
        let binaries = paragraphs.collect();

        Ok(Self { general, binaries })
    }

    /// Parses a control file held in a string. See [`Self::parse_reader`].
    pub fn parse_str(s: &str) -> Result<Self, ControlError> {
        // `&[u8]` implements `BufRead` directly; no extra buffering is needed.
        let mut bytes = s.as_bytes();
        Self::parse_reader(&mut bytes)
    }

    /// Returns the first paragraph of the file.
    pub fn general_paragraph(&self) -> &ControlParagraph<'a> {
        &self.general
    }

    /// Iterates over every paragraph after the first, in file order.
    pub fn binary_paragraphs(&self) -> impl Iterator<Item = &ControlParagraph<'a>> {
        self.binaries.iter()
    }
}
#[cfg(test)]
mod tests {
    use {super::*, anyhow::Result};

    /// Smoke test: every matching file under `/var/lib/apt/lists` must parse
    /// without error. When no file matches, the loop body never runs and the
    /// test passes trivially.
    #[test]
    fn test_parse_system_lists() -> Result<()> {
        let packages = glob::glob("/var/lib/apt/lists/*_Packages")?;
        let sources = glob::glob("/var/lib/apt/lists/*_Sources")?;
        let translations = glob::glob("/var/lib/apt/lists/*i18n_Translation-*")?;

        for entry in packages.chain(sources).chain(translations) {
            let list_path = entry?;
            eprintln!("parsing {}", list_path.display());

            let mut reader = std::io::BufReader::new(std::fs::File::open(&list_path)?);
            ControlFile::parse_reader(&mut reader)?;
        }

        Ok(())
    }
}