use {
crate::{
dependency::DependencyList,
error::{DebianError, Result},
},
chrono::{DateTime, TimeZone, Utc},
futures::{AsyncBufRead, AsyncBufReadExt},
pin_project::pin_project,
std::{
borrow::Cow,
collections::HashMap,
io::{BufRead, Write},
str::FromStr,
},
};
/// The value of a control field, tagged with how the text should be interpreted.
///
/// Every variant wraps the same kind of payload (a `Cow<str>`); the variant
/// only records whether the text is handled as a simple, folded or multiline
/// value.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum ControlFieldValue<'a> {
/// A value treated as a single line of text.
Simple(Cow<'a, str>),
/// A value treated as folded text, which may span several lines.
Folded(Cow<'a, str>),
/// A value treated as multiline text.
Multiline(Cow<'a, str>),
}
impl<'a> AsRef<Cow<'a, str>> for ControlFieldValue<'a> {
    /// Borrow the wrapped text, regardless of which variant holds it.
    fn as_ref(&self) -> &Cow<'a, str> {
        // All variants carry the same payload type, so one irrefutable
        // or-pattern is enough to reach it.
        let (Self::Simple(inner) | Self::Folded(inner) | Self::Multiline(inner)) = self;
        inner
    }
}
impl<'a> ControlFieldValue<'a> {
    /// Iterate over the lines of this value.
    ///
    /// A `Simple` value yields its text exactly once and unmodified. `Folded`
    /// and `Multiline` values are split into lines and each line has its
    /// leading whitespace removed.
    pub fn iter_lines(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
        match self {
            Self::Simple(text) => Box::new(std::iter::once(text.as_ref())),
            Self::Folded(text) | Self::Multiline(text) => {
                Box::new(text.lines().map(str::trim_start))
            }
        }
    }

    /// Iterate over the ASCII-whitespace-delimited words of this value.
    pub fn iter_words(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
        let text: &Cow<'a, str> = self.as_ref();
        Box::new(text.split_ascii_whitespace())
    }

    /// Write the raw text of this value to `writer`.
    ///
    /// Only the value is written: no field name, separator or trailing newline.
    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        let (Self::Simple(text) | Self::Folded(text) | Self::Multiline(text)) = self;
        writer.write_all(text.as_bytes())
    }

    /// Consume this value and return the wrapped text.
    pub fn into_inner(self) -> Cow<'a, str> {
        match self {
            Self::Simple(text) | Self::Folded(text) | Self::Multiline(text) => text,
        }
    }

    /// Consume this value and pair it with `name` to form a [`ControlField`].
    pub fn to_control_field(self, name: Cow<'a, str>) -> ControlField<'a> {
        let value = self.into_inner();
        ControlField::new(name, value)
    }
}
/// A single field in a control paragraph: a name paired with its raw value text.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ControlField<'a> {
/// The field name, stored exactly as supplied.
name: Cow<'a, str>,
/// The field value as one string; a value spanning several lines keeps its
/// embedded newlines.
value: Cow<'a, str>,
}
impl<'a> ControlField<'a> {
pub fn new(name: Cow<'a, str>, value: Cow<'a, str>) -> Self {
Self { name, value }
}
pub fn from_lines(name: Cow<'a, str>, lines: impl Iterator<Item = String>) -> Self {
let value = lines
.enumerate()
.map(|(i, line)| if i == 0 { line } else { format!(" {}", line) })
.collect::<Vec<_>>()
.join("\n")
.into();
Self { name, value }
}
pub fn name(&self) -> &str {
self.name.as_ref()
}
pub fn value_str(&self) -> &str {
self.value.as_ref()
}
pub fn as_simple(&self) -> Result<ControlFieldValue<'a>> {
if self.value.as_ref().contains('\n') {
Err(DebianError::ControlSimpleValueNoMultiline)
} else {
Ok(ControlFieldValue::Simple(self.value.clone()))
}
}
pub fn as_folded(&self) -> ControlFieldValue<'a> {
ControlFieldValue::Folded(self.value.clone())
}
pub fn as_multiline(&self) -> ControlFieldValue<'a> {
ControlFieldValue::Multiline(self.value.clone())
}
pub fn iter_words(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
Box::new(self.value.as_ref().split_ascii_whitespace())
}
pub fn iter_lines(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
Box::new(self.value.lines().map(|x| x.trim_start()))
}
pub fn iter_comma_delimited(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
Box::new(self.value.as_ref().split(',').map(|v| v.trim()))
}
pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
writer.write_all(self.name.as_bytes())?;
writer.write_all(b": ")?;
writer.write_all(self.value.as_ref().as_bytes())?;
writer.write_all(b"\n")
}
}
/// Formats the field as `<name>: <value>` followed by a newline.
///
/// `Display` is implemented rather than `ToString` so the field can be used
/// with `{}` in formatting macros; `to_string()` stays available through the
/// standard library's blanket `ToString` implementation and produces the same
/// text as before.
impl<'a> std::fmt::Display for ControlField<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "{}: {}", self.name, self.value_str())
    }
}
/// A paragraph in a control file: an ordered collection of [`ControlField`]s.
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ControlParagraph<'a> {
/// The fields of this paragraph, kept in a `Vec` so their order is preserved.
fields: Vec<ControlField<'a>>,
}
impl<'a> ControlParagraph<'a> {
pub fn is_empty(&self) -> bool {
self.fields.is_empty()
}
pub fn set_field(&mut self, field: ControlField<'a>) {
self.fields
.retain(|cf| cf.name.to_lowercase() != field.name.to_lowercase());
self.fields.push(field);
}
pub fn set_field_from_string(&mut self, name: Cow<'a, str>, value: Cow<'a, str>) {
self.set_field(ControlField::new(name, value));
}
pub fn has_field(&self, name: &str) -> bool {
self.field(name).is_some()
}
pub fn iter_fields(&self) -> impl Iterator<Item = &ControlField<'a>> {
self.fields.iter()
}
pub fn field(&self, name: &str) -> Option<&'_ ControlField<'a>> {
self.fields
.iter()
.find(|f| f.name.as_ref().to_lowercase() == name.to_lowercase())
}
pub fn field_mut(&mut self, name: &str) -> Option<&'a mut ControlField> {
self.fields
.iter_mut()
.find(|f| f.name.as_ref().to_lowercase() == name.to_lowercase())
}
pub fn required_field(&self, name: &str) -> Result<&'_ ControlField<'a>> {
self.field(name)
.ok_or_else(|| DebianError::ControlRequiredFieldMissing(name.to_string()))
}
pub fn field_str(&self, name: &str) -> Option<&str> {
self.field(name).map(|f| f.value_str())
}
pub fn required_field_str(&self, name: &str) -> Result<&str> {
Ok(self.required_field(name)?.value_str())
}
pub fn field_bool(&self, name: &str) -> Option<bool> {
self.field_str(name).map(|v| matches!(v, "yes"))
}
pub fn field_u64(&self, name: &str) -> Option<Result<u64>> {
self.field_str(name).map(|x| {
u64::from_str(x).map_err(|e| DebianError::ControlFieldIntParse(name.to_string(), e))
})
}
pub fn field_dependency_list(&self, name: &str) -> Option<Result<DependencyList>> {
self.field_str(name).map(DependencyList::parse)
}
pub fn field_datetime_rfc5322(&self, name: &str) -> Option<Result<DateTime<Utc>>> {
self.field_str(name)
.map(|v| Ok(Utc.timestamp(mailparse::dateparse(v)?, 0)))
}
pub fn field_simple(&self, name: &str) -> Option<Result<ControlFieldValue<'a>>> {
self.field(name).map(|cf| cf.as_simple())
}
pub fn field_folded(&self, name: &str) -> Option<ControlFieldValue<'a>> {
self.field(name).map(|cf| cf.as_folded())
}
pub fn field_multiline(&self, name: &str) -> Option<ControlFieldValue<'a>> {
self.field(name).map(|cf| cf.as_multiline())
}
pub fn iter_field_words(&self, name: &str) -> Option<Box<(dyn Iterator<Item = &str> + '_)>> {
self.field(name)
.map(|f| Box::new(f.value.split_ascii_whitespace()) as Box<dyn Iterator<Item = &str>>)
}
pub fn iter_field_lines(&self, name: &str) -> Option<Box<(dyn Iterator<Item = &str> + '_)>> {
self.field(name).map(|f| f.iter_lines())
}
pub fn iter_field_comma_delimited(
&self,
name: &str,
) -> Option<Box<(dyn Iterator<Item = &str> + '_)>> {
self.field(name).map(|f| f.iter_comma_delimited())
}
pub fn as_str_hash_map(&self) -> HashMap<&str, &str> {
HashMap::from_iter(
self.fields
.iter()
.map(|field| (field.name.as_ref(), field.value_str())),
)
}
pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
for field in &self.fields {
field.write(writer)?;
}
Ok(())
}
}
/// Formats the paragraph as the concatenation of each field's string form, in
/// stored order.
///
/// `Display` is implemented rather than `ToString` so the paragraph can be
/// used with `{}`; `to_string()` stays available through the standard
/// library's blanket `ToString` implementation and produces the same text as
/// before.
impl<'a> std::fmt::Display for ControlParagraph<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.fields
            .iter()
            .try_for_each(|field| f.write_str(&field.to_string()))
    }
}
/// Incremental, line-oriented parser that turns control file lines into
/// [`ControlParagraph`]s.
///
/// Lines are supplied one at a time through `write_line`; `finish` flushes
/// whatever state remains.
#[derive(Clone, Debug, Default)]
pub struct ControlFileParser {
/// Fields accumulated so far for the paragraph currently being parsed.
paragraph: ControlParagraph<'static>,
/// Raw text of the field currently being collected (its first line plus any
/// continuation lines) that has not yet been added to `paragraph`.
field: Option<String>,
}
impl ControlFileParser {
    /// Feed one line of input to the parser.
    ///
    /// A line that is empty after trimming whitespace ends the current
    /// paragraph: the pending field is flushed and the paragraph is returned,
    /// or `None` if no fields were collected. A line starting with a space or
    /// a tab is a continuation line and is appended to the pending field. Any
    /// other line flushes the pending field and starts a new one.
    ///
    /// Returns `Ok(Some(paragraph))` only when this line completed a paragraph.
    ///
    /// # Errors
    ///
    /// Returns `DebianError::ControlParseError` if a completed field has no
    /// `:` separating its name from its value.
    pub fn write_line(&mut self, line: &str) -> Result<Option<ControlParagraph<'static>>> {
        let pending = self.field.take();

        if line.trim().is_empty() {
            if let Some(field) = pending {
                self.flush_field(field)?;
            }

            return Ok(if self.paragraph.is_empty() {
                None
            } else {
                // Hand out the finished paragraph and leave an empty one
                // behind, without cloning.
                Some(std::mem::take(&mut self.paragraph))
            });
        }

        // Continuation lines may start with a space or a tab. The line is
        // known to be non-blank here, so no extra length check is needed.
        let is_continuation = line.starts_with(|c: char| c == ' ' || c == '\t');

        self.field = Some(match pending {
            Some(mut field) if is_continuation => {
                field.push_str(line);
                field
            }
            Some(field) => {
                self.flush_field(field)?;
                line.to_string()
            }
            // No field is pending, so this line starts one even if indented.
            None => line.to_string(),
        });

        Ok(None)
    }

    /// Signal end of input, flushing the pending field.
    ///
    /// Returns the final paragraph, or `None` if it has no fields.
    ///
    /// # Errors
    ///
    /// Returns `DebianError::ControlParseError` if the pending field has no
    /// `:` separator.
    pub fn finish(mut self) -> Result<Option<ControlParagraph<'static>>> {
        if let Some(field) = self.field.take() {
            self.flush_field(field)?;
        }

        Ok(if self.paragraph.is_empty() {
            None
        } else {
            Some(self.paragraph)
        })
    }

    /// Split the raw field text `v` at its first `:` and store the result on
    /// the current paragraph.
    ///
    /// The name is stored as-is; the value is trimmed of surrounding whitespace.
    fn flush_field(&mut self, v: String) -> Result<()> {
        // The error text is the one the parser has always produced when no
        // colon is present; it is kept verbatim for callers that inspect it.
        let (name, value) = v.split_once(':').ok_or_else(|| {
            DebianError::ControlParseError(format!(
                "error parsing field '{}'; could not detect value",
                v
            ))
        })?;

        self.paragraph.set_field_from_string(
            Cow::Owned(name.to_string()),
            Cow::Owned(value.trim().to_string()),
        );

        Ok(())
    }
}
/// Reads [`ControlParagraph`]s from a buffered, synchronous reader.
pub struct ControlParagraphReader<R: BufRead> {
/// The source that lines are read from.
reader: R,
/// Parser state; set to `None` once the parser has been consumed, after which
/// iteration yields nothing further.
parser: Option<ControlFileParser>,
}
impl<R: BufRead> ControlParagraphReader<R> {
    /// Create a reader that parses paragraphs from `reader`.
    pub fn new(reader: R) -> Self {
        let parser = Some(ControlFileParser::default());
        Self { reader, parser }
    }

    /// Consume this instance and return the wrapped reader.
    pub fn into_inner(self) -> R {
        let Self { reader, .. } = self;
        reader
    }

    /// Read lines until a paragraph is complete or the input is exhausted.
    ///
    /// The parser is put back only when a paragraph is returned before end of
    /// input. On end of input or on any error it is consumed and `self.parser`
    /// stays `None`.
    ///
    /// # Panics
    ///
    /// Panics if `self.parser` is `None`.
    fn get_next(&mut self) -> Result<Option<ControlParagraph<'static>>> {
        let mut parser = self.parser.take().unwrap();

        loop {
            let mut line = String::new();

            // Zero bytes read means end of input: flush whatever is pending.
            if self.reader.read_line(&mut line)? == 0 {
                return parser.finish();
            }

            if let Some(paragraph) = parser.write_line(&line)? {
                self.parser = Some(parser);
                return Ok(Some(paragraph));
            }
        }
    }
}
impl<R: BufRead> Iterator for ControlParagraphReader<R> {
    type Item = Result<ControlParagraph<'static>>;

    /// Yield the next paragraph, or the error hit while reading it.
    ///
    /// Once the parser has been consumed, every later call returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.parser.is_none() {
            return None;
        }

        // `Result<Option<T>>` -> `Option<Result<T>>`: "no paragraph" becomes
        // the end of iteration, everything else is yielded.
        self.get_next().transpose()
    }
}
/// Reads [`ControlParagraph`]s from an asynchronous buffered reader.
#[pin_project]
pub struct ControlParagraphAsyncReader<R> {
/// The source that lines are read from.
#[pin]
reader: R,
/// Parser state; `None` once the parser has been consumed, after which
/// `read_paragraph` returns `Ok(None)`.
parser: Option<ControlFileParser>,
}
impl<R> ControlParagraphAsyncReader<R>
where
    R: AsyncBufRead + Unpin,
{
    /// Create a reader that parses paragraphs from `reader`.
    pub fn new(reader: R) -> Self {
        let parser = Some(ControlFileParser::default());
        Self { reader, parser }
    }

    /// Consume this instance and return the wrapped reader.
    pub fn into_inner(self) -> R {
        self.reader
    }

    /// Read the next paragraph from the underlying reader.
    ///
    /// Returns `Ok(None)` when the input is exhausted without a further
    /// paragraph, and on every call made after the parser has been consumed.
    /// The parser is put back only when a paragraph is returned before end of
    /// input; on end of input or on any error it is consumed.
    pub async fn read_paragraph(&mut self) -> Result<Option<ControlParagraph<'static>>> {
        let mut parser = match self.parser.take() {
            Some(parser) => parser,
            None => return Ok(None),
        };

        loop {
            let mut line = String::new();

            // Zero bytes read means end of input: flush whatever is pending.
            if self.reader.read_line(&mut line).await? == 0 {
                return parser.finish();
            }

            if let Some(paragraph) = parser.write_line(&line)? {
                self.parser = Some(parser);
                return Ok(Some(paragraph));
            }
        }
    }
}
/// A control file: an ordered series of [`ControlParagraph`]s.
#[derive(Clone, Debug, Default)]
pub struct ControlFile<'a> {
/// The paragraphs of this file, in the order they were parsed or added.
paragraphs: Vec<ControlParagraph<'a>>,
}
impl<'a> ControlFile<'a> {
    /// Parse a control file by reading `reader` line by line until end of input.
    ///
    /// # Errors
    ///
    /// Propagates read errors from `reader` and parse errors from
    /// [`ControlFileParser`].
    pub fn parse_reader<R: BufRead>(reader: &mut R) -> Result<Self> {
        let mut parser = ControlFileParser::default();
        let mut paragraphs = Vec::new();
        let mut line = String::new();

        while reader.read_line(&mut line)? != 0 {
            // `write_line` yields at most one paragraph per line.
            paragraphs.extend(parser.write_line(&line)?);
            line.clear();
        }

        // Collect a trailing paragraph that was not terminated by a blank line.
        paragraphs.extend(parser.finish()?);

        Ok(Self { paragraphs })
    }

    /// Parse a control file held in a string.
    pub fn parse_str(s: &str) -> Result<Self> {
        let mut bytes = s.as_bytes();
        Self::parse_reader(&mut bytes)
    }

    /// Append a paragraph to the end of this file.
    pub fn add_paragraph(&mut self, p: ControlParagraph<'a>) {
        self.paragraphs.push(p);
    }

    /// Iterate over the paragraphs by reference, in order.
    pub fn paragraphs(&self) -> impl Iterator<Item = &ControlParagraph<'a>> {
        self.paragraphs.iter()
    }

    /// Consume this file and iterate over its paragraphs by value, in order.
    pub fn into_paragraphs(self) -> impl Iterator<Item = ControlParagraph<'a>> {
        self.paragraphs.into_iter()
    }

    /// Write every paragraph to `writer`, each followed by a blank line
    /// (including the last one).
    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        self.paragraphs.iter().try_for_each(|paragraph| {
            paragraph.write(writer)?;
            writer.write_all(b"\n")
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Setting a field replaces any existing field whose name matches
    /// case-insensitively, and lookups are case-insensitive too.
    #[test]
    fn control_paragraph_field_semantics() {
        let mut p = ControlParagraph::default();
        p.set_field_from_string("foo".into(), "bar".into());
        p.set_field_from_string("foo".into(), "baz".into());
        assert_eq!(p.field("foo").unwrap().value, "baz");
        p.set_field_from_string("FOO".into(), "bar".into());
        assert_eq!(p.field("foo").unwrap().value, "bar");
        assert_eq!(p.field("FOO").unwrap().value, "bar");
    }

    /// Parses the bundled `Release` fixture and checks its simple and
    /// multiline fields.
    #[test]
    fn parse_paragraph_release() -> Result<()> {
        let paragraphs = ControlParagraphReader::new(std::io::Cursor::new(include_bytes!(
            "testdata/release-debian-bullseye"
        )))
        .collect::<Result<Vec<_>>>()?;
        assert_eq!(paragraphs.len(), 1);
        // This line previously read `¶graphs[0]`, a mis-encoded `&paragraphs[0]`.
        let p = &paragraphs[0];
        assert_eq!(p.fields.len(), 14);
        assert!(p.has_field("Origin"));
        assert!(p.has_field("Version"));
        assert!(!p.has_field("Missing"));
        assert!(p.field("Version").is_some());
        let fields = &p.fields;
        assert_eq!(fields[0].name, "Origin");
        assert_eq!(fields[0].value, "Debian");
        assert_eq!(fields[3].name, "Version");
        assert_eq!(fields[3].value, "11.1");
        let ml = p.field_multiline("MD5Sum").unwrap();
        assert_eq!(ml.iter_lines().count(), 600);
        assert_eq!(
            ml.iter_lines().next().unwrap(),
            "7fdf4db15250af5368cc52a91e8edbce 738242 contrib/Contents-all"
        );
        assert!(p.field_multiline("SHA256").is_some());
        assert_eq!(fields[0].iter_words().collect::<Vec<_>>(), vec!["Debian"]);
        let values = p
            .field_multiline("MD5Sum")
            .unwrap()
            .iter_lines()
            .map(|x| x.to_string())
            .collect::<Vec<_>>();
        assert_eq!(values.len(), 600);
        assert_eq!(
            values[0],
            "7fdf4db15250af5368cc52a91e8edbce 738242 contrib/Contents-all"
        );
        assert_eq!(
            values[1],
            "cbd7bc4d3eb517ac2b22f929dfc07b47 57319 contrib/Contents-all.gz"
        );
        assert_eq!(
            values[599],
            "e3830f6fc5a946b5a5b46e8277e1d86f 80488 non-free/source/Sources.xz"
        );
        let values = p
            .field_multiline("SHA256")
            .unwrap()
            .iter_lines()
            .map(|x| x.to_string())
            .collect::<Vec<_>>();
        assert_eq!(values.len(), 600);
        assert_eq!(
            values[0],
            "3957f28db16e3f28c7b34ae84f1c929c567de6970f3f1b95dac9b498dd80fe63 738242 contrib/Contents-all",
        );
        assert_eq!(
            values[1],
            "3e9a121d599b56c08bc8f144e4830807c77c29d7114316d6984ba54695d3db7b 57319 contrib/Contents-all.gz",
        );
        assert_eq!(values[599], "30f3f996941badb983141e3b29b2ed5941d28cf81f9b5f600bb48f782d386fc7 80488 non-free/source/Sources.xz");
        Ok(())
    }

    /// Parses every matching file under `/var/lib/apt/lists` and requires
    /// each paragraph to parse without error.
    #[test]
    fn test_parse_system_lists() -> Result<()> {
        let paths = glob::glob("/var/lib/apt/lists/*_Packages")
            .unwrap()
            .chain(glob::glob("/var/lib/apt/lists/*_Sources").unwrap())
            .chain(glob::glob("/var/lib/apt/lists/*i18n_Translation-*").unwrap());
        for path in paths {
            let path = path.unwrap();
            eprintln!("parsing {}", path.display());
            let fh = std::fs::File::open(&path)?;
            let reader = std::io::BufReader::new(fh);
            for para in ControlParagraphReader::new(reader) {
                para?;
            }
        }
        Ok(())
    }
}