tugger_debian/
control.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5/*! Defines primitives in control files.
6
7See https://www.debian.org/doc/debian-policy/ch-controlfields.html
8for the canonical source of truth for how control files work.
9*/
10
11use std::{
12    borrow::Cow,
13    io::{BufRead, Write},
14};
15
/// Errors produced while reading or parsing control files.
#[derive(Debug)]
pub enum ControlError {
    /// An I/O operation failed; wraps the originating `std::io::Error`.
    IoError(std::io::Error),
    /// The input could not be parsed; carries a human-readable description.
    ParseError(String),
}

impl From<std::io::Error> for ControlError {
    /// Wrap an I/O error, which lets `?` be applied to I/O results in
    /// functions returning `ControlError`.
    fn from(source: std::io::Error) -> Self {
        ControlError::IoError(source)
    }
}

impl std::fmt::Display for ControlError {
    /// Render the error as `<category>: <detail>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ControlError::IoError(err) => f.write_fmt(format_args!("I/O error: {}", err)),
            ControlError::ParseError(text) => f.write_fmt(format_args!("parse error: {}", text)),
        }
    }
}

// Uses the default `Error` methods; no `source()` is exposed.
impl std::error::Error for ControlError {}
38
/// A field value in a control file.
///
/// Every variant wraps the raw text of the value; the variant only records
/// how the value was classified.
#[derive(Clone, Debug)]
pub enum ControlFieldValue<'a> {
    /// Chosen by the `From<Cow<str>>` conversion for text without a line break.
    Simple(Cow<'a, str>),
    /// Chosen by the `From<Cow<str>>` conversion for text that contains a line
    /// break and does not begin with a space or tab.
    Folded(Cow<'a, str>),
    /// Chosen by the `From<Cow<str>>` conversion for text that contains a line
    /// break and begins with a space or tab.
    Multiline(Cow<'a, str>),
}

impl<'a> ControlFieldValue<'a> {
    /// Write the raw text of this value to `writer`, unmodified.
    ///
    /// No trailing newline is added. Any error from the writer is returned.
    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        match self {
            Self::Simple(text) | Self::Folded(text) | Self::Multiline(text) => {
                writer.write_all(text.as_bytes())
            }
        }
    }
}

impl<'a> From<Cow<'a, str>> for ControlFieldValue<'a> {
    /// Classify a raw value by its content.
    fn from(value: Cow<'a, str>) -> Self {
        // Single-line text is always a simple value.
        if !value.contains('\n') {
            return Self::Simple(value);
        }

        // For text spanning lines, leading whitespace decides the variant.
        let leads_with_whitespace = value.starts_with(|c: char| c == ' ' || c == '\t');
        if leads_with_whitespace {
            Self::Multiline(value)
        } else {
            Self::Folded(value)
        }
    }
}
73
74/// A field in a control file.
75#[derive(Clone, Debug)]
76pub struct ControlField<'a> {
77    name: Cow<'a, str>,
78    value: ControlFieldValue<'a>,
79}
80
81impl<'a> ControlField<'a> {
82    /// Construct an instance from a field name and typed value.
83    pub fn new(name: Cow<'a, str>, value: ControlFieldValue<'a>) -> Self {
84        Self { name, value }
85    }
86
87    /// Construct a field from a named key and string value.
88    ///
89    /// The type of the field value will be derived from the key name.
90    ///
91    /// Unknown keys will be rejected.
92    pub fn from_string_value(key: Cow<'a, str>, value: Cow<'a, str>) -> Result<Self, ControlError> {
93        let value = ControlFieldValue::from(value);
94
95        Ok(Self { name: key, value })
96    }
97
98    /// Write the contents of this field to a writer.
99    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
100        writer.write_all(self.name.as_bytes())?;
101        writer.write_all(b": ")?;
102        self.value.write(writer)?;
103        writer.write_all(b"\n")
104    }
105}
106
107/// A paragraph in a control file.
108///
109/// A paragraph is an ordered series of control fields.
110#[derive(Clone, Debug, Default)]
111pub struct ControlParagraph<'a> {
112    fields: Vec<ControlField<'a>>,
113}
114
115impl<'a> ControlParagraph<'a> {
116    /// Add a `ControlField` to this instance.
117    pub fn add_field(&mut self, field: ControlField<'a>) {
118        self.fields.push(field);
119    }
120
121    /// Add a field defined via strings.
122    pub fn add_field_from_string(
123        &mut self,
124        name: Cow<'a, str>,
125        value: Cow<'a, str>,
126    ) -> Result<(), ControlError> {
127        self.fields
128            .push(ControlField::from_string_value(name, value)?);
129        Ok(())
130    }
131
132    /// Whether a named field is present in this paragraph.
133    pub fn has_field(&self, name: &str) -> bool {
134        self.fields.iter().any(|f| f.name == name)
135    }
136
137    /// Obtain the first field with a given name in this paragraph.
138    pub fn get_field(&self, name: &str) -> Option<&ControlField> {
139        self.fields.iter().find(|f| f.name == name)
140    }
141
142    /// Obtain a mutable reference to the first field with a given name.
143    pub fn get_field_mut(&mut self, name: &str) -> Option<&'a mut ControlField> {
144        self.fields.iter_mut().find(|f| f.name == name)
145    }
146
147    /// Serialize the paragraph to a writer.
148    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
149        for field in &self.fields {
150            field.write(writer)?;
151        }
152
153        writer.write_all(b"\n")
154    }
155}
156
157/// A debian control file.
158///
159/// A control file is an ordered series of paragraphs.
160#[derive(Clone, Debug, Default)]
161pub struct ControlFile<'a> {
162    paragraphs: Vec<ControlParagraph<'a>>,
163}
164
165impl<'a> ControlFile<'a> {
166    /// Construct a new instance by parsing data from a reader.
167    pub fn parse_reader<R: BufRead>(reader: &mut R) -> Result<Self, ControlError> {
168        let mut paragraphs = Vec::new();
169        let mut current_paragraph = ControlParagraph::default();
170        let mut current_field: Option<String> = None;
171
172        loop {
173            let mut line = String::new();
174            let bytes_read = reader.read_line(&mut line)?;
175
176            let is_empty_line = line.trim().is_empty();
177            let is_indented = line.starts_with(' ') && line.len() > 1;
178
179            current_field = match (is_empty_line, current_field, is_indented) {
180                // We have a field on the stack and got an unindented line. This
181                // must be the beginning of a new field. Flush the current field.
182                (_, Some(v), false) => {
183                    let mut parts = v.splitn(2, ':');
184
185                    let name = parts.next().ok_or_else(|| {
186                        ControlError::ParseError(format!(
187                            "error parsing line '{}'; missing colon",
188                            line
189                        ))
190                    })?;
191                    let value = parts
192                        .next()
193                        .ok_or_else(|| {
194                            ControlError::ParseError(format!(
195                                "error parsing field '{}'; could not detect value",
196                                v
197                            ))
198                        })?
199                        .trim();
200
201                    current_paragraph.add_field_from_string(
202                        Cow::Owned(name.to_string()),
203                        Cow::Owned(value.to_string()),
204                    )?;
205
206                    if is_empty_line {
207                        None
208                    } else {
209                        Some(line)
210                    }
211                }
212
213                // If we're an empty line and no fields is on the stack, we're at
214                // the end of the paragraph with no field to flush. Just flush the
215                // paragraph if it is non-empty.
216                (true, _, _) => {
217                    if !current_paragraph.fields.is_empty() {
218                        paragraphs.push(current_paragraph);
219                        current_paragraph = ControlParagraph::default();
220                    }
221
222                    None
223                }
224                // We got a non-empty line and no field is currently being
225                // processed. This must be the start of a new field.
226                (false, None, _) => Some(line),
227                // We have a field on the stack and got an indented line. This
228                // must be a field value continuation. Add it to the current
229                // field.
230                (false, Some(v), true) => Some(v + &line),
231            };
232
233            // .read_line() indicates EOF by Ok(0).
234            if bytes_read == 0 {
235                break;
236            }
237        }
238
239        Ok(Self { paragraphs })
240    }
241
242    /// Parse a control file from a string.
243    pub fn parse_str(s: &str) -> Result<Self, ControlError> {
244        let mut reader = std::io::BufReader::new(s.as_bytes());
245        Self::parse_reader(&mut reader)
246    }
247
248    /// Add a paragraph to this control file.
249    pub fn add_paragraph(&mut self, p: ControlParagraph<'a>) {
250        self.paragraphs.push(p);
251    }
252
253    /// Obtain paragraphs in this control file.
254    pub fn paragraphs(&self) -> impl Iterator<Item = &ControlParagraph<'a>> {
255        self.paragraphs.iter()
256    }
257
258    /// Serialize the control file to a writer.
259    pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
260        for p in &self.paragraphs {
261            p.write(writer)?;
262        }
263
264        // Paragraph writer adds additional line break. So no need to
265        // add another here.
266
267        Ok(())
268    }
269}
270
271/// Represents a `debian/control` file.
272///
273/// Specified at https://www.debian.org/doc/debian-policy/ch-controlfields.html#source-package-control-files-debian-control.
274#[derive(Default)]
275pub struct SourceControl<'a> {
276    general: ControlParagraph<'a>,
277    binaries: Vec<ControlParagraph<'a>>,
278}
279
280impl<'a> SourceControl<'a> {
281    /// Construct an instance by parsing a control file from a reader.
282    pub fn parse_reader<R: BufRead>(reader: &mut R) -> Result<Self, ControlError> {
283        let control = ControlFile::parse_reader(reader)?;
284
285        let mut paragraphs = control.paragraphs();
286
287        let general = paragraphs
288            .next()
289            .ok_or_else(|| {
290                ControlError::ParseError("no general paragraph in source control file".to_string())
291            })?
292            .to_owned();
293
294        let binaries = paragraphs.map(|x| x.to_owned()).collect();
295
296        Ok(Self { general, binaries })
297    }
298
299    pub fn parse_str(s: &str) -> Result<Self, ControlError> {
300        let mut reader = std::io::BufReader::new(s.as_bytes());
301        Self::parse_reader(&mut reader)
302    }
303
304    /// Obtain a handle on the general paragraph.
305    pub fn general_paragraph(&self) -> &ControlParagraph<'a> {
306        &self.general
307    }
308
309    /// Obtain an iterator over paragraphs defining binaries.
310    pub fn binary_paragraphs(&self) -> impl Iterator<Item = &ControlParagraph<'a>> {
311        self.binaries.iter()
312    }
313}
314
#[cfg(test)]
mod tests {
    use {super::*, anyhow::Result};

    /// Smoke test: every apt list file matched by the globs below must parse
    /// without error. Nothing is asserted about the parsed content, and the
    /// test passes trivially when no file matches.
    #[test]
    fn test_parse_system_lists() -> Result<()> {
        // Resolve all three patterns up front so a bad pattern fails before
        // any file is opened.
        let packages = glob::glob("/var/lib/apt/lists/*_Packages")?;
        let sources = glob::glob("/var/lib/apt/lists/*_Sources")?;
        let translations = glob::glob("/var/lib/apt/lists/*i18n_Translation-*")?;

        for entry in packages.chain(sources).chain(translations) {
            let path = entry?;

            eprintln!("parsing {}", path.display());
            let mut reader = std::io::BufReader::new(std::fs::File::open(&path)?);

            ControlFile::parse_reader(&mut reader)?;
        }

        Ok(())
    }
}