1use {
12 crate::{
13 dependency::DependencyList,
14 error::{DebianError, Result},
15 },
16 chrono::{DateTime, TimeZone, Utc},
17 futures::{AsyncBufRead, AsyncBufReadExt},
18 pin_project::pin_project,
19 std::{
20 borrow::Cow,
21 collections::HashMap,
22 fmt::Display,
23 io::{BufRead, Write},
24 str::FromStr,
25 },
26};
27
/// The text of a control field, tagged with the layout it is interpreted as.
///
/// Every variant wraps the same kind of string; the variant only records how
/// the text was classified when it was obtained from a [`ControlField`].
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum ControlFieldValue<'a> {
    /// A single-line value. [`ControlField::as_simple`] only produces this
    /// variant when the text contains no newline.
    Simple(Cow<'a, str>),
    /// A value that may span several lines, produced by
    /// [`ControlField::as_folded`] without further validation.
    Folded(Cow<'a, str>),
    /// A value that may span several lines, produced by
    /// [`ControlField::as_multiline`] without further validation.
    Multiline(Cow<'a, str>),
}

impl<'a> AsRef<Cow<'a, str>> for ControlFieldValue<'a> {
    /// Borrow the wrapped text regardless of which variant holds it.
    fn as_ref(&self) -> &Cow<'a, str> {
        match self {
            Self::Simple(inner) | Self::Folded(inner) | Self::Multiline(inner) => inner,
        }
    }
}
52
53impl<'a> ControlFieldValue<'a> {
54 pub fn iter_lines(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
62 match self {
63 Self::Simple(v) => Box::new([v.as_ref()].into_iter()),
64 Self::Folded(values) => Box::new(values.lines().map(|x| x.trim_start())),
65 Self::Multiline(values) => Box::new(values.lines().map(|x| x.trim_start())),
66 }
67 }
68
69 pub fn iter_words(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
73 Box::new(self.as_ref().split_ascii_whitespace())
74 }
75
76 pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
78 let data = match self {
79 Self::Simple(v) => v,
80 Self::Folded(v) => v,
81 Self::Multiline(v) => v,
82 };
83
84 writer.write_all(data.as_bytes())
85 }
86
87 pub fn into_inner(self) -> Cow<'a, str> {
89 match self {
90 Self::Simple(v) => v,
91 Self::Folded(v) => v,
92 Self::Multiline(v) => v,
93 }
94 }
95
96 pub fn to_control_field(self, name: Cow<'a, str>) -> ControlField<'a> {
98 ControlField::new(name, self.into_inner())
99 }
100}
101
/// A single named field of a control paragraph.
///
/// Both the name and the value are stored as given; the value is kept as one
/// string and may contain embedded newlines (see `from_lines`).
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ControlField<'a> {
    /// Field name, stored without any case normalization.
    name: Cow<'a, str>,
    /// Field value text.
    value: Cow<'a, str>,
}
108
109impl<'a> ControlField<'a> {
110 pub fn new(name: Cow<'a, str>, value: Cow<'a, str>) -> Self {
112 Self { name, value }
113 }
114
115 pub fn from_lines(name: Cow<'a, str>, lines: impl Iterator<Item = String>) -> Self {
119 let value = lines
120 .enumerate()
121 .map(|(i, line)| if i == 0 { line } else { format!(" {}", line) })
122 .collect::<Vec<_>>()
123 .join("\n")
124 .into();
125
126 Self { name, value }
127 }
128
129 pub fn name(&self) -> &str {
131 self.name.as_ref()
132 }
133
134 pub fn value_str(&self) -> &str {
139 self.value.as_ref()
140 }
141
142 pub fn as_simple(&self) -> Result<ControlFieldValue<'a>> {
146 if self.value.as_ref().contains('\n') {
147 Err(DebianError::ControlSimpleValueNoMultiline)
148 } else {
149 Ok(ControlFieldValue::Simple(self.value.clone()))
150 }
151 }
152
153 pub fn as_folded(&self) -> ControlFieldValue<'a> {
155 ControlFieldValue::Folded(self.value.clone())
156 }
157
158 pub fn as_multiline(&self) -> ControlFieldValue<'a> {
160 ControlFieldValue::Multiline(self.value.clone())
161 }
162
163 pub fn iter_words(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
165 Box::new(self.value.as_ref().split_ascii_whitespace())
166 }
167
168 pub fn iter_lines(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
172 Box::new(self.value.lines().map(|x| x.trim_start()))
173 }
174
175 pub fn iter_comma_delimited(&self) -> Box<(dyn Iterator<Item = &str> + '_)> {
179 Box::new(self.value.as_ref().split(',').map(|v| v.trim()))
180 }
181
182 pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
184 writer.write_all(self.name.as_bytes())?;
185 writer.write_all(b": ")?;
186 writer.write_all(self.value.as_ref().as_bytes())?;
187 writer.write_all(b"\n")
188 }
189}
190
191impl<'a> Display for ControlField<'a> {
192 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
193 write!(f, "{}", format!("{}: {}\n", self.name, self.value_str()))
194 }
195}
196
/// An ordered collection of control fields.
///
/// Fields are kept in a `Vec`, so the order in which they were added is
/// preserved. Lookups by name compare lowercased names.
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ControlParagraph<'a> {
    /// The fields of this paragraph, in insertion order.
    fields: Vec<ControlField<'a>>,
}
209
210impl<'a> ControlParagraph<'a> {
211 pub fn is_empty(&self) -> bool {
215 self.fields.is_empty()
216 }
217
218 pub fn set_field(&mut self, field: ControlField<'a>) {
223 self.fields
224 .retain(|cf| cf.name.to_lowercase() != field.name.to_lowercase());
225 self.fields.push(field);
226 }
227
228 pub fn set_field_from_string(&mut self, name: Cow<'a, str>, value: Cow<'a, str>) {
233 self.set_field(ControlField::new(name, value));
234 }
235
236 pub fn has_field(&self, name: &str) -> bool {
238 self.field(name).is_some()
239 }
240
241 pub fn iter_fields(&self) -> impl Iterator<Item = &ControlField<'a>> {
245 self.fields.iter()
246 }
247
248 pub fn field(&self, name: &str) -> Option<&'_ ControlField<'a>> {
250 self.fields
251 .iter()
252 .find(|f| f.name.as_ref().to_lowercase() == name.to_lowercase())
253 }
254
255 pub fn field_mut(&mut self, name: &str) -> Option<&'a mut ControlField> {
257 self.fields
258 .iter_mut()
259 .find(|f| f.name.as_ref().to_lowercase() == name.to_lowercase())
260 }
261
262 pub fn required_field(&self, name: &str) -> Result<&'_ ControlField<'a>> {
264 self.field(name)
265 .ok_or_else(|| DebianError::ControlRequiredFieldMissing(name.to_string()))
266 }
267
268 pub fn field_str(&self, name: &str) -> Option<&str> {
270 self.field(name).map(|f| f.value_str())
271 }
272
273 pub fn required_field_str(&self, name: &str) -> Result<&str> {
275 Ok(self.required_field(name)?.value_str())
276 }
277
278 pub fn field_bool(&self, name: &str) -> Option<bool> {
282 self.field_str(name).map(|v| matches!(v, "yes"))
283 }
284
285 pub fn field_u64(&self, name: &str) -> Option<Result<u64>> {
287 self.field_str(name).map(|x| {
288 u64::from_str(x).map_err(|e| DebianError::ControlFieldIntParse(name.to_string(), e))
289 })
290 }
291
292 pub fn field_dependency_list(&self, name: &str) -> Option<Result<DependencyList>> {
294 self.field_str(name).map(DependencyList::parse)
295 }
296
297 pub fn field_datetime_rfc5322(&self, name: &str) -> Option<Result<DateTime<Utc>>> {
304 self.field_str(name).map(|v| {
305 Utc.timestamp_opt(mailparse::dateparse(v)?, 0)
306 .single()
307 .ok_or_else(|| DebianError::ControlFieldTimestampParse)
308 })
309 }
310
311 pub fn field_simple(&self, name: &str) -> Option<Result<ControlFieldValue<'a>>> {
313 self.field(name).map(|cf| cf.as_simple())
314 }
315
316 pub fn field_folded(&self, name: &str) -> Option<ControlFieldValue<'a>> {
318 self.field(name).map(|cf| cf.as_folded())
319 }
320
321 pub fn field_multiline(&self, name: &str) -> Option<ControlFieldValue<'a>> {
323 self.field(name).map(|cf| cf.as_multiline())
324 }
325
326 pub fn iter_field_words(&self, name: &str) -> Option<Box<(dyn Iterator<Item = &str> + '_)>> {
328 self.field(name)
329 .map(|f| Box::new(f.value.split_ascii_whitespace()) as Box<dyn Iterator<Item = &str>>)
330 }
331
332 pub fn iter_field_lines(&self, name: &str) -> Option<Box<(dyn Iterator<Item = &str> + '_)>> {
334 self.field(name).map(|f| f.iter_lines())
335 }
336
337 pub fn iter_field_comma_delimited(
339 &self,
340 name: &str,
341 ) -> Option<Box<(dyn Iterator<Item = &str> + '_)>> {
342 self.field(name).map(|f| f.iter_comma_delimited())
343 }
344
345 pub fn as_str_hash_map(&self) -> HashMap<&str, &str> {
352 HashMap::from_iter(
353 self.fields
354 .iter()
355 .map(|field| (field.name.as_ref(), field.value_str())),
356 )
357 }
358
359 pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
366 for field in &self.fields {
367 field.write(writer)?;
368 }
369
370 Ok(())
371 }
372}
373
374impl<'a> Display for ControlParagraph<'a> {
375 fn fmt(&self, f1: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
376 let fields = self
377 .fields
378 .iter()
379 .map(|f| f.to_string())
380 .collect::<Vec<_>>();
381
382 write!(f1, "{}", fields.join(""))
383 }
384}
385
/// Incremental, line-fed parser producing control paragraphs.
///
/// Lines are supplied one at a time through `write_line`; `finish` flushes
/// whatever remains once the input is exhausted.
#[derive(Clone, Debug, Default)]
pub struct ControlFileParser {
    /// The paragraph currently being assembled.
    paragraph: ControlParagraph<'static>,
    /// Raw text of the field currently being accumulated, if any.
    field: Option<String>,
}
395
396impl ControlFileParser {
397 pub fn write_line(&mut self, line: &str) -> Result<Option<ControlParagraph<'static>>> {
404 let is_empty_line = line.trim().is_empty();
405 let is_indented = line.starts_with(' ') && line.len() > 1;
406
407 let current_field = self.field.take();
408
409 if is_empty_line {
411 if let Some(field) = current_field {
412 self.flush_field(field)?;
413 }
414
415 return Ok(if self.paragraph.is_empty() {
416 None
417 } else {
418 let para = self.paragraph.clone();
419 self.paragraph = ControlParagraph::default();
420 Some(para)
421 });
422 }
423
424 match (current_field, is_indented) {
425 (Some(v), false) => {
428 self.flush_field(v)?;
429
430 self.field = if is_empty_line {
431 None
432 } else {
433 Some(line.to_string())
434 };
435
436 Ok(None)
437 }
438
439 (None, _) => {
442 self.field = Some(line.to_string());
443
444 Ok(None)
445 }
446 (Some(v), true) => {
450 self.field = Some(v + line);
451
452 Ok(None)
453 }
454 }
455 }
456
457 pub fn finish(mut self) -> Result<Option<ControlParagraph<'static>>> {
462 if let Some(field) = self.field.take() {
463 self.flush_field(field)?;
464 }
465
466 Ok(if self.paragraph.is_empty() {
467 None
468 } else {
469 Some(self.paragraph)
470 })
471 }
472
473 fn flush_field(&mut self, v: String) -> Result<()> {
474 let mut parts = v.splitn(2, ':');
475
476 let name = parts.next().ok_or_else(|| {
477 DebianError::ControlParseError(format!("error parsing line '{}'; missing colon", v))
478 })?;
479 let value = parts
480 .next()
481 .ok_or_else(|| {
482 DebianError::ControlParseError(format!(
483 "error parsing field '{}'; could not detect value",
484 v
485 ))
486 })?
487 .trim();
488
489 self.paragraph
490 .set_field_from_string(Cow::Owned(name.to_string()), Cow::Owned(value.to_string()));
491
492 Ok(())
493 }
494}
495
/// Reads control paragraphs from a buffered reader.
///
/// Implements `Iterator`, yielding one parsed paragraph (or an error) per
/// call.
pub struct ControlParagraphReader<R: BufRead> {
    /// Source the lines are read from.
    reader: R,
    /// Parser state; taken out while reading and only put back after a
    /// paragraph has been returned, so it is `None` after end of input or an
    /// error.
    parser: Option<ControlFileParser>,
}
506
507impl<R: BufRead> ControlParagraphReader<R> {
508 pub fn new(reader: R) -> Self {
510 Self {
511 reader,
512 parser: Some(ControlFileParser::default()),
513 }
514 }
515
516 pub fn into_inner(self) -> R {
518 self.reader
519 }
520
521 fn get_next(&mut self) -> Result<Option<ControlParagraph<'static>>> {
522 let mut parser = self.parser.take().unwrap();
523
524 loop {
525 let mut line = String::new();
526
527 let bytes_read = self.reader.read_line(&mut line)?;
528
529 if bytes_read != 0 {
530 if let Some(paragraph) = parser.write_line(&line)? {
531 self.parser.replace(parser);
532 return Ok(Some(paragraph));
533 }
534 } else {
536 return if let Some(paragraph) = parser.finish()? {
537 Ok(Some(paragraph))
538 } else {
539 Ok(None)
540 };
541 }
542 }
543 }
544}
545
546impl<R: BufRead> Iterator for ControlParagraphReader<R> {
547 type Item = Result<ControlParagraph<'static>>;
548
549 fn next(&mut self) -> Option<Self::Item> {
550 if self.parser.is_none() {
551 None
552 } else {
553 match self.get_next() {
554 Ok(Some(para)) => Some(Ok(para)),
555 Ok(None) => None,
556 Err(e) => Some(Err(e)),
557 }
558 }
559 }
560}
561
/// Reads control paragraphs from an asynchronous buffered reader.
#[pin_project]
pub struct ControlParagraphAsyncReader<R> {
    /// Source the lines are read from.
    #[pin]
    reader: R,
    /// Parser state; taken out while reading and only put back after a
    /// paragraph has been returned, so it is `None` after end of input or an
    /// error.
    parser: Option<ControlFileParser>,
}
571
572impl<R> ControlParagraphAsyncReader<R>
573where
574 R: AsyncBufRead + Unpin,
575{
576 pub fn new(reader: R) -> Self {
578 Self {
579 reader,
580 parser: Some(ControlFileParser::default()),
581 }
582 }
583
584 pub fn into_inner(self) -> R {
586 self.reader
587 }
588
589 pub async fn read_paragraph(&mut self) -> Result<Option<ControlParagraph<'static>>> {
593 let mut parser = if let Some(parser) = self.parser.take() {
594 parser
595 } else {
596 return Ok(None);
597 };
598
599 loop {
600 let mut line = String::new();
601
602 let bytes_read = self.reader.read_line(&mut line).await?;
603
604 if bytes_read != 0 {
605 if let Some(paragraph) = parser.write_line(&line)? {
606 self.parser.replace(parser);
607 return Ok(Some(paragraph));
608 }
609 } else {
611 return if let Some(paragraph) = parser.finish()? {
612 Ok(Some(paragraph))
613 } else {
614 Ok(None)
615 };
616 }
617 }
618 }
619}
620
/// A control file: an ordered sequence of paragraphs.
#[derive(Clone, Debug, Default)]
pub struct ControlFile<'a> {
    /// Paragraphs in the order they were parsed or added.
    paragraphs: Vec<ControlParagraph<'a>>,
}
628
629impl<'a> ControlFile<'a> {
630 pub fn parse_reader<R: BufRead>(reader: &mut R) -> Result<Self> {
632 let mut paragraphs = Vec::new();
633 let mut parser = ControlFileParser::default();
634
635 loop {
636 let mut line = String::new();
637 let bytes_read = reader.read_line(&mut line)?;
638
639 if bytes_read == 0 {
641 break;
642 }
643
644 if let Some(paragraph) = parser.write_line(&line)? {
645 paragraphs.push(paragraph);
646 }
647 }
648
649 if let Some(paragraph) = parser.finish()? {
650 paragraphs.push(paragraph);
651 }
652
653 Ok(Self { paragraphs })
654 }
655
656 pub fn parse_str(s: &str) -> Result<Self> {
658 let mut reader = std::io::BufReader::new(s.as_bytes());
659 Self::parse_reader(&mut reader)
660 }
661
662 pub fn add_paragraph(&mut self, p: ControlParagraph<'a>) {
664 self.paragraphs.push(p);
665 }
666
667 pub fn paragraphs(&self) -> impl Iterator<Item = &ControlParagraph<'a>> {
669 self.paragraphs.iter()
670 }
671
672 pub fn into_paragraphs(self) -> impl Iterator<Item = ControlParagraph<'a>> {
674 self.paragraphs.into_iter()
675 }
676
677 pub fn write<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
679 for p in &self.paragraphs {
680 p.write(writer)?;
681 writer.write_all(b"\n")?;
682 }
683
684 Ok(())
685 }
686}
687
#[cfg(test)]
mod tests {
    use super::*;

    /// Setting a field replaces an existing field of the same name, and the
    /// name comparison ignores case.
    #[test]
    fn control_paragraph_field_semantics() {
        let mut p = ControlParagraph::default();

        p.set_field_from_string("foo".into(), "bar".into());
        p.set_field_from_string("foo".into(), "baz".into());
        assert_eq!(p.field("foo").unwrap().value, "baz");

        p.set_field_from_string("FOO".into(), "bar".into());
        assert_eq!(p.field("foo").unwrap().value, "bar");
        assert_eq!(p.field("FOO").unwrap().value, "bar");
    }

    /// Parses the bundled Release fixture and checks its fields and the
    /// entries of its checksum lists.
    ///
    /// NOTE(review): the expected checksum lines must match the fixture's
    /// column spacing exactly; confirm them against the test data file.
    #[test]
    fn parse_paragraph_release() -> Result<()> {
        let paragraphs = ControlParagraphReader::new(std::io::Cursor::new(include_bytes!(
            "testdata/release-debian-bullseye"
        )))
        .collect::<Result<Vec<_>>>()?;

        assert_eq!(paragraphs.len(), 1);
        let p = &paragraphs[0];

        assert_eq!(p.fields.len(), 14);

        assert!(p.has_field("Origin"));
        assert!(p.has_field("Version"));
        assert!(!p.has_field("Missing"));

        assert!(p.field("Version").is_some());

        let fields = &p.fields;
        assert_eq!(fields[0].name, "Origin");
        assert_eq!(fields[0].value, "Debian");

        assert_eq!(fields[3].name, "Version");
        assert_eq!(fields[3].value, "11.1");

        let ml = p.field_multiline("MD5Sum").unwrap();
        assert_eq!(ml.iter_lines().count(), 600);
        assert_eq!(
            ml.iter_lines().next().unwrap(),
            "7fdf4db15250af5368cc52a91e8edbce 738242 contrib/Contents-all"
        );

        assert!(p.field_multiline("SHA256").is_some());

        assert_eq!(fields[0].iter_words().collect::<Vec<_>>(), vec!["Debian"]);

        let values = p
            .field_multiline("MD5Sum")
            .unwrap()
            .iter_lines()
            .map(|x| x.to_string())
            .collect::<Vec<_>>();

        assert_eq!(values.len(), 600);
        assert_eq!(
            values[0],
            "7fdf4db15250af5368cc52a91e8edbce 738242 contrib/Contents-all"
        );
        assert_eq!(
            values[1],
            "cbd7bc4d3eb517ac2b22f929dfc07b47 57319 contrib/Contents-all.gz"
        );
        assert_eq!(
            values[599],
            "e3830f6fc5a946b5a5b46e8277e1d86f 80488 non-free/source/Sources.xz"
        );

        let values = p
            .field_multiline("SHA256")
            .unwrap()
            .iter_lines()
            .map(|x| x.to_string())
            .collect::<Vec<_>>();
        assert_eq!(values.len(), 600);
        assert_eq!(
            values[0],
            "3957f28db16e3f28c7b34ae84f1c929c567de6970f3f1b95dac9b498dd80fe63 738242 contrib/Contents-all",
        );
        assert_eq!(
            values[1],
            "3e9a121d599b56c08bc8f144e4830807c77c29d7114316d6984ba54695d3db7b 57319 contrib/Contents-all.gz",
        );
        assert_eq!(values[599], "30f3f996941badb983141e3b29b2ed5941d28cf81f9b5f600bb48f782d386fc7 80488 non-free/source/Sources.xz");

        Ok(())
    }

    /// Parses whatever apt list files exist on the host. The loop body never
    /// runs when the globs match nothing.
    #[test]
    fn test_parse_system_lists() -> Result<()> {
        let paths = glob::glob("/var/lib/apt/lists/*_Packages")
            .unwrap()
            .chain(glob::glob("/var/lib/apt/lists/*_Sources").unwrap())
            .chain(glob::glob("/var/lib/apt/lists/*i18n_Translation-*").unwrap());

        for path in paths {
            let path = path.unwrap();

            eprintln!("parsing {}", path.display());
            let fh = std::fs::File::open(&path)?;
            let reader = std::io::BufReader::new(fh);

            for para in ControlParagraphReader::new(reader) {
                para?;
            }
        }

        Ok(())
    }
}