deb822_lossless/
lossless.rs

1//! Parser for deb822 style files.
2//!
3//! This parser can be used to parse files in the deb822 format, while preserving
4//! all whitespace and comments. It is based on the [rowan] library, which is a
5//! lossless parser library for Rust.
6//!
7//! Once parsed, the file can be traversed or modified, and then written back to
8//! a file.
9//!
10//! # Example
11//!
12//! ```rust
13//! use deb822_lossless::Deb822;
14//! use std::str::FromStr;
15//!
16//! let input = r###"Package: deb822-lossless
17//! ## Comments are preserved
18//! Maintainer: Jelmer Vernooij <jelmer@debian.org>
19//! Homepage: https://github.com/jelmer/deb822-lossless
20//! Section: rust
21//!
22//! Package: deb822-lossless
23//! Architecture: any
24//! Description: Lossless parser for deb822 style files.
25//!   This parser can be used to parse files in the deb822 format, while preserving
26//!   all whitespace and comments. It is based on the [rowan] library, which is a
27//!   lossless parser library for Rust.
28//! "###;
29//!
30//! let deb822 = Deb822::from_str(input).unwrap();
31//! assert_eq!(deb822.paragraphs().count(), 2);
32//! let homepage = deb822.paragraphs().nth(0).unwrap().get("Homepage");
33//! assert_eq!(homepage.as_deref(), Some("https://github.com/jelmer/deb822-lossless"));
34//! ```
35
36use crate::{
37    lex::lex,
38    lex::SyntaxKind::{self, *},
39    Indentation,
40};
41use rowan::ast::AstNode;
42use std::path::Path;
43use std::str::FromStr;
44
45/// A positioned parse error containing location information.
46#[derive(Debug, Clone, PartialEq, Eq, Hash)]
47pub struct PositionedParseError {
48    /// The error message
49    pub message: String,
50    /// The text range where the error occurred
51    pub range: rowan::TextRange,
52    /// Optional error code for categorization
53    pub code: Option<String>,
54}
55
56impl std::fmt::Display for PositionedParseError {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        write!(f, "{}", self.message)
59    }
60}
61
62impl std::error::Error for PositionedParseError {}
63
/// The syntax errors collected while parsing, one message per error.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParseError(pub Vec<String>);

impl std::fmt::Display for ParseError {
    /// Writes every message on its own line, each followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.0
            .iter()
            .try_for_each(|message| writeln!(f, "{}", message))
    }
}

impl std::error::Error for ParseError {}
78
79/// Error parsing deb822 control files
80#[derive(Debug)]
81pub enum Error {
82    /// A syntax error was encountered while parsing the file.
83    ParseError(ParseError),
84
85    /// An I/O error was encountered while reading the file.
86    IoError(std::io::Error),
87}
88
89impl std::fmt::Display for Error {
90    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
91        match &self {
92            Error::ParseError(err) => write!(f, "{}", err),
93            Error::IoError(err) => write!(f, "{}", err),
94        }
95    }
96}
97
98impl From<ParseError> for Error {
99    fn from(err: ParseError) -> Self {
100        Self::ParseError(err)
101    }
102}
103
104impl From<std::io::Error> for Error {
105    fn from(err: std::io::Error) -> Self {
106        Self::IoError(err)
107    }
108}
109
110impl std::error::Error for Error {}
111
/// Marker type that ties this crate's `SyntaxKind` to rowan.
///
/// Implementing the `Language` trait teaches rowan to convert between its raw
/// `rowan::SyntaxKind` and our `enum SyntaxKind`, allowing for a nicer
/// SyntaxNode API where "kinds" are values from our `enum SyntaxKind`,
/// instead of plain u16 values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}
impl rowan::Language for Lang {
    type Kind = SyntaxKind;
    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        // SAFETY: NOTE(review) - this is only sound if `SyntaxKind` is laid out
        // as a `u16` (e.g. `#[repr(u16)]`) and `raw.0` is always one of its
        // discriminants. `SyntaxKind` is defined in `crate::lex`, outside this
        // file, and nothing here validates `raw.0` - TODO confirm.
        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
    }
    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        // Relies on a conversion from `SyntaxKind` into `rowan::SyntaxKind`
        // that is provided outside this file.
        kind.into()
    }
}
126
127/// GreenNode is an immutable tree, which is cheap to change,
128/// but doesn't contain offsets and parent pointers.
129use rowan::GreenNode;
130
131/// You can construct GreenNodes by hand, but a builder
132/// is helpful for top-down parsers: it maintains a stack
133/// of currently in-progress nodes
134use rowan::GreenNodeBuilder;
135
/// The result of running the parser over a piece of text.
///
/// The tree is stored as a rowan "green tree"; use the methods on `Parse`
/// to obtain a syntax-tree view of it.
pub(crate) struct Parse {
    /// Root of the green tree built from the input.
    pub(crate) green_node: GreenNode,
    /// Messages of the syntax errors encountered, in the order they were recorded.
    #[allow(unused)]
    pub(crate) errors: Vec<String>,
    /// The same errors as `errors`, each with a text range and optional code.
    pub(crate) positioned_errors: Vec<PositionedParseError>,
}
144
145pub(crate) fn parse(text: &str) -> Parse {
    // Parser state. Tokens are consumed by popping from the end of `tokens`,
    // which is why that vector is stored reversed.
    struct Parser<'a> {
        /// input tokens, including whitespace,
        /// in *reverse* order.
        tokens: Vec<(SyntaxKind, &'a str)>,
        /// the in-progress tree.
        builder: GreenNodeBuilder<'static>,
        /// the list of syntax errors we've accumulated
        /// so far.
        errors: Vec<String>,
        /// positioned errors with location information
        positioned_errors: Vec<PositionedParseError>,
        /// All tokens with their positions in forward order for position tracking
        /// (kind, start offset, end offset)
        token_positions: Vec<(SyntaxKind, rowan::TextSize, rowan::TextSize)>,
        /// current token index (counting from the end since tokens are in reverse);
        /// decremented by `bump`, never below zero
        current_token_index: usize,
    }
162
163    impl<'a> Parser<'a> {
        /// Parse one `Key: value` entry, including any continuation lines.
        ///
        /// Comment lines in front of the entry are consumed first, outside
        /// the ENTRY node. Unexpected tokens are wrapped in ERROR nodes and
        /// recorded via `add_positioned_error`; parsing then carries on, so
        /// this function always makes progress while tokens remain.
        fn parse_entry(&mut self) {
            // Leading comments: each COMMENT must be followed by a NEWLINE
            // (or the end of input, in which case there is no entry to parse).
            // NOTE(review): if the comment run is followed by something other
            // than a KEY (e.g. the NEWLINE of a blank line), the code below
            // still opens an ENTRY and records "expected key" - confirm this
            // is intended.
            while self.current() == Some(COMMENT) {
                self.bump();

                match self.current() {
                    Some(NEWLINE) => {
                        self.bump();
                    }
                    None => {
                        return;
                    }
                    Some(g) => {
                        self.builder.start_node(ERROR.into());
                        self.add_positioned_error(
                            format!("expected newline, got {g:?}"),
                            Some("unexpected_token".to_string()),
                        );
                        self.bump();
                        self.builder.finish_node();
                    }
                }
            }

            self.builder.start_node(ENTRY.into());

            // First, parse the key and colon
            if self.current() == Some(KEY) {
                self.bump();
                self.skip_ws();
            } else {
                // Wrap whatever is there (if anything) in an ERROR node so
                // that no input text is lost.
                self.builder.start_node(ERROR.into());
                self.add_positioned_error(
                    "expected key".to_string(),
                    Some("missing_key".to_string()),
                );
                if self.current().is_some() {
                    self.bump();
                }
                self.builder.finish_node();
            }
            if self.current() == Some(COLON) {
                self.bump();
                self.skip_ws();
            } else {
                self.builder.start_node(ERROR.into());
                self.add_positioned_error(
                    format!("expected ':', got {:?}", self.current()),
                    Some("missing_colon".to_string()),
                );
                if self.current().is_some() {
                    self.bump();
                }
                self.builder.finish_node();
            }
            // Value: one iteration per physical line. A line is made of
            // WHITESPACE/VALUE tokens and ends at NEWLINE or end of input;
            // an INDENT token after the NEWLINE starts a continuation line.
            loop {
                while self.current() == Some(WHITESPACE) || self.current() == Some(VALUE) {
                    self.bump();
                }

                match self.current() {
                    None => {
                        break;
                    }
                    Some(NEWLINE) => {
                        self.bump();
                    }
                    Some(g) => {
                        self.builder.start_node(ERROR.into());
                        self.add_positioned_error(
                            format!("expected newline, got {g:?}"),
                            Some("unexpected_token".to_string()),
                        );
                        self.bump();
                        self.builder.finish_node();
                    }
                }
                if self.current() == Some(INDENT) {
                    self.bump();
                    self.skip_ws();
                } else {
                    // No continuation line: the entry is complete.
                    break;
                }
            }
            self.builder.finish_node();
        }
249
250        fn parse_paragraph(&mut self) {
251            self.builder.start_node(PARAGRAPH.into());
252            while self.current() != Some(NEWLINE) && self.current().is_some() {
253                self.parse_entry();
254            }
255            self.builder.finish_node();
256        }
257
258        fn parse(mut self) -> Parse {
259            // Make sure that the root node covers all source
260            self.builder.start_node(ROOT.into());
261            while self.current().is_some() {
262                self.skip_ws_and_newlines();
263                if self.current().is_some() {
264                    self.parse_paragraph();
265                }
266            }
267            // Don't forget to eat *trailing* whitespace
268            self.skip_ws_and_newlines();
269            // Close the root node.
270            self.builder.finish_node();
271
272            // Turn the builder into a GreenNode
273            Parse {
274                green_node: self.builder.finish(),
275                errors: self.errors,
276                positioned_errors: self.positioned_errors,
277            }
278        }
279        /// Advance one token, adding it to the current branch of the tree builder.
280        fn bump(&mut self) {
281            let (kind, text) = self.tokens.pop().unwrap();
282            self.builder.token(kind.into(), text);
283            if self.current_token_index > 0 {
284                self.current_token_index -= 1;
285            }
286        }
287        /// Peek at the first unprocessed token
288        fn current(&self) -> Option<SyntaxKind> {
289            self.tokens.last().map(|(kind, _)| *kind)
290        }
291
292        /// Add a positioned error at the current position
293        fn add_positioned_error(&mut self, message: String, code: Option<String>) {
294            let range = if self.current_token_index < self.token_positions.len() {
295                let (_, start, end) = self.token_positions[self.current_token_index];
296                rowan::TextRange::new(start, end)
297            } else {
298                // Default to end of text if no current token
299                let end = self
300                    .token_positions
301                    .last()
302                    .map(|(_, _, end)| *end)
303                    .unwrap_or_else(|| rowan::TextSize::from(0));
304                rowan::TextRange::new(end, end)
305            };
306
307            self.positioned_errors.push(PositionedParseError {
308                message: message.clone(),
309                range,
310                code,
311            });
312            self.errors.push(message);
313        }
314        fn skip_ws(&mut self) {
315            while self.current() == Some(WHITESPACE) || self.current() == Some(COMMENT) {
316                self.bump()
317            }
318        }
319        fn skip_ws_and_newlines(&mut self) {
320            while self.current() == Some(WHITESPACE)
321                || self.current() == Some(COMMENT)
322                || self.current() == Some(NEWLINE)
323            {
324                self.builder.start_node(EMPTY_LINE.into());
325                while self.current() != Some(NEWLINE) && self.current().is_some() {
326                    self.bump();
327                }
328                if self.current() == Some(NEWLINE) {
329                    self.bump();
330                }
331                self.builder.finish_node();
332            }
333        }
334    }
335
    // Lex the whole input up front; the parser works on the finished token list.
    let mut tokens = lex(text).collect::<Vec<_>>();

    // Build token positions in forward order
    // (each token starts where the previous one ended, beginning at offset 0)
    let mut token_positions = Vec::new();
    let mut position = rowan::TextSize::from(0);
    for (kind, text) in &tokens {
        let start = position;
        let end = start + rowan::TextSize::of(*text);
        token_positions.push((*kind, start, end));
        position = end;
    }

    // Reverse tokens for parsing (but keep positions in forward order)
    tokens.reverse();
    // Index of the last element of the reversed vector (0 for empty input).
    let current_token_index = tokens.len().saturating_sub(1);

    // Hand everything to the parser and run it to completion.
    Parser {
        tokens,
        builder: GreenNodeBuilder::new(),
        errors: Vec::new(),
        positioned_errors: Vec::new(),
        token_positions,
        current_token_index,
    }
    .parse()
361}
362
363/// To work with the parse results we need a view into the
364/// green tree - the Syntax tree.
365/// It is also immutable, like a GreenNode,
366/// but it contains parent pointers, offsets, and
367/// has identity semantics.
368
369type SyntaxNode = rowan::SyntaxNode<Lang>;
370#[allow(unused)]
371type SyntaxToken = rowan::SyntaxToken<Lang>;
372#[allow(unused)]
373type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
374
375impl Parse {
376    #[cfg(test)]
377    fn syntax(&self) -> SyntaxNode {
378        SyntaxNode::new_root(self.green_node.clone())
379    }
380
381    fn root_mut(&self) -> Deb822 {
382        Deb822::cast(SyntaxNode::new_root_mut(self.green_node.clone())).unwrap()
383    }
384}
385
// Declares a typed wrapper `$ast` around a `SyntaxNode` whose kind is `$kind`.
//
// The generated type gets an inherent `cast`, an `AstNode` implementation
// that delegates to it, and a `Display` implementation that prints the exact
// text covered by the node.
macro_rules! ast_node {
    ($ast:ident, $kind:ident) => {
        #[doc = "An AST node representing a `"]
        #[doc = stringify!($ast)]
        #[doc = "`."]
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        #[repr(transparent)]
        pub struct $ast(SyntaxNode);
        impl $ast {
            // Wraps `node` if it has the expected kind, otherwise returns `None`.
            #[allow(unused)]
            fn cast(node: SyntaxNode) -> Option<Self> {
                if node.kind() == $kind {
                    Some(Self(node))
                } else {
                    None
                }
            }
        }

        impl AstNode for $ast {
            type Language = Lang;

            fn can_cast(kind: SyntaxKind) -> bool {
                kind == $kind
            }

            fn cast(syntax: SyntaxNode) -> Option<Self> {
                // Resolves to the inherent `cast` above, not to this trait method.
                Self::cast(syntax)
            }

            fn syntax(&self) -> &SyntaxNode {
                &self.0
            }
        }

        impl std::fmt::Display for $ast {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                write!(f, "{}", self.0.text())
            }
        }
    };
}
428
429ast_node!(Deb822, ROOT);
430ast_node!(Paragraph, PARAGRAPH);
431ast_node!(Entry, ENTRY);
432
433impl Default for Deb822 {
434    fn default() -> Self {
435        Self::new()
436    }
437}
438
439impl Deb822 {
440    /// Create a new empty deb822 file.
441    pub fn new() -> Deb822 {
442        let mut builder = GreenNodeBuilder::new();
443
444        builder.start_node(ROOT.into());
445        builder.finish_node();
446        Deb822(SyntaxNode::new_root_mut(builder.finish()))
447    }
448
    /// Parse deb822 text, returning a Parse result
    ///
    /// Delegates to `crate::Parse::parse_deb822`, which is defined outside
    /// this file. NOTE(review): presumably the returned value also carries
    /// any syntax errors - confirm against `crate::Parse`.
    pub fn parse(text: &str) -> crate::Parse<Deb822> {
        crate::Parse::parse_deb822(text)
    }
453
454    /// Provide a formatter that can handle indentation and trailing separators
455    ///
456    /// # Arguments
457    /// * `control` - The control file to format
458    /// * `indentation` - The indentation to use
459    /// * `immediate_empty_line` - Whether the value should always start with an empty line. If true,
460    ///                  then the result becomes something like "Field:\n value". This parameter
461    ///                  only applies to the values that will be formatted over more than one line.
462    /// * `max_line_length_one_liner` - If set, then this is the max length of the value
463    ///                        if it is crammed into a "one-liner" value. If the value(s) fit into
464    ///                        one line, this parameter will overrule immediate_empty_line.
465    /// * `sort_paragraphs` - If set, then this function will sort the paragraphs according to the
466    ///                given function.
467    /// * `sort_entries` - If set, then this function will sort the entries according to the
468    ///               given function.
469    #[must_use]
470    pub fn wrap_and_sort(
471        &self,
472        sort_paragraphs: Option<&dyn Fn(&Paragraph, &Paragraph) -> std::cmp::Ordering>,
473        wrap_and_sort_paragraph: Option<&dyn Fn(&Paragraph) -> Paragraph>,
474    ) -> Deb822 {
475        let mut builder = GreenNodeBuilder::new();
476        builder.start_node(ROOT.into());
477        let mut current = vec![];
478        let mut paragraphs = vec![];
479        for c in self.0.children_with_tokens() {
480            match c.kind() {
481                PARAGRAPH => {
482                    paragraphs.push((
483                        current,
484                        Paragraph::cast(c.as_node().unwrap().clone()).unwrap(),
485                    ));
486                    current = vec![];
487                }
488                COMMENT | ERROR => {
489                    current.push(c);
490                }
491                EMPTY_LINE => {
492                    current.extend(
493                        c.as_node()
494                            .unwrap()
495                            .children_with_tokens()
496                            .skip_while(|c| matches!(c.kind(), EMPTY_LINE | NEWLINE | WHITESPACE)),
497                    );
498                }
499                _ => {}
500            }
501        }
502        if let Some(sort_paragraph) = sort_paragraphs {
503            paragraphs.sort_by(|a, b| {
504                let a_key = &a.1;
505                let b_key = &b.1;
506                sort_paragraph(a_key, b_key)
507            });
508        }
509
510        for (i, paragraph) in paragraphs.into_iter().enumerate() {
511            if i > 0 {
512                builder.start_node(EMPTY_LINE.into());
513                builder.token(NEWLINE.into(), "\n");
514                builder.finish_node();
515            }
516            for c in paragraph.0.into_iter() {
517                builder.token(c.kind().into(), c.as_token().unwrap().text());
518            }
519            let new_paragraph = if let Some(ref ws) = wrap_and_sort_paragraph {
520                ws(&paragraph.1)
521            } else {
522                paragraph.1
523            };
524            inject(&mut builder, new_paragraph.0);
525        }
526
527        for c in current {
528            builder.token(c.kind().into(), c.as_token().unwrap().text());
529        }
530
531        builder.finish_node();
532        Self(SyntaxNode::new_root_mut(builder.finish()))
533    }
534
535    /// Returns an iterator over all paragraphs in the file.
536    pub fn paragraphs(&self) -> impl Iterator<Item = Paragraph> {
537        self.0.children().filter_map(Paragraph::cast)
538    }
539
540    /// Converts the perceptual paragraph index to the node index.
541    fn convert_index(&self, index: usize) -> Option<usize> {
542        let mut current_pos = 0usize;
543        if index == 0 {
544            return Some(0);
545        }
546        for (i, node) in self.0.children_with_tokens().enumerate() {
547            if node.kind() == PARAGRAPH {
548                if current_pos == index {
549                    return Some(i);
550                }
551                current_pos += 1;
552            }
553        }
554
555        None
556    }
557
    /// Delete trailing empty lines after specified node and before any non-empty line nodes.
    ///
    /// `start` is an index among all children (nodes and tokens) of the
    /// root. Beginning there, consecutive `EMPTY_LINE` children are removed;
    /// the scan stops at the first child of any other kind.
    fn delete_trailing_space(&self, start: usize) {
        for (i, node) in self.0.children_with_tokens().enumerate() {
            if i < start {
                continue;
            }
            if node.kind() != EMPTY_LINE {
                return;
            }
            // this is not a typo, the index will shift by one after deleting the node
            // so instead of deleting using `i`, we use `start` as the start index
            self.0.splice_children(start..start + 1, []);
        }
    }
572
573    /// Shared internal function to insert a new paragraph into the file.
574    fn insert_empty_paragraph(&mut self, index: Option<usize>) -> Paragraph {
575        let paragraph = Paragraph::new();
576        let mut to_insert = vec![];
577        if self.0.children().count() > 0 {
578            let mut builder = GreenNodeBuilder::new();
579            builder.start_node(EMPTY_LINE.into());
580            builder.token(NEWLINE.into(), "\n");
581            builder.finish_node();
582            to_insert.push(SyntaxNode::new_root_mut(builder.finish()).into());
583        }
584        to_insert.push(paragraph.0.clone().into());
585        let insertion_point = match index {
586            Some(i) => {
587                if to_insert.len() > 1 {
588                    to_insert.swap(0, 1);
589                }
590                i
591            }
592            None => self.0.children().count(),
593        };
594        self.0
595            .splice_children(insertion_point..insertion_point, to_insert);
596        paragraph
597    }
598
599    /// Insert a new empty paragraph into the file after specified index.
600    ///
601    /// # Examples
602    ///
603    /// ```
604    /// use deb822_lossless::{Deb822, Paragraph};
605    /// let mut d: Deb822 = vec![
606    ///     vec![("Foo", "Bar"), ("Baz", "Qux")].into_iter().collect(),
607    ///     vec![("A", "B"), ("C", "D")].into_iter().collect(),
608    /// ]
609    /// .into_iter()
610    /// .collect();
611    /// let mut p = d.insert_paragraph(0);
612    /// p.set("Foo", "Baz");
613    /// assert_eq!(d.to_string(), "Foo: Baz\n\nFoo: Bar\nBaz: Qux\n\nA: B\nC: D\n");
614    /// let mut another = d.insert_paragraph(1);
615    /// another.set("Y", "Z");
616    /// assert_eq!(d.to_string(), "Foo: Baz\n\nY: Z\n\nFoo: Bar\nBaz: Qux\n\nA: B\nC: D\n");
617    /// ```
618    pub fn insert_paragraph(&mut self, index: usize) -> Paragraph {
619        self.insert_empty_paragraph(self.convert_index(index))
620    }
621
622    /// Remove the paragraph at the specified index from the file.
623    ///
624    /// # Examples
625    ///
626    /// ```
627    /// use deb822_lossless::Deb822;
628    /// let mut d: Deb822 = vec![
629    ///     vec![("Foo", "Bar"), ("Baz", "Qux")].into_iter().collect(),
630    ///     vec![("A", "B"), ("C", "D")].into_iter().collect(),
631    /// ]
632    /// .into_iter()
633    /// .collect();
634    /// d.remove_paragraph(0);
635    /// assert_eq!(d.to_string(), "A: B\nC: D\n");
636    /// d.remove_paragraph(0);
637    /// assert_eq!(d.to_string(), "");
638    /// ```
639    pub fn remove_paragraph(&mut self, index: usize) {
640        if let Some(index) = self.convert_index(index) {
641            self.0.splice_children(index..index + 1, []);
642            self.delete_trailing_space(index);
643        }
644    }
645
646    /// Add a new empty paragraph to the end of the file.
647    pub fn add_paragraph(&mut self) -> Paragraph {
648        self.insert_empty_paragraph(None)
649    }
650
651    /// Read a deb822 file from the given path.
652    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, Error> {
653        let text = std::fs::read_to_string(path)?;
654        Ok(Self::from_str(&text)?)
655    }
656
657    /// Read a deb822 file from the given path, ignoring any syntax errors.
658    pub fn from_file_relaxed(
659        path: impl AsRef<Path>,
660    ) -> Result<(Self, Vec<String>), std::io::Error> {
661        let text = std::fs::read_to_string(path)?;
662        Ok(Self::from_str_relaxed(&text))
663    }
664
665    /// Parse a deb822 file from a string, allowing syntax errors.
666    pub fn from_str_relaxed(s: &str) -> (Self, Vec<String>) {
667        let parsed = parse(s);
668        (parsed.root_mut(), parsed.errors)
669    }
670
671    /// Read a deb822 file from a Read object.
672    pub fn read<R: std::io::Read>(mut r: R) -> Result<Self, Error> {
673        let mut buf = String::new();
674        r.read_to_string(&mut buf)?;
675        Ok(Self::from_str(&buf)?)
676    }
677
678    /// Read a deb822 file from a Read object, allowing syntax errors.
679    pub fn read_relaxed<R: std::io::Read>(mut r: R) -> Result<(Self, Vec<String>), std::io::Error> {
680        let mut buf = String::new();
681        r.read_to_string(&mut buf)?;
682        Ok(Self::from_str_relaxed(&buf))
683    }
684}
685
686fn inject(builder: &mut GreenNodeBuilder, node: SyntaxNode) {
687    builder.start_node(node.kind().into());
688    for child in node.children_with_tokens() {
689        match child {
690            rowan::NodeOrToken::Node(child) => {
691                inject(builder, child);
692            }
693            rowan::NodeOrToken::Token(token) => {
694                builder.token(token.kind().into(), token.text());
695            }
696        }
697    }
698    builder.finish_node();
699}
700
701impl FromIterator<Paragraph> for Deb822 {
702    fn from_iter<T: IntoIterator<Item = Paragraph>>(iter: T) -> Self {
703        let mut builder = GreenNodeBuilder::new();
704        builder.start_node(ROOT.into());
705        for (i, paragraph) in iter.into_iter().enumerate() {
706            if i > 0 {
707                builder.start_node(EMPTY_LINE.into());
708                builder.token(NEWLINE.into(), "\n");
709                builder.finish_node();
710            }
711            inject(&mut builder, paragraph.0);
712        }
713        builder.finish_node();
714        Self(SyntaxNode::new_root_mut(builder.finish()))
715    }
716}
717
718impl From<Vec<(String, String)>> for Paragraph {
719    fn from(v: Vec<(String, String)>) -> Self {
720        v.into_iter().collect()
721    }
722}
723
724impl From<Vec<(&str, &str)>> for Paragraph {
725    fn from(v: Vec<(&str, &str)>) -> Self {
726        v.into_iter().collect()
727    }
728}
729
730impl FromIterator<(String, String)> for Paragraph {
731    fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self {
732        let mut builder = GreenNodeBuilder::new();
733        builder.start_node(PARAGRAPH.into());
734        for (key, value) in iter {
735            builder.start_node(ENTRY.into());
736            builder.token(KEY.into(), &key);
737            builder.token(COLON.into(), ":");
738            builder.token(WHITESPACE.into(), " ");
739            for (i, line) in value.split('\n').enumerate() {
740                if i > 0 {
741                    builder.token(INDENT.into(), " ");
742                }
743                builder.token(VALUE.into(), line);
744                builder.token(NEWLINE.into(), "\n");
745            }
746            builder.finish_node();
747        }
748        builder.finish_node();
749        Self(SyntaxNode::new_root_mut(builder.finish()))
750    }
751}
752
753impl<'a> FromIterator<(&'a str, &'a str)> for Paragraph {
754    fn from_iter<T: IntoIterator<Item = (&'a str, &'a str)>>(iter: T) -> Self {
755        let mut builder = GreenNodeBuilder::new();
756        builder.start_node(PARAGRAPH.into());
757        for (key, value) in iter {
758            builder.start_node(ENTRY.into());
759            builder.token(KEY.into(), key);
760            builder.token(COLON.into(), ":");
761            builder.token(WHITESPACE.into(), " ");
762            for (i, line) in value.split('\n').enumerate() {
763                if i > 0 {
764                    builder.token(INDENT.into(), " ");
765                }
766                builder.token(VALUE.into(), line);
767                builder.token(NEWLINE.into(), "\n");
768            }
769            builder.finish_node();
770        }
771        builder.finish_node();
772        Self(SyntaxNode::new_root_mut(builder.finish()))
773    }
774}
775
776impl Paragraph {
777    /// Create a new empty paragraph.
778    pub fn new() -> Paragraph {
779        let mut builder = GreenNodeBuilder::new();
780
781        builder.start_node(PARAGRAPH.into());
782        builder.finish_node();
783        Paragraph(SyntaxNode::new_root_mut(builder.finish()))
784    }
785
786    /// Reformat this paragraph
787    ///
788    /// # Arguments
789    /// * `indentation` - The indentation to use
790    /// * `immediate_empty_line` - Whether multi-line values should always start with an empty line
791    /// * `max_line_length_one_liner` - If set, then this is the max length of the value if it is
792    ///     crammed into a "one-liner" value
793    /// * `sort_entries` - If set, then this function will sort the entries according to the given
794    /// function
795    /// * `format_value` - If set, then this function will format the value according to the given
796    ///   function
797    #[must_use]
798    pub fn wrap_and_sort(
799        &self,
800        indentation: Indentation,
801        immediate_empty_line: bool,
802        max_line_length_one_liner: Option<usize>,
803        sort_entries: Option<&dyn Fn(&Entry, &Entry) -> std::cmp::Ordering>,
804        format_value: Option<&dyn Fn(&str, &str) -> String>,
805    ) -> Paragraph {
806        let mut builder = GreenNodeBuilder::new();
807
808        let mut current = vec![];
809        let mut entries = vec![];
810
811        builder.start_node(PARAGRAPH.into());
812        for c in self.0.children_with_tokens() {
813            match c.kind() {
814                ENTRY => {
815                    entries.push((current, Entry::cast(c.as_node().unwrap().clone()).unwrap()));
816                    current = vec![];
817                }
818                ERROR | COMMENT => {
819                    current.push(c);
820                }
821                _ => {}
822            }
823        }
824
825        if let Some(sort_entry) = sort_entries {
826            entries.sort_by(|a, b| {
827                let a_key = &a.1;
828                let b_key = &b.1;
829                sort_entry(a_key, b_key)
830            });
831        }
832
833        for (pre, entry) in entries.into_iter() {
834            for c in pre.into_iter() {
835                builder.token(c.kind().into(), c.as_token().unwrap().text());
836            }
837
838            inject(
839                &mut builder,
840                entry
841                    .wrap_and_sort(
842                        indentation,
843                        immediate_empty_line,
844                        max_line_length_one_liner,
845                        format_value,
846                    )
847                    .0,
848            );
849        }
850
851        for c in current {
852            builder.token(c.kind().into(), c.as_token().unwrap().text());
853        }
854
855        builder.finish_node();
856        Self(SyntaxNode::new_root_mut(builder.finish()))
857    }
858
859    /// Returns the value of the given key in the paragraph.
860    pub fn get(&self, key: &str) -> Option<String> {
861        self.entries()
862            .find(|e| e.key().as_deref() == Some(key))
863            .map(|e| e.value())
864    }
865
866    /// Returns whether the paragraph contains the given key.
867    pub fn contains_key(&self, key: &str) -> bool {
868        self.get(key).is_some()
869    }
870
871    /// Returns an iterator over all entries in the paragraph.
872    pub fn entries(&self) -> impl Iterator<Item = Entry> + '_ {
873        self.0.children().filter_map(Entry::cast)
874    }
875
876    /// Returns an iterator over all items in the paragraph.
877    pub fn items(&self) -> impl Iterator<Item = (String, String)> + '_ {
878        self.entries()
879            .filter_map(|e| e.key().map(|k| (k, e.value())))
880    }
881
882    /// Returns an iterator over all values for the given key in the paragraph.
883    pub fn get_all<'a>(&'a self, key: &'a str) -> impl Iterator<Item = String> + 'a {
884        self.items()
885            .filter_map(move |(k, v)| if k == key { Some(v) } else { None })
886    }
887
888    /// Returns an iterator over all keys in the paragraph.
889    pub fn keys(&self) -> impl Iterator<Item = String> + '_ {
890        self.entries().filter_map(|e| e.key())
891    }
892
893    /// Remove the given field from the paragraph.
894    pub fn remove(&mut self, key: &str) {
895        for mut entry in self.entries() {
896            if entry.key().as_deref() == Some(key) {
897                entry.detach();
898            }
899        }
900    }
901
902    /// Insert a new field
903    pub fn insert(&mut self, key: &str, value: &str) {
904        let entry = Entry::new(key, value);
905        let count = self.0.children_with_tokens().count();
906        self.0.splice_children(count..count, vec![entry.0.into()]);
907    }
908
909    /// Set a field in the paragraph
910    pub fn set(&mut self, key: &str, value: &str) {
911        let new_entry = Entry::new(key, value);
912
913        for entry in self.entries() {
914            if entry.key().as_deref() == Some(key) {
915                self.0.splice_children(
916                    entry.0.index()..entry.0.index() + 1,
917                    vec![new_entry.0.into()],
918                );
919                return;
920            }
921        }
922        let count = self.0.children_with_tokens().count();
923        self.0
924            .splice_children(count..count, vec![new_entry.0.into()]);
925    }
926
927    /// Rename the given field in the paragraph.
928    pub fn rename(&mut self, old_key: &str, new_key: &str) -> bool {
929        for entry in self.entries() {
930            if entry.key().as_deref() == Some(old_key) {
931                self.0.splice_children(
932                    entry.0.index()..entry.0.index() + 1,
933                    vec![Entry::new(new_key, entry.value().as_str()).0.into()],
934                );
935                return true;
936            }
937        }
938        false
939    }
940}
941
942impl Default for Paragraph {
943    fn default() -> Self {
944        Self::new()
945    }
946}
947
948impl std::str::FromStr for Paragraph {
949    type Err = ParseError;
950
951    fn from_str(text: &str) -> Result<Self, Self::Err> {
952        let deb822 = Deb822::from_str(text)?;
953
954        let mut paragraphs = deb822.paragraphs();
955
956        paragraphs
957            .next()
958            .ok_or_else(|| ParseError(vec!["no paragraphs".to_string()]))
959    }
960}
961
#[cfg(feature = "python-debian")]
impl<'py> pyo3::IntoPyObject<'py> for Paragraph {
    type Target = pyo3::PyAny;
    type Output = pyo3::Bound<'py, Self::Target>;
    type Error = pyo3::PyErr;

    /// Convert the paragraph into a Python `debian.deb822.Deb822` object.
    ///
    /// The conversion is the same as for `&Paragraph`: the items are copied
    /// into a dict which is then passed to the `Deb822` constructor.
    fn into_pyobject(self, py: pyo3::Python<'py>) -> Result<Self::Output, Self::Error> {
        <&Paragraph as pyo3::IntoPyObject<'py>>::into_pyobject(&self, py)
    }
}
979
#[cfg(feature = "python-debian")]
impl<'a, 'py> pyo3::IntoPyObject<'py> for &'a Paragraph {
    type Target = pyo3::PyAny;
    type Output = pyo3::Bound<'py, Self::Target>;
    type Error = pyo3::PyErr;

    /// Convert the paragraph into a Python `debian.deb822.Deb822` object.
    ///
    /// Every `(key, value)` item is stored in a fresh dict, which is then
    /// handed to the `Deb822` class imported from `debian.deb822`.
    fn into_pyobject(self, py: pyo3::Python<'py>) -> Result<Self::Output, Self::Error> {
        use pyo3::prelude::*;
        let fields = pyo3::types::PyDict::new(py);
        self.items()
            .try_for_each(|(name, value)| fields.set_item(name, value))?;
        let module = py.import("debian.deb822")?;
        let constructor = module.getattr("Deb822")?;
        constructor.call1((fields,))
    }
}
997
#[cfg(feature = "python-debian")]
impl pyo3::FromPyObject<'_> for Paragraph {
    /// Build a paragraph from a Python object by parsing its `__str__` output.
    ///
    /// # Errors
    /// Returns the Python error raised by `__str__` or by the string
    /// extraction, or a `ValueError` carrying the parse error message when the
    /// text cannot be parsed as a paragraph.
    fn extract_bound(obj: &pyo3::Bound<pyo3::PyAny>) -> pyo3::PyResult<Self> {
        use pyo3::prelude::*;
        let d = obj.call_method0("__str__")?.extract::<String>()?;
        // The mapped error already is a `PyErr`, so the result can be returned
        // directly instead of being unwrapped with `?` and re-wrapped in `Ok`.
        Paragraph::from_str(&d)
            .map_err(|e| pyo3::exceptions::PyValueError::new_err((e.to_string(),)))
    }
}
1007
1008impl Entry {
1009    /// Returns the text range of this entry in the source text.
1010    pub fn text_range(&self) -> rowan::TextRange {
1011        self.0.text_range()
1012    }
1013
1014    /// Returns the text range of the key (field name) in this entry.
1015    pub fn key_range(&self) -> Option<rowan::TextRange> {
1016        self.0
1017            .children_with_tokens()
1018            .filter_map(|it| it.into_token())
1019            .find(|it| it.kind() == KEY)
1020            .map(|it| it.text_range())
1021    }
1022
1023    /// Returns the text range of the colon separator in this entry.
1024    pub fn colon_range(&self) -> Option<rowan::TextRange> {
1025        self.0
1026            .children_with_tokens()
1027            .filter_map(|it| it.into_token())
1028            .find(|it| it.kind() == COLON)
1029            .map(|it| it.text_range())
1030    }
1031
1032    /// Returns the text range of the value portion (excluding the key and colon) in this entry.
1033    /// This includes all VALUE tokens and any continuation lines.
1034    pub fn value_range(&self) -> Option<rowan::TextRange> {
1035        let value_tokens: Vec<_> = self
1036            .0
1037            .children_with_tokens()
1038            .filter_map(|it| it.into_token())
1039            .filter(|it| it.kind() == VALUE)
1040            .collect();
1041
1042        if value_tokens.is_empty() {
1043            return None;
1044        }
1045
1046        let first = value_tokens.first().unwrap();
1047        let last = value_tokens.last().unwrap();
1048        Some(rowan::TextRange::new(
1049            first.text_range().start(),
1050            last.text_range().end(),
1051        ))
1052    }
1053
1054    /// Returns the text ranges of all individual value lines in this entry.
1055    /// Multi-line values will return multiple ranges.
1056    pub fn value_line_ranges(&self) -> Vec<rowan::TextRange> {
1057        self.0
1058            .children_with_tokens()
1059            .filter_map(|it| it.into_token())
1060            .filter(|it| it.kind() == VALUE)
1061            .map(|it| it.text_range())
1062            .collect()
1063    }
1064
1065    /// Create a new entry with the given key and value.
1066    pub fn new(key: &str, value: &str) -> Entry {
1067        let mut builder = GreenNodeBuilder::new();
1068
1069        builder.start_node(ENTRY.into());
1070        builder.token(KEY.into(), key);
1071        builder.token(COLON.into(), ":");
1072        builder.token(WHITESPACE.into(), " ");
1073        for (i, line) in value.split('\n').enumerate() {
1074            if i > 0 {
1075                builder.token(INDENT.into(), " ");
1076            }
1077            builder.token(VALUE.into(), line);
1078            builder.token(NEWLINE.into(), "\n");
1079        }
1080        builder.finish_node();
1081        Entry(SyntaxNode::new_root_mut(builder.finish()))
1082    }
1083
1084    #[must_use]
1085    /// Reformat this entry
1086    ///
1087    /// # Arguments
1088    /// * `indentation` - The indentation to use
1089    /// * `immediate_empty_line` - Whether multi-line values should always start with an empty line
1090    /// * `max_line_length_one_liner` - If set, then this is the max length of the value if it is
1091    ///    crammed into a "one-liner" value
1092    /// * `format_value` - If set, then this function will format the value according to the given
1093    ///    function
1094    ///
1095    /// # Returns
1096    /// The reformatted entry
1097    pub fn wrap_and_sort(
1098        &self,
1099        mut indentation: Indentation,
1100        immediate_empty_line: bool,
1101        max_line_length_one_liner: Option<usize>,
1102        format_value: Option<&dyn Fn(&str, &str) -> String>,
1103    ) -> Entry {
1104        let mut builder = GreenNodeBuilder::new();
1105
1106        let mut content = vec![];
1107        builder.start_node(ENTRY.into());
1108        for c in self.0.children_with_tokens() {
1109            let text = c.as_token().map(|t| t.text());
1110            match c.kind() {
1111                KEY => {
1112                    builder.token(KEY.into(), text.unwrap());
1113                    if indentation == Indentation::FieldNameLength {
1114                        indentation = Indentation::Spaces(text.unwrap().len() as u32);
1115                    }
1116                }
1117                COLON => {
1118                    builder.token(COLON.into(), ":");
1119                }
1120                INDENT => {
1121                    // Discard original whitespace
1122                }
1123                ERROR | COMMENT | VALUE | WHITESPACE | NEWLINE => {
1124                    content.push(c);
1125                }
1126                EMPTY_LINE | ENTRY | ROOT | PARAGRAPH => unreachable!(),
1127            }
1128        }
1129
1130        let indentation = if let crate::Indentation::Spaces(i) = indentation {
1131            i
1132        } else {
1133            1
1134        };
1135
1136        assert!(indentation > 0);
1137
1138        // Strip trailing whitespace and newlines
1139        while let Some(c) = content.last() {
1140            if c.kind() == NEWLINE || c.kind() == WHITESPACE {
1141                content.pop();
1142            } else {
1143                break;
1144            }
1145        }
1146
1147        // Reformat iff there is a format function and the value
1148        // has no errors or comments
1149        let tokens = if let Some(ref format_value) = format_value {
1150            if !content
1151                .iter()
1152                .any(|c| c.kind() == ERROR || c.kind() == COMMENT)
1153            {
1154                let concat = content
1155                    .iter()
1156                    .filter_map(|c| c.as_token().map(|t| t.text()))
1157                    .collect::<String>();
1158                let formatted = format_value(self.key().as_ref().unwrap(), &concat);
1159                crate::lex::lex_inline(&formatted)
1160                    .map(|(k, t)| (k, t.to_string()))
1161                    .collect::<Vec<_>>()
1162            } else {
1163                content
1164                    .into_iter()
1165                    .map(|n| n.into_token().unwrap())
1166                    .map(|i| (i.kind(), i.text().to_string()))
1167                    .collect::<Vec<_>>()
1168            }
1169        } else {
1170            content
1171                .into_iter()
1172                .map(|n| n.into_token().unwrap())
1173                .map(|i| (i.kind(), i.text().to_string()))
1174                .collect::<Vec<_>>()
1175        };
1176
1177        rebuild_value(
1178            &mut builder,
1179            tokens,
1180            self.key().map_or(0, |k| k.len()),
1181            indentation,
1182            immediate_empty_line,
1183            max_line_length_one_liner,
1184        );
1185
1186        builder.finish_node();
1187        Self(SyntaxNode::new_root_mut(builder.finish()))
1188    }
1189
1190    /// Returns the key of the entry.
1191    pub fn key(&self) -> Option<String> {
1192        self.0
1193            .children_with_tokens()
1194            .filter_map(|it| it.into_token())
1195            .find(|it| it.kind() == KEY)
1196            .map(|it| it.text().to_string())
1197    }
1198
1199    /// Returns the value of the entry.
1200    pub fn value(&self) -> String {
1201        let mut parts = self
1202            .0
1203            .children_with_tokens()
1204            .filter_map(|it| it.into_token())
1205            .filter(|it| it.kind() == VALUE)
1206            .map(|it| it.text().to_string());
1207
1208        match parts.next() {
1209            None => String::new(),
1210            Some(first) => {
1211                let mut result = first;
1212                for part in parts {
1213                    result.push('\n');
1214                    result.push_str(&part);
1215                }
1216                result
1217            }
1218        }
1219    }
1220
1221    /// Detach this entry from the paragraph.
1222    pub fn detach(&mut self) {
1223        self.0.detach();
1224    }
1225}
1226
1227impl FromStr for Deb822 {
1228    type Err = ParseError;
1229
1230    fn from_str(s: &str) -> Result<Self, Self::Err> {
1231        Deb822::parse(s).to_result()
1232    }
1233}
1234
/// Parses a two-paragraph control file and checks the exact syntax tree, the
/// absence of errors, the accessor results and the lossless round trip.
#[test]
fn test_parse_simple() {
    const CONTROLV1: &str = r#"Source: foo
Maintainer: Foo Bar <foo@example.com>
Section: net

# This is a comment

Package: foo
Architecture: all
Depends:
 bar,
 blah
Description: This is a description
 And it is
 .
 multiple
 lines
"#;
    let expected_tree = r###"ROOT@0..203
  PARAGRAPH@0..63
    ENTRY@0..12
      KEY@0..6 "Source"
      COLON@6..7 ":"
      WHITESPACE@7..8 " "
      VALUE@8..11 "foo"
      NEWLINE@11..12 "\n"
    ENTRY@12..50
      KEY@12..22 "Maintainer"
      COLON@22..23 ":"
      WHITESPACE@23..24 " "
      VALUE@24..49 "Foo Bar <foo@example. ..."
      NEWLINE@49..50 "\n"
    ENTRY@50..63
      KEY@50..57 "Section"
      COLON@57..58 ":"
      WHITESPACE@58..59 " "
      VALUE@59..62 "net"
      NEWLINE@62..63 "\n"
  EMPTY_LINE@63..64
    NEWLINE@63..64 "\n"
  EMPTY_LINE@64..84
    COMMENT@64..83 "# This is a comment"
    NEWLINE@83..84 "\n"
  EMPTY_LINE@84..85
    NEWLINE@84..85 "\n"
  PARAGRAPH@85..203
    ENTRY@85..98
      KEY@85..92 "Package"
      COLON@92..93 ":"
      WHITESPACE@93..94 " "
      VALUE@94..97 "foo"
      NEWLINE@97..98 "\n"
    ENTRY@98..116
      KEY@98..110 "Architecture"
      COLON@110..111 ":"
      WHITESPACE@111..112 " "
      VALUE@112..115 "all"
      NEWLINE@115..116 "\n"
    ENTRY@116..137
      KEY@116..123 "Depends"
      COLON@123..124 ":"
      NEWLINE@124..125 "\n"
      INDENT@125..126 " "
      VALUE@126..130 "bar,"
      NEWLINE@130..131 "\n"
      INDENT@131..132 " "
      VALUE@132..136 "blah"
      NEWLINE@136..137 "\n"
    ENTRY@137..203
      KEY@137..148 "Description"
      COLON@148..149 ":"
      WHITESPACE@149..150 " "
      VALUE@150..171 "This is a description"
      NEWLINE@171..172 "\n"
      INDENT@172..173 " "
      VALUE@173..182 "And it is"
      NEWLINE@182..183 "\n"
      INDENT@183..184 " "
      VALUE@184..185 "."
      NEWLINE@185..186 "\n"
      INDENT@186..187 " "
      VALUE@187..195 "multiple"
      NEWLINE@195..196 "\n"
      INDENT@196..197 " "
      VALUE@197..202 "lines"
      NEWLINE@202..203 "\n"
"###;

    let parsed = parse(CONTROLV1);
    let tree = parsed.syntax();
    assert_eq!(format!("{:#?}", tree), expected_tree);
    assert_eq!(parsed.errors, Vec::<String>::new());

    let root = parsed.root_mut();
    assert_eq!(root.paragraphs().count(), 2);

    // First paragraph: the source stanza.
    let first = root.paragraphs().next().unwrap();
    assert_eq!(
        first.keys().collect::<Vec<_>>(),
        vec!["Source", "Maintainer", "Section"]
    );
    assert_eq!(first.get("Source").as_deref(), Some("foo"));
    assert_eq!(
        first.get("Maintainer").as_deref(),
        Some("Foo Bar <foo@example.com>")
    );
    assert_eq!(first.get("Section").as_deref(), Some("net"));
    let expected_items: Vec<(String, String)> = vec![
        ("Source".into(), "foo".into()),
        ("Maintainer".into(), "Foo Bar <foo@example.com>".into()),
        ("Section".into(), "net".into()),
    ];
    assert_eq!(first.items().collect::<Vec<_>>(), expected_items);

    // Second paragraph: the binary stanza with multi-line values.
    let second = root.paragraphs().nth(1).unwrap();
    assert_eq!(
        second.keys().collect::<Vec<_>>(),
        vec!["Package", "Architecture", "Depends", "Description"]
    );
    assert_eq!(second.get("Package").as_deref(), Some("foo"));
    assert_eq!(second.get("Architecture").as_deref(), Some("all"));
    assert_eq!(second.get("Depends").as_deref(), Some("bar,\nblah"));
    assert_eq!(
        second.get("Description").as_deref(),
        Some("This is a description\nAnd it is\n.\nmultiple\nlines")
    );

    // The tree must reproduce the input byte for byte.
    assert_eq!(tree.text(), CONTROLV1);
}
1367
/// Trailing blank lines after a paragraph must end up as `EMPTY_LINE` nodes
/// and must not create an additional paragraph.
#[test]
fn test_with_trailing_whitespace() {
    const CONTROLV1: &str = r#"Source: foo
Maintainer: Foo Bar <foo@example.com>


"#;
    let expected_tree = r###"ROOT@0..52
  PARAGRAPH@0..50
    ENTRY@0..12
      KEY@0..6 "Source"
      COLON@6..7 ":"
      WHITESPACE@7..8 " "
      VALUE@8..11 "foo"
      NEWLINE@11..12 "\n"
    ENTRY@12..50
      KEY@12..22 "Maintainer"
      COLON@22..23 ":"
      WHITESPACE@23..24 " "
      VALUE@24..49 "Foo Bar <foo@example. ..."
      NEWLINE@49..50 "\n"
  EMPTY_LINE@50..51
    NEWLINE@50..51 "\n"
  EMPTY_LINE@51..52
    NEWLINE@51..52 "\n"
"###;

    let parsed = parse(CONTROLV1);
    let tree = parsed.syntax();
    assert_eq!(format!("{:#?}", tree), expected_tree);
    assert_eq!(parsed.errors, Vec::<String>::new());

    let root = parsed.root_mut();
    assert_eq!(root.paragraphs().count(), 1);

    let only = root.paragraphs().next().unwrap();
    let expected_items: Vec<(String, String)> = vec![
        ("Source".into(), "foo".into()),
        ("Maintainer".into(), "Foo Bar <foo@example.com>".into()),
    ];
    assert_eq!(only.items().collect::<Vec<_>>(), expected_items);
}
1412
1413fn rebuild_value(
1414    builder: &mut GreenNodeBuilder,
1415    mut tokens: Vec<(SyntaxKind, String)>,
1416    key_len: usize,
1417    indentation: u32,
1418    immediate_empty_line: bool,
1419    max_line_length_one_liner: Option<usize>,
1420) {
1421    let first_line_len = tokens
1422        .iter()
1423        .take_while(|(k, _t)| *k != NEWLINE)
1424        .map(|(_k, t)| t.len())
1425        .sum::<usize>() + key_len + 2 /* ": " */;
1426
1427    let has_newline = tokens.iter().any(|(k, _t)| *k == NEWLINE);
1428
1429    let mut last_was_newline = false;
1430    if max_line_length_one_liner
1431        .map(|mll| first_line_len <= mll)
1432        .unwrap_or(false)
1433        && !has_newline
1434    {
1435        // Just copy tokens if the value fits into one line
1436        for (k, t) in tokens {
1437            builder.token(k.into(), &t);
1438        }
1439    } else {
1440        // Insert a leading newline if the value is multi-line and immediate_empty_line is set
1441        if immediate_empty_line && has_newline {
1442            builder.token(NEWLINE.into(), "\n");
1443            last_was_newline = true;
1444        } else {
1445            builder.token(WHITESPACE.into(), " ");
1446        }
1447        // Strip leading whitespace and newlines
1448        let mut start_idx = 0;
1449        while start_idx < tokens.len() {
1450            if tokens[start_idx].0 == NEWLINE || tokens[start_idx].0 == WHITESPACE {
1451                start_idx += 1;
1452            } else {
1453                break;
1454            }
1455        }
1456        tokens.drain(..start_idx);
1457        // Pre-allocate indentation string to avoid repeated allocations
1458        let indent_str = " ".repeat(indentation as usize);
1459        for (k, t) in tokens {
1460            if last_was_newline {
1461                builder.token(INDENT.into(), &indent_str);
1462            }
1463            builder.token(k.into(), &t);
1464            last_was_newline = k == NEWLINE;
1465        }
1466    }
1467
1468    if !last_was_newline {
1469        builder.token(NEWLINE.into(), "\n");
1470    }
1471}
1472
1473#[cfg(test)]
1474mod tests {
1475    use super::*;
/// Parsing via `str::parse` yields both paragraphs with their fields.
#[test]
fn test_parse() {
    let input = r#"Source: foo
Maintainer: Foo Bar <jelmer@jelmer.uk>
Section: net

Package: foo
Architecture: all
Depends: libc6
Description: This is a description
 With details
 "#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();

    let source = paragraphs.next().unwrap();
    assert_eq!(source.get("Source").as_deref(), Some("foo"));
    assert_eq!(
        source.get("Maintainer").as_deref(),
        Some("Foo Bar <jelmer@jelmer.uk>")
    );
    assert_eq!(source.get("Section").as_deref(), Some("net"));

    let binary = paragraphs.next().unwrap();
    assert_eq!(binary.get("Package").as_deref(), Some("foo"));
}
1503
/// A field following a multi-line value is still parsed as its own field.
#[test]
fn test_after_multi_line() {
    let input = r#"Source: golang-github-blah-blah
Section: devel
Priority: optional
Standards-Version: 4.2.0
Maintainer: Some Maintainer <example@example.com>
Build-Depends: debhelper (>= 11~),
               dh-golang,
               golang-any
Homepage: https://github.com/j-keck/arping
"#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();
    let para = paragraphs.next().unwrap();

    assert_eq!(
        para.get("Source").as_deref(),
        Some("golang-github-blah-blah")
    );
    assert_eq!(para.get("Section").as_deref(), Some("devel"));
    assert_eq!(para.get("Priority").as_deref(), Some("optional"));
    assert_eq!(para.get("Standards-Version").as_deref(), Some("4.2.0"));
    assert_eq!(
        para.get("Maintainer").as_deref(),
        Some("Some Maintainer <example@example.com>")
    );
    assert_eq!(
        para.get("Build-Depends").as_deref(),
        Some("debhelper (>= 11~),\ndh-golang,\ngolang-any")
    );
    assert_eq!(
        para.get("Homepage").as_deref(),
        Some("https://github.com/j-keck/arping")
    );
}
1537
/// Removing fields (existing and missing) keeps comments and other fields.
#[test]
fn test_remove_field() {
    let input = r#"Source: foo
# Comment
Maintainer: Foo Bar <jelmer@jelmer.uk>
Section: net

Package: foo
Architecture: all
Depends: libc6
Description: This is a description
 With details
 "#;
    let expected = r#"Source: foo
# Comment
Maintainer: Foo Bar <jelmer@jelmer.uk>
Foo: Bar
"#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();
    let mut para = paragraphs.next().unwrap();

    para.set("Foo", "Bar");
    para.remove("Section");
    para.remove("Nonexistent");

    assert_eq!(para.get("Foo").as_deref(), Some("Bar"));
    assert_eq!(para.to_string(), expected);
}
1568
/// Renaming keeps the value and reports whether a field was found.
#[test]
fn test_rename_field() {
    let input = r#"Source: foo
Vcs-Browser: https://salsa.debian.org/debian/foo
"#;
    let expected = r#"Source: foo
Homepage: https://salsa.debian.org/debian/foo
"#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();
    let mut para = paragraphs.next().unwrap();

    assert!(para.rename("Vcs-Browser", "Homepage"));
    assert_eq!(para.to_string(), expected);

    assert_eq!(
        para.get("Homepage").as_deref(),
        Some("https://salsa.debian.org/debian/foo")
    );
    assert_eq!(para.get("Vcs-Browser").as_deref(), None);

    // Renaming a field that does not exist reports failure.
    assert!(!para.rename("Nonexistent", "Homepage"));
}
1595
/// Setting an existing field replaces its value in place.
#[test]
fn test_set_field() {
    let input = r#"Source: foo
Maintainer: Foo Bar <joe@example.com>
"#;
    let expected = r#"Source: foo
Maintainer: Somebody Else <jane@example.com>
"#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();
    let mut para = paragraphs.next().unwrap();

    para.set("Maintainer", "Somebody Else <jane@example.com>");

    assert_eq!(
        para.get("Maintainer").as_deref(),
        Some("Somebody Else <jane@example.com>")
    );
    assert_eq!(para.to_string(), expected);
}
1617
/// Setting a field that does not exist yet appends it.
#[test]
fn test_set_new_field() {
    let input = r#"Source: foo
"#;
    let expected = r#"Source: foo
Maintainer: Somebody <joe@example.com>
"#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();
    let mut para = paragraphs.next().unwrap();

    para.set("Maintainer", "Somebody <joe@example.com>");

    assert_eq!(
        para.get("Maintainer").as_deref(),
        Some("Somebody <joe@example.com>")
    );
    assert_eq!(para.to_string(), expected);
}
1638
/// Paragraphs added to a document show up in its rendering, separated by a
/// blank line.
#[test]
fn test_add_paragraph() {
    let mut document = super::Deb822::new();

    let mut first = document.add_paragraph();
    first.set("Foo", "Bar");
    assert_eq!(first.get("Foo").as_deref(), Some("Bar"));
    assert_eq!(first.to_string(), "Foo: Bar\n");
    assert_eq!(document.to_string(), "Foo: Bar\n");

    let mut second = document.add_paragraph();
    second.set("Foo", "Blah");
    assert_eq!(second.get("Foo").as_deref(), Some("Blah"));
    assert_eq!(document.to_string(), "Foo: Bar\n\nFoo: Blah\n");
}
1667
/// Insert, prepend, delete and update paragraphs and check the rendering
/// after every step.
#[test]
fn test_crud_paragraph() {
    let mut document = super::Deb822::new();

    let mut original = document.insert_paragraph(0);
    original.set("Foo", "Bar");
    assert_eq!(original.get("Foo").as_deref(), Some("Bar"));
    assert_eq!(document.to_string(), "Foo: Bar\n");

    // Prepend a second paragraph in front of the first one.
    let mut prepended = document.insert_paragraph(0);
    prepended.set("Foo", "Blah");
    assert_eq!(prepended.get("Foo").as_deref(), Some("Blah"));
    assert_eq!(document.to_string(), "Foo: Blah\n\nFoo: Bar\n");

    // Delete the paragraph that is now second.
    document.remove_paragraph(1);
    assert_eq!(document.to_string(), "Foo: Blah\n\n");

    // Updating through the remaining handle is visible in the document.
    prepended.set("Foo", "Baz");
    assert_eq!(document.to_string(), "Foo: Baz\n\n");

    // Delete the last remaining paragraph.
    document.remove_paragraph(0);
    assert_eq!(document.to_string(), "");
}
1704
/// An entry built from a value containing a newline gets an indented
/// continuation line and the expected token sequence.
#[test]
fn test_multiline_entry() {
    use super::SyntaxKind::*;
    use rowan::ast::AstNode;

    let entry = super::Entry::new("foo", "bar\nbaz");
    let tokens: Vec<_> = entry
        .syntax()
        .descendants_with_tokens()
        .filter_map(|element| element.into_token())
        .collect();

    assert_eq!("foo: bar\n baz\n", entry.to_string());
    assert_eq!("bar\nbaz", entry.value());

    let actual = tokens
        .iter()
        .map(|token| (token.kind(), token.text()))
        .collect::<Vec<_>>();
    assert_eq!(
        vec![
            (KEY, "foo"),
            (COLON, ":"),
            (WHITESPACE, " "),
            (VALUE, "bar"),
            (NEWLINE, "\n"),
            (INDENT, " "),
            (VALUE, "baz"),
            (NEWLINE, "\n"),
        ],
        actual
    );
}
1737
/// A realistic APT source index stanza parses and exposes its simple fields.
#[test]
fn test_apt_entry() {
    let text = r#"Package: cvsd
Binary: cvsd
Version: 1.0.24
Maintainer: Arthur de Jong <adejong@debian.org>
Build-Depends: debhelper (>= 9), po-debconf
Architecture: any
Standards-Version: 3.9.3
Format: 3.0 (native)
Files:
 b7a7d67a02974c52c408fdb5e118406d 890 cvsd_1.0.24.dsc
 b73ee40774c3086cb8490cdbb96ac883 258139 cvsd_1.0.24.tar.gz
Vcs-Browser: http://arthurdejong.org/viewvc/cvsd/
Vcs-Cvs: :pserver:anonymous@arthurdejong.org:/arthur/
Checksums-Sha256:
 a7bb7a3aacee19cd14ce5c26cb86e348b1608e6f1f6e97c6ea7c58efa440ac43 890 cvsd_1.0.24.dsc
 46bc517760c1070ae408693b89603986b53e6f068ae6bdc744e2e830e46b8cba 258139 cvsd_1.0.24.tar.gz
Homepage: http://arthurdejong.org/cvsd/
Package-List:
 cvsd deb vcs optional
Directory: pool/main/c/cvsd
Priority: source
Section: vcs

"#;
    let document: super::Deb822 = text.parse().unwrap();
    let stanza = document.paragraphs().next().unwrap();

    assert_eq!(stanza.get("Binary").as_deref(), Some("cvsd"));
    assert_eq!(stanza.get("Version").as_deref(), Some("1.0.24"));
    assert_eq!(
        stanza.get("Maintainer").as_deref(),
        Some("Arthur de Jong <adejong@debian.org>")
    );
}
1773
/// Reformatting with field-name-length indentation normalises spacing and
/// re-indents continuation lines, without sorting or value formatting.
#[test]
fn test_format() {
    let input = r#"Source: foo
Maintainer: Foo Bar <foo@example.com>
Section:      net
Blah: blah  # comment
Multi-Line:
  Ahoi!
     Matey!

"#;
    let expected = r#"Source: foo
Maintainer: Foo Bar <foo@example.com>
Section: net
Blah: blah  # comment
Multi-Line: Ahoi!
          Matey!
"#;
    let document: super::Deb822 = input.parse().unwrap();
    let mut paragraphs = document.paragraphs();
    let para = paragraphs.next().unwrap();

    let formatted = para.wrap_and_sort(
        crate::Indentation::FieldNameLength,
        false,
        None,
        None::<&dyn Fn(&super::Entry, &super::Entry) -> std::cmp::Ordering>,
        None,
    );

    assert_eq!(formatted.to_string(), expected);
}
1807
/// Paragraphs are reordered by their `Source` field; the comment in front of
/// a paragraph moves along with it.
#[test]
fn test_format_sort_paragraphs() {
    let input = r#"Source: foo
Maintainer: Foo Bar <foo@example.com>

# This is a comment
Source: bar
Maintainer: Bar Foo <bar@example.com>

"#;
    let expected = r#"# This is a comment
Source: bar
Maintainer: Bar Foo <bar@example.com>

Source: foo
Maintainer: Foo Bar <foo@example.com>
"#;
    let document: super::Deb822 = input.parse().unwrap();

    let sorted = document.wrap_and_sort(
        Some(&|a: &super::Paragraph, b: &super::Paragraph| {
            a.get("Source").cmp(&b.get("Source"))
        }),
        Some(&|p| {
            p.wrap_and_sort(
                crate::Indentation::FieldNameLength,
                false,
                None,
                None::<&dyn Fn(&super::Entry, &super::Entry) -> std::cmp::Ordering>,
                None,
            )
        }),
    );

    assert_eq!(sorted.to_string(), expected);
}
1845
/// Fields inside a paragraph are reordered by key when an entry comparator
/// is supplied.
#[test]
fn test_format_sort_fields() {
    let input = r#"Source: foo
Maintainer: Foo Bar <foo@example.com>
Build-Depends: debhelper (>= 9), po-debconf
Homepage: https://example.com/

"#;
    let expected = r#"Build-Depends: debhelper (>= 9), po-debconf
Homepage: https://example.com/
Maintainer: Foo Bar <foo@example.com>
Source: foo
"#;
    let document: super::Deb822 = input.parse().unwrap();

    let sorted = document.wrap_and_sort(
        None,
        Some(&|p: &super::Paragraph| -> super::Paragraph {
            p.wrap_and_sort(
                crate::Indentation::FieldNameLength,
                false,
                None,
                Some(&|a: &super::Entry, b: &super::Entry| a.key().cmp(&b.key())),
                None,
            )
        }),
    );

    assert_eq!(sorted.to_string(), expected);
}
1877
1878    #[test]
1879    fn test_para_from_iter() {
1880        let p: super::Paragraph = vec![("Foo", "Bar"), ("Baz", "Qux")].into_iter().collect();
1881        assert_eq!(
1882            p.to_string(),
1883            r#"Foo: Bar
1884Baz: Qux
1885"#
1886        );
1887
1888        let p: super::Paragraph = vec![
1889            ("Foo".to_string(), "Bar".to_string()),
1890            ("Baz".to_string(), "Qux".to_string()),
1891        ]
1892        .into_iter()
1893        .collect();
1894
1895        assert_eq!(
1896            p.to_string(),
1897            r#"Foo: Bar
1898Baz: Qux
1899"#
1900        );
1901    }
1902
1903    #[test]
1904    fn test_deb822_from_iter() {
1905        let d: super::Deb822 = vec![
1906            vec![("Foo", "Bar"), ("Baz", "Qux")].into_iter().collect(),
1907            vec![("A", "B"), ("C", "D")].into_iter().collect(),
1908        ]
1909        .into_iter()
1910        .collect();
1911        assert_eq!(
1912            d.to_string(),
1913            r#"Foo: Bar
1914Baz: Qux
1915
1916A: B
1917C: D
1918"#
1919        );
1920    }
1921
1922    #[test]
1923    fn test_format_parse_error() {
1924        assert_eq!(ParseError(vec!["foo".to_string()]).to_string(), "foo\n");
1925    }
1926
1927    #[test]
1928    fn test_positioned_parse_error() {
1929        let error = PositionedParseError {
1930            message: "test error".to_string(),
1931            range: rowan::TextRange::new(rowan::TextSize::from(5), rowan::TextSize::from(10)),
1932            code: Some("test_code".to_string()),
1933        };
1934        assert_eq!(error.to_string(), "test error");
1935        assert_eq!(error.range.start(), rowan::TextSize::from(5));
1936        assert_eq!(error.range.end(), rowan::TextSize::from(10));
1937        assert_eq!(error.code, Some("test_code".to_string()));
1938    }
1939
1940    #[test]
1941    fn test_format_error() {
1942        assert_eq!(
1943            super::Error::ParseError(ParseError(vec!["foo".to_string()])).to_string(),
1944            "foo\n"
1945        );
1946    }
1947
1948    #[test]
1949    fn test_get_all() {
1950        let d: super::Deb822 = r#"Source: foo
1951Maintainer: Foo Bar <foo@example.com>
1952Maintainer: Bar Foo <bar@example.com>"#
1953            .parse()
1954            .unwrap();
1955        let p = d.paragraphs().next().unwrap();
1956        assert_eq!(
1957            p.get_all("Maintainer").collect::<Vec<_>>(),
1958            vec!["Foo Bar <foo@example.com>", "Bar Foo <bar@example.com>"]
1959        );
1960    }
1961
1962    #[test]
1963    fn test_entry_ranges() {
1964        let input = r#"Package: test-package
1965Maintainer: Test User <test@example.com>
1966Description: A simple test package
1967 with multiple lines
1968 of description text"#;
1969
1970        let deb822 = super::Deb822::from_str(input).unwrap();
1971        let paragraph = deb822.paragraphs().next().unwrap();
1972        let entries: Vec<_> = paragraph.entries().collect();
1973
1974        // Test first entry (Package)
1975        let package_entry = &entries[0];
1976        assert_eq!(package_entry.key(), Some("Package".to_string()));
1977
1978        // Test key_range
1979        let key_range = package_entry.key_range().unwrap();
1980        assert_eq!(
1981            &input[key_range.start().into()..key_range.end().into()],
1982            "Package"
1983        );
1984
1985        // Test colon_range
1986        let colon_range = package_entry.colon_range().unwrap();
1987        assert_eq!(
1988            &input[colon_range.start().into()..colon_range.end().into()],
1989            ":"
1990        );
1991
1992        // Test value_range
1993        let value_range = package_entry.value_range().unwrap();
1994        assert_eq!(
1995            &input[value_range.start().into()..value_range.end().into()],
1996            "test-package"
1997        );
1998
1999        // Test text_range covers the whole entry
2000        let text_range = package_entry.text_range();
2001        assert_eq!(
2002            &input[text_range.start().into()..text_range.end().into()],
2003            "Package: test-package\n"
2004        );
2005
2006        // Test single-line value_line_ranges
2007        let value_lines = package_entry.value_line_ranges();
2008        assert_eq!(value_lines.len(), 1);
2009        assert_eq!(
2010            &input[value_lines[0].start().into()..value_lines[0].end().into()],
2011            "test-package"
2012        );
2013    }
2014
2015    #[test]
2016    fn test_multiline_entry_ranges() {
2017        let input = r#"Description: Short description
2018 Extended description line 1
2019 Extended description line 2"#;
2020
2021        let deb822 = super::Deb822::from_str(input).unwrap();
2022        let paragraph = deb822.paragraphs().next().unwrap();
2023        let entry = paragraph.entries().next().unwrap();
2024
2025        assert_eq!(entry.key(), Some("Description".to_string()));
2026
2027        // Test value_range spans all lines
2028        let value_range = entry.value_range().unwrap();
2029        let full_value = &input[value_range.start().into()..value_range.end().into()];
2030        assert!(full_value.contains("Short description"));
2031        assert!(full_value.contains("Extended description line 1"));
2032        assert!(full_value.contains("Extended description line 2"));
2033
2034        // Test value_line_ranges gives individual lines
2035        let value_lines = entry.value_line_ranges();
2036        assert_eq!(value_lines.len(), 3);
2037
2038        assert_eq!(
2039            &input[value_lines[0].start().into()..value_lines[0].end().into()],
2040            "Short description"
2041        );
2042        assert_eq!(
2043            &input[value_lines[1].start().into()..value_lines[1].end().into()],
2044            "Extended description line 1"
2045        );
2046        assert_eq!(
2047            &input[value_lines[2].start().into()..value_lines[2].end().into()],
2048            "Extended description line 2"
2049        );
2050    }
2051
2052    #[test]
2053    fn test_entries_public_access() {
2054        let input = r#"Package: test
2055Version: 1.0"#;
2056
2057        let deb822 = super::Deb822::from_str(input).unwrap();
2058        let paragraph = deb822.paragraphs().next().unwrap();
2059
2060        // Test that entries() method is now public
2061        let entries: Vec<_> = paragraph.entries().collect();
2062        assert_eq!(entries.len(), 2);
2063        assert_eq!(entries[0].key(), Some("Package".to_string()));
2064        assert_eq!(entries[1].key(), Some("Version".to_string()));
2065    }
2066
2067    #[test]
2068    fn test_empty_value_ranges() {
2069        let input = r#"EmptyField: "#;
2070
2071        let deb822 = super::Deb822::from_str(input).unwrap();
2072        let paragraph = deb822.paragraphs().next().unwrap();
2073        let entry = paragraph.entries().next().unwrap();
2074
2075        assert_eq!(entry.key(), Some("EmptyField".to_string()));
2076
2077        // Empty value should still have ranges
2078        assert!(entry.key_range().is_some());
2079        assert!(entry.colon_range().is_some());
2080
2081        // Empty value might not have value tokens
2082        let value_lines = entry.value_line_ranges();
2083        // This depends on how the parser handles empty values
2084        // but we should not panic
2085        assert!(value_lines.len() <= 1);
2086    }
2087
2088    #[test]
2089    fn test_range_ordering() {
2090        let input = r#"Field: value"#;
2091
2092        let deb822 = super::Deb822::from_str(input).unwrap();
2093        let paragraph = deb822.paragraphs().next().unwrap();
2094        let entry = paragraph.entries().next().unwrap();
2095
2096        let key_range = entry.key_range().unwrap();
2097        let colon_range = entry.colon_range().unwrap();
2098        let value_range = entry.value_range().unwrap();
2099        let text_range = entry.text_range();
2100
2101        // Verify ranges are in correct order
2102        assert!(key_range.end() <= colon_range.start());
2103        assert!(colon_range.end() <= value_range.start());
2104        assert!(key_range.start() >= text_range.start());
2105        assert!(value_range.end() <= text_range.end());
2106    }
2107}