// debian_analyzer/benfile.rs

//! File parsing for the benfile format.
use std::iter::Peekable;
use std::vec::IntoIter;
4
5#[derive(Debug, PartialEq, Clone)]
6enum Token {
7    True,
8    False,
9    Not,
10    Or,
11    And,
12    LParen,
13    RParen,
14    Field(String),
15    Identifier(String),
16    Regex(String),
17    Source,
18    Comparison(Comparison),
19    String(String),
20    Semicolon,
21    Tilde,
22}
23
24struct Lexer<'a> {
25    input: &'a str,
26}
27
28impl<'a> Lexer<'a> {
29    pub fn new(input: &'a str) -> Self {
30        Lexer { input }
31    }
32
33    pub fn next_token(&mut self) -> Result<Option<Token>, String> {
34        self.skip_whitespace();
35
36        if self.input.is_empty() {
37            return Ok(None);
38        }
39
40        let c = self.current_char();
41
42        match c {
43            Some('t') if self.input.starts_with("true") => {
44                self.input = &self.input[4..];
45                Ok(Some(Token::True))
46            }
47            Some('f') if self.input.starts_with("false") => {
48                self.input = &self.input[5..];
49                Ok(Some(Token::False))
50            }
51            Some('!') => {
52                self.input = &self.input[1..];
53                Ok(Some(Token::Not))
54            }
55            Some('|') => {
56                self.input = &self.input[1..];
57                Ok(Some(Token::Or))
58            }
59            Some('&') => {
60                self.input = &self.input[1..];
61                Ok(Some(Token::And))
62            }
63            Some('(') => {
64                self.input = &self.input[1..];
65                Ok(Some(Token::LParen))
66            }
67            Some(')') => {
68                self.input = &self.input[1..];
69                Ok(Some(Token::RParen))
70            }
71            Some('.') => {
72                self.input = &self.input[1..];
73                let field = self.consume_while(Self::is_valid_field_char);
74                Ok(Some(Token::Field(field.to_string())))
75            }
76            Some('/') => {
77                self.input = &self.input[1..];
78                let regex = self.consume_until('/');
79                self.input = &self.input[1..];
80                Ok(Some(Token::Regex(regex.to_string())))
81            }
82            Some('"') => {
83                self.input = &self.input[1..];
84                // consume until next ", but allow escaping with \
85                let in_escape = std::sync::atomic::AtomicBool::new(false);
86                let string = self.consume_while(move |c| {
87                    if in_escape.swap(false, std::sync::atomic::Ordering::SeqCst) {
88                        true
89                    } else if c == '\\' {
90                        in_escape.store(true, std::sync::atomic::Ordering::SeqCst);
91                        true
92                    } else {
93                        c != '"'
94                    }
95                });
96                self.input = &self.input[1..];
97                Ok(Some(Token::String(string.to_string())))
98            }
99            Some('~') => {
100                self.input = &self.input[1..];
101                Ok(Some(Token::Tilde))
102            }
103            Some('<' | '>' | '=') if self.is_comparison() => {
104                let comparison = self.consume_comparison()?;
105                Ok(Some(Token::Comparison(comparison)))
106            }
107            Some('s') if self.input.starts_with("source") => {
108                self.input = &self.input[6..];
109                Ok(Some(Token::Source))
110            }
111            Some(';') => {
112                self.input = &self.input[1..];
113                Ok(Some(Token::Semicolon))
114            }
115            Some(c) if Self::is_valid_identifier_char(c) => {
116                let identifier = self.consume_while(Self::is_valid_identifier_char);
117                Ok(Some(Token::Identifier(identifier.to_string())))
118            }
119            None => Ok(None),
120            Some(c) => Err(format!("Unexpected character: {}", c)),
121        }
122    }
123
124    fn is_valid_identifier_char(c: char) -> bool {
125        c.is_alphanumeric() || c == '_'
126    }
127
128    fn is_valid_field_char(c: char) -> bool {
129        c.is_alphanumeric() || c == '-'
130    }
131
132    fn current_char(&mut self) -> Option<char> {
133        self.skip_whitespace();
134        self.input.chars().next()
135    }
136
137    fn skip_whitespace(&mut self) {
138        while !self.input.is_empty() && self.input.chars().next().unwrap().is_whitespace() {
139            self.input = &self.input[1..];
140        }
141    }
142
143    fn consume_while<F>(&mut self, test: F) -> &'a str
144    where
145        F: Fn(char) -> bool,
146    {
147        let mut end = 0;
148        while !self.input.len() > end && test(self.input.chars().nth(end).unwrap()) {
149            end += 1;
150        }
151        let (word, rest) = self.input.split_at(end);
152        self.input = rest;
153        word
154    }
155
156    fn consume_until(&mut self, end_char: char) -> &'a str {
157        let mut end = 0;
158        while self.input.chars().nth(end).unwrap() != end_char {
159            end += 1;
160        }
161        let (word, rest) = self.input.split_at(end);
162        self.input = rest;
163        word
164    }
165
166    fn is_comparison(&self) -> bool {
167        let comparisons = ["<<", "<=", "<", ">=", ">>", ">", "="];
168        for &comp in &comparisons {
169            if self.input.starts_with(comp) {
170                return true;
171            }
172        }
173        false
174    }
175
176    fn consume_comparison(&mut self) -> Result<Comparison, String> {
177        if self.input.starts_with("<<") {
178            self.input = &self.input[2..];
179            Ok(Comparison::MuchLessThan)
180        } else if self.input.starts_with("<=") {
181            self.input = &self.input[2..];
182            Ok(Comparison::LessOrEqual)
183        } else if self.input.starts_with("<") {
184            self.input = &self.input[2..];
185            Ok(Comparison::LessThan)
186        } else if self.input.starts_with(">=") {
187            self.input = &self.input[2..];
188            Ok(Comparison::GreaterOrEqual)
189        } else if self.input.starts_with(">>") {
190            self.input = &self.input[2..];
191            Ok(Comparison::MuchGreaterThan)
192        } else if self.input.starts_with(">") {
193            self.input = &self.input[1..];
194            Ok(Comparison::GreaterThan)
195        } else if self.input.starts_with("=") {
196            self.input = &self.input[1..];
197            Ok(Comparison::Equal)
198        } else {
199            Err(format!("Expected comparison, got {}", self.input))
200        }
201    }
202}
203
/// The comparison operators supported by the benfile format.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Comparison {
    /// `<`
    LessThan,
    /// `<<`
    MuchLessThan,
    /// `<=`
    LessOrEqual,
    /// `>`
    GreaterThan,
    /// `>>`
    MuchGreaterThan,
    /// `>=`
    GreaterOrEqual,
    /// `=`
    Equal,
}

/// Formats the operator as its symbol, e.g. `<<`.
impl std::fmt::Display for Comparison {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let symbol = match self {
            Comparison::LessThan => "<",
            Comparison::MuchLessThan => "<<",
            Comparison::LessOrEqual => "<=",
            Comparison::GreaterThan => ">",
            Comparison::MuchGreaterThan => ">>",
            Comparison::GreaterOrEqual => ">=",
            Comparison::Equal => "=",
        };
        f.write_str(symbol)
    }
}

/// Parses an operator from its exact symbol; no surrounding whitespace is
/// accepted.
impl std::str::FromStr for Comparison {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "<" => Ok(Comparison::LessThan),
            "<<" => Ok(Comparison::MuchLessThan),
            "<=" => Ok(Comparison::LessOrEqual),
            ">" => Ok(Comparison::GreaterThan),
            ">>" => Ok(Comparison::MuchGreaterThan),
            ">=" => Ok(Comparison::GreaterOrEqual),
            "=" => Ok(Comparison::Equal),
            _ => Err(format!("Invalid comparison: {}", s)),
        }
    }
}
253
254#[derive(PartialEq, Eq, Clone)]
255/// The expression types supported by the benfile format.
256pub enum Expr {
257    /// true or false
258    Bool(bool),
259    /// !<query>
260    Not(Box<Expr>),
261    /// <query> | <query>
262    Or(Vec<Box<Expr>>),
263    /// <query> & <query>
264    And(Vec<Box<Expr>>),
265    /// Field ~ /regex/
266    FieldRegex(String, String),
267    /// Field ~ "string"
268    FieldString(String, String),
269    /// source
270    Source,
271    /// <comparison> "<string>"
272    Comparison(Comparison, String),
273    /// <field> ~ "<string>"
274    FieldComparison(String, Comparison, String),
275    /// "string"
276    String(String),
277}
278
279#[derive(Debug, PartialEq, Eq)]
280/// An assignment in a benfile.
281pub struct Assignment {
282    /// The field being assigned to.
283    pub field: String,
284    /// The expression being assigned.
285    pub expr: Expr,
286}
287
288impl std::str::FromStr for Assignment {
289    type Err = String;
290
291    fn from_str(s: &str) -> Result<Self, Self::Err> {
292        let mut lexer = Lexer::new(s);
293        let mut tokens = vec![];
294
295        while let Some(token) = lexer.next_token()? {
296            tokens.push(token);
297        }
298
299        let mut parser = Parser::new(tokens);
300        let assignment = parser.parse_assignment()?;
301        match assignment {
302            Some(assignment) => Ok(assignment),
303            None => Err("Expected assignment".to_string()),
304        }
305    }
306}
307
308impl std::str::FromStr for Expr {
309    type Err = String;
310
311    fn from_str(s: &str) -> Result<Self, Self::Err> {
312        let mut lexer = Lexer::new(s);
313        let mut tokens = vec![];
314
315        while let Some(token) = lexer.next_token()? {
316            tokens.push(token);
317        }
318
319        let mut parser = Parser::new(tokens);
320        parser.parse()
321    }
322}
323
324impl std::fmt::Debug for Expr {
325    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
326        match self {
327            Expr::Bool(b) => write!(f, "Bool({})", b),
328            Expr::Not(expr) => write!(f, "Not({:?})", expr),
329            Expr::Or(exprs) => write!(f, "Or({:?})", exprs),
330            Expr::And(exprs) => write!(f, "And({:?})", exprs),
331            Expr::FieldRegex(field, regex) => write!(f, "FieldRegex({}, {})", field, regex),
332            Expr::FieldString(field, string) => write!(f, "FieldString({}, {})", field, string),
333            Expr::Source => write!(f, "Source"),
334            Expr::Comparison(comp, string) => write!(f, "Comparison({}, {})", comp, string),
335            Expr::FieldComparison(field, comp, string) => {
336                write!(f, "FieldComparison({}, {}, {})", field, comp, string)
337            }
338            Expr::String(string) => write!(f, "String({})", string),
339        }
340    }
341}
342
343struct Parser {
344    tokens: Peekable<IntoIter<Token>>,
345}
346
347impl Parser {
348    pub fn new(tokens: Vec<Token>) -> Self {
349        Parser {
350            tokens: tokens.into_iter().peekable(),
351        }
352    }
353
354    pub fn parse_multiple(&mut self) -> Result<Vec<Assignment>, String> {
355        let mut assignments = vec![];
356        while let Some(assignment) = self.parse_assignment()? {
357            assignments.push(assignment);
358        }
359        Ok(assignments)
360    }
361
362    pub fn parse_assignment(&mut self) -> Result<Option<Assignment>, String> {
363        let field = match self.tokens.next() {
364            Some(Token::Identifier(field)) => field,
365            None => return Ok(None),
366            n => {
367                return Err(format!("Expected identifier, got {:?}", n));
368            }
369        };
370        if self.tokens.next() != Some(Token::Comparison(Comparison::Equal)) {
371            return Err("Expected =".to_string());
372        }
373        let expr = self.parse()?;
374        if self.tokens.next() == Some(Token::Semicolon) {
375            Ok(Some(Assignment { field, expr }))
376        } else {
377            Err(format!("Expected ;, got {:?}", self.tokens.peek()))
378        }
379    }
380
381    pub fn parse(&mut self) -> Result<Expr, String> {
382        let expr: Expr = match self.tokens.next() {
383            // true
384            Some(Token::True) => Ok(Expr::Bool(true)),
385            // false
386            Some(Token::False) => Ok(Expr::Bool(false)),
387            // "string"
388            Some(Token::String(string)) => Ok(Expr::String(string)),
389            // ( <query> )
390            Some(Token::LParen) => {
391                let expr = self.parse()?;
392                match self.tokens.next() {
393                    Some(Token::RParen) => Ok(expr),
394                    Some(n) => Err(format!("Expected ), got {:?}", n)),
395                    None => Err("Expected ), got end of file".to_string()),
396                }
397            }
398            //  ! <query>
399            Some(Token::Not) => Ok(Expr::Not(Box::new(self.parse()?))),
400            Some(Token::Field(field)) => {
401                if self.tokens.next() != Some(Token::Tilde) {
402                    return Err(format!("Expected ~, got {:?}", self.tokens.peek()));
403                }
404
405                match self.tokens.next() {
406                    // <query> ~ /regex/
407                    Some(Token::Regex(regex)) => Ok(Expr::FieldRegex(field, regex)),
408                    // <query> ~ "<string>" <comparison> "<string>"
409                    Some(Token::String(comp_str)) => {
410                        let n = self.tokens.peek().cloned();
411                        match n {
412                            Some(Token::Comparison(comp)) => {
413                                self.tokens.next();
414                                if let Some(Token::String(comp_str2)) = self.tokens.next() {
415                                    Ok(Expr::FieldComparison(field, comp, comp_str2))
416                                } else {
417                                    Err("Expected string".to_string())
418                                }
419                            }
420                            // <query> ~ "string"
421                            _ => Ok(Expr::FieldString(field, comp_str)),
422                        }
423                    }
424                    _ => Err(format!(
425                        "Expected regex or string, got {:?}",
426                        self.tokens.peek()
427                    )),
428                }
429            }
430            Some(Token::Source) => Ok(Expr::Source),
431            // <query> "<string>"
432            Some(Token::Comparison(comp)) => {
433                if let Some(Token::String(comp_str)) = self.tokens.next() {
434                    Ok(Expr::Comparison(comp, comp_str))
435                } else {
436                    Err("Expected string".to_string())
437                }
438            }
439            n => Err(format!("Unexpected token: {:?}", n)),
440        }?;
441
442        match self.tokens.peek() {
443            Some(&Token::And) => {
444                let mut ands = vec![Box::new(expr)];
445                while self.tokens.peek() == Some(&Token::And) {
446                    self.tokens.next().unwrap();
447                    match self.parse()? {
448                        Expr::And(new_ands) => {
449                            ands.extend(new_ands);
450                        }
451                        next_expr => {
452                            ands.push(Box::new(next_expr));
453                        }
454                    }
455                }
456                Ok(Expr::And(ands))
457            }
458            Some(&Token::Or) => {
459                let mut ors = vec![Box::new(expr)];
460                while self.tokens.peek() == Some(&Token::Or) {
461                    self.tokens.next().unwrap();
462                    match self.parse()? {
463                        Expr::Or(new_ors) => {
464                            ors.extend(new_ors);
465                        }
466                        next_expr => {
467                            ors.push(Box::new(next_expr));
468                        }
469                    }
470                }
471                Ok(Expr::Or(ors))
472            }
473            _ => Ok(expr),
474        }
475    }
476}
477
478/// Read a benfile from a reader and return a vector of assignments.
479pub fn read_benfile<R: std::io::Read>(mut reader: R) -> Result<Vec<Assignment>, String> {
480    let mut text = String::new();
481    reader
482        .read_to_string(&mut text)
483        .map_err(|e| e.to_string())?;
484    let mut lexer = Lexer::new(&text);
485    let mut tokens = vec![];
486    while let Some(token) = lexer.next_token()? {
487        tokens.push(token);
488    }
489    let mut parser = Parser::new(tokens);
490    let assignments = parser.parse_multiple()?;
491    Ok(assignments)
492}
493
#[cfg(test)]
mod tests {
    use super::*;

    /// Lex `input` to completion, panicking on a lexer error.
    fn lex_all(input: &str) -> Vec<Token> {
        let mut lexer = Lexer::new(input);
        let mut tokens = vec![];
        while let Some(token) = lexer.next_token().unwrap() {
            tokens.push(token);
        }
        tokens
    }

    #[test]
    fn test_simple_lex() {
        let input = r#"true & .field ~ /regex/ | .field ~ "string" << "comparison""#;

        assert_eq!(
            lex_all(input),
            vec![
                Token::True,
                Token::And,
                Token::Field("field".to_string()),
                Token::Tilde,
                Token::Regex("regex".to_string()),
                Token::Or,
                Token::Field("field".to_string()),
                Token::Tilde,
                Token::String("string".to_string()),
                Token::Comparison(Comparison::MuchLessThan),
                Token::String("comparison".to_string())
            ]
        );
    }

    #[test]
    fn test_simple_parse() {
        let input = r#"true & .field ~ /regex/ | .field ~ "string" << "comparison""#;
        let mut parser = Parser::new(lex_all(input));

        assert_eq!(
            Ok(Expr::And(vec![
                Box::new(Expr::Bool(true)),
                Box::new(Expr::Or(vec![
                    Box::new(Expr::FieldRegex("field".to_string(), "regex".to_string())),
                    Box::new(Expr::FieldComparison(
                        "field".to_string(),
                        Comparison::MuchLessThan,
                        "comparison".to_string()
                    ))
                ]))
            ])),
            parser.parse()
        );
    }

    #[test]
    fn test_parse_benfile() {
        let input = r###"title = "libsoup2.4 -> libsoup3";
is_affected = .build-depends ~ /libsoup2.4-dev|libsoup-gnome2.4-dev|libsoup-3.0-dev/ | .build-depends-arch ~ /libsoup2.4-dev|libsoup-gnome2.4-dev|libsoup-3.0-dev/ | .build-depends ~ /gir1.2-soup-2.4|gir1.2-soup-3.0/ | .depends ~ /gir1.2-soup-2.4/;
is_good = .depends ~ /libsoup-3.0-0|gir1.2-soup-3.0/;
is_bad = .depends ~ /libsoup-2.4-1|libsoup-gnome-2.4-1|gir1.2-soup-2.4/;
notes = "https://bugs.debian.org/cgi-bin/pkgreport.cgi?users=pkg-gnome-maintainers@lists.alioth.debian.org&tag=libsoup2";
export = false;
"###;
        let assignments = read_benfile(input.as_bytes()).unwrap();
        assert_eq!(assignments.len(), 6);
        assert_eq!(
            assignments[0],
            Assignment {
                field: "title".to_string(),
                expr: Expr::String("libsoup2.4 -> libsoup3".to_string())
            }
        );
        assert_eq!(
            assignments[1],
            Assignment {
                field: "is_affected".to_string(),
                expr: Expr::Or(vec![
                    Box::new(Expr::FieldRegex(
                        "build-depends".to_string(),
                        "libsoup2.4-dev|libsoup-gnome2.4-dev|libsoup-3.0-dev".to_string()
                    )),
                    Box::new(Expr::FieldRegex(
                        "build-depends-arch".to_string(),
                        "libsoup2.4-dev|libsoup-gnome2.4-dev|libsoup-3.0-dev".to_string()
                    )),
                    Box::new(Expr::FieldRegex(
                        "build-depends".to_string(),
                        "gir1.2-soup-2.4|gir1.2-soup-3.0".to_string()
                    )),
                    Box::new(Expr::FieldRegex(
                        "depends".to_string(),
                        "gir1.2-soup-2.4".to_string()
                    ))
                ])
            }
        );
        assert_eq!(
            assignments[4],
            Assignment {
                field: "notes".to_string(),
                expr: Expr::String("https://bugs.debian.org/cgi-bin/pkgreport.cgi?users=pkg-gnome-maintainers@lists.alioth.debian.org&tag=libsoup2".to_string())
            }
        );

        assert_eq!(
            assignments[5],
            Assignment {
                field: "export".to_string(),
                expr: Expr::Bool(false)
            }
        );
    }
}