Skip to main content

debian_copyright/
expression.rs

1//! License expression parsing for DEP-5 copyright files.
2//!
3//! License expressions combine license names with `or`, `and`, and `with` operators.
4//! `and` binds tighter than `or`. A comma before an operator lowers its precedence,
5//! e.g. `A or B, and C` means `(A or B) and C`.
6//!
7//! The `with` keyword attaches an exception to the preceding license name
8//! (e.g. `GPL-2+ with OpenSSL-exception`).
9
/// The structured form of a DEP-5 license expression.
///
/// Leaves are license names (optionally carrying an exception); interior
/// nodes combine sub-expressions with `and` / `or`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseExpr {
    /// A bare license name such as `MIT`.
    Name(String),

    /// A license name followed by an exception name, in that order,
    /// as in `GPL-2+ with OpenSSL-exception`.
    WithException(String, String),

    /// A conjunction: every listed sub-expression applies at the same time.
    And(Vec<LicenseExpr>),

    /// A disjunction: any single listed sub-expression may be picked.
    Or(Vec<LicenseExpr>),
}
25
26impl LicenseExpr {
27    /// Parse a license expression string.
28    ///
29    /// # Examples
30    ///
31    /// ```
32    /// use debian_copyright::LicenseExpr;
33    ///
34    /// let expr = LicenseExpr::parse("GPL-2+ or MIT");
35    /// assert_eq!(expr, LicenseExpr::Or(vec![
36    ///     LicenseExpr::Name("GPL-2+".to_string()),
37    ///     LicenseExpr::Name("MIT".to_string()),
38    /// ]));
39    ///
40    /// let expr = LicenseExpr::parse("GPL-2+ with OpenSSL-exception");
41    /// assert_eq!(expr, LicenseExpr::WithException(
42    ///     "GPL-2+".to_string(),
43    ///     "OpenSSL-exception".to_string(),
44    /// ));
45    /// ```
46    pub fn parse(input: &str) -> Self {
47        let tokens = tokenize(input);
48        if tokens.is_empty() {
49            return LicenseExpr::Name(String::new());
50        }
51        parse_expr(&tokens)
52    }
53
54    /// Returns the individual license names contained in this expression.
55    ///
56    /// For `WithException` variants, only the license name is returned,
57    /// not the exception name.
58    pub fn license_names(&self) -> Vec<&str> {
59        let mut names = Vec::new();
60        self.collect_names(&mut names);
61        names
62    }
63
64    fn collect_names<'a>(&'a self, names: &mut Vec<&'a str>) {
65        match self {
66            LicenseExpr::Name(n) => names.push(n),
67            LicenseExpr::WithException(n, _) => names.push(n),
68            LicenseExpr::And(exprs) | LicenseExpr::Or(exprs) => {
69                for expr in exprs {
70                    expr.collect_names(names);
71                }
72            }
73        }
74    }
75}
76
77impl std::fmt::Display for LicenseExpr {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        match self {
80            LicenseExpr::Name(n) => f.write_str(n),
81            LicenseExpr::WithException(n, e) => write!(f, "{} with {}", n, e),
82            LicenseExpr::And(exprs) => {
83                for (i, expr) in exprs.iter().enumerate() {
84                    if i > 0 {
85                        f.write_str(" and ")?;
86                    }
87                    write!(f, "{}", expr)?;
88                }
89                Ok(())
90            }
91            LicenseExpr::Or(exprs) => {
92                for (i, expr) in exprs.iter().enumerate() {
93                    if i > 0 {
94                        f.write_str(" or ")?;
95                    }
96                    write!(f, "{}", expr)?;
97                }
98                Ok(())
99            }
100        }
101    }
102}
103
/// One lexical unit of a license expression, as produced by `tokenize`.
#[derive(Clone, Debug, PartialEq)]
enum Token {
    /// Any word that is not one of the operator keywords.
    Word(String),
    /// The `or` keyword.
    Or,
    /// The `and` keyword.
    And,
    /// The `with` keyword.
    With,
    /// A comma that trailed a word.
    Comma,
}
112
113fn tokenize(input: &str) -> Vec<Token> {
114    let mut tokens = Vec::new();
115    for word in input.split_whitespace() {
116        let (word, has_comma) = if let Some(stripped) = word.strip_suffix(',') {
117            (stripped, true)
118        } else {
119            (word, false)
120        };
121
122        if !word.is_empty() {
123            if word.eq_ignore_ascii_case("or") {
124                tokens.push(Token::Or);
125            } else if word.eq_ignore_ascii_case("and") {
126                tokens.push(Token::And);
127            } else if word.eq_ignore_ascii_case("with") {
128                tokens.push(Token::With);
129            } else {
130                tokens.push(Token::Word(word.to_string()));
131            }
132        }
133
134        if has_comma {
135            tokens.push(Token::Comma);
136        }
137    }
138    tokens
139}
140
141/// Parse a single license term: a name optionally followed by `with <exception>`.
142/// The exception after `with` consumes all words until the next `or`, `and`, comma, or end.
143fn parse_term(tokens: &[Token], pos: &mut usize) -> LicenseExpr {
144    let name = match tokens.get(*pos) {
145        Some(Token::Word(w)) => {
146            *pos += 1;
147            w.clone()
148        }
149        _ => return LicenseExpr::Name(String::new()),
150    };
151
152    if matches!(tokens.get(*pos), Some(Token::With)) {
153        *pos += 1;
154        let mut exception_parts = Vec::new();
155        while *pos < tokens.len() {
156            match &tokens[*pos] {
157                Token::Word(w) => {
158                    exception_parts.push(w.clone());
159                    *pos += 1;
160                }
161                _ => break,
162            }
163        }
164        LicenseExpr::WithException(name, exception_parts.join(" "))
165    } else {
166        LicenseExpr::Name(name)
167    }
168}
169
170/// Parse a token stream into a `LicenseExpr`.
171///
172/// Handles comma-lowered precedence by splitting on `, and` / `, or` first,
173/// then parsing each segment with normal precedence (`and` > `or`).
174fn parse_expr(tokens: &[Token]) -> LicenseExpr {
175    // Split into segments at comma boundaries (comma + operator = low precedence).
176    let mut segments: Vec<(Vec<Token>, Option<Token>)> = Vec::new();
177    let mut current: Vec<Token> = Vec::new();
178
179    let mut i = 0;
180    while i < tokens.len() {
181        if tokens[i] == Token::Comma {
182            if i + 1 < tokens.len() && matches!(tokens[i + 1], Token::Or | Token::And) {
183                let op = tokens[i + 1].clone();
184                segments.push((std::mem::take(&mut current), Some(op)));
185                i += 2;
186            } else {
187                i += 1;
188            }
189        } else {
190            current.push(tokens[i].clone());
191            i += 1;
192        }
193    }
194    if !current.is_empty() {
195        segments.push((current, None));
196    }
197
198    if segments.len() == 1 {
199        return parse_segment(&segments[0].0);
200    }
201
202    // Group segments by their joining low-precedence operator.
203    // Low-precedence `and` binds tighter than low-precedence `or`.
204    // First pass: group consecutive And-joined segments.
205    let mut and_groups: Vec<Vec<LicenseExpr>> = vec![vec![parse_segment(&segments[0].0)]];
206    let mut joining_ops: Vec<Token> = Vec::new();
207
208    for i in 1..segments.len() {
209        let preceding_op = segments[i - 1].1.as_ref().unwrap_or(&Token::Or);
210        if matches!(preceding_op, Token::And) {
211            and_groups
212                .last_mut()
213                .unwrap()
214                .push(parse_segment(&segments[i].0));
215        } else {
216            joining_ops.push(Token::Or);
217            and_groups.push(vec![parse_segment(&segments[i].0)]);
218        }
219    }
220
221    let flattened: Vec<LicenseExpr> = and_groups
222        .into_iter()
223        .map(|group| {
224            if group.len() == 1 {
225                group.into_iter().next().unwrap()
226            } else {
227                LicenseExpr::And(group)
228            }
229        })
230        .collect();
231
232    if flattened.len() == 1 {
233        flattened.into_iter().next().unwrap()
234    } else {
235        LicenseExpr::Or(flattened)
236    }
237}
238
239/// Parse a segment (no comma-lowered operators) with normal precedence: `and` > `or`.
240fn parse_segment(tokens: &[Token]) -> LicenseExpr {
241    // Split on `or` (lower precedence), then each part on `and`.
242    let mut or_groups: Vec<Vec<Token>> = vec![Vec::new()];
243    for tok in tokens {
244        if *tok == Token::Or {
245            or_groups.push(Vec::new());
246        } else {
247            or_groups.last_mut().unwrap().push(tok.clone());
248        }
249    }
250
251    let or_exprs: Vec<LicenseExpr> = or_groups
252        .into_iter()
253        .map(|group| {
254            let mut and_groups: Vec<Vec<Token>> = vec![Vec::new()];
255            for tok in &group {
256                if *tok == Token::And {
257                    and_groups.push(Vec::new());
258                } else {
259                    and_groups.last_mut().unwrap().push(tok.clone());
260                }
261            }
262
263            let and_exprs: Vec<LicenseExpr> = and_groups
264                .into_iter()
265                .map(|toks| {
266                    let mut pos = 0;
267                    parse_term(&toks, &mut pos)
268                })
269                .collect();
270
271            if and_exprs.len() == 1 {
272                and_exprs.into_iter().next().unwrap()
273            } else {
274                LicenseExpr::And(and_exprs)
275            }
276        })
277        .collect();
278
279    if or_exprs.len() == 1 {
280        or_exprs.into_iter().next().unwrap()
281    } else {
282        LicenseExpr::Or(or_exprs)
283    }
284}
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `LicenseExpr::Name` node.
    fn name(s: &str) -> LicenseExpr {
        LicenseExpr::Name(s.to_owned())
    }

    /// Shorthand for a `LicenseExpr::WithException` node.
    fn with_exception(license: &str, exception: &str) -> LicenseExpr {
        LicenseExpr::WithException(license.to_owned(), exception.to_owned())
    }

    #[test]
    fn test_single_name() {
        assert_eq!(LicenseExpr::parse("MIT"), name("MIT"));
    }

    #[test]
    fn test_or() {
        let expected = LicenseExpr::Or(vec![name("GPL-2+"), name("MIT")]);
        assert_eq!(LicenseExpr::parse("GPL-2+ or MIT"), expected);
    }

    #[test]
    fn test_and() {
        let expected = LicenseExpr::And(vec![name("Apache-2.0"), name("BSD-3-clause")]);
        assert_eq!(LicenseExpr::parse("Apache-2.0 and BSD-3-clause"), expected);
    }

    #[test]
    fn test_with_exception() {
        assert_eq!(
            LicenseExpr::parse("GPL-2+ with OpenSSL-exception"),
            with_exception("GPL-2+", "OpenSSL-exception")
        );
    }

    #[test]
    fn test_with_multi_word_exception() {
        assert_eq!(
            LicenseExpr::parse("GPL-2+ with Autoconf exception"),
            with_exception("GPL-2+", "Autoconf exception")
        );
    }

    #[test]
    fn test_with_exception_then_or() {
        let expected = LicenseExpr::Or(vec![
            with_exception("GPL-2+", "OpenSSL-exception"),
            name("MIT"),
        ]);
        assert_eq!(
            LicenseExpr::parse("GPL-2+ with OpenSSL-exception or MIT"),
            expected
        );
    }

    #[test]
    fn test_and_binds_tighter_than_or() {
        // `A or B and C` groups as `A or (B and C)`.
        let expected = LicenseExpr::Or(vec![
            name("A"),
            LicenseExpr::And(vec![name("B"), name("C")]),
        ]);
        assert_eq!(LicenseExpr::parse("A or B and C"), expected);
    }

    #[test]
    fn test_comma_lowers_precedence() {
        // `A or B, and C` groups as `(A or B) and C`.
        let expected = LicenseExpr::And(vec![
            LicenseExpr::Or(vec![name("A"), name("B")]),
            name("C"),
        ]);
        assert_eq!(LicenseExpr::parse("A or B, and C"), expected);
    }

    #[test]
    fn test_case_insensitive_operators() {
        let expected = LicenseExpr::Or(vec![name("GPL-2+"), name("MIT")]);
        assert_eq!(LicenseExpr::parse("GPL-2+ OR MIT"), expected);
    }

    #[test]
    fn test_license_names() {
        let parsed = LicenseExpr::parse("GPL-2+ or MIT and BSD-3-clause");
        assert_eq!(parsed.license_names(), vec!["GPL-2+", "MIT", "BSD-3-clause"]);
    }

    #[test]
    fn test_license_names_with_exception() {
        let parsed = LicenseExpr::parse("GPL-2+ with OpenSSL-exception or MIT");
        assert_eq!(parsed.license_names(), vec!["GPL-2+", "MIT"]);
    }

    #[test]
    fn test_display_round_trip_simple() {
        let text = "GPL-2+ or MIT";
        assert_eq!(LicenseExpr::parse(text).to_string(), text);
    }

    #[test]
    fn test_display_with_exception() {
        let text = "GPL-2+ with OpenSSL-exception";
        assert_eq!(LicenseExpr::parse(text).to_string(), text);
    }

    #[test]
    fn test_three_way_or() {
        let expected = LicenseExpr::Or(vec![name("GPL-1+"), name("Artistic"), name("Perl")]);
        assert_eq!(LicenseExpr::parse("GPL-1+ or Artistic or Perl"), expected);
    }
}