Skip to main content

provenant/license_detection/expression/
mod.rs

1//! License expression parsing and manipulation.
2//!
3//! This module provides a parser for ScanCode license expressions, supporting:
4//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
5//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
6//! - Parenthetical grouping
7//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
8//!
9//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
10//! and provides functions for validation and simplification.
11
12mod parse;
13mod simplify;
14
15pub use parse::parse_expression;
16pub use simplify::{
17    combine_expressions_and, combine_expressions_or, expression_to_string, licensing_contains,
18    simplify_expression,
19};
20
/// Errors produced while parsing a license expression string.
///
/// Every variant has a short human-readable rendering through the type's
/// `Display` implementation.
#[derive(Debug, Clone, PartialEq, Eq)]
// NOTE(review): presumably needed because the `ParseError` variant repeats the
// enum's own name, which `clippy::enum_variant_names` flags — confirm.
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// The input did not contain any expression.
    EmptyExpression,

    /// A token showed up where it was not expected.
    UnexpectedToken {
        /// Text of the offending token.
        token: String,
        /// Position reported for the offending token.
        position: usize,
    },

    /// Opening and closing parentheses do not pair up.
    MismatchedParentheses,

    /// Any other parse failure, described by a free-form message.
    ParseError(String),
}
37
38impl std::fmt::Display for ParseError {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        match self {
41            Self::EmptyExpression => write!(f, "Empty license expression"),
42            Self::UnexpectedToken { token, position } => {
43                write!(f, "Unexpected token '{}' at position {}", token, position)
44            }
45            Self::MismatchedParentheses => write!(f, "Mismatched parentheses"),
46            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
47        }
48    }
49}
50
51impl std::error::Error for ParseError {}
52
/// Abstract syntax tree of a parsed license expression.
///
/// Leaves hold a license key or a `LicenseRef-scancode-*` reference; inner
/// nodes are the binary `AND`, `OR` and `WITH` operators, each owning its two
/// boxed operands.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseExpression {
    /// A single license key.
    License(String),

    /// A `LicenseRef-scancode-*` reference.
    LicenseRef(String),

    /// Conjunction: `left AND right`.
    And {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand.
        right: Box<LicenseExpression>,
    },

    /// Disjunction: `left OR right`.
    Or {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand.
        right: Box<LicenseExpression>,
    },

    /// Exception clause: `left WITH right`.
    With {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand (the exception).
        right: Box<LicenseExpression>,
    },
}
80
81impl LicenseExpression {
82    /// Extract all license keys from the expression.
83    #[allow(dead_code)]
84    pub fn license_keys(&self) -> Vec<String> {
85        let mut keys = Vec::new();
86        self.collect_keys(&mut keys);
87        keys.sort();
88        keys.dedup();
89        keys
90    }
91
92    #[allow(dead_code)]
93    fn collect_keys(&self, keys: &mut Vec<String>) {
94        match self {
95            Self::License(key) => keys.push(key.clone()),
96            Self::LicenseRef(key) => keys.push(key.clone()),
97            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
98                left.collect_keys(keys);
99                right.collect_keys(keys);
100            }
101        }
102    }
103
104    /// Create an AND expression combining multiple expressions.
105    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
106        if expressions.is_empty() {
107            None
108        } else if expressions.len() == 1 {
109            Some(expressions.into_iter().next().unwrap())
110        } else {
111            let mut iter = expressions.into_iter();
112            let mut result = iter.next().unwrap();
113            for expr in iter {
114                result = LicenseExpression::And {
115                    left: Box::new(result),
116                    right: Box::new(expr),
117                };
118            }
119            Some(result)
120        }
121    }
122
123    /// Create an OR expression combining multiple expressions.
124    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
125        if expressions.is_empty() {
126            None
127        } else if expressions.len() == 1 {
128            Some(expressions.into_iter().next().unwrap())
129        } else {
130            let mut iter = expressions.into_iter();
131            let mut result = iter.next().unwrap();
132            for expr in iter {
133                result = LicenseExpression::Or {
134                    left: Box::new(result),
135                    right: Box::new(expr),
136                };
137            }
138            Some(result)
139        }
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Builds a `License` leaf for `key`.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// The two-license input shared by the "multiple" tests.
    fn mit_and_apache() -> Vec<LicenseExpression> {
        vec![license("mit"), license("apache-2.0")]
    }

    /// Builds a set of known license keys from string slices.
    fn known_set(keys: &[&str]) -> HashSet<String> {
        keys.iter().map(|key| key.to_string()).collect()
    }

    /// Parses `expression` and returns the keys that are missing from `known`,
    /// in the order produced by `license_keys`.
    fn unknown_keys(expression: &str, known: &HashSet<String>) -> Vec<String> {
        let parsed = parse_expression(expression).unwrap();
        let mut missing = Vec::new();
        for key in parsed.license_keys() {
            if !known.contains(&key) {
                missing.push(key);
            }
        }
        missing
    }

    #[test]
    fn test_and_helper_empty() {
        assert_eq!(LicenseExpression::and(Vec::new()), None);
    }

    #[test]
    fn test_and_helper_single() {
        let mit = license("mit");
        assert_eq!(LicenseExpression::and(vec![mit.clone()]), Some(mit));
    }

    #[test]
    fn test_and_helper_multiple() {
        let combined = LicenseExpression::and(mit_and_apache());
        assert!(matches!(combined, Some(LicenseExpression::And { .. })));
    }

    #[test]
    fn test_or_helper_empty() {
        assert_eq!(LicenseExpression::or(Vec::new()), None);
    }

    #[test]
    fn test_or_helper_single() {
        let mit = license("mit");
        assert_eq!(LicenseExpression::or(vec![mit.clone()]), Some(mit));
    }

    #[test]
    fn test_or_helper_multiple() {
        let combined = LicenseExpression::or(mit_and_apache());
        assert!(matches!(combined, Some(LicenseExpression::Or { .. })));
    }

    #[test]
    fn test_validate_expression_valid() {
        let known = known_set(&["mit", "apache-2.0"]);
        let unknown = unknown_keys("MIT AND Apache-2.0", &known);
        assert!(unknown.is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let known = known_set(&["mit"]);
        let unknown = unknown_keys("MIT AND UnknownKey", &known);
        assert_eq!(unknown, vec!["unknownkey".to_string()]);
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let known = known_set(&[]);
        let unknown = unknown_keys("MIT AND Apache-2.0", &known);
        assert_eq!(unknown.len(), 2);
    }
}
235}