Skip to main content

provenant/license_detection/expression/
mod.rs

//! License expression parsing and manipulation.
//!
//! This module provides a parser for ScanCode license expressions, supporting:
//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
//! - Parenthetical grouping
//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
//!
//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
//! and provides functions for validation and simplification.
11
12mod parse;
13mod simplify;
14
15pub use parse::parse_expression;
16pub use simplify::{
17    combine_expressions_and, expression_to_string, licensing_contains, simplify_expression,
18};
19
/// Error type for license expression parsing.
///
/// The [`std::fmt::Display`] implementation renders a short human-readable message
/// for each variant, and the type implements [`std::error::Error`] so it can be boxed
/// or propagated with `?`.
#[derive(Debug, Clone, PartialEq)]
// The `ParseError` variant repeats the enum's own name, which is presumably what
// trips `clippy::enum_variant_names`; the name is kept for API compatibility.
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// Empty expression
    EmptyExpression,

    /// Unexpected token at position
    UnexpectedToken {
        /// Text of the offending token.
        token: String,
        /// Position at which the token was encountered.
        position: usize,
    },

    /// Mismatched parentheses
    MismatchedParentheses,

    /// Generic parse error with message
    ParseError(String),
}

impl std::fmt::Display for ParseError {
    /// Write the user-facing message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Fixed messages need no formatting machinery.
            Self::EmptyExpression => f.write_str("Empty license expression"),
            Self::MismatchedParentheses => f.write_str("Mismatched parentheses"),
            Self::UnexpectedToken { token, position } => {
                write!(f, "Unexpected token '{}' at position {}", token, position)
            }
            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
        }
    }
}

impl std::error::Error for ParseError {}
51
/// A parsed license expression represented as an AST.
///
/// Leaves are either a plain license key or a `LicenseRef` reference; interior nodes
/// are the binary `AND`, `OR` and `WITH` operators, each owning its two boxed operands.
#[derive(Debug, Clone, PartialEq)]
pub enum LicenseExpression {
    /// A single license key
    License(String),

    /// A LicenseRef-scancode-* reference
    LicenseRef(String),

    /// AND operation: left AND right
    And {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// OR operation: left OR right
    Or {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// WITH operation: left WITH right (exception)
    With {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },
}

impl LicenseExpression {
    /// Extract all license keys from the expression.
    ///
    /// Returns the string carried by every `License` and `LicenseRef` leaf in the
    /// tree, sorted and with duplicates removed.
    // NOTE(review): presumably only exercised from tests at the moment, hence the allow.
    #[allow(dead_code)]
    pub fn license_keys(&self) -> Vec<String> {
        let mut keys = Vec::new();
        self.collect_keys(&mut keys);
        // Equal strings are indistinguishable, so an unstable sort gives the same result.
        keys.sort_unstable();
        keys.dedup();
        keys
    }

    /// Append the key of every leaf under `self` to `keys`, left operand first.
    #[allow(dead_code)]
    fn collect_keys(&self, keys: &mut Vec<String>) {
        match self {
            Self::License(key) | Self::LicenseRef(key) => keys.push(key.clone()),
            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
                left.collect_keys(keys);
                right.collect_keys(keys);
            }
        }
    }

    /// Create an AND expression combining multiple expressions.
    ///
    /// Returns `None` for an empty input and the lone element unchanged for a
    /// single-element input. Longer inputs are folded left-associatively, so
    /// `[a, b, c]` becomes `(a AND b) AND c`.
    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        expressions.into_iter().reduce(|left, right| Self::And {
            left: Box::new(left),
            right: Box::new(right),
        })
    }

    /// Create an OR expression combining multiple expressions.
    ///
    /// Returns `None` for an empty input and the lone element unchanged for a
    /// single-element input. Longer inputs are folded left-associatively, so
    /// `[a, b, c]` becomes `(a OR b) OR c`.
    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        expressions.into_iter().reduce(|left, right| Self::Or {
            left: Box::new(left),
            right: Box::new(right),
        })
    }
}
141
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Build a plain `License` leaf for the given key.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// Keys referenced by `expr` that are absent from `known`, in the sorted
    /// order produced by `license_keys`.
    fn unknown_keys(expr: &LicenseExpression, known: &HashSet<String>) -> Vec<String> {
        expr.license_keys()
            .into_iter()
            .filter(|key| !known.contains(key))
            .collect()
    }

    #[test]
    fn test_and_helper_empty() {
        let result = LicenseExpression::and(vec![]);
        assert!(result.is_none());
    }

    #[test]
    fn test_and_helper_single() {
        let expr = license("mit");
        let result = LicenseExpression::and(vec![expr.clone()]).unwrap();
        assert_eq!(result, expr);
    }

    #[test]
    fn test_and_helper_multiple() {
        let result = LicenseExpression::and(vec![license("mit"), license("apache-2.0")]).unwrap();
        // Check the whole tree, not just the outer variant, so operand order is pinned.
        assert_eq!(
            result,
            LicenseExpression::And {
                left: Box::new(license("mit")),
                right: Box::new(license("apache-2.0")),
            }
        );
    }

    #[test]
    fn test_or_helper_empty() {
        let result = LicenseExpression::or(vec![]);
        assert!(result.is_none());
    }

    #[test]
    fn test_or_helper_single() {
        let expr = license("mit");
        let result = LicenseExpression::or(vec![expr.clone()]).unwrap();
        assert_eq!(result, expr);
    }

    #[test]
    fn test_or_helper_multiple() {
        let result = LicenseExpression::or(vec![license("mit"), license("apache-2.0")]).unwrap();
        // Check the whole tree, not just the outer variant, so operand order is pinned.
        assert_eq!(
            result,
            LicenseExpression::Or {
                left: Box::new(license("mit")),
                right: Box::new(license("apache-2.0")),
            }
        );
    }

    #[test]
    fn test_validate_expression_valid() {
        let expr = parse_expression("MIT AND Apache-2.0").unwrap();
        let mut known = HashSet::new();
        known.insert("mit".to_string());
        known.insert("apache-2.0".to_string());

        assert!(unknown_keys(&expr, &known).is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let expr = parse_expression("MIT AND UnknownKey").unwrap();
        let mut known = HashSet::new();
        known.insert("mit".to_string());

        assert_eq!(unknown_keys(&expr, &known), vec!["unknownkey".to_string()]);
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let expr = parse_expression("MIT AND Apache-2.0").unwrap();
        let known: HashSet<String> = HashSet::new();

        assert_eq!(unknown_keys(&expr, &known).len(), 2);
    }
}