Skip to main content

provenant/license_detection/expression/
mod.rs

1//! License expression parsing and manipulation.
2//!
3//! This module provides a parser for ScanCode license expressions, supporting:
4//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
5//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
6//! - Parenthetical grouping
7//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
8//!
9//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
10//! and provides functions for validation and simplification.
11
12mod parse;
13mod simplify;
14
15pub use parse::parse_expression;
16pub use simplify::{
17    combine_expressions_and, expression_to_string, licensing_contains, simplify_expression,
18};
19
/// Error type for license expression parsing.
///
/// Implements [`std::fmt::Display`] for human-readable messages and
/// [`std::error::Error`] so it can be boxed or propagated with `?`.
// The `ParseError` variant repeats the enum's own name; presumably that is
// what `clippy::enum_variant_names` would flag, hence the allow below.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// Empty expression
    EmptyExpression,

    /// Unexpected token at position
    UnexpectedToken {
        /// Text of the offending token.
        token: String,
        /// Position reported alongside the token.
        position: usize,
    },

    /// Mismatched parentheses
    MismatchedParentheses,

    /// Generic parse error with message
    ParseError(String),
}

impl std::fmt::Display for ParseError {
    /// Writes the user-facing message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Constant messages need no formatting machinery.
            Self::EmptyExpression => f.write_str("Empty license expression"),
            Self::MismatchedParentheses => f.write_str("Mismatched parentheses"),
            Self::UnexpectedToken { token, position } => {
                write!(f, "Unexpected token '{}' at position {}", token, position)
            }
            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
        }
    }
}

// No `source()` override: none of the variants wraps another error.
impl std::error::Error for ParseError {}
51
/// A parsed license expression represented as an AST.
///
/// Leaves hold a license key or a `LicenseRef-scancode-*` reference; inner
/// nodes are binary operations whose operands are boxed sub-expressions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseExpression {
    /// A single license key
    License(String),

    /// A LicenseRef-scancode-* reference
    LicenseRef(String),

    /// AND operation: left AND right
    And {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// OR operation: left OR right
    Or {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// WITH operation: left WITH right (exception)
    With {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },
}

impl LicenseExpression {
    /// Extract all license keys from the expression.
    ///
    /// Both `License` and `LicenseRef` leaves contribute their string. The
    /// returned keys are sorted ascending with duplicates removed.
    // Kept for future parity work around reference-following and validation.
    // See docs/license-detection/GAPS.md#expression-key-set-features.
    #[allow(dead_code)]
    pub fn license_keys(&self) -> Vec<String> {
        let mut keys = Vec::new();
        self.collect_keys(&mut keys);
        // Equal strings are indistinguishable, so an unstable sort yields the
        // same result as a stable one without the temporary allocation.
        keys.sort_unstable();
        keys.dedup();
        keys
    }

    /// Appends every leaf key of `self` to `keys`, left operand before right.
    ///
    /// Duplicates are kept; `license_keys` sorts and deduplicates afterwards.
    // Kept for future parity work around reference-following and validation.
    // See docs/license-detection/GAPS.md#expression-key-set-features.
    #[allow(dead_code)]
    fn collect_keys(&self, keys: &mut Vec<String>) {
        match self {
            Self::License(key) | Self::LicenseRef(key) => keys.push(key.clone()),
            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
                left.collect_keys(keys);
                right.collect_keys(keys);
            }
        }
    }

    /// Create an AND expression combining multiple expressions.
    ///
    /// Returns `None` for an empty input and the sole element unchanged for a
    /// single-element input. Longer inputs are folded left-associatively:
    /// `[a, b, c]` becomes `(a AND b) AND c`.
    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        // `reduce` covers the empty, single and multi-element cases without
        // any `unwrap`.
        expressions.into_iter().reduce(|left, right| Self::And {
            left: Box::new(left),
            right: Box::new(right),
        })
    }

    /// Create an OR expression combining multiple expressions.
    ///
    /// Returns `None` for an empty input and the sole element unchanged for a
    /// single-element input. Longer inputs are folded left-associatively:
    /// `[a, b, c]` becomes `(a OR b) OR c`.
    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        expressions.into_iter().reduce(|left, right| Self::Or {
            left: Box::new(left),
            right: Box::new(right),
        })
    }
}
145
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Two plain license leaves shared by the multi-element helper tests.
    fn mit_and_apache() -> Vec<LicenseExpression> {
        vec![
            LicenseExpression::License("mit".to_string()),
            LicenseExpression::License("apache-2.0".to_string()),
        ]
    }

    /// Builds an owned key set from string literals.
    fn key_set(keys: &[&str]) -> HashSet<String> {
        let mut set = HashSet::new();
        for key in keys {
            set.insert(key.to_string());
        }
        set
    }

    /// Keys of `expression` that are absent from `known`, in `license_keys` order.
    fn unknown_keys(expression: &LicenseExpression, known: &HashSet<String>) -> Vec<String> {
        expression
            .license_keys()
            .into_iter()
            .filter(|key| !known.contains(key))
            .collect()
    }

    #[test]
    fn test_and_helper_empty() {
        assert!(LicenseExpression::and(vec![]).is_none());
    }

    #[test]
    fn test_and_helper_single() {
        let only = LicenseExpression::License("mit".to_string());
        let combined = LicenseExpression::and(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_and_helper_multiple() {
        let combined = LicenseExpression::and(mit_and_apache()).unwrap();
        assert!(matches!(combined, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_or_helper_empty() {
        assert!(LicenseExpression::or(vec![]).is_none());
    }

    #[test]
    fn test_or_helper_single() {
        let only = LicenseExpression::License("mit".to_string());
        let combined = LicenseExpression::or(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_or_helper_multiple() {
        let combined = LicenseExpression::or(mit_and_apache()).unwrap();
        assert!(matches!(combined, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_validate_expression_valid() {
        let parsed = parse_expression("MIT AND Apache-2.0").unwrap();
        let known = key_set(&["mit", "apache-2.0"]);

        assert!(unknown_keys(&parsed, &known).is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let parsed = parse_expression("MIT AND UnknownKey").unwrap();
        let known = key_set(&["mit"]);

        // The expected key is lowercase even though the input is mixed-case.
        assert_eq!(
            unknown_keys(&parsed, &known),
            vec!["unknownkey".to_string()]
        );
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let parsed = parse_expression("MIT AND Apache-2.0").unwrap();
        let known: HashSet<String> = HashSet::new();

        // With nothing known, both keys of the expression are reported.
        assert_eq!(unknown_keys(&parsed, &known).len(), 2);
    }
}