Skip to main content

provenant/license_detection/expression/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression parsing and manipulation.
5//!
6//! This module provides a parser for ScanCode license expressions, supporting:
7//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
8//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
9//! - Parenthetical grouping
10//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
11//!
12//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
13//! and provides functions for validation and simplification.
14
15mod parse;
16mod simplify;
17
18pub use parse::parse_expression;
19pub use simplify::{
20    combine_expressions_and, combine_expressions_and_preserving_structure, combine_expressions_or,
21    combine_expressions_or_preserving_structure, expression_to_string, licensing_contains,
22    simplify_expression, simplify_expression_preserving_structure,
23};
24
25/// Error type for license expression parsing.
26#[derive(Debug, Clone, PartialEq, thiserror::Error)]
27#[allow(clippy::enum_variant_names)]
28pub enum ParseError {
29    /// Empty expression
30    #[error("Empty license expression")]
31    EmptyExpression,
32
33    /// Unexpected token at position
34    #[error("Unexpected token '{token}' at position {position}")]
35    UnexpectedToken { token: String, position: usize },
36
37    /// Mismatched parentheses
38    #[error("Mismatched parentheses")]
39    MismatchedParentheses,
40
41    /// Generic parse error with message
42    #[error("Parse error: {0}")]
43    ParseError(String),
44}
45
/// AST node of a parsed license expression.
///
/// Leaves carry a key or reference as a string; every operator node owns its
/// two boxed operands.
#[derive(Clone, Debug, PartialEq)]
pub enum LicenseExpression {
    /// Leaf: a single license key.
    License(String),

    /// Leaf: a `LicenseRef-scancode-*` reference.
    LicenseRef(String),

    /// Conjunction: `left AND right`.
    And {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// Disjunction: `left OR right`.
    Or {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// Exception clause: `left WITH right`, where `right` is the exception.
    With {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },
}
73
74impl LicenseExpression {
75    /// Extract all license keys from the expression.
76    #[allow(dead_code)]
77    pub fn license_keys(&self) -> Vec<String> {
78        let mut keys = Vec::new();
79        self.collect_keys(&mut keys);
80        keys.sort();
81        keys.dedup();
82        keys
83    }
84
85    #[allow(dead_code)]
86    fn collect_keys(&self, keys: &mut Vec<String>) {
87        match self {
88            Self::License(key) => keys.push(key.clone()),
89            Self::LicenseRef(key) => keys.push(key.clone()),
90            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
91                left.collect_keys(keys);
92                right.collect_keys(keys);
93            }
94        }
95    }
96
97    /// Create an AND expression combining multiple expressions.
98    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
99        if expressions.is_empty() {
100            None
101        } else {
102            Some(build_balanced_boolean_expression(
103                &expressions,
104                |left, right| LicenseExpression::And { left, right },
105            ))
106        }
107    }
108
109    /// Create an OR expression combining multiple expressions.
110    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
111        if expressions.is_empty() {
112            None
113        } else {
114            Some(build_balanced_boolean_expression(
115                &expressions,
116                |left, right| LicenseExpression::Or { left, right },
117            ))
118        }
119    }
120}
121
122fn build_balanced_boolean_expression(
123    expressions: &[LicenseExpression],
124    combine: fn(Box<LicenseExpression>, Box<LicenseExpression>) -> LicenseExpression,
125) -> LicenseExpression {
126    debug_assert!(
127        !expressions.is_empty(),
128        "build_balanced_boolean_expression called with empty list"
129    );
130    match expressions.len() {
131        1 => expressions[0].clone(),
132        _ => {
133            let midpoint = expressions.len() / 2;
134            let left = build_balanced_boolean_expression(&expressions[..midpoint], combine);
135            let right = build_balanced_boolean_expression(&expressions[midpoint..], combine);
136            combine(Box::new(left), Box::new(right))
137        }
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Number of nodes on the longest path from `expr` down to a leaf.
    fn expression_depth(expr: &LicenseExpression) -> usize {
        match expr {
            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_) => 1,
            LicenseExpression::And { left, right }
            | LicenseExpression::Or { left, right }
            | LicenseExpression::With { left, right } => {
                let deepest_child = expression_depth(left).max(expression_depth(right));
                deepest_child + 1
            }
        }
    }

    /// Shorthand for a `License` leaf.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// Build `count` distinct `License` leaves named `license-0`, `license-1`, ...
    fn numbered_licenses(count: usize) -> Vec<LicenseExpression> {
        (0..count)
            .map(|idx| LicenseExpression::License(format!("license-{idx}")))
            .collect()
    }

    /// Parse `expression` and return those of its keys that are absent from `known`.
    fn unknown_keys(expression: &str, known: &[&str]) -> Vec<String> {
        let known: HashSet<String> = known.iter().map(|key| key.to_string()).collect();
        let parsed = parse_expression(expression).unwrap();
        parsed
            .license_keys()
            .into_iter()
            .filter(|key| !known.contains(key))
            .collect()
    }

    #[test]
    fn test_and_helper_empty() {
        assert!(LicenseExpression::and(Vec::new()).is_none());
    }

    #[test]
    fn test_and_helper_single() {
        let only = license("mit");
        let combined = LicenseExpression::and(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_and_helper_multiple() {
        let combined = LicenseExpression::and(vec![license("mit"), license("apache-2.0")]).unwrap();
        assert!(matches!(combined, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_or_helper_empty() {
        assert!(LicenseExpression::or(Vec::new()).is_none());
    }

    #[test]
    fn test_or_helper_single() {
        let only = license("mit");
        let combined = LicenseExpression::or(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_or_helper_multiple() {
        let combined = LicenseExpression::or(vec![license("mit"), license("apache-2.0")]).unwrap();
        assert!(matches!(combined, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_and_helper_balances_large_expression_depth() {
        // The depth must stay far below the 1024 operands being combined.
        let combined = LicenseExpression::and(numbered_licenses(1024)).unwrap();
        assert!(expression_depth(&combined) <= 12);
    }

    #[test]
    fn test_or_helper_balances_large_expression_depth() {
        // The depth must stay far below the 1024 operands being combined.
        let combined = LicenseExpression::or(numbered_licenses(1024)).unwrap();
        assert!(expression_depth(&combined) <= 12);
    }

    #[test]
    fn test_validate_expression_valid() {
        let unknown = unknown_keys("MIT AND Apache-2.0", &["mit", "apache-2.0"]);
        assert!(unknown.is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let unknown = unknown_keys("MIT AND UnknownKey", &["mit"]);
        assert_eq!(unknown, vec!["unknownkey".to_string()]);
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let unknown = unknown_keys("MIT AND Apache-2.0", &[]);
        assert_eq!(unknown.len(), 2);
    }
}
266}