Skip to main content

provenant/license_detection/expression/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression parsing and manipulation.
5//!
6//! This module provides a parser for ScanCode license expressions, supporting:
7//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
8//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
9//! - Parenthetical grouping
10//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
11//!
12//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
13//! and provides functions for validation and simplification.
14
15mod parse;
16mod simplify;
17
18pub use parse::parse_expression;
19pub use simplify::{
20    combine_expressions_and, combine_expressions_and_preserving_structure, combine_expressions_or,
21    combine_expressions_or_preserving_structure, expression_to_string, licensing_contains,
22    simplify_expression, simplify_expression_preserving_structure,
23};
24
/// Error type for license expression parsing.
///
/// All payloads are plain `String`/`usize` values, so the type implements
/// `Eq` in addition to `PartialEq` and can be compared exactly in tests or
/// used wherever a total equality is required.
#[derive(Debug, Clone, PartialEq, Eq)]
// The `ParseError::ParseError` variant repeats the enum's own name, which is
// what this lint flags; the name is kept because renaming a public variant
// would change the public API.
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// Empty expression.
    EmptyExpression,

    /// Unexpected token at position.
    UnexpectedToken {
        /// Text of the offending token.
        token: String,
        /// Position reported for the offending token.
        position: usize,
    },

    /// Mismatched parentheses.
    MismatchedParentheses,

    /// Generic parse error with a free-form message.
    ParseError(String),
}

impl std::fmt::Display for ParseError {
    /// Render the error as a short human-readable message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyExpression => write!(f, "Empty license expression"),
            Self::UnexpectedToken { token, position } => {
                write!(f, "Unexpected token '{token}' at position {position}")
            }
            Self::MismatchedParentheses => write!(f, "Mismatched parentheses"),
            Self::ParseError(msg) => write!(f, "Parse error: {msg}"),
        }
    }
}

// No `source()` override: none of the variants wraps another error.
impl std::error::Error for ParseError {}
56
/// A parsed license expression represented as an AST.
///
/// Leaves hold a key string; the binary variants own their two operands
/// through `Box`, so arbitrarily nested expressions can be represented.
/// Because every payload is a `String` or another `LicenseExpression`, the
/// type implements `Eq` as well as `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseExpression {
    /// A single license key.
    License(String),

    /// A `LicenseRef-scancode-*` reference.
    LicenseRef(String),

    /// AND operation: `left AND right`.
    And {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand.
        right: Box<LicenseExpression>,
    },

    /// OR operation: `left OR right`.
    Or {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand.
        right: Box<LicenseExpression>,
    },

    /// WITH operation: `left WITH right` (exception).
    With {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand (the exception side of the expression).
        right: Box<LicenseExpression>,
    },
}
84
85impl LicenseExpression {
86    /// Extract all license keys from the expression.
87    #[allow(dead_code)]
88    pub fn license_keys(&self) -> Vec<String> {
89        let mut keys = Vec::new();
90        self.collect_keys(&mut keys);
91        keys.sort();
92        keys.dedup();
93        keys
94    }
95
96    #[allow(dead_code)]
97    fn collect_keys(&self, keys: &mut Vec<String>) {
98        match self {
99            Self::License(key) => keys.push(key.clone()),
100            Self::LicenseRef(key) => keys.push(key.clone()),
101            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
102                left.collect_keys(keys);
103                right.collect_keys(keys);
104            }
105        }
106    }
107
108    /// Create an AND expression combining multiple expressions.
109    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
110        if expressions.is_empty() {
111            None
112        } else {
113            Some(build_balanced_boolean_expression(
114                &expressions,
115                |left, right| LicenseExpression::And { left, right },
116            ))
117        }
118    }
119
120    /// Create an OR expression combining multiple expressions.
121    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
122        if expressions.is_empty() {
123            None
124        } else {
125            Some(build_balanced_boolean_expression(
126                &expressions,
127                |left, right| LicenseExpression::Or { left, right },
128            ))
129        }
130    }
131}
132
133fn build_balanced_boolean_expression(
134    expressions: &[LicenseExpression],
135    combine: fn(Box<LicenseExpression>, Box<LicenseExpression>) -> LicenseExpression,
136) -> LicenseExpression {
137    match expressions.len() {
138        0 => panic!("build_balanced_boolean_expression called with empty list"),
139        1 => expressions[0].clone(),
140        _ => {
141            let midpoint = expressions.len() / 2;
142            let left = build_balanced_boolean_expression(&expressions[..midpoint], combine);
143            let right = build_balanced_boolean_expression(&expressions[midpoint..], combine);
144            combine(Box::new(left), Box::new(right))
145        }
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Number of nodes on the longest root-to-leaf path (a lone leaf has depth 1).
    fn expression_depth(expr: &LicenseExpression) -> usize {
        match expr {
            LicenseExpression::And { left, right }
            | LicenseExpression::Or { left, right }
            | LicenseExpression::With { left, right } => {
                let deepest_child = expression_depth(left).max(expression_depth(right));
                deepest_child + 1
            }
            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_) => 1,
        }
    }

    /// Build a `License` leaf from a string slice.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// The two-operand fixture shared by the "multiple" tests.
    fn mit_and_apache() -> Vec<LicenseExpression> {
        vec![license("mit"), license("apache-2.0")]
    }

    /// Produce `count` distinct leaves named `license-0`, `license-1`, ...
    fn numbered_licenses(count: usize) -> Vec<LicenseExpression> {
        (0..count)
            .map(|idx| LicenseExpression::License(format!("license-{idx}")))
            .collect()
    }

    /// Parse `expression` and return the keys that are not listed in `known`.
    fn unknown_keys(expression: &str, known: &[&str]) -> Vec<String> {
        let known: HashSet<String> = known.iter().map(|key| key.to_string()).collect();
        parse_expression(expression)
            .unwrap()
            .license_keys()
            .into_iter()
            .filter(|key| !known.contains(key))
            .collect()
    }

    #[test]
    fn test_and_helper_empty() {
        assert!(LicenseExpression::and(vec![]).is_none());
    }

    #[test]
    fn test_and_helper_single() {
        let only = license("mit");
        let combined = LicenseExpression::and(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_and_helper_multiple() {
        let combined = LicenseExpression::and(mit_and_apache()).unwrap();
        assert!(matches!(combined, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_or_helper_empty() {
        assert!(LicenseExpression::or(vec![]).is_none());
    }

    #[test]
    fn test_or_helper_single() {
        let only = license("mit");
        let combined = LicenseExpression::or(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_or_helper_multiple() {
        let combined = LicenseExpression::or(mit_and_apache()).unwrap();
        assert!(matches!(combined, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_and_helper_balances_large_expression_depth() {
        let combined = LicenseExpression::and(numbered_licenses(1024)).unwrap();

        // A left-leaning chain of 1024 operands would be far deeper than this bound.
        assert!(expression_depth(&combined) <= 12);
    }

    #[test]
    fn test_or_helper_balances_large_expression_depth() {
        let combined = LicenseExpression::or(numbered_licenses(1024)).unwrap();

        // A left-leaning chain of 1024 operands would be far deeper than this bound.
        assert!(expression_depth(&combined) <= 12);
    }

    #[test]
    fn test_validate_expression_valid() {
        let unknown = unknown_keys("MIT AND Apache-2.0", &["mit", "apache-2.0"]);
        assert!(unknown.is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let unknown = unknown_keys("MIT AND UnknownKey", &["mit"]);
        assert_eq!(unknown, vec!["unknownkey".to_string()]);
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let unknown = unknown_keys("MIT AND Apache-2.0", &[]);
        assert_eq!(unknown.len(), 2);
    }
}