Skip to main content

provenant/license_detection/expression/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression parsing and manipulation.
5//!
6//! This module provides a parser for ScanCode license expressions, supporting:
7//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
8//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
9//! - Parenthetical grouping
10//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
11//!
12//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
13//! and provides functions for validation and simplification.
14
15mod parse;
16mod simplify;
17
18pub use parse::parse_expression;
19pub use simplify::{
20    combine_expressions_and, combine_expressions_and_preserving_structure, combine_expressions_or,
21    combine_expressions_or_preserving_structure, expression_to_string, licensing_contains,
22    simplify_expression, simplify_expression_preserving_structure,
23};
24
/// Errors that can be reported while parsing a license expression.
///
/// The `ParseError::ParseError` variant repeats the enum's own name, which is
/// presumably why `clippy::enum_variant_names` is silenced on this type.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// The expression was empty.
    EmptyExpression,

    /// A token appeared where it was not expected.
    UnexpectedToken {
        /// Text of the offending token.
        token: String,
        /// Position associated with the token (the unit is not defined in
        /// this file; see the parser for how it is computed).
        position: usize,
    },

    /// Parentheses in the expression do not match up.
    MismatchedParentheses,

    /// Any other parse failure, described by a free-form message.
    ParseError(String),
}

impl std::fmt::Display for ParseError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Variants that carry data need real formatting.
            ParseError::UnexpectedToken { token, position } => {
                write!(f, "Unexpected token '{}' at position {}", token, position)
            }
            ParseError::ParseError(msg) => write!(f, "Parse error: {}", msg),
            // Data-free variants map to fixed text, written out verbatim.
            ParseError::EmptyExpression => f.write_str("Empty license expression"),
            ParseError::MismatchedParentheses => f.write_str("Mismatched parentheses"),
        }
    }
}

// No `source()` override: none of the variants wrap another error.
impl std::error::Error for ParseError {}
56
/// A parsed license expression represented as an AST.
///
/// Leaves hold a single identifier (`License` or `LicenseRef`); every other
/// variant is a binary operator whose two operands are boxed sub-expressions.
#[derive(Debug, Clone, PartialEq)]
pub enum LicenseExpression {
    /// A single license key
    License(String),

    /// A LicenseRef-scancode-* reference
    LicenseRef(String),

    /// AND operation: left AND right
    And {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// OR operation: left OR right
    Or {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },

    /// WITH operation: left WITH right (exception)
    With {
        left: Box<LicenseExpression>,
        right: Box<LicenseExpression>,
    },
}

impl LicenseExpression {
    /// Extract all license keys from the expression.
    ///
    /// Keys are gathered from both `License` and `LicenseRef` leaves,
    /// including the operands of `WITH`. The returned vector is sorted and
    /// contains each key once.
    // Lint kept from the original: within this file the method is only
    // called from the test module.
    #[allow(dead_code)]
    pub fn license_keys(&self) -> Vec<String> {
        let mut keys = Vec::new();
        self.collect_keys(&mut keys);
        // Equal strings are interchangeable and duplicates are dropped right
        // after, so sort stability is irrelevant here.
        keys.sort_unstable();
        keys.dedup();
        keys
    }

    /// Appends the key of every leaf below `self` to `keys`, visiting the
    /// left operand before the right one. Duplicates are not filtered here.
    #[allow(dead_code)]
    fn collect_keys(&self, keys: &mut Vec<String>) {
        match self {
            Self::License(key) | Self::LicenseRef(key) => keys.push(key.clone()),
            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
                left.collect_keys(keys);
                right.collect_keys(keys);
            }
        }
    }

    /// Create an AND expression combining multiple expressions.
    ///
    /// Returns `None` for an empty vector and the sole element, unchanged,
    /// for a one-element vector. Otherwise the elements are combined left to
    /// right into a left-nested chain: `[a, b, c]` becomes `(a AND b) AND c`.
    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        // `reduce` yields `None` on empty input and the lone element on a
        // single input, so no length checks or `unwrap()` calls are needed.
        expressions
            .into_iter()
            .reduce(|left, right| LicenseExpression::And {
                left: Box::new(left),
                right: Box::new(right),
            })
    }

    /// Create an OR expression combining multiple expressions.
    ///
    /// Returns `None` for an empty vector and the sole element, unchanged,
    /// for a one-element vector. Otherwise the elements are combined left to
    /// right into a left-nested chain: `[a, b, c]` becomes `(a OR b) OR c`.
    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        expressions
            .into_iter()
            .reduce(|left, right| LicenseExpression::Or {
                left: Box::new(left),
                right: Box::new(right),
            })
    }
}
146
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Builds a `License` leaf for `key`.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// Builds the set of known keys used by the validation tests.
    fn known_keys(keys: &[&str]) -> HashSet<String> {
        keys.iter().map(|key| key.to_string()).collect()
    }

    /// Returns the keys of `expr` that are not in `known`, in the sorted
    /// order produced by `license_keys`.
    fn unknown_keys(expr: &LicenseExpression, known: &HashSet<String>) -> Vec<String> {
        expr.license_keys()
            .into_iter()
            .filter(|key| !known.contains(key))
            .collect()
    }

    #[test]
    fn test_and_helper_empty() {
        assert!(LicenseExpression::and(vec![]).is_none());
    }

    #[test]
    fn test_and_helper_single() {
        let expr = license("mit");
        let result = LicenseExpression::and(vec![expr.clone()]).unwrap();
        assert_eq!(result, expr);
    }

    #[test]
    fn test_and_helper_multiple() {
        let result = LicenseExpression::and(vec![license("mit"), license("apache-2.0")]).unwrap();
        // Pin the whole tree, not just the outer variant, so operand order
        // is checked as well.
        assert_eq!(
            result,
            LicenseExpression::And {
                left: Box::new(license("mit")),
                right: Box::new(license("apache-2.0")),
            }
        );
    }

    #[test]
    fn test_and_helper_nests_left() {
        let result =
            LicenseExpression::and(vec![license("a"), license("b"), license("c")]).unwrap();
        assert_eq!(
            result,
            LicenseExpression::And {
                left: Box::new(LicenseExpression::And {
                    left: Box::new(license("a")),
                    right: Box::new(license("b")),
                }),
                right: Box::new(license("c")),
            }
        );
    }

    #[test]
    fn test_or_helper_empty() {
        assert!(LicenseExpression::or(vec![]).is_none());
    }

    #[test]
    fn test_or_helper_single() {
        let expr = license("mit");
        let result = LicenseExpression::or(vec![expr.clone()]).unwrap();
        assert_eq!(result, expr);
    }

    #[test]
    fn test_or_helper_multiple() {
        let result = LicenseExpression::or(vec![license("mit"), license("apache-2.0")]).unwrap();
        assert_eq!(
            result,
            LicenseExpression::Or {
                left: Box::new(license("mit")),
                right: Box::new(license("apache-2.0")),
            }
        );
    }

    #[test]
    fn test_or_helper_nests_left() {
        let result = LicenseExpression::or(vec![license("a"), license("b"), license("c")]).unwrap();
        assert_eq!(
            result,
            LicenseExpression::Or {
                left: Box::new(LicenseExpression::Or {
                    left: Box::new(license("a")),
                    right: Box::new(license("b")),
                }),
                right: Box::new(license("c")),
            }
        );
    }

    #[test]
    fn test_validate_expression_valid() {
        let expr = parse_expression("MIT AND Apache-2.0").unwrap();
        let known = known_keys(&["mit", "apache-2.0"]);

        assert!(unknown_keys(&expr, &known).is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let expr = parse_expression("MIT AND UnknownKey").unwrap();
        let known = known_keys(&["mit"]);

        assert_eq!(unknown_keys(&expr, &known), vec!["unknownkey".to_string()]);
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let expr = parse_expression("MIT AND Apache-2.0").unwrap();
        let known = known_keys(&[]);

        assert_eq!(unknown_keys(&expr, &known).len(), 2);
    }
}
239}