Skip to main content

provenant/license_detection/expression/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression parsing and manipulation.
5//!
6//! This module provides a parser for ScanCode license expressions, supporting:
7//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
8//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
9//! - Parenthetical grouping
10//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
11//!
12//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
13//! and provides functions for validation and simplification.
14
15mod parse;
16mod simplify;
17
18pub use parse::parse_expression;
19pub use simplify::{
20    combine_expressions_and, combine_expressions_or, expression_to_string, licensing_contains,
21    simplify_expression,
22};
23
/// Error type for license expression parsing.
///
/// `Eq` is derived alongside `PartialEq`: every payload is a `String` or a
/// `usize`, so equality is total and errors can be used wherever an `Eq`
/// bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
// The `ParseError` variant repeats the enum's own name, which is what this
// lint complains about. The variant is part of the public API, so it is
// kept rather than renamed.
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// The input contained no expression at all.
    EmptyExpression,

    /// A token appeared where the parser did not expect it.
    UnexpectedToken {
        /// Text of the offending token.
        token: String,
        /// Position reported for the token.
        // NOTE(review): the unit (byte offset vs. token index) is decided by
        // the parser and is not visible here — confirm before relying on it.
        position: usize,
    },

    /// Opening and closing parentheses do not pair up.
    MismatchedParentheses,

    /// Any other parse failure, described by a free-form message.
    ParseError(String),
}
40
41impl std::fmt::Display for ParseError {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        match self {
44            Self::EmptyExpression => write!(f, "Empty license expression"),
45            Self::UnexpectedToken { token, position } => {
46                write!(f, "Unexpected token '{}' at position {}", token, position)
47            }
48            Self::MismatchedParentheses => write!(f, "Mismatched parentheses"),
49            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
50        }
51    }
52}
53
// Marker impl: the body is empty, so every `std::error::Error` method keeps
// its default behaviour (in particular, no underlying `source()` is reported).
impl std::error::Error for ParseError {}
55
/// A parsed license expression represented as an AST.
///
/// Leaves carry a key string; the three binary variants own both operands
/// through `Box`, so an expression is a plain owned tree.
///
/// `Eq` is derived alongside `PartialEq`: the tree contains only `String`s
/// and boxed sub-expressions, so equality is total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseExpression {
    /// A single license key
    License(String),

    /// A LicenseRef-scancode-* reference
    LicenseRef(String),

    /// AND operation: left AND right
    And {
        /// Operand on the left-hand side of `AND`.
        left: Box<LicenseExpression>,
        /// Operand on the right-hand side of `AND`.
        right: Box<LicenseExpression>,
    },

    /// OR operation: left OR right
    Or {
        /// Operand on the left-hand side of `OR`.
        left: Box<LicenseExpression>,
        /// Operand on the right-hand side of `OR`.
        right: Box<LicenseExpression>,
    },

    /// WITH operation: left WITH right (exception)
    With {
        /// Operand on the left-hand side of `WITH`.
        left: Box<LicenseExpression>,
        /// Operand on the right-hand side of `WITH` (the exception).
        right: Box<LicenseExpression>,
    },
}
83
84impl LicenseExpression {
85    /// Extract all license keys from the expression.
86    #[allow(dead_code)]
87    pub fn license_keys(&self) -> Vec<String> {
88        let mut keys = Vec::new();
89        self.collect_keys(&mut keys);
90        keys.sort();
91        keys.dedup();
92        keys
93    }
94
95    #[allow(dead_code)]
96    fn collect_keys(&self, keys: &mut Vec<String>) {
97        match self {
98            Self::License(key) => keys.push(key.clone()),
99            Self::LicenseRef(key) => keys.push(key.clone()),
100            Self::And { left, right } | Self::Or { left, right } | Self::With { left, right } => {
101                left.collect_keys(keys);
102                right.collect_keys(keys);
103            }
104        }
105    }
106
107    /// Create an AND expression combining multiple expressions.
108    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
109        if expressions.is_empty() {
110            None
111        } else if expressions.len() == 1 {
112            Some(expressions.into_iter().next().unwrap())
113        } else {
114            let mut iter = expressions.into_iter();
115            let mut result = iter.next().unwrap();
116            for expr in iter {
117                result = LicenseExpression::And {
118                    left: Box::new(result),
119                    right: Box::new(expr),
120                };
121            }
122            Some(result)
123        }
124    }
125
126    /// Create an OR expression combining multiple expressions.
127    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
128        if expressions.is_empty() {
129            None
130        } else if expressions.len() == 1 {
131            Some(expressions.into_iter().next().unwrap())
132        } else {
133            let mut iter = expressions.into_iter();
134            let mut result = iter.next().unwrap();
135            for expr in iter {
136                result = LicenseExpression::Or {
137                    left: Box::new(result),
138                    right: Box::new(expr),
139                };
140            }
141            Some(result)
142        }
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Builds a plain license-key leaf.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// The two-leaf input shared by the "multiple" helper tests.
    fn mit_and_apache() -> Vec<LicenseExpression> {
        vec![license("mit"), license("apache-2.0")]
    }

    /// Parses `expression` and returns those of its license keys that are
    /// not listed in `known`, in the order `license_keys` yields them.
    fn unknown_keys(expression: &str, known: &[&str]) -> Vec<String> {
        let known: HashSet<String> = known.iter().map(|key| key.to_string()).collect();
        let parsed = parse_expression(expression).unwrap();
        parsed
            .license_keys()
            .into_iter()
            .filter(|key| !known.contains(key))
            .collect()
    }

    #[test]
    fn test_and_helper_empty() {
        assert_eq!(LicenseExpression::and(Vec::new()), None);
    }

    #[test]
    fn test_and_helper_single() {
        let combined = LicenseExpression::and(vec![license("mit")]);
        assert_eq!(combined, Some(license("mit")));
    }

    #[test]
    fn test_and_helper_multiple() {
        let combined = LicenseExpression::and(mit_and_apache());
        assert!(matches!(combined, Some(LicenseExpression::And { .. })));
    }

    #[test]
    fn test_or_helper_empty() {
        assert_eq!(LicenseExpression::or(Vec::new()), None);
    }

    #[test]
    fn test_or_helper_single() {
        let combined = LicenseExpression::or(vec![license("mit")]);
        assert_eq!(combined, Some(license("mit")));
    }

    #[test]
    fn test_or_helper_multiple() {
        let combined = LicenseExpression::or(mit_and_apache());
        assert!(matches!(combined, Some(LicenseExpression::Or { .. })));
    }

    #[test]
    fn test_validate_expression_valid() {
        let unknown = unknown_keys("MIT AND Apache-2.0", &["mit", "apache-2.0"]);
        assert!(unknown.is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let unknown = unknown_keys("MIT AND UnknownKey", &["mit"]);
        assert_eq!(unknown, vec!["unknownkey".to_string()]);
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        // With nothing known, every parsed key is reported.
        let unknown = unknown_keys("MIT AND Apache-2.0", &[]);
        assert_eq!(unknown.len(), 2);
    }
}