Skip to main content

provenant/utils/
spdx.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5
6use crate::license_detection::expression::{
7    LicenseExpression, parse_expression, simplify_expression,
8};
9
/// Boolean relation used to join several license expressions into one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ExpressionRelation {
    /// Join the expressions with `AND`.
    And,
    /// Join the expressions with `OR`.
    Or,
}
15
/// Operator of the boolean chain currently being rendered.
///
/// The renderer uses it to pick the separator (`" AND "` / `" OR "`) and to
/// decide which nested operands need parentheses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BooleanOperator {
    /// The chain is joined with `AND`.
    And,
    /// The chain is joined with `OR`.
    Or,
}
21
22pub fn combine_license_expressions(
23    expressions: impl IntoIterator<Item = String>,
24) -> Option<String> {
25    combine_license_expressions_with_relation(expressions, ExpressionRelation::And)
26}
27
28pub(crate) fn combine_license_expressions_with_relation(
29    expressions: impl IntoIterator<Item = String>,
30    relation: ExpressionRelation,
31) -> Option<String> {
32    let expressions: Vec<String> = expressions
33        .into_iter()
34        .map(|expression| expression.trim().to_string())
35        .filter(|expression| !expression.is_empty())
36        .collect();
37
38    if expressions.is_empty() {
39        return None;
40    }
41
42    combine_parsed_expressions(&expressions, relation)
43        .or_else(|| combine_license_expressions_fallback(&expressions, relation))
44}
45
46fn combine_parsed_expressions(
47    expressions: &[String],
48    relation: ExpressionRelation,
49) -> Option<String> {
50    let mut case_map = HashMap::new();
51    let parsed_expressions: Vec<LicenseExpression> = expressions
52        .iter()
53        .map(|expression| {
54            collect_term_case(expression, &mut case_map);
55            parse_expression(expression).ok()
56        })
57        .collect::<Option<Vec<_>>>()?;
58
59    let combined = match relation {
60        ExpressionRelation::And => LicenseExpression::and(parsed_expressions),
61        ExpressionRelation::Or => LicenseExpression::or(parsed_expressions),
62    }?;
63
64    let combined = simplify_expression(&combined);
65    Some(render_expression_with_case_map(&combined, &case_map))
66}
67
68fn combine_license_expressions_fallback(
69    expressions: &[String],
70    relation: ExpressionRelation,
71) -> Option<String> {
72    let unique_expressions: HashSet<String> = expressions.iter().cloned().collect();
73    if unique_expressions.is_empty() {
74        return None;
75    }
76
77    let mut sorted_expressions: Vec<String> = unique_expressions.into_iter().collect();
78    sorted_expressions.sort();
79
80    let separator = match relation {
81        ExpressionRelation::And => " AND ",
82        ExpressionRelation::Or => " OR ",
83    };
84
85    Some(
86        sorted_expressions
87            .iter()
88            .map(|expr| wrap_compound_expression(expr))
89            .collect::<Vec<_>>()
90            .join(separator),
91    )
92}
93
/// Records the spelling of every term found in `expression`.
///
/// A term is a maximal run of alphanumeric characters, `-`, `.`, `_` or `+`.
/// The operator keywords `AND`, `OR` and `WITH` (matched ASCII
/// case-insensitively) are skipped. Each remaining term is stored in
/// `case_map` under its ASCII-lowercased form; the first spelling seen for a
/// key is kept and later ones are ignored.
fn collect_term_case(expression: &str, case_map: &mut HashMap<String, String>) {
    fn is_term_char(ch: char) -> bool {
        ch.is_alphanumeric() || matches!(ch, '-' | '.' | '_' | '+')
    }

    let terms = expression
        .split(|ch: char| !is_term_char(ch))
        .filter(|term| !term.is_empty());

    for term in terms {
        let is_operator = ["AND", "OR", "WITH"]
            .iter()
            .any(|keyword| term.eq_ignore_ascii_case(keyword));
        if is_operator {
            continue;
        }

        case_map
            .entry(term.to_ascii_lowercase())
            .or_insert_with(|| term.to_string());
    }
}
125
126fn render_expression_with_case_map(
127    expression: &LicenseExpression,
128    case_map: &HashMap<String, String>,
129) -> String {
130    match expression {
131        LicenseExpression::License(key) | LicenseExpression::LicenseRef(key) => {
132            case_map.get(key).cloned().unwrap_or_else(|| key.clone())
133        }
134        LicenseExpression::And { .. } => {
135            render_flat_boolean_chain(expression, BooleanOperator::And, case_map)
136        }
137        LicenseExpression::Or { .. } => {
138            render_flat_boolean_chain(expression, BooleanOperator::Or, case_map)
139        }
140        LicenseExpression::With { left, right } => format!(
141            "{} WITH {}",
142            render_expression_with_case_map(left, case_map),
143            render_expression_with_case_map(right, case_map)
144        ),
145    }
146}
147
148fn render_flat_boolean_chain(
149    expression: &LicenseExpression,
150    operator: BooleanOperator,
151    case_map: &HashMap<String, String>,
152) -> String {
153    let mut parts = Vec::new();
154    collect_boolean_chain(expression, operator, &mut parts);
155
156    let separator = match operator {
157        BooleanOperator::And => " AND ",
158        BooleanOperator::Or => " OR ",
159    };
160
161    parts
162        .into_iter()
163        .map(|part| render_boolean_operand(part, operator, case_map))
164        .collect::<Vec<_>>()
165        .join(separator)
166}
167
168fn collect_boolean_chain<'a>(
169    expression: &'a LicenseExpression,
170    operator: BooleanOperator,
171    parts: &mut Vec<&'a LicenseExpression>,
172) {
173    match (operator, expression) {
174        (BooleanOperator::And, LicenseExpression::And { left, right })
175        | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
176            collect_boolean_chain(left, operator, parts);
177            collect_boolean_chain(right, operator, parts);
178        }
179        _ => parts.push(expression),
180    }
181}
182
183fn render_boolean_operand(
184    expression: &LicenseExpression,
185    parent_operator: BooleanOperator,
186    case_map: &HashMap<String, String>,
187) -> String {
188    match expression {
189        LicenseExpression::And { .. } => match parent_operator {
190            BooleanOperator::And => render_expression_with_case_map(expression, case_map),
191            BooleanOperator::Or => format!(
192                "({})",
193                render_expression_with_case_map(expression, case_map)
194            ),
195        },
196        LicenseExpression::Or { .. } => match parent_operator {
197            BooleanOperator::Or => render_expression_with_case_map(expression, case_map),
198            BooleanOperator::And => format!(
199                "({})",
200                render_expression_with_case_map(expression, case_map)
201            ),
202        },
203        _ => render_expression_with_case_map(expression, case_map),
204    }
205}
206
/// Wraps a compound expression in parentheses so it can be joined safely.
///
/// An expression is treated as compound when it contains a space. It is left
/// untouched when it has no space, or when it is already a single
/// parenthesized group, i.e. its leading `(` is closed by its final `)`.
///
/// Checking only the first and last characters is not enough:
/// `(A OR B) AND (C OR D)` starts with `(` and ends with `)` but is not one
/// group, and leaving it unwrapped would let the surrounding operator change
/// its meaning once joined with other expressions.
fn wrap_compound_expression(expression: &str) -> String {
    /// Returns `true` when the first `(` of `expression` is matched by its
    /// last character.
    fn is_single_group(expression: &str) -> bool {
        if !(expression.starts_with('(') && expression.ends_with(')')) {
            return false;
        }

        // `)` is one byte wide, so the final character starts at `len - 1`.
        let last_index = expression.len() - 1;
        let mut depth = 0usize;
        for (index, ch) in expression.char_indices() {
            match ch {
                '(' => depth += 1,
                ')' => {
                    // The string starts with `(`, so `depth` is at least 1
                    // here; we return as soon as it drops back to zero.
                    depth -= 1;
                    if depth == 0 {
                        return index == last_index;
                    }
                }
                _ => {}
            }
        }

        // The leading `(` is never closed.
        false
    }

    if expression.contains(' ') && !is_single_group(expression) {
        format!("({})", expression)
    } else {
        expression.to_string()
    }
}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned `String`s the functions under
    /// test consume.
    fn owned(expressions: &[&str]) -> Vec<String> {
        expressions.iter().copied().map(String::from).collect()
    }

    #[test]
    fn combine_license_expressions_preserves_spdx_case() {
        let combined = combine_license_expressions(owned(&["MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0 AND MIT"));
    }

    #[test]
    fn combine_license_expressions_flattens_same_operator_parentheses() {
        let combined = combine_license_expressions(owned(&["MIT", "ICU", "Unicode-TOU"]));

        assert_eq!(combined.as_deref(), Some("ICU AND MIT AND Unicode-TOU"));
    }

    #[test]
    fn combine_license_expressions_does_not_absorb_with_expressions() {
        let inputs = owned(&["GPL-2.0 WITH Classpath-exception-2.0", "GPL-2.0"]);

        let combined = combine_license_expressions(inputs);

        assert_eq!(
            combined.as_deref(),
            Some("GPL-2.0 AND GPL-2.0 WITH Classpath-exception-2.0")
        );
    }

    #[test]
    fn combine_license_expressions_simplifies_absorbed_and_expression() {
        let inputs = owned(&["Apache-2.0 OR MIT", "Apache-2.0"]);

        let combined = combine_license_expressions(inputs);

        assert_eq!(combined.as_deref(), Some("Apache-2.0"));
    }

    #[test]
    fn combine_license_expressions_with_relation_simplifies_absorbed_or_expression() {
        let inputs = owned(&["MIT AND Apache-2.0", "MIT"]);

        let combined = combine_license_expressions_with_relation(inputs, ExpressionRelation::Or);

        assert_eq!(combined.as_deref(), Some("MIT"));
    }
}