Skip to main content

provenant/utils/
spdx.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5
6use crate::license_detection::expression::{
7    LicenseExpression, parse_expression, simplify_expression,
8    simplify_expression_preserving_structure,
9};
10
/// Boolean relation used to join several license expressions into one.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone`/`Copy` so
/// values can be compared and printed in assertions and diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ExpressionRelation {
    /// Join the expressions with `AND`.
    And,
    /// Join the expressions with `OR`.
    Or,
}
16
/// Operator of the boolean chain currently being rendered.
///
/// It selects the separator text and decides which nested operands need
/// parentheses. Derives `Debug`, `PartialEq` and `Eq` in addition to
/// `Clone`/`Copy` so values can be compared and printed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BooleanOperator {
    /// The chain is joined with `AND`.
    And,
    /// The chain is joined with `OR`.
    Or,
}
22
23pub fn combine_license_expressions(
24    expressions: impl IntoIterator<Item = String>,
25) -> Option<String> {
26    combine_license_expressions_with_relation(expressions, ExpressionRelation::And)
27}
28
29pub fn combine_license_expressions_preserving_structure(
30    expressions: impl IntoIterator<Item = String>,
31) -> Option<String> {
32    combine_license_expressions_with_relation_and_mode(expressions, ExpressionRelation::And, true)
33}
34
35pub(crate) fn combine_license_expressions_with_relation_preserving_structure(
36    expressions: impl IntoIterator<Item = String>,
37    relation: ExpressionRelation,
38) -> Option<String> {
39    combine_license_expressions_with_relation_and_mode(expressions, relation, true)
40}
41
42pub(crate) fn combine_license_expressions_with_relation(
43    expressions: impl IntoIterator<Item = String>,
44    relation: ExpressionRelation,
45) -> Option<String> {
46    combine_license_expressions_with_relation_and_mode(expressions, relation, false)
47}
48
49fn combine_license_expressions_with_relation_and_mode(
50    expressions: impl IntoIterator<Item = String>,
51    relation: ExpressionRelation,
52    preserve_structure: bool,
53) -> Option<String> {
54    let expressions: Vec<String> = expressions
55        .into_iter()
56        .map(|expression| expression.trim().to_string())
57        .filter(|expression| !expression.is_empty())
58        .collect();
59
60    if expressions.is_empty() {
61        return None;
62    }
63
64    combine_parsed_expressions(&expressions, relation, preserve_structure)
65        .or_else(|| combine_license_expressions_fallback(&expressions, relation))
66}
67
68fn combine_parsed_expressions(
69    expressions: &[String],
70    relation: ExpressionRelation,
71    preserve_structure: bool,
72) -> Option<String> {
73    let mut case_map = HashMap::new();
74    let parsed_expressions: Vec<LicenseExpression> = expressions
75        .iter()
76        .map(|expression| {
77            collect_term_case(expression, &mut case_map);
78            parse_expression(expression).ok()
79        })
80        .collect::<Option<Vec<_>>>()?;
81
82    let combined = match relation {
83        ExpressionRelation::And => LicenseExpression::and(parsed_expressions),
84        ExpressionRelation::Or => LicenseExpression::or(parsed_expressions),
85    }?;
86
87    let combined = if preserve_structure {
88        simplify_expression_preserving_structure(&combined)
89    } else {
90        simplify_expression(&combined)
91    };
92    Some(render_expression_with_case_map(&combined, &case_map))
93}
94
95fn combine_license_expressions_fallback(
96    expressions: &[String],
97    relation: ExpressionRelation,
98) -> Option<String> {
99    let unique_expressions: HashSet<String> = expressions.iter().cloned().collect();
100    if unique_expressions.is_empty() {
101        return None;
102    }
103
104    let mut sorted_expressions: Vec<String> = unique_expressions.into_iter().collect();
105    sorted_expressions.sort();
106
107    let separator = match relation {
108        ExpressionRelation::And => " AND ",
109        ExpressionRelation::Or => " OR ",
110    };
111
112    Some(
113        sorted_expressions
114            .iter()
115            .map(|expr| wrap_compound_expression(expr))
116            .collect::<Vec<_>>()
117            .join(separator),
118    )
119}
120
/// Records the original spelling of every term found in `expression`.
///
/// A term is a maximal run of alphanumeric characters or `-`, `.`, `_`, `+`.
/// The operators `AND`, `OR` and `WITH` (in any ASCII case) are skipped. Each
/// term is stored under its ASCII-lowercased form; the first spelling seen for
/// a key wins and later spellings are ignored.
fn collect_term_case(expression: &str, case_map: &mut HashMap<String, String>) {
    let is_term_char = |ch: char| ch.is_alphanumeric() || matches!(ch, '-' | '.' | '_' | '+');

    // Splitting on every non-term character yields exactly the maximal runs of
    // term characters, plus empty pieces between adjacent separators.
    for term in expression.split(|ch: char| !is_term_char(ch)) {
        if term.is_empty() {
            continue;
        }

        let is_operator = ["AND", "OR", "WITH"]
            .iter()
            .any(|keyword| term.eq_ignore_ascii_case(keyword));
        if is_operator {
            continue;
        }

        case_map
            .entry(term.to_ascii_lowercase())
            .or_insert_with(|| term.to_string());
    }
}
152
153fn render_expression_with_case_map(
154    expression: &LicenseExpression,
155    case_map: &HashMap<String, String>,
156) -> String {
157    match expression {
158        LicenseExpression::License(key) | LicenseExpression::LicenseRef(key) => {
159            case_map.get(key).cloned().unwrap_or_else(|| key.clone())
160        }
161        LicenseExpression::And { .. } => {
162            render_flat_boolean_chain(expression, BooleanOperator::And, case_map)
163        }
164        LicenseExpression::Or { .. } => {
165            render_flat_boolean_chain(expression, BooleanOperator::Or, case_map)
166        }
167        LicenseExpression::With { left, right } => format!(
168            "{} WITH {}",
169            render_expression_with_case_map(left, case_map),
170            render_expression_with_case_map(right, case_map)
171        ),
172    }
173}
174
175fn render_flat_boolean_chain(
176    expression: &LicenseExpression,
177    operator: BooleanOperator,
178    case_map: &HashMap<String, String>,
179) -> String {
180    let mut parts = Vec::new();
181    collect_boolean_chain(expression, operator, &mut parts);
182
183    let separator = match operator {
184        BooleanOperator::And => " AND ",
185        BooleanOperator::Or => " OR ",
186    };
187
188    parts
189        .into_iter()
190        .map(|part| render_boolean_operand(part, operator, case_map))
191        .collect::<Vec<_>>()
192        .join(separator)
193}
194
195fn collect_boolean_chain<'a>(
196    expression: &'a LicenseExpression,
197    operator: BooleanOperator,
198    parts: &mut Vec<&'a LicenseExpression>,
199) {
200    match (operator, expression) {
201        (BooleanOperator::And, LicenseExpression::And { left, right })
202        | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
203            collect_boolean_chain(left, operator, parts);
204            collect_boolean_chain(right, operator, parts);
205        }
206        _ => parts.push(expression),
207    }
208}
209
210fn render_boolean_operand(
211    expression: &LicenseExpression,
212    parent_operator: BooleanOperator,
213    case_map: &HashMap<String, String>,
214) -> String {
215    match expression {
216        LicenseExpression::And { .. } => match parent_operator {
217            BooleanOperator::And => render_expression_with_case_map(expression, case_map),
218            BooleanOperator::Or => format!(
219                "({})",
220                render_expression_with_case_map(expression, case_map)
221            ),
222        },
223        LicenseExpression::Or { .. } => match parent_operator {
224            BooleanOperator::Or => render_expression_with_case_map(expression, case_map),
225            BooleanOperator::And => format!(
226                "({})",
227                render_expression_with_case_map(expression, case_map)
228            ),
229        },
230        _ => render_expression_with_case_map(expression, case_map),
231    }
232}
233
/// Wraps `expression` in parentheses when it is compound (contains a space)
/// and is not already enclosed by one matching pair of parentheses.
///
/// Checking only the first and last characters is not enough: an expression
/// such as `(A AND B) OR (C AND D)` starts with `(` and ends with `)` but is
/// not grouped as a whole, so joining it with another operator would change
/// its meaning. Such expressions are wrapped.
fn wrap_compound_expression(expression: &str) -> String {
    if expression.contains(' ') && !is_wrapped_in_matching_parens(expression) {
        format!("({})", expression)
    } else {
        expression.to_string()
    }
}

/// Returns `true` when the leading `(` of `expression` is closed by its final
/// `)`, i.e. the whole expression is a single parenthesized group.
fn is_wrapped_in_matching_parens(expression: &str) -> bool {
    if !(expression.starts_with('(') && expression.ends_with(')')) {
        return false;
    }

    // Non-empty here, and the final `)` is a single byte, so this is its index.
    let last_index = expression.len() - 1;
    let mut depth = 0usize;

    for (index, ch) in expression.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                depth = match depth.checked_sub(1) {
                    Some(remaining) => remaining,
                    // More closing than opening parentheses.
                    None => return false,
                };
                // The opening group closed before the end of the text.
                if depth == 0 && index != last_index {
                    return false;
                }
            }
            _ => {}
        }
    }

    depth == 0
}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned `String`s the combine functions consume.
    fn owned(expressions: &[&str]) -> Vec<String> {
        expressions
            .iter()
            .map(|expression| (*expression).to_owned())
            .collect()
    }

    #[test]
    fn combine_license_expressions_preserves_spdx_case() {
        let combined = combine_license_expressions(owned(&["MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0 AND MIT"));
    }

    #[test]
    fn combine_license_expressions_flattens_same_operator_parentheses() {
        let combined = combine_license_expressions(owned(&["MIT", "ICU", "Unicode-TOU"]));

        assert_eq!(combined.as_deref(), Some("ICU AND MIT AND Unicode-TOU"));
    }

    #[test]
    fn combine_license_expressions_does_not_absorb_with_expressions() {
        let combined = combine_license_expressions(owned(&[
            "GPL-2.0 WITH Classpath-exception-2.0",
            "GPL-2.0",
        ]));

        assert_eq!(
            combined.as_deref(),
            Some("GPL-2.0 AND GPL-2.0 WITH Classpath-exception-2.0")
        );
    }

    #[test]
    fn combine_license_expressions_simplifies_absorbed_and_expression() {
        let combined = combine_license_expressions(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0"));
    }

    #[test]
    fn combine_license_expressions_preserving_structure_keeps_distinct_nested_operands() {
        let combined =
            combine_license_expressions_preserving_structure(owned(&["MIT", "Apache-2.0 OR MIT"]));

        assert_eq!(combined.as_deref(), Some("MIT AND (Apache-2.0 OR MIT)"));
    }

    #[test]
    fn combine_license_expressions_with_relation_simplifies_absorbed_or_expression() {
        let combined = combine_license_expressions_with_relation(
            owned(&["MIT AND Apache-2.0", "MIT"]),
            ExpressionRelation::Or,
        );

        assert_eq!(combined.as_deref(), Some("MIT"));
    }
}