Skip to main content

provenant/utils/spdx.rs

1use std::collections::{HashMap, HashSet};
2
3use crate::license_detection::expression::{
4    LicenseExpression, parse_expression, simplify_expression,
5};
6
/// Boolean relation used when several license expressions are combined into one.
///
/// The variant selects both the constructor applied to the parsed expressions
/// and the textual separator used by the string-based fallback.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ExpressionRelation {
    /// Operands are joined with `AND`.
    And,
    /// Operands are joined with `OR`.
    Or,
}
12
/// Boolean operator of the chain currently being rendered.
///
/// Used by the rendering helpers to pick the separator and to decide whether a
/// nested operand needs parentheses.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BooleanOperator {
    /// Chain rendered with the ` AND ` separator.
    And,
    /// Chain rendered with the ` OR ` separator.
    Or,
}
18
19pub fn combine_license_expressions(
20    expressions: impl IntoIterator<Item = String>,
21) -> Option<String> {
22    combine_license_expressions_with_relation(expressions, ExpressionRelation::And)
23}
24
25pub(crate) fn combine_license_expressions_with_relation(
26    expressions: impl IntoIterator<Item = String>,
27    relation: ExpressionRelation,
28) -> Option<String> {
29    let expressions: Vec<String> = expressions
30        .into_iter()
31        .map(|expression| expression.trim().to_string())
32        .filter(|expression| !expression.is_empty())
33        .collect();
34
35    if expressions.is_empty() {
36        return None;
37    }
38
39    combine_parsed_expressions(&expressions, relation)
40        .or_else(|| combine_license_expressions_fallback(&expressions, relation))
41}
42
43fn combine_parsed_expressions(
44    expressions: &[String],
45    relation: ExpressionRelation,
46) -> Option<String> {
47    let mut case_map = HashMap::new();
48    let parsed_expressions: Vec<LicenseExpression> = expressions
49        .iter()
50        .map(|expression| {
51            collect_term_case(expression, &mut case_map);
52            parse_expression(expression).ok()
53        })
54        .collect::<Option<Vec<_>>>()?;
55
56    let combined = match relation {
57        ExpressionRelation::And => LicenseExpression::and(parsed_expressions),
58        ExpressionRelation::Or => LicenseExpression::or(parsed_expressions),
59    }?;
60
61    let simplified = simplify_expression(&combined);
62    Some(render_expression_with_case_map(&simplified, &case_map))
63}
64
65fn combine_license_expressions_fallback(
66    expressions: &[String],
67    relation: ExpressionRelation,
68) -> Option<String> {
69    let unique_expressions: HashSet<String> = expressions.iter().cloned().collect();
70    if unique_expressions.is_empty() {
71        return None;
72    }
73
74    let mut sorted_expressions: Vec<String> = unique_expressions.into_iter().collect();
75    sorted_expressions.sort();
76
77    let separator = match relation {
78        ExpressionRelation::And => " AND ",
79        ExpressionRelation::Or => " OR ",
80    };
81
82    Some(
83        sorted_expressions
84            .iter()
85            .map(|expr| wrap_compound_expression(expr))
86            .collect::<Vec<_>>()
87            .join(separator),
88    )
89}
90
/// Records the original spelling of every term found in `expression`.
///
/// A term is a maximal run of alphanumeric characters and `-`, `.`, `_`, `+`.
/// The keywords `AND`, `OR` and `WITH` (in any ASCII case) are skipped. Each
/// remaining term is stored under its ASCII-lowercased form; an existing entry
/// is never overwritten, so the first spelling seen wins.
fn collect_term_case(expression: &str, case_map: &mut HashMap<String, String>) {
    let is_term_char = |ch: char| ch.is_alphanumeric() || matches!(ch, '-' | '.' | '_' | '+');

    for term in expression.split(|ch: char| !is_term_char(ch)) {
        // Consecutive separators produce empty pieces; they carry no term.
        if term.is_empty() {
            continue;
        }

        let is_keyword = ["AND", "OR", "WITH"]
            .iter()
            .any(|keyword| term.eq_ignore_ascii_case(keyword));
        if is_keyword {
            continue;
        }

        case_map
            .entry(term.to_ascii_lowercase())
            .or_insert_with(|| term.to_string());
    }
}
122
123fn render_expression_with_case_map(
124    expression: &LicenseExpression,
125    case_map: &HashMap<String, String>,
126) -> String {
127    match expression {
128        LicenseExpression::License(key) | LicenseExpression::LicenseRef(key) => {
129            case_map.get(key).cloned().unwrap_or_else(|| key.clone())
130        }
131        LicenseExpression::And { .. } => {
132            render_flat_boolean_chain(expression, BooleanOperator::And, case_map)
133        }
134        LicenseExpression::Or { .. } => {
135            render_flat_boolean_chain(expression, BooleanOperator::Or, case_map)
136        }
137        LicenseExpression::With { left, right } => format!(
138            "{} WITH {}",
139            render_expression_with_case_map(left, case_map),
140            render_expression_with_case_map(right, case_map)
141        ),
142    }
143}
144
145fn render_flat_boolean_chain(
146    expression: &LicenseExpression,
147    operator: BooleanOperator,
148    case_map: &HashMap<String, String>,
149) -> String {
150    let mut parts = Vec::new();
151    collect_boolean_chain(expression, operator, &mut parts);
152
153    let separator = match operator {
154        BooleanOperator::And => " AND ",
155        BooleanOperator::Or => " OR ",
156    };
157
158    parts
159        .into_iter()
160        .map(|part| render_boolean_operand(part, operator, case_map))
161        .collect::<Vec<_>>()
162        .join(separator)
163}
164
165fn collect_boolean_chain<'a>(
166    expression: &'a LicenseExpression,
167    operator: BooleanOperator,
168    parts: &mut Vec<&'a LicenseExpression>,
169) {
170    match (operator, expression) {
171        (BooleanOperator::And, LicenseExpression::And { left, right })
172        | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
173            collect_boolean_chain(left, operator, parts);
174            collect_boolean_chain(right, operator, parts);
175        }
176        _ => parts.push(expression),
177    }
178}
179
180fn render_boolean_operand(
181    expression: &LicenseExpression,
182    parent_operator: BooleanOperator,
183    case_map: &HashMap<String, String>,
184) -> String {
185    match expression {
186        LicenseExpression::And { .. } => match parent_operator {
187            BooleanOperator::And => render_expression_with_case_map(expression, case_map),
188            BooleanOperator::Or => format!(
189                "({})",
190                render_expression_with_case_map(expression, case_map)
191            ),
192        },
193        LicenseExpression::Or { .. } => match parent_operator {
194            BooleanOperator::Or => render_expression_with_case_map(expression, case_map),
195            BooleanOperator::And => format!(
196                "({})",
197                render_expression_with_case_map(expression, case_map)
198            ),
199        },
200        _ => render_expression_with_case_map(expression, case_map),
201    }
202}
203
/// Wraps `expression` in parentheses when it is compound and not already
/// enclosed by one matching pair of parentheses.
///
/// An expression counts as compound when it contains a space. Checking only the
/// first and last characters is not enough: `(a OR b) AND (c OR d)` starts with
/// `(` and ends with `)` but is not enclosed as a whole, and leaving it
/// unwrapped would let a surrounding join change how the operators group.
fn wrap_compound_expression(expression: &str) -> String {
    if expression.contains(' ') && !is_fully_parenthesized(expression) {
        format!("({})", expression)
    } else {
        expression.to_string()
    }
}

/// Returns `true` when the leading `(` of `expression` is closed by its final `)`.
fn is_fully_parenthesized(expression: &str) -> bool {
    if !(expression.starts_with('(') && expression.ends_with(')')) {
        return false;
    }

    // The final `)` is a single byte, so this is its byte offset.
    let last = expression.len() - 1;
    let mut depth = 0usize;

    for (index, ch) in expression.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                depth = match depth.checked_sub(1) {
                    Some(remaining) => remaining,
                    // More closers than openers: not a well-formed enclosure.
                    None => return false,
                };
                // The outermost group closed before the end of the string, so
                // the leading `(` does not enclose the whole expression.
                if depth == 0 && index != last {
                    return false;
                }
            }
            _ => {}
        }
    }

    depth == 0
}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned `String`s the functions under test take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn combine_license_expressions_preserves_spdx_case() {
        let combined = combine_license_expressions(owned(&["MIT", "Apache-2.0"]));

        assert_eq!(combined, Some("MIT AND Apache-2.0".to_string()));
    }

    #[test]
    fn combine_license_expressions_flattens_same_operator_parentheses() {
        let combined = combine_license_expressions(owned(&["MIT", "ICU", "Unicode-TOU"]));

        assert_eq!(combined, Some("MIT AND ICU AND Unicode-TOU".to_string()));
    }

    #[test]
    fn combine_license_expressions_does_not_absorb_with_expressions() {
        let inputs = owned(&["GPL-2.0 WITH Classpath-exception-2.0", "GPL-2.0"]);

        let combined = combine_license_expressions(inputs);

        assert_eq!(
            combined,
            Some("GPL-2.0 WITH Classpath-exception-2.0 AND GPL-2.0".to_string())
        );
    }

    #[test]
    fn combine_license_expressions_simplifies_absorbed_and_expression() {
        let inputs = owned(&["Apache-2.0 OR MIT", "Apache-2.0"]);

        let combined = combine_license_expressions(inputs);

        assert_eq!(combined, Some("Apache-2.0".to_string()));
    }

    #[test]
    fn combine_license_expressions_with_relation_simplifies_absorbed_or_expression() {
        let inputs = owned(&["MIT AND Apache-2.0", "MIT"]);

        let combined = combine_license_expressions_with_relation(inputs, ExpressionRelation::Or);

        assert_eq!(combined, Some("MIT".to_string()));
    }
}