Skip to main content

provenant/utils/
spdx.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5
6use crate::license_detection::expression::{
7    LicenseExpression, expression_to_string, parse_expression, simplify_expression,
8    simplify_expression_preserving_structure,
9};
10
/// Boolean relation used when several license expressions are merged into one.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the relation can be compared
/// and printed in assertions and diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ExpressionRelation {
    /// Join the expressions with `AND`.
    And,
    /// Join the expressions with `OR`.
    Or,
}
16
/// Operator of the boolean chain currently being rendered.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the operator can be compared
/// and printed in assertions and diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BooleanOperator {
    /// Operands are separated by ` AND `.
    And,
    /// Operands are separated by ` OR `.
    Or,
}
22
23pub fn combine_license_expressions(
24    expressions: impl IntoIterator<Item = String>,
25) -> Option<String> {
26    combine_license_expressions_with_relation(expressions, ExpressionRelation::And)
27}
28
29pub fn combine_license_expressions_preserving_structure(
30    expressions: impl IntoIterator<Item = String>,
31) -> Option<String> {
32    combine_license_expressions_with_relation_and_mode(expressions, ExpressionRelation::And, true)
33}
34
35pub fn select_primary_license_expression(
36    expressions: impl IntoIterator<Item = String>,
37) -> Option<String> {
38    let mut unique = Vec::new();
39
40    for expression in expressions {
41        let trimmed = expression.trim();
42        if trimmed.is_empty() {
43            continue;
44        }
45
46        if !unique.iter().any(|existing: &String| existing == trimmed) {
47            unique.push(trimmed.to_string());
48        }
49    }
50
51    if unique.is_empty() {
52        return None;
53    }
54
55    if unique.len() == 1 {
56        return unique.into_iter().next();
57    }
58
59    let joined: Vec<String> = unique
60        .iter()
61        .filter(|expression| is_joined_expression(expression))
62        .cloned()
63        .collect();
64
65    if joined.len() != 1 {
66        return None;
67    }
68
69    let candidate = &joined[0];
70    unique
71        .iter()
72        .filter(|expression| *expression != candidate)
73        .all(|expression| expression_covers(candidate, expression))
74        .then(|| candidate.clone())
75}
76
77pub(crate) fn combine_license_expressions_with_relation_preserving_structure(
78    expressions: impl IntoIterator<Item = String>,
79    relation: ExpressionRelation,
80) -> Option<String> {
81    combine_license_expressions_with_relation_and_mode(expressions, relation, true)
82}
83
84pub(crate) fn combine_license_expressions_with_relation(
85    expressions: impl IntoIterator<Item = String>,
86    relation: ExpressionRelation,
87) -> Option<String> {
88    combine_license_expressions_with_relation_and_mode(expressions, relation, false)
89}
90
91fn combine_license_expressions_with_relation_and_mode(
92    expressions: impl IntoIterator<Item = String>,
93    relation: ExpressionRelation,
94    preserve_structure: bool,
95) -> Option<String> {
96    let expressions: Vec<String> = expressions
97        .into_iter()
98        .map(|expression| expression.trim().to_string())
99        .filter(|expression| !expression.is_empty())
100        .collect();
101
102    if expressions.is_empty() {
103        return None;
104    }
105
106    combine_parsed_expressions(&expressions, relation, preserve_structure)
107        .or_else(|| combine_license_expressions_fallback(&expressions, relation))
108}
109
110fn combine_parsed_expressions(
111    expressions: &[String],
112    relation: ExpressionRelation,
113    preserve_structure: bool,
114) -> Option<String> {
115    let mut case_map = HashMap::new();
116    let parsed_expressions: Vec<LicenseExpression> = expressions
117        .iter()
118        .map(|expression| {
119            collect_term_case(expression, &mut case_map);
120            parse_expression(expression).ok()
121        })
122        .collect::<Option<Vec<_>>>()?;
123
124    let combined = match relation {
125        ExpressionRelation::And => LicenseExpression::and(parsed_expressions),
126        ExpressionRelation::Or => LicenseExpression::or(parsed_expressions),
127    }?;
128
129    let combined = if preserve_structure {
130        simplify_expression_preserving_structure(&combined)
131    } else {
132        simplify_expression(&combined)
133    };
134    Some(render_expression_with_case_map(&combined, &case_map))
135}
136
137fn combine_license_expressions_fallback(
138    expressions: &[String],
139    relation: ExpressionRelation,
140) -> Option<String> {
141    let unique_expressions: HashSet<String> = expressions.iter().cloned().collect();
142    if unique_expressions.is_empty() {
143        return None;
144    }
145
146    let mut sorted_expressions: Vec<String> = unique_expressions.into_iter().collect();
147    sorted_expressions.sort();
148
149    let separator = match relation {
150        ExpressionRelation::And => " AND ",
151        ExpressionRelation::Or => " OR ",
152    };
153
154    Some(
155        sorted_expressions
156            .iter()
157            .map(|expr| wrap_compound_expression(expr))
158            .collect::<Vec<_>>()
159            .join(separator),
160    )
161}
162
/// Records the original spelling of every term found in `expression`.
///
/// A term is a maximal run of alphanumeric characters or `-`, `.`, `_`, `+`.
/// The keywords `AND`, `OR` and `WITH` (in any ASCII case) are skipped. Each
/// remaining term is stored under its ASCII-lowercased form; an existing entry
/// is never overwritten, so the first spelling seen wins.
fn collect_term_case(expression: &str, case_map: &mut HashMap<String, String>) {
    let is_term_char = |ch: char| ch.is_alphanumeric() || matches!(ch, '-' | '.' | '_' | '+');

    // Splitting on every non-term character leaves exactly the maximal term runs
    // (plus empty pieces between adjacent separators, which are dropped).
    let terms = expression
        .split(|ch: char| !is_term_char(ch))
        .filter(|term| !term.is_empty());

    for term in terms {
        let is_operator = ["AND", "OR", "WITH"]
            .iter()
            .any(|keyword| term.eq_ignore_ascii_case(keyword));
        if is_operator {
            continue;
        }

        case_map
            .entry(term.to_ascii_lowercase())
            .or_insert_with(|| term.to_string());
    }
}
194
195fn render_expression_with_case_map(
196    expression: &LicenseExpression,
197    case_map: &HashMap<String, String>,
198) -> String {
199    match expression {
200        LicenseExpression::License(key) | LicenseExpression::LicenseRef(key) => {
201            case_map.get(key).cloned().unwrap_or_else(|| key.clone())
202        }
203        LicenseExpression::And { .. } => {
204            render_flat_boolean_chain(expression, BooleanOperator::And, case_map)
205        }
206        LicenseExpression::Or { .. } => {
207            render_flat_boolean_chain(expression, BooleanOperator::Or, case_map)
208        }
209        LicenseExpression::With { left, right } => format!(
210            "{} WITH {}",
211            render_expression_with_case_map(left, case_map),
212            render_expression_with_case_map(right, case_map)
213        ),
214    }
215}
216
217fn render_flat_boolean_chain(
218    expression: &LicenseExpression,
219    operator: BooleanOperator,
220    case_map: &HashMap<String, String>,
221) -> String {
222    let mut parts = Vec::new();
223    collect_boolean_chain(expression, operator, &mut parts);
224
225    let separator = match operator {
226        BooleanOperator::And => " AND ",
227        BooleanOperator::Or => " OR ",
228    };
229
230    parts
231        .into_iter()
232        .map(|part| render_boolean_operand(part, operator, case_map))
233        .collect::<Vec<_>>()
234        .join(separator)
235}
236
237fn collect_boolean_chain<'a>(
238    expression: &'a LicenseExpression,
239    operator: BooleanOperator,
240    parts: &mut Vec<&'a LicenseExpression>,
241) {
242    match (operator, expression) {
243        (BooleanOperator::And, LicenseExpression::And { left, right })
244        | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
245            collect_boolean_chain(left, operator, parts);
246            collect_boolean_chain(right, operator, parts);
247        }
248        _ => parts.push(expression),
249    }
250}
251
252fn render_boolean_operand(
253    expression: &LicenseExpression,
254    parent_operator: BooleanOperator,
255    case_map: &HashMap<String, String>,
256) -> String {
257    match expression {
258        LicenseExpression::And { .. } => match parent_operator {
259            BooleanOperator::And => render_expression_with_case_map(expression, case_map),
260            BooleanOperator::Or => format!(
261                "({})",
262                render_expression_with_case_map(expression, case_map)
263            ),
264        },
265        LicenseExpression::Or { .. } => match parent_operator {
266            BooleanOperator::Or => render_expression_with_case_map(expression, case_map),
267            BooleanOperator::And => format!(
268                "({})",
269                render_expression_with_case_map(expression, case_map)
270            ),
271        },
272        _ => render_expression_with_case_map(expression, case_map),
273    }
274}
275
/// Wraps `expression` in parentheses when it looks compound (contains a space)
/// and is not already enclosed in one outer pair of parentheses.
///
/// Checking only the first and last characters is not enough:
/// `(A AND B) OR (C AND D)` starts with `(` and ends with `)` but is not
/// wrapped as a whole, so joining it unwrapped with ` AND ` would regroup the
/// expression. The outer pair is therefore verified by matching parentheses.
fn wrap_compound_expression(expression: &str) -> String {
    if expression.contains(' ') && !is_fully_parenthesized(expression) {
        format!("({})", expression)
    } else {
        expression.to_string()
    }
}

/// Returns `true` when the leading `(` of `expression` is closed by its final
/// character, i.e. a single outer pair encloses the whole text.
///
/// Text with unbalanced parentheses is reported as not enclosed.
fn is_fully_parenthesized(expression: &str) -> bool {
    if !(expression.starts_with('(') && expression.ends_with(')')) {
        return false;
    }

    // `)` is one byte long, so this is the byte index of the final character.
    let last_index = expression.len() - 1;
    let mut depth = 0usize;

    for (index, ch) in expression.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                // `depth` is at least 1 here: the text starts with `(` and we
                // return the first time it drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return index == last_index;
                }
            }
            _ => {}
        }
    }

    // The opening parenthesis was never closed.
    false
}
283
/// Reports whether `expression` contains a space-delimited `AND`, `OR` or
/// `WITH` keyword, matched without regard to ASCII case.
fn is_joined_expression(expression: &str) -> bool {
    const OPERATORS: [&str; 3] = [" AND ", " OR ", " WITH "];

    let normalized = expression.to_ascii_uppercase();
    OPERATORS
        .iter()
        .any(|operator| normalized.contains(*operator))
}
288
289fn expression_covers(container: &str, contained: &str) -> bool {
290    let Ok(parsed_container) = parse_expression(container) else {
291        return false;
292    };
293    let Ok(parsed_contained) = parse_expression(contained) else {
294        return false;
295    };
296
297    let simplified_container = simplify_expression(&parsed_container);
298    let simplified_contained = simplify_expression(&parsed_contained);
299
300    expression_covers_ast(&simplified_container, &simplified_contained)
301}
302
303fn expression_covers_ast(container: &LicenseExpression, contained: &LicenseExpression) -> bool {
304    if expression_to_string(container) == expression_to_string(contained) {
305        return true;
306    }
307
308    match (container, contained) {
309        (LicenseExpression::And { .. }, LicenseExpression::And { .. }) => {
310            let container_args = flat_and_args(container);
311            let contained_args = flat_and_args(contained);
312            contained_args.iter().all(|contained_arg| {
313                container_args.iter().any(|container_arg| {
314                    expression_to_string(container_arg) == expression_to_string(contained_arg)
315                })
316            })
317        }
318        (LicenseExpression::Or { .. }, LicenseExpression::Or { .. }) => {
319            let container_args = flat_or_args(container);
320            let contained_args = flat_or_args(contained);
321            contained_args.iter().all(|contained_arg| {
322                container_args.iter().any(|container_arg| {
323                    expression_to_string(container_arg) == expression_to_string(contained_arg)
324                })
325            })
326        }
327        (LicenseExpression::And { .. }, _) => {
328            flat_and_args(container).iter().any(|container_arg| {
329                expression_to_string(container_arg) == expression_to_string(contained)
330            })
331        }
332        (LicenseExpression::Or { .. }, _) => flat_or_args(container).iter().any(|container_arg| {
333            expression_to_string(container_arg) == expression_to_string(contained)
334        }),
335        _ => false,
336    }
337}
338
339fn flat_and_args(expr: &LicenseExpression) -> Vec<&LicenseExpression> {
340    let mut args = Vec::new();
341    collect_flat_args(expr, true, &mut args);
342    args
343}
344
345fn flat_or_args(expr: &LicenseExpression) -> Vec<&LicenseExpression> {
346    let mut args = Vec::new();
347    collect_flat_args(expr, false, &mut args);
348    args
349}
350
351fn collect_flat_args<'a>(
352    expr: &'a LicenseExpression,
353    and_operator: bool,
354    args: &mut Vec<&'a LicenseExpression>,
355) {
356    match expr {
357        LicenseExpression::And { left, right } if and_operator => {
358            collect_flat_args(left, and_operator, args);
359            collect_flat_args(right, and_operator, args);
360        }
361        LicenseExpression::Or { left, right } if !and_operator => {
362            collect_flat_args(left, and_operator, args);
363            collect_flat_args(right, and_operator, args);
364        }
365        _ => args.push(expr),
366    }
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned `String` inputs expected by the functions under test.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn combine_license_expressions_preserves_spdx_case() {
        let combined = combine_license_expressions(owned(&["MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0 AND MIT"));
    }

    #[test]
    fn combine_license_expressions_flattens_same_operator_parentheses() {
        let combined = combine_license_expressions(owned(&["MIT", "ICU", "Unicode-TOU"]));

        assert_eq!(combined.as_deref(), Some("ICU AND MIT AND Unicode-TOU"));
    }

    #[test]
    fn combine_license_expressions_does_not_absorb_with_expressions() {
        let combined = combine_license_expressions(owned(&[
            "GPL-2.0 WITH Classpath-exception-2.0",
            "GPL-2.0",
        ]));

        assert_eq!(
            combined.as_deref(),
            Some("GPL-2.0 AND GPL-2.0 WITH Classpath-exception-2.0")
        );
    }

    #[test]
    fn combine_license_expressions_simplifies_absorbed_and_expression() {
        let combined = combine_license_expressions(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0"));
    }

    #[test]
    fn combine_license_expressions_preserving_structure_keeps_distinct_nested_operands() {
        let combined =
            combine_license_expressions_preserving_structure(owned(&["MIT", "Apache-2.0 OR MIT"]));

        assert_eq!(combined.as_deref(), Some("MIT AND (Apache-2.0 OR MIT)"));
    }

    #[test]
    fn combine_license_expressions_with_relation_simplifies_absorbed_or_expression() {
        let combined = combine_license_expressions_with_relation(
            owned(&["MIT AND Apache-2.0", "MIT"]),
            ExpressionRelation::Or,
        );

        assert_eq!(combined.as_deref(), Some("MIT"));
    }

    #[test]
    fn select_primary_license_expression_prefers_joined_expression_covering_fragment() {
        let primary =
            select_primary_license_expression(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(primary.as_deref(), Some("Apache-2.0 OR MIT"));
    }

    #[test]
    fn select_primary_license_expression_prefers_joined_expression_covering_all_singles() {
        let primary = select_primary_license_expression(owned(&[
            "MIT",
            "Apache-2.0 OR MIT",
            "Apache-2.0",
        ]));

        assert_eq!(primary.as_deref(), Some("Apache-2.0 OR MIT"));
    }

    #[test]
    fn select_primary_license_expression_returns_none_when_joined_expression_does_not_cover_rest() {
        let primary =
            select_primary_license_expression(owned(&["Apache-2.0 OR MIT", "GPL-2.0-only"]));

        assert_eq!(primary, None);
    }
}
464}