Skip to main content

provenant/utils/
spdx.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5
6use crate::license_detection::expression::{
7    LicenseExpression, expression_to_string, parse_expression, simplify_expression,
8    simplify_expression_preserving_structure,
9};
10
/// Boolean relation used to join several license expressions into one.
///
/// `And` joins the inputs with `AND`, `Or` joins them with `OR`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ExpressionRelation {
    /// Every input expression applies (`a AND b`).
    And,
    /// Any one of the input expressions applies (`a OR b`).
    Or,
}
16
/// Operator of the boolean chain that is currently being rendered.
///
/// Used by the rendering helpers to pick the separator and to decide whether
/// a nested operand needs parentheses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BooleanOperator {
    /// The chain is rendered with the ` AND ` separator.
    And,
    /// The chain is rendered with the ` OR ` separator.
    Or,
}
22
23pub fn combine_license_expressions(
24    expressions: impl IntoIterator<Item = String>,
25) -> Option<String> {
26    combine_license_expressions_with_relation(expressions, ExpressionRelation::And)
27}
28
29pub fn combine_license_expressions_preserving_structure(
30    expressions: impl IntoIterator<Item = String>,
31) -> Option<String> {
32    combine_license_expressions_with_relation_and_mode(expressions, ExpressionRelation::And, true)
33}
34
35pub(crate) fn combine_license_expressions_preserving_structure_strict(
36    expressions: impl IntoIterator<Item = String>,
37) -> Option<String> {
38    combine_license_expressions_with_relation_and_mode_strict(
39        expressions,
40        ExpressionRelation::And,
41        true,
42    )
43}
44
45pub fn select_primary_license_expression(
46    expressions: impl IntoIterator<Item = String>,
47) -> Option<String> {
48    let mut unique = Vec::new();
49
50    for expression in expressions {
51        let trimmed = expression.trim();
52        if trimmed.is_empty() {
53            continue;
54        }
55
56        if !unique.iter().any(|existing: &String| existing == trimmed) {
57            unique.push(trimmed.to_string());
58        }
59    }
60
61    if unique.is_empty() {
62        return None;
63    }
64
65    if unique.len() == 1 {
66        return unique.into_iter().next();
67    }
68
69    let joined: Vec<String> = unique
70        .iter()
71        .filter(|expression| is_joined_expression(expression))
72        .cloned()
73        .collect();
74
75    if joined.len() != 1 {
76        return None;
77    }
78
79    let candidate = &joined[0];
80    unique
81        .iter()
82        .filter(|expression| *expression != candidate)
83        .all(|expression| expression_covers(candidate, expression))
84        .then(|| candidate.clone())
85}
86
87pub(crate) fn select_primary_license_expression_strict(
88    expressions: impl IntoIterator<Item = String>,
89) -> Option<String> {
90    let expressions: Vec<String> = expressions.into_iter().collect();
91    select_primary_license_expression(expressions).and_then(|expression| {
92        combine_license_expressions_preserving_structure_strict([expression])
93    })
94}
95
96pub(crate) fn combine_license_expressions_with_relation_preserving_structure_strict(
97    expressions: impl IntoIterator<Item = String>,
98    relation: ExpressionRelation,
99) -> Option<String> {
100    combine_license_expressions_with_relation_and_mode_strict(expressions, relation, true)
101}
102
103pub(crate) fn combine_license_expressions_with_relation(
104    expressions: impl IntoIterator<Item = String>,
105    relation: ExpressionRelation,
106) -> Option<String> {
107    combine_license_expressions_with_relation_and_mode(expressions, relation, false)
108}
109
110fn combine_license_expressions_with_relation_and_mode(
111    expressions: impl IntoIterator<Item = String>,
112    relation: ExpressionRelation,
113    preserve_structure: bool,
114) -> Option<String> {
115    let expressions: Vec<String> = expressions
116        .into_iter()
117        .map(|expression| expression.trim().to_string())
118        .filter(|expression| !expression.is_empty())
119        .collect();
120
121    if expressions.is_empty() {
122        return None;
123    }
124
125    combine_parsed_expressions(&expressions, relation, preserve_structure)
126        .or_else(|| combine_license_expressions_fallback(&expressions, relation))
127}
128
129fn combine_license_expressions_with_relation_and_mode_strict(
130    expressions: impl IntoIterator<Item = String>,
131    relation: ExpressionRelation,
132    preserve_structure: bool,
133) -> Option<String> {
134    let expressions: Vec<String> = expressions
135        .into_iter()
136        .map(|expression| expression.trim().to_string())
137        .filter(|expression| !expression.is_empty())
138        .collect();
139
140    if expressions.is_empty() {
141        return None;
142    }
143
144    combine_parsed_expressions(&expressions, relation, preserve_structure)
145}
146
147fn combine_parsed_expressions(
148    expressions: &[String],
149    relation: ExpressionRelation,
150    preserve_structure: bool,
151) -> Option<String> {
152    let mut case_map = HashMap::new();
153    let parsed_expressions: Vec<LicenseExpression> = expressions
154        .iter()
155        .map(|expression| {
156            collect_term_case(expression, &mut case_map);
157            parse_expression(expression).ok()
158        })
159        .collect::<Option<Vec<_>>>()?;
160
161    let combined = match relation {
162        ExpressionRelation::And => LicenseExpression::and(parsed_expressions),
163        ExpressionRelation::Or => LicenseExpression::or(parsed_expressions),
164    }?;
165
166    let combined = if preserve_structure {
167        simplify_expression_preserving_structure(&combined)
168    } else {
169        simplify_expression(&combined)
170    };
171    Some(render_expression_with_case_map(&combined, &case_map))
172}
173
174fn combine_license_expressions_fallback(
175    expressions: &[String],
176    relation: ExpressionRelation,
177) -> Option<String> {
178    let unique_expressions: HashSet<String> = expressions.iter().cloned().collect();
179    if unique_expressions.is_empty() {
180        return None;
181    }
182
183    let mut sorted_expressions: Vec<String> = unique_expressions.into_iter().collect();
184    sorted_expressions.sort();
185
186    let separator = match relation {
187        ExpressionRelation::And => " AND ",
188        ExpressionRelation::Or => " OR ",
189    };
190
191    Some(
192        sorted_expressions
193            .iter()
194            .map(|expr| wrap_compound_expression(expr))
195            .collect::<Vec<_>>()
196            .join(separator),
197    )
198}
199
/// Records the original casing of every term found in `expression`.
///
/// A term is a maximal run of alphanumeric characters and `-`, `.`, `_`, `+`.
/// The `AND`, `OR` and `WITH` keywords (in any ASCII casing) are skipped.
/// Each term is stored under its ASCII-lowercased form; the first casing
/// recorded for a key is kept.
fn collect_term_case(expression: &str, case_map: &mut HashMap<String, String>) {
    let is_term_char = |ch: char| ch.is_alphanumeric() || matches!(ch, '-' | '.' | '_' | '+');

    for term in expression.split(|ch: char| !is_term_char(ch)) {
        // Adjacent separators produce empty pieces; they are not terms.
        if term.is_empty() {
            continue;
        }

        let is_keyword = ["AND", "OR", "WITH"]
            .iter()
            .any(|keyword| term.eq_ignore_ascii_case(keyword));
        if is_keyword {
            continue;
        }

        case_map
            .entry(term.to_ascii_lowercase())
            .or_insert_with(|| term.to_string());
    }
}
231
232fn render_expression_with_case_map(
233    expression: &LicenseExpression,
234    case_map: &HashMap<String, String>,
235) -> String {
236    match expression {
237        LicenseExpression::License(key) | LicenseExpression::LicenseRef(key) => {
238            case_map.get(key).cloned().unwrap_or_else(|| key.clone())
239        }
240        LicenseExpression::And { .. } => {
241            render_flat_boolean_chain(expression, BooleanOperator::And, case_map)
242        }
243        LicenseExpression::Or { .. } => {
244            render_flat_boolean_chain(expression, BooleanOperator::Or, case_map)
245        }
246        LicenseExpression::With { left, right } => format!(
247            "{} WITH {}",
248            render_expression_with_case_map(left, case_map),
249            render_expression_with_case_map(right, case_map)
250        ),
251    }
252}
253
254fn render_flat_boolean_chain(
255    expression: &LicenseExpression,
256    operator: BooleanOperator,
257    case_map: &HashMap<String, String>,
258) -> String {
259    let mut parts = Vec::new();
260    collect_boolean_chain(expression, operator, &mut parts);
261
262    let separator = match operator {
263        BooleanOperator::And => " AND ",
264        BooleanOperator::Or => " OR ",
265    };
266
267    parts
268        .into_iter()
269        .map(|part| render_boolean_operand(part, operator, case_map))
270        .collect::<Vec<_>>()
271        .join(separator)
272}
273
274fn collect_boolean_chain<'a>(
275    expression: &'a LicenseExpression,
276    operator: BooleanOperator,
277    parts: &mut Vec<&'a LicenseExpression>,
278) {
279    match (operator, expression) {
280        (BooleanOperator::And, LicenseExpression::And { left, right })
281        | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
282            collect_boolean_chain(left, operator, parts);
283            collect_boolean_chain(right, operator, parts);
284        }
285        _ => parts.push(expression),
286    }
287}
288
289fn render_boolean_operand(
290    expression: &LicenseExpression,
291    parent_operator: BooleanOperator,
292    case_map: &HashMap<String, String>,
293) -> String {
294    match expression {
295        LicenseExpression::And { .. } => match parent_operator {
296            BooleanOperator::And => render_expression_with_case_map(expression, case_map),
297            BooleanOperator::Or => format!(
298                "({})",
299                render_expression_with_case_map(expression, case_map)
300            ),
301        },
302        LicenseExpression::Or { .. } => match parent_operator {
303            BooleanOperator::Or => render_expression_with_case_map(expression, case_map),
304            BooleanOperator::And => format!(
305                "({})",
306                render_expression_with_case_map(expression, case_map)
307            ),
308        },
309        _ => render_expression_with_case_map(expression, case_map),
310    }
311}
312
/// Wraps a compound expression in parentheses so it can be joined safely with
/// other expressions.
///
/// An expression is left untouched when it contains no space, or when it is
/// already enclosed by one matching pair of outer parentheses. Merely
/// starting with `(` and ending with `)` is not enough:
/// `(A AND B) OR (C AND D)` still needs wrapping, otherwise joining it with
/// ` AND ` would change its meaning.
fn wrap_compound_expression(expression: &str) -> String {
    if expression.contains(' ') && !is_fully_parenthesized(expression) {
        format!("({})", expression)
    } else {
        expression.to_string()
    }
}

/// Returns `true` when the first character is a `(` whose matching `)` is the
/// last character of `expression`.
fn is_fully_parenthesized(expression: &str) -> bool {
    if !(expression.starts_with('(') && expression.ends_with(')')) {
        return false;
    }

    // Byte index of the trailing `)`; safe because the string is non-empty.
    let last = expression.len() - 1;
    let mut depth = 0usize;

    for (index, ch) in expression.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                // A closer without an opener means the text is unbalanced.
                let Some(remaining) = depth.checked_sub(1) else {
                    return false;
                };
                depth = remaining;
                if depth == 0 {
                    // The opening parenthesis closes here; it encloses the
                    // whole expression only if this is the final character.
                    return index == last;
                }
            }
            _ => {}
        }
    }

    false
}
320
/// Reports whether `expression` contains a space-delimited `AND`, `OR` or
/// `WITH` operator, compared ASCII case-insensitively.
fn is_joined_expression(expression: &str) -> bool {
    const OPERATORS: [&str; 3] = [" AND ", " OR ", " WITH "];

    let normalized = expression.to_ascii_uppercase();
    OPERATORS
        .iter()
        .any(|operator| normalized.contains(*operator))
}
325
326fn expression_covers(container: &str, contained: &str) -> bool {
327    let Ok(parsed_container) = parse_expression(container) else {
328        return false;
329    };
330    let Ok(parsed_contained) = parse_expression(contained) else {
331        return false;
332    };
333
334    let simplified_container = simplify_expression(&parsed_container);
335    let simplified_contained = simplify_expression(&parsed_contained);
336
337    expression_covers_ast(&simplified_container, &simplified_contained)
338}
339
340fn expression_covers_ast(container: &LicenseExpression, contained: &LicenseExpression) -> bool {
341    if expression_to_string(container) == expression_to_string(contained) {
342        return true;
343    }
344
345    match (container, contained) {
346        (LicenseExpression::And { .. }, LicenseExpression::And { .. }) => {
347            let container_args = flat_and_args(container);
348            let contained_args = flat_and_args(contained);
349            contained_args.iter().all(|contained_arg| {
350                container_args.iter().any(|container_arg| {
351                    expression_to_string(container_arg) == expression_to_string(contained_arg)
352                })
353            })
354        }
355        (LicenseExpression::Or { .. }, LicenseExpression::Or { .. }) => {
356            let container_args = flat_or_args(container);
357            let contained_args = flat_or_args(contained);
358            contained_args.iter().all(|contained_arg| {
359                container_args.iter().any(|container_arg| {
360                    expression_to_string(container_arg) == expression_to_string(contained_arg)
361                })
362            })
363        }
364        (LicenseExpression::And { .. }, _) => {
365            flat_and_args(container).iter().any(|container_arg| {
366                expression_to_string(container_arg) == expression_to_string(contained)
367            })
368        }
369        (LicenseExpression::Or { .. }, _) => flat_or_args(container).iter().any(|container_arg| {
370            expression_to_string(container_arg) == expression_to_string(contained)
371        }),
372        _ => false,
373    }
374}
375
376fn flat_and_args(expr: &LicenseExpression) -> Vec<&LicenseExpression> {
377    let mut args = Vec::new();
378    collect_flat_args(expr, true, &mut args);
379    args
380}
381
382fn flat_or_args(expr: &LicenseExpression) -> Vec<&LicenseExpression> {
383    let mut args = Vec::new();
384    collect_flat_args(expr, false, &mut args);
385    args
386}
387
388fn collect_flat_args<'a>(
389    expr: &'a LicenseExpression,
390    and_operator: bool,
391    args: &mut Vec<&'a LicenseExpression>,
392) {
393    match expr {
394        LicenseExpression::And { left, right } if and_operator => {
395            collect_flat_args(left, and_operator, args);
396            collect_flat_args(right, and_operator, args);
397        }
398        LicenseExpression::Or { left, right } if !and_operator => {
399            collect_flat_args(left, and_operator, args);
400            collect_flat_args(right, and_operator, args);
401        }
402        _ => args.push(expr),
403    }
404}
405
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned inputs the functions under test
    /// accept.
    fn owned(expressions: &[&str]) -> Vec<String> {
        expressions
            .iter()
            .map(|expression| (*expression).to_string())
            .collect()
    }

    #[test]
    fn combine_license_expressions_preserves_spdx_case() {
        let combined = combine_license_expressions(owned(&["MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0 AND MIT"));
    }

    #[test]
    fn combine_license_expressions_flattens_same_operator_parentheses() {
        let combined = combine_license_expressions(owned(&["MIT", "ICU", "Unicode-TOU"]));

        assert_eq!(combined.as_deref(), Some("ICU AND MIT AND Unicode-TOU"));
    }

    #[test]
    fn combine_license_expressions_does_not_absorb_with_expressions() {
        let combined = combine_license_expressions(owned(&[
            "GPL-2.0 WITH Classpath-exception-2.0",
            "GPL-2.0",
        ]));

        assert_eq!(
            combined.as_deref(),
            Some("GPL-2.0 AND GPL-2.0 WITH Classpath-exception-2.0")
        );
    }

    #[test]
    fn combine_license_expressions_simplifies_absorbed_and_expression() {
        let combined = combine_license_expressions(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0"));
    }

    #[test]
    fn combine_license_expressions_preserving_structure_keeps_distinct_nested_operands() {
        let combined =
            combine_license_expressions_preserving_structure(owned(&["MIT", "Apache-2.0 OR MIT"]));

        assert_eq!(combined.as_deref(), Some("MIT AND (Apache-2.0 OR MIT)"));
    }

    #[test]
    fn combine_license_expressions_with_relation_simplifies_absorbed_or_expression() {
        let combined = combine_license_expressions_with_relation(
            owned(&["MIT AND Apache-2.0", "MIT"]),
            ExpressionRelation::Or,
        );

        assert_eq!(combined.as_deref(), Some("MIT"));
    }

    #[test]
    fn select_primary_license_expression_prefers_joined_expression_covering_fragment() {
        let primary =
            select_primary_license_expression(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(primary.as_deref(), Some("Apache-2.0 OR MIT"));
    }

    #[test]
    fn select_primary_license_expression_prefers_joined_expression_covering_all_singles() {
        let primary = select_primary_license_expression(owned(&[
            "MIT",
            "Apache-2.0 OR MIT",
            "Apache-2.0",
        ]));

        assert_eq!(primary.as_deref(), Some("Apache-2.0 OR MIT"));
    }

    #[test]
    fn select_primary_license_expression_returns_none_when_joined_expression_does_not_cover_rest() {
        let primary =
            select_primary_license_expression(owned(&["Apache-2.0 OR MIT", "GPL-2.0-only"]));

        assert!(primary.is_none());
    }

    #[test]
    fn combine_license_expressions_preserving_structure_strict_rejects_invalid_expression() {
        let combined = combine_license_expressions_preserving_structure_strict(owned(&[
            "Apache-2.0",
            "MIT\" or malformed",
        ]));

        assert!(combined.is_none());
    }

    #[test]
    fn select_primary_license_expression_strict_rejects_invalid_primary_expression() {
        let primary = select_primary_license_expression_strict(owned(&["MIT\" or malformed"]));

        assert!(primary.is_none());
    }
}