Skip to main content

provenant/license_detection/expression/
simplify.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression simplification and utilities.
5
6use std::collections::HashSet;
7
8use super::{LicenseExpression, ParseError};
9
10/// Simplify a license expression by deduplicating and reducing boolean clauses.
11///
12/// # Arguments
13/// * `expr` - The expression to simplify
14///
15/// # Returns
16/// Simplified expression with duplicate and subsumed licenses removed,
17/// using deterministic canonical ordering for boolean operand chains.
18pub fn simplify_expression(expr: &LicenseExpression) -> LicenseExpression {
19    canonicalize_expression(expr, true)
20}
21
22/// Canonicalize a license expression while preserving non-identical operands.
23///
24/// This still flattens boolean chains, removes exact duplicates, and sorts
25/// operands deterministically, but it deliberately avoids absorption such as
26/// `MIT AND (Apache-2.0 OR MIT) -> MIT`.
27pub fn simplify_expression_preserving_structure(expr: &LicenseExpression) -> LicenseExpression {
28    canonicalize_expression(expr, false)
29}
30
31fn canonicalize_expression(
32    expr: &LicenseExpression,
33    prune_subsumed_operands_enabled: bool,
34) -> LicenseExpression {
35    match expr {
36        LicenseExpression::License(key) => LicenseExpression::License(key.clone()),
37        LicenseExpression::LicenseRef(key) => LicenseExpression::LicenseRef(key.clone()),
38        LicenseExpression::With { left, right } => LicenseExpression::With {
39            left: Box::new(canonicalize_expression(
40                left,
41                prune_subsumed_operands_enabled,
42            )),
43            right: Box::new(canonicalize_expression(
44                right,
45                prune_subsumed_operands_enabled,
46            )),
47        },
48        LicenseExpression::And { .. } => {
49            let mut unique = Vec::new();
50            let mut seen = HashSet::new();
51            collect_unique_and(
52                expr,
53                &mut unique,
54                &mut seen,
55                prune_subsumed_operands_enabled,
56            );
57            if prune_subsumed_operands_enabled {
58                prune_subsumed_operands(&mut unique, true);
59                sort_operands_canonically(&mut unique);
60            }
61            build_expression_from_list(&unique, true)
62        }
63        LicenseExpression::Or { .. } => {
64            let mut unique = Vec::new();
65            let mut seen = HashSet::new();
66            collect_unique_or(
67                expr,
68                &mut unique,
69                &mut seen,
70                prune_subsumed_operands_enabled,
71            );
72            if prune_subsumed_operands_enabled {
73                prune_subsumed_operands(&mut unique, false);
74                sort_operands_canonically(&mut unique);
75            }
76            build_expression_from_list(&unique, false)
77        }
78    }
79}
80
81fn sort_operands_canonically(operands: &mut [LicenseExpression]) {
82    operands.sort_by_cached_key(canonical_operand_sort_key);
83}
84
85fn canonical_operand_sort_key(expr: &LicenseExpression) -> String {
86    expression_to_string(expr).to_ascii_lowercase()
87}
88
89fn prune_subsumed_operands(operands: &mut Vec<LicenseExpression>, outer_is_and: bool) {
90    let inner_is_and = !outer_is_and;
91    let pruned: Vec<LicenseExpression> = operands
92        .iter()
93        .enumerate()
94        .filter(|(candidate_idx, candidate)| {
95            !operands.iter().enumerate().any(|(other_idx, other)| {
96                candidate_idx != &other_idx && operand_subsumes(other, candidate, inner_is_and)
97            })
98        })
99        .map(|(_, operand)| operand.clone())
100        .collect();
101
102    *operands = pruned;
103}
104
105fn operand_subsumes(
106    other: &LicenseExpression,
107    candidate: &LicenseExpression,
108    inner_is_and: bool,
109) -> bool {
110    let other_args = get_flat_args(other);
111    let candidate_args = get_flat_args(candidate);
112
113    if other_args.len() >= candidate_args.len() {
114        return false;
115    }
116
117    let relevant_operator = matches!(other, LicenseExpression::And { .. })
118        || matches!(other, LicenseExpression::Or { .. })
119        || matches!(candidate, LicenseExpression::And { .. })
120        || matches!(candidate, LicenseExpression::Or { .. });
121
122    if !relevant_operator {
123        return false;
124    }
125
126    let operator_matches = if inner_is_and {
127        matches!(candidate, LicenseExpression::And { .. })
128            || matches!(other, LicenseExpression::And { .. })
129    } else {
130        matches!(candidate, LicenseExpression::Or { .. })
131            || matches!(other, LicenseExpression::Or { .. })
132    };
133
134    if !operator_matches {
135        return false;
136    }
137
138    other_args.iter().all(|other_arg| {
139        candidate_args
140            .iter()
141            .any(|arg| expressions_equal(arg, other_arg))
142    })
143}
144
145fn collect_unique_and(
146    expr: &LicenseExpression,
147    unique: &mut Vec<LicenseExpression>,
148    seen: &mut HashSet<String>,
149    prune_subsumed_operands_enabled: bool,
150) {
151    match expr {
152        LicenseExpression::And { left, right } => {
153            collect_unique_and(left, unique, seen, prune_subsumed_operands_enabled);
154            collect_unique_and(right, unique, seen, prune_subsumed_operands_enabled);
155        }
156        LicenseExpression::Or { .. } => {
157            let simplified = canonicalize_expression(expr, prune_subsumed_operands_enabled);
158            let key = expression_to_string(&simplified);
159            if !seen.contains(&key) {
160                seen.insert(key);
161                unique.push(simplified);
162            }
163        }
164        LicenseExpression::With { left, right } => {
165            let simplified = LicenseExpression::With {
166                left: Box::new(canonicalize_expression(
167                    left,
168                    prune_subsumed_operands_enabled,
169                )),
170                right: Box::new(canonicalize_expression(
171                    right,
172                    prune_subsumed_operands_enabled,
173                )),
174            };
175            let key = expression_to_string(&simplified);
176            if !seen.contains(&key) {
177                seen.insert(key);
178                unique.push(simplified);
179            }
180        }
181        LicenseExpression::License(key) => {
182            if !seen.contains(key) {
183                seen.insert(key.clone());
184                unique.push(LicenseExpression::License(key.clone()));
185            }
186        }
187        LicenseExpression::LicenseRef(key) => {
188            if !seen.contains(key) {
189                seen.insert(key.clone());
190                unique.push(LicenseExpression::LicenseRef(key.clone()));
191            }
192        }
193    }
194}
195
196fn collect_unique_or(
197    expr: &LicenseExpression,
198    unique: &mut Vec<LicenseExpression>,
199    seen: &mut HashSet<String>,
200    prune_subsumed_operands_enabled: bool,
201) {
202    match expr {
203        LicenseExpression::Or { left, right } => {
204            collect_unique_or(left, unique, seen, prune_subsumed_operands_enabled);
205            collect_unique_or(right, unique, seen, prune_subsumed_operands_enabled);
206        }
207        LicenseExpression::And { .. } => {
208            let simplified = canonicalize_expression(expr, prune_subsumed_operands_enabled);
209            let key = expression_to_string(&simplified);
210            if !seen.contains(&key) {
211                seen.insert(key);
212                unique.push(simplified);
213            }
214        }
215        LicenseExpression::With { left, right } => {
216            let simplified = LicenseExpression::With {
217                left: Box::new(canonicalize_expression(
218                    left,
219                    prune_subsumed_operands_enabled,
220                )),
221                right: Box::new(canonicalize_expression(
222                    right,
223                    prune_subsumed_operands_enabled,
224                )),
225            };
226            let key = expression_to_string(&simplified);
227            if !seen.contains(&key) {
228                seen.insert(key);
229                unique.push(simplified);
230            }
231        }
232        LicenseExpression::License(key) => {
233            if !seen.contains(key) {
234                seen.insert(key.clone());
235                unique.push(LicenseExpression::License(key.clone()));
236            }
237        }
238        LicenseExpression::LicenseRef(key) => {
239            if !seen.contains(key) {
240                seen.insert(key.clone());
241                unique.push(LicenseExpression::LicenseRef(key.clone()));
242            }
243        }
244    }
245}
246
247fn build_expression_from_list(unique: &[LicenseExpression], is_and: bool) -> LicenseExpression {
248    match unique.len() {
249        0 => panic!("build_expression_from_list called with empty list"),
250        1 => unique[0].clone(),
251        _ => {
252            let midpoint = unique.len() / 2;
253            let left = build_expression_from_list(&unique[..midpoint], is_and);
254            let right = build_expression_from_list(&unique[midpoint..], is_and);
255            if is_and {
256                LicenseExpression::And {
257                    left: Box::new(left),
258                    right: Box::new(right),
259                }
260            } else {
261                LicenseExpression::Or {
262                    left: Box::new(left),
263                    right: Box::new(right),
264                }
265            }
266        }
267    }
268}
269
270fn get_flat_args(expr: &LicenseExpression) -> Vec<LicenseExpression> {
271    match expr {
272        LicenseExpression::And { left, right } => {
273            let mut args = Vec::new();
274            collect_flat_and_args(left, &mut args);
275            collect_flat_and_args(right, &mut args);
276            args
277        }
278        LicenseExpression::Or { left, right } => {
279            let mut args = Vec::new();
280            collect_flat_or_args(left, &mut args);
281            collect_flat_or_args(right, &mut args);
282            args
283        }
284        _ => vec![expr.clone()],
285    }
286}
287
288fn collect_flat_and_args(expr: &LicenseExpression, args: &mut Vec<LicenseExpression>) {
289    match expr {
290        LicenseExpression::And { left, right } => {
291            collect_flat_and_args(left, args);
292            collect_flat_and_args(right, args);
293        }
294        _ => args.push(expr.clone()),
295    }
296}
297
298fn collect_flat_or_args(expr: &LicenseExpression, args: &mut Vec<LicenseExpression>) {
299    match expr {
300        LicenseExpression::Or { left, right } => {
301            collect_flat_or_args(left, args);
302            collect_flat_or_args(right, args);
303        }
304        _ => args.push(expr.clone()),
305    }
306}
307
308fn decompose_expr(expr: &LicenseExpression) -> Vec<LicenseExpression> {
309    match expr {
310        LicenseExpression::With { left, right } => {
311            let mut parts = decompose_expr(left);
312            parts.extend(decompose_expr(right));
313            parts
314        }
315        _ => vec![expr.clone()],
316    }
317}
318
319fn expressions_equal(a: &LicenseExpression, b: &LicenseExpression) -> bool {
320    match (a, b) {
321        (LicenseExpression::License(ka), LicenseExpression::License(kb)) => ka == kb,
322        (LicenseExpression::LicenseRef(ka), LicenseExpression::LicenseRef(kb)) => ka == kb,
323        (
324            LicenseExpression::With {
325                left: l1,
326                right: r1,
327            },
328            LicenseExpression::With {
329                left: l2,
330                right: r2,
331            },
332        ) => expressions_equal(l1, l2) && expressions_equal(r1, r2),
333        (LicenseExpression::And { .. }, LicenseExpression::And { .. }) => {
334            let args_a = get_flat_args(a);
335            let args_b = get_flat_args(b);
336            args_a.len() == args_b.len()
337                && args_b
338                    .iter()
339                    .all(|b_arg| args_a.iter().any(|a_arg| expressions_equal(a_arg, b_arg)))
340        }
341        (LicenseExpression::Or { .. }, LicenseExpression::Or { .. }) => {
342            let args_a = get_flat_args(a);
343            let args_b = get_flat_args(b);
344            args_a.len() == args_b.len()
345                && args_b
346                    .iter()
347                    .all(|b_arg| args_a.iter().any(|a_arg| expressions_equal(a_arg, b_arg)))
348        }
349        _ => false,
350    }
351}
352
353fn expr_in_args(expr: &LicenseExpression, args: &[LicenseExpression]) -> bool {
354    if args.iter().any(|a| expressions_equal(a, expr)) {
355        return true;
356    }
357    let decomposed = decompose_expr(expr);
358    if decomposed.len() == 1 {
359        return false;
360    }
361    decomposed
362        .iter()
363        .any(|d| args.iter().any(|a| expressions_equal(a, d)))
364}
365
366pub fn licensing_contains(container: &str, contained: &str) -> bool {
367    let container = container.trim();
368    let contained = contained.trim();
369    if container.is_empty() || contained.is_empty() {
370        return false;
371    }
372
373    if container == contained {
374        return true;
375    }
376
377    let Ok(parsed_container) = super::parse::parse_expression(container) else {
378        return false;
379    };
380    let Ok(parsed_contained) = super::parse::parse_expression(contained) else {
381        return false;
382    };
383
384    let simplified_container = simplify_expression(&parsed_container);
385    let simplified_contained = simplify_expression(&parsed_contained);
386
387    match (&simplified_container, &simplified_contained) {
388        (LicenseExpression::And { .. }, LicenseExpression::And { .. })
389        | (LicenseExpression::Or { .. }, LicenseExpression::Or { .. }) => {
390            let container_args = get_flat_args(&simplified_container);
391            let contained_args = get_flat_args(&simplified_contained);
392            contained_args
393                .iter()
394                .all(|c| container_args.iter().any(|ca| expressions_equal(ca, c)))
395        }
396        (
397            LicenseExpression::And { .. } | LicenseExpression::Or { .. },
398            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_),
399        ) => {
400            let container_args = get_flat_args(&simplified_container);
401            expr_in_args(&simplified_contained, &container_args)
402        }
403        (LicenseExpression::And { .. } | LicenseExpression::Or { .. }, _) => {
404            let container_args = get_flat_args(&simplified_container);
405            container_args
406                .iter()
407                .any(|ca| expressions_equal(ca, &simplified_contained))
408        }
409        (
410            LicenseExpression::With { .. },
411            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_),
412        ) => {
413            let decomposed = decompose_expr(&simplified_container);
414            decomposed
415                .iter()
416                .any(|d| expressions_equal(d, &simplified_contained))
417        }
418        (
419            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_),
420            LicenseExpression::And { .. }
421            | LicenseExpression::Or { .. }
422            | LicenseExpression::With { .. },
423        ) => false,
424        (LicenseExpression::License(k1), LicenseExpression::License(k2)) => k1 == k2,
425        (LicenseExpression::LicenseRef(k1), LicenseExpression::LicenseRef(k2)) => k1 == k2,
426        _ => false,
427    }
428}
429
430/// # Returns
431/// String representation of the expression
432///
433/// # Parentheses
434/// Parentheses are added when needed to preserve semantic meaning based on
435/// operator precedence (WITH > AND > OR). This matches the Python
436/// license-expression library behavior.
437/// Convert a license expression to its string representation.
438#[derive(Clone, Copy)]
439enum BooleanOperator {
440    And,
441    Or,
442}
443
444pub fn expression_to_string(expr: &LicenseExpression) -> String {
445    match expr {
446        LicenseExpression::License(key) => key.clone(),
447        LicenseExpression::LicenseRef(key) => key.clone(),
448        LicenseExpression::And { .. } => render_flat_boolean_chain(expr, BooleanOperator::And),
449        LicenseExpression::Or { .. } => render_flat_boolean_chain(expr, BooleanOperator::Or),
450        LicenseExpression::With { left, right } => {
451            let left_str = expression_to_string(left);
452            let right_str = expression_to_string(right);
453            format!("{} WITH {}", left_str, right_str)
454        }
455    }
456}
457
458fn render_flat_boolean_chain(expr: &LicenseExpression, operator: BooleanOperator) -> String {
459    let mut parts = Vec::new();
460    collect_boolean_chain(expr, operator, &mut parts);
461
462    let separator = match operator {
463        BooleanOperator::And => " AND ",
464        BooleanOperator::Or => " OR ",
465    };
466
467    parts
468        .into_iter()
469        .map(|part| render_boolean_operand(part, operator))
470        .collect::<Vec<_>>()
471        .join(separator)
472}
473
474fn collect_boolean_chain<'a>(
475    expr: &'a LicenseExpression,
476    operator: BooleanOperator,
477    parts: &mut Vec<&'a LicenseExpression>,
478) {
479    match (operator, expr) {
480        (BooleanOperator::And, LicenseExpression::And { left, right })
481        | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
482            collect_boolean_chain(left, operator, parts);
483            collect_boolean_chain(right, operator, parts);
484        }
485        _ => parts.push(expr),
486    }
487}
488
489fn render_boolean_operand(expr: &LicenseExpression, parent_operator: BooleanOperator) -> String {
490    match expr {
491        LicenseExpression::License(key) => key.clone(),
492        LicenseExpression::LicenseRef(key) => key.clone(),
493        LicenseExpression::And { .. } => match parent_operator {
494            BooleanOperator::And => expression_to_string(expr),
495            BooleanOperator::Or => format!("({})", expression_to_string(expr)),
496        },
497        LicenseExpression::Or { .. } => match parent_operator {
498            BooleanOperator::Or => expression_to_string(expr),
499            BooleanOperator::And => format!("({})", expression_to_string(expr)),
500        },
501        LicenseExpression::With { left, right } => {
502            let left_str = expression_to_string(left);
503            let right_str = expression_to_string(right);
504            format!("{} WITH {}", left_str, right_str)
505        }
506    }
507}
508
509fn combine_expressions_with(
510    expressions: &[&str],
511    unique: bool,
512    combiner: fn(Vec<LicenseExpression>) -> Option<LicenseExpression>,
513    simplifier: fn(&LicenseExpression) -> LicenseExpression,
514) -> Result<String, ParseError> {
515    if expressions.is_empty() {
516        return Ok(String::new());
517    }
518    if expressions.len() == 1 {
519        let parsed = super::parse::parse_expression(expressions[0])?;
520        return Ok(expression_to_string(&if unique {
521            simplifier(&parsed)
522        } else {
523            parsed
524        }));
525    }
526
527    let parsed_exprs: Vec<LicenseExpression> = expressions
528        .iter()
529        .map(|e| super::parse::parse_expression(e))
530        .collect::<Result<Vec<_>, _>>()?;
531
532    let combined = combiner(parsed_exprs);
533
534    match combined {
535        Some(expr) => {
536            let final_expr = if unique { simplifier(&expr) } else { expr };
537            Ok(expression_to_string(&final_expr))
538        }
539        None => Ok(String::new()),
540    }
541}
542
543/// Combine multiple license expressions with `AND`.
544///
545/// This function parses each expression string, combines them with `AND`, and
546/// optionally deduplicates license keys.
547pub fn combine_expressions_and(expressions: &[&str], unique: bool) -> Result<String, ParseError> {
548    combine_expressions_with(
549        expressions,
550        unique,
551        LicenseExpression::and,
552        simplify_expression,
553    )
554}
555
556/// Combine multiple license expressions with `AND` while preserving the
557/// original boolean structure of distinct operands.
558pub fn combine_expressions_and_preserving_structure(
559    expressions: &[&str],
560    unique: bool,
561) -> Result<String, ParseError> {
562    combine_expressions_with(
563        expressions,
564        unique,
565        LicenseExpression::and,
566        simplify_expression_preserving_structure,
567    )
568}
569
570/// Combine multiple license expressions with `OR`.
571///
572/// This function parses each expression string, combines them with `OR`, and
573/// optionally deduplicates license keys.
574#[allow(dead_code)]
575pub fn combine_expressions_or(expressions: &[&str], unique: bool) -> Result<String, ParseError> {
576    combine_expressions_with(
577        expressions,
578        unique,
579        LicenseExpression::or,
580        simplify_expression,
581    )
582}
583
584/// Combine multiple license expressions with `OR` while preserving the
585/// original boolean structure of distinct operands.
586pub fn combine_expressions_or_preserving_structure(
587    expressions: &[&str],
588    unique: bool,
589) -> Result<String, ParseError> {
590    combine_expressions_with(
591        expressions,
592        unique,
593        LicenseExpression::or,
594        simplify_expression_preserving_structure,
595    )
596}
597
598#[cfg(test)]
599mod tests {
600    use super::*;
601
602    fn expression_depth(expr: &LicenseExpression) -> usize {
603        match expr {
604            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_) => 1,
605            LicenseExpression::And { left, right }
606            | LicenseExpression::Or { left, right }
607            | LicenseExpression::With { left, right } => {
608                1 + expression_depth(left).max(expression_depth(right))
609            }
610        }
611    }
612
613    #[test]
614    fn test_simplify_expression_no_change() {
615        let expr = super::super::parse::parse_expression("MIT AND Apache-2.0").unwrap();
616        let simplified = simplify_expression(&expr);
617        assert_eq!(expression_to_string(&simplified), "apache-2.0 AND mit");
618    }
619
620    #[test]
621    fn test_simplify_expression_with_duplicates() {
622        let expr = super::super::parse::parse_expression("MIT OR MIT").unwrap();
623        let simplified = simplify_expression(&expr);
624        assert_eq!(expression_to_string(&simplified), "mit");
625    }
626
627    #[test]
628    fn test_simplify_expression_preserving_structure_keeps_distinct_nested_operands() {
629        let expr = super::super::parse::parse_expression("mit AND (apache-2.0 OR mit)").unwrap();
630        let simplified = simplify_expression_preserving_structure(&expr);
631        assert_eq!(
632            expression_to_string(&simplified),
633            "mit AND (apache-2.0 OR mit)"
634        );
635    }
636
637    #[test]
638    fn test_simplify_and_duplicates() {
639        let expr = super::super::parse::parse_expression("crapl-0.1 AND crapl-0.1").unwrap();
640        let simplified = simplify_expression(&expr);
641        assert_eq!(expression_to_string(&simplified), "crapl-0.1");
642    }
643
644    #[test]
645    fn test_simplify_or_duplicates() {
646        let expr = super::super::parse::parse_expression("mit OR mit").unwrap();
647        let simplified = simplify_expression(&expr);
648        assert_eq!(expression_to_string(&simplified), "mit");
649    }
650
651    #[test]
652    fn test_combine_expressions_and_preserving_structure_keeps_distinct_nested_operands() {
653        let result =
654            combine_expressions_and_preserving_structure(&["mit", "apache-2.0 OR mit"], true)
655                .unwrap();
656        assert_eq!(result, "mit AND (apache-2.0 OR mit)");
657    }
658
659    #[test]
660    fn test_simplify_preserves_different_licenses() {
661        let expr = super::super::parse::parse_expression("mit AND apache-2.0").unwrap();
662        let simplified = simplify_expression(&expr);
663        assert_eq!(expression_to_string(&simplified), "apache-2.0 AND mit");
664    }
665
666    #[test]
667    fn test_simplify_complex_duplicates() {
668        let expr = super::super::parse::parse_expression(
669            "gpl-2.0-plus AND gpl-2.0-plus AND lgpl-2.0-plus",
670        )
671        .unwrap();
672        let simplified = simplify_expression(&expr);
673        assert_eq!(
674            expression_to_string(&simplified),
675            "gpl-2.0-plus AND lgpl-2.0-plus"
676        );
677    }
678
679    #[test]
680    fn test_simplify_three_duplicates() {
681        let expr =
682            super::super::parse::parse_expression("fsf-free AND fsf-free AND fsf-free").unwrap();
683        let simplified = simplify_expression(&expr);
684        assert_eq!(expression_to_string(&simplified), "fsf-free");
685    }
686
687    #[test]
688    fn test_simplify_with_expression_dedup() {
689        let expr = super::super::parse::parse_expression(
690            "gpl-2.0 WITH classpath-exception-2.0 AND gpl-2.0 WITH classpath-exception-2.0",
691        )
692        .unwrap();
693        let simplified = simplify_expression(&expr);
694        assert_eq!(
695            expression_to_string(&simplified),
696            "gpl-2.0 WITH classpath-exception-2.0"
697        );
698    }
699
700    #[test]
701    fn test_simplify_nested_duplicates() {
702        let expr =
703            super::super::parse::parse_expression("(mit AND apache-2.0) OR (mit AND apache-2.0)")
704                .unwrap();
705        let simplified = simplify_expression(&expr);
706        assert_eq!(expression_to_string(&simplified), "apache-2.0 AND mit");
707    }
708
709    #[test]
710    fn test_simplify_sorts_operands_canonically() {
711        let expr =
712            super::super::parse::parse_expression("apache-2.0 AND mit AND apache-2.0").unwrap();
713        let simplified = simplify_expression(&expr);
714        assert_eq!(expression_to_string(&simplified), "apache-2.0 AND mit");
715    }
716
717    #[test]
718    fn test_simplify_mit_and_mit_and_apache() {
719        let expr = super::super::parse::parse_expression("mit AND mit AND apache-2.0").unwrap();
720        let simplified = simplify_expression(&expr);
721        assert_eq!(expression_to_string(&simplified), "apache-2.0 AND mit");
722    }
723
724    #[test]
725    fn test_simplify_and_absorption() {
726        let expr = super::super::parse::parse_expression("mit AND (mit OR apache-2.0)").unwrap();
727        let simplified = simplify_expression(&expr);
728
729        assert_eq!(expression_to_string(&simplified), "mit");
730    }
731
732    #[test]
733    fn test_simplify_or_absorption() {
734        let expr = super::super::parse::parse_expression("mit OR (mit AND apache-2.0)").unwrap();
735        let simplified = simplify_expression(&expr);
736
737        assert_eq!(expression_to_string(&simplified), "mit");
738    }
739
740    #[test]
741    fn test_simplify_or_subsumption() {
742        let expr = super::super::parse::parse_expression(
743            "(mit AND apache-2.0) OR (mit AND apache-2.0 AND bsd-new)",
744        )
745        .unwrap();
746        let simplified = simplify_expression(&expr);
747
748        assert_eq!(expression_to_string(&simplified), "apache-2.0 AND mit");
749    }
750
751    #[test]
752    fn test_simplify_and_subsumption() {
753        let expr = super::super::parse::parse_expression(
754            "(mit OR apache-2.0) AND (mit OR apache-2.0 OR bsd-new)",
755        )
756        .unwrap();
757        let simplified = simplify_expression(&expr);
758
759        assert_eq!(expression_to_string(&simplified), "apache-2.0 OR mit");
760    }
761
762    #[test]
763    fn test_simplify_and_keeps_gpl_or_later_with_only() {
764        let expr =
765            super::super::parse::parse_expression("gpl-2.0-or-later AND gpl-2.0-only").unwrap();
766        let simplified = simplify_expression(&expr);
767
768        assert_eq!(
769            expression_to_string(&simplified),
770            "gpl-2.0-only AND gpl-2.0-or-later"
771        );
772    }
773
774    #[test]
775    fn test_expression_to_string_simple() {
776        let expr = LicenseExpression::License("mit".to_string());
777        assert_eq!(expression_to_string(&expr), "mit");
778    }
779
780    #[test]
781    fn test_expression_to_string_and() {
782        let expr = LicenseExpression::And {
783            left: Box::new(LicenseExpression::License("mit".to_string())),
784            right: Box::new(LicenseExpression::License("apache-2.0".to_string())),
785        };
786        assert_eq!(expression_to_string(&expr), "mit AND apache-2.0");
787    }
788
789    #[test]
790    fn test_expression_to_string_or() {
791        let expr = LicenseExpression::Or {
792            left: Box::new(LicenseExpression::License("mit".to_string())),
793            right: Box::new(LicenseExpression::License("apache-2.0".to_string())),
794        };
795        assert_eq!(expression_to_string(&expr), "mit OR apache-2.0");
796    }
797
798    #[test]
799    fn test_expression_to_string_with() {
800        let expr = LicenseExpression::With {
801            left: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
802            right: Box::new(LicenseExpression::License(
803                "classpath-exception-2.0".to_string(),
804            )),
805        };
806        assert_eq!(
807            expression_to_string(&expr),
808            "gpl-2.0 WITH classpath-exception-2.0"
809        );
810    }
811
812    #[test]
813    fn test_expression_to_string_licenseref() {
814        let expr = LicenseExpression::LicenseRef("licenseref-scancode-custom".to_string());
815        assert_eq!(expression_to_string(&expr), "licenseref-scancode-custom");
816    }
817
818    #[test]
819    fn test_expression_to_string_or_inside_and() {
820        let or_expr = LicenseExpression::Or {
821            left: Box::new(LicenseExpression::License("mit".to_string())),
822            right: Box::new(LicenseExpression::License("apache-2.0".to_string())),
823        };
824        let and_expr = LicenseExpression::And {
825            left: Box::new(or_expr),
826            right: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
827        };
828        assert_eq!(
829            expression_to_string(&and_expr),
830            "(mit OR apache-2.0) AND gpl-2.0"
831        );
832    }
833
834    #[test]
835    fn test_expression_to_string_and_inside_or() {
836        let and_expr = LicenseExpression::And {
837            left: Box::new(LicenseExpression::License("mit".to_string())),
838            right: Box::new(LicenseExpression::License("apache-2.0".to_string())),
839        };
840        let or_expr = LicenseExpression::Or {
841            left: Box::new(and_expr),
842            right: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
843        };
844        assert_eq!(
845            expression_to_string(&or_expr),
846            "(mit AND apache-2.0) OR gpl-2.0"
847        );
848    }
849
850    #[test]
851    fn test_expression_to_string_with_inside_or() {
852        let with_expr = LicenseExpression::With {
853            left: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
854            right: Box::new(LicenseExpression::License(
855                "classpath-exception-2.0".to_string(),
856            )),
857        };
858        let or_expr = LicenseExpression::Or {
859            left: Box::new(with_expr),
860            right: Box::new(LicenseExpression::License("mit".to_string())),
861        };
862        assert_eq!(
863            expression_to_string(&or_expr),
864            "gpl-2.0 WITH classpath-exception-2.0 OR mit"
865        );
866    }
867
868    #[test]
869    fn test_expression_to_string_with_inside_and() {
870        let with_expr = LicenseExpression::With {
871            left: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
872            right: Box::new(LicenseExpression::License(
873                "classpath-exception-2.0".to_string(),
874            )),
875        };
876        let and_expr = LicenseExpression::And {
877            left: Box::new(with_expr),
878            right: Box::new(LicenseExpression::License("mit".to_string())),
879        };
880        assert_eq!(
881            expression_to_string(&and_expr),
882            "gpl-2.0 WITH classpath-exception-2.0 AND mit"
883        );
884    }
885
886    #[test]
887    fn test_expression_to_string_nested_or_flattens_same_operator_grouping() {
888        let or_expr = LicenseExpression::Or {
889            left: Box::new(LicenseExpression::Or {
890                left: Box::new(LicenseExpression::License("mit".to_string())),
891                right: Box::new(LicenseExpression::License("apache-2.0".to_string())),
892            }),
893            right: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
894        };
895        assert_eq!(
896            expression_to_string(&or_expr),
897            "mit OR apache-2.0 OR gpl-2.0"
898        );
899    }
900
901    #[test]
902    fn test_expression_to_string_nested_and_flattens_same_operator_grouping() {
903        let and_expr = LicenseExpression::And {
904            left: Box::new(LicenseExpression::And {
905                left: Box::new(LicenseExpression::License("mit".to_string())),
906                right: Box::new(LicenseExpression::License("apache-2.0".to_string())),
907            }),
908            right: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
909        };
910        assert_eq!(
911            expression_to_string(&and_expr),
912            "mit AND apache-2.0 AND gpl-2.0"
913        );
914    }
915
916    #[test]
917    fn test_expression_to_string_roundtrip_or_and() {
918        let input = "(mit OR apache-2.0) AND gpl-2.0";
919        let expr = super::super::parse::parse_expression(input).unwrap();
920        let output = expression_to_string(&expr);
921        assert_eq!(output, "(mit OR apache-2.0) AND gpl-2.0");
922    }
923
924    #[test]
925    fn test_expression_to_string_roundtrip_or_with() {
926        let input = "gpl-2.0 WITH classpath-exception-2.0 OR mit";
927        let expr = super::super::parse::parse_expression(input).unwrap();
928        let output = expression_to_string(&expr);
929        assert_eq!(output, "gpl-2.0 WITH classpath-exception-2.0 OR mit");
930    }
931
932    #[test]
933    fn test_combine_expressions_empty() {
934        let result = combine_expressions_and(&[], true).unwrap();
935        assert_eq!(result, "");
936    }
937
938    #[test]
939    fn test_combine_expressions_single() {
940        let result = combine_expressions_and(&["mit"], true).unwrap();
941        assert_eq!(result, "mit");
942    }
943
944    #[test]
945    fn test_combine_expressions_two_and() {
946        let result = combine_expressions_and(&["mit", "gpl-2.0-plus"], true).unwrap();
947        assert_eq!(result, "gpl-2.0-plus AND mit");
948    }
949
950    #[test]
951    fn test_combine_expressions_two_or() {
952        let result = combine_expressions_or(&["mit", "apache-2.0"], true).unwrap();
953        assert_eq!(result, "apache-2.0 OR mit");
954    }
955
956    #[test]
957    fn test_combine_expressions_multiple_and() {
958        let result = combine_expressions_and(&["mit", "apache-2.0", "gpl-2.0-plus"], true).unwrap();
959        assert_eq!(result, "apache-2.0 AND gpl-2.0-plus AND mit");
960    }
961
962    #[test]
963    fn test_combine_expressions_with_duplicates_unique() {
964        let result = combine_expressions_or(&["mit", "mit", "apache-2.0"], true).unwrap();
965        let expr = super::super::parse::parse_expression(&result).unwrap();
966        let keys = expr.license_keys();
967        assert_eq!(keys.len(), 2);
968        assert!(keys.contains(&"mit".to_string()));
969        assert!(keys.contains(&"apache-2.0".to_string()));
970    }
971
972    #[test]
973    fn test_combine_expressions_with_duplicates_not_unique() {
974        let result = combine_expressions_or(&["mit", "mit", "apache-2.0"], false).unwrap();
975        let expr = super::super::parse::parse_expression(&result).unwrap();
976        assert_eq!(result, "mit OR mit OR apache-2.0");
977        let keys = expr.license_keys();
978        assert_eq!(keys.len(), 2);
979    }
980
981    #[test]
982    fn test_combine_expressions_complex_with_simplification() {
983        let result = combine_expressions_and(&["mit OR apache-2.0", "gpl-2.0-plus"], true).unwrap();
984        assert_eq!(result, "(apache-2.0 OR mit) AND gpl-2.0-plus");
985        let expr = super::super::parse::parse_expression(&result).unwrap();
986        assert!(matches!(expr, LicenseExpression::And { .. }));
987        let keys = expr.license_keys();
988        assert_eq!(keys.len(), 3);
989    }
990
991    #[test]
992    fn test_combine_expressions_parse_error() {
993        let result = combine_expressions_and(&["mit", "@invalid@"], true);
994        assert!(result.is_err());
995    }
996
997    #[test]
998    fn test_combine_expressions_with_existing_and() {
999        let result = combine_expressions_and(&["mit AND apache-2.0", "gpl-2.0"], true).unwrap();
1000        assert!(result.contains("mit"));
1001        assert!(result.contains("apache-2.0"));
1002        assert!(result.contains("gpl-2.0"));
1003    }
1004
1005    #[test]
1006    fn test_combine_expressions_with_existing_or() {
1007        let result = combine_expressions_or(&["mit OR apache-2.0", "gpl-2.0"], true).unwrap();
1008        assert!(result.contains("mit"));
1009        assert!(result.contains("apache-2.0"));
1010        assert!(result.contains("gpl-2.0"));
1011    }
1012
1013    #[test]
1014    fn test_expression_to_string_with_no_outer_parens() {
1015        let with_expr = LicenseExpression::With {
1016            left: Box::new(LicenseExpression::License("gpl-2.0-plus".to_string())),
1017            right: Box::new(LicenseExpression::License(
1018                "classpath-exception-2.0".to_string(),
1019            )),
1020        };
1021        assert_eq!(
1022            expression_to_string(&with_expr),
1023            "gpl-2.0-plus WITH classpath-exception-2.0"
1024        );
1025    }
1026
1027    #[test]
1028    fn test_expression_to_string_with_as_right_operand_of_or() {
1029        let with_expr = LicenseExpression::With {
1030            left: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
1031            right: Box::new(LicenseExpression::License(
1032                "classpath-exception-2.0".to_string(),
1033            )),
1034        };
1035        let or_expr = LicenseExpression::Or {
1036            left: Box::new(LicenseExpression::License("mit".to_string())),
1037            right: Box::new(with_expr),
1038        };
1039        assert_eq!(
1040            expression_to_string(&or_expr),
1041            "mit OR gpl-2.0 WITH classpath-exception-2.0"
1042        );
1043    }
1044
1045    #[test]
1046    fn test_expression_to_string_with_as_right_operand_of_and() {
1047        let with_expr = LicenseExpression::With {
1048            left: Box::new(LicenseExpression::License("gpl-2.0".to_string())),
1049            right: Box::new(LicenseExpression::License(
1050                "classpath-exception-2.0".to_string(),
1051            )),
1052        };
1053        let and_expr = LicenseExpression::And {
1054            left: Box::new(LicenseExpression::License("mit".to_string())),
1055            right: Box::new(with_expr),
1056        };
1057        assert_eq!(
1058            expression_to_string(&and_expr),
1059            "mit AND gpl-2.0 WITH classpath-exception-2.0"
1060        );
1061    }
1062
1063    #[test]
1064    fn test_expression_to_string_complex_precedence() {
1065        let input = "mit OR apache-2.0 AND gpl-2.0";
1066        let expr = super::super::parse::parse_expression(input).unwrap();
1067        assert_eq!(
1068            expression_to_string(&expr),
1069            "mit OR (apache-2.0 AND gpl-2.0)"
1070        );
1071    }
1072
1073    #[test]
1074    fn test_expression_to_string_with_no_outer_parens_in_complex_and() {
1075        // WITH has higher precedence than AND
1076        // Parsed as: (bsd-new AND mit) AND (gpl-3.0-plus WITH autoconf-simple-exception)
1077        let input = "bsd-new AND mit AND gpl-3.0-plus WITH autoconf-simple-exception";
1078        let expr = super::super::parse::parse_expression(input).unwrap();
1079        assert_eq!(
1080            expression_to_string(&expr),
1081            "bsd-new AND mit AND gpl-3.0-plus WITH autoconf-simple-exception"
1082        );
1083    }
1084
1085    #[test]
1086    fn test_combine_expressions_and_flattens_reported_redundant_parentheses() {
1087        let result = combine_expressions_and(
1088            &[
1089                "Apache-2.0",
1090                "BSD-3-Clause",
1091                "GPL-2.0-only",
1092                "LicenseRef-scancode-oracle-openjdk-exception-2.0",
1093                "APSL-1.0",
1094                "APSL-2.0",
1095            ],
1096            true,
1097        )
1098        .unwrap();
1099
1100        assert_eq!(
1101            result,
1102            "apache-2.0 AND apsl-1.0 AND apsl-2.0 AND bsd-3-clause AND gpl-2.0-only AND licenseref-scancode-oracle-openjdk-exception-2.0"
1103        );
1104    }
1105
1106    #[test]
1107    fn test_build_expression_from_list_balances_large_and_chains() {
1108        let unique: Vec<_> = (0..1024)
1109            .map(|idx| LicenseExpression::License(format!("license-{idx}")))
1110            .collect();
1111
1112        let result = build_expression_from_list(&unique, true);
1113
1114        assert!(expression_depth(&result) <= 12);
1115    }
1116
1117    #[test]
1118    fn test_build_expression_from_list_balances_large_or_chains() {
1119        let unique: Vec<_> = (0..1024)
1120            .map(|idx| LicenseExpression::License(format!("license-{idx}")))
1121            .collect();
1122
1123        let result = build_expression_from_list(&unique, false);
1124
1125        assert!(expression_depth(&result) <= 12);
1126    }
1127}
1128
#[cfg(test)]
mod contains_tests {
    use super::*;

    // Table-driven checks for `licensing_contains`. Every pair is
    // `(container, candidate)` and is passed to the function in that order.

    /// Asserts that `licensing_contains` returns `true` for every pair.
    #[track_caller]
    fn assert_contains_all(pairs: &[(&str, &str)]) {
        for &(container, candidate) in pairs {
            assert!(
                licensing_contains(container, candidate),
                "expected {container:?} to contain {candidate:?}"
            );
        }
    }

    /// Asserts that `licensing_contains` returns `false` for every pair.
    #[track_caller]
    fn assert_contains_none(pairs: &[(&str, &str)]) {
        for &(container, candidate) in pairs {
            assert!(
                !licensing_contains(container, candidate),
                "expected {container:?} not to contain {candidate:?}"
            );
        }
    }

    #[test]
    fn test_basic_containment() {
        assert_contains_all(&[("mit", "mit")]);
        assert_contains_none(&[("mit", "apache")]);
    }

    #[test]
    fn test_or_containment() {
        assert_contains_all(&[("mit OR apache", "mit"), ("mit OR apache", "apache")]);
        assert_contains_none(&[("mit OR apache", "gpl")]);
    }

    #[test]
    fn test_and_containment() {
        assert_contains_all(&[("mit AND apache", "mit"), ("mit AND apache", "apache")]);
        assert_contains_none(&[("mit", "mit AND apache")]);
    }

    #[test]
    fn test_expression_subset() {
        assert_contains_all(&[
            ("mit AND apache AND bsd", "mit AND apache"),
            ("mit OR apache OR bsd", "mit OR apache"),
        ]);
        assert_contains_none(&[
            ("mit AND apache", "mit AND apache AND bsd"),
            ("mit OR apache", "mit OR apache OR bsd"),
        ]);
    }

    #[test]
    fn test_order_independence() {
        assert_contains_all(&[
            ("mit AND apache", "apache AND mit"),
            ("mit OR apache", "apache OR mit"),
        ]);
    }

    #[test]
    fn test_plus_suffix_no_containment() {
        // The `-plus` key and the plain key do not contain each other.
        assert_contains_none(&[("gpl-2.0-plus", "gpl-2.0"), ("gpl-2.0", "gpl-2.0-plus")]);
    }

    #[test]
    fn test_with_decomposition() {
        // A WITH expression contains both of its sides, but not the other way around.
        assert_contains_all(&[
            ("gpl-2.0 WITH classpath-exception", "gpl-2.0"),
            ("gpl-2.0 WITH classpath-exception", "classpath-exception"),
        ]);
        assert_contains_none(&[("gpl-2.0", "gpl-2.0 WITH classpath-exception")]);
    }

    #[test]
    fn test_mixed_operators() {
        assert_contains_none(&[
            ("mit OR apache", "mit AND apache"),
            ("mit AND apache", "mit OR apache"),
        ]);
    }

    #[test]
    fn test_nested_expressions() {
        // Only the direct AND operands count; a key buried inside the OR group does not.
        assert_contains_none(&[("(mit OR apache) AND bsd", "mit")]);
        assert_contains_all(&[
            ("(mit OR apache) AND bsd", "mit OR apache"),
            ("(mit OR apache) AND bsd", "bsd"),
        ]);
    }

    #[test]
    fn test_empty_expressions() {
        // Empty or whitespace-only input on either side never counts as contained.
        assert_contains_none(&[("", "mit"), ("mit", ""), ("", ""), ("   ", "mit")]);
    }

    #[test]
    fn test_invalid_expressions() {
        // Malformed expressions on either side yield `false`.
        assert_contains_none(&[("mit AND", "mit"), ("mit", "AND apache")]);
    }
}