1use std::collections::{HashMap, HashSet};
5
6use crate::license_detection::expression::{
7 LicenseExpression, expression_to_string, parse_expression, simplify_expression,
8 simplify_expression_preserving_structure,
9};
10
/// Boolean relation used to join several license expressions into one.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the relation can be used in
/// assertions and diagnostic output.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ExpressionRelation {
    /// Join the expressions with `AND`.
    And,
    /// Join the expressions with `OR`.
    Or,
}
16
/// Boolean operator of the chain currently being rendered.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the operator can be used in
/// assertions and diagnostic output.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BooleanOperator {
    /// Operands are separated by ` AND `.
    And,
    /// Operands are separated by ` OR `.
    Or,
}
22
23pub fn combine_license_expressions(
24 expressions: impl IntoIterator<Item = String>,
25) -> Option<String> {
26 combine_license_expressions_with_relation(expressions, ExpressionRelation::And)
27}
28
29pub fn combine_license_expressions_preserving_structure(
30 expressions: impl IntoIterator<Item = String>,
31) -> Option<String> {
32 combine_license_expressions_with_relation_and_mode(expressions, ExpressionRelation::And, true)
33}
34
35pub(crate) fn combine_license_expressions_preserving_structure_strict(
36 expressions: impl IntoIterator<Item = String>,
37) -> Option<String> {
38 combine_license_expressions_with_relation_and_mode_strict(
39 expressions,
40 ExpressionRelation::And,
41 true,
42 )
43}
44
45pub fn select_primary_license_expression(
46 expressions: impl IntoIterator<Item = String>,
47) -> Option<String> {
48 let mut unique = Vec::new();
49
50 for expression in expressions {
51 let trimmed = expression.trim();
52 if trimmed.is_empty() {
53 continue;
54 }
55
56 if !unique.iter().any(|existing: &String| existing == trimmed) {
57 unique.push(trimmed.to_string());
58 }
59 }
60
61 if unique.is_empty() {
62 return None;
63 }
64
65 if unique.len() == 1 {
66 return unique.into_iter().next();
67 }
68
69 let joined: Vec<String> = unique
70 .iter()
71 .filter(|expression| is_joined_expression(expression))
72 .cloned()
73 .collect();
74
75 if joined.len() != 1 {
76 return None;
77 }
78
79 let candidate = &joined[0];
80 unique
81 .iter()
82 .filter(|expression| *expression != candidate)
83 .all(|expression| expression_covers(candidate, expression))
84 .then(|| candidate.clone())
85}
86
87pub(crate) fn select_primary_license_expression_strict(
88 expressions: impl IntoIterator<Item = String>,
89) -> Option<String> {
90 let expressions: Vec<String> = expressions.into_iter().collect();
91 select_primary_license_expression(expressions).and_then(|expression| {
92 combine_license_expressions_preserving_structure_strict([expression])
93 })
94}
95
96pub(crate) fn combine_license_expressions_with_relation_preserving_structure_strict(
97 expressions: impl IntoIterator<Item = String>,
98 relation: ExpressionRelation,
99) -> Option<String> {
100 combine_license_expressions_with_relation_and_mode_strict(expressions, relation, true)
101}
102
103pub(crate) fn combine_license_expressions_with_relation(
104 expressions: impl IntoIterator<Item = String>,
105 relation: ExpressionRelation,
106) -> Option<String> {
107 combine_license_expressions_with_relation_and_mode(expressions, relation, false)
108}
109
110fn combine_license_expressions_with_relation_and_mode(
111 expressions: impl IntoIterator<Item = String>,
112 relation: ExpressionRelation,
113 preserve_structure: bool,
114) -> Option<String> {
115 let expressions: Vec<String> = expressions
116 .into_iter()
117 .map(|expression| expression.trim().to_string())
118 .filter(|expression| !expression.is_empty())
119 .collect();
120
121 if expressions.is_empty() {
122 return None;
123 }
124
125 combine_parsed_expressions(&expressions, relation, preserve_structure)
126 .or_else(|| combine_license_expressions_fallback(&expressions, relation))
127}
128
129fn combine_license_expressions_with_relation_and_mode_strict(
130 expressions: impl IntoIterator<Item = String>,
131 relation: ExpressionRelation,
132 preserve_structure: bool,
133) -> Option<String> {
134 let expressions: Vec<String> = expressions
135 .into_iter()
136 .map(|expression| expression.trim().to_string())
137 .filter(|expression| !expression.is_empty())
138 .collect();
139
140 if expressions.is_empty() {
141 return None;
142 }
143
144 combine_parsed_expressions(&expressions, relation, preserve_structure)
145}
146
147fn combine_parsed_expressions(
148 expressions: &[String],
149 relation: ExpressionRelation,
150 preserve_structure: bool,
151) -> Option<String> {
152 let mut case_map = HashMap::new();
153 let parsed_expressions: Vec<LicenseExpression> = expressions
154 .iter()
155 .map(|expression| {
156 collect_term_case(expression, &mut case_map);
157 parse_expression(expression).ok()
158 })
159 .collect::<Option<Vec<_>>>()?;
160
161 let combined = match relation {
162 ExpressionRelation::And => LicenseExpression::and(parsed_expressions),
163 ExpressionRelation::Or => LicenseExpression::or(parsed_expressions),
164 }?;
165
166 let combined = if preserve_structure {
167 simplify_expression_preserving_structure(&combined)
168 } else {
169 simplify_expression(&combined)
170 };
171 Some(render_expression_with_case_map(&combined, &case_map))
172}
173
174fn combine_license_expressions_fallback(
175 expressions: &[String],
176 relation: ExpressionRelation,
177) -> Option<String> {
178 let unique_expressions: HashSet<String> = expressions.iter().cloned().collect();
179 if unique_expressions.is_empty() {
180 return None;
181 }
182
183 let mut sorted_expressions: Vec<String> = unique_expressions.into_iter().collect();
184 sorted_expressions.sort();
185
186 let separator = match relation {
187 ExpressionRelation::And => " AND ",
188 ExpressionRelation::Or => " OR ",
189 };
190
191 Some(
192 sorted_expressions
193 .iter()
194 .map(|expr| wrap_compound_expression(expr))
195 .collect::<Vec<_>>()
196 .join(separator),
197 )
198}
199
/// Records the original spelling of every term found in `expression`.
///
/// A term is a maximal run of alphanumeric characters and `-`, `.`, `_`, `+`.
/// The keywords `AND`, `OR` and `WITH` (in any ASCII case) are skipped. Each
/// remaining term is stored in `case_map` under its ASCII-lowercased form; an
/// entry that already exists is left untouched, so the first spelling seen
/// wins.
fn collect_term_case(expression: &str, case_map: &mut HashMap<String, String>) {
    const KEYWORDS: [&str; 3] = ["AND", "OR", "WITH"];

    let is_term_char = |ch: char| ch.is_alphanumeric() || matches!(ch, '-' | '.' | '_' | '+');

    let terms = expression
        .split(|ch: char| !is_term_char(ch))
        .filter(|term| !term.is_empty());

    for term in terms {
        let is_keyword = KEYWORDS
            .iter()
            .any(|keyword| term.eq_ignore_ascii_case(keyword));
        if is_keyword {
            continue;
        }

        case_map
            .entry(term.to_ascii_lowercase())
            .or_insert_with(|| term.to_string());
    }
}
231
232fn render_expression_with_case_map(
233 expression: &LicenseExpression,
234 case_map: &HashMap<String, String>,
235) -> String {
236 match expression {
237 LicenseExpression::License(key) | LicenseExpression::LicenseRef(key) => {
238 case_map.get(key).cloned().unwrap_or_else(|| key.clone())
239 }
240 LicenseExpression::And { .. } => {
241 render_flat_boolean_chain(expression, BooleanOperator::And, case_map)
242 }
243 LicenseExpression::Or { .. } => {
244 render_flat_boolean_chain(expression, BooleanOperator::Or, case_map)
245 }
246 LicenseExpression::With { left, right } => format!(
247 "{} WITH {}",
248 render_expression_with_case_map(left, case_map),
249 render_expression_with_case_map(right, case_map)
250 ),
251 }
252}
253
254fn render_flat_boolean_chain(
255 expression: &LicenseExpression,
256 operator: BooleanOperator,
257 case_map: &HashMap<String, String>,
258) -> String {
259 let mut parts = Vec::new();
260 collect_boolean_chain(expression, operator, &mut parts);
261
262 let separator = match operator {
263 BooleanOperator::And => " AND ",
264 BooleanOperator::Or => " OR ",
265 };
266
267 parts
268 .into_iter()
269 .map(|part| render_boolean_operand(part, operator, case_map))
270 .collect::<Vec<_>>()
271 .join(separator)
272}
273
274fn collect_boolean_chain<'a>(
275 expression: &'a LicenseExpression,
276 operator: BooleanOperator,
277 parts: &mut Vec<&'a LicenseExpression>,
278) {
279 match (operator, expression) {
280 (BooleanOperator::And, LicenseExpression::And { left, right })
281 | (BooleanOperator::Or, LicenseExpression::Or { left, right }) => {
282 collect_boolean_chain(left, operator, parts);
283 collect_boolean_chain(right, operator, parts);
284 }
285 _ => parts.push(expression),
286 }
287}
288
289fn render_boolean_operand(
290 expression: &LicenseExpression,
291 parent_operator: BooleanOperator,
292 case_map: &HashMap<String, String>,
293) -> String {
294 match expression {
295 LicenseExpression::And { .. } => match parent_operator {
296 BooleanOperator::And => render_expression_with_case_map(expression, case_map),
297 BooleanOperator::Or => format!(
298 "({})",
299 render_expression_with_case_map(expression, case_map)
300 ),
301 },
302 LicenseExpression::Or { .. } => match parent_operator {
303 BooleanOperator::Or => render_expression_with_case_map(expression, case_map),
304 BooleanOperator::And => format!(
305 "({})",
306 render_expression_with_case_map(expression, case_map)
307 ),
308 },
309 _ => render_expression_with_case_map(expression, case_map),
310 }
311}
312
/// Wraps `expression` in parentheses when it is compound (contains a space)
/// and is not already enclosed by a single outer pair of parentheses.
///
/// Checking only the first and last characters is not enough:
/// `(A AND B) OR (C AND D)` starts with `(` and ends with `)`, yet joining it
/// unwrapped with another operand would change how the operators group. The
/// outer pair therefore counts only when the opening parenthesis at the start
/// is closed by the very last character.
fn wrap_compound_expression(expression: &str) -> String {
    /// Returns `true` when the first `(` is matched by the final `)`.
    fn is_enclosed_by_outer_pair(expression: &str) -> bool {
        if !(expression.starts_with('(') && expression.ends_with(')')) {
            return false;
        }

        // `)` is one byte, so the last character sits at `len() - 1`.
        let last = expression.len() - 1;
        let mut depth = 0usize;
        for (index, ch) in expression.char_indices() {
            match ch {
                '(' => depth += 1,
                ')' => {
                    let Some(remaining) = depth.checked_sub(1) else {
                        return false;
                    };
                    depth = remaining;
                    // The outermost group closed before the end of the text.
                    if depth == 0 && index != last {
                        return false;
                    }
                }
                _ => {}
            }
        }
        depth == 0
    }

    if expression.contains(' ') && !is_enclosed_by_outer_pair(expression) {
        format!("({})", expression)
    } else {
        expression.to_string()
    }
}
320
/// Reports whether `expression` contains a space-delimited `AND`, `OR` or
/// `WITH` keyword, compared ASCII-case-insensitively.
fn is_joined_expression(expression: &str) -> bool {
    const JOINERS: [&str; 3] = [" AND ", " OR ", " WITH "];

    let normalized = expression.to_ascii_uppercase();
    JOINERS.iter().any(|joiner| normalized.contains(joiner))
}
325
326fn expression_covers(container: &str, contained: &str) -> bool {
327 let Ok(parsed_container) = parse_expression(container) else {
328 return false;
329 };
330 let Ok(parsed_contained) = parse_expression(contained) else {
331 return false;
332 };
333
334 let simplified_container = simplify_expression(&parsed_container);
335 let simplified_contained = simplify_expression(&parsed_contained);
336
337 expression_covers_ast(&simplified_container, &simplified_contained)
338}
339
340fn expression_covers_ast(container: &LicenseExpression, contained: &LicenseExpression) -> bool {
341 if expression_to_string(container) == expression_to_string(contained) {
342 return true;
343 }
344
345 match (container, contained) {
346 (LicenseExpression::And { .. }, LicenseExpression::And { .. }) => {
347 let container_args = flat_and_args(container);
348 let contained_args = flat_and_args(contained);
349 contained_args.iter().all(|contained_arg| {
350 container_args.iter().any(|container_arg| {
351 expression_to_string(container_arg) == expression_to_string(contained_arg)
352 })
353 })
354 }
355 (LicenseExpression::Or { .. }, LicenseExpression::Or { .. }) => {
356 let container_args = flat_or_args(container);
357 let contained_args = flat_or_args(contained);
358 contained_args.iter().all(|contained_arg| {
359 container_args.iter().any(|container_arg| {
360 expression_to_string(container_arg) == expression_to_string(contained_arg)
361 })
362 })
363 }
364 (LicenseExpression::And { .. }, _) => {
365 flat_and_args(container).iter().any(|container_arg| {
366 expression_to_string(container_arg) == expression_to_string(contained)
367 })
368 }
369 (LicenseExpression::Or { .. }, _) => flat_or_args(container).iter().any(|container_arg| {
370 expression_to_string(container_arg) == expression_to_string(contained)
371 }),
372 _ => false,
373 }
374}
375
376fn flat_and_args(expr: &LicenseExpression) -> Vec<&LicenseExpression> {
377 let mut args = Vec::new();
378 collect_flat_args(expr, true, &mut args);
379 args
380}
381
382fn flat_or_args(expr: &LicenseExpression) -> Vec<&LicenseExpression> {
383 let mut args = Vec::new();
384 collect_flat_args(expr, false, &mut args);
385 args
386}
387
388fn collect_flat_args<'a>(
389 expr: &'a LicenseExpression,
390 and_operator: bool,
391 args: &mut Vec<&'a LicenseExpression>,
392) {
393 match expr {
394 LicenseExpression::And { left, right } if and_operator => {
395 collect_flat_args(left, and_operator, args);
396 collect_flat_args(right, and_operator, args);
397 }
398 LicenseExpression::Or { left, right } if !and_operator => {
399 collect_flat_args(left, and_operator, args);
400 collect_flat_args(right, and_operator, args);
401 }
402 _ => args.push(expr),
403 }
404}
405
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned `Vec<String>` input the functions under test expect.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn combine_license_expressions_preserves_spdx_case() {
        let combined = combine_license_expressions(owned(&["MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0 AND MIT"));
    }

    #[test]
    fn combine_license_expressions_flattens_same_operator_parentheses() {
        let combined = combine_license_expressions(owned(&["MIT", "ICU", "Unicode-TOU"]));

        assert_eq!(combined.as_deref(), Some("ICU AND MIT AND Unicode-TOU"));
    }

    #[test]
    fn combine_license_expressions_does_not_absorb_with_expressions() {
        let combined = combine_license_expressions(owned(&[
            "GPL-2.0 WITH Classpath-exception-2.0",
            "GPL-2.0",
        ]));

        assert_eq!(
            combined.as_deref(),
            Some("GPL-2.0 AND GPL-2.0 WITH Classpath-exception-2.0")
        );
    }

    #[test]
    fn combine_license_expressions_simplifies_absorbed_and_expression() {
        let combined = combine_license_expressions(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(combined.as_deref(), Some("Apache-2.0"));
    }

    #[test]
    fn combine_license_expressions_preserving_structure_keeps_distinct_nested_operands() {
        let combined =
            combine_license_expressions_preserving_structure(owned(&["MIT", "Apache-2.0 OR MIT"]));

        assert_eq!(combined.as_deref(), Some("MIT AND (Apache-2.0 OR MIT)"));
    }

    #[test]
    fn combine_license_expressions_with_relation_simplifies_absorbed_or_expression() {
        let combined = combine_license_expressions_with_relation(
            owned(&["MIT AND Apache-2.0", "MIT"]),
            ExpressionRelation::Or,
        );

        assert_eq!(combined.as_deref(), Some("MIT"));
    }

    #[test]
    fn select_primary_license_expression_prefers_joined_expression_covering_fragment() {
        let primary =
            select_primary_license_expression(owned(&["Apache-2.0 OR MIT", "Apache-2.0"]));

        assert_eq!(primary.as_deref(), Some("Apache-2.0 OR MIT"));
    }

    #[test]
    fn select_primary_license_expression_prefers_joined_expression_covering_all_singles() {
        let primary = select_primary_license_expression(owned(&[
            "MIT",
            "Apache-2.0 OR MIT",
            "Apache-2.0",
        ]));

        assert_eq!(primary.as_deref(), Some("Apache-2.0 OR MIT"));
    }

    #[test]
    fn select_primary_license_expression_returns_none_when_joined_expression_does_not_cover_rest() {
        let primary =
            select_primary_license_expression(owned(&["Apache-2.0 OR MIT", "GPL-2.0-only"]));

        assert_eq!(primary, None);
    }

    #[test]
    fn combine_license_expressions_preserving_structure_strict_rejects_invalid_expression() {
        let combined = combine_license_expressions_preserving_structure_strict(owned(&[
            "Apache-2.0",
            "MIT\" or malformed",
        ]));

        assert_eq!(combined, None);
    }

    #[test]
    fn select_primary_license_expression_strict_rejects_invalid_primary_expression() {
        let primary = select_primary_license_expression_strict(owned(&["MIT\" or malformed"]));

        assert_eq!(primary, None);
    }
}