provenant/license_detection/expression/
parse.rs1use super::{LicenseExpression, ParseError};
4
5#[derive(Debug, Clone, PartialEq, Eq, Hash)]
7pub(super) enum Token {
8 License(String),
10
11 And,
13
14 Or,
16
17 With,
19
20 LeftParen,
22
23 RightParen,
25}
26
27pub fn parse_expression(expr: &str) -> Result<LicenseExpression, ParseError> {
42 let trimmed = expr.trim();
43 if trimmed.is_empty() {
44 return Err(ParseError::EmptyExpression);
45 }
46
47 let tokens = tokenize(trimmed)?;
48 parse_tokens(&tokens)
49}
50
51pub(super) fn tokenize(expr: &str) -> Result<Vec<Token>, ParseError> {
53 let mut tokens = Vec::new();
54 let mut pos = 0;
55 let chars: Vec<char> = expr.chars().collect();
56
57 while pos < chars.len() {
58 let c = chars[pos];
59
60 if c.is_whitespace() {
61 pos += 1;
62 continue;
63 }
64
65 match c {
66 '(' => {
67 tokens.push(Token::LeftParen);
68 pos += 1;
69 }
70 ')' => {
71 tokens.push(Token::RightParen);
72 pos += 1;
73 }
74 _ => {
75 if c.is_alphanumeric() || c == '-' || c == '.' || c == '_' || c == '+' {
76 let start = pos;
77 while pos < chars.len()
78 && (chars[pos].is_alphanumeric()
79 || chars[pos] == '-'
80 || chars[pos] == '.'
81 || chars[pos] == '_'
82 || chars[pos] == '+')
83 {
84 pos += 1;
85 }
86 let text: String = chars[start..pos].iter().collect();
87 let token = match_text_to_token(&text);
88 tokens.push(token);
89 } else {
90 return Err(ParseError::UnexpectedToken {
91 token: c.to_string(),
92 position: pos,
93 });
94 }
95 }
96 }
97 }
98
99 Ok(tokens)
100}
101
102fn match_text_to_token(text: &str) -> Token {
104 let text_upper = text.to_uppercase();
105 match text_upper.as_str() {
106 "AND" => Token::And,
107 "OR" => Token::Or,
108 "WITH" => Token::With,
109 _ => Token::License(text.to_lowercase()),
110 }
111}
112
113pub(super) fn parse_tokens(tokens: &[Token]) -> Result<LicenseExpression, ParseError> {
115 if tokens.is_empty() {
116 return Err(ParseError::EmptyExpression);
117 }
118
119 let (expr, remaining) = parse_or(tokens)?;
120 if !remaining.is_empty() {
121 return Err(ParseError::ParseError(format!(
122 "Unexpected tokens after parsing: {:?}",
123 remaining
124 )));
125 }
126
127 Ok(expr)
128}
129
130pub(super) fn parse_or(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
132 let (mut expr, mut remaining) = parse_and(tokens)?;
133
134 while let Some(Token::Or) = remaining.first() {
135 remaining = &remaining[1..];
136 let (right, rest) = parse_and(remaining)?;
137 expr = LicenseExpression::Or {
138 left: Box::new(expr),
139 right: Box::new(right),
140 };
141 remaining = rest;
142 }
143
144 Ok((expr, remaining))
145}
146
147pub(super) fn parse_and(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
149 let (mut expr, mut remaining) = parse_with(tokens)?;
150
151 while let Some(Token::And) = remaining.first() {
152 remaining = &remaining[1..];
153 let (right, rest) = parse_with(remaining)?;
154 expr = LicenseExpression::And {
155 left: Box::new(expr),
156 right: Box::new(right),
157 };
158 remaining = rest;
159 }
160
161 Ok((expr, remaining))
162}
163
164pub(super) fn parse_with(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
166 let (mut expr, mut remaining) = parse_primary(tokens)?;
167
168 while let Some(Token::With) = remaining.first() {
169 remaining = &remaining[1..];
170 let (right, rest) = parse_primary(remaining)?;
171 expr = LicenseExpression::With {
172 left: Box::new(expr),
173 right: Box::new(right),
174 };
175 remaining = rest;
176 }
177
178 Ok((expr, remaining))
179}
180
181pub(super) fn parse_primary(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
183 if tokens.is_empty() {
184 return Err(ParseError::EmptyExpression);
185 }
186
187 match &tokens[0] {
188 Token::LeftParen => {
189 if tokens.len() < 2 {
190 return Err(ParseError::MismatchedParentheses);
191 }
192 let (expr, remaining) = parse_or(&tokens[1..])?;
193 if remaining.is_empty() || remaining[0] != Token::RightParen {
194 return Err(ParseError::MismatchedParentheses);
195 }
196 Ok((expr, &remaining[1..]))
197 }
198 Token::License(key) => {
199 let expr = if key.starts_with("licenseref-") {
200 LicenseExpression::LicenseRef(key.clone())
201 } else {
202 LicenseExpression::License(key.clone())
203 };
204 Ok((expr, &tokens[1..]))
205 }
206 Token::RightParen => Err(ParseError::MismatchedParentheses),
207 Token::And | Token::Or | Token::With => Err(ParseError::ParseError(format!(
208 "Unexpected operator at start: {:?}",
209 tokens[0]
210 ))),
211 }
212}
213
/// Unit tests for the expression tokenizer and parser, driven through
/// `parse_expression`.
#[cfg(test)]
mod tests {
    use super::super::{LicenseExpression, expression_to_string};
    use super::*;

    // --- Single keys and case folding -------------------------------------

    #[test]
    fn test_parse_simple_license() {
        let expr = parse_expression("MIT").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_simple_lowercase() {
        let expr = parse_expression("mit").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_simple_mixed_case() {
        let expr = parse_expression("MiT").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    // --- Binary operators --------------------------------------------------

    #[test]
    fn test_parse_and_expression() {
        let expr = parse_expression("MIT AND Apache-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
        assert_eq!(expression_to_string(&expr), "mit AND apache-2.0");
    }

    #[test]
    fn test_parse_or_expression() {
        let expr = parse_expression("MIT OR Apache-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
        assert_eq!(expression_to_string(&expr), "mit OR apache-2.0");
    }

    #[test]
    fn test_parse_with_expression() {
        let expr = parse_expression("GPL-2.0 WITH Classpath-exception-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::With { .. }));
        assert_eq!(
            expression_to_string(&expr),
            "gpl-2.0 WITH classpath-exception-2.0"
        );
    }

    // --- Parentheses -------------------------------------------------------

    #[test]
    fn test_parse_parenthesized_expression() {
        let expr = parse_expression("(MIT OR Apache-2.0)").unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_parse_complex_expression() {
        let expr =
            parse_expression("(GPL-2.0 WITH Classpath-exception-2.0) AND Apache-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_parse_nested_parens() {
        let expr = parse_expression("((MIT OR Apache-2.0) AND GPL-2.0)").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
    }

    // --- Key spelling variants --------------------------------------------

    #[test]
    fn test_parse_scancode_plus_license() {
        let expr = parse_expression("gpl-2.0-plus").unwrap();
        assert_eq!(expr, LicenseExpression::License("gpl-2.0-plus".to_string()));
    }

    #[test]
    fn test_parse_licenseref() {
        let expr = parse_expression("LicenseRef-scancode-custom-1").unwrap();
        assert_eq!(
            expr,
            LicenseExpression::LicenseRef("licenseref-scancode-custom-1".to_string())
        );
    }

    // --- Whitespace handling ----------------------------------------------

    #[test]
    fn test_parse_various_whitespace() {
        // Escapes keep the unusual whitespace visible and safe from editors
        // or formatters that would collapse it into single spaces.
        let expr1 = parse_expression("MIT   AND\t\nApache-2.0").unwrap();
        let expr2 = parse_expression("MIT AND Apache-2.0").unwrap();
        assert_eq!(expr1, expr2);
    }

    #[test]
    fn test_parse_trailing_whitespace() {
        let expr = parse_expression("MIT \t\n").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_leading_whitespace() {
        let expr = parse_expression("\n\t MIT").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    // --- Error cases -------------------------------------------------------

    #[test]
    fn test_parse_empty_expression() {
        let result = parse_expression("");
        assert!(matches!(result, Err(ParseError::EmptyExpression)));
    }

    #[test]
    fn test_parse_whitespace_only() {
        let result = parse_expression(" \t\n ");
        assert!(matches!(result, Err(ParseError::EmptyExpression)));
    }

    #[test]
    fn test_parse_mismatched_open_paren() {
        let result = parse_expression("(MIT AND Apache-2.0");
        assert!(matches!(result, Err(ParseError::MismatchedParentheses)));
    }

    #[test]
    fn test_parse_mismatched_close_paren() {
        // A stray `)` after a complete expression is reported as leftover
        // tokens rather than as mismatched parentheses.
        let result = parse_expression("MIT AND Apache-2.0)");
        assert!(matches!(result, Err(ParseError::ParseError(_))));
    }

    #[test]
    fn test_parse_unexpected_character() {
        let result = parse_expression("MIT @ Apache-2.0");
        assert!(matches!(result, Err(ParseError::UnexpectedToken { .. })));
    }

    // --- Chaining and precedence ------------------------------------------

    #[test]
    fn test_parse_multiple_licenses_or() {
        let expr = parse_expression("MIT OR Apache-2.0 OR GPL-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_parse_multiple_licenses_and() {
        let expr = parse_expression("MIT AND Apache-2.0 AND GPL-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_contractor_precedence_and_or() {
        // `AND` binds tighter than `OR`, so the root is `OR` and the `AND`
        // group sits on its right-hand side.
        let expr = parse_expression("MIT OR Apache-2.0 AND GPL-2.0").unwrap();
        match expr {
            LicenseExpression::Or { left, right } => {
                assert_eq!(*left, LicenseExpression::License("mit".to_string()));
                assert!(matches!(*right, LicenseExpression::And { .. }));
            }
            other => panic!("expected OR at the root, got {:?}", other),
        }
    }

    // --- license_keys() on parsed expressions -----------------------------

    #[test]
    fn test_license_keys_simple() {
        let expr = parse_expression("MIT").unwrap();
        let keys = expr.license_keys();
        assert_eq!(keys, vec!["mit"]);
    }

    #[test]
    fn test_license_keys_multiple() {
        let expr = parse_expression("MIT OR Apache-2.0 AND GPL-2.0").unwrap();
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 3);
        assert!(keys.contains(&"mit".to_string()));
        assert!(keys.contains(&"apache-2.0".to_string()));
        assert!(keys.contains(&"gpl-2.0".to_string()));
    }

    #[test]
    fn test_license_keys_deduplication() {
        let expr = parse_expression("MIT AND MIT OR Apache-2.0").unwrap();
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 2);
        assert!(keys.contains(&"mit".to_string()));
        assert!(keys.contains(&"apache-2.0".to_string()));
    }

    // --- More key spelling variants ---------------------------------------

    #[test]
    fn test_parse_gpl_or_later_license() {
        // Same input as `test_parse_scancode_plus_license`; kept so the
        // existing test name remains available.
        let expr = parse_expression("gpl-2.0-plus").unwrap();
        assert_eq!(expr, LicenseExpression::License("gpl-2.0-plus".to_string()));
    }

    #[test]
    fn test_parse_gpl_plus_license() {
        let expr = parse_expression("GPL-2.0+").unwrap();
        assert_eq!(expr, LicenseExpression::License("gpl-2.0+".to_string()));
    }

    // --- Larger compositions ----------------------------------------------

    #[test]
    fn test_parse_complex_nested_expression() {
        let input = "(MIT OR Apache-2.0) AND (GPL-2.0 OR BSD-3-Clause)";
        let expr = parse_expression(input).unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 4);
    }

    #[test]
    fn test_parse_multiple_with_expressions() {
        let expr = parse_expression(
            "GPL-2.0 WITH Classpath-exception-2.0 AND GPL-2.0 WITH GCC-exception-2.0",
        )
        .unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
        let keys = expr.license_keys();
        assert!(keys.contains(&"gpl-2.0".to_string()));
        assert!(keys.contains(&"classpath-exception-2.0".to_string()));
        assert!(keys.contains(&"gcc-exception-2.0".to_string()));
    }

    #[test]
    fn test_parse_with_inside_and_inside_or() {
        let expr = parse_expression("MIT OR (Apache-2.0 AND GPL-2.0 WITH Classpath-exception-2.0)")
            .unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
    }

    // --- Misplaced operators ----------------------------------------------

    #[test]
    fn test_parse_operator_at_start_error() {
        let result = parse_expression("AND MIT");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_operator_at_end_error() {
        let result = parse_expression("MIT AND");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_double_operator_error() {
        let result = parse_expression("MIT AND AND Apache-2.0");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_license_with_dots() {
        let expr = parse_expression("LicenseRef-scancode-1.0").unwrap();
        assert_eq!(
            expr,
            LicenseExpression::LicenseRef("licenseref-scancode-1.0".to_string())
        );
    }

    #[test]
    fn test_parse_deeply_nested_expression() {
        let input = "((MIT OR Apache-2.0) AND GPL-2.0) OR BSD-3-Clause";
        let expr = parse_expression(input).unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 4);
    }

    // --- Case-insensitive operator keywords -------------------------------

    #[test]
    fn test_parse_case_insensitive_operators() {
        let expr1 = parse_expression("MIT and Apache-2.0").unwrap();
        let expr2 = parse_expression("MIT AND Apache-2.0").unwrap();
        let expr3 = parse_expression("MIT And Apache-2.0").unwrap();
        assert_eq!(expression_to_string(&expr1), "mit AND apache-2.0");
        assert_eq!(expression_to_string(&expr2), "mit AND apache-2.0");
        assert_eq!(expression_to_string(&expr3), "mit AND apache-2.0");
    }

    #[test]
    fn test_parse_or_case_insensitive() {
        let expr1 = parse_expression("MIT or Apache-2.0").unwrap();
        let expr2 = parse_expression("MIT OR Apache-2.0").unwrap();
        assert_eq!(expression_to_string(&expr1), "mit OR apache-2.0");
        assert_eq!(expression_to_string(&expr2), "mit OR apache-2.0");
    }

    #[test]
    fn test_parse_with_case_insensitive() {
        let expr1 = parse_expression("GPL-2.0 with Classpath-exception-2.0").unwrap();
        let expr2 = parse_expression("GPL-2.0 WITH Classpath-exception-2.0").unwrap();
        assert_eq!(
            expression_to_string(&expr1),
            "gpl-2.0 WITH classpath-exception-2.0"
        );
        assert_eq!(
            expression_to_string(&expr2),
            "gpl-2.0 WITH classpath-exception-2.0"
        );
    }
}