Skip to main content

provenant/license_detection/expression/
parse.rs

1//! License expression parsing implementation.
2
3use super::{LicenseExpression, ParseError};
4
5/// Token in a license expression.
6#[derive(Debug, Clone, PartialEq, Eq, Hash)]
7pub(super) enum Token {
8    /// License key
9    License(String),
10
11    /// Operator: AND
12    And,
13
14    /// Operator: OR
15    Or,
16
17    /// Operator: WITH
18    With,
19
20    /// Opening parenthesis
21    LeftParen,
22
23    /// Closing parenthesis
24    RightParen,
25}
26
27/// Parse a license expression string into a structured expression.
28///
29/// # Arguments
30/// * `expr` - The license expression string to parse
31///
32/// # Returns
33/// Ok with parsed LicenseExpression, or Err with ParseError
34///
35/// # Examples
36/// ```
37/// use provenant::license_detection::expression::parse_expression;
38///
39/// let expr = parse_expression("MIT AND Apache-2.0").unwrap();
40/// ```
41pub fn parse_expression(expr: &str) -> Result<LicenseExpression, ParseError> {
42    let trimmed = expr.trim();
43    if trimmed.is_empty() {
44        return Err(ParseError::EmptyExpression);
45    }
46
47    let tokens = tokenize(trimmed)?;
48    parse_tokens(&tokens)
49}
50
51/// Tokenize a license expression string into tokens.
52pub(super) fn tokenize(expr: &str) -> Result<Vec<Token>, ParseError> {
53    let mut tokens = Vec::new();
54    let mut pos = 0;
55    let chars: Vec<char> = expr.chars().collect();
56
57    while pos < chars.len() {
58        let c = chars[pos];
59
60        if c.is_whitespace() {
61            pos += 1;
62            continue;
63        }
64
65        match c {
66            '(' => {
67                tokens.push(Token::LeftParen);
68                pos += 1;
69            }
70            ')' => {
71                tokens.push(Token::RightParen);
72                pos += 1;
73            }
74            _ => {
75                if c.is_alphanumeric() || c == '-' || c == '.' || c == '_' || c == '+' {
76                    let start = pos;
77                    while pos < chars.len()
78                        && (chars[pos].is_alphanumeric()
79                            || chars[pos] == '-'
80                            || chars[pos] == '.'
81                            || chars[pos] == '_'
82                            || chars[pos] == '+')
83                    {
84                        pos += 1;
85                    }
86                    let text: String = chars[start..pos].iter().collect();
87                    let token = match_text_to_token(&text);
88                    tokens.push(token);
89                } else {
90                    return Err(ParseError::UnexpectedToken {
91                        token: c.to_string(),
92                        position: pos,
93                    });
94                }
95            }
96        }
97    }
98
99    Ok(tokens)
100}
101
102/// Match text to appropriate token.
103fn match_text_to_token(text: &str) -> Token {
104    let text_upper = text.to_uppercase();
105    match text_upper.as_str() {
106        "AND" => Token::And,
107        "OR" => Token::Or,
108        "WITH" => Token::With,
109        _ => Token::License(text.to_lowercase()),
110    }
111}
112
113/// Parse tokens into a LicenseExpression using recursive descent.
114pub(super) fn parse_tokens(tokens: &[Token]) -> Result<LicenseExpression, ParseError> {
115    if tokens.is_empty() {
116        return Err(ParseError::EmptyExpression);
117    }
118
119    let (expr, remaining) = parse_or(tokens)?;
120    if !remaining.is_empty() {
121        return Err(ParseError::ParseError(format!(
122            "Unexpected tokens after parsing: {:?}",
123            remaining
124        )));
125    }
126
127    Ok(expr)
128}
129
130/// Parse OR expressions (lowest precedence).
131pub(super) fn parse_or(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
132    let (mut expr, mut remaining) = parse_and(tokens)?;
133
134    while let Some(Token::Or) = remaining.first() {
135        remaining = &remaining[1..];
136        let (right, rest) = parse_and(remaining)?;
137        expr = LicenseExpression::Or {
138            left: Box::new(expr),
139            right: Box::new(right),
140        };
141        remaining = rest;
142    }
143
144    Ok((expr, remaining))
145}
146
147/// Parse AND expressions (medium precedence).
148pub(super) fn parse_and(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
149    let (mut expr, mut remaining) = parse_with(tokens)?;
150
151    while let Some(Token::And) = remaining.first() {
152        remaining = &remaining[1..];
153        let (right, rest) = parse_with(remaining)?;
154        expr = LicenseExpression::And {
155            left: Box::new(expr),
156            right: Box::new(right),
157        };
158        remaining = rest;
159    }
160
161    Ok((expr, remaining))
162}
163
164/// Parse WITH expressions (highest precedence for operators).
165pub(super) fn parse_with(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
166    let (mut expr, mut remaining) = parse_primary(tokens)?;
167
168    while let Some(Token::With) = remaining.first() {
169        remaining = &remaining[1..];
170        let (right, rest) = parse_primary(remaining)?;
171        expr = LicenseExpression::With {
172            left: Box::new(expr),
173            right: Box::new(right),
174        };
175        remaining = rest;
176    }
177
178    Ok((expr, remaining))
179}
180
181/// Parse primary expressions (license keys or parenthesized expressions).
182pub(super) fn parse_primary(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
183    if tokens.is_empty() {
184        return Err(ParseError::EmptyExpression);
185    }
186
187    match &tokens[0] {
188        Token::LeftParen => {
189            if tokens.len() < 2 {
190                return Err(ParseError::MismatchedParentheses);
191            }
192            let (expr, remaining) = parse_or(&tokens[1..])?;
193            if remaining.is_empty() || remaining[0] != Token::RightParen {
194                return Err(ParseError::MismatchedParentheses);
195            }
196            Ok((expr, &remaining[1..]))
197        }
198        Token::License(key) => {
199            let expr = if key.starts_with("licenseref-") {
200                LicenseExpression::LicenseRef(key.clone())
201            } else {
202                LicenseExpression::License(key.clone())
203            };
204            Ok((expr, &tokens[1..]))
205        }
206        Token::RightParen => Err(ParseError::MismatchedParentheses),
207        Token::And | Token::Or | Token::With => Err(ParseError::ParseError(format!(
208            "Unexpected operator at start: {:?}",
209            tokens[0]
210        ))),
211    }
212}
213
// Unit tests for the expression parser: key normalisation, operators,
// parentheses, whitespace handling, error cases and key extraction.
#[cfg(test)]
mod tests {
    use super::super::{LicenseExpression, expression_to_string};
    use super::*;

    #[test]
    fn test_parse_simple_license() {
        let expr = parse_expression("MIT").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_simple_lowercase() {
        let expr = parse_expression("mit").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_simple_mixed_case() {
        let expr = parse_expression("MiT").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_and_expression() {
        let expr = parse_expression("MIT AND Apache-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
        assert_eq!(expression_to_string(&expr), "mit AND apache-2.0");
    }

    #[test]
    fn test_parse_or_expression() {
        let expr = parse_expression("MIT OR Apache-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
        assert_eq!(expression_to_string(&expr), "mit OR apache-2.0");
    }

    #[test]
    fn test_parse_with_expression() {
        let expr = parse_expression("GPL-2.0 WITH Classpath-exception-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::With { .. }));
        assert_eq!(
            expression_to_string(&expr),
            "gpl-2.0 WITH classpath-exception-2.0"
        );
    }

    #[test]
    fn test_parse_parenthesized_expression() {
        let expr = parse_expression("(MIT OR Apache-2.0)").unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_parse_complex_expression() {
        let expr =
            parse_expression("(GPL-2.0 WITH Classpath-exception-2.0) AND Apache-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_parse_nested_parens() {
        let expr = parse_expression("((MIT OR Apache-2.0) AND GPL-2.0)").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_parse_scancode_plus_license() {
        let expr = parse_expression("gpl-2.0-plus").unwrap();
        assert_eq!(expr, LicenseExpression::License("gpl-2.0-plus".to_string()));
    }

    #[test]
    fn test_parse_licenseref() {
        let expr = parse_expression("LicenseRef-scancode-custom-1").unwrap();
        assert_eq!(
            expr,
            LicenseExpression::LicenseRef("licenseref-scancode-custom-1".to_string())
        );
    }

    #[test]
    fn test_parse_various_whitespace() {
        let expr1 = parse_expression("MIT AND Apache-2.0").unwrap();
        let expr2 = parse_expression("MIT   AND   Apache-2.0").unwrap();
        assert_eq!(expr1, expr2);
    }

    #[test]
    fn test_parse_trailing_whitespace() {
        let expr = parse_expression("MIT   ").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_leading_whitespace() {
        let expr = parse_expression("   MIT").unwrap();
        assert_eq!(expr, LicenseExpression::License("mit".to_string()));
    }

    #[test]
    fn test_parse_empty_expression() {
        let result = parse_expression("");
        assert!(matches!(result, Err(ParseError::EmptyExpression)));
    }

    #[test]
    fn test_parse_whitespace_only() {
        let result = parse_expression("   ");
        assert!(matches!(result, Err(ParseError::EmptyExpression)));
    }

    #[test]
    fn test_parse_mismatched_open_paren() {
        let result = parse_expression("(MIT AND Apache-2.0");
        assert!(matches!(result, Err(ParseError::MismatchedParentheses)));
    }

    #[test]
    fn test_parse_mismatched_close_paren() {
        let result = parse_expression("MIT AND Apache-2.0)");
        assert!(matches!(result, Err(ParseError::ParseError(_))));
    }

    #[test]
    fn test_parse_unexpected_character() {
        let result = parse_expression("MIT @ Apache-2.0");
        assert!(matches!(result, Err(ParseError::UnexpectedToken { .. })));
    }

    #[test]
    fn test_parse_multiple_licenses_or() {
        let expr = parse_expression("MIT OR Apache-2.0 OR GPL-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_parse_multiple_licenses_and() {
        let expr = parse_expression("MIT AND Apache-2.0 AND GPL-2.0").unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_operator_precedence_and_or() {
        // AND binds tighter than OR, so the input must parse as
        // `MIT OR (Apache-2.0 AND GPL-2.0)`: a bare key on the left of the
        // top-level OR and an AND node on its right.
        let expr = parse_expression("MIT OR Apache-2.0 AND GPL-2.0").unwrap();
        match expr {
            LicenseExpression::Or { left, right } => {
                assert_eq!(*left, LicenseExpression::License("mit".to_string()));
                assert!(matches!(*right, LicenseExpression::And { .. }));
            }
            other => panic!("expected a top-level OR, got {:?}", other),
        }
    }

    #[test]
    fn test_license_keys_simple() {
        let expr = parse_expression("MIT").unwrap();
        let keys = expr.license_keys();
        assert_eq!(keys, vec!["mit"]);
    }

    #[test]
    fn test_license_keys_multiple() {
        let expr = parse_expression("MIT OR Apache-2.0 AND GPL-2.0").unwrap();
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 3);
        assert!(keys.contains(&"mit".to_string()));
        assert!(keys.contains(&"apache-2.0".to_string()));
        assert!(keys.contains(&"gpl-2.0".to_string()));
    }

    #[test]
    fn test_license_keys_deduplication() {
        let expr = parse_expression("MIT AND MIT OR Apache-2.0").unwrap();
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 2);
        assert!(keys.contains(&"mit".to_string()));
        assert!(keys.contains(&"apache-2.0".to_string()));
    }

    #[test]
    fn test_parse_gpl_or_later_license() {
        // Mixed-case spelling of the "-plus" key; keys are normalised to
        // lowercase by the tokenizer.
        let expr = parse_expression("GPL-2.0-Plus").unwrap();
        assert_eq!(expr, LicenseExpression::License("gpl-2.0-plus".to_string()));
    }

    #[test]
    fn test_parse_gpl_plus_license() {
        let expr = parse_expression("GPL-2.0+").unwrap();
        assert_eq!(expr, LicenseExpression::License("gpl-2.0+".to_string()));
    }

    #[test]
    fn test_parse_complex_nested_expression() {
        let input = "(MIT OR Apache-2.0) AND (GPL-2.0 OR BSD-3-Clause)";
        let expr = parse_expression(input).unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 4);
    }

    #[test]
    fn test_parse_multiple_with_expressions() {
        let expr = parse_expression(
            "GPL-2.0 WITH Classpath-exception-2.0 AND GPL-2.0 WITH GCC-exception-2.0",
        )
        .unwrap();
        assert!(matches!(expr, LicenseExpression::And { .. }));
        let keys = expr.license_keys();
        assert!(keys.contains(&"gpl-2.0".to_string()));
        assert!(keys.contains(&"classpath-exception-2.0".to_string()));
        assert!(keys.contains(&"gcc-exception-2.0".to_string()));
    }

    #[test]
    fn test_parse_with_inside_and_inside_or() {
        let expr = parse_expression("MIT OR (Apache-2.0 AND GPL-2.0 WITH Classpath-exception-2.0)")
            .unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_parse_operator_at_start_error() {
        let result = parse_expression("AND MIT");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_operator_at_end_error() {
        let result = parse_expression("MIT AND");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_double_operator_error() {
        let result = parse_expression("MIT AND AND Apache-2.0");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_license_with_dots() {
        let expr = parse_expression("LicenseRef-scancode-1.0").unwrap();
        assert_eq!(
            expr,
            LicenseExpression::LicenseRef("licenseref-scancode-1.0".to_string())
        );
    }

    #[test]
    fn test_parse_deeply_nested_expression() {
        let input = "((MIT OR Apache-2.0) AND GPL-2.0) OR BSD-3-Clause";
        let expr = parse_expression(input).unwrap();
        assert!(matches!(expr, LicenseExpression::Or { .. }));
        let keys = expr.license_keys();
        assert_eq!(keys.len(), 4);
    }

    #[test]
    fn test_parse_case_insensitive_operators() {
        let expr1 = parse_expression("MIT and Apache-2.0").unwrap();
        let expr2 = parse_expression("MIT AND Apache-2.0").unwrap();
        let expr3 = parse_expression("MIT And Apache-2.0").unwrap();
        assert_eq!(expression_to_string(&expr1), "mit AND apache-2.0");
        assert_eq!(expression_to_string(&expr2), "mit AND apache-2.0");
        assert_eq!(expression_to_string(&expr3), "mit AND apache-2.0");
    }

    #[test]
    fn test_parse_or_case_insensitive() {
        let expr1 = parse_expression("MIT or Apache-2.0").unwrap();
        let expr2 = parse_expression("MIT OR Apache-2.0").unwrap();
        assert_eq!(expression_to_string(&expr1), "mit OR apache-2.0");
        assert_eq!(expression_to_string(&expr2), "mit OR apache-2.0");
    }

    #[test]
    fn test_parse_with_case_insensitive() {
        let expr1 = parse_expression("GPL-2.0 with Classpath-exception-2.0").unwrap();
        let expr2 = parse_expression("GPL-2.0 WITH Classpath-exception-2.0").unwrap();
        assert_eq!(
            expression_to_string(&expr1),
            "gpl-2.0 WITH classpath-exception-2.0"
        );
        assert_eq!(
            expression_to_string(&expr2),
            "gpl-2.0 WITH classpath-exception-2.0"
        );
    }
}