Skip to main content

provenant/license_detection/expression/
parse.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression parsing implementation.
5
6use super::{LicenseExpression, ParseError};
7
8/// Token in a license expression.
9#[derive(Debug, Clone, PartialEq, Eq, Hash)]
10pub(super) enum Token {
11    /// License key
12    License(String),
13
14    /// Operator: AND
15    And,
16
17    /// Operator: OR
18    Or,
19
20    /// Operator: WITH
21    With,
22
23    /// Opening parenthesis
24    LeftParen,
25
26    /// Closing parenthesis
27    RightParen,
28}
29
30/// Parse a license expression string into a structured expression.
31///
32/// # Arguments
33/// * `expr` - The license expression string to parse
34///
35/// # Returns
36/// Ok with parsed LicenseExpression, or Err with ParseError
37///
38/// # Examples
39/// ```
40/// use provenant::license_detection::expression::parse_expression;
41///
42/// let expr = parse_expression("MIT AND Apache-2.0").unwrap();
43/// ```
44pub fn parse_expression(expr: &str) -> Result<LicenseExpression, ParseError> {
45    let trimmed = expr.trim();
46    if trimmed.is_empty() {
47        return Err(ParseError::EmptyExpression);
48    }
49
50    let tokens = tokenize(trimmed)?;
51    parse_tokens(&tokens)
52}
53
54/// Tokenize a license expression string into tokens.
55pub(super) fn tokenize(expr: &str) -> Result<Vec<Token>, ParseError> {
56    let mut tokens = Vec::new();
57    let mut pos = 0;
58    let chars: Vec<char> = expr.chars().collect();
59
60    while pos < chars.len() {
61        let c = chars[pos];
62
63        if c.is_whitespace() {
64            pos += 1;
65            continue;
66        }
67
68        match c {
69            '(' => {
70                tokens.push(Token::LeftParen);
71                pos += 1;
72            }
73            ')' => {
74                tokens.push(Token::RightParen);
75                pos += 1;
76            }
77            _ => {
78                if c.is_alphanumeric() || c == '-' || c == '.' || c == '_' || c == '+' {
79                    let start = pos;
80                    while pos < chars.len()
81                        && (chars[pos].is_alphanumeric()
82                            || chars[pos] == '-'
83                            || chars[pos] == '.'
84                            || chars[pos] == '_'
85                            || chars[pos] == '+')
86                    {
87                        pos += 1;
88                    }
89                    let text: String = chars[start..pos].iter().collect();
90                    let token = match_text_to_token(&text);
91                    tokens.push(token);
92                } else {
93                    return Err(ParseError::UnexpectedToken {
94                        token: c.to_string(),
95                        position: pos,
96                    });
97                }
98            }
99        }
100    }
101
102    Ok(tokens)
103}
104
105/// Match text to appropriate token.
106fn match_text_to_token(text: &str) -> Token {
107    let text_upper = text.to_uppercase();
108    match text_upper.as_str() {
109        "AND" => Token::And,
110        "OR" => Token::Or,
111        "WITH" => Token::With,
112        _ => Token::License(text.to_lowercase()),
113    }
114}
115
116/// Parse tokens into a LicenseExpression using recursive descent.
117pub(super) fn parse_tokens(tokens: &[Token]) -> Result<LicenseExpression, ParseError> {
118    if tokens.is_empty() {
119        return Err(ParseError::EmptyExpression);
120    }
121
122    let (expr, remaining) = parse_or(tokens)?;
123    if !remaining.is_empty() {
124        return Err(ParseError::ParseError(format!(
125            "Unexpected tokens after parsing: {:?}",
126            remaining
127        )));
128    }
129
130    Ok(expr)
131}
132
133/// Parse OR expressions (lowest precedence).
134pub(super) fn parse_or(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
135    let (mut expr, mut remaining) = parse_and(tokens)?;
136
137    while let Some(Token::Or) = remaining.first() {
138        remaining = &remaining[1..];
139        let (right, rest) = parse_and(remaining)?;
140        expr = LicenseExpression::Or {
141            left: Box::new(expr),
142            right: Box::new(right),
143        };
144        remaining = rest;
145    }
146
147    Ok((expr, remaining))
148}
149
150/// Parse AND expressions (medium precedence).
151pub(super) fn parse_and(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
152    let (mut expr, mut remaining) = parse_with(tokens)?;
153
154    while let Some(Token::And) = remaining.first() {
155        remaining = &remaining[1..];
156        let (right, rest) = parse_with(remaining)?;
157        expr = LicenseExpression::And {
158            left: Box::new(expr),
159            right: Box::new(right),
160        };
161        remaining = rest;
162    }
163
164    Ok((expr, remaining))
165}
166
167/// Parse WITH expressions (highest precedence for operators).
168pub(super) fn parse_with(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
169    let (mut expr, mut remaining) = parse_primary(tokens)?;
170
171    while let Some(Token::With) = remaining.first() {
172        remaining = &remaining[1..];
173        let (right, rest) = parse_primary(remaining)?;
174        expr = LicenseExpression::With {
175            left: Box::new(expr),
176            right: Box::new(right),
177        };
178        remaining = rest;
179    }
180
181    Ok((expr, remaining))
182}
183
184/// Parse primary expressions (license keys or parenthesized expressions).
185pub(super) fn parse_primary(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
186    if tokens.is_empty() {
187        return Err(ParseError::EmptyExpression);
188    }
189
190    match &tokens[0] {
191        Token::LeftParen => {
192            if tokens.len() < 2 {
193                return Err(ParseError::MismatchedParentheses);
194            }
195            let (expr, remaining) = parse_or(&tokens[1..])?;
196            if remaining.is_empty() || remaining[0] != Token::RightParen {
197                return Err(ParseError::MismatchedParentheses);
198            }
199            Ok((expr, &remaining[1..]))
200        }
201        Token::License(key) => {
202            let expr = if key.starts_with("licenseref-") {
203                LicenseExpression::LicenseRef(key.clone())
204            } else {
205                LicenseExpression::License(key.clone())
206            };
207            Ok((expr, &tokens[1..]))
208        }
209        Token::RightParen => Err(ParseError::MismatchedParentheses),
210        Token::And | Token::Or | Token::With => Err(ParseError::ParseError(format!(
211            "Unexpected operator at start: {:?}",
212            tokens[0]
213        ))),
214    }
215}
216
217#[cfg(test)]
218#[path = "parse_test.rs"]
219mod tests;