Skip to main content

provenant/license_detection/expression/
parse.rs

1//! License expression parsing implementation.
2
3use super::{LicenseExpression, ParseError};
4
5/// Token in a license expression.
6#[derive(Debug, Clone, PartialEq, Eq, Hash)]
7pub(super) enum Token {
8    /// License key
9    License(String),
10
11    /// Operator: AND
12    And,
13
14    /// Operator: OR
15    Or,
16
17    /// Operator: WITH
18    With,
19
20    /// Opening parenthesis
21    LeftParen,
22
23    /// Closing parenthesis
24    RightParen,
25}
26
27/// Parse a license expression string into a structured expression.
28///
29/// # Arguments
30/// * `expr` - The license expression string to parse
31///
32/// # Returns
33/// Ok with parsed LicenseExpression, or Err with ParseError
34///
35/// # Examples
36/// ```
37/// use provenant::license_detection::expression::parse_expression;
38///
39/// let expr = parse_expression("MIT AND Apache-2.0").unwrap();
40/// ```
41pub fn parse_expression(expr: &str) -> Result<LicenseExpression, ParseError> {
42    let trimmed = expr.trim();
43    if trimmed.is_empty() {
44        return Err(ParseError::EmptyExpression);
45    }
46
47    let tokens = tokenize(trimmed)?;
48    parse_tokens(&tokens)
49}
50
51/// Tokenize a license expression string into tokens.
52pub(super) fn tokenize(expr: &str) -> Result<Vec<Token>, ParseError> {
53    let mut tokens = Vec::new();
54    let mut pos = 0;
55    let chars: Vec<char> = expr.chars().collect();
56
57    while pos < chars.len() {
58        let c = chars[pos];
59
60        if c.is_whitespace() {
61            pos += 1;
62            continue;
63        }
64
65        match c {
66            '(' => {
67                tokens.push(Token::LeftParen);
68                pos += 1;
69            }
70            ')' => {
71                tokens.push(Token::RightParen);
72                pos += 1;
73            }
74            _ => {
75                if c.is_alphanumeric() || c == '-' || c == '.' || c == '_' || c == '+' {
76                    let start = pos;
77                    while pos < chars.len()
78                        && (chars[pos].is_alphanumeric()
79                            || chars[pos] == '-'
80                            || chars[pos] == '.'
81                            || chars[pos] == '_'
82                            || chars[pos] == '+')
83                    {
84                        pos += 1;
85                    }
86                    let text: String = chars[start..pos].iter().collect();
87                    let token = match_text_to_token(&text);
88                    tokens.push(token);
89                } else {
90                    return Err(ParseError::UnexpectedToken {
91                        token: c.to_string(),
92                        position: pos,
93                    });
94                }
95            }
96        }
97    }
98
99    Ok(tokens)
100}
101
102/// Match text to appropriate token.
103fn match_text_to_token(text: &str) -> Token {
104    let text_upper = text.to_uppercase();
105    match text_upper.as_str() {
106        "AND" => Token::And,
107        "OR" => Token::Or,
108        "WITH" => Token::With,
109        _ => Token::License(text.to_lowercase()),
110    }
111}
112
113/// Parse tokens into a LicenseExpression using recursive descent.
114pub(super) fn parse_tokens(tokens: &[Token]) -> Result<LicenseExpression, ParseError> {
115    if tokens.is_empty() {
116        return Err(ParseError::EmptyExpression);
117    }
118
119    let (expr, remaining) = parse_or(tokens)?;
120    if !remaining.is_empty() {
121        return Err(ParseError::ParseError(format!(
122            "Unexpected tokens after parsing: {:?}",
123            remaining
124        )));
125    }
126
127    Ok(expr)
128}
129
130/// Parse OR expressions (lowest precedence).
131pub(super) fn parse_or(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
132    let (mut expr, mut remaining) = parse_and(tokens)?;
133
134    while let Some(Token::Or) = remaining.first() {
135        remaining = &remaining[1..];
136        let (right, rest) = parse_and(remaining)?;
137        expr = LicenseExpression::Or {
138            left: Box::new(expr),
139            right: Box::new(right),
140        };
141        remaining = rest;
142    }
143
144    Ok((expr, remaining))
145}
146
147/// Parse AND expressions (medium precedence).
148pub(super) fn parse_and(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
149    let (mut expr, mut remaining) = parse_with(tokens)?;
150
151    while let Some(Token::And) = remaining.first() {
152        remaining = &remaining[1..];
153        let (right, rest) = parse_with(remaining)?;
154        expr = LicenseExpression::And {
155            left: Box::new(expr),
156            right: Box::new(right),
157        };
158        remaining = rest;
159    }
160
161    Ok((expr, remaining))
162}
163
164/// Parse WITH expressions (highest precedence for operators).
165pub(super) fn parse_with(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
166    let (mut expr, mut remaining) = parse_primary(tokens)?;
167
168    while let Some(Token::With) = remaining.first() {
169        remaining = &remaining[1..];
170        let (right, rest) = parse_primary(remaining)?;
171        expr = LicenseExpression::With {
172            left: Box::new(expr),
173            right: Box::new(right),
174        };
175        remaining = rest;
176    }
177
178    Ok((expr, remaining))
179}
180
181/// Parse primary expressions (license keys or parenthesized expressions).
182pub(super) fn parse_primary(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
183    if tokens.is_empty() {
184        return Err(ParseError::EmptyExpression);
185    }
186
187    match &tokens[0] {
188        Token::LeftParen => {
189            if tokens.len() < 2 {
190                return Err(ParseError::MismatchedParentheses);
191            }
192            let (expr, remaining) = parse_or(&tokens[1..])?;
193            if remaining.is_empty() || remaining[0] != Token::RightParen {
194                return Err(ParseError::MismatchedParentheses);
195            }
196            Ok((expr, &remaining[1..]))
197        }
198        Token::License(key) => {
199            let expr = if key.starts_with("licenseref-") {
200                LicenseExpression::LicenseRef(key.clone())
201            } else {
202                LicenseExpression::License(key.clone())
203            };
204            Ok((expr, &tokens[1..]))
205        }
206        Token::RightParen => Err(ParseError::MismatchedParentheses),
207        Token::And | Token::Or | Token::With => Err(ParseError::ParseError(format!(
208            "Unexpected operator at start: {:?}",
209            tokens[0]
210        ))),
211    }
212}
213
214#[cfg(test)]
215#[path = "parse_test.rs"]
216mod tests;