Skip to main content

provenant/license_detection/expression/
parse.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! License expression parsing implementation.
5
6use super::{LicenseExpression, ParseError};
7
8/// Token in a license expression.
9#[derive(Debug, Clone, PartialEq, Eq, Hash)]
10pub(super) enum Token {
11    /// License key
12    License(String),
13
14    /// Operator: AND
15    And,
16
17    /// Operator: OR
18    Or,
19
20    /// Operator: WITH
21    With,
22
23    /// Opening parenthesis
24    LeftParen,
25
26    /// Closing parenthesis
27    RightParen,
28}
29
30/// Parse a license expression string into a structured expression.
31///
32/// # Arguments
33/// * `expr` - The license expression string to parse
34///
35/// # Returns
36/// Ok with parsed LicenseExpression, or Err with ParseError
37///
38/// For example, `parse_expression("MIT AND Apache-2.0")` parses a simple
39/// conjunction expression.
40pub fn parse_expression(expr: &str) -> Result<LicenseExpression, ParseError> {
41    let trimmed = expr.trim();
42    if trimmed.is_empty() {
43        return Err(ParseError::EmptyExpression);
44    }
45
46    let tokens = tokenize(trimmed)?;
47    parse_tokens(&tokens)
48}
49
50/// Tokenize a license expression string into tokens.
51pub(super) fn tokenize(expr: &str) -> Result<Vec<Token>, ParseError> {
52    let mut tokens = Vec::new();
53    let mut pos = 0;
54    let chars: Vec<char> = expr.chars().collect();
55
56    while pos < chars.len() {
57        let c = chars[pos];
58
59        if c.is_whitespace() {
60            pos += 1;
61            continue;
62        }
63
64        match c {
65            '(' => {
66                tokens.push(Token::LeftParen);
67                pos += 1;
68            }
69            ')' => {
70                tokens.push(Token::RightParen);
71                pos += 1;
72            }
73            _ => {
74                if c.is_alphanumeric() || c == '-' || c == '.' || c == '_' || c == '+' {
75                    let start = pos;
76                    while pos < chars.len()
77                        && (chars[pos].is_alphanumeric()
78                            || chars[pos] == '-'
79                            || chars[pos] == '.'
80                            || chars[pos] == '_'
81                            || chars[pos] == '+')
82                    {
83                        pos += 1;
84                    }
85                    let text: String = chars[start..pos].iter().collect();
86                    let token = match_text_to_token(&text);
87                    tokens.push(token);
88                } else {
89                    return Err(ParseError::UnexpectedToken {
90                        token: c.to_string(),
91                        position: pos,
92                    });
93                }
94            }
95        }
96    }
97
98    Ok(tokens)
99}
100
101/// Match text to appropriate token.
102fn match_text_to_token(text: &str) -> Token {
103    let text_upper = text.to_uppercase();
104    match text_upper.as_str() {
105        "AND" => Token::And,
106        "OR" => Token::Or,
107        "WITH" => Token::With,
108        _ => Token::License(text.to_lowercase()),
109    }
110}
111
112/// Parse tokens into a LicenseExpression using recursive descent.
113pub(super) fn parse_tokens(tokens: &[Token]) -> Result<LicenseExpression, ParseError> {
114    if tokens.is_empty() {
115        return Err(ParseError::EmptyExpression);
116    }
117
118    let (expr, remaining) = parse_or(tokens)?;
119    if !remaining.is_empty() {
120        return Err(ParseError::ParseError(format!(
121            "Unexpected tokens after parsing: {:?}",
122            remaining
123        )));
124    }
125
126    Ok(expr)
127}
128
129/// Parse OR expressions (lowest precedence).
130pub(super) fn parse_or(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
131    let (mut expr, mut remaining) = parse_and(tokens)?;
132
133    while let Some(Token::Or) = remaining.first() {
134        remaining = &remaining[1..];
135        let (right, rest) = parse_and(remaining)?;
136        expr = LicenseExpression::Or {
137            left: Box::new(expr),
138            right: Box::new(right),
139        };
140        remaining = rest;
141    }
142
143    Ok((expr, remaining))
144}
145
146/// Parse AND expressions (medium precedence).
147pub(super) fn parse_and(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
148    let (mut expr, mut remaining) = parse_with(tokens)?;
149
150    while let Some(Token::And) = remaining.first() {
151        remaining = &remaining[1..];
152        let (right, rest) = parse_with(remaining)?;
153        expr = LicenseExpression::And {
154            left: Box::new(expr),
155            right: Box::new(right),
156        };
157        remaining = rest;
158    }
159
160    Ok((expr, remaining))
161}
162
163/// Parse WITH expressions (highest precedence for operators).
164pub(super) fn parse_with(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
165    let (mut expr, mut remaining) = parse_primary(tokens)?;
166
167    while let Some(Token::With) = remaining.first() {
168        remaining = &remaining[1..];
169        let (right, rest) = parse_primary(remaining)?;
170        expr = LicenseExpression::With {
171            left: Box::new(expr),
172            right: Box::new(right),
173        };
174        remaining = rest;
175    }
176
177    Ok((expr, remaining))
178}
179
180/// Parse primary expressions (license keys or parenthesized expressions).
181pub(super) fn parse_primary(tokens: &[Token]) -> Result<(LicenseExpression, &[Token]), ParseError> {
182    if tokens.is_empty() {
183        return Err(ParseError::EmptyExpression);
184    }
185
186    match &tokens[0] {
187        Token::LeftParen => {
188            if tokens.len() < 2 {
189                return Err(ParseError::MismatchedParentheses);
190            }
191            let (expr, remaining) = parse_or(&tokens[1..])?;
192            if remaining.is_empty() || remaining[0] != Token::RightParen {
193                return Err(ParseError::MismatchedParentheses);
194            }
195            Ok((expr, &remaining[1..]))
196        }
197        Token::License(key) => {
198            let expr = if key.starts_with("licenseref-") {
199                LicenseExpression::LicenseRef(key.clone())
200            } else {
201                LicenseExpression::License(key.clone())
202            };
203            Ok((expr, &tokens[1..]))
204        }
205        Token::RightParen => Err(ParseError::MismatchedParentheses),
206        Token::And | Token::Or | Token::With => Err(ParseError::ParseError(format!(
207            "Unexpected operator at start: {:?}",
208            tokens[0]
209        ))),
210    }
211}
212
213#[cfg(test)]
214#[path = "parse_test.rs"]
215mod tests;