// provenant-cli 0.1.6
//
// Rust scanner for ScanCode-compatible workflows, licenses, package metadata, SBOMs, and provenance data.
// Documentation
// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0

//! License expression parsing and manipulation.
//!
//! This module provides a parser for ScanCode license expressions, supporting:
//! - ScanCode license keys (e.g., `mit`, `gpl-2.0-plus`, `apache-2.0`)
//! - SPDX operators: `AND`, `OR`, `WITH` (case-insensitive)
//! - Parenthetical grouping
//! - The `LicenseRef-scancode-*` format for non-SPDX licenses
//!
//! The parser converts license expression strings into an AST (Abstract Syntax Tree)
//! and provides functions for validation and simplification.

mod parse;
mod simplify;

pub use parse::parse_expression;
pub use simplify::{
    combine_expressions_and, combine_expressions_and_preserving_structure, combine_expressions_or,
    combine_expressions_or_preserving_structure, expression_to_string, licensing_contains,
    simplify_expression, simplify_expression_preserving_structure,
};

/// Error type for license expression parsing.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. The type derives `Clone` and `PartialEq`, so an error value can
/// be copied and compared against an expected variant.
#[derive(Debug, Clone, PartialEq, thiserror::Error)]
// The `ParseError::ParseError` variant repeats the enum's own name, which
// trips `clippy::enum_variant_names`; the lint is silenced for this enum.
#[allow(clippy::enum_variant_names)]
pub enum ParseError {
    /// The license expression was empty.
    #[error("Empty license expression")]
    EmptyExpression,

    /// A token appeared where it is not allowed.
    ///
    /// `token` is the offending token text and `position` is where it was
    /// found. NOTE(review): the unit of `position` (byte offset, character
    /// offset or token index) is decided by the parser, which is not defined
    /// in this file — confirm before relying on it.
    #[error("Unexpected token '{token}' at position {position}")]
    UnexpectedToken { token: String, position: usize },

    /// Parentheses in the expression do not pair up.
    #[error("Mismatched parentheses")]
    MismatchedParentheses,

    /// Any other parse failure; the payload is a free-form message that is
    /// appended after `Parse error: ` when displayed.
    #[error("Parse error: {0}")]
    ParseError(String),
}

/// A parsed license expression represented as an AST.
///
/// Leaves (`License`, `LicenseRef`) carry a key as an owned `String`. The
/// three operator variants are binary nodes whose operands are boxed because
/// the type is recursive. Equality is structural: two trees are equal only if
/// they have the same shape and the same keys in the same positions.
#[derive(Debug, Clone, PartialEq)]
pub enum LicenseExpression {
    /// A single license key.
    ///
    /// The unit tests in this file expect parsed keys to be lowercase
    /// (`MIT` parses to `mit`); the enum itself does not enforce any casing.
    License(String),

    /// A LicenseRef-scancode-* reference.
    ///
    /// NOTE(review): whether the string holds the full `LicenseRef-scancode-*`
    /// identifier or only part of it is decided by the parser — confirm there.
    LicenseRef(String),

    /// AND operation: left AND right
    And {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand.
        right: Box<LicenseExpression>,
    },

    /// OR operation: left OR right
    Or {
        /// Left-hand operand.
        left: Box<LicenseExpression>,
        /// Right-hand operand.
        right: Box<LicenseExpression>,
    },

    /// WITH operation: left WITH right (exception)
    ///
    /// The type does not restrict the operands: both sides may be any
    /// expression, although `right` is intended to be the exception.
    With {
        /// The license the exception applies to.
        left: Box<LicenseExpression>,
        /// The exception.
        right: Box<LicenseExpression>,
    },
}

impl LicenseExpression {
    /// Return every distinct key mentioned anywhere in this expression.
    ///
    /// Keys of both `License` and `LicenseRef` leaves are included. The
    /// result is sorted in ascending `String` order and has no duplicates.
    #[allow(dead_code)]
    pub fn license_keys(&self) -> Vec<String> {
        let mut found = Vec::new();
        self.collect_keys(&mut found);
        // An ordered set sorts and de-duplicates in one pass.
        found
            .into_iter()
            .collect::<std::collections::BTreeSet<String>>()
            .into_iter()
            .collect()
    }

    /// Append the key of every leaf to `keys`, visiting leaves left to right.
    ///
    /// Duplicates are kept and nothing is sorted.
    #[allow(dead_code)]
    fn collect_keys(&self, keys: &mut Vec<String>) {
        // Explicit work stack instead of recursion. The right operand is
        // pushed first so that the left operand is popped, and therefore
        // visited, before it.
        let mut pending: Vec<&LicenseExpression> = vec![self];
        while let Some(node) = pending.pop() {
            match node {
                Self::License(key) | Self::LicenseRef(key) => keys.push(key.clone()),
                Self::And { left, right }
                | Self::Or { left, right }
                | Self::With { left, right } => {
                    pending.push(&**right);
                    pending.push(&**left);
                }
            }
        }
    }

    /// Join `expressions` with AND.
    ///
    /// Returns `None` for an empty list. Otherwise the operands are handed to
    /// `build_balanced_boolean_expression`, so a single operand comes back
    /// unchanged and several operands become a balanced tree of `And` nodes.
    pub fn and(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        match expressions.as_slice() {
            [] => None,
            operands => Some(build_balanced_boolean_expression(operands, |left, right| {
                LicenseExpression::And { left, right }
            })),
        }
    }

    /// Join `expressions` with OR.
    ///
    /// Returns `None` for an empty list. Otherwise the operands are handed to
    /// `build_balanced_boolean_expression`, so a single operand comes back
    /// unchanged and several operands become a balanced tree of `Or` nodes.
    pub fn or(expressions: Vec<LicenseExpression>) -> Option<LicenseExpression> {
        match expressions.as_slice() {
            [] => None,
            operands => Some(build_balanced_boolean_expression(operands, |left, right| {
                LicenseExpression::Or { left, right }
            })),
        }
    }
}

/// Fold `expressions` into a balanced binary tree of `combine` nodes.
///
/// A single operand is returned as a clone of itself. A longer slice is split
/// at `len / 2`; each half is built recursively and the two results are
/// joined with `combine`, left half first. Operand order is preserved, and
/// the tree depth grows logarithmically with the number of operands instead
/// of linearly as a left- or right-leaning chain would.
///
/// # Panics
///
/// Panics if `expressions` is empty. The check is unconditional: an empty
/// slice can never shrink to a single element, so letting it through would
/// recurse until the stack overflows in builds without debug assertions.
fn build_balanced_boolean_expression(
    expressions: &[LicenseExpression],
    combine: fn(Box<LicenseExpression>, Box<LicenseExpression>) -> LicenseExpression,
) -> LicenseExpression {
    match expressions {
        [] => panic!("build_balanced_boolean_expression called with empty list"),
        [only] => only.clone(),
        _ => {
            // With at least two operands both halves are non-empty.
            let (front, back) = expressions.split_at(expressions.len() / 2);
            let left = build_balanced_boolean_expression(front, combine);
            let right = build_balanced_boolean_expression(back, combine);
            combine(Box::new(left), Box::new(right))
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Number of nodes on the longest root-to-leaf path; a lone leaf counts as 1.
    fn expression_depth(expr: &LicenseExpression) -> usize {
        let (left, right) = match expr {
            LicenseExpression::License(_) | LicenseExpression::LicenseRef(_) => return 1,
            LicenseExpression::And { left, right }
            | LicenseExpression::Or { left, right }
            | LicenseExpression::With { left, right } => (left, right),
        };
        1 + std::cmp::max(expression_depth(left), expression_depth(right))
    }

    /// Build a `License` leaf from a string slice.
    fn license(key: &str) -> LicenseExpression {
        LicenseExpression::License(key.to_string())
    }

    /// The two-operand list shared by the "multiple" tests.
    fn mit_and_apache() -> Vec<LicenseExpression> {
        vec![license("mit"), license("apache-2.0")]
    }

    /// `count` distinct leaves named `license-0`, `license-1`, ...
    fn numbered_licenses(count: usize) -> Vec<LicenseExpression> {
        let mut leaves = Vec::with_capacity(count);
        for idx in 0..count {
            leaves.push(LicenseExpression::License(format!("license-{idx}")));
        }
        leaves
    }

    /// Owned set of known license keys.
    fn known_keys(keys: &[&str]) -> HashSet<String> {
        keys.iter().map(|key| key.to_string()).collect()
    }

    /// Keys of `expr` that are missing from `known`, in `license_keys` order.
    fn unknown_keys(expr: &LicenseExpression, known: &HashSet<String>) -> Vec<String> {
        let mut missing = expr.license_keys();
        missing.retain(|key| !known.contains(key));
        missing
    }

    #[test]
    fn test_and_helper_empty() {
        assert!(LicenseExpression::and(Vec::new()).is_none());
    }

    #[test]
    fn test_and_helper_single() {
        let only = license("mit");
        let combined = LicenseExpression::and(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_and_helper_multiple() {
        let combined = LicenseExpression::and(mit_and_apache()).unwrap();
        assert!(matches!(combined, LicenseExpression::And { .. }));
    }

    #[test]
    fn test_or_helper_empty() {
        assert!(LicenseExpression::or(Vec::new()).is_none());
    }

    #[test]
    fn test_or_helper_single() {
        let only = license("mit");
        let combined = LicenseExpression::or(vec![only.clone()]).unwrap();
        assert_eq!(combined, only);
    }

    #[test]
    fn test_or_helper_multiple() {
        let combined = LicenseExpression::or(mit_and_apache()).unwrap();
        assert!(matches!(combined, LicenseExpression::Or { .. }));
    }

    #[test]
    fn test_and_helper_balances_large_expression_depth() {
        // 1024 leaves in a balanced tree need far fewer levels than a chain.
        let combined = LicenseExpression::and(numbered_licenses(1024)).unwrap();
        assert!(expression_depth(&combined) <= 12);
    }

    #[test]
    fn test_or_helper_balances_large_expression_depth() {
        // 1024 leaves in a balanced tree need far fewer levels than a chain.
        let combined = LicenseExpression::or(numbered_licenses(1024)).unwrap();
        assert!(expression_depth(&combined) <= 12);
    }

    #[test]
    fn test_validate_expression_valid() {
        let parsed = parse_expression("MIT AND Apache-2.0").unwrap();
        let known = known_keys(&["mit", "apache-2.0"]);

        assert!(unknown_keys(&parsed, &known).is_empty());
    }

    #[test]
    fn test_validate_expression_unknown_keys() {
        let parsed = parse_expression("MIT AND UnknownKey").unwrap();
        let known = known_keys(&["mit"]);

        assert_eq!(
            unknown_keys(&parsed, &known),
            vec!["unknownkey".to_string()]
        );
    }

    #[test]
    fn test_validate_expression_empty_known_keys() {
        let parsed = parse_expression("MIT AND Apache-2.0").unwrap();
        let known: HashSet<String> = HashSet::new();

        assert_eq!(unknown_keys(&parsed, &known).len(), 2);
    }
}