shiftkit 0.2.0

A Rust library for building parsers and grammars
Documentation
//! Defines the core grammar structures, including tokens, AST nodes,
//! grammar rules, and the grammar itself

pub mod builder;

/// Each token type has a unique ID
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct TokenType(pub u32);

/// Each AST node type has a unique ID
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct AstNodeType(pub u32);

/// A single entry in a grammar rule - either a token type or AST node type
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum GrammarItem {
    /// Represents a terminal token type
    Token(TokenType),
    /// Represents a non-terminal AST node type
    AstNode(AstNodeType),
}

impl From<TokenType> for GrammarItem {
    fn from(token: TokenType) -> Self {
        Self::Token(token)
    }
}

impl From<AstNodeType> for GrammarItem {
    fn from(node: AstNodeType) -> Self {
        Self::AstNode(node)
    }
}

/// Index into the token slice provided to the parser
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct TokenId(pub usize);

/// Index into the AST Vec
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct AstNodeId(pub usize);

/// Index is either a token position or AST node position
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Index {
    /// Refers to a token in the input stream
    Token(TokenId),
    /// Refers to an AST node in the parsed output
    AstNode(AstNodeId),
}

impl Index {
    /// Unwrap as `TokenId`
    ///
    /// # Panics
    ///
    /// Panics if the index is not a `Token` variant
    #[must_use]
    pub fn as_token_id(&self) -> TokenId {
        let &Self::Token(id) = self else {
            panic!("Expected `Token`, found `AstNode`")
        };
        id
    }

    /// Unwrap as [`AstNodeId`]
    ///
    /// # Panics
    ///
    /// Panics if the index is not an `AstNode` variant
    #[must_use]
    pub fn as_ast_node_id(&self) -> AstNodeId {
        let &Self::AstNode(id) = self else {
            panic!("Expected `AstNode`, found `Token`")
        };
        id
    }
}

/// Result of a reduction function - either creates a new node or forwards an existing one
#[derive(Debug, Clone)]
pub enum ReductionResult<A> {
    /// Create a new AST node and append it to the `Vec`
    NewNode(A),
    /// Reuse an existing AST node (for pass-through rules like `Sum => Product`)
    Forward(AstNodeId),
}

/// Trait that tokens must implement to provide their token type
pub trait HasTokenType {
    /// Returns the [`TokenType`] of the implementor
    fn token_type(&self) -> TokenType;
}

/// Reduction function signature
pub type ReductionFn<T, A> = fn(&[Index], &[T], &[A]) -> ReductionResult<A>;

/// A grammar rule with its reduction function
#[derive(Debug, Clone)]
pub struct GrammarRule<T, A> {
    /// The AST node type that this rule reduces to
    pub result: AstNodeType,
    /// The sequence of grammar items (tokens or AST nodes) that form this rule
    pub components: Vec<GrammarItem>,
    /// The function that is called when this rule is reduced
    ///
    /// Responsible for constructing the new AST node or forwarding an existing one.
    pub reduction: ReductionFn<T, A>,
}

/// Each grammar rule has a unique ID (the index into the `Grammar#rules` array)
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GrammarRuleId(pub usize);

/// A grammar defined as a set of grammar rules
#[derive(Clone)]
pub struct Grammar<T, A> {
    /// The root AST node type of the grammar
    pub root_ast_node: AstNodeType,
    /// The list of all grammar rules
    pub rules: Vec<GrammarRule<T, A>>,
}

impl<T, A> Grammar<T, A> {
    /// Create a new empty grammar
    #[must_use]
    pub fn new(root_ast_node: AstNodeType) -> Self {
        Self {
            rules: vec![GrammarRule {
                result: root_ast_node,
                components: vec![root_ast_node.into()],
                reduction: |indices, _, _| ReductionResult::Forward(indices[0].as_ast_node_id()),
            }],
            root_ast_node,
        }
    }

    /// Add a production rule to the grammar
    pub fn add_rule(
        &mut self,
        result: AstNodeType,
        components: &[GrammarItem],
        reduction: ReductionFn<T, A>,
    ) {
        self.rules.push(GrammarRule {
            result,
            components: components.to_vec(),
            reduction,
        });
    }

    /// Get a rule by its ID
    #[must_use]
    pub fn get_rule(&self, id: GrammarRuleId) -> &GrammarRule<T, A> {
        &self.rules[id.0]
    }
}