Expand description
§Sipha
A flexible, incremental parsing library for Rust with support for multiple parsing algorithms.
§Overview
Sipha provides the foundational types and traits for building parsers. It supports:
- Multiple parsing backends: LL(k), LR, and more (via feature flags)
- Incremental parsing: Efficient re-parsing of edited code
- Syntax trees: Immutable green/red tree representation
- Error recovery: Configurable error recovery strategies
- Grammar definition: Flexible grammar builder API
§Quick Start
This example shows how to create a simple arithmetic expression parser:
use sipha::grammar::{GrammarBuilder, Token, NonTerminal, Expr};
use sipha::syntax::SyntaxKind as SyntaxKindTrait;
use sipha::backend::ll::{LlParser, LlConfig};
use sipha::backend::ParserBackend;
use sipha::syntax::SyntaxNode;
// 1. Define your syntax kinds (unified enum for both terminals and non-terminals)
// A single enum names every kind of element in this example: the token kinds
// registered with the lexer below, plus the one kind used for parser output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum MySyntaxKind {
    // Terminals (produced by lexer)
    // `Number` is registered below as a run of one or more digits.
    Number,
    // `Plus` and `Minus` are registered below as the literals "+" and "-".
    Plus,
    Minus,
    Whitespace, // Trivia: whitespace that should be ignored
    // `Eof` is handed to the lexer's `build` call and is filtered out of the
    // token list before parsing.
    Eof,
    // Non-terminals (produced by parser)
    Expr,
}
// Implement SyntaxKind for all variants
impl SyntaxKindTrait for MySyntaxKind {
    // `Expr` is the only non-terminal; every other variant is a terminal.
    // The variants are listed one by one so that adding a new variant forces
    // a decision here instead of silently defaulting.
    fn is_terminal(self) -> bool {
        match self {
            Self::Expr => false,
            Self::Number | Self::Plus | Self::Minus | Self::Whitespace | Self::Eof => true,
        }
    }

    // Whitespace is the only trivia kind in this example.
    fn is_trivia(self) -> bool {
        self == Self::Whitespace
    }
}
// 2. Build a lexer to tokenize text input
use sipha::{LexerBuilder, lexer::{Pattern, CharSet}};

// [0-9]+ : the digit character class repeated one or more times, with no
// upper bound so the match is as long as possible.
let digit_run = Pattern::Repeat {
    pattern: Box::new(Pattern::CharClass(CharSet::digits())),
    min: 1,
    max: None,
};
// The whitespace character class repeated one or more times, also unbounded.
let whitespace_run = Pattern::Repeat {
    pattern: Box::new(Pattern::CharClass(CharSet::whitespace())),
    min: 1,
    max: None,
};

let lexer = LexerBuilder::new()
    .token(MySyntaxKind::Number, digit_run)
    .token(MySyntaxKind::Plus, Pattern::Literal("+".into()))
    .token(MySyntaxKind::Minus, Pattern::Literal("-".into()))
    .token(MySyntaxKind::Whitespace, whitespace_run)
    // Whitespace is flagged as trivia so it is skipped automatically during parsing
    .trivia(MySyntaxKind::Whitespace)
    // NOTE(review): the first argument is the end-of-input kind; the role of the
    // second argument is not visible from this example - confirm against the
    // `LexerBuilder::build` documentation.
    .build(MySyntaxKind::Eof, MySyntaxKind::Number)
    .expect("Failed to build lexer");
// 3. Tokenize input text
// Start with a simple expression: just a number (this particular input
// contains no whitespace)
let input_text = "1";
let lexer_tokens = lexer
    .tokenize(input_text)
    .expect("Failed to tokenize input");
// The lexer must have produced something before we continue
assert!(!lexer_tokens.is_empty(), "Lexer should produce at least one token");
// 4. Use lexer tokens directly - they implement grammar::Token
// Drop the Eof token (parser handles EOF automatically)
let grammar_tokens = lexer_tokens
    .into_iter()
    .filter(|token| !matches!(token.kind, MySyntaxKind::Eof))
    .collect::<Vec<sipha::lexer::Token<MySyntaxKind>>>();
// 5. Define your non-terminals
// This example needs only one non-terminal; it is used below both as the
// grammar's entry point and as the left-hand side of its single rule.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum MyNonTerminal {
    Expr,
}
impl NonTerminal for MyNonTerminal {
    // Returns the display name of the non-terminal.
    fn name(&self) -> &str {
        match *self {
            Self::Expr => "Expr",
        }
    }
}
// 6. Build the grammar using GrammarBuilder
use sipha::lexer::Token as LexerToken;

// The rule is described with a token value of kind `Number`; its text is
// empty and its range is built from two zero `TextSize` values.
let empty_range = sipha::syntax::TextRange::at(
    sipha::syntax::TextSize::zero(),
    sipha::syntax::TextSize::zero(),
);
let number_token = LexerToken::new(MySyntaxKind::Number, "", empty_range);

// Expr is both the entry point and the only rule: it matches one Number token.
let grammar = GrammarBuilder::new()
    .entry_point(MyNonTerminal::Expr)
    .rule(MyNonTerminal::Expr, Expr::token(number_token))
    .build()
    .expect("Failed to build grammar");
// 7. Create the parser with default configuration
let mut parser = LlParser::new(&grammar, LlConfig::default())
    .expect("Failed to create parser");

// 8. Make sure there is something to parse
assert!(!grammar_tokens.is_empty(), "Should have at least one token to parse");

// 9. Parse the tokenized input, starting from the Expr non-terminal
let result = parser.parse(&grammar_tokens, MyNonTerminal::Expr);

// 10. Parsing must finish without reporting any errors
assert!(
    result.errors.is_empty(),
    "Parsing failed with errors: {:?}",
    result.errors
);
// The single input token should have been consumed
assert!(result.metrics.tokens_consumed >= 1, "Should consume at least 1 token");

// 11. Wrap the resulting root in a red tree and check its kind
let red_tree = SyntaxNode::new_root(result.root.clone());
// The root kind should be Expr or Number (depending on implementation)
assert!(matches!(red_tree.kind(), MySyntaxKind::Expr | MySyntaxKind::Number));

// 12. The tree can be traversed through its children
let children = red_tree.children().collect::<Vec<_>>();
assert!(!children.is_empty(), "Syntax tree should have children");

// 13. Verify metrics
assert!(result.metrics.nodes_created > 0, "Should create at least one node");
assert!(result.metrics.parse_time.as_nanos() > 0, "Parse time should be recorded");
§Modules
Re-exports§
pub use error::LexerError;
pub use error::ParseError;
pub use error::ParseMetrics;
pub use error::ParseResult;
pub use grammar::Expr;
pub use grammar::Grammar;
pub use grammar::GrammarBuilder;
pub use grammar::NonTerminal;
pub use grammar::Rule;
pub use grammar::Token as GrammarToken;
pub use lexer::CompiledLexer;
pub use lexer::LexerBuilder;
pub use lexer::Token;
pub use syntax::GreenNode;
pub use syntax::GreenNodeBuilder;
pub use syntax::GreenToken;
pub use syntax::SyntaxElement;
pub use syntax::SyntaxKind;
pub use syntax::SyntaxNode;
pub use syntax::SyntaxToken;
pub use syntax::TextRange;
pub use syntax::TextSize;
pub use syntax::SyntaxVisitor;
pub use syntax::SyntaxWalker;
pub use syntax::QueryBuilder;
pub use syntax::XPathQuery;
pub use syntax::TreeDiff;
pub use syntax::TreeStats;
pub use syntax::ValidationResult;