#[allow(unused_imports)]
use super::ast_impl::{Decl, Located};
#[allow(unused_imports)]
use super::prettyprint::{print_decl, print_expr};
#[allow(unused_imports)]
use super::tokens::{Token, TokenKind};
#[allow(unused_imports)]
use super::{Lexer, Parser};
/// Version string reported by this parser module.
#[allow(missing_docs)]
pub const PARSER_VERSION: &str = "0.1.2";
/// Tunable options controlling parser behaviour.
#[derive(Debug, Clone)]
pub struct ParserConfig {
    /// Maximum nesting depth the parser will accept before reporting an error.
    pub max_depth: usize,
    /// Whether unicode operator spellings are accepted (exact set is defined by the lexer).
    pub unicode_ops: bool,
    /// Whether error messages may carry suggestions.
    pub suggestions: bool,
    /// Whether the parser attempts to continue after an error.
    pub error_recovery: bool,
    /// Whether top-level commands are permitted.
    pub allow_commands: bool,
}

impl Default for ParserConfig {
    /// Permissive defaults: deep nesting, unicode/commands/suggestions on,
    /// error recovery off.
    fn default() -> Self {
        ParserConfig {
            max_depth: 512,
            unicode_ops: true,
            suggestions: true,
            error_recovery: false,
            allow_commands: true,
        }
    }
}

impl ParserConfig {
    /// Creates a configuration with the default settings.
    pub fn new() -> Self {
        Self::default()
    }
    /// Builder: sets the maximum nesting depth.
    pub fn with_max_depth(mut self, depth: usize) -> Self {
        self.max_depth = depth;
        self
    }
    /// Builder: enables or disables unicode operators.
    pub fn with_unicode_ops(mut self, enabled: bool) -> Self {
        self.unicode_ops = enabled;
        self
    }
    /// Builder: enables or disables post-error recovery.
    pub fn with_error_recovery(mut self, enabled: bool) -> Self {
        self.error_recovery = enabled;
        self
    }
    /// Builder: enables or disables error-message suggestions.
    /// Added for parity with the other `with_*` builders.
    pub fn with_suggestions(mut self, enabled: bool) -> Self {
        self.suggestions = enabled;
        self
    }
    /// Builder: enables or disables top-level commands.
    /// Added for parity with the other `with_*` builders.
    pub fn with_allow_commands(mut self, enabled: bool) -> Self {
        self.allow_commands = enabled;
        self
    }
    /// A conservative configuration: shallower depth and every optional
    /// feature switched off.
    pub fn strict() -> Self {
        ParserConfig {
            max_depth: 256,
            unicode_ops: false,
            suggestions: false,
            error_recovery: false,
            allow_commands: false,
        }
    }
}
/// An in-memory source file: a path label plus the full text and its byte length.
#[derive(Debug, Clone)]
pub struct SourceFile {
    pub path: String,
    pub source: String,
    /// Length of `source` in bytes, captured at construction time.
    pub byte_len: usize,
}

impl SourceFile {
    /// Builds a `SourceFile` from a path label and its contents.
    pub fn new(path: impl Into<String>, source: impl Into<String>) -> Self {
        let text = source.into();
        SourceFile {
            path: path.into(),
            byte_len: text.len(),
            source: text,
        }
    }
    /// Builds an unnamed (in-memory) source file.
    pub fn virtual_(source: impl Into<String>) -> Self {
        Self::new("<virtual>", source)
    }
    /// Number of lines in the source text.
    pub fn line_count(&self) -> usize {
        self.source.lines().fold(0usize, |n, _| n + 1)
    }
    /// Whether the source text is empty.
    pub fn is_empty(&self) -> bool {
        self.source.is_empty()
    }
    /// The zero-indexed `idx`-th line, or `None` past the end.
    pub fn line(&self, idx: usize) -> Option<&str> {
        self.source
            .lines()
            .enumerate()
            .find(|(i, _)| *i == idx)
            .map(|(_, text)| text)
    }
}
/// Result alias used by the convenience parse entry points; errors are
/// rendered as human-readable `String`s.
#[allow(missing_docs)]
pub type ParseResult<T> = Result<T, String>;
/// Parses a single expression from `source` and returns its pretty-printed form.
///
/// Blank input and parse failures are reported as `Err` strings.
pub fn parse_expr_str(source: &str) -> ParseResult<String> {
    if source.trim().is_empty() {
        return Err("empty source".to_string());
    }
    let mut parser = Parser::new(Lexer::new(source).tokenize());
    match parser.parse_expr() {
        Ok(expr) => Ok(print_expr(&expr.value)),
        Err(e) => Err(format!("parse error: {e}")),
    }
}
/// Parses a single declaration from `source` and returns its pretty-printed form.
///
/// Blank input and parse failures are reported as `Err` strings.
pub fn parse_decl_str(source: &str) -> ParseResult<String> {
    if source.trim().is_empty() {
        return Err("empty declaration source".to_string());
    }
    let tokens = Lexer::new(source).tokenize();
    let decl = Parser::new(tokens)
        .parse_decl()
        .map_err(|e| format!("parse error: {e}"))?;
    Ok(print_decl(&decl.value))
}
/// Parses every declaration in `source`, returning how many parsed successfully.
///
/// When `errors` is provided, each parse failure is recorded there; either way
/// the parser skips one token after a failure and keeps going. Blank input
/// yields `Ok(0)`.
pub fn parse_source_file(source: &str, mut errors: Option<&mut Vec<String>>) -> ParseResult<usize> {
    if source.trim().is_empty() {
        return Ok(0);
    }
    // Lex in a single expression, consistent with `parse_expr_str`/`parse_decl_str`.
    let tokens = Lexer::new(source).tokenize();
    let mut parser = Parser::new(tokens);
    let mut count = 0usize;
    while !parser.is_eof() {
        match parser.parse_decl() {
            Ok(_) => count += 1,
            Err(e) => {
                if let Some(errs) = errors.as_deref_mut() {
                    errs.push(e.to_string());
                }
                // Skip one token so the loop makes progress after a failure.
                // NOTE(review): assumes `Parser::advance` always consumes
                // (or reaches EOF) — confirm it cannot stall on a stuck token.
                parser.advance();
            }
        }
    }
    Ok(count)
}
/// Parses all declarations in `source`, failing fast on the first error.
///
/// Blank input yields an empty declaration list.
pub fn parse_file(source: &str) -> ParseResult<Vec<Located<Decl>>> {
    if source.trim().is_empty() {
        return Ok(Vec::new());
    }
    let mut parser = Parser::new(Lexer::new(source).tokenize());
    let mut decls = Vec::new();
    loop {
        if parser.is_eof() {
            break;
        }
        let decl = parser.parse_decl().map_err(|e| e.to_string())?;
        decls.push(decl);
    }
    Ok(decls)
}
/// Small string-classification and escaping helpers shared by the lexer.
pub mod token_utils {
    /// Returns `true` if `s` is a well-formed identifier: an alphabetic
    /// character or `_`, followed by any mix of alphanumerics, `_`, or `'`.
    pub fn is_valid_ident(s: &str) -> bool {
        let mut chars = s.chars();
        match chars.next() {
            Some(c) if c.is_alphabetic() || c == '_' => {
                chars.all(|c| c.is_alphanumeric() || c == '_' || c == '\'')
            }
            _ => false,
        }
    }
    /// Returns `true` if `s` is a reserved language keyword.
    pub fn is_keyword(s: &str) -> bool {
        super::keywords::ALL_KEYWORDS.contains(&s)
    }
    /// Escapes `s` for inclusion in a double-quoted string literal.
    ///
    /// Single pass over the input (the chained-`replace` form allocated four
    /// intermediate strings). Also escapes `'\r'`, which was previously
    /// passed through unescaped.
    pub fn escape_string(s: &str) -> String {
        let mut out = String::with_capacity(s.len());
        for c in s.chars() {
            match c {
                '\\' => out.push_str("\\\\"),
                '"' => out.push_str("\\\""),
                '\n' => out.push_str("\\n"),
                '\t' => out.push_str("\\t"),
                '\r' => out.push_str("\\r"),
                other => out.push(other),
            }
        }
        out
    }
    /// Number of Unicode scalar values in `s` (not bytes).
    pub fn char_count(s: &str) -> usize {
        s.chars().count()
    }
}
/// Operator precedence levels; higher values bind more tightly.
pub mod prec {
    /// Function arrow.
    #[allow(missing_docs)]
    pub const ARROW: u32 = 25;
    /// Logical AND.
    pub const AND: u32 = 35;
    /// Logical OR (binds less tightly than AND).
    pub const OR: u32 = 30;
    /// Bi-implication — loosest of the logical connectives.
    #[allow(missing_docs)]
    pub const IFF: u32 = 20;
    /// Comparison operators.
    pub const CMP: u32 = 50;
    /// Additive operators.
    pub const ADD: u32 = 65;
    /// Multiplicative operators.
    #[allow(missing_docs)]
    pub const MUL: u32 = 70;
    /// Prefix unary operators.
    pub const UNARY: u32 = 75;
    /// Function application — binds tightest of all.
    pub const APP: u32 = 1024;
}
/// Keyword tables and the classification helpers built on them.
pub mod keywords {
    /// Reserved words of the language.
    pub const ALL_KEYWORDS: &[&str] = &[
        "def", "theorem", "lemma", "axiom", "opaque", "abbrev", "fun",
        "forall", "exists", "let", "in", "have", "show", "if", "then",
        "else", "match", "with", "return", "structure", "class",
        "instance", "where", "namespace", "section", "end", "open",
        "import", "by", "do", "pure", "Prop", "Type", "Sort", "true",
        "false",
    ];
    /// Tactic keyword list (queried via [`is_tactic_keyword`]).
    ///
    /// Each tactic appears exactly once; a duplicate `"exact"` entry was
    /// removed from the original table.
    pub const TACTIC_KEYWORDS: &[&str] = &[
        "intro", "intros", "exact", "apply", "rw", "rewrite", "simp",
        "ring", "linarith", "omega", "norm_num", "cases", "induction",
        "constructor", "left", "right", "split", "have", "show",
        "assumption", "contradiction", "trivial", "rfl", "refl",
        "push_neg", "by_contra", "by_contradiction", "contrapose",
        "exfalso", "use", "exists", "obtain", "clear", "rename",
        "revert", "repeat", "first", "try", "all_goals", "calc",
        "conv", "norm_cast", "push_cast",
    ];
    /// Builtin type/proposition names (queried via [`is_builtin_type`]).
    pub const BUILTIN_TYPES: &[&str] = &[
        "Nat", "Int", "Float", "Bool", "Char", "String", "List", "Array",
        "Option", "Result", "Prod", "Sum", "Sigma", "Subtype", "And",
        "Or", "Not", "Iff", "Eq", "Ne", "HEq", "True", "False", "Empty",
        "Unit", "Fin", "Prop", "Type",
    ];
    /// Returns `true` if `s` is in [`TACTIC_KEYWORDS`].
    pub fn is_tactic_keyword(s: &str) -> bool {
        TACTIC_KEYWORDS.contains(&s)
    }
    /// Returns `true` if `s` is in [`BUILTIN_TYPES`].
    pub fn is_builtin_type(s: &str) -> bool {
        BUILTIN_TYPES.contains(&s)
    }
}
#[cfg(test)]
mod extended_parse_tests {
    // Tests covering ParserConfig, SourceFile, the string parse entry
    // points, token utilities, precedence ordering, and keyword tables.
    use super::*;
    #[test]
    fn test_parser_config_default() {
        let cfg = ParserConfig::default();
        assert_eq!(cfg.max_depth, 512);
        assert!(cfg.unicode_ops);
        assert!(cfg.suggestions);
        assert!(!cfg.error_recovery);
    }
    #[test]
    fn test_parser_config_builder() {
        let cfg = ParserConfig::new()
            .with_max_depth(100)
            .with_unicode_ops(false)
            .with_error_recovery(true);
        assert_eq!(cfg.max_depth, 100);
        assert!(!cfg.unicode_ops);
        assert!(cfg.error_recovery);
    }
    #[test]
    fn test_parser_config_strict() {
        let cfg = ParserConfig::strict();
        assert!(!cfg.unicode_ops);
        assert!(!cfg.error_recovery);
        assert!(!cfg.suggestions);
    }
    #[test]
    fn test_source_file_line_count() {
        let sf = SourceFile::new("test.lean", "line1\nline2\nline3");
        assert_eq!(sf.line_count(), 3);
    }
    #[test]
    fn test_source_file_is_empty() {
        let sf = SourceFile::virtual_("");
        assert!(sf.is_empty());
    }
    #[test]
    fn test_source_file_line() {
        let sf = SourceFile::new("f.lean", "alpha\nbeta\ngamma");
        assert_eq!(sf.line(1), Some("beta"));
        assert_eq!(sf.line(99), None);
    }
    #[test]
    fn test_source_file_byte_len() {
        let sf = SourceFile::virtual_("hello");
        assert_eq!(sf.byte_len, 5);
    }
    #[test]
    fn test_parse_expr_str_ok() {
        let result = parse_expr_str("1 + 2");
        assert!(result.is_ok());
    }
    #[test]
    fn test_parse_expr_str_empty() {
        let result = parse_expr_str("");
        assert!(result.is_err());
    }
    #[test]
    fn test_parse_decl_str_ok() {
        let result = parse_decl_str("def foo := 42");
        assert!(result.is_ok());
    }
    #[test]
    fn test_is_valid_ident_ok() {
        assert!(token_utils::is_valid_ident("foo"));
        assert!(token_utils::is_valid_ident("Bar_baz"));
        assert!(token_utils::is_valid_ident("x'"));
    }
    #[test]
    fn test_is_valid_ident_fail() {
        assert!(!token_utils::is_valid_ident("123"));
        assert!(!token_utils::is_valid_ident(""));
        assert!(!token_utils::is_valid_ident("a b"));
    }
    #[test]
    fn test_is_keyword() {
        assert!(token_utils::is_keyword("def"));
        assert!(token_utils::is_keyword("theorem"));
        assert!(!token_utils::is_keyword("myFunc"));
    }
    #[test]
    fn test_escape_string() {
        let s = token_utils::escape_string("hello\nworld");
        assert!(s.contains("\\n"));
        assert!(!s.contains('\n'));
    }
    #[test]
    fn test_prec_ordering() {
        // Application binds tightest; the connectives loosen toward IFF.
        let (app, mul, add, cmp, and, or, iff, arrow) = (
            prec::APP,
            prec::MUL,
            prec::ADD,
            prec::CMP,
            prec::AND,
            prec::OR,
            prec::IFF,
            prec::ARROW,
        );
        assert!(app > mul);
        assert!(mul > add);
        assert!(add > cmp);
        assert!(cmp > and);
        assert!(and > or);
        assert!(or > iff);
        assert!(arrow > iff);
    }
    #[test]
    fn test_is_tactic_keyword() {
        assert!(keywords::is_tactic_keyword("intro"));
        assert!(keywords::is_tactic_keyword("simp"));
        assert!(!keywords::is_tactic_keyword("def"));
    }
    #[test]
    fn test_is_builtin_type() {
        assert!(keywords::is_builtin_type("Nat"));
        assert!(keywords::is_builtin_type("Bool"));
        assert!(!keywords::is_builtin_type("MyType"));
    }
    #[test]
    fn test_parser_version_nonempty() {
        assert!(!PARSER_VERSION.is_empty());
    }
    #[test]
    fn test_char_count_unicode() {
        // Three chars but more than three bytes in UTF-8.
        let s = "αβγ";
        assert_eq!(token_utils::char_count(s), 3);
        assert!(s.len() > 3);
    }
}
/// A value paired with the `start`/`end` byte offsets it was parsed from.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct Annotated<T> {
    pub value: T,
    pub start: usize,
    pub end: usize,
}

impl<T> Annotated<T> {
    /// Wraps `value` together with its span offsets.
    #[allow(dead_code)]
    pub fn new(value: T, start: usize, end: usize) -> Self {
        Self { value, start, end }
    }
    /// Transforms the carried value while keeping the span intact.
    #[allow(dead_code)]
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Annotated<U> {
        let Annotated { value, start, end } = self;
        Annotated {
            value: f(value),
            start,
            end,
        }
    }
    /// Width of the span; zero when `end` precedes `start`.
    #[allow(dead_code)]
    pub fn span_len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }
}
/// A non-fatal issue discovered during parsing, with its source span.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ParseWarning {
    pub message: String,
    pub start: usize,
    pub end: usize,
}

impl ParseWarning {
    /// Creates a warning covering the `start..end` offsets.
    #[allow(dead_code)]
    pub fn new(message: impl Into<String>, start: usize, end: usize) -> Self {
        let message = message.into();
        Self {
            message,
            start,
            end,
        }
    }
}

/// Accumulates warnings and errors produced while parsing.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct ParseDiagnostics {
    pub warnings: Vec<ParseWarning>,
    pub errors: Vec<String>,
}

impl ParseDiagnostics {
    /// An empty diagnostics collection.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }
    /// Records a warning for the span `start..end`.
    #[allow(dead_code)]
    pub fn warn(&mut self, msg: impl Into<String>, start: usize, end: usize) {
        let warning = ParseWarning::new(msg, start, end);
        self.warnings.push(warning);
    }
    /// Records an error message.
    #[allow(dead_code)]
    pub fn error(&mut self, msg: impl Into<String>) {
        self.errors.push(msg.into());
    }
    /// `true` while no errors are recorded (warnings alone do not count).
    #[allow(dead_code)]
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
    /// Combined number of warnings and errors.
    #[allow(dead_code)]
    pub fn total(&self) -> usize {
        self.errors.len() + self.warnings.len()
    }
}
/// Collapses every run of whitespace in `src` into a single space and trims
/// leading/trailing whitespace.
#[allow(dead_code)]
pub fn normalize_whitespace(src: &str) -> String {
    let mut out = String::with_capacity(src.len());
    for word in src.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
/// Counts how many whitespace-delimited words of `src` equal `keyword` exactly.
#[allow(dead_code)]
pub fn count_keyword_occurrences(src: &str, keyword: &str) -> usize {
    src.split_whitespace()
        .fold(0, |count, word| if word == keyword { count + 1 } else { count })
}
/// Collects the set of identifier-like words appearing anywhere in `src`.
///
/// Splits on any character that cannot occur in an identifier, then keeps the
/// pieces that `token_utils::is_valid_ident` accepts.
#[allow(dead_code)]
pub fn rough_ident_set(src: &str) -> std::collections::HashSet<String> {
    let mut idents = std::collections::HashSet::new();
    for piece in src.split(|c: char| !(c.is_alphanumeric() || c == '_' || c == '\'')) {
        if piece.is_empty() || !token_utils::is_valid_ident(piece) {
            continue;
        }
        idents.insert(piece.to_string());
    }
    idents
}
#[cfg(test)]
mod extra_parse_tests {
    // Tests for the span-annotation helpers, the diagnostics accumulator,
    // and the rough source-analysis utilities.
    use super::*;
    #[test]
    fn test_annotated_new_and_span_len() {
        let a = Annotated::new("hello", 0, 5);
        assert_eq!(a.span_len(), 5);
    }
    #[test]
    fn test_annotated_map() {
        let a = Annotated::new(42u32, 10, 12);
        let b = a.map(|v| v * 2);
        assert_eq!(b.value, 84);
        assert_eq!(b.start, 10);
    }
    #[test]
    fn test_parse_warning_new() {
        let w = ParseWarning::new("unused variable", 0, 5);
        assert!(w.message.contains("unused"));
    }
    #[test]
    fn test_parse_diagnostics_ok() {
        let mut d = ParseDiagnostics::new();
        d.warn("something odd", 0, 3);
        // Warnings alone leave diagnostics "ok"; the first error flips it.
        assert!(d.is_ok());
        d.error("bad token");
        assert!(!d.is_ok());
    }
    #[test]
    fn test_parse_diagnostics_total() {
        let mut d = ParseDiagnostics::new();
        d.warn("w1", 0, 1);
        d.error("e1");
        assert_eq!(d.total(), 2);
    }
    #[test]
    fn test_normalize_whitespace() {
        let result = normalize_whitespace("  hello   world  ");
        assert_eq!(result, "hello world");
    }
    #[test]
    fn test_count_keyword_occurrences() {
        let src = "def foo def bar theorem baz";
        assert_eq!(count_keyword_occurrences(src, "def"), 2);
        assert_eq!(count_keyword_occurrences(src, "theorem"), 1);
    }
    #[test]
    fn test_rough_ident_set() {
        let src = "def foo (x : Nat) := x + x";
        let ids = rough_ident_set(src);
        assert!(ids.contains("foo"));
        assert!(ids.contains("x"));
        assert!(ids.contains("Nat"));
    }
}