oxilean_parse/lib.rs
1#![allow(unused_imports)]
2#![allow(dead_code)]
3
4//! # OxiLean Parser — Surface Syntax to AST
5//!
6//! This crate converts concrete OxiLean source syntax (text) into abstract syntax trees (ASTs)
7//! that the elaborator can process into kernel-checkable terms.
8//!
9//! ## Quick Start
10//!
11//! ### Parsing an Expression
12//!
13//! ```ignore
14//! use oxilean_parse::{Lexer, Parser, SurfaceExpr};
15//!
16//! let source = "fun (x : Nat) => x + 1";
17//! let lexer = Lexer::new(source);
18//! let tokens = lexer.tokenize()?;
19//! let mut parser = Parser::new(tokens);
20//! let expr = parser.parse_expr()?;
21//! ```
22//!
23//! ### Parsing a Module
24//!
25//! ```ignore
26//! use oxilean_parse::Module;
27//!
28//! let source = "def double (n : Nat) : Nat := n + n";
29//! let module = Module::parse_source(source)?;
30//! ```
31//!
32//! ## Architecture Overview
33//!
34//! The parser is a three-stage pipeline:
35//!
36//! ```text
37//! Source Text (.oxilean file)
38//! │
39//! ▼
40//! ┌──────────────────────┐
41//! │ Lexer │ → Tokenizes text
42//! │ (lexer.rs) │ → Handles UTF-8, comments, strings
43//! └──────────────────────┘
44//! │
45//! ▼
46//! Token Stream
47//! │
48//! ▼
49//! ┌──────────────────────┐
50//! │ Parser │ → Builds AST from tokens
51//! │ (parser_impl.rs) │ → Handles precedence, associativity
52//! │ + Helpers │ → Pattern, macro, tactic parsers
53//! └──────────────────────┘
54//! │
55//! ▼
56//! Abstract Syntax Tree (AST)
57//! │
58//! └─→ SurfaceExpr, SurfaceDecl, Module, etc.
59//! └─→ Diagnostic information (errors, warnings)
60//! └─→ Source mapping (for IDE integration)
61//! ```
62//!
63//! ## Key Concepts & Terminology
64//!
65//! ### Tokens
66//!
67//! Basic lexical elements:
68//! - **Identifiers**: `x`, `Nat`, `add_comm`, etc.
69//! - **Keywords**: `fun`, `def`, `theorem`, `inductive`, etc.
70//! - **Operators**: `+`, `-`, `:`, `=>`, `|`, etc.
71//! - **Literals**: Numbers, strings, characters
72//! - **Delimiters**: `(`, `)`, `[`, `]`, `{`, `}`
73//!
74//! ### Surface Syntax (SurfaceExpr)
75//!
76//! Represents OxiLean code before elaboration:
77//! - **Applications**: `f x y` (function calls)
78//! - **Lambda**: `fun x => body`
79//! - **Pi types**: `(x : A) -> B`
80//! - **Matches**: `match x with | nil => ... | cons h t => ...`
81//! - **Tactics**: `by (tac1; tac2)`
82//! - **Attributes**: `@[simp] def foo := ...`
83//!
84//! ### AST vs Kernel Expr
85//!
86//! - **AST (this crate)**: Surface syntax with implicit info
87//! - Contains `?` (implicit args), `_` (placeholders)
88//! - No type annotations required
89//! - Represents user-written code
90//! - **Kernel Expr (oxilean-kernel)**: Type-checked terms
91//! - All types explicit
92//! - All implicit args resolved
93//! - Fully elaborated
94//!
95//! ## Module Organization
96//!
97//! ### Core Parsing Modules
98//!
99//! | Module | Purpose |
100//! |--------|---------|
101//! | `lexer` | Tokenization: text → tokens |
102//! | `tokens` | Token and TokenKind definitions |
103//! | `parser_impl` | Main parser: tokens → AST |
104//! | `command` | Command parsing (`def`, `theorem`, etc.) |
105//!
106//! ### AST Definition
107//!
108//! | Module | Purpose |
109//! |--------|---------|
110//! | `ast_impl` | Core AST types: `SurfaceExpr`, `SurfaceDecl`, etc. |
111//! | `pattern` | Pattern matching syntax |
112//! | `literal` | Number and string literals |
113//!
114//! ### Specialized Parsers
115//!
116//! | Module | Purpose |
117//! |--------|---------|
118//! | `tactic_parser` | Tactic syntax: `intro`, `apply`, `rw`, etc. |
119//! | `macro_parser` | Macro definition and expansion |
120//! | `notation_system` | Operator precedence and associativity |
121//!
122//! ### Diagnostics & Source Mapping
123//!
124//! | Module | Purpose |
125//! |--------|---------|
126//! | `diagnostic` | Error and warning collection |
127//! | `error_impl` | Parse error types and messages |
128//! | `sourcemap` | Source position tracking for IDE |
129//! | `span_util` | Source span utilities |
130//!
131//! ### Utilities
132//!
133//! | Module | Purpose |
134//! |--------|---------|
135//! | `prettyprint` | AST pretty-printing |
136//! | `module` | Module system and imports |
137//! | `repl_parser` | REPL command parsing |
138//!
139//! ## Parsing Pipeline Details
140//!
141//! ### Phase 1: Lexical Analysis (Lexer)
142//!
143//! Transforms character stream into token stream:
144//! - Handles Unicode identifiers (UTF-8)
145//! - Recognizes keywords vs identifiers
146//! - Tracks line/column positions (for error reporting)
147//! - Supports:
148//! - Single-line comments: `-- comment`
149//! - Multi-line comments: `/- -/`
150//! - String literals: `"hello"`
151//! - Number literals: `42`, `0xFF`, `3.14`
152//!
153//! ### Phase 2: Syntactic Analysis (Parser)
154//!
155//! Transforms token stream into AST:
156//! - **Recursive descent**: For statements and declarations
157//! - **Pratt parsing**: For expressions (handles precedence)
158//! - **Lookahead(1)**: LL(1) grammar for predictive parsing
159//! - **Error recovery**: Continues parsing after errors
160//!
161//! ### Phase 3: Post-Processing
162//!
163//! - **Notation expansion**: Apply infix/prefix operators
164//! - **Macro expansion**: Expand syntax sugar
165//! - **Span assignment**: Map AST nodes to source positions
166//!
167//! ## Usage Examples
168//!
169//! ### Example 1: Parse and Pretty-Print
170//!
171//! ```text
172//! use oxilean_parse::{Lexer, Parser, print_expr};
173//!
174//! let source = "(x : Nat) -> Nat";
175//! let mut parser = Parser::from_source(source)?;
176//! let expr = parser.parse_expr()?;
177//! println!("{}", print_expr(&expr));
178//! ```
179//!
180//! ### Example 2: Parse a Definition
181//!
182//! ```text
183//! use oxilean_parse::{Lexer, Parser, Decl};
184//!
185//! let source = "def double (n : Nat) : Nat := n + n";
186//! let mut parser = Parser::from_source(source)?;
187//! let decl = parser.parse_decl()?;
188//! assert!(matches!(decl, Decl::Def { .. }));
189//! ```
190//!
191//! ### Example 3: Collect Diagnostics
192//!
193//! ```text
194//! use oxilean_parse::DiagnosticCollector;
195//!
196//! let mut collector = DiagnosticCollector::new();
197//! // ... parse code ...
198//! for diag in collector.diagnostics() {
199//! println!("{:?}", diag);
200//! }
201//! ```
202//!
203//! ## Operator Precedence
204//!
205//! Operators are organized by precedence levels (0-100):
206//! - **Level 100** (highest): Projections, applications
207//! - **Level 90**: Power/exponentiation
208//! - **Level 70**: Multiplication, division
209//! - **Level 65**: Addition, subtraction
210//! - **Level 50**: Comparison (`<`, `>`, `=`, etc.)
211//! - **Level 40**: Conjunction (`and`)
212//! - **Level 35**: Disjunction (`or`)
213//! - **Level 25**: Implication (`->`)
214//! - **Level 0** (lowest): Binders (`fun`, `:`, etc.)
215//!
216//! Associativity (left/right/non-associative) is per-operator.
217//!
218//! ## Error Handling
219//!
220//! Parser errors include:
221//! - **Unexpected token**: Parser expected a different token
222//! - **Expected `type` token**: Specific token was expected but not found
223//! - **Unclosed delimiter**: Missing closing bracket/paren
224//! - **Undeclared operator**: Unknown infix operator
225//! - **Invalid pattern**: Malformed pattern in match/fun
226//!
227//! All errors carry:
228//! - **Source location** (span): File, line, column
229//! - **Error message**: Human-readable description
230//! - **Context**: Surrounding code snippet (for IDE tooltips)
231//!
232//! ## Source Mapping & IDE Integration
233//!
234//! The parser builds a **source map** tracking:
235//! - AST node → source location
236//! - Hover information (for IDE hover tooltips)
237//! - Semantic tokens (for syntax highlighting)
238//! - Reference locations (for "go to definition")
239//!
240//! This enables:
241//! - Accurate error reporting
242//! - IDE language server protocol (LSP) support
243//! - Refactoring tools
244//!
245//! ## Extensibility
246//!
247//! ### Adding New Operators
248//!
249//! Operators are registered in `notation_system`:
250//! ```ignore
251//! let notation = Notation {
252//! name: "my_op",
253//! kind: NotationKind::Infix,
254//! level: 60,
255//! associativity: Associativity::Left,
256//! };
257//! notation_table.insert(notation);
258//! ```
259//!
260//! ### Adding New Keywords
261//!
262//! Keywords are hardcoded in `lexer::keyword_of_string()`.
263//! Add new keyword, then handle in parser.
264//!
265//! ### Custom Macros
266//!
267//! Macros are parsed by `macro_parser` and expanded during parsing:
268//! ```text
269//! syntax "list" ["[", expr, (",", expr)*, "]"] => ...
270//! macro list_to_cons : list => (...)
271//! ```
272//!
273//! ## Integration with Other Crates
274//!
275//! ### With oxilean-elab
276//!
277//! The elaborator consumes this crate's AST:
278//! ```text
279//! Parser: Source → SurfaceExpr
280//! Elaborator: SurfaceExpr → Kernel Expr (with type checking)
281//! ```
282//!
283//! ### With oxilean-kernel
284//!
285//! Kernel types (Name, Level, Literal) are re-exported by parser for convenience.
286//!
287//! ## Performance Considerations
288//!
289//! - **Linear parsing**: O(n) where n = source length
290//! - **Minimal allocations**: AST nodes are typically smaller than source
291//! - **Single pass**: No tokenization+parsing phase, done in parallel
292//!
293//! ## Further Reading
294//!
295//! - [ARCHITECTURE.md](../../ARCHITECTURE.md) — System architecture
296//! - [BLUEPRINT.md](../../BLUEPRINT.md) — Formal syntax specification
297//! - Module documentation for specific subcomponents
298
299#![allow(missing_docs)]
300#![warn(clippy::all)]
301#![allow(clippy::collapsible_if)]
302
303// Module stubs for future implementation
304pub mod ast;
305pub mod error;
306pub mod parser;
307pub mod token;
308
309// Full implementations
310pub mod ast_impl;
311pub mod command;
312pub mod diagnostic;
313pub mod error_impl;
314pub mod expr_cache;
315/// Advanced formatter with Wadler-Lindig optimal layout.
316pub mod formatter_adv;
317pub mod incremental;
318pub mod indent_tracker;
319pub mod lexer;
320pub mod macro_parser;
321pub mod module;
322pub mod notation_system;
323pub mod parser_impl;
324pub mod pattern;
325pub mod prettyprint;
326pub mod repl_parser;
327pub mod roundtrip;
328pub mod sourcemap;
329pub mod span_util;
330pub mod tactic_parser;
331pub mod tokens;
332pub mod wasm_source_map;
333
334pub use ast::SurfaceExpr as OldSurfaceExpr;
335pub use ast_impl::{
336 AstNotationKind, AttributeKind, Binder, BinderKind, CalcStep as AstCalcStep, Constructor, Decl,
337 DoAction, FieldDecl, Literal, Located, MatchArm, Pattern, SortKind, SurfaceExpr, TacticRef,
338 WhereClause,
339};
340pub use command::{Command, CommandParser, NotationKind, OpenItem};
341pub use diagnostic::{Diagnostic, DiagnosticCollector, DiagnosticLabel, Severity};
342pub use error::ParseError as OldParseError;
343pub use error_impl::{ParseError, ParseErrorKind};
344pub use lexer::Lexer;
345pub use macro_parser::{
346 HygieneInfo, MacroDef, MacroError, MacroErrorKind, MacroExpander, MacroParser, MacroRule,
347 MacroToken, SyntaxDef, SyntaxItem, SyntaxKind,
348};
349pub use module::{Module, ModuleRegistry};
350pub use notation_system::{
351 Fixity, NotationEntry, NotationKind as NotationSystemKind, NotationPart, NotationTable,
352 OperatorEntry,
353};
354pub use parser_impl::Parser;
355pub use pattern::{MatchClause, PatternCompiler};
356pub use prettyprint::{print_decl, print_expr, ParensMode, PrettyConfig, PrettyPrinter};
357pub use repl_parser::{is_complete, ReplCommand, ReplParser};
358pub use sourcemap::{
359 EntryKind, HoverInfo, SemanticToken, SourceEntry, SourceMap, SourceMapBuilder,
360};
361pub use span_util::{dummy_span, merge_spans, span_contains, span_len};
362pub use tactic_parser::{
363 CalcStep, CaseArm, ConvSide, RewriteRule, SimpArgs, TacticExpr, TacticParser,
364};
365pub use tokens::{Span, StringPart, Token, TokenKind};
366
367pub mod core_types;
368pub use core_types::*;
369
370pub mod error_recovery;
371pub mod hygienic_macro;
372pub mod module_system;
373pub mod notation_elaboration;
374pub mod source_map;