Skip to main content

perl_ast_v2/
lib.rs

1//! Enhanced AST with full position tracking for incremental parsing
2//!
3//! This module provides an updated AST that uses Range instead of SourceLocation
4//! to support incremental parsing and better error reporting.
5
6use perl_position_tracking::Range;
7
/// Identifier attached to every AST node so that incremental parsing can
/// refer to individual nodes.
pub type NodeId = usize;
10
/// Position of a diagnostic within the diagnostics array of `ParseOutput`.
///
/// Error nodes can carry just this index instead of the full message, which
/// keeps the tree small and keeps human-readable text out of the AST.
pub type DiagnosticId = u32;
17
/// Kinds of missing syntax elements for error recovery.
///
/// Each variant names what the parser expected but did not find, so that
/// diagnostics and recovery can be specific about the gap.
///
/// The type is small and `Copy`; it also implements `Hash` so it can be used
/// as a key in `HashMap`/`HashSet` (for example to count or de-duplicate
/// recovery events by kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MissingKind {
    /// Missing expression (e.g., after `=` in assignment)
    Expression,
    /// Missing statement
    Statement,
    /// Missing identifier/name
    Identifier,
    /// Missing block `{ ... }`
    Block,
    /// Missing closing delimiter; the payload is the delimiter character
    /// that was expected.
    ClosingDelimiter(char),
    /// Missing semicolon
    Semicolon,
    /// Missing condition (e.g., in `if`)
    Condition,
    /// Missing argument
    Argument,
    /// Missing operator
    Operator,
}
43
44/// Enhanced AST node with full position tracking
45#[derive(Debug, Clone, PartialEq)]
46pub struct Node {
47    /// Unique identifier for this node
48    pub id: NodeId,
49    /// The kind of syntax node
50    pub kind: NodeKind,
51    /// Source range with line/column information
52    pub range: Range,
53}
54
55impl Node {
56    /// Create a new AST node
57    pub fn new(id: NodeId, kind: NodeKind, range: Range) -> Self {
58        Node { id, kind, range }
59    }
60
61    /// Convert to tree-sitter compatible S-expression
62    pub fn to_sexp(&self) -> String {
63        // Delegate to existing implementation
64        self.kind.to_sexp()
65    }
66}
67
68/// The kinds of AST nodes used by the parser.
69///
70/// Each variant represents a specific syntactic construct in the Perl source
71/// and carries the child nodes or data needed to represent that construct.
72#[derive(Debug, Clone, PartialEq)]
73pub enum NodeKind {
74    // Program structure
75    /// Top-level program containing a list of statements.
76    Program {
77        /// Statements contained by the program/root node.
78        statements: Vec<Node>,
79    },
80    /// Block node containing a list of statements.
81    Block {
82        /// Statements inside a block.
83        statements: Vec<Node>,
84    },
85
86    // Declarations
87    /// A single variable declaration (`my`, `our`, `local`, `state`, ...).
88    VariableDeclaration {
89        /// The declarator keyword (e.g. `my`, `our`).
90        declarator: String, // my, our, local, state
91        /// The variable node being declared.
92        variable: Box<Node>,
93        /// Any attributes attached to the declaration.
94        attributes: Vec<String>,
95        /// Optional initializer expression.
96        initializer: Option<Box<Node>>,
97    },
98
99    /// A list-style variable declaration (e.g. `my ($a, $b) = ...`).
100    VariableListDeclaration {
101        /// The declarator keyword.
102        declarator: String,
103        /// Variables declared in the list.
104        variables: Vec<Node>,
105        /// Any attributes attached to the declaration.
106        attributes: Vec<String>,
107        /// Optional initializer for the list.
108        initializer: Option<Box<Node>>,
109    },
110
111    // Variables
112    /// A variable usage with sigil and name (e.g. `$foo`, `@arr`).
113    Variable {
114        /// The sigil character (e.g. `$`, `@`, `%`).
115        sigil: String, // $, @, %, *
116        /// The identifier/name of the variable.
117        name: String,
118    },
119
120    // Error recovery nodes
121    /// An error/recovery node produced during parsing (legacy, rich payload).
122    ///
123    /// This variant embeds the error information directly in the AST node.
124    /// For new code, prefer `ErrorRef` which stores only a diagnostic index.
125    Error {
126        /// Human readable error message.
127        message: String,
128        /// Tokens or node kinds that were expected at this location.
129        expected: Vec<String>,
130        /// Optional partially parsed node for recovery contexts.
131        partial: Option<Box<Node>>,
132    },
133
134    /// Lightweight error node referencing a diagnostic by index.
135    ///
136    /// This is the preferred error representation for memory efficiency.
137    /// The actual diagnostic information is stored in `ParseOutput.diagnostics`
138    /// and can be looked up by the `diag_id`.
139    ///
140    /// # Example
141    ///
142    /// ```ignore
143    /// let output = parser.parse_with_recovery();
144    /// for node in output.ast.walk() {
145    ///     if let NodeKind::ErrorRef { diag_id } = &node.kind {
146    ///         let diagnostic = &output.diagnostics[*diag_id as usize];
147    ///         println!("Error at {:?}: {}", node.range, diagnostic);
148    ///     }
149    /// }
150    /// ```
151    ErrorRef {
152        /// Index into the diagnostics array in `ParseOutput`.
153        diag_id: DiagnosticId,
154    },
155
156    /// Placeholder for a missing expression during error recovery.
157    MissingExpression,
158    /// Placeholder for a missing statement during error recovery.
159    MissingStatement,
160    /// Placeholder for a missing identifier during error recovery.
161    MissingIdentifier,
162    /// Placeholder for a missing block during error recovery.
163    MissingBlock,
164
165    /// Specific kind of missing syntax element.
166    ///
167    /// This provides more granular information about what's missing
168    /// without embedding full diagnostic details in the AST.
169    Missing(MissingKind),
170
171    // Include all other variants from original AST...
172    // (Abbreviated for example - would include all original variants)
173
174    // Expressions
175    /// A binary expression (e.g. `a + b`).
176    Binary {
177        /// The operator token as text.
178        op: String,
179        /// Left-hand side expression.
180        left: Box<Node>,
181        /// Right-hand side expression.
182        right: Box<Node>,
183    },
184
185    /// A unary expression (e.g. `-x`, `!flag`).
186    Unary {
187        /// The operator token.
188        op: String,
189        /// The operand expression.
190        operand: Box<Node>,
191    },
192
193    // Control flow
194    /// An `if` control-flow construct, including `elsif` and `else` branches.
195    If {
196        /// The conditional expression.
197        condition: Box<Node>,
198        /// The then-branch block node.
199        then_branch: Box<Node>,
200        /// Zero or more `elsif` branches represented as (condition, block).
201        elsif_branches: Vec<(Node, Node)>,
202        /// Optional else branch.
203        else_branch: Option<Box<Node>>,
204    },
205
206    // Literals
207    /// Numeric literal node.
208    Number {
209        /// The literal text of the number.
210        value: String,
211    },
212    /// String literal node; may be interpolated.
213    String {
214        /// The string contents.
215        value: String,
216        /// Whether the string contains interpolation.
217        interpolated: bool,
218    },
219    /// An identifier token.
220    Identifier {
221        /// The identifier text.
222        name: String,
223    },
224    // Other essential variants...
225}
226
227impl NodeKind {
228    /// Convert to S-expression format
229    pub fn to_sexp(&self) -> String {
230        use NodeKind::*;
231
232        match self {
233            Program { statements } => {
234                let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
235                format!("(source_file {})", stmts)
236            }
237
238            Block { statements } => {
239                let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
240                format!("(block {})", stmts)
241            }
242
243            Variable { sigil, name } => {
244                format!("(variable {} {})", sigil, name)
245            }
246
247            Number { value } => format!("(number {})", value),
248
249            String { value, interpolated } => {
250                if *interpolated {
251                    format!("(string_interpolated {:?})", value)
252                } else {
253                    format!("(string {:?})", value)
254                }
255            }
256
257            Binary { op, left, right } => {
258                format!("(binary_{} {} {})", op, left.to_sexp(), right.to_sexp())
259            }
260
261            Error { message, .. } => format!("(ERROR {})", message),
262            ErrorRef { diag_id } => format!("(ERROR_REF #{})", diag_id),
263
264            MissingExpression => "(MISSING_EXPRESSION)".to_string(),
265            MissingStatement => "(MISSING_STATEMENT)".to_string(),
266            MissingIdentifier => "(MISSING_IDENTIFIER)".to_string(),
267            MissingBlock => "(MISSING_BLOCK)".to_string(),
268            Missing(kind) => format!("(MISSING {:?})", kind),
269
270            // Add other variants...
271            _ => format!("({:?})", self),
272        }
273    }
274}
275
276/// Generator for producing unique `NodeId` values used across the AST.
277///
278/// This utility ensures each constructed `Node` receives a distinct identifier
279/// which is useful for incremental parsing, diffing and node references.
280pub struct NodeIdGenerator {
281    /// The next identifier to hand out.
282    next_id: NodeId,
283}
284
285impl NodeIdGenerator {
286    /// Create a new `NodeIdGenerator` starting at zero.
287    pub fn new() -> Self {
288        NodeIdGenerator { next_id: 0 }
289    }
290
291    /// Return the next unique `NodeId` and advance the generator.
292    pub fn next_id(&mut self) -> NodeId {
293        let id = self.next_id;
294        self.next_id += 1;
295        id
296    }
297}
298
299impl Default for NodeIdGenerator {
300    fn default() -> Self {
301        Self::new()
302    }
303}
304
#[cfg(test)]
mod tests {
    use super::*;
    use perl_position_tracking::{Position, Range};

    /// The first generated id is attached to the node, and a number literal
    /// renders as `(number <value>)`.
    #[test]
    fn test_node_creation() {
        let mut ids = NodeIdGenerator::new();
        let start = Position::new(0, 1, 1);
        let end = Position::new(5, 1, 6);
        let span = Range::new(start, end);

        let kind = NodeKind::Number { value: String::from("42") };
        let number = Node::new(ids.next_id(), kind, span);

        assert_eq!(node_id_of(&number), 0);
        assert_eq!(number.to_sexp(), "(number 42)");
    }

    /// A legacy `Error` node renders only its message inside `(ERROR ...)`.
    #[test]
    fn test_error_nodes() {
        let mut ids = NodeIdGenerator::new();
        let at_origin = Range::new(Position::new(0, 1, 1), Position::new(0, 1, 1));

        let kind = NodeKind::Error {
            message: String::from("Unexpected token"),
            expected: vec![String::from("identifier")],
            partial: None,
        };
        let failure = Node::new(ids.next_id(), kind, at_origin);

        assert_eq!(failure.to_sexp(), "(ERROR Unexpected token)");
    }

    /// Read the id stored on a node.
    fn node_id_of(node: &Node) -> NodeId {
        node.id
    }
}