// rigsql_core/segment.rs

1use strum::{Display, EnumString};
2
3use crate::{Span, Token};
4
5/// A node in the Concrete Syntax Tree.
6///
7/// Leaf nodes wrap individual tokens. Branch nodes group children
8/// under a named production (e.g. `SelectStatement`, `WhereClause`).
9#[derive(Debug, Clone)]
10pub enum Segment {
11    Token(TokenSegment),
12    Node(NodeSegment),
13}
14
15impl Segment {
16    pub fn span(&self) -> Span {
17        match self {
18            Segment::Token(t) => t.token.span,
19            Segment::Node(n) => n.span,
20        }
21    }
22
23    pub fn segment_type(&self) -> SegmentType {
24        match self {
25            Segment::Token(t) => t.segment_type,
26            Segment::Node(n) => n.segment_type,
27        }
28    }
29
30    /// Recursively collect all leaf tokens in order.
31    pub fn tokens(&self) -> Vec<&Token> {
32        match self {
33            Segment::Token(t) => vec![&t.token],
34            Segment::Node(n) => n.children.iter().flat_map(|c| c.tokens()).collect(),
35        }
36    }
37
38    /// Iterator over direct children (empty for token segments).
39    pub fn children(&self) -> &[Segment] {
40        match self {
41            Segment::Token(_) => &[],
42            Segment::Node(n) => &n.children,
43        }
44    }
45
46    /// Recursively visit all segments depth-first.
47    pub fn walk(&self, visitor: &mut dyn FnMut(&Segment)) {
48        visitor(self);
49        if let Segment::Node(n) = self {
50            for child in &n.children {
51                child.walk(visitor);
52            }
53        }
54    }
55
56    /// Reconstruct source text from leaf tokens.
57    pub fn raw(&self) -> String {
58        self.tokens().iter().map(|t| t.text.as_str()).collect()
59    }
60}
61
62/// A leaf segment wrapping a single token.
63#[derive(Debug, Clone)]
64pub struct TokenSegment {
65    pub token: Token,
66    pub segment_type: SegmentType,
67}
68
69/// A branch segment grouping children under a named production.
70#[derive(Debug, Clone)]
71pub struct NodeSegment {
72    pub segment_type: SegmentType,
73    pub children: Vec<Segment>,
74    pub span: Span,
75}
76
77impl NodeSegment {
78    /// Create a new node from children, computing span automatically.
79    pub fn new(segment_type: SegmentType, children: Vec<Segment>) -> Self {
80        let span = if children.is_empty() {
81            Span::new(0, 0)
82        } else {
83            let first = children.first().unwrap().span();
84            let last = children.last().unwrap().span();
85            first.merge(last)
86        };
87        Self {
88            segment_type,
89            children,
90            span,
91        }
92    }
93}
94
95/// Type tag for CST segments.
96#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString)]
97pub enum SegmentType {
98    // Top-level
99    File,
100    Statement,
101
102    // DML Statements
103    SelectStatement,
104    InsertStatement,
105    UpdateStatement,
106    DeleteStatement,
107
108    // DDL Statements
109    CreateTableStatement,
110    AlterTableStatement,
111    DropStatement,
112
113    // TSQL Statements
114    DeclareStatement,
115    SetVariableStatement,
116    IfStatement,
117    BeginEndBlock,
118    WhileStatement,
119    TryCatchBlock,
120    ExecStatement,
121    ReturnStatement,
122    PrintStatement,
123    ThrowStatement,
124    RaiserrorStatement,
125    GoStatement,
126
127    // Clauses
128    SelectClause,
129    FromClause,
130    WhereClause,
131    GroupByClause,
132    HavingClause,
133    OrderByClause,
134    LimitClause,
135    OffsetClause,
136    JoinClause,
137    OnClause,
138    UsingClause,
139    SetClause,
140    ValuesClause,
141    ReturningClause,
142    WithClause,
143    CteDefinition,
144    InsertColumnsClause,
145
146    // Expressions
147    ColumnRef,
148    TableRef,
149    FunctionCall,
150    FunctionArgs,
151    Expression,
152    BinaryExpression,
153    UnaryExpression,
154    ParenExpression,
155    CaseExpression,
156    WhenClause,
157    ElseClause,
158    Subquery,
159    ExistsExpression,
160    InExpression,
161    BetweenExpression,
162    CastExpression,
163    IsNullExpression,
164    LikeExpression,
165
166    // Window functions
167    WindowExpression,
168    OverClause,
169    PartitionByClause,
170    WindowFrameClause,
171
172    // Alias
173    AliasExpression,
174
175    // Column / Table definition
176    ColumnDefinition,
177    DataType,
178    ColumnConstraint,
179    TableConstraint,
180
181    // Order
182    OrderByExpression,
183    SortOrder,
184
185    // Atoms (leaf-level semantic types)
186    Keyword,
187    Identifier,
188    QualifiedIdentifier,
189    QuotedIdentifier,
190    Literal,
191    NumericLiteral,
192    StringLiteral,
193    BooleanLiteral,
194    NullLiteral,
195    Operator,
196    ComparisonOperator,
197    ArithmeticOperator,
198    Comma,
199    Dot,
200    Semicolon,
201    Star,
202    LParen,
203    RParen,
204
205    // Trivia
206    Whitespace,
207    Newline,
208    LineComment,
209    BlockComment,
210
211    // Fallback
212    Unparsable,
213}
214
215impl SegmentType {
216    /// Returns true if this is a trivia type (whitespace/comment).
217    pub fn is_trivia(self) -> bool {
218        matches!(
219            self,
220            SegmentType::Whitespace
221                | SegmentType::Newline
222                | SegmentType::LineComment
223                | SegmentType::BlockComment
224        )
225    }
226}