Skip to main content

rigsql_core/
segment.rs

1use strum::{Display, EnumString};
2
3use crate::{Span, Token};
4
/// A node in the Concrete Syntax Tree.
///
/// Leaf nodes wrap individual tokens. Branch nodes group children
/// under a named production (e.g. `SelectStatement`, `WhereClause`).
#[derive(Debug, Clone)]
pub enum Segment {
    /// Leaf: a single token together with its segment type.
    Token(TokenSegment),
    /// Branch: a typed group of child segments carrying its own span.
    Node(NodeSegment),
}
14
15impl Segment {
16    pub fn span(&self) -> Span {
17        match self {
18            Segment::Token(t) => t.token.span,
19            Segment::Node(n) => n.span,
20        }
21    }
22
23    pub fn segment_type(&self) -> SegmentType {
24        match self {
25            Segment::Token(t) => t.segment_type,
26            Segment::Node(n) => n.segment_type,
27        }
28    }
29
30    /// Recursively collect all leaf tokens in order.
31    pub fn tokens(&self) -> Vec<&Token> {
32        match self {
33            Segment::Token(t) => vec![&t.token],
34            Segment::Node(n) => n.children.iter().flat_map(|c| c.tokens()).collect(),
35        }
36    }
37
38    /// Iterator over direct children (empty for token segments).
39    pub fn children(&self) -> &[Segment] {
40        match self {
41            Segment::Token(_) => &[],
42            Segment::Node(n) => &n.children,
43        }
44    }
45
46    /// Recursively visit all segments depth-first.
47    pub fn walk(&self, visitor: &mut dyn FnMut(&Segment)) {
48        visitor(self);
49        if let Segment::Node(n) = self {
50            for child in &n.children {
51                child.walk(visitor);
52            }
53        }
54    }
55
56    /// Reconstruct source text from leaf tokens.
57    pub fn raw(&self) -> String {
58        self.tokens().iter().map(|t| t.text.as_str()).collect()
59    }
60}
61
/// A leaf segment wrapping a single token.
#[derive(Debug, Clone)]
pub struct TokenSegment {
    /// The wrapped token; its `span` is what `Segment::span` reports for this leaf.
    pub token: Token,
    /// Type tag assigned to this leaf.
    pub segment_type: SegmentType,
}
68
/// A branch segment grouping children under a named production.
#[derive(Debug, Clone)]
pub struct NodeSegment {
    /// Type tag naming the production this node represents.
    pub segment_type: SegmentType,
    /// Direct child segments (tokens or nested nodes).
    pub children: Vec<Segment>,
    /// Span stored for this node. `NodeSegment::new` derives it from the
    /// first and last child; the field is public, so it is not recomputed
    /// if `children` is modified afterwards.
    pub span: Span,
}
76
77impl NodeSegment {
78    /// Create a new node from children, computing span automatically.
79    pub fn new(segment_type: SegmentType, children: Vec<Segment>) -> Self {
80        let span = if children.is_empty() {
81            Span::new(0, 0)
82        } else {
83            let first = children.first().unwrap().span();
84            let last = children.last().unwrap().span();
85            first.merge(last)
86        };
87        Self {
88            segment_type,
89            children,
90            span,
91        }
92    }
93}
94
/// Type tag for CST segments.
///
/// Every `Segment` carries one of these values. The variants are grouped
/// below by role: statements, clauses, expressions, leaf-level atoms, trivia,
/// and a fallback for input that could not be parsed.
///
/// String formatting and parsing of the tag are generated by the `strum`
/// `Display` and `EnumString` derives.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString)]
pub enum SegmentType {
    // Top-level
    File,
    Statement,

    // DML Statements
    SelectStatement,
    InsertStatement,
    UpdateStatement,
    DeleteStatement,

    // DDL Statements
    CreateTableStatement,
    AlterTableStatement,
    DropStatement,

    // Clauses
    SelectClause,
    FromClause,
    WhereClause,
    GroupByClause,
    HavingClause,
    OrderByClause,
    LimitClause,
    OffsetClause,
    JoinClause,
    OnClause,
    UsingClause,
    SetClause,
    ValuesClause,
    ReturningClause,
    WithClause,
    CteDefinition,
    InsertColumnsClause,

    // Expressions
    ColumnRef,
    TableRef,
    FunctionCall,
    FunctionArgs,
    Expression,
    BinaryExpression,
    UnaryExpression,
    ParenExpression,
    CaseExpression,
    WhenClause,
    ElseClause,
    Subquery,
    ExistsExpression,
    InExpression,
    BetweenExpression,
    CastExpression,
    IsNullExpression,
    LikeExpression,

    // Window functions
    WindowExpression,
    OverClause,
    PartitionByClause,
    WindowFrameClause,

    // Alias
    AliasExpression,

    // Column / Table definition
    ColumnDefinition,
    DataType,
    ColumnConstraint,
    TableConstraint,

    // Order
    OrderByExpression,
    SortOrder,

    // Atoms (leaf-level semantic types)
    Keyword,
    Identifier,
    QualifiedIdentifier,
    QuotedIdentifier,
    Literal,
    NumericLiteral,
    StringLiteral,
    BooleanLiteral,
    NullLiteral,
    Operator,
    ComparisonOperator,
    ArithmeticOperator,
    Comma,
    Dot,
    Semicolon,
    Star,
    LParen,
    RParen,

    // Trivia (the variants `is_trivia` reports as true)
    Whitespace,
    Newline,
    LineComment,
    BlockComment,

    // Fallback
    Unparsable,
}
200
201impl SegmentType {
202    /// Returns true if this is a trivia type (whitespace/comment).
203    pub fn is_trivia(self) -> bool {
204        matches!(
205            self,
206            SegmentType::Whitespace
207                | SegmentType::Newline
208                | SegmentType::LineComment
209                | SegmentType::BlockComment
210        )
211    }
212}