// ptx_parser/lexer.rs

use crate::parser::Span;
use logos::Logos;

4/// PTX specification token types for lexical analysis.
5///
6/// This enum represents all token types that can appear in PTX assembly code,
7/// including keywords, operators, literals, identifiers, and special markers.
8#[derive(Logos, Debug, Clone, PartialEq, Eq)]
9#[logos(error = LexError)]
10#[logos(skip r"[ \t\r\n]+")]
11pub enum PtxToken {
12    #[regex(r"//[^\n]*", logos::skip)]
13    #[regex(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/", logos::skip)]
14    #[token("::")]
15    DoubleColon,
16    #[token(".")]
17    Dot,
18    #[token(",")]
19    Comma,
20    #[token(";")]
21    Semicolon,
22    #[token(":")]
23    Colon,
24    #[token("(")]
25    LParen,
26    #[token(")")]
27    RParen,
28    #[token("[")]
29    LBracket,
30    #[token("]")]
31    RBracket,
32    #[token("{")]
33    LBrace,
34    #[token("}")]
35    RBrace,
36    #[token("+")]
37    Plus,
38    #[token("-")]
39    Minus,
40    #[token("*")]
41    Star,
42    #[token("/")]
43    Slash,
44    #[token("<")]
45    LAngle,
46    #[token(">")]
47    RAngle,
48    #[token("=")]
49    Equals,
50    #[token("%")]
51    Percent,
52    #[token("!")]
53    Exclaim,
54    #[token("|")]
55    Pipe,
56    #[token("&")]
57    Ampersand,
58    #[token("^")]
59    Caret,
60    #[token("~")]
61    Tilde,
62    #[token("@")]
63    At,
64    // Single-precision hex float: 0f12345678
65    #[regex(r"0[fF][0-9a-fA-F]{8}", |lex| lex.slice().to_string())]
66    HexFloatSingle(String),
67    // Double-precision hex float: 0d1234567890abcdef
68    #[regex(r"0[dD][0-9a-fA-F]{16}", |lex| lex.slice().to_string())]
69    HexFloatDouble(String),
70    #[regex(r"0[xX][0-9a-fA-F]+U?", |lex| lex.slice().to_string())]
71    HexInteger(String),
72    #[regex(r"0[bB][01]+U?", |lex| lex.slice().to_string())]
73    BinaryInteger(String),
74    #[regex(r"0[0-7]+U?", |lex| lex.slice().to_string())]
75    OctalInteger(String),
76    #[regex(r"[0-9]+\.[0-9]+[eE][+-]?[0-9]+", |lex| lex.slice().to_string())]
77    #[regex(r"[0-9]+[eE][+-]?[0-9]+", |lex| lex.slice().to_string())]
78    FloatExponent(String),
79    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().to_string())]
80    Float(String),
81    #[regex(r"[1-9][0-9]*U?", priority = 2, callback = |lex| lex.slice().to_string())]
82    #[regex(r"0U?", |lex| lex.slice().to_string())]
83    DecimalInteger(String),
84    #[regex(r"%[a-zA-Z_][a-zA-Z0-9_]*", priority = 2, callback = |lex| lex.slice().to_string())]
85    Register(String),
86    #[regex(r"[a-zA-Z][a-zA-Z0-9_$]*", |lex| lex.slice().to_string())]
87    #[regex(r"[_$%][a-zA-Z0-9_$]*", priority = 1, callback = |lex| lex.slice().to_string())]
88    Identifier(String),
89    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
90        let slice = lex.slice();
91        slice[1..slice.len() - 1].to_string()
92    })]
93    StringLiteral(String),
94}
95
96impl PtxToken {
97    /// Extract the string value from a token, if it has one
98    pub fn as_str(&self) -> &str {
99        match self {
100            PtxToken::Identifier(s)
101            | PtxToken::DecimalInteger(s)
102            | PtxToken::HexInteger(s)
103            | PtxToken::BinaryInteger(s)
104            | PtxToken::OctalInteger(s)
105            | PtxToken::Float(s)
106            | PtxToken::FloatExponent(s)
107            | PtxToken::HexFloatSingle(s)
108            | PtxToken::HexFloatDouble(s)
109            | PtxToken::Register(s)
110            | PtxToken::StringLiteral(s) => s.as_str(),
111            PtxToken::DoubleColon => "::",
112            PtxToken::Dot => ".",
113            PtxToken::Comma => ",",
114            PtxToken::Semicolon => ";",
115            PtxToken::Colon => ":",
116            PtxToken::LParen => "(",
117            PtxToken::RParen => ")",
118            PtxToken::LBracket => "[",
119            PtxToken::RBracket => "]",
120            PtxToken::LBrace => "{",
121            PtxToken::RBrace => "}",
122            PtxToken::Plus => "+",
123            PtxToken::Minus => "-",
124            PtxToken::Star => "*",
125            PtxToken::Slash => "/",
126            PtxToken::LAngle => "<",
127            PtxToken::RAngle => ">",
128            PtxToken::Equals => "=",
129            PtxToken::Percent => "%",
130            PtxToken::Exclaim => "!",
131            PtxToken::Pipe => "|",
132            PtxToken::Ampersand => "&",
133            PtxToken::Caret => "^",
134            PtxToken::Tilde => "~",
135            PtxToken::At => "@",
136        }
137    }
138
139    pub fn len(&self) -> usize {
140        match self {
141            PtxToken::Identifier(s)
142            | PtxToken::DecimalInteger(s)
143            | PtxToken::HexInteger(s)
144            | PtxToken::BinaryInteger(s)
145            | PtxToken::OctalInteger(s)
146            | PtxToken::Float(s)
147            | PtxToken::FloatExponent(s)
148            | PtxToken::HexFloatSingle(s)
149            | PtxToken::HexFloatDouble(s)
150            | PtxToken::Register(s)
151            | PtxToken::StringLiteral(s) => s.len(),
152            PtxToken::DoubleColon => 2,
153            PtxToken::Dot
154            | PtxToken::Comma
155            | PtxToken::Semicolon
156            | PtxToken::Colon
157            | PtxToken::LParen
158            | PtxToken::RParen
159            | PtxToken::LBracket
160            | PtxToken::RBracket
161            | PtxToken::LBrace
162            | PtxToken::RBrace
163            | PtxToken::Plus
164            | PtxToken::Minus
165            | PtxToken::Star
166            | PtxToken::Slash
167            | PtxToken::LAngle
168            | PtxToken::RAngle
169            | PtxToken::Equals
170            | PtxToken::Percent
171            | PtxToken::Exclaim
172            | PtxToken::Pipe
173            | PtxToken::Ampersand
174            | PtxToken::Caret
175            | PtxToken::Tilde
176            | PtxToken::At => 1,
177        }
178    }
179}
180
181/// Lexical analysis error type.
182#[derive(Debug, Clone, PartialEq, Default)]
183pub struct LexError {
184    /// The span in the source code where the error occurred
185    pub span: Span,
186}
187
188impl From<Span> for LexError {
189    fn from(span: Span) -> Self {
190        LexError { span }
191    }
192}
193
194/// Tokenize a PTX source string into a sequence of tokens with their spans.
195///
196/// This is the main entry point for lexical analysis. It converts raw PTX
197/// source code into a vector of tokens that can be parsed.
198///
199/// # Arguments
200///
201/// * `source` - The PTX source code as a string slice
202///
203/// # Returns
204///
205/// Returns a vector of tuples containing each token and its span in the source,
206/// or a `LexError` if tokenization fails.
207///
208/// # Example
209///
210/// ```no_run
211/// use ptx_parser::tokenize;
212///
213/// let source = ".version 8.5\n.target sm_90";
214/// let tokens = tokenize(source).expect("Failed to tokenize");
215/// ```
216pub fn tokenize(source: &str) -> Result<Vec<(PtxToken, Span)>, LexError> {
217    let mut lexer = PtxToken::lexer(source);
218    let mut tokens = Vec::new();
219
220    while let Some(item) = lexer.next() {
221        match item {
222            Ok(token) => tokens.push((token, Span::from(lexer.span()))),
223            Err(_) => {
224                return Err(LexError {
225                    span: Span::from(lexer.span()),
226                });
227            }
228        }
229    }
230
231    Ok(tokens)
232}