ptx_parser/
lexer.rs

1use logos::Logos;
2
3pub use logos::Span;
4
5/// PTX specification token types for lexical analysis.
6///
7/// This enum represents all token types that can appear in PTX assembly code,
8/// including keywords, operators, literals, identifiers, and special markers.
9#[derive(Logos, Debug, Clone, PartialEq, Eq)]
10#[logos(error = LexError)]
11#[logos(skip r"[ \t\r\n]+")]
12pub enum PtxToken {
13    #[regex(r"//[^\n]*", logos::skip)]
14    #[regex(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/", logos::skip)]
15    #[token("::")]
16    DoubleColon,
17    #[token(".")]
18    Dot,
19    #[token(",")]
20    Comma,
21    #[token(";")]
22    Semicolon,
23    #[token(":")]
24    Colon,
25    #[token("(")]
26    LParen,
27    #[token(")")]
28    RParen,
29    #[token("[")]
30    LBracket,
31    #[token("]")]
32    RBracket,
33    #[token("{")]
34    LBrace,
35    #[token("}")]
36    RBrace,
37    #[token("+")]
38    Plus,
39    #[token("-")]
40    Minus,
41    #[token("*")]
42    Star,
43    #[token("/")]
44    Slash,
45    #[token("<")]
46    LAngle,
47    #[token(">")]
48    RAngle,
49    #[token("=")]
50    Equals,
51    #[token("%")]
52    Percent,
53    #[token("!")]
54    Exclaim,
55    #[token("|")]
56    Pipe,
57    #[token("&")]
58    Ampersand,
59    #[token("^")]
60    Caret,
61    #[token("~")]
62    Tilde,
63    #[token("@")]
64    At,
65    #[regex(r"0[fFdD][0-9a-fA-F]{8}", |lex| lex.slice().to_string())]
66    #[regex(r"0[fFdD][0-9a-fA-F]{16}", |lex| lex.slice().to_string())]
67    HexFloat(String),
68    #[regex(r"0[xX][0-9a-fA-F]+", |lex| lex.slice().to_string())]
69    HexInteger(String),
70    #[regex(r"0[bB][01]+", |lex| lex.slice().to_string())]
71    BinaryInteger(String),
72    #[regex(r"0[0-7]+", |lex| lex.slice().to_string())]
73    OctalInteger(String),
74    #[regex(r"[0-9]+\.[0-9]+[eE][+-]?[0-9]+", |lex| lex.slice().to_string())]
75    #[regex(r"[0-9]+[eE][+-]?[0-9]+", |lex| lex.slice().to_string())]
76    FloatExponent(String),
77    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().to_string())]
78    Float(String),
79    #[regex(r"[0-9]+", |lex| lex.slice().to_string())]
80    DecimalInteger(String),
81    #[regex(r"%[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
82    Register(String),
83    #[regex(r"[a-zA-Z_$][a-zA-Z0-9_$-]*", |lex| lex.slice().to_string())]
84    Identifier(String),
85    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
86        let slice = lex.slice();
87        slice[1..slice.len() - 1].to_string()
88    })]
89    StringLiteral(String),
90}
91
92impl PtxToken {
93    /// Extract the string value from a token, if it has one
94    pub fn as_str(&self) -> &str {
95        match self {
96            PtxToken::Identifier(s)
97            | PtxToken::DecimalInteger(s)
98            | PtxToken::HexInteger(s)
99            | PtxToken::BinaryInteger(s)
100            | PtxToken::OctalInteger(s)
101            | PtxToken::Float(s)
102            | PtxToken::FloatExponent(s)
103            | PtxToken::HexFloat(s)
104            | PtxToken::Register(s)
105            | PtxToken::StringLiteral(s) => s.as_str(),
106            _ => "",
107        }
108    }
109}
110
111/// Lexical analysis error type.
112#[derive(Debug, Clone, PartialEq, Default)]
113pub struct LexError {
114    /// The span in the source code where the error occurred
115    pub span: Span,
116}
117
118impl From<Span> for LexError {
119    fn from(span: Span) -> Self {
120        LexError { span }
121    }
122}
123
124/// Tokenize a PTX source string into a sequence of tokens with their spans.
125///
126/// This is the main entry point for lexical analysis. It converts raw PTX
127/// source code into a vector of tokens that can be parsed.
128///
129/// # Arguments
130///
131/// * `source` - The PTX source code as a string slice
132///
133/// # Returns
134///
135/// Returns a vector of tuples containing each token and its span in the source,
136/// or a `LexError` if tokenization fails.
137///
138/// # Example
139///
140/// ```no_run
141/// use ptx_parser::tokenize;
142///
143/// let source = ".version 8.5\n.target sm_90";
144/// let tokens = tokenize(source).expect("Failed to tokenize");
145/// ```
146pub fn tokenize(source: &str) -> Result<Vec<(PtxToken, Span)>, LexError> {
147    let mut lexer = PtxToken::lexer(source);
148    let mut tokens = Vec::new();
149
150    while let Some(item) = lexer.next() {
151        match item {
152            Ok(token) => tokens.push((token, lexer.span())),
153            Err(_) => return Err(LexError { span: lexer.span() }),
154        }
155    }
156
157    Ok(tokens)
158}