// kotoba_formatter/parser.rs

//! Parser module (simplified).

use std::collections::HashMap;

5/// トークンの種類
6#[derive(Debug, Clone, PartialEq)]
7pub enum TokenKind {
8    // キーワード
9    Graph,
10    Node,
11    Edge,
12    Query,
13    Fn,
14    If,
15    For,
16    While,
17    Return,
18
19    // シンボル
20    LeftBrace,
21    RightBrace,
22    LeftParen,
23    RightParen,
24    LeftBracket,
25    RightBracket,
26    Semicolon,
27    Colon,
28    Comma,
29    Dot,
30    Arrow,
31
32    // 演算子
33    Assign,
34    Equal,
35    NotEqual,
36    Less,
37    Greater,
38    LessEqual,
39    GreaterEqual,
40    Plus,
41    Minus,
42    Star,
43    Slash,
44    And,
45    Or,
46
47    // リテラル
48    Identifier,
49    String,
50    Number,
51    Boolean,
52
53    // その他
54    Comment,
55    Whitespace,
56    Newline,
57    Eof,
58}
59
60/// トークン
61#[derive(Debug, Clone)]
62pub struct Token {
63    pub kind: TokenKind,
64    pub text: String,
65    pub line: usize,
66    pub column: usize,
67}
68
69impl Token {
70    pub fn new(kind: TokenKind, text: String, line: usize, column: usize) -> Self {
71        Self {
72            kind,
73            text,
74            line,
75            column,
76        }
77    }
78}
79
80/// 簡易パーサー
81#[derive(Debug)]
82pub struct Parser {
83    input: String,
84    tokens: Vec<Token>,
85    position: usize,
86}
87
88impl Parser {
89    /// 新しいパーサーを作成
90    pub fn new(input: String) -> Self {
91        Self {
92            input,
93            tokens: Vec::new(),
94            position: 0,
95        }
96    }
97
98    /// 入力をトークン化
99    pub fn tokenize(&mut self) -> Result<&[Token], Box<dyn std::error::Error>> {
100        self.tokens.clear();
101        self.position = 0;
102
103        while self.position < self.input.len() {
104            let ch = self.current_char();
105
106            match ch {
107                ' ' | '\t' => {
108                    self.consume_whitespace();
109                }
110                '\n' | '\r' => {
111                    self.consume_newline();
112                }
113                '/' => {
114                    if self.peek_char() == Some('/') {
115                        self.consume_comment();
116                    } else if self.peek_char() == Some('*') {
117                        self.consume_multiline_comment();
118                    } else {
119                        self.add_token(TokenKind::Slash, "/".to_string());
120                        self.position += 1;
121                    }
122                }
123                '{' => {
124                    self.add_token(TokenKind::LeftBrace, "{".to_string());
125                    self.position += 1;
126                }
127                '}' => {
128                    self.add_token(TokenKind::RightBrace, "}".to_string());
129                    self.position += 1;
130                }
131                '(' => {
132                    self.add_token(TokenKind::LeftParen, "(".to_string());
133                    self.position += 1;
134                }
135                ')' => {
136                    self.add_token(TokenKind::RightParen, ")".to_string());
137                    self.position += 1;
138                }
139                '[' => {
140                    self.add_token(TokenKind::LeftBracket, "[".to_string());
141                    self.position += 1;
142                }
143                ']' => {
144                    self.add_token(TokenKind::RightBracket, "]".to_string());
145                    self.position += 1;
146                }
147                ';' => {
148                    self.add_token(TokenKind::Semicolon, ";".to_string());
149                    self.position += 1;
150                }
151                ':' => {
152                    self.add_token(TokenKind::Colon, ":".to_string());
153                    self.position += 1;
154                }
155                ',' => {
156                    self.add_token(TokenKind::Comma, ",".to_string());
157                    self.position += 1;
158                }
159                '.' => {
160                    self.add_token(TokenKind::Dot, ".".to_string());
161                    self.position += 1;
162                }
163                '=' => {
164                    if self.peek_char() == Some('=') {
165                        self.add_token(TokenKind::Equal, "==".to_string());
166                        self.position += 2;
167                    } else {
168                        self.add_token(TokenKind::Assign, "=".to_string());
169                        self.position += 1;
170                    }
171                }
172                '!' => {
173                    if self.peek_char() == Some('=') {
174                        self.add_token(TokenKind::NotEqual, "!=".to_string());
175                        self.position += 2;
176                    } else {
177                        // 識別子として扱う
178                        self.consume_identifier();
179                    }
180                }
181                '<' => {
182                    if self.peek_char() == Some('=') {
183                        self.add_token(TokenKind::LessEqual, "<=".to_string());
184                        self.position += 2;
185                    } else {
186                        self.add_token(TokenKind::Less, "<".to_string());
187                        self.position += 1;
188                    }
189                }
190                '>' => {
191                    if self.peek_char() == Some('=') {
192                        self.add_token(TokenKind::GreaterEqual, ">=".to_string());
193                        self.position += 2;
194                    } else {
195                        self.add_token(TokenKind::Greater, ">".to_string());
196                        self.position += 1;
197                    }
198                }
199                '+' => {
200                    self.add_token(TokenKind::Plus, "+".to_string());
201                    self.position += 1;
202                }
203                '-' => {
204                    if self.peek_char() == Some('>') {
205                        self.add_token(TokenKind::Arrow, "->".to_string());
206                        self.position += 2;
207                    } else {
208                        self.add_token(TokenKind::Minus, "-".to_string());
209                        self.position += 1;
210                    }
211                }
212                '*' => {
213                    self.add_token(TokenKind::Star, "*".to_string());
214                    self.position += 1;
215                }
216                '&' => {
217                    if self.peek_char() == Some('&') {
218                        self.add_token(TokenKind::And, "&&".to_string());
219                        self.position += 2;
220                    } else {
221                        // 識別子として扱う
222                        self.consume_identifier();
223                    }
224                }
225                '|' => {
226                    if self.peek_char() == Some('|') {
227                        self.add_token(TokenKind::Or, "||".to_string());
228                        self.position += 2;
229                    } else {
230                        // 識別子として扱う
231                        self.consume_identifier();
232                    }
233                }
234                '"' => {
235                    self.consume_string();
236                }
237                '0'..='9' => {
238                    self.consume_number();
239                }
240                'a'..='z' | 'A'..='Z' | '_' => {
241                    self.consume_identifier();
242                }
243                _ => {
244                    // 不明な文字はスキップ
245                    self.position += 1;
246                }
247            }
248        }
249
250        self.add_token(TokenKind::Eof, "".to_string());
251        Ok(&self.tokens)
252    }
253
254    /// 現在の文字を取得
255    fn current_char(&self) -> char {
256        self.input[self.position..].chars().next().unwrap()
257    }
258
259    /// 次の文字を覗く
260    fn peek_char(&self) -> Option<char> {
261        self.input[self.position + 1..].chars().next()
262    }
263
264    /// 空白を消費
265    fn consume_whitespace(&mut self) {
266        let start = self.position;
267        while self.position < self.input.len() &&
268              matches!(self.current_char(), ' ' | '\t') {
269            self.position += 1;
270        }
271
272        if start != self.position {
273            let text = self.input[start..self.position].to_string();
274            self.add_token(TokenKind::Whitespace, text);
275        }
276    }
277
278    /// 改行を消費
279    fn consume_newline(&mut self) {
280        let start = self.position;
281        while self.position < self.input.len() &&
282              matches!(self.current_char(), '\n' | '\r') {
283            self.position += 1;
284        }
285
286        if start != self.position {
287            let text = self.input[start..self.position].to_string();
288            self.add_token(TokenKind::Newline, text);
289        }
290    }
291
292    /// コメントを消費
293    fn consume_comment(&mut self) {
294        let start = self.position;
295        while self.position < self.input.len() && self.current_char() != '\n' {
296            self.position += 1;
297        }
298
299        let text = self.input[start..self.position].to_string();
300        self.add_token(TokenKind::Comment, text);
301    }
302
303    /// 複数行コメントを消費
304    fn consume_multiline_comment(&mut self) {
305        let start = self.position;
306        self.position += 2; // /* をスキップ
307
308        while self.position < self.input.len() - 1 {
309            if self.current_char() == '*' && self.peek_char() == Some('/') {
310                self.position += 2;
311                break;
312            }
313            self.position += 1;
314        }
315
316        let text = self.input[start..self.position].to_string();
317        self.add_token(TokenKind::Comment, text);
318    }
319
320    /// 文字列を消費
321    fn consume_string(&mut self) {
322        let start = self.position;
323        self.position += 1; // " をスキップ
324
325        while self.position < self.input.len() && self.current_char() != '"' {
326            if self.current_char() == '\\' {
327                self.position += 2; // エスケープシーケンスをスキップ
328            } else {
329                self.position += 1;
330            }
331        }
332
333        if self.position < self.input.len() {
334            self.position += 1; // 終端の " をスキップ
335        }
336
337        let text = self.input[start..self.position].to_string();
338        self.add_token(TokenKind::String, text);
339    }
340
341    /// 数字を消費
342    fn consume_number(&mut self) {
343        let start = self.position;
344
345        while self.position < self.input.len() &&
346              matches!(self.current_char(), '0'..='9' | '.' | 'e' | 'E' | '+' | '-') {
347            self.position += 1;
348        }
349
350        let text = self.input[start..self.position].to_string();
351        self.add_token(TokenKind::Number, text);
352    }
353
354    /// 識別子を消費
355    fn consume_identifier(&mut self) {
356        let start = self.position;
357
358        while self.position < self.input.len() &&
359              matches!(self.current_char(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
360            self.position += 1;
361        }
362
363        let text = self.input[start..self.position].to_string();
364
365        // キーワードかどうかをチェック
366        let kind = match text.as_str() {
367            "graph" => TokenKind::Graph,
368            "node" => TokenKind::Node,
369            "edge" => TokenKind::Edge,
370            "query" => TokenKind::Query,
371            "fn" => TokenKind::Fn,
372            "if" => TokenKind::If,
373            "for" => TokenKind::For,
374            "while" => TokenKind::While,
375            "return" => TokenKind::Return,
376            "true" | "false" => TokenKind::Boolean,
377            _ => TokenKind::Identifier,
378        };
379
380        self.add_token(kind, text);
381    }
382
383    /// トークンを追加
384    fn add_token(&mut self, kind: TokenKind, text: String) {
385        // 行番号と列番号の計算(簡易版)
386        let line = self.input[..self.position].chars().filter(|&c| c == '\n').count() + 1;
387        let last_newline = self.input[..self.position].rfind('\n').unwrap_or(0);
388        let column = self.position - last_newline;
389
390        self.tokens.push(Token::new(kind, text, line, column));
391    }
392}
393
394/// パース結果
395#[derive(Debug)]
396pub struct ParseResult {
397    pub tokens: Vec<Token>,
398    pub errors: Vec<String>,
399}
400
401impl ParseResult {
402    pub fn new() -> Self {
403        Self {
404            tokens: Vec::new(),
405            errors: Vec::new(),
406        }
407    }
408
409    pub fn success(tokens: Vec<Token>) -> Self {
410        Self {
411            tokens,
412            errors: Vec::new(),
413        }
414    }
415
416    pub fn error(error: String) -> Self {
417        Self {
418            tokens: Vec::new(),
419            errors: vec![error],
420        }
421    }
422}
423
424/// 入力文字列をパース
425pub fn parse(input: &str) -> ParseResult {
426    let mut parser = Parser::new(input.to_string());
427
428    match parser.tokenize() {
429        Ok(tokens) => ParseResult::success(tokens.to_vec()),
430        Err(e) => ParseResult::error(e.to_string()),
431    }
432}