ruxnasm/tokenizer/
mod.rs

1use super::{Identifier, Token};
2use super::{Span, Spanned, Spanning};
3use crate::anomalies::{Error, Warning};
4use crate::{Instruction, InstructionKind};
5use std::fmt;
6
7mod hex_number;
8
/// Outcome of tokenizing one word of source text.
#[derive(Clone)]
pub(crate) enum Word {
    /// The word was turned into a token.
    Fine {
        /// The resulting token together with its span.
        token: Spanned<Token>,
        /// Warnings produced while tokenizing the word.
        warnings: Vec<Warning>,
    },
    /// The word could not be turned into a token.
    Faulty {
        /// Errors that prevented tokenization.
        errors: Vec<Error>,
        /// Warnings attached to the word; `Word::new` always leaves this empty.
        warnings: Vec<Warning>,
    },
}
20
21impl Word {
22    pub(crate) fn new(symbols: &[Spanned<char>]) -> Self {
23        debug_assert!({
24            const WHITESPACES: [char; 6] = [' ', '\t', '\n', 0x0b as char, 0x0c as char, '\r'];
25
26            let chars: Vec<char> = symbols.iter().map(|Spanned { node: ch, .. }| *ch).collect();
27            WHITESPACES.iter().all(|ch| !chars.contains(ch))
28        });
29
30        match tokenize(symbols) {
31            Ok((token, warnings)) => Self::Fine { token, warnings },
32            Err(error) => Self::Faulty {
33                errors: vec![error],
34                warnings: Vec::new(),
35            },
36        }
37    }
38}
39
40fn tokenize(word: &[Spanned<char>]) -> Result<(Spanned<Token>, Vec<Warning>), Error> {
41    match word.first().cloned().unwrap() {
42        Spanned { node: '[', span } => {
43            return Ok((Token::OpeningBracket.spanning(span), Vec::new()))
44        }
45        Spanned { node: ']', span } => {
46            return Ok((Token::ClosingBracket.spanning(span), Vec::new()))
47        }
48        Spanned { node: '{', span } => return Ok((Token::OpeningBrace.spanning(span), Vec::new())),
49        Spanned { node: '}', span } => return Ok((Token::ClosingBrace.spanning(span), Vec::new())),
50        Spanned { node: '%', span } => match parse_macro(span, &word[1..]) {
51            Ok(name) => {
52                return Ok((
53                    Token::MacroDefine(name).spanning(to_span(word).unwrap()),
54                    Vec::new(),
55                ));
56            }
57            Err(err) => Err(err),
58        },
59        Spanned { node: '|', span } => hex_number::parse_hex_number_unconstrained(&word[1..])
60            .map_err(|err| match err {
61                hex_number::Error2::DigitExpected => Error::HexNumberExpected { span: span.into() },
62                hex_number::Error2::DigitInvalid { digit, span } => Error::HexDigitInvalid {
63                    digit,
64                    number: to_string(&word[1..]),
65                    span: span.into(),
66                },
67                hex_number::Error2::TooLong { length } => Error::HexNumberTooLong {
68                    length,
69                    number: to_string(&word[1..]),
70                    span: to_span(&word[1..]).unwrap().into(),
71                },
72            })
73            .map(|value| Token::PadAbsolute(value))
74            .map(|token| (token.spanning(to_span(word).unwrap()), Vec::new())),
75        Spanned { node: '$', span } => hex_number::parse_hex_number_unconstrained(&word[1..])
76            .map_err(|err| match err {
77                hex_number::Error2::DigitExpected => Error::HexNumberExpected { span: span.into() },
78                hex_number::Error2::DigitInvalid { digit, span } => Error::HexDigitInvalid {
79                    digit,
80                    number: to_string(&word[1..]),
81                    span: span.into(),
82                },
83                hex_number::Error2::TooLong { length } => Error::HexNumberTooLong {
84                    length,
85                    number: to_string(&word[1..]),
86                    span: to_span(&word[1..]).unwrap().into(),
87                },
88            })
89            .map(|value| Token::PadRelative(value))
90            .map(|token| (token.spanning(to_span(word).unwrap()), Vec::new())),
91        Spanned { node: '@', span } => {
92            if !word[1..].is_empty() {
93                if word[1].node != '&' {
94                    if let Some(position) = word[1..]
95                        .iter()
96                        .map(|Spanned { node: ch, .. }| *ch)
97                        .position(|c| c == '/')
98                    {
99                        Err(Error::SlashInLabelOrSublabel {
100                            span: word[1 + position].span.into(),
101                        })
102                    } else {
103                        Ok((
104                            Token::LabelDefine(to_string(&word[1..]))
105                                .spanning(to_span(word).unwrap()),
106                            Vec::new(),
107                        ))
108                    }
109                } else {
110                    Err(Error::AmpersandAtTheStartOfLabel {
111                        span: word[1].span.into(),
112                    })
113                }
114            } else {
115                Err(Error::LabelExpected { span: span.into() })
116            }
117        }
118        Spanned { node: '&', span } => {
119            if !word[1..].is_empty() {
120                if let Some(position) = word[1..]
121                    .iter()
122                    .map(|Spanned { node: ch, .. }| *ch)
123                    .position(|c| c == '/')
124                {
125                    Err(Error::SlashInLabelOrSublabel {
126                        span: word[1 + position].span.into(),
127                    })
128                } else {
129                    Ok((
130                        Token::SublabelDefine(to_string(&word[1..]))
131                            .spanning(to_span(word).unwrap()),
132                        Vec::new(),
133                    ))
134                }
135            } else {
136                Err(Error::LabelExpected { span: span.into() })
137            }
138        }
139        Spanned { node: '#', span } => match hex_number::parse_hex_number(&word[1..]) {
140            Ok(hex_number::HexNumber::Byte(value)) => Ok(Token::LiteralHexByte(value)),
141            Ok(hex_number::HexNumber::Short(value)) => Ok(Token::LiteralHexShort(value)),
142            Err(hex_number::Error::DigitExpected) => {
143                Err(Error::HexNumberOrCharacterExpected { span: span.into() })
144            }
145            Err(hex_number::Error::DigitInvalid { digit, span }) => Err(Error::HexDigitInvalid {
146                digit,
147                number: to_string(&word[1..]),
148                span: span.into(),
149            }),
150            Err(hex_number::Error::UnevenLength { length: 1 }) => {
151                Ok(Token::LiteralHexByte(word[1].node as u8))
152            }
153            Err(hex_number::Error::UnevenLength { length }) => Err(Error::HexNumberUnevenLength {
154                length,
155                number: to_string(&word[1..]),
156                span: to_span(&word[1..]).unwrap().into(),
157            }),
158            Err(hex_number::Error::TooLong { length }) => Err(Error::HexNumberTooLong {
159                length,
160                number: to_string(&word[1..]),
161                span: to_span(&word[1..]).unwrap().into(),
162            }),
163        }
164        .map(|token| (token.spanning(to_span(word).unwrap()), Vec::new())),
165        Spanned { node: '.', span } => match parse_identifier(span, &word[1..]) {
166            Ok(name) => {
167                return Ok((
168                    Token::LiteralZeroPageAddress(name).spanning(to_span(word).unwrap()),
169                    Vec::new(),
170                ));
171            }
172            Err(err) => Err(err),
173        },
174        Spanned { node: ',', span } => match parse_identifier(span, &word[1..]) {
175            Ok(name) => {
176                return Ok((
177                    Token::LiteralRelativeAddress(name).spanning(to_span(word).unwrap()),
178                    Vec::new(),
179                ));
180            }
181            Err(err) => Err(err),
182        },
183        Spanned { node: ';', span } => match parse_identifier(span, &word[1..]) {
184            Ok(name) => {
185                return Ok((
186                    Token::LiteralAbsoluteAddress(name).spanning(to_span(word).unwrap()),
187                    Vec::new(),
188                ));
189            }
190            Err(err) => Err(err),
191        },
192        Spanned { node: ':', span } => match parse_identifier(span, &word[1..]) {
193            Ok(name) => {
194                return Ok((
195                    Token::RawAddress(name).spanning(to_span(word).unwrap()),
196                    Vec::new(),
197                ));
198            }
199            Err(err) => Err(err),
200        },
201        Spanned { node: '\'', span } => {
202            let bytes: Vec<u8> = to_string(&word[1..]).bytes().collect();
203            match bytes.len() {
204                0 => Err(Error::CharacterExpected { span: span.into() }),
205                1 => Ok((
206                    Token::RawChar(bytes[0]).spanning(Span::combine(&span, &word[1].span)),
207                    Vec::new(),
208                )),
209                _ => {
210                    let span = to_span(&word[1..]).unwrap();
211                    Err(Error::MoreThanOneByteFound {
212                        bytes,
213                        span: span.into(),
214                    })
215                }
216            }
217        }
218        Spanned { node: '"', .. } => {
219            return Ok((
220                Token::RawWord(to_string(&word[1..])).spanning(to_span(word).unwrap()),
221                Vec::new(),
222            ));
223        }
224        _ => {
225            if let Ok(hex_number) = hex_number::parse_hex_number(word) {
226                return Ok((
227                    match hex_number {
228                        hex_number::HexNumber::Byte(value) => Token::RawHexByte(value),
229                        hex_number::HexNumber::Short(value) => Token::RawHexShort(value),
230                    }
231                    .spanning(to_span(word).unwrap()),
232                    Vec::new(),
233                ));
234            };
235            if let Some((instruction, new_warnings)) = parse_instruction(word) {
236                return Ok((
237                    Token::Instruction(instruction).spanning(to_span(word).unwrap()),
238                    new_warnings,
239                ));
240            };
241            return Ok((
242                to_spanned_string(word)
243                    .unwrap()
244                    .map(|s| (Token::MacroInvoke(s))),
245                Vec::new(),
246            ));
247        }
248    }
249}
250
251fn to_string(symbols: &[Spanned<char>]) -> String {
252    symbols.iter().map(|Spanned { node: ch, .. }| *ch).collect()
253}
254
255fn to_span(symbols: &[Spanned<char>]) -> Option<Span> {
256    Some(Span::combine(&symbols.first()?.span, &symbols.last()?.span))
257}
258
259fn to_spanned_string(symbols: &[Spanned<char>]) -> Option<Spanned<String>> {
260    to_span(symbols).map(|span| to_string(symbols).spanning(span))
261}
262
263fn parse_macro(rune_span: Span, symbols: &[Spanned<char>]) -> Result<String, Error> {
264    if symbols.is_empty() {
265        return Err(Error::MacroNameExpected {
266            span: rune_span.into(),
267        });
268    }
269
270    if let Ok(_) = hex_number::parse_hex_number(symbols) {
271        return Err(Error::MacroCannotBeAHexNumber {
272            span: to_span(symbols).unwrap().into(),
273            number: to_string(symbols),
274        });
275    }
276    if let Some(_) = parse_instruction(symbols) {
277        return Err(Error::MacroCannotBeAnInstruction {
278            span: to_span(symbols).unwrap().into(),
279            instruction: to_string(symbols),
280        });
281    }
282
283    Ok(to_string(symbols))
284}
285
286fn parse_identifier(rune_span: Span, symbols: &[Spanned<char>]) -> Result<Identifier, Error> {
287    if symbols.is_empty() {
288        return Err(Error::IdentifierExpected {
289            span: rune_span.into(),
290        });
291    }
292
293    if let Some(Spanned { node: '&', span }) = symbols.first() {
294        let rune_span = Span::combine(&rune_span, &span);
295        if symbols[1..].is_empty() {
296            return Err(Error::SublabelExpected {
297                span: rune_span.into(),
298            });
299        }
300        return Ok(Identifier::Sublabel(to_string(&symbols[1..])));
301    }
302
303    match symbols
304        .iter()
305        .map(|Spanned { node: ch, .. }| *ch)
306        .position(|c| c == '/')
307    {
308        Some(position) => {
309            if let Some(second_position) = symbols[position + 1..]
310                .iter()
311                .map(|Spanned { node: ch, .. }| *ch)
312                .position(|c| c == '/')
313            {
314                return Err(Error::MoreThanOneSlashInIdentifier {
315                    span: symbols[position + 1 + second_position].span.into(),
316                });
317            }
318
319            let label = {
320                let label_symbols = &symbols[..position];
321                if label_symbols.is_empty() {
322                    return Err(Error::LabelExpected {
323                        span: rune_span.into(),
324                    });
325                }
326                to_string(label_symbols)
327            };
328            let sublabel = {
329                let sublabel_symbols = &symbols[position + 1..];
330                if sublabel_symbols.is_empty() {
331                    return Err(Error::SublabelExpected {
332                        span: symbols[position].span.into(),
333                    });
334                }
335                to_string(sublabel_symbols)
336            };
337            Ok(Identifier::Path(label, sublabel))
338        }
339        None => {
340            if symbols.is_empty() {
341                return Err(Error::LabelExpected {
342                    span: rune_span.into(),
343                });
344            }
345            Ok(Identifier::Label(to_string(symbols)))
346        }
347    }
348}
349
350/// `symbols` must not be empty.
351fn parse_instruction(symbols: &[Spanned<char>]) -> Option<(Instruction, Vec<Warning>)> {
352    if symbols.len() < 3 {
353        return None;
354    }
355
356    let instruction_kind = match to_string(&symbols[..3]).as_str() {
357        "BRK" => Some(InstructionKind::Break),
358        "LIT" => Some(InstructionKind::Literal),
359        "NOP" => Some(InstructionKind::NoOperation),
360        "POP" => Some(InstructionKind::Pop),
361        "DUP" => Some(InstructionKind::Duplicate),
362        "SWP" => Some(InstructionKind::Swap),
363        "OVR" => Some(InstructionKind::Over),
364        "ROT" => Some(InstructionKind::Rotate),
365        "EQU" => Some(InstructionKind::Equal),
366        "NEQ" => Some(InstructionKind::NotEqual),
367        "GTH" => Some(InstructionKind::GreaterThan),
368        "LTH" => Some(InstructionKind::LesserThan),
369        "JMP" => Some(InstructionKind::Jump),
370        "JCN" => Some(InstructionKind::JumpCondition),
371        "JSR" => Some(InstructionKind::JumpStash),
372        "STH" => Some(InstructionKind::Stash),
373        "LDZ" => Some(InstructionKind::LoadZeroPage),
374        "STZ" => Some(InstructionKind::StoreZeroPage),
375        "LDR" => Some(InstructionKind::LoadRelative),
376        "STR" => Some(InstructionKind::StoreRelative),
377        "LDA" => Some(InstructionKind::LoadAbsolute),
378        "STA" => Some(InstructionKind::StoreAbsolute),
379        "DEI" => Some(InstructionKind::DeviceIn),
380        "DEO" => Some(InstructionKind::DeviceOut),
381        "ADD" => Some(InstructionKind::Add),
382        "SUB" => Some(InstructionKind::Subtract),
383        "MUL" => Some(InstructionKind::Multiply),
384        "DIV" => Some(InstructionKind::Divide),
385        "AND" => Some(InstructionKind::And),
386        "ORA" => Some(InstructionKind::Or),
387        "EOR" => Some(InstructionKind::ExclusiveOr),
388        "SFT" => Some(InstructionKind::Shift),
389        _ => None,
390    }?;
391
392    let mut keep: Option<Span> = None;
393    let mut r#return: Option<Span> = None;
394    let mut short: Option<Span> = None;
395    let mut warnings = Vec::new();
396
397    for Spanned { node: ch, span } in &symbols[3..] {
398        match ch {
399            'k' => {
400                if let Some(other_span) = keep {
401                    warnings.push(Warning::InstructionModeDefinedMoreThanOnce {
402                        instruction_mode: 'k',
403                        instruction: to_string(&symbols[..3]),
404                        span: (*span).into(),
405                        other_span: other_span.into(),
406                    });
407                }
408                keep = Some(*span);
409            }
410            'r' => {
411                if let Some(other_span) = r#return {
412                    warnings.push(Warning::InstructionModeDefinedMoreThanOnce {
413                        instruction_mode: 'r',
414                        instruction: to_string(&symbols[..3]),
415                        span: (*span).into(),
416                        other_span: other_span.into(),
417                    });
418                }
419                r#return = Some(*span);
420            }
421            '2' => {
422                if let Some(other_span) = short {
423                    warnings.push(Warning::InstructionModeDefinedMoreThanOnce {
424                        instruction_mode: '2',
425                        instruction: to_string(&symbols[..3]),
426                        span: (*span).into(),
427                        other_span: other_span.into(),
428                    });
429                }
430                short = Some(*span);
431            }
432            _ => {
433                return None;
434            }
435        }
436    }
437
438    return Some((
439        Instruction {
440            instruction_kind,
441            keep: keep.is_some(),
442            r#return: r#return.is_some(),
443            short: short.is_some(),
444        },
445        warnings,
446    ));
447}
448
449impl fmt::Debug for Word {
450    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
451        match self {
452            Word::Fine { token, warnings } => {
453                let mut debug_struct = f.debug_struct("Fine");
454                debug_struct.field("token", token);
455                if !warnings.is_empty() {
456                    debug_struct.field("warnings", warnings);
457                }
458                debug_struct.finish()
459            }
460            Word::Faulty { errors, warnings } => {
461                let mut debug_struct = f.debug_struct("Faulty");
462                debug_struct.field("errors", errors);
463                if !warnings.is_empty() {
464                    debug_struct.field("warnings", warnings);
465                }
466                debug_struct.finish()
467            }
468        }
469    }
470}