// b2c2_casl2/parser.rs

// b2c2-casl2 crate::parser
// author: Leonardone @ NEETSDKASU
3
4use super::*;
5use crate::SyntaxError;
6
7pub fn parse(src: &str) -> Result<Vec<Statement>, SyntaxError> {
8    let mut ret = vec![];
9    for (i, line) in src.lines().enumerate() {
10        if line.trim().is_empty() {
11            continue;
12        }
13        let stmt = Statement::parse(line).ok_or_else(|| {
14            SyntaxError::new(
15                i + 1,
16                0,
17                format!(r#"不正なCASL2ステートメントです: "{}""#, line),
18            )
19        })?;
20        ret.push(stmt);
21    }
22    Ok(ret)
23}
24
25impl Statement {
26    fn parse(src: &str) -> Option<Self> {
27        let mut tokenizer = Tokenizer::new(src);
28
29        let mut label: Option<Label> = None;
30        if tokenizer.space() {
31            if let Some(comment) = tokenizer.comment() {
32                let indent = tokenizer.space_count;
33                return Some(Statement::Comment {
34                    indent,
35                    text: comment,
36                });
37            }
38        } else if let Some(word) = tokenizer.word() {
39            let word = Label::from(word);
40            if !word.is_valid() {
41                return None;
42            }
43            if !tokenizer.space() {
44                return None;
45            }
46            label = Some(word);
47        } else if let Some(comment) = tokenizer.comment() {
48            return Some(Statement::Comment {
49                indent: 0,
50                text: comment,
51            });
52        } else {
53            return None;
54        }
55
56        let (command, comment) = Command::parse(tokenizer)?;
57
58        Some(Statement::Code {
59            label,
60            command,
61            comment,
62        })
63    }
64}
65
66impl Command {
67    fn parse(mut tokenizer: Tokenizer) -> Option<(Self, Option<String>)> {
68        let cmd_word = tokenizer.word()?;
69
70        if !tokenizer.space() {
71            let rest = tokenizer.rest();
72            if !rest.is_empty() {
73                return None;
74            }
75        }
76
77        let values = tokenizer.values()?;
78
79        if values.is_empty() {
80            let command = match cmd_word.as_str() {
81                "START" => Command::Start { entry_point: None },
82                "RPUSH" => Command::Rpush,
83                "RPOP" => Command::Rpop,
84                "RET" => Command::Ret,
85                "END" => Command::End,
86                "NOP" => Command::Nop,
87                _ => return None,
88            };
89            if !tokenizer.space() {
90                if let Some(comment) = tokenizer.comment() {
91                    return Some((command, Some(comment)));
92                }
93            }
94            if tokenizer.rest().is_empty() {
95                return Some((command, None));
96            }
97            return None;
98        }
99
100        let comment = if tokenizer.space() {
101            if let Some(comment) = tokenizer.comment() {
102                Some(comment)
103            } else {
104                let rest = tokenizer.rest();
105                if rest.is_empty() {
106                    None
107                } else {
108                    Some(rest)
109                }
110            }
111        } else {
112            let rest = tokenizer.rest();
113            if rest.is_empty() {
114                None
115            } else {
116                return None;
117            }
118        };
119
120        let command = match cmd_word.as_str() {
121            "LD" => Self::parse_r_or_a(R::Ld, A::Ld, &values)?,
122            "ST" => Self::parse_a(A::St, &values)?,
123            "LAD" => Self::parse_a(A::Lad, &values)?,
124            "ADDA" => Self::parse_r_or_a(R::Adda, A::Adda, &values)?,
125            "ADDL" => Self::parse_r_or_a(R::Addl, A::Addl, &values)?,
126            "SUBA" => Self::parse_r_or_a(R::Suba, A::Suba, &values)?,
127            "SUBL" => Self::parse_r_or_a(R::Subl, A::Subl, &values)?,
128            "AND" => Self::parse_r_or_a(R::And, A::And, &values)?,
129            "OR" => Self::parse_r_or_a(R::Or, A::Or, &values)?,
130            "XOR" => Self::parse_r_or_a(R::Xor, A::Xor, &values)?,
131            "CPA" => Self::parse_r_or_a(R::Cpa, A::Cpa, &values)?,
132            "CPL" => Self::parse_r_or_a(R::Cpl, A::Cpl, &values)?,
133            "SLA" => Self::parse_a(A::Sla, &values)?,
134            "SRA" => Self::parse_a(A::Sra, &values)?,
135            "SLL" => Self::parse_a(A::Sll, &values)?,
136            "SRL" => Self::parse_a(A::Srl, &values)?,
137            "JPL" => Self::parse_p(P::Jpl, &values)?,
138            "JMI" => Self::parse_p(P::Jmi, &values)?,
139            "JNZ" => Self::parse_p(P::Jnz, &values)?,
140            "JZE" => Self::parse_p(P::Jze, &values)?,
141            "JOV" => Self::parse_p(P::Jov, &values)?,
142            "JUMP" => Self::parse_p(P::Jump, &values)?,
143            "PUSH" => Self::parse_p(P::Push, &values)?,
144            "CALL" => Self::parse_p(P::Call, &values)?,
145            "SVC" => Self::parse_p(P::Svc, &values)?,
146            "START" => Self::parse_start(&values)?,
147            "POP" => Self::parse_pop(&values)?,
148            "IN" => Self::parse_in(&values)?,
149            "OUT" => Self::parse_out(&values)?,
150            "DC" => Self::parse_dc(&values)?,
151            "DS" => Self::parse_ds(&values)?,
152            _ => return None,
153        };
154
155        Some((command, comment))
156    }
157
158    fn parse_dc(values: &[Token]) -> Option<Command> {
159        let mut constants = vec![];
160        for v in values {
161            constants.push(Constant::parse(v)?);
162        }
163        Some(Command::Dc { constants })
164    }
165
166    fn parse_ds(values: &[Token]) -> Option<Command> {
167        if let [Token::Dec(v)] = values {
168            Some(Command::Ds { size: *v as u16 })
169        } else {
170            None
171        }
172    }
173
174    fn parse_start(values: &[Token]) -> Option<Command> {
175        if let [label] = values {
176            let label = Label::parse(label)?;
177            Some(Command::Start {
178                entry_point: Some(label),
179            })
180        } else {
181            None
182        }
183    }
184
185    fn parse_a(code: A, values: &[Token]) -> Option<Command> {
186        match values {
187            [r, adr] => {
188                let r = Register::parse(r)?;
189                let adr = Adr::parse(adr)?;
190                Some(Command::A {
191                    code,
192                    r,
193                    adr,
194                    x: None,
195                })
196            }
197            [r, adr, x] => {
198                let r = Register::parse(r)?;
199                let adr = Adr::parse(adr)?;
200                let x = IndexRegister::parse(x)?;
201                Some(Command::A {
202                    code,
203                    r,
204                    adr,
205                    x: Some(x),
206                })
207            }
208            _ => None,
209        }
210    }
211
212    fn parse_p(code: P, values: &[Token]) -> Option<Command> {
213        match values {
214            [adr] => {
215                let adr = Adr::parse(adr)?;
216                Some(Command::P { code, adr, x: None })
217            }
218            [adr, x] => {
219                let adr = Adr::parse(adr)?;
220                let x = IndexRegister::parse(x)?;
221                Some(Command::P {
222                    code,
223                    adr,
224                    x: Some(x),
225                })
226            }
227            _ => None,
228        }
229    }
230
231    fn parse_pop(values: &[Token]) -> Option<Command> {
232        if let [r] = values {
233            let r = Register::parse(r)?;
234            Some(Command::Pop { r })
235        } else {
236            None
237        }
238    }
239
240    fn parse_in(values: &[Token]) -> Option<Command> {
241        if let [pos, len] = values {
242            let pos = Label::parse(pos)?;
243            let len = Label::parse(len)?;
244            Some(Command::In { pos, len })
245        } else {
246            None
247        }
248    }
249
250    fn parse_out(values: &[Token]) -> Option<Command> {
251        if let [pos, len] = values {
252            let pos = Label::parse(pos)?;
253            let len = Label::parse(len)?;
254            Some(Command::Out { pos, len })
255        } else {
256            None
257        }
258    }
259
260    fn parse_r_or_a(r: R, a: A, values: &[Token]) -> Option<Command> {
261        if let Some(command) = Self::parse_a(a, values) {
262            return Some(command);
263        }
264        if let [r1, r2] = values {
265            let r1 = Register::parse(r1)?;
266            let r2 = Register::parse(r2)?;
267            Some(Command::R { code: r, r1, r2 })
268        } else {
269            None
270        }
271    }
272}
273
274impl Constant {
275    fn parse(token: &Token) -> Option<Self> {
276        let c = match token {
277            word @ Token::Word(_) => Label::parse(word)?.into(),
278            Token::Dec(v) => Self::Dec(*v),
279            Token::Hex(v) => Self::Hex(*v),
280            Token::Str(s) => Self::Str(s.clone()),
281            Token::LitDec(_) | Token::LitHex(_) | Token::LitStr(_) => return None,
282        };
283        Some(c)
284    }
285}
286
287impl Label {
288    fn parse(token: &Token) -> Option<Self> {
289        if let Token::Word(w) = token {
290            let label = Self::from(w);
291            if label.is_valid() {
292                return Some(label);
293            }
294        }
295        None
296    }
297}
298
299impl Adr {
300    fn parse(token: &Token) -> Option<Self> {
301        let adr = match token {
302            word @ Token::Word(_) => Label::parse(word)?.into(),
303            Token::Dec(v) => Self::Dec(*v),
304            Token::Hex(v) => Self::Hex(*v),
305            Token::Str(_) => return None,
306            Token::LitDec(v) => Self::LiteralDec(*v),
307            Token::LitHex(v) => Self::LiteralHex(*v),
308            Token::LitStr(s) => Self::LiteralStr(s.clone()),
309        };
310        Some(adr)
311    }
312}
313
314impl Register {
315    fn parse(token: &Token) -> Option<Self> {
316        let s = if let Token::Word(w) = token {
317            w
318        } else {
319            return None;
320        };
321        match s.as_str() {
322            "GR0" => Some(Self::Gr0),
323            "GR1" => Some(Self::Gr1),
324            "GR2" => Some(Self::Gr2),
325            "GR3" => Some(Self::Gr3),
326            "GR4" => Some(Self::Gr4),
327            "GR5" => Some(Self::Gr5),
328            "GR6" => Some(Self::Gr6),
329            "GR7" => Some(Self::Gr7),
330            _ => None,
331        }
332    }
333}
334
335impl IndexRegister {
336    fn parse(token: &Token) -> Option<Self> {
337        let s = if let Token::Word(w) = token {
338            w
339        } else {
340            return None;
341        };
342        match s.as_str() {
343            "GR1" => Some(Self::Gr1),
344            "GR2" => Some(Self::Gr2),
345            "GR3" => Some(Self::Gr3),
346            "GR4" => Some(Self::Gr4),
347            "GR5" => Some(Self::Gr5),
348            "GR6" => Some(Self::Gr6),
349            "GR7" => Some(Self::Gr7),
350            _ => None,
351        }
352    }
353}
354
/// Character-level tokenizer with unlimited push-back.
///
/// Characters are read one at a time and collected in a pending buffer. A
/// failed match pushes the whole buffer back so another token kind can be
/// tried from the same position.
#[derive(Debug, Clone)]
pub struct Tokenizer<'a> {
    /// Remaining, not yet read input.
    chars: std::str::Chars<'a>,
    /// Pushed-back characters; the top of the stack is read next.
    stack: Vec<char>,
    /// Characters read for the token currently being matched.
    temp: String,
    /// Number of characters consumed by the last successful `space()` call.
    space_count: usize,
}
361
/// A single operand-level token produced by `Tokenizer::value`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// An identifier: a letter followed by letters or digits.
    Word(String),
    /// A decimal integer, e.g. `-12`.
    Dec(i16),
    /// A `#`-prefixed four-digit hexadecimal number, e.g. `#00FF`.
    Hex(u16),
    /// A single-quoted string with `''` already unescaped to `'`.
    Str(String),
    /// A decimal literal, e.g. `=12`.
    LitDec(i16),
    /// A hexadecimal literal, e.g. `=#00FF`.
    LitHex(u16),
    /// A string literal, e.g. `='abc'`.
    LitStr(String),
}
371
372impl<'a> Tokenizer<'a> {
373    pub fn new(s: &'a str) -> Self {
374        Self {
375            chars: s.chars(),
376            stack: Vec::new(),
377            temp: String::new(),
378            space_count: 0,
379        }
380    }
381
382    fn next(&mut self) -> Option<char> {
383        if let Some(ch) = self.stack.pop() {
384            self.temp.push(ch);
385            Some(ch)
386        } else if let Some(ch) = self.chars.next() {
387            self.temp.push(ch);
388            Some(ch)
389        } else {
390            None
391        }
392    }
393
394    fn back(&mut self) {
395        if let Some(ch) = self.temp.pop() {
396            self.stack.push(ch);
397        }
398    }
399
400    fn recover(&mut self) {
401        while let Some(ch) = self.temp.pop() {
402            self.stack.push(ch);
403        }
404    }
405
406    fn take(&mut self) -> String {
407        self.temp.drain(..).collect()
408    }
409
410    fn clear(&mut self) {
411        self.temp.clear();
412    }
413
414    pub fn value(&mut self) -> Option<Token> {
415        if let Some(w) = self.word() {
416            return Some(Token::Word(w));
417        }
418        if let Some(i) = self.integer() {
419            return Some(Token::Dec(i));
420        }
421        if let Some(h) = self.hex() {
422            return Some(Token::Hex(h));
423        }
424        if let Some(s) = self.string() {
425            return Some(Token::Str(s));
426        }
427        if let Some(i) = self.lit_integer() {
428            return Some(Token::LitDec(i));
429        }
430        if let Some(h) = self.lit_hex() {
431            return Some(Token::LitHex(h));
432        }
433        if let Some(s) = self.lit_string() {
434            return Some(Token::LitStr(s));
435        }
436        None
437    }
438
439    // 破壊的操作、recover不可能
440    fn values(&mut self) -> Option<Vec<Token>> {
441        let mut ret = vec![];
442        if let Some(t) = self.value() {
443            ret.push(t);
444        } else {
445            // オペランドなし (あるいはシンタックスエラー)
446            return Some(ret);
447        }
448        while self.comma() {
449            if let Some(t) = self.value() {
450                ret.push(t);
451            } else {
452                // カンマのあとに値がないのはおかしいので
453                return None;
454            }
455        }
456        Some(ret)
457    }
458
459    fn comment(&mut self) -> Option<String> {
460        if !matches!(self.next(), Some(';')) {
461            self.recover();
462            return None;
463        }
464        while self.next().is_some() {}
465        let comment = if matches!(
466            self.temp.chars().nth(1),
467            Some(ch) if ch.is_ascii_whitespace()
468        ) {
469            self.temp.chars().skip(2).collect()
470        } else {
471            self.temp.chars().skip(1).collect()
472        };
473        self.clear();
474        Some(comment)
475    }
476
477    pub fn rest(&mut self) -> String {
478        while self.next().is_some() {}
479        self.take()
480    }
481
482    pub fn word(&mut self) -> Option<String> {
483        if !matches!(self.next(), Some(ch) if ch.is_ascii_uppercase()) {
484            self.recover();
485            return None;
486        }
487        while let Some(ch) = self.next() {
488            if !ch.is_ascii_uppercase() && !ch.is_ascii_digit() {
489                self.back();
490                break;
491            }
492        }
493        Some(self.take())
494    }
495
496    pub fn space(&mut self) -> bool {
497        if !matches!(self.next(),Some(ch)if ch.is_ascii_whitespace()) {
498            self.recover();
499            return false;
500        }
501        while let Some(ch) = self.next() {
502            if !ch.is_ascii_whitespace() {
503                self.back();
504                break;
505            }
506        }
507        self.space_count = self.temp.chars().count();
508        self.clear();
509        true
510    }
511
512    pub fn integer(&mut self) -> Option<i16> {
513        if !matches!(self.next(),
514                Some(ch) if ch == '-' || ch.is_ascii_digit())
515        {
516            self.recover();
517            return None;
518        }
519        while let Some(ch) = self.next() {
520            if !ch.is_ascii_digit() {
521                self.back();
522                break;
523            }
524        }
525        if let Ok(value) = self.temp.parse::<i64>() {
526            self.clear();
527            Some(value as i16)
528        } else {
529            self.recover();
530            None
531        }
532    }
533
534    pub fn lit_integer(&mut self) -> Option<i16> {
535        if !matches!(self.next(), Some('=')) {
536            self.recover();
537            return None;
538        }
539        if !matches!(self.next(),
540                Some(ch) if ch == '-' || ch.is_ascii_digit())
541        {
542            self.recover();
543            return None;
544        }
545        while let Some(ch) = self.next() {
546            if !ch.is_ascii_digit() {
547                self.back();
548                break;
549            }
550        }
551        let s: String = self.temp.chars().skip(1).collect();
552        if let Ok(value) = s.parse::<i64>() {
553            self.clear();
554            Some(value as i16)
555        } else {
556            self.recover();
557            None
558        }
559    }
560
561    pub fn lit_hex(&mut self) -> Option<u16> {
562        if !matches!(self.next(), Some('=')) {
563            self.recover();
564            return None;
565        }
566        if !matches!(self.next(), Some('#')) {
567            self.recover();
568            return None;
569        }
570        for _ in 0..4 {
571            if !matches!(
572                self.next(),
573                Some(ch) if ch.is_ascii_digit()
574                            || (ch.is_ascii_uppercase() && ch.is_ascii_hexdigit())
575            ) {
576                self.recover();
577                return None;
578            }
579        }
580        let h: String = self.temp.chars().skip(2).collect();
581        if let Ok(value) = u16::from_str_radix(&h, 16) {
582            self.clear();
583            Some(value)
584        } else {
585            self.recover();
586            None
587        }
588    }
589
590    pub fn hex(&mut self) -> Option<u16> {
591        if !matches!(self.next(), Some('#')) {
592            self.recover();
593            return None;
594        }
595        for _ in 0..4 {
596            if !matches!(
597                self.next(),
598                Some(ch) if ch.is_ascii_digit()
599                            || (ch.is_ascii_uppercase() && ch.is_ascii_hexdigit())
600            ) {
601                self.recover();
602                return None;
603            }
604        }
605        let h: String = self.temp.chars().skip(1).collect();
606        if let Ok(value) = u16::from_str_radix(&h, 16) {
607            self.clear();
608            Some(value)
609        } else {
610            self.recover();
611            None
612        }
613    }
614
615    pub fn string(&mut self) -> Option<String> {
616        if !matches!(self.next(), Some('\'')) {
617            self.recover();
618            return None;
619        }
620        let mut quote = false;
621        let mut text = String::new();
622        while let Some(ch) = self.next() {
623            if quote {
624                if ch == '\'' {
625                    quote = false;
626                    text.push(ch);
627                } else {
628                    self.back();
629                    break;
630                }
631            } else if ch == '\'' {
632                quote = true;
633            } else {
634                text.push(ch);
635            }
636        }
637        if quote {
638            self.clear();
639            Some(text)
640        } else {
641            self.recover();
642            None
643        }
644    }
645
646    pub fn lit_string(&mut self) -> Option<String> {
647        if !matches!(self.next(), Some('=')) {
648            self.recover();
649            return None;
650        }
651        self.string()
652    }
653
654    pub fn comma(&mut self) -> bool {
655        if matches!(self.next(), Some(',')) {
656            self.clear();
657            true
658        } else {
659            self.recover();
660            false
661        }
662    }
663
664    pub fn colon(&mut self) -> bool {
665        if matches!(self.next(), Some(':')) {
666            self.clear();
667            true
668        } else {
669            self.recover();
670            false
671        }
672    }
673
674    pub fn atmark(&mut self) -> bool {
675        if matches!(self.next(), Some('@')) {
676            self.clear();
677            true
678        } else {
679            self.recover();
680            false
681        }
682    }
683
684    pub fn plus(&mut self) -> bool {
685        if matches!(self.next(), Some('+')) {
686            self.clear();
687            true
688        } else {
689            self.recover();
690            false
691        }
692    }
693
694    pub fn minus(&mut self) -> bool {
695        if matches!(self.next(), Some('-')) {
696            self.clear();
697            true
698        } else {
699            self.recover();
700            false
701        }
702    }
703
704    pub fn dot(&mut self) -> bool {
705        if matches!(self.next(), Some('.')) {
706            self.clear();
707            true
708        } else {
709            self.recover();
710            false
711        }
712    }
713
714    pub fn open_bracket(&mut self) -> bool {
715        if matches!(self.next(), Some('(')) {
716            self.clear();
717            true
718        } else {
719            self.recover();
720            false
721        }
722    }
723
724    pub fn close_bracket(&mut self) -> bool {
725        if matches!(self.next(), Some(')')) {
726            self.clear();
727            true
728        } else {
729            self.recover();
730            false
731        }
732    }
733
734    pub fn ignore_case_word(&mut self) -> Option<String> {
735        if !matches!(self.next(), Some(ch) if ch.is_ascii_alphabetic()) {
736            self.recover();
737            return None;
738        }
739        while let Some(ch) = self.next() {
740            if !ch.is_ascii_alphanumeric() {
741                self.back();
742                break;
743            }
744        }
745        Some(self.take())
746    }
747
748    pub fn ignore_case_lit_hex(&mut self) -> Option<u16> {
749        if !matches!(self.next(), Some('=')) {
750            self.recover();
751            return None;
752        }
753        if !matches!(self.next(), Some('#')) {
754            self.recover();
755            return None;
756        }
757        for _ in 0..4 {
758            if !matches!(
759                self.next(),
760                Some(ch) if ch.is_ascii_hexdigit()
761            ) {
762                self.recover();
763                return None;
764            }
765        }
766        let h: String = self.temp.chars().skip(2).collect();
767        if let Ok(value) = u16::from_str_radix(&h, 16) {
768            self.clear();
769            Some(value)
770        } else {
771            self.recover();
772            None
773        }
774    }
775
776    pub fn ignore_case_hex(&mut self) -> Option<u16> {
777        if !matches!(self.next(), Some('#')) {
778            self.recover();
779            return None;
780        }
781        for _ in 0..4 {
782            if !matches!(
783                self.next(),
784                Some(ch) if ch.is_ascii_hexdigit()
785            ) {
786                self.recover();
787                return None;
788            }
789        }
790        let h: String = self.temp.chars().skip(1).collect();
791        if let Ok(value) = u16::from_str_radix(&h, 16) {
792            self.clear();
793            Some(value)
794        } else {
795            self.recover();
796            None
797        }
798    }
799
800    pub fn ignore_case_value(&mut self) -> Option<Token> {
801        if let Some(w) = self.ignore_case_word() {
802            return Some(Token::Word(w));
803        }
804        if let Some(i) = self.integer() {
805            return Some(Token::Dec(i));
806        }
807        if let Some(h) = self.ignore_case_hex() {
808            return Some(Token::Hex(h));
809        }
810        if let Some(s) = self.string() {
811            return Some(Token::Str(s));
812        }
813        if let Some(i) = self.lit_integer() {
814            return Some(Token::LitDec(i));
815        }
816        if let Some(h) = self.ignore_case_lit_hex() {
817            return Some(Token::LitHex(h));
818        }
819        if let Some(s) = self.lit_string() {
820            return Some(Token::LitStr(s));
821        }
822        None
823    }
824
825    pub fn uinteger(&mut self) -> Option<u16> {
826        if !matches!(self.next(),
827                Some(ch) if ch.is_ascii_digit())
828        {
829            self.recover();
830            return None;
831        }
832        while let Some(ch) = self.next() {
833            if !ch.is_ascii_digit() {
834                self.back();
835                break;
836            }
837        }
838        if let Ok(value) = self.temp.parse::<u64>() {
839            self.clear();
840            Some(value as u16)
841        } else {
842            self.recover();
843            None
844        }
845    }
846}
847
848impl std::fmt::Display for Token {
849    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
850        match self {
851            Self::Word(s) => s.to_ascii_uppercase().fmt(f),
852            Self::Dec(d) => d.to_string().fmt(f),
853            Self::Hex(h) => format!("#{:04X}", h).fmt(f),
854            Self::Str(s) => format!("'{}'", s.replace('\'', "''")).fmt(f),
855            Self::LitDec(d) => format!("={}", d).fmt(f),
856            Self::LitHex(h) => format!("=#{:04X}", h).fmt(f),
857            Self::LitStr(s) => format!("='{}'", s.replace('\'', "''")).fmt(f),
858        }
859    }
860}