1use super::*;
5use crate::SyntaxError;
6
7pub fn parse(src: &str) -> Result<Vec<Statement>, SyntaxError> {
8 let mut ret = vec![];
9 for (i, line) in src.lines().enumerate() {
10 if line.trim().is_empty() {
11 continue;
12 }
13 let stmt = Statement::parse(line).ok_or_else(|| {
14 SyntaxError::new(
15 i + 1,
16 0,
17 format!(r#"不正なCASL2ステートメントです: "{}""#, line),
18 )
19 })?;
20 ret.push(stmt);
21 }
22 Ok(ret)
23}
24
25impl Statement {
26 fn parse(src: &str) -> Option<Self> {
27 let mut tokenizer = Tokenizer::new(src);
28
29 let mut label: Option<Label> = None;
30 if tokenizer.space() {
31 if let Some(comment) = tokenizer.comment() {
32 let indent = tokenizer.space_count;
33 return Some(Statement::Comment {
34 indent,
35 text: comment,
36 });
37 }
38 } else if let Some(word) = tokenizer.word() {
39 let word = Label::from(word);
40 if !word.is_valid() {
41 return None;
42 }
43 if !tokenizer.space() {
44 return None;
45 }
46 label = Some(word);
47 } else if let Some(comment) = tokenizer.comment() {
48 return Some(Statement::Comment {
49 indent: 0,
50 text: comment,
51 });
52 } else {
53 return None;
54 }
55
56 let (command, comment) = Command::parse(tokenizer)?;
57
58 Some(Statement::Code {
59 label,
60 command,
61 comment,
62 })
63 }
64}
65
66impl Command {
67 fn parse(mut tokenizer: Tokenizer) -> Option<(Self, Option<String>)> {
68 let cmd_word = tokenizer.word()?;
69
70 if !tokenizer.space() {
71 let rest = tokenizer.rest();
72 if !rest.is_empty() {
73 return None;
74 }
75 }
76
77 let values = tokenizer.values()?;
78
79 if values.is_empty() {
80 let command = match cmd_word.as_str() {
81 "START" => Command::Start { entry_point: None },
82 "RPUSH" => Command::Rpush,
83 "RPOP" => Command::Rpop,
84 "RET" => Command::Ret,
85 "END" => Command::End,
86 "NOP" => Command::Nop,
87 _ => return None,
88 };
89 if !tokenizer.space() {
90 if let Some(comment) = tokenizer.comment() {
91 return Some((command, Some(comment)));
92 }
93 }
94 if tokenizer.rest().is_empty() {
95 return Some((command, None));
96 }
97 return None;
98 }
99
100 let comment = if tokenizer.space() {
101 if let Some(comment) = tokenizer.comment() {
102 Some(comment)
103 } else {
104 let rest = tokenizer.rest();
105 if rest.is_empty() {
106 None
107 } else {
108 Some(rest)
109 }
110 }
111 } else {
112 let rest = tokenizer.rest();
113 if rest.is_empty() {
114 None
115 } else {
116 return None;
117 }
118 };
119
120 let command = match cmd_word.as_str() {
121 "LD" => Self::parse_r_or_a(R::Ld, A::Ld, &values)?,
122 "ST" => Self::parse_a(A::St, &values)?,
123 "LAD" => Self::parse_a(A::Lad, &values)?,
124 "ADDA" => Self::parse_r_or_a(R::Adda, A::Adda, &values)?,
125 "ADDL" => Self::parse_r_or_a(R::Addl, A::Addl, &values)?,
126 "SUBA" => Self::parse_r_or_a(R::Suba, A::Suba, &values)?,
127 "SUBL" => Self::parse_r_or_a(R::Subl, A::Subl, &values)?,
128 "AND" => Self::parse_r_or_a(R::And, A::And, &values)?,
129 "OR" => Self::parse_r_or_a(R::Or, A::Or, &values)?,
130 "XOR" => Self::parse_r_or_a(R::Xor, A::Xor, &values)?,
131 "CPA" => Self::parse_r_or_a(R::Cpa, A::Cpa, &values)?,
132 "CPL" => Self::parse_r_or_a(R::Cpl, A::Cpl, &values)?,
133 "SLA" => Self::parse_a(A::Sla, &values)?,
134 "SRA" => Self::parse_a(A::Sra, &values)?,
135 "SLL" => Self::parse_a(A::Sll, &values)?,
136 "SRL" => Self::parse_a(A::Srl, &values)?,
137 "JPL" => Self::parse_p(P::Jpl, &values)?,
138 "JMI" => Self::parse_p(P::Jmi, &values)?,
139 "JNZ" => Self::parse_p(P::Jnz, &values)?,
140 "JZE" => Self::parse_p(P::Jze, &values)?,
141 "JOV" => Self::parse_p(P::Jov, &values)?,
142 "JUMP" => Self::parse_p(P::Jump, &values)?,
143 "PUSH" => Self::parse_p(P::Push, &values)?,
144 "CALL" => Self::parse_p(P::Call, &values)?,
145 "SVC" => Self::parse_p(P::Svc, &values)?,
146 "START" => Self::parse_start(&values)?,
147 "POP" => Self::parse_pop(&values)?,
148 "IN" => Self::parse_in(&values)?,
149 "OUT" => Self::parse_out(&values)?,
150 "DC" => Self::parse_dc(&values)?,
151 "DS" => Self::parse_ds(&values)?,
152 _ => return None,
153 };
154
155 Some((command, comment))
156 }
157
158 fn parse_dc(values: &[Token]) -> Option<Command> {
159 let mut constants = vec![];
160 for v in values {
161 constants.push(Constant::parse(v)?);
162 }
163 Some(Command::Dc { constants })
164 }
165
166 fn parse_ds(values: &[Token]) -> Option<Command> {
167 if let [Token::Dec(v)] = values {
168 Some(Command::Ds { size: *v as u16 })
169 } else {
170 None
171 }
172 }
173
174 fn parse_start(values: &[Token]) -> Option<Command> {
175 if let [label] = values {
176 let label = Label::parse(label)?;
177 Some(Command::Start {
178 entry_point: Some(label),
179 })
180 } else {
181 None
182 }
183 }
184
185 fn parse_a(code: A, values: &[Token]) -> Option<Command> {
186 match values {
187 [r, adr] => {
188 let r = Register::parse(r)?;
189 let adr = Adr::parse(adr)?;
190 Some(Command::A {
191 code,
192 r,
193 adr,
194 x: None,
195 })
196 }
197 [r, adr, x] => {
198 let r = Register::parse(r)?;
199 let adr = Adr::parse(adr)?;
200 let x = IndexRegister::parse(x)?;
201 Some(Command::A {
202 code,
203 r,
204 adr,
205 x: Some(x),
206 })
207 }
208 _ => None,
209 }
210 }
211
212 fn parse_p(code: P, values: &[Token]) -> Option<Command> {
213 match values {
214 [adr] => {
215 let adr = Adr::parse(adr)?;
216 Some(Command::P { code, adr, x: None })
217 }
218 [adr, x] => {
219 let adr = Adr::parse(adr)?;
220 let x = IndexRegister::parse(x)?;
221 Some(Command::P {
222 code,
223 adr,
224 x: Some(x),
225 })
226 }
227 _ => None,
228 }
229 }
230
231 fn parse_pop(values: &[Token]) -> Option<Command> {
232 if let [r] = values {
233 let r = Register::parse(r)?;
234 Some(Command::Pop { r })
235 } else {
236 None
237 }
238 }
239
240 fn parse_in(values: &[Token]) -> Option<Command> {
241 if let [pos, len] = values {
242 let pos = Label::parse(pos)?;
243 let len = Label::parse(len)?;
244 Some(Command::In { pos, len })
245 } else {
246 None
247 }
248 }
249
250 fn parse_out(values: &[Token]) -> Option<Command> {
251 if let [pos, len] = values {
252 let pos = Label::parse(pos)?;
253 let len = Label::parse(len)?;
254 Some(Command::Out { pos, len })
255 } else {
256 None
257 }
258 }
259
260 fn parse_r_or_a(r: R, a: A, values: &[Token]) -> Option<Command> {
261 if let Some(command) = Self::parse_a(a, values) {
262 return Some(command);
263 }
264 if let [r1, r2] = values {
265 let r1 = Register::parse(r1)?;
266 let r2 = Register::parse(r2)?;
267 Some(Command::R { code: r, r1, r2 })
268 } else {
269 None
270 }
271 }
272}
273
274impl Constant {
275 fn parse(token: &Token) -> Option<Self> {
276 let c = match token {
277 word @ Token::Word(_) => Label::parse(word)?.into(),
278 Token::Dec(v) => Self::Dec(*v),
279 Token::Hex(v) => Self::Hex(*v),
280 Token::Str(s) => Self::Str(s.clone()),
281 Token::LitDec(_) | Token::LitHex(_) | Token::LitStr(_) => return None,
282 };
283 Some(c)
284 }
285}
286
287impl Label {
288 fn parse(token: &Token) -> Option<Self> {
289 if let Token::Word(w) = token {
290 let label = Self::from(w);
291 if label.is_valid() {
292 return Some(label);
293 }
294 }
295 None
296 }
297}
298
299impl Adr {
300 fn parse(token: &Token) -> Option<Self> {
301 let adr = match token {
302 word @ Token::Word(_) => Label::parse(word)?.into(),
303 Token::Dec(v) => Self::Dec(*v),
304 Token::Hex(v) => Self::Hex(*v),
305 Token::Str(_) => return None,
306 Token::LitDec(v) => Self::LiteralDec(*v),
307 Token::LitHex(v) => Self::LiteralHex(*v),
308 Token::LitStr(s) => Self::LiteralStr(s.clone()),
309 };
310 Some(adr)
311 }
312}
313
314impl Register {
315 fn parse(token: &Token) -> Option<Self> {
316 let s = if let Token::Word(w) = token {
317 w
318 } else {
319 return None;
320 };
321 match s.as_str() {
322 "GR0" => Some(Self::Gr0),
323 "GR1" => Some(Self::Gr1),
324 "GR2" => Some(Self::Gr2),
325 "GR3" => Some(Self::Gr3),
326 "GR4" => Some(Self::Gr4),
327 "GR5" => Some(Self::Gr5),
328 "GR6" => Some(Self::Gr6),
329 "GR7" => Some(Self::Gr7),
330 _ => None,
331 }
332 }
333}
334
335impl IndexRegister {
336 fn parse(token: &Token) -> Option<Self> {
337 let s = if let Token::Word(w) = token {
338 w
339 } else {
340 return None;
341 };
342 match s.as_str() {
343 "GR1" => Some(Self::Gr1),
344 "GR2" => Some(Self::Gr2),
345 "GR3" => Some(Self::Gr3),
346 "GR4" => Some(Self::Gr4),
347 "GR5" => Some(Self::Gr5),
348 "GR6" => Some(Self::Gr6),
349 "GR7" => Some(Self::Gr7),
350 _ => None,
351 }
352 }
353}
354
/// Character scanner with push-back over a string slice.
pub struct Tokenizer<'a> {
    /// Characters of the input that have not been read yet.
    chars: std::str::Chars<'a>,
    /// Push-back buffer; `next` pops from here before reading `chars`.
    stack: Vec<char>,
    /// Characters read since the last `take`, `clear` or `recover`.
    temp: String,
    /// Number of characters held in `temp` when `space` last succeeded.
    space_count: usize,
}
361
/// Operand token produced by `Tokenizer::value` / `Tokenizer::ignore_case_value`.
pub enum Token {
    /// A word: a letter followed by letters and digits.
    Word(String),
    /// A decimal integer, optionally starting with `-`.
    Dec(i16),
    /// `#` followed by exactly four hexadecimal digits.
    Hex(u16),
    /// A single-quoted string; `''` inside stands for one quote character.
    Str(String),
    /// `=` followed by a decimal integer.
    LitDec(i16),
    /// `=#` followed by exactly four hexadecimal digits.
    LitHex(u16),
    /// `=` followed by a single-quoted string.
    LitStr(String),
}
371
372impl<'a> Tokenizer<'a> {
373 pub fn new(s: &'a str) -> Self {
374 Self {
375 chars: s.chars(),
376 stack: Vec::new(),
377 temp: String::new(),
378 space_count: 0,
379 }
380 }
381
382 fn next(&mut self) -> Option<char> {
383 if let Some(ch) = self.stack.pop() {
384 self.temp.push(ch);
385 Some(ch)
386 } else if let Some(ch) = self.chars.next() {
387 self.temp.push(ch);
388 Some(ch)
389 } else {
390 None
391 }
392 }
393
394 fn back(&mut self) {
395 if let Some(ch) = self.temp.pop() {
396 self.stack.push(ch);
397 }
398 }
399
400 fn recover(&mut self) {
401 while let Some(ch) = self.temp.pop() {
402 self.stack.push(ch);
403 }
404 }
405
406 fn take(&mut self) -> String {
407 self.temp.drain(..).collect()
408 }
409
410 fn clear(&mut self) {
411 self.temp.clear();
412 }
413
414 pub fn value(&mut self) -> Option<Token> {
415 if let Some(w) = self.word() {
416 return Some(Token::Word(w));
417 }
418 if let Some(i) = self.integer() {
419 return Some(Token::Dec(i));
420 }
421 if let Some(h) = self.hex() {
422 return Some(Token::Hex(h));
423 }
424 if let Some(s) = self.string() {
425 return Some(Token::Str(s));
426 }
427 if let Some(i) = self.lit_integer() {
428 return Some(Token::LitDec(i));
429 }
430 if let Some(h) = self.lit_hex() {
431 return Some(Token::LitHex(h));
432 }
433 if let Some(s) = self.lit_string() {
434 return Some(Token::LitStr(s));
435 }
436 None
437 }
438
439 fn values(&mut self) -> Option<Vec<Token>> {
441 let mut ret = vec![];
442 if let Some(t) = self.value() {
443 ret.push(t);
444 } else {
445 return Some(ret);
447 }
448 while self.comma() {
449 if let Some(t) = self.value() {
450 ret.push(t);
451 } else {
452 return None;
454 }
455 }
456 Some(ret)
457 }
458
459 fn comment(&mut self) -> Option<String> {
460 if !matches!(self.next(), Some(';')) {
461 self.recover();
462 return None;
463 }
464 while self.next().is_some() {}
465 let comment = if matches!(
466 self.temp.chars().nth(1),
467 Some(ch) if ch.is_ascii_whitespace()
468 ) {
469 self.temp.chars().skip(2).collect()
470 } else {
471 self.temp.chars().skip(1).collect()
472 };
473 self.clear();
474 Some(comment)
475 }
476
477 pub fn rest(&mut self) -> String {
478 while self.next().is_some() {}
479 self.take()
480 }
481
482 pub fn word(&mut self) -> Option<String> {
483 if !matches!(self.next(), Some(ch) if ch.is_ascii_uppercase()) {
484 self.recover();
485 return None;
486 }
487 while let Some(ch) = self.next() {
488 if !ch.is_ascii_uppercase() && !ch.is_ascii_digit() {
489 self.back();
490 break;
491 }
492 }
493 Some(self.take())
494 }
495
496 pub fn space(&mut self) -> bool {
497 if !matches!(self.next(),Some(ch)if ch.is_ascii_whitespace()) {
498 self.recover();
499 return false;
500 }
501 while let Some(ch) = self.next() {
502 if !ch.is_ascii_whitespace() {
503 self.back();
504 break;
505 }
506 }
507 self.space_count = self.temp.chars().count();
508 self.clear();
509 true
510 }
511
512 pub fn integer(&mut self) -> Option<i16> {
513 if !matches!(self.next(),
514 Some(ch) if ch == '-' || ch.is_ascii_digit())
515 {
516 self.recover();
517 return None;
518 }
519 while let Some(ch) = self.next() {
520 if !ch.is_ascii_digit() {
521 self.back();
522 break;
523 }
524 }
525 if let Ok(value) = self.temp.parse::<i64>() {
526 self.clear();
527 Some(value as i16)
528 } else {
529 self.recover();
530 None
531 }
532 }
533
534 pub fn lit_integer(&mut self) -> Option<i16> {
535 if !matches!(self.next(), Some('=')) {
536 self.recover();
537 return None;
538 }
539 if !matches!(self.next(),
540 Some(ch) if ch == '-' || ch.is_ascii_digit())
541 {
542 self.recover();
543 return None;
544 }
545 while let Some(ch) = self.next() {
546 if !ch.is_ascii_digit() {
547 self.back();
548 break;
549 }
550 }
551 let s: String = self.temp.chars().skip(1).collect();
552 if let Ok(value) = s.parse::<i64>() {
553 self.clear();
554 Some(value as i16)
555 } else {
556 self.recover();
557 None
558 }
559 }
560
561 pub fn lit_hex(&mut self) -> Option<u16> {
562 if !matches!(self.next(), Some('=')) {
563 self.recover();
564 return None;
565 }
566 if !matches!(self.next(), Some('#')) {
567 self.recover();
568 return None;
569 }
570 for _ in 0..4 {
571 if !matches!(
572 self.next(),
573 Some(ch) if ch.is_ascii_digit()
574 || (ch.is_ascii_uppercase() && ch.is_ascii_hexdigit())
575 ) {
576 self.recover();
577 return None;
578 }
579 }
580 let h: String = self.temp.chars().skip(2).collect();
581 if let Ok(value) = u16::from_str_radix(&h, 16) {
582 self.clear();
583 Some(value)
584 } else {
585 self.recover();
586 None
587 }
588 }
589
590 pub fn hex(&mut self) -> Option<u16> {
591 if !matches!(self.next(), Some('#')) {
592 self.recover();
593 return None;
594 }
595 for _ in 0..4 {
596 if !matches!(
597 self.next(),
598 Some(ch) if ch.is_ascii_digit()
599 || (ch.is_ascii_uppercase() && ch.is_ascii_hexdigit())
600 ) {
601 self.recover();
602 return None;
603 }
604 }
605 let h: String = self.temp.chars().skip(1).collect();
606 if let Ok(value) = u16::from_str_radix(&h, 16) {
607 self.clear();
608 Some(value)
609 } else {
610 self.recover();
611 None
612 }
613 }
614
615 pub fn string(&mut self) -> Option<String> {
616 if !matches!(self.next(), Some('\'')) {
617 self.recover();
618 return None;
619 }
620 let mut quote = false;
621 let mut text = String::new();
622 while let Some(ch) = self.next() {
623 if quote {
624 if ch == '\'' {
625 quote = false;
626 text.push(ch);
627 } else {
628 self.back();
629 break;
630 }
631 } else if ch == '\'' {
632 quote = true;
633 } else {
634 text.push(ch);
635 }
636 }
637 if quote {
638 self.clear();
639 Some(text)
640 } else {
641 self.recover();
642 None
643 }
644 }
645
646 pub fn lit_string(&mut self) -> Option<String> {
647 if !matches!(self.next(), Some('=')) {
648 self.recover();
649 return None;
650 }
651 self.string()
652 }
653
654 pub fn comma(&mut self) -> bool {
655 if matches!(self.next(), Some(',')) {
656 self.clear();
657 true
658 } else {
659 self.recover();
660 false
661 }
662 }
663
664 pub fn colon(&mut self) -> bool {
665 if matches!(self.next(), Some(':')) {
666 self.clear();
667 true
668 } else {
669 self.recover();
670 false
671 }
672 }
673
674 pub fn atmark(&mut self) -> bool {
675 if matches!(self.next(), Some('@')) {
676 self.clear();
677 true
678 } else {
679 self.recover();
680 false
681 }
682 }
683
684 pub fn plus(&mut self) -> bool {
685 if matches!(self.next(), Some('+')) {
686 self.clear();
687 true
688 } else {
689 self.recover();
690 false
691 }
692 }
693
694 pub fn minus(&mut self) -> bool {
695 if matches!(self.next(), Some('-')) {
696 self.clear();
697 true
698 } else {
699 self.recover();
700 false
701 }
702 }
703
704 pub fn dot(&mut self) -> bool {
705 if matches!(self.next(), Some('.')) {
706 self.clear();
707 true
708 } else {
709 self.recover();
710 false
711 }
712 }
713
714 pub fn open_bracket(&mut self) -> bool {
715 if matches!(self.next(), Some('(')) {
716 self.clear();
717 true
718 } else {
719 self.recover();
720 false
721 }
722 }
723
724 pub fn close_bracket(&mut self) -> bool {
725 if matches!(self.next(), Some(')')) {
726 self.clear();
727 true
728 } else {
729 self.recover();
730 false
731 }
732 }
733
734 pub fn ignore_case_word(&mut self) -> Option<String> {
735 if !matches!(self.next(), Some(ch) if ch.is_ascii_alphabetic()) {
736 self.recover();
737 return None;
738 }
739 while let Some(ch) = self.next() {
740 if !ch.is_ascii_alphanumeric() {
741 self.back();
742 break;
743 }
744 }
745 Some(self.take())
746 }
747
748 pub fn ignore_case_lit_hex(&mut self) -> Option<u16> {
749 if !matches!(self.next(), Some('=')) {
750 self.recover();
751 return None;
752 }
753 if !matches!(self.next(), Some('#')) {
754 self.recover();
755 return None;
756 }
757 for _ in 0..4 {
758 if !matches!(
759 self.next(),
760 Some(ch) if ch.is_ascii_hexdigit()
761 ) {
762 self.recover();
763 return None;
764 }
765 }
766 let h: String = self.temp.chars().skip(2).collect();
767 if let Ok(value) = u16::from_str_radix(&h, 16) {
768 self.clear();
769 Some(value)
770 } else {
771 self.recover();
772 None
773 }
774 }
775
776 pub fn ignore_case_hex(&mut self) -> Option<u16> {
777 if !matches!(self.next(), Some('#')) {
778 self.recover();
779 return None;
780 }
781 for _ in 0..4 {
782 if !matches!(
783 self.next(),
784 Some(ch) if ch.is_ascii_hexdigit()
785 ) {
786 self.recover();
787 return None;
788 }
789 }
790 let h: String = self.temp.chars().skip(1).collect();
791 if let Ok(value) = u16::from_str_radix(&h, 16) {
792 self.clear();
793 Some(value)
794 } else {
795 self.recover();
796 None
797 }
798 }
799
800 pub fn ignore_case_value(&mut self) -> Option<Token> {
801 if let Some(w) = self.ignore_case_word() {
802 return Some(Token::Word(w));
803 }
804 if let Some(i) = self.integer() {
805 return Some(Token::Dec(i));
806 }
807 if let Some(h) = self.ignore_case_hex() {
808 return Some(Token::Hex(h));
809 }
810 if let Some(s) = self.string() {
811 return Some(Token::Str(s));
812 }
813 if let Some(i) = self.lit_integer() {
814 return Some(Token::LitDec(i));
815 }
816 if let Some(h) = self.ignore_case_lit_hex() {
817 return Some(Token::LitHex(h));
818 }
819 if let Some(s) = self.lit_string() {
820 return Some(Token::LitStr(s));
821 }
822 None
823 }
824
825 pub fn uinteger(&mut self) -> Option<u16> {
826 if !matches!(self.next(),
827 Some(ch) if ch.is_ascii_digit())
828 {
829 self.recover();
830 return None;
831 }
832 while let Some(ch) = self.next() {
833 if !ch.is_ascii_digit() {
834 self.back();
835 break;
836 }
837 }
838 if let Ok(value) = self.temp.parse::<u64>() {
839 self.clear();
840 Some(value as u16)
841 } else {
842 self.recover();
843 None
844 }
845 }
846}
847
848impl std::fmt::Display for Token {
849 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
850 match self {
851 Self::Word(s) => s.to_ascii_uppercase().fmt(f),
852 Self::Dec(d) => d.to_string().fmt(f),
853 Self::Hex(h) => format!("#{:04X}", h).fmt(f),
854 Self::Str(s) => format!("'{}'", s.replace('\'', "''")).fmt(f),
855 Self::LitDec(d) => format!("={}", d).fmt(f),
856 Self::LitHex(h) => format!("=#{:04X}", h).fmt(f),
857 Self::LitStr(s) => format!("='{}'", s.replace('\'', "''")).fmt(f),
858 }
859 }
860}