yang_rs/
parser.rs

1//
2// YANG - Parser
3//  Copyright (C) 2021 Toshiaki Takada
4//
5
6use std::cell::Cell;
7use std::io::Error;
8use std::io::ErrorKind;
9
10use super::config::Config;
11use super::core::*;
12use super::error::*;
13use super::stmt::*;
14use super::substmt::*;
15
16use crate::collect_a_stmt;
17
/// Strip layout whitespace from the body of a multi-line double-quoted string.
///
/// 6.1.3. Quoting
///
///   If a double-quoted string contains a line break followed by space or
///   tab characters that are used to indent the text according to the
///   layout in the YANG file, this leading whitespace is stripped from the
///   string, up to and including the column of the starting double quote
///   character, or to the first non-whitespace character, whichever occurs
///   first.  Any tab character in a succeeding line that must be examined
///   for stripping is first converted into 8 space characters.
///
///   If a double-quoted string contains space or tab characters before a
///   line break, this trailing whitespace is stripped from the string.
///
/// * `l` - text between the quotes (quotes excluded).
/// * `indent` - number of leading columns to remove from every line that
///   follows a line break.
///
/// The first line is never indentation-stripped: only whitespace that
/// follows a line break is layout.  Leading whitespace of a continuation
/// line is measured with a space as 1 column and a tab as 8 columns; whatever
/// exceeds `indent` is re-emitted as spaces.  A final line that consists of
/// whitespace only is dropped.
fn trim_spaces(l: &str, indent: usize) -> String {
    /// Remove spaces and tabs from the end of `line`.
    fn strip_trailing(line: &str) -> &str {
        line.trim_end_matches(|c: char| c == ' ' || c == '\t')
    }

    let mut out = String::with_capacity(l.len());
    let mut lines = l.split('\n').peekable();
    let mut is_first = true;

    while let Some(line) = lines.next() {
        // Every segment except the last one is terminated by a line break.
        let has_break = lines.peek().is_some();

        if is_first {
            // Text right after the opening quote is content, not layout.
            is_first = false;
            out.push_str(if has_break { strip_trailing(line) } else { line });
        } else {
            // Measure the leading whitespace in columns and locate the text.
            let mut width = 0;
            let mut text_start = line.len();
            for (i, c) in line.char_indices() {
                match c {
                    ' ' => width += 1,
                    '\t' => width += 8,
                    _ => {
                        text_start = i;
                        break;
                    }
                }
            }

            let text = &line[text_start..];
            if !text.is_empty() {
                if width > indent {
                    out.push_str(&" ".repeat(width - indent));
                }
                // Trailing whitespace is only stripped in front of a line break.
                out.push_str(if has_break { strip_trailing(text) } else { text });
            }
        }

        if has_break {
            out.push('\n');
        }
    }

    out
}
82
/// YANG Token type.
///
/// Produced by the tokenizer; variants that carry a `String` hold the text of
/// the token without its delimiters (quotes, comment markers).
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum Token {
    /// Space, tab, carriage return and/or line feed.
    Whitespace(String),

    /// Single line or multi line comments.
    Comment(String),

    /// "+"
    PlusSign,

    /// "{".
    BlockBegin,

    /// "}".
    BlockEnd,

    /// Single quoted or double quoted string.
    QuotedString(String),

    /// Unquoted identifier.
    Identifier(String),

    /// ";".
    StatementEnd,

    /// End of Input.
    EndOfInput,
}

/// Human readable token name, used in diagnostics.
///
/// Implemented as `Display` so that both `{}` formatting and `to_string()`
/// (through the standard blanket `ToString` impl) are available.  Only
/// identifiers include their payload, rendered with `{:?}` inside single
/// quotes.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Whitespace(_) => f.write_str("Whitespace"),
            Token::Comment(_) => f.write_str("Comment"),
            Token::PlusSign => f.write_str("PlusSign"),
            Token::BlockBegin => f.write_str("BlockBegin"),
            Token::BlockEnd => f.write_str("BlockEnd"),
            Token::QuotedString(_) => f.write_str("QuotedString"),
            Token::Identifier(s) => write!(f, "Identifier '{:?}'", s),
            Token::StatementEnd => f.write_str("StatementEnd"),
            Token::EndOfInput => f.write_str("EndOfInput"),
        }
    }
}
129
130/// Parser.
131pub struct Parser {
132    /// Config.
133    config: Config,
134
135    /// Input string.
136    input: String,
137
138    /// Cursor position in bytes from the beginning.
139    pos: Cell<usize>,
140
141    /// Line number at cursor.
142    line: Cell<usize>,
143
144    /// Chars from last line feed.
145    column: Cell<usize>,
146
147    /// Saved token.
148    saved: Cell<Option<Token>>,
149}
150
151impl Parser {
152    /// Constructor.
153    pub fn new(s: String) -> Parser {
154        Parser {
155            config: Config::new(),
156            input: s,
157            pos: Cell::new(0),
158            line: Cell::new(0),
159            column: Cell::new(0),
160            saved: Cell::new(None),
161        }
162    }
163
164    /// Constructor with config.
165    pub fn new_with_config(s: String, config: Config) -> Parser {
166        Parser {
167            config,
168            input: s,
169            pos: Cell::new(0),
170            line: Cell::new(0),
171            column: Cell::new(0),
172            saved: Cell::new(None),
173        }
174    }
175
176    /// Get config reference.
177    pub fn config(&self) -> &Config {
178        &self.config
179    }
180
181    /// Get input string at current position.
182    pub fn input(&self) -> &str {
183        &self.input[self.pos.get()..]
184    }
185
186    /// Return remaining input length.
187    pub fn input_len(&self) -> usize {
188        self.input.len() - self.pos.get()
189    }
190
191    /// Return parser cusor position.
192    pub fn pos(&self) -> usize {
193        self.pos.get()
194    }
195
196    /// Move cursor position forward.
197    pub fn pos_add(&mut self, pos: usize) {
198        self.pos.set(self.pos.get() + pos);
199    }
200
201    /// Return line number.
202    pub fn line(&self) -> usize {
203        self.line.get()
204    }
205
206    /// Add len to line number.
207    pub fn line_add(&self, len: usize) {
208        self.line.set(self.line.get() + len);
209    }
210
211    /// Add chars to column.
212    pub fn column_add(&self, num: usize) {
213        self.column.set(self.column.get() + num);
214    }
215
216    /// Set chars to column from last linefeed.
217    pub fn column_set_from(&self, l: &str) {
218        let rpos = l.rfind("\n").unwrap();
219        self.column.set(l.len() - rpos - 1);
220    }
221
222    /// Save token to saved.
223    pub fn save_token(&mut self, token: Token) {
224        self.saved.replace(Some(token));
225    }
226
227    /// Load token from saved.
228    pub fn load_token(&mut self) -> Option<Token> {
229        self.saved.replace(None)
230    }
231
232    /// Get a token and save it.
233    pub fn peek_token(&mut self) -> Result<Token, YangError> {
234        let token = self.get_token()?;
235        self.save_token(token.clone());
236        Ok(token)
237    }
238
239    /// Get a token except whitespace and comment.
240    pub fn get_token(&mut self) -> Result<Token, YangError> {
241        let mut st = String::new();
242        let mut concat_str = false;
243        let mut string_parsed = false;
244
245        if let Some(token) = self.load_token() {
246            return Ok(token);
247        }
248
249        loop {
250            if self.input_len() == 0 {
251                if st.len() > 0 {
252                    return Ok(Token::QuotedString(st));
253                } else if concat_str {
254                    return Err(YangError::UnexpectedEof);
255                }
256
257                return Ok(Token::EndOfInput);
258            }
259
260            let (token, _pos) = self.get_single_token()?;
261            match token {
262                Token::Whitespace(_) | Token::Comment(_) => {}
263                Token::QuotedString(s) => {
264                    if st.len() == 0 || concat_str {
265                        st.push_str(&s);
266                        concat_str = false;
267                        string_parsed = true;
268                    } else {
269                        return Err(YangError::InvalidString(s));
270                    }
271                }
272                Token::PlusSign => {
273                    if concat_str {
274                        return Err(YangError::InvalidString(st));
275                    } else {
276                        concat_str = true;
277                    }
278                }
279                _ => {
280                    if concat_str {
281                        return Err(YangError::InvalidString(st));
282                    }
283
284                    if string_parsed {
285                        self.save_token(token);
286                        return Ok(Token::QuotedString(st));
287                    }
288
289                    return Ok(token);
290                }
291            }
292        }
293    }
294
295    /// Get a single token and position.
296    pub fn get_single_token(&mut self) -> Result<(Token, usize), YangError> {
297        let input = &self.input();
298        let token: Token;
299        let mut pos: usize = 0;
300
301        if input.starts_with(char::is_whitespace) {
302            pos = match input.find(|c: char| !c.is_whitespace()) {
303                Some(pos) => pos,
304                None => input.len(),
305            };
306
307            let l = &input[..pos];
308            let line = l.matches("\n").count();
309            if line > 0 {
310                self.column_set_from(l);
311                self.line_add(line);
312            } else {
313                self.column_add(l.len());
314            }
315
316            token = Token::Whitespace(String::from(l));
317        } else if input.starts_with("//") {
318            pos = match input.find(|c: char| c == '\r' || c == '\n') {
319                Some(pos) => pos,
320                None => input.len(),
321            };
322            token = Token::Comment(String::from(&input[2..pos]));
323        } else if input.starts_with("/*") {
324            let mut l = &input[2..];
325            pos = match l.find("*/") {
326                Some(pos) => pos,
327                None => return Err(YangError::InvalidComment),
328            };
329
330            l = &l[..pos];
331
332            let line = l.matches("\n").count();
333            if line > 0 {
334                self.column_set_from(l);
335                self.line_add(line);
336            } else {
337                self.column_add(pos + 4);
338            }
339
340            token = Token::Comment(String::from(l));
341            pos += 4;
342        } else if input.starts_with('+') {
343            pos = 1;
344            self.column_add(1);
345            token = Token::PlusSign;
346        } else if input.starts_with('{') {
347            pos = 1;
348            self.column_add(1);
349            token = Token::BlockBegin;
350        } else if input.starts_with('}') {
351            pos = 1;
352            self.column_add(1);
353            token = Token::BlockEnd;
354        } else if input.starts_with(';') {
355            pos = 1;
356            self.column_add(1);
357            token = Token::StatementEnd;
358        } else if input.starts_with('"') {
359            let mut l = &input[1..];
360
361            let mut chars = l.chars();
362            loop {
363                let c = match chars.next() {
364                    Some(c) => c,
365                    None => {
366                        return Err(YangError::InvalidString(
367                            "String not terminated".to_string(),
368                        ))
369                    }
370                };
371
372                if c == '\\' {
373                    let d = match chars.next() {
374                        Some(d) => d,
375                        None => {
376                            return Err(YangError::InvalidString(
377                                "String not terminated".to_string(),
378                            ))
379                        }
380                    };
381                    if d != 'n' && d != 't' && d != '"' && d != '\\' {
382                        return Err(YangError::InvalidString(format!(
383                            "backslash followed by invalid char '{}'",
384                            d
385                        )));
386                    }
387                    pos += 2;
388                } else if c == '"' {
389                    l = &l[..pos];
390                    break;
391                } else {
392                    pos += c.len_utf8();
393                }
394            }
395
396            let line = l[..pos].matches("\n").count();
397            if line > 0 {
398                let column = self.column.get() + 1;
399                let s = trim_spaces(l, column);
400
401                self.line_add(line);
402                token = Token::QuotedString(s);
403            } else {
404                token = Token::QuotedString(String::from(&l[..pos]));
405            }
406
407            pos += 2;
408        } else if input.starts_with("'") {
409            let l = &input[1..];
410            pos = match l.find("'") {
411                Some(pos) => pos,
412                None => {
413                    return Err(YangError::InvalidString(
414                        "String not terminated".to_string(),
415                    ))
416                }
417            };
418
419            let line = l[..pos].matches("\n").count();
420            self.line_add(line);
421
422            token = Token::QuotedString(String::from(&l[..pos]));
423            pos += 2;
424        } else {
425            // 6.1.3. Quoting
426            // An unquoted string is any sequence of characters that does not
427            // contain any space, tab, carriage return, or line feed characters, a
428            // single or double quote character, a semicolon (";"), braces ("{" or
429            // "}"), or comment sequences ("//", "/*", or "*/").
430
431            let mut l = &input[pos..];
432            while l.len() > 0 {
433                let c = l.chars().next().unwrap();
434
435                if c.is_whitespace() || c == '"' || c == '\'' || c == '}' || c == '{' || c == ';' {
436                    break;
437                }
438
439                if l.starts_with("//") || l.starts_with("/*") || l.starts_with("*/") {
440                    break;
441                }
442
443                pos += c.len_utf8();
444                l = &input[pos..];
445            }
446
447            token = Token::Identifier(String::from(&input[..pos]));
448        }
449
450        self.pos_add(pos);
451        Ok((token, pos))
452    }
453
454    /// Return substatements definition.
455    fn substmts_def() -> Vec<SubStmtDef> {
456        vec![
457            SubStmtDef::Optional(SubStmtWith::Stmt(ModuleStmt::keyword)),
458            SubStmtDef::Optional(SubStmtWith::Stmt(SubmoduleStmt::keyword)),
459        ]
460    }
461
462    /// Entry point of YANG parser.  An input and a config has to be set.
463    /// It will return a module or submodule statement.
464    pub fn parse_yang(&mut self) -> Result<YangStmt, YangError> {
465        let mut stmts = SubStmtUtil::parse_substmts(self, Self::substmts_def())?;
466
467        if stmts.contains_key("module") {
468            let module = collect_a_stmt!(stmts, ModuleStmt)?;
469            Ok(YangStmt::ModuleStmt(module))
470        } else if stmts.contains_key("submodule") {
471            let submodule = collect_a_stmt!(stmts, SubmoduleStmt)?;
472            Ok(YangStmt::SubmoduleStmt(submodule))
473        } else {
474            Err(YangError::UnexpectedEof)
475        }
476    }
477
478    /// Parse string as an input, and return YangStmt. Encapsulate YangError into io::Error.
479    pub fn parse_yang_from_string(s: String, config: Config) -> Result<YangStmt, std::io::Error> {
480        let mut parser = Parser::new_with_config(s, config);
481        parser.parse_yang().map_err(|err| {
482            Error::new(
483                ErrorKind::Other,
484                format!(
485                    "YangError: {:?} at line {}, pos {}",
486                    err,
487                    parser.line(),
488                    parser.pos()
489                ),
490            )
491        })
492    }
493}
494
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenize `input` with a default parser and check that `get_token`
    /// yields exactly the `expected` tokens, in order.
    ///
    /// Fixtures that contain line breaks, tabs or trailing spaces are written
    /// with escape sequences so the significant whitespace stays visible and
    /// survives editors that trim lines.
    fn assert_tokens(input: &str, expected: &[Token]) {
        let mut parser = Parser::new(input.to_string());

        for want in expected {
            let got = parser.get_token().unwrap();
            assert_eq!(&got, want);
        }
    }

    /// Shorthand for an identifier token.
    fn ident(s: &str) -> Token {
        Token::Identifier(s.to_string())
    }

    /// Shorthand for a quoted string token.
    fn quoted(s: &str) -> Token {
        Token::QuotedString(s.to_string())
    }

    #[test]
    fn test_get_token() {
        assert_tokens(
            "module { }",
            &[ident("module"), Token::BlockBegin, Token::BlockEnd],
        );
    }

    #[test]
    fn test_get_token_comment_1() {
        assert_tokens(
            "module; /* comment */ statement",
            &[ident("module"), Token::StatementEnd, ident("statement")],
        );
    }

    #[test]
    fn test_get_token_comment_2() {
        assert_tokens(
            "module // comment\n",
            &[ident("module"), Token::EndOfInput],
        );
    }

    #[test]
    fn test_get_token_comment_3() {
        assert_tokens(
            "/* comment // */ module",
            &[ident("module"), Token::EndOfInput],
        );
    }

    #[test]
    fn test_get_token_comment_4() {
        assert_tokens("// /* comment */ module", &[Token::EndOfInput]);
    }

    #[test]
    fn test_get_token_string_1() {
        assert_tokens(r#" "string" "#, &[quoted("string"), Token::EndOfInput]);
    }

    #[test]
    fn test_get_token_string_2() {
        assert_tokens(
            r#" '"string"' "#,
            &[quoted(r#""string""#), Token::EndOfInput],
        );
    }

    #[test]
    fn test_get_token_string_3() {
        assert_tokens(
            r#" "Hello" + "World" { }"#,
            &[
                quoted("HelloWorld"),
                Token::BlockBegin,
                Token::BlockEnd,
                Token::EndOfInput,
            ],
        );
    }

    #[test]
    fn test_get_token_string_4() {
        // Single-quoted strings are taken verbatim, line break included.
        assert_tokens(
            " 'string1\n string2 ' ",
            &[quoted("string1\n string2 "), Token::EndOfInput],
        );
    }

    #[test]
    fn test_get_token_string_5() {
        // The continuation line is indented up to the column after the quote.
        assert_tokens(
            "    \"string1\n     string2\" ",
            &[quoted("string1\nstring2"), Token::EndOfInput],
        );
    }

    #[test]
    fn test_get_token_string_6() {
        // Mixed spaces/tabs, whitespace-only lines and concatenation.
        assert_tokens(
            "/**/\"string1\n\n      string2   \t \n\t\n string3\t\" + \"string4\" ",
            &[
                quoted("string1\n\n string2\n\nstring3\tstring4"),
                Token::EndOfInput,
            ],
        );
    }

    #[test]
    fn test_get_token_empty() {
        assert_tokens(r#"identifier " ";"#, &[ident("identifier"), quoted(" ")]);
        assert_tokens(r#"identifier "";"#, &[ident("identifier"), quoted("")]);
    }
}