namelist/
lib.rs

1pub mod namelists;
2pub mod tokenizer;
3use namelists::parse_namelist;
4use std::{collections::HashMap, fmt::Display, io::Read, slice::Iter};
5use tokenizer::{LocatedToken, NmlParseError, Span, Token, TokenIter, TokenizerError};
6
/// An ordered collection of [`Namelist`] chunks.
///
/// NOTE(review): nothing in this file constructs this type; presumably it
/// represents the full contents of one namelist file — confirm against callers.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NamelistFile {
    /// The chunks, in the order they are stored.
    pub namelists: Vec<Namelist>,
}
11
/// One chunk of tokens produced by [`NmlParser`].
///
/// Both variants carry the raw token sequence of the chunk. Which variant a
/// chunk becomes is decided by `namelists::parse_namelist`, which is not
/// visible here.
/// NOTE(review): presumably `Actual` is a real `&GROUP ... /` namelist and
/// `Other` is the text between namelists — confirm against `parse_namelist`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Namelist {
    /// Tokens of a chunk classified as `Actual`.
    Actual { tokens: Vec<LocatedToken> },
    /// Tokens of a chunk classified as `Other`.
    Other { tokens: Vec<LocatedToken> },
}
17
18impl Namelist {
19    pub fn tokens(&self) -> &[LocatedToken] {
20        match self {
21            Self::Actual { tokens } => tokens,
22            Self::Other { tokens } => tokens,
23        }
24    }
25    pub fn tokens_mut(&mut self) -> &mut Vec<LocatedToken> {
26        match self {
27            Self::Actual { tokens } => tokens,
28            Self::Other { tokens } => tokens,
29        }
30    }
31    pub fn into_tokens(self) -> Vec<LocatedToken> {
32        match self {
33            Self::Actual { tokens } => tokens,
34            Self::Other { tokens } => tokens,
35        }
36    }
37    pub fn append_token(&mut self, token: Token) {
38        let located = LocatedToken { span: None, token };
39        let tokens = match self {
40            Self::Actual { tokens } => tokens,
41            Self::Other { tokens } => tokens,
42        };
43        if tokens.last().map(|l| &l.token) == Some(&Token::RightSlash) {
44            tokens.insert(tokens.len() - 1, located);
45        } else {
46            tokens.push(located);
47        }
48    }
49
50    /// Returns true if successfull.
51    pub fn remove_parameter(&mut self, parameter_name: &str) -> bool {
52        // TODO: consume and replace whitespace either side and replace with a
53        // single space. This would need to deal with comments too.
54        if let Some((start, length)) = self.find_parameter(parameter_name) {
55            let tokens = self.tokens_mut();
56            tokens.drain(start..(start + length));
57            true
58        } else {
59            false
60        }
61    }
62
63    pub fn find_parameter(&self, parameter_name: &str) -> Option<(usize, usize)> {
64        let tokens = match self {
65            Self::Actual { tokens } => tokens,
66            Self::Other { tokens } => tokens,
67        };
68        let mut iter = tokens.iter().enumerate();
69        while let Some((i, located_token)) = iter.next() {
70            if located_token.token() == &Token::Identifier(parameter_name.to_string()) {
71                let start = i;
72                let mut length = 1;
73                // Take any whitespace.
74                for (_, located_token) in iter.by_ref() {
75                    let token = located_token.token();
76                    if token.is_whitespace() || token.is_comment() || matches!(token, Token::Equals)
77                    {
78                        length += 1;
79                        continue;
80                    } else {
81                        break;
82                    }
83                }
84                // TODO: this is incorrect as params might have multiple values.
85                length += 1;
86                return Some((start, length));
87            }
88        }
89        None
90    }
91}
92
93impl Display for Namelist {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        for token in self.tokens() {
96            write!(f, "{token}")?;
97        }
98        Ok(())
99    }
100}
101
/// Iterator that splits a token stream read from `R` into [`Namelist`] chunks.
pub struct NmlParser<R: Read> {
    /// Source of tokens.
    tokenizer: TokenIter<R>,
    /// Current state of the chunk-splitting state machine.
    state: ParserState,
    /// Tokens accumulated so far for the chunk that has not been emitted yet.
    next_namelist: Vec<LocatedToken>,
}
107
108impl<R: Read> NmlParser<R> {
109    pub fn new(input: R) -> Self {
110        NmlParser {
111            tokenizer: TokenIter::new(input),
112            state: ParserState::Start,
113            next_namelist: Vec::new(),
114        }
115    }
116}
117
/// State of the [`NmlParser`] chunk splitter.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ParserState {
    /// Initial state: tokens are accumulated until an `&` is read.
    Start,
    /// Entered when an `&` is read in `Start`; a `/` read in this state ends
    /// the current chunk and returns the parser to `Start`.
    InNamelist,
}
123
124impl<R: Read> Iterator for NmlParser<R> {
125    type Item = Result<Namelist, TokenizerError>;
126
127    fn next(&mut self) -> Option<Self::Item> {
128        let tokens = loop {
129            let token = match self.tokenizer.next() {
130                Some(Ok(token)) => token,
131                Some(Err(err)) => {
132                    return Some(Err(err));
133                }
134                None => {
135                    if !self.next_namelist.is_empty() {
136                        let tokens = std::mem::take(&mut self.next_namelist);
137                        break Some(tokens);
138                    } else {
139                        break None;
140                    }
141                }
142            };
143            match self.state {
144                ParserState::Start => {
145                    if token.token == Token::Ampersand {
146                        let tokens = std::mem::take(&mut self.next_namelist);
147                        self.next_namelist.push(token);
148                        self.state = ParserState::InNamelist;
149                        if !tokens.is_empty() {
150                            break Some(tokens);
151                        }
152                    } else {
153                        self.next_namelist.push(token);
154                    }
155                }
156                ParserState::InNamelist => {
157                    if token.token == Token::Ampersand {
158                        self.state = ParserState::Start;
159                        let tokens = std::mem::take(&mut self.next_namelist);
160                        self.next_namelist.push(token);
161                        break Some(tokens);
162                    } else if token.token == Token::RightSlash {
163                        self.next_namelist.push(token);
164                        self.state = ParserState::Start;
165                        let tokens = std::mem::take(&mut self.next_namelist);
166                        break Some(tokens);
167                    } else {
168                        self.next_namelist.push(token);
169                    }
170                }
171            }
172        }?;
173        parse_namelist(tokens).map(Ok)
174    }
175}
176
/// A namelist decoded into its group name and parameters, as built by
/// [`ParsedNamelist::from_namelist`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParsedNamelist {
    /// Group name: the identifier that follows the leading `&`.
    pub group: String,
    /// Span of the group-name token, if it has one.
    pub span: Option<Span>,
    /// Parameters keyed by name; inserting a name twice keeps the last entry.
    pub parameters: HashMap<String, ParameterValues>,
}
183
184impl ParsedNamelist {
185    pub fn from_namelist(nml: &Namelist) -> Result<ParsedNamelist, NmlParseError> {
186        let mut tokens = nml.tokens().iter();
187        let first_token = tokens.next().ok_or(NmlParseError::NoTokens)?;
188        if first_token.token != Token::Ampersand {
189            return Err(NmlParseError::NoAmpersand(first_token.span));
190        }
191
192        let (group, group_span) = {
193            let gn = tokens.next().ok_or(NmlParseError::NoTokens)?;
194            if let Token::Identifier(s) = &gn.token {
195                (s.to_string(), gn.span())
196            } else {
197                return Err(NmlParseError::InvalidGroupName(gn.span));
198            }
199        };
200        let mut parameters: HashMap<String, ParameterValues> = Default::default();
201        let mut token_buf: Vec<LocatedToken> = vec![];
202        while let Some(pn) = token_buf
203            .pop()
204            .or_else(|| next_non_ws(&mut tokens).cloned())
205        {
206            // Take parameter name
207            let parameter_name = pn.clone();
208            let mut location_tokens: Vec<LocatedToken> = vec![];
209            if parameter_name.token == Token::RightSlash {
210                break;
211            }
212            if let Token::Identifier(name) = &parameter_name.token {
213                let mut in_location = false;
214                loop {
215                    let b = token_buf.pop();
216                    let located_token = b.as_ref().or_else(|| next_non_ws(&mut tokens));
217                    if let Some(token) = located_token {
218                        match &token.token {
219                            Token::LeftBracket => {
220                                in_location = true;
221                                location_tokens.push(token.clone());
222                            }
223                            Token::RightBracket => {
224                                in_location = false;
225                                location_tokens.push(token.clone());
226                            }
227                            Token::Equals => {
228                                if in_location {
229                                    panic!("Found '=' in location");
230                                } else {
231                                    break;
232                                }
233                            }
234                            t => {
235                                if t.is_location_token() {
236                                    location_tokens.push(token.clone());
237                                } else {
238                                    panic!("invalid location token {:?}", t);
239                                }
240                            }
241                        }
242                    } else {
243                        return Err(NmlParseError::NoEquals(located_token.and_then(|t| t.span)));
244                    };
245                }
246                let mut value_tokens: Vec<LocatedToken> = vec![];
247                while let Some(token) = token_buf
248                    .pop()
249                    .as_ref()
250                    .or_else(|| next_non_ws(&mut tokens))
251                {
252                    if token.token == Token::RightSlash {
253                        break;
254                    }
255                    if token.token == Token::Equals {
256                        token_buf.push(token.clone());
257                        if let Some(t) = value_tokens.pop() {
258                            token_buf.push(t);
259                        }
260                        break;
261                    }
262                    value_tokens.push(token.clone());
263                }
264                parameters.insert(
265                    name.to_string(),
266                    ParameterValues {
267                        span: pn.span,
268                        dimensions: location_tokens,
269                        values: value_tokens,
270                    },
271                );
272                // loop until we hit equals or right slash
273                continue;
274            } else {
275                return Err(NmlParseError::InvalidParameterName(parameter_name.span));
276            }
277        }
278        Ok(ParsedNamelist {
279            group,
280            span: group_span,
281            parameters,
282        })
283    }
284}
285
/// The tokens recorded for one parameter by [`ParsedNamelist::from_namelist`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParameterValues {
    /// Span of the parameter-name token, if it has one.
    pub span: Option<Span>,
    /// Tokens found between the parameter name and its `=` (brackets and
    /// their contents), with whitespace and commas dropped.
    pub dimensions: Vec<LocatedToken>,
    /// Tokens collected after the `=`, with whitespace and commas dropped.
    pub values: Vec<LocatedToken>,
}
292
293fn next_non_ws<'a>(tokens: &'a mut Iter<LocatedToken>) -> Option<&'a LocatedToken> {
294    loop {
295        let token = tokens.next()?;
296        match token.token {
297            Token::Whitespace(_) | Token::Comma => {
298                continue;
299            }
300            _ => return Some(token),
301        }
302    }
303}
304
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizer::Span;

    /// Builds a `LocatedToken` on line 0 whose offset and column are both
    /// `$pos` and whose length is `$len`.
    macro_rules! located {
        ($token:expr, $pos:expr, $len:expr) => {
            LocatedToken {
                token: $token,
                span: Some(Span {
                    lo: $pos,
                    len: $len,
                    column: $pos,
                    line: 0,
                }),
            }
        };
    }

    /// Runs the chunk parser over `input`, failing the test on any error.
    fn parse_all(input: &str) -> Vec<Namelist> {
        NmlParser::new(std::io::Cursor::new(input))
            .collect::<Result<Vec<Namelist>, _>>()
            .expect("test parse failed")
    }

    /// Strips the spans, leaving only the token kinds of every chunk.
    fn bare_tokens(nmls: &[Namelist]) -> Vec<Vec<Token>> {
        nmls.iter()
            .map(|nml| {
                nml.tokens()
                    .iter()
                    .map(|located| located.token.clone())
                    .collect()
            })
            .collect()
    }

    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    fn num(text: &str) -> Token {
        Token::Number(text.to_string())
    }

    fn space() -> Token {
        Token::Whitespace(" ".to_string())
    }

    #[test]
    fn single_nml() {
        let nmls = parse_all("&Head val = 2 /");
        let expected = vec![Namelist::Actual {
            tokens: vec![
                located!(Token::Ampersand, 0, 1),
                located!(ident("Head"), 1, 4),
                located!(space(), 5, 1),
                located!(ident("val"), 6, 3),
                located!(space(), 9, 1),
                located!(Token::Equals, 10, 1),
                located!(space(), 11, 1),
                located!(num("2"), 12, 1),
                located!(space(), 13, 1),
                located!(Token::RightSlash, 14, 1),
            ],
        }];
        assert_eq!(nmls, expected);
    }

    #[test]
    fn single_nml_append() {
        let mut nmls = parse_all("&Head val = 2 /");
        if let Some(nml) = nmls.last_mut() {
            nml.append_token(ident("hello"));
        }
        let expected = vec![vec![
            Token::Ampersand,
            ident("Head"),
            space(),
            ident("val"),
            space(),
            Token::Equals,
            space(),
            num("2"),
            space(),
            ident("hello"),
            Token::RightSlash,
        ]];
        assert_eq!(bare_tokens(&nmls), expected);
    }

    #[test]
    fn location1() {
        let mut nmls = parse_all("&Head val(1:3)= 1,2,3 /");
        if let Some(nml) = nmls.last_mut() {
            nml.append_token(ident("hello"));
        }
        let expected = vec![vec![
            Token::Ampersand,
            ident("Head"),
            space(),
            ident("val"),
            Token::LeftBracket,
            num("1"),
            Token::Colon,
            num("3"),
            Token::RightBracket,
            Token::Equals,
            space(),
            num("1"),
            Token::Comma,
            num("2"),
            Token::Comma,
            num("3"),
            space(),
            ident("hello"),
            Token::RightSlash,
        ]];
        assert_eq!(bare_tokens(&nmls), expected);
    }

    #[test]
    fn parsenml1() {
        for nml in parse_all("&Head val= 1,2,3 /") {
            ParsedNamelist::from_namelist(&nml).unwrap();
        }
    }

    #[test]
    fn parsenml2() {
        for nml in parse_all("&Head val(1:3)= 1,2,3 /") {
            let pnml = ParsedNamelist::from_namelist(&nml).unwrap();
            assert_eq!(pnml.group, "Head");
            let p = pnml.parameters.get("val").unwrap();
            eprintln!("pnml: {pnml:#?}");
            let expected = [
                located!(Token::LeftBracket, 9, 1),
                located!(num("1"), 10, 1),
                located!(Token::Colon, 11, 1),
                located!(num("3"), 12, 1),
                located!(Token::RightBracket, 13, 1),
            ];
            assert_eq!(p.dimensions.len(), 5);
            for (actual, wanted) in p.dimensions.iter().zip(expected.iter()) {
                assert_eq!(actual, wanted);
            }
        }
    }

    #[test]
    fn parsenml3() {
        for nml in parse_all("&Head val(1:  3)= 1,2,3 /") {
            let pnml = ParsedNamelist::from_namelist(&nml).unwrap();
            assert_eq!(pnml.group, "Head");
            let p = pnml.parameters.get("val").unwrap();
            eprintln!("pnml: {pnml:#?}");
            let expected = [
                Token::LeftBracket,
                num("1"),
                Token::Colon,
                num("3"),
                Token::RightBracket,
            ];
            assert_eq!(p.dimensions.len(), 5);
            for (actual, wanted) in p.dimensions.iter().zip(expected.iter()) {
                assert_eq!(&actual.token, wanted);
            }
        }
    }

    #[test]
    fn two_nmls() {
        let input = "&Head val = 2 /\n&DUMP x=2,3,4 /";
        let nmls: Vec<Vec<Token>> = parse_all(input)
            .into_iter()
            .map(|nml| {
                nml.into_tokens()
                    .into_iter()
                    .map(|located| located.token)
                    .collect()
            })
            .collect();
        let expected = vec![
            vec![
                Token::Ampersand,
                ident("Head"),
                space(),
                ident("val"),
                space(),
                Token::Equals,
                space(),
                num("2"),
                space(),
                Token::RightSlash,
            ],
            vec![Token::Comment("\n".to_string())],
            vec![
                Token::Ampersand,
                ident("DUMP"),
                space(),
                ident("x"),
                Token::Equals,
                num("2"),
                Token::Comma,
                num("3"),
                Token::Comma,
                num("4"),
                space(),
                Token::RightSlash,
            ],
        ];
        assert_eq!(nmls, expected);
    }
}