// logix_type/token/parse/mod.rs

1mod string;
2
3use std::{ops::Range, str::from_utf8};
4
5const IDENT1: ByteSet = ByteSet(concat!(
6    "abcdefghijklmnopqrstuvwxyz",
7    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
8    "0123456789",
9    "_-",
10));
11
12use bstr::ByteSlice;
13
14use super::{Action, Brace, ByteSet, Delim, Literal, Token, TokenError};
15
16#[derive(Debug, PartialEq)]
17pub struct ParseRes<'a> {
18    /// How much to skip before the next parse_token call
19    pub len: usize,
20    /// The range that contains the token
21    pub range: Range<usize>,
22    /// Number of lines skipped, normally 0, but set for multi-line comments and strings
23    pub lines: usize,
24    /// The current token
25    pub token: Result<Token<'a>, TokenError>,
26}
27
28impl<'a> ParseRes<'a> {
29    pub(super) fn new(range: Range<usize>, token: Token<'a>) -> Self {
30        Self::new_res(range, 0, Ok(token))
31    }
32
33    pub(super) fn new_res(
34        range: Range<usize>,
35        extra: usize,
36        token: Result<Token<'a>, TokenError>,
37    ) -> ParseRes<'a> {
38        Self {
39            len: range.end + extra,
40            range,
41            lines: 0,
42            token,
43        }
44    }
45
46    pub(super) fn new_lines(
47        buf: &[u8],
48        range: Range<usize>,
49        extra: usize,
50        token: Result<Token<'a>, TokenError>,
51    ) -> Self {
52        let lines = buf[range.start..range.end + extra].find_iter(b"\n").count();
53        Self {
54            len: range.end + extra,
55            range,
56            lines,
57            token,
58        }
59    }
60
61    pub(super) fn take_byteset(
62        buf: &'a [u8],
63        span_start: usize,
64        start: usize,
65        byteset: ByteSet,
66        f: impl FnOnce(&'a str) -> Token,
67    ) -> Self {
68        let len = buf[start..]
69            .find_not_byteset(byteset.0)
70            .unwrap_or(buf.len() - start);
71        let end = start + len;
72        ParseRes::new(span_start..end, f(from_utf8(&buf[start..end]).unwrap()))
73    }
74
75    fn new_brace(pos: usize, start: bool, brace: Brace) -> Self {
76        Self::new(pos..pos + 1, Token::Brace { start, brace })
77    }
78}
79
80pub fn parse_token(buf: &[u8]) -> ParseRes {
81    let start = buf.find_not_byteset(b" \t").unwrap_or(0);
82
83    match buf.get(start) {
84        Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => {
85            ParseRes::take_byteset(buf, start, start, IDENT1, Token::Ident)
86        }
87        Some(b'-' | b'0'..=b'9') => {
88            ParseRes::take_byteset(buf, start, start, ByteSet("0123456789-._"), |s| {
89                Token::Literal(Literal::Num(s))
90            })
91        }
92        Some(b'/') => {
93            if let Some(ret) = super::comment::parse_comment(buf, start) {
94                ret
95            } else {
96                ParseRes::new_res(start..start + 1, 0, Err(TokenError::UnexpectedChar('/')))
97            }
98        }
99        Some(b'{') => ParseRes::new_brace(start, true, Brace::Curly),
100        Some(b'}') => ParseRes::new_brace(start, false, Brace::Curly),
101        Some(b'(') => ParseRes::new_brace(start, true, Brace::Paren),
102        Some(b')') => ParseRes::new_brace(start, false, Brace::Paren),
103        Some(b'[') => ParseRes::new_brace(start, true, Brace::Square),
104        Some(b']') => ParseRes::new_brace(start, false, Brace::Square),
105        Some(b'<') => ParseRes::new_brace(start, true, Brace::Angle),
106        Some(b'>') => ParseRes::new_brace(start, false, Brace::Angle),
107        Some(b':') => ParseRes::new(start..start + 1, Token::Delim(Delim::Colon)),
108        Some(b',') => ParseRes::new(start..start + 1, Token::Delim(Delim::Comma)),
109        Some(b'\n') => {
110            let off = buf[start..]
111                .find_not_byteset("\r\n \t")
112                .unwrap_or(buf.len() - start);
113            ParseRes::new_lines(
114                buf,
115                start..start,
116                off,
117                Ok(Token::Newline(buf.len() == start + off)),
118            )
119        }
120        Some(b'"') => string::parse_basic(buf, start),
121        Some(b'#') => {
122            if let Some(ret) = string::parse_tagged(buf, start) {
123                ret
124            } else {
125                ParseRes::new_res(start..start + 1, 0, Err(TokenError::UnexpectedChar('#')))
126            }
127        }
128        Some(b'@') => {
129            if matches!(buf.get(start + 1), Some(b'a'..=b'z' | b'A'..=b'Z' | b'_')) {
130                ParseRes::take_byteset(buf, start, start + 1, IDENT1, |a| match a {
131                    "include" => Token::Action(Action::Include),
132                    _ => todo!(),
133                })
134            } else {
135                todo!()
136            }
137        }
138        _ => {
139            if let Some((_, off, chr)) = buf[start..].char_indices().next() {
140                ParseRes::new_res(start..start + off, 0, Err(TokenError::UnexpectedChar(chr)))
141            } else {
142                ParseRes::new(buf.len()..buf.len(), Token::Newline(true))
143            }
144        }
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone `{` parses as an opening curly brace spanning the first byte.
    #[test]
    fn basics() {
        let expected = ParseRes {
            len: 1,
            range: 0..1,
            lines: 0,
            token: Ok(Token::Brace {
                start: true,
                brace: Brace::Curly,
            }),
        };

        assert_eq!(parse_token(b"{"), expected);
    }
}