1mod string;
2
3use std::{ops::Range, str::from_utf8};
4
/// Bytes permitted inside an identifier: ASCII letters, digits, `_` and `-`.
///
/// `parse_token` hands this set to `ParseRes::take_byteset` when scanning
/// identifiers and `@`-action names. The first byte is checked separately at
/// those call sites and may only be an ASCII letter or `_`.
const IDENT1: ByteSet = ByteSet(concat!(
    "abcdefghijklmnopqrstuvwxyz",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "0123456789",
    "_-",
));
11
12use bstr::ByteSlice;
13
14use super::{Action, Brace, ByteSet, Delim, Literal, Token, TokenError};
15
/// Outcome of lexing a single token from a byte buffer.
#[derive(Debug, PartialEq)]
pub struct ParseRes<'a> {
    /// `range.end` plus any extra bytes consumed beyond the token's range.
    /// Presumably the offset at which the caller resumes lexing — TODO confirm
    /// against the caller.
    pub len: usize,
    /// Byte offsets attributed to the token within the buffer it was parsed
    /// from. May be empty (newline runs are reported as `start..start`).
    pub range: Range<usize>,
    /// Number of `\n` bytes in the consumed span. Only `new_lines` computes
    /// it; the other constructors store 0.
    pub lines: usize,
    /// The recognised token, or the error explaining why none was produced.
    pub token: Result<Token<'a>, TokenError>,
}
27
28impl<'a> ParseRes<'a> {
29 pub(super) fn new(range: Range<usize>, token: Token<'a>) -> Self {
30 Self::new_res(range, 0, Ok(token))
31 }
32
33 pub(super) fn new_res(
34 range: Range<usize>,
35 extra: usize,
36 token: Result<Token<'a>, TokenError>,
37 ) -> ParseRes<'a> {
38 Self {
39 len: range.end + extra,
40 range,
41 lines: 0,
42 token,
43 }
44 }
45
46 pub(super) fn new_lines(
47 buf: &[u8],
48 range: Range<usize>,
49 extra: usize,
50 token: Result<Token<'a>, TokenError>,
51 ) -> Self {
52 let lines = buf[range.start..range.end + extra].find_iter(b"\n").count();
53 Self {
54 len: range.end + extra,
55 range,
56 lines,
57 token,
58 }
59 }
60
61 pub(super) fn take_byteset(
62 buf: &'a [u8],
63 span_start: usize,
64 start: usize,
65 byteset: ByteSet,
66 f: impl FnOnce(&'a str) -> Token,
67 ) -> Self {
68 let len = buf[start..]
69 .find_not_byteset(byteset.0)
70 .unwrap_or(buf.len() - start);
71 let end = start + len;
72 ParseRes::new(span_start..end, f(from_utf8(&buf[start..end]).unwrap()))
73 }
74
75 fn new_brace(pos: usize, start: bool, brace: Brace) -> Self {
76 Self::new(pos..pos + 1, Token::Brace { start, brace })
77 }
78}
79
80pub fn parse_token(buf: &[u8]) -> ParseRes {
81 let start = buf.find_not_byteset(b" \t").unwrap_or(0);
82
83 match buf.get(start) {
84 Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => {
85 ParseRes::take_byteset(buf, start, start, IDENT1, Token::Ident)
86 }
87 Some(b'-' | b'0'..=b'9') => {
88 ParseRes::take_byteset(buf, start, start, ByteSet("0123456789-._"), |s| {
89 Token::Literal(Literal::Num(s))
90 })
91 }
92 Some(b'/') => {
93 if let Some(ret) = super::comment::parse_comment(buf, start) {
94 ret
95 } else {
96 ParseRes::new_res(start..start + 1, 0, Err(TokenError::UnexpectedChar('/')))
97 }
98 }
99 Some(b'{') => ParseRes::new_brace(start, true, Brace::Curly),
100 Some(b'}') => ParseRes::new_brace(start, false, Brace::Curly),
101 Some(b'(') => ParseRes::new_brace(start, true, Brace::Paren),
102 Some(b')') => ParseRes::new_brace(start, false, Brace::Paren),
103 Some(b'[') => ParseRes::new_brace(start, true, Brace::Square),
104 Some(b']') => ParseRes::new_brace(start, false, Brace::Square),
105 Some(b'<') => ParseRes::new_brace(start, true, Brace::Angle),
106 Some(b'>') => ParseRes::new_brace(start, false, Brace::Angle),
107 Some(b':') => ParseRes::new(start..start + 1, Token::Delim(Delim::Colon)),
108 Some(b',') => ParseRes::new(start..start + 1, Token::Delim(Delim::Comma)),
109 Some(b'\n') => {
110 let off = buf[start..]
111 .find_not_byteset("\r\n \t")
112 .unwrap_or(buf.len() - start);
113 ParseRes::new_lines(
114 buf,
115 start..start,
116 off,
117 Ok(Token::Newline(buf.len() == start + off)),
118 )
119 }
120 Some(b'"') => string::parse_basic(buf, start),
121 Some(b'#') => {
122 if let Some(ret) = string::parse_tagged(buf, start) {
123 ret
124 } else {
125 ParseRes::new_res(start..start + 1, 0, Err(TokenError::UnexpectedChar('#')))
126 }
127 }
128 Some(b'@') => {
129 if matches!(buf.get(start + 1), Some(b'a'..=b'z' | b'A'..=b'Z' | b'_')) {
130 ParseRes::take_byteset(buf, start, start + 1, IDENT1, |a| match a {
131 "include" => Token::Action(Action::Include),
132 _ => todo!(),
133 })
134 } else {
135 todo!()
136 }
137 }
138 _ => {
139 if let Some((_, off, chr)) = buf[start..].char_indices().next() {
140 ParseRes::new_res(start..start + off, 0, Err(TokenError::UnexpectedChar(chr)))
141 } else {
142 ParseRes::new(buf.len()..buf.len(), Token::Newline(true))
143 }
144 }
145 }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone `{` lexes to a one-byte opening curly brace with no newlines.
    #[test]
    fn basics() {
        let expected = ParseRes {
            len: 1,
            range: 0..1,
            lines: 0,
            token: Ok(Token::Brace {
                start: true,
                brace: Brace::Curly,
            }),
        };

        assert_eq!(parse_token(b"{"), expected);
    }
}