1use crate::error::{NxsError, Result};
2
/// A lexical token produced by [`Lexer::tokenize`].
///
/// Value-carrying variants hold the already-decoded payload of a literal;
/// the remaining variants are identifiers, punctuation, and the end marker.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Integer literal, introduced by `=`.
    Int(i64),
    /// Floating-point literal, introduced by `~`.
    Float(f64),
    /// Boolean literal, written `?true` or `?false`.
    Bool(bool),
    /// Keyword, introduced by `$`; holds the name without the sigil.
    Keyword(String),
    /// Double-quoted string literal with its escapes already resolved.
    Str(String),
    /// Temporal literal (`@` followed by a digit); holds the `i64` produced by
    /// `parse_temporal`.
    Time(i64),
    /// Binary literal written as hex digits between `<` and `>`.
    Binary(Vec<u8>),
    /// Link offset, introduced by `&`.
    Link(i32),
    /// Macro: either `@name` (stored with its `@`) or the trimmed text after `!`.
    Macro(String),
    /// Null literal, written `^`.
    Null,

    /// Bare identifier (object key or similar name).
    Ident(String),
    /// `:`
    Colon,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `,`
    Comma,
    /// `(`
    LParen,
    /// `)`
    RParen,

    /// End of input; always the final token of a successful `tokenize` call.
    Eof,
}
30
/// Character-level scanner that turns source text into [`Token`]s.
pub struct Lexer {
    /// The whole input, decoded up front into `char`s so the cursor can index it directly.
    input: Vec<char>,
    /// Index into `input` of the next character to be read.
    pos: usize,
}
35
36impl Lexer {
37 pub fn new(input: &str) -> Self {
38 Lexer {
39 input: input.chars().collect(),
40 pos: 0,
41 }
42 }
43
44 fn peek(&self) -> Option<char> {
45 self.input.get(self.pos).copied()
46 }
47
48 fn advance(&mut self) -> Option<char> {
49 let c = self.input.get(self.pos).copied();
50 self.pos += 1;
51 c
52 }
53
54 fn skip_whitespace_and_comments(&mut self) {
55 while let Some(c) = self.peek() {
56 if c == '#' {
57 while let Some(c) = self.peek() {
58 self.advance();
59 if c == '\n' {
60 break;
61 }
62 }
63 } else if c.is_whitespace() {
64 self.advance();
65 } else {
66 break;
67 }
68 }
69 }
70
71 fn read_while<F: Fn(char) -> bool>(&mut self, pred: F) -> String {
72 let mut s = String::new();
73 while let Some(c) = self.peek() {
74 if pred(c) {
75 s.push(c);
76 self.advance();
77 } else {
78 break;
79 }
80 }
81 s
82 }
83
84 fn read_string(&mut self) -> Result<String> {
85 let mut s = String::new();
87 loop {
88 match self.advance() {
89 None => return Err(NxsError::ParseError("unterminated string".into())),
90 Some('"') => break,
91 Some('\\') => match self.advance() {
92 Some('\\') => s.push('\\'),
93 Some('"') => s.push('"'),
94 Some('n') => s.push('\n'),
95 Some('r') => s.push('\r'),
96 Some('t') => s.push('\t'),
97 Some('0') => s.push('\0'),
98 Some('u') => {
99 let hex: String = (0..4).filter_map(|_| self.advance()).collect();
100 let code = u32::from_str_radix(&hex, 16)
101 .map_err(|_| NxsError::ParseError(format!("bad \\u escape: {hex}")))?;
102 let ch = char::from_u32(code).ok_or_else(|| {
103 NxsError::ParseError(format!("invalid unicode: {code}"))
104 })?;
105 s.push(ch);
106 }
107 Some('U') => {
108 let hex: String = (0..8).filter_map(|_| self.advance()).collect();
109 let code = u32::from_str_radix(&hex, 16)
110 .map_err(|_| NxsError::ParseError(format!("bad \\U escape: {hex}")))?;
111 let ch = char::from_u32(code).ok_or_else(|| {
112 NxsError::ParseError(format!("invalid unicode: {code}"))
113 })?;
114 s.push(ch);
115 }
116 Some(c) => return Err(NxsError::BadEscape(c)),
117 None => return Err(NxsError::ParseError("unterminated escape".into())),
118 },
119 Some(c) => s.push(c),
120 }
121 }
122 Ok(s)
123 }
124
125 fn read_binary(&mut self) -> Result<Vec<u8>> {
126 let mut hex = String::new();
128 loop {
129 match self.advance() {
130 Some('>') => break,
131 Some(c) if c.is_ascii_hexdigit() || c.is_whitespace() => {
132 if c.is_ascii_hexdigit() {
133 hex.push(c);
134 }
135 }
136 Some(c) => {
137 return Err(NxsError::ParseError(format!(
138 "unexpected char in binary: '{c}'"
139 )));
140 }
141 None => return Err(NxsError::ParseError("unterminated binary literal".into())),
142 }
143 }
144 if hex.len() % 2 != 0 {
145 return Err(NxsError::ParseError(
146 "binary hex must have even number of digits".into(),
147 ));
148 }
149 (0..hex.len())
150 .step_by(2)
151 .map(|i| {
152 u8::from_str_radix(&hex[i..i + 2], 16)
153 .map_err(|_| NxsError::ParseError(format!("bad hex byte: {}", &hex[i..i + 2])))
154 })
155 .collect()
156 }
157
158 fn read_macro_expr(&mut self) -> String {
159 let mut s = String::new();
161 while let Some(c) = self.peek() {
162 if c == '\n' || c == ',' || c == '}' {
163 break;
164 }
165 s.push(c);
166 self.advance();
167 }
168 s.trim().to_string()
169 }
170
171 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
172 let mut tokens = Vec::new();
173 loop {
174 self.skip_whitespace_and_comments();
175 match self.peek() {
176 None => {
177 tokens.push(Token::Eof);
178 break;
179 }
180 Some(c) => {
181 self.advance();
182 let tok = match c {
183 '{' => Token::LBrace,
184 '}' => Token::RBrace,
185 '[' => Token::LBracket,
186 ']' => Token::RBracket,
187 '(' => Token::LParen,
188 ')' => Token::RParen,
189 ':' => Token::Colon,
190 ',' => Token::Comma,
191
192 '=' => {
194 let neg = if self.peek() == Some('-') {
195 self.advance();
196 true
197 } else {
198 false
199 };
200 let s = self.read_while(|c| c.is_ascii_digit());
201 let n: i64 = s
202 .parse()
203 .map_err(|_| NxsError::ParseError(format!("bad int: {s}")))?;
204 Token::Int(if neg { -n } else { n })
205 }
206 '~' => {
207 let neg = if self.peek() == Some('-') {
208 self.advance();
209 true
210 } else {
211 false
212 };
213 let s = self.read_while(|c| {
214 c.is_ascii_digit()
215 || c == '.'
216 || c == 'e'
217 || c == 'E'
218 || c == '+'
219 || c == '-'
220 });
221 let f: f64 = s
222 .parse()
223 .map_err(|_| NxsError::ParseError(format!("bad float: {s}")))?;
224 Token::Float(if neg { -f } else { f })
225 }
226 '?' => {
227 let s = self.read_while(|c| c.is_alphabetic());
228 match s.as_str() {
229 "true" => Token::Bool(true),
230 "false" => Token::Bool(false),
231 _ => return Err(NxsError::ParseError(format!("bad bool: {s}"))),
232 }
233 }
234 '$' => {
235 let s = self.read_while(|c| c.is_alphanumeric() || c == '_');
236 Token::Keyword(s)
237 }
238 '"' => Token::Str(self.read_string()?),
239 '@' => {
240 if self.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
242 let s = self.read_while(|c| {
244 !c.is_whitespace() && c != ',' && c != '}' && c != ']'
245 });
246 let ns = parse_temporal(&s)?;
247 Token::Time(ns)
248 } else {
249 let ident = self.read_while(|c| c.is_alphanumeric() || c == '_');
251 Token::Macro(format!("@{ident}"))
252 }
253 }
254 '<' => Token::Binary(self.read_binary()?),
255 '&' => {
256 let neg = if self.peek() == Some('-') {
257 self.advance();
258 true
259 } else {
260 false
261 };
262 let s = self.read_while(|c| c.is_ascii_digit());
263 let n: i32 = s.parse().map_err(|_| {
264 NxsError::ParseError(format!("bad link offset: {s}"))
265 })?;
266 Token::Link(if neg { -n } else { n })
267 }
268 '!' => Token::Macro(self.read_macro_expr()),
269 '^' => Token::Null,
270
271 c if c.is_alphabetic() || c == '_' => {
273 let mut s = c.to_string();
274 s.push_str(
275 &self.read_while(|c| c.is_alphanumeric() || c == '_' || c == '-'),
276 );
277 Token::Ident(s)
278 }
279
280 other => return Err(NxsError::UnknownSigil(other)),
281 };
282 tokens.push(tok);
283 }
284 }
285 }
286 Ok(tokens)
287 }
288}
289
290fn parse_temporal(s: &str) -> Result<i64> {
291 if s.len() == 10 && s.chars().nth(4) == Some('-') {
293 let year: i64 = s[0..4]
294 .parse()
295 .map_err(|_| NxsError::ParseError(format!("bad date: {s}")))?;
296 let month: i64 = s[5..7]
297 .parse()
298 .map_err(|_| NxsError::ParseError(format!("bad date: {s}")))?;
299 let day: i64 = s[8..10]
300 .parse()
301 .map_err(|_| NxsError::ParseError(format!("bad date: {s}")))?;
302 let days = days_since_epoch(year, month, day);
304 return days
305 .checked_mul(86_400_000_000_000i64)
306 .ok_or_else(|| NxsError::ParseError(format!("temporal overflow: {s}")))
307 .map(Some)
308 .map(|v| v.unwrap());
309 }
310 if s.len() >= 19 && s.as_bytes().get(4) == Some(&b'-') && s.as_bytes().get(10) == Some(&b'T') {
312 let date_ns = parse_temporal(&s[..10])?;
313 let hour: i64 = s[11..13]
314 .parse()
315 .map_err(|_| NxsError::ParseError(format!("bad temporal: {s}")))?;
316 let minute: i64 = s[14..16]
317 .parse()
318 .map_err(|_| NxsError::ParseError(format!("bad temporal: {s}")))?;
319 let second: i64 = s[17..19]
320 .parse()
321 .map_err(|_| NxsError::ParseError(format!("bad temporal: {s}")))?;
322 if hour > 23 || minute > 59 || second > 59 {
323 return Err(NxsError::ParseError(format!("bad temporal: {s}")));
324 }
325 let frac_ns = if let Some(frac) = s.get(19..).and_then(|rest| rest.strip_prefix('.')) {
326 if frac.is_empty() || frac.len() > 9 || !frac.bytes().all(|b| b.is_ascii_digit()) {
327 return Err(NxsError::ParseError(format!("bad temporal: {s}")));
328 }
329 let mut padded = frac.to_string();
330 while padded.len() < 9 {
331 padded.push('0');
332 }
333 padded
334 .parse::<i64>()
335 .map_err(|_| NxsError::ParseError(format!("bad temporal: {s}")))?
336 } else if s.len() == 19 {
337 0
338 } else {
339 return Err(NxsError::ParseError(format!("bad temporal: {s}")));
340 };
341 return date_ns
342 .checked_add(hour * 3_600_000_000_000)
343 .and_then(|v| v.checked_add(minute * 60_000_000_000))
344 .and_then(|v| v.checked_add(second * 1_000_000_000))
345 .and_then(|v| v.checked_add(frac_ns))
346 .ok_or_else(|| NxsError::ParseError(format!("temporal overflow: {s}")));
347 }
348 s.parse::<i64>()
350 .map_err(|_| NxsError::ParseError(format!("bad temporal: {s}")))
351}
352
/// Returns the number of days from 1970-01-01 to the given Gregorian calendar date
/// (negative for earlier dates).
///
/// The date is first converted to a Julian Day Number with the usual integer formula
/// (the year is shifted to start in March so the leap day falls at its end), and the
/// Julian Day Number of 1970-01-01 is then subtracted. The arguments are not validated.
fn days_since_epoch(year: i64, month: i64, day: i64) -> i64 {
    /// Julian Day Number of 1970-01-01.
    const UNIX_EPOCH_JDN: i64 = 2_440_588;

    // 1 for January and February, 0 otherwise: those months count as months 10 and 11
    // of the previous (March-based) year.
    let shift = (14 - month) / 12;
    let shifted_year = year + 4800 - shift;
    let shifted_month = month + 12 * shift - 3;

    // Days contributed by the whole months preceding `shifted_month`.
    let days_before_month = (153 * shifted_month + 2) / 5;
    let every_fourth = shifted_year / 4;
    let every_hundredth = shifted_year / 100;
    let every_four_hundredth = shifted_year / 400;

    let julian_day = day + days_before_month + 365 * shifted_year + every_fourth
        - every_hundredth
        + every_four_hundredth
        - 32045;
    julian_day - UNIX_EPOCH_JDN
}