1use crate::{generror, verify, Error, Result};
4use log::{debug, error, trace};
5use serde::{Deserialize, Serialize};
6use std::path::Path;
7
/// A single lexical token produced by [`Token::create_from_str`].
///
/// Keyword variants are only produced when a whole whitespace-separated item
/// equals the keyword; punctuation variants are produced one character at a
/// time.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum Token {
    /// The `include` keyword.
    Include,

    /// The `incdir` keyword.
    Incdir,

    /// The `resource` keyword.
    Resource,

    /// The `type` keyword. Post-processing replaces it with
    /// `Name("type")` when it appears inside any open bracket pair.
    Type,

    /// The `define` keyword.
    Define,

    /// The `meta` keyword. Post-processing replaces it with `Name("meta")`
    /// when it is the last token or is followed by a name other than
    /// `noextract` / `arches`.
    Meta,

    /// The `<` character.
    CrocOpen,

    /// The `>` character.
    CrocClose,

    /// The `(` character.
    ParenOpen,

    /// The `)` character.
    ParenClose,

    /// The `{` character.
    BracketOpen,

    /// The `}` character.
    BracketClose,

    /// The `[` character.
    SquareOpen,

    /// The `]` character.
    SquareClose,

    /// The `:` character.
    Colon,

    /// The `,` character.
    Comma,
    /// End of an input line.
    Newline,

    /// The `=` character.
    Equal,

    /// The `$` character.
    Dollar,

    /// A whole (trimmed) line that starts with `#`, stored including the `#`.
    Comment(String),

    /// The text between a pair of `"` or `` ` `` quotes, without the quotes.
    /// A `'` that does not form a character literal is emitted as `String("'")`.
    String(String),

    /// Any other word: the first character of the item followed by letters,
    /// digits and the punctuation accepted for names (e.g. `fd`, `int32`, `-1`).
    Name(String),

    /// A character literal written as `'c'`.
    Char(char),
}
78
79impl Token {
80 pub fn from_file(s: &Path) -> Result<Vec<Token>> {
82 debug!("loading file {s:?}");
83 let data = std::fs::read(s)?;
84 let data = std::str::from_utf8(&data)?;
85 Self::create_from_str(data)
86 }
87 pub fn all_globs(dir: &Path, pattern: &str) -> Result<Vec<Vec<Token>>> {
89 debug!("loading globs {pattern} @ {dir:?}");
90 if let Some(q) = dir.as_os_str().to_str() {
91 let mut q = q.to_string();
92 q.push('/');
93 q.push_str(pattern);
94
95 let mut ret = Vec::new();
96 for file in glob::glob(&q).unwrap() {
97 let file = file.unwrap();
98 debug!("file {file:?}");
99 let n = Self::from_file(file.as_path())?;
100 ret.push(n);
101 }
102 Ok(ret)
103 } else {
104 Err(Error::Error(format!(
105 "unable to parse dir to string {dir:?}"
106 )))
107 }
108 }
109
110 pub fn create_from_str(data: &str) -> Result<Vec<Token>> {
112 trace!("parsing '{data}'");
113 let mut ret = Vec::new();
114 let mut curr = String::default();
115 let mut quote = None;
116
117 for (i, line) in data.split('\n').enumerate() {
119 trace!("line[{i}]: {line}");
120
121 let line = line.trim();
123 if quote.is_none() && line.is_empty() {
124 ret.push(Token::Newline);
125 continue;
126 }
127
128 if line.starts_with('#') {
129 let ins = Token::Comment(line.to_string());
130 ret.push(ins);
131 ret.push(Token::Newline);
132 continue;
133 }
134 for item in line.split([' ', '\t']) {
135 trace!("item = '{item}'");
136 if let Some(q) = "e {
137 curr.push(' ');
138 curr.push_str(item);
139 if Self::quote_enclosed(&curr, *q) || *q == '\'' {
140 Self::parse_loop(curr, &mut ret)?;
141 curr = String::default();
143 quote = None;
144 continue;
145 }
146 } else if !Self::quote_enclosed(item, '"') {
147 quote = Some('"');
148 curr.push_str(item);
149 } else if !Self::quote_enclosed(item, '\'') {
150 quote = Some('\'');
151 curr.push_str(item);
152 } else if !Self::quote_enclosed(item, '`') {
153 quote = Some('`');
154 curr.push_str(item);
155 } else {
156 Self::parse_loop(item, &mut ret)?;
157 }
158 }
159
160 ret.push(Token::Newline);
161 }
162 ret.pop();
164
165 if !curr.is_empty() {
166 return Err(Self::error(format!(
167 "remaining data from unenclosed quote '{curr}'"
168 )));
169 }
170 let ret = Self::post_proc(ret);
171 Ok(ret)
172 }
173
174 pub fn to_name(&self) -> Result<&String> {
176 debug!("calling to_name {self:?}");
177 match self {
178 Token::Name(n) => Ok(n),
179 _ => generror!(format!("cannot parse {self:?} as string")),
180 }
181 }
182 fn error<S: Into<String>>(err: S) -> Error {
183 let err: String = err.into();
184 error!("tokenize error {err}");
185 Error::Tokenize(err)
186 }
187 fn valid_name_char(c: char) -> bool {
188 c.is_ascii_lowercase()
189 || c.is_ascii_uppercase()
190 || c.is_ascii_digit()
191 || c == '_' || c == '/'
192 || c == '.' || c == '?'
193 || c == '-' || c == '\''
194 }
195 fn post_proc(mut tokens: Vec<Token>) -> Vec<Token> {
196 let mut ret = Vec::with_capacity(tokens.len());
197 let mut paren = 0;
198 let mut bracket = 0;
199 let mut square = 0;
200 while !tokens.is_empty() {
201 let r = tokens.remove(0);
202 match &r {
203 Token::ParenOpen => paren += 1,
204 Token::ParenClose => paren -= 1,
205 Token::BracketOpen => bracket += 1,
206 Token::BracketClose => bracket -= 1,
207 Token::SquareOpen => square += 1,
208 Token::SquareClose => square -= 1,
209 Token::Type => {
210 if paren > 0 || bracket > 0 || square > 0 {
211 ret.push(Token::Name(String::from("type")));
215 continue;
216 }
217 }
218 Token::Meta => {
219 if let Some(x) = tokens.first() {
220 if let Token::Name(n) = x {
221 if n != "noextract" && n != "arches" {
222 ret.push(Token::Name(String::from("meta")));
223 continue;
224 }
225 }
227 } else {
228 ret.push(Token::Name(String::from("meta")));
229 continue;
230 }
231 }
232 _ => {}
233 }
234 ret.push(r);
235 }
236 ret
237 }
238 fn parse(s: String) -> Result<(Self, Option<String>)> {
239 trace!("parse {s}");
240 verify!(!s.is_empty(), UnexpectedToken);
241 let mut ss = s.chars();
242 let f = ss.next().unwrap();
243 let rem: String = ss.collect();
244 trace!("checking char {f:?}");
245 trace!("rem {rem:?}");
246 let n = match f {
247 '(' => (Token::ParenOpen, Some(rem)),
248 ')' => (Token::ParenClose, Some(rem)),
249 '[' => (Token::SquareOpen, Some(rem)),
250 ']' => (Token::SquareClose, Some(rem)),
251 '{' => (Token::BracketOpen, Some(rem)),
252 '}' => (Token::BracketClose, Some(rem)),
253 ':' => (Token::Colon, Some(rem)),
254 '<' => (Token::CrocOpen, Some(rem)),
255 '>' => (Token::CrocClose, Some(rem)),
256 ',' => (Token::Comma, Some(rem)),
257 '=' => (Token::Equal, Some(rem)),
258 '$' => (Token::Dollar, Some(rem)),
259 '\'' => {
260 let val = rem.chars().next();
261 let nq = rem.chars().nth(1);
262 if nq == Some('\'') {
263 (Token::Char(val.unwrap()), Some(rem[2..].to_string()))
264 } else {
265 (Token::String(String::from("'")), Some(rem))
266 }
267 }
268 '"' | '`' => {
269 if let Some(idx) = rem.find(f) {
270 let str = rem[..idx].to_string();
271 let rem = rem[idx + 1..].to_string();
272 (Token::String(str), Some(rem))
273 } else {
274 return Err(Self::error(format!(
275 "Unable to find enclosing quote in {rem}"
276 )));
277 }
278 }
279 '\n' => (Token::Newline, Some(rem)),
280 _ => {
281 let empty = None;
283 match s.as_str() {
284 "include" => (Token::Include, empty),
285 "incdir" => (Token::Incdir, empty),
286 "resource" => (Token::Resource, empty),
287 "type" => (Token::Type, empty),
288 "define" => (Token::Define, empty),
289 "meta" => (Token::Meta, empty),
290 _ => {
291 let mut start = String::from("");
292 start.push(f);
293
294 let mut prem = String::from("");
295 let mut ss = rem.chars();
296
297 while let Some(c) = ss.next() {
298 if Self::valid_name_char(c) {
299 start.push(c)
300 } else {
301 prem.push(c);
302 let ins: String = ss.collect();
303 prem.push_str(&ins);
304 break;
305 }
306 }
307 trace!("start {start} | prem: '{prem}'");
308 let ins = Token::Name(start);
309 (ins, Some(prem))
310 }
311 }
312 }
313 };
314 Ok(n)
315 }
316 fn quote_enclosed(s: &str, quote: char) -> bool {
317 let chars = s.chars();
318 let mut count = 0;
319
320 for n in chars {
321 if n == quote {
322 count += 1;
323 }
324 }
325 count % 2 == 0
326 }
327 fn parse_loop<S: Into<String>>(item: S, tokens: &mut Vec<Token>) -> Result<()> {
328 let mut item: String = item.into();
329 while !item.is_empty() {
330 let (ins, rem) = Token::parse(item)?;
331 tokens.push(ins);
332 if let Some(n) = rem {
333 item = n;
334 } else {
335 break;
336 }
337 }
338 Ok(())
339 }
340}
341
// Unit tests and micro-benchmarks for the tokenizer.
// NOTE(review): `Bencher` comes from the `test` crate pulled in via
// `extern crate test`; this presumably needs a nightly toolchain — confirm.
#[cfg(test)]
mod test {
    use super::*;
    use test::Bencher;
    extern crate test;

    /// Benchmarks a single line mixing `"` and backtick quoted strings.
    #[bench]
    fn bench_token1(b: &mut Bencher) {
        let s = r#"abcd = "hello", `world`, "!", "Hello World!""#;
        b.iter(|| Token::create_from_str(s).unwrap())
    }

    /// Benchmarks a short `resource` declaration.
    #[bench]
    fn bench_token0(b: &mut Bencher) {
        let s = r#"resource fd[int32]"#;
        b.iter(|| Token::create_from_str(s).unwrap())
    }

    /// Benchmarks multi-line input with a comment, blank lines and a call-like line.
    #[bench]
    fn bench_token2(b: &mut Bencher) {
        let s = r#"
 # Some comment

 func$abcd(type int32, meta int64) fd

"#;
        b.iter(|| Token::create_from_str(s).unwrap())
    }

    /// A `resource` line: keyword, name, bracketed type, colon and a numeric name.
    #[test]
    fn tokens0() {
        let s = r#"resource fd[int32]: -1"#;
        let t = Token::create_from_str(s).unwrap();
        assert_eq!(
            t,
            vec![
                Token::Resource,
                Token::Name(String::from("fd")),
                Token::SquareOpen,
                Token::Name(String::from("int32")),
                Token::SquareClose,
                Token::Colon,
                Token::Name(String::from("-1")),
            ]
        );
    }

    /// Quoted strings, including ones that contain spaces, become single
    /// `String` tokens without their quotes.
    #[test]
    fn tokens1() {
        let s = r#"abcd = "hello", `world`, "!", "Hello World!", `acdb efgh`"#;
        let t = Token::create_from_str(s).unwrap();
        assert_eq!(
            t,
            vec![
                Token::Name(String::from("abcd")),
                Token::Equal,
                Token::String(String::from("hello")),
                Token::Comma,
                Token::String(String::from("world")),
                Token::Comma,
                Token::String(String::from("!")),
                Token::Comma,
                Token::String(String::from("Hello World!")),
                Token::Comma,
                Token::String(String::from("acdb efgh"))
            ]
        );
    }

    /// Multi-line input: blank lines and comments produce `Newline` / `Comment`,
    /// and `type` / `meta` inside parentheses come back as plain names.
    #[test]
    fn tokens2() {
        let s = r#"
# Some comment

func$abcd(type int32, meta int64) fd

"#;
        let t = Token::create_from_str(s).unwrap();
        assert_eq!(
            t,
            vec![
                Token::Newline,
                Token::Comment(String::from("# Some comment")),
                Token::Newline,
                Token::Newline,
                Token::Name(String::from("func")),
                Token::Dollar,
                Token::Name(String::from("abcd")),
                Token::ParenOpen,
                Token::Name(String::from("type")),
                Token::Name(String::from("int32")),
                Token::Comma,
                Token::Name(String::from("meta")),
                Token::Name(String::from("int64")),
                Token::ParenClose,
                Token::Name(String::from("fd")),
                Token::Newline,
                Token::Newline
            ]
        );
    }
    /// A quoted space (`' '`) is tokenized as a `Char` even though the input
    /// is split on spaces.
    #[test]
    fn tokens3() {
        let s = r#"const[' ', int8]"#;
        let t = Token::create_from_str(s).unwrap();
        assert_eq!(
            t,
            vec![
                Token::Name(String::from("const")),
                Token::SquareOpen,
                Token::Char(' '),
                Token::Comma,
                Token::Name(String::from("int8")),
                Token::SquareClose
            ]
        );
    }

    /// An unterminated `"` must be reported as an error.
    #[test]
    fn bad_tokens0() {
        let s = r#"value = "asd", "qwert"#;
        let t = Token::create_from_str(s);
        assert!(t.is_err());
    }
}
467}