1use crate::{generror, verify, Error, Result};
4use log::{debug, error, trace};
5use serde::{Deserialize, Serialize};
6use std::path::Path;
7
8#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
10pub enum Token {
11 Include,
13
14 Incdir,
16
17 Resource,
19
20 Type,
22
23 Define,
25
26 Meta,
28
29 CrocOpen,
31
32 CrocClose,
34
35 ParenOpen,
37
38 ParenClose,
40
41 BracketOpen,
43
44 BracketClose,
46
47 SquareOpen,
49
50 SquareClose,
52
53 Colon,
55
56 Comma,
58 Newline,
59
60 Equal,
62
63 Dollar,
65
66 Comment(String),
68
69 String(String),
71
72 Name(String),
74
75 Char(char),
77}
78
79impl Token {
80 pub fn from_file(s: &Path) -> Result<Vec<Token>> {
82 debug!("loading file {s:?}");
83 let data = std::fs::read(s)?;
84 let data = std::str::from_utf8(&data)?;
85 Self::create_from_str(data)
86 }
87 pub fn all_globs(dir: &Path, pattern: &str) -> Result<Vec<Vec<Token>>> {
89 debug!("loading globs {pattern} @ {dir:?}");
90 if let Some(q) = dir.as_os_str().to_str() {
91 let mut q = q.to_string();
92 q.push('/');
93 q.push_str(pattern);
94
95 let mut ret = Vec::new();
96 for file in glob::glob(&q).unwrap() {
97 let file = file.unwrap();
98 debug!("file {file:?}");
99 let n = Self::from_file(file.as_path())?;
100 ret.push(n);
101 }
102 Ok(ret)
103 } else {
104 Err(Error::Error(format!(
105 "unable to parse dir to string {dir:?}"
106 )))
107 }
108 }
109
110 pub fn create_from_str(data: &str) -> Result<Vec<Token>> {
112 trace!("parsing '{data}'");
113 let mut ret = Vec::new();
114 let mut curr = String::default();
115 let mut quote = None;
116
117 for (i, line) in data.split('\n').enumerate() {
119 trace!("line[{i}]: {line}");
120
121 let line = line.trim();
123 if quote.is_none() && line.is_empty() {
124 ret.push(Token::Newline);
125 continue;
126 }
127
128 if line.starts_with('#') {
129 let ins = Token::Comment(line.to_string());
130 ret.push(ins);
131 ret.push(Token::Newline);
132 continue;
133 }
134 for item in line.split([' ', '\t']) {
135 trace!("item = '{item}'");
136 if let Some(q) = "e {
137 curr.push(' ');
138 curr.push_str(item);
139 if Self::quote_enclosed(&curr, *q) || *q == '\'' {
140 Self::parse_loop(curr, &mut ret)?;
141 curr = String::default();
143 quote = None;
144 continue;
145 }
146 } else if !Self::quote_enclosed(item, '"') {
147 quote = Some('"');
148 curr.push_str(item);
149 } else if !Self::quote_enclosed(item, '\'') {
150 quote = Some('\'');
151 curr.push_str(item);
152 } else if !Self::quote_enclosed(item, '`') {
153 quote = Some('`');
154 curr.push_str(item);
155 } else {
156 Self::parse_loop(item, &mut ret)?;
157 }
158 }
159
160 ret.push(Token::Newline);
161 }
162 ret.pop();
164
165 if !curr.is_empty() {
166 return Err(Self::error(format!(
167 "remaining data from unenclosed quote '{curr}'"
168 )));
169 }
170 let ret = Self::post_proc(ret);
171 Ok(ret)
172 }
173
174 pub fn to_name(&self) -> Result<&String> {
176 debug!("calling to_name {self:?}");
177 match self {
178 Token::Name(n) => Ok(n),
179 _ => generror!(format!("cannot parse {self:?} as string")),
180 }
181 }
182 fn error<S: Into<String>>(err: S) -> Error {
183 let err: String = err.into();
184 error!("tokenize error {err}");
185 Error::Tokenize(err)
186 }
/// Returns `true` when `c` may appear inside a name token after its first
/// character: ASCII letters and digits plus `_`, `/`, `.`, `?`, `-` and `'`.
fn valid_name_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '/' | '.' | '?' | '-' | '\'')
}
198 fn post_proc(mut tokens: Vec<Token>) -> Vec<Token> {
199 let mut ret = Vec::with_capacity(tokens.len());
200 let mut paren = 0;
201 let mut bracket = 0;
202 let mut square = 0;
203 while !tokens.is_empty() {
204 let r = tokens.remove(0);
205 match &r {
206 Token::ParenOpen => paren += 1,
207 Token::ParenClose => paren -= 1,
208 Token::BracketOpen => bracket += 1,
209 Token::BracketClose => bracket -= 1,
210 Token::SquareOpen => square += 1,
211 Token::SquareClose => square -= 1,
212 Token::Type => {
213 if paren > 0 || bracket > 0 || square > 0 {
214 ret.push(Token::Name(String::from("type")));
218 continue;
219 }
220 }
221 Token::Meta => {
222 if let Some(x) = tokens.first() {
223 if let Token::Name(n) = x {
224 if n != "noextract" && n != "arches" {
225 ret.push(Token::Name(String::from("meta")));
226 continue;
227 }
228 }
230 } else {
231 ret.push(Token::Name(String::from("meta")));
232 continue;
233 }
234 }
235 _ => {}
236 }
237 ret.push(r);
238 }
239 ret
240 }
241 fn parse(s: String) -> Result<(Self, Option<String>)> {
242 trace!("parse {s}");
243 verify!(!s.is_empty(), UnexpectedToken);
244 let mut ss = s.chars();
245 let f = ss.next().unwrap();
246 let rem: String = ss.collect();
247 trace!("checking char {f:?}");
248 trace!("rem {rem:?}");
249 let n = match f {
250 '(' => (Token::ParenOpen, Some(rem)),
251 ')' => (Token::ParenClose, Some(rem)),
252 '[' => (Token::SquareOpen, Some(rem)),
253 ']' => (Token::SquareClose, Some(rem)),
254 '{' => (Token::BracketOpen, Some(rem)),
255 '}' => (Token::BracketClose, Some(rem)),
256 ':' => (Token::Colon, Some(rem)),
257 '<' => (Token::CrocOpen, Some(rem)),
258 '>' => (Token::CrocClose, Some(rem)),
259 ',' => (Token::Comma, Some(rem)),
260 '=' => (Token::Equal, Some(rem)),
261 '$' => (Token::Dollar, Some(rem)),
262 '\'' => {
263 let val = rem.chars().next();
264 let nq = rem.chars().nth(1);
265 if nq == Some('\'') {
266 (Token::Char(val.unwrap()), Some(rem[2..].to_string()))
267 } else {
268 (Token::String(String::from("'")), Some(rem))
269 }
270 }
271 '"' | '`' => {
272 if let Some(idx) = rem.find(f) {
273 let str = rem[..idx].to_string();
274 let rem = rem[idx + 1..].to_string();
275 (Token::String(str), Some(rem))
276 } else {
277 return Err(Self::error(format!(
278 "Unable to find enclosing quote in {rem}"
279 )));
280 }
281 }
282 '\n' => (Token::Newline, Some(rem)),
283 _ => {
284 let empty = None;
286 match s.as_str() {
287 "include" => (Token::Include, empty),
288 "incdir" => (Token::Incdir, empty),
289 "resource" => (Token::Resource, empty),
290 "type" => (Token::Type, empty),
291 "define" => (Token::Define, empty),
292 "meta" => (Token::Meta, empty),
293 _ => {
294 let mut start = String::from("");
295 start.push(f);
296
297 let mut prem = String::from("");
298 let mut ss = rem.chars();
299
300 while let Some(c) = ss.next() {
301 if Self::valid_name_char(c) {
302 start.push(c)
303 } else {
304 prem.push(c);
305 let ins: String = ss.collect();
306 prem.push_str(&ins);
307 break;
308 }
309 }
310 trace!("start {start} | prem: '{prem}'");
311 let ins = Token::Name(start);
312 (ins, Some(prem))
313 }
314 }
315 }
316 };
317 Ok(n)
318 }
/// Returns `true` when `quote` occurs an even number of times in `s`
/// (zero included), i.e. every opening quote has a matching closing one.
fn quote_enclosed(s: &str, quote: char) -> bool {
    let occurrences = s.chars().filter(|&c| c == quote).count();
    occurrences % 2 == 0
}
330 fn parse_loop<S: Into<String>>(item: S, tokens: &mut Vec<Token>) -> Result<()> {
331 let mut item: String = item.into();
332 while !item.is_empty() {
333 let (ins, rem) = Token::parse(item)?;
334 tokens.push(ins);
335 if let Some(n) = rem {
336 item = n;
337 } else {
338 break;
339 }
340 }
341 Ok(())
342 }
343}
344
#[cfg(test)]
mod test {
    use super::*;

    /// Shorthand for a `Token::Name` built from a string slice.
    fn name(text: &str) -> Token {
        Token::Name(String::from(text))
    }

    /// Shorthand for a `Token::String` built from a string slice.
    fn string(text: &str) -> Token {
        Token::String(String::from(text))
    }

    /// A resource declaration: keyword, name, bracketed type, colon, value.
    #[test]
    fn tokens0() {
        let tokens = Token::create_from_str(r#"resource fd[int32]: -1"#).unwrap();
        let expected = vec![
            Token::Resource,
            name("fd"),
            Token::SquareOpen,
            name("int32"),
            Token::SquareClose,
            Token::Colon,
            name("-1"),
        ];
        assert_eq!(tokens, expected);
    }

    /// Double-quoted and backtick-quoted strings, with and without spaces.
    #[test]
    fn tokens1() {
        let tokens = Token::create_from_str(
            r#"abcd = "hello", `world`, "!", "Hello World!", `acdb efgh`"#,
        )
        .unwrap();
        let expected = vec![
            name("abcd"),
            Token::Equal,
            string("hello"),
            Token::Comma,
            string("world"),
            Token::Comma,
            string("!"),
            Token::Comma,
            string("Hello World!"),
            Token::Comma,
            string("acdb efgh"),
        ];
        assert_eq!(tokens, expected);
    }

    /// Comments, blank lines, and `type` / `meta` used as plain names inside
    /// a parameter list.
    #[test]
    fn tokens2() {
        let source = r#"
# Some comment

func$abcd(type int32, meta int64) fd

"#;
        let tokens = Token::create_from_str(source).unwrap();
        let expected = vec![
            Token::Newline,
            Token::Comment(String::from("# Some comment")),
            Token::Newline,
            Token::Newline,
            name("func"),
            Token::Dollar,
            name("abcd"),
            Token::ParenOpen,
            name("type"),
            name("int32"),
            Token::Comma,
            name("meta"),
            name("int64"),
            Token::ParenClose,
            name("fd"),
            Token::Newline,
            Token::Newline,
        ];
        assert_eq!(tokens, expected);
    }

    /// A quoted space character is kept as a `Token::Char`.
    #[test]
    fn tokens3() {
        let tokens = Token::create_from_str(r#"const[' ', int8]"#).unwrap();
        let expected = vec![
            name("const"),
            Token::SquareOpen,
            Token::Char(' '),
            Token::Comma,
            name("int8"),
            Token::SquareClose,
        ];
        assert_eq!(tokens, expected);
    }

    /// A string that is never closed must be rejected.
    #[test]
    fn bad_tokens0() {
        let outcome = Token::create_from_str(r#"value = "asd", "qwert"#);
        assert!(outcome.is_err());
    }
}