1use crate::{generror, verify, Error, Result};
4use log::{debug, error, trace};
5use serde::{Deserialize, Serialize};
6use std::path::Path;
7
/// A single lexical token produced by [`Token::create_from_str`].
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum Token {
	/// The `include` keyword (an item that is exactly `include`).
	Include,

	/// The `incdir` keyword (an item that is exactly `incdir`).
	Incdir,

	/// The `resource` keyword (an item that is exactly `resource`).
	Resource,

	/// The `type` keyword. Post-processing turns it into `Name("type")`
	/// when it appears inside an open paren, bracket or square.
	Type,

	/// The `define` keyword (an item that is exactly `define`).
	Define,

	/// The `meta` keyword. Post-processing keeps it only when the next token
	/// is `Name("noextract")`, `Name("arches")` or not a name at all;
	/// otherwise it becomes `Name("meta")`.
	Meta,

	/// `<`
	CrocOpen,

	/// `>`
	CrocClose,

	/// `(`
	ParenOpen,

	/// `)`
	ParenClose,

	/// `{`
	BracketOpen,

	/// `}`
	BracketClose,

	/// `[`
	SquareOpen,

	/// `]`
	SquareClose,

	/// `:`
	Colon,

	/// `,`
	Comma,
	/// End of an input line.
	Newline,

	/// `=`
	Equal,

	/// `$`
	Dollar,

	/// A whole (trimmed) line that starts with `#`, including the `#`.
	Comment(String),

	/// Text found between a pair of `"` or a pair of backticks, without the
	/// delimiters. A lone `'` that does not form a char literal is also
	/// emitted as `String("'")`.
	String(String),

	/// Any other run of name characters (identifiers, numbers, paths, ...).
	Name(String),

	/// A single character written between single quotes, e.g. `' '`.
	Char(char),
}
78
79impl Token {
80	pub fn from_file(s: &Path) -> Result<Vec<Token>> {
82		debug!("loading file {s:?}");
83		let data = std::fs::read(s)?;
84		let data = std::str::from_utf8(&data)?;
85		Self::create_from_str(data)
86	}
87	pub fn all_globs(dir: &Path, pattern: &str) -> Result<Vec<Vec<Token>>> {
89		debug!("loading globs {pattern} @ {dir:?}");
90		if let Some(q) = dir.as_os_str().to_str() {
91			let mut q = q.to_string();
92			q.push('/');
93			q.push_str(pattern);
94
95			let mut ret = Vec::new();
96			for file in glob::glob(&q).unwrap() {
97				let file = file.unwrap();
98				debug!("file {file:?}");
99				let n = Self::from_file(file.as_path())?;
100				ret.push(n);
101			}
102			Ok(ret)
103		} else {
104			Err(Error::Error(format!(
105				"unable to parse dir to string {dir:?}"
106			)))
107		}
108	}
109
110	pub fn create_from_str(data: &str) -> Result<Vec<Token>> {
112		trace!("parsing '{data}'");
113		let mut ret = Vec::new();
114		let mut curr = String::default();
115		let mut quote = None;
116
117		for (i, line) in data.split('\n').enumerate() {
119			trace!("line[{i}]: {line}");
120
121			let line = line.trim();
123			if quote.is_none() && line.is_empty() {
124				ret.push(Token::Newline);
125				continue;
126			}
127
128			if line.starts_with('#') {
129				let ins = Token::Comment(line.to_string());
130				ret.push(ins);
131				ret.push(Token::Newline);
132				continue;
133			}
134			for item in line.split([' ', '\t']) {
135				trace!("item = '{item}'");
136				if let Some(q) = "e {
137					curr.push(' ');
138					curr.push_str(item);
139					if Self::quote_enclosed(&curr, *q) || *q == '\'' {
140						Self::parse_loop(curr, &mut ret)?;
141						curr = String::default();
143						quote = None;
144						continue;
145					}
146				} else if !Self::quote_enclosed(item, '"') {
147					quote = Some('"');
148					curr.push_str(item);
149				} else if !Self::quote_enclosed(item, '\'') {
150					quote = Some('\'');
151					curr.push_str(item);
152				} else if !Self::quote_enclosed(item, '`') {
153					quote = Some('`');
154					curr.push_str(item);
155				} else {
156					Self::parse_loop(item, &mut ret)?;
157				}
158			}
159
160			ret.push(Token::Newline);
161		}
162		ret.pop();
164
165		if !curr.is_empty() {
166			return Err(Self::error(format!(
167				"remaining data from unenclosed quote '{curr}'"
168			)));
169		}
170		let ret = Self::post_proc(ret);
171		Ok(ret)
172	}
173
174	pub fn to_name(&self) -> Result<&String> {
176		debug!("calling to_name {self:?}");
177		match self {
178			Token::Name(n) => Ok(n),
179			_ => generror!(format!("cannot parse {self:?} as string")),
180		}
181	}
182	fn error<S: Into<String>>(err: S) -> Error {
183		let err: String = err.into();
184		error!("tokenize error {err}");
185		Error::Tokenize(err)
186	}
187	fn valid_name_char(c: char) -> bool {
188		c.is_ascii_lowercase()
189			|| c.is_ascii_uppercase()
190			|| c.is_ascii_digit()
191			|| c == '_' || c == '/'
192			|| c == '.' || c == '?'
193			|| c == '-' || c == '\''
194	}
195	fn post_proc(mut tokens: Vec<Token>) -> Vec<Token> {
196		let mut ret = Vec::with_capacity(tokens.len());
197		let mut paren = 0;
198		let mut bracket = 0;
199		let mut square = 0;
200		while !tokens.is_empty() {
201			let r = tokens.remove(0);
202			match &r {
203				Token::ParenOpen => paren += 1,
204				Token::ParenClose => paren -= 1,
205				Token::BracketOpen => bracket += 1,
206				Token::BracketClose => bracket -= 1,
207				Token::SquareOpen => square += 1,
208				Token::SquareClose => square -= 1,
209				Token::Type => {
210					if paren > 0 || bracket > 0 || square > 0 {
211						ret.push(Token::Name(String::from("type")));
215						continue;
216					}
217				}
218				Token::Meta => {
219					if let Some(x) = tokens.first() {
220						if let Token::Name(n) = x {
221							if n != "noextract" && n != "arches" {
222								ret.push(Token::Name(String::from("meta")));
223								continue;
224							}
225							}
227					} else {
228						ret.push(Token::Name(String::from("meta")));
229						continue;
230					}
231				}
232				_ => {}
233			}
234			ret.push(r);
235		}
236		ret
237	}
238	fn parse(s: String) -> Result<(Self, Option<String>)> {
239		trace!("parse {s}");
240		verify!(!s.is_empty(), UnexpectedToken);
241		let mut ss = s.chars();
242		let f = ss.next().unwrap();
243		let rem: String = ss.collect();
244		trace!("checking char {f:?}");
245		trace!("rem {rem:?}");
246		let n = match f {
247			'(' => (Token::ParenOpen, Some(rem)),
248			')' => (Token::ParenClose, Some(rem)),
249			'[' => (Token::SquareOpen, Some(rem)),
250			']' => (Token::SquareClose, Some(rem)),
251			'{' => (Token::BracketOpen, Some(rem)),
252			'}' => (Token::BracketClose, Some(rem)),
253			':' => (Token::Colon, Some(rem)),
254			'<' => (Token::CrocOpen, Some(rem)),
255			'>' => (Token::CrocClose, Some(rem)),
256			',' => (Token::Comma, Some(rem)),
257			'=' => (Token::Equal, Some(rem)),
258			'$' => (Token::Dollar, Some(rem)),
259			'\'' => {
260				let val = rem.chars().next();
261				let nq = rem.chars().nth(1);
262				if nq == Some('\'') {
263					(Token::Char(val.unwrap()), Some(rem[2..].to_string()))
264				} else {
265					(Token::String(String::from("'")), Some(rem))
266				}
267			}
268			'"' | '`' => {
269				if let Some(idx) = rem.find(f) {
270					let str = rem[..idx].to_string();
271					let rem = rem[idx + 1..].to_string();
272					(Token::String(str), Some(rem))
273				} else {
274					return Err(Self::error(format!(
275						"Unable to find enclosing quote in {rem}"
276					)));
277				}
278			}
279			'\n' => (Token::Newline, Some(rem)),
280			_ => {
281				let empty = None;
283				match s.as_str() {
284					"include" => (Token::Include, empty),
285					"incdir" => (Token::Incdir, empty),
286					"resource" => (Token::Resource, empty),
287					"type" => (Token::Type, empty),
288					"define" => (Token::Define, empty),
289					"meta" => (Token::Meta, empty),
290					_ => {
291						let mut start = String::from("");
292						start.push(f);
293
294						let mut prem = String::from("");
295						let mut ss = rem.chars();
296
297						while let Some(c) = ss.next() {
298							if Self::valid_name_char(c) {
299								start.push(c)
300							} else {
301								prem.push(c);
302								let ins: String = ss.collect();
303								prem.push_str(&ins);
304								break;
305							}
306						}
307						trace!("start {start} | prem: '{prem}'");
308						let ins = Token::Name(start);
309						(ins, Some(prem))
310					}
311				}
312			}
313		};
314		Ok(n)
315	}
316	fn quote_enclosed(s: &str, quote: char) -> bool {
317		let chars = s.chars();
318		let mut count = 0;
319
320		for n in chars {
321			if n == quote {
322				count += 1;
323			}
324		}
325		count % 2 == 0
326	}
327	fn parse_loop<S: Into<String>>(item: S, tokens: &mut Vec<Token>) -> Result<()> {
328		let mut item: String = item.into();
329		while !item.is_empty() {
330			let (ins, rem) = Token::parse(item)?;
331			tokens.push(ins);
332			if let Some(n) = rem {
333				item = n;
334			} else {
335				break;
336			}
337		}
338		Ok(())
339	}
340}
341
#[cfg(test)]
mod test {
	use super::*;
	use test::Bencher;
	extern crate test;

	/// Shorthand for a `Token::Name` holding `s`.
	fn name(s: &str) -> Token {
		Token::Name(String::from(s))
	}

	/// Shorthand for a `Token::String` holding `s`.
	fn string(s: &str) -> Token {
		Token::String(String::from(s))
	}

	/// Benchmark: assignment with several quoted string literals.
	#[bench]
	fn bench_token1(b: &mut Bencher) {
		let s = r#"abcd = "hello", `world`, "!", "Hello World!""#;
		b.iter(|| Token::create_from_str(s).unwrap())
	}

	/// Benchmark: a short resource declaration.
	#[bench]
	fn bench_token0(b: &mut Bencher) {
		let s = r#"resource fd[int32]"#;
		b.iter(|| Token::create_from_str(s).unwrap())
	}

	/// Benchmark: multi-line input with a comment, blank lines and a call.
	#[bench]
	fn bench_token2(b: &mut Bencher) {
		let s = r#"
		# Some comment
		
		func$abcd(type int32, meta int64) fd
		
"#;
		b.iter(|| Token::create_from_str(s).unwrap())
	}

	/// A resource declaration; `-1` is tokenized as a name.
	#[test]
	fn tokens0() {
		let s = r#"resource fd[int32]: -1"#;
		let expected = vec![
			Token::Resource,
			name("fd"),
			Token::SquareOpen,
			name("int32"),
			Token::SquareClose,
			Token::Colon,
			name("-1"),
		];
		assert_eq!(Token::create_from_str(s).unwrap(), expected);
	}

	/// Double-quoted and backtick strings, including ones containing spaces.
	#[test]
	fn tokens1() {
		let s = r#"abcd = "hello", `world`, "!", "Hello World!", `acdb efgh`"#;
		let expected = vec![
			name("abcd"),
			Token::Equal,
			string("hello"),
			Token::Comma,
			string("world"),
			Token::Comma,
			string("!"),
			Token::Comma,
			string("Hello World!"),
			Token::Comma,
			string("acdb efgh"),
		];
		assert_eq!(Token::create_from_str(s).unwrap(), expected);
	}

	/// Comments, blank lines, and `type` / `meta` used as plain names inside
	/// a parameter list.
	#[test]
	fn tokens2() {
		let s = r#"
# Some comment

func$abcd(type int32, meta int64) fd

"#;
		let expected = vec![
			Token::Newline,
			Token::Comment(String::from("# Some comment")),
			Token::Newline,
			Token::Newline,
			name("func"),
			Token::Dollar,
			name("abcd"),
			Token::ParenOpen,
			name("type"),
			name("int32"),
			Token::Comma,
			name("meta"),
			name("int64"),
			Token::ParenClose,
			name("fd"),
			Token::Newline,
			Token::Newline,
		];
		assert_eq!(Token::create_from_str(s).unwrap(), expected);
	}

	/// A char literal holding a space survives the whitespace split.
	#[test]
	fn tokens3() {
		let s = r#"const[' ', int8]"#;
		let expected = vec![
			name("const"),
			Token::SquareOpen,
			Token::Char(' '),
			Token::Comma,
			name("int8"),
			Token::SquareClose,
		];
		assert_eq!(Token::create_from_str(s).unwrap(), expected);
	}

	/// An unterminated string literal is reported as an error.
	#[test]
	fn bad_tokens0() {
		let s = r#"value = "asd", "qwert"#;
		assert!(Token::create_from_str(s).is_err());
	}
}
467}