mesdoc/selector/
pattern.rs

1/*
2*
3* all: *
4* id: #{identity}
5* class: .{identity}
6* attribute: [{identity}{rule##"(^|*~$)?=('")"##}]
7*/
8use crate::utils::{
9	chars_to_int, divide_isize, is_char_available_in_key, to_static_str, RoundType,
10};
11use lazy_static::lazy_static;
12use regex::Regex;
13use std::sync::{Arc, Mutex};
14use std::{collections::HashMap, fmt::Debug, usize};
15
/// Boxed factory that builds a pattern trait object from two textual parameters.
///
/// `to_pattern` looks one of these up by pattern name and calls it as `cb(s, p)`;
/// an `Err` carries a human-readable reason why the parameters were rejected.
pub type FromParamsFn = Box<dyn Fn(&str, &str) -> Result<BoxDynPattern, String> + Send + 'static>;
lazy_static! {
	// Compiled regexes keyed by their `^`-prefixed source text; read and filled by
	// `RegExp::get_rule` when it is called with `cache == true`.
	static ref REGEXS: Mutex<HashMap<&'static str, Arc<Regex>>> = Mutex::new(HashMap::new());
	// Pattern factories keyed by pattern name; filled by `add_pattern`, read by `to_pattern`.
	static ref PATTERNS: Mutex<HashMap<&'static str, FromParamsFn>> = Mutex::new(HashMap::new());
}
21
/// Owned, dynamically dispatched `Pattern` trait object.
pub type BoxDynPattern = Box<dyn Pattern>;
23
/// Aborts with a panic naming the pattern type that has no implementation.
///
/// Never returns; used both as the default `Pattern::from_params` body and as
/// the fallback of `to_pattern` when `name` is not registered.
fn no_implemented(name: &str) -> ! {
	panic!("No supported Pattern type '{}' found", name)
}
27
/// Extra key/value data attached to a match (e.g. regex capture groups keyed by
/// their group number, or the `"n"` / `"index"` parts produced by `Nth`).
pub type MatchedData = HashMap<&'static str, &'static str>;
/// Result of a successful `Pattern::matched` call.
#[derive(Debug, Default, Clone)]
pub struct Matched {
	/// Characters captured by the pattern.
	pub chars: Vec<char>,
	/// Number of input characters that were consumed but are not part of `chars`
	/// (`exec` adds this to `chars.len()` when advancing through the input).
	pub ignore_chars: Option<usize>,
	/// Name of the pattern that produced the match (empty for the `Default` value).
	pub name: &'static str,
	/// Additional named values extracted by the pattern.
	pub data: MatchedData,
}
36
/// A matcher that is tried against the beginning of a character slice.
pub trait Pattern: Send + Sync + Debug {
	/// Tries to match at the start of `chars`; returns the match details, or
	/// `None` when the pattern does not apply.
	fn matched(&self, chars: &[char]) -> Option<Matched>;
	/// Reports whether this is a nested pattern; `false` unless overridden.
	fn is_nested(&self) -> bool {
		false
	}
	/// Builds a pattern trait object from its two textual parameters.
	///
	/// The default implementation panics via `no_implemented(s)`; types that can
	/// be created from parameters override it.
	fn from_params(s: &str, _p: &str) -> Result<BoxDynPattern, String>
	where
		Self: Sized + Send + 'static,
	{
		no_implemented(s);
	}
}
51
52impl Pattern for char {
53	fn matched(&self, chars: &[char]) -> Option<Matched> {
54		let ch = chars[0];
55		if *self == ch {
56			return Some(Matched {
57				chars: vec![ch],
58				..Default::default()
59			});
60		}
61		None
62	}
63}
64
65impl Pattern for &[char] {
66	fn matched(&self, chars: &[char]) -> Option<Matched> {
67		let total = self.len();
68		if total > chars.len() {
69			return None;
70		}
71		let mut result: Vec<char> = Vec::with_capacity(total);
72		for (index, &ch) in self.iter().enumerate() {
73			let cur = chars
74				.get(index)
75				.expect("Pattern for slice char's length must great than target's chars.z");
76			if ch == *cur {
77				result.push(ch);
78			} else {
79				return None;
80			}
81		}
82		Some(Matched {
83			chars: result,
84			..Default::default()
85		})
86	}
87}
88
89impl Pattern for Vec<char> {
90	fn matched(&self, chars: &[char]) -> Option<Matched> {
91		self.as_slice().matched(chars)
92	}
93}
94
95/// Identity
96#[derive(Debug, Default)]
97pub struct Identity(bool);
98
99impl Pattern for Identity {
100	fn matched(&self, chars: &[char]) -> Option<Matched> {
101		let mut result: Vec<char> = Vec::with_capacity(5);
102		let first = chars[0];
103		let name: &str = "identity";
104		if !(first.is_ascii_alphabetic() || first == '_') {
105			if self.0 {
106				// optional
107				return Some(Matched {
108					name,
109					..Default::default()
110				});
111			}
112			return None;
113		}
114		// allow translate character '\': fix issue #2
115		let mut is_in_translate = false;
116		let mut ignore_chars = 0;
117		for &c in chars {
118			if !is_in_translate {
119				if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
120					result.push(c);
121				} else if c == '\\' {
122					is_in_translate = true;
123					ignore_chars += 1;
124				} else {
125					break;
126				}
127			} else {
128				result.push(c);
129				is_in_translate = false;
130			}
131		}
132		let ignore_chars = if ignore_chars > 0 {
133			Some(ignore_chars)
134		} else {
135			None
136		};
137		Some(Matched {
138			chars: result,
139			name,
140			ignore_chars,
141			..Default::default()
142		})
143	}
144	// from_str
145	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
146		if s == "?" {
147			Ok(Box::new(Identity(true)))
148		} else {
149			check_params_return(&[p], || Box::new(Identity::default()))
150		}
151	}
152}
153/// AttrKey
154#[derive(Debug, Default)]
155pub struct AttrKey;
156
157impl Pattern for AttrKey {
158	fn matched(&self, chars: &[char]) -> Option<Matched> {
159		let mut result = Vec::with_capacity(5);
160		for ch in chars {
161			if is_char_available_in_key(ch) {
162				result.push(*ch);
163			} else {
164				break;
165			}
166		}
167		if !result.is_empty() {
168			return Some(Matched {
169				chars: result,
170				name: "attr_key",
171				..Default::default()
172			});
173		}
174		None
175	}
176	// from_params
177	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
178		check_params_return(&[s, p], || Box::new(AttrKey::default()))
179	}
180}
181/// Spaces
182#[derive(Debug, Default)]
183pub struct Spaces(usize);
184
185impl Pattern for Spaces {
186	fn matched(&self, chars: &[char]) -> Option<Matched> {
187		let mut result: Vec<char> = Vec::with_capacity(2);
188		for ch in chars {
189			if ch.is_ascii_whitespace() {
190				result.push(*ch);
191			} else {
192				break;
193			}
194		}
195		if result.len() >= self.0 {
196			return Some(Matched {
197				chars: result,
198				name: "spaces",
199				..Default::default()
200			});
201		}
202		None
203	}
204	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
205		let mut min_count = 0;
206		if !p.is_empty() {
207			return Err(format!("Spaces not support param '{}'", p));
208		}
209		if !s.trim().is_empty() {
210			let rule: [BoxDynPattern; 3] = [Box::new('('), Box::new(Index::default()), Box::new(')')];
211			let chars: Vec<char> = s.chars().collect();
212			let (result, _, _, match_all) = exec(&rule, &chars);
213			if !match_all {
214				return Err(format!("Wrong 'Spaces{}'", s));
215			}
216			min_count = chars_to_int(&result[1].chars).map_err(|e| e.to_string())?;
217		}
218		Ok(Box::new(Spaces(min_count)))
219	}
220}
221
222/// Index
223#[derive(Debug, Default)]
224pub struct Index;
225
226impl Pattern for Index {
227	fn matched(&self, chars: &[char]) -> Option<Matched> {
228		let first = chars[0];
229		let mut result = Vec::with_capacity(2);
230		let numbers = '0'..'9';
231		if numbers.contains(&first) {
232			result.push(first);
233			if first != '0' {
234				for ch in &chars[1..] {
235					if numbers.contains(ch) {
236						result.push(*ch);
237					}
238				}
239			}
240			return Some(Matched {
241				chars: result,
242				name: "index",
243				..Default::default()
244			});
245		}
246		None
247	}
248	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
249		check_params_return(&[s, p], || Box::new(Index::default()))
250	}
251}
252
253/// `Nth`
254/// 2n/+2n+1/2n-1/-2n+1/+0/-1/2
255#[derive(Debug, Default)]
256pub struct Nth;
257
258impl Pattern for Nth {
259	fn matched(&self, chars: &[char]) -> Option<Matched> {
260		let rule: RegExp = RegExp {
261			cache: true,
262			context: r#"^(?:([-+])?([0-9]|[1-9]\d+)?n(?:\s*([+-])\s*([0-9]|[1-9]\d+))?|([-+])?([0-9]|[1-9]\d+))"#,
263		};
264		let mut data = HashMap::with_capacity(2);
265		let mut matched_chars: Vec<char> = Vec::new();
266		if let Some(v) = Pattern::matched(&rule, chars) {
267			let rule_data = v.data;
268			// when the group index 6,
269			let only_index = rule_data.get("6").is_some();
270			let index_keys = if only_index { ("6", "5") } else { ("4", "3") };
271			// set index
272			if let Some(index) = Nth::get_number(&rule_data, index_keys, None) {
273				data.insert("index", index);
274			}
275			// also has `n`
276			if !only_index {
277				if let Some(n) = Nth::get_number(&rule_data, ("2", "1"), Some("1")) {
278					data.insert("n", n);
279				}
280			}
281			matched_chars = v.chars;
282		} else {
283			// maybe 'even' or 'odd'
284			let even = vec!['e', 'v', 'e', 'n'];
285			let odd = vec!['o', 'd', 'd'];
286			if Pattern::matched(&even, chars).is_some() {
287				data.insert("n", "2");
288				data.insert("index", "0");
289				matched_chars = even;
290			} else if Pattern::matched(&odd, chars).is_some() {
291				data.insert("n", "2");
292				data.insert("index", "1");
293				matched_chars = odd;
294			}
295		}
296		if !data.is_empty() {
297			return Some(Matched {
298				name: "nth",
299				data,
300				chars: matched_chars,
301				ignore_chars: None,
302			});
303		}
304		None
305	}
306	// from params to pattern
307	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
308		check_params_return(&[s, p], || Box::new(Nth::default()))
309	}
310}
311
312impl Nth {
313	fn get_number(data: &MatchedData, keys: (&str, &str), def: Option<&str>) -> Option<&'static str> {
314		const MINUS: &str = "-";
315		if let Some(&idx) = data.get(keys.0).or_else(|| def.as_ref()) {
316			let mut index = String::from(idx);
317			if let Some(&op) = data.get(keys.1) {
318				if op == MINUS {
319					index = String::from(op) + &index;
320				}
321			}
322			return Some(to_static_str(index));
323		}
324		None
325	}
326	// get indexs allowed
327	pub fn get_allowed_indexs(n: Option<&str>, index: Option<&str>, total: usize) -> Vec<usize> {
328		// has n
329		if let Some(n) = n {
330			let n = n.parse::<isize>().unwrap();
331			let index = index
332				.map(|index| index.parse::<isize>().unwrap())
333				.unwrap_or(0);
334			// n == 0
335			if n == 0 {
336				if index > 0 {
337					let index = index as usize;
338					if index <= total {
339						return vec![index - 1];
340					}
341				}
342				return vec![];
343			}
344			// n < 0 or n > 0
345			let mut start_loop: isize;
346			let end_loop: isize;
347			if n < 0 {
348				// -2n - 1/ -2n + 0
349				if index <= 0 {
350					return vec![];
351				}
352				// -2n + 1
353				if index <= -n {
354					let index = index as usize;
355					if index <= total {
356						return vec![index - 1];
357					}
358					return vec![];
359				}
360				start_loop = divide_isize(index - (total as isize), -n, RoundType::Ceil);
361				end_loop = divide_isize(index - 1, -n, RoundType::Floor);
362			} else {
363				// n > 0
364				start_loop = divide_isize(1 - index, n, RoundType::Ceil);
365				end_loop = divide_isize((total as isize) - index, n, RoundType::Floor);
366			}
367			// set start_loop min 0
368			if start_loop < 0 {
369				start_loop = 0;
370			}
371			// when start_loop >= end_loop, no index is allowed
372			if start_loop > end_loop {
373				return vec![];
374			}
375			let start = start_loop as usize;
376			let end = end_loop as usize;
377			let mut allow_indexs = Vec::with_capacity((end - start + 1) as usize);
378			for i in start..=end {
379				let cur_index = (i as isize * n + index) as usize;
380				if cur_index < 1 {
381					continue;
382				}
383				// last index need -1 for real list index
384				allow_indexs.push(cur_index - 1);
385			}
386			return allow_indexs;
387		}
388		// only index
389		let index = index
390			.expect("Nth must have 'index' value when 'n' is not setted.")
391			.parse::<isize>()
392			.expect("Nth's index is not a correct number");
393		if index <= 0 || index > (total as isize) {
394			return vec![];
395		}
396		return vec![(index - 1) as usize];
397	}
398}
399
400/// RegExp
401#[derive(Debug)]
402pub struct RegExp<'a> {
403	pub cache: bool,
404	pub context: &'a str,
405}
406
407impl<'a> Pattern for RegExp<'a> {
408	fn matched(&self, chars: &[char]) -> Option<Matched> {
409		let Self { context, cache } = *self;
410		let content = chars.iter().collect::<String>();
411		let rule = RegExp::get_rule(context, cache);
412		if let Some(caps) = rule.captures(to_static_str(content)) {
413			let total_len = caps[0].len();
414			let mut data = HashMap::with_capacity(caps.len() - 1);
415			for (index, m) in caps.iter().skip(1).enumerate() {
416				if let Some(m) = m {
417					data.insert(to_static_str((index + 1).to_string()), m.as_str());
418				}
419			}
420			let result = chars[..total_len].to_vec();
421			return Some(Matched {
422				chars: result,
423				name: "regexp",
424				data,
425				ignore_chars: None,
426			});
427		}
428		None
429	}
430	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
431		let mut cache = true;
432		if !s.is_empty() {
433			if s == "!" {
434				cache = false;
435			} else {
436				return Err("Wrong param of Pattern type 'regexp', just allow '!' to generate a regexp with 'cached' field falsely.".into());
437			}
438		}
439		Ok(Box::new(RegExp {
440			context: to_static_str(p.to_string()),
441			cache,
442		}))
443	}
444}
445
446impl<'a> RegExp<'a> {
447	pub fn get_rule(context: &str, cache: bool) -> Arc<Regex> {
448		let wrong_regex = format!("Wrong regex context '{}'", context);
449		let last_context = String::from("^") + context;
450		let rule = if cache {
451			let mut regexs = REGEXS.lock().unwrap();
452			if let Some(rule) = regexs.get(&last_context[..]) {
453				Arc::clone(rule)
454			} else {
455				let key = &to_static_str(last_context);
456				let rule = Regex::new(key).expect(&wrong_regex);
457				let value = Arc::new(rule);
458				let result = Arc::clone(&value);
459				regexs.insert(key, value);
460				result
461			}
462		} else {
463			let key = &last_context[..];
464			Arc::new(Regex::new(key).expect(&wrong_regex))
465		};
466		rule
467	}
468}
469
470/// Nested
471#[derive(Debug, Default)]
472pub struct NestedSelector;
473
474impl Pattern for NestedSelector {
475	fn matched(&self, _chars: &[char]) -> Option<Matched> {
476		None
477	}
478	// from params to pattern
479	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
480		check_params_return(&[s, p], || Box::new(NestedSelector::default()))
481	}
482	// set to be nested
483	fn is_nested(&self) -> bool {
484		true
485	}
486}
487
488pub fn add_pattern(name: &'static str, from_handle: FromParamsFn) {
489	let mut patterns = PATTERNS.lock().unwrap();
490	if patterns.get(name).is_some() {
491		panic!("The pattern '{}' is already exist.", name);
492	} else {
493		patterns.insert(name, from_handle);
494	}
495}
496
497pub(crate) fn init() {
498	// add lib supported patterns
499	add_pattern("identity", Box::new(Identity::from_params));
500	add_pattern("spaces", Box::new(Spaces::from_params));
501	add_pattern("attr_key", Box::new(AttrKey::from_params));
502	add_pattern("index", Box::new(Index::from_params));
503	add_pattern("nth", Box::new(Nth::from_params));
504	add_pattern("regexp", Box::new(RegExp::from_params));
505	add_pattern("selector", Box::new(NestedSelector::from_params));
506}
507
508pub fn to_pattern(name: &str, s: &str, p: &str) -> Result<BoxDynPattern, String> {
509	let patterns = PATTERNS.lock().unwrap();
510	if let Some(cb) = patterns.get(name) {
511		return cb(s, p);
512	}
513	no_implemented(name);
514}
515
516pub fn exec(queues: &[BoxDynPattern], chars: &[char]) -> (Vec<Matched>, usize, usize, bool) {
517	let mut start_index = 0;
518	let mut result: Vec<Matched> = Vec::with_capacity(queues.len());
519	let mut matched_num: usize = 0;
520	for item in queues {
521		if let Some(matched) = item.matched(&chars[start_index..]) {
522			start_index += matched.chars.len() + matched.ignore_chars.unwrap_or(0);
523			matched_num += 1;
524			result.push(matched);
525		} else {
526			break;
527		}
528	}
529	(result, start_index, matched_num, start_index == chars.len())
530}
531
532pub fn check_params_return<F: Fn() -> BoxDynPattern>(
533	params: &[&str],
534	cb: F,
535) -> Result<BoxDynPattern, String> {
536	for &p in params {
537		if !p.is_empty() {
538			let all_params = params.iter().fold(String::from(""), |mut r, &s| {
539				r.push_str(s);
540				r
541			});
542			return Err(format!("Unrecognized params '{}'", all_params));
543		}
544	}
545	Ok(cb())
546}