visdom/mesdoc/selector/
pattern.rs

1use crate::mesdoc::utils::{divide_isize, is_char_available_in_key, RoundType};
2use lazy_static::lazy_static;
3use regex::Regex;
4use std::borrow::Cow;
5use std::sync::{Arc, Mutex};
6use std::{collections::HashMap, fmt::Debug};
7
/// Factory that builds a boxed pattern from two string arguments, or returns an error message.
///
/// Every `from_params` implementation in this file rejects a non-empty first argument; only
/// `RegExp` reads the second one (as its regex source).
pub type FromParamsFn = Box<dyn Fn(&str, &str) -> Result<BoxDynPattern, String> + Send + 'static>;
lazy_static! {
	// Compiled regexes, cached under their `^`-prefixed source string (see `RegExp::get_rule`).
	static ref REGEXS: Mutex<HashMap<String, Arc<Regex>>> = Mutex::new(HashMap::new());
	// Pattern factories keyed by name; filled through `add_pattern`, read by `to_pattern`.
	static ref PATTERNS: Mutex<HashMap<&'static str, FromParamsFn>> = Mutex::new(HashMap::new());
}

/// Owned, type-erased pattern.
pub type BoxDynPattern = Box<dyn Pattern>;
15
/// Aborts with a panic that reports `name` as an unsupported pattern.
///
/// # Panics
///
/// Always; the function never returns.
fn no_implemented(name: &str) -> ! {
	panic!("No supported pattern '{}' was found", name)
}
19
/// Extra string data attached to a match (`RegExp` stores capture groups under their 1-based
/// index, `Nth` stores `"n"` and `"index"`).
pub type MatchedData = HashMap<String, String>;
/// Ordered match results, as returned by `exec`.
pub type MatchedQueue = Vec<Matched>;
/// Result of a successful `Pattern::matched` call.
#[derive(Debug, Default, Clone)]
pub struct Matched {
	/// Characters produced by the match.
	pub chars: Vec<char>,
	/// Number of input characters consumed in addition to `chars.len()`; `exec` adds both when
	/// advancing (`Identity` counts the escaping backslashes here).
	pub ignore_chars: Option<usize>,
	/// Name of the pattern that produced the match; empty when the pattern does not set it.
	#[allow(dead_code)]
	pub name: &'static str,
	/// Additional key/value data captured by the pattern.
	pub data: MatchedData,
}
30
/// A matcher that tries to consume a prefix of a character slice.
pub trait Pattern: Send + Sync + Debug {
	/// Tries to match at the start of `chars`; returns the match details, or `None` when the
	/// input does not match.
	fn matched(&self, chars: &[char]) -> Option<Matched>;
	/// Reports whether this is a nested pattern; `false` unless overridden.
	fn is_nested(&self) -> bool {
		false
	}
	/// Builds a pattern trait object from two string arguments.
	///
	/// The default implementation panics through `no_implemented`, using `s` as the reported
	/// name; implementors that can be built from parameters override it.
	fn from_params(s: &str, _p: &str) -> Result<BoxDynPattern, String>
	where
		Self: Sized + Send + 'static,
	{
		no_implemented(s);
	}
}
45
46impl Pattern for char {
47	fn matched(&self, chars: &[char]) -> Option<Matched> {
48		let ch = chars[0];
49		if *self == ch {
50			return Some(Matched {
51				chars: vec![ch],
52				..Default::default()
53			});
54		}
55		None
56	}
57}
58
59impl Pattern for &[char] {
60	fn matched(&self, chars: &[char]) -> Option<Matched> {
61		let total = self.len();
62		if total > chars.len() {
63			return None;
64		}
65		let mut result: Vec<char> = Vec::with_capacity(total);
66		for (index, &ch) in self.iter().enumerate() {
67			let cur = chars
68				.get(index)
69				.expect("Pattern for slice char's length must great than target's chars.z");
70			if ch == *cur {
71				result.push(ch);
72			} else {
73				return None;
74			}
75		}
76		Some(Matched {
77			chars: result,
78			..Default::default()
79		})
80	}
81}
82
83impl Pattern for Vec<char> {
84	fn matched(&self, chars: &[char]) -> Option<Matched> {
85		self.as_slice().matched(chars)
86	}
87}
88
89/// Identity
90#[derive(Debug, Default)]
91pub struct Identity;
92
93impl Pattern for Identity {
94	fn matched(&self, chars: &[char]) -> Option<Matched> {
95		let mut result: Vec<char> = Vec::with_capacity(5);
96		let first = chars[0];
97		let name: &str = "identity";
98		if !(first.is_ascii_alphabetic() || first == '_') {
99			return None;
100		}
101		// allow translate character '\': fix issue #2
102		let mut is_in_translate = false;
103		let mut ignore_chars = 0;
104		for &c in chars {
105			if !is_in_translate {
106				if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
107					result.push(c);
108				} else if c == '\\' {
109					is_in_translate = true;
110					ignore_chars += 1;
111				} else {
112					break;
113				}
114			} else {
115				result.push(c);
116				is_in_translate = false;
117			}
118		}
119		let ignore_chars = if ignore_chars > 0 {
120			Some(ignore_chars)
121		} else {
122			None
123		};
124		Some(Matched {
125			chars: result,
126			name,
127			ignore_chars,
128			..Default::default()
129		})
130	}
131	// from_str
132	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
133		check_params_return(&[s, p], || Box::new(Identity))
134	}
135}
136/// AttrKey
137#[derive(Debug, Default)]
138pub struct AttrKey;
139
140impl Pattern for AttrKey {
141	fn matched(&self, chars: &[char]) -> Option<Matched> {
142		let mut result = Vec::with_capacity(5);
143		for ch in chars {
144			if is_char_available_in_key(ch) {
145				result.push(*ch);
146			} else {
147				break;
148			}
149		}
150		if !result.is_empty() {
151			return Some(Matched {
152				chars: result,
153				name: "attr_key",
154				..Default::default()
155			});
156		}
157		None
158	}
159	// from_params
160	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
161		check_params_return(&[s, p], || Box::new(AttrKey))
162	}
163}
164/// Spaces
165#[derive(Debug, Default)]
166pub struct Spaces;
167
168impl Pattern for Spaces {
169	fn matched(&self, chars: &[char]) -> Option<Matched> {
170		let mut result: Vec<char> = Vec::with_capacity(2);
171		for ch in chars {
172			if ch.is_ascii_whitespace() {
173				result.push(*ch);
174			} else {
175				break;
176			}
177		}
178		Some(Matched {
179			chars: result,
180			name: "spaces",
181			..Default::default()
182		})
183	}
184	// from params
185	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
186		check_params_return(&[s, p], || Box::new(Spaces))
187	}
188}
189
190/// `Nth`
191/// 2n/+2n+1/2n-1/-2n+1/+0/-1/2
192#[derive(Debug, Default)]
193pub struct Nth;
194
195impl Pattern for Nth {
196	fn matched(&self, chars: &[char]) -> Option<Matched> {
197		let rule: RegExp = RegExp {
198			context: Cow::from(
199				r#"^(?:([-+])?([1-9]\d+|[0-9])?n(?:\s*([+-])\s*([1-9]\d+|[0-9]))?|([-+])?([1-9]\d+|[0-9]))"#,
200			),
201		};
202		let mut data = HashMap::with_capacity(2);
203		let mut matched_chars: Vec<char> = Vec::new();
204		if let Some(v) = Pattern::matched(&rule, chars) {
205			let rule_data = v.data;
206			// when the group index 6,
207			let only_index = rule_data.contains_key("6");
208			let index_keys = if only_index { ("6", "5") } else { ("4", "3") };
209			// set index
210			if let Some(index) = Nth::get_number(&rule_data, index_keys, None) {
211				data.insert("index".to_string(), index);
212			}
213			// also has `n`
214			if !only_index {
215				if let Some(n) = Nth::get_number(&rule_data, ("2", "1"), Some("1".to_string())) {
216					data.insert("n".to_string(), n);
217				}
218			}
219			matched_chars = v.chars;
220		} else {
221			// maybe 'even' or 'odd'
222			let even = vec!['e', 'v', 'e', 'n'];
223			let odd = vec!['o', 'd', 'd'];
224			if Pattern::matched(&even, chars).is_some() {
225				data.insert("n".to_string(), "2".to_string());
226				data.insert("index".to_string(), "0".to_string());
227				matched_chars = even;
228			} else if Pattern::matched(&odd, chars).is_some() {
229				data.insert("n".to_string(), "2".to_string());
230				data.insert("index".to_string(), "1".to_string());
231				matched_chars = odd;
232			}
233		}
234		if !data.is_empty() {
235			return Some(Matched {
236				name: "nth",
237				data,
238				chars: matched_chars,
239				ignore_chars: None,
240			});
241		}
242		None
243	}
244	// from params to pattern
245	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
246		check_params_return(&[s, p], || Box::new(Nth))
247	}
248}
249
250impl Nth {
251	fn get_number(data: &MatchedData, keys: (&str, &str), def: Option<String>) -> Option<String> {
252		const MINUS: &str = "-";
253		if let Some(idx) = data.get(keys.0).or(def.as_ref()) {
254			let mut index = idx.to_owned();
255			if let Some(op) = data.get(keys.1) {
256				if op == MINUS {
257					index = String::from(op) + &index;
258				}
259			}
260			return Some(index);
261		}
262		None
263	}
264	// get indexs allowed
265	pub fn get_allowed_indexs(
266		n: &Option<String>,
267		index: &Option<String>,
268		total: usize,
269	) -> Vec<usize> {
270		// has n
271		if let Some(n) = n {
272			let n = n.parse::<isize>().unwrap();
273			let index = index
274				.as_ref()
275				.map(|index| index.parse::<isize>().unwrap())
276				.unwrap_or(0);
277			// n == 0
278			if n == 0 {
279				if index > 0 {
280					let index = index as usize;
281					if index <= total {
282						return vec![index - 1];
283					}
284				}
285				return vec![];
286			}
287			// n < 0 or n > 0
288			let mut start_loop: isize;
289			let end_loop: isize;
290			if n < 0 {
291				// -2n - 1/ -2n + 0
292				if index <= 0 {
293					return vec![];
294				}
295				// -2n + 1
296				if index <= -n {
297					let index = index as usize;
298					if index <= total {
299						return vec![index - 1];
300					}
301					return vec![];
302				}
303				start_loop = divide_isize(index - (total as isize), -n, RoundType::Ceil);
304				end_loop = divide_isize(index - 1, -n, RoundType::Floor);
305			} else {
306				// n > 0
307				start_loop = divide_isize(1 - index, n, RoundType::Ceil);
308				end_loop = divide_isize((total as isize) - index, n, RoundType::Floor);
309			}
310			// set start_loop min 0
311			if start_loop < 0 {
312				start_loop = 0;
313			}
314			// when start_loop >= end_loop, no index is allowed
315			if start_loop > end_loop {
316				return vec![];
317			}
318			let start = start_loop as usize;
319			let end = end_loop as usize;
320			let mut allow_indexs = Vec::with_capacity(end - start + 1);
321			for i in start..=end {
322				let cur_index = (i as isize * n + index) as usize;
323				if cur_index < 1 {
324					continue;
325				}
326				// last index need -1 for real list index
327				allow_indexs.push(cur_index - 1);
328			}
329			if n < 0 {
330				allow_indexs.reverse();
331			}
332			return allow_indexs;
333		}
334		// only index
335		let index = index
336			.as_ref()
337			.expect("Nth must have 'index' value when 'n' is not setted.")
338			.parse::<isize>()
339			.expect("Nth's index is not a correct number");
340		if index <= 0 || index > (total as isize) {
341			return vec![];
342		}
343		vec![(index - 1) as usize]
344	}
345}
346
347/// RegExp
348#[derive(Debug)]
349pub struct RegExp<'a> {
350	pub context: Cow<'a, str>,
351}
352
353impl<'a> Pattern for RegExp<'a> {
354	/// impl `matched`
355	fn matched(&self, chars: &[char]) -> Option<Matched> {
356		let Self { context } = self;
357		let content = chars.iter().collect::<String>();
358		let rule = RegExp::get_rule(context);
359		if let Some(caps) = rule.captures(&content) {
360			let total_len = caps[0].chars().count();
361			let mut data = HashMap::with_capacity(caps.len() - 1);
362			for (index, m) in caps.iter().skip(1).enumerate() {
363				if let Some(m) = m {
364					data.insert((index + 1).to_string(), m.as_str().to_string());
365				}
366			}
367			let result = chars[..total_len].to_vec();
368			return Some(Matched {
369				chars: result,
370				name: "regexp",
371				data,
372				ignore_chars: None,
373			});
374		}
375		None
376	}
377	/// impl `from_params`
378	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
379		check_params_return(&[s], || {
380			Box::new(RegExp {
381				context: Cow::Owned(p.to_string()),
382			})
383		})
384	}
385}
386
387impl<'a> RegExp<'a> {
388	pub fn get_rule(context: &str) -> Arc<Regex> {
389		let wrong_regex = format!("Wrong regex context '{context}'");
390		let last_context = String::from("^") + context;
391		let mut regexs = REGEXS.lock().unwrap();
392		if let Some(rule) = regexs.get(&last_context[..]) {
393			Arc::clone(rule)
394		} else {
395			let key = last_context;
396			let rule = Regex::new(&key).expect(&wrong_regex);
397			let value = Arc::new(rule);
398			let result = Arc::clone(&value);
399			regexs.insert(key, value);
400			result
401		}
402	}
403}
404
405/// Nested
406#[derive(Debug, Default)]
407pub struct NestedSelector;
408
409impl Pattern for NestedSelector {
410	fn matched(&self, _chars: &[char]) -> Option<Matched> {
411		None
412	}
413	// from params to pattern
414	fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String> {
415		check_params_return(&[s, p], || Box::new(NestedSelector))
416	}
417	// set to be nested
418	fn is_nested(&self) -> bool {
419		true
420	}
421}
422
423pub fn add_pattern(name: &'static str, from_handle: FromParamsFn) {
424	let mut patterns = PATTERNS.lock().unwrap();
425	if patterns.get(name).is_some() {
426		panic!("The pattern '{}' is already exist.", name);
427	} else {
428		patterns.insert(name, from_handle);
429	}
430}
431
432pub(crate) fn init() {
433	// add lib supported patterns
434	add_pattern("identity", Box::new(Identity::from_params));
435	add_pattern("spaces", Box::new(Spaces::from_params));
436	add_pattern("attr_key", Box::new(AttrKey::from_params));
437	add_pattern("nth", Box::new(Nth::from_params));
438	add_pattern("regexp", Box::new(RegExp::from_params));
439	add_pattern("selector", Box::new(NestedSelector::from_params));
440}
441
442pub fn to_pattern(name: &str, s: &str, p: &str) -> Result<BoxDynPattern, String> {
443	let patterns = PATTERNS.lock().unwrap();
444	if let Some(cb) = patterns.get(name) {
445		return cb(s, p);
446	}
447	no_implemented(name);
448}
449
450pub fn exec(queues: &[BoxDynPattern], chars: &[char]) -> (MatchedQueue, usize, usize, bool) {
451	let mut start_index = 0;
452	let mut result: MatchedQueue = Vec::with_capacity(queues.len());
453	let mut matched_num: usize = 0;
454	for item in queues {
455		if let Some(matched) = item.matched(&chars[start_index..]) {
456			start_index += matched.chars.len() + matched.ignore_chars.unwrap_or(0);
457			matched_num += 1;
458			result.push(matched);
459		} else {
460			break;
461		}
462	}
463	(result, start_index, matched_num, start_index == chars.len())
464}
465
466pub fn check_params_return<F: Fn() -> BoxDynPattern>(
467	params: &[&str],
468	cb: F,
469) -> Result<BoxDynPattern, String> {
470	for &p in params {
471		if !p.is_empty() {
472			let all_params = params.iter().fold(String::from(""), |mut r, &s| {
473				r.push_str(s);
474				r
475			});
476			return Err(format!("Unrecognized params '{all_params}'"));
477		}
478	}
479	Ok(cb())
480}
481
#[cfg(test)]
mod tests {
	use super::{
		add_pattern, check_params_return, AttrKey, BoxDynPattern, Matched, Nth, Pattern, RegExp,
	};
	/// `Nth::get_allowed_indexs` for positive, negative, zero and missing `n`.
	#[test]
	fn test_allow_indexs() {
		assert_eq!(
			Nth::get_allowed_indexs(&Some("-2".to_string()), &Some("3".to_string()), 9),
			vec![0, 2]
		);
		assert_eq!(
			Nth::get_allowed_indexs(&Some("2".to_string()), &Some("3".to_string()), 9),
			vec![2, 4, 6, 8]
		);
		assert_eq!(
			Nth::get_allowed_indexs(&None, &Some("3".to_string()), 9),
			vec![2]
		);
		assert!(Nth::get_allowed_indexs(&None, &Some("3".to_string()), 2).is_empty());
		assert_eq!(
			Nth::get_allowed_indexs(&Some("0".to_string()), &Some("3".to_string()), 9),
			vec![2]
		);
		assert!(Nth::get_allowed_indexs(&Some("0".to_string()), &Some("-3".to_string()), 9).is_empty());
		assert!(Nth::get_allowed_indexs(&Some("1".to_string()), &Some("6".to_string()), 5).is_empty());
		assert_eq!(
			Nth::get_allowed_indexs(&Some("2".to_string()), &None, 9),
			vec![1, 3, 5, 7]
		);
		assert!(Nth::get_allowed_indexs(&Some("-2".to_string()), &None, 9).is_empty());
		assert!(Nth::get_allowed_indexs(&Some("-4".to_string()), &Some("3".to_string()), 2).is_empty());
	}

	/// Any non-empty parameter is rejected; all-empty parameters build the pattern.
	#[test]
	fn test_check_params_return() {
		assert!(check_params_return(&["a"], || Box::new('c')).is_err());
		assert!(check_params_return(&["", "a"], || Box::new('c')).is_err());
		assert!(check_params_return(&["", ""], || Box::new('c')).is_ok());
	}
	/// Registering the same pattern name twice must panic.
	///
	/// `expected` pins the panic to the duplicate registration, so a failure in one of the
	/// earlier assertions is no longer mistaken for the expected panic.
	#[test]
	#[should_panic(expected = "is already exist")]
	fn test_new_pattern() {
		#[derive(Debug)]
		struct TestPattern;
		impl Pattern for TestPattern {
			fn matched(&self, _: &[char]) -> Option<Matched> {
				None
			}
			fn from_params(s: &str, p: &str) -> Result<BoxDynPattern, String>
			where
				Self: Sized + Send + 'static,
			{
				check_params_return(&[s, p], || Box::new(TestPattern))
			}
		}
		let pat: Box<dyn Pattern> = Box::new(TestPattern);
		assert!(!pat.is_nested());
		assert!(pat.matched(&['a']).is_none());
		assert!(format!("{pat:?}").contains("Pattern"));
		assert!(TestPattern::from_params("a", "").is_err());
		add_pattern("test", Box::new(TestPattern::from_params));
		add_pattern("test", Box::new(TestPattern::from_params));
	}

	/// The default `from_params` panics with the "no supported pattern" message.
	#[test]
	#[should_panic(expected = "No supported pattern")]
	fn test_from_params() {
		let _ = char::from_params("", "");
	}

	/// Spot checks for `Nth`, `AttrKey` and the `Debug` output of `RegExp`.
	#[test]
	fn test_pattern_matched() {
		let nth: BoxDynPattern = Box::new(Nth);
		assert!(nth.matched(&['-', 'a']).is_none());
		assert!(nth.matched(&['-', '1']).is_some());
		let part_matched = nth.matched(&['-', '2', 'n', '+', 'a']);
		assert!(part_matched.is_some());
		assert_eq!(part_matched.unwrap().chars, vec!['-', '2', 'n']);
		// attr key
		let attr_key: BoxDynPattern = Box::new(AttrKey);
		assert!(attr_key.matched(&[',']).is_none());
		assert!(attr_key.matched(&[' ']).is_none());
		assert!(attr_key.matched(&['\u{0000}']).is_none());
		// regexp
		let reg_exp: BoxDynPattern = Box::new(RegExp {
			context: std::borrow::Cow::from("abc"),
		});
		assert!(format!("{reg_exp:?}").contains("abc"));
	}
}