mesdoc/selector/
rule.rs

1use super::pattern::{self, exec, to_pattern, BoxDynPattern, Matched, Pattern};
2use crate::{constants::PRIORITY_PSEUDO_SELECTOR, interface::Elements};
3use crate::{
4	interface::BoxDynElement,
5	utils::{to_static_str, vec_char_to_clean_str},
6};
7use lazy_static::lazy_static;
8use std::collections::HashMap;
9use std::fmt;
10use std::sync::{Arc, Mutex};
// Global rule registry: `(name, rule)` pairs appended by `add_rules`, kept behind a
// mutex and created with room for 20 entries.
lazy_static! {
	pub static ref RULES: Mutex<Vec<(&'static str, Arc<Rule>)>> = Mutex::new(Vec::with_capacity(20));
}
// Matcher handles.
// `MatchAllHandle` maps a whole element set to a new element set; the `Option<bool>`
// is the value `Matcher::apply` receives as `use_cache`.
pub type MatchAllHandle = Box<dyn (for<'a, 'r> Fn(&'a Elements<'r>, Option<bool>) -> Elements<'r>)>;
// `MatchOneHandle` answers whether a single element matches; it receives the same flag.
pub type MatchOneHandle = Box<dyn Fn(&BoxDynElement, Option<bool>) -> bool>;
// Matcher data: the captured values of one match, keyed by `SavedDataKey`.
pub type MatcherData = HashMap<SavedDataKey, &'static str>;
// Matcher factory: turns the captured data into a `Matcher`.
pub type MatcherFactory = Box<dyn (Fn(MatcherData) -> Matcher) + Send + Sync>;
21
/// A matcher as returned by a rule's `MatcherFactory`.
#[derive(Default)]
pub struct Matcher {
	/// Filters a whole element set at once; `apply` prefers it when it is set.
	pub all_handle: Option<MatchAllHandle>,
	/// Tests a single element; used by `one` and by `apply` when `all_handle` is unset.
	pub one_handle: Option<MatchOneHandle>,
	/// Overwritten with the rule's priority by `Rule::make`.
	pub priority: u32,
	/// Overwritten with the rule's `in_cache` flag by `Rule::make`.
	pub in_cache: bool,
}
29
30impl fmt::Debug for Matcher {
31	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32		f.write_str(
33			format!(
34				"Matcher{{ all_handle: {}, one_handle: {} }}",
35				self.all_handle.is_some(),
36				self.one_handle.is_some(),
37			)
38			.as_str(),
39		)
40	}
41}
42
43impl Matcher {
44	//
45	pub fn apply<'a, 'r>(&self, eles: &'a Elements<'r>, use_cache: Option<bool>) -> Elements<'r> {
46		if let Some(handle) = &self.all_handle {
47			return handle(eles, use_cache);
48		}
49		let handle = self.one_handle.as_ref().unwrap();
50		let mut result = Elements::with_capacity(5);
51		for ele in eles.get_ref() {
52			if handle(ele, use_cache) {
53				result.push(ele.cloned());
54			}
55		}
56		result
57	}
58	// execute one handle
59	pub fn one(&self, ele: &BoxDynElement, use_cache: Option<bool>) -> bool {
60		let handle = self.one_handle.as_ref().unwrap();
61		handle(ele, use_cache)
62	}
63	// get all handle
64	pub fn get_all_handle(&self) -> &MatchAllHandle {
65		self.all_handle.as_ref().expect("All handle is None")
66	}
67}
68
/// Key of one captured value in `MatcherData`: `(name, index, data key)`.
#[derive(Debug, Hash, Eq, PartialEq, Clone)]
pub struct SavedDataKey(&'static str, usize, &'static str);
/// Two-part form of a key: `(name, index)`.
pub type DataKey = (&'static str, usize);

/// A bare name means index `0` and the placeholder data key `"_"`.
impl From<&'static str> for SavedDataKey {
	fn from(name: &'static str) -> Self {
		Self(name, 0, "_")
	}
}

/// A one-element tuple behaves exactly like the bare name.
impl From<(&'static str,)> for SavedDataKey {
	fn from((name,): (&'static str,)) -> Self {
		Self(name, 0, "_")
	}
}

/// `(name, index)` gets the placeholder data key `"_"`.
impl From<(&'static str, usize)> for SavedDataKey {
	fn from((name, index): (&'static str, usize)) -> Self {
		Self(name, index, "_")
	}
}

/// The full triple is stored unchanged.
impl From<(&'static str, usize, &'static str)> for SavedDataKey {
	fn from((name, index, data_key): (&'static str, usize, &'static str)) -> Self {
		Self(name, index, data_key)
	}
}
96
// Initial capacity of the small buffers created while parsing a rule.
const DEF_SIZE: usize = 2;
// Creates an empty `char` buffer with room for `DEF_SIZE` characters.
fn get_char_vec() -> Vec<char> {
	Vec::<char>::with_capacity(DEF_SIZE)
}
102
// Aborts parsing because of a brace without its partner: reports the character,
// its index, and how to escape it (by doubling).
fn panic_unmatched(ch: char, index: usize) -> ! {
	let message = format!(
		"Unmatched '{ch}' at index {index},you can escape it using both {ch}{ch}",
		ch = ch,
		index = index
	);
	panic!("{}", message)
}
111
// Scratch state of `Rule::get_queues` while it reads one `{...}` pattern.
struct MatchedStore {
	// number of '#' counted before the raw params started
	hashs_num: usize,
	// the raw params were closed; only whitespace and the closing '}' may follow
	is_wait_end: bool,
	// the parser is currently inside a `{...}` pattern
	is_in_matched: bool,
	// characters collected between the '#' markers
	raw_params: Vec<char>,
	// characters collected after the name that are not part of it
	suf_params: Vec<char>,
	// characters of the pattern type name
	names: Vec<char>,
}
120
121impl Default for MatchedStore {
122	fn default() -> Self {
123		MatchedStore {
124			hashs_num: 0,
125			is_wait_end: false,
126			is_in_matched: false,
127			raw_params: get_char_vec(),
128			suf_params: get_char_vec(),
129			names: get_char_vec(),
130		}
131	}
132}
133impl MatchedStore {
134	fn next(&mut self) -> Result<Box<dyn Pattern>, String> {
135		self.hashs_num = 0;
136		self.is_in_matched = false;
137		self.is_wait_end = false;
138		let name = vec_char_to_clean_str(&mut self.names);
139		let s = vec_char_to_clean_str(&mut self.suf_params);
140		let r = vec_char_to_clean_str(&mut self.raw_params);
141		to_pattern(name, s, r)
142	}
143}
144
/// A selector rule: the parsed pattern queue plus the factory that builds its matcher.
pub struct Rule {
	/// Copied onto every matcher built by `make`.
	pub in_cache: bool,
	/// Copied onto every matcher built by `make`.
	pub priority: u32,
	/// Patterns parsed from the rule's definition string (filled in by `Rule::add`).
	pub(crate) queues: Vec<Box<dyn Pattern>>,
	/// `(name, index)` keys of the captures that `data` passes on to the factory.
	pub fields: Vec<DataKey>,
	/// Factory called by `make` with the captured data.
	pub handle: MatcherFactory,
}
152
153impl fmt::Debug for Rule {
154	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155		f.write_str(format!("Rule{{ queues: {:?} }}", self.queues).as_str())
156	}
157}
158
159// Rule methods
160impl Rule {
161	// translate string to queues
162	pub(crate) fn get_queues(content: &str) -> Vec<Box<dyn Pattern>> {
163		const ANCHOR_CHAR: char = '\0';
164		const START_CHAR: char = '{';
165		const END_CHAR: char = '}';
166		let mut prev_char = ANCHOR_CHAR;
167		let mut store: MatchedStore = Default::default();
168		let mut raw_chars = get_char_vec();
169		let mut queues: Vec<Box<dyn Pattern>> = Vec::with_capacity(DEF_SIZE);
170		let mut is_matched_finish = false;
171		let mut index: usize = 0;
172		for ch in content.chars() {
173			index += 1;
174			let is_prev_matched_finish = if is_matched_finish {
175				is_matched_finish = false;
176				true
177			} else {
178				false
179			};
180			if store.is_wait_end {
181				if ch.is_ascii_whitespace() {
182					continue;
183				}
184				if ch == END_CHAR {
185					is_matched_finish = true;
186				} else {
187					panic!(
188						"Unexpect end of Pattern type '{}' at index {}, expect '{}' but found '{}'",
189						vec_char_to_clean_str(&mut store.names),
190						index - 1,
191						END_CHAR,
192						ch
193					);
194				}
195			} else if !store.is_in_matched {
196				// when not in matched
197				if prev_char == START_CHAR {
198					// translate '{'
199					if ch == START_CHAR {
200						prev_char = ANCHOR_CHAR;
201						continue;
202					} else {
203						store.names.push(ch);
204						store.is_in_matched = true;
205						// remove the '{'
206						raw_chars.pop();
207						if !raw_chars.is_empty() {
208							queues.push(Box::new(raw_chars.clone()));
209							raw_chars.clear();
210						}
211					}
212				} else if prev_char == END_CHAR {
213					if is_prev_matched_finish {
214						// is just end of the Pattern type.
215						raw_chars.push(ch);
216					} else if ch == END_CHAR {
217						// translate end char '}'
218						prev_char = ANCHOR_CHAR;
219						continue;
220					} else {
221						// panic no matched
222						panic_unmatched(END_CHAR, index - 2);
223					}
224				} else {
225					raw_chars.push(ch);
226				}
227			} else if !store.raw_params.is_empty() {
228				// in Pattern's raw params ##gfh#def##
229				if ch == '#' {
230					let leave_count = store.hashs_num - 1;
231					if leave_count == 0 {
232						// only one hash
233						store.is_wait_end = true;
234					} else {
235						let raw_len = store.raw_params.len();
236						let last_index = raw_len - leave_count;
237						if last_index > 0 {
238							store.is_wait_end = store.raw_params[last_index..]
239								.iter()
240								.filter(|&&ch| ch == '#')
241								.count() == leave_count;
242							if store.is_wait_end {
243								store.raw_params.truncate(last_index);
244							}
245						}
246					}
247					if !store.is_wait_end {
248						store.raw_params.push(ch);
249					}
250				} else {
251					// in raw params
252					store.raw_params.push(ch);
253				}
254			} else {
255				// in suf_params or names
256				if ch == '}' {
257					if store.hashs_num > 0 {
258						panic!("Uncomplete raw params: ''");
259					}
260					is_matched_finish = true;
261				} else if ch == '#' {
262					// in hashs
263					store.hashs_num += 1;
264				} else {
265					// in names or suf_params
266					if prev_char == '#' {
267						// in raw params now
268						store.raw_params.push(ch);
269					} else {
270						// check if in suf_params, or character is not a name's character
271						if !store.suf_params.is_empty() || !(ch.is_ascii_alphanumeric() || ch == '_') {
272							store.suf_params.push(ch);
273						} else {
274							store.names.push(ch);
275						}
276					}
277				}
278			}
279			if is_matched_finish {
280				match store.next() {
281					Ok(queue) => queues.push(queue),
282					Err(reason) => panic!(reason),
283				};
284			}
285			prev_char = ch;
286		}
287		// not end
288		if store.is_wait_end || store.is_in_matched {
289			panic!(
290				"The Mathed type '{}' is not complete",
291				store.names.iter().collect::<String>()
292			);
293		}
294		if prev_char == START_CHAR || (prev_char == END_CHAR && !is_matched_finish) {
295			panic_unmatched(prev_char, index - 1);
296		}
297		if !raw_chars.is_empty() {
298			if raw_chars.len() == 1 {
299				queues.push(Box::new(raw_chars[0]));
300			} else {
301				queues.push(Box::new(raw_chars));
302			}
303		}
304		queues
305	}
306
307	pub fn exec(&self, chars: &[char]) -> Option<(Vec<Matched>, usize, usize)> {
308		Rule::exec_queues(&self.queues, chars)
309	}
310
311	pub fn exec_queues(
312		queues: &[BoxDynPattern],
313		chars: &[char],
314	) -> Option<(Vec<Matched>, usize, usize)> {
315		let (result, matched_len, matched_queue_item, _) = exec(&queues, chars);
316		if matched_len > 0 {
317			Some((result, matched_len, matched_queue_item))
318		} else {
319			None
320		}
321	}
322	/// make a matcher
323	pub fn make(&self, data: &[Matched]) -> Matcher {
324		let handle = &self.handle;
325		let data = self.data(data);
326		let mut matcher = handle(data);
327		matcher.priority = self.priority;
328		matcher.in_cache = self.in_cache;
329		matcher
330	}
331	/// make a matcher by alias
332	pub fn make_alias(selector: &'static str) -> Matcher {
333		// if parse the selector string into Selector and save to the closure
334		// the mutex rules will trigger a dead lock
335		// so there give up, just lost some performance
336		Matcher {
337			all_handle: Some(Box::new(move |eles: &Elements, _| eles.filter(selector))),
338			one_handle: None,
339			// priority
340			priority: PRIORITY_PSEUDO_SELECTOR,
341			in_cache: false,
342		}
343	}
344
345	pub fn data(&self, data: &[Matched]) -> MatcherData {
346		let mut result: MatcherData = HashMap::with_capacity(5);
347		let mut indexs = HashMap::with_capacity(5);
348		let fields = &self.fields;
349		for item in data.iter() {
350			let Matched {
351				name,
352				data: hash_data,
353				chars,
354				..
355			} = item;
356			if !name.is_empty() {
357				let index = indexs.entry(name).or_insert(0);
358				let data_key = (*name, *index);
359				if fields.contains(&data_key) {
360					let count = hash_data.len();
361					if count == 0 {
362						let cur_key = (*name, *index);
363						result.insert(
364							cur_key.into(),
365							to_static_str(chars.iter().collect::<String>()),
366						);
367					} else {
368						for (&key, &val) in hash_data.iter() {
369							let cur_key = (*name, *index, key);
370							result.insert(cur_key.into(), val);
371						}
372					}
373				}
374			}
375		}
376		result
377	}
378	// add a rule
379	pub fn add(context: &str, mut rule: Rule) -> Self {
380		rule.queues = Rule::get_queues(context);
381		rule
382	}
383	// quick method to get param
384	pub fn param<T: Into<SavedDataKey>>(params: &MatcherData, v: T) -> Option<&'static str> {
385		params.get(&v.into()).copied()
386	}
387}
388
/// Compact definition of a rule; converted into a `RuleItem` through `From`.
pub struct RuleDefItem(
	/// Name of the rule.
	pub &'static str,
	/// Definition string ("context") that is parsed into the pattern queue.
	pub &'static str,
	/// Priority of the rule.
	pub u32,
	/// Keys of the captured values the factory wants to receive.
	pub Vec<DataKey>,
	/// Factory that builds the matcher.
	pub MatcherFactory,
);
396
/// A rule together with the name and definition string it is registered with.
pub struct RuleItem {
	/// The rule; `add_rules` fills in its queues from `context`.
	pub rule: Rule,
	/// Definition string that `add_rules` parses into the rule's pattern queue.
	pub context: &'static str,
	/// Name stored next to the rule in `RULES`.
	pub name: &'static str,
}
402
403impl From<RuleDefItem> for RuleItem {
404	fn from(item: RuleDefItem) -> Self {
405		RuleItem {
406			name: item.0,
407			context: item.1,
408			rule: Rule {
409				priority: item.2,
410				in_cache: false,
411				fields: item.3,
412				handle: item.4,
413				queues: Vec::new(),
414			},
415		}
416	}
417}
418
419pub fn add_rules(rules: Vec<RuleItem>) {
420	let mut all_rules = RULES.lock().unwrap();
421	for RuleItem {
422		name,
423		context,
424		rule,
425	} in rules
426	{
427		let cur_rule = Rule::add(context, rule);
428		all_rules.push((name, Arc::new(cur_rule)));
429	}
430}
431
// Crate-internal initialisation entry point: delegates to the pattern module's `init`.
pub(crate) fn init() {
	pattern::init();
}