mesdoc/selector/
mod.rs

1pub mod pattern;
2pub mod rule;
3
4use crate::{constants::NAME_SELECTOR_ALL, error::Error};
5use lazy_static::lazy_static;
6use pattern::{exec, Matched};
7use rule::{Rule, RULES};
8use std::{
9	str::FromStr,
10	sync::{Arc, Mutex},
11};
12
13use self::{pattern::BoxDynPattern, rule::Matcher};
14
lazy_static! {
	// Pattern queue used by the parser to recognise what separates two selector
	// segments: one of `>`, `,`, `~`, `+` with optional surrounding whitespace,
	// or a plain run of whitespace.
	static ref SPLITTER: Mutex<Vec<BoxDynPattern>> =
		Mutex::new(Rule::get_queues(r##"{regexp#(\s*[>,~+]\s*|\s+)#}"##));
	// Cache for the rule registered under `NAME_SELECTOR_ALL`; starts empty and
	// is filled on first use by `Selector::make_comb_all`.
	static ref ALL_RULE: Mutex<Option<Arc<Rule>>> = Mutex::new(None);
}
/// Relation between a selector segment and the elements matched by the
/// segment in front of it.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Combinator {
	/// All descendants (the whitespace combinator).
	ChildrenAll,
	/// Direct children (`>`).
	Children,
	/// Reverse of `Children`.
	Parent,
	/// Reverse of `ChildrenAll`.
	ParentAll,
	/// All following siblings (`~`).
	NextAll,
	/// The immediately following sibling (`+`).
	Next,
	/// Reverse of `NextAll`.
	PrevAll,
	/// Reverse of `Next`.
	Prev,
	/// Siblings.
	Siblings,
	/// Segment chained onto the previous one without any combinator in between.
	Chain,
}

/// Convert the textual form of a combinator into a [`Combinator`].
///
/// The empty string stands for the whitespace (descendant) combinator.
///
/// # Panics
/// Panics when `comb` is not one of `""`, `">"`, `"~"` or `"+"`.
impl From<&str> for Combinator {
	fn from(comb: &str) -> Self {
		use Combinator::*;
		match comb {
			"" => ChildrenAll,
			">" => Children,
			"~" => NextAll,
			"+" => Next,
			_ => panic!("Not supported combinator string '{}'", comb),
		}
	}
}

impl Combinator {
	/// Return the combinator that walks the same relation in the opposite
	/// direction.
	///
	/// The mapping is total and self-inverse: `c.reverse().reverse() == c` for
	/// every variant. `Siblings` and `Chain` are symmetric and map to themselves.
	pub fn reverse(&self) -> Self {
		use Combinator::*;
		// exhaustive on purpose: a new variant must decide its reverse explicitly
		match self {
			ChildrenAll => ParentAll,
			ParentAll => ChildrenAll,
			Children => Parent,
			Parent => Children,
			NextAll => PrevAll,
			PrevAll => NextAll,
			Next => Prev,
			Prev => Next,
			Siblings => Siblings,
			Chain => Chain,
		}
	}
}
71
/// A rule matcher together with the combinator the parser recorded for it.
pub type SelectorSegment = (Matcher, Combinator);
/// One comma-separated selector group after `Selector::optimize` has prepared it.
#[derive(Default, Debug)]
pub struct QueryProcess {
	/// Leading segments that were split off in front of `query`; `None` when
	/// the group was left in one piece.
	/// NOTE(review): presumably the query results must lie inside what these
	/// segments match — confirm against the code that executes a `Selector`.
	pub should_in: Option<SelectorGroupsItem>,
	/// Segments to run: the whole group, or its tail when `should_in` is set.
	pub query: SelectorGroupsItem,
}
78
/// A parsed selector: one `QueryProcess` per comma-separated group.
#[derive(Default, Debug)]
pub struct Selector {
	/// Prepared groups, in the order they appeared in the selector string.
	pub process: Vec<QueryProcess>,
}
83
// One selector group: a list of items, each item being a first segment plus
// any segments chained onto it.
type SelectorGroupsItem = Vec<Vec<SelectorSegment>>;
// All groups of a selector string (groups are separated by ',').
type SelectorGroups = Vec<SelectorGroupsItem>;
86impl Selector {
87	pub fn new() -> Self {
88		Selector {
89			process: Vec::with_capacity(1),
90		}
91	}
92	pub fn from_str(context: &str, use_lookup: bool) -> Result<Self, Error> {
93		let chars: Vec<char> = context.chars().collect();
94		let total_len = chars.len();
95		let mut selector = Selector::new();
96		if total_len > 0 {
97			let mut index: usize = 0;
98			let mut comb = Combinator::ChildrenAll;
99			let mut prev_in = PrevInSelector::Begin;
100			let mut last_in = prev_in;
101			let mut groups: SelectorGroups = Vec::new();
102			let splitter = SPLITTER.lock().unwrap();
103			let rules = RULES.lock().unwrap();
104			Selector::add_group(&mut groups);
105			while index < total_len {
106				let next_chars = &chars[index..];
107				// first check if combinator
108				if let Some((matched, len, _)) = Rule::exec_queues(&splitter, next_chars) {
109					let op = matched[0].chars.iter().collect::<String>();
110					let op = op.trim();
111					if prev_in == PrevInSelector::Splitter {
112						// wrong multiple combinator
113						return Err(Error::InvalidSelector {
114							context: String::from(context),
115							reason: format!(
116								"Wrong combinator '{}' at index {}",
117								matched[0].chars.iter().collect::<String>(),
118								index
119							),
120						});
121					}
122					// find the match
123					index += len;
124					// set combinator
125					if op == "," {
126						if prev_in != PrevInSelector::Selector {
127							return Err(Error::InvalidSelector {
128								context: String::from(context),
129								reason: format!("Wrong empty selector before ',' at index  {}", index),
130							});
131						}
132						Selector::add_group(&mut groups);
133						comb = Combinator::ChildrenAll;
134					} else {
135						comb = Combinator::from(op);
136					}
137					// set prev is splitter
138					if op.is_empty() {
139						last_in = prev_in;
140						prev_in = PrevInSelector::Splitter;
141					} else {
142						prev_in = PrevInSelector::Splitter;
143						last_in = prev_in;
144					}
145					continue;
146				}
147				// then it must match a selector rule
148				let mut is_new_item = true;
149				if prev_in == PrevInSelector::Selector {
150					comb = Combinator::Chain;
151					is_new_item = false;
152				} else {
153					prev_in = PrevInSelector::Selector;
154					last_in = prev_in;
155				}
156				let mut finded = false;
157				for (_, r) in rules.iter() {
158					if let Some((mut matched, len, queue_num)) = r.exec(next_chars) {
159						// find the rule
160						index += len;
161						let queues = &r.queues;
162						if queue_num == queues.len() {
163							// push to selector
164							Selector::add_group_item(&mut groups, (r.make(&matched), comb), is_new_item);
165							finded = true;
166						} else if queues[queue_num].is_nested() {
167							// nested selector
168							let (len, nested_matched) = Selector::parse_until(
169								&chars[index..],
170								&queues[queue_num + 1..],
171								&rules,
172								&splitter,
173								0,
174							);
175							index += len;
176							matched.extend(nested_matched);
177							Selector::add_group_item(&mut groups, (r.make(&matched), comb), is_new_item);
178							finded = true;
179						}
180						break;
181					}
182				}
183				if !finded {
184					// no splitter, no selector rule
185					return Err(Error::InvalidSelector {
186						context: String::from(context),
187						reason: format!(
188							"Unrecognized selector '{}' at index {}",
189							next_chars.iter().collect::<String>(),
190							index
191						),
192					});
193				}
194			}
195			if last_in != PrevInSelector::Selector {
196				return Err(Error::InvalidSelector {
197					context: String::from(context),
198					reason: String::from("Wrong selector rule at last"),
199				});
200			}
201			// optimize groups to query process
202			selector.optimize(groups, use_lookup);
203		}
204
205		Ok(selector)
206	}
207	// add a selector group, splitted by ','
208	fn add_group(groups: &mut SelectorGroups) {
209		groups.push(Vec::with_capacity(2));
210	}
211	// add a selector group item
212	fn add_group_item(groups: &mut SelectorGroups, item: SelectorSegment, is_new: bool) {
213		if let Some(last_group) = groups.last_mut() {
214			if is_new {
215				last_group.push(vec![item]);
216			} else if let Some(last) = last_group.last_mut() {
217				last.push(item);
218			}
219		}
220	}
221	// optimize the parse process
222	fn optimize(&mut self, groups: SelectorGroups, use_lookup: bool) {
223		let mut process: Vec<QueryProcess> = Vec::with_capacity(groups.len());
224		for mut group in groups {
225			// first optimize the chain selectors, the rule who's priority is bigger will apply first
226			let mut max_index: usize = 0;
227			let mut max_priority: u32 = 0;
228			for (index, r) in group.iter_mut().enumerate() {
229				let mut total_priority = 0;
230				if r.len() > 1 {
231					let chain_comb = r[0].1;
232					r.sort_by(|a, b| b.0.priority.partial_cmp(&a.0.priority).unwrap());
233					let mut now_first = &mut r[0];
234					if now_first.1 != chain_comb {
235						now_first.1 = chain_comb;
236						total_priority += now_first.0.priority;
237						for n in &mut r[1..] {
238							n.1 = Combinator::Chain;
239							total_priority += n.0.priority;
240						}
241						continue;
242					}
243				}
244				if use_lookup {
245					total_priority = r.iter().map(|p| p.0.priority).sum();
246					if total_priority > max_priority {
247						max_priority = total_priority;
248						max_index = index;
249					}
250				}
251			}
252			// if the first combinator is child, and the max_index > 1, use the max_index's rule first
253			if use_lookup && max_index > 0 {
254				let is_child = matches!(
255					group[0][0].1,
256					Combinator::Children | Combinator::ChildrenAll
257				);
258				if is_child {
259					let query = group.split_off(max_index);
260					let should_in = Some(group);
261					process.push(QueryProcess { should_in, query });
262					continue;
263				}
264			}
265			process.push(QueryProcess {
266				should_in: None,
267				query: group,
268			});
269		}
270		self.process = process;
271	}
272	// change the combinator
273	pub fn head_combinator(&mut self, comb: Combinator) {
274		for p in &mut self.process {
275			let v = if let Some(should_in) = &mut p.should_in {
276				should_in
277			} else {
278				&mut p.query
279			};
280			if let Some(rule) = v.get_mut(0) {
281				let first_comb = rule[0].1;
282				match first_comb {
283					Combinator::ChildrenAll => rule[0].1 = comb,
284					_ => {
285						let segment = Selector::make_comb_all(comb);
286						v.insert(0, vec![segment]);
287					}
288				};
289			}
290		}
291	}
292	// make '*' with combinator
293	pub fn make_comb_all(comb: Combinator) -> SelectorSegment {
294		let mut all_rule = ALL_RULE.lock().unwrap();
295		if all_rule.is_none() {
296			let rules = RULES.lock().unwrap();
297			for (name, rule) in &rules[..] {
298				if *name == NAME_SELECTOR_ALL {
299					*all_rule = Some(Arc::clone(rule));
300					break;
301				}
302			}
303		}
304		let cur_rule = Arc::clone(all_rule.as_ref().expect("All rule must add to rules"));
305		let matcher = cur_rule.make(&[]);
306		(matcher, comb)
307	}
308	// build a selector from a segment
309	pub fn from_segment(segment: SelectorSegment) -> Self {
310		let process = QueryProcess {
311			query: vec![vec![segment]],
312			should_in: None,
313		};
314		Selector {
315			process: vec![process],
316		}
317	}
318	// parse until
319	pub fn parse_until(
320		chars: &[char],
321		until: &[BoxDynPattern],
322		rules: &[(&str, Arc<Rule>)],
323		splitter: &[BoxDynPattern],
324		level: usize,
325	) -> (usize, Vec<Matched>) {
326		let mut index = 0;
327		let total = chars.len();
328		let mut matched: Vec<Matched> = Vec::with_capacity(until.len() + 1);
329		while index < total {
330			let next_chars = &chars[index..];
331			if let Some((_, len, _)) = Rule::exec_queues(splitter, next_chars) {
332				index += len;
333				continue;
334			}
335			let mut finded = false;
336			for (_, r) in rules.iter() {
337				if let Some((_, len, queue_num)) = r.exec(next_chars) {
338					let queues = &r.queues;
339					// find the rule
340					index += len;
341					if queue_num == queues.len() {
342						// push to selector
343						finded = true;
344					} else {
345						let (nest_count, _) = Selector::parse_until(
346							&chars[index..],
347							&queues[queue_num + 1..],
348							rules,
349							splitter,
350							level + 1,
351						);
352						index += nest_count;
353					}
354					break;
355				}
356			}
357			if !finded {
358				if level == 0 {
359					matched.push(Matched {
360						chars: chars[0..index].iter().copied().collect(),
361						name: "selector",
362						..Default::default()
363					});
364				}
365				if !until.is_empty() {
366					let (util_matched, count, queue_num, _) = exec(until, &chars[index..]);
367					if queue_num != until.len() {
368						panic!("nested selector parse error");
369					} else {
370						index += count;
371						if level == 0 {
372							matched.extend(util_matched);
373						}
374					}
375				}
376				break;
377			}
378		}
379		(index, matched)
380	}
381}
382
/// Kind of token the parser in `Selector::from_str` has consumed so far.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum PrevInSelector {
	/// Nothing has been consumed yet.
	Begin,
	/// A combinator or whitespace matched by the splitter.
	Splitter,
	/// A selector rule.
	Selector,
}
389
390impl FromStr for Selector {
391	type Err = Error;
392	fn from_str(selector: &str) -> Result<Self, Self::Err> {
393		Selector::from_str(selector, true)
394	}
395}