1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use super::node::Node;
7use super::node_type::NodeType;
8use super::schema::Schema;
9#[derive(Clone, PartialEq, Eq, Debug)]
10pub struct MatchEdge {
11 pub node_type: NodeType,
12 pub next: ContentMatch,
13}
14
15#[derive(Clone, PartialEq, Eq, Debug, Default)]
16pub struct ContentMatch {
17 pub next: Vec<MatchEdge>,
18 pub wrap_cache: Vec<Option<NodeType>>,
19 pub valid_end: bool,
20}
21impl Ord for ContentMatch {
22 fn cmp(
23 &self,
24 other: &Self,
25 ) -> Ordering {
26 let _ = other;
27 Ordering::Equal
28 }
29}
30impl PartialOrd for ContentMatch {
31 fn partial_cmp(
32 &self,
33 other: &Self,
34 ) -> Option<Ordering> {
35 Some(self.cmp(other))
36 }
37}
38
39impl ContentMatch {
40 pub fn parse(
41 str: String,
42 nodes: &HashMap<String, NodeType>,
43 ) -> ContentMatch {
44 let mut stream = TokenStream::new(str, nodes.clone());
45 if stream.next().is_none() {
46 return ContentMatch::empty();
47 }
48 let expr = parse_expr(&mut stream);
49
50 let arr = nfa(expr);
51
52 dfa(arr)
53 }
54 pub fn empty() -> Self {
55 ContentMatch { next: Vec::new(), wrap_cache: Vec::new(), valid_end: true }
56 }
57
58 pub fn match_type(
59 &self,
60 node_type: &NodeType,
61 ) -> Option<&ContentMatch> {
62 self.next.iter().find(|edge| &edge.node_type == node_type).map(|edge| &edge.next)
63 }
64
65 pub fn match_fragment(
66 &self,
67 frag: &[Node],
68 schema: &Schema,
69 ) -> Option<&ContentMatch> {
70 let mut current: &ContentMatch = self;
71
72 for content in frag.iter() {
73 if let Some(next) = current.match_type(schema.nodes.get(&content.r#type).unwrap()) {
74 current = next;
75 }
76 }
77 Some(current)
78 }
79
80 pub fn fill(
81 &self,
82 after: &Vec<Node>,
83 to_end: bool,
84 schema: &Schema,
85 ) -> Option<Vec<Node>> {
86 let mut seen: Vec<ContentMatch> = Vec::new();
87 seen.push(self.clone());
88 fn search(
89 seen: &mut Vec<ContentMatch>,
90 to_end: bool,
91 after: &Vec<Node>,
92 match_: &ContentMatch,
93 types: &mut Vec<NodeType>,
94 schema: &Schema,
95 ) -> Option<Vec<Node>> {
96 if let Some(finished) = match_.match_fragment(after, schema) {
97 if finished.valid_end || !to_end {
98 let mut nodes = vec![];
99 for tp in types.iter() {
100 let mut node = tp.create_and_fill(None, None, vec![], None, schema);
101 nodes.append(&mut node);
102 }
103 return Some(nodes);
104 }
105 }
106 for edge in &match_.next {
107 if !edge.node_type.has_required_attrs() && !seen.contains(&edge.next) {
108 seen.push(edge.next.clone());
109 types.push(edge.node_type.clone());
110 if let Some(found) = search(seen, to_end, after, &edge.next, types, schema) {
111 return Some(found);
112 }
113 types.pop();
114 }
115 }
116 None
117 }
118
119 search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
120 }
121
122 pub fn default_type(&self) -> Option<&NodeType> {
123 self.next.iter().find(|edge| !edge.node_type.has_required_attrs()).map(|edge| &edge.node_type)
124 }
125
126 pub fn compatible(
127 &self,
128 other: &ContentMatch,
129 ) -> bool {
130 for edge1 in &self.next {
131 for edge2 in &other.next {
132 if edge1.node_type == edge2.node_type {
133 return true;
134 }
135 }
136 }
137 false
138 }
139
140 pub fn edge_count(&self) -> usize {
141 self.next.len()
142 }
143
144 pub fn edge(
145 &self,
146 n: usize,
147 ) -> Result<&MatchEdge, String> {
148 if n >= self.next.len() { Err(format!("{} 超出了 {}", n, self.next.len())) } else { Ok(&self.next[n]) }
149 }
150}
151impl fmt::Display for ContentMatch {
152 fn fmt(
153 &self,
154 f: &mut fmt::Formatter<'_>,
155 ) -> fmt::Result {
156 let mut seen = Vec::new();
157 fn scan(
158 m: &ContentMatch,
159 seen: &mut Vec<ContentMatch>,
160 ) {
161 seen.push(m.clone());
162 for edge in &m.next {
163 if !seen.iter().any(|s| s == &edge.next) {
164 scan(&edge.next, seen);
165 }
166 }
167 }
168 scan(self, &mut seen);
169
170 let str = seen
171 .iter()
172 .enumerate()
173 .map(|(i, m)| {
174 let mut out = format!("{} ", if m.valid_end { i + 1 } else { i });
175 for (j, edge) in m.next.iter().enumerate() {
176 if j > 0 {
177 out.push_str(", ");
178 }
179 out.push_str(&format!(
180 "{}->{}",
181 edge.node_type.name,
182 seen.iter().position(|s| s == &edge.next).unwrap() + 1
183 ));
184 }
185 out
186 })
187 .collect::<Vec<_>>()
188 .join("\n");
189
190 write!(f, "{}", str)
191 }
192}
193
194#[derive(Clone, PartialEq, Eq, Debug)]
195pub struct TokenStream {
196 pos: usize,
197 tokens: Vec<String>,
198 node_types: HashMap<String, NodeType>,
199 string: String,
200}
201
202impl TokenStream {
203 pub fn new(
204 string: String,
205 node_types: HashMap<String, NodeType>,
206 ) -> Self {
207 let mut tokens = Vec::new();
208 let mut current_token = String::new();
209 for c in string.chars() {
210 if c.is_whitespace() {
211 if !current_token.is_empty() {
213 tokens.push(current_token.clone());
214 current_token.clear(); }
216 } else if !c.is_alphanumeric() {
217 if !current_token.is_empty() {
219 tokens.push(current_token.clone());
220 current_token.clear(); }
222 tokens.push(c.to_string());
224 } else {
225 current_token.push(c);
227 }
228 }
229
230 if !current_token.is_empty() {
232 tokens.push(current_token);
233 }
234 TokenStream { pos: 0, tokens, node_types, string }
235 }
236
237 pub fn next(&self) -> Option<&str> {
238 self.tokens.get(self.pos).map(|s| s.as_str())
239 }
240
241 pub fn eat(
242 &mut self,
243 tok: &str,
244 ) -> bool {
245 if self.next() == Some(tok) {
246 self.pos += 1;
247 true
248 } else {
249 false
250 }
251 }
252
253 pub fn err(
254 &self,
255 str: &str,
256 ) -> ! {
257 panic!("{} (约束必须是 '{}')", str, self.string);
258 }
259}
260
261#[derive(Debug, Clone)]
262enum Expr {
263 Choice { exprs: Vec<Expr> },
264 Seq { exprs: Vec<Expr> },
265 Plus { expr: Box<Expr> },
266 Star { expr: Box<Expr> },
267 Opt { expr: Box<Expr> },
268 Range { min: usize, max: isize, expr: Box<Expr> },
269 Name { value: NodeType },
270}
271fn parse_expr(stream: &mut TokenStream) -> Expr {
272 let mut exprs = Vec::new();
273
274 loop {
275 exprs.push(parse_expr_seq(stream));
276 if !stream.eat("|") {
277 break;
278 }
279 }
280 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
281}
282fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
283 let mut exprs = Vec::new();
284
285 while let Some(next) = stream.next() {
286 if next == ")" || next == "|" {
287 break;
288 }
289 exprs.push(parse_expr_subscript(stream));
290 }
291 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
292}
293
294fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
295 let mut expr = parse_expr_atom(stream);
296 loop {
297 if stream.eat("+") {
298 expr = Expr::Plus { expr: Box::new(expr) };
299 } else if stream.eat("*") {
300 expr = Expr::Star { expr: Box::new(expr) };
301 } else if stream.eat("?") {
302 expr = Expr::Opt { expr: Box::new(expr) };
303 } else if stream.eat("{") {
304 expr = parse_expr_range(stream, expr);
305 } else {
306 break;
307 }
308 }
309 expr
310}
311
312fn parse_num(stream: &mut TokenStream) -> usize {
313 let next = stream.next().unwrap();
314 if !next.chars().all(|c| c.is_ascii_digit()) {
315 stream.err(&format!("Expected number, got '{}'", next));
316 }
317 let result = next.parse().unwrap();
318 stream.pos += 1;
319 result
320}
321fn parse_expr_range(
322 stream: &mut TokenStream,
323 expr: Expr,
324) -> Expr {
325 let min = parse_num(stream);
326 let max = if stream.eat(",") {
327 if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
328 } else {
329 min as isize
330 };
331 if !stream.eat("}") {
332 stream.err("Unclosed braced range");
333 }
334 Expr::Range { min, max, expr: Box::new(expr) }
335}
336
337fn resolve_name(
338 stream: &TokenStream,
339 name: &str,
340) -> Vec<NodeType> {
341 let types = &stream.node_types;
342 if let Some(type_) = types.get(name) {
343 return vec![type_.clone()];
344 }
345 let mut result = Vec::new();
346
347 for type_ in types.values() {
348 if type_.groups.contains(&name.to_string()) {
349 result.push(type_.clone());
350 }
351 }
352 if result.is_empty() {
353 stream.err(&format!("No node type or group '{}' found", name));
354 }
355 result
356}
357
358fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
359 if stream.eat("(") {
360 let expr = parse_expr(stream);
361 if !stream.eat(")") {
362 stream.err("Missing closing paren");
363 }
364 expr
365 } else if let Some(next) = stream.next() {
366 if next.chars().all(|c| c.is_alphanumeric()) {
367 let exprs: Vec<Expr> =
368 resolve_name(stream, next).into_iter().map(|type_| Expr::Name { value: type_ }).collect();
369 stream.pos += 1;
370 if exprs.len() == 1 { exprs.into_iter().next().unwrap() } else { Expr::Choice { exprs } }
371 } else {
372 stream.err(&format!("Unexpected token '{}'", next));
373 }
374 } else {
375 stream.err("Unexpected end of input");
376 }
377}
378#[derive(Debug, Clone)]
379pub struct Edge {
380 term: Option<NodeType>,
381 to: Option<usize>,
382}
383fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
384 let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
385
386 fn explore(
387 states: Vec<usize>,
388 nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
389 labeled: &mut HashMap<String, ContentMatch>,
390 ) -> ContentMatch {
391 let mut out: Vec<(NodeType, Vec<usize>)> = Vec::new();
392 for &node in &states {
393 for edge in &nfa[node] {
394 if edge.borrow().term.is_none() {
395 continue;
396 }
397 let term = edge.borrow().term.clone().unwrap();
398 let mut set: Option<&mut Vec<usize>> = None;
399
400 for (t, s) in &mut out {
401 if *t == term {
402 set = Some(s);
403 break;
404 }
405 }
406
407 if set.is_none() {
408 out.push((term.clone(), Vec::new()));
409 set = Some(&mut out.last_mut().unwrap().1);
410 }
411 for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
412 set.as_mut().unwrap().push(node);
413 }
414 }
415 }
416 let mut state =
417 ContentMatch { next: Vec::new(), wrap_cache: vec![], valid_end: states.contains(&(nfa.len() - 1)) };
418
419 let state_key = states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
420 labeled.insert(state_key.clone(), state.clone());
421
422 for (term, states) in out {
423 let states_key = states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
424 let next_state = labeled.get(&states_key).cloned().unwrap_or_else(|| explore(states, nfa, labeled));
425 labeled.insert(states_key, next_state.clone());
426 state.next.push(MatchEdge { node_type: term, next: next_state });
427 }
428
429 state
430 }
431
432 explore(null_from(&nfa, 0), &nfa, &mut labeled)
433}
434
435pub fn null_from(
436 nfa: &[Vec<Rc<RefCell<Edge>>>],
437 node: usize,
438) -> Vec<usize> {
439 let mut result = Vec::new();
440 fn scan(
441 nfa: &[Vec<Rc<RefCell<Edge>>>],
442 node: usize,
443 result: &mut Vec<usize>,
444 ) {
445 let edges = &nfa[node];
446 if edges.len() == 1 && edges[0].borrow().term.is_none() {
447 if let Some(to) = edges[0].borrow().to {
448 scan(nfa, to, result);
449 }
450 return;
451 }
452 if !result.contains(&node) {
453 result.push(node);
454 }
455 for edge in edges {
456 if edge.borrow().term.is_none() {
457 if let Some(to) = edge.borrow().to {
458 if !result.contains(&to) {
459 scan(nfa, to, result);
460 }
461 }
462 }
463 }
464 }
465
466 scan(nfa, node, &mut result);
467 result.sort();
468 result
469}
470fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
471 let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
472 connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
473 nfa
474}
475fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
476 nfa.push(vec![]);
477 nfa.len() - 1
478}
479
480fn edge(
481 from: usize,
482 to: Option<usize>,
483 term: Option<NodeType>,
484 nfa: &mut [Vec<Rc<RefCell<Edge>>>],
485) -> Rc<RefCell<Edge>> {
486 let edge = Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
487 nfa[from].push(edge.clone());
488 edge.clone()
489}
490fn connect(
491 edges: &mut [Rc<RefCell<Edge>>],
492 to: usize,
493) {
494 for edge in edges {
495 edge.borrow_mut().to = Some(to);
496 }
497}
498fn compile(
499 expr: Expr,
500 from: usize,
501 nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
502) -> Vec<Rc<RefCell<Edge>>> {
503 match expr {
504 Expr::Choice { exprs } => exprs.into_iter().flat_map(|expr| compile(expr, from, nfa)).collect(),
505 Expr::Seq { exprs } => {
506 let mut cur = from;
507 let len = exprs.len(); for (i, expr) in exprs.into_iter().enumerate() {
509 let mut next = compile(expr, cur, nfa);
510 if i == len - 1 {
511 return next;
512 }
513 connect(&mut next, node(nfa));
514 cur = node(nfa);
515 }
516 vec![]
517 },
518 Expr::Star { expr } => {
519 let loop_node = node(nfa);
520 edge(from, Some(loop_node), None, nfa);
521 let mut compiled_expr = compile(*expr, loop_node, nfa);
522 connect(&mut compiled_expr, loop_node);
523 vec![edge(loop_node, None, None, nfa)]
524 },
525 Expr::Plus { expr } => {
526 let loop_node = node(nfa);
527 connect(&mut compile(*expr.clone(), from, nfa), loop_node);
528 let mut compiled_expr = compile(*expr, loop_node, nfa);
529 connect(&mut compiled_expr, loop_node);
530
531 vec![edge(loop_node, None, None, nfa)]
532 },
533 Expr::Opt { expr } => {
534 let mut edges = vec![edge(from, None, None, nfa)];
535 edges.extend(compile(*expr, from, nfa));
536 edges
537 },
538 Expr::Range { expr, min, max } => {
539 let mut cur = from;
540 for _ in 0..min {
541 let next = node(nfa);
542 connect(&mut compile(*expr.clone(), cur, nfa), next);
543 cur = next;
544 }
545 if max == -1 {
546 connect(&mut compile(*expr, cur, nfa), cur);
547 } else {
548 for _ in min..max as usize {
549 let next = node(nfa);
550 edge(cur, Some(next), None, nfa);
551 connect(&mut compile(*expr.clone(), cur, nfa), next);
552 cur = next;
553 }
554 }
555 vec![edge(cur, None, None, nfa)]
556 },
557 Expr::Name { value } => {
558 vec![edge(from, None, Some(value), nfa)]
559 },
560 }
561}