1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use super::node::Node;
7use super::node_type::NodeType;
8use super::schema::Schema;
9#[derive(Clone, PartialEq, Eq, Debug)]
10pub struct MatchEdge {
11 pub node_type: NodeType,
12 pub next: ContentMatch,
13}
14
15#[derive(Clone, PartialEq, Eq, Debug, Default)]
16pub struct ContentMatch {
17 pub next: Vec<MatchEdge>,
18 pub wrap_cache: Vec<Option<NodeType>>,
19 pub valid_end: bool,
20}
21impl Ord for ContentMatch {
22 fn cmp(
23 &self,
24 other: &Self,
25 ) -> Ordering {
26 let _ = other;
27 Ordering::Equal
28 }
29}
30impl PartialOrd for ContentMatch {
31 fn partial_cmp(
32 &self,
33 other: &Self,
34 ) -> Option<Ordering> {
35 Some(self.cmp(other))
36 }
37}
38
39impl ContentMatch {
40 pub fn parse(
41 str: String,
42 nodes: &HashMap<String, NodeType>,
43 ) -> ContentMatch {
44 let mut stream = TokenStream::new(str, nodes.clone());
45 if stream.next().is_none() {
46 return ContentMatch::empty();
47 }
48 let expr = parse_expr(&mut stream);
49
50 let arr = nfa(expr);
51
52 dfa(arr)
53 }
54 pub fn empty() -> Self {
55 ContentMatch {
56 next: Vec::new(),
57 wrap_cache: Vec::new(),
58 valid_end: true,
59 }
60 }
61
62 pub fn match_type(
63 &self,
64 node_type: &NodeType,
65 ) -> Option<&ContentMatch> {
66 self.next
67 .iter()
68 .find(|edge| &edge.node_type == node_type)
69 .map(|edge| &edge.next)
70 }
71
72 pub fn match_fragment(
73 &self,
74 frag: &[Node],
75 schema: &Schema,
76 ) -> Option<&ContentMatch> {
77 let mut current: &ContentMatch = self;
78
79 for content in frag.iter() {
80 if let Some(next) =
81 current.match_type(schema.nodes.get(&content.r#type).unwrap())
82 {
83 current = next;
84 }
85 }
86 Some(current)
87 }
88
89 pub fn fill(
99 &self,
100 after: &Vec<Node>,
101 to_end: bool,
102 schema: &Schema,
103 ) -> Option<Vec<Node>> {
104 let mut seen: Vec<ContentMatch> = Vec::new();
105 seen.push(self.clone());
106 fn search(
107 seen: &mut Vec<ContentMatch>,
108 to_end: bool,
109 after: &Vec<Node>,
110 match_: &ContentMatch,
111 types: &mut Vec<NodeType>,
112 schema: &Schema,
113 ) -> Option<Vec<Node>> {
114 if let Some(finished) = match_.match_fragment(after, schema) {
116 if finished.valid_end || !to_end {
117 let mut nodes = vec![];
118 for tp in types.iter() {
119 let node = tp.create(None, None, vec![], None);
121 nodes.push(node);
122 }
123 return Some(nodes);
124 }
125 }
126
127 for edge in &match_.next {
129 if !edge.node_type.has_required_attrs()
130 && !seen.contains(&edge.next)
131 {
132 seen.push(edge.next.clone());
133 types.push(edge.node_type.clone());
134 if let Some(found) =
135 search(seen, to_end, after, &edge.next, types, schema)
136 {
137 return Some(found);
138 }
139 types.pop();
140 }
141 }
142 None
143 }
144
145 search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
146 }
147
148 pub fn default_type(&self) -> Option<&NodeType> {
149 self.next
150 .iter()
151 .find(|edge| !edge.node_type.has_required_attrs())
152 .map(|edge| &edge.node_type)
153 }
154
155 pub fn compatible(
156 &self,
157 other: &ContentMatch,
158 ) -> bool {
159 for edge1 in &self.next {
160 for edge2 in &other.next {
161 if edge1.node_type == edge2.node_type {
162 return true;
163 }
164 }
165 }
166 false
167 }
168
169 pub fn edge_count(&self) -> usize {
170 self.next.len()
171 }
172
173 pub fn edge(
174 &self,
175 n: usize,
176 ) -> Result<&MatchEdge, String> {
177 if n >= self.next.len() {
178 Err(format!("{} 超出了 {}", n, self.next.len()))
179 } else {
180 Ok(&self.next[n])
181 }
182 }
183}
184impl fmt::Display for ContentMatch {
185 fn fmt(
186 &self,
187 f: &mut fmt::Formatter<'_>,
188 ) -> fmt::Result {
189 let mut seen = Vec::new();
190 fn scan(
191 m: &ContentMatch,
192 seen: &mut Vec<ContentMatch>,
193 ) {
194 seen.push(m.clone());
195 for edge in &m.next {
196 if !seen.iter().any(|s| s == &edge.next) {
197 scan(&edge.next, seen);
198 }
199 }
200 }
201 scan(self, &mut seen);
202
203 let str = seen
204 .iter()
205 .enumerate()
206 .map(|(i, m)| {
207 let mut out =
208 format!("{} ", if m.valid_end { i + 1 } else { i });
209 for (j, edge) in m.next.iter().enumerate() {
210 if j > 0 {
211 out.push_str(", ");
212 }
213 out.push_str(&format!(
214 "{}->{}",
215 edge.node_type.name,
216 seen.iter().position(|s| s == &edge.next).unwrap() + 1
217 ));
218 }
219 out
220 })
221 .collect::<Vec<_>>()
222 .join("\n");
223
224 write!(f, "{}", str)
225 }
226}
227
228#[derive(Clone, PartialEq, Eq, Debug)]
229pub struct TokenStream {
230 pos: usize,
231 tokens: Vec<String>,
232 node_types: HashMap<String, NodeType>,
233 string: String,
234}
235
236impl TokenStream {
237 pub fn new(
238 string: String,
239 node_types: HashMap<String, NodeType>,
240 ) -> Self {
241 let mut tokens = Vec::new();
242 let mut current_token = String::new();
243 for c in string.chars() {
244 if c.is_whitespace() {
245 if !current_token.is_empty() {
247 tokens.push(current_token.clone());
248 current_token.clear(); }
250 } else if !c.is_alphanumeric() {
251 if !current_token.is_empty() {
253 tokens.push(current_token.clone());
254 current_token.clear(); }
256 tokens.push(c.to_string());
258 } else {
259 current_token.push(c);
261 }
262 }
263
264 if !current_token.is_empty() {
266 tokens.push(current_token);
267 }
268 TokenStream { pos: 0, tokens, node_types, string }
269 }
270
271 pub fn next(&self) -> Option<&str> {
272 self.tokens.get(self.pos).map(|s| s.as_str())
273 }
274
275 pub fn eat(
276 &mut self,
277 tok: &str,
278 ) -> bool {
279 if self.next() == Some(tok) {
280 self.pos += 1;
281 true
282 } else {
283 false
284 }
285 }
286
287 pub fn err(
288 &self,
289 str: &str,
290 ) -> ! {
291 panic!("{} (约束必须是 '{}')", str, self.string);
292 }
293}
294
295#[derive(Debug, Clone)]
296enum Expr {
297 Choice { exprs: Vec<Expr> },
298 Seq { exprs: Vec<Expr> },
299 Plus { expr: Box<Expr> },
300 Star { expr: Box<Expr> },
301 Opt { expr: Box<Expr> },
302 Range { min: usize, max: isize, expr: Box<Expr> },
303 Name { value: NodeType },
304}
305fn parse_expr(stream: &mut TokenStream) -> Expr {
306 let mut exprs = Vec::new();
307
308 loop {
309 exprs.push(parse_expr_seq(stream));
310 if !stream.eat("|") {
311 break;
312 }
313 }
314 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
315}
316fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
317 let mut exprs = Vec::new();
318
319 while let Some(next) = stream.next() {
320 if next == ")" || next == "|" {
321 break;
322 }
323 exprs.push(parse_expr_subscript(stream));
324 }
325 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
326}
327
328fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
329 let mut expr = parse_expr_atom(stream);
330 loop {
331 if stream.eat("+") {
332 expr = Expr::Plus { expr: Box::new(expr) };
333 } else if stream.eat("*") {
334 expr = Expr::Star { expr: Box::new(expr) };
335 } else if stream.eat("?") {
336 expr = Expr::Opt { expr: Box::new(expr) };
337 } else if stream.eat("{") {
338 expr = parse_expr_range(stream, expr);
339 } else {
340 break;
341 }
342 }
343 expr
344}
345
346fn parse_num(stream: &mut TokenStream) -> usize {
347 let next = stream.next().unwrap();
348 if !next.chars().all(|c| c.is_ascii_digit()) {
349 stream.err(&format!("Expected number, got '{}'", next));
350 }
351 let result = next.parse().unwrap();
352 stream.pos += 1;
353 result
354}
355fn parse_expr_range(
356 stream: &mut TokenStream,
357 expr: Expr,
358) -> Expr {
359 let min = parse_num(stream);
360 let max = if stream.eat(",") {
361 if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
362 } else {
363 min as isize
364 };
365 if !stream.eat("}") {
366 stream.err("Unclosed braced range");
367 }
368 Expr::Range { min, max, expr: Box::new(expr) }
369}
370
371fn resolve_name(
372 stream: &TokenStream,
373 name: &str,
374) -> Vec<NodeType> {
375 let types = &stream.node_types;
376 if let Some(type_) = types.get(name) {
377 return vec![type_.clone()];
378 }
379 let mut result = Vec::new();
380
381 for type_ in types.values() {
382 if type_.groups.contains(&name.to_string()) {
383 result.push(type_.clone());
384 }
385 }
386 if result.is_empty() {
387 stream.err(&format!("没找到类型 '{}'", name));
388 }
389 result
390}
391
392fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
393 if stream.eat("(") {
394 let expr = parse_expr(stream);
395 if !stream.eat(")") {
396 stream.err("Missing closing paren");
397 }
398 expr
399 } else if let Some(next) = stream.next() {
400 if next.chars().all(|c| c.is_alphanumeric()) {
401 let exprs: Vec<Expr> = resolve_name(stream, next)
402 .into_iter()
403 .map(|type_| Expr::Name { value: type_ })
404 .collect();
405 stream.pos += 1;
406 if exprs.len() == 1 {
407 exprs.into_iter().next().unwrap()
408 } else {
409 Expr::Choice { exprs }
410 }
411 } else {
412 stream.err(&format!("Unexpected token '{}'", next));
413 }
414 } else {
415 stream.err("Unexpected end of input");
416 }
417}
418#[derive(Debug, Clone)]
419pub struct Edge {
420 term: Option<NodeType>,
421 to: Option<usize>,
422}
423fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
424 let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
425
426 fn explore(
427 states: Vec<usize>,
428 nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
429 labeled: &mut HashMap<String, ContentMatch>,
430 ) -> ContentMatch {
431 let mut out: Vec<(NodeType, Vec<usize>)> = Vec::new();
432 for &node in &states {
433 for edge in &nfa[node] {
434 if edge.borrow().term.is_none() {
435 continue;
436 }
437 let term = edge.borrow().term.clone().unwrap();
438 let mut set: Option<&mut Vec<usize>> = None;
439
440 for (t, s) in &mut out {
441 if *t == term {
442 set = Some(s);
443 break;
444 }
445 }
446
447 if set.is_none() {
448 out.push((term.clone(), Vec::new()));
449 set = Some(&mut out.last_mut().unwrap().1);
450 }
451 for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
452 set.as_mut().unwrap().push(node);
453 }
454 }
455 }
456 let mut state = ContentMatch {
457 next: Vec::new(),
458 wrap_cache: vec![],
459 valid_end: states.contains(&(nfa.len() - 1)),
460 };
461
462 let state_key =
463 states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
464 labeled.insert(state_key.clone(), state.clone());
465
466 for (term, states) in out {
467 let states_key = states
468 .iter()
469 .map(|&x| x.to_string())
470 .collect::<Vec<_>>()
471 .join(",");
472 let next_state = labeled
473 .get(&states_key)
474 .cloned()
475 .unwrap_or_else(|| explore(states, nfa, labeled));
476 labeled.insert(states_key, next_state.clone());
477 state.next.push(MatchEdge { node_type: term, next: next_state });
478 }
479
480 state
481 }
482
483 explore(null_from(&nfa, 0), &nfa, &mut labeled)
484}
485
486pub fn null_from(
487 nfa: &[Vec<Rc<RefCell<Edge>>>],
488 node: usize,
489) -> Vec<usize> {
490 let mut result = Vec::new();
491 fn scan(
492 nfa: &[Vec<Rc<RefCell<Edge>>>],
493 node: usize,
494 result: &mut Vec<usize>,
495 ) {
496 let edges = &nfa[node];
497 if edges.len() == 1 && edges[0].borrow().term.is_none() {
498 if let Some(to) = edges[0].borrow().to {
499 scan(nfa, to, result);
500 }
501 return;
502 }
503 if !result.contains(&node) {
504 result.push(node);
505 }
506 for edge in edges {
507 if edge.borrow().term.is_none() {
508 if let Some(to) = edge.borrow().to {
509 if !result.contains(&to) {
510 scan(nfa, to, result);
511 }
512 }
513 }
514 }
515 }
516
517 scan(nfa, node, &mut result);
518 result.sort();
519 result
520}
521fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
522 let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
523 connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
524 nfa
525}
526fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
527 nfa.push(vec![]);
528 nfa.len() - 1
529}
530
531fn edge(
532 from: usize,
533 to: Option<usize>,
534 term: Option<NodeType>,
535 nfa: &mut [Vec<Rc<RefCell<Edge>>>],
536) -> Rc<RefCell<Edge>> {
537 let edge =
538 Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
539 nfa[from].push(edge.clone());
540 edge.clone()
541}
542fn connect(
543 edges: &mut [Rc<RefCell<Edge>>],
544 to: usize,
545) {
546 for edge in edges {
547 edge.borrow_mut().to = Some(to);
548 }
549}
550fn compile(
551 expr: Expr,
552 from: usize,
553 nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
554) -> Vec<Rc<RefCell<Edge>>> {
555 match expr {
556 Expr::Choice { exprs } => exprs
557 .into_iter()
558 .flat_map(|expr| compile(expr, from, nfa))
559 .collect(),
560 Expr::Seq { exprs } => {
561 let mut cur = from;
562 let mut last_edges = Vec::new();
563 let exprs_len = exprs.len();
564
565 for (i, expr) in exprs.into_iter().enumerate() {
566 let next = if i == exprs_len - 1 { cur } else { node(nfa) };
567
568 let mut edges = compile(expr, cur, nfa);
569 if i < exprs_len - 1 {
570 connect(&mut edges, next);
571 cur = next;
572 } else {
573 last_edges = edges;
574 }
575 }
576
577 if last_edges.is_empty() {
578 vec![edge(cur, None, None, nfa)]
579 } else {
580 last_edges
581 }
582 },
583 Expr::Star { expr } => {
584 let loop_node = node(nfa);
585 edge(from, Some(loop_node), None, nfa);
586 let mut compiled_expr = compile(*expr, loop_node, nfa);
587 connect(&mut compiled_expr, loop_node);
588 vec![edge(loop_node, None, None, nfa)]
589 },
590 Expr::Plus { expr } => {
591 let loop_node = node(nfa);
592 connect(&mut compile(*expr.clone(), from, nfa), loop_node);
593 let mut compiled_expr = compile(*expr, loop_node, nfa);
594 connect(&mut compiled_expr, loop_node);
595 vec![edge(loop_node, None, None, nfa)]
596 },
597 Expr::Opt { expr } => {
598 let mut edges = vec![edge(from, None, None, nfa)];
599 edges.extend(compile(*expr, from, nfa));
600 edges
601 },
602 Expr::Range { expr, min, max } => {
603 let mut cur = from;
604 for _ in 0..min {
605 let next = node(nfa);
606 connect(&mut compile(*expr.clone(), cur, nfa), next);
607 cur = next;
608 }
609 if max == -1 {
610 connect(&mut compile(*expr, cur, nfa), cur);
611 } else {
612 for _ in min..max as usize {
613 let next = node(nfa);
614 edge(cur, Some(next), None, nfa);
615 connect(&mut compile(*expr.clone(), cur, nfa), next);
616 cur = next;
617 }
618 }
619 vec![edge(cur, None, None, nfa)]
620 },
621 Expr::Name { value } => {
622 vec![edge(from, None, Some(value), nfa)]
623 },
624 }
625}