1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use crate::error::PoolResult;
7
8use super::node::Node;
9use super::node_definition::NodeDefinition;
10use super::schema::Schema;
11#[derive(Clone, PartialEq, Eq, Debug)]
12pub struct MatchEdge {
13 pub node_type: NodeDefinition,
14 pub next: ContentMatch,
15}
16
17#[derive(Clone, PartialEq, Eq, Debug, Default)]
18pub struct ContentMatch {
19 pub next: Vec<MatchEdge>,
20 pub wrap_cache: Vec<Option<NodeDefinition>>,
21 pub valid_end: bool,
22}
23impl Ord for ContentMatch {
24 fn cmp(
25 &self,
26 other: &Self,
27 ) -> Ordering {
28 let _ = other;
29 Ordering::Equal
30 }
31}
32impl PartialOrd for ContentMatch {
33 fn partial_cmp(
34 &self,
35 other: &Self,
36 ) -> Option<Ordering> {
37 Some(self.cmp(other))
38 }
39}
40
41impl ContentMatch {
42 pub fn parse(
43 str: String,
44 nodes: &HashMap<String, NodeDefinition>,
45 ) -> ContentMatch {
46 let mut stream = TokenStream::new(str, nodes.clone());
47 if stream.next().is_none() {
48 return ContentMatch::empty();
49 }
50 let expr = parse_expr(&mut stream);
51
52 let arr = nfa(expr);
53
54 dfa(arr)
55 }
56 pub fn empty() -> Self {
57 ContentMatch {
58 next: Vec::new(),
59 wrap_cache: Vec::new(),
60 valid_end: true,
61 }
62 }
63
64 pub fn match_type(
65 &self,
66 node_type: &NodeDefinition,
67 ) -> Option<&ContentMatch> {
68 self.next
69 .iter()
70 .find(|edge| &edge.node_type == node_type)
71 .map(|edge| &edge.next)
72 }
73
74 pub fn match_fragment(
75 &self,
76 frag: &[Node],
77 schema: &Schema,
78 ) -> Option<&ContentMatch> {
79 let mut current: &ContentMatch = self;
80
81 for content in frag.iter() {
82 if let Some(next) =
83 current.match_type(schema.nodes.get(&content.r#type).unwrap())
84 {
85 current = next;
86 } else {
87 return None;
89 }
90 }
91 Some(current)
92 }
93
94 pub fn fill(
104 &self,
105 after: &Vec<Node>,
106 to_end: bool,
107 schema: &Schema,
108 ) -> Option<Vec<String>> {
109 let mut seen: Vec<ContentMatch> = Vec::new();
110 seen.push(self.clone());
111 fn search(
112 seen: &mut Vec<ContentMatch>,
113 to_end: bool,
114 after: &Vec<Node>,
115 match_: &ContentMatch,
116 types: &mut Vec<String>,
117 schema: &Schema,
118 ) -> Option<Vec<String>> {
119 if let Some(finished) = match_.match_fragment(after, schema) {
121 if finished.valid_end || !to_end {
122 return Some(types.clone());
123 }
124 } else if !after.is_empty() {
125 return None;
127 }
128
129 for edge in &match_.next {
131 if !seen.contains(&edge.next) {
132 seen.push(edge.next.clone());
133 types.push(edge.node_type.name.clone());
134 if let Some(found) =
135 search(seen, to_end, after, &edge.next, types, schema)
136 {
137 return Some(found);
138 }
139 types.pop();
140 }
141 }
142 None
143 }
144
145 search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
146 }
147
148 pub fn default_type(&self) -> Option<&NodeDefinition> {
149 self.next
150 .iter()
151 .find(|edge| !edge.node_type.has_required_attrs())
152 .map(|edge| &edge.node_type)
153 }
154
155 pub fn compatible(
156 &self,
157 other: &ContentMatch,
158 ) -> bool {
159 for edge1 in &self.next {
160 for edge2 in &other.next {
161 if edge1.node_type == edge2.node_type {
162 return true;
163 }
164 }
165 }
166 false
167 }
168
169 pub fn edge_count(&self) -> usize {
170 self.next.len()
171 }
172
173 pub fn edge(
174 &self,
175 n: usize,
176 ) -> PoolResult<&MatchEdge> {
178 if n >= self.next.len() {
179 Err(anyhow::anyhow!(format!("{} 超出了 {}", n, self.next.len())))
180 } else {
181 Ok(&self.next[n])
182 }
183 }
184}
185impl fmt::Display for ContentMatch {
186 fn fmt(
187 &self,
188 f: &mut fmt::Formatter<'_>,
189 ) -> fmt::Result {
190 let mut seen = Vec::new();
191 fn scan(
192 m: &ContentMatch,
193 seen: &mut Vec<ContentMatch>,
194 ) {
195 seen.push(m.clone());
196 for edge in &m.next {
197 if !seen.iter().any(|s| s == &edge.next) {
198 scan(&edge.next, seen);
199 }
200 }
201 }
202 scan(self, &mut seen);
203
204 let str = seen
205 .iter()
206 .enumerate()
207 .map(|(i, m)| {
208 let mut out =
209 format!("{} ", if m.valid_end { i + 1 } else { i });
210 for (j, edge) in m.next.iter().enumerate() {
211 if j > 0 {
212 out.push_str(", ");
213 }
214 out.push_str(&format!(
215 "{}->{}",
216 edge.node_type.name,
217 seen.iter().position(|s| s == &edge.next).unwrap() + 1
218 ));
219 }
220 out
221 })
222 .collect::<Vec<_>>()
223 .join("\n");
224
225 write!(f, "{str}")
226 }
227}
228
229#[derive(Clone, PartialEq, Eq, Debug)]
230pub struct TokenStream {
231 pos: usize,
232 tokens: Vec<String>,
233 node_types: HashMap<String, NodeDefinition>,
234 string: String,
235}
236
237impl TokenStream {
238 pub fn new(
239 string: String,
240 node_types: HashMap<String, NodeDefinition>,
241 ) -> Self {
242 let mut tokens = Vec::new();
243 let mut current_token = String::new();
244 for c in string.chars() {
245 if c.is_whitespace() {
246 if !current_token.is_empty() {
248 tokens.push(current_token.clone());
249 current_token.clear(); }
251 } else if !c.is_alphanumeric() && c != '_' {
252 if !current_token.is_empty() {
254 tokens.push(current_token.clone());
255 current_token.clear(); }
257 tokens.push(c.to_string());
259 } else {
260 current_token.push(c);
262 }
263 }
264
265 if !current_token.is_empty() {
267 tokens.push(current_token);
268 }
269 TokenStream { pos: 0, tokens, node_types, string }
270 }
271
272 pub fn next(&self) -> Option<&str> {
273 self.tokens.get(self.pos).map(|s| s.as_str())
274 }
275
276 pub fn eat(
277 &mut self,
278 tok: &str,
279 ) -> bool {
280 if self.next() == Some(tok) {
281 self.pos += 1;
282 true
283 } else {
284 false
285 }
286 }
287
288 pub fn err(
289 &self,
290 str: &str,
291 ) -> ! {
292 panic!("{} (约束必须是 '{}')", str, self.string);
293 }
294}
295
296#[derive(Debug, Clone)]
297enum Expr {
298 Choice { exprs: Vec<Expr> },
299 Seq { exprs: Vec<Expr> },
300 Plus { expr: Box<Expr> },
301 Star { expr: Box<Expr> },
302 Opt { expr: Box<Expr> },
303 Range { min: usize, max: isize, expr: Box<Expr> },
304 Name { value: Box<NodeDefinition> },
305}
306fn parse_expr(stream: &mut TokenStream) -> Expr {
307 let mut exprs = Vec::new();
308
309 loop {
310 exprs.push(parse_expr_seq(stream));
311 if !stream.eat("|") {
312 break;
313 }
314 }
315 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
316}
317fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
318 let mut exprs = Vec::new();
319
320 while let Some(next) = stream.next() {
321 if next == ")" || next == "|" {
322 break;
323 }
324 exprs.push(parse_expr_subscript(stream));
325 }
326 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
327}
328
329fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
330 let mut expr = parse_expr_atom(stream);
331 loop {
332 if stream.eat("+") {
333 expr = Expr::Plus { expr: Box::new(expr) };
334 } else if stream.eat("*") {
335 expr = Expr::Star { expr: Box::new(expr) };
336 } else if stream.eat("?") {
337 expr = Expr::Opt { expr: Box::new(expr) };
338 } else if stream.eat("{") {
339 expr = parse_expr_range(stream, expr);
340 } else {
341 break;
342 }
343 }
344 expr
345}
346
347fn parse_num(stream: &mut TokenStream) -> usize {
348 let next = stream.next().unwrap();
349 if !next.chars().all(|c| c.is_ascii_digit()) {
350 stream.err(&format!("Expected number, got '{next}'"));
351 }
352 let result = next.parse().unwrap();
353 stream.pos += 1;
354 result
355}
356fn parse_expr_range(
357 stream: &mut TokenStream,
358 expr: Expr,
359) -> Expr {
360 let min = parse_num(stream);
361 let max = if stream.eat(",") {
362 if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
363 } else {
364 min as isize
365 };
366 if !stream.eat("}") {
367 stream.err("Unclosed braced range");
368 }
369 Expr::Range { min, max, expr: Box::new(expr) }
370}
371
372fn resolve_name(
373 stream: &TokenStream,
374 name: &str,
375) -> Vec<NodeDefinition> {
376 let types = &stream.node_types;
377 if let Some(type_) = types.get(name) {
378 return vec![type_.clone()];
379 }
380 let mut result = Vec::new();
381
382 for type_ in types.values() {
383 if type_.groups.contains(&name.to_string()) {
384 result.push(type_.clone());
385 }
386 }
387 if result.is_empty() {
388 stream.err(&format!("没找到类型 '{name}'"));
389 }
390 result
391}
392
393fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
394 if stream.eat("(") {
395 let expr = parse_expr(stream);
396 if !stream.eat(")") {
397 stream.err("Missing closing paren");
398 }
399 expr
400 } else if let Some(next) = stream.next() {
401 if next.chars().all(|c| c.is_alphanumeric() || c == '_') {
402 let exprs: Vec<Expr> = resolve_name(stream, next)
403 .into_iter()
404 .map(|type_| Expr::Name { value: Box::new(type_) })
405 .collect();
406 stream.pos += 1;
407 if exprs.len() == 1 {
408 exprs.into_iter().next().unwrap()
409 } else {
410 Expr::Choice { exprs }
411 }
412 } else {
413 stream.err(&format!("Unexpected token '{next}'"));
414 }
415 } else {
416 stream.err("Unexpected end of input");
417 }
418}
419#[derive(Debug, Clone)]
420pub struct Edge {
421 term: Option<NodeDefinition>,
422 to: Option<usize>,
423}
424fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
425 let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
426
427 fn explore(
428 states: Vec<usize>,
429 nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
430 labeled: &mut HashMap<String, ContentMatch>,
431 ) -> ContentMatch {
432 let mut out: Vec<(NodeDefinition, Vec<usize>)> = Vec::new();
433 for &node in &states {
434 for edge in &nfa[node] {
435 if edge.borrow().term.is_none() {
436 continue;
437 }
438 let term = edge.borrow().term.clone().unwrap();
439 let mut set: Option<&mut Vec<usize>> = None;
440
441 for (t, s) in &mut out {
442 if *t == term {
443 set = Some(s);
444 break;
445 }
446 }
447
448 if set.is_none() {
449 out.push((term.clone(), Vec::new()));
450 set = Some(&mut out.last_mut().unwrap().1);
451 }
452 for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
453 set.as_mut().unwrap().push(node);
454 }
455 }
456 }
457 let mut state = ContentMatch {
458 next: Vec::new(),
459 wrap_cache: vec![],
460 valid_end: states.contains(&(nfa.len() - 1)),
461 };
462
463 let state_key =
464 states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
465 labeled.insert(state_key.clone(), state.clone());
466
467 for (term, states) in out {
468 let states_key = states
469 .iter()
470 .map(|&x| x.to_string())
471 .collect::<Vec<_>>()
472 .join(",");
473 let next_state = labeled
474 .get(&states_key)
475 .cloned()
476 .unwrap_or_else(|| explore(states, nfa, labeled));
477 labeled.insert(states_key, next_state.clone());
478 state.next.push(MatchEdge { node_type: term, next: next_state });
479 }
480
481 state
482 }
483
484 explore(null_from(&nfa, 0), &nfa, &mut labeled)
485}
486
487pub fn null_from(
488 nfa: &[Vec<Rc<RefCell<Edge>>>],
489 node: usize,
490) -> Vec<usize> {
491 let mut result = Vec::new();
492 fn scan(
493 nfa: &[Vec<Rc<RefCell<Edge>>>],
494 node: usize,
495 result: &mut Vec<usize>,
496 ) {
497 let edges = &nfa[node];
498 if edges.len() == 1 && edges[0].borrow().term.is_none() {
499 if let Some(to) = edges[0].borrow().to {
500 scan(nfa, to, result);
501 }
502 return;
503 }
504 if !result.contains(&node) {
505 result.push(node);
506 }
507 for edge in edges {
508 if edge.borrow().term.is_none() {
509 if let Some(to) = edge.borrow().to {
510 if !result.contains(&to) {
511 scan(nfa, to, result);
512 }
513 }
514 }
515 }
516 }
517
518 scan(nfa, node, &mut result);
519 result.sort();
520 result
521}
522fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
523 let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
524 connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
525 nfa
526}
527fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
528 nfa.push(vec![]);
529 nfa.len() - 1
530}
531
532fn edge(
533 from: usize,
534 to: Option<usize>,
535 term: Option<NodeDefinition>,
536 nfa: &mut [Vec<Rc<RefCell<Edge>>>],
537) -> Rc<RefCell<Edge>> {
538 let edge =
539 Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
540 nfa[from].push(edge.clone());
541 edge.clone()
542}
543fn connect(
544 edges: &mut [Rc<RefCell<Edge>>],
545 to: usize,
546) {
547 for edge in edges {
548 edge.borrow_mut().to = Some(to);
549 }
550}
551fn compile(
552 expr: Expr,
553 from: usize,
554 nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
555) -> Vec<Rc<RefCell<Edge>>> {
556 match expr {
557 Expr::Choice { exprs } => exprs
558 .into_iter()
559 .flat_map(|expr| compile(expr, from, nfa))
560 .collect(),
561 Expr::Seq { exprs } => {
562 let mut cur = from;
563 let mut last_edges = Vec::new();
564 let exprs_len = exprs.len();
565
566 for (i, expr) in exprs.into_iter().enumerate() {
567 let next = if i == exprs_len - 1 { cur } else { node(nfa) };
568
569 let mut edges = compile(expr, cur, nfa);
570 if i < exprs_len - 1 {
571 connect(&mut edges, next);
572 cur = next;
573 } else {
574 last_edges = edges;
575 }
576 }
577
578 if last_edges.is_empty() {
579 vec![edge(cur, None, None, nfa)]
580 } else {
581 last_edges
582 }
583 },
584 Expr::Star { expr } => {
585 let loop_node = node(nfa);
586 edge(from, Some(loop_node), None, nfa);
587 let mut compiled_expr = compile(*expr, loop_node, nfa);
588 connect(&mut compiled_expr, loop_node);
589 vec![edge(loop_node, None, None, nfa)]
590 },
591 Expr::Plus { expr } => {
592 let loop_node = node(nfa);
593 connect(&mut compile(*expr.clone(), from, nfa), loop_node);
594 let mut compiled_expr = compile(*expr, loop_node, nfa);
595 connect(&mut compiled_expr, loop_node);
596 vec![edge(loop_node, None, None, nfa)]
597 },
598 Expr::Opt { expr } => {
599 let mut edges = vec![edge(from, None, None, nfa)];
600 edges.extend(compile(*expr, from, nfa));
601 edges
602 },
603 Expr::Range { expr, min, max } => {
604 let mut cur = from;
605 for _ in 0..min {
606 let next = node(nfa);
607 connect(&mut compile(*expr.clone(), cur, nfa), next);
608 cur = next;
609 }
610 if max == -1 {
611 connect(&mut compile(*expr, cur, nfa), cur);
612 } else {
613 for _ in min..max as usize {
614 let next = node(nfa);
615 edge(cur, Some(next), None, nfa);
616 connect(&mut compile(*expr.clone(), cur, nfa), next);
617 cur = next;
618 }
619 }
620 vec![edge(cur, None, None, nfa)]
621 },
622 Expr::Name { value } => {
623 vec![edge(from, None, Some((*value).clone()), nfa)]
624 },
625 }
626}