1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use crate::error::PoolResult;
7
8use super::node::Node;
9use super::node_definition::NodeDefinition;
10use super::schema::Schema;
11#[derive(Clone, PartialEq, Eq, Debug)]
12pub struct MatchEdge {
13 pub node_type: NodeDefinition,
14 pub next: ContentMatch,
15}
16
17#[derive(Clone, PartialEq, Eq, Debug, Default)]
18pub struct ContentMatch {
19 pub next: Vec<MatchEdge>,
20 pub wrap_cache: Vec<Option<NodeDefinition>>,
21 pub valid_end: bool,
22}
23impl Ord for ContentMatch {
24 fn cmp(
25 &self,
26 other: &Self,
27 ) -> Ordering {
28 let _ = other;
29 Ordering::Equal
30 }
31}
32impl PartialOrd for ContentMatch {
33 fn partial_cmp(
34 &self,
35 other: &Self,
36 ) -> Option<Ordering> {
37 Some(self.cmp(other))
38 }
39}
40
41impl ContentMatch {
42 pub fn parse(
43 str: String,
44 nodes: &HashMap<String, NodeDefinition>,
45 ) -> ContentMatch {
46 let mut stream = TokenStream::new(str, nodes.clone());
47 if stream.next().is_none() {
48 return ContentMatch::empty();
49 }
50 let expr = parse_expr(&mut stream);
51
52 let arr = nfa(expr);
53
54 dfa(arr)
55 }
56 pub fn empty() -> Self {
57 ContentMatch {
58 next: Vec::new(),
59 wrap_cache: Vec::new(),
60 valid_end: true,
61 }
62 }
63
64 pub fn match_type(
65 &self,
66 node_type: &NodeDefinition,
67 ) -> Option<&ContentMatch> {
68 self.next
69 .iter()
70 .find(|edge| &edge.node_type == node_type)
71 .map(|edge| &edge.next)
72 }
73
74 pub fn match_fragment(
75 &self,
76 frag: &[Node],
77 schema: &Schema,
78 ) -> Option<&ContentMatch> {
79 let mut current: &ContentMatch = self;
80
81 for content in frag.iter() {
82 if let Some(next) =
83 current.match_type(schema.nodes.get(&content.r#type).unwrap())
84 {
85 current = next;
86 } else {
87 return None;
89 }
90 }
91 Some(current)
92 }
93
94 pub fn fill(
104 &self,
105 after: &Vec<Node>,
106 to_end: bool,
107 schema: &Schema,
108 ) -> Option<Vec<String>> {
109 let mut seen: Vec<ContentMatch> = Vec::new();
110 seen.push(self.clone());
111 fn search(
112 seen: &mut Vec<ContentMatch>,
113 to_end: bool,
114 after: &Vec<Node>,
115 match_: &ContentMatch,
116 types: &mut Vec<String>,
117 schema: &Schema,
118 ) -> Option<Vec<String>> {
119 if let Some(finished) = match_.match_fragment(after, schema) {
121 if finished.valid_end || !to_end {
122 return Some(types.clone());
123 }
124 } else if !after.is_empty() {
125 return None;
127 }
128
129 for edge in &match_.next {
131 if !seen.contains(&edge.next) {
132 seen.push(edge.next.clone());
133 types.push(edge.node_type.name.clone());
134 if let Some(found) =
135 search(seen, to_end, after, &edge.next, types, schema)
136 {
137 return Some(found);
138 }
139 types.pop();
140 }
141 }
142 None
143 }
144
145 search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
146 }
147
148 pub fn default_type(&self) -> Option<&NodeDefinition> {
149 self.next
150 .iter()
151 .find(|edge| !edge.node_type.has_required_attrs())
152 .map(|edge| &edge.node_type)
153 }
154
155 pub fn compatible(
156 &self,
157 other: &ContentMatch,
158 ) -> bool {
159 for edge1 in &self.next {
160 for edge2 in &other.next {
161 if edge1.node_type == edge2.node_type {
162 return true;
163 }
164 }
165 }
166 false
167 }
168
169 pub fn edge_count(&self) -> usize {
170 self.next.len()
171 }
172
173 pub fn edge(
174 &self,
175 n: usize,
176 ) -> PoolResult<&MatchEdge> {
178 if n >= self.next.len() {
179 Err(anyhow::anyhow!(format!("{} 超出了 {}", n, self.next.len())))
180 } else {
181 Ok(&self.next[n])
182 }
183 }
184}
185impl fmt::Display for ContentMatch {
186 fn fmt(
187 &self,
188 f: &mut fmt::Formatter<'_>,
189 ) -> fmt::Result {
190 let mut seen = Vec::new();
191 fn scan(
192 m: &ContentMatch,
193 seen: &mut Vec<ContentMatch>,
194 ) {
195 seen.push(m.clone());
196 for edge in &m.next {
197 if !seen.iter().any(|s| s == &edge.next) {
198 scan(&edge.next, seen);
199 }
200 }
201 }
202 scan(self, &mut seen);
203
204 let str = seen
205 .iter()
206 .enumerate()
207 .map(|(i, m)| {
208 let mut out =
209 format!("{} ", if m.valid_end { i + 1 } else { i });
210 for (j, edge) in m.next.iter().enumerate() {
211 if j > 0 {
212 out.push_str(", ");
213 }
214 out.push_str(&format!(
215 "{}->{}",
216 edge.node_type.name,
217 seen.iter().position(|s| s == &edge.next).unwrap() + 1
218 ));
219 }
220 out
221 })
222 .collect::<Vec<_>>()
223 .join("\n");
224
225 write!(f, "{str}")
226 }
227}
228
229#[derive(Clone, PartialEq, Eq, Debug)]
230pub struct TokenStream {
231 pos: usize,
232 tokens: Vec<String>,
233 node_types: HashMap<String, NodeDefinition>,
234 string: String,
235}
236
237impl TokenStream {
238 pub fn new(
239 string: String,
240 node_types: HashMap<String, NodeDefinition>,
241 ) -> Self {
242 let mut tokens = Vec::new();
243 let mut current_token = String::new();
244 for c in string.chars() {
245 if c.is_whitespace() {
246 if !current_token.is_empty() {
248 tokens.push(current_token.clone());
249 current_token.clear(); }
251 } else if !c.is_alphanumeric() && c != '_' {
252 if !current_token.is_empty() {
254 tokens.push(current_token.clone());
255 current_token.clear(); }
257 tokens.push(c.to_string());
259 } else {
260 current_token.push(c);
262 }
263 }
264
265 if !current_token.is_empty() {
267 tokens.push(current_token);
268 }
269 TokenStream { pos: 0, tokens, node_types, string }
270 }
271
272 pub fn next(&self) -> Option<&str> {
273 self.tokens.get(self.pos).map(|s| s.as_str())
274 }
275
276 pub fn eat(
277 &mut self,
278 tok: &str,
279 ) -> bool {
280 if self.next() == Some(tok) {
281 self.pos += 1;
282 true
283 } else {
284 false
285 }
286 }
287
288 pub fn err(
289 &self,
290 msg: &str,
291 ) -> ! {
292 let token_index = self.pos.min(self.tokens.len().saturating_sub(1));
293 let current = self
294 .tokens
295 .get(self.pos)
296 .cloned()
297 .unwrap_or_else(|| "<结束>".into());
298 let start = self.pos.saturating_sub(3);
299 let end = (self.pos + 3).min(self.tokens.len());
300 let context: Vec<String> = (start..end)
301 .map(|idx| format!(r#"{}:"{}""#, idx, self.tokens[idx]))
302 .collect();
303
304 panic!(
305 "内容表达式解析失败: {}\n - 位置: token #{} (当前令牌: \"{}\")\n - 上下文: [{}]\n - 原始表达式: \"{}\"",
306 msg,
307 token_index,
308 current,
309 context.join(", "),
310 self.string.trim()
311 );
312 }
313}
314
315#[derive(Debug, Clone)]
316enum Expr {
317 Choice { exprs: Vec<Expr> },
318 Seq { exprs: Vec<Expr> },
319 Plus { expr: Box<Expr> },
320 Star { expr: Box<Expr> },
321 Opt { expr: Box<Expr> },
322 Range { min: usize, max: isize, expr: Box<Expr> },
323 Name { value: Box<NodeDefinition> },
324}
325fn parse_expr(stream: &mut TokenStream) -> Expr {
326 let mut exprs = Vec::new();
327
328 loop {
329 exprs.push(parse_expr_seq(stream));
330 if !stream.eat("|") {
331 break;
332 }
333 }
334 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
335}
336fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
337 let mut exprs = Vec::new();
338
339 while let Some(next) = stream.next() {
340 if next == ")" || next == "|" {
341 break;
342 }
343 exprs.push(parse_expr_subscript(stream));
344 }
345 if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
346}
347
348fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
349 let mut expr = parse_expr_atom(stream);
350 loop {
351 if stream.eat("+") {
352 expr = Expr::Plus { expr: Box::new(expr) };
353 } else if stream.eat("*") {
354 expr = Expr::Star { expr: Box::new(expr) };
355 } else if stream.eat("?") {
356 expr = Expr::Opt { expr: Box::new(expr) };
357 } else if stream.eat("{") {
358 expr = parse_expr_range(stream, expr);
359 } else {
360 break;
361 }
362 }
363 expr
364}
365
366fn parse_num(stream: &mut TokenStream) -> usize {
367 let next = match stream.next() {
368 Some(token) => token,
369 None => stream.err("需要一个数字,但内容表达式已经结束"),
370 };
371
372 if !next.chars().all(|c| c.is_ascii_digit()) {
373 stream.err(&format!(r#"需要一个数字,但遇到了 "{next}""#));
374 }
375
376 match next.parse::<usize>() {
377 Ok(value) => {
378 stream.pos += 1;
379 value
380 },
381 Err(_) => stream.err(&format!(r#"无法将 "{next}" 解析为数字"#)),
382 }
383}
384
385fn parse_expr_range(
386 stream: &mut TokenStream,
387 expr: Expr,
388) -> Expr {
389 let min = parse_num(stream);
390 let max = if stream.eat(",") {
391 if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
392 } else {
393 min as isize
394 };
395 if !stream.eat("}") {
396 stream.err(r#"范围量词缺少右大括号 "}""#);
397 }
398 Expr::Range { min, max, expr: Box::new(expr) }
399}
400
401fn resolve_name(
402 stream: &TokenStream,
403 name: &str,
404) -> Vec<NodeDefinition> {
405 let types = &stream.node_types;
406 if let Some(type_) = types.get(name) {
407 return vec![type_.clone()];
408 }
409 let mut result = Vec::new();
410
411 for type_ in types.values() {
412 if type_.groups.contains(&name.to_string()) {
413 result.push(type_.clone());
414 }
415 }
416 if result.is_empty() {
417 let mut available: Vec<&String> = stream.node_types.keys().collect();
418 available.sort();
419 let preview: Vec<String> =
420 available.iter().take(5).map(|name| (*name).clone()).collect();
421 let hint = if preview.is_empty() {
422 "当前 Schema 中未声明任何节点".to_string()
423 } else {
424 format!("可用的节点/分组示例: {}", preview.join(", "))
425 };
426 stream.err(&format!(
427 r#"无法在 Schema 中找到名称为 "{name}" 的节点或分组。{}"#,
428 hint
429 ));
430 }
431 result
432}
433
434fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
435 if stream.eat("(") {
436 let expr = parse_expr(stream);
437 if !stream.eat(")") {
438 stream.err(r#"缺少对应的右括号 ")""#);
439 }
440 expr
441 } else if let Some(next) = stream.next() {
442 if next.chars().all(|c| c.is_alphanumeric() || c == '_') {
443 let exprs: Vec<Expr> = resolve_name(stream, next)
444 .into_iter()
445 .map(|type_| Expr::Name { value: Box::new(type_) })
446 .collect();
447 stream.pos += 1;
448 if exprs.len() == 1 {
449 exprs.into_iter().next().unwrap()
450 } else {
451 Expr::Choice { exprs }
452 }
453 } else {
454 stream.err(&format!(r#"无法识别的符号 "{next}",请检查是否书写了正确的节点名称或分组"#));
455 }
456 } else {
457 stream.err("内容表达式意外结束,请检查括号与量词是否成对出现");
458 }
459}
460#[derive(Debug, Clone)]
461pub struct Edge {
462 term: Option<NodeDefinition>,
463 to: Option<usize>,
464}
465fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
466 let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
467
468 fn explore(
469 states: Vec<usize>,
470 nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
471 labeled: &mut HashMap<String, ContentMatch>,
472 ) -> ContentMatch {
473 let mut out: Vec<(NodeDefinition, Vec<usize>)> = Vec::new();
474 for &node in &states {
475 for edge in &nfa[node] {
476 if edge.borrow().term.is_none() {
477 continue;
478 }
479 let term = edge.borrow().term.clone().unwrap();
480 let mut set: Option<&mut Vec<usize>> = None;
481
482 for (t, s) in &mut out {
483 if *t == term {
484 set = Some(s);
485 break;
486 }
487 }
488
489 if set.is_none() {
490 out.push((term.clone(), Vec::new()));
491 set = Some(&mut out.last_mut().unwrap().1);
492 }
493 for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
494 set.as_mut().unwrap().push(node);
495 }
496 }
497 }
498 let mut state = ContentMatch {
499 next: Vec::new(),
500 wrap_cache: vec![],
501 valid_end: states.contains(&(nfa.len() - 1)),
502 };
503
504 let state_key =
505 states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
506 labeled.insert(state_key.clone(), state.clone());
507
508 for (term, states) in out {
509 let states_key = states
510 .iter()
511 .map(|&x| x.to_string())
512 .collect::<Vec<_>>()
513 .join(",");
514 let next_state = labeled
515 .get(&states_key)
516 .cloned()
517 .unwrap_or_else(|| explore(states, nfa, labeled));
518 labeled.insert(states_key, next_state.clone());
519 state.next.push(MatchEdge { node_type: term, next: next_state });
520 }
521
522 state
523 }
524
525 explore(null_from(&nfa, 0), &nfa, &mut labeled)
526}
527
528pub fn null_from(
529 nfa: &[Vec<Rc<RefCell<Edge>>>],
530 node: usize,
531) -> Vec<usize> {
532 let mut result = Vec::new();
533 fn scan(
534 nfa: &[Vec<Rc<RefCell<Edge>>>],
535 node: usize,
536 result: &mut Vec<usize>,
537 ) {
538 let edges = &nfa[node];
539 if edges.len() == 1 && edges[0].borrow().term.is_none() {
540 if let Some(to) = edges[0].borrow().to {
541 scan(nfa, to, result);
542 }
543 return;
544 }
545 if !result.contains(&node) {
546 result.push(node);
547 }
548 for edge in edges {
549 if edge.borrow().term.is_none() {
550 if let Some(to) = edge.borrow().to {
551 if !result.contains(&to) {
552 scan(nfa, to, result);
553 }
554 }
555 }
556 }
557 }
558
559 scan(nfa, node, &mut result);
560 result.sort();
561 result
562}
563fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
564 let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
565 connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
566 nfa
567}
568
569#[cfg(test)]
570mod tests {
571 use super::*;
572 use crate::node_definition::{NodeDefinition, NodeSpec};
573 use std::panic::{catch_unwind, UnwindSafe};
574
575 fn build_nodes() -> HashMap<String, NodeDefinition> {
576 let mut nodes = HashMap::new();
577 nodes.insert(
578 "doc".to_string(),
579 NodeDefinition::new("doc".to_string(), NodeSpec::default()),
580 );
581 nodes
582 }
583
584 fn panic_message<F, R>(f: F) -> String
585 where
586 F: FnOnce() -> R + UnwindSafe,
587 {
588 match catch_unwind(f) {
589 Ok(_) => panic!("expected panic"),
590 Err(err) => {
591 if let Some(s) = err.downcast_ref::<String>() {
592 s.clone()
593 } else if let Some(s) = err.downcast_ref::<&str>() {
594 (*s).to_string()
595 } else {
596 panic!("未预期的 panic 消息");
597 }
598 },
599 }
600 }
601
602 #[test]
603 fn range_missing_brace_reports_context() {
604 let nodes = build_nodes();
605 let msg =
606 panic_message(|| ContentMatch::parse("doc{".to_string(), &nodes));
607
608 assert!(msg.contains("内容表达式解析失败"), "actual: {msg}");
609 assert!(msg.contains("token #1"), "actual: {msg}");
610 assert!(msg.contains("doc{"), "actual: {msg}");
611 }
612
613 #[test]
614 fn unknown_node_suggests_available_names() {
615 let nodes = build_nodes();
616 let msg = panic_message(|| {
617 ContentMatch::parse("unknown".to_string(), &nodes)
618 });
619
620 assert!(msg.contains("无法在 Schema 中找到名称为"), "actual: {msg}");
621 assert!(msg.contains("可用的节点/分组示例"), "actual: {msg}");
622 }
623}
624fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
625 nfa.push(vec![]);
626 nfa.len() - 1
627}
628
629fn edge(
630 from: usize,
631 to: Option<usize>,
632 term: Option<NodeDefinition>,
633 nfa: &mut [Vec<Rc<RefCell<Edge>>>],
634) -> Rc<RefCell<Edge>> {
635 let edge =
636 Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
637 nfa[from].push(edge.clone());
638 edge.clone()
639}
640fn connect(
641 edges: &mut [Rc<RefCell<Edge>>],
642 to: usize,
643) {
644 for edge in edges {
645 edge.borrow_mut().to = Some(to);
646 }
647}
648fn compile(
649 expr: Expr,
650 from: usize,
651 nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
652) -> Vec<Rc<RefCell<Edge>>> {
653 match expr {
654 Expr::Choice { exprs } => exprs
655 .into_iter()
656 .flat_map(|expr| compile(expr, from, nfa))
657 .collect(),
658 Expr::Seq { exprs } => {
659 let mut cur = from;
660 let mut last_edges = Vec::new();
661 let exprs_len = exprs.len();
662
663 for (i, expr) in exprs.into_iter().enumerate() {
664 let next = if i == exprs_len - 1 { cur } else { node(nfa) };
665
666 let mut edges = compile(expr, cur, nfa);
667 if i < exprs_len - 1 {
668 connect(&mut edges, next);
669 cur = next;
670 } else {
671 last_edges = edges;
672 }
673 }
674
675 if last_edges.is_empty() {
676 vec![edge(cur, None, None, nfa)]
677 } else {
678 last_edges
679 }
680 },
681 Expr::Star { expr } => {
682 let loop_node = node(nfa);
683 edge(from, Some(loop_node), None, nfa);
684 let mut compiled_expr = compile(*expr, loop_node, nfa);
685 connect(&mut compiled_expr, loop_node);
686 vec![edge(loop_node, None, None, nfa)]
687 },
688 Expr::Plus { expr } => {
689 let loop_node = node(nfa);
690 connect(&mut compile(*expr.clone(), from, nfa), loop_node);
691 let mut compiled_expr = compile(*expr, loop_node, nfa);
692 connect(&mut compiled_expr, loop_node);
693 vec![edge(loop_node, None, None, nfa)]
694 },
695 Expr::Opt { expr } => {
696 let mut edges = vec![edge(from, None, None, nfa)];
697 edges.extend(compile(*expr, from, nfa));
698 edges
699 },
700 Expr::Range { expr, min, max } => {
701 let mut cur = from;
702 for _ in 0..min {
703 let next = node(nfa);
704 connect(&mut compile(*expr.clone(), cur, nfa), next);
705 cur = next;
706 }
707 if max == -1 {
708 connect(&mut compile(*expr, cur, nfa), cur);
709 } else {
710 for _ in min..max as usize {
711 let next = node(nfa);
712 edge(cur, Some(next), None, nfa);
713 connect(&mut compile(*expr.clone(), cur, nfa), next);
714 cur = next;
715 }
716 }
717 vec![edge(cur, None, None, nfa)]
718 },
719 Expr::Name { value } => {
720 vec![edge(from, None, Some((*value).clone()), nfa)]
721 },
722 }
723}