mf_model/
content.rs

1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use crate::error::PoolResult;
7
8use super::node::Node;
9use super::node_definition::NodeDefinition;
10use super::schema::Schema;
11#[derive(Clone, PartialEq, Eq, Debug)]
12pub struct MatchEdge {
13    pub node_type: NodeDefinition,
14    pub next: ContentMatch,
15}
16
17#[derive(Clone, PartialEq, Eq, Debug, Default)]
18pub struct ContentMatch {
19    pub next: Vec<MatchEdge>,
20    pub wrap_cache: Vec<Option<NodeDefinition>>,
21    pub valid_end: bool,
22}
23impl Ord for ContentMatch {
24    fn cmp(
25        &self,
26        other: &Self,
27    ) -> Ordering {
28        let _ = other;
29        Ordering::Equal
30    }
31}
32impl PartialOrd for ContentMatch {
33    fn partial_cmp(
34        &self,
35        other: &Self,
36    ) -> Option<Ordering> {
37        Some(self.cmp(other))
38    }
39}
40
41impl ContentMatch {
42    pub fn parse(
43        str: String,
44        nodes: &HashMap<String, NodeDefinition>,
45    ) -> ContentMatch {
46        let mut stream = TokenStream::new(str, nodes.clone());
47        if stream.next().is_none() {
48            return ContentMatch::empty();
49        }
50        let expr = parse_expr(&mut stream);
51
52        let arr = nfa(expr);
53
54        dfa(arr)
55    }
56    pub fn empty() -> Self {
57        ContentMatch {
58            next: Vec::new(),
59            wrap_cache: Vec::new(),
60            valid_end: true,
61        }
62    }
63
64    pub fn match_type(
65        &self,
66        node_type: &NodeDefinition,
67    ) -> Option<&ContentMatch> {
68        self.next
69            .iter()
70            .find(|edge| &edge.node_type == node_type)
71            .map(|edge| &edge.next)
72    }
73
74    pub fn match_fragment(
75        &self,
76        frag: &[Node],
77        schema: &Schema,
78    ) -> Option<&ContentMatch> {
79        let mut current: &ContentMatch = self;
80
81        for content in frag.iter() {
82            if let Some(next) =
83                current.match_type(schema.nodes.get(&content.r#type).unwrap())
84            {
85                current = next;
86            } else {
87                // 如果无法匹配某个节点类型,返回 None 表示匹配失败
88                return None;
89            }
90        }
91        Some(current)
92    }
93
94    /// 根据内容匹配规则推导需要的节点类型
95    ///
96    /// # 参数
97    /// - `after`: 待匹配的节点列表
98    /// - `to_end`: 是否需要匹配到结束状态
99    /// - `schema`: 当前使用的文档模式
100    ///
101    /// # 返回值
102    /// 返回需要的节点类型名称列表,如果无法匹配则返回None
103    pub fn fill(
104        &self,
105        after: &Vec<Node>,
106        to_end: bool,
107        schema: &Schema,
108    ) -> Option<Vec<String>> {
109        let mut seen: Vec<ContentMatch> = Vec::new();
110        seen.push(self.clone());
111        fn search(
112            seen: &mut Vec<ContentMatch>,
113            to_end: bool,
114            after: &Vec<Node>,
115            match_: &ContentMatch,
116            types: &mut Vec<String>,
117            schema: &Schema,
118        ) -> Option<Vec<String>> {
119            // 首先检查是否可以匹配当前片段
120            if let Some(finished) = match_.match_fragment(after, schema) {
121                if finished.valid_end || !to_end {
122                    return Some(types.clone());
123                }
124            } else if !after.is_empty() {
125                // 如果 after 不为空但无法匹配,直接返回 None
126                return None;
127            }
128
129            // 然后尝试按顺序匹配每个边
130            for edge in &match_.next {
131                if !seen.contains(&edge.next) {
132                    seen.push(edge.next.clone());
133                    types.push(edge.node_type.name.clone());
134                    if let Some(found) =
135                        search(seen, to_end, after, &edge.next, types, schema)
136                    {
137                        return Some(found);
138                    }
139                    types.pop();
140                }
141            }
142            None
143        }
144
145        search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
146    }
147
148    pub fn default_type(&self) -> Option<&NodeDefinition> {
149        self.next
150            .iter()
151            .find(|edge| !edge.node_type.has_required_attrs())
152            .map(|edge| &edge.node_type)
153    }
154
155    pub fn compatible(
156        &self,
157        other: &ContentMatch,
158    ) -> bool {
159        for edge1 in &self.next {
160            for edge2 in &other.next {
161                if edge1.node_type == edge2.node_type {
162                    return true;
163                }
164            }
165        }
166        false
167    }
168
169    pub fn edge_count(&self) -> usize {
170        self.next.len()
171    }
172
173    pub fn edge(
174        &self,
175        n: usize,
176        // 根据错误提示,PoolResult 类型别名可能只接受一个泛型参数,这里修改为只传递一个泛型参数
177    ) -> PoolResult<&MatchEdge> {
178        if n >= self.next.len() {
179            Err(anyhow::anyhow!(format!("{} 超出了 {}", n, self.next.len())))
180        } else {
181            Ok(&self.next[n])
182        }
183    }
184}
185impl fmt::Display for ContentMatch {
186    fn fmt(
187        &self,
188        f: &mut fmt::Formatter<'_>,
189    ) -> fmt::Result {
190        let mut seen = Vec::new();
191        fn scan(
192            m: &ContentMatch,
193            seen: &mut Vec<ContentMatch>,
194        ) {
195            seen.push(m.clone());
196            for edge in &m.next {
197                if !seen.iter().any(|s| s == &edge.next) {
198                    scan(&edge.next, seen);
199                }
200            }
201        }
202        scan(self, &mut seen);
203
204        let str = seen
205            .iter()
206            .enumerate()
207            .map(|(i, m)| {
208                let mut out =
209                    format!("{} ", if m.valid_end { i + 1 } else { i });
210                for (j, edge) in m.next.iter().enumerate() {
211                    if j > 0 {
212                        out.push_str(", ");
213                    }
214                    out.push_str(&format!(
215                        "{}->{}",
216                        edge.node_type.name,
217                        seen.iter().position(|s| s == &edge.next).unwrap() + 1
218                    ));
219                }
220                out
221            })
222            .collect::<Vec<_>>()
223            .join("\n");
224
225        write!(f, "{str}")
226    }
227}
228
229#[derive(Clone, PartialEq, Eq, Debug)]
230pub struct TokenStream {
231    pos: usize,
232    tokens: Vec<String>,
233    node_types: HashMap<String, NodeDefinition>,
234    string: String,
235}
236
237impl TokenStream {
238    pub fn new(
239        string: String,
240        node_types: HashMap<String, NodeDefinition>,
241    ) -> Self {
242        let mut tokens = Vec::new();
243        let mut current_token = String::new();
244        for c in string.chars() {
245            if c.is_whitespace() {
246                // 如果当前字符是空白字符,且当前令牌不为空,则将当前令牌添加到令牌列表中
247                if !current_token.is_empty() {
248                    tokens.push(current_token.clone());
249                    current_token.clear(); // 清空当前令牌
250                }
251            } else if !c.is_alphanumeric() && c != '_' {
252                // 如果当前字符是非字母数字字符(不包括下划线),且当前令牌不为空,则将当前令牌添加到令牌列表中
253                if !current_token.is_empty() {
254                    tokens.push(current_token.clone());
255                    current_token.clear(); // 清空当前令牌
256                }
257                // 将非字母数字字符作为单独的令牌添加到列表中
258                tokens.push(c.to_string());
259            } else {
260                // 如果当前字符是字母数字字符或下划线,则将其添加到当前令牌中
261                current_token.push(c);
262            }
263        }
264
265        // 如果最后一个令牌不为空,则将其添加到令牌列表中
266        if !current_token.is_empty() {
267            tokens.push(current_token);
268        }
269        TokenStream { pos: 0, tokens, node_types, string }
270    }
271
272    pub fn next(&self) -> Option<&str> {
273        self.tokens.get(self.pos).map(|s| s.as_str())
274    }
275
276    pub fn eat(
277        &mut self,
278        tok: &str,
279    ) -> bool {
280        if self.next() == Some(tok) {
281            self.pos += 1;
282            true
283        } else {
284            false
285        }
286    }
287
288    pub fn err(
289        &self,
290        msg: &str,
291    ) -> ! {
292        let token_index = self.pos.min(self.tokens.len().saturating_sub(1));
293        let current = self
294            .tokens
295            .get(self.pos)
296            .cloned()
297            .unwrap_or_else(|| "<结束>".into());
298        let start = self.pos.saturating_sub(3);
299        let end = (self.pos + 3).min(self.tokens.len());
300        let context: Vec<String> = (start..end)
301            .map(|idx| format!(r#"{}:"{}""#, idx, self.tokens[idx]))
302            .collect();
303
304        panic!(
305            "内容表达式解析失败: {}\n  - 位置: token #{} (当前令牌: \"{}\")\n  - 上下文: [{}]\n  - 原始表达式: \"{}\"",
306            msg,
307            token_index,
308            current,
309            context.join(", "),
310            self.string.trim()
311        );
312    }
313}
314
315#[derive(Debug, Clone)]
316enum Expr {
317    Choice { exprs: Vec<Expr> },
318    Seq { exprs: Vec<Expr> },
319    Plus { expr: Box<Expr> },
320    Star { expr: Box<Expr> },
321    Opt { expr: Box<Expr> },
322    Range { min: usize, max: isize, expr: Box<Expr> },
323    Name { value: Box<NodeDefinition> },
324}
325fn parse_expr(stream: &mut TokenStream) -> Expr {
326    let mut exprs = Vec::new();
327
328    loop {
329        exprs.push(parse_expr_seq(stream));
330        if !stream.eat("|") {
331            break;
332        }
333    }
334    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
335}
336fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
337    let mut exprs = Vec::new();
338
339    while let Some(next) = stream.next() {
340        if next == ")" || next == "|" {
341            break;
342        }
343        exprs.push(parse_expr_subscript(stream));
344    }
345    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
346}
347
348fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
349    let mut expr = parse_expr_atom(stream);
350    loop {
351        if stream.eat("+") {
352            expr = Expr::Plus { expr: Box::new(expr) };
353        } else if stream.eat("*") {
354            expr = Expr::Star { expr: Box::new(expr) };
355        } else if stream.eat("?") {
356            expr = Expr::Opt { expr: Box::new(expr) };
357        } else if stream.eat("{") {
358            expr = parse_expr_range(stream, expr);
359        } else {
360            break;
361        }
362    }
363    expr
364}
365
366fn parse_num(stream: &mut TokenStream) -> usize {
367    let next = match stream.next() {
368        Some(token) => token,
369        None => stream.err("需要一个数字,但内容表达式已经结束"),
370    };
371
372    if !next.chars().all(|c| c.is_ascii_digit()) {
373        stream.err(&format!(r#"需要一个数字,但遇到了 "{next}""#));
374    }
375
376    match next.parse::<usize>() {
377        Ok(value) => {
378            stream.pos += 1;
379            value
380        },
381        Err(_) => stream.err(&format!(r#"无法将 "{next}" 解析为数字"#)),
382    }
383}
384
385fn parse_expr_range(
386    stream: &mut TokenStream,
387    expr: Expr,
388) -> Expr {
389    let min = parse_num(stream);
390    let max = if stream.eat(",") {
391        if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
392    } else {
393        min as isize
394    };
395    if !stream.eat("}") {
396        stream.err(r#"范围量词缺少右大括号 "}""#);
397    }
398    Expr::Range { min, max, expr: Box::new(expr) }
399}
400
401fn resolve_name(
402    stream: &TokenStream,
403    name: &str,
404) -> Vec<NodeDefinition> {
405    let types = &stream.node_types;
406    if let Some(type_) = types.get(name) {
407        return vec![type_.clone()];
408    }
409    let mut result = Vec::new();
410
411    for type_ in types.values() {
412        if type_.groups.contains(&name.to_string()) {
413            result.push(type_.clone());
414        }
415    }
416    if result.is_empty() {
417        let mut available: Vec<&String> = stream.node_types.keys().collect();
418        available.sort();
419        let preview: Vec<String> =
420            available.iter().take(5).map(|name| (*name).clone()).collect();
421        let hint = if preview.is_empty() {
422            "当前 Schema 中未声明任何节点".to_string()
423        } else {
424            format!("可用的节点/分组示例: {}", preview.join(", "))
425        };
426        stream.err(&format!(
427            r#"无法在 Schema 中找到名称为 "{name}" 的节点或分组。{}"#,
428            hint
429        ));
430    }
431    result
432}
433
434fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
435    if stream.eat("(") {
436        let expr = parse_expr(stream);
437        if !stream.eat(")") {
438            stream.err(r#"缺少对应的右括号 ")""#);
439        }
440        expr
441    } else if let Some(next) = stream.next() {
442        if next.chars().all(|c| c.is_alphanumeric() || c == '_') {
443            let exprs: Vec<Expr> = resolve_name(stream, next)
444                .into_iter()
445                .map(|type_| Expr::Name { value: Box::new(type_) })
446                .collect();
447            stream.pos += 1;
448            if exprs.len() == 1 {
449                exprs.into_iter().next().unwrap()
450            } else {
451                Expr::Choice { exprs }
452            }
453        } else {
454            stream.err(&format!(r#"无法识别的符号 "{next}",请检查是否书写了正确的节点名称或分组"#));
455        }
456    } else {
457        stream.err("内容表达式意外结束,请检查括号与量词是否成对出现");
458    }
459}
460#[derive(Debug, Clone)]
461pub struct Edge {
462    term: Option<NodeDefinition>,
463    to: Option<usize>,
464}
465fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
466    let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
467
468    fn explore(
469        states: Vec<usize>,
470        nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
471        labeled: &mut HashMap<String, ContentMatch>,
472    ) -> ContentMatch {
473        let mut out: Vec<(NodeDefinition, Vec<usize>)> = Vec::new();
474        for &node in &states {
475            for edge in &nfa[node] {
476                if edge.borrow().term.is_none() {
477                    continue;
478                }
479                let term = edge.borrow().term.clone().unwrap();
480                let mut set: Option<&mut Vec<usize>> = None;
481
482                for (t, s) in &mut out {
483                    if *t == term {
484                        set = Some(s);
485                        break;
486                    }
487                }
488
489                if set.is_none() {
490                    out.push((term.clone(), Vec::new()));
491                    set = Some(&mut out.last_mut().unwrap().1);
492                }
493                for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
494                    set.as_mut().unwrap().push(node);
495                }
496            }
497        }
498        let mut state = ContentMatch {
499            next: Vec::new(),
500            wrap_cache: vec![],
501            valid_end: states.contains(&(nfa.len() - 1)),
502        };
503
504        let state_key =
505            states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
506        labeled.insert(state_key.clone(), state.clone());
507
508        for (term, states) in out {
509            let states_key = states
510                .iter()
511                .map(|&x| x.to_string())
512                .collect::<Vec<_>>()
513                .join(",");
514            let next_state = labeled
515                .get(&states_key)
516                .cloned()
517                .unwrap_or_else(|| explore(states, nfa, labeled));
518            labeled.insert(states_key, next_state.clone());
519            state.next.push(MatchEdge { node_type: term, next: next_state });
520        }
521
522        state
523    }
524
525    explore(null_from(&nfa, 0), &nfa, &mut labeled)
526}
527
528pub fn null_from(
529    nfa: &[Vec<Rc<RefCell<Edge>>>],
530    node: usize,
531) -> Vec<usize> {
532    let mut result = Vec::new();
533    fn scan(
534        nfa: &[Vec<Rc<RefCell<Edge>>>],
535        node: usize,
536        result: &mut Vec<usize>,
537    ) {
538        let edges = &nfa[node];
539        if edges.len() == 1 && edges[0].borrow().term.is_none() {
540            if let Some(to) = edges[0].borrow().to {
541                scan(nfa, to, result);
542            }
543            return;
544        }
545        if !result.contains(&node) {
546            result.push(node);
547        }
548        for edge in edges {
549            if edge.borrow().term.is_none() {
550                if let Some(to) = edge.borrow().to {
551                    if !result.contains(&to) {
552                        scan(nfa, to, result);
553                    }
554                }
555            }
556        }
557    }
558
559    scan(nfa, node, &mut result);
560    result.sort();
561    result
562}
563fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
564    let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
565    connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
566    nfa
567}
568
569#[cfg(test)]
570mod tests {
571    use super::*;
572    use crate::node_definition::{NodeDefinition, NodeSpec};
573    use std::panic::{catch_unwind, UnwindSafe};
574
575    fn build_nodes() -> HashMap<String, NodeDefinition> {
576        let mut nodes = HashMap::new();
577        nodes.insert(
578            "doc".to_string(),
579            NodeDefinition::new("doc".to_string(), NodeSpec::default()),
580        );
581        nodes
582    }
583
584    fn panic_message<F, R>(f: F) -> String
585    where
586        F: FnOnce() -> R + UnwindSafe,
587    {
588        match catch_unwind(f) {
589            Ok(_) => panic!("expected panic"),
590            Err(err) => {
591                if let Some(s) = err.downcast_ref::<String>() {
592                    s.clone()
593                } else if let Some(s) = err.downcast_ref::<&str>() {
594                    (*s).to_string()
595                } else {
596                    panic!("未预期的 panic 消息");
597                }
598            },
599        }
600    }
601
602    #[test]
603    fn range_missing_brace_reports_context() {
604        let nodes = build_nodes();
605        let msg =
606            panic_message(|| ContentMatch::parse("doc{".to_string(), &nodes));
607
608        assert!(msg.contains("内容表达式解析失败"), "actual: {msg}");
609        assert!(msg.contains("token #1"), "actual: {msg}");
610        assert!(msg.contains("doc{"), "actual: {msg}");
611    }
612
613    #[test]
614    fn unknown_node_suggests_available_names() {
615        let nodes = build_nodes();
616        let msg = panic_message(|| {
617            ContentMatch::parse("unknown".to_string(), &nodes)
618        });
619
620        assert!(msg.contains("无法在 Schema 中找到名称为"), "actual: {msg}");
621        assert!(msg.contains("可用的节点/分组示例"), "actual: {msg}");
622    }
623}
624fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
625    nfa.push(vec![]);
626    nfa.len() - 1
627}
628
629fn edge(
630    from: usize,
631    to: Option<usize>,
632    term: Option<NodeDefinition>,
633    nfa: &mut [Vec<Rc<RefCell<Edge>>>],
634) -> Rc<RefCell<Edge>> {
635    let edge =
636        Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
637    nfa[from].push(edge.clone());
638    edge.clone()
639}
640fn connect(
641    edges: &mut [Rc<RefCell<Edge>>],
642    to: usize,
643) {
644    for edge in edges {
645        edge.borrow_mut().to = Some(to);
646    }
647}
648fn compile(
649    expr: Expr,
650    from: usize,
651    nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
652) -> Vec<Rc<RefCell<Edge>>> {
653    match expr {
654        Expr::Choice { exprs } => exprs
655            .into_iter()
656            .flat_map(|expr| compile(expr, from, nfa))
657            .collect(),
658        Expr::Seq { exprs } => {
659            let mut cur = from;
660            let mut last_edges = Vec::new();
661            let exprs_len = exprs.len();
662
663            for (i, expr) in exprs.into_iter().enumerate() {
664                let next = if i == exprs_len - 1 { cur } else { node(nfa) };
665
666                let mut edges = compile(expr, cur, nfa);
667                if i < exprs_len - 1 {
668                    connect(&mut edges, next);
669                    cur = next;
670                } else {
671                    last_edges = edges;
672                }
673            }
674
675            if last_edges.is_empty() {
676                vec![edge(cur, None, None, nfa)]
677            } else {
678                last_edges
679            }
680        },
681        Expr::Star { expr } => {
682            let loop_node = node(nfa);
683            edge(from, Some(loop_node), None, nfa);
684            let mut compiled_expr = compile(*expr, loop_node, nfa);
685            connect(&mut compiled_expr, loop_node);
686            vec![edge(loop_node, None, None, nfa)]
687        },
688        Expr::Plus { expr } => {
689            let loop_node = node(nfa);
690            connect(&mut compile(*expr.clone(), from, nfa), loop_node);
691            let mut compiled_expr = compile(*expr, loop_node, nfa);
692            connect(&mut compiled_expr, loop_node);
693            vec![edge(loop_node, None, None, nfa)]
694        },
695        Expr::Opt { expr } => {
696            let mut edges = vec![edge(from, None, None, nfa)];
697            edges.extend(compile(*expr, from, nfa));
698            edges
699        },
700        Expr::Range { expr, min, max } => {
701            let mut cur = from;
702            for _ in 0..min {
703                let next = node(nfa);
704                connect(&mut compile(*expr.clone(), cur, nfa), next);
705                cur = next;
706            }
707            if max == -1 {
708                connect(&mut compile(*expr, cur, nfa), cur);
709            } else {
710                for _ in min..max as usize {
711                    let next = node(nfa);
712                    edge(cur, Some(next), None, nfa);
713                    connect(&mut compile(*expr.clone(), cur, nfa), next);
714                    cur = next;
715                }
716            }
717            vec![edge(cur, None, None, nfa)]
718        },
719        Expr::Name { value } => {
720            vec![edge(from, None, Some((*value).clone()), nfa)]
721        },
722    }
723}