mf_model/
content.rs

1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use crate::error::PoolResult;
7
8use super::node::Node;
9use super::node_definition::NodeDefinition;
10use super::schema::Schema;
11#[derive(Clone, PartialEq, Eq, Debug)]
12pub struct MatchEdge {
13    pub node_type: NodeDefinition,
14    pub next: ContentMatch,
15}
16
17#[derive(Clone, PartialEq, Eq, Debug, Default)]
18pub struct ContentMatch {
19    pub next: Vec<MatchEdge>,
20    pub wrap_cache: Vec<Option<NodeDefinition>>,
21    pub valid_end: bool,
22}
23impl Ord for ContentMatch {
24    fn cmp(
25        &self,
26        other: &Self,
27    ) -> Ordering {
28        let _ = other;
29        Ordering::Equal
30    }
31}
32impl PartialOrd for ContentMatch {
33    fn partial_cmp(
34        &self,
35        other: &Self,
36    ) -> Option<Ordering> {
37        Some(self.cmp(other))
38    }
39}
40
41impl ContentMatch {
42    pub fn parse(
43        str: String,
44        nodes: &HashMap<String, NodeDefinition>,
45    ) -> ContentMatch {
46        let mut stream = TokenStream::new(str, nodes.clone());
47        if stream.next().is_none() {
48            return ContentMatch::empty();
49        }
50        let expr = parse_expr(&mut stream);
51
52        let arr = nfa(expr);
53
54        dfa(arr)
55    }
56    pub fn empty() -> Self {
57        ContentMatch {
58            next: Vec::new(),
59            wrap_cache: Vec::new(),
60            valid_end: true,
61        }
62    }
63
64    pub fn match_type(
65        &self,
66        node_type: &NodeDefinition,
67    ) -> Option<&ContentMatch> {
68        self.next
69            .iter()
70            .find(|edge| &edge.node_type == node_type)
71            .map(|edge| &edge.next)
72    }
73
74    pub fn match_fragment(
75        &self,
76        frag: &[Node],
77        schema: &Schema,
78    ) -> Option<&ContentMatch> {
79        let mut current: &ContentMatch = self;
80
81        for content in frag.iter() {
82            if let Some(next) =
83                current.match_type(schema.nodes.get(&content.r#type).unwrap())
84            {
85                current = next;
86            } else {
87                // 如果无法匹配某个节点类型,返回 None 表示匹配失败
88                return None;
89            }
90        }
91        Some(current)
92    }
93
94    /// 根据内容匹配规则推导需要的节点类型
95    ///
96    /// # 参数
97    /// - `after`: 待匹配的节点列表
98    /// - `to_end`: 是否需要匹配到结束状态
99    /// - `schema`: 当前使用的文档模式
100    ///
101    /// # 返回值
102    /// 返回需要的节点类型名称列表,如果无法匹配则返回None
103    pub fn fill(
104        &self,
105        after: &Vec<Node>,
106        to_end: bool,
107        schema: &Schema,
108    ) -> Option<Vec<String>> {
109        let mut seen: Vec<ContentMatch> = Vec::new();
110        seen.push(self.clone());
111        fn search(
112            seen: &mut Vec<ContentMatch>,
113            to_end: bool,
114            after: &Vec<Node>,
115            match_: &ContentMatch,
116            types: &mut Vec<String>,
117            schema: &Schema,
118        ) -> Option<Vec<String>> {
119            // 首先检查是否可以匹配当前片段
120            if let Some(finished) = match_.match_fragment(after, schema) {
121                if finished.valid_end || !to_end {
122                    return Some(types.clone());
123                }
124            } else if !after.is_empty() {
125                // 如果 after 不为空但无法匹配,直接返回 None
126                return None;
127            }
128
129            // 然后尝试按顺序匹配每个边
130            for edge in &match_.next {
131                if !seen.contains(&edge.next) {
132                    seen.push(edge.next.clone());
133                    types.push(edge.node_type.name.clone());
134                    if let Some(found) =
135                        search(seen, to_end, after, &edge.next, types, schema)
136                    {
137                        return Some(found);
138                    }
139                    types.pop();
140                }
141            }
142            None
143        }
144
145        search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
146    }
147
148    pub fn default_type(&self) -> Option<&NodeDefinition> {
149        self.next
150            .iter()
151            .find(|edge| !edge.node_type.has_required_attrs())
152            .map(|edge| &edge.node_type)
153    }
154
155    pub fn compatible(
156        &self,
157        other: &ContentMatch,
158    ) -> bool {
159        for edge1 in &self.next {
160            for edge2 in &other.next {
161                if edge1.node_type == edge2.node_type {
162                    return true;
163                }
164            }
165        }
166        false
167    }
168
169    pub fn edge_count(&self) -> usize {
170        self.next.len()
171    }
172
173    pub fn edge(
174        &self,
175        n: usize,
176        // 根据错误提示,PoolResult 类型别名可能只接受一个泛型参数,这里修改为只传递一个泛型参数
177    ) -> PoolResult<&MatchEdge> {
178        if n >= self.next.len() {
179            Err(anyhow::anyhow!(format!("{} 超出了 {}", n, self.next.len())))
180        } else {
181            Ok(&self.next[n])
182        }
183    }
184}
185impl fmt::Display for ContentMatch {
186    fn fmt(
187        &self,
188        f: &mut fmt::Formatter<'_>,
189    ) -> fmt::Result {
190        let mut seen = Vec::new();
191        fn scan(
192            m: &ContentMatch,
193            seen: &mut Vec<ContentMatch>,
194        ) {
195            seen.push(m.clone());
196            for edge in &m.next {
197                if !seen.iter().any(|s| s == &edge.next) {
198                    scan(&edge.next, seen);
199                }
200            }
201        }
202        scan(self, &mut seen);
203
204        let str = seen
205            .iter()
206            .enumerate()
207            .map(|(i, m)| {
208                let mut out =
209                    format!("{} ", if m.valid_end { i + 1 } else { i });
210                for (j, edge) in m.next.iter().enumerate() {
211                    if j > 0 {
212                        out.push_str(", ");
213                    }
214                    out.push_str(&format!(
215                        "{}->{}",
216                        edge.node_type.name,
217                        seen.iter().position(|s| s == &edge.next).unwrap() + 1
218                    ));
219                }
220                out
221            })
222            .collect::<Vec<_>>()
223            .join("\n");
224
225        write!(f, "{str}")
226    }
227}
228
229#[derive(Clone, PartialEq, Eq, Debug)]
230pub struct TokenStream {
231    pos: usize,
232    tokens: Vec<String>,
233    node_types: HashMap<String, NodeDefinition>,
234    string: String,
235}
236
237impl TokenStream {
238    pub fn new(
239        string: String,
240        node_types: HashMap<String, NodeDefinition>,
241    ) -> Self {
242        let mut tokens = Vec::new();
243        let mut current_token = String::new();
244        for c in string.chars() {
245            if c.is_whitespace() {
246                // 如果当前字符是空白字符,且当前令牌不为空,则将当前令牌添加到令牌列表中
247                if !current_token.is_empty() {
248                    tokens.push(current_token.clone());
249                    current_token.clear(); // 清空当前令牌
250                }
251            } else if !c.is_alphanumeric() && c != '_' {
252                // 如果当前字符是非字母数字字符(不包括下划线),且当前令牌不为空,则将当前令牌添加到令牌列表中
253                if !current_token.is_empty() {
254                    tokens.push(current_token.clone());
255                    current_token.clear(); // 清空当前令牌
256                }
257                // 将非字母数字字符作为单独的令牌添加到列表中
258                tokens.push(c.to_string());
259            } else {
260                // 如果当前字符是字母数字字符或下划线,则将其添加到当前令牌中
261                current_token.push(c);
262            }
263        }
264
265        // 如果最后一个令牌不为空,则将其添加到令牌列表中
266        if !current_token.is_empty() {
267            tokens.push(current_token);
268        }
269        TokenStream { pos: 0, tokens, node_types, string }
270    }
271
272    pub fn next(&self) -> Option<&str> {
273        self.tokens.get(self.pos).map(|s| s.as_str())
274    }
275
276    pub fn eat(
277        &mut self,
278        tok: &str,
279    ) -> bool {
280        if self.next() == Some(tok) {
281            self.pos += 1;
282            true
283        } else {
284            false
285        }
286    }
287
288    pub fn err(
289        &self,
290        str: &str,
291    ) -> ! {
292        panic!("{} (约束必须是 '{}')", str, self.string);
293    }
294}
295
296#[derive(Debug, Clone)]
297enum Expr {
298    Choice { exprs: Vec<Expr> },
299    Seq { exprs: Vec<Expr> },
300    Plus { expr: Box<Expr> },
301    Star { expr: Box<Expr> },
302    Opt { expr: Box<Expr> },
303    Range { min: usize, max: isize, expr: Box<Expr> },
304    Name { value: Box<NodeDefinition> },
305}
306fn parse_expr(stream: &mut TokenStream) -> Expr {
307    let mut exprs = Vec::new();
308
309    loop {
310        exprs.push(parse_expr_seq(stream));
311        if !stream.eat("|") {
312            break;
313        }
314    }
315    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
316}
317fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
318    let mut exprs = Vec::new();
319
320    while let Some(next) = stream.next() {
321        if next == ")" || next == "|" {
322            break;
323        }
324        exprs.push(parse_expr_subscript(stream));
325    }
326    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
327}
328
329fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
330    let mut expr = parse_expr_atom(stream);
331    loop {
332        if stream.eat("+") {
333            expr = Expr::Plus { expr: Box::new(expr) };
334        } else if stream.eat("*") {
335            expr = Expr::Star { expr: Box::new(expr) };
336        } else if stream.eat("?") {
337            expr = Expr::Opt { expr: Box::new(expr) };
338        } else if stream.eat("{") {
339            expr = parse_expr_range(stream, expr);
340        } else {
341            break;
342        }
343    }
344    expr
345}
346
347fn parse_num(stream: &mut TokenStream) -> usize {
348    let next = stream.next().unwrap();
349    if !next.chars().all(|c| c.is_ascii_digit()) {
350        stream.err(&format!("Expected number, got '{next}'"));
351    }
352    let result = next.parse().unwrap();
353    stream.pos += 1;
354    result
355}
356fn parse_expr_range(
357    stream: &mut TokenStream,
358    expr: Expr,
359) -> Expr {
360    let min = parse_num(stream);
361    let max = if stream.eat(",") {
362        if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
363    } else {
364        min as isize
365    };
366    if !stream.eat("}") {
367        stream.err("Unclosed braced range");
368    }
369    Expr::Range { min, max, expr: Box::new(expr) }
370}
371
372fn resolve_name(
373    stream: &TokenStream,
374    name: &str,
375) -> Vec<NodeDefinition> {
376    let types = &stream.node_types;
377    if let Some(type_) = types.get(name) {
378        return vec![type_.clone()];
379    }
380    let mut result = Vec::new();
381
382    for type_ in types.values() {
383        if type_.groups.contains(&name.to_string()) {
384            result.push(type_.clone());
385        }
386    }
387    if result.is_empty() {
388        stream.err(&format!("没找到类型 '{name}'"));
389    }
390    result
391}
392
393fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
394    if stream.eat("(") {
395        let expr = parse_expr(stream);
396        if !stream.eat(")") {
397            stream.err("Missing closing paren");
398        }
399        expr
400    } else if let Some(next) = stream.next() {
401        if next.chars().all(|c| c.is_alphanumeric() || c == '_') {
402            let exprs: Vec<Expr> = resolve_name(stream, next)
403                .into_iter()
404                .map(|type_| Expr::Name { value: Box::new(type_) })
405                .collect();
406            stream.pos += 1;
407            if exprs.len() == 1 {
408                exprs.into_iter().next().unwrap()
409            } else {
410                Expr::Choice { exprs }
411            }
412        } else {
413            stream.err(&format!("Unexpected token '{next}'"));
414        }
415    } else {
416        stream.err("Unexpected end of input");
417    }
418}
419#[derive(Debug, Clone)]
420pub struct Edge {
421    term: Option<NodeDefinition>,
422    to: Option<usize>,
423}
424fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
425    let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
426
427    fn explore(
428        states: Vec<usize>,
429        nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
430        labeled: &mut HashMap<String, ContentMatch>,
431    ) -> ContentMatch {
432        let mut out: Vec<(NodeDefinition, Vec<usize>)> = Vec::new();
433        for &node in &states {
434            for edge in &nfa[node] {
435                if edge.borrow().term.is_none() {
436                    continue;
437                }
438                let term = edge.borrow().term.clone().unwrap();
439                let mut set: Option<&mut Vec<usize>> = None;
440
441                for (t, s) in &mut out {
442                    if *t == term {
443                        set = Some(s);
444                        break;
445                    }
446                }
447
448                if set.is_none() {
449                    out.push((term.clone(), Vec::new()));
450                    set = Some(&mut out.last_mut().unwrap().1);
451                }
452                for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
453                    set.as_mut().unwrap().push(node);
454                }
455            }
456        }
457        let mut state = ContentMatch {
458            next: Vec::new(),
459            wrap_cache: vec![],
460            valid_end: states.contains(&(nfa.len() - 1)),
461        };
462
463        let state_key =
464            states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
465        labeled.insert(state_key.clone(), state.clone());
466
467        for (term, states) in out {
468            let states_key = states
469                .iter()
470                .map(|&x| x.to_string())
471                .collect::<Vec<_>>()
472                .join(",");
473            let next_state = labeled
474                .get(&states_key)
475                .cloned()
476                .unwrap_or_else(|| explore(states, nfa, labeled));
477            labeled.insert(states_key, next_state.clone());
478            state.next.push(MatchEdge { node_type: term, next: next_state });
479        }
480
481        state
482    }
483
484    explore(null_from(&nfa, 0), &nfa, &mut labeled)
485}
486
487pub fn null_from(
488    nfa: &[Vec<Rc<RefCell<Edge>>>],
489    node: usize,
490) -> Vec<usize> {
491    let mut result = Vec::new();
492    fn scan(
493        nfa: &[Vec<Rc<RefCell<Edge>>>],
494        node: usize,
495        result: &mut Vec<usize>,
496    ) {
497        let edges = &nfa[node];
498        if edges.len() == 1 && edges[0].borrow().term.is_none() {
499            if let Some(to) = edges[0].borrow().to {
500                scan(nfa, to, result);
501            }
502            return;
503        }
504        if !result.contains(&node) {
505            result.push(node);
506        }
507        for edge in edges {
508            if edge.borrow().term.is_none() {
509                if let Some(to) = edge.borrow().to {
510                    if !result.contains(&to) {
511                        scan(nfa, to, result);
512                    }
513                }
514            }
515        }
516    }
517
518    scan(nfa, node, &mut result);
519    result.sort();
520    result
521}
522fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
523    let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
524    connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
525    nfa
526}
527fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
528    nfa.push(vec![]);
529    nfa.len() - 1
530}
531
532fn edge(
533    from: usize,
534    to: Option<usize>,
535    term: Option<NodeDefinition>,
536    nfa: &mut [Vec<Rc<RefCell<Edge>>>],
537) -> Rc<RefCell<Edge>> {
538    let edge =
539        Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
540    nfa[from].push(edge.clone());
541    edge.clone()
542}
543fn connect(
544    edges: &mut [Rc<RefCell<Edge>>],
545    to: usize,
546) {
547    for edge in edges {
548        edge.borrow_mut().to = Some(to);
549    }
550}
551fn compile(
552    expr: Expr,
553    from: usize,
554    nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
555) -> Vec<Rc<RefCell<Edge>>> {
556    match expr {
557        Expr::Choice { exprs } => exprs
558            .into_iter()
559            .flat_map(|expr| compile(expr, from, nfa))
560            .collect(),
561        Expr::Seq { exprs } => {
562            let mut cur = from;
563            let mut last_edges = Vec::new();
564            let exprs_len = exprs.len();
565
566            for (i, expr) in exprs.into_iter().enumerate() {
567                let next = if i == exprs_len - 1 { cur } else { node(nfa) };
568
569                let mut edges = compile(expr, cur, nfa);
570                if i < exprs_len - 1 {
571                    connect(&mut edges, next);
572                    cur = next;
573                } else {
574                    last_edges = edges;
575                }
576            }
577
578            if last_edges.is_empty() {
579                vec![edge(cur, None, None, nfa)]
580            } else {
581                last_edges
582            }
583        },
584        Expr::Star { expr } => {
585            let loop_node = node(nfa);
586            edge(from, Some(loop_node), None, nfa);
587            let mut compiled_expr = compile(*expr, loop_node, nfa);
588            connect(&mut compiled_expr, loop_node);
589            vec![edge(loop_node, None, None, nfa)]
590        },
591        Expr::Plus { expr } => {
592            let loop_node = node(nfa);
593            connect(&mut compile(*expr.clone(), from, nfa), loop_node);
594            let mut compiled_expr = compile(*expr, loop_node, nfa);
595            connect(&mut compiled_expr, loop_node);
596            vec![edge(loop_node, None, None, nfa)]
597        },
598        Expr::Opt { expr } => {
599            let mut edges = vec![edge(from, None, None, nfa)];
600            edges.extend(compile(*expr, from, nfa));
601            edges
602        },
603        Expr::Range { expr, min, max } => {
604            let mut cur = from;
605            for _ in 0..min {
606                let next = node(nfa);
607                connect(&mut compile(*expr.clone(), cur, nfa), next);
608                cur = next;
609            }
610            if max == -1 {
611                connect(&mut compile(*expr, cur, nfa), cur);
612            } else {
613                for _ in min..max as usize {
614                    let next = node(nfa);
615                    edge(cur, Some(next), None, nfa);
616                    connect(&mut compile(*expr.clone(), cur, nfa), next);
617                    cur = next;
618                }
619            }
620            vec![edge(cur, None, None, nfa)]
621        },
622        Expr::Name { value } => {
623            vec![edge(from, None, Some((*value).clone()), nfa)]
624        },
625    }
626}