moduforge_model/
content.rs

1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use super::node::Node;
7use super::node_type::NodeType;
8use super::schema::Schema;
9#[derive(Clone, PartialEq, Eq, Debug)]
10pub struct MatchEdge {
11    pub node_type: NodeType,
12    pub next: ContentMatch,
13}
14
15#[derive(Clone, PartialEq, Eq, Debug, Default)]
16pub struct ContentMatch {
17    pub next: Vec<MatchEdge>,
18    pub wrap_cache: Vec<Option<NodeType>>,
19    pub valid_end: bool,
20}
21impl Ord for ContentMatch {
22    fn cmp(
23        &self,
24        other: &Self,
25    ) -> Ordering {
26        let _ = other;
27        Ordering::Equal
28    }
29}
30impl PartialOrd for ContentMatch {
31    fn partial_cmp(
32        &self,
33        other: &Self,
34    ) -> Option<Ordering> {
35        Some(self.cmp(other))
36    }
37}
38
39impl ContentMatch {
40    pub fn parse(
41        str: String,
42        nodes: &HashMap<String, NodeType>,
43    ) -> ContentMatch {
44        let mut stream = TokenStream::new(str, nodes.clone());
45        if stream.next().is_none() {
46            return ContentMatch::empty();
47        }
48        let expr = parse_expr(&mut stream);
49
50        let arr = nfa(expr);
51
52        dfa(arr)
53    }
54    pub fn empty() -> Self {
55        ContentMatch {
56            next: Vec::new(),
57            wrap_cache: Vec::new(),
58            valid_end: true,
59        }
60    }
61
62    pub fn match_type(
63        &self,
64        node_type: &NodeType,
65    ) -> Option<&ContentMatch> {
66        self.next
67            .iter()
68            .find(|edge| &edge.node_type == node_type)
69            .map(|edge| &edge.next)
70    }
71
72    pub fn match_fragment(
73        &self,
74        frag: &[Node],
75        schema: &Schema,
76    ) -> Option<&ContentMatch> {
77        let mut current: &ContentMatch = self;
78
79        for content in frag.iter() {
80            if let Some(next) =
81                current.match_type(schema.nodes.get(&content.r#type).unwrap())
82            {
83                current = next;
84            }
85        }
86        Some(current)
87    }
88
89    /// 根据内容匹配规则填充节点
90    ///
91    /// # 参数
92    /// - `after`: 待匹配的节点列表
93    /// - `to_end`: 是否需要匹配到结束状态
94    /// - `schema`: 当前使用的文档模式
95    ///
96    /// # 返回值
97    /// 返回填充后的节点列表,如果无法匹配则返回None
98    pub fn fill(
99        &self,
100        after: &Vec<Node>,
101        to_end: bool,
102        schema: &Schema,
103    ) -> Option<Vec<Node>> {
104        let mut seen: Vec<ContentMatch> = Vec::new();
105        seen.push(self.clone());
106        fn search(
107            seen: &mut Vec<ContentMatch>,
108            to_end: bool,
109            after: &Vec<Node>,
110            match_: &ContentMatch,
111            types: &mut Vec<NodeType>,
112            schema: &Schema,
113        ) -> Option<Vec<Node>> {
114            // 首先检查是否可以匹配当前片段
115            if let Some(finished) = match_.match_fragment(after, schema) {
116                if finished.valid_end || !to_end {
117                    let mut nodes = vec![];
118                    for tp in types.iter() {
119                        // 创建一个节点,但不递归创建子节点
120                        let node = tp.create(None, None, vec![], None);
121                        nodes.push(node);
122                    }
123                    return Some(nodes);
124                }
125            }
126
127            // 然后尝试按顺序匹配每个边
128            for edge in &match_.next {
129                if !edge.node_type.has_required_attrs()
130                    && !seen.contains(&edge.next)
131                {
132                    seen.push(edge.next.clone());
133                    types.push(edge.node_type.clone());
134                    if let Some(found) =
135                        search(seen, to_end, after, &edge.next, types, schema)
136                    {
137                        return Some(found);
138                    }
139                    types.pop();
140                }
141            }
142            None
143        }
144
145        search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
146    }
147
148    pub fn default_type(&self) -> Option<&NodeType> {
149        self.next
150            .iter()
151            .find(|edge| !edge.node_type.has_required_attrs())
152            .map(|edge| &edge.node_type)
153    }
154
155    pub fn compatible(
156        &self,
157        other: &ContentMatch,
158    ) -> bool {
159        for edge1 in &self.next {
160            for edge2 in &other.next {
161                if edge1.node_type == edge2.node_type {
162                    return true;
163                }
164            }
165        }
166        false
167    }
168
169    pub fn edge_count(&self) -> usize {
170        self.next.len()
171    }
172
173    pub fn edge(
174        &self,
175        n: usize,
176    ) -> Result<&MatchEdge, String> {
177        if n >= self.next.len() {
178            Err(format!("{} 超出了 {}", n, self.next.len()))
179        } else {
180            Ok(&self.next[n])
181        }
182    }
183}
184impl fmt::Display for ContentMatch {
185    fn fmt(
186        &self,
187        f: &mut fmt::Formatter<'_>,
188    ) -> fmt::Result {
189        let mut seen = Vec::new();
190        fn scan(
191            m: &ContentMatch,
192            seen: &mut Vec<ContentMatch>,
193        ) {
194            seen.push(m.clone());
195            for edge in &m.next {
196                if !seen.iter().any(|s| s == &edge.next) {
197                    scan(&edge.next, seen);
198                }
199            }
200        }
201        scan(self, &mut seen);
202
203        let str = seen
204            .iter()
205            .enumerate()
206            .map(|(i, m)| {
207                let mut out =
208                    format!("{} ", if m.valid_end { i + 1 } else { i });
209                for (j, edge) in m.next.iter().enumerate() {
210                    if j > 0 {
211                        out.push_str(", ");
212                    }
213                    out.push_str(&format!(
214                        "{}->{}",
215                        edge.node_type.name,
216                        seen.iter().position(|s| s == &edge.next).unwrap() + 1
217                    ));
218                }
219                out
220            })
221            .collect::<Vec<_>>()
222            .join("\n");
223
224        write!(f, "{}", str)
225    }
226}
227
228#[derive(Clone, PartialEq, Eq, Debug)]
229pub struct TokenStream {
230    pos: usize,
231    tokens: Vec<String>,
232    node_types: HashMap<String, NodeType>,
233    string: String,
234}
235
236impl TokenStream {
237    pub fn new(
238        string: String,
239        node_types: HashMap<String, NodeType>,
240    ) -> Self {
241        let mut tokens = Vec::new();
242        let mut current_token = String::new();
243        for c in string.chars() {
244            if c.is_whitespace() {
245                // 如果当前字符是空白字符,且当前令牌不为空,则将当前令牌添加到令牌列表中
246                if !current_token.is_empty() {
247                    tokens.push(current_token.clone());
248                    current_token.clear(); // 清空当前令牌
249                }
250            } else if !c.is_alphanumeric() {
251                // 如果当前字符是非字母数字字符,且当前令牌不为空,则将当前令牌添加到令牌列表中
252                if !current_token.is_empty() {
253                    tokens.push(current_token.clone());
254                    current_token.clear(); // 清空当前令牌
255                }
256                // 将非字母数字字符作为单独的令牌添加到列表中
257                tokens.push(c.to_string());
258            } else {
259                // 如果当前字符是字母数字字符,则将其添加到当前令牌中
260                current_token.push(c);
261            }
262        }
263
264        // 如果最后一个令牌不为空,则将其添加到令牌列表中
265        if !current_token.is_empty() {
266            tokens.push(current_token);
267        }
268        TokenStream { pos: 0, tokens, node_types, string }
269    }
270
271    pub fn next(&self) -> Option<&str> {
272        self.tokens.get(self.pos).map(|s| s.as_str())
273    }
274
275    pub fn eat(
276        &mut self,
277        tok: &str,
278    ) -> bool {
279        if self.next() == Some(tok) {
280            self.pos += 1;
281            true
282        } else {
283            false
284        }
285    }
286
287    pub fn err(
288        &self,
289        str: &str,
290    ) -> ! {
291        panic!("{} (约束必须是 '{}')", str, self.string);
292    }
293}
294
295#[derive(Debug, Clone)]
296enum Expr {
297    Choice { exprs: Vec<Expr> },
298    Seq { exprs: Vec<Expr> },
299    Plus { expr: Box<Expr> },
300    Star { expr: Box<Expr> },
301    Opt { expr: Box<Expr> },
302    Range { min: usize, max: isize, expr: Box<Expr> },
303    Name { value: NodeType },
304}
305fn parse_expr(stream: &mut TokenStream) -> Expr {
306    let mut exprs = Vec::new();
307
308    loop {
309        exprs.push(parse_expr_seq(stream));
310        if !stream.eat("|") {
311            break;
312        }
313    }
314    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
315}
316fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
317    let mut exprs = Vec::new();
318
319    while let Some(next) = stream.next() {
320        if next == ")" || next == "|" {
321            break;
322        }
323        exprs.push(parse_expr_subscript(stream));
324    }
325    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
326}
327
328fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
329    let mut expr = parse_expr_atom(stream);
330    loop {
331        if stream.eat("+") {
332            expr = Expr::Plus { expr: Box::new(expr) };
333        } else if stream.eat("*") {
334            expr = Expr::Star { expr: Box::new(expr) };
335        } else if stream.eat("?") {
336            expr = Expr::Opt { expr: Box::new(expr) };
337        } else if stream.eat("{") {
338            expr = parse_expr_range(stream, expr);
339        } else {
340            break;
341        }
342    }
343    expr
344}
345
346fn parse_num(stream: &mut TokenStream) -> usize {
347    let next = stream.next().unwrap();
348    if !next.chars().all(|c| c.is_ascii_digit()) {
349        stream.err(&format!("Expected number, got '{}'", next));
350    }
351    let result = next.parse().unwrap();
352    stream.pos += 1;
353    result
354}
355fn parse_expr_range(
356    stream: &mut TokenStream,
357    expr: Expr,
358) -> Expr {
359    let min = parse_num(stream);
360    let max = if stream.eat(",") {
361        if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
362    } else {
363        min as isize
364    };
365    if !stream.eat("}") {
366        stream.err("Unclosed braced range");
367    }
368    Expr::Range { min, max, expr: Box::new(expr) }
369}
370
371fn resolve_name(
372    stream: &TokenStream,
373    name: &str,
374) -> Vec<NodeType> {
375    let types = &stream.node_types;
376    if let Some(type_) = types.get(name) {
377        return vec![type_.clone()];
378    }
379    let mut result = Vec::new();
380
381    for type_ in types.values() {
382        if type_.groups.contains(&name.to_string()) {
383            result.push(type_.clone());
384        }
385    }
386    if result.is_empty() {
387        stream.err(&format!("没找到类型 '{}'", name));
388    }
389    result
390}
391
392fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
393    if stream.eat("(") {
394        let expr = parse_expr(stream);
395        if !stream.eat(")") {
396            stream.err("Missing closing paren");
397        }
398        expr
399    } else if let Some(next) = stream.next() {
400        if next.chars().all(|c| c.is_alphanumeric()) {
401            let exprs: Vec<Expr> = resolve_name(stream, next)
402                .into_iter()
403                .map(|type_| Expr::Name { value: type_ })
404                .collect();
405            stream.pos += 1;
406            if exprs.len() == 1 {
407                exprs.into_iter().next().unwrap()
408            } else {
409                Expr::Choice { exprs }
410            }
411        } else {
412            stream.err(&format!("Unexpected token '{}'", next));
413        }
414    } else {
415        stream.err("Unexpected end of input");
416    }
417}
418#[derive(Debug, Clone)]
419pub struct Edge {
420    term: Option<NodeType>,
421    to: Option<usize>,
422}
423fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
424    let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
425
426    fn explore(
427        states: Vec<usize>,
428        nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
429        labeled: &mut HashMap<String, ContentMatch>,
430    ) -> ContentMatch {
431        let mut out: Vec<(NodeType, Vec<usize>)> = Vec::new();
432        for &node in &states {
433            for edge in &nfa[node] {
434                if edge.borrow().term.is_none() {
435                    continue;
436                }
437                let term = edge.borrow().term.clone().unwrap();
438                let mut set: Option<&mut Vec<usize>> = None;
439
440                for (t, s) in &mut out {
441                    if *t == term {
442                        set = Some(s);
443                        break;
444                    }
445                }
446
447                if set.is_none() {
448                    out.push((term.clone(), Vec::new()));
449                    set = Some(&mut out.last_mut().unwrap().1);
450                }
451                for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
452                    set.as_mut().unwrap().push(node);
453                }
454            }
455        }
456        let mut state = ContentMatch {
457            next: Vec::new(),
458            wrap_cache: vec![],
459            valid_end: states.contains(&(nfa.len() - 1)),
460        };
461
462        let state_key =
463            states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
464        labeled.insert(state_key.clone(), state.clone());
465
466        for (term, states) in out {
467            let states_key = states
468                .iter()
469                .map(|&x| x.to_string())
470                .collect::<Vec<_>>()
471                .join(",");
472            let next_state = labeled
473                .get(&states_key)
474                .cloned()
475                .unwrap_or_else(|| explore(states, nfa, labeled));
476            labeled.insert(states_key, next_state.clone());
477            state.next.push(MatchEdge { node_type: term, next: next_state });
478        }
479
480        state
481    }
482
483    explore(null_from(&nfa, 0), &nfa, &mut labeled)
484}
485
486pub fn null_from(
487    nfa: &[Vec<Rc<RefCell<Edge>>>],
488    node: usize,
489) -> Vec<usize> {
490    let mut result = Vec::new();
491    fn scan(
492        nfa: &[Vec<Rc<RefCell<Edge>>>],
493        node: usize,
494        result: &mut Vec<usize>,
495    ) {
496        let edges = &nfa[node];
497        if edges.len() == 1 && edges[0].borrow().term.is_none() {
498            if let Some(to) = edges[0].borrow().to {
499                scan(nfa, to, result);
500            }
501            return;
502        }
503        if !result.contains(&node) {
504            result.push(node);
505        }
506        for edge in edges {
507            if edge.borrow().term.is_none() {
508                if let Some(to) = edge.borrow().to {
509                    if !result.contains(&to) {
510                        scan(nfa, to, result);
511                    }
512                }
513            }
514        }
515    }
516
517    scan(nfa, node, &mut result);
518    result.sort();
519    result
520}
521fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
522    let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
523    connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
524    nfa
525}
526fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
527    nfa.push(vec![]);
528    nfa.len() - 1
529}
530
531fn edge(
532    from: usize,
533    to: Option<usize>,
534    term: Option<NodeType>,
535    nfa: &mut [Vec<Rc<RefCell<Edge>>>],
536) -> Rc<RefCell<Edge>> {
537    let edge =
538        Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
539    nfa[from].push(edge.clone());
540    edge.clone()
541}
542fn connect(
543    edges: &mut [Rc<RefCell<Edge>>],
544    to: usize,
545) {
546    for edge in edges {
547        edge.borrow_mut().to = Some(to);
548    }
549}
550fn compile(
551    expr: Expr,
552    from: usize,
553    nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
554) -> Vec<Rc<RefCell<Edge>>> {
555    match expr {
556        Expr::Choice { exprs } => exprs
557            .into_iter()
558            .flat_map(|expr| compile(expr, from, nfa))
559            .collect(),
560        Expr::Seq { exprs } => {
561            let mut cur = from;
562            let mut last_edges = Vec::new();
563            let exprs_len = exprs.len();
564
565            for (i, expr) in exprs.into_iter().enumerate() {
566                let next = if i == exprs_len - 1 { cur } else { node(nfa) };
567
568                let mut edges = compile(expr, cur, nfa);
569                if i < exprs_len - 1 {
570                    connect(&mut edges, next);
571                    cur = next;
572                } else {
573                    last_edges = edges;
574                }
575            }
576
577            if last_edges.is_empty() {
578                vec![edge(cur, None, None, nfa)]
579            } else {
580                last_edges
581            }
582        },
583        Expr::Star { expr } => {
584            let loop_node = node(nfa);
585            edge(from, Some(loop_node), None, nfa);
586            let mut compiled_expr = compile(*expr, loop_node, nfa);
587            connect(&mut compiled_expr, loop_node);
588            vec![edge(loop_node, None, None, nfa)]
589        },
590        Expr::Plus { expr } => {
591            let loop_node = node(nfa);
592            connect(&mut compile(*expr.clone(), from, nfa), loop_node);
593            let mut compiled_expr = compile(*expr, loop_node, nfa);
594            connect(&mut compiled_expr, loop_node);
595            vec![edge(loop_node, None, None, nfa)]
596        },
597        Expr::Opt { expr } => {
598            let mut edges = vec![edge(from, None, None, nfa)];
599            edges.extend(compile(*expr, from, nfa));
600            edges
601        },
602        Expr::Range { expr, min, max } => {
603            let mut cur = from;
604            for _ in 0..min {
605                let next = node(nfa);
606                connect(&mut compile(*expr.clone(), cur, nfa), next);
607                cur = next;
608            }
609            if max == -1 {
610                connect(&mut compile(*expr, cur, nfa), cur);
611            } else {
612                for _ in min..max as usize {
613                    let next = node(nfa);
614                    edge(cur, Some(next), None, nfa);
615                    connect(&mut compile(*expr.clone(), cur, nfa), next);
616                    cur = next;
617                }
618            }
619            vec![edge(cur, None, None, nfa)]
620        },
621        Expr::Name { value } => {
622            vec![edge(from, None, Some(value), nfa)]
623        },
624    }
625}