moduforge_core/model/
content.rs

1use std::fmt;
2use std::{cell::RefCell, collections::HashMap, rc::Rc};
3
4use std::cmp::Ordering;
5
6use super::node::Node;
7use super::node_type::NodeType;
8use super::schema::Schema;
9#[derive(Clone, PartialEq, Eq, Debug)]
10pub struct MatchEdge {
11    pub node_type: NodeType,
12    pub next: ContentMatch,
13}
14
15#[derive(Clone, PartialEq, Eq, Debug, Default)]
16pub struct ContentMatch {
17    pub next: Vec<MatchEdge>,
18    pub wrap_cache: Vec<Option<NodeType>>,
19    pub valid_end: bool,
20}
21impl Ord for ContentMatch {
22    fn cmp(
23        &self,
24        other: &Self,
25    ) -> Ordering {
26        let _ = other;
27        Ordering::Equal
28    }
29}
30impl PartialOrd for ContentMatch {
31    fn partial_cmp(
32        &self,
33        other: &Self,
34    ) -> Option<Ordering> {
35        Some(self.cmp(other))
36    }
37}
38
39impl ContentMatch {
40    pub fn parse(
41        str: String,
42        nodes: &HashMap<String, NodeType>,
43    ) -> ContentMatch {
44        let mut stream = TokenStream::new(str, nodes.clone());
45        if stream.next().is_none() {
46            return ContentMatch::empty();
47        }
48        let expr = parse_expr(&mut stream);
49
50        let arr = nfa(expr);
51
52        dfa(arr)
53    }
54    pub fn empty() -> Self {
55        ContentMatch { next: Vec::new(), wrap_cache: Vec::new(), valid_end: true }
56    }
57
58    pub fn match_type(
59        &self,
60        node_type: &NodeType,
61    ) -> Option<&ContentMatch> {
62        self.next.iter().find(|edge| &edge.node_type == node_type).map(|edge| &edge.next)
63    }
64
65    pub fn match_fragment(
66        &self,
67        frag: &[Node],
68        schema: &Schema,
69    ) -> Option<&ContentMatch> {
70        let mut current: &ContentMatch = self;
71
72        for content in frag.iter() {
73            if let Some(next) = current.match_type(schema.nodes.get(&content.r#type).unwrap()) {
74                current = next;
75            }
76        }
77        Some(current)
78    }
79
80    pub fn fill(
81        &self,
82        after: &Vec<Node>,
83        to_end: bool,
84        schema: &Schema,
85    ) -> Option<Vec<Node>> {
86        let mut seen: Vec<ContentMatch> = Vec::new();
87        seen.push(self.clone());
88        fn search(
89            seen: &mut Vec<ContentMatch>,
90            to_end: bool,
91            after: &Vec<Node>,
92            match_: &ContentMatch,
93            types: &mut Vec<NodeType>,
94            schema: &Schema,
95        ) -> Option<Vec<Node>> {
96            if let Some(finished) = match_.match_fragment(after, schema) {
97                if finished.valid_end || !to_end {
98                    let mut nodes = vec![];
99                    for tp in types.iter() {
100                        let mut node = tp.create_and_fill(None, None, vec![], None, schema);
101                        nodes.append(&mut node);
102                    }
103                    return Some(nodes);
104                }
105            }
106            for edge in &match_.next {
107                if !edge.node_type.has_required_attrs() && !seen.contains(&edge.next) {
108                    seen.push(edge.next.clone());
109                    types.push(edge.node_type.clone());
110                    if let Some(found) = search(seen, to_end, after, &edge.next, types, schema) {
111                        return Some(found);
112                    }
113                    types.pop();
114                }
115            }
116            None
117        }
118
119        search(&mut seen, to_end, after, self, &mut Vec::new(), schema)
120    }
121
122    pub fn default_type(&self) -> Option<&NodeType> {
123        self.next.iter().find(|edge| !edge.node_type.has_required_attrs()).map(|edge| &edge.node_type)
124    }
125
126    pub fn compatible(
127        &self,
128        other: &ContentMatch,
129    ) -> bool {
130        for edge1 in &self.next {
131            for edge2 in &other.next {
132                if edge1.node_type == edge2.node_type {
133                    return true;
134                }
135            }
136        }
137        false
138    }
139
140    pub fn edge_count(&self) -> usize {
141        self.next.len()
142    }
143
144    pub fn edge(
145        &self,
146        n: usize,
147    ) -> Result<&MatchEdge, String> {
148        if n >= self.next.len() { Err(format!("{} 超出了 {}", n, self.next.len())) } else { Ok(&self.next[n]) }
149    }
150}
151impl fmt::Display for ContentMatch {
152    fn fmt(
153        &self,
154        f: &mut fmt::Formatter<'_>,
155    ) -> fmt::Result {
156        let mut seen = Vec::new();
157        fn scan(
158            m: &ContentMatch,
159            seen: &mut Vec<ContentMatch>,
160        ) {
161            seen.push(m.clone());
162            for edge in &m.next {
163                if !seen.iter().any(|s| s == &edge.next) {
164                    scan(&edge.next, seen);
165                }
166            }
167        }
168        scan(self, &mut seen);
169
170        let str = seen
171            .iter()
172            .enumerate()
173            .map(|(i, m)| {
174                let mut out = format!("{} ", if m.valid_end { i + 1 } else { i });
175                for (j, edge) in m.next.iter().enumerate() {
176                    if j > 0 {
177                        out.push_str(", ");
178                    }
179                    out.push_str(&format!(
180                        "{}->{}",
181                        edge.node_type.name,
182                        seen.iter().position(|s| s == &edge.next).unwrap() + 1
183                    ));
184                }
185                out
186            })
187            .collect::<Vec<_>>()
188            .join("\n");
189
190        write!(f, "{}", str)
191    }
192}
193
194#[derive(Clone, PartialEq, Eq, Debug)]
195pub struct TokenStream {
196    pos: usize,
197    tokens: Vec<String>,
198    node_types: HashMap<String, NodeType>,
199    string: String,
200}
201
202impl TokenStream {
203    pub fn new(
204        string: String,
205        node_types: HashMap<String, NodeType>,
206    ) -> Self {
207        let mut tokens = Vec::new();
208        let mut current_token = String::new();
209        for c in string.chars() {
210            if c.is_whitespace() {
211                // 如果当前字符是空白字符,且当前令牌不为空,则将当前令牌添加到令牌列表中
212                if !current_token.is_empty() {
213                    tokens.push(current_token.clone());
214                    current_token.clear(); // 清空当前令牌
215                }
216            } else if !c.is_alphanumeric() {
217                // 如果当前字符是非字母数字字符,且当前令牌不为空,则将当前令牌添加到令牌列表中
218                if !current_token.is_empty() {
219                    tokens.push(current_token.clone());
220                    current_token.clear(); // 清空当前令牌
221                }
222                // 将非字母数字字符作为单独的令牌添加到列表中
223                tokens.push(c.to_string());
224            } else {
225                // 如果当前字符是字母数字字符,则将其添加到当前令牌中
226                current_token.push(c);
227            }
228        }
229
230        // 如果最后一个令牌不为空,则将其添加到令牌列表中
231        if !current_token.is_empty() {
232            tokens.push(current_token);
233        }
234        TokenStream { pos: 0, tokens, node_types, string }
235    }
236
237    pub fn next(&self) -> Option<&str> {
238        self.tokens.get(self.pos).map(|s| s.as_str())
239    }
240
241    pub fn eat(
242        &mut self,
243        tok: &str,
244    ) -> bool {
245        if self.next() == Some(tok) {
246            self.pos += 1;
247            true
248        } else {
249            false
250        }
251    }
252
253    pub fn err(
254        &self,
255        str: &str,
256    ) -> ! {
257        panic!("{} (约束必须是 '{}')", str, self.string);
258    }
259}
260
261#[derive(Debug, Clone)]
262enum Expr {
263    Choice { exprs: Vec<Expr> },
264    Seq { exprs: Vec<Expr> },
265    Plus { expr: Box<Expr> },
266    Star { expr: Box<Expr> },
267    Opt { expr: Box<Expr> },
268    Range { min: usize, max: isize, expr: Box<Expr> },
269    Name { value: NodeType },
270}
271fn parse_expr(stream: &mut TokenStream) -> Expr {
272    let mut exprs = Vec::new();
273
274    loop {
275        exprs.push(parse_expr_seq(stream));
276        if !stream.eat("|") {
277            break;
278        }
279    }
280    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Choice { exprs } }
281}
282fn parse_expr_seq(stream: &mut TokenStream) -> Expr {
283    let mut exprs = Vec::new();
284
285    while let Some(next) = stream.next() {
286        if next == ")" || next == "|" {
287            break;
288        }
289        exprs.push(parse_expr_subscript(stream));
290    }
291    if exprs.len() == 1 { exprs.pop().unwrap() } else { Expr::Seq { exprs } }
292}
293
294fn parse_expr_subscript(stream: &mut TokenStream) -> Expr {
295    let mut expr = parse_expr_atom(stream);
296    loop {
297        if stream.eat("+") {
298            expr = Expr::Plus { expr: Box::new(expr) };
299        } else if stream.eat("*") {
300            expr = Expr::Star { expr: Box::new(expr) };
301        } else if stream.eat("?") {
302            expr = Expr::Opt { expr: Box::new(expr) };
303        } else if stream.eat("{") {
304            expr = parse_expr_range(stream, expr);
305        } else {
306            break;
307        }
308    }
309    expr
310}
311
312fn parse_num(stream: &mut TokenStream) -> usize {
313    let next = stream.next().unwrap();
314    if !next.chars().all(|c| c.is_ascii_digit()) {
315        stream.err(&format!("Expected number, got '{}'", next));
316    }
317    let result = next.parse().unwrap();
318    stream.pos += 1;
319    result
320}
321fn parse_expr_range(
322    stream: &mut TokenStream,
323    expr: Expr,
324) -> Expr {
325    let min = parse_num(stream);
326    let max = if stream.eat(",") {
327        if stream.next() != Some("}") { parse_num(stream) as isize } else { -1 }
328    } else {
329        min as isize
330    };
331    if !stream.eat("}") {
332        stream.err("Unclosed braced range");
333    }
334    Expr::Range { min, max, expr: Box::new(expr) }
335}
336
337fn resolve_name(
338    stream: &TokenStream,
339    name: &str,
340) -> Vec<NodeType> {
341    let types = &stream.node_types;
342    if let Some(type_) = types.get(name) {
343        return vec![type_.clone()];
344    }
345    let mut result = Vec::new();
346
347    for type_ in types.values() {
348        if type_.groups.contains(&name.to_string()) {
349            result.push(type_.clone());
350        }
351    }
352    if result.is_empty() {
353        stream.err(&format!("No node type or group '{}' found", name));
354    }
355    result
356}
357
358fn parse_expr_atom(stream: &mut TokenStream) -> Expr {
359    if stream.eat("(") {
360        let expr = parse_expr(stream);
361        if !stream.eat(")") {
362            stream.err("Missing closing paren");
363        }
364        expr
365    } else if let Some(next) = stream.next() {
366        if next.chars().all(|c| c.is_alphanumeric()) {
367            let exprs: Vec<Expr> =
368                resolve_name(stream, next).into_iter().map(|type_| Expr::Name { value: type_ }).collect();
369            stream.pos += 1;
370            if exprs.len() == 1 { exprs.into_iter().next().unwrap() } else { Expr::Choice { exprs } }
371        } else {
372            stream.err(&format!("Unexpected token '{}'", next));
373        }
374    } else {
375        stream.err("Unexpected end of input");
376    }
377}
378#[derive(Debug, Clone)]
379pub struct Edge {
380    term: Option<NodeType>,
381    to: Option<usize>,
382}
383fn dfa(nfa: Vec<Vec<Rc<RefCell<Edge>>>>) -> ContentMatch {
384    let mut labeled: HashMap<String, ContentMatch> = HashMap::new();
385
386    fn explore(
387        states: Vec<usize>,
388        nfa: &Vec<Vec<Rc<RefCell<Edge>>>>,
389        labeled: &mut HashMap<String, ContentMatch>,
390    ) -> ContentMatch {
391        let mut out: Vec<(NodeType, Vec<usize>)> = Vec::new();
392        for &node in &states {
393            for edge in &nfa[node] {
394                if edge.borrow().term.is_none() {
395                    continue;
396                }
397                let term = edge.borrow().term.clone().unwrap();
398                let mut set: Option<&mut Vec<usize>> = None;
399
400                for (t, s) in &mut out {
401                    if *t == term {
402                        set = Some(s);
403                        break;
404                    }
405                }
406
407                if set.is_none() {
408                    out.push((term.clone(), Vec::new()));
409                    set = Some(&mut out.last_mut().unwrap().1);
410                }
411                for &node in &null_from(nfa, edge.borrow().to.unwrap_or(0)) {
412                    set.as_mut().unwrap().push(node);
413                }
414            }
415        }
416        let mut state =
417            ContentMatch { next: Vec::new(), wrap_cache: vec![], valid_end: states.contains(&(nfa.len() - 1)) };
418
419        let state_key = states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
420        labeled.insert(state_key.clone(), state.clone());
421
422        for (term, states) in out {
423            let states_key = states.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(",");
424            let next_state = labeled.get(&states_key).cloned().unwrap_or_else(|| explore(states, nfa, labeled));
425            labeled.insert(states_key, next_state.clone());
426            state.next.push(MatchEdge { node_type: term, next: next_state });
427        }
428
429        state
430    }
431
432    explore(null_from(&nfa, 0), &nfa, &mut labeled)
433}
434
435pub fn null_from(
436    nfa: &[Vec<Rc<RefCell<Edge>>>],
437    node: usize,
438) -> Vec<usize> {
439    let mut result = Vec::new();
440    fn scan(
441        nfa: &[Vec<Rc<RefCell<Edge>>>],
442        node: usize,
443        result: &mut Vec<usize>,
444    ) {
445        let edges = &nfa[node];
446        if edges.len() == 1 && edges[0].borrow().term.is_none() {
447            if let Some(to) = edges[0].borrow().to {
448                scan(nfa, to, result);
449            }
450            return;
451        }
452        if !result.contains(&node) {
453            result.push(node);
454        }
455        for edge in edges {
456            if edge.borrow().term.is_none() {
457                if let Some(to) = edge.borrow().to {
458                    if !result.contains(&to) {
459                        scan(nfa, to, result);
460                    }
461                }
462            }
463        }
464    }
465
466    scan(nfa, node, &mut result);
467    result.sort();
468    result
469}
470fn nfa(expr: Expr) -> Vec<Vec<Rc<RefCell<Edge>>>> {
471    let mut nfa: Vec<Vec<Rc<RefCell<Edge>>>> = vec![vec![]];
472    connect(&mut compile(expr, 0, &mut nfa), node(&mut nfa));
473    nfa
474}
475fn node(nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>) -> usize {
476    nfa.push(vec![]);
477    nfa.len() - 1
478}
479
480fn edge(
481    from: usize,
482    to: Option<usize>,
483    term: Option<NodeType>,
484    nfa: &mut [Vec<Rc<RefCell<Edge>>>],
485) -> Rc<RefCell<Edge>> {
486    let edge = Rc::new(RefCell::new(Edge { term, to: Option::from(to.unwrap_or(0)) }));
487    nfa[from].push(edge.clone());
488    edge.clone()
489}
490fn connect(
491    edges: &mut [Rc<RefCell<Edge>>],
492    to: usize,
493) {
494    for edge in edges {
495        edge.borrow_mut().to = Some(to);
496    }
497}
498fn compile(
499    expr: Expr,
500    from: usize,
501    nfa: &mut Vec<Vec<Rc<RefCell<Edge>>>>,
502) -> Vec<Rc<RefCell<Edge>>> {
503    match expr {
504        Expr::Choice { exprs } => exprs.into_iter().flat_map(|expr| compile(expr, from, nfa)).collect(),
505        Expr::Seq { exprs } => {
506            let mut cur = from;
507            let len = exprs.len(); // 在这里获取长度
508            for (i, expr) in exprs.into_iter().enumerate() {
509                let mut next = compile(expr, cur, nfa);
510                if i == len - 1 {
511                    return next;
512                }
513                connect(&mut next, node(nfa));
514                cur = node(nfa);
515            }
516            vec![]
517        },
518        Expr::Star { expr } => {
519            let loop_node = node(nfa);
520            edge(from, Some(loop_node), None, nfa);
521            let mut compiled_expr = compile(*expr, loop_node, nfa);
522            connect(&mut compiled_expr, loop_node);
523            vec![edge(loop_node, None, None, nfa)]
524        },
525        Expr::Plus { expr } => {
526            let loop_node = node(nfa);
527            connect(&mut compile(*expr.clone(), from, nfa), loop_node);
528            let mut compiled_expr = compile(*expr, loop_node, nfa);
529            connect(&mut compiled_expr, loop_node);
530
531            vec![edge(loop_node, None, None, nfa)]
532        },
533        Expr::Opt { expr } => {
534            let mut edges = vec![edge(from, None, None, nfa)];
535            edges.extend(compile(*expr, from, nfa));
536            edges
537        },
538        Expr::Range { expr, min, max } => {
539            let mut cur = from;
540            for _ in 0..min {
541                let next = node(nfa);
542                connect(&mut compile(*expr.clone(), cur, nfa), next);
543                cur = next;
544            }
545            if max == -1 {
546                connect(&mut compile(*expr, cur, nfa), cur);
547            } else {
548                for _ in min..max as usize {
549                    let next = node(nfa);
550                    edge(cur, Some(next), None, nfa);
551                    connect(&mut compile(*expr.clone(), cur, nfa), next);
552                    cur = next;
553                }
554            }
555            vec![edge(cur, None, None, nfa)]
556        },
557        Expr::Name { value } => {
558            vec![edge(from, None, Some(value), nfa)]
559        },
560    }
561}