cfg_predict_distance/
lib.rs

1//! Calculation of minimum distance from one part of the grammar to another.
2
3#![deny(unsafe_code)]
4#![deny(missing_docs)]
5
6use cfg_grammar::*;
7use cfg_symbol::Symbol;
8
9/// Calculation of minimum distance from one part of the grammar to another.
10/// Similar to multi-source shortest path search in a graph.
11pub struct MinimalDistance<'a> {
12    grammar: &'a Cfg,
13    distances: Vec<Vec<Option<u32>>>,
14    forward_distances: Distances,
15    backward_distances: Distances,
16    min_length: Vec<Option<u32>>,
17}
18
19struct Distances {
20    before: Vec<Option<u32>>,
21    after: Vec<Option<u32>>,
22}
23
24/// Determines how the distance propagates through
25/// the rule RHS.
26#[derive(Copy, Clone)]
27pub enum DistanceDirection {
28    /// The distance propagates forward.
29    ///
30    /// # Example
31    ///
32    /// ```ignore
33    /// A ::= B C D
34    /// ```
35    ///
36    /// The distance to C is:
37    ///
38    /// ```ignore
39    /// A ::= 1 B 0 C null D null
40    /// ```
41    ///
42    /// We find it like so:
43    ///
44    /// ```
45    /// use cfg_grammar::Cfg;
46    /// use cfg_predict_distance::{MinimalDistance, DistanceDirection};
47    /// let mut cfg = Cfg::new();
48    /// let [a, b, c, d] = cfg.sym();
49    /// cfg.set_roots([a]);
50    /// cfg.rule(a).rhs([b, c, d]);
51    /// let mut distance = MinimalDistance::new(&cfg);
52    /// let result = distance.minimal_distances(&[(0, 1)], DistanceDirection::Forward);
53    /// let expected = vec![Some(1), Some(0), None, None];
54    /// assert_eq!(result, &[expected]);
55    /// ```
56    Forward,
57    /// The distance propagates only backwards.
58    Backward,
59    /// The distance propagates both ways thorughout
60    /// the RHS.
61    Symmetric,
62}
63
64impl DistanceDirection {
65    fn includes_forward(self) -> bool {
66        match self {
67            DistanceDirection::Backward => false,
68            _ => true,
69        }
70    }
71
72    fn includes_backward(self) -> bool {
73        match self {
74            DistanceDirection::Forward => false,
75            _ => true,
76        }
77    }
78}
79
80impl Distances {
81    fn new(num_syms: usize) -> Self {
82        Distances {
83            before: vec![None; num_syms],
84            after: vec![None; num_syms],
85        }
86    }
87}
88
89impl<'a> MinimalDistance<'a> {
90    /// Returns a new `MinimalDistance` for a grammar.
91    pub fn new(grammar: &'a Cfg) -> Self {
92        let distances = grammar
93            .rules()
94            .map(|rule| vec![None; rule.rhs.len() + 1])
95            .collect();
96        MinimalDistance {
97            grammar,
98            distances,
99            forward_distances: Distances::new(grammar.num_syms()),
100            backward_distances: Distances::new(grammar.num_syms()),
101            min_length: vec![None; grammar.num_syms()],
102        }
103    }
104
105    /// Returns distances in order respective to the order of rule iteration.
106    pub fn distances(&self) -> &[Vec<Option<u32>>] {
107        &self.distances[..]
108    }
109
110    /// Calculates minimal distance from one parts of the grammar to others.
111    /// Returns distances in order respective to the order of rule iteration.
112    pub fn minimal_distances(
113        &mut self,
114        dots: &[(usize, usize)],
115        direction: DistanceDirection,
116    ) -> &[Vec<Option<u32>>] {
117        let mut dots = dots.to_vec();
118        dots.sort_unstable();
119        self.minimal_sentence_lengths();
120        self.immediate_minimal_distances(&dots[..], direction);
121        self.transitive_minimal_distances();
122        self.distances()
123    }
124
125    fn minimal_sentence_lengths(&mut self) {
126        // The distance for terminals is 1.
127        let terminal_set = self.grammar.terminal_symbols();
128        for terminal in terminal_set.iter() {
129            self.min_length[terminal.usize()] = Some(1);
130        }
131        // The distance for nullable symbols is 0.
132        for rule in self.grammar.rules() {
133            if rule.rhs.is_empty() {
134                self.min_length[rule.lhs.usize()] = Some(0);
135            }
136        }
137        // Calculate minimal lengths for nonterminals.
138        self.grammar
139            .clone()
140            .rhs_closure_with_values(&mut self.min_length);
141    }
142
143    fn immediate_minimal_distances(
144        &mut self,
145        dots: &[(usize, usize)],
146        direction: DistanceDirection,
147    ) {
148        // Calculates distances within rules.
149        for (idx, rule) in self.grammar.rules().enumerate() {
150            for (_rule_idx, dot_pos) in binary_search_span(dots, idx) {
151                debug_assert_eq!(idx, _rule_idx);
152                self.immediate_process_dot(idx, rule, dot_pos, direction);
153            }
154        }
155    }
156
157    fn immediate_process_dot(
158        &mut self,
159        rule_idx: usize,
160        rule: &CfgRule,
161        dot_pos: usize,
162        direction: DistanceDirection,
163    ) {
164        if direction.includes_forward() {
165            let (min, _) = self.update_rule_distances(0, &rule.rhs[..dot_pos], rule_idx, false);
166            set_min(&mut self.forward_distances.before[rule.lhs.usize()], min);
167        }
168        if direction.includes_backward() {
169            let (min, _) = self.update_rule_distances(0, &rule.rhs[dot_pos..], rule_idx, true);
170            set_min(&mut self.backward_distances.before[rule.lhs.usize()], min);
171        }
172    }
173
174    /// Calculates lengths of shortest paths that cross transitions (predictions and completions).
175    fn transitive_minimal_distances(&mut self) {
176        let mut changed = true;
177        while changed {
178            // Keep going for as long as any completion distances were lowered in the last pass.
179            changed = false;
180            for (idx, rule) in self.grammar.rules().enumerate() {
181                if let Some(distance) = self.forward_distances.after[rule.lhs.usize()] {
182                    let (_, changed_now) =
183                        self.update_rule_distances(distance, &rule.rhs[..], idx, false);
184                    changed |= changed_now;
185                }
186                if let Some(distance) = self.backward_distances.after[rule.lhs.usize()] {
187                    let (_, changed_now) =
188                        self.update_rule_distances(distance, &rule.rhs[..], idx, true);
189                    changed |= changed_now;
190                }
191            }
192        }
193    }
194
195    // Update distances in a rule.
196    fn update_rule_distances(
197        &mut self,
198        mut cur: u32,
199        rhs: &[Symbol],
200        idx: usize,
201        reverse: bool,
202    ) -> (u32, bool) {
203        let last = if reverse {
204            for (dot, &sym) in rhs.iter().enumerate() {
205                set_min(&mut self.distances[idx][dot], cur);
206                cur = self.update_sym_distance(cur, sym, true);
207            }
208            rhs.len()
209        } else {
210            for (dot, &sym) in rhs.iter().enumerate().rev() {
211                set_min(&mut self.distances[idx][dot + 1], cur);
212                cur = self.update_sym_distance(cur, sym, false);
213            }
214            0
215        };
216        let changed = set_min(&mut self.distances[idx][last], cur);
217        (cur, changed)
218    }
219
220    fn update_sym_distance(&mut self, mut cur: u32, sym: Symbol, reverse: bool) -> u32 {
221        set_min(
222            if reverse {
223                &mut self.backward_distances.after[sym.usize()]
224            } else {
225                &mut self.forward_distances.after[sym.usize()]
226            },
227            cur,
228        );
229        cur += self.min_length[sym.usize()].unwrap();
230        if let Some(sym_predicted) = if reverse {
231            &self.backward_distances
232        } else {
233            &self.forward_distances
234        }
235        .before[sym.usize()]
236        {
237            cur.min(sym_predicted)
238        } else {
239            cur
240        }
241    }
242}
243
244/// Updates a value with a minimum of two values.
245fn set_min(current: &mut Option<u32>, new: u32) -> bool {
246    if let Some(ref mut current) = *current {
247        if *current > new {
248            *current = new;
249            true
250        } else {
251            false
252        }
253    } else {
254        *current = Some(new);
255        true
256    }
257}
258
259fn binary_search_span(
260    dots: &[(usize, usize)],
261    idx: usize,
262) -> impl Iterator<Item = (usize, usize)> + '_ {
263    let dot_idx = match dots.binary_search(&(idx, 0)) {
264        Ok(pos) | Err(pos) => pos,
265    };
266    dots[dot_idx..]
267        .iter()
268        .copied()
269        .take_while(move |&(rule_id, _)| rule_id == idx)
270}