polonius_engine/output/mod.rs

// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use datafrog::Relation;
use rustc_hash::{FxHashMap, FxHashSet};
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet};

use crate::facts::{AllFacts, Atom, FactTypes};

mod datafrog_opt;
mod initialization;
mod liveness;
mod location_insensitive;
mod naive;

#[derive(Debug, Clone, Copy)]
pub enum Algorithm {
    /// Simple rules, but slower to execute
    Naive,

    /// Optimized variant of the rules
    DatafrogOpt,

    /// Fast to compute, but imprecise: there can be false positives
    /// but no false negatives. Tailored for quick "early return" situations.
    LocationInsensitive,

    /// Compares the `Naive` and `DatafrogOpt` variants to ensure they indeed
    /// compute the same errors.
    Compare,

    /// Combination of the fast `LocationInsensitive` pre-pass, followed by
    /// the more expensive `DatafrogOpt` variant.
    Hybrid,
}

impl Algorithm {
    /// Optimized variants that ought to be equivalent to "naive"
    pub const OPTIMIZED: &'static [Algorithm] = &[Algorithm::DatafrogOpt];

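    /// Returns the names of all the algorithm variants. These are also the values accepted,
    /// case-insensitively, when parsing an `Algorithm` with `FromStr`,
    /// e.g. `"hybrid".parse::<Algorithm>()`.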
    pub fn variants() -> [&'static str; 5] {
        [
            "Naive",
            "DatafrogOpt",
            "LocationInsensitive",
            "Compare",
            "Hybrid",
        ]
    }
}

impl ::std::str::FromStr for Algorithm {
    type Err = String;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_ref() {
            "naive" => Ok(Algorithm::Naive),
            "datafrogopt" => Ok(Algorithm::DatafrogOpt),
            "locationinsensitive" => Ok(Algorithm::LocationInsensitive),
            "compare" => Ok(Algorithm::Compare),
            "hybrid" => Ok(Algorithm::Hybrid),
            _ => Err(String::from(
                "valid values: Naive, DatafrogOpt, LocationInsensitive, Compare, Hybrid",
            )),
        }
    }
}

#[derive(Clone, Debug)]
pub struct Output<T: FactTypes> {
    pub errors: FxHashMap<T::Point, Vec<T::Loan>>,
    pub subset_errors: FxHashMap<T::Point, BTreeSet<(T::Origin, T::Origin)>>,
    pub move_errors: FxHashMap<T::Point, Vec<T::Path>>,

    pub dump_enabled: bool,

    // these are just for debugging
    pub loan_live_at: FxHashMap<T::Point, Vec<T::Loan>>,
    pub origin_contains_loan_at: FxHashMap<T::Point, BTreeMap<T::Origin, BTreeSet<T::Loan>>>,
    pub origin_contains_loan_anywhere: FxHashMap<T::Origin, BTreeSet<T::Loan>>,
    pub origin_live_on_entry: FxHashMap<T::Point, Vec<T::Origin>>,
    pub loan_invalidated_at: FxHashMap<T::Point, Vec<T::Loan>>,
    pub subset: FxHashMap<T::Point, BTreeMap<T::Origin, BTreeSet<T::Origin>>>,
    pub subset_anywhere: FxHashMap<T::Origin, BTreeSet<T::Origin>>,
    pub var_live_on_entry: FxHashMap<T::Point, Vec<T::Variable>>,
    pub var_drop_live_on_entry: FxHashMap<T::Point, Vec<T::Variable>>,
    pub path_maybe_initialized_on_exit: FxHashMap<T::Point, Vec<T::Path>>,
    pub path_maybe_uninitialized_on_exit: FxHashMap<T::Point, Vec<T::Path>>,
    pub known_contains: FxHashMap<T::Origin, BTreeSet<T::Loan>>,
    pub var_maybe_partly_initialized_on_exit: FxHashMap<T::Point, Vec<T::Variable>>,
}

/// Subset of `AllFacts` dedicated to initialization
struct InitializationContext<T: FactTypes> {
    child_path: Vec<(T::Path, T::Path)>,
    path_is_var: Vec<(T::Path, T::Variable)>,
    path_assigned_at_base: Vec<(T::Path, T::Point)>,
    path_moved_at_base: Vec<(T::Path, T::Point)>,
    path_accessed_at_base: Vec<(T::Path, T::Point)>,
}

/// Subset of `AllFacts` dedicated to liveness
struct LivenessContext<T: FactTypes> {
    var_used_at: Vec<(T::Variable, T::Point)>,
    var_defined_at: Vec<(T::Variable, T::Point)>,
    var_dropped_at: Vec<(T::Variable, T::Point)>,
    use_of_var_derefs_origin: Vec<(T::Variable, T::Origin)>,
    drop_of_var_derefs_origin: Vec<(T::Variable, T::Origin)>,
}

/// Subset of `AllFacts` dedicated to borrow checking, and data ready for use by the variants
struct Context<'ctx, T: FactTypes> {
    // `Relation`s used as static inputs, by all variants
    origin_live_on_entry: Relation<(T::Origin, T::Point)>,
    loan_invalidated_at: Relation<(T::Loan, T::Point)>,

    // static inputs used via `Variable`s, by all variants
    subset_base: &'ctx Vec<(T::Origin, T::Origin, T::Point)>,
    loan_issued_at: &'ctx Vec<(T::Origin, T::Loan, T::Point)>,

    // static inputs used by variants other than `LocationInsensitive`
    loan_killed_at: Relation<(T::Loan, T::Point)>,
    known_contains: Relation<(T::Origin, T::Loan)>,
    placeholder_origin: Relation<(T::Origin, ())>,
    placeholder_loan: Relation<(T::Loan, T::Origin)>,

    // The `known_placeholder_subset` relation in the facts does not necessarily contain all the
    // transitive subsets. The transitive closure is always needed, so this version here is fully
    // closed over.
    known_placeholder_subset: Relation<(T::Origin, T::Origin)>,

    // while this static input is unused by `LocationInsensitive`, it's depended on by
    // initialization and liveness, so it's already computed by the time we get to borrow checking.
    cfg_edge: Relation<(T::Point, T::Point)>,

    // Partial results possibly used by other variants as input. Not currently used.
    #[allow(dead_code)]
    potential_errors: Option<FxHashSet<T::Loan>>,
    #[allow(dead_code)]
    potential_subset_errors: Option<Relation<(T::Origin, T::Origin)>>,
}

impl<T: FactTypes> Output<T> {
    /// All variants require the same initial preparations, done in multiple
    /// successive steps:
    /// - compute initialization data
    /// - compute liveness
    /// - prepare static inputs as shared `Relation`s
    /// - in cases where the `LocationInsensitive` variant is run as a filtering pre-pass,
    ///   partial results can also be stored in the context, so that the following
    ///   variant can use them to prune its own input data
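    ///
    /// A minimal usage sketch (illustrative only, not part of the original source; it assumes
    /// the caller has already gathered an `AllFacts` value named `all_facts`):
    ///
    /// ```ignore
    /// let output = Output::compute(&all_facts, Algorithm::DatafrogOpt, true);
    /// for (point, loans) in &output.errors {
    ///     // `loans` are the loans causing illegal access errors at `point`.
    ///     println!("errors at {:?}: {:?}", point, loans);
    /// }
    /// ```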
    pub fn compute(all_facts: &AllFacts<T>, algorithm: Algorithm, dump_enabled: bool) -> Self {
        let mut result = Output::new(dump_enabled);

        // TODO: remove all the cloning thereafter, but that needs to be done in concert with rustc

        let cfg_edge = all_facts.cfg_edge.clone().into();

        // 1) Initialization
        let initialization_ctx = InitializationContext {
            child_path: all_facts.child_path.clone(),
            path_is_var: all_facts.path_is_var.clone(),
            path_assigned_at_base: all_facts.path_assigned_at_base.clone(),
            path_moved_at_base: all_facts.path_moved_at_base.clone(),
            path_accessed_at_base: all_facts.path_accessed_at_base.clone(),
        };

        let initialization::InitializationResult::<T>(
            var_maybe_partly_initialized_on_exit,
            move_errors,
        ) = initialization::compute(initialization_ctx, &cfg_edge, &mut result);

        // FIXME: move errors should prevent the computation from continuing: we can't compute
        // liveness and analyze loans accurately when there are move errors, and should early
        // return here.
        for &(path, location) in move_errors.iter() {
            result.move_errors.entry(location).or_default().push(path);
        }

        // 2) Liveness
        let liveness_ctx = LivenessContext {
            var_used_at: all_facts.var_used_at.clone(),
            var_defined_at: all_facts.var_defined_at.clone(),
            var_dropped_at: all_facts.var_dropped_at.clone(),
            use_of_var_derefs_origin: all_facts.use_of_var_derefs_origin.clone(),
            drop_of_var_derefs_origin: all_facts.drop_of_var_derefs_origin.clone(),
        };

        let mut origin_live_on_entry = liveness::compute_live_origins(
            liveness_ctx,
            &cfg_edge,
            var_maybe_partly_initialized_on_exit,
            &mut result,
        );

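        // Collect all the points appearing in the CFG, as either the source or target of an edge.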
        let cfg_node = cfg_edge
            .iter()
            .map(|&(point1, _)| point1)
            .chain(cfg_edge.iter().map(|&(_, point2)| point2))
            .collect();

        liveness::make_universal_regions_live::<T>(
            &mut origin_live_on_entry,
            &cfg_node,
            &all_facts.universal_region,
        );

        // 3) Borrow checking

        // Prepare data as datafrog relations, ready to join.
        //
        // Note: if rustc and polonius had more interaction, we could also delay or avoid
        // generating some of the facts that are now always present here. For example,
        // the `LocationInsensitive` variant doesn't use the `loan_killed_at` relation, so we could
        // technically delay computing and passing it from rustc, when using this or the `Hybrid`
        // variants, to after the pre-pass has made sure we actually need to compute the full
        // analysis. If these facts happened to be recorded in separate MIR walks, we might also
        // avoid generating those facts.

        let origin_live_on_entry = origin_live_on_entry.into();

        // TODO: also flip the order of this relation's arguments in rustc,
        // from `loan_invalidated_at(point, loan)` to `loan_invalidated_at(loan, point)`,
        // to avoid this allocation.
        let loan_invalidated_at = Relation::from_iter(
            all_facts
                .loan_invalidated_at
                .iter()
                .map(|&(point, loan)| (loan, point)),
        );

        let loan_killed_at = all_facts.loan_killed_at.clone().into();

        // `known_placeholder_subset` is a list of all the `'a: 'b` subset relations the user gave:
        // it's not required to be transitive. `known_contains` is its transitive closure: a list
        // of all the known placeholder loans that each of these placeholder origins contains.
        // Given the `known_placeholder_subset`s `'a: 'b` and `'b: 'c`: in the `known_contains`
        // relation, `'a` will also contain `'c`'s placeholder loan.
        let known_placeholder_subset = all_facts.known_placeholder_subset.clone().into();
        let known_contains =
            Output::<T>::compute_known_contains(&known_placeholder_subset, &all_facts.placeholder);

        // Fully close over the `known_placeholder_subset` relation.
        let known_placeholder_subset =
            Output::<T>::compute_known_placeholder_subset(&known_placeholder_subset);

        let placeholder_origin: Relation<_> = Relation::from_iter(
            all_facts
                .universal_region
                .iter()
                .map(|&origin| (origin, ())),
        );

        let placeholder_loan = Relation::from_iter(
            all_facts
                .placeholder
                .iter()
                .map(|&(origin, loan)| (loan, origin)),
        );

        // Ask the variants to compute errors in their own way
        let mut ctx = Context {
            origin_live_on_entry,
            loan_invalidated_at,
            cfg_edge,
            subset_base: &all_facts.subset_base,
            loan_issued_at: &all_facts.loan_issued_at,
            loan_killed_at,
            known_contains,
            known_placeholder_subset,
            placeholder_origin,
            placeholder_loan,
            potential_errors: None,
            potential_subset_errors: None,
        };

        let (errors, subset_errors) = match algorithm {
            Algorithm::LocationInsensitive => {
                let (potential_errors, potential_subset_errors) =
                    location_insensitive::compute(&ctx, &mut result);

                // Note: the error location is meaningless for a location-insensitive
                // subset error analysis. This is acceptable here, as this variant is not
                // meant to be used directly outside of debugging; the `Hybrid` variant
                // will take advantage of its result.
                let potential_subset_errors: Relation<(T::Origin, T::Origin, T::Point)> =
                    Relation::from_iter(
                        potential_subset_errors
                            .into_iter()
                            .map(|&(origin1, origin2)| (origin1, origin2, 0.into())),
                    );

                (potential_errors, potential_subset_errors)
            }
            Algorithm::Naive => naive::compute(&ctx, &mut result),
            Algorithm::DatafrogOpt => datafrog_opt::compute(&ctx, &mut result),
            Algorithm::Hybrid => {
                // Execute the fast `LocationInsensitive` computation as a pre-pass:
                // if it finds no possible errors, we don't need to do the more complex
                // computations as they won't find errors either, and we can return early.
                let (potential_errors, potential_subset_errors) =
                    location_insensitive::compute(&ctx, &mut result);

                if potential_errors.is_empty() && potential_subset_errors.is_empty() {
                    // There are no loan errors nor subset errors, so we can return early
                    // with empty error lists and avoid doing the heavy analysis.
                    (potential_errors, Vec::new().into())
                } else {
                    // Record these potential errors as they can be used to limit the next
                    // variant's work to only these loans.
                    ctx.potential_errors =
                        Some(potential_errors.iter().map(|&(loan, _)| loan).collect());
                    ctx.potential_subset_errors = Some(potential_subset_errors);

                    datafrog_opt::compute(&ctx, &mut result)
                }
            }
            Algorithm::Compare => {
                // Ensure the `Naive` and `DatafrogOpt` errors are the same
                let (naive_errors, naive_subset_errors) = naive::compute(&ctx, &mut result);
                let (opt_errors, _) = datafrog_opt::compute(&ctx, &mut result);

                // TODO: also compare illegal subset relation errors here?

                let mut naive_errors_by_point = FxHashMap::default();
                for &(loan, point) in naive_errors.iter() {
                    naive_errors_by_point
                        .entry(point)
                        .or_insert_with(Vec::new)
                        .push(loan);
                }

                let mut opt_errors_by_point = FxHashMap::default();
                for &(loan, point) in opt_errors.iter() {
                    opt_errors_by_point
                        .entry(point)
                        .or_insert_with(Vec::new)
                        .push(loan);
                }

                if compare_errors(&naive_errors_by_point, &opt_errors_by_point) {
                    panic!(concat!(
                        "The errors reported by the naive algorithm differ from ",
                        "the errors reported by the optimized algorithm. ",
                        "See the error log for details."
                    ));
                } else {
                    debug!("Naive and optimized algorithms reported the same errors.");
                }

                (naive_errors, naive_subset_errors)
            }
        };

        // Record illegal access errors
        for &(loan, location) in errors.iter() {
            result.errors.entry(location).or_default().push(loan);
        }

        // Record illegal subset errors
        for &(origin1, origin2, location) in subset_errors.iter() {
            result
                .subset_errors
                .entry(location)
                .or_default()
                .insert((origin1, origin2));
        }

        // Record more debugging info when asked to do so
        if dump_enabled {
            for &(origin, location) in ctx.origin_live_on_entry.iter() {
                result
                    .origin_live_on_entry
                    .entry(location)
                    .or_default()
                    .push(origin);
            }

            for &(origin, loan) in ctx.known_contains.iter() {
                result
                    .known_contains
                    .entry(origin)
                    .or_default()
                    .insert(loan);
            }
        }

        result
    }

    /// Propagates the placeholder loans along the `known_placeholder_subset` relation, so that we
    /// have the full list of placeholder loans transitively contained by each placeholder origin.
    fn compute_known_contains(
        known_placeholder_subset: &Relation<(T::Origin, T::Origin)>,
        placeholder: &[(T::Origin, T::Loan)],
    ) -> Relation<(T::Origin, T::Loan)> {
        let mut iteration = datafrog::Iteration::new();
        let known_contains = iteration.variable("known_contains");

        // known_contains(Origin1, Loan1) :-
        //   placeholder(Origin1, Loan1).
        known_contains.extend(placeholder.iter());

        while iteration.changed() {
            // known_contains(Origin2, Loan1) :-
            //   known_contains(Origin1, Loan1),
            //   known_placeholder_subset(Origin1, Origin2).
            known_contains.from_join(
                &known_contains,
                known_placeholder_subset,
                |&_origin1, &loan1, &origin2| (origin2, loan1),
            );
        }

        known_contains.complete()
    }

    /// Computes the transitive closure of the `known_placeholder_subset` relation.
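    /// For example, from the base edges `('a, 'b)` and `('b, 'c)`, the closed relation will also
    /// contain `('a, 'c)`.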
    fn compute_known_placeholder_subset(
        known_placeholder_subset_base: &Relation<(T::Origin, T::Origin)>,
    ) -> Relation<(T::Origin, T::Origin)> {
        use datafrog::{Iteration, RelationLeaper};
        let mut iteration = Iteration::new();

        let known_placeholder_subset = iteration.variable("known_placeholder_subset");

        // known_placeholder_subset(Origin1, Origin2) :-
        //   known_placeholder_subset_base(Origin1, Origin2).
        known_placeholder_subset.extend(known_placeholder_subset_base.iter());

        while iteration.changed() {
            // known_placeholder_subset(Origin1, Origin3) :-
            //   known_placeholder_subset(Origin1, Origin2),
            //   known_placeholder_subset_base(Origin2, Origin3).
            known_placeholder_subset.from_leapjoin(
                &known_placeholder_subset,
                known_placeholder_subset_base.extend_with(|&(_origin1, origin2)| origin2),
                |&(origin1, _origin2), &origin3| (origin1, origin3),
            );
        }

        known_placeholder_subset.complete()
    }

    fn new(dump_enabled: bool) -> Self {
        Output {
            errors: FxHashMap::default(),
            subset_errors: FxHashMap::default(),
            dump_enabled,
            loan_live_at: FxHashMap::default(),
            origin_contains_loan_at: FxHashMap::default(),
            origin_contains_loan_anywhere: FxHashMap::default(),
            origin_live_on_entry: FxHashMap::default(),
            loan_invalidated_at: FxHashMap::default(),
            move_errors: FxHashMap::default(),
            subset: FxHashMap::default(),
            subset_anywhere: FxHashMap::default(),
            var_live_on_entry: FxHashMap::default(),
            var_drop_live_on_entry: FxHashMap::default(),
            path_maybe_initialized_on_exit: FxHashMap::default(),
            path_maybe_uninitialized_on_exit: FxHashMap::default(),
            var_maybe_partly_initialized_on_exit: FxHashMap::default(),
            known_contains: FxHashMap::default(),
        }
    }

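    /// Returns the loans involved in illegal access errors at the given point, if any.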
    pub fn errors_at(&self, location: T::Point) -> &[T::Loan] {
        match self.errors.get(&location) {
            Some(v) => v,
            None => &[],
        }
    }

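    /// Returns the loans that are live (in scope) at the given point, if any were recorded.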
    pub fn loans_in_scope_at(&self, location: T::Point) -> &[T::Loan] {
        match self.loan_live_at.get(&location) {
            Some(p) => p,
            None => &[],
        }
    }

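    /// Returns, for each origin, the set of loans it contains at the given point.
    /// Only available when `dump_enabled` is set.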
    pub fn origin_contains_loan_at(
        &self,
        location: T::Point,
    ) -> Cow<'_, BTreeMap<T::Origin, BTreeSet<T::Loan>>> {
        assert!(self.dump_enabled);
        match self.origin_contains_loan_at.get(&location) {
            Some(map) => Cow::Borrowed(map),
            None => Cow::Owned(BTreeMap::default()),
        }
    }

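    /// Returns the origins live on entry to the given point.
    /// Only available when `dump_enabled` is set.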
    pub fn origins_live_at(&self, location: T::Point) -> &[T::Origin] {
        assert!(self.dump_enabled);
        match self.origin_live_on_entry.get(&location) {
            Some(v) => v,
            None => &[],
        }
    }

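    /// Returns the subset relation between origins at the given point.
    /// Only available when `dump_enabled` is set.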
    pub fn subsets_at(
        &self,
        location: T::Point,
    ) -> Cow<'_, BTreeMap<T::Origin, BTreeSet<T::Origin>>> {
        assert!(self.dump_enabled);
        match self.subset.get(&location) {
            Some(v) => Cow::Borrowed(v),
            None => Cow::Owned(BTreeMap::default()),
        }
    }
}

/// Compares the errors reported by the `Naive` implementation with the errors
/// reported by the optimized implementation. Returns `true` if they differ.
fn compare_errors<Loan: Atom, Point: Atom>(
    all_naive_errors: &FxHashMap<Point, Vec<Loan>>,
    all_opt_errors: &FxHashMap<Point, Vec<Loan>>,
) -> bool {
    let points = all_naive_errors.keys().chain(all_opt_errors.keys());

    let mut differ = false;
    for point in points {
        let mut naive_errors = all_naive_errors.get(&point).cloned().unwrap_or_default();
        naive_errors.sort();

        let mut opt_errors = all_opt_errors.get(&point).cloned().unwrap_or_default();
        opt_errors.sort();

        for err in naive_errors.iter() {
            if !opt_errors.contains(err) {
                error!(
                    "Error {0:?} at {1:?} reported by naive, but not opt.",
                    err, point
                );
                differ = true;
            }
        }

        for err in opt_errors.iter() {
            if !naive_errors.contains(err) {
                error!(
                    "Error {0:?} at {1:?} reported by opt, but not naive.",
                    err, point
                );
                differ = true;
            }
        }
    }

    differ
}

#[cfg(test)]
mod tests {
    use super::*;

    impl Atom for usize {
        fn index(self) -> usize {
            self
        }
    }

    fn compare(
        errors1: &FxHashMap<usize, Vec<usize>>,
        errors2: &FxHashMap<usize, Vec<usize>>,
    ) -> bool {
        let diff1 = compare_errors(errors1, errors2);
        let diff2 = compare_errors(errors2, errors1);
        assert_eq!(diff1, diff2);
        diff1
    }

    #[test]
    fn test_compare_errors() {
        let empty = FxHashMap::default();
        assert_eq!(false, compare(&empty, &empty));
        let mut empty_vec = FxHashMap::default();
        empty_vec.insert(1, vec![]);
        empty_vec.insert(2, vec![]);
        assert_eq!(false, compare(&empty, &empty_vec));

        let mut singleton1 = FxHashMap::default();
        singleton1.insert(1, vec![10]);
        assert_eq!(false, compare(&singleton1, &singleton1));
        let mut singleton2 = FxHashMap::default();
        singleton2.insert(1, vec![11]);
        assert_eq!(false, compare(&singleton2, &singleton2));
        let mut singleton3 = FxHashMap::default();
        singleton3.insert(2, vec![10]);
        assert_eq!(false, compare(&singleton3, &singleton3));

        assert_eq!(true, compare(&singleton1, &singleton2));
        assert_eq!(true, compare(&singleton2, &singleton3));
        assert_eq!(true, compare(&singleton1, &singleton3));

        assert_eq!(true, compare(&empty, &singleton1));
        assert_eq!(true, compare(&empty, &singleton2));
        assert_eq!(true, compare(&empty, &singleton3));

        let mut errors1 = FxHashMap::default();
        errors1.insert(1, vec![11]);
        errors1.insert(2, vec![10]);
        assert_eq!(false, compare(&errors1, &errors1));
        assert_eq!(true, compare(&errors1, &singleton1));
        assert_eq!(true, compare(&errors1, &singleton2));
        assert_eq!(true, compare(&errors1, &singleton3));
    }
}