Skip to main content

bayes/decision/
mod.rs

1use nalgebra::*;
2use crate::distr::*;
3use std::default::Default;
4
/// Error rates of a binary decision process.
///
/// Used either as the target criterion from which an optimized decision
/// boundary is obtained, or to store the empirical rates observed after a
/// decision process has taken place.
///
/// `true_pos + false_neg` should sum to one, and `true_neg + false_pos`
/// should also sum to one. These constraints are a documented convention
/// only; nothing in this type enforces them.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ErrorRate {
    /// Rate of true positives (complement of `false_neg`).
    pub true_pos: f64,

    /// Rate of true negatives (complement of `false_pos`).
    pub true_neg: f64,

    /// Rate of false positives (complement of `true_neg`).
    pub false_pos: f64,

    /// Rate of false negatives (complement of `true_pos`).
    pub false_neg: f64,
}
19
20impl Default for ErrorRate {
21
22    fn default() -> Self {
23        Self {
24            true_pos : 0.5,
25            true_neg : 0.5,
26            false_pos : 0.5,
27            false_neg : 0.5
28        }
29    }
30
31}
32
33/// A decision boundary is the output of an optimized decision process
34/// to select between two alternative probabilistic models, after
35/// considering an error criterion.
36/// The difference in log-probability between a model
37/// and an alternative lies on the real line (where zero means indifference to which model
38/// is best considering that false positives are as bad as false negativas).
39/// The decision boundary is a partition of this line away from zero in either
40/// direction that gives more weight to false positives relative to false negatives.
41///
42/// While useful as a final output of an inference procedure, DecisionBoundary(.) also
43/// implements distribution, and so can be composed with other distributions inside a graph
44/// (behaving as a Bernoulli random varialble that decides if
45/// the left hand branch is best than the right hand branch given the informed error criterion).
46pub struct DecisionBoundary<'a> {
47    // In the sample(.) forward pass through the graph, the samples from the left branch are transformed
48    // via a user-defined function to the fixed Bernoulli parameters, and those parameters are used
49    // to evaluate if the incoming transformed samples from the right branch satisfy the boundary
50    // established by the criterion;
51    // In the log_prob(.) backward pass, the incoming
52    // sample has its log-probability calculated relative to the fixed decision vector (which is passed to the right)
53    // and the fixed decision vector log_probability is passed to the left.
54
55    /// Sample for which this decision
56    _sample : &'a DMatrix<f64>,
57
58    /// Single point over the log-likelihood difference between two models
59    _log_lik : f64,
60
61    _ideal_rate : ErrorRate,
62
63    /// Empirical error rate, after the boundary has been optimized over a sample.
64    /// empirical_rate should be as close as possible to ideal_rate given the sample
65    /// and the pair of models used to make decisions over the sample.
66    _empirical_rate : ErrorRate,
67
68}
69
70impl<'a> DecisionBoundary<'a> {
71
72    /// Creates a new decision boundary over the informed sample,
73    /// by trying to approach the ideal Error Rate as close as possible. If all missing criteria
74    /// are equally important, use ErrorRate::default() (which yields (0.5, 0.5, 0.5, 0.5)).
75    pub fn new(_y : &'a DMatrix<f64>, _ideal : ErrorRate) -> Self {
76        unimplemented!()
77    }
78
79    fn _d_prime() -> f64 {
80        unimplemented!()
81    }
82
83    fn _roc() -> f64 {
84        unimplemented!()
85    }
86
87    /// Returns the actual estimated error rate from the informed sample. This quantity is
88    /// supposed to be as close to
89    fn _error_rate(&'a self) -> ErrorRate {
90        unimplemented!()
91    }
92
93}
94
95/// BayesFactor can be used to compare posteriors to arbitrary analytical
96/// distributions (Null or saturated); or to compare the same posterior
97/// with itself at different values by comparing their conditional log-posteriors.
98/// A peak detection problem, for example, can be formulated
99/// as:
100///
101/// ```rust
102/// // let bf = m1.compare(m2);
103///
104/// // Verify if self is more likely than the alternative relative to the informed sample
105/// // and the default error criterion (indiference to false positives vs. false negatives);
106/// // bf.best(y, default());
107///
108/// // Obtain optimized decision boundary over the log-posterior for the given criterion.
109/// // let bound = bf.optimize(y, crit, true_values);
110/// ```
111pub struct BayesFactor<'a, D, E>
112    where
113        D : Distribution,
114        E : Distribution
115{
116
117    _a : &'a D,
118
119    _b : &'a E,
120
121    _bound : DecisionBoundary<'a>
122}
123
124impl<'a, D,E> BayesFactor<'a, D, E>
125    where
126        D : Distribution,
127        E : Distribution
128{
129
130    /// Decision boundary accepts Default::default() for a standard
131    /// cost to positive/negative errors.
132    pub fn best(
133        &'a self,
134        _y : &'a DMatrix<f64>,
135        _boundary : DecisionBoundary<'a>
136    ) -> bool {
137        unimplemented!()
138    }
139
140    /// Calls self.best(.) iteratevely, changing a scalar that partitions the model log-posterior differences
141    /// until the decisions (taken not at zero, but at the optimized value) match the observed
142    /// 0/1 decision vector as close as possible given the desired criterion (potentially after applying a transformation f to the sample).
143    /// f(.) is any function that maps the potentially continuous outcomes to the 0/1
144    /// domain (this is just identity if the Bernoulli).
145    /// Models for which the output
146    /// is a categorical can define a decision rule as some linear combination of the categories
147    /// (for example, an ordered outcome is a categorical output summed up to the kth element compared
148    /// against all other elements). Models with univariate or multivariate continuous outcomes can
149    /// determine arbitrary step functions of those to yield an output. Future decisions are then not made at
150    /// zero, but at the chosen decision boundary.
151    /// The method also calculates the empirical Error Rates, which
152    /// should be as close to the ideal criterion informed by the user as possible. The lifetime of the
153    /// boundary becomes tied to the lifetime of the sample used to calculate it.
154    pub fn optimize(
155        &'a self,
156        _y : DMatrix<f64>,
157        _criterion : ErrorRate,
158        _outcomes : DVector<f64>,
159        _f : &'a dyn Fn(DMatrix<f64>)->DVector<f64>
160    ) -> DecisionBoundary<'a> {
161        unimplemented!()
162    }
163
164    pub fn new(_a : &'a D, _b : &'a E) -> Self {
165        unimplemented!()
166    }
167}
168
169