//! bayes/decision/mod.rs — decision boundaries and Bayes factors for comparing probabilistic models.
1use nalgebra::*;
2use crate::distr::*;
3use std::default::Default;
4
/// Error rate, used by the user to obtain an optimized decision
/// boundary; or to store empirical decision boundaries after
/// a decision process has been taken. The entries are conditional
/// rates, so `true_pos + false_neg` should sum to one; and
/// `true_neg + false_pos` should also sum to one.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ErrorRate {

    /// Rate of positive decisions when the true state is positive.
    pub true_pos : f64,

    /// Rate of negative decisions when the true state is negative.
    pub true_neg : f64,

    /// Rate of positive decisions when the true state is negative.
    pub false_pos : f64,

    /// Rate of negative decisions when the true state is positive.
    pub false_neg : f64
}

impl Default for ErrorRate {

    /// Yields the indifference criterion (0.5, 0.5, 0.5, 0.5):
    /// false positives are treated as exactly as costly as false negatives.
    fn default() -> Self {
        Self {
            true_pos : 0.5,
            true_neg : 0.5,
            false_pos : 0.5,
            false_neg : 0.5
        }
    }

}
32
/// A decision boundary is the output of an optimized decision process
/// to select between two alternative probabilistic models, after
/// considering an error criterion.
/// The difference in log-probability between a model
/// and an alternative lies on the real line (where zero means indifference to which model
/// is best, considering that false positives are as bad as false negatives).
/// The decision boundary is a partition of this line away from zero in either
/// direction that gives more weight to false positives relative to false negatives.
///
/// While useful as a final output of an inference procedure, DecisionBoundary(.) also
/// implements distribution, and so can be composed with other distributions inside a graph
/// (behaving as a Bernoulli random variable that decides whether
/// the left-hand branch is better than the right-hand branch given the informed error criterion).
pub struct DecisionBoundary<'a> {
    // In the sample(.) forward pass through the graph, the samples from the left branch are transformed
    // via a user-defined function to the fixed Bernoulli parameters, and those parameters are used
    // to evaluate if the incoming transformed samples from the right branch satisfy the boundary
    // established by the criterion;
    // In the log_prob(.) backward pass, the incoming
    // sample has its log-probability calculated relative to the fixed decision vector (which is passed to the right)
    // and the fixed decision vector log_probability is passed to the left.

    /// Sample over which this decision boundary was calculated. Borrowed, so the
    /// boundary cannot outlive the sample used to optimize it.
    _sample : &'a DMatrix<f64>,

    /// Single point over the log-likelihood difference between two models
    /// (presumably the optimized decision threshold; see BayesFactor::optimize — TODO confirm).
    _log_lik : f64,

    /// Error rate the user asked the optimization to approach.
    _ideal_rate : ErrorRate,

    /// Empirical error rate, after the boundary has been optimized over a sample.
    /// empirical_rate should be as close as possible to ideal_rate given the sample
    /// and the pair of models used to make decisions over the sample.
    _empirical_rate : ErrorRate,

}
69
impl<'a> DecisionBoundary<'a> {

    /// Creates a new decision boundary over the informed sample,
    /// by trying to approach the ideal Error Rate as close as possible. If all
    /// error criteria are equally important, use ErrorRate::default()
    /// (which yields (0.5, 0.5, 0.5, 0.5)).
    pub fn new(_y : &'a DMatrix<f64>, _ideal : ErrorRate) -> Self {
        unimplemented!()
    }

    // Presumably the d' (d-prime) sensitivity statistic from signal-detection
    // theory for this boundary — TODO confirm once implemented.
    fn _d_prime() -> f64 {
        unimplemented!()
    }

    // Presumably a summary of the ROC curve (e.g. the area under it) for this
    // boundary — TODO confirm once implemented.
    fn _roc() -> f64 {
        unimplemented!()
    }

    /// Returns the actual estimated error rate from the informed sample. This quantity is
    /// supposed to be as close as possible to the ideal rate informed at construction.
    fn _error_rate(&'a self) -> ErrorRate {
        unimplemented!()
    }

}
94
/// BayesFactor can be used to compare posteriors to arbitrary analytical
/// distributions (Null or saturated); or to compare the same posterior
/// with itself at different values by comparing their conditional log-posteriors.
/// A peak detection problem, for example, can be formulated
/// as:
///
/// ```rust
/// // let bf = m1.compare(m2);
///
/// // Verify if self is more likely than the alternative relative to the informed sample
/// // and the default error criterion (indifference to false positives vs. false negatives);
/// // bf.best(y, default());
///
/// // Obtain optimized decision boundary over the log-posterior for the given criterion.
/// // let bound = bf.optimize(y, crit, true_values);
/// ```
pub struct BayesFactor<'a, D, E>
    where
        D : Distribution,
        E : Distribution
{

    /// First (left-hand) model under comparison.
    _a : &'a D,

    /// Second (right-hand) alternative model under comparison.
    _b : &'a E,

    /// Boundary optimized for this comparison; borrows the sample it was fit on,
    /// which ties this struct's lifetime to that sample.
    _bound : DecisionBoundary<'a>
}
123
impl<'a, D,E> BayesFactor<'a, D, E>
    where
        D : Distribution,
        E : Distribution
{

    /// Decides whether the first model is better than the alternative over the
    /// informed sample, given the boundary.
    /// The decision boundary accepts Default::default() for a standard
    /// cost to positive/negative errors.
    // NOTE(review): DecisionBoundary has no Default impl in this module;
    // presumably this refers to building one from ErrorRate::default() — confirm.
    pub fn best(
        &'a self,
        _y : &'a DMatrix<f64>,
        _boundary : DecisionBoundary<'a>
    ) -> bool {
        unimplemented!()
    }

    /// Calls self.best(.) iteratively, changing a scalar that partitions the model log-posterior differences
    /// until the decisions (taken not at zero, but at the optimized value) match the observed
    /// 0/1 decision vector as closely as possible given the desired criterion (potentially after applying a transformation f to the sample).
    /// f(.) is any function that maps the potentially continuous outcomes to the 0/1
    /// domain (this is just the identity if the model is already a Bernoulli).
    /// Models for which the output
    /// is a categorical can define a decision rule as some linear combination of the categories
    /// (for example, an ordered outcome is a categorical output summed up to the kth element compared
    /// against all other elements). Models with univariate or multivariate continuous outcomes can
    /// determine arbitrary step functions of those to yield an output. Future decisions are then not made at
    /// zero, but at the chosen decision boundary.
    /// The method also calculates the empirical Error Rates, which
    /// should be as close to the ideal criterion informed by the user as possible. The lifetime of the
    /// boundary becomes tied to the lifetime of the sample used to calculate it.
    // NOTE(review): _y is taken by value, but the returned DecisionBoundary<'a>
    // borrows its sample for 'a (see its _sample field); an implementation cannot
    // return a boundary borrowing this consumed local. The signature likely should
    // take _y : &'a DMatrix<f64> (as DecisionBoundary::new does) — confirm before
    // implementing; not changed here to avoid breaking callers.
    pub fn optimize(
        &'a self,
        _y : DMatrix<f64>,
        _criterion : ErrorRate,
        _outcomes : DVector<f64>,
        _f : &'a dyn Fn(DMatrix<f64>)->DVector<f64>
    ) -> DecisionBoundary<'a> {
        unimplemented!()
    }

    /// Builds a Bayes factor comparing model `a` against the alternative `b`.
    pub fn new(_a : &'a D, _b : &'a E) -> Self {
        unimplemented!()
    }
}
168
169