opendp/measures/mod.rs
1//! Various definitions of Measures (and associated Distances).
2//!
3//! A Privacy Measure is used to measure the distance between distributions.
4//! The distance is expressed in terms of an **associated type**.
5
6#[cfg(feature = "ffi")]
7pub(crate) mod ffi;
8
9use std::{cmp::Ordering, fmt::Debug, sync::Arc};
10
11use crate::{
12 core::{Function, Measure},
13 error::Fallible,
14};
15
16/// Privacy measure used to define $\epsilon$-pure differential privacy.
17///
18/// In the following proof definition, $d$ corresponds to $\epsilon$ when also quantified over all adjacent datasets.
19/// That is, $\epsilon$ is the greatest possible $d$
20/// over all pairs of adjacent datasets $x, x'$ where $Y \sim M(x)$, $Y' \sim M(x')$.
21/// $M(\cdot)$ is a measurement (commonly known as a mechanism).
22/// The measurement's input metric defines the notion of adjacency,
23/// and the measurement's input domain defines the set of possible datasets.
24///
25/// # Proof Definition
26///
27/// ### `d`-closeness
28///
29/// For any two distributions $Y, Y'$ and any non-negative $d$,
30/// $Y, Y'$ are $d$-close under the max divergence measure whenever
31///
32/// ```math
33/// D_\infty(Y, Y') = \max_{S \subseteq \textrm{Supp}(Y)} \Big[\ln \dfrac{\Pr[Y \in S]}{\Pr[Y' \in S]} \Big] \leq d.
34/// ```
35#[derive(Default, Clone, Debug, PartialEq)]
36pub struct MaxDivergence;
37
38impl Measure for MaxDivergence {
39 type Distance = f64;
40}
41
42/// Privacy measure used to define $\delta(\epsilon)$-approximate differential privacy.
43///
44/// In the following proof definition, $d$ corresponds to a privacy profile when also quantified over all adjacent datasets.
45/// That is, a privacy profile $\delta(\epsilon)$ is no smaller than $d(\epsilon)$ for all possible choices of $\epsilon$,
46/// and over all pairs of adjacent datasets $x, x'$ where $Y \sim M(x)$, $Y' \sim M(x')$.
47/// $M(\cdot)$ is a measurement (commonly known as a mechanism).
48/// The measurement's input metric defines the notion of adjacency,
49/// and the measurement's input domain defines the set of possible datasets.
50///
51/// The distance $d$ is of type [`PrivacyProfile`], so it can be invoked with an $\epsilon$
52/// to retrieve the corresponding $\delta$.
53///
54/// # Proof Definition
55///
56/// ### `d`-closeness
57///
58/// For any two distributions $Y, Y'$ and any curve $d(\cdot)$,
59/// $Y, Y'$ are $d$-close under the smoothed max divergence measure whenever,
60/// for any choice of non-negative $\epsilon$, and $\delta = d(\epsilon)$,
61///
62/// ```math
63/// D_\infty^\delta(Y, Y') = \max_{S \subseteq \textrm{Supp}(Y)} \Big[\ln \dfrac{\Pr[Y \in S] + \delta}{\Pr[Y' \in S]} \Big] \leq \epsilon.
64/// ```
65///
66/// Note that $\epsilon$ and $\delta$ are not privacy parameters $\epsilon$ and $\delta$ until quantified over all adjacent datasets,
67/// as is done in the definition of a measurement.
68#[derive(Default, Clone, Debug, PartialEq)]
69pub struct SmoothedMaxDivergence;
70
71impl Measure for SmoothedMaxDivergence {
72 type Distance = PrivacyProfile;
73}
74
75/// A function mapping from $\epsilon$ to $\delta$
76///
77/// This is the distance type for [`SmoothedMaxDivergence`].
78#[derive(Clone)]
79pub struct PrivacyProfile(Arc<dyn Fn(f64) -> Fallible<f64> + Send + Sync>);
80
81impl PrivacyProfile {
82 pub fn new(delta: impl Fn(f64) -> Fallible<f64> + 'static + Send + Sync) -> Self {
83 PrivacyProfile(Arc::new(delta))
84 }
85
86 pub fn epsilon(&self, delta: f64) -> Fallible<f64> {
87 // reject negative zero
88 if delta.is_sign_negative() {
89 return fallible!(FailedMap, "delta ({}) must not be negative", delta);
90 }
91
92 if !(0.0..=1.0).contains(&delta) {
93 return fallible!(FailedMap, "delta ({}) must be between zero and one", delta);
94 }
95
96 if delta == 1.0 {
97 return Ok(0.0);
98 }
99
100 self.epsilon_unchecked(delta)
101 }
102
103 pub(crate) fn epsilon_unchecked(&self, delta: f64) -> Fallible<f64> {
104 let mut e_min: f64 = 0.0;
105 let mut e_max: f64 = 2.0;
106 while self.delta(e_max)? > delta {
107 e_max *= e_max;
108 if e_max.is_infinite() {
109 // For mechanisms that are not pureDP, e_max will be infinity.
110 // This loop can be very long running.
111 return Ok(f64::INFINITY);
112 }
113 }
114
115 // delta(e_max) <= delta <= delta(e_min) -> always holds
116 // We always try to find the smallest e that minimizes |delta(e) - delta| and enforces delta(e) <= delta
117 // -> if delta == delta(e_min), we can pick e_min, otherwise we have to take e_max
118 // same as -> if e
119 // For delta == 1.0, we find the largest e that gives delta(e) == 1.0
120 // (so as not to create a discontinuity and go to zero.)
121 let mut e_mid = e_min;
122 loop {
123 let new_mid = e_min + ((e_max - e_min) / 2.0);
124
125 // converge when midpoint doesn't change
126 if new_mid == e_mid {
127 if delta == 1. {
128 return Ok(e_max);
129 }
130
131 return Ok(if delta == self.delta(e_min)? {
132 e_min
133 } else {
134 e_max
135 });
136 }
137
138 e_mid = new_mid;
139
140 // get delta corresponding to e_mid
141 let d_mid: f64 = self.delta(e_mid)?;
142 match d_mid.partial_cmp(&delta) {
143 Some(Ordering::Greater) => e_min = e_mid,
144 Some(Ordering::Less) => e_max = e_mid,
145 Some(Ordering::Equal) => {
146 if delta == 1. {
147 e_min = e_mid
148 } else {
149 e_max = e_mid
150 }
151 }
152 None => return fallible!(FailedMap, "not comparable"),
153 }
154 }
155 }
156
157 pub fn delta(&self, epsilon: f64) -> Fallible<f64> {
158 (self.0)(epsilon)
159 }
160}
161
162/// Privacy measure used to define $\delta$-approximate PM-differential privacy.
163///
164/// In the following definition, $d$ corresponds to privacy parameters $(d', \delta)$
165/// when also quantified over all adjacent datasets
166/// ($d'$ is the privacy parameter corresponding to privacy measure PM).
167/// That is, $(d', \delta)$ is no smaller than $d$ (by product ordering),
168/// over all pairs of adjacent datasets $x, x'$ where $Y \sim M(x)$, $Y' \sim M(x')$.
169/// $M(\cdot)$ is a measurement (commonly known as a mechanism).
170/// The measurement's input metric defines the notion of adjacency,
171/// and the measurement's input domain defines the set of possible datasets.
172///
173/// # Proof Definition
174///
175/// ### `d`-closeness
176/// For any two distributions $Y, Y'$ and 2-tuple $d = (d', \delta)$,
177/// where $d'$ is the distance with respect to privacy measure PM,
178/// $Y, Y'$ are $d$-close under the approximate PM measure whenever,
179/// for any choice of $\delta \in [0, 1]$,
180/// there exist events $E$ (depending on $Y$) and $E'$ (depending on $Y'$)
181/// such that $\Pr[E] \ge 1 - \delta$, $\Pr[E'] \ge 1 - \delta$, and
182///
183/// ```math
184/// D_{\mathrm{PM}}^\delta(Y|_E, Y'|_{E'}) = D_{\mathrm{PM}}(Y|_E, Y'|_{E'})
185/// ```
186///
187/// where $Y|_E$ denotes the distribution of $Y$ conditioned on the event $E$.
188///
189/// Note that this $\delta$ is not privacy parameter $\delta$ until quantified over all adjacent datasets,
190/// as is done in the definition of a measurement.
191#[derive(Clone, PartialEq, Debug, Default)]
192pub struct Approximate<PM: Measure>(pub PM);
193
194impl<M: Measure> Measure for Approximate<M> {
195 type Distance = (M::Distance, f64);
196}
197
198/// Privacy measure used to define $\rho$-zero concentrated differential privacy.
199///
200/// In the following proof definition, $d$ corresponds to $\rho$ when also quantified over all adjacent datasets.
201/// That is, $\rho$ is the greatest possible $d$
202/// over all pairs of adjacent datasets $x, x'$ where $Y \sim M(x)$, $Y' \sim M(x')$.
203/// $M(\cdot)$ is a measurement (commonly known as a mechanism).
204/// The measurement's input metric defines the notion of adjacency,
205/// and the measurement's input domain defines the set of possible datasets.
206///
207/// # Proof Definition
208///
209/// ### `d`-closeness
210///
211/// For any two distributions $Y, Y'$ and any non-negative $d$,
212/// $Y, Y'$ are $d$-close under the zero-concentrated divergence measure if,
213/// for every possible choice of $\alpha \in (1, \infty)$,
214///
215/// ```math
216/// D_\alpha(Y, Y') = \frac{1}{1 - \alpha} \mathbb{E}_{x \sim Y'} \Big[\ln \left( \dfrac{\Pr[Y = x]}{\Pr[Y' = x]} \right)^\alpha \Big] \leq d \cdot \alpha.
217/// ```
218#[derive(Default, Clone, Debug, PartialEq)]
219pub struct ZeroConcentratedDivergence;
220
221impl Measure for ZeroConcentratedDivergence {
222 type Distance = f64;
223}
224
225/// Privacy measure used to define $\epsilon(\alpha)$-Rényi differential privacy.
226///
227/// In the following proof definition, $d$ corresponds to an RDP curve when also quantified over all adjacent datasets.
228/// That is, an RDP curve $\epsilon(\alpha)$ is no smaller than $d(\alpha)$ for any possible choices of $\alpha$,
229/// and over all pairs of adjacent datasets $x, x'$ where $Y \sim M(x)$, $Y' \sim M(x')$.
230/// $M(\cdot)$ is a measurement (commonly known as a mechanism).
231/// The measurement's input metric defines the notion of adjacency,
232/// and the measurement's input domain defines the set of possible datasets.
233///
234/// # Proof Definition
235///
236/// ### `d`-closeness
237/// For any two distributions $Y, Y'$ and any curve $d$,
238/// $Y, Y'$ are $d$-close under the Rényi divergence measure if,
239/// for any given $\alpha \in (1, \infty)$,
240///
241/// ```math
242/// D_\alpha(Y, Y') = \frac{1}{1 - \alpha} \mathbb{E}_{x \sim Y'} \Big[\ln \left( \dfrac{\Pr[Y = x]}{\Pr[Y' = x]} \right)^\alpha \Big] \leq d(\alpha).
243/// ```
244///
245/// Note that this $\epsilon$ and $\alpha$ are not privacy parameters $\epsilon$ and $\alpha$ until quantified over all adjacent datasets,
246/// as is done in the definition of a measurement.
247#[derive(Default, Clone, Debug, PartialEq)]
248pub struct RenyiDivergence;
249
250impl Measure for RenyiDivergence {
251 type Distance = Function<f64, f64>;
252}