opendp/transformations/manipulation/
mod.rs

1#[cfg(feature = "ffi")]
2mod ffi;
3
4use opendp_derive::bootstrap;
5
6use crate::core::{Domain, Function, Metric, MetricSpace, StabilityMap, Transformation};
7use crate::domains::{AtomDomain, VectorDomain};
8use crate::error::*;
9use crate::metrics::EventLevelMetric;
10use crate::traits::{CheckAtom, CheckNull};
11
12/// A [`Domain`] representing a dataset.
13///
14/// This is distinguished from other domains
15/// because each element in the dataset corresponds to an individual.
16pub trait DatasetDomain: Domain {
17    /// The domain of each element in the dataset.
18    ///
19    /// For vectors, this is the domain of the vector elements,
20    /// for dataframes, this is the domain of the dataframe rows,
21    /// and so on.
22    type ElementDomain: Domain;
23}
24
25impl<D: Domain> DatasetDomain for VectorDomain<D> {
26    type ElementDomain = D;
27}
28
29pub trait RowByRowDomain<DO: DatasetDomain>: DatasetDomain {
30    fn translate(&self, output_row_domain: DO::ElementDomain) -> DO;
31    fn apply_rows(
32        value: &Self::Carrier,
33        row_function: &impl Fn(
34            &<Self::ElementDomain as Domain>::Carrier,
35        ) -> Fallible<<DO::ElementDomain as Domain>::Carrier>,
36    ) -> Fallible<DO::Carrier>;
37}
38
39impl<DIA: Domain, DOA: Domain> RowByRowDomain<VectorDomain<DOA>> for VectorDomain<DIA> {
40    fn translate(
41        &self,
42        output_row_domain: <VectorDomain<DOA> as DatasetDomain>::ElementDomain,
43    ) -> VectorDomain<DOA> {
44        VectorDomain {
45            element_domain: output_row_domain,
46            size: self.size,
47        }
48    }
49
50    fn apply_rows(
51        value: &Self::Carrier,
52        row_function: &impl Fn(&DIA::Carrier) -> Fallible<DOA::Carrier>,
53    ) -> Fallible<Vec<DOA::Carrier>> {
54        value.iter().map(row_function).collect()
55    }
56}
57
58/// Constructs a [`Transformation`] representing an arbitrary row-by-row transformation.
59pub(crate) fn make_row_by_row<DI, DO, M>(
60    input_domain: DI,
61    input_metric: M,
62    output_row_domain: DO::ElementDomain,
63    row_function: impl 'static
64    + Fn(
65        &<DI::ElementDomain as Domain>::Carrier,
66    ) -> <DO::ElementDomain as Domain>::Carrier
67    + Send
68    + Sync,
69) -> Fallible<Transformation<DI, M, DO, M>>
70where
71    DI: RowByRowDomain<DO>,
72    DO: DatasetDomain,
73    M: EventLevelMetric,
74    (DI, M): MetricSpace,
75    (DO, M): MetricSpace,
76{
77    let row_function = move |arg: &<DI::ElementDomain as Domain>::Carrier| Ok(row_function(arg));
78    make_row_by_row_fallible(input_domain, input_metric, output_row_domain, row_function)
79}
80
81/// Constructs a [`Transformation`] representing an arbitrary row-by-row transformation.
82pub(crate) fn make_row_by_row_fallible<DI, DO, M>(
83    input_domain: DI,
84    input_metric: M,
85    output_row_domain: DO::ElementDomain,
86    row_function: impl 'static
87    + Fn(
88        &<DI::ElementDomain as Domain>::Carrier,
89    ) -> Fallible<<DO::ElementDomain as Domain>::Carrier>
90    + Send
91    + Sync,
92) -> Fallible<Transformation<DI, M, DO, M>>
93where
94    DI: RowByRowDomain<DO>,
95    DO: DatasetDomain,
96    M: EventLevelMetric,
97    (DI, M): MetricSpace,
98    (DO, M): MetricSpace,
99{
100    let output_domain = input_domain.translate(output_row_domain);
101    Transformation::new(
102        input_domain,
103        input_metric.clone(),
104        output_domain,
105        input_metric,
106        Function::new_fallible(move |arg: &DI::Carrier| DI::apply_rows(arg, &row_function)),
107        StabilityMap::new_from_constant(1),
108    )
109}
110
111#[bootstrap(
112    features("contrib", "honest-but-curious"),
113    generics(D(suppress), M(suppress))
114)]
115/// Make a Transformation representing the identity function.
116///
117/// WARNING: In Python, this function does not ensure that the domain and metric form a valid metric space.
118/// However, if the domain and metric do not form a valid metric space,
119/// then the resulting Transformation won't be chainable with any valid Transformation,
120/// so it cannot be used to introduce an invalid metric space into a chain of valid Transformations.
121///
122/// # Arguments
123/// * `domain` - Domain of input data
124/// * `metric` - Metric on input domain
125///
126/// # Generics
127/// * `D` - Domain of the identity function. Must be `VectorDomain<AtomDomain<T>>` or `AtomDomain<T>`
128/// * `M` - Metric. Must be a dataset metric if D is a VectorDomain or a sensitivity metric if D is an AtomDomain
129///
130/// # Why honest-but-curious?
131/// For the result to be a valid transformation, the `input_domain` and `input_metric` pairing must form a valid metric space.
132/// For instance, the symmetric distance metric and atom domain do not form a valid metric space,
133/// because the metric cannot be used to measure distances between any two elements of an atom domain.
134/// Whereas, the symmetric distance metric and vector domain,
135/// or absolute distance metric and atom domain on a scalar type, both form valid metric spaces.
136pub fn make_identity<D, M>(domain: D, metric: M) -> Fallible<Transformation<D, M, D, M>>
137where
138    D: Domain,
139    D::Carrier: Clone,
140    M: Metric,
141    M::Distance: Clone,
142    (D, M): MetricSpace,
143{
144    Transformation::new(
145        domain.clone(),
146        metric.clone(),
147        domain,
148        metric,
149        Function::new(|arg: &D::Carrier| arg.clone()),
150        StabilityMap::new(|d_in: &M::Distance| d_in.clone()),
151    )
152}
153
154#[bootstrap(
155    features("contrib"),
156    arguments(
157        input_domain(c_type = "AnyDomain *"),
158        input_metric(c_type = "AnyMetric *")
159    ),
160    generics(TIA(suppress), M(suppress)),
161    derived_types(
162        TIA = "$get_atom(get_type(input_domain))",
163        M = "$get_type(input_metric)"
164    )
165)]
166/// Make a Transformation that checks if each element is equal to `value`.
167///
168/// # Arguments
169/// * `input_domain` - Domain of input data
170/// * `input_metric` - Metric on input domain
171/// * `value` - value to check against
172///
173/// # Generics
174/// * `TIA` - Atomic Input Type. Type of elements in the input vector
175pub fn make_is_equal<TIA, M>(
176    input_domain: VectorDomain<AtomDomain<TIA>>,
177    input_metric: M,
178    value: TIA,
179) -> Fallible<Transformation<VectorDomain<AtomDomain<TIA>>, M, VectorDomain<AtomDomain<bool>>, M>>
180where
181    TIA: 'static + PartialEq + CheckAtom,
182    M: EventLevelMetric,
183    (VectorDomain<AtomDomain<TIA>>, M): MetricSpace,
184    (VectorDomain<AtomDomain<bool>>, M): MetricSpace,
185{
186    make_row_by_row(
187        input_domain,
188        input_metric,
189        AtomDomain::default(),
190        move |v| v == &value,
191    )
192}
193
194#[bootstrap(features("contrib"), generics(M(suppress), DIA(suppress)))]
195/// Make a Transformation that checks if each element in a vector is null or nan.
196///
197/// # Arguments
198/// * `input_domain` - Domain of input data
199/// * `input_metric` - Metric on input domain
200///
201/// # Generics
202/// * `M` - Metric on input domain.
203/// * `DIA` - Atomic Input Domain. Either `OptionDomain<AtomDomain<TIA>>` or `AtomDomain<TIA>`
204pub fn make_is_null<M, DIA>(
205    input_domain: VectorDomain<DIA>,
206    input_metric: M,
207) -> Fallible<Transformation<VectorDomain<DIA>, M, VectorDomain<AtomDomain<bool>>, M>>
208where
209    DIA: Domain,
210    DIA::Carrier: 'static + CheckNull,
211    M: EventLevelMetric,
212    (VectorDomain<DIA>, M): MetricSpace,
213    (VectorDomain<AtomDomain<bool>>, M): MetricSpace,
214{
215    make_row_by_row(input_domain, input_metric, AtomDomain::default(), |v| {
216        v.is_null()
217    })
218}
219
220#[cfg(test)]
221mod test;