// smartnoise_validator 0.1.2
//
// A library for validating whether or not an analysis is differentially private.
syntax = "proto3";

package smartnoise;

import "value.proto";
import "components.proto";

// A single diagnostic carried as text.
// Within this file it is the element type of the `warnings` lists in
// GraphProperties and ComponentExpansion.
message Error {
    // text of the diagnostic
    string message = 1;
}

// An analysis: a privacy definition together with a computation graph.
message Analysis {
    // the definition of privacy the analysis is held to
    PrivacyDefinition privacy_definition = 1;
    // the components that make up the analysis
    ComputationGraph computation_graph = 2;
}

// Privacy definition for an analysis.
// It determines the parameters used for sensitivity derivations and the set of available algorithms.
message PrivacyDefinition {
    // Kinds of perturbation that turn a dataset into a neighboring dataset.
    enum Neighboring {
        SUBSTITUTE = 0;
        ADD_REMOVE = 1;
    }

    // Privacy leakage is measured with respect to `group_size` number of rows. This is typically one.
    uint32 group_size = 1;

    // The kind of perturbation that may be applied to a dataset to create a neighboring dataset.
    Neighboring neighboring = 2;

    // When enabled, reject the use of algorithms:
    // - that use delta when n is not known
    // - for which some soft violations of assumptions are observed,
    //   e.g. epsilon greater than one with the gaussian mechanism
    bool strict_parameter_checks = 3;

    // Enable for tighter bounds checking, to prevent leaks via overflow/underflow.
    bool protect_overflow = 4;

    // Enable if side-channel elapsed execution time is considered part of the release.
    bool protect_elapsed_time = 5;

    // Enable if side-channel memory usage is considered part of the release.
    bool protect_memory_utilization = 6;

    // Enable to block mechanisms known to be vulnerable to floating point attacks.
    bool protect_floating_point = 7;
}

// A computation graph, stored as a map from a uint32 key to a Component.
// NOTE(review): the key is presumably the node id of the component -- confirm against callers.
message ComputationGraph {
    // components of the graph (map iteration order is not defined by protobuf)
    map<uint32, Component> value = 1;
}

// A release, stored as a map from a uint32 key to a ReleaseNode.
// NOTE(review): keys presumably are the node ids used in ComputationGraph -- confirm against callers.
message Release {
    // released nodes (map iteration order is not defined by protobuf)
    map<uint32, ReleaseNode> values = 1;
}

// Selects which values a release from the runtime should include.
// PUBLIC is the zero value, so it is what an unset FilterLevel field reads as in proto3.
enum FilterLevel {
    // release from runtime should include public data (either literals or sanitized data)
    PUBLIC = 0;
    // release from runtime should include public and prior known values
    PUBLIC_AND_PRIOR = 1;
    // release from runtime should include evaluations from all nodes
    ALL = 2;
}

// derived properties for the top-level Value type
// at most one member of `variant` is set; the member names mirror those of the `data` oneof in Value
message ValueProperties {
    oneof variant {
        // properties of a dataframe
        DataframeProperties dataframe = 1;
        // properties of partitions
        PartitionsProperties partitions = 2;
        // properties of an array
        ArrayProperties array = 3;
        // properties of a jagged value
        JaggedProperties jagged = 4;
        // properties of a function
        FunctionProperties function = 5;
    }
}

// Properties of a set of arguments, stored as a list of keys and a list of properties.
// NOTE(review): `keys` and `values` look like parallel lists (keys[i] names values[i]) -- confirm.
message ArgumentProperties {
    // IndexKey is declared outside this file
    repeated IndexKey keys = 1;
    repeated ValueProperties values = 2;
}

// Properties of a dataframe (the `dataframe` variant of ValueProperties),
// stored as a list of keys and a list of properties.
// NOTE(review): `keys` and `values` look like parallel lists (keys[i] names values[i]) -- confirm.
message DataframeProperties {
    // IndexKey is declared outside this file
    repeated IndexKey keys = 1;
    repeated ValueProperties values = 2;
}

// Properties of partitions (the `partitions` variant of ValueProperties),
// stored as a list of keys and a list of properties.
// NOTE(review): `keys` and `values` look like parallel lists (keys[i] names values[i]) -- confirm.
message PartitionsProperties {
    // IndexKey is declared outside this file
    repeated IndexKey keys = 1;
    repeated ValueProperties values = 2;
}


// sub-properties for Value::* types that may be aggregated
// referenced by the `aggregator` field of ArrayProperties and JaggedProperties, where it is
// described as the aggregation that has been applied to the data
message AggregatorProperties {
    // NOTE(review): presumably the component that performed the aggregation -- confirm
    Component component = 1;
    // NOTE(review): presumably the properties of that component's arguments -- confirm
    ArgumentProperties properties = 2;
    // NOTE(review): presumably Lipschitz constants used when deriving sensitivity -- confirm
    Value lipschitz_constants = 3;
}

/// derived properties for the Value::Array
///    a homogeneously-typed (0, 1, 2)-dimensional array
/// NOTE(review): I64Null, F64Null and DataType are declared outside this file;
///    the *Null types presumably wrap a value that may be absent -- confirm
message ArrayProperties {

    /// length of axis zero. May be unknown
    I64Null num_records = 1;

    /// length of axis one. If dimensionality is one, then one. May be unknown
    I64Null num_columns = 2;

    /// true if data may contain null values
    bool nullity = 3;

    /// number of records one individual may influence
    uint32 c_stability = 4;

    /// description of the aggregation that has been applied to the data
    /// used to help compute sensitivity in the mechanisms
    AggregatorProperties aggregator = 5;

    /// atomic type
    DataType data_type = 6;

    /// true if the data has been sanitized
    bool releasable = 7;

    /// node_id of the dataset this observation originated from
    /// used to check for conformability, is erased upon resize, is reset upon filter
    I64Null dataset_id = 8;

    /// true if the row length is known to be greater than zero
    bool is_not_empty = 9;

    /// number of axes in the array
    I64Null dimensionality = 10;

    /// used for tracking subpartitions
    repeated GroupId group_id = 11;

    /// at most one member is set; the members use field numbers 100 and 101,
    /// apart from the sequentially numbered fields of this message
    oneof nature {
        /// numerical bounds of each column
        NatureContinuous continuous = 100;
        /// categories of each column
        NatureCategorical categorical = 101;
    }

    // true if row ordering has not changed
    bool naturally_ordered = 12;

    // proportion of records this array contains sampled from the original dataset
    F64Null sample_proportion = 13;

    // useful to reference an intermediate calculation
    uint32 node_id = 14;
}

// Continuous nature: the numerical bounds of each column
// (the `continuous` member of the `nature` oneof in ArrayProperties and JaggedProperties).
// NOTE(review): Array1dNull is declared outside this file; presumably one entry per column,
// where an entry may be absent -- confirm.
message NatureContinuous {
    // lower bounds
    Array1dNull minimum = 1;
    // upper bounds
    Array1dNull maximum = 2;
}

// Categorical nature: the categories of each column
// (the `categorical` member of the `nature` oneof in ArrayProperties and JaggedProperties).
message NatureCategorical {
    // NOTE(review): Jagged is declared outside this file; presumably one vector of categories
    // per column -- confirm
    Jagged categories = 1;
}

/// identifies a group within a partition
/// element type of ArrayProperties.group_id, which is used for tracking subpartitions
message GroupId {
    /// node id of partition
    uint32 partition_id = 1;
    /// indexes referenced in the partition
    IndexKey index = 2;
}

/// derived properties for the Value::Jagged type
///   a homogeneously-typed vector of vectors
///   each vector represents a column
/// NOTE(review): Array1dI64 and DataType are declared outside this file
message JaggedProperties {

    /// number of records per column
    Array1dI64 num_records = 1;

    /// true if the data may contain null values
    bool nullity = 2;

    /// description of the aggregation that has been applied to the data
    /// used to help compute sensitivity in the mechanisms
    AggregatorProperties aggregator = 3;

    /// atomic type
    DataType data_type = 4;

    /// true if the data has been sanitized
    bool releasable = 5;

    /// at most one member is set; same members and field numbers as the
    /// `nature` oneof in ArrayProperties
    oneof nature {
        /// numerical bounds of each column
        NatureContinuous continuous = 100;
        /// categories of each column
        NatureCategorical categorical = 101;
    }
}

// derived properties for the Value::Function type
message FunctionProperties {
    // NOTE(review): presumably the same meaning as `releasable` in ArrayProperties
    // ("true if the data has been sanitized") -- confirm how it applies to functions
    bool releasable = 1;
}

// properties for each node on a graph
message GraphProperties {
    // properties keyed by a uint32 (per the comment above, one entry per node)
    map<uint32, ValueProperties> properties = 1;
    // warnings accompanying the properties
    repeated Error warnings = 2;
}

// A list of Accuracy entries.
message Accuracies {
    repeated Accuracy values = 1;
}
// A single accuracy entry: a `value` paired with an `alpha`.
// NOTE(review): presumably `value` is the accuracy guaranteed at statistical significance
// level `alpha` -- confirm units and interpretation with the runtime.
message Accuracy {
    double value = 1;
    double alpha = 2;
}

// The result of expanding a component: components, properties and releases, each keyed by
// uint32, plus a traversal list and warnings.
// NOTE(review): the uint32 keys and the `traversal` entries are presumably node ids -- confirm.
message ComponentExpansion {
    // components keyed by uint32
    map<uint32, Component> computation_graph = 1;
    // properties keyed by uint32
    map<uint32, ValueProperties> properties = 2;
    // release nodes keyed by uint32
    map<uint32, ReleaseNode> releases = 3;
    // NOTE(review): presumably the order in which nodes should be visited -- confirm
    repeated uint32 traversal = 4;
    // warnings raised by the expansion
    repeated Error warnings = 5;
}

// literals
// at most one member of `data` is set; ValueProperties has a matching variant for each member
// NOTE(review): Array and Jagged are not declared in this file
message Value {
    oneof data {
        // field number 1 is not in use; only this commented-out entry refers to it
        // bytes bytes = 1;

        // N-dimensional homogeneously typed array
        Array array = 2;

        // Key-Value pairs
        Dataframe dataframe = 3;
        Partitions partitions = 4;

        // Data structure with mixed column lengths
        Jagged jagged = 5;

        // Evaluable function
        Function function = 6;
    }
}

// A dataframe literal, stored as a list of keys and a list of values
// (described as "Key-Value pairs" where it is used in Value).
// NOTE(review): `keys` and `values` look like parallel lists (keys[i] names values[i]) -- confirm.
message Dataframe {
    // IndexKey is declared outside this file
    repeated IndexKey keys = 1;
    repeated Value values = 2;
}

// A partitions literal, stored as a list of keys and a list of values.
// NOTE(review): `keys` and `values` look like parallel lists (keys[i] names values[i]) -- confirm.
message Partitions {
    // IndexKey is declared outside this file
    repeated IndexKey keys = 1;
    repeated Value values = 2;
}

// An evaluable function literal: a computation graph and a release, plus named
// arguments and outputs.
message Function {
    // the components that make up the function
    ComputationGraph computation_graph = 1;
    // release values associated with the function's graph
    Release release = 2;
    // NOTE(review): presumably argument name -> node id in `computation_graph` -- confirm
    map<string, uint32> arguments = 3;
    // NOTE(review): presumably output name -> node id in `computation_graph` -- confirm
    map<string, uint32> outputs = 4;
}

// One entry of a release: a value together with its privacy usages and a public flag.
message ReleaseNode {
    // the value held by this node
    Value value = 1;
    // PrivacyUsages is declared outside this file
    PrivacyUsages privacy_usages = 2;
    // NOTE(review): presumably true when the value counts as public data
    // (compare FilterLevel.PUBLIC) -- confirm
    bool public = 3;
}


// A collection of release nodes, stored as a list of keys and a list of nodes.
// NOTE(review): `keys` and `values` look like parallel lists (keys[i] names values[i]) -- confirm.
message IndexmapReleaseNode {
    // IndexKey is declared outside this file
    repeated IndexKey keys = 1;
    repeated ReleaseNode values = 2;
}