libpep 0.12.0

Library for polymorphic encryption and pseudonymization
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
//! Python bindings for PEP JSON encryption.

#[cfg(all(feature = "insecure", feature = "offline"))]
use crate::client::decrypt_global;
#[cfg(feature = "offline")]
use crate::client::encrypt_global;
use crate::data::json::builder::PEPJSONBuilder;
use crate::data::json::data::{EncryptedPEPJSONValue, PEPJSONValue};
use crate::data::json::structure::JSONStructure;
use crate::data::json::utils;
use crate::data::traits::Transcryptable;
use crate::factors::py::contexts::{
    PyEncryptionContext, PyPseudonymizationDomain, PyTranscryptionInfo,
};
use crate::factors::TranscryptionInfo;
#[cfg(feature = "offline")]
use crate::keys::py::types::PyGlobalPublicKeys;
#[cfg(all(feature = "insecure", feature = "offline"))]
use crate::keys::py::types::PyGlobalSecretKeys;
use crate::keys::py::types::{PyEncryptionSecret, PyPseudonymizationSecret};
#[cfg(feature = "offline")]
use crate::keys::GlobalPublicKeys;
#[cfg(all(feature = "insecure", feature = "offline"))]
use crate::keys::GlobalSecretKeys;
#[cfg(feature = "batch")]
use crate::transcryptor::transcrypt_batch;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyAny, PyDict, PyList};
use serde_json::Value;

/// A plaintext PEP JSON value, ready to be encrypted.
///
/// Python-facing wrapper around the Rust `PEPJSONValue`: a JSON document whose
/// primitive values are held as unencrypted PEP types.
#[pyclass(name = "PEPJSONValue", from_py_object)]
#[derive(Clone)]
pub struct PyPEPJSONValue(pub(crate) PEPJSONValue);

#[pymethods]
impl PyPEPJSONValue {
    /// Build a PEPJSONValue out of an ordinary Python object.
    ///
    /// Args:
    ///     value: A JSON-serializable Python object
    ///
    /// Returns:
    ///     A PEPJSONValue
    ///
    /// Raises:
    ///     ValueError: If the object contains a type that cannot be converted to JSON
    #[staticmethod]
    #[pyo3(name = "from_value")]
    fn from_value(value: &Bound<PyAny>) -> PyResult<Self> {
        python_to_json(value).map(|json| Self(PEPJSONValue::from_value(&json)))
    }

    /// Turn this PEPJSONValue back into an ordinary Python object.
    ///
    /// Returns:
    ///     A Python object (dict, list, str, int, float, bool, or None)
    ///
    /// Raises:
    ///     ValueError: If the underlying value cannot be converted back to JSON
    #[pyo3(name = "to_json")]
    fn to_json(&self) -> PyResult<Py<PyAny>> {
        match self.0.to_value() {
            Ok(plain) => Python::attach(|py| json_to_python(py, &plain)),
            Err(err) => Err(PyValueError::new_err(format!(
                "Conversion failed: {}",
                err
            ))),
        }
    }

    /// Describe the structure/shape of this PEPJSONValue.
    ///
    /// Returns:
    ///     A JSONStructure describing the shape
    #[pyo3(name = "structure")]
    fn structure(&self) -> PyJSONStructure {
        let shape = self.0.structure();
        PyJSONStructure(shape)
    }

    /// Pad this PEPJSONValue with external padding blocks so it matches a target structure.
    ///
    /// The padding blocks are separate from PKCS#7 padding and are added to
    /// LongString and LongPseudonym variants, so that every instance ends up with
    /// the same number of blocks once encrypted. Batch transcryption needs this,
    /// because all values in a batch must share an identical structure.
    ///
    /// Args:
    ///     structure: The target structure specifying the number of blocks for each field
    ///
    /// Returns:
    ///     A padded PEPJSONValue with padding blocks added where necessary
    ///
    /// Raises:
    ///     ValueError: If the current structure doesn't match the target structure type
    ///                 or if the current size exceeds the target size
    #[pyo3(name = "pad_to")]
    fn pad_to(&self, structure: &PyJSONStructure) -> PyResult<Self> {
        match self.0.pad_to(&structure.0) {
            Ok(padded) => Ok(Self(padded)),
            Err(err) => Err(PyValueError::new_err(format!("Padding failed: {}", err))),
        }
    }
}

/// An encrypted PEP JSON value.
///
/// Python-facing wrapper around the Rust `EncryptedPEPJSONValue`: a JSON document
/// whose primitive values are encrypted as PEP types.
#[pyclass(name = "EncryptedPEPJSONValue", from_py_object)]
#[derive(Clone)]
pub struct PyEncryptedPEPJSONValue(pub(crate) EncryptedPEPJSONValue);

#[pymethods]
impl PyEncryptedPEPJSONValue {
    /// Describe the structure/shape of this EncryptedPEPJSONValue.
    ///
    /// Returns:
    ///     A JSONStructure describing the shape
    #[pyo3(name = "structure")]
    fn structure(&self) -> PyJSONStructure {
        let shape = self.0.structure();
        PyJSONStructure(shape)
    }

    /// Transcrypt this EncryptedPEPJSONValue from one domain/session pair to another.
    ///
    /// Args:
    ///     from_domain: Source pseudonymization domain
    ///     to_domain: Target pseudonymization domain
    ///     from_session: Source encryption session
    ///     to_session: Target encryption session
    ///     pseudonymization_secret: Pseudonymization secret
    ///     encryption_secret: Encryption secret
    ///
    /// Returns:
    ///     A transcrypted EncryptedPEPJSONValue
    #[pyo3(name = "transcrypt")]
    fn transcrypt(
        &self,
        from_domain: &PyPseudonymizationDomain,
        to_domain: &PyPseudonymizationDomain,
        from_session: &PyEncryptionContext,
        to_session: &PyEncryptionContext,
        pseudonymization_secret: &PyPseudonymizationSecret,
        encryption_secret: &PyEncryptionSecret,
    ) -> PyResult<Self> {
        // Bundle the unwrapped domains, sessions and secrets into the single
        // descriptor the Rust `transcrypt` implementation expects.
        let info = TranscryptionInfo::new(
            &from_domain.0,
            &to_domain.0,
            &from_session.0,
            &to_session.0,
            &pseudonymization_secret.0,
            &encryption_secret.0,
        );

        Ok(Self(self.0.transcrypt(&info)))
    }

    /// Serialize this value to a JSON string.
    ///
    /// Returns:
    ///     A JSON string representation
    ///
    /// Raises:
    ///     ValueError: If serialization fails
    #[pyo3(name = "to_json")]
    fn to_json(&self) -> PyResult<String> {
        match serde_json::to_string(&self.0) {
            Ok(text) => Ok(text),
            Err(err) => Err(PyValueError::new_err(format!(
                "Serialization failed: {}",
                err
            ))),
        }
    }

    /// Deserialize a value from a JSON string.
    ///
    /// Args:
    ///     json_str: A JSON string
    ///
    /// Returns:
    ///     An EncryptedPEPJSONValue
    ///
    /// Raises:
    ///     ValueError: If the string cannot be deserialized
    #[staticmethod]
    #[pyo3(name = "from_json")]
    fn from_json(json_str: &str) -> PyResult<Self> {
        serde_json::from_str::<EncryptedPEPJSONValue>(json_str)
            .map(Self)
            .map_err(|err| PyValueError::new_err(format!("Deserialization failed: {}", err)))
    }
}

/// Descriptor for the shape of a PEP JSON value.
///
/// Python-facing wrapper around the Rust `JSONStructure`, as returned by the
/// `structure()` methods of PEPJSONValue and EncryptedPEPJSONValue.
#[pyclass(name = "JSONStructure", from_py_object)]
#[derive(Clone)]
pub struct PyJSONStructure(pub(crate) JSONStructure);

#[pymethods]
impl PyJSONStructure {
    /// Render the structure using its Rust `Debug` representation.
    fn __repr__(&self) -> String {
        let inner = &self.0;
        format!("{:?}", inner)
    }

    /// Two structures are equal when their wrapped descriptors are equal.
    fn __eq__(&self, other: &Self) -> bool {
        PartialEq::eq(&self.0, &other.0)
    }

    /// Serialize this structure to a JSON string.
    ///
    /// Returns:
    ///     A JSON string representation
    ///
    /// Raises:
    ///     ValueError: If serialization fails
    #[pyo3(name = "to_json")]
    fn to_json(&self) -> PyResult<String> {
        match serde_json::to_string(&self.0) {
            Ok(text) => Ok(text),
            Err(err) => Err(PyValueError::new_err(format!(
                "Serialization failed: {}",
                err
            ))),
        }
    }
}

/// Builder that assembles a PEPJSONValue from a mix of attribute and pseudonym fields.
#[pyclass(name = "PEPJSONBuilder", skip_from_py_object)]
pub struct PyPEPJSONBuilder {
    builder: PEPJSONBuilder,
}

#[pymethods]
impl PyPEPJSONBuilder {
    /// Create a new builder.
    #[new]
    fn new() -> Self {
        let builder = PEPJSONBuilder::new();
        Self { builder }
    }

    /// Create a builder from a JSON object (dict), treating the listed fields as pseudonyms.
    ///
    /// Args:
    ///     value: A Python dict or JSON-serializable object
    ///     pseudonyms: A list of field names that should be treated as pseudonyms
    ///
    /// Returns:
    ///     A PEPJSONBuilder
    ///
    /// Raises:
    ///     ValueError: If the value cannot be converted to JSON, or the underlying
    ///                 builder rejects it (invalid object or non-string pseudonym field)
    #[staticmethod]
    #[pyo3(name = "from_json")]
    fn from_json(value: &Bound<PyAny>, pseudonyms: Vec<String>) -> PyResult<Self> {
        let json_value = python_to_json(value)?;
        // The Rust builder takes borrowed names, so view the owned strings as `&str`.
        let names: Vec<&str> = pseudonyms.iter().map(String::as_str).collect();
        match PEPJSONBuilder::from_json(&json_value, &names) {
            Some(builder) => Ok(Self { builder }),
            None => Err(PyValueError::new_err(
                "Invalid object or pseudonym field not a string",
            )),
        }
    }

    /// Add a field as a regular attribute.
    ///
    /// Args:
    ///     key: Field name
    ///     value: Field value (any JSON-serializable Python object)
    ///
    /// Returns:
    ///     Self for method chaining
    ///
    /// Raises:
    ///     ValueError: If the value cannot be converted to JSON
    #[pyo3(name = "attribute")]
    fn attribute<'py>(
        slf: Bound<'py, Self>,
        key: &str,
        value: &Bound<'py, PyAny>,
    ) -> PyResult<Bound<'py, Self>> {
        // Convert first, so the mutable borrow below is only taken once the
        // conversion has succeeded.
        let json_value = python_to_json(value)?;
        {
            let mut guard = slf.borrow_mut();
            // The Rust builder's methods consume `self`; move it out (leaving a
            // default in place) and store the updated builder back.
            let previous = std::mem::take(&mut guard.builder);
            guard.builder = previous.attribute(key, json_value);
        }
        Ok(slf)
    }

    /// Add a string field as a pseudonym.
    ///
    /// Args:
    ///     key: Field name
    ///     value: String value
    ///
    /// Returns:
    ///     Self for method chaining
    #[pyo3(name = "pseudonym")]
    fn pseudonym<'py>(slf: Bound<'py, Self>, key: &str, value: &str) -> PyResult<Bound<'py, Self>> {
        {
            let mut guard = slf.borrow_mut();
            // Same move-out / store-back dance as in `attribute`.
            let previous = std::mem::take(&mut guard.builder);
            guard.builder = previous.pseudonym(key, value);
        }
        Ok(slf)
    }

    /// Build the final PEPJSONValue object.
    ///
    /// The accumulated builder is moved out, so afterwards this object holds a
    /// default-constructed builder again.
    ///
    /// Returns:
    ///     A PEPJSONValue
    #[pyo3(name = "build")]
    fn build(&mut self) -> PyPEPJSONValue {
        let finished = std::mem::take(&mut self.builder).build();
        PyPEPJSONValue(finished)
    }
}

/// Transcrypt a batch of EncryptedPEPJSONValues and shuffle their order.
///
/// The Python wrappers are unwrapped, handed to the Rust `transcrypt_batch`
/// together with a fresh random number generator, and the results are wrapped again.
///
/// Args:
///     values: List of EncryptedPEPJSONValue objects
///     transcryption_info: TranscryptionInfo object containing domains, sessions, and secrets
///
/// Returns:
///     A shuffled list of transcrypted EncryptedPEPJSONValue objects
///
/// Raises:
///     ValueError: If the batch transcryption fails
#[cfg(feature = "batch")]
#[pyfunction]
#[pyo3(name = "transcrypt_batch")]
pub fn py_transcrypt_batch(
    values: Vec<PyEncryptedPEPJSONValue>,
    transcryption_info: &PyTranscryptionInfo,
) -> PyResult<Vec<PyEncryptedPEPJSONValue>> {
    let info: TranscryptionInfo = transcryption_info.into();
    let mut inner: Vec<EncryptedPEPJSONValue> =
        values.into_iter().map(|wrapped| wrapped.0).collect();
    let mut rng = rand::rng();

    match transcrypt_batch(&mut inner, &info, &mut rng) {
        Ok(shuffled) => Ok(shuffled
            .into_vec()
            .into_iter()
            .map(PyEncryptedPEPJSONValue)
            .collect()),
        Err(err) => Err(PyValueError::new_err(format!(
            "Batch transcryption failed: {}",
            err
        ))),
    }
}

/// Transcrypt a batch of EncryptedPEPJSONValues using a TranscryptionInfo object.
///
/// This is an alias of `transcrypt_batch` exposed under the name
/// `transcrypt_json_batch`: it takes the same arguments and delegates to it, so
/// both entry points always behave identically.
///
/// Args:
///     values: List of EncryptedPEPJSONValue objects
///     transcryption_info: TranscryptionInfo object containing domains, sessions, and secrets
///
/// Returns:
///     A shuffled list of transcrypted EncryptedPEPJSONValue objects
///
/// Raises:
///     ValueError: If the batch transcryption fails
#[cfg(feature = "batch")]
#[pyfunction]
#[pyo3(name = "transcrypt_json_batch")]
pub fn py_transcrypt_json_batch(
    values: Vec<PyEncryptedPEPJSONValue>,
    transcryption_info: &PyTranscryptionInfo,
) -> PyResult<Vec<PyEncryptedPEPJSONValue>> {
    // Delegate instead of duplicating the body; a `#[pyfunction]` remains an
    // ordinary Rust function that can be called directly.
    py_transcrypt_batch(values, transcryption_info)
}

// Helper functions to convert between Python and serde_json::Value

/// Recursively converts a Python object into a `serde_json::Value`.
///
/// Supported inputs are `None`, `bool`, integers, floats, `str`, `list` and
/// `dict` (whose keys must extract as strings). Lists and dicts are converted
/// element by element.
///
/// Integers are first tried as `i64` and then as `u64`, so values up to
/// `u64::MAX` keep their exact integer value; this mirrors the `u64` handling in
/// `json_to_python`. Integers outside that range fall through to the `f64`
/// branch. Floats that `serde_json::Number::from_f64` rejects (it returns `None`
/// for non-finite values) are mapped to JSON `null`.
///
/// # Errors
///
/// Returns a `ValueError` for any other Python type, and propagates the
/// extraction error when a dict key is not a string.
fn python_to_json(value: &Bound<PyAny>) -> PyResult<Value> {
    if value.is_none() {
        return Ok(Value::Null);
    }
    // `bool` is checked before the integer branches so that `True`/`False`
    // become JSON booleans rather than numbers (Python's `bool` is an `int` subclass).
    if let Ok(b) = value.extract::<bool>() {
        return Ok(Value::Bool(b));
    }
    if let Ok(i) = value.extract::<i64>() {
        return Ok(Value::Number(i.into()));
    }
    // Integers above `i64::MAX` that still fit in `u64` stay exact instead of
    // being rounded by the float branch below.
    if let Ok(u) = value.extract::<u64>() {
        return Ok(Value::Number(u.into()));
    }
    if let Ok(f) = value.extract::<f64>() {
        return Ok(serde_json::Number::from_f64(f)
            .map(Value::Number)
            .unwrap_or(Value::Null));
    }
    if let Ok(s) = value.extract::<String>() {
        return Ok(Value::String(s));
    }
    if let Ok(list) = value.cast::<PyList>() {
        let items = list
            .iter()
            .map(|item| python_to_json(&item))
            .collect::<PyResult<Vec<Value>>>()?;
        return Ok(Value::Array(items));
    }
    if let Ok(dict) = value.cast::<PyDict>() {
        let mut obj = serde_json::Map::new();
        for (key, val) in dict.iter() {
            let key_str = key.extract::<String>()?;
            obj.insert(key_str, python_to_json(&val)?);
        }
        return Ok(Value::Object(obj));
    }
    Err(PyValueError::new_err(
        "Unsupported Python type for JSON conversion",
    ))
}

/// Recursively converts a `serde_json::Value` into a Python object.
///
/// `null` becomes `None`, booleans and strings map to their Python
/// counterparts, arrays become lists and objects become dicts. Numbers are
/// tried as `i64`, then `u64`, then `f64`; a number that fits none of these
/// yields a `ValueError` ("Invalid number").
pub(crate) fn json_to_python(py: Python, value: &Value) -> PyResult<Py<PyAny>> {
    let object = match value {
        Value::Null => py.None(),
        Value::Bool(flag) => (*flag).into_pyobject(py)?.as_any().clone().unbind(),
        Value::Number(number) => {
            if let Some(signed) = number.as_i64() {
                signed.into_pyobject(py)?.as_any().clone().unbind()
            } else if let Some(unsigned) = number.as_u64() {
                unsigned.into_pyobject(py)?.as_any().clone().unbind()
            } else if let Some(float) = number.as_f64() {
                float.into_pyobject(py)?.as_any().clone().unbind()
            } else {
                return Err(PyValueError::new_err("Invalid number"));
            }
        }
        Value::String(text) => text.as_str().into_pyobject(py)?.as_any().clone().unbind(),
        Value::Array(items) => {
            let list = PyList::empty(py);
            for element in items {
                let converted = json_to_python(py, element)?;
                list.append(converted)?;
            }
            list.into()
        }
        Value::Object(fields) => {
            let dict = PyDict::new(py);
            for (name, field) in fields {
                let converted = json_to_python(py, field)?;
                dict.set_item(name, converted)?;
            }
            dict.into()
        }
    };
    Ok(object)
}

/// Encrypt a PEPJSONValue using global public keys.
///
/// Can be used when encryption happens offline and no session key is available,
/// or when using a session key may leak information.
///
/// Args:
///     value: The PEPJSONValue to encrypt
///     global_keys: Global public keys (pseudonym and attribute)
///
/// Returns:
///     An EncryptedPEPJSONValue
#[cfg(feature = "offline")]
#[pyfunction]
#[pyo3(name = "encrypt_global")]
pub fn py_encrypt_global(
    value: &PyPEPJSONValue,
    global_keys: &PyGlobalPublicKeys,
) -> PyEncryptedPEPJSONValue {
    // Unwrap the Python key wrappers into the Rust key struct.
    let public_keys = GlobalPublicKeys {
        pseudonym: global_keys.pseudonym.0 .0.into(),
        attribute: global_keys.attribute.0 .0.into(),
    };
    let mut rng = rand::rng();
    let ciphertext = encrypt_global(&value.0, &public_keys, &mut rng);
    PyEncryptedPEPJSONValue(ciphertext)
}

/// Decrypt an EncryptedPEPJSONValue using global secret keys.
///
/// Note: For most applications, the global secret key should be discarded and
/// thus never exist.
///
/// Args:
///     encrypted: The EncryptedPEPJSONValue to decrypt
///     global_secret_keys: Global secret keys (pseudonym and attribute)
///
/// Returns:
///     The decrypted PEPJSONValue
///
/// Raises:
///     ValueError: With the `elgamal3` feature enabled, if decryption reports a
///                 key mismatch
#[cfg(all(feature = "insecure", feature = "offline"))]
#[pyfunction]
#[pyo3(name = "decrypt_global")]
pub fn py_decrypt_global(
    encrypted: &PyEncryptedPEPJSONValue,
    global_secret_keys: &PyGlobalSecretKeys,
) -> PyResult<PyPEPJSONValue> {
    // Unwrap the Python key wrappers into the Rust key struct.
    let secret_keys = GlobalSecretKeys {
        pseudonym: global_secret_keys.pseudonym.0 .0.into(),
        attribute: global_secret_keys.attribute.0 .0.into(),
    };
    // With `elgamal3`, decryption yields an `Option`; `None` is reported as an error.
    #[cfg(feature = "elgamal3")]
    let plain = decrypt_global(&encrypted.0, &secret_keys)
        .ok_or_else(|| PyValueError::new_err("Decryption failed: key mismatch"))?;
    // Without `elgamal3`, the decrypted value is returned directly.
    #[cfg(not(feature = "elgamal3"))]
    let plain = decrypt_global(&encrypted.0, &secret_keys);
    Ok(PyPEPJSONValue(plain))
}

// JSON utility functions

/// Convert a boolean to a single byte (0x00 for false, 0x01 for true).
///
/// Thin Python-facing wrapper that forwards to `utils::bool_to_byte`.
///
/// Args:
///     b: The boolean to encode
///
/// Returns:
///     The encoded byte
#[pyfunction]
#[pyo3(name = "bool_to_byte")]
pub fn py_bool_to_byte(b: bool) -> u8 {
    utils::bool_to_byte(b)
}

/// Convert a byte to a boolean.
///
/// Args:
///     byte: The byte to decode
///
/// Returns:
///     The decoded boolean
///
/// Raises:
///     ValueError: If the byte is neither 0x00 nor 0x01
#[pyfunction]
#[pyo3(name = "byte_to_bool")]
pub fn py_byte_to_bool(byte: u8) -> PyResult<bool> {
    match utils::byte_to_bool(byte) {
        Ok(flag) => Ok(flag),
        Err(err) => Err(PyValueError::new_err(err.to_string())),
    }
}

/// Convert a JSON number to bytes (9 bytes: 1 byte type tag + 8 bytes data).
///
/// The input is turned into a `serde_json::Number` via `Number::from_f64`. When
/// that conversion yields `None` (non-finite input), the integer `0` is encoded
/// instead.
///
/// Args:
///     n: The number to encode
///
/// Returns:
///     The 9-byte encoding
#[pyfunction]
#[pyo3(name = "number_to_bytes")]
pub fn py_number_to_bytes(n: f64) -> [u8; 9] {
    let number = match serde_json::Number::from_f64(n) {
        Some(finite) => finite,
        None => serde_json::Number::from(0),
    };
    utils::number_to_bytes(&number)
}

/// Convert bytes to a JSON number (9 bytes: 1 byte type tag + 8 bytes data).
///
/// The decoded number is returned as a float; if it has no `f64`
/// representation, `0.0` is returned.
///
/// Args:
///     bytes: The 9-byte encoding
///
/// Returns:
///     The decoded number as a float
#[pyfunction]
#[pyo3(name = "bytes_to_number")]
pub fn py_bytes_to_number(bytes: [u8; 9]) -> f64 {
    utils::bytes_to_number(&bytes).as_f64().unwrap_or(0.0)
}

/// Unifies multiple JSON structures by taking the maximum block count for each field.
///
/// This function is useful for batch operations where you need to normalize multiple
/// values to have the same structure. It recursively unifies nested structures,
/// taking the maximum block count for strings and pseudonyms.
///
/// Args:
///     structures: A list of JSONStructure objects to unify
///
/// Returns:
///     A unified JSONStructure where string and pseudonym fields have maximum block counts
///
/// Raises:
///     ValueError: If the structures are incompatible (different types, array lengths, or object fields)
#[pyfunction]
#[pyo3(name = "unify_structures")]
pub fn py_unify_structures(structures: Vec<PyJSONStructure>) -> PyResult<PyJSONStructure> {
    let rust_structures: Vec<JSONStructure> = structures.into_iter().map(|s| s.0).collect();
    crate::data::json::structure::unify_structures(&rust_structures)
        .map(PyJSONStructure)
        .map_err(|e| PyValueError::new_err(format!("Unification failed: {}", e)))
}

/// Registers the JSON classes and functions of this file on the Python module `m`.
///
/// The four wrapper classes and the JSON utility functions are always added.
/// The batch transcryption functions require the `batch` feature,
/// `encrypt_global` requires `offline`, and `decrypt_global` requires both
/// `insecure` and `offline`.
///
/// # Errors
///
/// Propagates any error returned while adding a class or function to the module.
pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Register main JSON types at json module level
    m.add_class::<PyPEPJSONValue>()?;
    m.add_class::<PyEncryptedPEPJSONValue>()?;
    m.add_class::<PyJSONStructure>()?;
    m.add_class::<PyPEPJSONBuilder>()?;

    // Batch transcryption functions
    #[cfg(feature = "batch")]
    {
        m.add_function(wrap_pyfunction!(py_transcrypt_batch, m)?)?;
        m.add_function(wrap_pyfunction!(py_transcrypt_json_batch, m)?)?;
    }

    // Global key functions (offline feature)
    #[cfg(feature = "offline")]
    m.add_function(wrap_pyfunction!(py_encrypt_global, m)?)?;
    // Decryption with global secret keys additionally needs the `insecure` feature.
    #[cfg(all(feature = "insecure", feature = "offline"))]
    m.add_function(wrap_pyfunction!(py_decrypt_global, m)?)?;

    // JSON utility functions
    m.add_function(wrap_pyfunction!(py_bool_to_byte, m)?)?;
    m.add_function(wrap_pyfunction!(py_byte_to_bool, m)?)?;
    m.add_function(wrap_pyfunction!(py_number_to_bytes, m)?)?;
    m.add_function(wrap_pyfunction!(py_bytes_to_number, m)?)?;
    m.add_function(wrap_pyfunction!(py_unify_structures, m)?)?;

    Ok(())
}