Skip to main content

cloakpipe_core/
lib.rs

//! CloakPipe Core — Detection, pseudonymization, and rehydration engine.
//!
//! This crate provides the foundational privacy primitives:
//! - Multi-layer entity detection (regex, financial, NER, custom rules)
//! - Consistent pseudonymization with stable entity→token mappings
//! - Encrypted mapping vault (AES-256-GCM + zeroize)
//! - Response rehydration (including SSE streaming support)
8
9pub mod detector;
10pub mod replacer;
11pub mod vault;
12pub mod rehydrator;
13pub mod config;
14
15use serde::{Deserialize, Serialize};
16use std::collections::HashMap;
17
18/// A detected sensitive entity within text.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct DetectedEntity {
21    /// The raw sensitive text that was detected.
22    pub original: String,
23    /// Byte offset start in the source text.
24    pub start: usize,
25    /// Byte offset end in the source text.
26    pub end: usize,
27    /// The category of sensitive data (e.g., PERSON, ORG, AMOUNT, SECRET).
28    pub category: EntityCategory,
29    /// Detection confidence (0.0–1.0). 1.0 for pattern-based, variable for NER.
30    pub confidence: f64,
31    /// Which detection layer found this entity.
32    pub source: DetectionSource,
33}
34
35/// Categories of sensitive entities.
36#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
37pub enum EntityCategory {
38    Person,
39    Organization,
40    Location,
41    Amount,
42    Percentage,
43    Date,
44    Email,
45    PhoneNumber,
46    IpAddress,
47    Secret,      // API keys, tokens, passwords
48    Url,         // Internal URLs
49    Project,     // Custom: project codenames
50    Business,    // Custom: client tiers, deal terms
51    Infra,       // Custom: internal infrastructure
52    Custom(String),
53}
54
55/// Which detection layer identified the entity.
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub enum DetectionSource {
58    Pattern,     // Regex-based
59    Financial,   // Currency/percentage parser
60    Ner,         // ONNX NER model
61    Custom,      // User-defined TOML rules
62}
63
64/// A pseudonymized replacement token.
65#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
66pub struct PseudoToken {
67    /// The replacement string (e.g., "ORG_7", "AMOUNT_12").
68    pub token: String,
69    /// The category prefix used.
70    pub category: EntityCategory,
71    /// The sequential ID within this category.
72    pub id: u32,
73}
74
75/// Result of pseudonymizing a text.
76#[derive(Debug, Clone)]
77pub struct PseudonymizedText {
78    /// The text with all sensitive entities replaced by pseudo-tokens.
79    pub text: String,
80    /// Map from pseudo-tokens back to original values (for rehydration).
81    pub mappings: HashMap<String, String>,
82    /// List of all entities that were detected and replaced.
83    pub entities: Vec<DetectedEntity>,
84}
85
/// Result of rehydrating a response.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly (e.g. with
/// `assert_eq!`), and `Default` so an empty result — empty text, zero
/// rehydrated tokens — can be built without spelling out every field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RehydratedText {
    /// The text with pseudo-tokens replaced back with original values.
    pub text: String,
    /// Number of tokens that were successfully rehydrated.
    pub rehydrated_count: usize,
}