Skip to main content

cloakpipe_core/
lib.rs

//! CloakPipe Core — Detection, pseudonymization, and rehydration engine.
//!
//! This crate provides the foundational privacy primitives:
//! - Multi-layer entity detection (regex, financial, NER, custom rules)
//! - Consistent pseudonymization with stable entity→token mappings
//! - Encrypted mapping vault (AES-256-GCM + zeroize)
//! - Response rehydration (including SSE streaming support)

9pub mod detector;
10pub mod replacer;
11pub mod resolver;
12pub mod session;
13pub mod vault;
14pub mod vault_sqlite;
15pub mod rehydrator;
16pub mod config;
17pub mod profiles;
18
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21
22/// A detected sensitive entity within text.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct DetectedEntity {
25    /// The raw sensitive text that was detected.
26    pub original: String,
27    /// Byte offset start in the source text.
28    pub start: usize,
29    /// Byte offset end in the source text.
30    pub end: usize,
31    /// The category of sensitive data (e.g., PERSON, ORG, AMOUNT, SECRET).
32    pub category: EntityCategory,
33    /// Detection confidence (0.0–1.0). 1.0 for pattern-based, variable for NER.
34    pub confidence: f64,
35    /// Which detection layer found this entity.
36    pub source: DetectionSource,
37}
38
39/// Categories of sensitive entities.
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
41pub enum EntityCategory {
42    Person,
43    Organization,
44    Location,
45    Amount,
46    Percentage,
47    Date,
48    Email,
49    PhoneNumber,
50    IpAddress,
51    Secret,      // API keys, tokens, passwords
52    Url,         // Internal URLs
53    Project,     // Custom: project codenames
54    Business,    // Custom: client tiers, deal terms
55    Infra,       // Custom: internal infrastructure
56    Custom(String),
57}
58
59/// Which detection layer identified the entity.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub enum DetectionSource {
62    Pattern,     // Regex-based
63    Financial,   // Currency/percentage parser
64    Ner,         // ONNX NER model
65    Custom,      // User-defined TOML rules
66}
67
68/// A pseudonymized replacement token.
69#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
70pub struct PseudoToken {
71    /// The replacement string (e.g., "ORG_7", "AMOUNT_12").
72    pub token: String,
73    /// The category prefix used.
74    pub category: EntityCategory,
75    /// The sequential ID within this category.
76    pub id: u32,
77}
78
79/// Result of pseudonymizing a text.
80#[derive(Debug, Clone)]
81pub struct PseudonymizedText {
82    /// The text with all sensitive entities replaced by pseudo-tokens.
83    pub text: String,
84    /// Map from pseudo-tokens back to original values (for rehydration).
85    pub mappings: HashMap<String, String>,
86    /// List of all entities that were detected and replaced.
87    pub entities: Vec<DetectedEntity>,
88}
89
/// Result of rehydrating a response.
///
/// Pairs the restored text with a count of how many pseudo-tokens were
/// substituted back.
#[derive(Debug, Clone)]
pub struct RehydratedText {
    /// The text with pseudo-tokens replaced back with original values.
    pub text: String,
    /// Number of tokens that were successfully rehydrated.
    pub rehydrated_count: usize,
}