cloakpipe_core/lib.rs
1//! CloakPipe Core — Detection, pseudonymization, and rehydration engine.
2//!
3//! This crate provides the foundational privacy primitives:
4//! - Multi-layer entity detection (regex, financial, NER, custom rules)
5//! - Consistent pseudonymization with stable entity→token mappings
6//! - Encrypted mapping vault (AES-256-GCM + zeroize)
7//! - Response rehydration (including SSE streaming support)
8
9pub mod detector;
10pub mod replacer;
11pub mod vault;
12pub mod rehydrator;
13pub mod config;
14
15use serde::{Deserialize, Serialize};
16use std::collections::HashMap;
17
18/// A detected sensitive entity within text.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct DetectedEntity {
21 /// The raw sensitive text that was detected.
22 pub original: String,
23 /// Byte offset start in the source text.
24 pub start: usize,
25 /// Byte offset end in the source text.
26 pub end: usize,
27 /// The category of sensitive data (e.g., PERSON, ORG, AMOUNT, SECRET).
28 pub category: EntityCategory,
29 /// Detection confidence (0.0–1.0). 1.0 for pattern-based, variable for NER.
30 pub confidence: f64,
31 /// Which detection layer found this entity.
32 pub source: DetectionSource,
33}
34
35/// Categories of sensitive entities.
36#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
37pub enum EntityCategory {
38 Person,
39 Organization,
40 Location,
41 Amount,
42 Percentage,
43 Date,
44 Email,
45 PhoneNumber,
46 IpAddress,
47 Secret, // API keys, tokens, passwords
48 Url, // Internal URLs
49 Project, // Custom: project codenames
50 Business, // Custom: client tiers, deal terms
51 Infra, // Custom: internal infrastructure
52 Custom(String),
53}
54
55/// Which detection layer identified the entity.
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub enum DetectionSource {
58 Pattern, // Regex-based
59 Financial, // Currency/percentage parser
60 Ner, // ONNX NER model
61 Custom, // User-defined TOML rules
62}
63
64/// A pseudonymized replacement token.
65#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
66pub struct PseudoToken {
67 /// The replacement string (e.g., "ORG_7", "AMOUNT_12").
68 pub token: String,
69 /// The category prefix used.
70 pub category: EntityCategory,
71 /// The sequential ID within this category.
72 pub id: u32,
73}
74
75/// Result of pseudonymizing a text.
76#[derive(Debug, Clone)]
77pub struct PseudonymizedText {
78 /// The text with all sensitive entities replaced by pseudo-tokens.
79 pub text: String,
80 /// Map from pseudo-tokens back to original values (for rehydration).
81 pub mappings: HashMap<String, String>,
82 /// List of all entities that were detected and replaced.
83 pub entities: Vec<DetectedEntity>,
84}
85
/// Outcome of rehydrating a response.
///
/// Implements `PartialEq`/`Eq` so that results can be compared directly,
/// for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RehydratedText {
    /// Text with pseudo-tokens replaced back with their original values.
    pub text: String,
    /// Number of tokens that were successfully rehydrated.
    pub rehydrated_count: usize,
}
93}