cloakpipe_core/lib.rs
//! CloakPipe Core — Detection, pseudonymization, and rehydration engine.
//!
//! This crate provides the foundational privacy primitives:
//! - Multi-layer entity detection (regex, financial, NER, custom rules)
//! - Consistent pseudonymization with stable entity→token mappings
//! - Encrypted mapping vault (AES-256-GCM + zeroize)
//! - Response rehydration (including SSE streaming support)
8
9pub mod detector;
10pub mod replacer;
11pub mod resolver;
12pub mod session;
13pub mod vault;
14pub mod vault_sqlite;
15pub mod rehydrator;
16pub mod config;
17pub mod profiles;
18
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21
22/// A detected sensitive entity within text.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct DetectedEntity {
25 /// The raw sensitive text that was detected.
26 pub original: String,
27 /// Byte offset start in the source text.
28 pub start: usize,
29 /// Byte offset end in the source text.
30 pub end: usize,
31 /// The category of sensitive data (e.g., PERSON, ORG, AMOUNT, SECRET).
32 pub category: EntityCategory,
33 /// Detection confidence (0.0–1.0). 1.0 for pattern-based, variable for NER.
34 pub confidence: f64,
35 /// Which detection layer found this entity.
36 pub source: DetectionSource,
37}
38
39/// Categories of sensitive entities.
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
41pub enum EntityCategory {
42 Person,
43 Organization,
44 Location,
45 Amount,
46 Percentage,
47 Date,
48 Email,
49 PhoneNumber,
50 IpAddress,
51 Secret, // API keys, tokens, passwords
52 Url, // Internal URLs
53 Project, // Custom: project codenames
54 Business, // Custom: client tiers, deal terms
55 Infra, // Custom: internal infrastructure
56 Custom(String),
57}
58
59/// Which detection layer identified the entity.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub enum DetectionSource {
62 Pattern, // Regex-based
63 Financial, // Currency/percentage parser
64 Ner, // ONNX NER model
65 Custom, // User-defined TOML rules
66}
67
68/// A pseudonymized replacement token.
69#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
70pub struct PseudoToken {
71 /// The replacement string (e.g., "ORG_7", "AMOUNT_12").
72 pub token: String,
73 /// The category prefix used.
74 pub category: EntityCategory,
75 /// The sequential ID within this category.
76 pub id: u32,
77}
78
79/// Result of pseudonymizing a text.
80#[derive(Debug, Clone)]
81pub struct PseudonymizedText {
82 /// The text with all sensitive entities replaced by pseudo-tokens.
83 pub text: String,
84 /// Map from pseudo-tokens back to original values (for rehydration).
85 pub mappings: HashMap<String, String>,
86 /// List of all entities that were detected and replaced.
87 pub entities: Vec<DetectedEntity>,
88}
89
/// Result of rehydrating a response.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly (e.g. in
/// assertions), and `Default` for an empty result with a zero count.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct RehydratedText {
    /// The text with pseudo-tokens replaced back with original values.
    pub text: String,
    /// Number of tokens that were successfully rehydrated.
    pub rehydrated_count: usize,
}
97}