1use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, Deserialize, Serialize)]
6pub struct CloakPipeConfig {
7 pub proxy: ProxyConfig,
8 pub vault: VaultConfig,
9 pub detection: DetectionConfig,
10 #[serde(default)]
11 pub tree: TreeConfig,
12 #[serde(default)]
13 pub vectors: VectorConfig,
14 #[serde(default)]
15 pub local: LocalConfig,
16 #[serde(default)]
17 pub audit: AuditConfig,
18}
19
20#[derive(Debug, Clone, Deserialize, Serialize)]
21pub struct ProxyConfig {
22 pub listen: String,
23 pub upstream: String,
24 #[serde(default = "default_api_key_env")]
25 pub api_key_env: String,
26 #[serde(default = "default_timeout")]
27 pub timeout_seconds: u64,
28 #[serde(default = "default_max_concurrent")]
29 pub max_concurrent: usize,
30 #[serde(default = "default_mode")]
31 pub mode: String,
32}
33
34#[derive(Debug, Clone, Deserialize, Serialize)]
35pub struct VaultConfig {
36 pub path: String,
37 #[serde(default = "default_encryption")]
38 pub encryption: String,
39 pub key_env: Option<String>,
40 #[serde(default)]
41 pub key_keyring: bool,
42}
43
44#[derive(Debug, Clone, Deserialize, Serialize)]
45pub struct DetectionConfig {
46 #[serde(default = "default_true")]
47 pub secrets: bool,
48 #[serde(default = "default_true")]
49 pub financial: bool,
50 #[serde(default = "default_true")]
51 pub dates: bool,
52 #[serde(default = "default_true")]
53 pub emails: bool,
54 #[serde(default)]
55 pub phone_numbers: bool,
56 #[serde(default)]
57 pub ip_addresses: bool,
58 #[serde(default)]
59 pub urls_internal: bool,
60 #[serde(default)]
61 pub ner: NerConfig,
62 #[serde(default)]
63 pub custom: CustomConfig,
64 #[serde(default)]
65 pub overrides: OverrideConfig,
66}
67
68#[derive(Debug, Clone, Default, Deserialize, Serialize)]
69pub struct NerConfig {
70 #[serde(default)]
71 pub enabled: bool,
72 pub model: Option<String>,
73 #[serde(default = "default_confidence")]
74 pub confidence_threshold: f64,
75 #[serde(default)]
76 pub entity_types: Vec<String>,
77}
78
79#[derive(Debug, Clone, Default, Deserialize, Serialize)]
80pub struct CustomConfig {
81 #[serde(default)]
82 pub patterns: Vec<CustomPattern>,
83}
84
85#[derive(Debug, Clone, Deserialize, Serialize)]
86pub struct CustomPattern {
87 pub name: String,
88 pub regex: String,
89 pub category: String,
90}
91
92#[derive(Debug, Clone, Default, Deserialize, Serialize)]
93pub struct OverrideConfig {
94 #[serde(default)]
95 pub preserve: Vec<String>,
96 #[serde(default)]
97 pub force: Vec<String>,
98}
99
100#[derive(Debug, Clone, Deserialize, Serialize)]
101pub struct TreeConfig {
102 #[serde(default = "default_true")]
103 pub enabled: bool,
104 #[serde(default = "default_tree_path")]
105 pub storage_path: String,
106 #[serde(default = "default_tree_model")]
107 pub index_model: String,
108 #[serde(default = "default_tree_model")]
109 pub search_model: String,
110 #[serde(default = "default_max_pages")]
111 pub max_pages_per_node: usize,
112 #[serde(default = "default_max_tokens")]
113 pub max_tokens_per_node: usize,
114 #[serde(default = "default_true")]
115 pub add_node_summaries: bool,
116 #[serde(default = "default_true")]
117 pub pseudonymize_summaries: bool,
118}
119
120#[derive(Debug, Clone, Default, Deserialize, Serialize)]
121pub struct VectorConfig {
122 #[serde(default)]
123 pub encrypt: bool,
124 #[serde(default = "default_adcpe")]
125 pub algorithm: String,
126 pub key_env: Option<String>,
127}
128
129#[derive(Debug, Clone, Default, Deserialize, Serialize)]
130pub struct LocalConfig {
131 pub embeddings_model: Option<String>,
132 #[serde(default = "default_vector_db")]
133 pub vector_db: String,
134 pub vector_db_path: Option<String>,
135 pub llm_model: Option<String>,
136 pub llm_backend: Option<String>,
137}
138
139#[derive(Debug, Clone, Deserialize, Serialize)]
140pub struct AuditConfig {
141 #[serde(default = "default_true")]
142 pub enabled: bool,
143 #[serde(default = "default_audit_path")]
144 pub log_path: String,
145 #[serde(default = "default_jsonl")]
146 pub format: String,
147 #[serde(default = "default_retention")]
148 pub retention_days: u32,
149 #[serde(default = "default_true")]
150 pub log_entities: bool,
151 #[serde(default)]
152 pub log_mappings: bool,
153}
154
/// Serde default helper for boolean fields that should be on unless the
/// input says otherwise; always returns `true`.
fn default_true() -> bool {
    true
}
/// Serde default for `ProxyConfig::api_key_env`: `"OPENAI_API_KEY"`.
fn default_api_key_env() -> String {
    String::from("OPENAI_API_KEY")
}

/// Serde default for `ProxyConfig::timeout_seconds`: `120`.
fn default_timeout() -> u64 {
    120
}

/// Serde default for `ProxyConfig::max_concurrent`: `256`.
fn default_max_concurrent() -> usize {
    256
}

/// Serde default for `ProxyConfig::mode`: `"cloaktree"`.
fn default_mode() -> String {
    String::from("cloaktree")
}
/// Serde default for `VaultConfig::encryption`: `"aes-256-gcm"`.
fn default_encryption() -> String {
    String::from("aes-256-gcm")
}

/// Serde default for `NerConfig::confidence_threshold`: `0.85`.
fn default_confidence() -> f64 {
    0.85
}
/// Serde default for `TreeConfig::storage_path`: `"./trees/"`.
fn default_tree_path() -> String {
    String::from("./trees/")
}

/// Serde default shared by `TreeConfig::index_model` and
/// `TreeConfig::search_model`: `"gpt-4o"`.
fn default_tree_model() -> String {
    String::from("gpt-4o")
}

/// Serde default for `TreeConfig::max_pages_per_node`: `10`.
fn default_max_pages() -> usize {
    10
}

/// Serde default for `TreeConfig::max_tokens_per_node`: `20000`.
fn default_max_tokens() -> usize {
    20_000
}
/// Serde default for `VectorConfig::algorithm`: `"adcpe"`.
fn default_adcpe() -> String {
    String::from("adcpe")
}

/// Serde default for `LocalConfig::vector_db`: `"lancedb"`.
fn default_vector_db() -> String {
    String::from("lancedb")
}
/// Serde default for `AuditConfig::log_path`: `"./audit/"`.
fn default_audit_path() -> String {
    String::from("./audit/")
}

/// Serde default for `AuditConfig::format`: `"jsonl"`.
fn default_jsonl() -> String {
    String::from("jsonl")
}

/// Serde default for `AuditConfig::retention_days`: `90`.
fn default_retention() -> u32 {
    90
}
172
173impl Default for TreeConfig {
174 fn default() -> Self {
175 Self {
176 enabled: true,
177 storage_path: default_tree_path(),
178 index_model: default_tree_model(),
179 search_model: default_tree_model(),
180 max_pages_per_node: default_max_pages(),
181 max_tokens_per_node: default_max_tokens(),
182 add_node_summaries: true,
183 pseudonymize_summaries: true,
184 }
185 }
186}
187
188impl Default for AuditConfig {
189 fn default() -> Self {
190 Self {
191 enabled: true,
192 log_path: default_audit_path(),
193 format: default_jsonl(),
194 retention_days: default_retention(),
195 log_entities: true,
196 log_mappings: false,
197 }
198 }
199}