// cloakpipe_core/config.rs

//! Configuration types parsed from cloakpipe.toml.
2
3use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, Deserialize, Serialize)]
6pub struct CloakPipeConfig {
7    pub proxy: ProxyConfig,
8    pub vault: VaultConfig,
9    pub detection: DetectionConfig,
10    #[serde(default)]
11    pub tree: TreeConfig,
12    #[serde(default)]
13    pub vectors: VectorConfig,
14    #[serde(default)]
15    pub local: LocalConfig,
16    #[serde(default)]
17    pub audit: AuditConfig,
18}
19
20#[derive(Debug, Clone, Deserialize, Serialize)]
21pub struct ProxyConfig {
22    pub listen: String,
23    pub upstream: String,
24    #[serde(default = "default_api_key_env")]
25    pub api_key_env: String,
26    #[serde(default = "default_timeout")]
27    pub timeout_seconds: u64,
28    #[serde(default = "default_max_concurrent")]
29    pub max_concurrent: usize,
30    #[serde(default = "default_mode")]
31    pub mode: String,
32}
33
34#[derive(Debug, Clone, Deserialize, Serialize)]
35pub struct VaultConfig {
36    pub path: String,
37    #[serde(default = "default_encryption")]
38    pub encryption: String,
39    pub key_env: Option<String>,
40    #[serde(default)]
41    pub key_keyring: bool,
42}
43
44#[derive(Debug, Clone, Deserialize, Serialize)]
45pub struct DetectionConfig {
46    #[serde(default = "default_true")]
47    pub secrets: bool,
48    #[serde(default = "default_true")]
49    pub financial: bool,
50    #[serde(default = "default_true")]
51    pub dates: bool,
52    #[serde(default = "default_true")]
53    pub emails: bool,
54    #[serde(default)]
55    pub phone_numbers: bool,
56    #[serde(default)]
57    pub ip_addresses: bool,
58    #[serde(default)]
59    pub urls_internal: bool,
60    #[serde(default)]
61    pub ner: NerConfig,
62    #[serde(default)]
63    pub custom: CustomConfig,
64    #[serde(default)]
65    pub overrides: OverrideConfig,
66}
67
68#[derive(Debug, Clone, Default, Deserialize, Serialize)]
69pub struct NerConfig {
70    #[serde(default)]
71    pub enabled: bool,
72    pub model: Option<String>,
73    #[serde(default = "default_confidence")]
74    pub confidence_threshold: f64,
75    #[serde(default)]
76    pub entity_types: Vec<String>,
77}
78
79#[derive(Debug, Clone, Default, Deserialize, Serialize)]
80pub struct CustomConfig {
81    #[serde(default)]
82    pub patterns: Vec<CustomPattern>,
83}
84
85#[derive(Debug, Clone, Deserialize, Serialize)]
86pub struct CustomPattern {
87    pub name: String,
88    pub regex: String,
89    pub category: String,
90}
91
92#[derive(Debug, Clone, Default, Deserialize, Serialize)]
93pub struct OverrideConfig {
94    #[serde(default)]
95    pub preserve: Vec<String>,
96    #[serde(default)]
97    pub force: Vec<String>,
98}
99
100#[derive(Debug, Clone, Deserialize, Serialize)]
101pub struct TreeConfig {
102    #[serde(default = "default_true")]
103    pub enabled: bool,
104    #[serde(default = "default_tree_path")]
105    pub storage_path: String,
106    #[serde(default = "default_tree_model")]
107    pub index_model: String,
108    #[serde(default = "default_tree_model")]
109    pub search_model: String,
110    #[serde(default = "default_max_pages")]
111    pub max_pages_per_node: usize,
112    #[serde(default = "default_max_tokens")]
113    pub max_tokens_per_node: usize,
114    #[serde(default = "default_true")]
115    pub add_node_summaries: bool,
116    #[serde(default = "default_true")]
117    pub pseudonymize_summaries: bool,
118}
119
120#[derive(Debug, Clone, Default, Deserialize, Serialize)]
121pub struct VectorConfig {
122    #[serde(default)]
123    pub encrypt: bool,
124    #[serde(default = "default_adcpe")]
125    pub algorithm: String,
126    pub key_env: Option<String>,
127}
128
129#[derive(Debug, Clone, Default, Deserialize, Serialize)]
130pub struct LocalConfig {
131    pub embeddings_model: Option<String>,
132    #[serde(default = "default_vector_db")]
133    pub vector_db: String,
134    pub vector_db_path: Option<String>,
135    pub llm_model: Option<String>,
136    pub llm_backend: Option<String>,
137}
138
139#[derive(Debug, Clone, Deserialize, Serialize)]
140pub struct AuditConfig {
141    #[serde(default = "default_true")]
142    pub enabled: bool,
143    #[serde(default = "default_audit_path")]
144    pub log_path: String,
145    #[serde(default = "default_jsonl")]
146    pub format: String,
147    #[serde(default = "default_retention")]
148    pub retention_days: u32,
149    #[serde(default = "default_true")]
150    pub log_entities: bool,
151    #[serde(default)]
152    pub log_mappings: bool,
153}
154
// Default value functions
//
// Each function is referenced by name from a `#[serde(default = "...")]`
// attribute in this file; several are also called from the hand-written
// `Default` impls so both paths produce the same values.

/// Default for boolean flags that are on unless explicitly disabled.
fn default_true() -> bool {
    true
}

/// Default for `ProxyConfig::api_key_env`.
fn default_api_key_env() -> String {
    String::from("OPENAI_API_KEY")
}

/// Default for `ProxyConfig::timeout_seconds`.
fn default_timeout() -> u64 {
    120
}

/// Default for `ProxyConfig::max_concurrent`.
fn default_max_concurrent() -> usize {
    256
}

/// Default for `ProxyConfig::mode`.
fn default_mode() -> String {
    String::from("cloaktree")
}

/// Default for `VaultConfig::encryption`.
fn default_encryption() -> String {
    String::from("aes-256-gcm")
}

/// Default for `NerConfig::confidence_threshold`.
fn default_confidence() -> f64 {
    0.85
}

/// Default for `TreeConfig::storage_path`.
fn default_tree_path() -> String {
    String::from("./trees/")
}

/// Default for both `TreeConfig::index_model` and `TreeConfig::search_model`.
fn default_tree_model() -> String {
    String::from("gpt-4o")
}

/// Default for `TreeConfig::max_pages_per_node`.
fn default_max_pages() -> usize {
    10
}

/// Default for `TreeConfig::max_tokens_per_node`.
fn default_max_tokens() -> usize {
    20000
}

/// Default for `VectorConfig::algorithm`.
fn default_adcpe() -> String {
    String::from("adcpe")
}

/// Default for `LocalConfig::vector_db`.
fn default_vector_db() -> String {
    String::from("lancedb")
}

/// Default for `AuditConfig::log_path`.
fn default_audit_path() -> String {
    String::from("./audit/")
}

/// Default for `AuditConfig::format`.
fn default_jsonl() -> String {
    String::from("jsonl")
}

/// Default for `AuditConfig::retention_days`.
fn default_retention() -> u32 {
    90
}
172
173impl Default for TreeConfig {
174    fn default() -> Self {
175        Self {
176            enabled: true,
177            storage_path: default_tree_path(),
178            index_model: default_tree_model(),
179            search_model: default_tree_model(),
180            max_pages_per_node: default_max_pages(),
181            max_tokens_per_node: default_max_tokens(),
182            add_node_summaries: true,
183            pseudonymize_summaries: true,
184        }
185    }
186}
187
188impl Default for AuditConfig {
189    fn default() -> Self {
190        Self {
191            enabled: true,
192            log_path: default_audit_path(),
193            format: default_jsonl(),
194            retention_days: default_retention(),
195            log_entities: true,
196            log_mappings: false,
197        }
198    }
199}