Skip to main content

redact_api/
models.rs

1// Copyright (c) 2026 Censgate LLC.
2// Licensed under the Business Source License 1.1 (BUSL-1.1).
3// See the LICENSE file in the project root for license details,
4// including the Additional Use Grant, Change Date, and Change License.
5
6use redact_core::AnonymizationStrategy;
7use serde::{Deserialize, Serialize};
8
9/// Request to analyze text for PII entities
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct AnalyzeRequest {
12    /// Text to analyze
13    pub text: String,
14
15    /// Language code (e.g., "en", "es")
16    #[serde(default = "default_language")]
17    pub language: String,
18
19    /// Specific entity types to detect (optional)
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub entities: Option<Vec<String>>,
22
23    /// Minimum confidence threshold
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub min_score: Option<f32>,
26}
27
/// Serde fallback for `language` fields: returns the language code "en".
fn default_language() -> String {
    String::from("en")
}
31
32/// Response from analyze endpoint
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct AnalyzeResponse {
35    /// Original text (if requested)
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub original_text: Option<String>,
38
39    /// Detected entities
40    pub results: Vec<EntityResult>,
41
42    /// Metadata about the analysis
43    pub metadata: AnalysisMetadata,
44}
45
46/// A detected entity in the text
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct EntityResult {
49    /// Type of entity
50    pub entity_type: String,
51
52    /// Start position
53    pub start: usize,
54
55    /// End position
56    pub end: usize,
57
58    /// Confidence score (0.0 to 1.0)
59    pub score: f32,
60
61    /// The detected text
62    #[serde(skip_serializing_if = "Option::is_none")]
63    pub text: Option<String>,
64
65    /// Recognizer that detected this entity
66    pub recognizer_name: String,
67}
68
69/// Metadata about the analysis
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct AnalysisMetadata {
72    /// Number of recognizers used
73    pub recognizers_used: usize,
74
75    /// Processing time in milliseconds
76    pub processing_time_ms: u64,
77
78    /// Language analyzed
79    pub language: String,
80
81    /// Model version (if NER was used)
82    #[serde(skip_serializing_if = "Option::is_none")]
83    pub model_version: Option<String>,
84}
85
86/// Request to anonymize text
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct AnonymizeRequest {
89    /// Text to anonymize
90    pub text: String,
91
92    /// Language code
93    #[serde(default = "default_language")]
94    pub language: String,
95
96    /// Anonymization configuration
97    #[serde(default)]
98    pub config: AnonymizationConfig,
99
100    /// Specific entity types to anonymize (optional)
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub entities: Option<Vec<String>>,
103}
104
105/// Anonymization configuration
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct AnonymizationConfig {
108    /// Strategy to use
109    #[serde(default)]
110    pub strategy: AnonymizationStrategy,
111
112    /// Masking character (for mask strategy)
113    #[serde(default = "default_mask_char")]
114    pub mask_char: String,
115
116    /// Characters to show at start (for mask strategy)
117    #[serde(default)]
118    pub mask_start_chars: usize,
119
120    /// Characters to show at end (for mask strategy)
121    #[serde(default)]
122    pub mask_end_chars: usize,
123
124    /// Preserve format (for mask strategy)
125    #[serde(default)]
126    pub preserve_format: bool,
127
128    /// Encryption key (for encrypt strategy)
129    #[serde(skip_serializing_if = "Option::is_none")]
130    pub encryption_key: Option<String>,
131
132    /// Hash salt (for hash strategy)
133    #[serde(skip_serializing_if = "Option::is_none")]
134    pub hash_salt: Option<String>,
135}
136
137impl Default for AnonymizationConfig {
138    fn default() -> Self {
139        Self {
140            strategy: AnonymizationStrategy::Replace,
141            mask_char: default_mask_char(),
142            mask_start_chars: 0,
143            mask_end_chars: 0,
144            preserve_format: false,
145            encryption_key: None,
146            hash_salt: None,
147        }
148    }
149}
150
/// Default masking character, returned as the one-character string "*".
fn default_mask_char() -> String {
    String::from("*")
}
154
155/// Response from anonymize endpoint
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct AnonymizeResponse {
158    /// Anonymized text
159    pub text: String,
160
161    /// Entities that were anonymized
162    pub results: Vec<EntityResult>,
163
164    /// Tokens for reversible anonymization
165    #[serde(skip_serializing_if = "Option::is_none")]
166    pub tokens: Option<Vec<TokenInfo>>,
167
168    /// Metadata
169    pub metadata: AnalysisMetadata,
170}
171
172/// Token information for reversible anonymization
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct TokenInfo {
175    /// Token identifier
176    pub token_id: String,
177
178    /// Entity type
179    pub entity_type: String,
180
181    /// Start position in anonymized text
182    pub start: usize,
183
184    /// End position in anonymized text
185    pub end: usize,
186
187    /// Expiration timestamp (if applicable)
188    #[serde(skip_serializing_if = "Option::is_none")]
189    pub expires_at: Option<String>,
190}
191
192/// Health check response
193#[derive(Debug, Clone, Serialize, Deserialize)]
194pub struct HealthResponse {
195    pub status: String,
196    pub version: String,
197    /// Number of recognizer instances (e.g. pattern, NER)
198    pub recognizers: usize,
199    /// Number of entity types supported across all recognizers (e.g. 36+)
200    pub entity_types: usize,
201}
202
203/// Error response
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct ErrorResponse {
206    pub error: String,
207    pub message: String,
208}
209
210impl ErrorResponse {
211    pub fn new(error: impl Into<String>, message: impl Into<String>) -> Self {
212        Self {
213            error: error.into(),
214            message: message.into(),
215        }
216    }
217}
218
219// Conversion helpers
220impl From<redact_core::RecognizerResult> for EntityResult {
221    fn from(result: redact_core::RecognizerResult) -> Self {
222        Self {
223            entity_type: result.entity_type.as_str().to_string(),
224            start: result.start,
225            end: result.end,
226            score: result.score,
227            text: result.text,
228            recognizer_name: result.recognizer_name,
229        }
230    }
231}
232
233impl From<redact_core::AnalysisMetadata> for AnalysisMetadata {
234    fn from(metadata: redact_core::AnalysisMetadata) -> Self {
235        Self {
236            recognizers_used: metadata.recognizers_used,
237            processing_time_ms: metadata.processing_time_ms,
238            language: metadata.language,
239            model_version: metadata.model_version,
240        }
241    }
242}
243
244impl From<redact_core::Token> for TokenInfo {
245    fn from(token: redact_core::Token) -> Self {
246        Self {
247            token_id: token.token_id,
248            entity_type: token.entity_type.as_str().to_string(),
249            start: token.start,
250            end: token.end,
251            expires_at: token.expires_at.map(|dt| dt.to_rfc3339()),
252        }
253    }
254}