Skip to main content

redact_api/
models.rs

1// Copyright (c) 2026 Censgate LLC.
2// Licensed under the Business Source License 1.1 (BUSL-1.1).
3// See the LICENSE file in the project root for license details,
4// including the Additional Use Grant, Change Date, and Change License.
5
6use redact_core::AnonymizationStrategy;
7use serde::{Deserialize, Serialize};
8
/// Request to analyze text for PII entities.
///
/// Only `text` has to be present when deserializing: `language` carries a serde
/// default, and the two `Option` fields become `None` when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzeRequest {
    /// Text to analyze
    pub text: String,

    /// Language code (e.g., "en", "es").
    ///
    /// Filled in by `default_language()` when the field is missing from the input.
    #[serde(default = "default_language")]
    pub language: String,

    /// Specific entity types to detect (optional).
    ///
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entities: Option<Vec<String>>,

    /// Minimum confidence threshold (optional).
    ///
    /// Left out of the serialized output when `None`.
    // NOTE(review): presumably compared against `EntityResult::score` (0.0 to 1.0);
    // the filtering itself is not in this file — confirm against the handler.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_score: Option<f32>,
}
27
/// Serde fallback for `language` fields: the language code `"en"`.
fn default_language() -> String {
    String::from("en")
}
31
/// Response from analyze endpoint.
///
/// Carries the detected entities plus metadata about the run, and optionally an
/// echo of the analyzed text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzeResponse {
    /// Original text (if requested).
    ///
    /// Left out of the serialized output when `None`.
    // NOTE(review): how a caller "requests" the echo is not visible in this file
    // (`AnalyzeRequest` has no such flag) — confirm against the handler.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub original_text: Option<String>,

    /// Detected entities
    pub results: Vec<EntityResult>,

    /// Metadata about the analysis
    pub metadata: AnalysisMetadata,
}
45
/// A detected entity in the text.
///
/// API-facing counterpart of `redact_core::RecognizerResult`; the `From` impl in
/// this file copies every field across and turns the entity type into a string.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityResult {
    /// Type of entity, as the string form of the core entity type
    pub entity_type: String,

    /// Start position
    // NOTE(review): unit (byte vs. character offset) is not visible here; the value
    // is copied verbatim from `redact_core::RecognizerResult` — confirm there.
    pub start: usize,

    /// End position
    // NOTE(review): whether this bound is inclusive or exclusive is not visible
    // here — confirm against `redact_core`.
    pub end: usize,

    /// Confidence score (0.0 to 1.0)
    pub score: f32,

    /// The detected text.
    ///
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Recognizer that detected this entity
    pub recognizer_name: String,
}
68
/// Metadata about the analysis.
///
/// API-facing counterpart of `redact_core::AnalysisMetadata`; the `From` impl in
/// this file copies the four fields across unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisMetadata {
    /// Number of recognizers used
    pub recognizers_used: usize,

    /// Processing time in milliseconds
    pub processing_time_ms: u64,

    /// Language analyzed
    pub language: String,

    /// Model version (if NER was used).
    ///
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_version: Option<String>,
}
85
/// Request to anonymize text.
///
/// Only `text` has to be present when deserializing; every other field has a
/// fallback (see the per-field notes).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnonymizeRequest {
    /// Text to anonymize
    pub text: String,

    /// Language code.
    ///
    /// Filled in by `default_language()` when the field is missing from the input.
    #[serde(default = "default_language")]
    pub language: String,

    /// Anonymization configuration.
    ///
    /// Falls back to `AnonymizationConfig::default()` when the field is missing.
    #[serde(default)]
    pub config: AnonymizationConfig,

    /// Specific entity types to anonymize (optional).
    ///
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entities: Option<Vec<String>>,
}
104
105/// Anonymization configuration
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct AnonymizationConfig {
108    /// Strategy to use
109    #[serde(default)]
110    pub strategy: AnonymizationStrategy,
111
112    /// Masking character (for mask strategy)
113    #[serde(default = "default_mask_char")]
114    pub mask_char: String,
115
116    /// Characters to show at start (for mask strategy)
117    #[serde(default)]
118    pub mask_start_chars: usize,
119
120    /// Characters to show at end (for mask strategy)
121    #[serde(default)]
122    pub mask_end_chars: usize,
123
124    /// Preserve format (for mask strategy)
125    #[serde(default)]
126    pub preserve_format: bool,
127
128    /// Encryption key (for encrypt strategy)
129    #[serde(skip_serializing_if = "Option::is_none")]
130    pub encryption_key: Option<String>,
131
132    /// Hash salt (for hash strategy)
133    #[serde(skip_serializing_if = "Option::is_none")]
134    pub hash_salt: Option<String>,
135}
136
137impl Default for AnonymizationConfig {
138    fn default() -> Self {
139        Self {
140            strategy: AnonymizationStrategy::Replace,
141            mask_char: default_mask_char(),
142            mask_start_chars: 0,
143            mask_end_chars: 0,
144            preserve_format: false,
145            encryption_key: None,
146            hash_salt: None,
147        }
148    }
149}
150
/// Serde fallback for `AnonymizationConfig::mask_char`: a single asterisk.
fn default_mask_char() -> String {
    String::from("*")
}
154
/// Response from anonymize endpoint.
///
/// Carries the anonymized text, the entities that were replaced, metadata about
/// the run, and optionally the tokens produced for reversible anonymization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnonymizeResponse {
    /// Anonymized text
    pub text: String,

    /// Entities that were anonymized
    pub results: Vec<EntityResult>,

    /// Tokens for reversible anonymization.
    ///
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokens: Option<Vec<TokenInfo>>,

    /// Metadata
    pub metadata: AnalysisMetadata,
}
171
/// Token information for reversible anonymization.
///
/// API-facing counterpart of `redact_core::Token`; see the `From` impl in this
/// file for how each field is populated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenInfo {
    /// Token identifier
    pub token_id: String,

    /// Entity type, as the string form of the core entity type
    pub entity_type: String,

    /// Start position in anonymized text
    pub start: usize,

    /// End position in anonymized text
    pub end: usize,

    /// Expiration timestamp (if applicable).
    ///
    /// Written as an RFC 3339 string by the `From<redact_core::Token>` conversion;
    /// left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<String>,
}
191
/// Health check response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthResponse {
    /// Service status string.
    // NOTE(review): the set of possible values is not visible in this file — confirm
    // against the health handler.
    pub status: String,
    /// Version string.
    // NOTE(review): presumably the service/crate version — confirm.
    pub version: String,
    /// Recognizer count.
    // NOTE(review): presumably the number of recognizers currently registered — confirm.
    pub recognizers: usize,
}
199
/// Error response.
///
/// Built with `ErrorResponse::new`, which accepts anything convertible into a
/// `String` for both fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorResponse {
    /// Error identifier.
    // NOTE(review): presumably a short machine-readable code — confirm against callers.
    pub error: String,
    /// Error message.
    // NOTE(review): presumably the human-readable description — confirm against callers.
    pub message: String,
}
206
207impl ErrorResponse {
208    pub fn new(error: impl Into<String>, message: impl Into<String>) -> Self {
209        Self {
210            error: error.into(),
211            message: message.into(),
212        }
213    }
214}
215
216// Conversion helpers
217impl From<redact_core::RecognizerResult> for EntityResult {
218    fn from(result: redact_core::RecognizerResult) -> Self {
219        Self {
220            entity_type: result.entity_type.as_str().to_string(),
221            start: result.start,
222            end: result.end,
223            score: result.score,
224            text: result.text,
225            recognizer_name: result.recognizer_name,
226        }
227    }
228}
229
230impl From<redact_core::AnalysisMetadata> for AnalysisMetadata {
231    fn from(metadata: redact_core::AnalysisMetadata) -> Self {
232        Self {
233            recognizers_used: metadata.recognizers_used,
234            processing_time_ms: metadata.processing_time_ms,
235            language: metadata.language,
236            model_version: metadata.model_version,
237        }
238    }
239}
240
241impl From<redact_core::Token> for TokenInfo {
242    fn from(token: redact_core::Token) -> Self {
243        Self {
244            token_id: token.token_id,
245            entity_type: token.entity_type.as_str().to_string(),
246            start: token.start,
247            end: token.end,
248            expires_at: token.expires_at.map(|dt| dt.to_rfc3339()),
249        }
250    }
251}