Skip to main content

entrenar/research/
anonymization.rs

1//! Anonymization Config for double-blind (ENT-023)
2//!
3//! Provides anonymization capabilities for research artifacts
4//! to support double-blind peer review.
5
6use crate::research::artifact::{Author, ResearchArtifact};
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9
10/// Configuration for anonymization
11#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub struct AnonymizationConfig {
13    /// Salt for deterministic anonymous ID generation
14    pub salt: String,
15    /// Replacement text for author names
16    pub author_replacement: String,
17    /// Replacement text for affiliations
18    pub affiliation_replacement: String,
19    /// Whether to strip ORCID identifiers
20    pub strip_orcid: bool,
21    /// Whether to strip ROR identifiers
22    pub strip_ror: bool,
23    /// Whether to strip DOI
24    pub strip_doi: bool,
25    /// Custom patterns to redact (regex strings)
26    pub redact_patterns: Vec<String>,
27}
28
29impl Default for AnonymizationConfig {
30    fn default() -> Self {
31        Self {
32            salt: String::new(),
33            author_replacement: "Anonymous Author".to_string(),
34            affiliation_replacement: "Anonymous Institution".to_string(),
35            strip_orcid: true,
36            strip_ror: true,
37            strip_doi: false,
38            redact_patterns: Vec::new(),
39        }
40    }
41}
42
43impl AnonymizationConfig {
44    /// Create a new anonymization config with a salt
45    pub fn new(salt: impl Into<String>) -> Self {
46        Self { salt: salt.into(), ..Default::default() }
47    }
48
49    /// Set the author replacement text
50    pub fn with_author_replacement(mut self, replacement: impl Into<String>) -> Self {
51        self.author_replacement = replacement.into();
52        self
53    }
54
55    /// Set the affiliation replacement text
56    pub fn with_affiliation_replacement(mut self, replacement: impl Into<String>) -> Self {
57        self.affiliation_replacement = replacement.into();
58        self
59    }
60
61    /// Set whether to strip DOI
62    pub fn with_strip_doi(mut self, strip: bool) -> Self {
63        self.strip_doi = strip;
64        self
65    }
66
67    /// Add patterns to redact
68    pub fn with_redact_patterns(
69        mut self,
70        patterns: impl IntoIterator<Item = impl Into<String>>,
71    ) -> Self {
72        self.redact_patterns.extend(patterns.into_iter().map(Into::into));
73        self
74    }
75
76    /// Generate a deterministic anonymous ID from an original ID
77    pub fn generate_anonymous_id(&self, original_id: &str) -> String {
78        let mut hasher = Sha256::new();
79        hasher.update(self.salt.as_bytes());
80        hasher.update(original_id.as_bytes());
81        let hash = hasher.finalize();
82        format!("anon-{}", hex::encode(hash.get(..8).unwrap_or(&hash))) // Use first 8 bytes for brevity
83    }
84
85    /// Anonymize a research artifact
86    pub fn anonymize(&self, artifact: &ResearchArtifact) -> AnonymizedArtifact {
87        let anonymous_id = self.generate_anonymous_id(&artifact.id);
88
89        // Create anonymous authors
90        let anonymous_authors: Vec<AnonymousAuthor> = artifact
91            .authors
92            .iter()
93            .enumerate()
94            .map(|(i, _)| AnonymousAuthor {
95                placeholder: format!("{} {}", self.author_replacement, i + 1),
96                affiliation_placeholder: self.affiliation_replacement.clone(),
97            })
98            .collect();
99
100        // Strip DOI if configured
101        let doi = if self.strip_doi { None } else { artifact.doi.clone() };
102
103        // Anonymize description if present
104        let description = artifact
105            .description
106            .as_ref()
107            .map(|desc| anonymize_text_internal(desc, &artifact.authors, self));
108
109        AnonymizedArtifact {
110            anonymous_id,
111            original_id_hash: self.hash_original_id(&artifact.id),
112            title: artifact.title.clone(),
113            authors: anonymous_authors,
114            artifact_type: artifact.artifact_type,
115            license: artifact.license.clone(),
116            doi,
117            version: artifact.version.clone(),
118            description,
119            keywords: artifact.keywords.clone(),
120        }
121    }
122
123    /// Hash the original ID for later verification
124    fn hash_original_id(&self, original_id: &str) -> String {
125        let mut hasher = Sha256::new();
126        hasher.update(self.salt.as_bytes());
127        hasher.update(b"original:");
128        hasher.update(original_id.as_bytes());
129        hex::encode(hasher.finalize())
130    }
131
132    /// Verify that an anonymous artifact came from a specific original ID
133    pub fn verify_original_id(&self, artifact: &AnonymizedArtifact, original_id: &str) -> bool {
134        let expected_hash = self.hash_original_id(original_id);
135        artifact.original_id_hash == expected_hash
136    }
137}
138
139/// Anonymous author placeholder
140#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
141pub struct AnonymousAuthor {
142    /// Placeholder name (e.g., "Anonymous Author 1")
143    pub placeholder: String,
144    /// Placeholder affiliation
145    pub affiliation_placeholder: String,
146}
147
148/// Anonymized research artifact
149#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
150pub struct AnonymizedArtifact {
151    /// Anonymous ID (derived from original ID + salt)
152    pub anonymous_id: String,
153    /// Hash of original ID for later verification
154    pub original_id_hash: String,
155    /// Title (preserved)
156    pub title: String,
157    /// Anonymous author placeholders
158    pub authors: Vec<AnonymousAuthor>,
159    /// Artifact type (preserved)
160    pub artifact_type: crate::research::artifact::ArtifactType,
161    /// License (preserved)
162    pub license: crate::research::artifact::License,
163    /// DOI (may be stripped based on config)
164    pub doi: Option<String>,
165    /// Version (preserved)
166    pub version: String,
167    /// Description (preserved)
168    pub description: Option<String>,
169    /// Keywords (preserved)
170    pub keywords: Vec<String>,
171}
172
173impl AnonymizedArtifact {
174    /// Convert to a format suitable for double-blind export (JSON)
175    pub fn to_double_blind_json(&self) -> String {
176        serde_json::to_string_pretty(self).unwrap_or_else(|_err| "{}".to_string())
177    }
178
179    /// Get author count
180    pub fn author_count(&self) -> usize {
181        self.authors.len()
182    }
183}
184
185/// Anonymize a string by replacing author names
186pub fn anonymize_text(text: &str, authors: &[Author], config: &AnonymizationConfig) -> String {
187    anonymize_text_internal(text, authors, config)
188}
189
190/// Internal function for text anonymization
191fn anonymize_text_internal(text: &str, authors: &[Author], config: &AnonymizationConfig) -> String {
192    let mut result = text.to_string();
193
194    for (i, author) in authors.iter().enumerate() {
195        // Replace full name
196        result = result.replace(&author.name, &format!("{} {}", config.author_replacement, i + 1));
197
198        // Replace last name only
199        let last_name = author.last_name();
200        if last_name != author.name {
201            result = result.replace(last_name, &format!("{} {}", config.author_replacement, i + 1));
202        }
203
204        // Replace affiliations
205        for affiliation in &author.affiliations {
206            result = result.replace(&affiliation.name, &config.affiliation_replacement);
207        }
208    }
209
210    result
211}
212
#[cfg(test)]
mod tests {
    use super::*;
    use crate::research::artifact::{Affiliation, ArtifactType, ContributorRole, License};

    /// Fixture: a paper with two affiliated authors, a DOI, and a description that
    /// mentions the first author and her institution by name.
    fn create_test_artifact() -> ResearchArtifact {
        let mit = Affiliation::new("MIT")
            .with_ror_id("https://ror.org/03yrm5c26")
            .expect("operation should succeed");
        let stanford = Affiliation::new("Stanford University");

        let alice = Author::new("Alice Smith")
            .with_orcid("0000-0002-1825-0097")
            .expect("operation should succeed")
            .with_role(ContributorRole::Conceptualization)
            .with_affiliation(mit);
        let bob = Author::new("Bob Jones").with_affiliation(stanford);

        ResearchArtifact::new(
            "paper-2024-001",
            "Novel Deep Learning Architecture",
            ArtifactType::Paper,
            License::CcBy4,
        )
        .with_authors([alice, bob])
        .with_doi("10.1234/example.2024")
        .with_description("A groundbreaking paper by Alice Smith from MIT")
    }

    /// Anonymizes the shared fixture with a default config that uses `salt`.
    fn anonymize_fixture(salt: &str) -> AnonymizedArtifact {
        AnonymizationConfig::new(salt).anonymize(&create_test_artifact())
    }

    #[test]
    fn test_anonymize_removes_authors() {
        let anonymized = anonymize_fixture("secret-salt");

        let placeholders: Vec<&str> =
            anonymized.authors.iter().map(|author| author.placeholder.as_str()).collect();

        // Comparing against the array checks both the count and each placeholder.
        assert_eq!(placeholders, ["Anonymous Author 1", "Anonymous Author 2"]);
    }

    #[test]
    fn test_anonymize_removes_affiliations() {
        let anonymized = anonymize_fixture("secret-salt");

        for index in 0..2 {
            assert_eq!(anonymized.authors[index].affiliation_placeholder, "Anonymous Institution");
        }
    }

    #[test]
    fn test_anonymous_id_deterministic() {
        let config = AnonymizationConfig::new("fixed-salt");

        let first = config.generate_anonymous_id("paper-001");
        let second = config.generate_anonymous_id("paper-001");

        assert_eq!(first, second);
        assert!(first.starts_with("anon-"));
    }

    #[test]
    fn test_salt_changes_ids() {
        let ids: Vec<String> = ["salt-1", "salt-2"]
            .iter()
            .map(|salt| AnonymizationConfig::new(*salt).generate_anonymous_id("paper-001"))
            .collect();

        assert_ne!(ids[0], ids[1]);
    }

    #[test]
    fn test_double_blind_export() {
        let json = anonymize_fixture("review-salt").to_double_blind_json();

        // Neither author names nor affiliations may survive in the export.
        for leaked in ["Alice Smith", "Bob Jones", "MIT", "Stanford"] {
            assert!(!json.contains(leaked));
        }

        // Placeholders must be present and the title must be preserved.
        for expected in
            ["Anonymous Author", "Anonymous Institution", "Novel Deep Learning Architecture"]
        {
            assert!(json.contains(expected));
        }
    }

    #[test]
    fn test_strip_doi_option() {
        let artifact = create_test_artifact();

        // Default configuration keeps the DOI.
        let keeps_doi = AnonymizationConfig::new("salt").anonymize(&artifact);
        assert!(keeps_doi.doi.is_some());

        // Opting in to stripping removes it.
        let strips_doi = AnonymizationConfig::new("salt").with_strip_doi(true).anonymize(&artifact);
        assert!(strips_doi.doi.is_none());
    }

    #[test]
    fn test_verify_original_id() {
        let config = AnonymizationConfig::new("verification-salt");
        let anonymized = config.anonymize(&create_test_artifact());

        // The real original ID verifies; a different one does not.
        assert!(config.verify_original_id(&anonymized, "paper-2024-001"));
        assert!(!config.verify_original_id(&anonymized, "paper-2024-002"));
    }

    #[test]
    fn test_custom_replacements() {
        let config = AnonymizationConfig::new("salt")
            .with_author_replacement("Reviewer")
            .with_affiliation_replacement("Hidden University");

        let anonymized = config.anonymize(&create_test_artifact());
        let first_author = &anonymized.authors[0];

        assert_eq!(first_author.placeholder, "Reviewer 1");
        assert_eq!(first_author.affiliation_placeholder, "Hidden University");
    }

    #[test]
    fn test_anonymize_text() {
        let authors = [Author::new("Alice Smith").with_affiliation(Affiliation::new("MIT"))];
        let config = AnonymizationConfig::new("salt");

        let scrubbed =
            anonymize_text("This paper by Alice Smith from MIT presents...", &authors, &config);

        assert!(!scrubbed.contains("Alice Smith"));
        assert!(!scrubbed.contains("MIT"));
        assert!(scrubbed.contains("Anonymous Author 1"));
        assert!(scrubbed.contains("Anonymous Institution"));
    }

    #[test]
    fn test_anonymize_text_last_name() {
        let authors = [Author::new("Alice Marie Smith").with_affiliation(Affiliation::new("MIT"))];
        let config = AnonymizationConfig::new("salt");

        let scrubbed = anonymize_text("Smith et al. demonstrated...", &authors, &config);

        assert!(!scrubbed.contains("Smith"));
        assert!(scrubbed.contains("Anonymous Author 1"));
    }

    #[test]
    fn test_author_count() {
        assert_eq!(anonymize_fixture("salt").author_count(), 2);
    }

    #[test]
    fn test_preserved_fields() {
        let original = create_test_artifact();
        let anonymized = AnonymizationConfig::new("salt").anonymize(&original);

        // Fields that anonymization must pass through untouched.
        assert_eq!(anonymized.title, original.title);
        assert_eq!(anonymized.version, original.version);
        assert_eq!(anonymized.artifact_type, original.artifact_type);
        assert_eq!(anonymized.license, original.license);
        assert_eq!(anonymized.keywords, original.keywords);
    }
}