1use crate::research::artifact::{Author, ResearchArtifact};
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub struct AnonymizationConfig {
13 pub salt: String,
15 pub author_replacement: String,
17 pub affiliation_replacement: String,
19 pub strip_orcid: bool,
21 pub strip_ror: bool,
23 pub strip_doi: bool,
25 pub redact_patterns: Vec<String>,
27}
28
29impl Default for AnonymizationConfig {
30 fn default() -> Self {
31 Self {
32 salt: String::new(),
33 author_replacement: "Anonymous Author".to_string(),
34 affiliation_replacement: "Anonymous Institution".to_string(),
35 strip_orcid: true,
36 strip_ror: true,
37 strip_doi: false,
38 redact_patterns: Vec::new(),
39 }
40 }
41}
42
43impl AnonymizationConfig {
44 pub fn new(salt: impl Into<String>) -> Self {
46 Self { salt: salt.into(), ..Default::default() }
47 }
48
49 pub fn with_author_replacement(mut self, replacement: impl Into<String>) -> Self {
51 self.author_replacement = replacement.into();
52 self
53 }
54
55 pub fn with_affiliation_replacement(mut self, replacement: impl Into<String>) -> Self {
57 self.affiliation_replacement = replacement.into();
58 self
59 }
60
61 pub fn with_strip_doi(mut self, strip: bool) -> Self {
63 self.strip_doi = strip;
64 self
65 }
66
67 pub fn with_redact_patterns(
69 mut self,
70 patterns: impl IntoIterator<Item = impl Into<String>>,
71 ) -> Self {
72 self.redact_patterns.extend(patterns.into_iter().map(Into::into));
73 self
74 }
75
76 pub fn generate_anonymous_id(&self, original_id: &str) -> String {
78 let mut hasher = Sha256::new();
79 hasher.update(self.salt.as_bytes());
80 hasher.update(original_id.as_bytes());
81 let hash = hasher.finalize();
82 format!("anon-{}", hex::encode(hash.get(..8).unwrap_or(&hash))) }
84
85 pub fn anonymize(&self, artifact: &ResearchArtifact) -> AnonymizedArtifact {
87 let anonymous_id = self.generate_anonymous_id(&artifact.id);
88
89 let anonymous_authors: Vec<AnonymousAuthor> = artifact
91 .authors
92 .iter()
93 .enumerate()
94 .map(|(i, _)| AnonymousAuthor {
95 placeholder: format!("{} {}", self.author_replacement, i + 1),
96 affiliation_placeholder: self.affiliation_replacement.clone(),
97 })
98 .collect();
99
100 let doi = if self.strip_doi { None } else { artifact.doi.clone() };
102
103 let description = artifact
105 .description
106 .as_ref()
107 .map(|desc| anonymize_text_internal(desc, &artifact.authors, self));
108
109 AnonymizedArtifact {
110 anonymous_id,
111 original_id_hash: self.hash_original_id(&artifact.id),
112 title: artifact.title.clone(),
113 authors: anonymous_authors,
114 artifact_type: artifact.artifact_type,
115 license: artifact.license.clone(),
116 doi,
117 version: artifact.version.clone(),
118 description,
119 keywords: artifact.keywords.clone(),
120 }
121 }
122
123 fn hash_original_id(&self, original_id: &str) -> String {
125 let mut hasher = Sha256::new();
126 hasher.update(self.salt.as_bytes());
127 hasher.update(b"original:");
128 hasher.update(original_id.as_bytes());
129 hex::encode(hasher.finalize())
130 }
131
132 pub fn verify_original_id(&self, artifact: &AnonymizedArtifact, original_id: &str) -> bool {
134 let expected_hash = self.hash_original_id(original_id);
135 artifact.original_id_hash == expected_hash
136 }
137}
138
139#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
141pub struct AnonymousAuthor {
142 pub placeholder: String,
144 pub affiliation_placeholder: String,
146}
147
148#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
150pub struct AnonymizedArtifact {
151 pub anonymous_id: String,
153 pub original_id_hash: String,
155 pub title: String,
157 pub authors: Vec<AnonymousAuthor>,
159 pub artifact_type: crate::research::artifact::ArtifactType,
161 pub license: crate::research::artifact::License,
163 pub doi: Option<String>,
165 pub version: String,
167 pub description: Option<String>,
169 pub keywords: Vec<String>,
171}
172
173impl AnonymizedArtifact {
174 pub fn to_double_blind_json(&self) -> String {
176 serde_json::to_string_pretty(self).unwrap_or_else(|_err| "{}".to_string())
177 }
178
179 pub fn author_count(&self) -> usize {
181 self.authors.len()
182 }
183}
184
185pub fn anonymize_text(text: &str, authors: &[Author], config: &AnonymizationConfig) -> String {
187 anonymize_text_internal(text, authors, config)
188}
189
190fn anonymize_text_internal(text: &str, authors: &[Author], config: &AnonymizationConfig) -> String {
192 let mut result = text.to_string();
193
194 for (i, author) in authors.iter().enumerate() {
195 result = result.replace(&author.name, &format!("{} {}", config.author_replacement, i + 1));
197
198 let last_name = author.last_name();
200 if last_name != author.name {
201 result = result.replace(last_name, &format!("{} {}", config.author_replacement, i + 1));
202 }
203
204 for affiliation in &author.affiliations {
206 result = result.replace(&affiliation.name, &config.affiliation_replacement);
207 }
208 }
209
210 result
211}
212
#[cfg(test)]
mod tests {
    use super::*;
    use crate::research::artifact::{Affiliation, ArtifactType, ContributorRole, License};

    /// Builds a two-author paper with a DOI and a description that names the
    /// first author and her institution.
    fn create_test_artifact() -> ResearchArtifact {
        let mit = Affiliation::new("MIT")
            .with_ror_id("https://ror.org/03yrm5c26")
            .expect("operation should succeed");
        let alice = Author::new("Alice Smith")
            .with_orcid("0000-0002-1825-0097")
            .expect("operation should succeed")
            .with_role(ContributorRole::Conceptualization)
            .with_affiliation(mit);

        let stanford = Affiliation::new("Stanford University");
        let bob = Author::new("Bob Jones").with_affiliation(stanford);

        ResearchArtifact::new(
            "paper-2024-001",
            "Novel Deep Learning Architecture",
            ArtifactType::Paper,
            License::CcBy4,
        )
        .with_authors([alice, bob])
        .with_doi("10.1234/example.2024")
        .with_description("A groundbreaking paper by Alice Smith from MIT")
    }

    /// Anonymizes the shared fixture artifact under `config`.
    fn anonymize_fixture(config: &AnonymizationConfig) -> AnonymizedArtifact {
        config.anonymize(&create_test_artifact())
    }

    /// Author names are replaced by numbered placeholders.
    #[test]
    fn test_anonymize_removes_authors() {
        let anonymized = anonymize_fixture(&AnonymizationConfig::new("secret-salt"));

        let placeholders: Vec<&str> =
            anonymized.authors.iter().map(|author| author.placeholder.as_str()).collect();

        assert_eq!(placeholders, ["Anonymous Author 1", "Anonymous Author 2"]);
    }

    /// Affiliations are replaced by the institution placeholder.
    #[test]
    fn test_anonymize_removes_affiliations() {
        let anonymized = anonymize_fixture(&AnonymizationConfig::new("secret-salt"));

        for author in &anonymized.authors[..2] {
            assert_eq!(author.affiliation_placeholder, "Anonymous Institution");
        }
    }

    /// The same salt and ID always produce the same anonymous ID.
    #[test]
    fn test_anonymous_id_deterministic() {
        let config = AnonymizationConfig::new("fixed-salt");

        let first = config.generate_anonymous_id("paper-001");
        let second = config.generate_anonymous_id("paper-001");

        assert_eq!(first, second);
        assert!(first.starts_with("anon-"));
    }

    /// Different salts produce different anonymous IDs for the same input.
    #[test]
    fn test_salt_changes_ids() {
        let with_first_salt = AnonymizationConfig::new("salt-1").generate_anonymous_id("paper-001");
        let with_second_salt =
            AnonymizationConfig::new("salt-2").generate_anonymous_id("paper-001");

        assert_ne!(with_first_salt, with_second_salt);
    }

    /// The JSON export contains placeholders and the title, but no
    /// identifying names or institutions.
    #[test]
    fn test_double_blind_export() {
        let json =
            anonymize_fixture(&AnonymizationConfig::new("review-salt")).to_double_blind_json();

        // Author names and institutions must not leak.
        for leaked in ["Alice Smith", "Bob Jones", "MIT", "Stanford"] {
            assert!(!json.contains(leaked));
        }

        // Placeholders and the untouched title must be present.
        for expected in
            ["Anonymous Author", "Anonymous Institution", "Novel Deep Learning Architecture"]
        {
            assert!(json.contains(expected));
        }
    }

    /// The DOI is kept by default and dropped when stripping is enabled.
    #[test]
    fn test_strip_doi_option() {
        let artifact = create_test_artifact();

        let kept = AnonymizationConfig::new("salt").anonymize(&artifact);
        assert!(kept.doi.is_some());

        let stripped = AnonymizationConfig::new("salt").with_strip_doi(true).anonymize(&artifact);
        assert!(stripped.doi.is_none());
    }

    /// Verification succeeds for the true original ID and fails for another.
    #[test]
    fn test_verify_original_id() {
        let config = AnonymizationConfig::new("verification-salt");
        let anonymized = anonymize_fixture(&config);

        assert!(config.verify_original_id(&anonymized, "paper-2024-001"));
        assert!(!config.verify_original_id(&anonymized, "paper-2024-002"));
    }

    /// Custom placeholder texts are used instead of the defaults.
    #[test]
    fn test_custom_replacements() {
        let config = AnonymizationConfig::new("salt")
            .with_author_replacement("Reviewer")
            .with_affiliation_replacement("Hidden University");

        let anonymized = anonymize_fixture(&config);
        let first_author = &anonymized.authors[0];

        assert_eq!(first_author.placeholder, "Reviewer 1");
        assert_eq!(first_author.affiliation_placeholder, "Hidden University");
    }

    /// Full names and affiliations are scrubbed from free text.
    #[test]
    fn test_anonymize_text() {
        let config = AnonymizationConfig::new("salt");
        let authors = [Author::new("Alice Smith").with_affiliation(Affiliation::new("MIT"))];

        let scrubbed =
            anonymize_text("This paper by Alice Smith from MIT presents...", &authors, &config);

        assert!(!scrubbed.contains("Alice Smith"));
        assert!(!scrubbed.contains("MIT"));
        assert!(scrubbed.contains("Anonymous Author 1"));
        assert!(scrubbed.contains("Anonymous Institution"));
    }

    /// A bare last name is scrubbed even when the full name does not appear.
    #[test]
    fn test_anonymize_text_last_name() {
        let config = AnonymizationConfig::new("salt");
        let authors = [Author::new("Alice Marie Smith").with_affiliation(Affiliation::new("MIT"))];

        let scrubbed = anonymize_text("Smith et al. demonstrated...", &authors, &config);

        assert!(!scrubbed.contains("Smith"));
        assert!(scrubbed.contains("Anonymous Author 1"));
    }

    /// `author_count` reports the number of placeholder authors.
    #[test]
    fn test_author_count() {
        let anonymized = anonymize_fixture(&AnonymizationConfig::new("salt"));

        assert_eq!(anonymized.author_count(), 2);
    }

    /// Non-identifying fields are carried over unchanged.
    #[test]
    fn test_preserved_fields() {
        let original = create_test_artifact();
        let anonymized = AnonymizationConfig::new("salt").anonymize(&original);

        assert_eq!(anonymized.title, original.title);
        assert_eq!(anonymized.version, original.version);
        assert_eq!(anonymized.artifact_type, original.artifact_type);
        assert_eq!(anonymized.license, original.license);
        assert_eq!(anonymized.keywords, original.keywords);
    }
}