Skip to main content

entrenar/research/
citation.rs

//! Citation Metadata with BibTeX/CFF export (ENT-020)
//!
//! Provides citation generation in standard academic formats.
4
5use crate::research::artifact::{ArtifactType, Author, ResearchArtifact};
6use serde::{Deserialize, Serialize};
7
8/// Citation metadata for academic reference
9#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
10pub struct CitationMetadata {
11    /// The research artifact being cited
12    pub artifact: ResearchArtifact,
13    /// Publication year
14    pub year: u16,
15    /// Journal or venue name (optional)
16    pub journal: Option<String>,
17    /// Volume number (optional)
18    pub volume: Option<String>,
19    /// Page range (optional)
20    pub pages: Option<String>,
21    /// URL to the resource
22    pub url: Option<String>,
23    /// Additional keywords
24    pub keywords: Vec<String>,
25}
26
27impl CitationMetadata {
28    /// Create citation metadata from an artifact
29    pub fn new(artifact: ResearchArtifact, year: u16) -> Self {
30        Self {
31            artifact,
32            year,
33            journal: None,
34            volume: None,
35            pages: None,
36            url: None,
37            keywords: Vec::new(),
38        }
39    }
40
41    /// Set journal/venue
42    pub fn with_journal(mut self, journal: impl Into<String>) -> Self {
43        self.journal = Some(journal.into());
44        self
45    }
46
47    /// Set volume
48    pub fn with_volume(mut self, volume: impl Into<String>) -> Self {
49        self.volume = Some(volume.into());
50        self
51    }
52
53    /// Set page range
54    pub fn with_pages(mut self, pages: impl Into<String>) -> Self {
55        self.pages = Some(pages.into());
56        self
57    }
58
59    /// Set URL
60    pub fn with_url(mut self, url: impl Into<String>) -> Self {
61        self.url = Some(url.into());
62        self
63    }
64
65    /// Add keywords
66    pub fn with_keywords(mut self, keywords: impl IntoIterator<Item = impl Into<String>>) -> Self {
67        self.keywords.extend(keywords.into_iter().map(Into::into));
68        self
69    }
70
71    /// Generate a citation key (author_year_firstword pattern)
72    pub fn generate_citation_key(&self) -> String {
73        let author_part = self
74            .artifact
75            .first_author()
76            .map_or_else(|| "anon".to_string(), |a| a.last_name().to_lowercase());
77
78        let first_word = self
79            .artifact
80            .title
81            .split_whitespace()
82            .next()
83            .unwrap_or("untitled")
84            .to_lowercase()
85            .chars()
86            .filter(|c| c.is_alphanumeric())
87            .collect::<String>();
88
89        format!("{author_part}_{year}_{first_word}", year = self.year)
90    }
91
92    /// Export to BibTeX format
93    pub fn to_bibtex(&self) -> String {
94        let entry_type = match self.artifact.artifact_type {
95            ArtifactType::Paper => "article",
96            ArtifactType::Dataset => "misc",
97            ArtifactType::Model => "misc",
98            ArtifactType::Code => "software",
99            ArtifactType::Notebook => "misc",
100            ArtifactType::Workflow => "misc",
101        };
102
103        let key = self.generate_citation_key();
104        let mut bibtex = format!("@{entry_type}{{{key},\n");
105
106        // Authors (BibTeX format: "Last1, First1 and Last2, First2")
107        let authors = format_bibtex_authors(&self.artifact.authors);
108        bibtex.push_str(&format!("  author = {{{}}},\n", escape_bibtex(&authors)));
109
110        // Title
111        bibtex.push_str(&format!("  title = {{{{{}}}}},\n", escape_bibtex(&self.artifact.title)));
112
113        // Year
114        bibtex.push_str(&format!("  year = {{{}}},\n", self.year));
115
116        // Optional fields
117        if let Some(journal) = &self.journal {
118            bibtex.push_str(&format!("  journal = {{{}}},\n", escape_bibtex(journal)));
119        }
120
121        if let Some(volume) = &self.volume {
122            bibtex.push_str(&format!("  volume = {{{volume}}},\n"));
123        }
124
125        if let Some(pages) = &self.pages {
126            bibtex.push_str(&format!("  pages = {{{pages}}},\n"));
127        }
128
129        if let Some(doi) = &self.artifact.doi {
130            bibtex.push_str(&format!("  doi = {{{doi}}},\n"));
131        }
132
133        if let Some(url) = &self.url {
134            bibtex.push_str(&format!("  url = {{{url}}},\n"));
135        }
136
137        if !self.keywords.is_empty() {
138            let kw = self.keywords.join(", ");
139            bibtex.push_str(&format!("  keywords = {{{kw}}},\n"));
140        }
141
142        bibtex.push('}');
143        bibtex
144    }
145
146    /// Export to CITATION.cff format (YAML)
147    pub fn to_cff(&self) -> String {
148        let mut cff = String::new();
149
150        cff.push_str("cff-version: 1.2.0\n");
151        cff.push_str(&format!(
152            "message: \"If you use this {}, please cite it as below.\"\n",
153            self.artifact.artifact_type.to_string().to_lowercase()
154        ));
155
156        // Type mapping
157        let cff_type = match self.artifact.artifact_type {
158            ArtifactType::Paper => "article",
159            ArtifactType::Dataset => "dataset",
160            ArtifactType::Model => "software",
161            ArtifactType::Code => "software",
162            ArtifactType::Notebook => "software",
163            ArtifactType::Workflow => "software",
164        };
165        cff.push_str(&format!("type: {cff_type}\n"));
166
167        // Title
168        cff.push_str(&format!("title: \"{}\"\n", escape_yaml(&self.artifact.title)));
169
170        // Version
171        cff.push_str(&format!("version: \"{}\"\n", self.artifact.version));
172
173        // License
174        cff.push_str(&format!("license: {}\n", self.artifact.license));
175
176        // DOI
177        if let Some(doi) = &self.artifact.doi {
178            cff.push_str(&format!("doi: {doi}\n"));
179        }
180
181        // URL
182        if let Some(url) = &self.url {
183            cff.push_str(&format!("url: \"{url}\"\n"));
184        }
185
186        // Date
187        cff.push_str(&format!("date-released: \"{}-01-01\"\n", self.year));
188
189        // Authors
190        cff.push_str("authors:\n");
191        for author in &self.artifact.authors {
192            cff.push_str(&format_cff_author(author));
193        }
194
195        // Keywords
196        if !self.keywords.is_empty() || !self.artifact.keywords.is_empty() {
197            cff.push_str("keywords:\n");
198            for kw in &self.artifact.keywords {
199                cff.push_str(&format!("  - \"{kw}\"\n"));
200            }
201            for kw in &self.keywords {
202                cff.push_str(&format!("  - \"{kw}\"\n"));
203            }
204        }
205
206        // Abstract
207        if let Some(desc) = &self.artifact.description {
208            cff.push_str(&format!("abstract: \"{}\"\n", escape_yaml(desc)));
209        }
210
211        cff
212    }
213}
214
215/// Format authors for BibTeX (Last, First and Last2, First2)
216fn format_bibtex_authors(authors: &[Author]) -> String {
217    authors
218        .iter()
219        .map(|a| {
220            let parts: Vec<&str> = a.name.split_whitespace().collect();
221            if parts.len() >= 2 {
222                let last = parts.last().expect("parts guaranteed non-empty by len check");
223                let first = parts[..parts.len() - 1].join(" ");
224                format!("{last}, {first}")
225            } else {
226                a.name.clone()
227            }
228        })
229        .collect::<Vec<_>>()
230        .join(" and ")
231}
232
233/// Format a single author for CFF
234fn format_cff_author(author: &Author) -> String {
235    let mut cff = String::new();
236
237    let parts: Vec<&str> = author.name.split_whitespace().collect();
238    if parts.len() >= 2 {
239        let family = parts.last().expect("parts guaranteed non-empty by len check");
240        let given = parts[..parts.len() - 1].join(" ");
241        cff.push_str(&format!("  - family-names: \"{family}\"\n"));
242        cff.push_str(&format!("    given-names: \"{given}\"\n"));
243    } else {
244        cff.push_str(&format!("  - name: \"{}\"\n", author.name));
245    }
246
247    if let Some(orcid) = &author.orcid {
248        cff.push_str(&format!("    orcid: \"https://orcid.org/{orcid}\"\n"));
249    }
250
251    if let Some(affiliation) = author.affiliations.first() {
252        cff.push_str(&format!("    affiliation: \"{}\"\n", affiliation.name));
253    }
254
255    cff
256}
257
/// Escapes characters that are special to BibTeX/LaTeX in plain text.
///
/// * `& % $ # _ { }` are prefixed with a backslash.
/// * `~` and `^` become `\~{}` and `\^{}`.
/// * `\` becomes `\textbackslash{}`, so a literal backslash in the input can
///   no longer start a LaTeX command.
///
/// The input is processed in a single pass, so characters introduced by one
/// replacement are never escaped a second time.
fn escape_bibtex(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str(r"\textbackslash{}"),
            '&' | '%' | '$' | '#' | '_' | '{' | '}' => {
                escaped.push('\\');
                escaped.push(ch);
            }
            '~' => escaped.push_str(r"\~{}"),
            '^' => escaped.push_str(r"\^{}"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
270
/// Escapes `s` for use inside a YAML double-quoted scalar.
///
/// The surrounding quotes are not added. Backslash and `"` are
/// backslash-escaped; newline, carriage return and tab become `\n`, `\r` and
/// `\t`; any other control character becomes a `\uXXXX` escape. Without this,
/// a multi-line value would be line-folded by the YAML parser instead of
/// round-tripping.
fn escape_yaml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // Remaining control characters are all below U+0100, so four hex
            // digits are always enough.
            c if c.is_control() => escaped.push_str(&format!("\\u{:04X}", c as u32)),
            _ => escaped.push(ch),
        }
    }
    escaped
}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::research::artifact::{Affiliation, ContributorRole, License};

    /// Paper artifact with a single author (ORCID, role, affiliation), a DOI
    /// and a description; shared by most tests below.
    fn create_test_artifact() -> ResearchArtifact {
        let alice = Author::new("Alice Smith")
            .with_orcid("0000-0002-1825-0097")
            .expect("operation should succeed")
            .with_role(ContributorRole::Conceptualization)
            .with_affiliation(Affiliation::new("MIT"));

        let paper = ResearchArtifact::new(
            "test-001",
            "Deep Learning for Natural Language Processing",
            ArtifactType::Paper,
            License::CcBy4,
        );

        paper
            .with_author(alice)
            .with_doi("10.1234/example.2024")
            .with_description("A novel approach to NLP")
    }

    /// A fully populated paper renders every expected BibTeX field.
    #[test]
    fn test_bibtex_generation() {
        let rendered = CitationMetadata::new(create_test_artifact(), 2024)
            .with_journal("Nature Machine Intelligence")
            .with_volume("6")
            .with_pages("123-145")
            .to_bibtex();

        assert!(rendered.starts_with("@article{"));

        let expected_fields = [
            "author = {Smith, Alice}",
            "title = {{Deep Learning for Natural Language Processing}}",
            "year = {2024}",
            "journal = {Nature Machine Intelligence}",
            "volume = {6}",
            "pages = {123-145}",
            "doi = {10.1234/example.2024}",
        ];
        for field in expected_fields {
            assert!(rendered.contains(field));
        }
    }

    /// `&` and `%` in a title are escaped in the BibTeX output.
    #[test]
    fn test_bibtex_escaping_special_chars() {
        let tricky = ResearchArtifact::new(
            "test-002",
            "Machine Learning & Data Science: A 100% Complete Guide",
            ArtifactType::Paper,
            License::Mit,
        )
        .with_author(Author::new("John O'Brien"));

        let rendered = CitationMetadata::new(tricky, 2024).to_bibtex();

        assert!(rendered.contains(r"Machine Learning \& Data Science"));
        assert!(rendered.contains(r"100\% Complete"));
    }

    /// The CFF export carries metadata, author details and keywords.
    #[test]
    fn test_cff_generation() {
        let rendered = CitationMetadata::new(create_test_artifact(), 2024)
            .with_url("https://example.com/paper")
            .with_keywords(["deep learning", "NLP"])
            .to_cff();

        let expected_lines = [
            "cff-version: 1.2.0",
            "type: article",
            "title: \"Deep Learning for Natural Language Processing\"",
            "license: CC-BY-4.0",
            "doi: 10.1234/example.2024",
            "url: \"https://example.com/paper\"",
            "family-names: \"Smith\"",
            "given-names: \"Alice\"",
            "orcid: \"https://orcid.org/0000-0002-1825-0097\"",
            "affiliation: \"MIT\"",
            "- \"deep learning\"",
            "- \"NLP\"",
        ];
        for line in expected_lines {
            assert!(rendered.contains(line));
        }
    }

    /// Key is `<last name>_<year>_<first title word>`, all lowercase.
    #[test]
    fn test_citation_key_generation() {
        let metadata = CitationMetadata::new(create_test_artifact(), 2024);

        assert_eq!(metadata.generate_citation_key(), "smith_2024_deep");
    }

    /// Without authors the key falls back to `anon`.
    #[test]
    fn test_citation_key_no_author() {
        let authorless = ResearchArtifact::new(
            "test-003",
            "Anonymous Dataset",
            ArtifactType::Dataset,
            License::Cc0,
        );

        let metadata = CitationMetadata::new(authorless, 2023);

        assert_eq!(metadata.generate_citation_key(), "anon_2023_anonymous");
    }

    /// Several authors are joined with ` and ` in `Last, First` form.
    #[test]
    fn test_multiple_authors_bibtex() {
        let team = [
            Author::new("Alice Smith"),
            Author::new("Bob Jones"),
            Author::new("Carol Williams"),
        ];

        let joint_paper = ResearchArtifact::new(
            "test-004",
            "Collaborative Research Paper",
            ArtifactType::Paper,
            License::CcBy4,
        )
        .with_authors(team);

        let rendered = CitationMetadata::new(joint_paper, 2024).to_bibtex();

        assert!(rendered.contains("author = {Smith, Alice and Jones, Bob and Williams, Carol}"));
    }

    /// Datasets are exported as `@misc`.
    #[test]
    fn test_dataset_bibtex_type() {
        let dataset = ResearchArtifact::new(
            "dataset-001",
            "ImageNet Subset",
            ArtifactType::Dataset,
            License::CcBy4,
        );

        let rendered = CitationMetadata::new(dataset, 2024).to_bibtex();

        assert!(rendered.starts_with("@misc{"));
    }

    /// Code artifacts are exported as `@software`.
    #[test]
    fn test_software_bibtex_type() {
        let code = ResearchArtifact::new(
            "code-001",
            "PyTorch Lightning",
            ArtifactType::Code,
            License::Apache2,
        );

        let rendered = CitationMetadata::new(code, 2024).to_bibtex();

        assert!(rendered.starts_with("@software{"));
    }

    /// A single-token author name is emitted under `name`.
    #[test]
    fn test_cff_single_name_author() {
        let mononym = Author::new("Madonna");
        let code =
            ResearchArtifact::new("test-005", "Single Name Test", ArtifactType::Code, License::Mit)
                .with_author(mononym);

        let rendered = CitationMetadata::new(code, 2024).to_cff();

        assert!(rendered.contains("- name: \"Madonna\""));
    }

    /// Citation keywords are comma-joined in the BibTeX `keywords` field.
    #[test]
    fn test_keywords_in_bibtex() {
        let topics = ["machine learning", "transformers", "attention"];

        let rendered = CitationMetadata::new(create_test_artifact(), 2024)
            .with_keywords(topics)
            .to_bibtex();

        assert!(rendered.contains("keywords = {machine learning, transformers, attention}"));
    }
}