1use crate::models::WorkMeta;
2
/// Strategy used to derive a citation key from a work's metadata.
///
/// The enum is a plain, fieldless selector, so it is cheap to copy, totally
/// comparable, and usable as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeyStyle {
    /// Key built from author family names followed by the publication year
    /// (see [`generate_citation_key`]).
    AuthorYear,
    /// Key built from the leading significant words of the title followed by
    /// the publication year.
    ShortTitle,
}
11
12pub fn generate_citation_key_by_style(work: &WorkMeta, style: &KeyStyle) -> String {
14 match style {
15 KeyStyle::AuthorYear => generate_citation_key(&work.authors, work.year),
16 KeyStyle::ShortTitle => generate_short_title_key(work),
17 }
18}
19
20pub fn generate_citation_key(authors: &[String], year: Option<i32>) -> String {
25 let author_part = authors
26 .iter()
27 .take(2)
28 .filter_map(|a| {
29 a.split(',')
31 .next()
32 .map(|family| family.trim().to_string())
33 })
34 .filter(|s| !s.is_empty())
35 .map(capitalise_first)
36 .collect::<Vec<_>>()
37 .join("");
38
39 let year_part = year
40 .map(|y| y.to_string())
41 .unwrap_or_default();
42
43 if author_part.is_empty() {
44 format!("Unknown{year_part}")
45 } else {
46 format!("{author_part}{year_part}")
47 }
48}
49
/// Returns a key derived from `base_key` that does not occur in `existing_keys`.
///
/// Candidates are tried in this order and the first free one is returned:
/// 1. `base_key` itself,
/// 2. `base_key` + one letter `a`..=`z`,
/// 3. `base_key` + two letters `aa`..=`zz`,
/// 4. `base_key` + `_conflict`,
/// 5. `base_key` + `_conflict` + a counter starting at 2.
///
/// Step 5 guarantees the returned key is always unused, even when every
/// earlier candidate is already taken.
pub fn resolve_key_conflict(base_key: &str, existing_keys: &[String]) -> String {
    // Fast path: a single scan without allocating, which is the common case.
    if !existing_keys.iter().any(|key| key.as_str() == base_key) {
        return base_key.to_string();
    }

    // Only build the lookup set once a conflict is known; it turns each of the
    // (up to several hundred) candidate checks into an O(1) lookup.
    let taken: std::collections::HashSet<&str> =
        existing_keys.iter().map(String::as_str).collect();

    let one_letter = ('a'..='z').map(move |c| format!("{base_key}{c}"));
    let two_letters = ('a'..='z')
        .flat_map(move |c1| ('a'..='z').map(move |c2| format!("{base_key}{c1}{c2}")));

    if let Some(free) = one_letter
        .chain(two_letters)
        .find(|candidate| !taken.contains(candidate.as_str()))
    {
        return free;
    }

    let fallback = format!("{base_key}_conflict");
    if !taken.contains(fallback.as_str()) {
        return fallback;
    }

    // `taken` is finite, so some counter value is necessarily free.
    let mut counter: u64 = 2;
    loop {
        let candidate = format!("{fallback}{counter}");
        if !taken.contains(candidate.as_str()) {
            return candidate;
        }
        counter += 1;
    }
}
72
/// Reduces a DOI given as a URL or with a `doi:` label to the bare DOI name.
///
/// Surrounding whitespace is trimmed, then any leading resolver URL
/// (`http(s)://doi.org/`, `http(s)://dx.doi.org/`) or `doi:` label is removed.
/// Prefixes are matched ASCII case-insensitively, may be stacked in any order
/// (e.g. `doi:https://doi.org/...`), and whitespace following a removed
/// prefix is dropped. The DOI name itself is returned unchanged, including
/// its original letter case.
pub fn normalise_doi(doi: &str) -> String {
    const PREFIXES: &[&str] = &[
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ];

    let mut rest = doi.trim();
    // Keep stripping until no known prefix is left; every successful strip
    // shortens `rest`, so the loop terminates.
    'strip: loop {
        for prefix in PREFIXES {
            let len = prefix.len();
            // `get` returns `None` when `rest` is too short or `len` would
            // split a multi-byte character, so slicing below cannot panic.
            if let Some(head) = rest.get(..len) {
                if head.eq_ignore_ascii_case(prefix) {
                    rest = rest[len..].trim_start();
                    continue 'strip;
                }
            }
        }
        return rest.to_string();
    }
}
85
/// Returns `s` with its first character converted to upper case.
///
/// The remainder of the string is copied unchanged. An empty input yields an
/// empty string. The upper-case form of the first character may be more than
/// one character long (Unicode case mapping), in which case all of it is kept.
pub fn capitalise_first(s: String) -> String {
    let mut tail = s.chars();
    if let Some(head) = tail.next() {
        let mut out: String = head.to_uppercase().collect();
        // `as_str` exposes everything the iterator has not consumed yet.
        out.push_str(tail.as_str());
        out
    } else {
        String::new()
    }
}
94
95fn generate_short_title_key(work: &WorkMeta) -> String {
100 const STOP_WORDS: &[&str] = &[
101 "a", "an", "the", "of", "in", "on", "at", "to", "for", "and",
102 "or", "by", "with", "is", "are", "was", "were", "from", "as",
103 "into", "that", "this", "its", "be", "has", "have", "had",
104 ];
105
106 let title_part: String = work
107 .title
108 .as_deref()
109 .unwrap_or("")
110 .split_whitespace()
111 .filter(|w| {
112 let lower: String = w
114 .chars()
115 .filter(|c| c.is_alphabetic())
116 .collect::<String>()
117 .to_lowercase();
118 !lower.is_empty() && !STOP_WORDS.contains(&lower.as_str())
119 })
120 .take(4)
121 .map(|w| {
122 let clean: String = w.chars().filter(|c| c.is_alphanumeric()).collect();
124 capitalise_first(clean)
125 })
126 .filter(|s| !s.is_empty())
127 .collect::<Vec<_>>()
128 .join("");
129
130 let year_part = work.year.map(|y| y.to_string()).unwrap_or_default();
131
132 if title_part.is_empty() {
133 format!("Unknown{year_part}")
134 } else {
135 format!("{title_part}{year_part}")
136 }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned `Vec<String>` the API expects.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Builds a `WorkMeta` fixture; fields not listed keep their defaults.
    fn work(doi: &str, title: &str, authors: &[&str], year: i32) -> WorkMeta {
        WorkMeta {
            doi: doi.to_string(),
            title: Some(title.to_string()),
            authors: owned(authors),
            year: Some(year),
            ..WorkMeta::default()
        }
    }

    #[test]
    fn test_normalise_doi_strips_url() {
        let expected = "10.1234/test";
        for input in ["https://doi.org/10.1234/test", "10.1234/test", "doi:10.1234/test"] {
            assert_eq!(normalise_doi(input), expected);
        }
    }

    #[test]
    fn test_generate_citation_key_single_author() {
        let key = generate_citation_key(&owned(&["Smith, John"]), Some(2024));
        assert_eq!(key, "Smith2024");
    }

    #[test]
    fn test_generate_citation_key_two_authors() {
        let key = generate_citation_key(&owned(&["Smith, John", "Jones, Alice"]), Some(2024));
        assert_eq!(key, "SmithJones2024");
    }

    #[test]
    fn test_resolve_key_conflict() {
        let existing = owned(&["Smith2024", "Smith2024a"]);
        assert_eq!(resolve_key_conflict("Smith2024", &existing), "Smith2024b");
    }

    #[test]
    fn test_resolve_key_conflict_beyond_z() {
        // The base key and every single-letter suffix are already taken.
        let existing: Vec<String> = std::iter::once("Smith2024".to_string())
            .chain((b'a'..=b'z').map(|c| format!("Smith2024{}", c as char)))
            .collect();
        assert_eq!(resolve_key_conflict("Smith2024", &existing), "Smith2024aa");
    }

    #[test]
    fn test_short_title_key_style() {
        let fixture = work(
            "10.1234/ml",
            "Machine Learning in Practice",
            &["Smith, John"],
            2024,
        );
        assert_eq!(
            generate_citation_key_by_style(&fixture, &KeyStyle::ShortTitle),
            "MachineLearningPractice2024"
        );
    }

    #[test]
    fn test_short_title_key_strips_stop_words() {
        let fixture = work("10.1234/a", "The Role of AI in the Future", &[], 2020);
        assert_eq!(
            generate_citation_key_by_style(&fixture, &KeyStyle::ShortTitle),
            "RoleAIFuture2020"
        );
    }

    #[test]
    fn test_author_year_key_style() {
        let fixture = work("10.1234/t", "Some Title", &["Smith, John"], 2024);
        assert_eq!(
            generate_citation_key_by_style(&fixture, &KeyStyle::AuthorYear),
            "Smith2024"
        );
    }
}