1use std::collections::HashMap;
2use std::fmt::{self, Display, Formatter};
3
4use crate::{chunk::*, Spanned};
5use crate::{Type, TypeError};
6
/// A person (for example an author or editor) broken down into the name
/// parts that BibTeX-style name lists distinguish.
///
/// Values produced by [`Person::parse`] have surrounding whitespace trimmed
/// from the parts the parser assembles itself. The derived ordering compares
/// the fields in declaration order (`name` first). `Default` yields a person
/// whose parts are all empty strings.
#[derive(Debug, Clone, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Person {
    /// The family name (the `family` key of the extended name format).
    pub name: String,
    /// The given name(s) (the `given` key of the extended name format).
    pub given_name: String,
    /// The part written in front of the family name, such as `de la`.
    pub prefix: String,
    /// The part written after the family name, such as `Sr.`.
    pub suffix: String,
}
23
24impl Person {
25 pub fn parse(chunks: ChunksRef) -> Self {
37 if chunks.iter().any(|val| val.v.get().contains("=")) {
39 return Self::parse_extended_name_format(chunks);
40 }
41 Self::parse_bibtex(chunks)
43 }
44
45 fn parse_extended_name_format(chunks: ChunksRef) -> Self {
47 let mut person = HashMap::new();
48 for chunk in split_token_lists(chunks, ",") {
49 let (key, value) = split_at_normal_char(&chunk, '=', true);
50 let key = key.format_verbatim();
51 let value = value.format_verbatim();
52 person.insert(key, value);
53 }
54
55 let name = person.remove("family").unwrap_or_default();
56 let given_name = person.remove("given").unwrap_or_default();
57 let prefix = person.remove("prefix").unwrap_or_default();
58 let suffix = person.remove("suffix").unwrap_or_default();
59
60 Self { name, given_name, prefix, suffix }
61 }
62
63 fn parse_bibtex(chunks: ChunksRef) -> Self {
64 let num_commas = count_num_char(chunks, ',');
65
66 match num_commas {
67 0 => Self::parse_unified(chunks), 1 => {
69 let (v1, v2) = split_at_normal_char(chunks, ',', true);
70 Self::parse_single_comma(&v1, &v2)
71 } _ => {
73 let (v1, v2) = split_at_normal_char(chunks, ',', true);
74 let (v2, v3) = split_at_normal_char(&v2, ',', true);
75 Self::parse_two_commas(&v1, &v2, &v3)
76 } }
78 }
79
80 fn parse_unified(chunks: ChunksRef) -> Self {
83 let mut word_start = true;
88 let mut capital = false;
89 let mut seen_lowercase = false;
90 let mut seen_uppercase = false;
91 let mut seen_uppercase2 = false;
92 let mut cap_new_start = 0;
93 let mut cap_word_end = 0;
94 let mut last_word_start = 0;
95 let mut last_lowercase_start = 0;
96
97 for (index, (c, v)) in chunk_chars(chunks).enumerate() {
98 if c.is_whitespace() && !v {
99 word_start = true;
100 continue;
101 }
102
103 if word_start {
104 last_word_start = index;
105 capital = if v || c.is_uppercase() {
106 seen_uppercase = true;
107 if seen_lowercase && last_lowercase_start >= cap_new_start {
108 seen_uppercase2 = true;
109 cap_new_start = index;
110 }
111 true
112 } else {
113 last_lowercase_start = index;
114 seen_lowercase = true;
115 false
116 };
117 }
118
119 if capital && !seen_lowercase {
120 cap_word_end = index;
121 }
122
123 word_start = false;
124 }
125
126 let mut name = String::new();
127 let mut given_name = String::new();
128 let mut prefix = String::new();
129
130 for (index, (c, _)) in chunk_chars(chunks).enumerate() {
131 if (index <= cap_word_end
132 && seen_lowercase
133 && seen_uppercase
134 && !(index == 0 && c.is_lowercase()))
135 || (index < last_word_start && !seen_lowercase)
136 {
137 given_name.push(c);
138 } else if (index < cap_new_start && cap_new_start > cap_word_end)
139 || (index < last_word_start
140 && (!seen_uppercase2
141 || (last_word_start == last_lowercase_start
142 && index < cap_new_start)))
143 {
144 prefix.push(c);
145 } else {
146 name.push(c);
147 }
148 }
149
150 Self {
151 name: name.trim_start().to_string(),
152 given_name: given_name.trim_end().to_string(),
153 prefix: prefix.trim().to_string(),
154 suffix: String::new(),
155 }
156 }
157
158 fn parse_single_comma(s1: ChunksRef, s2: ChunksRef) -> Self {
165 if s2.is_empty() || (s2.len() == 1 && s2.format_verbatim().is_empty()) {
166 let formatted = s1.format_verbatim();
167 let last_space = formatted.rfind(' ').unwrap_or(0);
168 let (prefix, last) = formatted.split_at(last_space);
169 return Self {
170 given_name: String::new(),
171 name: last.trim_start().to_string(),
172 prefix: prefix.trim_end().to_string(),
173 suffix: String::new(),
174 };
175 }
176
177 let given_name = s2.format_verbatim();
178
179 let mut word_start = true;
180 let mut last_lower_case_end: i32 = -1;
181 let mut is_lowercase = false;
182 let mut last_word_start = 0;
183 let mut has_seen_uppercase_words = false;
184
185 for (index, (c, v)) in chunk_chars(s1).enumerate() {
186 if c.is_whitespace() && !v {
187 word_start = true;
188 continue;
189 }
190
191 if word_start {
192 last_word_start = index;
193
194 if c.is_lowercase() || v {
195 is_lowercase = true;
196 } else {
197 is_lowercase = false;
198 has_seen_uppercase_words = true;
199 }
200 }
201
202 if is_lowercase {
203 last_lower_case_end = index as i32;
204 }
205
206 word_start = false;
207 }
208
209 let mut name = String::new();
210 let mut prefix = String::new();
211 for (index, (c, _)) in chunk_chars(s1).enumerate() {
212 if (index as i32 <= last_lower_case_end && has_seen_uppercase_words)
213 || (!has_seen_uppercase_words && index < last_word_start)
214 {
215 prefix.push(c);
216 } else if has_seen_uppercase_words || index >= last_word_start {
217 name.push(c);
218 }
219 }
220
221 Self {
222 name: name.trim_start().to_string(),
223 given_name: given_name.trim_start().to_string(),
224 prefix: prefix.trim_end().to_string(),
225 suffix: String::new(),
226 }
227 }
228
229 fn parse_two_commas(s1: ChunksRef, s2: ChunksRef, s3: ChunksRef) -> Self {
237 let mut p = Self::parse_single_comma(s1, s3);
238 p.suffix = s2.format_verbatim();
239 p
240 }
241}
242
243impl Type for Vec<Person> {
244 fn from_chunks(chunks: ChunksRef) -> Result<Self, TypeError> {
245 Ok(split_token_lists_with_kw(chunks, "and")
246 .into_iter()
247 .map(|subchunks| Person::parse(&subchunks))
248 .collect())
249 }
250
251 fn to_chunks(&self) -> Chunks {
252 self.iter()
253 .map(|p| {
254 let prefix = if let Some(c) = p.prefix.chars().next() {
255 if c.is_uppercase() {
256 (
257 Some(Spanned::detached(Chunk::Verbatim(p.prefix.clone()))),
258 " ".to_string(),
259 )
260 } else {
261 (None, format!("{} ", p.prefix))
262 }
263 } else {
264 (None, String::new())
265 };
266
267 let name_str = if !p.suffix.is_empty() {
268 format!("{}{}, {}, {}", prefix.1, p.name, p.suffix, p.given_name)
269 } else {
270 format!("{}{}, {}", prefix.1, p.name, p.given_name)
271 };
272
273 let mut res = vec![Spanned::detached(Chunk::Normal(name_str))];
274 if let Some(pre_chunk) = prefix.0 {
275 res.insert(0, pre_chunk);
276 }
277
278 res
279 })
280 .collect::<Vec<Chunks>>()
281 .to_chunks()
282 }
283}
284
285impl Display for Person {
286 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
287 if !self.given_name.is_empty() {
288 write!(f, "{} ", self.given_name)?;
289 }
290
291 if !self.prefix.is_empty() {
292 write!(f, "{} ", self.prefix)?;
293 }
294
295 write!(f, "{}", self.name)?;
296
297 if !self.suffix.is_empty() {
298 write!(f, " {}", self.suffix)?;
299 }
300
301 Ok(())
302 }
303}
304
// Unit tests for splitting name lists at `and` and for the individual name
// formats handled by `Person::parse`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chunk::tests::*;

    // Three names separated by ` and ` on a single line.
    #[test]
    fn test_list_of_names() {
        let names =
            String::from("Johannes Gutenberg and Aldus Manutius and Claude Garamond");
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 3);

        assert_eq!(people[0].name, "Gutenberg");
        assert_eq!(people[0].prefix, "");
        assert_eq!(people[0].given_name, "Johannes");

        assert_eq!(people[1].name, "Manutius");
        assert_eq!(people[1].prefix, "");
        assert_eq!(people[1].given_name, "Aldus");

        assert_eq!(people[2].name, "Garamond");
        assert_eq!(people[2].prefix, "");
        assert_eq!(people[2].given_name, "Claude");
    }

    // Line breaks before and/or after the `and` keyword must give the same
    // result as the single-line form.
    #[test]
    fn test_list_of_names_multilines() {
        // Line break after each `and`.
        let names = String::from(
            "Johannes Gutenberg and
Aldus Manutius and
Claude Garamond",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people1: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people1.len(), 3);

        // Each `and` on a line of its own.
        let names = String::from(
            "Johannes Gutenberg
and
Aldus Manutius
and
Claude Garamond",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people2: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people2.len(), 3);

        // A mix of both layouts.
        let names = String::from(
            "Johannes Gutenberg
and
Aldus Manutius and
Claude Garamond",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people3: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people3.len(), 3);

        assert_eq!(people1, people2);
        assert_eq!(people2, people3);

        assert_eq!(people1[0].name, "Gutenberg");
        assert_eq!(people1[0].prefix, "");
        assert_eq!(people1[0].given_name, "Johannes");

        assert_eq!(people1[1].name, "Manutius");
        assert_eq!(people1[1].prefix, "");
        assert_eq!(people1[1].given_name, "Aldus");

        assert_eq!(people1[2].name, "Garamond");
        assert_eq!(people1[2].prefix, "");
        assert_eq!(people1[2].given_name, "Claude");
    }

    // An `and` at the very start is not a separator: it ends up as the
    // prefix of the first person.
    #[test]
    fn test_leading_and() {
        let names = String::from(
            "and Gutenberg, Johannes and
Aldus Manutius and
Claude Garamond",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 3);

        assert_eq!(people[0].name, "Gutenberg");
        assert_eq!(people[0].prefix, "and");
        assert_eq!(people[0].given_name, "Johannes");
    }

    // An `and` at the very end is not a separator either: it becomes the
    // family name of the last person.
    #[test]
    fn test_trailing_and() {
        let names = String::from(
            "Johannes Gutenberg and
Aldus Manutius and
Claude Garamond and",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 3);

        assert_eq!(people[2].name, "and");
        assert_eq!(people[2].prefix, "");
        assert_eq!(people[2].given_name, "Claude Garamond");
    }

    // Consecutive `and` keywords produce empty persons between them.
    #[test]
    fn test_consecutive_and() {
        // Two in a row: one empty person.
        let names = String::from(
            "Johannes Gutenberg and and
Aldus Manutius and
Claude Garamond",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 4);

        assert_eq!(people[1].name, "");
        assert_eq!(people[1].prefix, "");
        assert_eq!(people[1].given_name, "");

        // Three in a row: two empty persons.
        let names = String::from(
            "Johannes Gutenberg and and and
Aldus Manutius and
Claude Garamond",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 5);

        assert_eq!(people[1].name, "");
        assert_eq!(people[1].prefix, "");
        assert_eq!(people[1].given_name, "");
        assert_eq!(people[2].name, "");
        assert_eq!(people[2].prefix, "");
        assert_eq!(people[2].given_name, "");
    }

    // `and` occurring inside a word (at its start, end or middle) must not
    // split the list.
    #[test]
    fn test_name_with_and_inside() {
        let names = String::from(
            "Johannes anderson Gutenberg and Claudeand Garamond and Aanderson Manutius",
        );
        let people = &[Spanned::detached(Chunk::Normal(names))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 3);

        assert_eq!(people[0].name, "Gutenberg");
        assert_eq!(people[0].prefix, "anderson");
        assert_eq!(people[0].given_name, "Johannes");

        assert_eq!(people[1].name, "Garamond");
        assert_eq!(people[1].prefix, "");
        assert_eq!(people[1].given_name, "Claudeand");

        assert_eq!(people[2].name, "Manutius");
        assert_eq!(people[2].prefix, "");
        assert_eq!(people[2].given_name, "Aanderson");
    }

    // Names built from verbatim chunks, alone and combined with normal text
    // that carries the `and` separator.
    #[test]
    fn test_verbatim() {
        // A single name made of two verbatim words.
        let people = &[
            Spanned::detached(Chunk::Verbatim("Johannes".to_string())),
            Spanned::detached(Chunk::Normal(" ".to_string())),
            Spanned::detached(Chunk::Verbatim("Gutenberg".to_string())),
        ];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 1);
        assert_eq!(people[0].name, "Gutenberg");
        assert_eq!(people[0].given_name, "Johannes");

        // Two verbatim names joined by a normal ` and ` chunk.
        let people = &[
            Spanned::detached(Chunk::Verbatim("Johannes".to_string())),
            Spanned::detached(Chunk::Normal(" ".to_string())),
            Spanned::detached(Chunk::Verbatim("Gutenberg".to_string())),
            Spanned::detached(Chunk::Normal(" and ".to_string())),
            Spanned::detached(Chunk::Verbatim("Manutius".to_string())),
            Spanned::detached(Chunk::Normal(" ".to_string())),
            Spanned::detached(Chunk::Verbatim("Aldus".to_string())),
        ];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 2);
        assert_eq!(people[0].name, "Gutenberg");
        assert_eq!(people[0].given_name, "Johannes");
        assert_eq!(people[1].name, "Aldus");
        assert_eq!(people[1].given_name, "Manutius");

        // The separator and the second name share one normal chunk.
        let people = &[
            Spanned::detached(Chunk::Verbatim("Johannes".to_string())),
            Spanned::detached(Chunk::Normal(" ".to_string())),
            Spanned::detached(Chunk::Verbatim("Gutenberg".to_string())),
            Spanned::detached(Chunk::Normal(" and Manutius Aldus".to_string())),
        ];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 2);
        assert_eq!(people[0].name, "Gutenberg");
        assert_eq!(people[0].given_name, "Johannes");
        assert_eq!(people[1].name, "Aldus");
        assert_eq!(people[1].given_name, "Manutius");
    }

    // Single-comma names, including the round trip through `to_chunks`.
    #[test]
    fn test_person_comma() {
        // Nothing after the comma: the last word is the family name.
        let p = Person::parse(&[Spanned::zero(N("jean de la fontaine,"))]);
        assert_eq!(p.name, "fontaine");
        assert_eq!(p.prefix, "jean de la");
        assert_eq!(p.given_name, "");
        assert_eq!(vec![p].to_chunks(), vec![d(N("jean de la fontaine, "),)]);

        // All-lowercase left side: everything before the last word is prefix.
        let p = Person::parse(&[Spanned::zero(N("de la fontaine, Jean"))]);
        assert_eq!(p.name, "fontaine");
        assert_eq!(p.prefix, "de la");
        assert_eq!(p.given_name, "Jean");
        assert_eq!(vec![p].to_chunks(), vec![d(N("de la fontaine, Jean"),)]);

        // All-capitalized left side: no prefix at all.
        let p = Person::parse(&[Spanned::zero(N("De La Fontaine, Jean"))]);
        assert_eq!(p.name, "De La Fontaine");
        assert_eq!(p.prefix, "");
        assert_eq!(p.given_name, "Jean");
        assert_eq!(vec![p].to_chunks(), vec![d(N("De La Fontaine, Jean"),)]);

        // A verbatim chunk in front is taken as the prefix and serialized
        // back as a verbatim chunk.
        let p = Person::parse(&[s(V("De La"), 2..6), s(N(" Fontaine, Jean"), 7..15)]);
        assert_eq!(p.name, "Fontaine");
        assert_eq!(p.prefix, "De La");
        assert_eq!(p.given_name, "Jean");
        assert_eq!(vec![p].to_chunks(), vec![d(V("De La")), d(N(" Fontaine, Jean"))]);

        // The prefix extends up to the last lowercase word.
        let p = Person::parse(&[Spanned::zero(N("De la Fontaine, Jean"))]);
        assert_eq!(p.name, "Fontaine");
        assert_eq!(p.prefix, "De la");
        assert_eq!(p.given_name, "Jean");

        let p = Person::parse(&[Spanned::zero(N("de La Fontaine, Jean"))]);
        assert_eq!(p.name, "La Fontaine");
        assert_eq!(p.prefix, "de");
        assert_eq!(p.given_name, "Jean");
    }

    // Names without any comma.
    #[test]
    fn test_person_no_comma() {
        // Empty input yields an empty person.
        let p = Person::parse(&[Spanned::zero(N(""))]);
        assert_eq!(p.name, "");
        assert_eq!(p.prefix, "");
        assert_eq!(p.given_name, "");

        // All lowercase: the last word is the family name, the rest is prefix.
        let p = Person::parse(&[Spanned::zero(N("jean de la fontaine"))]);
        assert_eq!(p.name, "fontaine");
        assert_eq!(p.prefix, "jean de la");
        assert_eq!(p.given_name, "");

        // A leading capitalized word becomes the given name.
        let p = Person::parse(&[Spanned::zero(N("Jean de la fontaine"))]);
        assert_eq!(p.name, "fontaine");
        assert_eq!(p.prefix, "de la");
        assert_eq!(p.given_name, "Jean");

        // A verbatim word is grouped with the capitalized given name.
        let p = Person::parse(&[
            Spanned::zero(N("Jean ")),
            Spanned::zero(V("de")),
            Spanned::zero(N(" la fontaine")),
        ]);
        assert_eq!(p.name, "fontaine");
        assert_eq!(p.prefix, "la");
        assert_eq!(p.given_name, "Jean de");

        let p = Person::parse(&[
            Spanned::zero(N("Jean ")),
            Spanned::zero(V("de")),
            Spanned::zero(N(" ")),
            Spanned::zero(V("la")),
            Spanned::zero(N(" fontaine")),
        ]);
        assert_eq!(p.name, "fontaine");
        assert_eq!(p.prefix, "");
        assert_eq!(p.given_name, "Jean de la");

        // Same verbatim words after a lowercase first word.
        let p = Person::parse(&[
            Spanned::zero(N("jean ")),
            Spanned::zero(V("de")),
            Spanned::zero(N(" ")),
            Spanned::zero(V("la")),
            Spanned::zero(N(" fontaine")),
        ]);
        assert_eq!(p.name, "de la fontaine");
        assert_eq!(p.prefix, "jean");
        assert_eq!(p.given_name, "");

        // All capitalized: the last word is the family name.
        let p = Person::parse(&[Spanned::zero(N("Jean De La Fontaine"))]);
        assert_eq!(p.name, "Fontaine");
        assert_eq!(p.prefix, "");
        assert_eq!(p.given_name, "Jean De La");

        // Lowercase first word: no given name is extracted.
        let p = Person::parse(&[Spanned::zero(N("jean De la Fontaine"))]);
        assert_eq!(p.name, "Fontaine");
        assert_eq!(p.prefix, "jean De la");
        assert_eq!(p.given_name, "");

        // The family name starts at the first capitalized word after the
        // lowercase prefix.
        let p = Person::parse(&[Spanned::zero(N("Jean de La Fontaine"))]);
        assert_eq!(p.name, "La Fontaine");
        assert_eq!(p.prefix, "de");
        assert_eq!(p.given_name, "Jean");
    }

    // Two commas: family name, suffix, given name.
    #[test]
    fn test_person_two_comma() {
        let p = Person::parse(&[Spanned::zero(N("Mudd, Sr., Harcourt Fenton"))]);
        assert_eq!(p.name, "Mudd");
        assert_eq!(p.prefix, "");
        assert_eq!(p.suffix, "Sr.");
        assert_eq!(p.given_name, "Harcourt Fenton");
    }

    // Extended name format (`key=value` pairs).
    #[test]
    fn test_person_extended_name_format() {
        // Plain given/family pair.
        let p = Person::parse(&[Spanned::zero(N("given=Hans, family=Harman"))]);
        assert_eq!(p.name, "Harman");
        assert_eq!(p.prefix, "");
        assert_eq!(p.suffix, "");
        assert_eq!(p.given_name, "Hans");

        // The value is supplied as a verbatim chunk.
        let p =
            Person::parse(&[Spanned::zero(N("given=")), Spanned::zero(V("Jean Pierre"))]);
        assert_eq!(p.name, "");
        assert_eq!(p.prefix, "");
        assert_eq!(p.suffix, "");
        assert_eq!(p.given_name, "Jean Pierre");

        // The `given-i` key is ignored.
        let p = Person::parse(&[
            Spanned::zero(N("given=")),
            Spanned::zero(V("Jean Pierre Simon")),
            Spanned::zero(N(", given-i=JPS")),
        ]);
        assert_eq!(p.name, "");
        assert_eq!(p.prefix, "");
        assert_eq!(p.suffix, "");
        assert_eq!(p.given_name, "Jean Pierre Simon");

        // The `prefix-i` key is ignored as well.
        let p = Person::parse(&[Spanned::zero(N(
            "given=Jean, prefix=de la, prefix-i=d, family=Rousse",
        ))]);
        assert_eq!(p.name, "Rousse");
        assert_eq!(p.prefix, "de la");
        assert_eq!(p.suffix, "");
        assert_eq!(p.given_name, "Jean");

        // A comma and an `and` inside a verbatim value do not split it.
        let p = Person::parse(&[
            Spanned::zero(N("family=")),
            Spanned::zero(V("Robert and Sons, Inc.")),
        ]);
        assert_eq!(p.name, "Robert and Sons, Inc.");
        assert_eq!(p.prefix, "");
        assert_eq!(p.suffix, "");
        assert_eq!(p.given_name, "");

        // Spaces around `=` are tolerated.
        let p = Person::parse(&[Spanned::zero(N(
            "given = Simon, prefix = de, family = Beumont",
        ))]);
        assert_eq!(p.name, "Beumont");
        assert_eq!(p.prefix, "de");
        assert_eq!(p.suffix, "");
        assert_eq!(p.given_name, "Simon");

        // A list of two extended-format names.
        let people = &[Spanned::zero(N(
            "given=Hans, family=Harman and given=Simon, prefix=de, family=Beumont",
        ))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 2);
        assert_eq!(people[0].name, "Harman");
        assert_eq!(people[0].prefix, "");
        assert_eq!(people[0].suffix, "");
        assert_eq!(people[0].given_name, "Hans");
        assert_eq!(people[1].name, "Beumont");
        assert_eq!(people[1].prefix, "de");
        assert_eq!(people[1].suffix, "");
        assert_eq!(people[1].given_name, "Simon");

        // A classic name and an extended-format name in the same list.
        let people =
            &[Spanned::zero(N("Hans Harman and given=Simon, prefix=de, family=Beumont"))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 2);
        assert_eq!(people[0].name, "Harman");
        assert_eq!(people[0].prefix, "");
        assert_eq!(people[0].suffix, "");
        assert_eq!(people[0].given_name, "Hans");
        assert_eq!(people[1].name, "Beumont");
        assert_eq!(people[1].prefix, "de");
        assert_eq!(people[1].suffix, "");
        assert_eq!(people[1].given_name, "Simon");

        // An entry made only of an unrecognised key yields an empty person.
        let people = &[Spanned::zero(N("nosortothers=true and Hans Harman and given=Simon, family=Beumont, prefix=de, useprefix=true"))];
        let people: Vec<Person> = Type::from_chunks(people).unwrap();
        assert_eq!(people.len(), 3);
        assert_eq!(people[0].name, "");
        assert_eq!(people[0].prefix, "");
        assert_eq!(people[0].suffix, "");
        assert_eq!(people[0].given_name, "");
    }
}