1use ahash::AHashMap;
4use std::borrow::Cow;
5use std::fmt;
6
/// How strictly [`Entry::validate`] checks an entry.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ValidationLevel {
    /// Required-field checks only.
    Minimal,
    /// Required fields plus common-issue heuristics (the default).
    #[default]
    Standard,
    /// Everything: common issues, field formats, and cross-references.
    Strict,
}
18
/// A single problem discovered while validating an entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Field the problem refers to, or `None` for entry-level issues.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
    /// How serious the problem is.
    pub severity: ValidationSeverity,
}
29
/// Severity attached to a [`ValidationError`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// The entry is missing something required or is malformed.
    Error,
    /// Suspicious but not necessarily wrong.
    Warning,
    /// Stylistic or advisory only.
    Info,
}
40
41impl ValidationError {
42 #[must_use]
44 pub fn error(field: Option<&str>, message: impl Into<String>) -> Self {
45 Self {
46 field: field.map(String::from),
47 message: message.into(),
48 severity: ValidationSeverity::Error,
49 }
50 }
51
52 #[must_use]
54 pub fn warning(field: Option<&str>, message: impl Into<String>) -> Self {
55 Self {
56 field: field.map(String::from),
57 message: message.into(),
58 severity: ValidationSeverity::Warning,
59 }
60 }
61
62 #[must_use]
64 pub fn info(field: Option<&str>, message: impl Into<String>) -> Self {
65 Self {
66 field: field.map(String::from),
67 message: message.into(),
68 severity: ValidationSeverity::Info,
69 }
70 }
71}
72
73impl fmt::Display for ValidationError {
74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75 let field = self.field.as_deref().unwrap_or("<entry>");
76 write!(f, "[{:?}] {}: {}", self.severity, field, self.message)
77 }
78}
79
/// A single author/editor name split into BibTeX name parts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersonName {
    /// The unmodified input this name was parsed from.
    pub raw: String,
    /// Joined first (given) name component.
    pub first: String,
    /// Joined "von" particle (e.g. `van der`).
    pub von: String,
    /// Joined last (family) name component.
    pub last: String,
    /// Joined suffix such as `Jr.` or `III`.
    pub jr: String,
    /// Individual given-name tokens.
    pub given: Vec<String>,
    /// Individual family-name tokens.
    pub family: Vec<String>,
    /// Individual von-part tokens.
    pub prefix: Vec<String>,
    /// Individual suffix tokens.
    pub suffix: Vec<String>,
    /// Set when the whole name was written as a braced literal; when present
    /// it takes precedence in [`PersonName::display_name`].
    pub literal: Option<String>,
}
108
109impl PersonName {
110 #[must_use]
112 pub fn display_name(&self) -> String {
113 if let Some(literal) = &self.literal {
114 return literal.clone();
115 }
116
117 let mut parts = Vec::new();
118 if !self.first.is_empty() {
119 parts.push(self.first.as_str());
120 }
121 if !self.von.is_empty() {
122 parts.push(self.von.as_str());
123 }
124 if !self.last.is_empty() {
125 parts.push(self.last.as_str());
126 }
127
128 let mut name = parts.join(" ");
129 if !self.jr.is_empty() {
130 if !name.is_empty() {
131 name.push_str(", ");
132 }
133 name.push_str(&self.jr);
134 }
135 name
136 }
137
138 #[must_use]
140 pub fn is_empty(&self) -> bool {
141 self.raw.is_empty()
142 && self.first.is_empty()
143 && self.von.is_empty()
144 && self.last.is_empty()
145 && self.jr.is_empty()
146 && self.literal.is_none()
147 }
148
149 #[must_use]
151 pub const fn is_literal(&self) -> bool {
152 self.literal.is_some()
153 }
154
155 #[cfg(feature = "latex_to_unicode")]
157 #[must_use]
158 pub fn unicode_display_name(&self) -> String {
159 crate::latex_unicode::latex_to_unicode(&self.display_name())
160 }
161}
162
163#[must_use]
168pub fn parse_names(input: &str) -> Vec<PersonName> {
169 split_bibtex_names(input)
170 .into_iter()
171 .map(parse_single_name)
172 .filter(|name| !name.is_empty())
173 .collect()
174}
175
/// A parsed calendar date; month and day are optional.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DateParts {
    /// Calendar year (may be negative).
    pub year: i32,
    /// 1-based month, when present.
    pub month: Option<u8>,
    /// 1-based day of month, when present.
    pub day: Option<u8>,
}
186
187impl DateParts {
188 #[must_use]
190 pub const fn is_complete(&self) -> bool {
191 self.month.is_some() && self.day.is_some()
192 }
193}
194
/// Why [`parse_date_parts`] rejected an input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateParseError {
    /// Input was empty after trimming.
    Empty,
    /// Year segment did not parse or was out of range.
    InvalidYear,
    /// Month segment did not parse or was out of range.
    InvalidMonth,
    /// Day segment did not parse or was out of range.
    InvalidDay,
    /// More dash-separated segments than `YYYY-MM-DD` allows.
    UnsupportedFormat,
}
209
210impl fmt::Display for DateParseError {
211 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
212 match self {
213 Self::Empty => f.write_str("empty date"),
214 Self::InvalidYear => f.write_str("invalid date year"),
215 Self::InvalidMonth => f.write_str("invalid date month"),
216 Self::InvalidDay => f.write_str("invalid date day"),
217 Self::UnsupportedFormat => f.write_str("unsupported date format"),
218 }
219 }
220}
221
// Marker impl: `DateParseError` carries no source error, so the defaults suffice.
impl std::error::Error for DateParseError {}
223
/// Kind of external resource a field points at (see [`classify_resource_field`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceKind {
    /// Local file attachment (`file`).
    File,
    /// Web address (`url`).
    Url,
    /// Digital Object Identifier (`doi`).
    Doi,
    /// PubMed identifier (`pmid`).
    Pmid,
    /// PubMed Central identifier (`pmcid`).
    Pmcid,
    /// Book number (`isbn`).
    Isbn,
    /// Serial number (`issn`).
    Issn,
    /// Generic eprint identifier (`eprint`).
    Eprint,
    /// arXiv identifier (`arxiv`).
    Arxiv,
    /// Reference to another entry (`crossref`).
    Crossref,
}
248
impl ResourceKind {
    /// The lowercase canonical field name for this resource kind.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::File => "file",
            Self::Url => "url",
            Self::Doi => "doi",
            Self::Pmid => "pmid",
            Self::Pmcid => "pmcid",
            Self::Isbn => "isbn",
            Self::Issn => "issn",
            Self::Eprint => "eprint",
            Self::Arxiv => "arxiv",
            Self::Crossref => "crossref",
        }
    }
}
267
/// A field recognized as pointing at an external resource.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResourceField {
    /// What kind of resource the field references.
    pub kind: ResourceKind,
    /// The field name as it appeared in the entry.
    pub field_name: String,
    /// The raw field value.
    pub value: String,
    /// Canonicalized form of the value, when one could be derived.
    pub normalized: Option<String>,
}
280
281pub fn parse_date_parts(input: &str) -> std::result::Result<DateParts, DateParseError> {
287 let cleaned = trim_bibtex_scalar(input);
288 if cleaned.is_empty() {
289 return Err(DateParseError::Empty);
290 }
291
292 let parts = cleaned.split('-').collect::<Vec<_>>();
293 match parts.as_slice() {
294 [year] => Ok(DateParts {
295 year: parse_year(year)?,
296 month: None,
297 day: None,
298 }),
299 [year, month] => {
300 let year = parse_year(year)?;
301 let month = parse_month_number(month).ok_or(DateParseError::InvalidMonth)?;
302 Ok(DateParts {
303 year,
304 month: Some(month),
305 day: None,
306 })
307 }
308 [year, month, day] => {
309 let year = parse_year(year)?;
310 let month = parse_month_number(month).ok_or(DateParseError::InvalidMonth)?;
311 let day = parse_day_number(day, year, month)?;
312 Ok(DateParts {
313 year,
314 month: Some(month),
315 day: Some(day),
316 })
317 }
318 _ => Err(DateParseError::UnsupportedFormat),
319 }
320}
321
/// Trims surrounding whitespace and lowercases ASCII letters only;
/// non-ASCII characters pass through unchanged.
#[must_use]
pub fn normalize_field_name_ascii(name: &str) -> String {
    name.trim().chars().map(|c| c.to_ascii_lowercase()).collect()
}
327
328#[must_use]
330pub fn canonical_biblatex_field_alias(name: &str) -> Option<&'static str> {
331 match normalize_field_name_ascii(name).as_str() {
332 "journaltitle" => Some("journal"),
333 "date" => Some("year"),
334 "institution" => Some("school"),
335 "location" => Some("address"),
336 _ => None,
337 }
338}
339
340#[must_use]
342pub fn normalize_biblatex_field_name(name: &str) -> String {
343 canonical_biblatex_field_alias(name)
344 .map_or_else(|| normalize_field_name_ascii(name), ToOwned::to_owned)
345}
346
347#[must_use]
349pub fn classify_resource_field(name: &str) -> Option<ResourceKind> {
350 match normalize_field_name_ascii(name).as_str() {
351 "file" => Some(ResourceKind::File),
352 "url" => Some(ResourceKind::Url),
353 "doi" => Some(ResourceKind::Doi),
354 "pmid" => Some(ResourceKind::Pmid),
355 "pmcid" => Some(ResourceKind::Pmcid),
356 "isbn" => Some(ResourceKind::Isbn),
357 "issn" => Some(ResourceKind::Issn),
358 "eprint" => Some(ResourceKind::Eprint),
359 "arxiv" => Some(ResourceKind::Arxiv),
360 "crossref" => Some(ResourceKind::Crossref),
361 _ => None,
362 }
363}
364
/// A single bibliography entry: type, citation key, and ordered fields.
/// Borrows from the source text where possible; see [`Entry::into_owned`].
#[derive(Debug, Clone, PartialEq)]
pub struct Entry<'a> {
    /// The entry type (`article`, `book`, ...).
    pub ty: EntryType<'a>,
    /// The citation key.
    pub key: Cow<'a, str>,
    /// Fields in source order; duplicates are possible.
    pub fields: Vec<Field<'a>>,
}
375
impl<'a> Entry<'a> {
    /// Creates an empty entry of the given type with a borrowed citation key.
    #[must_use]
    pub const fn new(ty: EntryType<'a>, key: &'a str) -> Self {
        Self {
            ty,
            key: Cow::Borrowed(key),
            fields: Vec::new(),
        }
    }

    /// The entry's type.
    #[must_use]
    pub const fn entry_type(&self) -> &EntryType<'a> {
        &self.ty
    }

    /// The citation key.
    #[must_use]
    pub fn key(&self) -> &str {
        &self.key
    }

    /// First field whose name matches exactly (case-sensitive).
    #[must_use]
    pub fn field(&self, name: &str) -> Option<&Field<'a>> {
        self.fields.iter().find(|f| f.name == name)
    }

    /// First field whose name matches ignoring ASCII case.
    #[must_use]
    pub fn field_ignore_case(&self, name: &str) -> Option<&Field<'a>> {
        self.fields
            .iter()
            .find(|f| f.name.eq_ignore_ascii_case(name))
    }

    /// Borrowed value of a field (case-sensitive name match).
    /// Returns `None` when the field is absent or its value is not a
    /// plain `Value::Literal`.
    #[must_use]
    pub fn get(&self, name: &str) -> Option<&str> {
        self.field(name).and_then(|f| f.value.as_str())
    }

    /// Like [`Self::get`], but ignoring ASCII case in the field name.
    #[must_use]
    pub fn get_ignore_case(&self, name: &str) -> Option<&str> {
        self.field_ignore_case(name).and_then(|f| f.value.as_str())
    }

    /// Owned, lossily-flattened value of a field (case-sensitive name match).
    /// Unlike [`Self::get`], this works for non-literal values too.
    #[must_use]
    pub fn get_as_string(&self, name: &str) -> Option<String> {
        self.field(name).map(|f| value_to_lossy_string(&f.value))
    }

    /// Like [`Self::get_as_string`], but ignoring ASCII case.
    #[must_use]
    pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
        self.field_ignore_case(name)
            .map(|f| value_to_lossy_string(&f.value))
    }

    /// First borrowed literal value among `names`, tried in order.
    #[must_use]
    pub fn get_any_ignore_case(&self, names: &[&str]) -> Option<&str> {
        names.iter().find_map(|name| self.get_ignore_case(name))
    }

    /// First owned value among `names`, tried in order.
    #[must_use]
    pub fn get_any_as_string_ignore_case(&self, names: &[&str]) -> Option<String> {
        names
            .iter()
            .find_map(|name| self.get_as_string_ignore_case(name))
    }

    /// True when a field with this name exists (ASCII case-insensitive).
    #[must_use]
    pub fn has_field(&self, name: &str) -> bool {
        self.field_ignore_case(name).is_some()
    }

    /// True when any of `names` is present (ASCII case-insensitive).
    #[must_use]
    pub fn has_any_field(&self, names: &[&str]) -> bool {
        names.iter().any(|name| self.has_field(name))
    }

    /// The entry's DOI after normalization; `None` when missing or when
    /// `normalize_doi` rejects the value.
    #[must_use]
    pub fn doi(&self) -> Option<String> {
        self.get_as_string_ignore_case("doi")
            .and_then(|doi| normalize_doi(&doi))
    }

    /// Parsed names from the `author` field; empty when absent.
    #[must_use]
    pub fn authors(&self) -> Vec<PersonName> {
        self.get_as_string_ignore_case("author")
            .map_or_else(Vec::new, |authors| parse_names(&authors))
    }

    /// Parsed names from the `editor` field; empty when absent.
    #[must_use]
    pub fn editors(&self) -> Vec<PersonName> {
        self.get_as_string_ignore_case("editor")
            .map_or_else(Vec::new, |editors| parse_names(&editors))
    }

    /// Parsed names from the `translator` field; empty when absent.
    #[must_use]
    pub fn translators(&self) -> Vec<PersonName> {
        self.get_as_string_ignore_case("translator")
            .map_or_else(Vec::new, |translators| parse_names(&translators))
    }

    /// Parses a specific field as a date. Outer `None` means the field is
    /// absent; the inner `Result` reports parse failures.
    #[must_use]
    pub fn date_parts_for(
        &self,
        field: &str,
    ) -> Option<std::result::Result<DateParts, DateParseError>> {
        self.get_as_string_ignore_case(field)
            .map(|value| parse_date_parts(&value))
    }

    /// Best-effort date for the entry: tries the biblatex date fields in
    /// priority order, then falls back to `year` (optionally refined by a
    /// separate `month` field).
    #[must_use]
    pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
        for field in &["date", "issued", "eventdate", "origdate", "urldate"] {
            if let Some(value) = self.get_as_string_ignore_case(field) {
                // First present field wins, even if it fails to parse.
                return Some(parse_date_parts(&value));
            }
        }

        let year = self.get_as_string_ignore_case("year")?;
        let mut parts = match parse_date_parts(&year) {
            Ok(parts) => parts,
            Err(error) => return Some(Err(error)),
        };
        // A standalone `month` field overrides whatever `year` carried.
        if let Some(month) = self.get_as_string_ignore_case("month") {
            match parse_month_number(&month) {
                Some(month) => parts.month = Some(month),
                None => return Some(Err(DateParseError::InvalidMonth)),
            }
        }
        Some(Ok(parts))
    }

    /// All fields recognized as external resources (see
    /// [`classify_resource_field`]), with `archiveprefix`/`eprinttype`
    /// forwarded so eprint values can be classified.
    #[must_use]
    pub fn resource_fields(&self) -> Vec<ResourceField> {
        let archive_prefix = self
            .get_as_string_ignore_case("archiveprefix")
            .or_else(|| self.get_as_string_ignore_case("eprinttype"));

        self.fields
            .iter()
            .filter_map(|field| {
                resource_field_from_parts(
                    &field.name,
                    field.value.to_plain_string(),
                    archive_prefix.as_deref(),
                )
            })
            .collect()
    }

    /// All fields in source order.
    #[must_use]
    pub fn fields(&self) -> &[Field<'a>] {
        &self.fields
    }

    /// Appends a field without checking for duplicates.
    pub fn add_field(&mut self, field: Field<'a>) {
        self.fields.push(field);
    }

    /// Replaces the value of the first field named `name` (case-sensitive),
    /// or appends a new field when none exists.
    pub fn set(&mut self, name: &'a str, value: Value<'a>) {
        if let Some(field) = self.fields.iter_mut().find(|field| field.name == name) {
            field.value = value;
        } else {
            self.fields.push(Field::new(name, value));
        }
    }

    /// Convenience wrapper around [`Self::set`] for borrowed literal text.
    pub fn set_literal(&mut self, name: &'a str, value: &'a str) {
        self.set(name, Value::Literal(Cow::Borrowed(value)));
    }

    /// Removes every field named `name` (case-sensitive) and returns them
    /// in their original order.
    pub fn remove(&mut self, name: &str) -> Vec<Field<'a>> {
        let mut removed = Vec::new();
        let mut index = 0;
        while index < self.fields.len() {
            if self.fields[index].name == name {
                removed.push(self.fields.remove(index));
            } else {
                index += 1;
            }
        }
        removed
    }

    /// Renames every field named `old` (case-sensitive) to `new`;
    /// returns how many fields were renamed.
    pub fn rename_field(&mut self, old: &str, new: &'a str) -> usize {
        let mut renamed = 0;
        for field in &mut self.fields {
            if field.name == old {
                field.name = Cow::Borrowed(new);
                renamed += 1;
            }
        }
        renamed
    }

    /// The `title` field as an owned string.
    #[must_use]
    pub fn title(&self) -> Option<String> {
        self.get_any_as_string_ignore_case(&["title"])
    }

    /// The `year` field as an owned string.
    #[must_use]
    pub fn year(&self) -> Option<String> {
        self.get_any_as_string_ignore_case(&["year"])
    }

    /// The `date` field as an owned string.
    #[must_use]
    pub fn date(&self) -> Option<String> {
        self.get_any_as_string_ignore_case(&["date"])
    }

    /// The journal name, accepting both `journal` and biblatex `journaltitle`.
    #[must_use]
    pub fn journal(&self) -> Option<String> {
        self.get_any_as_string_ignore_case(&["journal", "journaltitle"])
    }

    /// The `booktitle` field as an owned string.
    #[must_use]
    pub fn booktitle(&self) -> Option<String> {
        self.get_any_as_string_ignore_case(&["booktitle"])
    }

    /// The `url` field as an owned string.
    #[must_use]
    pub fn url(&self) -> Option<String> {
        self.get_any_as_string_ignore_case(&["url"])
    }

    /// Keywords split on commas or semicolons, trimmed, empties dropped.
    /// Accepts both `keywords` and `keyword` field names.
    #[must_use]
    pub fn keywords(&self) -> Vec<String> {
        self.get_any_as_string_ignore_case(&["keywords", "keyword"])
            .map(|keywords| {
                keywords
                    .split([',', ';'])
                    .map(str::trim)
                    .filter(|keyword| !keyword.is_empty())
                    .map(ToOwned::to_owned)
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Validates the entry at the requested strictness level.
    ///
    /// Required-field checks always run; `Standard` adds common-issue
    /// heuristics and `Strict` additionally checks field formats and
    /// cross-references.
    ///
    /// # Errors
    ///
    /// Returns every collected [`ValidationError`] (all severities, not
    /// just `Error`) when at least one problem was found.
    pub fn validate(&self, level: ValidationLevel) -> Result<(), Vec<ValidationError>> {
        let mut errors = Vec::new();

        self.validate_required_fields(&mut errors);

        match level {
            ValidationLevel::Minimal => {
            }
            ValidationLevel::Standard => {
                self.validate_common_issues(&mut errors);
            }
            ValidationLevel::Strict => {
                self.validate_common_issues(&mut errors);
                self.validate_field_formats(&mut errors);
                self.validate_cross_references(&mut errors);
            }
        }

        if errors.is_empty() {
            Ok(())
        } else {
            Err(errors)
        }
    }

    /// Emits one error per required-field group (see
    /// [`EntryType::required_field_groups`]) that has no member present.
    fn validate_required_fields(&self, errors: &mut Vec<ValidationError>) {
        for &field_group in self.ty.required_field_groups() {
            if self.has_any_field(field_group) {
                continue;
            }

            // Special-cased wording for the exact ["author", "editor"] group;
            // groups in other orders (e.g. ["editor", "author"]) fall through
            // to the generic alias message below.
            if field_group == ["author", "editor"] {
                errors.push(ValidationError::error(
                    None,
                    format!(
                        "{} entry must have either 'author' or 'editor' field",
                        self.ty
                    ),
                ));
                continue;
            }

            let primary_field = field_group[0];
            let message = if field_group.len() == 1 {
                format!(
                    "Required field '{}' is missing for {} entry",
                    primary_field, self.ty
                )
            } else {
                format!(
                    "Required field '{}' is missing for {} entry (accepted aliases: {})",
                    primary_field,
                    self.ty,
                    field_group.join(", ")
                )
            };

            errors.push(ValidationError::error(Some(primary_field), message));
        }
    }

    /// Heuristic warnings: implausible years, malformed page ranges,
    /// missing author/editor on "in"-type entries, empty literal values.
    fn validate_common_issues(&self, errors: &mut Vec<ValidationError>) {
        // NOTE(review): this only accepts a bare integer; a full biblatex
        // date like "2020-01-15" in `date` fails the i32 parse and draws
        // the "should be a number" warning — confirm that is intended.
        if let Some(year_str) = self.get_any_as_string_ignore_case(&["year", "date"]) {
            if let Ok(year) = year_str.parse::<i32>() {
                if !(1000..=2100).contains(&year) {
                    errors.push(ValidationError::warning(
                        Some(if self.has_field("year") {
                            "year"
                        } else {
                            "date"
                        }),
                        format!("Year {year} seems unlikely"),
                    ));
                }
            } else {
                errors.push(ValidationError::warning(
                    Some(if self.has_field("year") {
                        "year"
                    } else {
                        "date"
                    }),
                    "Year/date should be a number",
                ));
            }
        }

        if let Some(pages) = self.get_ignore_case("pages") {
            if !is_valid_page_range(pages) {
                errors.push(ValidationError::warning(
                    Some("pages"),
                    "Pages should be in format '12-34' or '12--34'",
                ));
            }
        }

        // "in"-type entries should name who wrote or edited the container.
        match self.ty {
            EntryType::InBook | EntryType::InProceedings | EntryType::InCollection
                if !self.has_any_field(&["author", "editor"]) =>
            {
                errors.push(ValidationError::warning(
                    None,
                    "Entry should have either 'author' or 'editor' field",
                ));
            }
            _ => {}
        }

        // Only literal values can be checked for emptiness here; variables
        // and concatenations return None from `as_str` and are skipped.
        for field in &self.fields {
            if let Some(value_str) = field.value.as_str() {
                if value_str.trim().is_empty() {
                    errors.push(ValidationError::warning(
                        Some(&field.name),
                        "Field has empty value",
                    ));
                }
            }
        }
    }

    /// Strict-level format checks for DOI, URL, ISBN, month, volume/number.
    fn validate_field_formats(&self, errors: &mut Vec<ValidationError>) {
        if let Some(doi) = self.get_as_string_ignore_case("doi") {
            if normalize_doi(&doi).is_none() {
                errors.push(ValidationError::warning(
                    Some("doi"),
                    "DOI should start with '10.' or a DOI URL/prefix",
                ));
            }
        }

        if let Some(url) = self.get_ignore_case("url") {
            if !url.starts_with("http://") && !url.starts_with("https://") {
                errors.push(ValidationError::warning(
                    Some("url"),
                    "URL should start with http:// or https://",
                ));
            }
        }

        if let Some(isbn) = self.get_ignore_case("isbn") {
            if !is_valid_isbn_shape(isbn) {
                errors.push(ValidationError::warning(
                    Some("isbn"),
                    "ISBN should have 10 or 13 digits",
                ));
            }
        }

        if let Some(month) = self.get_ignore_case("month") {
            if !is_valid_month(month) {
                errors.push(ValidationError::info(
                    Some("month"),
                    "Month should be a standard abbreviation (jan, feb, etc.) or full name",
                ));
            }
        }

        // Volume/number are usually numeric; ranges containing '-' pass.
        for field_name in &["volume", "number"] {
            if let Some(value) = self.get_ignore_case(field_name) {
                if value.parse::<i32>().is_err() && !value.contains('-') {
                    errors.push(ValidationError::info(
                        Some(field_name),
                        format!("{field_name} should typically be numeric"),
                    ));
                }
            }
        }
    }

    /// Strict-level check that `crossref`, when present, is not blank.
    /// (Whether the referenced key exists cannot be checked from a single
    /// entry.)
    fn validate_cross_references(&self, errors: &mut Vec<ValidationError>) {
        if let Some(crossref) = self.get_ignore_case("crossref") {
            if crossref.trim().is_empty() {
                errors.push(ValidationError::error(
                    Some("crossref"),
                    "Cross-reference is empty",
                ));
            }
        }
    }

    /// True when minimal (required-field) validation passes.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.validate(ValidationLevel::Minimal).is_ok()
    }

    /// Like [`Self::get`], with LaTeX escapes converted to Unicode.
    #[cfg(feature = "latex_to_unicode")]
    #[must_use]
    pub fn get_unicode(&self, name: &str) -> Option<String> {
        self.get(name).map(crate::latex_unicode::latex_to_unicode)
    }

    /// Like [`Self::get_ignore_case`], with LaTeX converted to Unicode.
    #[cfg(feature = "latex_to_unicode")]
    #[must_use]
    pub fn get_unicode_ignore_case(&self, name: &str) -> Option<String> {
        self.get_ignore_case(name)
            .map(crate::latex_unicode::latex_to_unicode)
    }

    /// Like [`Self::get_as_string`], with LaTeX converted to Unicode.
    #[cfg(feature = "latex_to_unicode")]
    #[must_use]
    pub fn get_as_unicode_string(&self, name: &str) -> Option<String> {
        self.get_as_string(name)
            .map(|s| crate::latex_unicode::latex_to_unicode(&s))
    }

    /// Like [`Self::get_as_string_ignore_case`], with LaTeX converted to
    /// Unicode.
    #[cfg(feature = "latex_to_unicode")]
    #[must_use]
    pub fn get_as_unicode_string_ignore_case(&self, name: &str) -> Option<String> {
        self.get_as_string_ignore_case(name)
            .map(|s| crate::latex_unicode::latex_to_unicode(&s))
    }

    /// All literal-valued fields as (name, unicode value) pairs; fields with
    /// non-literal values are skipped.
    #[cfg(feature = "latex_to_unicode")]
    #[must_use]
    pub fn fields_unicode(&self) -> Vec<(String, String)> {
        self.fields
            .iter()
            .filter_map(|f| {
                f.value.as_str().map(|s| {
                    (
                        f.name.to_string(),
                        crate::latex_unicode::latex_to_unicode(s),
                    )
                })
            })
            .collect()
    }

    /// Converts into a `'static` entry by cloning all borrowed data.
    #[must_use]
    pub fn into_owned(self) -> Entry<'static> {
        Entry {
            ty: self.ty.into_owned(),
            key: Cow::Owned(self.key.into_owned()),
            fields: self.fields.into_iter().map(Field::into_owned).collect(),
        }
    }
}
984
/// Entry type, covering classic BibTeX and biblatex types plus a
/// catch-all `Custom` variant for anything unrecognized.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EntryType<'a> {
    /// `@article` — journal article.
    Article,
    /// `@book` — single-volume book.
    Book,
    /// `@booklet` — bound work without a named publisher.
    Booklet,
    /// `@mvbook` — multi-volume book.
    MvBook,
    /// `@inbook` — part of a book.
    InBook,
    /// `@bookinbook` — originally standalone book published inside another.
    BookInBook,
    /// `@suppbook` — supplemental material in a book.
    SuppBook,
    /// `@collection` — single-volume edited collection.
    Collection,
    /// `@mvcollection` — multi-volume collection.
    MvCollection,
    /// `@incollection` — contribution in a collection.
    InCollection,
    /// `@suppcollection` — supplemental material in a collection.
    SuppCollection,
    /// `@inproceedings` (also parsed from `@conference`).
    InProceedings,
    /// `@proceedings` — conference proceedings volume.
    Proceedings,
    /// `@mvproceedings` — multi-volume proceedings.
    MvProceedings,
    /// `@reference` — reference work.
    Reference,
    /// `@inreference` — article in a reference work.
    InReference,
    /// `@manual` — technical documentation.
    Manual,
    /// `@mastersthesis` — master's thesis.
    MastersThesis,
    /// `@phdthesis` — doctoral thesis.
    PhdThesis,
    /// `@thesis` — generic thesis (biblatex).
    Thesis,
    /// `@techreport` — technical report.
    TechReport,
    /// `@report` — generic report (biblatex).
    Report,
    /// `@patent` — patent.
    Patent,
    /// `@periodical` — complete issue of a periodical.
    Periodical,
    /// `@online` — online resource.
    Online,
    /// `@software` — software release.
    Software,
    /// `@dataset` — data set.
    Dataset,
    /// `@set` — set of entries cited together (biblatex).
    Set,
    /// `@xdata` — data-inheritance container (biblatex).
    XData,
    /// `@unpublished` — work not formally published.
    Unpublished,
    /// `@misc` — anything else.
    Misc,
    /// Unrecognized type, stored verbatim.
    Custom(Cow<'a, str>),
}
1053
impl<'a> EntryType<'a> {
    /// Parses a type name (ASCII case-insensitive) into a known variant,
    /// falling back to `Custom` with the input borrowed verbatim.
    ///
    /// Dispatches on (length, lowercased first byte) so most non-matches
    /// are rejected without a full comparison.
    #[must_use]
    #[inline(never)]
    pub fn parse(s: &'a str) -> Self {
        let bytes = s.as_bytes();
        if bytes.is_empty() {
            return Self::Custom(Cow::Borrowed(s));
        }

        match (bytes.len(), ascii_lower(bytes[0])) {
            (3, b's') if eq_ascii_lower(bytes, b"set") => Self::Set,
            (4, b'b') if eq_ascii_lower(bytes, b"book") => Self::Book,
            (4, b'm') if eq_ascii_lower(bytes, b"misc") => Self::Misc,
            (6, b'i') if eq_ascii_lower(bytes, b"inbook") => Self::InBook,
            // Duplicate (6, b'm') keys are fine: guards are tried in order.
            (6, b'm') if eq_ascii_lower(bytes, b"manual") => Self::Manual,
            (6, b'm') if eq_ascii_lower(bytes, b"mvbook") => Self::MvBook,
            (6, b'o') if eq_ascii_lower(bytes, b"online") => Self::Online,
            (6, b'p') if eq_ascii_lower(bytes, b"patent") => Self::Patent,
            (6, b'r') if eq_ascii_lower(bytes, b"report") => Self::Report,
            (6, b't') if eq_ascii_lower(bytes, b"thesis") => Self::Thesis,
            (7, b'a') if eq_ascii_lower(bytes, b"article") => Self::Article,
            (7, b'b') if eq_ascii_lower(bytes, b"booklet") => Self::Booklet,
            (7, b'd') if eq_ascii_lower(bytes, b"dataset") => Self::Dataset,
            (8, b's') if eq_ascii_lower(bytes, b"software") => Self::Software,
            (8, b's') if eq_ascii_lower(bytes, b"suppbook") => Self::SuppBook,
            (9, b'r') if eq_ascii_lower(bytes, b"reference") => Self::Reference,
            (9, b'p') if eq_ascii_lower(bytes, b"phdthesis") => Self::PhdThesis,
            (10, b'b') if eq_ascii_lower(bytes, b"bookinbook") => Self::BookInBook,
            // Legacy BibTeX alias: "conference" maps to InProceedings.
            (10, b'c') if eq_ascii_lower(bytes, b"conference") => Self::InProceedings,
            (10, b'c') if eq_ascii_lower(bytes, b"collection") => Self::Collection,
            (10, b'p') if eq_ascii_lower(bytes, b"periodical") => Self::Periodical,
            (10, b't') if eq_ascii_lower(bytes, b"techreport") => Self::TechReport,
            (11, b'i') if eq_ascii_lower(bytes, b"inreference") => Self::InReference,
            (11, b'p') if eq_ascii_lower(bytes, b"proceedings") => Self::Proceedings,
            (11, b'u') if eq_ascii_lower(bytes, b"unpublished") => Self::Unpublished,
            (12, b'i') if eq_ascii_lower(bytes, b"incollection") => Self::InCollection,
            (12, b'm') if eq_ascii_lower(bytes, b"mvcollection") => Self::MvCollection,
            (13, b'i') if eq_ascii_lower(bytes, b"inproceedings") => Self::InProceedings,
            (13, b'm') if eq_ascii_lower(bytes, b"mastersthesis") => Self::MastersThesis,
            (13, b'm') if eq_ascii_lower(bytes, b"mvproceedings") => Self::MvProceedings,
            (14, b's') if eq_ascii_lower(bytes, b"suppcollection") => Self::SuppCollection,
            (5, b'x') if eq_ascii_lower(bytes, b"xdata") => Self::XData,
            _ => Self::Custom(Cow::Borrowed(s)),
        }
    }

    /// Flat list of required field names for this entry type
    /// (classic BibTeX view, no aliases).
    #[must_use]
    pub const fn required_fields(&self) -> &'static [&'static str] {
        match self {
            Self::Article => &["author", "title", "journal", "year"],
            Self::Book | Self::MvBook => &["author", "title", "publisher", "year"],
            Self::Booklet | Self::Manual => &["title"],
            Self::InBook | Self::BookInBook | Self::SuppBook => {
                &["author", "title", "chapter", "publisher", "year"]
            }
            Self::Collection | Self::MvCollection | Self::Reference => {
                &["editor", "title", "publisher", "year"]
            }
            Self::InCollection | Self::SuppCollection | Self::InReference => {
                &["author", "title", "booktitle", "publisher", "year"]
            }
            Self::InProceedings => &["author", "title", "booktitle", "year"],
            Self::Proceedings | Self::MvProceedings | Self::Periodical => &["title", "year"],
            Self::MastersThesis | Self::PhdThesis | Self::Thesis => {
                &["author", "title", "school", "year"]
            }
            Self::TechReport => &["author", "title", "institution", "year"],
            Self::Report => &["author", "title", "type", "institution", "year"],
            Self::Patent => &["author", "title", "number", "year"],
            Self::Online => &["title", "url"],
            Self::Software | Self::Dataset => &["author", "title", "year"],
            Self::Unpublished => &["author", "title", "note"],
            Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
        }
    }

    /// Required fields as alias groups: each inner slice is one requirement
    /// satisfied by any of its members (e.g. `year` OR `date`). Used by
    /// [`Entry::validate`].
    ///
    /// NOTE(review): `Entry::validate_required_fields` special-cases the
    /// exact group `["author", "editor"]`; the Collection group below is
    /// `["editor", "author"]` and therefore gets the generic alias message —
    /// confirm that asymmetry is intended.
    #[must_use]
    pub const fn required_field_groups(&self) -> &'static [&'static [&'static str]] {
        match self {
            Self::Article => &[
                &["author"],
                &["title"],
                &["journal", "journaltitle"],
                &["year", "date"],
            ],
            Self::Book | Self::MvBook => &[
                &["author", "editor"],
                &["title"],
                &["publisher"],
                &["year", "date"],
            ],
            Self::Booklet | Self::Manual => &[&["title"]],
            Self::InBook | Self::BookInBook | Self::SuppBook => &[
                &["author", "editor"],
                &["title"],
                &["chapter", "pages"],
                &["publisher"],
                &["year", "date"],
            ],
            Self::Collection | Self::MvCollection | Self::Reference => &[
                &["editor", "author"],
                &["title"],
                &["publisher"],
                &["year", "date"],
            ],
            Self::InCollection | Self::SuppCollection | Self::InReference => &[
                &["author", "editor"],
                &["title"],
                &["booktitle"],
                &["publisher"],
                &["year", "date"],
            ],
            Self::InProceedings => &[
                &["author", "editor"],
                &["title"],
                &["booktitle"],
                &["year", "date"],
            ],
            Self::Proceedings | Self::MvProceedings | Self::Periodical => {
                &[&["title"], &["year", "date"]]
            }
            Self::MastersThesis | Self::PhdThesis | Self::Thesis => &[
                &["author"],
                &["title"],
                &["school", "institution"],
                &["year", "date"],
            ],
            Self::TechReport => &[&["author"], &["title"], &["institution"], &["year", "date"]],
            Self::Report => &[
                &["author", "editor"],
                &["title"],
                &["type"],
                &["institution"],
                &["year", "date"],
            ],
            Self::Patent => &[&["author"], &["title"], &["number"], &["year", "date"]],
            Self::Online => &[&["title"], &["url", "doi"], &["year", "date", "urldate"]],
            Self::Software | Self::Dataset => &[
                &["author", "editor"],
                &["title"],
                &["year", "date", "version"],
            ],
            Self::Unpublished => &[&["author"], &["title"], &["note"]],
            Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
        }
    }

    /// The lowercase canonical name used when serializing (e.g. `article`).
    /// `Custom` types return their stored name verbatim.
    #[must_use]
    pub fn canonical_name(&self) -> &str {
        match self {
            Self::Article => "article",
            Self::Book => "book",
            Self::Booklet => "booklet",
            Self::MvBook => "mvbook",
            Self::InBook => "inbook",
            Self::BookInBook => "bookinbook",
            Self::SuppBook => "suppbook",
            Self::Collection => "collection",
            Self::MvCollection => "mvcollection",
            Self::InCollection => "incollection",
            Self::SuppCollection => "suppcollection",
            Self::InProceedings => "inproceedings",
            Self::Proceedings => "proceedings",
            Self::MvProceedings => "mvproceedings",
            Self::Reference => "reference",
            Self::InReference => "inreference",
            Self::Manual => "manual",
            Self::MastersThesis => "mastersthesis",
            Self::PhdThesis => "phdthesis",
            Self::Thesis => "thesis",
            Self::TechReport => "techreport",
            Self::Report => "report",
            Self::Patent => "patent",
            Self::Periodical => "periodical",
            Self::Online => "online",
            Self::Software => "software",
            Self::Dataset => "dataset",
            Self::Set => "set",
            Self::XData => "xdata",
            Self::Unpublished => "unpublished",
            Self::Misc => "misc",
            Self::Custom(s) => s,
        }
    }

    /// Alternative spellings accepted for this type.
    ///
    /// NOTE(review): for TechReport/MastersThesis/PhdThesis the listed
    /// "alias" equals the canonical name, so only InProceedings gains a real
    /// alias (`conference`) here — verify this is what callers expect.
    #[must_use]
    pub const fn aliases(&self) -> &'static [&'static str] {
        match self {
            Self::InProceedings => &["conference"],
            Self::TechReport => &["techreport"],
            Self::MastersThesis => &["mastersthesis"],
            Self::PhdThesis => &["phdthesis"],
            _ => &[],
        }
    }

    /// True for the 13 entry types defined by classic BibTeX.
    #[must_use]
    pub const fn is_classic_bibtex(&self) -> bool {
        matches!(
            self,
            Self::Article
                | Self::Book
                | Self::Booklet
                | Self::InBook
                | Self::InCollection
                | Self::InProceedings
                | Self::Manual
                | Self::MastersThesis
                | Self::PhdThesis
                | Self::Proceedings
                | Self::TechReport
                | Self::Unpublished
                | Self::Misc
        )
    }

    /// True for known biblatex-only extensions (not classic, not `Custom`).
    #[must_use]
    pub const fn is_extended(&self) -> bool {
        !self.is_classic_bibtex() && !matches!(self, Self::Custom(_))
    }

    /// Converts into a `'static` type; only `Custom` holds borrowed data.
    #[must_use]
    pub fn into_owned(self) -> EntryType<'static> {
        match self {
            Self::Custom(s) => EntryType::Custom(Cow::Owned(s.into_owned())),
            Self::Article => EntryType::Article,
            Self::Book => EntryType::Book,
            Self::Booklet => EntryType::Booklet,
            Self::MvBook => EntryType::MvBook,
            Self::InBook => EntryType::InBook,
            Self::BookInBook => EntryType::BookInBook,
            Self::SuppBook => EntryType::SuppBook,
            Self::Collection => EntryType::Collection,
            Self::MvCollection => EntryType::MvCollection,
            Self::InCollection => EntryType::InCollection,
            Self::SuppCollection => EntryType::SuppCollection,
            Self::InProceedings => EntryType::InProceedings,
            Self::Proceedings => EntryType::Proceedings,
            Self::MvProceedings => EntryType::MvProceedings,
            Self::Reference => EntryType::Reference,
            Self::InReference => EntryType::InReference,
            Self::Manual => EntryType::Manual,
            Self::MastersThesis => EntryType::MastersThesis,
            Self::PhdThesis => EntryType::PhdThesis,
            Self::Thesis => EntryType::Thesis,
            Self::TechReport => EntryType::TechReport,
            Self::Report => EntryType::Report,
            Self::Patent => EntryType::Patent,
            Self::Periodical => EntryType::Periodical,
            Self::Online => EntryType::Online,
            Self::Software => EntryType::Software,
            Self::Dataset => EntryType::Dataset,
            Self::Set => EntryType::Set,
            Self::XData => EntryType::XData,
            Self::Unpublished => EntryType::Unpublished,
            Self::Misc => EntryType::Misc,
        }
    }
}
1324
/// Lowercases a single ASCII byte; every other byte passes through unchanged.
#[inline]
const fn ascii_lower(byte: u8) -> u8 {
    match byte {
        // Setting bit 0x20 maps 'A'..='Z' onto 'a'..='z'.
        b'A'..=b'Z' => byte | 0x20,
        _ => byte,
    }
}
1333
/// ASCII case-insensitive byte-slice comparison (callers pass `expected`
/// already lowercase). Delegates to the standard library instead of the
/// previous hand-rolled index loop; length mismatch returns `false`.
#[inline]
fn eq_ascii_lower(input: &[u8], expected: &[u8]) -> bool {
    input.eq_ignore_ascii_case(expected)
}
1350
1351impl fmt::Display for EntryType<'_> {
1352 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1353 f.write_str(self.canonical_name())
1354 }
1355}
1356
/// One `name = value` pair inside an entry.
#[derive(Debug, Clone, PartialEq)]
pub struct Field<'a> {
    /// Field name, borrowed from the source where possible.
    pub name: Cow<'a, str>,
    /// The field's (possibly unexpanded) value.
    pub value: Value<'a>,
}
1365
1366impl<'a> Field<'a> {
1367 #[must_use]
1369 pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1370 Self {
1371 name: Cow::Borrowed(name),
1372 value,
1373 }
1374 }
1375
1376 #[must_use]
1378 pub fn name_eq_ignore_case(&self, name: &str) -> bool {
1379 self.name.eq_ignore_ascii_case(name)
1380 }
1381
1382 #[must_use]
1384 pub fn into_owned(self) -> Field<'static> {
1385 Field {
1386 name: Cow::Owned(self.name.into_owned()),
1387 value: self.value.into_owned(),
1388 }
1389 }
1390}
1391
/// A field value as written in the source: literal text, a bare number,
/// a string-variable reference, or a `#`-concatenation of parts.
#[derive(Debug, Clone, PartialEq)]
pub enum Value<'a> {
    /// Plain (braced or quoted) text.
    Literal(Cow<'a, str>),
    /// Bare integer value.
    Number(i64),
    /// Concatenation of sub-values joined with `#` in the source.
    Concat(Box<[Self]>),
    /// Reference to an `@string` variable, resolved via [`Value::expand`].
    Variable(Cow<'a, str>),
}
1408
impl Default for Value<'_> {
    /// Defaults to the number `0` — the only variant with no borrowed data.
    fn default() -> Self {
        Self::Number(0)
    }
}
1414
1415impl Value<'_> {
1416 #[must_use]
1418 pub fn as_str(&self) -> Option<&str> {
1419 match self {
1420 Self::Literal(s) => Some(s),
1421 _ => None,
1422 }
1423 }
1424
1425 #[must_use]
1427 pub fn expand(&self, strings: &AHashMap<&str, Value>) -> String {
1428 match self {
1429 Self::Literal(s) => normalize_text_projection(s),
1430 Self::Number(n) => n.to_string(),
1431 Self::Variable(name) => strings
1432 .get(name.as_ref())
1433 .map_or_else(|| format!("{{undefined:{name}}}"), |v| v.expand(strings)),
1434 Self::Concat(parts) => parts.iter().map(|p| p.expand(strings)).collect::<String>(),
1435 }
1436 }
1437
1438 #[must_use]
1444 pub fn to_plain_string(&self) -> String {
1445 value_to_plain_string(self)
1446 }
1447
1448 #[must_use]
1450 pub fn to_lossy_string(&self) -> String {
1451 value_to_lossy_string(self)
1452 }
1453
1454 #[must_use]
1456 pub fn from_plain_string<'a>(text: impl Into<Cow<'a, str>>) -> Value<'a> {
1457 Value::Literal(text.into())
1458 }
1459
1460 #[must_use]
1465 pub fn to_bibtex_source(&self) -> String {
1466 match self {
1467 Self::Literal(text) => format!("{{{text}}}"),
1468 Self::Number(number) => number.to_string(),
1469 Self::Variable(name) => name.to_string(),
1470 Self::Concat(parts) => parts
1471 .iter()
1472 .map(Self::to_bibtex_source)
1473 .collect::<Vec<_>>()
1474 .join(" # "),
1475 }
1476 }
1477
1478 #[cfg(feature = "latex_to_unicode")]
1480 #[must_use]
1481 pub fn to_unicode_plain_string(&self) -> String {
1482 crate::latex_unicode::latex_to_unicode(&self.to_plain_string())
1483 }
1484
1485 #[must_use]
1487 pub fn into_owned(self) -> Value<'static> {
1488 match self {
1489 Self::Literal(s) => Value::Literal(Cow::Owned(s.into_owned())),
1490 Self::Number(n) => Value::Number(n),
1491 Self::Variable(s) => Value::Variable(Cow::Owned(s.into_owned())),
1492 Self::Concat(parts) => Value::Concat(
1493 parts
1494 .into_vec()
1495 .into_iter()
1496 .map(Value::into_owned)
1497 .collect::<Vec<_>>()
1498 .into_boxed_slice(),
1499 ),
1500 }
1501 }
1502}
1503
1504impl fmt::Display for Value<'_> {
1505 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1506 match self {
1507 Self::Literal(s) => write!(f, "{s}"),
1508 Self::Number(n) => write!(f, "{n}"),
1509 Self::Variable(name) => write!(f, "{{{name}}}"),
1510 Self::Concat(parts) => {
1511 for (i, part) in parts.iter().enumerate() {
1512 if i > 0 {
1513 write!(f, " # ")?;
1514 }
1515 write!(f, "{part}")?;
1516 }
1517 Ok(())
1518 }
1519 }
1520 }
1521}
1522
1523fn value_to_lossy_string(value: &Value<'_>) -> String {
1524 match value {
1525 Value::Literal(s) => normalize_text_projection(s),
1526 Value::Number(n) => n.to_string(),
1527 Value::Variable(v) => format!("{{{v}}}"),
1528 Value::Concat(parts) => parts.iter().map(value_to_lossy_string).collect(),
1529 }
1530}
1531
1532fn value_to_plain_string(value: &Value<'_>) -> String {
1533 match value {
1534 Value::Literal(text) => normalize_text_projection(text),
1535 Value::Number(number) => number.to_string(),
1536 Value::Variable(name) => name.to_string(),
1537 Value::Concat(parts) => parts.iter().map(value_to_plain_string).collect(),
1538 }
1539}
1540
/// Normalizes line breaks in literal text: `\r\n`, `\r`, and `\n` all
/// become a single `\n`, and any spaces/tabs that follow a break (line
/// continuation indentation) are dropped.
pub(crate) fn normalize_text_projection(text: &str) -> String {
    // Fast path: return the text unchanged when it has no line breaks.
    let has_break = text.bytes().any(|byte| byte == b'\n' || byte == b'\r');
    if !has_break {
        return text.to_string();
    }

    let mut output = String::with_capacity(text.len());
    let mut stream = text.chars().peekable();
    while let Some(current) = stream.next() {
        if current == '\r' || current == '\n' {
            // Collapse a CRLF pair into one logical break.
            if current == '\r' && stream.peek() == Some(&'\n') {
                stream.next();
            }
            output.push('\n');
            // Swallow indentation that follows the break.
            while matches!(stream.peek(), Some(&' ') | Some(&'\t')) {
                stream.next();
            }
        } else {
            output.push(current);
        }
    }
    output
}
1574
/// Normalizes a DOI string.
///
/// Strips a leading resolver URL or `doi:` label (case-insensitively),
/// drops trailing sentence punctuation, and lowercases the identifier.
/// Returns `None` when the remainder does not look like a DOI
/// (`10.<registrant>/<suffix>`).
#[must_use]
pub fn normalize_doi(input: &str) -> Option<String> {
    let mut doi = input.trim();
    if doi.is_empty() {
        return None;
    }

    // Match prefixes case-insensitively so "DOI:", "Doi:" and mixed-case
    // resolver URLs are all accepted (the old code only matched exact
    // spellings, so e.g. "Doi:" slipped through).
    for prefix in [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ] {
        // `get` keeps us safe on non-char-boundary byte indices.
        let matches_prefix = doi
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix));
        if matches_prefix {
            doi = doi[prefix.len()..].trim();
            break;
        }
    }

    // Drop punctuation commonly pasted along with a DOI.
    let doi = doi.trim_end_matches(['.', ',', ';']);
    if doi.len() > 3 && doi.starts_with("10.") && doi.contains('/') {
        Some(doi.to_ascii_lowercase())
    } else {
        None
    }
}
1604
1605fn resource_field_from_parts(
1606 field_name: &str,
1607 value: String,
1608 archive_prefix: Option<&str>,
1609) -> Option<ResourceField> {
1610 let mut kind = classify_resource_field(field_name)?;
1611 if kind == ResourceKind::Eprint
1612 && archive_prefix.is_some_and(|prefix| prefix.eq_ignore_ascii_case("arxiv"))
1613 {
1614 kind = ResourceKind::Arxiv;
1615 }
1616 let normalized = normalize_resource_value(kind, &value);
1617 Some(ResourceField {
1618 kind,
1619 field_name: field_name.to_string(),
1620 value,
1621 normalized,
1622 })
1623}
1624
1625fn normalize_resource_value(kind: ResourceKind, value: &str) -> Option<String> {
1626 let trimmed = value.trim();
1627 if trimmed.is_empty() {
1628 return None;
1629 }
1630
1631 match kind {
1632 ResourceKind::Doi => normalize_doi(trimmed),
1633 ResourceKind::Pmid => normalize_ascii_digits(trimmed),
1634 ResourceKind::Pmcid => Some(normalize_pmcid(trimmed)),
1635 ResourceKind::Isbn => normalize_isbn(trimmed),
1636 ResourceKind::Issn => normalize_issn(trimmed),
1637 ResourceKind::Arxiv => Some(normalize_arxiv(trimmed)),
1638 ResourceKind::File | ResourceKind::Url | ResourceKind::Eprint | ResourceKind::Crossref => {
1639 Some(trimmed.to_string())
1640 }
1641 }
1642}
1643
/// Returns the trimmed input when it consists solely of ASCII digits,
/// `None` otherwise. (An empty string vacuously passes.)
fn normalize_ascii_digits(input: &str) -> Option<String> {
    let digits = input.trim();
    if digits.bytes().all(|byte| byte.is_ascii_digit()) {
        Some(digits.to_string())
    } else {
        None
    }
}
1651
/// Normalizes a PubMed Central identifier to the canonical `PMC<digits>`
/// form, stripping any leading `pmcid:` label case-insensitively.
fn normalize_pmcid(input: &str) -> String {
    let mut compact = input.trim();
    // Strip leading "pmcid:" labels regardless of case (the old code only
    // matched the exact spellings "pmcid:" and "PMCID:").
    while compact
        .get(..6)
        .is_some_and(|label| label.eq_ignore_ascii_case("pmcid:"))
    {
        // Safe slice: the matched label is pure ASCII.
        compact = compact[6..].trim();
    }

    if compact
        .get(..3)
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case("pmc"))
    {
        compact.to_ascii_uppercase()
    } else {
        format!("PMC{compact}")
    }
}
1667
1668fn normalize_isbn(input: &str) -> Option<String> {
1669 let compact = input
1670 .chars()
1671 .filter(|ch| !matches!(ch, '-' | ' '))
1672 .collect::<String>()
1673 .to_ascii_uppercase();
1674 is_valid_isbn_shape(&compact).then_some(compact)
1675}
1676
/// Strips separators from an ISSN and validates its shape: exactly eight
/// characters, all digits except for an optional final check digit `X`.
fn normalize_issn(input: &str) -> Option<String> {
    let compact: String = input
        .chars()
        .filter(|ch| *ch != '-' && *ch != ' ')
        .map(|ch| ch.to_ascii_uppercase())
        .collect();

    if compact.len() != 8 {
        return None;
    }
    let shape_ok = compact
        .chars()
        .enumerate()
        .all(|(position, ch)| ch.is_ascii_digit() || (position == 7 && ch == 'X'));
    shape_ok.then_some(compact)
}
1690
/// Normalizes an arXiv identifier by stripping any leading `arXiv:` label
/// case-insensitively, then trimming whitespace.
fn normalize_arxiv(input: &str) -> String {
    let mut compact = input.trim();
    // Strip leading "arXiv:" labels regardless of case (the old code only
    // matched the exact spellings "arXiv:" and "arxiv:", so e.g. "ARXIV:"
    // was left in place).
    while compact
        .get(..6)
        .is_some_and(|label| label.eq_ignore_ascii_case("arxiv:"))
    {
        // Safe slice: the matched label is pure ASCII.
        compact = compact[6..].trim();
    }
    compact.to_string()
}
1699
/// Repeatedly strips one matching pair of outer `{...}` braces or `"..."`
/// quotes (plus surrounding whitespace) until the value is bare.
fn trim_bibtex_scalar(input: &str) -> &str {
    let mut current = input.trim();
    while current.len() >= 2 {
        let bytes = current.as_bytes();
        let last = current.len() - 1;
        let wrapped = (bytes[0] == b'{' && bytes[last] == b'}')
            || (bytes[0] == b'"' && bytes[last] == b'"');
        if !wrapped {
            break;
        }
        // Both delimiters are ASCII, so the byte slice is char-safe.
        current = current[1..last].trim();
    }
    current
}
1714
1715fn parse_year(input: &str) -> std::result::Result<i32, DateParseError> {
1716 let input = input.trim();
1717 if input.len() != 4 || !input.chars().all(|ch| ch.is_ascii_digit()) {
1718 return Err(DateParseError::InvalidYear);
1719 }
1720 input
1721 .parse::<i32>()
1722 .map_err(|_| DateParseError::InvalidYear)
1723}
1724
1725fn parse_month_number(input: &str) -> Option<u8> {
1726 let normalized = trim_bibtex_scalar(input).to_ascii_lowercase();
1727 if normalized.is_empty() {
1728 return None;
1729 }
1730
1731 if let Ok(month) = normalized.parse::<u8>() {
1732 return (1..=12).contains(&month).then_some(month);
1733 }
1734
1735 match normalized.as_str() {
1736 "jan" | "january" => Some(1),
1737 "feb" | "february" => Some(2),
1738 "mar" | "march" => Some(3),
1739 "apr" | "april" => Some(4),
1740 "may" => Some(5),
1741 "jun" | "june" => Some(6),
1742 "jul" | "july" => Some(7),
1743 "aug" | "august" => Some(8),
1744 "sep" | "sept" | "september" => Some(9),
1745 "oct" | "october" => Some(10),
1746 "nov" | "november" => Some(11),
1747 "dec" | "december" => Some(12),
1748 _ => None,
1749 }
1750}
1751
1752fn parse_day_number(input: &str, year: i32, month: u8) -> std::result::Result<u8, DateParseError> {
1753 let input = input.trim();
1754 if input.is_empty() || input.len() > 2 || !input.chars().all(|ch| ch.is_ascii_digit()) {
1755 return Err(DateParseError::InvalidDay);
1756 }
1757 let day = input
1758 .parse::<u8>()
1759 .map_err(|_| DateParseError::InvalidDay)?;
1760 (1..=days_in_month(year, month))
1761 .contains(&day)
1762 .then_some(day)
1763 .ok_or(DateParseError::InvalidDay)
1764}
1765
/// Returns the number of days in `month` of `year`, or 0 for an invalid
/// month number.
const fn days_in_month(year: i32, month: u8) -> u8 {
    match month {
        4 | 6 | 9 | 11 => 30,
        2 => {
            if is_leap_year(year) {
                29
            } else {
                28
            }
        }
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        _ => 0,
    }
}

/// Gregorian leap-year rule: divisible by 4, except centuries not
/// divisible by 400.
const fn is_leap_year(year: i32) -> bool {
    year % 400 == 0 || (year % 4 == 0 && year % 100 != 0)
}
1779
/// Checks whether `isbn` (separators allowed) has a valid ISBN-10 shape
/// (nine digits plus a digit-or-`X` check character) or ISBN-13 shape
/// (thirteen digits).
fn is_valid_isbn_shape(isbn: &str) -> bool {
    let compact: Vec<char> = isbn.chars().filter(|c| *c != '-' && *c != ' ').collect();

    match compact.len() {
        10 => {
            let body_ok = compact[..9].iter().all(char::is_ascii_digit);
            let check = compact[9];
            body_ok && (check.is_ascii_digit() || check == 'x' || check == 'X')
        }
        13 => compact.iter().all(char::is_ascii_digit),
        _ => false,
    }
}
1792
/// Splits a BibTeX author/editor list on the keyword `and` at brace depth 0.
///
/// Separators inside `{...}` groups are ignored, so a braced group such as
/// `{Barnes and Noble}` stays part of a single name. Empty segments are
/// dropped.
fn split_bibtex_names(input: &str) -> Vec<&str> {
    let mut names = Vec::new();
    let mut start = 0;
    let mut depth = 0usize;
    let mut iter = input.char_indices().peekable();

    while let Some((index, ch)) = iter.next() {
        match ch {
            '{' => depth += 1,
            '}' => depth = depth.saturating_sub(1),
            // A candidate separator begins with 'a'/'A';
            // `starts_name_separator` confirms it spells "and" (any case)
            // bounded by whitespace on both sides.
            'a' | 'A' if depth == 0 && starts_name_separator(input, index) => {
                let candidate = input[start..index].trim();
                if !candidate.is_empty() {
                    names.push(candidate);
                }
                // Advance past the 3-byte "and" keyword...
                start = index + 3;
                // ...then past any whitespace following it, so the next
                // segment starts at real content.
                while input[start..]
                    .chars()
                    .next()
                    .is_some_and(char::is_whitespace)
                {
                    start += input[start..].chars().next().map_or(0, char::len_utf8);
                }
                // Keep the char iterator in step with `start` so the
                // skipped whitespace is not re-scanned for separators.
                while iter
                    .peek()
                    .is_some_and(|(_, next_ch)| next_ch.is_whitespace())
                {
                    iter.next();
                }
            }
            _ => {}
        }
    }

    // Whatever remains after the last separator is the final name.
    let candidate = input[start..].trim();
    if !candidate.is_empty() {
        names.push(candidate);
    }

    names
}
1834
/// Reports whether the word starting at byte `index` of `input` is the
/// BibTeX name separator "and" (any case), bounded by whitespace or the
/// string edges on both sides.
fn starts_name_separator(input: &str, index: usize) -> bool {
    let tail = &input[index..];
    // `get` also rejects a cut through a multi-byte character.
    match tail.get(..3) {
        Some(word) if word.eq_ignore_ascii_case("and") => {}
        _ => return false,
    }

    let boundary_before = input[..index]
        .chars()
        .next_back()
        .map_or(true, char::is_whitespace);
    let boundary_after = tail[3..].chars().next().map_or(true, char::is_whitespace);
    boundary_before && boundary_after
}
1852
/// Parses one BibTeX person name in any of its comma forms.
///
/// Handles:
/// - `{Literal Name}` — a fully braced name kept as a single literal
///   (also stored as the last name),
/// - `First von Last` (no comma),
/// - `von Last, First` (one comma),
/// - `von Last, Jr, First` (two or more commas; parts beyond the third
///   are ignored).
fn parse_single_name(input: &str) -> PersonName {
    let raw = input.trim();
    if let Some(literal) = braced_literal_name(raw) {
        return person_name(
            raw,
            String::new(),
            String::new(),
            literal.clone(),
            String::new(),
            Some(literal),
        );
    }

    // Commas inside braces do not count as form separators.
    let parts = split_top_level_commas(input);
    match parts.as_slice() {
        [last] => parse_first_von_last(last),
        [last, first] => {
            let (von, last) = split_von_last(last);
            person_name(
                raw,
                normalize_name_part(first),
                von,
                last,
                String::new(),
                None,
            )
        }
        [last, jr, first, ..] => {
            let (von, last) = split_von_last(last);
            person_name(
                raw,
                normalize_name_part(first),
                von,
                last,
                normalize_name_part(jr),
                None,
            )
        }
        [] => empty_person_name(raw),
    }
}
1894
/// Parses the no-comma name form `First von Last`.
///
/// The "von" particle starts at the first word beginning with a lowercase
/// letter; the last name starts at the next word beginning with an
/// uppercase letter after that (or is the final word if none follows).
/// With no lowercase word at all, the final word is the last name and
/// everything before it is the first name.
fn parse_first_von_last(input: &str) -> PersonName {
    let raw = input.trim();
    let words = split_name_words(input);
    match words.len() {
        0 => empty_person_name(raw),
        // A single word is the last name.
        1 => person_name(
            raw,
            String::new(),
            String::new(),
            normalize_name_part(words[0]),
            String::new(),
            None,
        ),
        _ => {
            let von_start = words
                .iter()
                .position(|word| starts_with_lowercase_letter(word));
            let (first, von, last) = von_start.map_or_else(
                || {
                    // No lowercase word: all but the final word form the
                    // first name.
                    (
                        join_name_words(&words[..words.len() - 1]),
                        String::new(),
                        normalize_name_part(words[words.len() - 1]),
                    )
                },
                |von_start| {
                    // The last name begins at the first capitalized word
                    // after the von particle; if every remaining word is
                    // lowercase, the final word serves as the last name.
                    let last_start = words[von_start + 1..]
                        .iter()
                        .position(|word| !starts_with_lowercase_letter(word))
                        .map_or(words.len() - 1, |offset| von_start + 1 + offset);

                    (
                        join_name_words(&words[..von_start]),
                        join_name_words(&words[von_start..last_start]),
                        join_name_words(&words[last_start..]),
                    )
                },
            );

            person_name(raw, first, von, last, String::new(), None)
        }
    }
}
1938
1939fn person_name(
1940 raw: &str,
1941 first: String,
1942 von: String,
1943 last: String,
1944 jr: String,
1945 literal: Option<String>,
1946) -> PersonName {
1947 let given = split_component_tokens(&first);
1948 let family = split_component_tokens(&last);
1949 let prefix = split_component_tokens(&von);
1950 let suffix = split_component_tokens(&jr);
1951 PersonName {
1952 raw: raw.to_string(),
1953 first,
1954 von,
1955 last,
1956 jr,
1957 given,
1958 family,
1959 prefix,
1960 suffix,
1961 literal,
1962 }
1963}
1964
1965fn empty_person_name(raw: &str) -> PersonName {
1966 person_name(
1967 raw,
1968 String::new(),
1969 String::new(),
1970 String::new(),
1971 String::new(),
1972 None,
1973 )
1974}
1975
1976fn split_component_tokens(input: &str) -> Vec<String> {
1977 split_name_words(input)
1978 .into_iter()
1979 .map(normalize_name_part)
1980 .filter(|part| !part.is_empty())
1981 .collect()
1982}
1983
/// Splits the part before the first comma (`von Last`) into the lowercase
/// "von" prefix and the family name.
///
/// The split falls after the LAST word that starts with a lowercase
/// letter, provided at least one word follows it. Otherwise the final
/// word alone becomes the family name and any preceding words become the
/// prefix (even when they are capitalized).
fn split_von_last(input: &str) -> (String, String) {
    let words = split_name_words(input);
    if words.is_empty() {
        return (String::new(), String::new());
    }

    if let Some(last_start) = words
        .iter()
        .rposition(|word| starts_with_lowercase_letter(word))
    {
        // Only split here if something follows the lowercase run;
        // a trailing lowercase word falls through to the default below.
        if last_start + 1 < words.len() {
            return (
                join_name_words(&words[..=last_start]),
                join_name_words(&words[last_start + 1..]),
            );
        }
    }

    if words.len() == 1 {
        (String::new(), normalize_name_part(words[0]))
    } else {
        (
            join_name_words(&words[..words.len() - 1]),
            normalize_name_part(words[words.len() - 1]),
        )
    }
}
2011
/// Splits `input` on commas that sit outside any `{...}` group, trimming
/// each segment. Always yields at least one segment.
fn split_top_level_commas(input: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    let mut segment_start = 0usize;
    let mut brace_depth = 0usize;

    for (position, symbol) in input.char_indices() {
        if symbol == '{' {
            brace_depth += 1;
        } else if symbol == '}' {
            brace_depth = brace_depth.saturating_sub(1);
        } else if symbol == ',' && brace_depth == 0 {
            segments.push(input[segment_start..position].trim());
            segment_start = position + 1;
        }
    }

    segments.push(input[segment_start..].trim());
    segments
}
2032
/// Splits a name into words on whitespace at brace depth 0, so braced
/// groups such as `{van der}` stay single words. Empty words are dropped.
fn split_name_words(input: &str) -> Vec<&str> {
    let mut collected = Vec::new();
    let mut word_begin: Option<usize> = None;
    let mut brace_depth = 0usize;

    for (position, symbol) in input.char_indices() {
        if symbol == '{' {
            brace_depth += 1;
            if word_begin.is_none() {
                word_begin = Some(position);
            }
        } else if symbol == '}' {
            // A closing brace never starts a word on its own.
            brace_depth = brace_depth.saturating_sub(1);
        } else if brace_depth == 0 && symbol.is_whitespace() {
            if let Some(begin) = word_begin.take() {
                collected.push(input[begin..position].trim());
            }
        } else if word_begin.is_none() {
            word_begin = Some(position);
        }
    }

    if let Some(begin) = word_begin {
        collected.push(input[begin..].trim());
    }

    collected.retain(|word| !word.is_empty());
    collected
}
2064
2065fn join_name_words(words: &[&str]) -> String {
2066 words
2067 .iter()
2068 .map(|word| normalize_name_part(word))
2069 .filter(|word| !word.is_empty())
2070 .collect::<Vec<_>>()
2071 .join(" ")
2072}
2073
/// Trims a name part and removes one outer `{...}` brace pair when the
/// whole part is wrapped in one.
fn normalize_name_part(input: &str) -> String {
    let trimmed = input.trim();
    let wrapped = trimmed.len() >= 2 && trimmed.starts_with('{') && trimmed.ends_with('}');
    if wrapped {
        // Braces are ASCII, so the byte slice is char-safe.
        return trimmed[1..trimmed.len() - 1].trim().to_string();
    }
    trimmed.to_string()
}
2082
2083fn braced_literal_name(input: &str) -> Option<String> {
2084 let trimmed = input.trim();
2085 if trimmed.len() < 2 || !trimmed.starts_with('{') || !trimmed.ends_with('}') {
2086 return None;
2087 }
2088
2089 let mut depth = 0usize;
2090 for (index, ch) in trimmed.char_indices() {
2091 match ch {
2092 '{' => depth += 1,
2093 '}' => {
2094 depth = depth.saturating_sub(1);
2095 if depth == 0 && index != trimmed.len() - 1 {
2096 return None;
2097 }
2098 }
2099 _ => {}
2100 }
2101 }
2102
2103 (depth == 0).then(|| normalize_name_part(trimmed))
2104}
2105
2106fn starts_with_lowercase_letter(input: &str) -> bool {
2107 normalize_name_part(input)
2108 .chars()
2109 .find(|ch| ch.is_alphabetic())
2110 .is_some_and(char::is_lowercase)
2111}
2112
/// Validates a BibTeX `pages` value: a single page number, or a
/// comma-separated list of entries each optionally containing one `--`
/// (or single `-`) range with non-empty endpoints.
fn is_valid_page_range(pages: &str) -> bool {
    // Trim once up front so values like " 123 " validate; the old code
    // trimmed only for the emptiness check and then ran the all-digits
    // test on the untrimmed string.
    let pages = pages.trim();
    if pages.is_empty() {
        return false;
    }

    // A plain page number is always valid.
    if pages.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    // Anything else must at least contain a range or list separator.
    if !pages.contains('-') && !pages.contains(',') {
        return false;
    }

    for range in pages.split(',') {
        let range = range.trim();
        if range.is_empty() {
            continue;
        }

        // Prefer the BibTeX "--" range form; fall back to a single dash.
        if range.contains("--") {
            let parts: Vec<&str> = range.split("--").collect();
            if parts.len() != 2 || parts.iter().any(|p| p.trim().is_empty()) {
                return false;
            }
        } else if range.contains('-') {
            let parts: Vec<&str> = range.split('-').collect();
            if parts.len() != 2 || parts.iter().any(|p| p.trim().is_empty()) {
                return false;
            }
        }
    }

    true
}
2155
/// Validates a month value: an English month name or abbreviation
/// (case-insensitive) or a number in `1..=12`.
///
/// Accepts "sept" as well, keeping this predicate consistent with
/// `parse_month_number`, and trims surrounding whitespace so values like
/// `" 5 "` validate.
fn is_valid_month(month: &str) -> bool {
    let month = month.trim();
    // Month names are ASCII, so ASCII lowering suffices.
    let month_lower = month.to_ascii_lowercase();

    matches!(
        month_lower.as_str(),
        "jan"
            | "feb"
            | "mar"
            | "apr"
            | "may"
            | "jun"
            | "jul"
            | "aug"
            | "sep"
            | "sept"
            | "oct"
            | "nov"
            | "dec"
            | "january"
            | "february"
            | "march"
            | "april"
            | "june"
            | "july"
            | "august"
            | "september"
            | "october"
            | "november"
            | "december"
    ) || month.parse::<i32>().is_ok_and(|m| (1..=12).contains(&m))
}