1use ahash::AHashMap;
4use memchr::memchr2;
5use std::borrow::Cow;
6use std::fmt;
7
/// How thorough [`Entry::validate`] should be.
///
/// Required-field checks run at every level; higher levels add more checks.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ValidationLevel {
    /// Required-field checks only.
    Minimal,
    /// Required-field checks plus common-issue checks. This is the default.
    #[default]
    Standard,
    /// Everything in `Standard` plus field-format and cross-reference checks.
    Strict,
}
19
20#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct ValidationError {
23 pub field: Option<String>,
25 pub message: String,
27 pub severity: ValidationSeverity,
29}
30
/// Severity attached to a [`ValidationError`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValidationSeverity {
    /// A definite problem.
    Error,
    /// Something suspicious that may still be intentional.
    Warning,
    /// An informational hint.
    Info,
}
41
42impl ValidationError {
43 #[must_use]
45 pub fn error(field: Option<&str>, message: impl Into<String>) -> Self {
46 Self {
47 field: field.map(String::from),
48 message: message.into(),
49 severity: ValidationSeverity::Error,
50 }
51 }
52
53 #[must_use]
55 pub fn warning(field: Option<&str>, message: impl Into<String>) -> Self {
56 Self {
57 field: field.map(String::from),
58 message: message.into(),
59 severity: ValidationSeverity::Warning,
60 }
61 }
62
63 #[must_use]
65 pub fn info(field: Option<&str>, message: impl Into<String>) -> Self {
66 Self {
67 field: field.map(String::from),
68 message: message.into(),
69 severity: ValidationSeverity::Info,
70 }
71 }
72}
73
74impl fmt::Display for ValidationError {
75 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76 let field = self.field.as_deref().unwrap_or("<entry>");
77 write!(f, "[{:?}] {}: {}", self.severity, field, self.message)
78 }
79}
80
/// A person name split into components.
///
/// `first`, `von`, `last` and `jr` are the parts joined by
/// [`PersonName::display_name`]; `literal`, when set, overrides them.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PersonName {
    /// Name text as supplied (presumably the unparsed source; confirm against the parser).
    pub raw: String,
    /// First-name part.
    pub first: String,
    /// "von" part, displayed between `first` and `last`.
    pub von: String,
    /// Last-name part.
    pub last: String,
    /// "jr" part, displayed after a comma.
    pub jr: String,
    /// Given-name tokens (NOTE(review): relationship to `first` is not visible here).
    pub given: Vec<String>,
    /// Family-name tokens (NOTE(review): relationship to `last` is not visible here).
    pub family: Vec<String>,
    /// Prefix tokens (NOTE(review): relationship to `von` is not visible here).
    pub prefix: Vec<String>,
    /// Suffix tokens (NOTE(review): relationship to `jr` is not visible here).
    pub suffix: Vec<String>,
    /// Name to display verbatim instead of the split parts, if any.
    pub literal: Option<String>,
}
109
110impl PersonName {
111 #[must_use]
113 pub fn display_name(&self) -> String {
114 if let Some(literal) = &self.literal {
115 return literal.clone();
116 }
117
118 let mut parts = Vec::new();
119 if !self.first.is_empty() {
120 parts.push(self.first.as_str());
121 }
122 if !self.von.is_empty() {
123 parts.push(self.von.as_str());
124 }
125 if !self.last.is_empty() {
126 parts.push(self.last.as_str());
127 }
128
129 let mut name = parts.join(" ");
130 if !self.jr.is_empty() {
131 if !name.is_empty() {
132 name.push_str(", ");
133 }
134 name.push_str(&self.jr);
135 }
136 name
137 }
138
139 #[must_use]
141 pub fn is_empty(&self) -> bool {
142 self.raw.is_empty()
143 && self.first.is_empty()
144 && self.von.is_empty()
145 && self.last.is_empty()
146 && self.jr.is_empty()
147 && self.literal.is_none()
148 }
149
150 #[must_use]
152 pub const fn is_literal(&self) -> bool {
153 self.literal.is_some()
154 }
155
156 #[cfg(feature = "latex_to_unicode")]
158 #[must_use]
159 pub fn unicode_display_name(&self) -> String {
160 crate::latex_unicode::latex_to_unicode(&self.display_name())
161 }
162}
163
164#[must_use]
169pub fn parse_names(input: &str) -> Vec<PersonName> {
170 split_bibtex_names(input)
171 .into_iter()
172 .map(parse_single_name)
173 .filter(|name| !name.is_empty())
174 .collect()
175}
176
/// A date broken into year and optional month and day.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct DateParts {
    /// Year component.
    pub year: i32,
    /// Month component, when known.
    pub month: Option<u8>,
    /// Day component, when known.
    pub day: Option<u8>,
}
187
188impl DateParts {
189 #[must_use]
191 pub const fn is_complete(&self) -> bool {
192 self.month.is_some() && self.day.is_some()
193 }
194}
195
/// Reasons [`parse_date_parts`] can reject its input.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DateParseError {
    /// The input was empty after cleaning.
    Empty,
    /// The year component was rejected.
    InvalidYear,
    /// The month component was rejected.
    InvalidMonth,
    /// The day component was rejected.
    InvalidDay,
    /// The input did not have one, two or three `-`-separated components.
    UnsupportedFormat,
}
210
211impl fmt::Display for DateParseError {
212 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213 match self {
214 Self::Empty => f.write_str("empty date"),
215 Self::InvalidYear => f.write_str("invalid date year"),
216 Self::InvalidMonth => f.write_str("invalid date month"),
217 Self::InvalidDay => f.write_str("invalid date day"),
218 Self::UnsupportedFormat => f.write_str("unsupported date format"),
219 }
220 }
221}
222
223impl std::error::Error for DateParseError {}
224
/// Kinds of field that point at an external resource or identifier.
///
/// Each variant corresponds to the lowercase field name returned by
/// [`ResourceKind::as_str`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ResourceKind {
    /// The `file` field.
    File,
    /// The `url` field.
    Url,
    /// The `doi` field.
    Doi,
    /// The `pmid` field.
    Pmid,
    /// The `pmcid` field.
    Pmcid,
    /// The `isbn` field.
    Isbn,
    /// The `issn` field.
    Issn,
    /// The `eprint` field.
    Eprint,
    /// The `arxiv` field.
    Arxiv,
    /// The `crossref` field.
    Crossref,
}
249
250impl ResourceKind {
251 #[must_use]
253 pub const fn as_str(self) -> &'static str {
254 match self {
255 Self::File => "file",
256 Self::Url => "url",
257 Self::Doi => "doi",
258 Self::Pmid => "pmid",
259 Self::Pmcid => "pmcid",
260 Self::Isbn => "isbn",
261 Self::Issn => "issn",
262 Self::Eprint => "eprint",
263 Self::Arxiv => "arxiv",
264 Self::Crossref => "crossref",
265 }
266 }
267}
268
269#[derive(Debug, Clone, PartialEq, Eq)]
271pub struct ResourceField {
272 pub kind: ResourceKind,
274 pub field_name: String,
276 pub value: String,
278 pub normalized: Option<String>,
280}
281
282pub fn parse_date_parts(input: &str) -> std::result::Result<DateParts, DateParseError> {
288 let cleaned = trim_bibtex_scalar(input);
289 if cleaned.is_empty() {
290 return Err(DateParseError::Empty);
291 }
292
293 let parts = cleaned.split('-').collect::<Vec<_>>();
294 match parts.as_slice() {
295 [year] => Ok(DateParts {
296 year: parse_year(year)?,
297 month: None,
298 day: None,
299 }),
300 [year, month] => {
301 let year = parse_year(year)?;
302 let month = parse_month_number(month).ok_or(DateParseError::InvalidMonth)?;
303 Ok(DateParts {
304 year,
305 month: Some(month),
306 day: None,
307 })
308 }
309 [year, month, day] => {
310 let year = parse_year(year)?;
311 let month = parse_month_number(month).ok_or(DateParseError::InvalidMonth)?;
312 let day = parse_day_number(day, year, month)?;
313 Ok(DateParts {
314 year,
315 month: Some(month),
316 day: Some(day),
317 })
318 }
319 _ => Err(DateParseError::UnsupportedFormat),
320 }
321}
322
/// Returns `name` with surrounding whitespace removed and ASCII letters
/// lowercased. Non-ASCII characters are left unchanged.
#[must_use]
pub fn normalize_field_name_ascii(name: &str) -> String {
    let mut normalized = String::from(name.trim());
    normalized.make_ascii_lowercase();
    normalized
}
328
/// Maps a biblatex field name to the classic field name it aliases.
///
/// Matching ignores surrounding whitespace and ASCII case. Returns `None`
/// for names that have no alias. The lookup compares in place and does not
/// allocate.
#[must_use]
pub fn canonical_biblatex_field_alias(name: &str) -> Option<&'static str> {
    /// `(biblatex name, canonical name)` pairs; keys are lowercase.
    const ALIASES: [(&str, &str); 4] = [
        ("journaltitle", "journal"),
        ("date", "year"),
        ("institution", "school"),
        ("location", "address"),
    ];

    let name = name.trim();
    ALIASES
        .iter()
        .find(|(alias, _)| name.eq_ignore_ascii_case(alias))
        .map(|&(_, canonical)| canonical)
}
340
341#[must_use]
343pub fn normalize_biblatex_field_name(name: &str) -> String {
344 canonical_biblatex_field_alias(name)
345 .map_or_else(|| normalize_field_name_ascii(name), ToOwned::to_owned)
346}
347
348#[must_use]
350pub fn classify_resource_field(name: &str) -> Option<ResourceKind> {
351 match normalize_field_name_ascii(name).as_str() {
352 "file" => Some(ResourceKind::File),
353 "url" => Some(ResourceKind::Url),
354 "doi" => Some(ResourceKind::Doi),
355 "pmid" => Some(ResourceKind::Pmid),
356 "pmcid" => Some(ResourceKind::Pmcid),
357 "isbn" => Some(ResourceKind::Isbn),
358 "issn" => Some(ResourceKind::Issn),
359 "eprint" => Some(ResourceKind::Eprint),
360 "arxiv" => Some(ResourceKind::Arxiv),
361 "crossref" => Some(ResourceKind::Crossref),
362 _ => None,
363 }
364}
365
366#[derive(Debug, Clone, PartialEq)]
368pub struct Entry<'a> {
369 pub ty: EntryType<'a>,
371 pub key: Cow<'a, str>,
373 pub fields: Vec<Field<'a>>,
375}
376
377impl<'a> Entry<'a> {
378 #[must_use]
380 pub const fn new(ty: EntryType<'a>, key: &'a str) -> Self {
381 Self {
382 ty,
383 key: Cow::Borrowed(key),
384 fields: Vec::new(),
385 }
386 }
387
388 #[must_use]
390 pub const fn entry_type(&self) -> &EntryType<'a> {
391 &self.ty
392 }
393
394 #[must_use]
396 pub fn key(&self) -> &str {
397 &self.key
398 }
399
400 #[must_use]
402 pub fn field(&self, name: &str) -> Option<&Field<'a>> {
403 self.fields.iter().find(|f| f.name == name)
404 }
405
406 #[must_use]
408 pub fn field_ignore_case(&self, name: &str) -> Option<&Field<'a>> {
409 self.fields
410 .iter()
411 .find(|f| f.name.eq_ignore_ascii_case(name))
412 }
413
414 #[must_use]
417 pub fn get(&self, name: &str) -> Option<&str> {
418 self.field(name).and_then(|f| f.value.as_str())
419 }
420
421 #[must_use]
425 pub fn get_ignore_case(&self, name: &str) -> Option<&str> {
426 self.field_ignore_case(name).and_then(|f| f.value.as_str())
427 }
428
429 #[must_use]
431 pub fn get_as_string(&self, name: &str) -> Option<String> {
432 self.field(name).map(|f| value_to_lossy_string(&f.value))
433 }
434
435 #[must_use]
437 pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
438 self.field_ignore_case(name)
439 .map(|f| value_to_lossy_string(&f.value))
440 }
441
442 #[must_use]
444 pub fn get_any_ignore_case(&self, names: &[&str]) -> Option<&str> {
445 names.iter().find_map(|name| self.get_ignore_case(name))
446 }
447
448 #[must_use]
450 pub fn get_any_as_string_ignore_case(&self, names: &[&str]) -> Option<String> {
451 names
452 .iter()
453 .find_map(|name| self.get_as_string_ignore_case(name))
454 }
455
456 #[must_use]
458 pub fn has_field(&self, name: &str) -> bool {
459 self.field_ignore_case(name).is_some()
460 }
461
462 #[must_use]
464 pub fn has_any_field(&self, names: &[&str]) -> bool {
465 names.iter().any(|name| self.has_field(name))
466 }
467
468 #[must_use]
473 pub fn doi(&self) -> Option<String> {
474 self.get_as_string_ignore_case("doi")
475 .and_then(|doi| normalize_doi(&doi))
476 }
477
478 #[must_use]
480 pub fn authors(&self) -> Vec<PersonName> {
481 self.get_as_string_ignore_case("author")
482 .map_or_else(Vec::new, |authors| parse_names(&authors))
483 }
484
485 #[must_use]
487 pub fn editors(&self) -> Vec<PersonName> {
488 self.get_as_string_ignore_case("editor")
489 .map_or_else(Vec::new, |editors| parse_names(&editors))
490 }
491
492 #[must_use]
494 pub fn translators(&self) -> Vec<PersonName> {
495 self.get_as_string_ignore_case("translator")
496 .map_or_else(Vec::new, |translators| parse_names(&translators))
497 }
498
499 #[must_use]
501 pub fn date_parts_for(
502 &self,
503 field: &str,
504 ) -> Option<std::result::Result<DateParts, DateParseError>> {
505 self.get_as_string_ignore_case(field)
506 .map(|value| parse_date_parts(&value))
507 }
508
509 #[must_use]
514 pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
515 for field in &["date", "issued", "eventdate", "origdate", "urldate"] {
516 if let Some(value) = self.get_as_string_ignore_case(field) {
517 return Some(parse_date_parts(&value));
518 }
519 }
520
521 let year = self.get_as_string_ignore_case("year")?;
522 let mut parts = match parse_date_parts(&year) {
523 Ok(parts) => parts,
524 Err(error) => return Some(Err(error)),
525 };
526 if let Some(month) = self.get_as_string_ignore_case("month") {
527 match parse_month_number(&month) {
528 Some(month) => parts.month = Some(month),
529 None => return Some(Err(DateParseError::InvalidMonth)),
530 }
531 }
532 Some(Ok(parts))
533 }
534
535 #[must_use]
537 pub fn resource_fields(&self) -> Vec<ResourceField> {
538 let archive_prefix = self
539 .get_as_string_ignore_case("archiveprefix")
540 .or_else(|| self.get_as_string_ignore_case("eprinttype"));
541
542 self.fields
543 .iter()
544 .filter_map(|field| {
545 resource_field_from_parts(
546 &field.name,
547 field.value.to_plain_string(),
548 archive_prefix.as_deref(),
549 )
550 })
551 .collect()
552 }
553
554 #[must_use]
556 pub fn fields(&self) -> &[Field<'a>] {
557 &self.fields
558 }
559
560 pub fn add_field(&mut self, field: Field<'a>) {
562 self.fields.push(field);
563 }
564
565 pub fn set(&mut self, name: &'a str, value: Value<'a>) {
567 if let Some(field) = self.fields.iter_mut().find(|field| field.name == name) {
568 field.value = value;
569 } else {
570 self.fields.push(Field::new(name, value));
571 }
572 }
573
574 pub fn set_literal(&mut self, name: &'a str, value: &'a str) {
576 self.set(name, Value::Literal(Cow::Borrowed(value)));
577 }
578
579 pub fn remove(&mut self, name: &str) -> Vec<Field<'a>> {
581 let mut removed = Vec::new();
582 let mut index = 0;
583 while index < self.fields.len() {
584 if self.fields[index].name == name {
585 removed.push(self.fields.remove(index));
586 } else {
587 index += 1;
588 }
589 }
590 removed
591 }
592
593 pub fn rename_field(&mut self, old: &str, new: &'a str) -> usize {
595 let mut renamed = 0;
596 for field in &mut self.fields {
597 if field.name == old {
598 field.name = Cow::Borrowed(new);
599 renamed += 1;
600 }
601 }
602 renamed
603 }
604
605 #[must_use]
607 pub fn title(&self) -> Option<String> {
608 self.get_any_as_string_ignore_case(&["title"])
609 }
610
611 #[must_use]
613 pub fn year(&self) -> Option<String> {
614 self.get_any_as_string_ignore_case(&["year"])
615 }
616
617 #[must_use]
619 pub fn date(&self) -> Option<String> {
620 self.get_any_as_string_ignore_case(&["date"])
621 }
622
623 #[must_use]
625 pub fn journal(&self) -> Option<String> {
626 self.get_any_as_string_ignore_case(&["journal", "journaltitle"])
627 }
628
629 #[must_use]
631 pub fn booktitle(&self) -> Option<String> {
632 self.get_any_as_string_ignore_case(&["booktitle"])
633 }
634
635 #[must_use]
637 pub fn url(&self) -> Option<String> {
638 self.get_any_as_string_ignore_case(&["url"])
639 }
640
641 #[must_use]
643 pub fn keywords(&self) -> Vec<String> {
644 self.get_any_as_string_ignore_case(&["keywords", "keyword"])
645 .map(|keywords| {
646 keywords
647 .split([',', ';'])
648 .map(str::trim)
649 .filter(|keyword| !keyword.is_empty())
650 .map(ToOwned::to_owned)
651 .collect()
652 })
653 .unwrap_or_default()
654 }
655
656 pub fn validate(&self, level: ValidationLevel) -> Result<(), Vec<ValidationError>> {
659 let mut errors = Vec::new();
660
661 self.validate_required_fields(&mut errors);
663
664 match level {
665 ValidationLevel::Minimal => {
666 }
668 ValidationLevel::Standard => {
669 self.validate_common_issues(&mut errors);
671 }
672 ValidationLevel::Strict => {
673 self.validate_common_issues(&mut errors);
675 self.validate_field_formats(&mut errors);
676 self.validate_cross_references(&mut errors);
677 }
678 }
679
680 if errors.is_empty() {
681 Ok(())
682 } else {
683 Err(errors)
684 }
685 }
686
687 fn validate_required_fields(&self, errors: &mut Vec<ValidationError>) {
689 for &field_group in self.ty.required_field_groups() {
690 if self.has_any_field(field_group) {
691 continue;
692 }
693
694 if field_group == ["author", "editor"] {
695 errors.push(ValidationError::error(
696 None,
697 format!(
698 "{} entry must have either 'author' or 'editor' field",
699 self.ty
700 ),
701 ));
702 continue;
703 }
704
705 let primary_field = field_group[0];
706 let message = if field_group.len() == 1 {
707 format!(
708 "Required field '{}' is missing for {} entry",
709 primary_field, self.ty
710 )
711 } else {
712 format!(
713 "Required field '{}' is missing for {} entry (accepted aliases: {})",
714 primary_field,
715 self.ty,
716 field_group.join(", ")
717 )
718 };
719
720 errors.push(ValidationError::error(Some(primary_field), message));
721 }
722 }
723
724 fn validate_common_issues(&self, errors: &mut Vec<ValidationError>) {
726 if let Some(year_str) = self.get_any_as_string_ignore_case(&["year", "date"]) {
730 if let Ok(year) = year_str.parse::<i32>() {
731 if !(1000..=2100).contains(&year) {
732 errors.push(ValidationError::warning(
733 Some(if self.has_field("year") {
734 "year"
735 } else {
736 "date"
737 }),
738 format!("Year {year} seems unlikely"),
739 ));
740 }
741 } else {
742 errors.push(ValidationError::warning(
743 Some(if self.has_field("year") {
744 "year"
745 } else {
746 "date"
747 }),
748 "Year/date should be a number",
749 ));
750 }
751 }
752
753 if let Some(pages) = self.get_ignore_case("pages") {
755 if !is_valid_page_range(pages) {
756 errors.push(ValidationError::warning(
757 Some("pages"),
758 "Pages should be in format '12-34' or '12--34'",
759 ));
760 }
761 }
762
763 match self.ty {
765 EntryType::InBook | EntryType::InProceedings | EntryType::InCollection
766 if !self.has_any_field(&["author", "editor"]) =>
767 {
768 errors.push(ValidationError::warning(
769 None,
770 "Entry should have either 'author' or 'editor' field",
771 ));
772 }
773 _ => {}
774 }
775
776 for field in &self.fields {
778 if let Some(value_str) = field.value.as_str() {
779 if value_str.trim().is_empty() {
780 errors.push(ValidationError::warning(
781 Some(&field.name),
782 "Field has empty value",
783 ));
784 }
785 }
786 }
787 }
788
789 fn validate_field_formats(&self, errors: &mut Vec<ValidationError>) {
791 if let Some(doi) = self.get_as_string_ignore_case("doi") {
793 if normalize_doi(&doi).is_none() {
794 errors.push(ValidationError::warning(
795 Some("doi"),
796 "DOI should start with '10.' or a DOI URL/prefix",
797 ));
798 }
799 }
800
801 if let Some(url) = self.get_ignore_case("url") {
803 if !url.starts_with("http://") && !url.starts_with("https://") {
804 errors.push(ValidationError::warning(
805 Some("url"),
806 "URL should start with http:// or https://",
807 ));
808 }
809 }
810
811 if let Some(isbn) = self.get_ignore_case("isbn") {
813 if !is_valid_isbn_shape(isbn) {
814 errors.push(ValidationError::warning(
815 Some("isbn"),
816 "ISBN should have 10 or 13 digits",
817 ));
818 }
819 }
820
821 if let Some(month) = self.get_ignore_case("month") {
823 if !is_valid_month(month) {
824 errors.push(ValidationError::info(
825 Some("month"),
826 "Month should be a standard abbreviation (jan, feb, etc.) or full name",
827 ));
828 }
829 }
830
831 for field_name in &["volume", "number"] {
833 if let Some(value) = self.get_ignore_case(field_name) {
834 if value.parse::<i32>().is_err() && !value.contains('-') {
835 errors.push(ValidationError::info(
836 Some(field_name),
837 format!("{field_name} should typically be numeric"),
838 ));
839 }
840 }
841 }
842 }
843
844 fn validate_cross_references(&self, errors: &mut Vec<ValidationError>) {
846 if let Some(crossref) = self.get_ignore_case("crossref") {
847 if crossref.trim().is_empty() {
848 errors.push(ValidationError::error(
849 Some("crossref"),
850 "Cross-reference is empty",
851 ));
852 }
853 }
854 }
855
856 #[must_use]
858 pub fn is_valid(&self) -> bool {
859 self.validate(ValidationLevel::Minimal).is_ok()
860 }
861
862 #[cfg(feature = "latex_to_unicode")]
880 #[must_use]
881 pub fn get_unicode(&self, name: &str) -> Option<String> {
882 self.get(name).map(crate::latex_unicode::latex_to_unicode)
883 }
884
885 #[cfg(feature = "latex_to_unicode")]
904 #[must_use]
905 pub fn get_unicode_ignore_case(&self, name: &str) -> Option<String> {
906 self.get_ignore_case(name)
907 .map(crate::latex_unicode::latex_to_unicode)
908 }
909
910 #[cfg(feature = "latex_to_unicode")]
915 #[must_use]
916 pub fn get_as_unicode_string(&self, name: &str) -> Option<String> {
917 self.get_as_string(name)
918 .map(|s| crate::latex_unicode::latex_to_unicode(&s))
919 }
920
921 #[cfg(feature = "latex_to_unicode")]
926 #[must_use]
927 pub fn get_as_unicode_string_ignore_case(&self, name: &str) -> Option<String> {
928 self.get_as_string_ignore_case(name)
929 .map(|s| crate::latex_unicode::latex_to_unicode(&s))
930 }
931
932 #[cfg(feature = "latex_to_unicode")]
960 #[must_use]
961 pub fn fields_unicode(&self) -> Vec<(String, String)> {
962 self.fields
963 .iter()
964 .filter_map(|f| {
965 f.value.as_str().map(|s| {
966 (
967 f.name.to_string(),
968 crate::latex_unicode::latex_to_unicode(s),
969 )
970 })
971 })
972 .collect()
973 }
974
975 #[must_use]
977 pub fn into_owned(self) -> Entry<'static> {
978 Entry {
979 ty: self.ty.into_owned(),
980 key: Cow::Owned(self.key.into_owned()),
981 fields: self.fields.into_iter().map(Field::into_owned).collect(),
982 }
983 }
984}
985
/// The type of a bibliography entry.
///
/// Each named variant corresponds to the lowercase name returned by
/// [`EntryType::canonical_name`]; anything else is kept as [`EntryType::Custom`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum EntryType<'a> {
    /// The `article` entry type.
    Article,
    /// The `book` entry type.
    Book,
    /// The `booklet` entry type.
    Booklet,
    /// The `mvbook` entry type.
    MvBook,
    /// The `inbook` entry type.
    InBook,
    /// The `bookinbook` entry type.
    BookInBook,
    /// The `suppbook` entry type.
    SuppBook,
    /// The `collection` entry type.
    Collection,
    /// The `mvcollection` entry type.
    MvCollection,
    /// The `incollection` entry type.
    InCollection,
    /// The `suppcollection` entry type.
    SuppCollection,
    /// The `inproceedings` entry type (also parsed from `conference`).
    InProceedings,
    /// The `proceedings` entry type.
    Proceedings,
    /// The `mvproceedings` entry type.
    MvProceedings,
    /// The `reference` entry type.
    Reference,
    /// The `inreference` entry type.
    InReference,
    /// The `manual` entry type.
    Manual,
    /// The `mastersthesis` entry type.
    MastersThesis,
    /// The `phdthesis` entry type.
    PhdThesis,
    /// The `thesis` entry type.
    Thesis,
    /// The `techreport` entry type.
    TechReport,
    /// The `report` entry type.
    Report,
    /// The `patent` entry type.
    Patent,
    /// The `periodical` entry type.
    Periodical,
    /// The `online` entry type.
    Online,
    /// The `software` entry type.
    Software,
    /// The `dataset` entry type.
    Dataset,
    /// The `set` entry type.
    Set,
    /// The `xdata` entry type.
    XData,
    /// The `unpublished` entry type.
    Unpublished,
    /// The `misc` entry type.
    Misc,
    /// Any other entry type, stored with its original spelling.
    Custom(Cow<'a, str>),
}
1054
1055impl<'a> EntryType<'a> {
1056 #[must_use]
1058 #[inline(never)]
1059 pub fn parse(s: &'a str) -> Self {
1060 let bytes = s.as_bytes();
1061 if bytes.is_empty() {
1062 return Self::Custom(Cow::Borrowed(s));
1063 }
1064
1065 match (bytes.len(), ascii_lower(bytes[0])) {
1066 (3, b's') if eq_ascii_lower(bytes, b"set") => Self::Set,
1067 (4, b'b') if eq_ascii_lower(bytes, b"book") => Self::Book,
1068 (4, b'm') if eq_ascii_lower(bytes, b"misc") => Self::Misc,
1069 (6, b'i') if eq_ascii_lower(bytes, b"inbook") => Self::InBook,
1070 (6, b'm') if eq_ascii_lower(bytes, b"manual") => Self::Manual,
1071 (6, b'm') if eq_ascii_lower(bytes, b"mvbook") => Self::MvBook,
1072 (6, b'o') if eq_ascii_lower(bytes, b"online") => Self::Online,
1073 (6, b'p') if eq_ascii_lower(bytes, b"patent") => Self::Patent,
1074 (6, b'r') if eq_ascii_lower(bytes, b"report") => Self::Report,
1075 (6, b't') if eq_ascii_lower(bytes, b"thesis") => Self::Thesis,
1076 (7, b'a') if eq_ascii_lower(bytes, b"article") => Self::Article,
1077 (7, b'b') if eq_ascii_lower(bytes, b"booklet") => Self::Booklet,
1078 (7, b'd') if eq_ascii_lower(bytes, b"dataset") => Self::Dataset,
1079 (8, b's') if eq_ascii_lower(bytes, b"software") => Self::Software,
1080 (8, b's') if eq_ascii_lower(bytes, b"suppbook") => Self::SuppBook,
1081 (9, b'r') if eq_ascii_lower(bytes, b"reference") => Self::Reference,
1082 (9, b'p') if eq_ascii_lower(bytes, b"phdthesis") => Self::PhdThesis,
1083 (10, b'b') if eq_ascii_lower(bytes, b"bookinbook") => Self::BookInBook,
1084 (10, b'c') if eq_ascii_lower(bytes, b"conference") => Self::InProceedings,
1085 (10, b'c') if eq_ascii_lower(bytes, b"collection") => Self::Collection,
1086 (10, b'p') if eq_ascii_lower(bytes, b"periodical") => Self::Periodical,
1087 (10, b't') if eq_ascii_lower(bytes, b"techreport") => Self::TechReport,
1088 (11, b'i') if eq_ascii_lower(bytes, b"inreference") => Self::InReference,
1089 (11, b'p') if eq_ascii_lower(bytes, b"proceedings") => Self::Proceedings,
1090 (11, b'u') if eq_ascii_lower(bytes, b"unpublished") => Self::Unpublished,
1091 (12, b'i') if eq_ascii_lower(bytes, b"incollection") => Self::InCollection,
1092 (12, b'm') if eq_ascii_lower(bytes, b"mvcollection") => Self::MvCollection,
1093 (13, b'i') if eq_ascii_lower(bytes, b"inproceedings") => Self::InProceedings,
1094 (13, b'm') if eq_ascii_lower(bytes, b"mastersthesis") => Self::MastersThesis,
1095 (13, b'm') if eq_ascii_lower(bytes, b"mvproceedings") => Self::MvProceedings,
1096 (14, b's') if eq_ascii_lower(bytes, b"suppcollection") => Self::SuppCollection,
1097 (5, b'x') if eq_ascii_lower(bytes, b"xdata") => Self::XData,
1098 _ => Self::Custom(Cow::Borrowed(s)),
1099 }
1100 }
1101
1102 #[must_use]
1104 pub const fn required_fields(&self) -> &'static [&'static str] {
1105 match self {
1106 Self::Article => &["author", "title", "journal", "year"],
1107 Self::Book | Self::MvBook => &["author", "title", "publisher", "year"],
1108 Self::Booklet | Self::Manual => &["title"],
1109 Self::InBook | Self::BookInBook | Self::SuppBook => {
1110 &["author", "title", "chapter", "publisher", "year"]
1111 }
1112 Self::Collection | Self::MvCollection | Self::Reference => {
1113 &["editor", "title", "publisher", "year"]
1114 }
1115 Self::InCollection | Self::SuppCollection | Self::InReference => {
1116 &["author", "title", "booktitle", "publisher", "year"]
1117 }
1118 Self::InProceedings => &["author", "title", "booktitle", "year"],
1119 Self::Proceedings | Self::MvProceedings | Self::Periodical => &["title", "year"],
1120 Self::MastersThesis | Self::PhdThesis | Self::Thesis => {
1121 &["author", "title", "school", "year"]
1122 }
1123 Self::TechReport => &["author", "title", "institution", "year"],
1124 Self::Report => &["author", "title", "type", "institution", "year"],
1125 Self::Patent => &["author", "title", "number", "year"],
1126 Self::Online => &["title", "url"],
1127 Self::Software | Self::Dataset => &["author", "title", "year"],
1128 Self::Unpublished => &["author", "title", "note"],
1129 Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
1130 }
1131 }
1132
1133 #[must_use]
1138 pub const fn required_field_groups(&self) -> &'static [&'static [&'static str]] {
1139 match self {
1140 Self::Article => &[
1141 &["author"],
1142 &["title"],
1143 &["journal", "journaltitle"],
1144 &["year", "date"],
1145 ],
1146 Self::Book | Self::MvBook => &[
1147 &["author", "editor"],
1148 &["title"],
1149 &["publisher"],
1150 &["year", "date"],
1151 ],
1152 Self::Booklet | Self::Manual => &[&["title"]],
1153 Self::InBook | Self::BookInBook | Self::SuppBook => &[
1154 &["author", "editor"],
1155 &["title"],
1156 &["chapter", "pages"],
1157 &["publisher"],
1158 &["year", "date"],
1159 ],
1160 Self::Collection | Self::MvCollection | Self::Reference => &[
1161 &["editor", "author"],
1162 &["title"],
1163 &["publisher"],
1164 &["year", "date"],
1165 ],
1166 Self::InCollection | Self::SuppCollection | Self::InReference => &[
1167 &["author", "editor"],
1168 &["title"],
1169 &["booktitle"],
1170 &["publisher"],
1171 &["year", "date"],
1172 ],
1173 Self::InProceedings => &[
1174 &["author", "editor"],
1175 &["title"],
1176 &["booktitle"],
1177 &["year", "date"],
1178 ],
1179 Self::Proceedings | Self::MvProceedings | Self::Periodical => {
1180 &[&["title"], &["year", "date"]]
1181 }
1182 Self::MastersThesis | Self::PhdThesis | Self::Thesis => &[
1183 &["author"],
1184 &["title"],
1185 &["school", "institution"],
1186 &["year", "date"],
1187 ],
1188 Self::TechReport => &[&["author"], &["title"], &["institution"], &["year", "date"]],
1189 Self::Report => &[
1190 &["author", "editor"],
1191 &["title"],
1192 &["type"],
1193 &["institution"],
1194 &["year", "date"],
1195 ],
1196 Self::Patent => &[&["author"], &["title"], &["number"], &["year", "date"]],
1197 Self::Online => &[&["title"], &["url", "doi"], &["year", "date", "urldate"]],
1198 Self::Software | Self::Dataset => &[
1199 &["author", "editor"],
1200 &["title"],
1201 &["year", "date", "version"],
1202 ],
1203 Self::Unpublished => &[&["author"], &["title"], &["note"]],
1204 Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
1205 }
1206 }
1207
1208 #[must_use]
1210 pub fn canonical_name(&self) -> &str {
1211 match self {
1212 Self::Article => "article",
1213 Self::Book => "book",
1214 Self::Booklet => "booklet",
1215 Self::MvBook => "mvbook",
1216 Self::InBook => "inbook",
1217 Self::BookInBook => "bookinbook",
1218 Self::SuppBook => "suppbook",
1219 Self::Collection => "collection",
1220 Self::MvCollection => "mvcollection",
1221 Self::InCollection => "incollection",
1222 Self::SuppCollection => "suppcollection",
1223 Self::InProceedings => "inproceedings",
1224 Self::Proceedings => "proceedings",
1225 Self::MvProceedings => "mvproceedings",
1226 Self::Reference => "reference",
1227 Self::InReference => "inreference",
1228 Self::Manual => "manual",
1229 Self::MastersThesis => "mastersthesis",
1230 Self::PhdThesis => "phdthesis",
1231 Self::Thesis => "thesis",
1232 Self::TechReport => "techreport",
1233 Self::Report => "report",
1234 Self::Patent => "patent",
1235 Self::Periodical => "periodical",
1236 Self::Online => "online",
1237 Self::Software => "software",
1238 Self::Dataset => "dataset",
1239 Self::Set => "set",
1240 Self::XData => "xdata",
1241 Self::Unpublished => "unpublished",
1242 Self::Misc => "misc",
1243 Self::Custom(s) => s,
1244 }
1245 }
1246
1247 #[must_use]
1249 pub const fn aliases(&self) -> &'static [&'static str] {
1250 match self {
1251 Self::InProceedings => &["conference"],
1252 Self::TechReport => &["techreport"],
1253 Self::MastersThesis => &["mastersthesis"],
1254 Self::PhdThesis => &["phdthesis"],
1255 _ => &[],
1256 }
1257 }
1258
1259 #[must_use]
1261 pub const fn is_classic_bibtex(&self) -> bool {
1262 matches!(
1263 self,
1264 Self::Article
1265 | Self::Book
1266 | Self::Booklet
1267 | Self::InBook
1268 | Self::InCollection
1269 | Self::InProceedings
1270 | Self::Manual
1271 | Self::MastersThesis
1272 | Self::PhdThesis
1273 | Self::Proceedings
1274 | Self::TechReport
1275 | Self::Unpublished
1276 | Self::Misc
1277 )
1278 }
1279
1280 #[must_use]
1282 pub const fn is_extended(&self) -> bool {
1283 !self.is_classic_bibtex() && !matches!(self, Self::Custom(_))
1284 }
1285
1286 #[must_use]
1288 pub fn into_owned(self) -> EntryType<'static> {
1289 match self {
1290 Self::Custom(s) => EntryType::Custom(Cow::Owned(s.into_owned())),
1291 Self::Article => EntryType::Article,
1292 Self::Book => EntryType::Book,
1293 Self::Booklet => EntryType::Booklet,
1294 Self::MvBook => EntryType::MvBook,
1295 Self::InBook => EntryType::InBook,
1296 Self::BookInBook => EntryType::BookInBook,
1297 Self::SuppBook => EntryType::SuppBook,
1298 Self::Collection => EntryType::Collection,
1299 Self::MvCollection => EntryType::MvCollection,
1300 Self::InCollection => EntryType::InCollection,
1301 Self::SuppCollection => EntryType::SuppCollection,
1302 Self::InProceedings => EntryType::InProceedings,
1303 Self::Proceedings => EntryType::Proceedings,
1304 Self::MvProceedings => EntryType::MvProceedings,
1305 Self::Reference => EntryType::Reference,
1306 Self::InReference => EntryType::InReference,
1307 Self::Manual => EntryType::Manual,
1308 Self::MastersThesis => EntryType::MastersThesis,
1309 Self::PhdThesis => EntryType::PhdThesis,
1310 Self::Thesis => EntryType::Thesis,
1311 Self::TechReport => EntryType::TechReport,
1312 Self::Report => EntryType::Report,
1313 Self::Patent => EntryType::Patent,
1314 Self::Periodical => EntryType::Periodical,
1315 Self::Online => EntryType::Online,
1316 Self::Software => EntryType::Software,
1317 Self::Dataset => EntryType::Dataset,
1318 Self::Set => EntryType::Set,
1319 Self::XData => EntryType::XData,
1320 Self::Unpublished => EntryType::Unpublished,
1321 Self::Misc => EntryType::Misc,
1322 }
1323 }
1324}
1325
/// Lowercases an ASCII uppercase letter; every other byte is returned unchanged.
#[inline]
const fn ascii_lower(byte: u8) -> u8 {
    match byte {
        b'A'..=b'Z' => byte + (b'a' - b'A'),
        _ => byte,
    }
}
1334
1335#[inline]
1336fn eq_ascii_lower(input: &[u8], expected: &[u8]) -> bool {
1337 if input.len() != expected.len() {
1338 return false;
1339 }
1340
1341 let mut index = 0usize;
1342 while index < input.len() {
1343 if ascii_lower(input[index]) != expected[index] {
1344 return false;
1345 }
1346 index += 1;
1347 }
1348
1349 true
1350}
1351
1352impl fmt::Display for EntryType<'_> {
1353 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1354 f.write_str(self.canonical_name())
1355 }
1356}
1357
1358#[derive(Debug, Clone, PartialEq)]
1360pub struct Field<'a> {
1361 pub name: Cow<'a, str>,
1363 pub value: Value<'a>,
1365}
1366
1367impl<'a> Field<'a> {
1368 #[must_use]
1370 pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1371 Self {
1372 name: Cow::Borrowed(name),
1373 value,
1374 }
1375 }
1376
1377 #[must_use]
1379 pub fn name_eq_ignore_case(&self, name: &str) -> bool {
1380 self.name.eq_ignore_ascii_case(name)
1381 }
1382
1383 #[must_use]
1385 pub fn into_owned(self) -> Field<'static> {
1386 Field {
1387 name: Cow::Owned(self.name.into_owned()),
1388 value: self.value.into_owned(),
1389 }
1390 }
1391}
1392
/// A field value.
#[derive(Clone, Debug, PartialEq)]
pub enum Value<'a> {
    /// Literal text.
    Literal(Cow<'a, str>),
    /// An integer value.
    Number(i64),
    /// A sequence of parts; written to source joined with ` # `.
    Concat(Box<[Self]>),
    /// A reference to a named string, looked up by name when expanded.
    Variable(Cow<'a, str>),
}
1409
1410impl Default for Value<'_> {
1411 fn default() -> Self {
1412 Self::Number(0)
1413 }
1414}
1415
1416impl Value<'_> {
1417 #[must_use]
1419 pub fn as_str(&self) -> Option<&str> {
1420 match self {
1421 Self::Literal(s) => Some(s),
1422 _ => None,
1423 }
1424 }
1425
1426 #[must_use]
1428 pub fn expand(&self, strings: &AHashMap<&str, Value>) -> String {
1429 match self {
1430 Self::Literal(s) => normalize_text_projection(s),
1431 Self::Number(n) => n.to_string(),
1432 Self::Variable(name) => strings
1433 .get(name.as_ref())
1434 .map_or_else(|| format!("{{undefined:{name}}}"), |v| v.expand(strings)),
1435 Self::Concat(parts) => parts.iter().map(|p| p.expand(strings)).collect::<String>(),
1436 }
1437 }
1438
1439 #[must_use]
1445 pub fn to_plain_string(&self) -> String {
1446 value_to_plain_string(self)
1447 }
1448
1449 #[must_use]
1451 pub fn to_lossy_string(&self) -> String {
1452 value_to_lossy_string(self)
1453 }
1454
1455 #[must_use]
1457 pub fn from_plain_string<'a>(text: impl Into<Cow<'a, str>>) -> Value<'a> {
1458 Value::Literal(text.into())
1459 }
1460
1461 pub fn from_bibtex_source(source: &str) -> crate::Result<Value<'_>> {
1467 let mut input = source;
1468 crate::parser::lexer::skip_whitespace(&mut input);
1469 let value = crate::parser::value::parse_value(&mut input)
1470 .map_err(|_| crate::Error::WinnowError("invalid BibTeX value source".to_string()))?;
1471 crate::parser::lexer::skip_whitespace(&mut input);
1472 if input.is_empty() {
1473 Ok(value)
1474 } else {
1475 Err(crate::Error::WinnowError(format!(
1476 "trailing text after BibTeX value: {input:?}"
1477 )))
1478 }
1479 }
1480
1481 #[must_use]
1486 pub fn to_bibtex_source(&self) -> String {
1487 match self {
1488 Self::Literal(text) => literal_to_bibtex_source(text),
1489 Self::Number(number) => number.to_string(),
1490 Self::Variable(name) => name.to_string(),
1491 Self::Concat(parts) => parts
1492 .iter()
1493 .map(Self::to_bibtex_source)
1494 .collect::<Vec<_>>()
1495 .join(" # "),
1496 }
1497 }
1498
1499 #[cfg(feature = "latex_to_unicode")]
1501 #[must_use]
1502 pub fn to_unicode_plain_string(&self) -> String {
1503 crate::latex_unicode::latex_to_unicode(&self.to_plain_string())
1504 }
1505
1506 #[must_use]
1508 pub fn into_owned(self) -> Value<'static> {
1509 match self {
1510 Self::Literal(s) => Value::Literal(Cow::Owned(s.into_owned())),
1511 Self::Number(n) => Value::Number(n),
1512 Self::Variable(s) => Value::Variable(Cow::Owned(s.into_owned())),
1513 Self::Concat(parts) => Value::Concat(
1514 parts
1515 .into_vec()
1516 .into_iter()
1517 .map(Value::into_owned)
1518 .collect::<Vec<_>>()
1519 .into_boxed_slice(),
1520 ),
1521 }
1522 }
1523}
1524
1525fn literal_to_bibtex_source(text: &str) -> String {
1526 if is_balanced_braced_literal_content(text) {
1527 format!("{{{text}}}")
1528 } else {
1529 format!("\"{}\"", escape_quoted_literal(text))
1530 }
1531}
1532
1533fn is_balanced_braced_literal_content(text: &str) -> bool {
1534 let bytes = text.as_bytes();
1535 let mut depth = 0usize;
1536 let mut pos = 0usize;
1537
1538 while let Some(offset) = memchr2(b'{', b'}', &bytes[pos..]) {
1539 let idx = pos + offset;
1540 if is_escaped_delimiter(bytes, idx) {
1541 pos = idx + 1;
1542 continue;
1543 }
1544
1545 match bytes[idx] {
1546 b'{' => depth += 1,
1547 b'}' => {
1548 let Some(new_depth) = depth.checked_sub(1) else {
1549 return false;
1550 };
1551 depth = new_depth;
1552 }
1553 _ => unreachable!(),
1554 }
1555 pos = idx + 1;
1556 }
1557
1558 depth == 0
1559}
1560
/// Reports whether the byte at index `delimiter` is preceded by an odd number of
/// consecutive backslashes.
///
/// An even run (including zero) means the backslashes pair up with each other and the
/// delimiter itself is not escaped.
fn is_escaped_delimiter(input: &[u8], delimiter: usize) -> bool {
    let preceding_backslashes = input[..delimiter]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 == 1
}
1574
/// Returns `text` with a backslash inserted before every double quote.
fn escape_quoted_literal(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '"' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1578
1579impl fmt::Display for Value<'_> {
1580 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1581 match self {
1582 Self::Literal(s) => write!(f, "{s}"),
1583 Self::Number(n) => write!(f, "{n}"),
1584 Self::Variable(name) => write!(f, "{{{name}}}"),
1585 Self::Concat(parts) => {
1586 for (i, part) in parts.iter().enumerate() {
1587 if i > 0 {
1588 write!(f, " # ")?;
1589 }
1590 write!(f, "{part}")?;
1591 }
1592 Ok(())
1593 }
1594 }
1595 }
1596}
1597
1598fn value_to_lossy_string(value: &Value<'_>) -> String {
1599 match value {
1600 Value::Literal(s) => normalize_text_projection(s),
1601 Value::Number(n) => n.to_string(),
1602 Value::Variable(v) => format!("{{{v}}}"),
1603 Value::Concat(parts) => parts.iter().map(value_to_lossy_string).collect(),
1604 }
1605}
1606
1607fn value_to_plain_string(value: &Value<'_>) -> String {
1608 match value {
1609 Value::Literal(text) => normalize_text_projection(text),
1610 Value::Number(number) => number.to_string(),
1611 Value::Variable(name) => name.to_string(),
1612 Value::Concat(parts) => parts.iter().map(value_to_plain_string).collect(),
1613 }
1614}
1615
/// Normalizes line breaks in `text`.
///
/// Each `\r\n`, lone `\r`, or `\n` becomes a single `\n`, and the spaces and tabs that
/// immediately follow the line break are dropped. Text without any line break is
/// returned unchanged.
pub(crate) fn normalize_text_projection(text: &str) -> String {
    if !text.contains(['\n', '\r']) {
        return text.to_owned();
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text.chars().peekable();
    while let Some(ch) = rest.next() {
        if ch != '\n' && ch != '\r' {
            out.push(ch);
            continue;
        }

        // Treat `\r\n` as one line break.
        if ch == '\r' {
            rest.next_if_eq(&'\n');
        }
        out.push('\n');

        // Drop the indentation that follows the line break.
        while rest.next_if(|next| matches!(next, ' ' | '\t')).is_some() {}
    }
    out
}
1649
/// Normalizes a DOI to its bare, lowercase form.
///
/// Surrounding whitespace is trimmed, one leading resolver URL (`doi.org` or
/// `dx.doi.org` over `http`/`https`) or `doi:` label is removed, and trailing `.`, `,`
/// and `;` are dropped. The prefix is matched without regard to ASCII case, so `Doi:` and
/// `HTTPS://DOI.ORG/` are recognised as well.
///
/// Returns `None` when what remains is not longer than three bytes, does not start with
/// `10.`, or contains no `/`.
#[must_use]
pub fn normalize_doi(input: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ];

    let mut doi = input.trim();
    if doi.is_empty() {
        return None;
    }

    for prefix in PREFIXES {
        // `get` returns `None` when the input is shorter than the prefix or the cut
        // would fall inside a multi-byte character, so this never panics.
        let has_prefix = doi
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix));
        if has_prefix {
            doi = doi[prefix.len()..].trim();
            break;
        }
    }

    let doi = doi.trim_end_matches(['.', ',', ';']);
    if doi.len() > 3 && doi.starts_with("10.") && doi.contains('/') {
        Some(doi.to_ascii_lowercase())
    } else {
        None
    }
}
1679
1680fn resource_field_from_parts(
1681 field_name: &str,
1682 value: String,
1683 archive_prefix: Option<&str>,
1684) -> Option<ResourceField> {
1685 let mut kind = classify_resource_field(field_name)?;
1686 if kind == ResourceKind::Eprint
1687 && archive_prefix.is_some_and(|prefix| prefix.eq_ignore_ascii_case("arxiv"))
1688 {
1689 kind = ResourceKind::Arxiv;
1690 }
1691 let normalized = normalize_resource_value(kind, &value);
1692 Some(ResourceField {
1693 kind,
1694 field_name: field_name.to_string(),
1695 value,
1696 normalized,
1697 })
1698}
1699
1700fn normalize_resource_value(kind: ResourceKind, value: &str) -> Option<String> {
1701 let trimmed = value.trim();
1702 if trimmed.is_empty() {
1703 return None;
1704 }
1705
1706 match kind {
1707 ResourceKind::Doi => normalize_doi(trimmed),
1708 ResourceKind::Pmid => normalize_ascii_digits(trimmed),
1709 ResourceKind::Pmcid => Some(normalize_pmcid(trimmed)),
1710 ResourceKind::Isbn => normalize_isbn(trimmed),
1711 ResourceKind::Issn => normalize_issn(trimmed),
1712 ResourceKind::Arxiv => Some(normalize_arxiv(trimmed)),
1713 ResourceKind::File | ResourceKind::Url | ResourceKind::Eprint | ResourceKind::Crossref => {
1714 Some(trimmed.to_string())
1715 }
1716 }
1717}
1718
/// Returns the trimmed input when it consists solely of ASCII digits, `None` otherwise.
///
/// An input that is empty after trimming yields `Some` of an empty string, because the
/// all-digits check holds vacuously.
fn normalize_ascii_digits(input: &str) -> Option<String> {
    let digits = input.trim();
    if digits.bytes().all(|byte| byte.is_ascii_digit()) {
        Some(digits.to_string())
    } else {
        None
    }
}
1726
/// Normalizes a PMC identifier to the uppercase `PMC…` form.
///
/// Leading `pmcid:` and `PMCID:` labels are removed. If the remainder already starts
/// with `pmc` (ignoring ASCII case) it is uppercased; otherwise `PMC` is prepended.
fn normalize_pmcid(input: &str) -> String {
    let id = input
        .trim()
        .trim_start_matches("pmcid:")
        .trim_start_matches("PMCID:")
        .trim();

    let has_pmc_prefix = matches!(id.get(..3), Some(head) if head.eq_ignore_ascii_case("pmc"));
    if has_pmc_prefix {
        id.to_ascii_uppercase()
    } else {
        format!("PMC{id}")
    }
}
1742
1743fn normalize_isbn(input: &str) -> Option<String> {
1744 let compact = input
1745 .chars()
1746 .filter(|ch| !matches!(ch, '-' | ' '))
1747 .collect::<String>()
1748 .to_ascii_uppercase();
1749 is_valid_isbn_shape(&compact).then_some(compact)
1750}
1751
/// Normalizes an ISSN to eight characters without separators.
///
/// Hyphens and spaces are removed and ASCII letters uppercased. The result must be
/// exactly eight ASCII digits, except that the final position may be `X`.
fn normalize_issn(input: &str) -> Option<String> {
    let compact: String = input
        .chars()
        .filter(|&ch| ch != '-' && ch != ' ')
        .map(|ch| ch.to_ascii_uppercase())
        .collect();

    let well_formed = compact.len() == 8
        && compact
            .bytes()
            .enumerate()
            .all(|(position, byte)| byte.is_ascii_digit() || (position == 7 && byte == b'X'));
    well_formed.then_some(compact)
}
1765
/// Trims `input` and removes leading `arXiv:` labels, then leading `arxiv:` labels,
/// returning the trimmed remainder.
fn normalize_arxiv(input: &str) -> String {
    let without_mixed_case = input.trim().trim_start_matches("arXiv:");
    let without_lower_case = without_mixed_case.trim_start_matches("arxiv:");
    String::from(without_lower_case.trim())
}
1774
/// Trims whitespace and repeatedly peels one surrounding `{…}` or `"…"` pair from
/// `input` until the text is no longer wrapped in either.
///
/// Only the first and last characters are inspected; the delimiters are not checked
/// for being a matching pair.
fn trim_bibtex_scalar(input: &str) -> &str {
    let mut current = input.trim();
    while current.len() >= 2 {
        let inner = current
            .strip_prefix('{')
            .and_then(|rest| rest.strip_suffix('}'))
            .or_else(|| {
                current
                    .strip_prefix('"')
                    .and_then(|rest| rest.strip_suffix('"'))
            });
        match inner {
            Some(inner) => current = inner.trim(),
            None => break,
        }
    }
    current
}
1789
1790fn parse_year(input: &str) -> std::result::Result<i32, DateParseError> {
1791 let input = input.trim();
1792 if input.len() != 4 || !input.chars().all(|ch| ch.is_ascii_digit()) {
1793 return Err(DateParseError::InvalidYear);
1794 }
1795 input
1796 .parse::<i32>()
1797 .map_err(|_| DateParseError::InvalidYear)
1798}
1799
1800fn parse_month_number(input: &str) -> Option<u8> {
1801 let normalized = trim_bibtex_scalar(input).to_ascii_lowercase();
1802 if normalized.is_empty() {
1803 return None;
1804 }
1805
1806 if let Ok(month) = normalized.parse::<u8>() {
1807 return (1..=12).contains(&month).then_some(month);
1808 }
1809
1810 match normalized.as_str() {
1811 "jan" | "january" => Some(1),
1812 "feb" | "february" => Some(2),
1813 "mar" | "march" => Some(3),
1814 "apr" | "april" => Some(4),
1815 "may" => Some(5),
1816 "jun" | "june" => Some(6),
1817 "jul" | "july" => Some(7),
1818 "aug" | "august" => Some(8),
1819 "sep" | "sept" | "september" => Some(9),
1820 "oct" | "october" => Some(10),
1821 "nov" | "november" => Some(11),
1822 "dec" | "december" => Some(12),
1823 _ => None,
1824 }
1825}
1826
1827fn parse_day_number(input: &str, year: i32, month: u8) -> std::result::Result<u8, DateParseError> {
1828 let input = input.trim();
1829 if input.is_empty() || input.len() > 2 || !input.chars().all(|ch| ch.is_ascii_digit()) {
1830 return Err(DateParseError::InvalidDay);
1831 }
1832 let day = input
1833 .parse::<u8>()
1834 .map_err(|_| DateParseError::InvalidDay)?;
1835 (1..=days_in_month(year, month))
1836 .contains(&day)
1837 .then_some(day)
1838 .ok_or(DateParseError::InvalidDay)
1839}
1840
1841const fn days_in_month(year: i32, month: u8) -> u8 {
1842 match month {
1843 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1844 4 | 6 | 9 | 11 => 30,
1845 2 if is_leap_year(year) => 29,
1846 2 => 28,
1847 _ => 0,
1848 }
1849}
1850
/// Reports whether `year` is a leap year: divisible by 4, and either not divisible by
/// 100 or divisible by 400.
const fn is_leap_year(year: i32) -> bool {
    year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)
}
1854
/// Checks that `isbn` has the shape of an ISBN-10 or ISBN-13 once hyphens and spaces are
/// removed.
///
/// Ten characters must be digits, with `x` or `X` also allowed in the last position;
/// thirteen characters must all be digits. No checksum is computed.
fn is_valid_isbn_shape(isbn: &str) -> bool {
    let compact: Vec<u8> = isbn
        .bytes()
        .filter(|byte| !matches!(byte, b'-' | b' '))
        .collect();
    let all_digits = |bytes: &[u8]| bytes.iter().all(u8::is_ascii_digit);

    match compact.len() {
        10 => {
            let check = compact[9];
            all_digits(&compact[..9]) && (check.is_ascii_digit() || matches!(check, b'x' | b'X'))
        }
        13 => all_digits(&compact),
        _ => false,
    }
}
1867
1868fn split_bibtex_names(input: &str) -> Vec<&str> {
1869 let mut names = Vec::new();
1870 let mut start = 0;
1871 let mut depth = 0usize;
1872 let mut iter = input.char_indices().peekable();
1873
1874 while let Some((index, ch)) = iter.next() {
1875 match ch {
1876 '{' => depth += 1,
1877 '}' => depth = depth.saturating_sub(1),
1878 'a' | 'A' if depth == 0 && starts_name_separator(input, index) => {
1879 let candidate = input[start..index].trim();
1880 if !candidate.is_empty() {
1881 names.push(candidate);
1882 }
1883 start = index + 3;
1884 while input[start..]
1885 .chars()
1886 .next()
1887 .is_some_and(char::is_whitespace)
1888 {
1889 start += input[start..].chars().next().map_or(0, char::len_utf8);
1890 }
1891 while iter
1892 .peek()
1893 .is_some_and(|(_, next_ch)| next_ch.is_whitespace())
1894 {
1895 iter.next();
1896 }
1897 }
1898 _ => {}
1899 }
1900 }
1901
1902 let candidate = input[start..].trim();
1903 if !candidate.is_empty() {
1904 names.push(candidate);
1905 }
1906
1907 names
1908}
1909
/// Reports whether the word `and` (any ASCII case) starts at byte `index` of `input` as
/// a standalone word.
///
/// Standalone means the character before it is whitespace or the start of the input,
/// and the character after it is whitespace or the end of the input.
fn starts_name_separator(input: &str, index: usize) -> bool {
    let (head, tail) = input.split_at(index);
    let is_and = tail
        .get(..3)
        .is_some_and(|word| word.eq_ignore_ascii_case("and"));
    if !is_and {
        return false;
    }

    let is_boundary = |neighbour: Option<char>| neighbour.map_or(true, char::is_whitespace);
    is_boundary(head.chars().next_back()) && is_boundary(tail[3..].chars().next())
}
1927
1928fn parse_single_name(input: &str) -> PersonName {
1929 let raw = input.trim();
1930 if let Some(literal) = braced_literal_name(raw) {
1931 return person_name(
1932 raw,
1933 String::new(),
1934 String::new(),
1935 literal.clone(),
1936 String::new(),
1937 Some(literal),
1938 );
1939 }
1940
1941 let parts = split_top_level_commas(input);
1942 match parts.as_slice() {
1943 [last] => parse_first_von_last(last),
1944 [last, first] => {
1945 let (von, last) = split_von_last(last);
1946 person_name(
1947 raw,
1948 normalize_name_part(first),
1949 von,
1950 last,
1951 String::new(),
1952 None,
1953 )
1954 }
1955 [last, jr, first, ..] => {
1956 let (von, last) = split_von_last(last);
1957 person_name(
1958 raw,
1959 normalize_name_part(first),
1960 von,
1961 last,
1962 normalize_name_part(jr),
1963 None,
1964 )
1965 }
1966 [] => empty_person_name(raw),
1967 }
1968}
1969
1970fn parse_first_von_last(input: &str) -> PersonName {
1971 let raw = input.trim();
1972 let words = split_name_words(input);
1973 match words.len() {
1974 0 => empty_person_name(raw),
1975 1 => person_name(
1976 raw,
1977 String::new(),
1978 String::new(),
1979 normalize_name_part(words[0]),
1980 String::new(),
1981 None,
1982 ),
1983 _ => {
1984 let von_start = words
1985 .iter()
1986 .position(|word| starts_with_lowercase_letter(word));
1987 let (first, von, last) = von_start.map_or_else(
1988 || {
1989 (
1990 join_name_words(&words[..words.len() - 1]),
1991 String::new(),
1992 normalize_name_part(words[words.len() - 1]),
1993 )
1994 },
1995 |von_start| {
1996 let last_start = words[von_start + 1..]
1997 .iter()
1998 .position(|word| !starts_with_lowercase_letter(word))
1999 .map_or(words.len() - 1, |offset| von_start + 1 + offset);
2000
2001 (
2002 join_name_words(&words[..von_start]),
2003 join_name_words(&words[von_start..last_start]),
2004 join_name_words(&words[last_start..]),
2005 )
2006 },
2007 );
2008
2009 person_name(raw, first, von, last, String::new(), None)
2010 }
2011 }
2012}
2013
2014fn person_name(
2015 raw: &str,
2016 first: String,
2017 von: String,
2018 last: String,
2019 jr: String,
2020 literal: Option<String>,
2021) -> PersonName {
2022 let given = split_component_tokens(&first);
2023 let family = split_component_tokens(&last);
2024 let prefix = split_component_tokens(&von);
2025 let suffix = split_component_tokens(&jr);
2026 PersonName {
2027 raw: raw.to_string(),
2028 first,
2029 von,
2030 last,
2031 jr,
2032 given,
2033 family,
2034 prefix,
2035 suffix,
2036 literal,
2037 }
2038}
2039
2040fn empty_person_name(raw: &str) -> PersonName {
2041 person_name(
2042 raw,
2043 String::new(),
2044 String::new(),
2045 String::new(),
2046 String::new(),
2047 None,
2048 )
2049}
2050
2051fn split_component_tokens(input: &str) -> Vec<String> {
2052 split_name_words(input)
2053 .into_iter()
2054 .map(normalize_name_part)
2055 .filter(|part| !part.is_empty())
2056 .collect()
2057}
2058
2059fn split_von_last(input: &str) -> (String, String) {
2060 let words = split_name_words(input);
2061 if words.is_empty() {
2062 return (String::new(), String::new());
2063 }
2064
2065 if let Some(last_start) = words
2066 .iter()
2067 .rposition(|word| starts_with_lowercase_letter(word))
2068 {
2069 if last_start + 1 < words.len() {
2070 return (
2071 join_name_words(&words[..=last_start]),
2072 join_name_words(&words[last_start + 1..]),
2073 );
2074 }
2075 }
2076
2077 if words.len() == 1 {
2078 (String::new(), normalize_name_part(words[0]))
2079 } else {
2080 (
2081 join_name_words(&words[..words.len() - 1]),
2082 normalize_name_part(words[words.len() - 1]),
2083 )
2084 }
2085}
2086
/// Splits `input` at every comma that is not nested inside braces and trims each piece.
///
/// The result always contains at least one element; empty pieces are kept.
fn split_top_level_commas(input: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut nesting = 0usize;

    // The delimiters are ASCII, so scanning bytes yields valid slice boundaries.
    for (offset, byte) in input.bytes().enumerate() {
        if byte == b'{' {
            nesting += 1;
        } else if byte == b'}' {
            nesting = nesting.saturating_sub(1);
        } else if byte == b',' && nesting == 0 {
            pieces.push(input[piece_start..offset].trim());
            piece_start = offset + 1;
        }
    }

    pieces.push(input[piece_start..].trim());
    pieces
}
2107
/// Splits `input` into words at whitespace that lies outside braces.
///
/// A brace group stays inside the word that contains it. A closing brace never starts a
/// word, so one that appears before any word character is dropped.
fn split_name_words(input: &str) -> Vec<&str> {
    let mut words = Vec::new();
    let mut word_start: Option<usize> = None;
    let mut nesting = 0usize;

    for (index, ch) in input.char_indices() {
        if ch == '}' {
            nesting = nesting.saturating_sub(1);
            continue;
        }

        if ch == '{' {
            nesting += 1;
        } else if nesting == 0 && ch.is_whitespace() {
            if let Some(begin) = word_start.take() {
                let word = input[begin..index].trim();
                if !word.is_empty() {
                    words.push(word);
                }
            }
            continue;
        }

        word_start.get_or_insert(index);
    }

    if let Some(begin) = word_start {
        let word = input[begin..].trim();
        if !word.is_empty() {
            words.push(word);
        }
    }

    words
}
2139
2140fn join_name_words(words: &[&str]) -> String {
2141 words
2142 .iter()
2143 .map(|word| normalize_name_part(word))
2144 .filter(|word| !word.is_empty())
2145 .collect::<Vec<_>>()
2146 .join(" ")
2147}
2148
/// Trims `input` and, when the result is wrapped in one `{…}` pair, removes that pair
/// and trims again.
fn normalize_name_part(input: &str) -> String {
    let trimmed = input.trim();
    let unwrapped = trimmed
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'));
    unwrapped.map_or(trimmed, str::trim).to_owned()
}
2157
2158fn braced_literal_name(input: &str) -> Option<String> {
2159 let trimmed = input.trim();
2160 if trimmed.len() < 2 || !trimmed.starts_with('{') || !trimmed.ends_with('}') {
2161 return None;
2162 }
2163
2164 let mut depth = 0usize;
2165 for (index, ch) in trimmed.char_indices() {
2166 match ch {
2167 '{' => depth += 1,
2168 '}' => {
2169 depth = depth.saturating_sub(1);
2170 if depth == 0 && index != trimmed.len() - 1 {
2171 return None;
2172 }
2173 }
2174 _ => {}
2175 }
2176 }
2177
2178 (depth == 0).then(|| normalize_name_part(trimmed))
2179}
2180
2181fn starts_with_lowercase_letter(input: &str) -> bool {
2182 normalize_name_part(input)
2183 .chars()
2184 .find(|ch| ch.is_alphabetic())
2185 .is_some_and(char::is_lowercase)
2186}
2187
/// Checks that `pages` looks like a page number, a page range, or a comma-separated
/// list of those.
///
/// Surrounding whitespace is ignored. A value made only of ASCII digits is valid.
/// Anything else must contain a `-` or a `,`. Each non-empty comma-separated segment
/// that contains `--` or `-` must split into exactly two non-empty sides; segments
/// without a dash are accepted as they are. Empty segments (for example from a trailing
/// comma) are tolerated, but at least one non-empty segment is required, so an input
/// consisting only of commas is rejected.
fn is_valid_page_range(pages: &str) -> bool {
    let pages = pages.trim();
    if pages.is_empty() {
        return false;
    }

    if pages.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    if !pages.contains('-') && !pages.contains(',') {
        return false;
    }

    // A range is well-formed when the separator occurs once with text on both sides.
    let has_two_sides = |range: &str, separator: &str| {
        let mut sides = range.split(separator);
        matches!(
            (sides.next(), sides.next(), sides.next()),
            (Some(start), Some(end), None)
                if !start.trim().is_empty() && !end.trim().is_empty()
        )
    };

    let mut segments = 0usize;
    for range in pages.split(',').map(str::trim).filter(|range| !range.is_empty()) {
        segments += 1;

        // Prefer the double dash so that "1--5" is not read as three single-dash parts.
        let separator = if range.contains("--") {
            "--"
        } else if range.contains('-') {
            "-"
        } else {
            continue;
        };
        if !has_two_sides(range, separator) {
            return false;
        }
    }

    segments > 0
}
2230
/// Checks whether `month` names a month.
///
/// Surrounding whitespace is ignored. Accepted forms are the three-letter English
/// abbreviations, `sept`, the full English month names (all case-insensitive), and
/// integers from 1 to 12.
fn is_valid_month(month: &str) -> bool {
    let month = month.trim();
    let month_lower = month.to_lowercase();

    matches!(
        month_lower.as_str(),
        "jan"
            | "feb"
            | "mar"
            | "apr"
            | "may"
            | "jun"
            | "jul"
            | "aug"
            | "sep"
            | "sept"
            | "oct"
            | "nov"
            | "dec"
            | "january"
            | "february"
            | "march"
            | "april"
            | "june"
            | "july"
            | "august"
            | "september"
            | "october"
            | "november"
            | "december"
    ) || month.parse::<i32>().is_ok_and(|m| (1..=12).contains(&m))
}
2263}