1use ahash::AHashMap;
4use std::borrow::Cow;
5use std::fmt;
6
/// How thoroughly [`Entry::validate`] inspects an entry.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ValidationLevel {
    /// Only checks that the required fields of the entry type are present.
    Minimal,
    /// Required fields plus the common-issue checks. This is the default.
    #[default]
    Standard,
    /// Everything in `Standard`, plus field-format and cross-reference checks.
    Strict,
}
18
/// A single finding produced while validating an [`Entry`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Name of the field the finding refers to; `None` is displayed as `<entry>`.
    pub field: Option<String>,
    /// Human-readable description of the finding.
    pub message: String,
    /// How serious the finding is.
    pub severity: ValidationSeverity,
}
29
/// Severity attached to a [`ValidationError`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// Set by [`ValidationError::error`].
    Error,
    /// Set by [`ValidationError::warning`].
    Warning,
    /// Set by [`ValidationError::info`].
    Info,
}
40
41impl ValidationError {
42 #[must_use]
44 pub fn error(field: Option<&str>, message: impl Into<String>) -> Self {
45 Self {
46 field: field.map(String::from),
47 message: message.into(),
48 severity: ValidationSeverity::Error,
49 }
50 }
51
52 #[must_use]
54 pub fn warning(field: Option<&str>, message: impl Into<String>) -> Self {
55 Self {
56 field: field.map(String::from),
57 message: message.into(),
58 severity: ValidationSeverity::Warning,
59 }
60 }
61
62 #[must_use]
64 pub fn info(field: Option<&str>, message: impl Into<String>) -> Self {
65 Self {
66 field: field.map(String::from),
67 message: message.into(),
68 severity: ValidationSeverity::Info,
69 }
70 }
71}
72
73impl fmt::Display for ValidationError {
74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75 let field = self.field.as_deref().unwrap_or("<entry>");
76 write!(f, "[{:?}] {}: {}", self.severity, field, self.message)
77 }
78}
79
/// A personal name split into the four parts produced by [`parse_names`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersonName {
    /// Given name(s); empty when absent.
    pub first: String,
    /// Lowercase-initial particle words between first and last name; empty when absent.
    pub von: String,
    /// Family name; empty when absent.
    pub last: String,
    /// Suffix taken from the middle part of a `Last, Jr, First` name; empty when absent.
    pub jr: String,
}
96
97impl PersonName {
98 #[must_use]
100 pub fn display_name(&self) -> String {
101 let mut parts = Vec::new();
102 if !self.first.is_empty() {
103 parts.push(self.first.as_str());
104 }
105 if !self.von.is_empty() {
106 parts.push(self.von.as_str());
107 }
108 if !self.last.is_empty() {
109 parts.push(self.last.as_str());
110 }
111
112 let mut name = parts.join(" ");
113 if !self.jr.is_empty() {
114 if !name.is_empty() {
115 name.push_str(", ");
116 }
117 name.push_str(&self.jr);
118 }
119 name
120 }
121
122 #[must_use]
124 pub fn is_empty(&self) -> bool {
125 self.first.is_empty() && self.von.is_empty() && self.last.is_empty() && self.jr.is_empty()
126 }
127}
128
129#[must_use]
134pub fn parse_names(input: &str) -> Vec<PersonName> {
135 split_bibtex_names(input)
136 .into_iter()
137 .map(parse_single_name)
138 .filter(|name| !name.is_empty())
139 .collect()
140}
141
/// A bibliography entry: its type, its citation key and its fields in stored order.
#[derive(Debug, Clone, PartialEq)]
pub struct Entry<'a> {
    /// The entry type, e.g. article or book.
    pub ty: EntryType<'a>,
    /// The citation key; borrowed or owned.
    pub key: Cow<'a, str>,
    /// The fields of the entry. Duplicate names are not prevented by this type.
    pub fields: Vec<Field<'a>>,
}
152
153impl<'a> Entry<'a> {
154 #[must_use]
156 pub const fn new(ty: EntryType<'a>, key: &'a str) -> Self {
157 Self {
158 ty,
159 key: Cow::Borrowed(key),
160 fields: Vec::new(),
161 }
162 }
163
    /// Returns the entry's type.
    #[must_use]
    pub const fn entry_type(&self) -> &EntryType<'a> {
        &self.ty
    }
169
170 #[must_use]
172 pub fn key(&self) -> &str {
173 &self.key
174 }
175
176 #[must_use]
178 pub fn field(&self, name: &str) -> Option<&Field<'a>> {
179 self.fields.iter().find(|f| f.name == name)
180 }
181
182 #[must_use]
184 pub fn field_ignore_case(&self, name: &str) -> Option<&Field<'a>> {
185 self.fields
186 .iter()
187 .find(|f| f.name.eq_ignore_ascii_case(name))
188 }
189
190 #[must_use]
193 pub fn get(&self, name: &str) -> Option<&str> {
194 self.field(name).and_then(|f| f.value.as_str())
195 }
196
197 #[must_use]
201 pub fn get_ignore_case(&self, name: &str) -> Option<&str> {
202 self.field_ignore_case(name).and_then(|f| f.value.as_str())
203 }
204
205 #[must_use]
207 pub fn get_as_string(&self, name: &str) -> Option<String> {
208 self.field(name).map(|f| value_to_lossy_string(&f.value))
209 }
210
211 #[must_use]
213 pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
214 self.field_ignore_case(name)
215 .map(|f| value_to_lossy_string(&f.value))
216 }
217
218 #[must_use]
220 pub fn get_any_ignore_case(&self, names: &[&str]) -> Option<&str> {
221 names.iter().find_map(|name| self.get_ignore_case(name))
222 }
223
224 #[must_use]
226 pub fn get_any_as_string_ignore_case(&self, names: &[&str]) -> Option<String> {
227 names
228 .iter()
229 .find_map(|name| self.get_as_string_ignore_case(name))
230 }
231
232 #[must_use]
234 pub fn has_field(&self, name: &str) -> bool {
235 self.field_ignore_case(name).is_some()
236 }
237
238 #[must_use]
240 pub fn has_any_field(&self, names: &[&str]) -> bool {
241 names.iter().any(|name| self.has_field(name))
242 }
243
244 #[must_use]
249 pub fn doi(&self) -> Option<String> {
250 self.get_as_string_ignore_case("doi")
251 .and_then(|doi| normalize_doi(&doi))
252 }
253
254 #[must_use]
256 pub fn authors(&self) -> Vec<PersonName> {
257 self.get_as_string_ignore_case("author")
258 .map_or_else(Vec::new, |authors| parse_names(&authors))
259 }
260
261 #[must_use]
263 pub fn editors(&self) -> Vec<PersonName> {
264 self.get_as_string_ignore_case("editor")
265 .map_or_else(Vec::new, |editors| parse_names(&editors))
266 }
267
268 #[must_use]
270 pub fn fields(&self) -> &[Field<'a>] {
271 &self.fields
272 }
273
    /// Appends `field` to the entry without checking for an existing field
    /// of the same name.
    pub fn add_field(&mut self, field: Field<'a>) {
        self.fields.push(field);
    }
278
279 pub fn set(&mut self, name: &'a str, value: Value<'a>) {
281 if let Some(field) = self.fields.iter_mut().find(|field| field.name == name) {
282 field.value = value;
283 } else {
284 self.fields.push(Field::new(name, value));
285 }
286 }
287
288 pub fn set_literal(&mut self, name: &'a str, value: &'a str) {
290 self.set(name, Value::Literal(Cow::Borrowed(value)));
291 }
292
293 pub fn remove(&mut self, name: &str) -> Vec<Field<'a>> {
295 let mut removed = Vec::new();
296 let mut index = 0;
297 while index < self.fields.len() {
298 if self.fields[index].name == name {
299 removed.push(self.fields.remove(index));
300 } else {
301 index += 1;
302 }
303 }
304 removed
305 }
306
307 pub fn rename_field(&mut self, old: &str, new: &'a str) -> usize {
309 let mut renamed = 0;
310 for field in &mut self.fields {
311 if field.name == old {
312 field.name = Cow::Borrowed(new);
313 renamed += 1;
314 }
315 }
316 renamed
317 }
318
319 #[must_use]
321 pub fn title(&self) -> Option<String> {
322 self.get_any_as_string_ignore_case(&["title"])
323 }
324
325 #[must_use]
327 pub fn year(&self) -> Option<String> {
328 self.get_any_as_string_ignore_case(&["year"])
329 }
330
331 #[must_use]
333 pub fn date(&self) -> Option<String> {
334 self.get_any_as_string_ignore_case(&["date"])
335 }
336
337 #[must_use]
339 pub fn journal(&self) -> Option<String> {
340 self.get_any_as_string_ignore_case(&["journal", "journaltitle"])
341 }
342
343 #[must_use]
345 pub fn booktitle(&self) -> Option<String> {
346 self.get_any_as_string_ignore_case(&["booktitle"])
347 }
348
349 #[must_use]
351 pub fn url(&self) -> Option<String> {
352 self.get_any_as_string_ignore_case(&["url"])
353 }
354
355 #[must_use]
357 pub fn keywords(&self) -> Vec<String> {
358 self.get_any_as_string_ignore_case(&["keywords", "keyword"])
359 .map(|keywords| {
360 keywords
361 .split([',', ';'])
362 .map(str::trim)
363 .filter(|keyword| !keyword.is_empty())
364 .map(ToOwned::to_owned)
365 .collect()
366 })
367 .unwrap_or_default()
368 }
369
370 pub fn validate(&self, level: ValidationLevel) -> Result<(), Vec<ValidationError>> {
373 let mut errors = Vec::new();
374
375 self.validate_required_fields(&mut errors);
377
378 match level {
379 ValidationLevel::Minimal => {
380 }
382 ValidationLevel::Standard => {
383 self.validate_common_issues(&mut errors);
385 }
386 ValidationLevel::Strict => {
387 self.validate_common_issues(&mut errors);
389 self.validate_field_formats(&mut errors);
390 self.validate_cross_references(&mut errors);
391 }
392 }
393
394 if errors.is_empty() {
395 Ok(())
396 } else {
397 Err(errors)
398 }
399 }
400
401 fn validate_required_fields(&self, errors: &mut Vec<ValidationError>) {
403 for &field_group in self.ty.required_field_groups() {
404 if self.has_any_field(field_group) {
405 continue;
406 }
407
408 if field_group == ["author", "editor"] {
409 errors.push(ValidationError::error(
410 None,
411 format!(
412 "{} entry must have either 'author' or 'editor' field",
413 self.ty
414 ),
415 ));
416 continue;
417 }
418
419 let primary_field = field_group[0];
420 let message = if field_group.len() == 1 {
421 format!(
422 "Required field '{}' is missing for {} entry",
423 primary_field, self.ty
424 )
425 } else {
426 format!(
427 "Required field '{}' is missing for {} entry (accepted aliases: {})",
428 primary_field,
429 self.ty,
430 field_group.join(", ")
431 )
432 };
433
434 errors.push(ValidationError::error(Some(primary_field), message));
435 }
436 }
437
438 fn validate_common_issues(&self, errors: &mut Vec<ValidationError>) {
440 if let Some(year_str) = self.get_any_as_string_ignore_case(&["year", "date"]) {
444 if let Ok(year) = year_str.parse::<i32>() {
445 if !(1000..=2100).contains(&year) {
446 errors.push(ValidationError::warning(
447 Some(if self.has_field("year") {
448 "year"
449 } else {
450 "date"
451 }),
452 format!("Year {year} seems unlikely"),
453 ));
454 }
455 } else {
456 errors.push(ValidationError::warning(
457 Some(if self.has_field("year") {
458 "year"
459 } else {
460 "date"
461 }),
462 "Year/date should be a number",
463 ));
464 }
465 }
466
467 if let Some(pages) = self.get_ignore_case("pages") {
469 if !is_valid_page_range(pages) {
470 errors.push(ValidationError::warning(
471 Some("pages"),
472 "Pages should be in format '12-34' or '12--34'",
473 ));
474 }
475 }
476
477 match self.ty {
479 EntryType::InBook | EntryType::InProceedings | EntryType::InCollection => {
480 if !self.has_any_field(&["author", "editor"]) {
481 errors.push(ValidationError::warning(
482 None,
483 "Entry should have either 'author' or 'editor' field",
484 ));
485 }
486 }
487 _ => {}
488 }
489
490 for field in &self.fields {
492 if let Some(value_str) = field.value.as_str() {
493 if value_str.trim().is_empty() {
494 errors.push(ValidationError::warning(
495 Some(&field.name),
496 "Field has empty value",
497 ));
498 }
499 }
500 }
501 }
502
503 fn validate_field_formats(&self, errors: &mut Vec<ValidationError>) {
505 if let Some(doi) = self.get_as_string_ignore_case("doi") {
507 if normalize_doi(&doi).is_none() {
508 errors.push(ValidationError::warning(
509 Some("doi"),
510 "DOI should start with '10.' or a DOI URL/prefix",
511 ));
512 }
513 }
514
515 if let Some(url) = self.get_ignore_case("url") {
517 if !url.starts_with("http://") && !url.starts_with("https://") {
518 errors.push(ValidationError::warning(
519 Some("url"),
520 "URL should start with http:// or https://",
521 ));
522 }
523 }
524
525 if let Some(isbn) = self.get_ignore_case("isbn") {
527 if !is_valid_isbn_shape(isbn) {
528 errors.push(ValidationError::warning(
529 Some("isbn"),
530 "ISBN should have 10 or 13 digits",
531 ));
532 }
533 }
534
535 if let Some(month) = self.get_ignore_case("month") {
537 if !is_valid_month(month) {
538 errors.push(ValidationError::info(
539 Some("month"),
540 "Month should be a standard abbreviation (jan, feb, etc.) or full name",
541 ));
542 }
543 }
544
545 for field_name in &["volume", "number"] {
547 if let Some(value) = self.get_ignore_case(field_name) {
548 if value.parse::<i32>().is_err() && !value.contains('-') {
549 errors.push(ValidationError::info(
550 Some(field_name),
551 format!("{field_name} should typically be numeric"),
552 ));
553 }
554 }
555 }
556 }
557
558 fn validate_cross_references(&self, errors: &mut Vec<ValidationError>) {
560 if let Some(crossref) = self.get_ignore_case("crossref") {
561 if crossref.trim().is_empty() {
562 errors.push(ValidationError::error(
563 Some("crossref"),
564 "Cross-reference is empty",
565 ));
566 }
567 }
568 }
569
570 #[must_use]
572 pub fn is_valid(&self) -> bool {
573 self.validate(ValidationLevel::Minimal).is_ok()
574 }
575
576 #[cfg(feature = "latex_to_unicode")]
594 #[must_use]
595 pub fn get_unicode(&self, name: &str) -> Option<String> {
596 self.get(name).map(crate::latex_unicode::latex_to_unicode)
597 }
598
599 #[cfg(feature = "latex_to_unicode")]
618 #[must_use]
619 pub fn get_unicode_ignore_case(&self, name: &str) -> Option<String> {
620 self.get_ignore_case(name)
621 .map(crate::latex_unicode::latex_to_unicode)
622 }
623
624 #[cfg(feature = "latex_to_unicode")]
629 #[must_use]
630 pub fn get_as_unicode_string(&self, name: &str) -> Option<String> {
631 self.get_as_string(name)
632 .map(|s| crate::latex_unicode::latex_to_unicode(&s))
633 }
634
635 #[cfg(feature = "latex_to_unicode")]
640 #[must_use]
641 pub fn get_as_unicode_string_ignore_case(&self, name: &str) -> Option<String> {
642 self.get_as_string_ignore_case(name)
643 .map(|s| crate::latex_unicode::latex_to_unicode(&s))
644 }
645
646 #[cfg(feature = "latex_to_unicode")]
674 #[must_use]
675 pub fn fields_unicode(&self) -> Vec<(String, String)> {
676 self.fields
677 .iter()
678 .filter_map(|f| {
679 f.value.as_str().map(|s| {
680 (
681 f.name.to_string(),
682 crate::latex_unicode::latex_to_unicode(s),
683 )
684 })
685 })
686 .collect()
687 }
688
689 #[must_use]
691 pub fn into_owned(self) -> Entry<'static> {
692 Entry {
693 ty: self.ty.into_owned(),
694 key: Cow::Owned(self.key.into_owned()),
695 fields: self.fields.into_iter().map(Field::into_owned).collect(),
696 }
697 }
698}
699
/// The type of a bibliography entry.
///
/// Known types are unit variants; any other type name is kept verbatim in
/// [`EntryType::Custom`]. The textual name of each variant is given by
/// [`EntryType::canonical_name`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EntryType<'a> {
    /// `article`
    Article,
    /// `book`
    Book,
    /// `booklet`
    Booklet,
    /// `mvbook`
    MvBook,
    /// `inbook`
    InBook,
    /// `bookinbook`
    BookInBook,
    /// `suppbook`
    SuppBook,
    /// `collection`
    Collection,
    /// `mvcollection`
    MvCollection,
    /// `incollection`
    InCollection,
    /// `suppcollection`
    SuppCollection,
    /// `inproceedings`; `conference` also parses to this variant.
    InProceedings,
    /// `proceedings`
    Proceedings,
    /// `mvproceedings`
    MvProceedings,
    /// `reference`
    Reference,
    /// `inreference`
    InReference,
    /// `manual`
    Manual,
    /// `mastersthesis`
    MastersThesis,
    /// `phdthesis`
    PhdThesis,
    /// `thesis`
    Thesis,
    /// `techreport`
    TechReport,
    /// `report`
    Report,
    /// `patent`
    Patent,
    /// `periodical`
    Periodical,
    /// `online`
    Online,
    /// `software`
    Software,
    /// `dataset`
    Dataset,
    /// `set`
    Set,
    /// `xdata`
    XData,
    /// `unpublished`
    Unpublished,
    /// `misc`
    Misc,
    /// Any type name not recognised by [`EntryType::parse`], stored as written.
    Custom(Cow<'a, str>),
}
768
769impl<'a> EntryType<'a> {
770 #[must_use]
772 #[inline(never)]
773 pub fn parse(s: &'a str) -> Self {
774 let bytes = s.as_bytes();
775 if bytes.is_empty() {
776 return Self::Custom(Cow::Borrowed(s));
777 }
778
779 match (bytes.len(), ascii_lower(bytes[0])) {
780 (3, b's') if eq_ascii_lower(bytes, b"set") => Self::Set,
781 (4, b'b') if eq_ascii_lower(bytes, b"book") => Self::Book,
782 (4, b'm') if eq_ascii_lower(bytes, b"misc") => Self::Misc,
783 (6, b'i') if eq_ascii_lower(bytes, b"inbook") => Self::InBook,
784 (6, b'm') if eq_ascii_lower(bytes, b"manual") => Self::Manual,
785 (6, b'm') if eq_ascii_lower(bytes, b"mvbook") => Self::MvBook,
786 (6, b'o') if eq_ascii_lower(bytes, b"online") => Self::Online,
787 (6, b'p') if eq_ascii_lower(bytes, b"patent") => Self::Patent,
788 (6, b'r') if eq_ascii_lower(bytes, b"report") => Self::Report,
789 (6, b't') if eq_ascii_lower(bytes, b"thesis") => Self::Thesis,
790 (7, b'a') if eq_ascii_lower(bytes, b"article") => Self::Article,
791 (7, b'b') if eq_ascii_lower(bytes, b"booklet") => Self::Booklet,
792 (7, b'd') if eq_ascii_lower(bytes, b"dataset") => Self::Dataset,
793 (8, b's') if eq_ascii_lower(bytes, b"software") => Self::Software,
794 (8, b's') if eq_ascii_lower(bytes, b"suppbook") => Self::SuppBook,
795 (9, b'r') if eq_ascii_lower(bytes, b"reference") => Self::Reference,
796 (9, b'p') if eq_ascii_lower(bytes, b"phdthesis") => Self::PhdThesis,
797 (10, b'b') if eq_ascii_lower(bytes, b"bookinbook") => Self::BookInBook,
798 (10, b'c') if eq_ascii_lower(bytes, b"conference") => Self::InProceedings,
799 (10, b'c') if eq_ascii_lower(bytes, b"collection") => Self::Collection,
800 (10, b'p') if eq_ascii_lower(bytes, b"periodical") => Self::Periodical,
801 (10, b't') if eq_ascii_lower(bytes, b"techreport") => Self::TechReport,
802 (11, b'i') if eq_ascii_lower(bytes, b"inreference") => Self::InReference,
803 (11, b'p') if eq_ascii_lower(bytes, b"proceedings") => Self::Proceedings,
804 (11, b'u') if eq_ascii_lower(bytes, b"unpublished") => Self::Unpublished,
805 (12, b'i') if eq_ascii_lower(bytes, b"incollection") => Self::InCollection,
806 (12, b'm') if eq_ascii_lower(bytes, b"mvcollection") => Self::MvCollection,
807 (13, b'i') if eq_ascii_lower(bytes, b"inproceedings") => Self::InProceedings,
808 (13, b'm') if eq_ascii_lower(bytes, b"mastersthesis") => Self::MastersThesis,
809 (13, b'm') if eq_ascii_lower(bytes, b"mvproceedings") => Self::MvProceedings,
810 (14, b's') if eq_ascii_lower(bytes, b"suppcollection") => Self::SuppCollection,
811 (5, b'x') if eq_ascii_lower(bytes, b"xdata") => Self::XData,
812 _ => Self::Custom(Cow::Borrowed(s)),
813 }
814 }
815
816 #[must_use]
818 pub const fn required_fields(&self) -> &'static [&'static str] {
819 match self {
820 Self::Article => &["author", "title", "journal", "year"],
821 Self::Book | Self::MvBook => &["author", "title", "publisher", "year"],
822 Self::Booklet | Self::Manual => &["title"],
823 Self::InBook | Self::BookInBook | Self::SuppBook => {
824 &["author", "title", "chapter", "publisher", "year"]
825 }
826 Self::Collection | Self::MvCollection | Self::Reference => {
827 &["editor", "title", "publisher", "year"]
828 }
829 Self::InCollection | Self::SuppCollection | Self::InReference => {
830 &["author", "title", "booktitle", "publisher", "year"]
831 }
832 Self::InProceedings => &["author", "title", "booktitle", "year"],
833 Self::Proceedings | Self::MvProceedings | Self::Periodical => &["title", "year"],
834 Self::MastersThesis | Self::PhdThesis | Self::Thesis => {
835 &["author", "title", "school", "year"]
836 }
837 Self::TechReport => &["author", "title", "institution", "year"],
838 Self::Report => &["author", "title", "type", "institution", "year"],
839 Self::Patent => &["author", "title", "number", "year"],
840 Self::Online => &["title", "url"],
841 Self::Software | Self::Dataset => &["author", "title", "year"],
842 Self::Unpublished => &["author", "title", "note"],
843 Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
844 }
845 }
846
847 #[must_use]
852 pub const fn required_field_groups(&self) -> &'static [&'static [&'static str]] {
853 match self {
854 Self::Article => &[
855 &["author"],
856 &["title"],
857 &["journal", "journaltitle"],
858 &["year", "date"],
859 ],
860 Self::Book | Self::MvBook => &[
861 &["author", "editor"],
862 &["title"],
863 &["publisher"],
864 &["year", "date"],
865 ],
866 Self::Booklet | Self::Manual => &[&["title"]],
867 Self::InBook | Self::BookInBook | Self::SuppBook => &[
868 &["author", "editor"],
869 &["title"],
870 &["chapter", "pages"],
871 &["publisher"],
872 &["year", "date"],
873 ],
874 Self::Collection | Self::MvCollection | Self::Reference => &[
875 &["editor", "author"],
876 &["title"],
877 &["publisher"],
878 &["year", "date"],
879 ],
880 Self::InCollection | Self::SuppCollection | Self::InReference => &[
881 &["author", "editor"],
882 &["title"],
883 &["booktitle"],
884 &["publisher"],
885 &["year", "date"],
886 ],
887 Self::InProceedings => &[
888 &["author", "editor"],
889 &["title"],
890 &["booktitle"],
891 &["year", "date"],
892 ],
893 Self::Proceedings | Self::MvProceedings | Self::Periodical => {
894 &[&["title"], &["year", "date"]]
895 }
896 Self::MastersThesis | Self::PhdThesis | Self::Thesis => &[
897 &["author"],
898 &["title"],
899 &["school", "institution"],
900 &["year", "date"],
901 ],
902 Self::TechReport => &[&["author"], &["title"], &["institution"], &["year", "date"]],
903 Self::Report => &[
904 &["author", "editor"],
905 &["title"],
906 &["type"],
907 &["institution"],
908 &["year", "date"],
909 ],
910 Self::Patent => &[&["author"], &["title"], &["number"], &["year", "date"]],
911 Self::Online => &[&["title"], &["url", "doi"], &["year", "date", "urldate"]],
912 Self::Software | Self::Dataset => &[
913 &["author", "editor"],
914 &["title"],
915 &["year", "date", "version"],
916 ],
917 Self::Unpublished => &[&["author"], &["title"], &["note"]],
918 Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
919 }
920 }
921
922 #[must_use]
924 pub fn canonical_name(&self) -> &str {
925 match self {
926 Self::Article => "article",
927 Self::Book => "book",
928 Self::Booklet => "booklet",
929 Self::MvBook => "mvbook",
930 Self::InBook => "inbook",
931 Self::BookInBook => "bookinbook",
932 Self::SuppBook => "suppbook",
933 Self::Collection => "collection",
934 Self::MvCollection => "mvcollection",
935 Self::InCollection => "incollection",
936 Self::SuppCollection => "suppcollection",
937 Self::InProceedings => "inproceedings",
938 Self::Proceedings => "proceedings",
939 Self::MvProceedings => "mvproceedings",
940 Self::Reference => "reference",
941 Self::InReference => "inreference",
942 Self::Manual => "manual",
943 Self::MastersThesis => "mastersthesis",
944 Self::PhdThesis => "phdthesis",
945 Self::Thesis => "thesis",
946 Self::TechReport => "techreport",
947 Self::Report => "report",
948 Self::Patent => "patent",
949 Self::Periodical => "periodical",
950 Self::Online => "online",
951 Self::Software => "software",
952 Self::Dataset => "dataset",
953 Self::Set => "set",
954 Self::XData => "xdata",
955 Self::Unpublished => "unpublished",
956 Self::Misc => "misc",
957 Self::Custom(s) => s,
958 }
959 }
960
961 #[must_use]
963 pub const fn aliases(&self) -> &'static [&'static str] {
964 match self {
965 Self::InProceedings => &["conference"],
966 Self::TechReport => &["techreport"],
967 Self::MastersThesis => &["mastersthesis"],
968 Self::PhdThesis => &["phdthesis"],
969 _ => &[],
970 }
971 }
972
973 #[must_use]
975 pub const fn is_classic_bibtex(&self) -> bool {
976 matches!(
977 self,
978 Self::Article
979 | Self::Book
980 | Self::Booklet
981 | Self::InBook
982 | Self::InCollection
983 | Self::InProceedings
984 | Self::Manual
985 | Self::MastersThesis
986 | Self::PhdThesis
987 | Self::Proceedings
988 | Self::TechReport
989 | Self::Unpublished
990 | Self::Misc
991 )
992 }
993
994 #[must_use]
996 pub const fn is_extended(&self) -> bool {
997 !self.is_classic_bibtex() && !matches!(self, Self::Custom(_))
998 }
999
1000 #[must_use]
1002 pub fn into_owned(self) -> EntryType<'static> {
1003 match self {
1004 Self::Custom(s) => EntryType::Custom(Cow::Owned(s.into_owned())),
1005 Self::Article => EntryType::Article,
1006 Self::Book => EntryType::Book,
1007 Self::Booklet => EntryType::Booklet,
1008 Self::MvBook => EntryType::MvBook,
1009 Self::InBook => EntryType::InBook,
1010 Self::BookInBook => EntryType::BookInBook,
1011 Self::SuppBook => EntryType::SuppBook,
1012 Self::Collection => EntryType::Collection,
1013 Self::MvCollection => EntryType::MvCollection,
1014 Self::InCollection => EntryType::InCollection,
1015 Self::SuppCollection => EntryType::SuppCollection,
1016 Self::InProceedings => EntryType::InProceedings,
1017 Self::Proceedings => EntryType::Proceedings,
1018 Self::MvProceedings => EntryType::MvProceedings,
1019 Self::Reference => EntryType::Reference,
1020 Self::InReference => EntryType::InReference,
1021 Self::Manual => EntryType::Manual,
1022 Self::MastersThesis => EntryType::MastersThesis,
1023 Self::PhdThesis => EntryType::PhdThesis,
1024 Self::Thesis => EntryType::Thesis,
1025 Self::TechReport => EntryType::TechReport,
1026 Self::Report => EntryType::Report,
1027 Self::Patent => EntryType::Patent,
1028 Self::Periodical => EntryType::Periodical,
1029 Self::Online => EntryType::Online,
1030 Self::Software => EntryType::Software,
1031 Self::Dataset => EntryType::Dataset,
1032 Self::Set => EntryType::Set,
1033 Self::XData => EntryType::XData,
1034 Self::Unpublished => EntryType::Unpublished,
1035 Self::Misc => EntryType::Misc,
1036 }
1037 }
1038}
1039
/// Maps an ASCII uppercase byte to lowercase and returns every other byte unchanged.
#[inline]
const fn ascii_lower(byte: u8) -> u8 {
    match byte {
        b'A'..=b'Z' => byte + (b'a' - b'A'),
        _ => byte,
    }
}
1048
1049#[inline]
1050fn eq_ascii_lower(input: &[u8], expected: &[u8]) -> bool {
1051 if input.len() != expected.len() {
1052 return false;
1053 }
1054
1055 let mut index = 0usize;
1056 while index < input.len() {
1057 if ascii_lower(input[index]) != expected[index] {
1058 return false;
1059 }
1060 index += 1;
1061 }
1062
1063 true
1064}
1065
1066impl fmt::Display for EntryType<'_> {
1067 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1068 f.write_str(self.canonical_name())
1069 }
1070}
1071
/// A single `name = value` field of an [`Entry`].
#[derive(Debug, Clone, PartialEq)]
pub struct Field<'a> {
    /// The field name as stored; borrowed or owned.
    pub name: Cow<'a, str>,
    /// The field value.
    pub value: Value<'a>,
}
1080
1081impl<'a> Field<'a> {
1082 #[must_use]
1084 pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1085 Self {
1086 name: Cow::Borrowed(name),
1087 value,
1088 }
1089 }
1090
1091 #[must_use]
1093 pub fn name_eq_ignore_case(&self, name: &str) -> bool {
1094 self.name.eq_ignore_ascii_case(name)
1095 }
1096
1097 #[must_use]
1099 pub fn into_owned(self) -> Field<'static> {
1100 Field {
1101 name: Cow::Owned(self.name.into_owned()),
1102 value: self.value.into_owned(),
1103 }
1104 }
1105}
1106
/// The value of a field.
#[derive(Debug, Clone, PartialEq)]
pub enum Value<'a> {
    /// Plain text; the only kind [`Value::as_str`] returns.
    Literal(Cow<'a, str>),
    /// An integer value.
    Number(i64),
    /// Several values joined together; displayed with ` # ` between the parts.
    Concat(Box<[Self]>),
    /// A reference to a named string, resolved by [`Value::expand`].
    Variable(Cow<'a, str>),
}
1123
1124impl Default for Value<'_> {
1125 fn default() -> Self {
1126 Self::Number(0)
1127 }
1128}
1129
1130impl Value<'_> {
1131 #[must_use]
1133 pub fn as_str(&self) -> Option<&str> {
1134 match self {
1135 Self::Literal(s) => Some(s),
1136 _ => None,
1137 }
1138 }
1139
1140 #[must_use]
1142 pub fn expand(&self, strings: &AHashMap<&str, Value>) -> String {
1143 match self {
1144 Self::Literal(s) => s.to_string(),
1145 Self::Number(n) => n.to_string(),
1146 Self::Variable(name) => strings
1147 .get(name.as_ref())
1148 .map_or_else(|| format!("{{undefined:{name}}}"), |v| v.expand(strings)),
1149 Self::Concat(parts) => parts.iter().map(|p| p.expand(strings)).collect::<String>(),
1150 }
1151 }
1152
1153 #[must_use]
1155 pub fn into_owned(self) -> Value<'static> {
1156 match self {
1157 Self::Literal(s) => Value::Literal(Cow::Owned(s.into_owned())),
1158 Self::Number(n) => Value::Number(n),
1159 Self::Variable(s) => Value::Variable(Cow::Owned(s.into_owned())),
1160 Self::Concat(parts) => Value::Concat(
1161 parts
1162 .into_vec()
1163 .into_iter()
1164 .map(Value::into_owned)
1165 .collect::<Vec<_>>()
1166 .into_boxed_slice(),
1167 ),
1168 }
1169 }
1170}
1171
1172impl fmt::Display for Value<'_> {
1173 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1174 match self {
1175 Self::Literal(s) => write!(f, "{s}"),
1176 Self::Number(n) => write!(f, "{n}"),
1177 Self::Variable(name) => write!(f, "{{{name}}}"),
1178 Self::Concat(parts) => {
1179 for (i, part) in parts.iter().enumerate() {
1180 if i > 0 {
1181 write!(f, " # ")?;
1182 }
1183 write!(f, "{part}")?;
1184 }
1185 Ok(())
1186 }
1187 }
1188 }
1189}
1190
1191fn value_to_lossy_string(value: &Value<'_>) -> String {
1192 match value {
1193 Value::Literal(s) => s.to_string(),
1194 Value::Number(n) => n.to_string(),
1195 Value::Variable(v) => format!("{{{v}}}"),
1196 Value::Concat(parts) => parts.iter().map(value_to_lossy_string).collect(),
1197 }
1198}
1199
/// Normalizes a DOI to its bare lowercase form, e.g. `10.1000/xyz`.
///
/// Surrounding whitespace is ignored, one leading resolver URL
/// (`http(s)://doi.org/`, `http(s)://dx.doi.org/`) or `doi:` label is
/// stripped, and trailing `.`, `,` and `;` are removed. Prefixes are matched
/// ignoring ASCII case, so `DOI:`, `Doi:` and `HTTPS://DOI.ORG/` all work.
///
/// Returns `None` when the remainder does not start with `10.`, lacks a
/// `/`, or is too short to be a DOI.
#[must_use]
pub fn normalize_doi(input: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ];

    let trimmed = input.trim();
    if trimmed.is_empty() {
        return None;
    }

    let bare = PREFIXES
        .iter()
        .find_map(|prefix| {
            // `get` yields `None` when the input is shorter than the prefix
            // or the cut would fall inside a multi-byte character.
            let head = trimmed.get(..prefix.len())?;
            if head.eq_ignore_ascii_case(prefix) {
                Some(trimmed[prefix.len()..].trim())
            } else {
                None
            }
        })
        .unwrap_or(trimmed);

    let bare = bare.trim_end_matches(['.', ',', ';']);
    if bare.len() > 3 && bare.starts_with("10.") && bare.contains('/') {
        Some(bare.to_ascii_lowercase())
    } else {
        None
    }
}
1229
/// Checks that `isbn` has the shape of an ISBN-10 or ISBN-13.
///
/// Hyphens and spaces are ignored. Ten symbols must be digits, except that
/// the last may be `x`/`X`; thirteen symbols must all be digits. No checksum
/// is verified.
fn is_valid_isbn_shape(isbn: &str) -> bool {
    let symbols: Vec<u8> = isbn
        .bytes()
        .filter(|byte| *byte != b'-' && *byte != b' ')
        .collect();
    let all_digits = |bytes: &[u8]| bytes.iter().all(u8::is_ascii_digit);

    match symbols.len() {
        10 => {
            let check = symbols[9];
            all_digits(&symbols[..9]) && (check.is_ascii_digit() || matches!(check, b'x' | b'X'))
        }
        13 => all_digits(&symbols[..]),
        _ => false,
    }
}
1242
1243fn split_bibtex_names(input: &str) -> Vec<&str> {
1244 let mut names = Vec::new();
1245 let mut start = 0;
1246 let mut depth = 0usize;
1247 let mut iter = input.char_indices().peekable();
1248
1249 while let Some((index, ch)) = iter.next() {
1250 match ch {
1251 '{' => depth += 1,
1252 '}' => depth = depth.saturating_sub(1),
1253 'a' | 'A' if depth == 0 && starts_name_separator(input, index) => {
1254 let candidate = input[start..index].trim();
1255 if !candidate.is_empty() {
1256 names.push(candidate);
1257 }
1258 start = index + 3;
1259 while input[start..]
1260 .chars()
1261 .next()
1262 .is_some_and(char::is_whitespace)
1263 {
1264 start += input[start..].chars().next().map_or(0, char::len_utf8);
1265 }
1266 while iter
1267 .peek()
1268 .is_some_and(|(_, next_ch)| next_ch.is_whitespace())
1269 {
1270 iter.next();
1271 }
1272 }
1273 _ => {}
1274 }
1275 }
1276
1277 let candidate = input[start..].trim();
1278 if !candidate.is_empty() {
1279 names.push(candidate);
1280 }
1281
1282 names
1283}
1284
/// Returns `true` when the text at byte offset `index` is a standalone
/// `and` (any ASCII case): preceded by whitespace or the start of `input`,
/// and followed by whitespace or the end of `input`.
///
/// `index` must lie on a character boundary of `input`.
fn starts_name_separator(input: &str, index: usize) -> bool {
    let (head, tail) = input.split_at(index);

    let is_and = tail
        .get(..3)
        .is_some_and(|word| word.eq_ignore_ascii_case("and"));
    if !is_and {
        return false;
    }

    let boundary_before = head.is_empty() || head.ends_with(char::is_whitespace);
    let boundary_after = tail.len() == 3 || tail[3..].starts_with(char::is_whitespace);
    boundary_before && boundary_after
}
1302
1303fn parse_single_name(input: &str) -> PersonName {
1304 let parts = split_top_level_commas(input);
1305 match parts.as_slice() {
1306 [last] => parse_first_von_last(last),
1307 [last, first] => {
1308 let (von, last) = split_von_last(last);
1309 PersonName {
1310 first: normalize_name_part(first),
1311 von,
1312 last,
1313 jr: String::new(),
1314 }
1315 }
1316 [last, jr, first, ..] => {
1317 let (von, last) = split_von_last(last);
1318 PersonName {
1319 first: normalize_name_part(first),
1320 von,
1321 last,
1322 jr: normalize_name_part(jr),
1323 }
1324 }
1325 [] => PersonName {
1326 first: String::new(),
1327 von: String::new(),
1328 last: String::new(),
1329 jr: String::new(),
1330 },
1331 }
1332}
1333
1334fn parse_first_von_last(input: &str) -> PersonName {
1335 let words = split_name_words(input);
1336 match words.len() {
1337 0 => PersonName {
1338 first: String::new(),
1339 von: String::new(),
1340 last: String::new(),
1341 jr: String::new(),
1342 },
1343 1 => PersonName {
1344 first: String::new(),
1345 von: String::new(),
1346 last: normalize_name_part(words[0]),
1347 jr: String::new(),
1348 },
1349 _ => {
1350 let von_start = words
1351 .iter()
1352 .position(|word| starts_with_lowercase_letter(word));
1353 let (first, von, last) = von_start.map_or_else(
1354 || {
1355 (
1356 join_name_words(&words[..words.len() - 1]),
1357 String::new(),
1358 normalize_name_part(words[words.len() - 1]),
1359 )
1360 },
1361 |von_start| {
1362 let last_start = words[von_start + 1..]
1363 .iter()
1364 .position(|word| !starts_with_lowercase_letter(word))
1365 .map_or(words.len() - 1, |offset| von_start + 1 + offset);
1366
1367 (
1368 join_name_words(&words[..von_start]),
1369 join_name_words(&words[von_start..last_start]),
1370 join_name_words(&words[last_start..]),
1371 )
1372 },
1373 );
1374
1375 PersonName {
1376 first,
1377 von,
1378 last,
1379 jr: String::new(),
1380 }
1381 }
1382 }
1383}
1384
1385fn split_von_last(input: &str) -> (String, String) {
1386 let words = split_name_words(input);
1387 if words.is_empty() {
1388 return (String::new(), String::new());
1389 }
1390
1391 if let Some(last_start) = words
1392 .iter()
1393 .rposition(|word| starts_with_lowercase_letter(word))
1394 {
1395 if last_start + 1 < words.len() {
1396 return (
1397 join_name_words(&words[..=last_start]),
1398 join_name_words(&words[last_start + 1..]),
1399 );
1400 }
1401 }
1402
1403 if words.len() == 1 {
1404 (String::new(), normalize_name_part(words[0]))
1405 } else {
1406 (
1407 join_name_words(&words[..words.len() - 1]),
1408 normalize_name_part(words[words.len() - 1]),
1409 )
1410 }
1411}
1412
/// Splits `input` on commas that are not inside braces.
///
/// Every piece is trimmed and empty pieces are kept, so the result always
/// has one more element than there are top-level commas.
fn split_top_level_commas(input: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut nesting = 0usize;
    let mut piece_start = 0;

    for (position, ch) in input.char_indices() {
        if ch == '{' {
            nesting += 1;
        } else if ch == '}' {
            nesting = nesting.saturating_sub(1);
        } else if ch == ',' && nesting == 0 {
            pieces.push(input[piece_start..position].trim());
            piece_start = position + ch.len_utf8();
        }
    }

    pieces.push(input[piece_start..].trim());
    pieces
}
1433
/// Splits a name into whitespace-separated words, treating braced groups
/// as part of the word they occur in.
///
/// Whitespace inside braces does not end a word. Unmatched closing braces
/// are tolerated.
fn split_name_words(input: &str) -> Vec<&str> {
    let mut words = Vec::new();
    let mut word_start: Option<usize> = None;
    let mut nesting = 0usize;

    for (position, ch) in input.char_indices() {
        if ch == '{' {
            nesting += 1;
            word_start.get_or_insert(position);
        } else if ch == '}' {
            // A closing brace never starts a word on its own.
            nesting = nesting.saturating_sub(1);
        } else if nesting == 0 && ch.is_whitespace() {
            if let Some(begin) = word_start.take() {
                words.push(input[begin..position].trim());
            }
        } else {
            word_start.get_or_insert(position);
        }
    }

    if let Some(begin) = word_start {
        words.push(input[begin..].trim());
    }

    words.retain(|word| !word.is_empty());
    words
}
1465
1466fn join_name_words(words: &[&str]) -> String {
1467 words
1468 .iter()
1469 .map(|word| normalize_name_part(word))
1470 .filter(|word| !word.is_empty())
1471 .collect::<Vec<_>>()
1472 .join(" ")
1473}
1474
/// Trims `input` and, when it starts with `{` and ends with `}`, removes
/// that outer pair and trims again.
///
/// Only the outermost characters are inspected; the braces are not checked
/// for being a matching pair.
fn normalize_name_part(input: &str) -> String {
    let trimmed = input.trim();
    let unwrapped = trimmed
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'));

    match unwrapped {
        Some(inner) => inner.trim().to_string(),
        None => trimmed.to_string(),
    }
}
1483
1484fn starts_with_lowercase_letter(input: &str) -> bool {
1485 normalize_name_part(input)
1486 .chars()
1487 .find(|ch| ch.is_alphabetic())
1488 .is_some_and(char::is_lowercase)
1489}
1490
/// Loosely checks that `pages` looks like a page number or a list of ranges.
///
/// A string of ASCII digits is accepted outright. Otherwise the value must
/// contain a `-` or a `,`; it is split on commas, and every non-empty item
/// that contains a dash (`--` takes precedence over `-`) must consist of
/// exactly two non-blank sides. Items without a dash are accepted as-is.
fn is_valid_page_range(pages: &str) -> bool {
    if pages.trim().is_empty() {
        return false;
    }
    if pages.chars().all(|ch| ch.is_ascii_digit()) {
        return true;
    }
    if !(pages.contains('-') || pages.contains(',')) {
        return false;
    }

    pages
        .split(',')
        .map(str::trim)
        .filter(|item| !item.is_empty())
        .all(|item| {
            let separator = if item.contains("--") {
                "--"
            } else if item.contains('-') {
                "-"
            } else {
                return true;
            };

            let mut sides = item.split(separator);
            match (sides.next(), sides.next(), sides.next()) {
                (Some(from), Some(to), None) => {
                    !from.trim().is_empty() && !to.trim().is_empty()
                }
                _ => false,
            }
        })
}
1533
/// Returns `true` when `month` is an English month name or three-letter
/// abbreviation (any case), or an integer from 1 to 12.
///
/// The value is compared as given; surrounding whitespace is not ignored.
fn is_valid_month(month: &str) -> bool {
    const MONTH_NAMES: [&str; 23] = [
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
        "january",
        "february",
        "march",
        "april",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ];

    let lowered = month.to_lowercase();
    if MONTH_NAMES.contains(&lowered.as_str()) {
        return true;
    }

    match month.parse::<i32>() {
        Ok(number) => (1..=12).contains(&number),
        Err(_) => false,
    }
}
1566}