Skip to main content

bibtex_parser/
model.rs

1//! Data models for BibTeX entries
2
3use ahash::AHashMap;
4use std::borrow::Cow;
5use std::fmt;
6
/// Validation strictness level for BibTeX entries.
///
/// Passed to `Entry::validate` to select which groups of checks run. Every
/// level runs the required-field checks; higher levels add further checks on
/// top of the lower ones.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ValidationLevel {
    /// Only check that required fields exist
    Minimal,
    /// Check required fields and common issues (default)
    #[default]
    Standard,
    /// Strict validation including field formats and cross-references
    Strict,
}
18
/// Represents a validation error for a BibTeX entry.
///
/// Despite the name, a value of this type may also carry a warning or an
/// informational note; see `severity`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// The field that failed validation (if applicable).
    /// `None` means the problem concerns the entry as a whole.
    pub field: Option<String>,
    /// Description of the validation failure
    pub message: String,
    /// Severity of the error
    pub severity: ValidationSeverity,
}
29
/// Severity level for validation errors.
///
/// The variant name is what appears in brackets when a `ValidationError` is
/// displayed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// Must be fixed for valid BibTeX
    Error,
    /// Should be fixed but might work
    Warning,
    /// Informational, best practices
    Info,
}
40
41impl ValidationError {
42    /// Create a new error-level validation error
43    #[must_use]
44    pub fn error(field: Option<&str>, message: impl Into<String>) -> Self {
45        Self {
46            field: field.map(String::from),
47            message: message.into(),
48            severity: ValidationSeverity::Error,
49        }
50    }
51
52    /// Create a new warning-level validation error
53    #[must_use]
54    pub fn warning(field: Option<&str>, message: impl Into<String>) -> Self {
55        Self {
56            field: field.map(String::from),
57            message: message.into(),
58            severity: ValidationSeverity::Warning,
59        }
60    }
61
62    /// Create a new info-level validation error
63    #[must_use]
64    pub fn info(field: Option<&str>, message: impl Into<String>) -> Self {
65        Self {
66            field: field.map(String::from),
67            message: message.into(),
68            severity: ValidationSeverity::Info,
69        }
70    }
71}
72
73impl fmt::Display for ValidationError {
74    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75        let field = self.field.as_deref().unwrap_or("<entry>");
76        write!(f, "[{:?}] {}: {}", self.severity, field, self.message)
77    }
78}
79
/// A structured BibTeX person name.
///
/// BibTeX supports the forms `First von Last`, `von Last, First`, and
/// `von Last, Jr, First`. This type keeps those four logical parts separate
/// while preserving the exact token text from the source value.
///
/// A component that is absent from the name is stored as an empty string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersonName {
    /// Given names and initials.
    pub first: String,
    /// Lowercase particles such as `von`, `van`, `de`, or `der`.
    pub von: String,
    /// Family name.
    pub last: String,
    /// Junior part such as `Jr.` in `Last, Jr., First`.
    pub jr: String,
}
96
97impl PersonName {
98    /// Return the display form used by most bibliography styles.
99    #[must_use]
100    pub fn display_name(&self) -> String {
101        let mut parts = Vec::new();
102        if !self.first.is_empty() {
103            parts.push(self.first.as_str());
104        }
105        if !self.von.is_empty() {
106            parts.push(self.von.as_str());
107        }
108        if !self.last.is_empty() {
109            parts.push(self.last.as_str());
110        }
111
112        let mut name = parts.join(" ");
113        if !self.jr.is_empty() {
114            if !name.is_empty() {
115                name.push_str(", ");
116            }
117            name.push_str(&self.jr);
118        }
119        name
120    }
121
122    /// Return `true` when every name component is empty.
123    #[must_use]
124    pub fn is_empty(&self) -> bool {
125        self.first.is_empty() && self.von.is_empty() && self.last.is_empty() && self.jr.is_empty()
126    }
127}
128
129/// Parse a BibTeX `author` or `editor` field into structured person names.
130///
131/// Splitting respects balanced braces, so organization names such as
132/// `{The Unicode Consortium}` and literal `and` inside braces stay intact.
133#[must_use]
134pub fn parse_names(input: &str) -> Vec<PersonName> {
135    split_bibtex_names(input)
136        .into_iter()
137        .map(parse_single_name)
138        .filter(|name| !name.is_empty())
139        .collect()
140}
141
/// A BibTeX entry (article, book, etc.)
///
/// Fields are kept in a `Vec`, so their order is preserved and the same
/// field name may appear more than once.
#[derive(Debug, Clone, PartialEq)]
pub struct Entry<'a> {
    /// Entry type (article, book, inproceedings, etc.)
    pub ty: EntryType<'a>,
    /// Citation key
    pub key: Cow<'a, str>,
    /// Fields (author, title, year, etc.)
    pub fields: Vec<Field<'a>>,
}
152
153impl<'a> Entry<'a> {
    /// Create a new entry with the given type and citation key and no fields.
    ///
    /// The key is stored as a borrowed `Cow`, so the entry borrows `key` for
    /// `'a` instead of copying it.
    #[must_use]
    pub const fn new(ty: EntryType<'a>, key: &'a str) -> Self {
        Self {
            ty,
            key: Cow::Borrowed(key),
            fields: Vec::new(),
        }
    }
163
    /// Get the entry type.
    ///
    /// Returns a reference to the stored [`EntryType`]; nothing is cloned.
    #[must_use]
    pub const fn entry_type(&self) -> &EntryType<'a> {
        &self.ty
    }
169
170    /// Get the citation key
171    #[must_use]
172    pub fn key(&self) -> &str {
173        &self.key
174    }
175
176    /// Get a field by name (case-sensitive).
177    #[must_use]
178    pub fn field(&self, name: &str) -> Option<&Field<'a>> {
179        self.fields.iter().find(|f| f.name == name)
180    }
181
182    /// Get a field by name (case-insensitive).
183    #[must_use]
184    pub fn field_ignore_case(&self, name: &str) -> Option<&Field<'a>> {
185        self.fields
186            .iter()
187            .find(|f| f.name.eq_ignore_ascii_case(name))
188    }
189
190    /// Get a field value by name (case-sensitive)
191    /// Note: This only returns string literals, not numbers
192    #[must_use]
193    pub fn get(&self, name: &str) -> Option<&str> {
194        self.field(name).and_then(|f| f.value.as_str())
195    }
196
197    /// Get a field value by name (case-insensitive)
198    /// Returns the first field whose name matches ignoring case
199    /// Note: This only returns string literals, not numbers
200    #[must_use]
201    pub fn get_ignore_case(&self, name: &str) -> Option<&str> {
202        self.field_ignore_case(name).and_then(|f| f.value.as_str())
203    }
204
205    /// Get a field value as a string, converting numbers if necessary (case-sensitive)
206    #[must_use]
207    pub fn get_as_string(&self, name: &str) -> Option<String> {
208        self.field(name).map(|f| value_to_lossy_string(&f.value))
209    }
210
211    /// Get a field value as a string, converting numbers if necessary (case-insensitive)
212    #[must_use]
213    pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
214        self.field_ignore_case(name)
215            .map(|f| value_to_lossy_string(&f.value))
216    }
217
218    /// Get the first string-literal field matching any of the names, case-insensitively.
219    #[must_use]
220    pub fn get_any_ignore_case(&self, names: &[&str]) -> Option<&str> {
221        names.iter().find_map(|name| self.get_ignore_case(name))
222    }
223
224    /// Get the first field matching any of the names as a string, case-insensitively.
225    #[must_use]
226    pub fn get_any_as_string_ignore_case(&self, names: &[&str]) -> Option<String> {
227        names
228            .iter()
229            .find_map(|name| self.get_as_string_ignore_case(name))
230    }
231
232    /// Return `true` when a field exists, ignoring ASCII case.
233    #[must_use]
234    pub fn has_field(&self, name: &str) -> bool {
235        self.field_ignore_case(name).is_some()
236    }
237
238    /// Return `true` when any field in `names` exists, ignoring ASCII case.
239    #[must_use]
240    pub fn has_any_field(&self, names: &[&str]) -> bool {
241        names.iter().any(|name| self.has_field(name))
242    }
243
244    /// Return the normalized DOI, if the entry has a recognizable DOI field.
245    ///
246    /// This accepts common input forms such as `10.1000/xyz`,
247    /// `doi:10.1000/xyz`, and `https://doi.org/10.1000/xyz`.
248    #[must_use]
249    pub fn doi(&self) -> Option<String> {
250        self.get_as_string_ignore_case("doi")
251            .and_then(|doi| normalize_doi(&doi))
252    }
253
254    /// Parse the `author` field into structured BibTeX names.
255    #[must_use]
256    pub fn authors(&self) -> Vec<PersonName> {
257        self.get_as_string_ignore_case("author")
258            .map_or_else(Vec::new, |authors| parse_names(&authors))
259    }
260
261    /// Parse the `editor` field into structured BibTeX names.
262    #[must_use]
263    pub fn editors(&self) -> Vec<PersonName> {
264        self.get_as_string_ignore_case("editor")
265            .map_or_else(Vec::new, |editors| parse_names(&editors))
266    }
267
268    /// Get all fields
269    #[must_use]
270    pub fn fields(&self) -> &[Field<'a>] {
271        &self.fields
272    }
273
    /// Add a field.
    ///
    /// The field is appended to the end of the list without checking whether
    /// a field of the same name already exists.
    pub fn add_field(&mut self, field: Field<'a>) {
        self.fields.push(field);
    }
278
279    /// Set a field value, replacing the first matching field or appending it.
280    pub fn set(&mut self, name: &'a str, value: Value<'a>) {
281        if let Some(field) = self.fields.iter_mut().find(|field| field.name == name) {
282            field.value = value;
283        } else {
284            self.fields.push(Field::new(name, value));
285        }
286    }
287
288    /// Set a field to a string literal.
289    pub fn set_literal(&mut self, name: &'a str, value: &'a str) {
290        self.set(name, Value::Literal(Cow::Borrowed(value)));
291    }
292
293    /// Remove all fields whose name matches exactly.
294    pub fn remove(&mut self, name: &str) -> Vec<Field<'a>> {
295        let mut removed = Vec::new();
296        let mut index = 0;
297        while index < self.fields.len() {
298            if self.fields[index].name == name {
299                removed.push(self.fields.remove(index));
300            } else {
301                index += 1;
302            }
303        }
304        removed
305    }
306
307    /// Rename all fields whose name matches exactly.
308    pub fn rename_field(&mut self, old: &str, new: &'a str) -> usize {
309        let mut renamed = 0;
310        for field in &mut self.fields {
311            if field.name == old {
312                field.name = Cow::Borrowed(new);
313                renamed += 1;
314            }
315        }
316        renamed
317    }
318
319    /// Return the title field as a string.
320    #[must_use]
321    pub fn title(&self) -> Option<String> {
322        self.get_any_as_string_ignore_case(&["title"])
323    }
324
325    /// Return the year field as a string.
326    #[must_use]
327    pub fn year(&self) -> Option<String> {
328        self.get_any_as_string_ignore_case(&["year"])
329    }
330
331    /// Return the date field as a string.
332    #[must_use]
333    pub fn date(&self) -> Option<String> {
334        self.get_any_as_string_ignore_case(&["date"])
335    }
336
337    /// Return the journal field, accepting BibLaTeX's `journaltitle` alias.
338    #[must_use]
339    pub fn journal(&self) -> Option<String> {
340        self.get_any_as_string_ignore_case(&["journal", "journaltitle"])
341    }
342
343    /// Return the book title field as a string.
344    #[must_use]
345    pub fn booktitle(&self) -> Option<String> {
346        self.get_any_as_string_ignore_case(&["booktitle"])
347    }
348
349    /// Return the URL field as a string.
350    #[must_use]
351    pub fn url(&self) -> Option<String> {
352        self.get_any_as_string_ignore_case(&["url"])
353    }
354
355    /// Return keywords split on commas or semicolons.
356    #[must_use]
357    pub fn keywords(&self) -> Vec<String> {
358        self.get_any_as_string_ignore_case(&["keywords", "keyword"])
359            .map(|keywords| {
360                keywords
361                    .split([',', ';'])
362                    .map(str::trim)
363                    .filter(|keyword| !keyword.is_empty())
364                    .map(ToOwned::to_owned)
365                    .collect()
366            })
367            .unwrap_or_default()
368    }
369
370    /// Validate the entry according to the specified level
371    /// Returns Ok(()) if valid, or Err with a list of validation errors
372    pub fn validate(&self, level: ValidationLevel) -> Result<(), Vec<ValidationError>> {
373        let mut errors = Vec::new();
374
375        // Always check required fields
376        self.validate_required_fields(&mut errors);
377
378        match level {
379            ValidationLevel::Minimal => {
380                // Only required fields
381            }
382            ValidationLevel::Standard => {
383                // Additional standard checks
384                self.validate_common_issues(&mut errors);
385            }
386            ValidationLevel::Strict => {
387                // All checks
388                self.validate_common_issues(&mut errors);
389                self.validate_field_formats(&mut errors);
390                self.validate_cross_references(&mut errors);
391            }
392        }
393
394        if errors.is_empty() {
395            Ok(())
396        } else {
397            Err(errors)
398        }
399    }
400
401    /// Validate required fields for the entry type
402    fn validate_required_fields(&self, errors: &mut Vec<ValidationError>) {
403        for &field_group in self.ty.required_field_groups() {
404            if self.has_any_field(field_group) {
405                continue;
406            }
407
408            if field_group == ["author", "editor"] {
409                errors.push(ValidationError::error(
410                    None,
411                    format!(
412                        "{} entry must have either 'author' or 'editor' field",
413                        self.ty
414                    ),
415                ));
416                continue;
417            }
418
419            let primary_field = field_group[0];
420            let message = if field_group.len() == 1 {
421                format!(
422                    "Required field '{}' is missing for {} entry",
423                    primary_field, self.ty
424                )
425            } else {
426                format!(
427                    "Required field '{}' is missing for {} entry (accepted aliases: {})",
428                    primary_field,
429                    self.ty,
430                    field_group.join(", ")
431                )
432            };
433
434            errors.push(ValidationError::error(Some(primary_field), message));
435        }
436    }
437
438    /// Validate common issues that might cause problems
439    fn validate_common_issues(&self, errors: &mut Vec<ValidationError>) {
440        // Check for common issues
441
442        // Year should be a valid number and recent
443        if let Some(year_str) = self.get_any_as_string_ignore_case(&["year", "date"]) {
444            if let Ok(year) = year_str.parse::<i32>() {
445                if !(1000..=2100).contains(&year) {
446                    errors.push(ValidationError::warning(
447                        Some(if self.has_field("year") {
448                            "year"
449                        } else {
450                            "date"
451                        }),
452                        format!("Year {year} seems unlikely"),
453                    ));
454                }
455            } else {
456                errors.push(ValidationError::warning(
457                    Some(if self.has_field("year") {
458                        "year"
459                    } else {
460                        "date"
461                    }),
462                    "Year/date should be a number",
463                ));
464            }
465        }
466
467        // Pages should have valid format (e.g., "12-24" or "12--24")
468        if let Some(pages) = self.get_ignore_case("pages") {
469            if !is_valid_page_range(pages) {
470                errors.push(ValidationError::warning(
471                    Some("pages"),
472                    "Pages should be in format '12-34' or '12--34'",
473                ));
474            }
475        }
476
477        // Author and editor shouldn't both be missing for some types (but not books, handled above)
478        match self.ty {
479            EntryType::InBook | EntryType::InProceedings | EntryType::InCollection => {
480                if !self.has_any_field(&["author", "editor"]) {
481                    errors.push(ValidationError::warning(
482                        None,
483                        "Entry should have either 'author' or 'editor' field",
484                    ));
485                }
486            }
487            _ => {}
488        }
489
490        // Check for empty fields
491        for field in &self.fields {
492            if let Some(value_str) = field.value.as_str() {
493                if value_str.trim().is_empty() {
494                    errors.push(ValidationError::warning(
495                        Some(&field.name),
496                        "Field has empty value",
497                    ));
498                }
499            }
500        }
501    }
502
503    /// Validate specific field formats for strict checking
504    fn validate_field_formats(&self, errors: &mut Vec<ValidationError>) {
505        // DOI format
506        if let Some(doi) = self.get_as_string_ignore_case("doi") {
507            if normalize_doi(&doi).is_none() {
508                errors.push(ValidationError::warning(
509                    Some("doi"),
510                    "DOI should start with '10.' or a DOI URL/prefix",
511                ));
512            }
513        }
514
515        // URL format
516        if let Some(url) = self.get_ignore_case("url") {
517            if !url.starts_with("http://") && !url.starts_with("https://") {
518                errors.push(ValidationError::warning(
519                    Some("url"),
520                    "URL should start with http:// or https://",
521                ));
522            }
523        }
524
525        // ISBN format (basic check)
526        if let Some(isbn) = self.get_ignore_case("isbn") {
527            if !is_valid_isbn_shape(isbn) {
528                errors.push(ValidationError::warning(
529                    Some("isbn"),
530                    "ISBN should have 10 or 13 digits",
531                ));
532            }
533        }
534
535        // Month should be valid
536        if let Some(month) = self.get_ignore_case("month") {
537            if !is_valid_month(month) {
538                errors.push(ValidationError::info(
539                    Some("month"),
540                    "Month should be a standard abbreviation (jan, feb, etc.) or full name",
541                ));
542            }
543        }
544
545        // Volume and number should be numeric if present
546        for field_name in &["volume", "number"] {
547            if let Some(value) = self.get_ignore_case(field_name) {
548                if value.parse::<i32>().is_err() && !value.contains('-') {
549                    errors.push(ValidationError::info(
550                        Some(field_name),
551                        format!("{field_name} should typically be numeric"),
552                    ));
553                }
554            }
555        }
556    }
557
558    /// Validate cross-references for strict checking
559    fn validate_cross_references(&self, errors: &mut Vec<ValidationError>) {
560        if let Some(crossref) = self.get_ignore_case("crossref") {
561            if crossref.trim().is_empty() {
562                errors.push(ValidationError::error(
563                    Some("crossref"),
564                    "Cross-reference is empty",
565                ));
566            }
567        }
568    }
569
570    /// Check if entry has all required fields for its type (backward compatible)
571    #[must_use]
572    pub fn is_valid(&self) -> bool {
573        self.validate(ValidationLevel::Minimal).is_ok()
574    }
575
576    /// Get a field value with LaTeX sequences converted to Unicode (case-sensitive)
577    ///
578    /// This method converts common LaTeX escape sequences like `\'e` to `é` and `\"{o}` to `ö`.
579    /// Returns `None` if the field doesn't exist or isn't a string literal.
580    ///
581    /// # Examples
582    ///
583    /// ```
584    /// # #[cfg(feature = "latex_to_unicode")]
585    /// # {
586    /// # use bibtex_parser::Library;
587    /// let bibtex = r#"@article{test, author = "Jos\'e Garc\'ia"}"#;
588    /// let library = Library::parser().parse(bibtex).unwrap();
589    /// let entry = &library.entries()[0];
590    /// assert_eq!(entry.get_unicode("author"), Some("José García".to_string()));
591    /// # }
592    /// ```
593    #[cfg(feature = "latex_to_unicode")]
594    #[must_use]
595    pub fn get_unicode(&self, name: &str) -> Option<String> {
596        self.get(name).map(crate::latex_unicode::latex_to_unicode)
597    }
598
599    /// Get a field value with LaTeX sequences converted to Unicode (case-insensitive)
600    ///
601    /// This method converts common LaTeX escape sequences like `\'e` to `é` and `\"{o}` to `ö`.
602    /// Returns `None` if the field doesn't exist or isn't a string literal.
603    /// Field name matching is case-insensitive.
604    ///
605    /// # Examples
606    ///
607    /// ```
608    /// # #[cfg(feature = "latex_to_unicode")]
609    /// # {
610    /// # use bibtex_parser::Library;
611    /// let bibtex = r#"@article{test, TITLE = "M\\\"uller's work"}"#;
612    /// let library = Library::parser().parse(bibtex).unwrap();
613    /// let entry = &library.entries()[0];
614    /// assert_eq!(entry.get_unicode_ignore_case("title"), Some("Müller's work".to_string()));
615    /// # }
616    /// ```
617    #[cfg(feature = "latex_to_unicode")]
618    #[must_use]
619    pub fn get_unicode_ignore_case(&self, name: &str) -> Option<String> {
620        self.get_ignore_case(name)
621            .map(crate::latex_unicode::latex_to_unicode)
622    }
623
624    /// Get a field value as string with LaTeX conversion (case-sensitive)
625    ///
626    /// Similar to `get_as_string()` but converts LaTeX sequences to Unicode.
627    /// This handles all field types (literals, numbers, variables, concatenations).
628    #[cfg(feature = "latex_to_unicode")]
629    #[must_use]
630    pub fn get_as_unicode_string(&self, name: &str) -> Option<String> {
631        self.get_as_string(name)
632            .map(|s| crate::latex_unicode::latex_to_unicode(&s))
633    }
634
635    /// Get a field value as string with LaTeX conversion (case-insensitive)
636    ///
637    /// Similar to `get_as_string_ignore_case()` but converts LaTeX sequences to Unicode.
638    /// This handles all field types (literals, numbers, variables, concatenations).
639    #[cfg(feature = "latex_to_unicode")]
640    #[must_use]
641    pub fn get_as_unicode_string_ignore_case(&self, name: &str) -> Option<String> {
642        self.get_as_string_ignore_case(name)
643            .map(|s| crate::latex_unicode::latex_to_unicode(&s))
644    }
645
646    /// Get all fields with LaTeX converted to Unicode
647    ///
648    /// Returns a vector of (`field_name`, `unicode_value`) pairs for all string literal fields.
649    /// Non-string fields (numbers, variables) are excluded.
650    ///
651    /// # Examples
652    ///
653    /// ```
654    /// # #[cfg(feature = "latex_to_unicode")]
655    /// # {
656    /// # use bibtex_parser::Library;
657    /// let bibtex = r#"@article{test,
658    ///     author = "Jos\'e Garc\'ia",
659    ///     title = "\\alpha and \\beta particles",
660    ///     year = 2024
661    /// }"#;
662    /// let library = Library::parser().parse(bibtex).unwrap();
663    /// let entry = &library.entries()[0];
664    /// let unicode_fields = entry.fields_unicode();
665    ///
666    /// let author = unicode_fields.iter()
667    ///     .find(|(k, _)| k == "author")
668    ///     .map(|(_, v)| v.as_str())
669    ///     .unwrap();
670    /// assert_eq!(author, "José García");
671    /// # }
672    /// ```
673    #[cfg(feature = "latex_to_unicode")]
674    #[must_use]
675    pub fn fields_unicode(&self) -> Vec<(String, String)> {
676        self.fields
677            .iter()
678            .filter_map(|f| {
679                f.value.as_str().map(|s| {
680                    (
681                        f.name.to_string(),
682                        crate::latex_unicode::latex_to_unicode(s),
683                    )
684                })
685            })
686            .collect()
687    }
688
689    /// Convert to owned version
690    #[must_use]
691    pub fn into_owned(self) -> Entry<'static> {
692        Entry {
693            ty: self.ty.into_owned(),
694            key: Cow::Owned(self.key.into_owned()),
695            fields: self.fields.into_iter().map(Field::into_owned).collect(),
696        }
697    }
698}
699
/// BibTeX entry type.
///
/// Covers the classic BibTeX types plus a number of `biblatex` types. Any
/// name without a dedicated variant is carried in [`EntryType::Custom`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EntryType<'a> {
    /// Article from a journal
    Article,
    /// Book with publisher
    Book,
    /// Booklet without a named publisher
    Booklet,
    /// A multi-volume book (`biblatex`)
    MvBook,
    /// Part of a book
    InBook,
    /// A self-contained book part published as a book (`biblatex`)
    BookInBook,
    /// Supplemental material in a book (`biblatex`)
    SuppBook,
    /// A collection with its own title
    Collection,
    /// A multi-volume collection (`biblatex`)
    MvCollection,
    /// A contribution to a collection
    InCollection,
    /// Supplemental material in a collection (`biblatex`)
    SuppCollection,
    /// Article in conference proceedings (the `conference` type name is
    /// parsed to this variant as well)
    InProceedings,
    /// Conference proceedings
    Proceedings,
    /// Multi-volume proceedings (`biblatex`)
    MvProceedings,
    /// A reference work (`biblatex`)
    Reference,
    /// A contribution to a reference work (`biblatex`)
    InReference,
    /// Technical documentation or manual
    Manual,
    /// Master's thesis
    MastersThesis,
    /// `PhD` thesis
    PhdThesis,
    /// Generic thesis (`biblatex`)
    Thesis,
    /// Technical report
    TechReport,
    /// Generic report (`biblatex`)
    Report,
    /// Patent or patent request (`biblatex`)
    Patent,
    /// Periodical issue (`biblatex`)
    Periodical,
    /// Online resource (`biblatex`)
    Online,
    /// Software artifact (`biblatex` and common repository exports)
    Software,
    /// Dataset artifact (`biblatex` and common repository exports)
    Dataset,
    /// Entry set (`biblatex`)
    Set,
    /// Reusable data-only entry (`biblatex`)
    XData,
    /// Unpublished work
    Unpublished,
    /// Miscellaneous
    Misc,
    /// Custom entry type, holding the type name exactly as it was given
    Custom(Cow<'a, str>),
}
768
769impl<'a> EntryType<'a> {
    /// Parse an entry type from its name (case-insensitive).
    ///
    /// Known names map to their dedicated variant. `conference` is accepted
    /// as an alias and yields [`EntryType::InProceedings`]. Any other input,
    /// including the empty string, is returned as [`EntryType::Custom`]
    /// borrowing `s` unchanged.
    #[must_use]
    #[inline(never)]
    pub fn parse(s: &'a str) -> Self {
        let bytes = s.as_bytes();
        // Guard the `bytes[0]` access below.
        if bytes.is_empty() {
            return Self::Custom(Cow::Borrowed(s));
        }

        // Dispatch on (length, first byte) so at most a few candidate names
        // are compared in full.
        // NOTE(review): presumably `ascii_lower` lowercases one ASCII byte and
        // `eq_ascii_lower` compares against a lowercase name ignoring ASCII
        // case; confirm against the helpers.
        match (bytes.len(), ascii_lower(bytes[0])) {
            (3, b's') if eq_ascii_lower(bytes, b"set") => Self::Set,
            (4, b'b') if eq_ascii_lower(bytes, b"book") => Self::Book,
            (4, b'm') if eq_ascii_lower(bytes, b"misc") => Self::Misc,
            (6, b'i') if eq_ascii_lower(bytes, b"inbook") => Self::InBook,
            (6, b'm') if eq_ascii_lower(bytes, b"manual") => Self::Manual,
            (6, b'm') if eq_ascii_lower(bytes, b"mvbook") => Self::MvBook,
            (6, b'o') if eq_ascii_lower(bytes, b"online") => Self::Online,
            (6, b'p') if eq_ascii_lower(bytes, b"patent") => Self::Patent,
            (6, b'r') if eq_ascii_lower(bytes, b"report") => Self::Report,
            (6, b't') if eq_ascii_lower(bytes, b"thesis") => Self::Thesis,
            (7, b'a') if eq_ascii_lower(bytes, b"article") => Self::Article,
            (7, b'b') if eq_ascii_lower(bytes, b"booklet") => Self::Booklet,
            (7, b'd') if eq_ascii_lower(bytes, b"dataset") => Self::Dataset,
            (8, b's') if eq_ascii_lower(bytes, b"software") => Self::Software,
            (8, b's') if eq_ascii_lower(bytes, b"suppbook") => Self::SuppBook,
            (9, b'r') if eq_ascii_lower(bytes, b"reference") => Self::Reference,
            (9, b'p') if eq_ascii_lower(bytes, b"phdthesis") => Self::PhdThesis,
            (10, b'b') if eq_ascii_lower(bytes, b"bookinbook") => Self::BookInBook,
            // `conference` is treated as an alias of `inproceedings`.
            (10, b'c') if eq_ascii_lower(bytes, b"conference") => Self::InProceedings,
            (10, b'c') if eq_ascii_lower(bytes, b"collection") => Self::Collection,
            (10, b'p') if eq_ascii_lower(bytes, b"periodical") => Self::Periodical,
            (10, b't') if eq_ascii_lower(bytes, b"techreport") => Self::TechReport,
            (11, b'i') if eq_ascii_lower(bytes, b"inreference") => Self::InReference,
            (11, b'p') if eq_ascii_lower(bytes, b"proceedings") => Self::Proceedings,
            (11, b'u') if eq_ascii_lower(bytes, b"unpublished") => Self::Unpublished,
            (12, b'i') if eq_ascii_lower(bytes, b"incollection") => Self::InCollection,
            (12, b'm') if eq_ascii_lower(bytes, b"mvcollection") => Self::MvCollection,
            (13, b'i') if eq_ascii_lower(bytes, b"inproceedings") => Self::InProceedings,
            (13, b'm') if eq_ascii_lower(bytes, b"mastersthesis") => Self::MastersThesis,
            (13, b'm') if eq_ascii_lower(bytes, b"mvproceedings") => Self::MvProceedings,
            (14, b's') if eq_ascii_lower(bytes, b"suppcollection") => Self::SuppCollection,
            (5, b'x') if eq_ascii_lower(bytes, b"xdata") => Self::XData,
            // Unknown names keep their original spelling.
            _ => Self::Custom(Cow::Borrowed(s)),
        }
    }
815
    /// Get required fields for this entry type.
    ///
    /// Returns a flat list of field names with no alternatives: for example
    /// `Book` lists `author` only. The alias-aware OR-groups (such as
    /// `author`/`editor` or `year`/`date`) are provided by
    /// `required_field_groups`. Types with no requirements, including
    /// `Custom`, return an empty slice.
    #[must_use]
    pub const fn required_fields(&self) -> &'static [&'static str] {
        match self {
            Self::Article => &["author", "title", "journal", "year"],
            Self::Book | Self::MvBook => &["author", "title", "publisher", "year"],
            Self::Booklet | Self::Manual => &["title"],
            Self::InBook | Self::BookInBook | Self::SuppBook => {
                &["author", "title", "chapter", "publisher", "year"]
            }
            Self::Collection | Self::MvCollection | Self::Reference => {
                &["editor", "title", "publisher", "year"]
            }
            Self::InCollection | Self::SuppCollection | Self::InReference => {
                &["author", "title", "booktitle", "publisher", "year"]
            }
            Self::InProceedings => &["author", "title", "booktitle", "year"],
            Self::Proceedings | Self::MvProceedings | Self::Periodical => &["title", "year"],
            Self::MastersThesis | Self::PhdThesis | Self::Thesis => {
                &["author", "title", "school", "year"]
            }
            Self::TechReport => &["author", "title", "institution", "year"],
            Self::Report => &["author", "title", "type", "institution", "year"],
            Self::Patent => &["author", "title", "number", "year"],
            Self::Online => &["title", "url"],
            Self::Software | Self::Dataset => &["author", "title", "year"],
            Self::Unpublished => &["author", "title", "note"],
            // No mandatory fields for these types.
            Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
        }
    }
846
847    /// Get required field groups for validation.
848    ///
849    /// Each inner group is an OR-list. For example, `["author", "editor"]`
850    /// means either field satisfies that requirement.
851    #[must_use]
852    pub const fn required_field_groups(&self) -> &'static [&'static [&'static str]] {
853        match self {
854            Self::Article => &[
855                &["author"],
856                &["title"],
857                &["journal", "journaltitle"],
858                &["year", "date"],
859            ],
860            Self::Book | Self::MvBook => &[
861                &["author", "editor"],
862                &["title"],
863                &["publisher"],
864                &["year", "date"],
865            ],
866            Self::Booklet | Self::Manual => &[&["title"]],
867            Self::InBook | Self::BookInBook | Self::SuppBook => &[
868                &["author", "editor"],
869                &["title"],
870                &["chapter", "pages"],
871                &["publisher"],
872                &["year", "date"],
873            ],
874            Self::Collection | Self::MvCollection | Self::Reference => &[
875                &["editor", "author"],
876                &["title"],
877                &["publisher"],
878                &["year", "date"],
879            ],
880            Self::InCollection | Self::SuppCollection | Self::InReference => &[
881                &["author", "editor"],
882                &["title"],
883                &["booktitle"],
884                &["publisher"],
885                &["year", "date"],
886            ],
887            Self::InProceedings => &[
888                &["author", "editor"],
889                &["title"],
890                &["booktitle"],
891                &["year", "date"],
892            ],
893            Self::Proceedings | Self::MvProceedings | Self::Periodical => {
894                &[&["title"], &["year", "date"]]
895            }
896            Self::MastersThesis | Self::PhdThesis | Self::Thesis => &[
897                &["author"],
898                &["title"],
899                &["school", "institution"],
900                &["year", "date"],
901            ],
902            Self::TechReport => &[&["author"], &["title"], &["institution"], &["year", "date"]],
903            Self::Report => &[
904                &["author", "editor"],
905                &["title"],
906                &["type"],
907                &["institution"],
908                &["year", "date"],
909            ],
910            Self::Patent => &[&["author"], &["title"], &["number"], &["year", "date"]],
911            Self::Online => &[&["title"], &["url", "doi"], &["year", "date", "urldate"]],
912            Self::Software | Self::Dataset => &[
913                &["author", "editor"],
914                &["title"],
915                &["year", "date", "version"],
916            ],
917            Self::Unpublished => &[&["author"], &["title"], &["note"]],
918            Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
919        }
920    }
921
922    /// Return the canonical lowercase entry type name.
923    #[must_use]
924    pub fn canonical_name(&self) -> &str {
925        match self {
926            Self::Article => "article",
927            Self::Book => "book",
928            Self::Booklet => "booklet",
929            Self::MvBook => "mvbook",
930            Self::InBook => "inbook",
931            Self::BookInBook => "bookinbook",
932            Self::SuppBook => "suppbook",
933            Self::Collection => "collection",
934            Self::MvCollection => "mvcollection",
935            Self::InCollection => "incollection",
936            Self::SuppCollection => "suppcollection",
937            Self::InProceedings => "inproceedings",
938            Self::Proceedings => "proceedings",
939            Self::MvProceedings => "mvproceedings",
940            Self::Reference => "reference",
941            Self::InReference => "inreference",
942            Self::Manual => "manual",
943            Self::MastersThesis => "mastersthesis",
944            Self::PhdThesis => "phdthesis",
945            Self::Thesis => "thesis",
946            Self::TechReport => "techreport",
947            Self::Report => "report",
948            Self::Patent => "patent",
949            Self::Periodical => "periodical",
950            Self::Online => "online",
951            Self::Software => "software",
952            Self::Dataset => "dataset",
953            Self::Set => "set",
954            Self::XData => "xdata",
955            Self::Unpublished => "unpublished",
956            Self::Misc => "misc",
957            Self::Custom(s) => s,
958        }
959    }
960
961    /// Return common aliases that parse to this entry type.
962    #[must_use]
963    pub const fn aliases(&self) -> &'static [&'static str] {
964        match self {
965            Self::InProceedings => &["conference"],
966            Self::TechReport => &["techreport"],
967            Self::MastersThesis => &["mastersthesis"],
968            Self::PhdThesis => &["phdthesis"],
969            _ => &[],
970        }
971    }
972
973    /// Return `true` for the classic BibTeX entry types.
974    #[must_use]
975    pub const fn is_classic_bibtex(&self) -> bool {
976        matches!(
977            self,
978            Self::Article
979                | Self::Book
980                | Self::Booklet
981                | Self::InBook
982                | Self::InCollection
983                | Self::InProceedings
984                | Self::Manual
985                | Self::MastersThesis
986                | Self::PhdThesis
987                | Self::Proceedings
988                | Self::TechReport
989                | Self::Unpublished
990                | Self::Misc
991        )
992    }
993
994    /// Return `true` for entry types that are specific to BibLaTeX or common repository exports.
995    #[must_use]
996    pub const fn is_extended(&self) -> bool {
997        !self.is_classic_bibtex() && !matches!(self, Self::Custom(_))
998    }
999
1000    /// Convert to owned version
1001    #[must_use]
1002    pub fn into_owned(self) -> EntryType<'static> {
1003        match self {
1004            Self::Custom(s) => EntryType::Custom(Cow::Owned(s.into_owned())),
1005            Self::Article => EntryType::Article,
1006            Self::Book => EntryType::Book,
1007            Self::Booklet => EntryType::Booklet,
1008            Self::MvBook => EntryType::MvBook,
1009            Self::InBook => EntryType::InBook,
1010            Self::BookInBook => EntryType::BookInBook,
1011            Self::SuppBook => EntryType::SuppBook,
1012            Self::Collection => EntryType::Collection,
1013            Self::MvCollection => EntryType::MvCollection,
1014            Self::InCollection => EntryType::InCollection,
1015            Self::SuppCollection => EntryType::SuppCollection,
1016            Self::InProceedings => EntryType::InProceedings,
1017            Self::Proceedings => EntryType::Proceedings,
1018            Self::MvProceedings => EntryType::MvProceedings,
1019            Self::Reference => EntryType::Reference,
1020            Self::InReference => EntryType::InReference,
1021            Self::Manual => EntryType::Manual,
1022            Self::MastersThesis => EntryType::MastersThesis,
1023            Self::PhdThesis => EntryType::PhdThesis,
1024            Self::Thesis => EntryType::Thesis,
1025            Self::TechReport => EntryType::TechReport,
1026            Self::Report => EntryType::Report,
1027            Self::Patent => EntryType::Patent,
1028            Self::Periodical => EntryType::Periodical,
1029            Self::Online => EntryType::Online,
1030            Self::Software => EntryType::Software,
1031            Self::Dataset => EntryType::Dataset,
1032            Self::Set => EntryType::Set,
1033            Self::XData => EntryType::XData,
1034            Self::Unpublished => EntryType::Unpublished,
1035            Self::Misc => EntryType::Misc,
1036        }
1037    }
1038}
1039
/// Map an ASCII uppercase byte to its lowercase form; other bytes pass through.
#[inline]
const fn ascii_lower(byte: u8) -> u8 {
    byte.to_ascii_lowercase()
}
1048
1049#[inline]
1050fn eq_ascii_lower(input: &[u8], expected: &[u8]) -> bool {
1051    if input.len() != expected.len() {
1052        return false;
1053    }
1054
1055    let mut index = 0usize;
1056    while index < input.len() {
1057        if ascii_lower(input[index]) != expected[index] {
1058            return false;
1059        }
1060        index += 1;
1061    }
1062
1063    true
1064}
1065
1066impl fmt::Display for EntryType<'_> {
1067    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1068        f.write_str(self.canonical_name())
1069    }
1070}
1071
1072/// A field in a BibTeX entry
1073#[derive(Debug, Clone, PartialEq)]
1074pub struct Field<'a> {
1075    /// Field name
1076    pub name: Cow<'a, str>,
1077    /// Field value
1078    pub value: Value<'a>,
1079}
1080
1081impl<'a> Field<'a> {
1082    /// Create a new field
1083    #[must_use]
1084    pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1085        Self {
1086            name: Cow::Borrowed(name),
1087            value,
1088        }
1089    }
1090
1091    /// Check if field name matches (case-insensitive)
1092    #[must_use]
1093    pub fn name_eq_ignore_case(&self, name: &str) -> bool {
1094        self.name.eq_ignore_ascii_case(name)
1095    }
1096
1097    /// Convert to owned version
1098    #[must_use]
1099    pub fn into_owned(self) -> Field<'static> {
1100        Field {
1101            name: Cow::Owned(self.name.into_owned()),
1102            value: self.value.into_owned(),
1103        }
1104    }
1105}
1106
/// The right-hand side of a BibTeX field or string definition.
///
/// # Memory Optimization
/// The parts of a concatenation live behind a boxed slice, so the literal,
/// number and variable variants are not enlarged by it.
#[derive(Clone, Debug, PartialEq)]
pub enum Value<'a> {
    /// Literal text.
    Literal(Cow<'a, str>),
    /// Integer literal.
    Number(i64),
    /// Sequence of values joined by concatenation (stored out of line).
    Concat(Box<[Value<'a>]>),
    /// Reference to a named string, by name.
    Variable(Cow<'a, str>),
}
1123
1124impl Default for Value<'_> {
1125    fn default() -> Self {
1126        Self::Number(0)
1127    }
1128}
1129
1130impl Value<'_> {
1131    /// Get the value as a string (if it's a simple literal)
1132    #[must_use]
1133    pub fn as_str(&self) -> Option<&str> {
1134        match self {
1135            Self::Literal(s) => Some(s),
1136            _ => None,
1137        }
1138    }
1139
1140    /// Expand variables and concatenations to get final string
1141    #[must_use]
1142    pub fn expand(&self, strings: &AHashMap<&str, Value>) -> String {
1143        match self {
1144            Self::Literal(s) => s.to_string(),
1145            Self::Number(n) => n.to_string(),
1146            Self::Variable(name) => strings
1147                .get(name.as_ref())
1148                .map_or_else(|| format!("{{undefined:{name}}}"), |v| v.expand(strings)),
1149            Self::Concat(parts) => parts.iter().map(|p| p.expand(strings)).collect::<String>(),
1150        }
1151    }
1152
1153    /// Convert to owned version
1154    #[must_use]
1155    pub fn into_owned(self) -> Value<'static> {
1156        match self {
1157            Self::Literal(s) => Value::Literal(Cow::Owned(s.into_owned())),
1158            Self::Number(n) => Value::Number(n),
1159            Self::Variable(s) => Value::Variable(Cow::Owned(s.into_owned())),
1160            Self::Concat(parts) => Value::Concat(
1161                parts
1162                    .into_vec()
1163                    .into_iter()
1164                    .map(Value::into_owned)
1165                    .collect::<Vec<_>>()
1166                    .into_boxed_slice(),
1167            ),
1168        }
1169    }
1170}
1171
1172impl fmt::Display for Value<'_> {
1173    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1174        match self {
1175            Self::Literal(s) => write!(f, "{s}"),
1176            Self::Number(n) => write!(f, "{n}"),
1177            Self::Variable(name) => write!(f, "{{{name}}}"),
1178            Self::Concat(parts) => {
1179                for (i, part) in parts.iter().enumerate() {
1180                    if i > 0 {
1181                        write!(f, " # ")?;
1182                    }
1183                    write!(f, "{part}")?;
1184                }
1185                Ok(())
1186            }
1187        }
1188    }
1189}
1190
1191fn value_to_lossy_string(value: &Value<'_>) -> String {
1192    match value {
1193        Value::Literal(s) => s.to_string(),
1194        Value::Number(n) => n.to_string(),
1195        Value::Variable(v) => format!("{{{v}}}"),
1196        Value::Concat(parts) => parts.iter().map(value_to_lossy_string).collect(),
1197    }
1198}
1199
/// Normalize a DOI from common raw forms into lowercase `10.x/...` form.
///
/// Surrounding whitespace is ignored. One leading resolver URL
/// (`https://doi.org/`, `http://doi.org/`, `https://dx.doi.org/`,
/// `http://dx.doi.org/`) or `doi:` label is removed, matched without regard
/// to ASCII case. Trailing `.`, `,` and `;` are dropped.
///
/// Returns `None` when the input is blank or what remains does not start
/// with `10.` or contains no `/`.
#[must_use]
pub fn normalize_doi(input: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ];

    let mut doi = input.trim();
    if doi.is_empty() {
        return None;
    }

    for prefix in PREFIXES {
        // `get` yields `None` when the input is shorter than the prefix or the
        // cut would split a multi-byte character, so slicing below is safe.
        let matches_prefix = doi
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix));
        if matches_prefix {
            doi = doi[prefix.len()..].trim();
            break;
        }
    }

    let doi = doi.trim_end_matches(['.', ',', ';']);
    if doi.len() > 3 && doi.starts_with("10.") && doi.contains('/') {
        Some(doi.to_ascii_lowercase())
    } else {
        None
    }
}
1229
/// Check that `isbn` looks like an ISBN-10 or ISBN-13, ignoring `-` and spaces.
///
/// Ten characters must be ASCII digits, except that the last may be `x` or
/// `X`; thirteen characters must all be ASCII digits. No checksum is verified.
fn is_valid_isbn_shape(isbn: &str) -> bool {
    let significant: Vec<char> = isbn.chars().filter(|&c| c != '-' && c != ' ').collect();

    match significant.as_slice() {
        [body @ .., check] if significant.len() == 10 => {
            body.iter().all(char::is_ascii_digit)
                && (check.is_ascii_digit() || matches!(*check, 'x' | 'X'))
        }
        digits if digits.len() == 13 => digits.iter().all(char::is_ascii_digit),
        _ => false,
    }
}
1242
1243fn split_bibtex_names(input: &str) -> Vec<&str> {
1244    let mut names = Vec::new();
1245    let mut start = 0;
1246    let mut depth = 0usize;
1247    let mut iter = input.char_indices().peekable();
1248
1249    while let Some((index, ch)) = iter.next() {
1250        match ch {
1251            '{' => depth += 1,
1252            '}' => depth = depth.saturating_sub(1),
1253            'a' | 'A' if depth == 0 && starts_name_separator(input, index) => {
1254                let candidate = input[start..index].trim();
1255                if !candidate.is_empty() {
1256                    names.push(candidate);
1257                }
1258                start = index + 3;
1259                while input[start..]
1260                    .chars()
1261                    .next()
1262                    .is_some_and(char::is_whitespace)
1263                {
1264                    start += input[start..].chars().next().map_or(0, char::len_utf8);
1265                }
1266                while iter
1267                    .peek()
1268                    .is_some_and(|(_, next_ch)| next_ch.is_whitespace())
1269                {
1270                    iter.next();
1271                }
1272            }
1273            _ => {}
1274        }
1275    }
1276
1277    let candidate = input[start..].trim();
1278    if !candidate.is_empty() {
1279        names.push(candidate);
1280    }
1281
1282    names
1283}
1284
/// Report whether a stand-alone `and` (any ASCII case) begins at byte `index`.
///
/// The word counts as stand-alone when it is preceded by whitespace or the
/// start of `input`, and followed by whitespace or the end of `input`.
fn starts_name_separator(input: &str, index: usize) -> bool {
    let (before, tail) = input.split_at(index);

    let keyword_found = tail
        .get(..3)
        .is_some_and(|keyword| keyword.eq_ignore_ascii_case("and"));
    if !keyword_found {
        return false;
    }

    let after = &tail[3..];
    let preceded_by_boundary = before.is_empty() || before.ends_with(char::is_whitespace);
    let followed_by_boundary = after.is_empty() || after.starts_with(char::is_whitespace);

    preceded_by_boundary && followed_by_boundary
}
1302
1303fn parse_single_name(input: &str) -> PersonName {
1304    let parts = split_top_level_commas(input);
1305    match parts.as_slice() {
1306        [last] => parse_first_von_last(last),
1307        [last, first] => {
1308            let (von, last) = split_von_last(last);
1309            PersonName {
1310                first: normalize_name_part(first),
1311                von,
1312                last,
1313                jr: String::new(),
1314            }
1315        }
1316        [last, jr, first, ..] => {
1317            let (von, last) = split_von_last(last);
1318            PersonName {
1319                first: normalize_name_part(first),
1320                von,
1321                last,
1322                jr: normalize_name_part(jr),
1323            }
1324        }
1325        [] => PersonName {
1326            first: String::new(),
1327            von: String::new(),
1328            last: String::new(),
1329            jr: String::new(),
1330        },
1331    }
1332}
1333
1334fn parse_first_von_last(input: &str) -> PersonName {
1335    let words = split_name_words(input);
1336    match words.len() {
1337        0 => PersonName {
1338            first: String::new(),
1339            von: String::new(),
1340            last: String::new(),
1341            jr: String::new(),
1342        },
1343        1 => PersonName {
1344            first: String::new(),
1345            von: String::new(),
1346            last: normalize_name_part(words[0]),
1347            jr: String::new(),
1348        },
1349        _ => {
1350            let von_start = words
1351                .iter()
1352                .position(|word| starts_with_lowercase_letter(word));
1353            let (first, von, last) = von_start.map_or_else(
1354                || {
1355                    (
1356                        join_name_words(&words[..words.len() - 1]),
1357                        String::new(),
1358                        normalize_name_part(words[words.len() - 1]),
1359                    )
1360                },
1361                |von_start| {
1362                    let last_start = words[von_start + 1..]
1363                        .iter()
1364                        .position(|word| !starts_with_lowercase_letter(word))
1365                        .map_or(words.len() - 1, |offset| von_start + 1 + offset);
1366
1367                    (
1368                        join_name_words(&words[..von_start]),
1369                        join_name_words(&words[von_start..last_start]),
1370                        join_name_words(&words[last_start..]),
1371                    )
1372                },
1373            );
1374
1375            PersonName {
1376                first,
1377                von,
1378                last,
1379                jr: String::new(),
1380            }
1381        }
1382    }
1383}
1384
1385fn split_von_last(input: &str) -> (String, String) {
1386    let words = split_name_words(input);
1387    if words.is_empty() {
1388        return (String::new(), String::new());
1389    }
1390
1391    if let Some(last_start) = words
1392        .iter()
1393        .rposition(|word| starts_with_lowercase_letter(word))
1394    {
1395        if last_start + 1 < words.len() {
1396            return (
1397                join_name_words(&words[..=last_start]),
1398                join_name_words(&words[last_start + 1..]),
1399            );
1400        }
1401    }
1402
1403    if words.len() == 1 {
1404        (String::new(), normalize_name_part(words[0]))
1405    } else {
1406        (
1407            join_name_words(&words[..words.len() - 1]),
1408            normalize_name_part(words[words.len() - 1]),
1409        )
1410    }
1411}
1412
/// Split `input` at every comma that is not enclosed in braces.
///
/// Each piece is trimmed. Empty pieces are kept, so the result always has
/// one more element than there are top-level commas.
fn split_top_level_commas(input: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut piece_start = 0;
    let mut brace_depth = 0usize;

    // The three delimiters are ASCII, so scanning bytes finds the same
    // positions as scanning characters.
    for (pos, byte) in input.bytes().enumerate() {
        if byte == b'{' {
            brace_depth += 1;
        } else if byte == b'}' {
            brace_depth = brace_depth.saturating_sub(1);
        } else if byte == b',' && brace_depth == 0 {
            pieces.push(input[piece_start..pos].trim());
            piece_start = pos + 1;
        }
    }

    pieces.push(input[piece_start..].trim());
    pieces
}
1433
/// Split a name into words at whitespace that is not enclosed in braces.
///
/// A `{` starts a word if none is open; a `}` only lowers the brace depth
/// and never starts one. Words are trimmed and empty ones dropped.
fn split_name_words(input: &str) -> Vec<&str> {
    let mut tokens: Vec<&str> = Vec::new();
    let mut token_start: Option<usize> = None;
    let mut brace_depth = 0usize;

    for (pos, ch) in input.char_indices() {
        if ch == '}' {
            brace_depth = brace_depth.saturating_sub(1);
        } else if brace_depth == 0 && ch.is_whitespace() {
            // Whitespace outside braces closes the word in progress, if any.
            if let Some(begin) = token_start.take() {
                let token = input[begin..pos].trim();
                if !token.is_empty() {
                    tokens.push(token);
                }
            }
        } else {
            if ch == '{' {
                brace_depth += 1;
            }
            if token_start.is_none() {
                token_start = Some(pos);
            }
        }
    }

    if let Some(begin) = token_start {
        let token = input[begin..].trim();
        if !token.is_empty() {
            tokens.push(token);
        }
    }

    tokens
}
1465
1466fn join_name_words(words: &[&str]) -> String {
1467    words
1468        .iter()
1469        .map(|word| normalize_name_part(word))
1470        .filter(|word| !word.is_empty())
1471        .collect::<Vec<_>>()
1472        .join(" ")
1473}
1474
/// Trim a name part and remove one pair of braces that wraps all of it.
///
/// The outer braces are removed only when the leading `{` is not closed
/// before the final `}`. Text such as `{Jean} {Pierre}` starts and ends with
/// braces that belong to different groups, so it is returned unchanged
/// rather than being cut down to `Jean} {Pierre`.
fn normalize_name_part(input: &str) -> String {
    let trimmed = input.trim();
    let Some(inner) = trimmed
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
    else {
        return trimmed.to_string();
    };

    // Track nesting inside the candidate pair. A `}` seen at depth zero would
    // close the leading brace early, so the outer braces are not a pair.
    let mut depth = 0usize;
    for byte in inner.bytes() {
        match byte {
            b'{' => depth += 1,
            b'}' if depth == 0 => return trimmed.to_string(),
            b'}' => depth -= 1,
            _ => {}
        }
    }

    inner.trim().to_string()
}
1483
1484fn starts_with_lowercase_letter(input: &str) -> bool {
1485    normalize_name_part(input)
1486        .chars()
1487        .find(|ch| ch.is_alphabetic())
1488        .is_some_and(char::is_lowercase)
1489}
1490
/// Check if a string is a valid page range
/// Accepts formats like "12", "12-34", "12--34", "12-34,45-67"
///
/// Surrounding whitespace is ignored for every accepted form, including a
/// single page number. A value that is not purely ASCII digits must contain
/// a dash or a comma. Within each comma-separated item, a `--` or `-`
/// separator must have exactly two non-blank endpoints; the endpoints
/// themselves are not required to be numeric, and empty items are skipped.
fn is_valid_page_range(pages: &str) -> bool {
    let pages = pages.trim();
    if pages.is_empty() {
        return false;
    }

    // Accept single page numbers
    if pages.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    // Anything else must look like a range or a list
    if !pages.contains(['-', ',']) {
        return false;
    }

    pages
        .split(',')
        .map(str::trim)
        .filter(|item| !item.is_empty())
        .all(|item| {
            // Prefer the LaTeX-style double dash when present
            let separator = if item.contains("--") {
                "--"
            } else if item.contains('-') {
                "-"
            } else {
                // Items without a dash are accepted as they are
                return true;
            };

            let endpoints: Vec<&str> = item.split(separator).collect();
            endpoints.len() == 2 && endpoints.iter().all(|end| !end.trim().is_empty())
        })
}
1533
/// Check if a month value is valid
/// Accepts three-letter abbreviations and full English month names in any
/// letter case, as well as integers from 1 to 12
fn is_valid_month(month: &str) -> bool {
    const MONTH_NAMES: &[&str] = &[
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
        "january",
        "february",
        "march",
        "april",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ];

    // A value that parses as an integer can never be a month name, so the
    // numeric range decides on its own.
    if let Ok(number) = month.parse::<i32>() {
        return (1..=12).contains(&number);
    }

    let lowered = month.to_lowercase();
    MONTH_NAMES.contains(&lowered.as_str())
}