Skip to main content

bibtex_parser/
model.rs

1//! Data models for BibTeX entries
2
3use ahash::AHashMap;
4use memchr::memchr2;
5use std::borrow::Cow;
6use std::fmt;
7
/// Validation strictness level for BibTeX entries.
///
/// Passed to `Entry::validate` to choose how many checks run. Each level
/// performs every check of the level before it; `Standard` is the
/// `Default` value.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ValidationLevel {
    /// Only check that required fields exist.
    Minimal,
    /// Check required fields and common issues (default).
    #[default]
    Standard,
    /// Strict validation including field formats and cross-references.
    Strict,
}
19
/// Represents a validation error for a BibTeX entry.
///
/// Despite the name, a value of this type may also carry a warning or an
/// informational note; see `severity`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// The field that failed validation, or `None` when the finding applies
    /// to the entry as a whole.
    pub field: Option<String>,
    /// Human-readable description of the validation failure.
    pub message: String,
    /// Severity of the finding.
    pub severity: ValidationSeverity,
}
30
/// Severity level for validation errors.
///
/// The `Debug` name of the variant is what `ValidationError`'s `Display`
/// output prints between the square brackets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// Must be fixed for valid BibTeX.
    Error,
    /// Should be fixed but might work.
    Warning,
    /// Informational note.
    Info,
}
41
42impl ValidationError {
43    /// Create a new error-level validation error
44    #[must_use]
45    pub fn error(field: Option<&str>, message: impl Into<String>) -> Self {
46        Self {
47            field: field.map(String::from),
48            message: message.into(),
49            severity: ValidationSeverity::Error,
50        }
51    }
52
53    /// Create a new warning-level validation error
54    #[must_use]
55    pub fn warning(field: Option<&str>, message: impl Into<String>) -> Self {
56        Self {
57            field: field.map(String::from),
58            message: message.into(),
59            severity: ValidationSeverity::Warning,
60        }
61    }
62
63    /// Create a new info-level validation error
64    #[must_use]
65    pub fn info(field: Option<&str>, message: impl Into<String>) -> Self {
66        Self {
67            field: field.map(String::from),
68            message: message.into(),
69            severity: ValidationSeverity::Info,
70        }
71    }
72}
73
74impl fmt::Display for ValidationError {
75    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76        let field = self.field.as_deref().unwrap_or("<entry>");
77        write!(f, "[{:?}] {}: {}", self.severity, field, self.message)
78    }
79}
80
/// A structured BibTeX person name.
///
/// BibTeX supports the forms `First von Last`, `von Last, First`, and
/// `von Last, Jr, First`. This type keeps those four logical parts separate
/// while preserving the exact token text from the source value.
///
/// NOTE(review): the token vectors (`given`, `family`, `prefix`, `suffix`)
/// presumably mirror the string parts (`first`, `last`, `von`, `jr`); the
/// code that fills them is not in this part of the file, so confirm there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersonName {
    /// Exact source text for this name segment, trimmed of surrounding whitespace.
    pub raw: String,
    /// Given names and initials.
    pub first: String,
    /// Lowercase particles such as `von`, `van`, `de`, or `der`.
    pub von: String,
    /// Family name.
    pub last: String,
    /// Junior part such as `Jr.` in `Last, Jr., First`.
    pub jr: String,
    /// Given-name tokens.
    pub given: Vec<String>,
    /// Family-name tokens.
    pub family: Vec<String>,
    /// Prefix or particle tokens.
    pub prefix: Vec<String>,
    /// Suffix tokens.
    pub suffix: Vec<String>,
    /// Literal organization or preserved braced name. When set,
    /// `display_name` returns it verbatim and `is_literal` reports `true`.
    pub literal: Option<String>,
}
109
110impl PersonName {
111    /// Return the display form used by most bibliography styles.
112    #[must_use]
113    pub fn display_name(&self) -> String {
114        if let Some(literal) = &self.literal {
115            return literal.clone();
116        }
117
118        let mut parts = Vec::new();
119        if !self.first.is_empty() {
120            parts.push(self.first.as_str());
121        }
122        if !self.von.is_empty() {
123            parts.push(self.von.as_str());
124        }
125        if !self.last.is_empty() {
126            parts.push(self.last.as_str());
127        }
128
129        let mut name = parts.join(" ");
130        if !self.jr.is_empty() {
131            if !name.is_empty() {
132                name.push_str(", ");
133            }
134            name.push_str(&self.jr);
135        }
136        name
137    }
138
139    /// Return `true` when every name component is empty.
140    #[must_use]
141    pub fn is_empty(&self) -> bool {
142        self.raw.is_empty()
143            && self.first.is_empty()
144            && self.von.is_empty()
145            && self.last.is_empty()
146            && self.jr.is_empty()
147            && self.literal.is_none()
148    }
149
150    /// Return `true` when the name is a braced literal or organization name.
151    #[must_use]
152    pub const fn is_literal(&self) -> bool {
153        self.literal.is_some()
154    }
155
156    /// Return the display name after LaTeX-to-Unicode conversion.
157    #[cfg(feature = "latex_to_unicode")]
158    #[must_use]
159    pub fn unicode_display_name(&self) -> String {
160        crate::latex_unicode::latex_to_unicode(&self.display_name())
161    }
162}
163
164/// Parse a BibTeX `author` or `editor` field into structured person names.
165///
166/// Splitting respects balanced braces, so organization names such as
167/// `{The Unicode Consortium}` and literal `and` inside braces stay intact.
168#[must_use]
169pub fn parse_names(input: &str) -> Vec<PersonName> {
170    split_bibtex_names(input)
171        .into_iter()
172        .map(parse_single_name)
173        .filter(|name| !name.is_empty())
174        .collect()
175}
176
/// Parsed bibliography date parts.
///
/// Only `year` is mandatory; `month` and `day` are `None` when the source
/// value did not contain them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DateParts {
    /// Four-digit year.
    pub year: i32,
    /// One-based month, when present.
    pub month: Option<u8>,
    /// One-based day of month, when present.
    pub day: Option<u8>,
}
187
188impl DateParts {
189    /// Return `true` when both month and day are present.
190    #[must_use]
191    pub const fn is_complete(&self) -> bool {
192        self.month.is_some() && self.day.is_some()
193    }
194}
195
/// Explicit date parse failure.
///
/// Returned by `parse_date_parts` and by the `Entry` date helpers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateParseError {
    /// Input was empty after trimming BibTeX delimiters.
    Empty,
    /// Year was missing or not a four-digit number.
    InvalidYear,
    /// Month was present but outside `1..=12` or unrecognized.
    InvalidMonth,
    /// Day was present but invalid for the parsed year and month.
    InvalidDay,
    /// Input used a shape outside the supported common bibliography forms.
    UnsupportedFormat,
}
210
211impl fmt::Display for DateParseError {
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        match self {
214            Self::Empty => f.write_str("empty date"),
215            Self::InvalidYear => f.write_str("invalid date year"),
216            Self::InvalidMonth => f.write_str("invalid date month"),
217            Self::InvalidDay => f.write_str("invalid date day"),
218            Self::UnsupportedFormat => f.write_str("unsupported date format"),
219        }
220    }
221}
222
// Marker impl: `Debug` and `Display` are provided above, so the default
// `Error` methods are sufficient and `DateParseError` can be used with `?`
// and boxed as `dyn Error`.
impl std::error::Error for DateParseError {}
224
/// Common resource or identifier field kind.
///
/// Produced by `classify_resource_field`; `as_str` gives the stable lowercase
/// name of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceKind {
    /// Local file attachment field.
    File,
    /// URL field.
    Url,
    /// DOI field.
    Doi,
    /// `PubMed` identifier.
    Pmid,
    /// `PubMed Central` identifier.
    Pmcid,
    /// ISBN field.
    Isbn,
    /// ISSN field.
    Issn,
    /// Generic eprint field.
    Eprint,
    /// arXiv identifier.
    Arxiv,
    /// Cross-reference citation key.
    Crossref,
}
249
250impl ResourceKind {
251    /// Return a stable lowercase kind name.
252    #[must_use]
253    pub const fn as_str(self) -> &'static str {
254        match self {
255            Self::File => "file",
256            Self::Url => "url",
257            Self::Doi => "doi",
258            Self::Pmid => "pmid",
259            Self::Pmcid => "pmcid",
260            Self::Isbn => "isbn",
261            Self::Issn => "issn",
262            Self::Eprint => "eprint",
263            Self::Arxiv => "arxiv",
264            Self::Crossref => "crossref",
265        }
266    }
267}
268
/// Classified resource or identifier field.
///
/// Values of this type are returned by `Entry::resource_fields`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResourceField {
    /// Classified field kind.
    pub kind: ResourceKind,
    /// Original field name spelling.
    pub field_name: String,
    /// Plain text value.
    pub value: String,
    /// Normalized value when the kind has a stable local normalization;
    /// `None` otherwise.
    pub normalized: Option<String>,
}
281
282/// Parse a common bibliography date shape into parts.
283///
284/// Supported input shapes are `YYYY`, `YYYY-MM`, and `YYYY-MM-DD`.
285/// Month names and BibTeX month abbreviations are accepted by entry helpers
286/// when a separate `month` field is combined with a `year` field.
287pub fn parse_date_parts(input: &str) -> std::result::Result<DateParts, DateParseError> {
288    let cleaned = trim_bibtex_scalar(input);
289    if cleaned.is_empty() {
290        return Err(DateParseError::Empty);
291    }
292
293    let parts = cleaned.split('-').collect::<Vec<_>>();
294    match parts.as_slice() {
295        [year] => Ok(DateParts {
296            year: parse_year(year)?,
297            month: None,
298            day: None,
299        }),
300        [year, month] => {
301            let year = parse_year(year)?;
302            let month = parse_month_number(month).ok_or(DateParseError::InvalidMonth)?;
303            Ok(DateParts {
304                year,
305                month: Some(month),
306                day: None,
307            })
308        }
309        [year, month, day] => {
310            let year = parse_year(year)?;
311            let month = parse_month_number(month).ok_or(DateParseError::InvalidMonth)?;
312            let day = parse_day_number(day, year, month)?;
313            Ok(DateParts {
314                year,
315                month: Some(month),
316                day: Some(day),
317            })
318        }
319        _ => Err(DateParseError::UnsupportedFormat),
320    }
321}
322
/// Trim surrounding whitespace from a field name and lowercase its ASCII letters.
///
/// Non-ASCII characters are left unchanged.
#[must_use]
pub fn normalize_field_name_ascii(name: &str) -> String {
    let mut normalized = name.trim().to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
328
/// Return the crate's built-in BibLaTeX-to-BibTeX field alias, if any.
///
/// The lookup ignores surrounding whitespace and ASCII case. Names without a
/// built-in alias yield `None`.
#[must_use]
pub fn canonical_biblatex_field_alias(name: &str) -> Option<&'static str> {
    // (BibLaTeX spelling, classic BibTeX spelling)
    const ALIASES: [(&str, &str); 4] = [
        ("journaltitle", "journal"),
        ("date", "year"),
        ("institution", "school"),
        ("location", "address"),
    ];

    let wanted = name.trim();
    ALIASES
        .iter()
        .find(|(biblatex, _)| biblatex.eq_ignore_ascii_case(wanted))
        .map(|&(_, bibtex)| bibtex)
}
340
341/// Normalize a field name with ASCII lowercase and built-in BibLaTeX aliases.
342#[must_use]
343pub fn normalize_biblatex_field_name(name: &str) -> String {
344    canonical_biblatex_field_alias(name)
345        .map_or_else(|| normalize_field_name_ascii(name), ToOwned::to_owned)
346}
347
348/// Classify a common resource or identifier field name.
349#[must_use]
350pub fn classify_resource_field(name: &str) -> Option<ResourceKind> {
351    match normalize_field_name_ascii(name).as_str() {
352        "file" => Some(ResourceKind::File),
353        "url" => Some(ResourceKind::Url),
354        "doi" => Some(ResourceKind::Doi),
355        "pmid" => Some(ResourceKind::Pmid),
356        "pmcid" => Some(ResourceKind::Pmcid),
357        "isbn" => Some(ResourceKind::Isbn),
358        "issn" => Some(ResourceKind::Issn),
359        "eprint" => Some(ResourceKind::Eprint),
360        "arxiv" => Some(ResourceKind::Arxiv),
361        "crossref" => Some(ResourceKind::Crossref),
362        _ => None,
363    }
364}
365
/// A BibTeX entry (article, book, etc.)
///
/// Fields are stored in a `Vec`, so their order is kept and the same field
/// name may appear more than once; name-based lookups return the first match.
/// The `'a` lifetime lets the key and fields borrow from the parsed input.
#[derive(Debug, Clone, PartialEq)]
pub struct Entry<'a> {
    /// Entry type (article, book, inproceedings, etc.)
    pub ty: EntryType<'a>,
    /// Citation key
    pub key: Cow<'a, str>,
    /// Fields (author, title, year, etc.)
    pub fields: Vec<Field<'a>>,
}
376
377impl<'a> Entry<'a> {
378    /// Create a new entry
379    #[must_use]
380    pub const fn new(ty: EntryType<'a>, key: &'a str) -> Self {
381        Self {
382            ty,
383            key: Cow::Borrowed(key),
384            fields: Vec::new(),
385        }
386    }
387
    /// Get the entry type.
    ///
    /// Returns a reference to the `ty` member; nothing is cloned.
    #[must_use]
    pub const fn entry_type(&self) -> &EntryType<'a> {
        &self.ty
    }
393
394    /// Get the citation key
395    #[must_use]
396    pub fn key(&self) -> &str {
397        &self.key
398    }
399
400    /// Get a field by name (case-sensitive).
401    #[must_use]
402    pub fn field(&self, name: &str) -> Option<&Field<'a>> {
403        self.fields.iter().find(|f| f.name == name)
404    }
405
406    /// Get a field by name (case-insensitive).
407    #[must_use]
408    pub fn field_ignore_case(&self, name: &str) -> Option<&Field<'a>> {
409        self.fields
410            .iter()
411            .find(|f| f.name.eq_ignore_ascii_case(name))
412    }
413
414    /// Get a field value by name (case-sensitive)
415    /// Note: This only returns string literals, not numbers
416    #[must_use]
417    pub fn get(&self, name: &str) -> Option<&str> {
418        self.field(name).and_then(|f| f.value.as_str())
419    }
420
421    /// Get a field value by name (case-insensitive)
422    /// Returns the first field whose name matches ignoring case
423    /// Note: This only returns string literals, not numbers
424    #[must_use]
425    pub fn get_ignore_case(&self, name: &str) -> Option<&str> {
426        self.field_ignore_case(name).and_then(|f| f.value.as_str())
427    }
428
429    /// Get a field value as a string, converting numbers if necessary (case-sensitive)
430    #[must_use]
431    pub fn get_as_string(&self, name: &str) -> Option<String> {
432        self.field(name).map(|f| value_to_lossy_string(&f.value))
433    }
434
435    /// Get a field value as a string, converting numbers if necessary (case-insensitive)
436    #[must_use]
437    pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
438        self.field_ignore_case(name)
439            .map(|f| value_to_lossy_string(&f.value))
440    }
441
442    /// Get the first string-literal field matching any of the names, case-insensitively.
443    #[must_use]
444    pub fn get_any_ignore_case(&self, names: &[&str]) -> Option<&str> {
445        names.iter().find_map(|name| self.get_ignore_case(name))
446    }
447
448    /// Get the first field matching any of the names as a string, case-insensitively.
449    #[must_use]
450    pub fn get_any_as_string_ignore_case(&self, names: &[&str]) -> Option<String> {
451        names
452            .iter()
453            .find_map(|name| self.get_as_string_ignore_case(name))
454    }
455
456    /// Return `true` when a field exists, ignoring ASCII case.
457    #[must_use]
458    pub fn has_field(&self, name: &str) -> bool {
459        self.field_ignore_case(name).is_some()
460    }
461
462    /// Return `true` when any field in `names` exists, ignoring ASCII case.
463    #[must_use]
464    pub fn has_any_field(&self, names: &[&str]) -> bool {
465        names.iter().any(|name| self.has_field(name))
466    }
467
468    /// Return the normalized DOI, if the entry has a recognizable DOI field.
469    ///
470    /// This accepts common input forms such as `10.1000/xyz`,
471    /// `doi:10.1000/xyz`, and `https://doi.org/10.1000/xyz`.
472    #[must_use]
473    pub fn doi(&self) -> Option<String> {
474        self.get_as_string_ignore_case("doi")
475            .and_then(|doi| normalize_doi(&doi))
476    }
477
478    /// Parse the `author` field into structured BibTeX names.
479    #[must_use]
480    pub fn authors(&self) -> Vec<PersonName> {
481        self.get_as_string_ignore_case("author")
482            .map_or_else(Vec::new, |authors| parse_names(&authors))
483    }
484
485    /// Parse the `editor` field into structured BibTeX names.
486    #[must_use]
487    pub fn editors(&self) -> Vec<PersonName> {
488        self.get_as_string_ignore_case("editor")
489            .map_or_else(Vec::new, |editors| parse_names(&editors))
490    }
491
492    /// Parse the `translator` field into structured BibTeX names.
493    #[must_use]
494    pub fn translators(&self) -> Vec<PersonName> {
495        self.get_as_string_ignore_case("translator")
496            .map_or_else(Vec::new, |translators| parse_names(&translators))
497    }
498
499    /// Parse a specific date-like field into date parts.
500    #[must_use]
501    pub fn date_parts_for(
502        &self,
503        field: &str,
504    ) -> Option<std::result::Result<DateParts, DateParseError>> {
505        self.get_as_string_ignore_case(field)
506            .map(|value| parse_date_parts(&value))
507    }
508
509    /// Return issued date parts for this entry.
510    ///
511    /// `date`, `issued`, `eventdate`, `origdate`, and `urldate` are checked
512    /// before falling back to `year` plus an optional `month` field.
513    #[must_use]
514    pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
515        for field in &["date", "issued", "eventdate", "origdate", "urldate"] {
516            if let Some(value) = self.get_as_string_ignore_case(field) {
517                return Some(parse_date_parts(&value));
518            }
519        }
520
521        let year = self.get_as_string_ignore_case("year")?;
522        let mut parts = match parse_date_parts(&year) {
523            Ok(parts) => parts,
524            Err(error) => return Some(Err(error)),
525        };
526        if let Some(month) = self.get_as_string_ignore_case("month") {
527            match parse_month_number(&month) {
528                Some(month) => parts.month = Some(month),
529                None => return Some(Err(DateParseError::InvalidMonth)),
530            }
531        }
532        Some(Ok(parts))
533    }
534
535    /// Return classified resource and identifier fields in source order.
536    #[must_use]
537    pub fn resource_fields(&self) -> Vec<ResourceField> {
538        let archive_prefix = self
539            .get_as_string_ignore_case("archiveprefix")
540            .or_else(|| self.get_as_string_ignore_case("eprinttype"));
541
542        self.fields
543            .iter()
544            .filter_map(|field| {
545                resource_field_from_parts(
546                    &field.name,
547                    field.value.to_plain_string(),
548                    archive_prefix.as_deref(),
549                )
550            })
551            .collect()
552    }
553
554    /// Get all fields
555    #[must_use]
556    pub fn fields(&self) -> &[Field<'a>] {
557        &self.fields
558    }
559
    /// Add a field.
    ///
    /// The field is appended after the existing ones. No duplicate check is
    /// made, so an entry can end up with several fields of the same name;
    /// use `set` to replace an existing value instead.
    pub fn add_field(&mut self, field: Field<'a>) {
        self.fields.push(field);
    }
564
565    /// Set a field value, replacing the first matching field or appending it.
566    pub fn set(&mut self, name: &'a str, value: Value<'a>) {
567        if let Some(field) = self.fields.iter_mut().find(|field| field.name == name) {
568            field.value = value;
569        } else {
570            self.fields.push(Field::new(name, value));
571        }
572    }
573
574    /// Set a field to a string literal.
575    pub fn set_literal(&mut self, name: &'a str, value: &'a str) {
576        self.set(name, Value::Literal(Cow::Borrowed(value)));
577    }
578
579    /// Remove all fields whose name matches exactly.
580    pub fn remove(&mut self, name: &str) -> Vec<Field<'a>> {
581        let mut removed = Vec::new();
582        let mut index = 0;
583        while index < self.fields.len() {
584            if self.fields[index].name == name {
585                removed.push(self.fields.remove(index));
586            } else {
587                index += 1;
588            }
589        }
590        removed
591    }
592
593    /// Rename all fields whose name matches exactly.
594    pub fn rename_field(&mut self, old: &str, new: &'a str) -> usize {
595        let mut renamed = 0;
596        for field in &mut self.fields {
597            if field.name == old {
598                field.name = Cow::Borrowed(new);
599                renamed += 1;
600            }
601        }
602        renamed
603    }
604
605    /// Return the title field as a string.
606    #[must_use]
607    pub fn title(&self) -> Option<String> {
608        self.get_any_as_string_ignore_case(&["title"])
609    }
610
611    /// Return the year field as a string.
612    #[must_use]
613    pub fn year(&self) -> Option<String> {
614        self.get_any_as_string_ignore_case(&["year"])
615    }
616
617    /// Return the date field as a string.
618    #[must_use]
619    pub fn date(&self) -> Option<String> {
620        self.get_any_as_string_ignore_case(&["date"])
621    }
622
623    /// Return the journal field, accepting BibLaTeX's `journaltitle` alias.
624    #[must_use]
625    pub fn journal(&self) -> Option<String> {
626        self.get_any_as_string_ignore_case(&["journal", "journaltitle"])
627    }
628
629    /// Return the book title field as a string.
630    #[must_use]
631    pub fn booktitle(&self) -> Option<String> {
632        self.get_any_as_string_ignore_case(&["booktitle"])
633    }
634
635    /// Return the URL field as a string.
636    #[must_use]
637    pub fn url(&self) -> Option<String> {
638        self.get_any_as_string_ignore_case(&["url"])
639    }
640
641    /// Return keywords split on commas or semicolons.
642    #[must_use]
643    pub fn keywords(&self) -> Vec<String> {
644        self.get_any_as_string_ignore_case(&["keywords", "keyword"])
645            .map(|keywords| {
646                keywords
647                    .split([',', ';'])
648                    .map(str::trim)
649                    .filter(|keyword| !keyword.is_empty())
650                    .map(ToOwned::to_owned)
651                    .collect()
652            })
653            .unwrap_or_default()
654    }
655
656    /// Validate the entry according to the specified level
657    /// Returns Ok(()) if valid, or Err with a list of validation errors
658    pub fn validate(&self, level: ValidationLevel) -> Result<(), Vec<ValidationError>> {
659        let mut errors = Vec::new();
660
661        // Always check required fields
662        self.validate_required_fields(&mut errors);
663
664        match level {
665            ValidationLevel::Minimal => {
666                // Only required fields
667            }
668            ValidationLevel::Standard => {
669                // Additional standard checks
670                self.validate_common_issues(&mut errors);
671            }
672            ValidationLevel::Strict => {
673                // All checks
674                self.validate_common_issues(&mut errors);
675                self.validate_field_formats(&mut errors);
676                self.validate_cross_references(&mut errors);
677            }
678        }
679
680        if errors.is_empty() {
681            Ok(())
682        } else {
683            Err(errors)
684        }
685    }
686
687    /// Validate required fields for the entry type
688    fn validate_required_fields(&self, errors: &mut Vec<ValidationError>) {
689        for &field_group in self.ty.required_field_groups() {
690            if self.has_any_field(field_group) {
691                continue;
692            }
693
694            if field_group == ["author", "editor"] {
695                errors.push(ValidationError::error(
696                    None,
697                    format!(
698                        "{} entry must have either 'author' or 'editor' field",
699                        self.ty
700                    ),
701                ));
702                continue;
703            }
704
705            let primary_field = field_group[0];
706            let message = if field_group.len() == 1 {
707                format!(
708                    "Required field '{}' is missing for {} entry",
709                    primary_field, self.ty
710                )
711            } else {
712                format!(
713                    "Required field '{}' is missing for {} entry (accepted aliases: {})",
714                    primary_field,
715                    self.ty,
716                    field_group.join(", ")
717                )
718            };
719
720            errors.push(ValidationError::error(Some(primary_field), message));
721        }
722    }
723
724    /// Validate common issues that might cause problems
725    fn validate_common_issues(&self, errors: &mut Vec<ValidationError>) {
726        // Check for common issues
727
728        // Year should be a valid number and recent
729        if let Some(year_str) = self.get_any_as_string_ignore_case(&["year", "date"]) {
730            if let Ok(year) = year_str.parse::<i32>() {
731                if !(1000..=2100).contains(&year) {
732                    errors.push(ValidationError::warning(
733                        Some(if self.has_field("year") {
734                            "year"
735                        } else {
736                            "date"
737                        }),
738                        format!("Year {year} seems unlikely"),
739                    ));
740                }
741            } else {
742                errors.push(ValidationError::warning(
743                    Some(if self.has_field("year") {
744                        "year"
745                    } else {
746                        "date"
747                    }),
748                    "Year/date should be a number",
749                ));
750            }
751        }
752
753        // Pages should have valid format (e.g., "12-24" or "12--24")
754        if let Some(pages) = self.get_ignore_case("pages") {
755            if !is_valid_page_range(pages) {
756                errors.push(ValidationError::warning(
757                    Some("pages"),
758                    "Pages should be in format '12-34' or '12--34'",
759                ));
760            }
761        }
762
763        // Author and editor shouldn't both be missing for some types (but not books, handled above)
764        match self.ty {
765            EntryType::InBook | EntryType::InProceedings | EntryType::InCollection
766                if !self.has_any_field(&["author", "editor"]) =>
767            {
768                errors.push(ValidationError::warning(
769                    None,
770                    "Entry should have either 'author' or 'editor' field",
771                ));
772            }
773            _ => {}
774        }
775
776        // Check for empty fields
777        for field in &self.fields {
778            if let Some(value_str) = field.value.as_str() {
779                if value_str.trim().is_empty() {
780                    errors.push(ValidationError::warning(
781                        Some(&field.name),
782                        "Field has empty value",
783                    ));
784                }
785            }
786        }
787    }
788
789    /// Validate specific field formats for strict checking
790    fn validate_field_formats(&self, errors: &mut Vec<ValidationError>) {
791        // DOI format
792        if let Some(doi) = self.get_as_string_ignore_case("doi") {
793            if normalize_doi(&doi).is_none() {
794                errors.push(ValidationError::warning(
795                    Some("doi"),
796                    "DOI should start with '10.' or a DOI URL/prefix",
797                ));
798            }
799        }
800
801        // URL format
802        if let Some(url) = self.get_ignore_case("url") {
803            if !url.starts_with("http://") && !url.starts_with("https://") {
804                errors.push(ValidationError::warning(
805                    Some("url"),
806                    "URL should start with http:// or https://",
807                ));
808            }
809        }
810
811        // ISBN format (basic check)
812        if let Some(isbn) = self.get_ignore_case("isbn") {
813            if !is_valid_isbn_shape(isbn) {
814                errors.push(ValidationError::warning(
815                    Some("isbn"),
816                    "ISBN should have 10 or 13 digits",
817                ));
818            }
819        }
820
821        // Month should be valid
822        if let Some(month) = self.get_ignore_case("month") {
823            if !is_valid_month(month) {
824                errors.push(ValidationError::info(
825                    Some("month"),
826                    "Month should be a standard abbreviation (jan, feb, etc.) or full name",
827                ));
828            }
829        }
830
831        // Volume and number should be numeric if present
832        for field_name in &["volume", "number"] {
833            if let Some(value) = self.get_ignore_case(field_name) {
834                if value.parse::<i32>().is_err() && !value.contains('-') {
835                    errors.push(ValidationError::info(
836                        Some(field_name),
837                        format!("{field_name} should typically be numeric"),
838                    ));
839                }
840            }
841        }
842    }
843
844    /// Validate cross-references for strict checking
845    fn validate_cross_references(&self, errors: &mut Vec<ValidationError>) {
846        if let Some(crossref) = self.get_ignore_case("crossref") {
847            if crossref.trim().is_empty() {
848                errors.push(ValidationError::error(
849                    Some("crossref"),
850                    "Cross-reference is empty",
851                ));
852            }
853        }
854    }
855
856    /// Check whether the entry has the minimal required fields for its type.
857    #[must_use]
858    pub fn is_valid(&self) -> bool {
859        self.validate(ValidationLevel::Minimal).is_ok()
860    }
861
862    /// Get a field value with LaTeX sequences converted to Unicode (case-sensitive)
863    ///
864    /// This method converts common LaTeX escape sequences like `\'e` to `é` and `\"{o}` to `ö`.
865    /// Returns `None` if the field doesn't exist or isn't a string literal.
866    ///
867    /// # Examples
868    ///
869    /// ```
870    /// # #[cfg(feature = "latex_to_unicode")]
871    /// # {
872    /// # use bibtex_parser::Library;
873    /// let bibtex = r#"@article{test, author = "Jos\'e Garc\'ia"}"#;
874    /// let library = Library::parser().parse(bibtex).unwrap();
875    /// let entry = &library.entries()[0];
876    /// assert_eq!(entry.get_unicode("author"), Some("José García".to_string()));
877    /// # }
878    /// ```
879    #[cfg(feature = "latex_to_unicode")]
880    #[must_use]
881    pub fn get_unicode(&self, name: &str) -> Option<String> {
882        self.get(name).map(crate::latex_unicode::latex_to_unicode)
883    }
884
885    /// Get a field value with LaTeX sequences converted to Unicode (case-insensitive)
886    ///
887    /// This method converts common LaTeX escape sequences like `\'e` to `é` and `\"{o}` to `ö`.
888    /// Returns `None` if the field doesn't exist or isn't a string literal.
889    /// Field name matching is case-insensitive.
890    ///
891    /// # Examples
892    ///
893    /// ```
894    /// # #[cfg(feature = "latex_to_unicode")]
895    /// # {
896    /// # use bibtex_parser::Library;
897    /// let bibtex = r#"@article{test, TITLE = "M\\\"uller's work"}"#;
898    /// let library = Library::parser().parse(bibtex).unwrap();
899    /// let entry = &library.entries()[0];
900    /// assert_eq!(entry.get_unicode_ignore_case("title"), Some("Müller's work".to_string()));
901    /// # }
902    /// ```
903    #[cfg(feature = "latex_to_unicode")]
904    #[must_use]
905    pub fn get_unicode_ignore_case(&self, name: &str) -> Option<String> {
906        self.get_ignore_case(name)
907            .map(crate::latex_unicode::latex_to_unicode)
908    }
909
910    /// Get a field value as string with LaTeX conversion (case-sensitive)
911    ///
912    /// Similar to `get_as_string()` but converts LaTeX sequences to Unicode.
913    /// This handles all field types (literals, numbers, variables, concatenations).
914    #[cfg(feature = "latex_to_unicode")]
915    #[must_use]
916    pub fn get_as_unicode_string(&self, name: &str) -> Option<String> {
917        self.get_as_string(name)
918            .map(|s| crate::latex_unicode::latex_to_unicode(&s))
919    }
920
921    /// Get a field value as string with LaTeX conversion (case-insensitive)
922    ///
923    /// Similar to `get_as_string_ignore_case()` but converts LaTeX sequences to Unicode.
924    /// This handles all field types (literals, numbers, variables, concatenations).
925    #[cfg(feature = "latex_to_unicode")]
926    #[must_use]
927    pub fn get_as_unicode_string_ignore_case(&self, name: &str) -> Option<String> {
928        self.get_as_string_ignore_case(name)
929            .map(|s| crate::latex_unicode::latex_to_unicode(&s))
930    }
931
932    /// Get all fields with LaTeX converted to Unicode
933    ///
934    /// Returns a vector of (`field_name`, `unicode_value`) pairs for all string literal fields.
935    /// Non-string fields (numbers, variables) are excluded.
936    ///
937    /// # Examples
938    ///
939    /// ```
940    /// # #[cfg(feature = "latex_to_unicode")]
941    /// # {
942    /// # use bibtex_parser::Library;
943    /// let bibtex = r#"@article{test,
944    ///     author = "Jos\'e Garc\'ia",
945    ///     title = "\\alpha and \\beta particles",
946    ///     year = 2024
947    /// }"#;
948    /// let library = Library::parser().parse(bibtex).unwrap();
949    /// let entry = &library.entries()[0];
950    /// let unicode_fields = entry.fields_unicode();
951    ///
952    /// let author = unicode_fields.iter()
953    ///     .find(|(k, _)| k == "author")
954    ///     .map(|(_, v)| v.as_str())
955    ///     .unwrap();
956    /// assert_eq!(author, "José García");
957    /// # }
958    /// ```
959    #[cfg(feature = "latex_to_unicode")]
960    #[must_use]
961    pub fn fields_unicode(&self) -> Vec<(String, String)> {
962        self.fields
963            .iter()
964            .filter_map(|f| {
965                f.value.as_str().map(|s| {
966                    (
967                        f.name.to_string(),
968                        crate::latex_unicode::latex_to_unicode(s),
969                    )
970                })
971            })
972            .collect()
973    }
974
975    /// Convert to owned version
976    #[must_use]
977    pub fn into_owned(self) -> Entry<'static> {
978        Entry {
979            ty: self.ty.into_owned(),
980            key: Cow::Owned(self.key.into_owned()),
981            fields: self.fields.into_iter().map(Field::into_owned).collect(),
982        }
983    }
984}
985
/// BibTeX entry type
///
/// Every recognised type is a unit variant. A type name that
/// [`EntryType::parse`] does not recognise is kept in [`EntryType::Custom`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EntryType<'a> {
    /// Article from a journal
    Article,
    /// Book with publisher
    Book,
    /// Booklet without a named publisher
    Booklet,
    /// A multi-volume book (`biblatex`)
    MvBook,
    /// Part of a book
    InBook,
    /// A self-contained book part published as a book (`biblatex`)
    BookInBook,
    /// Supplemental material in a book (`biblatex`)
    SuppBook,
    /// A collection with its own title
    Collection,
    /// A multi-volume collection (`biblatex`)
    MvCollection,
    /// A contribution to a collection
    InCollection,
    /// Supplemental material in a collection (`biblatex`)
    SuppCollection,
    /// Article in conference proceedings (also produced by the `conference` alias)
    InProceedings,
    /// Conference proceedings
    Proceedings,
    /// Multi-volume proceedings (`biblatex`)
    MvProceedings,
    /// A reference work (`biblatex`)
    Reference,
    /// A contribution to a reference work (`biblatex`)
    InReference,
    /// Technical documentation or manual
    Manual,
    /// Master's thesis
    MastersThesis,
    /// `PhD` thesis
    PhdThesis,
    /// Generic thesis (`biblatex`)
    Thesis,
    /// Technical report
    TechReport,
    /// Generic report (`biblatex`)
    Report,
    /// Patent or patent request (`biblatex`)
    Patent,
    /// Periodical issue (`biblatex`)
    Periodical,
    /// Online resource (`biblatex`)
    Online,
    /// Software artifact (`biblatex` and common repository exports)
    Software,
    /// Dataset artifact (`biblatex` and common repository exports)
    Dataset,
    /// Entry set (`biblatex`)
    Set,
    /// Reusable data-only entry (`biblatex`)
    XData,
    /// Unpublished work
    Unpublished,
    /// Miscellaneous
    Misc,
    /// Custom entry type, holding the type name either borrowed or owned
    Custom(Cow<'a, str>),
}
1054
1055impl<'a> EntryType<'a> {
1056    /// Parse from string (case-insensitive)
1057    #[must_use]
1058    #[inline(never)]
1059    pub fn parse(s: &'a str) -> Self {
1060        let bytes = s.as_bytes();
1061        if bytes.is_empty() {
1062            return Self::Custom(Cow::Borrowed(s));
1063        }
1064
1065        match (bytes.len(), ascii_lower(bytes[0])) {
1066            (3, b's') if eq_ascii_lower(bytes, b"set") => Self::Set,
1067            (4, b'b') if eq_ascii_lower(bytes, b"book") => Self::Book,
1068            (4, b'm') if eq_ascii_lower(bytes, b"misc") => Self::Misc,
1069            (6, b'i') if eq_ascii_lower(bytes, b"inbook") => Self::InBook,
1070            (6, b'm') if eq_ascii_lower(bytes, b"manual") => Self::Manual,
1071            (6, b'm') if eq_ascii_lower(bytes, b"mvbook") => Self::MvBook,
1072            (6, b'o') if eq_ascii_lower(bytes, b"online") => Self::Online,
1073            (6, b'p') if eq_ascii_lower(bytes, b"patent") => Self::Patent,
1074            (6, b'r') if eq_ascii_lower(bytes, b"report") => Self::Report,
1075            (6, b't') if eq_ascii_lower(bytes, b"thesis") => Self::Thesis,
1076            (7, b'a') if eq_ascii_lower(bytes, b"article") => Self::Article,
1077            (7, b'b') if eq_ascii_lower(bytes, b"booklet") => Self::Booklet,
1078            (7, b'd') if eq_ascii_lower(bytes, b"dataset") => Self::Dataset,
1079            (8, b's') if eq_ascii_lower(bytes, b"software") => Self::Software,
1080            (8, b's') if eq_ascii_lower(bytes, b"suppbook") => Self::SuppBook,
1081            (9, b'r') if eq_ascii_lower(bytes, b"reference") => Self::Reference,
1082            (9, b'p') if eq_ascii_lower(bytes, b"phdthesis") => Self::PhdThesis,
1083            (10, b'b') if eq_ascii_lower(bytes, b"bookinbook") => Self::BookInBook,
1084            (10, b'c') if eq_ascii_lower(bytes, b"conference") => Self::InProceedings,
1085            (10, b'c') if eq_ascii_lower(bytes, b"collection") => Self::Collection,
1086            (10, b'p') if eq_ascii_lower(bytes, b"periodical") => Self::Periodical,
1087            (10, b't') if eq_ascii_lower(bytes, b"techreport") => Self::TechReport,
1088            (11, b'i') if eq_ascii_lower(bytes, b"inreference") => Self::InReference,
1089            (11, b'p') if eq_ascii_lower(bytes, b"proceedings") => Self::Proceedings,
1090            (11, b'u') if eq_ascii_lower(bytes, b"unpublished") => Self::Unpublished,
1091            (12, b'i') if eq_ascii_lower(bytes, b"incollection") => Self::InCollection,
1092            (12, b'm') if eq_ascii_lower(bytes, b"mvcollection") => Self::MvCollection,
1093            (13, b'i') if eq_ascii_lower(bytes, b"inproceedings") => Self::InProceedings,
1094            (13, b'm') if eq_ascii_lower(bytes, b"mastersthesis") => Self::MastersThesis,
1095            (13, b'm') if eq_ascii_lower(bytes, b"mvproceedings") => Self::MvProceedings,
1096            (14, b's') if eq_ascii_lower(bytes, b"suppcollection") => Self::SuppCollection,
1097            (5, b'x') if eq_ascii_lower(bytes, b"xdata") => Self::XData,
1098            _ => Self::Custom(Cow::Borrowed(s)),
1099        }
1100    }
1101
1102    /// Get required fields for this entry type
1103    #[must_use]
1104    pub const fn required_fields(&self) -> &'static [&'static str] {
1105        match self {
1106            Self::Article => &["author", "title", "journal", "year"],
1107            Self::Book | Self::MvBook => &["author", "title", "publisher", "year"],
1108            Self::Booklet | Self::Manual => &["title"],
1109            Self::InBook | Self::BookInBook | Self::SuppBook => {
1110                &["author", "title", "chapter", "publisher", "year"]
1111            }
1112            Self::Collection | Self::MvCollection | Self::Reference => {
1113                &["editor", "title", "publisher", "year"]
1114            }
1115            Self::InCollection | Self::SuppCollection | Self::InReference => {
1116                &["author", "title", "booktitle", "publisher", "year"]
1117            }
1118            Self::InProceedings => &["author", "title", "booktitle", "year"],
1119            Self::Proceedings | Self::MvProceedings | Self::Periodical => &["title", "year"],
1120            Self::MastersThesis | Self::PhdThesis | Self::Thesis => {
1121                &["author", "title", "school", "year"]
1122            }
1123            Self::TechReport => &["author", "title", "institution", "year"],
1124            Self::Report => &["author", "title", "type", "institution", "year"],
1125            Self::Patent => &["author", "title", "number", "year"],
1126            Self::Online => &["title", "url"],
1127            Self::Software | Self::Dataset => &["author", "title", "year"],
1128            Self::Unpublished => &["author", "title", "note"],
1129            Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
1130        }
1131    }
1132
1133    /// Get required field groups for validation.
1134    ///
1135    /// Each inner group is an OR-list. For example, `["author", "editor"]`
1136    /// means either field satisfies that requirement.
1137    #[must_use]
1138    pub const fn required_field_groups(&self) -> &'static [&'static [&'static str]] {
1139        match self {
1140            Self::Article => &[
1141                &["author"],
1142                &["title"],
1143                &["journal", "journaltitle"],
1144                &["year", "date"],
1145            ],
1146            Self::Book | Self::MvBook => &[
1147                &["author", "editor"],
1148                &["title"],
1149                &["publisher"],
1150                &["year", "date"],
1151            ],
1152            Self::Booklet | Self::Manual => &[&["title"]],
1153            Self::InBook | Self::BookInBook | Self::SuppBook => &[
1154                &["author", "editor"],
1155                &["title"],
1156                &["chapter", "pages"],
1157                &["publisher"],
1158                &["year", "date"],
1159            ],
1160            Self::Collection | Self::MvCollection | Self::Reference => &[
1161                &["editor", "author"],
1162                &["title"],
1163                &["publisher"],
1164                &["year", "date"],
1165            ],
1166            Self::InCollection | Self::SuppCollection | Self::InReference => &[
1167                &["author", "editor"],
1168                &["title"],
1169                &["booktitle"],
1170                &["publisher"],
1171                &["year", "date"],
1172            ],
1173            Self::InProceedings => &[
1174                &["author", "editor"],
1175                &["title"],
1176                &["booktitle"],
1177                &["year", "date"],
1178            ],
1179            Self::Proceedings | Self::MvProceedings | Self::Periodical => {
1180                &[&["title"], &["year", "date"]]
1181            }
1182            Self::MastersThesis | Self::PhdThesis | Self::Thesis => &[
1183                &["author"],
1184                &["title"],
1185                &["school", "institution"],
1186                &["year", "date"],
1187            ],
1188            Self::TechReport => &[&["author"], &["title"], &["institution"], &["year", "date"]],
1189            Self::Report => &[
1190                &["author", "editor"],
1191                &["title"],
1192                &["type"],
1193                &["institution"],
1194                &["year", "date"],
1195            ],
1196            Self::Patent => &[&["author"], &["title"], &["number"], &["year", "date"]],
1197            Self::Online => &[&["title"], &["url", "doi"], &["year", "date", "urldate"]],
1198            Self::Software | Self::Dataset => &[
1199                &["author", "editor"],
1200                &["title"],
1201                &["year", "date", "version"],
1202            ],
1203            Self::Unpublished => &[&["author"], &["title"], &["note"]],
1204            Self::Misc | Self::Set | Self::XData | Self::Custom(_) => &[],
1205        }
1206    }
1207
1208    /// Return the canonical lowercase entry type name.
1209    #[must_use]
1210    pub fn canonical_name(&self) -> &str {
1211        match self {
1212            Self::Article => "article",
1213            Self::Book => "book",
1214            Self::Booklet => "booklet",
1215            Self::MvBook => "mvbook",
1216            Self::InBook => "inbook",
1217            Self::BookInBook => "bookinbook",
1218            Self::SuppBook => "suppbook",
1219            Self::Collection => "collection",
1220            Self::MvCollection => "mvcollection",
1221            Self::InCollection => "incollection",
1222            Self::SuppCollection => "suppcollection",
1223            Self::InProceedings => "inproceedings",
1224            Self::Proceedings => "proceedings",
1225            Self::MvProceedings => "mvproceedings",
1226            Self::Reference => "reference",
1227            Self::InReference => "inreference",
1228            Self::Manual => "manual",
1229            Self::MastersThesis => "mastersthesis",
1230            Self::PhdThesis => "phdthesis",
1231            Self::Thesis => "thesis",
1232            Self::TechReport => "techreport",
1233            Self::Report => "report",
1234            Self::Patent => "patent",
1235            Self::Periodical => "periodical",
1236            Self::Online => "online",
1237            Self::Software => "software",
1238            Self::Dataset => "dataset",
1239            Self::Set => "set",
1240            Self::XData => "xdata",
1241            Self::Unpublished => "unpublished",
1242            Self::Misc => "misc",
1243            Self::Custom(s) => s,
1244        }
1245    }
1246
1247    /// Return common aliases that parse to this entry type.
1248    #[must_use]
1249    pub const fn aliases(&self) -> &'static [&'static str] {
1250        match self {
1251            Self::InProceedings => &["conference"],
1252            Self::TechReport => &["techreport"],
1253            Self::MastersThesis => &["mastersthesis"],
1254            Self::PhdThesis => &["phdthesis"],
1255            _ => &[],
1256        }
1257    }
1258
1259    /// Return `true` for the classic BibTeX entry types.
1260    #[must_use]
1261    pub const fn is_classic_bibtex(&self) -> bool {
1262        matches!(
1263            self,
1264            Self::Article
1265                | Self::Book
1266                | Self::Booklet
1267                | Self::InBook
1268                | Self::InCollection
1269                | Self::InProceedings
1270                | Self::Manual
1271                | Self::MastersThesis
1272                | Self::PhdThesis
1273                | Self::Proceedings
1274                | Self::TechReport
1275                | Self::Unpublished
1276                | Self::Misc
1277        )
1278    }
1279
1280    /// Return `true` for entry types that are specific to BibLaTeX or common repository exports.
1281    #[must_use]
1282    pub const fn is_extended(&self) -> bool {
1283        !self.is_classic_bibtex() && !matches!(self, Self::Custom(_))
1284    }
1285
1286    /// Convert to owned version
1287    #[must_use]
1288    pub fn into_owned(self) -> EntryType<'static> {
1289        match self {
1290            Self::Custom(s) => EntryType::Custom(Cow::Owned(s.into_owned())),
1291            Self::Article => EntryType::Article,
1292            Self::Book => EntryType::Book,
1293            Self::Booklet => EntryType::Booklet,
1294            Self::MvBook => EntryType::MvBook,
1295            Self::InBook => EntryType::InBook,
1296            Self::BookInBook => EntryType::BookInBook,
1297            Self::SuppBook => EntryType::SuppBook,
1298            Self::Collection => EntryType::Collection,
1299            Self::MvCollection => EntryType::MvCollection,
1300            Self::InCollection => EntryType::InCollection,
1301            Self::SuppCollection => EntryType::SuppCollection,
1302            Self::InProceedings => EntryType::InProceedings,
1303            Self::Proceedings => EntryType::Proceedings,
1304            Self::MvProceedings => EntryType::MvProceedings,
1305            Self::Reference => EntryType::Reference,
1306            Self::InReference => EntryType::InReference,
1307            Self::Manual => EntryType::Manual,
1308            Self::MastersThesis => EntryType::MastersThesis,
1309            Self::PhdThesis => EntryType::PhdThesis,
1310            Self::Thesis => EntryType::Thesis,
1311            Self::TechReport => EntryType::TechReport,
1312            Self::Report => EntryType::Report,
1313            Self::Patent => EntryType::Patent,
1314            Self::Periodical => EntryType::Periodical,
1315            Self::Online => EntryType::Online,
1316            Self::Software => EntryType::Software,
1317            Self::Dataset => EntryType::Dataset,
1318            Self::Set => EntryType::Set,
1319            Self::XData => EntryType::XData,
1320            Self::Unpublished => EntryType::Unpublished,
1321            Self::Misc => EntryType::Misc,
1322        }
1323    }
1324}
1325
/// Map an ASCII uppercase byte to its lowercase counterpart; any other byte
/// is returned unchanged.
#[inline]
const fn ascii_lower(byte: u8) -> u8 {
    match byte {
        // Upper- and lowercase ASCII letters differ only in bit 0x20.
        b'A'..=b'Z' => byte | 0x20,
        _ => byte,
    }
}
1334
/// Compare `input` with `expected` after ASCII-lowercasing `input` only.
///
/// `expected` is taken as-is, so it must already be lowercase for a
/// case-insensitive match. Slices of different length never match.
#[inline]
fn eq_ascii_lower(input: &[u8], expected: &[u8]) -> bool {
    input.len() == expected.len()
        && input
            .iter()
            .zip(expected)
            .all(|(&actual, &wanted)| actual.to_ascii_lowercase() == wanted)
}
1351
1352impl fmt::Display for EntryType<'_> {
1353    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1354        f.write_str(self.canonical_name())
1355    }
1356}
1357
/// A field in a BibTeX entry
///
/// Pairs a field name with its value. Both may borrow for `'a` or hold
/// owned data.
#[derive(Debug, Clone, PartialEq)]
pub struct Field<'a> {
    /// Field name
    pub name: Cow<'a, str>,
    /// Field value (literal, number, variable reference, or concatenation)
    pub value: Value<'a>,
}
1366
1367impl<'a> Field<'a> {
1368    /// Create a new field
1369    #[must_use]
1370    pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1371        Self {
1372            name: Cow::Borrowed(name),
1373            value,
1374        }
1375    }
1376
1377    /// Check if field name matches (case-insensitive)
1378    #[must_use]
1379    pub fn name_eq_ignore_case(&self, name: &str) -> bool {
1380        self.name.eq_ignore_ascii_case(name)
1381    }
1382
1383    /// Convert to owned version
1384    #[must_use]
1385    pub fn into_owned(self) -> Field<'static> {
1386        Field {
1387            name: Cow::Owned(self.name.into_owned()),
1388            value: self.value.into_owned(),
1389        }
1390    }
1391}
1392
/// A value in a BibTeX field
///
/// # Memory Optimization
/// Concatenation parts are stored out of line so the common literal, number,
/// and variable variants stay compact.
#[derive(Debug, Clone, PartialEq)]
pub enum Value<'a> {
    /// String literal (the text, borrowed or owned)
    Literal(Cow<'a, str>),
    /// Number literal, stored as a signed 64-bit integer
    Number(i64),
    /// Concatenated values, in order (boxed to reduce enum size)
    Concat(Box<[Self]>),
    /// Variable reference, holding the variable's name
    Variable(Cow<'a, str>),
}
1409
1410impl Default for Value<'_> {
1411    fn default() -> Self {
1412        Self::Number(0)
1413    }
1414}
1415
1416impl Value<'_> {
1417    /// Get the value as a string (if it's a simple literal)
1418    #[must_use]
1419    pub fn as_str(&self) -> Option<&str> {
1420        match self {
1421            Self::Literal(s) => Some(s),
1422            _ => None,
1423        }
1424    }
1425
1426    /// Expand variables and concatenations to get final string
1427    #[must_use]
1428    pub fn expand(&self, strings: &AHashMap<&str, Value>) -> String {
1429        match self {
1430            Self::Literal(s) => normalize_text_projection(s),
1431            Self::Number(n) => n.to_string(),
1432            Self::Variable(name) => strings
1433                .get(name.as_ref())
1434                .map_or_else(|| format!("{{undefined:{name}}}"), |v| v.expand(strings)),
1435            Self::Concat(parts) => parts.iter().map(|p| p.expand(strings)).collect::<String>(),
1436        }
1437    }
1438
1439    /// Project this value to ordinary text without adding unresolved-variable markers.
1440    ///
1441    /// Literals and numbers become their text form, variables become their
1442    /// variable name, and concatenations are joined recursively. This allocates
1443    /// when the value is not already a simple borrowed literal.
1444    #[must_use]
1445    pub fn to_plain_string(&self) -> String {
1446        value_to_plain_string(self)
1447    }
1448
1449    /// Project this value to display text, marking unresolved variables as `{name}`.
1450    #[must_use]
1451    pub fn to_lossy_string(&self) -> String {
1452        value_to_lossy_string(self)
1453    }
1454
1455    /// Create a literal value from ordinary text.
1456    #[must_use]
1457    pub fn from_plain_string<'a>(text: impl Into<Cow<'a, str>>) -> Value<'a> {
1458        Value::Literal(text.into())
1459    }
1460
1461    /// Parse a BibTeX value fragment into a structured value.
1462    ///
1463    /// The input should be a field-value fragment such as `{Title}`, `venue`, or
1464    /// `"Part " # suffix`. Leading and trailing whitespace are accepted, but
1465    /// any other trailing text is rejected.
1466    pub fn from_bibtex_source(source: &str) -> crate::Result<Value<'_>> {
1467        let mut input = source;
1468        crate::parser::lexer::skip_whitespace(&mut input);
1469        let value = crate::parser::value::parse_value(&mut input)
1470            .map_err(|_| crate::Error::WinnowError("invalid BibTeX value source".to_string()))?;
1471        crate::parser::lexer::skip_whitespace(&mut input);
1472        if input.is_empty() {
1473            Ok(value)
1474        } else {
1475            Err(crate::Error::WinnowError(format!(
1476                "trailing text after BibTeX value: {input:?}"
1477            )))
1478        }
1479    }
1480
1481    /// Serialize this value as a BibTeX value fragment.
1482    ///
1483    /// This is a normalized source projection. Use source-preserving parsing
1484    /// when callers need the exact original spelling or delimiters.
1485    #[must_use]
1486    pub fn to_bibtex_source(&self) -> String {
1487        match self {
1488            Self::Literal(text) => literal_to_bibtex_source(text),
1489            Self::Number(number) => number.to_string(),
1490            Self::Variable(name) => name.to_string(),
1491            Self::Concat(parts) => parts
1492                .iter()
1493                .map(Self::to_bibtex_source)
1494                .collect::<Vec<_>>()
1495                .join(" # "),
1496        }
1497    }
1498
1499    /// Project this value to ordinary text and convert common LaTeX sequences to Unicode.
1500    #[cfg(feature = "latex_to_unicode")]
1501    #[must_use]
1502    pub fn to_unicode_plain_string(&self) -> String {
1503        crate::latex_unicode::latex_to_unicode(&self.to_plain_string())
1504    }
1505
1506    /// Convert to owned version
1507    #[must_use]
1508    pub fn into_owned(self) -> Value<'static> {
1509        match self {
1510            Self::Literal(s) => Value::Literal(Cow::Owned(s.into_owned())),
1511            Self::Number(n) => Value::Number(n),
1512            Self::Variable(s) => Value::Variable(Cow::Owned(s.into_owned())),
1513            Self::Concat(parts) => Value::Concat(
1514                parts
1515                    .into_vec()
1516                    .into_iter()
1517                    .map(Value::into_owned)
1518                    .collect::<Vec<_>>()
1519                    .into_boxed_slice(),
1520            ),
1521        }
1522    }
1523}
1524
1525fn literal_to_bibtex_source(text: &str) -> String {
1526    if is_balanced_braced_literal_content(text) {
1527        format!("{{{text}}}")
1528    } else {
1529        format!("\"{}\"", escape_quoted_literal(text))
1530    }
1531}
1532
1533fn is_balanced_braced_literal_content(text: &str) -> bool {
1534    let bytes = text.as_bytes();
1535    let mut depth = 0usize;
1536    let mut pos = 0usize;
1537
1538    while let Some(offset) = memchr2(b'{', b'}', &bytes[pos..]) {
1539        let idx = pos + offset;
1540        if is_escaped_delimiter(bytes, idx) {
1541            pos = idx + 1;
1542            continue;
1543        }
1544
1545        match bytes[idx] {
1546            b'{' => depth += 1,
1547            b'}' => {
1548                let Some(new_depth) = depth.checked_sub(1) else {
1549                    return false;
1550                };
1551                depth = new_depth;
1552            }
1553            _ => unreachable!(),
1554        }
1555        pos = idx + 1;
1556    }
1557
1558    depth == 0
1559}
1560
/// Report whether the byte at index `delimiter` is escaped, i.e. directly
/// preceded by an odd number of backslashes.
///
/// An even run of backslashes consists of escaped backslashes only and
/// leaves the delimiter active.
fn is_escaped_delimiter(input: &[u8], delimiter: usize) -> bool {
    let preceding_backslashes = input[..delimiter]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 == 1
}
1574
/// Put a backslash in front of every double quote in `text`.
fn escape_quoted_literal(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '"' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1578
1579impl fmt::Display for Value<'_> {
1580    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1581        match self {
1582            Self::Literal(s) => write!(f, "{s}"),
1583            Self::Number(n) => write!(f, "{n}"),
1584            Self::Variable(name) => write!(f, "{{{name}}}"),
1585            Self::Concat(parts) => {
1586                for (i, part) in parts.iter().enumerate() {
1587                    if i > 0 {
1588                        write!(f, " # ")?;
1589                    }
1590                    write!(f, "{part}")?;
1591                }
1592                Ok(())
1593            }
1594        }
1595    }
1596}
1597
1598fn value_to_lossy_string(value: &Value<'_>) -> String {
1599    match value {
1600        Value::Literal(s) => normalize_text_projection(s),
1601        Value::Number(n) => n.to_string(),
1602        Value::Variable(v) => format!("{{{v}}}"),
1603        Value::Concat(parts) => parts.iter().map(value_to_lossy_string).collect(),
1604    }
1605}
1606
1607fn value_to_plain_string(value: &Value<'_>) -> String {
1608    match value {
1609        Value::Literal(text) => normalize_text_projection(text),
1610        Value::Number(number) => number.to_string(),
1611        Value::Variable(name) => name.to_string(),
1612        Value::Concat(parts) => parts.iter().map(value_to_plain_string).collect(),
1613    }
1614}
1615
/// Normalize line breaks in projected text.
///
/// Each `\r\n`, lone `\r`, or `\n` becomes a single `\n`, and the spaces and
/// tabs that directly follow the line break are dropped. Text without any
/// line break is returned unchanged.
pub(crate) fn normalize_text_projection(text: &str) -> String {
    if !text.contains(['\n', '\r']) {
        return text.to_owned();
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text.chars().peekable();
    while let Some(current) = rest.next() {
        if current != '\r' && current != '\n' {
            out.push(current);
            continue;
        }

        if current == '\r' {
            // Treat `\r\n` as one line break.
            let _ = rest.next_if_eq(&'\n');
        }
        out.push('\n');

        // Drop the indentation of the continuation line.
        while rest.next_if(|next| matches!(next, ' ' | '\t')).is_some() {}
    }
    out
}
1649
/// Normalize a DOI from common raw forms into lowercase `10.x/...` form.
///
/// Surrounding whitespace is ignored and one leading resolver URL
/// (`https://doi.org/`, `http://doi.org/`, `https://dx.doi.org/`,
/// `http://dx.doi.org/`) or `doi:` label is removed. Prefixes are matched
/// without regard to ASCII case, so `HTTPS://DOI.ORG/...` and `Doi:` are
/// accepted as well. Trailing `.`, `,`, and `;` are dropped.
///
/// Returns `None` when the input is blank or when what remains does not
/// start with `10.` or contains no `/`.
#[must_use]
pub fn normalize_doi(input: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ];

    let trimmed = input.trim();
    if trimmed.is_empty() {
        return None;
    }

    // `str::get` yields `None` when the prefix is longer than the input or
    // would split a multi-byte character, so the slice below cannot panic.
    let doi = PREFIXES
        .iter()
        .find_map(|prefix| {
            let head = trimmed.get(..prefix.len())?;
            head.eq_ignore_ascii_case(prefix)
                .then(|| trimmed[prefix.len()..].trim())
        })
        .unwrap_or(trimmed);

    let doi = doi.trim_end_matches(['.', ',', ';']);
    if doi.len() > 3 && doi.starts_with("10.") && doi.contains('/') {
        Some(doi.to_ascii_lowercase())
    } else {
        None
    }
}
1679
1680fn resource_field_from_parts(
1681    field_name: &str,
1682    value: String,
1683    archive_prefix: Option<&str>,
1684) -> Option<ResourceField> {
1685    let mut kind = classify_resource_field(field_name)?;
1686    if kind == ResourceKind::Eprint
1687        && archive_prefix.is_some_and(|prefix| prefix.eq_ignore_ascii_case("arxiv"))
1688    {
1689        kind = ResourceKind::Arxiv;
1690    }
1691    let normalized = normalize_resource_value(kind, &value);
1692    Some(ResourceField {
1693        kind,
1694        field_name: field_name.to_string(),
1695        value,
1696        normalized,
1697    })
1698}
1699
1700fn normalize_resource_value(kind: ResourceKind, value: &str) -> Option<String> {
1701    let trimmed = value.trim();
1702    if trimmed.is_empty() {
1703        return None;
1704    }
1705
1706    match kind {
1707        ResourceKind::Doi => normalize_doi(trimmed),
1708        ResourceKind::Pmid => normalize_ascii_digits(trimmed),
1709        ResourceKind::Pmcid => Some(normalize_pmcid(trimmed)),
1710        ResourceKind::Isbn => normalize_isbn(trimmed),
1711        ResourceKind::Issn => normalize_issn(trimmed),
1712        ResourceKind::Arxiv => Some(normalize_arxiv(trimmed)),
1713        ResourceKind::File | ResourceKind::Url | ResourceKind::Eprint | ResourceKind::Crossref => {
1714            Some(trimmed.to_string())
1715        }
1716    }
1717}
1718
/// Accept a value that consists solely of ASCII digits.
///
/// Surrounding whitespace is ignored. Returns the trimmed digits, or `None`
/// when any other character is present or when no digit is present at all
/// (an empty identifier is not a valid number).
fn normalize_ascii_digits(input: &str) -> Option<String> {
    let compact = input.trim();
    // `all` is vacuously true on empty input, so emptiness is checked first.
    let is_number = !compact.is_empty() && compact.bytes().all(|byte| byte.is_ascii_digit());
    is_number.then(|| compact.to_string())
}
1726
/// Normalize a PubMed Central identifier to the `PMC…` form.
///
/// Surrounding whitespace and any leading `pmcid:` label are removed; the
/// label is matched without regard to ASCII case (`pmcid:`, `PMCID:`,
/// `PMCid:`, …). If what remains already starts with `pmc` in any case it
/// is uppercased; otherwise `PMC` is prepended.
///
/// The remainder is not validated, so a value consisting only of the label
/// yields `"PMC"`.
fn normalize_pmcid(input: &str) -> String {
    const LABEL: &str = "pmcid:";

    let mut compact = input.trim();
    // `str::get` returns `None` when the text is shorter than the label or the
    // cut would split a multi-byte character, which also ends the loop.
    while let Some(head) = compact.get(..LABEL.len()) {
        if !head.eq_ignore_ascii_case(LABEL) {
            break;
        }
        compact = compact[LABEL.len()..].trim_start();
    }

    let has_pmc_prefix = compact
        .get(..3)
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case("pmc"));
    if has_pmc_prefix {
        compact.to_ascii_uppercase()
    } else {
        format!("PMC{compact}")
    }
}
1742
1743fn normalize_isbn(input: &str) -> Option<String> {
1744    let compact = input
1745        .chars()
1746        .filter(|ch| !matches!(ch, '-' | ' '))
1747        .collect::<String>()
1748        .to_ascii_uppercase();
1749    is_valid_isbn_shape(&compact).then_some(compact)
1750}
1751
/// Strips hyphens and spaces from an ISSN and upper-cases it.
///
/// The result is accepted only when it is exactly eight characters: seven
/// ASCII digits followed by a digit or `X`. No check-digit arithmetic is
/// performed.
fn normalize_issn(input: &str) -> Option<String> {
    let compact: String = input
        .chars()
        .filter(|ch| *ch != '-' && *ch != ' ')
        .map(|ch| ch.to_ascii_uppercase())
        .collect();

    let well_formed = match compact.as_bytes() {
        [serial @ .., check] => {
            serial.len() == 7
                && serial.iter().all(u8::is_ascii_digit)
                && (check.is_ascii_digit() || *check == b'X')
        }
        [] => false,
    };
    well_formed.then_some(compact)
}
1765
/// Normalizes an arXiv identifier by removing surrounding whitespace and any
/// leading `arXiv:` label.
///
/// The label is matched ASCII case-insensitively, so `arXiv:`, `arxiv:`,
/// `ARXIV:` and `ArXiv:` are all stripped. The identifier itself is returned
/// unchanged.
fn normalize_arxiv(input: &str) -> String {
    const LABEL: &str = "arxiv:";

    let mut identifier = input.trim();
    // `get` returns `None` for short input or a non-boundary cut, so the
    // slice below is always in range and on a char boundary.
    while identifier
        .get(..LABEL.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(LABEL))
    {
        identifier = &identifier[LABEL.len()..];
    }
    identifier.trim().to_string()
}
1774
/// Removes surrounding whitespace and every layer of enclosing `{...}` or
/// `"..."` delimiters from a scalar field value.
///
/// Only the first and last characters are inspected at each step; inner
/// brace balance is not checked.
fn trim_bibtex_scalar(input: &str) -> &str {
    let mut current = input.trim();
    loop {
        let braced = current
            .strip_prefix('{')
            .and_then(|rest| rest.strip_suffix('}'));
        let quoted = current
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'));

        match braced.or(quoted) {
            Some(inner) => current = inner.trim(),
            None => return current,
        }
    }
}
1789
1790fn parse_year(input: &str) -> std::result::Result<i32, DateParseError> {
1791    let input = input.trim();
1792    if input.len() != 4 || !input.chars().all(|ch| ch.is_ascii_digit()) {
1793        return Err(DateParseError::InvalidYear);
1794    }
1795    input
1796        .parse::<i32>()
1797        .map_err(|_| DateParseError::InvalidYear)
1798}
1799
1800fn parse_month_number(input: &str) -> Option<u8> {
1801    let normalized = trim_bibtex_scalar(input).to_ascii_lowercase();
1802    if normalized.is_empty() {
1803        return None;
1804    }
1805
1806    if let Ok(month) = normalized.parse::<u8>() {
1807        return (1..=12).contains(&month).then_some(month);
1808    }
1809
1810    match normalized.as_str() {
1811        "jan" | "january" => Some(1),
1812        "feb" | "february" => Some(2),
1813        "mar" | "march" => Some(3),
1814        "apr" | "april" => Some(4),
1815        "may" => Some(5),
1816        "jun" | "june" => Some(6),
1817        "jul" | "july" => Some(7),
1818        "aug" | "august" => Some(8),
1819        "sep" | "sept" | "september" => Some(9),
1820        "oct" | "october" => Some(10),
1821        "nov" | "november" => Some(11),
1822        "dec" | "december" => Some(12),
1823        _ => None,
1824    }
1825}
1826
1827fn parse_day_number(input: &str, year: i32, month: u8) -> std::result::Result<u8, DateParseError> {
1828    let input = input.trim();
1829    if input.is_empty() || input.len() > 2 || !input.chars().all(|ch| ch.is_ascii_digit()) {
1830        return Err(DateParseError::InvalidDay);
1831    }
1832    let day = input
1833        .parse::<u8>()
1834        .map_err(|_| DateParseError::InvalidDay)?;
1835    (1..=days_in_month(year, month))
1836        .contains(&day)
1837        .then_some(day)
1838        .ok_or(DateParseError::InvalidDay)
1839}
1840
1841const fn days_in_month(year: i32, month: u8) -> u8 {
1842    match month {
1843        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1844        4 | 6 | 9 | 11 => 30,
1845        2 if is_leap_year(year) => 29,
1846        2 => 28,
1847        _ => 0,
1848    }
1849}
1850
/// Gregorian leap-year rule: divisible by 400, or divisible by 4 but not
/// by 100.
const fn is_leap_year(year: i32) -> bool {
    if year % 400 == 0 {
        true
    } else if year % 100 == 0 {
        false
    } else {
        year % 4 == 0
    }
}
1854
/// Checks that `isbn` looks like an ISBN-10 or ISBN-13 once hyphens and
/// spaces are removed.
///
/// ISBN-10: nine ASCII digits followed by a digit or `x`/`X`.
/// ISBN-13: thirteen ASCII digits. The check digit is not verified.
fn is_valid_isbn_shape(isbn: &str) -> bool {
    // The separators are ASCII, so filtering bytes is equivalent to filtering chars.
    let symbols: Vec<u8> = isbn
        .bytes()
        .filter(|byte| !matches!(byte, b'-' | b' '))
        .collect();

    match symbols.split_last() {
        Some((check, body)) if body.len() == 9 => {
            body.iter().all(u8::is_ascii_digit)
                && (check.is_ascii_digit() || matches!(check, b'x' | b'X'))
        }
        Some(_) if symbols.len() == 13 => symbols.iter().all(u8::is_ascii_digit),
        _ => false,
    }
}
1867
1868fn split_bibtex_names(input: &str) -> Vec<&str> {
1869    let mut names = Vec::new();
1870    let mut start = 0;
1871    let mut depth = 0usize;
1872    let mut iter = input.char_indices().peekable();
1873
1874    while let Some((index, ch)) = iter.next() {
1875        match ch {
1876            '{' => depth += 1,
1877            '}' => depth = depth.saturating_sub(1),
1878            'a' | 'A' if depth == 0 && starts_name_separator(input, index) => {
1879                let candidate = input[start..index].trim();
1880                if !candidate.is_empty() {
1881                    names.push(candidate);
1882                }
1883                start = index + 3;
1884                while input[start..]
1885                    .chars()
1886                    .next()
1887                    .is_some_and(char::is_whitespace)
1888                {
1889                    start += input[start..].chars().next().map_or(0, char::len_utf8);
1890                }
1891                while iter
1892                    .peek()
1893                    .is_some_and(|(_, next_ch)| next_ch.is_whitespace())
1894                {
1895                    iter.next();
1896                }
1897            }
1898            _ => {}
1899        }
1900    }
1901
1902    let candidate = input[start..].trim();
1903    if !candidate.is_empty() {
1904        names.push(candidate);
1905    }
1906
1907    names
1908}
1909
/// Reports whether the word `and` (ASCII case-insensitive) starts at byte
/// offset `index` and stands alone: the character before it and the
/// character after it must each be whitespace or absent.
///
/// # Panics
///
/// Panics if `index` is not on a `char` boundary of `input`.
fn starts_name_separator(input: &str, index: usize) -> bool {
    let (head, tail) = input.split_at(index);

    let is_and = tail
        .get(..3)
        .is_some_and(|word| word.eq_ignore_ascii_case("and"));
    if !is_and {
        return false;
    }

    // The start or end of the string counts as a boundary too.
    let is_boundary = |neighbour: Option<char>| neighbour.map_or(true, char::is_whitespace);
    is_boundary(head.chars().next_back()) && is_boundary(tail[3..].chars().next())
}
1927
1928fn parse_single_name(input: &str) -> PersonName {
1929    let raw = input.trim();
1930    if let Some(literal) = braced_literal_name(raw) {
1931        return person_name(
1932            raw,
1933            String::new(),
1934            String::new(),
1935            literal.clone(),
1936            String::new(),
1937            Some(literal),
1938        );
1939    }
1940
1941    let parts = split_top_level_commas(input);
1942    match parts.as_slice() {
1943        [last] => parse_first_von_last(last),
1944        [last, first] => {
1945            let (von, last) = split_von_last(last);
1946            person_name(
1947                raw,
1948                normalize_name_part(first),
1949                von,
1950                last,
1951                String::new(),
1952                None,
1953            )
1954        }
1955        [last, jr, first, ..] => {
1956            let (von, last) = split_von_last(last);
1957            person_name(
1958                raw,
1959                normalize_name_part(first),
1960                von,
1961                last,
1962                normalize_name_part(jr),
1963                None,
1964            )
1965        }
1966        [] => empty_person_name(raw),
1967    }
1968}
1969
1970fn parse_first_von_last(input: &str) -> PersonName {
1971    let raw = input.trim();
1972    let words = split_name_words(input);
1973    match words.len() {
1974        0 => empty_person_name(raw),
1975        1 => person_name(
1976            raw,
1977            String::new(),
1978            String::new(),
1979            normalize_name_part(words[0]),
1980            String::new(),
1981            None,
1982        ),
1983        _ => {
1984            let von_start = words
1985                .iter()
1986                .position(|word| starts_with_lowercase_letter(word));
1987            let (first, von, last) = von_start.map_or_else(
1988                || {
1989                    (
1990                        join_name_words(&words[..words.len() - 1]),
1991                        String::new(),
1992                        normalize_name_part(words[words.len() - 1]),
1993                    )
1994                },
1995                |von_start| {
1996                    let last_start = words[von_start + 1..]
1997                        .iter()
1998                        .position(|word| !starts_with_lowercase_letter(word))
1999                        .map_or(words.len() - 1, |offset| von_start + 1 + offset);
2000
2001                    (
2002                        join_name_words(&words[..von_start]),
2003                        join_name_words(&words[von_start..last_start]),
2004                        join_name_words(&words[last_start..]),
2005                    )
2006                },
2007            );
2008
2009            person_name(raw, first, von, last, String::new(), None)
2010        }
2011    }
2012}
2013
2014fn person_name(
2015    raw: &str,
2016    first: String,
2017    von: String,
2018    last: String,
2019    jr: String,
2020    literal: Option<String>,
2021) -> PersonName {
2022    let given = split_component_tokens(&first);
2023    let family = split_component_tokens(&last);
2024    let prefix = split_component_tokens(&von);
2025    let suffix = split_component_tokens(&jr);
2026    PersonName {
2027        raw: raw.to_string(),
2028        first,
2029        von,
2030        last,
2031        jr,
2032        given,
2033        family,
2034        prefix,
2035        suffix,
2036        literal,
2037    }
2038}
2039
2040fn empty_person_name(raw: &str) -> PersonName {
2041    person_name(
2042        raw,
2043        String::new(),
2044        String::new(),
2045        String::new(),
2046        String::new(),
2047        None,
2048    )
2049}
2050
2051fn split_component_tokens(input: &str) -> Vec<String> {
2052    split_name_words(input)
2053        .into_iter()
2054        .map(normalize_name_part)
2055        .filter(|part| !part.is_empty())
2056        .collect()
2057}
2058
2059fn split_von_last(input: &str) -> (String, String) {
2060    let words = split_name_words(input);
2061    if words.is_empty() {
2062        return (String::new(), String::new());
2063    }
2064
2065    if let Some(last_start) = words
2066        .iter()
2067        .rposition(|word| starts_with_lowercase_letter(word))
2068    {
2069        if last_start + 1 < words.len() {
2070            return (
2071                join_name_words(&words[..=last_start]),
2072                join_name_words(&words[last_start + 1..]),
2073            );
2074        }
2075    }
2076
2077    if words.len() == 1 {
2078        (String::new(), normalize_name_part(words[0]))
2079    } else {
2080        (
2081            join_name_words(&words[..words.len() - 1]),
2082            normalize_name_part(words[words.len() - 1]),
2083        )
2084    }
2085}
2086
/// Splits `input` at every comma that is not inside braces and trims each
/// piece.
///
/// Always returns at least one element; empty pieces are kept so callers
/// can tell the parts apart by position.
fn split_top_level_commas(input: &str) -> Vec<&str> {
    // Braces and commas are ASCII, so byte offsets are valid cut points.
    let mut nesting = 0usize;
    let mut cut_points = Vec::new();
    for (offset, byte) in input.bytes().enumerate() {
        match byte {
            b'{' => nesting += 1,
            b'}' => nesting = nesting.saturating_sub(1),
            b',' if nesting == 0 => cut_points.push(offset),
            _ => {}
        }
    }

    let mut pieces = Vec::with_capacity(cut_points.len() + 1);
    let mut piece_start = 0;
    for cut in cut_points {
        pieces.push(input[piece_start..cut].trim());
        piece_start = cut + 1;
    }
    pieces.push(input[piece_start..].trim());
    pieces
}
2107
/// Splits a name into whitespace-separated words, treating a brace group
/// as part of a single word.
///
/// Whitespace only ends a word outside braces. A closing brace never
/// starts a new word, so a stray leading `}` is dropped.
fn split_name_words(input: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    let mut open_at: Option<usize> = None;
    let mut nesting = 0usize;

    for (position, symbol) in input.char_indices() {
        if symbol == '}' {
            nesting = nesting.saturating_sub(1);
            continue;
        }
        if nesting == 0 && symbol.is_whitespace() {
            if let Some(begin) = open_at.take() {
                tokens.push(&input[begin..position]);
            }
            continue;
        }
        if symbol == '{' {
            nesting += 1;
        }
        if open_at.is_none() {
            open_at = Some(position);
        }
    }

    if let Some(begin) = open_at {
        tokens.push(&input[begin..]);
    }

    tokens
        .into_iter()
        .map(str::trim)
        .filter(|token| !token.is_empty())
        .collect()
}
2139
2140fn join_name_words(words: &[&str]) -> String {
2141    words
2142        .iter()
2143        .map(|word| normalize_name_part(word))
2144        .filter(|word| !word.is_empty())
2145        .collect::<Vec<_>>()
2146        .join(" ")
2147}
2148
/// Trims a name part and removes one pair of enclosing braces, if present,
/// trimming again inside them.
///
/// Only the first and last characters are checked; inner brace balance is
/// not.
fn normalize_name_part(input: &str) -> String {
    let outer = input.trim();
    let unwrapped = outer
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'));

    match unwrapped {
        Some(inner) => inner.trim().to_string(),
        None => outer.to_string(),
    }
}
2157
/// Returns the content of a name that is entirely wrapped in a single
/// top-level brace group, e.g. `{Barnes and Noble}`.
///
/// Returns `None` when the trimmed input is not enclosed in braces, when
/// the opening brace closes before the final character (as in `{A} {B}`),
/// or when the braces are unbalanced. The returned text has the outer
/// braces removed and is trimmed.
fn braced_literal_name(input: &str) -> Option<String> {
    let candidate = input.trim();
    let inner = candidate.strip_prefix('{')?.strip_suffix('}')?;

    // Braces are ASCII, so byte offsets line up with char positions here.
    let final_index = candidate.len() - 1;
    let mut nesting = 0usize;
    for (position, byte) in candidate.bytes().enumerate() {
        if byte == b'{' {
            nesting += 1;
        } else if byte == b'}' {
            nesting = nesting.saturating_sub(1);
            // Closing the outermost group early means more than one group.
            if nesting == 0 && position != final_index {
                return None;
            }
        }
    }

    if nesting == 0 {
        Some(inner.trim().to_string())
    } else {
        None
    }
}
2180
/// Reports whether the first alphabetic character of `input` is lowercase.
///
/// Non-alphabetic characters (whitespace, braces, digits, punctuation) are
/// skipped; input with no alphabetic character yields `false`.
fn starts_with_lowercase_letter(input: &str) -> bool {
    // Stripping whitespace or outer braces first would not change which
    // alphabetic character comes first, so the input is scanned directly
    // without allocating.
    input
        .chars()
        .find(|ch| ch.is_alphabetic())
        .is_some_and(char::is_lowercase)
}
2187
/// Check if a string is a valid page range
/// Accepts formats like "12", "12-34", "12--34", "12-34,45-67"
///
/// Surrounding whitespace is ignored. A value is valid when it is a plain
/// number, or a comma-separated list with at least one non-empty entry in
/// which every entry containing a dash has exactly two non-empty sides.
/// Entries without a dash are not inspected further, and empty entries
/// (such as a trailing comma) are tolerated as long as one real entry exists.
fn is_valid_page_range(pages: &str) -> bool {
    let pages = pages.trim();
    if pages.is_empty() {
        return false;
    }

    // Accept single page numbers
    if pages.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    // Check for range patterns - must contain dash or comma
    if !pages.contains(['-', ',']) {
        return false;
    }

    // A value made only of separators (e.g. ",") has no entries and is invalid.
    let mut saw_entry = false;
    for entry in pages.split(',').map(str::trim).filter(|e| !e.is_empty()) {
        saw_entry = true;

        // Prefer the LaTeX-style double dash when present, else a single dash.
        let separator = if entry.contains("--") {
            "--"
        } else if entry.contains('-') {
            "-"
        } else {
            continue;
        };

        let mut bounds = entry.split(separator);
        let well_formed = matches!(
            (bounds.next(), bounds.next(), bounds.next()),
            (Some(low), Some(high), None)
                if !low.trim().is_empty() && !high.trim().is_empty()
        );
        if !well_formed {
            return false;
        }
    }

    saw_entry
}
2230
/// Check if a month value is valid
/// Accepts the three-letter English abbreviations and full month names
/// (case-insensitive), as well as an integer from 1 to 12. The value is
/// not trimmed.
fn is_valid_month(month: &str) -> bool {
    // Standard BibTeX month abbreviations followed by the full names
    // ("may" serves as both).
    const MONTH_NAMES: [&str; 23] = [
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
        "january",
        "february",
        "march",
        "april",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ];

    let folded = month.to_lowercase();
    if MONTH_NAMES.contains(&folded.as_str()) {
        return true;
    }

    matches!(month.parse::<i32>(), Ok(1..=12))
}