Skip to main content

bibtex_parser/
database.rs

1//! BibTeX library representation
2
3use crate::{
4    normalize_doi, Entry, Error, Result, SourceSpan, ValidationError, ValidationLevel, Value,
5};
6use ahash::AHashMap;
7use memchr::memchr;
8use std::borrow::Cow;
9use std::ops::Deref;
10use std::path::Path;
11
12#[cfg(feature = "parallel")]
13use rayon::prelude::*;
14
/// Largest entry count for which `ExpansionCache` keeps its pairs in a linearly
/// scanned `Vec`; beyond this it uses a hash map.
const SMALL_EXPANSION_CACHE_LIMIT: usize = 16;
/// Largest number of string definitions for which `get_string_definition` scans
/// the definition slice directly instead of consulting the lookup map.
const SMALL_STRING_LOOKUP_LIMIT: usize = 16;
/// Maximum number of entries `ConcatCache` retains; inserts past this are dropped.
const CONCAT_CACHE_LIMIT: usize = 16;
18
/// Cache mapping a string-variable name to its already expanded value.
///
/// Starts out as either representation depending on the requested capacity and
/// is promoted from `Small` to `Large` by `insert` once the small limit is hit.
enum ExpansionCache<'a> {
    /// Linearly scanned pairs; never grows past `SMALL_EXPANSION_CACHE_LIMIT`.
    Small(Vec<(Cow<'a, str>, Value<'a>)>),
    /// Hash-map storage used for larger caches.
    Large(AHashMap<Cow<'a, str>, Value<'a>>),
}
23
24impl<'a> ExpansionCache<'a> {
25    fn with_capacity(capacity: usize) -> Self {
26        if capacity <= SMALL_EXPANSION_CACHE_LIMIT {
27            Self::Small(Vec::with_capacity(capacity))
28        } else {
29            Self::Large(AHashMap::with_capacity(capacity))
30        }
31    }
32
33    fn get_cloned(&mut self, name: &str) -> Option<Value<'a>> {
34        match self {
35            Self::Small(entries) => {
36                let index = entries.iter().position(|(key, _)| key.as_ref() == name)?;
37                if index != 0 {
38                    entries.swap(0, index);
39                }
40                Some(entries[0].1.clone())
41            }
42            Self::Large(entries) => entries.get(name).cloned(),
43        }
44    }
45
46    fn insert(&mut self, name: Cow<'a, str>, value: Value<'a>) {
47        match self {
48            Self::Small(entries) => {
49                if entries.len() < SMALL_EXPANSION_CACHE_LIMIT {
50                    entries.push((name, value));
51                } else {
52                    let mut large = AHashMap::with_capacity(entries.len() + 1);
53                    for (key, value) in entries.drain(..) {
54                        large.insert(key, value);
55                    }
56                    large.insert(name, value);
57                    *self = Self::Large(large);
58                }
59            }
60            Self::Large(entries) => {
61                entries.insert(name, value);
62            }
63        }
64    }
65}
66
/// Small linear cache from a concatenation's parts to its expanded value.
struct ConcatCache<'a> {
    /// `(parts, expanded value)` pairs; capped at `CONCAT_CACHE_LIMIT` by `insert`.
    entries: Vec<(Box<[Value<'a>]>, Value<'a>)>,
}
70
71impl<'a> ConcatCache<'a> {
72    const fn new() -> Self {
73        Self {
74            entries: Vec::new(),
75        }
76    }
77
78    fn get_cloned(&mut self, parts: &[Value<'a>]) -> Option<Value<'a>> {
79        let index = self
80            .entries
81            .iter()
82            .position(|(cached_parts, _)| concat_parts_equal(cached_parts, parts))?;
83        if index != 0 {
84            self.entries.swap(0, index);
85        }
86        Some(self.entries[0].1.clone())
87    }
88
89    fn insert(&mut self, parts: Box<[Value<'a>]>, value: Value<'a>) {
90        if self.entries.len() < CONCAT_CACHE_LIMIT {
91            self.entries.push((parts, value));
92        }
93    }
94}
95
96fn concat_parts_equal(left: &[Value<'_>], right: &[Value<'_>]) -> bool {
97    left.len() == right.len()
98        && left
99            .iter()
100            .zip(right)
101            .all(|(left, right)| cache_values_equal(left, right))
102}
103
104fn cache_values_equal(left: &Value<'_>, right: &Value<'_>) -> bool {
105    match (left, right) {
106        (Value::Literal(left), Value::Literal(right))
107        | (Value::Variable(left), Value::Variable(right)) => left.as_ref() == right.as_ref(),
108        (Value::Number(left), Value::Number(right)) => left == right,
109        (Value::Concat(left), Value::Concat(right)) => concat_parts_equal(left, right),
110        _ => false,
111    }
112}
113
/// Map a three-letter month abbreviation to its full English name.
///
/// Matching ignores ASCII case. Anything that is not exactly three bytes
/// long, or that is not one of the twelve abbreviations, yields `None`.
/// This is used as a fallback when user-defined string variables are not found.
#[inline]
fn get_month_expansion(name: &str) -> Option<&'static str> {
    // Setting bit 0x20 folds ASCII upper-case letters onto lower-case ones.
    let folded = match name.as_bytes() {
        &[first, second, third] => [first | 0x20, second | 0x20, third | 0x20],
        _ => return None,
    };

    let month = match &folded {
        b"jan" => "January",
        b"feb" => "February",
        b"mar" => "March",
        b"apr" => "April",
        b"may" => "May",
        b"jun" => "June",
        b"jul" => "July",
        b"aug" => "August",
        b"sep" => "September",
        b"oct" => "October",
        b"nov" => "November",
        b"dec" => "December",
        _ => return None,
    };
    Some(month)
}
145
146#[inline]
147fn get_string_value<'map, 'a>(
148    strings: &'map [StringDefinition<'a>],
149    string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
150    name: &str,
151) -> Option<&'map Value<'a>> {
152    get_string_definition(strings, string_lookup, name).map(|definition| &definition.value)
153}
154
155#[inline]
156fn get_string_definition<'map, 'a>(
157    strings: &'map [StringDefinition<'a>],
158    string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
159    name: &str,
160) -> Option<&'map StringDefinition<'a>> {
161    if strings.len() <= SMALL_STRING_LOOKUP_LIMIT {
162        strings
163            .iter()
164            .rev()
165            .find(|definition| definition.name.as_ref() == name)
166    } else {
167        string_lookup
168            .get(name)
169            .and_then(|&index| strings.get(index))
170    }
171}
172
173#[inline]
174fn user_strings_shadow_month_constants(strings: &[StringDefinition<'_>]) -> bool {
175    strings
176        .iter()
177        .any(|definition| get_month_expansion(definition.name.as_ref()).is_some())
178}
179
180/// Check if a value contains any variables
181#[inline]
182fn contains_variables(value: &Value) -> bool {
183    match value {
184        Value::Variable(_) => true,
185        Value::Concat(parts) => parts.iter().any(contains_variables),
186        _ => false,
187    }
188}
189
190/// Check if a value contains variables that might be month constants
191#[inline]
192fn contains_potential_month_variables(value: &Value) -> bool {
193    match value {
194        Value::Variable(name) => get_month_expansion(name).is_some(),
195        Value::Concat(parts) => parts.iter().any(contains_potential_month_variables),
196        _ => false,
197    }
198}
199
/// Report whether `byte` may continue an identifier: an ASCII letter or digit,
/// or one of `_`, `-`, `:`, `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
207
208#[inline]
209fn starts_with_at_keyword(input: &[u8], keyword: &[u8]) -> bool {
210    if input.first() != Some(&b'@') || input.len() < keyword.len() + 1 {
211        return false;
212    }
213
214    for (offset, &expected) in keyword.iter().enumerate() {
215        if (input[offset + 1] | 0x20) != expected {
216            return false;
217        }
218    }
219
220    if input.len() == keyword.len() + 1 {
221        return true;
222    }
223
224    !is_identifier_char(input[keyword.len() + 1])
225}
226
/// Result of the byte-level pre-scan performed by `scan_input`.
#[derive(Debug, Clone, Copy)]
struct InputScan {
    /// True when some `@` in the input is followed by the `string` keyword.
    may_contain_string_definition: bool,
    /// Total number of `@` bytes seen; used as a capacity estimate.
    at_count: usize,
}
232
233/// Fast pre-scan to detect `@string` entries and estimate block capacity.
234fn scan_input(input: &str) -> InputScan {
235    let bytes = input.as_bytes();
236    let mut pos = 0;
237    let mut at_count = 0;
238    let mut may_contain_string_definition = false;
239
240    while pos < bytes.len() {
241        if let Some(offset) = memchr(b'@', &bytes[pos..]) {
242            let at = pos + offset;
243            at_count += 1;
244            if starts_with_at_keyword(&bytes[at..], b"string") {
245                may_contain_string_definition = true;
246            }
247            pos = at + 1;
248        } else {
249            break;
250        }
251    }
252
253    InputScan {
254        may_contain_string_definition,
255        at_count,
256    }
257}
258
259/// Detect whether a `@string` may appear after a regular entry.
260///
261/// False positives are acceptable (we take the conservative slow path), but
262/// false negatives would be incorrect, so keyword matching mirrors parser rules.
263fn input_may_have_late_string_definition(input: &str) -> bool {
264    let bytes = input.as_bytes();
265    let mut pos = 0;
266    let mut saw_regular_entry = false;
267
268    while pos < bytes.len() {
269        if let Some(offset) = memchr(b'@', &bytes[pos..]) {
270            let at = pos + offset;
271            let tail = &bytes[at..];
272
273            if starts_with_at_keyword(tail, b"string") {
274                if saw_regular_entry {
275                    return true;
276                }
277            } else if !saw_regular_entry
278                && !starts_with_at_keyword(tail, b"preamble")
279                && !starts_with_at_keyword(tail, b"comment")
280            {
281                // Anything else that looks like `@<identifier>` is treated as a regular entry.
282                saw_regular_entry = true;
283            }
284
285            pos = at + 1;
286        } else {
287            break;
288        }
289    }
290
291    false
292}
293
294fn source_span(input: &str, byte_start: usize, byte_end: usize) -> SourceSpan {
295    let (line, column) = source_position(input, byte_start);
296    SourceSpan::new(byte_start, byte_end, line, column)
297}
298
/// Compute the 1-based `(line, column)` of byte offset `pos` in `input`.
///
/// Columns count characters, not bytes. Only characters that start before
/// `pos` are considered, so an offset past the end reports the end position.
fn source_position(input: &str, pos: usize) -> (usize, usize) {
    input
        .char_indices()
        .take_while(|&(byte_index, _)| byte_index < pos)
        .fold((1usize, 1usize), |(line, column), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
317
318fn next_recovery_boundary(input: &str, start: usize) -> usize {
319    let bytes = input.as_bytes();
320    let mut pos = start.saturating_add(1);
321    while pos < bytes.len() {
322        if bytes[pos] == b'@' && line_prefix_is_whitespace(bytes, pos) {
323            return pos;
324        }
325        pos += 1;
326    }
327    input.len()
328}
329
/// Report whether everything between the start of the line containing `pos`
/// and `pos` itself is spaces or tabs. A line starts after `\n` or `\r`.
fn line_prefix_is_whitespace(bytes: &[u8], pos: usize) -> bool {
    bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte != b'\n' && byte != b'\r')
        .all(|&byte| byte == b' ' || byte == b'\t')
}
340
/// Parser configuration.
///
/// All options default to off/unset; configure through the builder methods.
#[derive(Debug, Default, Clone)]
pub struct Parser {
    /// Requested thread count for `parse_files`; `None` leaves the pool size unset.
    threads: Option<usize>,
    /// When true, `parse` uses the tolerant parsing path.
    tolerant: bool,
    /// When true, `parse` captures source spans.
    capture_source: bool,
}
348
349impl Parser {
350    /// Create a new parser.
351    #[must_use]
352    #[inline]
353    pub fn new() -> Self {
354        Self::default()
355    }
356
357    /// Set number of threads (None = use all available)
358    #[must_use]
359    #[inline]
360    pub fn threads(mut self, threads: impl Into<Option<usize>>) -> Self {
361        self.threads = threads.into();
362        self
363    }
364
365    /// Continue after malformed blocks and collect diagnostics.
366    #[must_use]
367    #[inline]
368    pub const fn tolerant(mut self) -> Self {
369        self.tolerant = true;
370        self
371    }
372
373    /// Capture source spans for blocks.
374    #[must_use]
375    #[inline]
376    pub const fn capture_source(mut self) -> Self {
377        self.capture_source = true;
378        self
379    }
380
381    /// Parse a single input string.
382    #[inline]
383    pub fn parse<'a>(&self, input: &'a str) -> Result<Library<'a>> {
384        if self.tolerant {
385            Library::parse_tolerant(input, self.capture_source)
386        } else if self.capture_source {
387            Library::parse_with_spans(input)
388        } else {
389            Library::parse_sequential(input)
390        }
391    }
392
393    /// Parse multiple files in parallel
394    pub fn parse_files<P: AsRef<Path> + Sync>(&self, paths: &[P]) -> Result<Library<'static>> {
395        #[cfg(feature = "parallel")]
396        {
397            if let Some(threads) = self.threads {
398                if threads <= 1 {
399                    return Self::parse_files_sequential(paths);
400                }
401            }
402
403            let pool = self.build_thread_pool()?;
404
405            let libraries: Result<Vec<_>> = pool.install(|| {
406                paths
407                    .par_iter()
408                    .map(|path| {
409                        let content = std::fs::read_to_string(path)?;
410                        let library = Library::parse_sequential(&content)?;
411                        Ok(library.into_owned())
412                    })
413                    .collect()
414            });
415
416            let libraries = libraries?;
417            Ok(Library::merge_libraries_parallel(libraries))
418        }
419
420        #[cfg(not(feature = "parallel"))]
421        {
422            Self::parse_files_sequential(paths)
423        }
424    }
425
426    /// Sequential file parsing fallback
427    fn parse_files_sequential<P: AsRef<Path>>(paths: &[P]) -> Result<Library<'static>> {
428        let mut result = Library::new();
429        for path in paths {
430            let content = std::fs::read_to_string(path)?;
431            let library = Library::parse_sequential(&content)?;
432            result.merge(library.into_owned());
433        }
434        Ok(result)
435    }
436
437    #[cfg(feature = "parallel")]
438    fn build_thread_pool(&self) -> Result<rayon::ThreadPool> {
439        let mut builder = rayon::ThreadPoolBuilder::new();
440
441        if let Some(threads) = self.threads {
442            builder = builder.num_threads(threads);
443        }
444
445        builder
446            .build()
447            .map_err(|e| Error::WinnowError(e.to_string()))
448    }
449}
450
/// A high-level block in a parsed BibTeX library.
///
/// Every variant borrows its payload from the library for `'lib`, so the enum
/// itself is `Copy`.
#[derive(Debug, Clone, Copy)]
pub enum Block<'lib, 'a> {
    /// A regular bibliography entry, with its source span when one is available.
    Entry(&'lib Entry<'a>, Option<SourceSpan>),
    /// A string definition.
    String(&'lib StringDefinition<'a>),
    /// A preamble block.
    Preamble(&'lib Preamble<'a>),
    /// A comment block.
    Comment(&'lib Comment<'a>),
    /// A malformed block retained by tolerant parsing.
    Failed(&'lib FailedBlock<'a>),
}
465
/// One slot in the library's block order.
///
/// Each variant carries the index of the block inside the vector that stores
/// blocks of that kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockKind {
    /// Index into the entries vector.
    Entry(usize),
    /// Index into the string-definitions vector.
    String(usize),
    /// Index into the preambles vector.
    Preamble(usize),
    /// Index into the comments vector.
    Comment(usize),
    /// Index into the failed-blocks vector.
    Failed(usize),
}
474
/// Intermediate item produced while building a library.
// NOTE(review): presumably used by the tolerant/span-capturing parse paths — confirm against callers.
#[derive(Debug)]
enum RawBuildItem<'a> {
    /// A successfully parsed item together with its source span.
    Parsed(crate::parser::ParsedItem<'a>, SourceSpan),
    /// A block that failed to parse.
    Failed(FailedBlock<'a>),
}
480
/// A BibTeX string definition: a named value that other values can reference.
#[derive(Debug, Clone, PartialEq)]
pub struct StringDefinition<'a> {
    /// String variable name.
    pub name: Cow<'a, str>,
    /// Unexpanded string value.
    pub value: Value<'a>,
    /// Optional source location; `None` when spans were not captured.
    pub source: Option<SourceSpan>,
}
491
492impl<'a> StringDefinition<'a> {
493    /// Create a string definition.
494    #[must_use]
495    pub const fn new(name: &'a str, value: Value<'a>) -> Self {
496        Self {
497            name: Cow::Borrowed(name),
498            value,
499            source: None,
500        }
501    }
502
503    /// Return the string name.
504    #[must_use]
505    pub fn name(&self) -> &str {
506        &self.name
507    }
508
509    /// Return the string value.
510    #[must_use]
511    pub const fn value(&self) -> &Value<'a> {
512        &self.value
513    }
514
515    /// Convert to an owned definition.
516    #[must_use]
517    pub fn into_owned(self) -> StringDefinition<'static> {
518        StringDefinition {
519            name: Cow::Owned(self.name.into_owned()),
520            value: self.value.into_owned(),
521            source: self.source,
522        }
523    }
524}
525
/// A BibTeX preamble block.
///
/// Dereferences to its [`Value`].
#[derive(Debug, Clone, PartialEq)]
pub struct Preamble<'a> {
    /// Expanded preamble value.
    pub value: Value<'a>,
    /// Optional source location; `None` when spans were not captured.
    pub source: Option<SourceSpan>,
}
534
535impl<'a> Preamble<'a> {
536    /// Create a preamble block.
537    #[must_use]
538    pub const fn new(value: Value<'a>) -> Self {
539        Self {
540            value,
541            source: None,
542        }
543    }
544
545    /// Return the preamble value.
546    #[must_use]
547    pub const fn value(&self) -> &Value<'a> {
548        &self.value
549    }
550
551    /// Convert to an owned preamble.
552    #[must_use]
553    pub fn into_owned(self) -> Preamble<'static> {
554        Preamble {
555            value: self.value.into_owned(),
556            source: self.source,
557        }
558    }
559}
560
561impl<'a> Deref for Preamble<'a> {
562    type Target = Value<'a>;
563
564    fn deref(&self) -> &Self::Target {
565        &self.value
566    }
567}
568
/// A BibTeX comment block.
///
/// Dereferences to its text as `str`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment<'a> {
    /// Comment text.
    pub text: Cow<'a, str>,
    /// Optional source location; `None` when spans were not captured.
    pub source: Option<SourceSpan>,
}
577
578impl<'a> Comment<'a> {
579    /// Create a comment block.
580    #[must_use]
581    pub const fn new(text: &'a str) -> Self {
582        Self {
583            text: Cow::Borrowed(text),
584            source: None,
585        }
586    }
587
588    /// Return the comment text.
589    #[must_use]
590    pub fn text(&self) -> &str {
591        &self.text
592    }
593
594    /// Convert to an owned comment.
595    #[must_use]
596    pub fn into_owned(self) -> Comment<'static> {
597        Comment {
598            text: Cow::Owned(self.text.into_owned()),
599            source: self.source,
600        }
601    }
602}
603
604impl Deref for Comment<'_> {
605    type Target = str;
606
607    fn deref(&self) -> &Self::Target {
608        &self.text
609    }
610}
611
/// A malformed block retained by tolerant parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FailedBlock<'a> {
    /// Raw source for the malformed block.
    pub raw: Cow<'a, str>,
    /// Parse error message, stored as text.
    pub error: String,
    /// Optional source location; `None` when spans were not captured.
    pub source: Option<SourceSpan>,
}
622
623impl FailedBlock<'_> {
624    /// Convert to an owned failed block.
625    #[must_use]
626    pub fn into_owned(self) -> FailedBlock<'static> {
627        FailedBlock {
628            raw: Cow::Owned(self.raw.into_owned()),
629            error: self.error,
630            source: self.source,
631        }
632    }
633}
634
/// Month rendering style used by month normalization.
///
/// The default is [`MonthStyle::Long`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MonthStyle {
    /// Full English month names such as `January`.
    #[default]
    Long,
    /// Three-letter lowercase BibTeX abbreviations such as `jan`.
    Abbrev,
    /// One-based month numbers such as `1`.
    Number,
}
646
/// Entry and field ordering options.
///
/// Both flags are `false` by default, i.e. no sorting is requested.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SortOptions {
    /// Sort regular entries by citation key.
    pub entries_by_key: bool,
    /// Sort fields inside each entry by field name.
    pub fields_by_name: bool,
}
655
/// Field-name casing policy for field normalization.
///
/// The default is [`FieldNameCase::Preserve`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameCase {
    /// Preserve existing field names.
    #[default]
    Preserve,
    /// Convert field names to lowercase ASCII.
    Lowercase,
}
665
/// Field normalization options.
///
/// By default names are preserved and alias normalization is off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FieldNormalizeOptions {
    /// Field-name casing policy.
    pub name_case: FieldNameCase,
    /// Normalize common BibLaTeX aliases to classic BibTeX field names.
    pub biblatex_aliases: bool,
}
674
/// A parsed BibTeX library.
///
/// Blocks are stored per kind in separate vectors; `block_order` records the
/// order in which they were added.
#[derive(Debug, Clone, Default)]
pub struct Library<'a> {
    /// Bibliography entries
    entries: Vec<Entry<'a>>,
    /// Optional entry source spans, parallel to `entries`; stays `None` until
    /// an entry is pushed with a span.
    entry_sources: Option<Vec<Option<SourceSpan>>>,
    /// String definitions
    strings: Vec<StringDefinition<'a>>,
    /// Latest string definition by name: maps a name to its index in `strings`
    string_lookup: AHashMap<Cow<'a, str>, usize>,
    /// Preambles
    preambles: Vec<Preamble<'a>>,
    /// Comments
    comments: Vec<Comment<'a>>,
    /// Failed blocks retained during tolerant parsing
    failed_blocks: Vec<FailedBlock<'a>>,
    /// Original block order, as kind-tagged indices into the vectors above
    block_order: Vec<BlockKind>,
}
695
696impl<'a> Library<'a> {
697    fn push_entry_with_source(&mut self, entry: Entry<'a>, source: Option<SourceSpan>) {
698        let index = self.entries.len();
699        self.entries.push(entry);
700        if let Some(sources) = &mut self.entry_sources {
701            sources.push(source);
702        } else if source.is_some() {
703            let mut sources = vec![None; index];
704            sources.push(source);
705            self.entry_sources = Some(sources);
706        }
707        self.block_order.push(BlockKind::Entry(index));
708    }
709
710    fn register_string_definition(
711        &mut self,
712        name: Cow<'a, str>,
713        value: Value<'a>,
714        source: Option<SourceSpan>,
715    ) -> usize {
716        let index = self.strings.len();
717        self.string_lookup.insert(name.clone(), index);
718        self.strings.push(StringDefinition {
719            name,
720            value,
721            source,
722        });
723        index
724    }
725
726    fn push_string_with_source(
727        &mut self,
728        name: Cow<'a, str>,
729        value: Value<'a>,
730        source: Option<SourceSpan>,
731    ) {
732        let index = self.register_string_definition(name, value, source);
733        self.block_order.push(BlockKind::String(index));
734    }
735
736    fn push_preamble_with_source(&mut self, value: Value<'a>, source: Option<SourceSpan>) -> usize {
737        let index = self.preambles.len();
738        self.preambles.push(Preamble { value, source });
739        self.block_order.push(BlockKind::Preamble(index));
740        index
741    }
742
743    fn push_comment_with_source(&mut self, text: Cow<'a, str>, source: Option<SourceSpan>) {
744        let index = self.comments.len();
745        self.comments.push(Comment { text, source });
746        self.block_order.push(BlockKind::Comment(index));
747    }
748
749    fn push_failed_block(&mut self, failed: FailedBlock<'a>) {
750        let index = self.failed_blocks.len();
751        self.failed_blocks.push(failed);
752        self.block_order.push(BlockKind::Failed(index));
753    }
754
755    #[inline]
756    fn expand_value_for_parse(
757        &self,
758        value: &mut Value<'a>,
759        has_user_strings: bool,
760        month_constants_shadowed: bool,
761        expanded_variables: &mut ExpansionCache<'a>,
762        expansion_stack: &mut Vec<Cow<'a, str>>,
763        concat_cache: &mut ConcatCache<'a>,
764    ) -> Result<()> {
765        match value {
766            Value::Literal(_) | Value::Number(_) => Ok(()),
767            Value::Variable(name) => {
768                if !has_user_strings || !month_constants_shadowed {
769                    if let Some(month_value) = get_month_expansion(name.as_ref()) {
770                        *value = Value::Literal(Cow::Borrowed(month_value));
771                        return Ok(());
772                    }
773                }
774
775                if has_user_strings {
776                    if let Some(expanded) = expanded_variables.get_cloned(name.as_ref()) {
777                        *value = expanded;
778                        return Ok(());
779                    }
780
781                    let old_value = std::mem::take(value);
782                    *value = self.smart_expand_value_cached(
783                        old_value,
784                        expanded_variables,
785                        expansion_stack,
786                        concat_cache,
787                    )?;
788                }
789
790                Ok(())
791            }
792            Value::Concat(parts) => {
793                if has_user_strings {
794                    if let Some(expanded) = concat_cache.get_cloned(parts) {
795                        *value = expanded;
796                        return Ok(());
797                    }
798                }
799
800                let needs_expansion = if has_user_strings {
801                    parts.iter().any(contains_variables)
802                } else {
803                    parts.iter().any(contains_potential_month_variables)
804                };
805
806                if needs_expansion {
807                    if !has_user_strings {
808                        if let Some(expanded) = concat_cache.get_cloned(parts) {
809                            *value = expanded;
810                            return Ok(());
811                        }
812                    }
813
814                    let old_value = std::mem::take(value);
815                    *value = self.smart_expand_value_cached(
816                        old_value,
817                        expanded_variables,
818                        expansion_stack,
819                        concat_cache,
820                    )?;
821                }
822
823                Ok(())
824            }
825        }
826    }
827
    /// Create a new empty library
    ///
    /// Equivalent to `Library::default()`: no blocks of any kind.
    #[must_use]
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }
834
835    /// Create a parser with options
836    ///
837    /// # Parallel Processing
838    ///
839    /// The `threads` option only affects `parse_files()`. Single file
840    /// parsing with `parse()` is sequential.
841    ///
842    /// # Example
843    ///
844    /// ```rust,no_run
845    /// use bibtex_parser::Library;
846    /// // Parse multiple files in parallel
847    /// let library = Library::parser()
848    ///     .threads(4)
849    ///     .parse_files(&["file1.bib", "file2.bib"]).unwrap();
850    ///
851    /// // Single-file parsing stays sequential
852    /// let content = "@article{demo, title=\"Demo\"}";
853    /// let library = Library::parser()
854    ///     .threads(4)
855    ///     .parse(content).unwrap();
856    /// ```
857    #[must_use]
858    #[inline]
859    pub fn parser() -> Parser {
860        Parser::new()
861    }
862
863    /// Parse a BibTeX library from a string with default strict settings.
864    pub fn parse(input: &'a str) -> Result<Self> {
865        Self::parser().parse(input)
866    }
867
868    /// Parse a BibTeX library from a file into owned data.
869    pub fn parse_file(path: impl AsRef<Path>) -> Result<Library<'static>> {
870        let content = std::fs::read_to_string(path)?;
871        Library::parser().parse(&content).map(Library::into_owned)
872    }
873
    /// Serialize this library to BibTeX.
    ///
    /// Delegates to `crate::writer::to_string` and returns its result unchanged.
    pub fn to_bibtex(&self) -> Result<String> {
        crate::writer::to_string(self)
    }
878
    /// Serialize this library to a BibTeX file.
    ///
    /// Delegates to `crate::writer::to_file` and returns its result unchanged.
    pub fn write_file(&self, path: impl AsRef<Path>) -> Result<()> {
        crate::writer::to_file(self, path)
    }
883
884    /// Parse a BibTeX library from a string (single-threaded implementation)
885    #[allow(clippy::too_many_lines)]
886    pub(crate) fn parse_sequential(input: &'a str) -> Result<Self> {
887        let mut db = Self::new();
888        let input_scan = scan_input(input);
889
890        // Fast path for common corpora (like tugboat) with no user-defined strings.
891        // This avoids buffering all entries before expansion.
892        if !input_scan.may_contain_string_definition {
893            db.entries.reserve(input_scan.at_count);
894            db.block_order.reserve(input_scan.at_count);
895            let has_user_strings = false;
896            let month_constants_shadowed = false;
897            let mut expanded_variables = ExpansionCache::with_capacity(0);
898            let mut expansion_stack = Vec::new();
899            let mut concat_cache = ConcatCache::new();
900
901            crate::parser::parse_bibtex_stream(input, |item| {
902                match item {
903                    crate::parser::ParsedItem::Entry(mut entry) => {
904                        for field in &mut entry.fields {
905                            db.expand_value_for_parse(
906                                &mut field.value,
907                                has_user_strings,
908                                month_constants_shadowed,
909                                &mut expanded_variables,
910                                &mut expansion_stack,
911                                &mut concat_cache,
912                            )?;
913                        }
914                        db.push_entry_with_source(entry, None);
915                    }
916                    crate::parser::ParsedItem::Preamble(value) => {
917                        let mut expanded = value;
918                        db.expand_value_for_parse(
919                            &mut expanded,
920                            has_user_strings,
921                            month_constants_shadowed,
922                            &mut expanded_variables,
923                            &mut expansion_stack,
924                            &mut concat_cache,
925                        )?;
926                        db.push_preamble_with_source(expanded, None);
927                    }
928                    crate::parser::ParsedItem::Comment(text) => {
929                        db.push_comment_with_source(Cow::Borrowed(text), None);
930                    }
931                    crate::parser::ParsedItem::String(name, value) => {
932                        // Defensive fallback for scanner false negatives.
933                        db.push_string_with_source(Cow::Borrowed(name), value, None);
934                    }
935                }
936                Ok(())
937            })?;
938
939            return Ok(db);
940        }
941
942        db.block_order.reserve(input_scan.at_count);
943
944        // Single-pass path when all @string definitions appear before regular
945        // entries. This keeps correctness while avoiding buffering entries and
946        // a full second pass over them.
947        if !input_may_have_late_string_definition(input) {
948            let mut pending_preambles = Vec::new();
949            let mut expanded_variables = ExpansionCache::with_capacity(0);
950            let mut expansion_stack = Vec::new();
951            let mut concat_cache = ConcatCache::new();
952            let mut month_constants_shadowed = None;
953
954            crate::parser::parse_bibtex_stream(input, |item| {
955                match item {
956                    crate::parser::ParsedItem::Entry(mut entry) => {
957                        let has_user_strings = !db.strings.is_empty();
958                        let month_constants_shadowed = *month_constants_shadowed
959                            .get_or_insert_with(|| {
960                                has_user_strings && user_strings_shadow_month_constants(&db.strings)
961                            });
962                        for field in &mut entry.fields {
963                            db.expand_value_for_parse(
964                                &mut field.value,
965                                has_user_strings,
966                                month_constants_shadowed,
967                                &mut expanded_variables,
968                                &mut expansion_stack,
969                                &mut concat_cache,
970                            )?;
971                        }
972                        db.push_entry_with_source(entry, None);
973                    }
974                    crate::parser::ParsedItem::Preamble(value) => {
975                        let index = db.push_preamble_with_source(value, None);
976                        pending_preambles.push(index);
977                    }
978                    crate::parser::ParsedItem::String(name, value) => {
979                        db.push_string_with_source(Cow::Borrowed(name), value, None);
980                    }
981                    crate::parser::ParsedItem::Comment(text) => {
982                        db.push_comment_with_source(Cow::Borrowed(text), None);
983                    }
984                }
985                Ok(())
986            })?;
987
988            let has_user_strings = !db.strings.is_empty();
989            let month_constants_shadowed =
990                has_user_strings && user_strings_shadow_month_constants(&db.strings);
991            for index in pending_preambles {
992                let mut expanded = std::mem::take(&mut db.preambles[index].value);
993                db.expand_value_for_parse(
994                    &mut expanded,
995                    has_user_strings,
996                    month_constants_shadowed,
997                    &mut expanded_variables,
998                    &mut expansion_stack,
999                    &mut concat_cache,
1000                )?;
1001                db.preambles[index].value = expanded;
1002            }
1003
1004            return Ok(db);
1005        }
1006
1007        let mut entry_indices = Vec::new();
1008        let mut preamble_indices = Vec::new();
1009
1010        crate::parser::parse_bibtex_stream(input, |item| {
1011            match item {
1012                crate::parser::ParsedItem::Entry(entry) => {
1013                    let index = db.entries.len();
1014                    db.push_entry_with_source(entry, None);
1015                    entry_indices.push(index);
1016                }
1017                crate::parser::ParsedItem::Preamble(value) => {
1018                    let index = db.push_preamble_with_source(value, None);
1019                    preamble_indices.push(index);
1020                }
1021                crate::parser::ParsedItem::String(name, value) => {
1022                    db.push_string_with_source(Cow::Borrowed(name), value, None);
1023                }
1024                crate::parser::ParsedItem::Comment(text) => {
1025                    db.push_comment_with_source(Cow::Borrowed(text), None);
1026                }
1027            }
1028            Ok(())
1029        })?;
1030
1031        // Expand after parsing so all @string definitions are available globally.
1032        let has_user_strings = !db.strings.is_empty();
1033        let month_constants_shadowed =
1034            has_user_strings && user_strings_shadow_month_constants(&db.strings);
1035        let mut expanded_variables = ExpansionCache::with_capacity(db.strings.len());
1036        let mut expansion_stack = Vec::new();
1037        let mut concat_cache = ConcatCache::new();
1038
1039        for entry_index in entry_indices {
1040            let field_count = db.entries[entry_index].fields.len();
1041            for field_index in 0..field_count {
1042                let mut value =
1043                    std::mem::take(&mut db.entries[entry_index].fields[field_index].value);
1044                db.expand_value_for_parse(
1045                    &mut value,
1046                    has_user_strings,
1047                    month_constants_shadowed,
1048                    &mut expanded_variables,
1049                    &mut expansion_stack,
1050                    &mut concat_cache,
1051                )?;
1052                db.entries[entry_index].fields[field_index].value = value;
1053            }
1054        }
1055
1056        for preamble_index in preamble_indices {
1057            let mut expanded = std::mem::take(&mut db.preambles[preamble_index].value);
1058            db.expand_value_for_parse(
1059                &mut expanded,
1060                has_user_strings,
1061                month_constants_shadowed,
1062                &mut expanded_variables,
1063                &mut expansion_stack,
1064                &mut concat_cache,
1065            )?;
1066            db.preambles[preamble_index].value = expanded;
1067        }
1068
1069        Ok(db)
1070    }
1071
1072    fn parse_with_spans(input: &'a str) -> Result<Self> {
1073        let mut raw_items = Vec::new();
1074        crate::parser::parse_bibtex_stream_with_spans(input, |item, span, _raw| {
1075            raw_items.push(RawBuildItem::Parsed(item, span));
1076            Ok(())
1077        })?;
1078        Self::from_raw_items(raw_items)
1079    }
1080
1081    fn parse_tolerant(input: &'a str, capture_source: bool) -> Result<Self> {
1082        let mut raw_items = Vec::new();
1083        let mut remaining = input;
1084
1085        loop {
1086            crate::parser::lexer::skip_whitespace(&mut remaining);
1087            if remaining.is_empty() {
1088                break;
1089            }
1090
1091            let start = input.len() - remaining.len();
1092            match crate::parser::parse_item(&mut remaining) {
1093                Ok(item) => {
1094                    let end = input.len() - remaining.len();
1095                    raw_items.push(RawBuildItem::Parsed(item, source_span(input, start, end)));
1096                }
1097                Err(err) => {
1098                    let end = next_recovery_boundary(input, start);
1099                    let source = capture_source.then(|| source_span(input, start, end));
1100                    raw_items.push(RawBuildItem::Failed(FailedBlock {
1101                        raw: Cow::Borrowed(&input[start..end]),
1102                        error: format!("Failed to parse entry: {err}"),
1103                        source,
1104                    }));
1105                    remaining = &input[end..];
1106                }
1107            }
1108        }
1109
1110        Self::from_raw_items(raw_items)
1111    }
1112
1113    fn from_raw_items(raw_items: Vec<RawBuildItem<'a>>) -> Result<Self> {
1114        let mut library = Self::new();
1115
1116        for raw_item in &raw_items {
1117            if let RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span) =
1118                raw_item
1119            {
1120                library.register_string_definition(Cow::Borrowed(name), value.clone(), Some(*span));
1121            }
1122        }
1123
1124        let has_user_strings = !library.strings.is_empty();
1125        let month_constants_shadowed =
1126            has_user_strings && user_strings_shadow_month_constants(&library.strings);
1127        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
1128        let mut expansion_stack = Vec::new();
1129        let mut concat_cache = ConcatCache::new();
1130        let mut string_index = 0;
1131
1132        for raw_item in raw_items {
1133            match raw_item {
1134                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(mut entry), span) => {
1135                    for field in &mut entry.fields {
1136                        library.expand_value_for_parse(
1137                            &mut field.value,
1138                            has_user_strings,
1139                            month_constants_shadowed,
1140                            &mut expanded_variables,
1141                            &mut expansion_stack,
1142                            &mut concat_cache,
1143                        )?;
1144                    }
1145                    library.push_entry_with_source(entry, Some(span));
1146                }
1147                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _) => {
1148                    library.block_order.push(BlockKind::String(string_index));
1149                    string_index += 1;
1150                }
1151                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(mut value), span) => {
1152                    library.expand_value_for_parse(
1153                        &mut value,
1154                        has_user_strings,
1155                        month_constants_shadowed,
1156                        &mut expanded_variables,
1157                        &mut expansion_stack,
1158                        &mut concat_cache,
1159                    )?;
1160                    library.push_preamble_with_source(value, Some(span));
1161                }
1162                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span) => {
1163                    library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1164                }
1165                RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1166            }
1167        }
1168
1169        Ok(library)
1170    }
1171
1172    /// Merge another library into this one
1173    pub fn merge(&mut self, other: Self) {
1174        let entry_offset = self.entries.len();
1175        let string_offset = self.strings.len();
1176        let preamble_offset = self.preambles.len();
1177        let comment_offset = self.comments.len();
1178        let failed_offset = self.failed_blocks.len();
1179        let other_entry_count = other.entries.len();
1180        let other_entry_sources = other.entry_sources;
1181
1182        self.entries.extend(other.entries);
1183        match (&mut self.entry_sources, other_entry_sources) {
1184            (Some(sources), Some(other_sources)) => sources.extend(other_sources),
1185            (Some(sources), None) => {
1186                sources.extend(std::iter::repeat(None).take(other_entry_count));
1187            }
1188            (None, Some(other_sources)) => {
1189                let mut sources = vec![None; entry_offset];
1190                sources.extend(other_sources);
1191                self.entry_sources = Some(sources);
1192            }
1193            (None, None) => {}
1194        }
1195        self.preambles.extend(other.preambles);
1196        self.comments.extend(other.comments);
1197        self.failed_blocks.extend(other.failed_blocks);
1198
1199        for definition in other.strings {
1200            let index = self.strings.len();
1201            self.string_lookup.insert(definition.name.clone(), index);
1202            self.strings.push(definition);
1203        }
1204
1205        self.block_order
1206            .extend(other.block_order.into_iter().map(|kind| match kind {
1207                BlockKind::Entry(index) => BlockKind::Entry(entry_offset + index),
1208                BlockKind::String(index) => BlockKind::String(string_offset + index),
1209                BlockKind::Preamble(index) => BlockKind::Preamble(preamble_offset + index),
1210                BlockKind::Comment(index) => BlockKind::Comment(comment_offset + index),
1211                BlockKind::Failed(index) => BlockKind::Failed(failed_offset + index),
1212            }));
1213    }
1214
1215    #[cfg(feature = "parallel")]
1216    fn merge_libraries_parallel(libraries: Vec<Library<'static>>) -> Library<'static> {
1217        let mut result = Library::new();
1218        for library in libraries {
1219            result.merge(library);
1220        }
1221        result
1222    }
1223
1224    /// Get all entries
1225    #[must_use]
1226    pub fn entries(&self) -> &[Entry<'a>] {
1227        &self.entries
1228    }
1229
1230    /// Get mutable access to all entries
1231    #[must_use]
1232    pub fn entries_mut(&mut self) -> &mut Vec<Entry<'a>> {
1233        &mut self.entries
1234    }
1235
1236    /// Get all string definitions
1237    #[must_use]
1238    pub fn strings(&self) -> &[StringDefinition<'a>] {
1239        &self.strings
1240    }
1241
1242    /// Get a string definition by name.
1243    #[must_use]
1244    pub fn string(&self, name: &str) -> Option<&StringDefinition<'a>> {
1245        get_string_definition(&self.strings, &self.string_lookup, name)
1246    }
1247
1248    /// Get a string definition value by name.
1249    #[must_use]
1250    pub fn string_value(&self, name: &str) -> Option<&Value<'a>> {
1251        self.string(name).map(|definition| &definition.value)
1252    }
1253
1254    /// Get all preambles
1255    #[must_use]
1256    pub fn preambles(&self) -> &[Preamble<'a>] {
1257        &self.preambles
1258    }
1259
1260    /// Get mutable access to preambles
1261    #[must_use]
1262    pub fn preambles_mut(&mut self) -> &mut Vec<Preamble<'a>> {
1263        &mut self.preambles
1264    }
1265
1266    /// Get all comments
1267    #[must_use]
1268    pub fn comments(&self) -> &[Comment<'a>] {
1269        &self.comments
1270    }
1271
1272    /// Get mutable access to comments
1273    #[must_use]
1274    pub fn comments_mut(&mut self) -> &mut Vec<Comment<'a>> {
1275        &mut self.comments
1276    }
1277
1278    /// Get malformed blocks retained by tolerant parsing.
1279    #[must_use]
1280    pub fn failed_blocks(&self) -> &[FailedBlock<'a>] {
1281        &self.failed_blocks
1282    }
1283
1284    /// Return blocks in source order.
1285    #[must_use]
1286    pub fn blocks(&self) -> Vec<Block<'_, 'a>> {
1287        self.block_order
1288            .iter()
1289            .map(|kind| match *kind {
1290                BlockKind::Entry(index) => Block::Entry(
1291                    &self.entries[index],
1292                    self.entry_sources
1293                        .as_ref()
1294                        .and_then(|sources| sources.get(index).copied().flatten()),
1295                ),
1296                BlockKind::String(index) => Block::String(&self.strings[index]),
1297                BlockKind::Preamble(index) => Block::Preamble(&self.preambles[index]),
1298                BlockKind::Comment(index) => Block::Comment(&self.comments[index]),
1299                BlockKind::Failed(index) => Block::Failed(&self.failed_blocks[index]),
1300            })
1301            .collect()
1302    }
1303
1304    /// Find entries by key
1305    #[must_use]
1306    pub fn find_by_key(&self, key: &str) -> Option<&Entry<'a>> {
1307        self.entries.iter().find(|e| e.key == key)
1308    }
1309
1310    /// Find entries by key, ignoring ASCII case.
1311    #[must_use]
1312    pub fn find_by_key_ignore_case(&self, key: &str) -> Option<&Entry<'a>> {
1313        self.entries
1314            .iter()
1315            .find(|entry| entry.key.eq_ignore_ascii_case(key))
1316    }
1317
1318    /// Return `true` when the library contains `key`.
1319    #[must_use]
1320    pub fn contains_key(&self, key: &str) -> bool {
1321        self.find_by_key(key).is_some()
1322    }
1323
1324    /// Find entries by type
1325    #[must_use]
1326    pub fn find_by_type(&self, ty: &str) -> Vec<&Entry<'a>> {
1327        self.entries
1328            .iter()
1329            .filter(|e| e.ty.canonical_name().eq_ignore_ascii_case(ty))
1330            .collect()
1331    }
1332
1333    /// Find entries by field value
1334    #[must_use]
1335    pub fn find_by_field(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1336        self.entries
1337            .iter()
1338            .filter(|e| {
1339                e.get_as_string(field)
1340                    .as_ref()
1341                    .is_some_and(|v| v.contains(value))
1342            })
1343            .collect()
1344    }
1345
1346    /// Find entries by field value, ignoring ASCII case for the field name and value.
1347    #[must_use]
1348    pub fn find_by_field_ignore_case(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1349        self.entries
1350            .iter()
1351            .filter(|entry| {
1352                entry
1353                    .get_as_string_ignore_case(field)
1354                    .as_ref()
1355                    .is_some_and(|field_value| contains_case_insensitive(field_value, value))
1356            })
1357            .collect()
1358    }
1359
1360    /// Find entries whose normalized DOI matches `doi`.
1361    #[must_use]
1362    pub fn find_by_doi(&self, doi: &str) -> Vec<&Entry<'a>> {
1363        let Some(needle) = normalize_doi(doi) else {
1364            return Vec::new();
1365        };
1366
1367        self.entries
1368            .iter()
1369            .filter(|entry| entry.doi().as_ref().is_some_and(|value| value == &needle))
1370            .collect()
1371    }
1372
1373    /// Smart expansion with memoization for repeated variable references.
1374    fn smart_expand_value_cached(
1375        &self,
1376        value: Value<'a>,
1377        expanded_variables: &mut ExpansionCache<'a>,
1378        expansion_stack: &mut Vec<Cow<'a, str>>,
1379        concat_cache: &mut ConcatCache<'a>,
1380    ) -> Result<Value<'a>> {
1381        match value {
1382            // Simple literals and numbers stay as-is (zero-copy!)
1383            Value::Literal(_) | Value::Number(_) => Ok(value),
1384
1385            // Variables need to be resolved
1386            Value::Variable(name) => {
1387                let name_text = name.as_ref();
1388                if let Some(expanded) = expanded_variables.get_cloned(name_text) {
1389                    return Ok(expanded);
1390                }
1391
1392                if expansion_stack.iter().any(|v| v.as_ref() == name_text) {
1393                    let mut cycle = expansion_stack
1394                        .iter()
1395                        .map(std::convert::AsRef::as_ref)
1396                        .collect::<Vec<_>>()
1397                        .join(" -> ");
1398                    if !cycle.is_empty() {
1399                        cycle.push_str(" -> ");
1400                    }
1401                    cycle.push_str(name_text);
1402                    return Err(Error::CircularReference(cycle));
1403                }
1404
1405                if let Some(user_value) =
1406                    get_string_value(&self.strings, &self.string_lookup, name_text)
1407                {
1408                    // Recursively expand the variable's value and cache the result.
1409                    expansion_stack.push(name.clone());
1410                    let expanded = self.smart_expand_value_cached(
1411                        user_value.clone(),
1412                        expanded_variables,
1413                        expansion_stack,
1414                        concat_cache,
1415                    );
1416                    expansion_stack.pop();
1417
1418                    let expanded = expanded?;
1419                    expanded_variables.insert(name, expanded.clone());
1420                    Ok(expanded)
1421                } else {
1422                    // Check month abbreviations as fallback
1423                    get_month_expansion(name_text).map_or_else(
1424                        || {
1425                            // Variable not found in either user strings or month constants
1426                            Err(Error::UndefinedVariable(name_text.to_string()))
1427                        },
1428                        |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
1429                    )
1430                }
1431            }
1432
1433            // Concatenations need special handling
1434            Value::Concat(parts) => {
1435                if let Some(expanded) = concat_cache.get_cloned(&parts) {
1436                    return Ok(expanded);
1437                }
1438
1439                let cache_key = parts.clone();
1440                let expanded = self.expand_concatenation_cached(
1441                    parts.into_vec(),
1442                    expanded_variables,
1443                    expansion_stack,
1444                    concat_cache,
1445                )?;
1446                concat_cache.insert(cache_key, expanded.clone());
1447                Ok(expanded)
1448            }
1449        }
1450    }
1451
1452    /// Alternative expansion that works with references (requires cloning for variables)
1453    pub fn expand_value_ref(&self, value: &Value<'a>) -> Result<Value<'a>> {
1454        match value {
1455            // Simple literals and numbers can be cloned cheaply
1456            Value::Literal(_) | Value::Number(_) => Ok(value.clone()),
1457
1458            // Variables need to be resolved
1459            Value::Variable(name) => {
1460                // First check user-defined strings
1461                get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1462                    || {
1463                        // Check month abbreviations as fallback
1464                        get_month_expansion(name.as_ref()).map_or_else(
1465                            || {
1466                                // Variable not found in either user strings or month constants
1467                                Err(Error::UndefinedVariable(name.as_ref().to_string()))
1468                            },
1469                            |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
1470                        )
1471                    },
1472                    |user_value| self.expand_value_ref(user_value),
1473                )
1474            }
1475
1476            // Concatenations need cloning
1477            Value::Concat(parts) => {
1478                let cloned_parts = parts.to_vec();
1479                self.expand_concatenation(cloned_parts)
1480            }
1481        }
1482    }
1483
1484    /// Expand a concatenation, only converting to owned when necessary
1485    fn expand_concatenation(&self, parts: Vec<Value<'a>>) -> Result<Value<'a>> {
1486        let mut expanded_variables = ExpansionCache::with_capacity(0);
1487        let mut expansion_stack = Vec::new();
1488        let mut concat_cache = ConcatCache::new();
1489        self.expand_concatenation_cached(
1490            parts,
1491            &mut expanded_variables,
1492            &mut expansion_stack,
1493            &mut concat_cache,
1494        )
1495    }
1496
1497    /// Cached concatenation expansion used by hot parsing paths.
1498    fn expand_concatenation_cached(
1499        &self,
1500        parts: Vec<Value<'a>>,
1501        expanded_variables: &mut ExpansionCache<'a>,
1502        expansion_stack: &mut Vec<Cow<'a, str>>,
1503        concat_cache: &mut ConcatCache<'a>,
1504    ) -> Result<Value<'a>> {
1505        let mut expanded_parts = Vec::with_capacity(parts.len());
1506
1507        // First, expand all parts
1508        for part in parts {
1509            let expanded = self.smart_expand_value_cached(
1510                part,
1511                expanded_variables,
1512                expansion_stack,
1513                concat_cache,
1514            )?;
1515            expanded_parts.push(expanded);
1516        }
1517
1518        // If all parts are literals or numbers, we can flatten to a single string
1519        if expanded_parts
1520            .iter()
1521            .all(|p| matches!(p, Value::Literal(_) | Value::Number(_)))
1522        {
1523            let combined = concatenate_simple_values(&expanded_parts);
1524            Ok(Value::Literal(Cow::Owned(combined)))
1525        } else {
1526            Ok(Value::Concat(expanded_parts.into_boxed_slice()))
1527        }
1528    }
1529
1530    /// Get a fully expanded string value (for compatibility)
1531    pub fn get_expanded_string(&self, value: &Value<'a>) -> Result<String> {
1532        match value {
1533            Value::Literal(s) => Ok(s.to_string()),
1534            Value::Number(n) => Ok(n.to_string()),
1535            Value::Variable(name) => {
1536                // First check user-defined strings
1537                get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1538                    || {
1539                        // Check month abbreviations as fallback
1540                        get_month_expansion(name.as_ref()).map_or_else(
1541                            || {
1542                                // Variable not found in either user strings or month constants
1543                                Err(Error::UndefinedVariable(name.as_ref().to_string()))
1544                            },
1545                            |month_value| Ok(month_value.to_string()),
1546                        )
1547                    },
1548                    |user_value| self.get_expanded_string(user_value),
1549                )
1550            }
1551            Value::Concat(parts) => {
1552                let mut result = String::new();
1553                for part in parts.iter() {
1554                    result.push_str(&self.get_expanded_string(part)?);
1555                }
1556                Ok(result)
1557            }
1558        }
1559    }
1560
1561    /// Convert to owned version (no borrowed data)
1562    #[must_use]
1563    pub fn into_owned(self) -> Library<'static> {
1564        let strings = self
1565            .strings
1566            .into_iter()
1567            .map(StringDefinition::into_owned)
1568            .collect::<Vec<_>>();
1569        let mut string_lookup = AHashMap::with_capacity(strings.len());
1570        for (index, definition) in strings.iter().enumerate() {
1571            string_lookup.insert(Cow::Owned(definition.name.to_string()), index);
1572        }
1573
1574        Library {
1575            entries: self.entries.into_iter().map(Entry::into_owned).collect(),
1576            entry_sources: self.entry_sources,
1577            strings,
1578            string_lookup,
1579            preambles: self
1580                .preambles
1581                .into_iter()
1582                .map(Preamble::into_owned)
1583                .collect(),
1584            comments: self.comments.into_iter().map(Comment::into_owned).collect(),
1585            failed_blocks: self
1586                .failed_blocks
1587                .into_iter()
1588                .map(FailedBlock::into_owned)
1589                .collect(),
1590            block_order: self.block_order,
1591        }
1592    }
1593
1594    /// Add a string definition (useful for building libraries programmatically)
1595    pub fn add_string(&mut self, name: &'a str, value: Value<'a>) {
1596        self.push_string_with_source(Cow::Borrowed(name), value, None);
1597    }
1598
1599    /// Add an entry
1600    pub fn add_entry(&mut self, entry: Entry<'a>) {
1601        self.push_entry_with_source(entry, None);
1602    }
1603
1604    /// Add a preamble
1605    pub fn add_preamble(&mut self, value: Value<'a>) {
1606        self.push_preamble_with_source(value, None);
1607    }
1608
1609    /// Add a comment
1610    pub fn add_comment(&mut self, comment: &'a str) {
1611        self.push_comment_with_source(Cow::Borrowed(comment), None);
1612    }
1613
1614    /// Resolve string variables and concatenations in entries and preambles in place.
1615    pub fn resolve_strings(&mut self) -> Result<()> {
1616        let has_user_strings = !self.strings.is_empty();
1617        let month_constants_shadowed =
1618            has_user_strings && user_strings_shadow_month_constants(&self.strings);
1619        let mut expanded_variables = ExpansionCache::with_capacity(self.strings.len());
1620        let mut expansion_stack = Vec::new();
1621        let mut concat_cache = ConcatCache::new();
1622
1623        for entry_index in 0..self.entries.len() {
1624            let field_count = self.entries[entry_index].fields.len();
1625            for field_index in 0..field_count {
1626                let mut value =
1627                    std::mem::take(&mut self.entries[entry_index].fields[field_index].value);
1628                self.expand_value_for_parse(
1629                    &mut value,
1630                    has_user_strings,
1631                    month_constants_shadowed,
1632                    &mut expanded_variables,
1633                    &mut expansion_stack,
1634                    &mut concat_cache,
1635                )?;
1636                self.entries[entry_index].fields[field_index].value = value;
1637            }
1638        }
1639
1640        for preamble_index in 0..self.preambles.len() {
1641            let mut value = std::mem::take(&mut self.preambles[preamble_index].value);
1642            self.expand_value_for_parse(
1643                &mut value,
1644                has_user_strings,
1645                month_constants_shadowed,
1646                &mut expanded_variables,
1647                &mut expansion_stack,
1648                &mut concat_cache,
1649            )?;
1650            self.preambles[preamble_index].value = value;
1651        }
1652
1653        Ok(())
1654    }
1655
1656    /// Normalize DOI fields to lowercase `10.x/...` form when recognizable.
1657    pub fn normalize_doi_fields(&mut self) {
1658        for entry in &mut self.entries {
1659            for field in &mut entry.fields {
1660                if field.name.eq_ignore_ascii_case("doi") {
1661                    if let Some(normalized) = normalize_doi(&value_to_plain_string(&field.value)) {
1662                        field.value = Value::Literal(Cow::Owned(normalized));
1663                    }
1664                }
1665            }
1666        }
1667    }
1668
1669    /// Normalize month fields to a chosen representation.
1670    pub fn normalize_months(&mut self, style: MonthStyle) {
1671        for entry in &mut self.entries {
1672            for field in &mut entry.fields {
1673                if field.name.eq_ignore_ascii_case("month") {
1674                    if let Some(month) =
1675                        normalize_month_value(&value_to_plain_string(&field.value), style)
1676                    {
1677                        field.value = month;
1678                    }
1679                }
1680            }
1681        }
1682    }
1683
1684    /// Normalize field names and common BibLaTeX aliases.
1685    pub fn normalize_fields(&mut self, options: FieldNormalizeOptions) {
1686        for entry in &mut self.entries {
1687            for field in &mut entry.fields {
1688                let mut name = if options.biblatex_aliases {
1689                    canonical_field_alias(&field.name)
1690                        .unwrap_or_else(|| field.name.as_ref())
1691                        .to_string()
1692                } else {
1693                    field.name.to_string()
1694                };
1695
1696                if options.name_case == FieldNameCase::Lowercase {
1697                    name.make_ascii_lowercase();
1698                }
1699
1700                if name != field.name {
1701                    field.name = Cow::Owned(name);
1702                }
1703            }
1704        }
1705    }
1706
1707    /// Sort entries and/or fields in place.
1708    pub fn sort(&mut self, options: SortOptions) {
1709        if options.fields_by_name {
1710            for entry in &mut self.entries {
1711                entry
1712                    .fields
1713                    .sort_by(|left, right| left.name.cmp(&right.name));
1714            }
1715        }
1716
1717        if options.entries_by_key {
1718            if let Some(sources) = self.entry_sources.take() {
1719                let mut entries = self.entries.drain(..).zip(sources).collect::<Vec<_>>();
1720                entries.sort_by(|(left, _), (right, _)| left.key.cmp(&right.key));
1721                let (sorted_entries, sorted_sources): (Vec<_>, Vec<_>) =
1722                    entries.into_iter().unzip();
1723                self.entries = sorted_entries;
1724                self.entry_sources = Some(sorted_sources);
1725            } else {
1726                self.entries.sort_by(|left, right| left.key.cmp(&right.key));
1727            }
1728            self.rebuild_grouped_block_order();
1729        }
1730    }
1731
1732    fn rebuild_grouped_block_order(&mut self) {
1733        self.block_order.clear();
1734        self.block_order
1735            .extend((0..self.strings.len()).map(BlockKind::String));
1736        self.block_order
1737            .extend((0..self.preambles.len()).map(BlockKind::Preamble));
1738        self.block_order
1739            .extend((0..self.comments.len()).map(BlockKind::Comment));
1740        self.block_order
1741            .extend((0..self.entries.len()).map(BlockKind::Entry));
1742        self.block_order
1743            .extend((0..self.failed_blocks.len()).map(BlockKind::Failed));
1744    }
1745
1746    /// Validate all entries in the library
1747    /// Returns a list of entries with their indices and validation errors
1748    #[must_use]
1749    pub fn validate(
1750        &self,
1751        level: ValidationLevel,
1752    ) -> Vec<(usize, &Entry<'a>, Vec<ValidationError>)> {
1753        let mut invalid_entries = Vec::new();
1754
1755        for (index, entry) in self.entries.iter().enumerate() {
1756            if let Err(errors) = entry.validate(level) {
1757                invalid_entries.push((index, entry, errors));
1758            }
1759        }
1760
1761        invalid_entries
1762    }
1763
1764    /// Check for duplicate citation keys
1765    /// Returns a list of duplicate keys (each key appears once in the list even if it has multiple duplicates)
1766    #[must_use]
1767    pub fn find_duplicate_keys(&self) -> Vec<&str> {
1768        let mut seen = std::collections::HashSet::new();
1769        let mut duplicates = std::collections::HashSet::new();
1770
1771        for entry in &self.entries {
1772            if !seen.insert(entry.key()) {
1773                duplicates.insert(entry.key());
1774            }
1775        }
1776
1777        duplicates.into_iter().collect()
1778    }
1779
1780    /// Check for duplicate citation keys, ignoring ASCII case.
1781    #[must_use]
1782    pub fn find_duplicate_keys_ignore_case(&self) -> Vec<String> {
1783        let mut seen = std::collections::HashSet::new();
1784        let mut duplicates = std::collections::HashSet::new();
1785
1786        for entry in &self.entries {
1787            let normalized_key = entry.key().to_ascii_lowercase();
1788            if !seen.insert(normalized_key.clone()) {
1789                duplicates.insert(normalized_key);
1790            }
1791        }
1792
1793        duplicates.into_iter().collect()
1794    }
1795
1796    /// Find duplicate DOI groups using normalized DOI values.
1797    #[must_use]
1798    pub fn find_duplicate_dois(&self) -> Vec<(String, Vec<&Entry<'a>>)> {
1799        let mut groups: AHashMap<String, Vec<&Entry<'a>>> = AHashMap::new();
1800        for entry in &self.entries {
1801            if let Some(doi) = entry.doi() {
1802                groups.entry(doi).or_default().push(entry);
1803            }
1804        }
1805
1806        groups
1807            .into_iter()
1808            .filter(|(_, entries)| entries.len() > 1)
1809            .collect()
1810    }
1811
1812    /// Validate all entries and return a comprehensive validation report
1813    #[must_use]
1814    pub fn validate_comprehensive(&self, level: ValidationLevel) -> ValidationReport<'_> {
1815        let invalid_entries = self.validate(level);
1816        let duplicate_keys = self.find_duplicate_keys();
1817        let empty_entries = self.find_empty_entries();
1818
1819        ValidationReport {
1820            invalid_entries,
1821            duplicate_keys,
1822            empty_entries,
1823            total_entries: self.entries.len(),
1824            validation_level: level,
1825        }
1826    }
1827
1828    /// Find entries with no fields (only key and type)
1829    fn find_empty_entries(&self) -> Vec<(usize, &Entry<'a>)> {
1830        self.entries
1831            .iter()
1832            .enumerate()
1833            .filter(|(_, entry)| entry.fields().is_empty())
1834            .collect()
1835    }
1836
1837    /// Get statistics about the library
1838    #[must_use]
1839    pub fn stats(&self) -> LibraryStats {
1840        let mut type_counts = AHashMap::new();
1841        for entry in &self.entries {
1842            *type_counts.entry(entry.ty.to_string()).or_insert(0) += 1;
1843        }
1844
1845        LibraryStats {
1846            total_entries: self.entries.len(),
1847            total_strings: self.strings.len(),
1848            total_preambles: self.preambles.len(),
1849            total_comments: self.comments.len(),
1850            entries_by_type: type_counts,
1851        }
1852    }
1853}
1854
/// Statistics about a library
///
/// A plain snapshot of counts, as produced by the library's `stats` method.
/// It holds no references into the library it was computed from.
#[derive(Debug, Clone)]
pub struct LibraryStats {
    /// Total number of entries
    pub total_entries: usize,
    /// Total number of string definitions
    pub total_strings: usize,
    /// Total number of preambles
    pub total_preambles: usize,
    /// Total number of comments
    pub total_comments: usize,
    /// Entry counts by type, keyed by the string form of the entry type
    pub entries_by_type: AHashMap<String, usize>,
}
1869
/// Comprehensive validation report for a library
///
/// Borrows from the library it describes, so it cannot outlive that library.
/// Built by the library's `validate_comprehensive` method.
#[derive(Debug, Clone)]
pub struct ValidationReport<'a> {
    /// Entries that failed validation with their errors.
    ///
    /// Each tuple holds the entry's index in the library, the entry itself,
    /// and the validation errors reported for it.
    pub invalid_entries: Vec<(usize, &'a Entry<'a>, Vec<ValidationError>)>,
    /// Duplicate citation keys (each duplicated key listed once)
    pub duplicate_keys: Vec<&'a str>,
    /// Entries with no fields, paired with their index in the library
    pub empty_entries: Vec<(usize, &'a Entry<'a>)>,
    /// Total number of entries in the library
    pub total_entries: usize,
    /// Validation level used
    pub validation_level: ValidationLevel,
}
1884
1885impl ValidationReport<'_> {
1886    /// Check if the library is completely valid
1887    #[must_use]
1888    pub fn is_valid(&self) -> bool {
1889        self.invalid_entries.is_empty()
1890            && self.duplicate_keys.is_empty()
1891            && self.empty_entries.is_empty()
1892    }
1893
1894    /// Get total number of issues found
1895    #[must_use]
1896    pub fn total_issues(&self) -> usize {
1897        self.invalid_entries.len() + self.duplicate_keys.len() + self.empty_entries.len()
1898    }
1899
1900    /// Get a summary of issues by severity
1901    #[must_use]
1902    pub fn issue_summary(&self) -> IssueSummary {
1903        let mut errors = 0;
1904        let mut warnings = 0;
1905        let mut infos = 0;
1906
1907        for (_, _, validation_errors) in &self.invalid_entries {
1908            for error in validation_errors {
1909                match error.severity {
1910                    crate::model::ValidationSeverity::Error => errors += 1,
1911                    crate::model::ValidationSeverity::Warning => warnings += 1,
1912                    crate::model::ValidationSeverity::Info => infos += 1,
1913                }
1914            }
1915        }
1916
1917        // Duplicate keys and empty entries are considered errors
1918        errors += self.duplicate_keys.len() + self.empty_entries.len();
1919
1920        IssueSummary {
1921            errors,
1922            warnings,
1923            infos,
1924        }
1925    }
1926}
1927
/// Summary of validation issues by severity
///
/// Returned by `ValidationReport::issue_summary`, which counts each
/// validation error under its severity and adds duplicate keys and empty
/// entries to the error count.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssueSummary {
    /// Number of error-level issues
    pub errors: usize,
    /// Number of warning-level issues
    pub warnings: usize,
    /// Number of info-level issues
    pub infos: usize,
}
1938
1939/// Concatenate simple values (literals and numbers) into a single string
1940fn concatenate_simple_values(values: &[Value]) -> String {
1941    let mut result = String::new();
1942
1943    // Pre-calculate capacity for efficiency
1944    let capacity: usize = values
1945        .iter()
1946        .map(|v| match v {
1947            Value::Literal(s) => s.len(),
1948            Value::Number(n) => n.to_string().len(),
1949            _ => 0,
1950        })
1951        .sum();
1952
1953    result.reserve(capacity);
1954
1955    for value in values {
1956        match value {
1957            Value::Literal(s) => result.push_str(s),
1958            Value::Number(n) => result.push_str(&n.to_string()),
1959            _ => {} // Should not happen given the precondition
1960        }
1961    }
1962
1963    result
1964}
1965
/// Test whether `needle` occurs in `haystack`, ignoring letter case.
///
/// Both strings are lowercased before the search. An empty `needle` matches
/// every haystack, including the empty one.
fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
    needle.is_empty() || {
        let folded_needle = needle.to_lowercase();
        let folded_haystack = haystack.to_lowercase();
        folded_haystack.contains(folded_needle.as_str())
    }
}
1973
1974fn value_to_plain_string(value: &Value<'_>) -> String {
1975    match value {
1976        Value::Literal(text) => text.to_string(),
1977        Value::Number(number) => number.to_string(),
1978        Value::Variable(name) => name.to_string(),
1979        Value::Concat(parts) => parts.iter().map(value_to_plain_string).collect(),
1980    }
1981}
1982
1983fn normalize_month_value(input: &str, style: MonthStyle) -> Option<Value<'static>> {
1984    let normalized = input.trim().trim_matches(['{', '}']).to_ascii_lowercase();
1985    let month_index = match normalized.as_str() {
1986        "jan" | "january" | "1" | "01" => 1,
1987        "feb" | "february" | "2" | "02" => 2,
1988        "mar" | "march" | "3" | "03" => 3,
1989        "apr" | "april" | "4" | "04" => 4,
1990        "may" | "5" | "05" => 5,
1991        "jun" | "june" | "6" | "06" => 6,
1992        "jul" | "july" | "7" | "07" => 7,
1993        "aug" | "august" | "8" | "08" => 8,
1994        "sep" | "september" | "9" | "09" => 9,
1995        "oct" | "october" | "10" => 10,
1996        "nov" | "november" | "11" => 11,
1997        "dec" | "december" | "12" => 12,
1998        _ => return None,
1999    };
2000
2001    let text = match style {
2002        MonthStyle::Long => month_long_name(month_index),
2003        MonthStyle::Abbrev => month_abbreviation(month_index),
2004        MonthStyle::Number => return Some(Value::Number(month_index)),
2005    };
2006
2007    Some(Value::Literal(Cow::Borrowed(text)))
2008}
2009
/// Return the full English name for a 1-based month number.
///
/// Numbers outside `1..=12` yield the empty string.
const fn month_long_name(month: i64) -> &'static str {
    const NAMES: [&str; 12] = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ];

    match month {
        // The pattern bounds `month` to 1..=12, so the cast cannot wrap or truncate.
        1..=12 => NAMES[(month - 1) as usize],
        _ => "",
    }
}
2027
/// Return the lowercase three-letter abbreviation for a 1-based month number.
///
/// Numbers outside `1..=12` yield the empty string.
const fn month_abbreviation(month: i64) -> &'static str {
    const ABBREVIATIONS: [&str; 12] = [
        "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    ];

    match month {
        // The pattern bounds `month` to 1..=12, so the cast cannot wrap or truncate.
        1..=12 => ABBREVIATIONS[(month - 1) as usize],
        _ => "",
    }
}
2045
/// Look up the replacement name for a field alias.
///
/// The lookup ignores ASCII case. Returns the replacement for
/// `journaltitle`, `date`, `institution` and `location`, and `None` for
/// every other name.
fn canonical_field_alias(name: &str) -> Option<&'static str> {
    // (alias, replacement) pairs; the first match wins.
    const ALIASES: [(&str, &str); 4] = [
        ("journaltitle", "journal"),
        ("date", "year"),
        ("institution", "school"),
        ("location", "address"),
    ];

    ALIASES
        .iter()
        .find(|(alias, _)| name.eq_ignore_ascii_case(alias))
        .map(|&(_, replacement)| replacement)
}
2059
/// Builder for creating libraries programmatically
///
/// Wraps a library that starts out as `Library::default()` and is handed
/// back by `build` once the desired blocks have been added.
#[derive(Debug, Default)]
pub struct LibraryBuilder<'a> {
    /// The library being assembled.
    db: Library<'a>,
}
2065
2066impl<'a> LibraryBuilder<'a> {
2067    /// Create a new builder
2068    #[must_use]
2069    pub fn new() -> Self {
2070        Self::default()
2071    }
2072
2073    /// Add an entry
2074    #[must_use]
2075    pub fn entry(mut self, entry: Entry<'a>) -> Self {
2076        self.db.add_entry(entry);
2077        self
2078    }
2079
2080    /// Add a string definition
2081    #[must_use]
2082    pub fn string(mut self, name: &'a str, value: Value<'a>) -> Self {
2083        self.db.add_string(name, value);
2084        self
2085    }
2086
2087    /// Add a preamble
2088    #[must_use]
2089    pub fn preamble(mut self, value: Value<'a>) -> Self {
2090        self.db.add_preamble(value);
2091        self
2092    }
2093
2094    /// Add a comment
2095    #[must_use]
2096    pub fn comment(mut self, text: &'a str) -> Self {
2097        self.db.add_comment(text);
2098        self
2099    }
2100
2101    /// Build the library
2102    #[must_use]
2103    pub fn build(self) -> Library<'a> {
2104        self.db
2105    }
2106}
2107
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{EntryType, Field};

    /// Build a path inside the system temp directory that is specific to this
    /// test process, so concurrent test runs do not overwrite each other's
    /// fixture files and no platform-specific directory is hard-coded.
    fn unique_temp_path(file_name: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "bibtex_parser_{}_{}",
            std::process::id(),
            file_name
        ))
    }

    #[test]
    fn test_library_parse() {
        let input = r#"
            @string{me = "John Doe"}

            @article{test2023,
                author = me,
                title = "Test Article",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);

        let entry = &library.entries()[0];
        // Use get_as_string since the value might be a variable reference
        assert_eq!(entry.get_as_string("author").unwrap(), "John Doe");
    }

    #[test]
    fn test_zero_copy_preservation() {
        let input = r#"
            @article{test,
                title = "This is borrowed",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        let title = entry
            .fields
            .iter()
            .find(|f| f.name == "title")
            .map(|f| &f.value);

        // The title should still be borrowed from the input. Fail loudly when
        // the field is missing or is not a literal, instead of passing without
        // having checked anything.
        match title {
            Some(Value::Literal(cow)) => assert!(matches!(cow, Cow::Borrowed(_))),
            other => panic!("expected a literal title value, got {:?}", other),
        }
    }

    #[test]
    fn test_concatenation_creates_owned() {
        let input = r#"
            @string{first = "Hello"}
            @string{second = "World"}

            @article{test,
                title = first # ", " # second
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        // Concatenation should create an owned string
        assert_eq!(entry.get_as_string("title").unwrap(), "Hello, World");
    }

    #[test]
    fn test_boxed_concat_memory_optimization() {
        // Guard against the Value enum growing beyond 32 bytes.
        assert!(
            std::mem::size_of::<Value>() <= 32,
            "Value enum is {} bytes, should be 32 or less",
            std::mem::size_of::<Value>()
        );
    }

    #[test]
    fn test_field_vec_capacity_bounded() {
        let input = r#"
            @article{test,
                a = "1", b = "2", c = "3", d = "4", e = "5",
                f = "6", g = "7", h = "8", i = "9", j = "10"
            }
        "#;

        let db = Library::parser().parse(input).unwrap();
        let entry = &db.entries()[0];

        assert_eq!(entry.fields.len(), 10);
        assert!(
            entry.fields.capacity() <= 17,
            "Unexpected field Vec growth: len={}, capacity={}",
            entry.fields.len(),
            entry.fields.capacity()
        );
    }

    #[test]
    fn test_library_builder() {
        let library = LibraryBuilder::new()
            .string("me", Value::Literal(Cow::Borrowed("John Doe")))
            .entry(Entry {
                ty: EntryType::Article,
                key: Cow::Borrowed("test2023"),
                fields: vec![
                    Field::new("author", Value::Variable(Cow::Borrowed("me"))),
                    Field::new("title", Value::Literal(Cow::Borrowed("Test"))),
                ],
            })
            .build();

        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);
    }

    #[test]
    fn test_library_stats() {
        let input = r#"
            @string{ieee = "IEEE"}
            @preamble{"Test preamble"}
            % This is a percent comment that now works properly
            @comment{This is a formal comment that works}
            @article{a1, title = "Article 1"}
            @article{a2, title = "Article 2"}
            @book{b1, title = "Book 1"}
        "#;

        let library = Library::parser().parse(input).unwrap();
        let stats = library.stats();

        assert_eq!(stats.total_entries, 3);
        assert_eq!(stats.total_strings, 1);
        assert_eq!(stats.total_preambles, 1);
        assert_eq!(stats.total_comments, 2); // Both % and @comment should work
        assert_eq!(stats.entries_by_type.get("article"), Some(&2));
        assert_eq!(stats.entries_by_type.get("book"), Some(&1));
    }

    #[test]
    fn test_parse_files_parallel() {
        use std::fs::write;
        use std::path::PathBuf;

        let path1 = unique_temp_path("parallel_test1.bib");
        let path2 = unique_temp_path("parallel_test2.bib");

        write(&path1, "@article{a1,title=\"A\"}").unwrap();
        write(&path2, "@article{a2,title=\"B\"}").unwrap();

        let paths: Vec<PathBuf> = vec![path1.clone(), path2.clone()];

        let db = Library::parser().threads(2).parse_files(&paths).unwrap();

        assert_eq!(db.entries().len(), 2);

        let _ = std::fs::remove_file(path1);
        let _ = std::fs::remove_file(path2);
    }

    #[test]
    fn test_builder_pattern_api() {
        let input = "@article{test, title = \"Test\"}";

        // Single-threaded (default)
        let db1 = Library::parser().parse(input).unwrap();
        assert_eq!(db1.entries().len(), 1);

        // Using parser builder
        let db2 = Library::parser().threads(1).parse(input).unwrap();
        assert_eq!(db2.entries().len(), 1);

        #[cfg(feature = "parallel")]
        {
            use std::fs::write;

            // Parallel only works for multiple files
            let db3 = Library::parser().threads(4).parse(input).unwrap();
            assert_eq!(db3.entries().len(), 1);

            // Multi-file parallel processing, using portable per-process
            // fixture paths rather than a hard-coded /tmp location.
            let path1 = unique_temp_path("builder_test1.bib");
            let path2 = unique_temp_path("builder_test2.bib");
            write(&path1, "@article{a1, title=\"A\"}").unwrap();
            write(&path2, "@article{a2, title=\"B\"}").unwrap();

            let paths = vec![path1.clone(), path2.clone()];
            let db4 = Library::parser()
                .threads(2)
                .parse_files(&paths)
                .unwrap();
            assert_eq!(db4.entries().len(), 2);

            let _ = std::fs::remove_file(path1);
            let _ = std::fs::remove_file(path2);
        }
    }
}