// bibtex_parser/library.rs
1//! BibTeX library representation
2
3use crate::{
4    canonical_biblatex_field_alias, normalize_doi, CorpusEvent, CorpusSource, Entry, Error,
5    ParseEvent, ParseFlow, ParsedBlock, ParsedComment, ParsedCorpus, ParsedDocument, ParsedEntry,
6    ParsedFailedBlock, ParsedPreamble, ParsedSource, ParsedString, Result, SourceId, SourceMap,
7    SourceSpan, StreamingSummary, ValidationError, ValidationLevel, Value,
8};
9use ahash::AHashMap;
10use memchr::memchr;
11use std::borrow::Cow;
12use std::ops::Deref;
13use std::path::Path;
14
15#[cfg(feature = "parallel")]
16use rayon::prelude::*;
17
/// Below this many cached expansions, `ExpansionCache` uses a linear-scan
/// `Vec` instead of a hash map (see `ExpansionCache::with_capacity`/`insert`).
const SMALL_EXPANSION_CACHE_LIMIT: usize = 16;
/// Below this many `@string` definitions, `get_string_definition` scans the
/// slice directly instead of consulting the lookup map.
const SMALL_STRING_LOOKUP_LIMIT: usize = 16;
/// Maximum number of concatenation results retained by `ConcatCache`;
/// further inserts are silently dropped.
const CONCAT_CACHE_LIMIT: usize = 16;
21
/// Cache of already-expanded string values with an adaptive representation:
/// a move-to-front `Vec` while small, an `AHashMap` once it grows.
enum ExpansionCache<'a> {
    /// Linear-scan storage, used up to `SMALL_EXPANSION_CACHE_LIMIT` entries.
    Small(Vec<(Cow<'a, str>, Value<'a>)>),
    /// Hash-map storage, used beyond the small-size limit.
    Large(AHashMap<Cow<'a, str>, Value<'a>>),
}
26
27impl<'a> ExpansionCache<'a> {
28    fn with_capacity(capacity: usize) -> Self {
29        if capacity <= SMALL_EXPANSION_CACHE_LIMIT {
30            Self::Small(Vec::with_capacity(capacity))
31        } else {
32            Self::Large(AHashMap::with_capacity(capacity))
33        }
34    }
35
36    fn get_cloned(&mut self, name: &str) -> Option<Value<'a>> {
37        match self {
38            Self::Small(entries) => {
39                let index = entries.iter().position(|(key, _)| key.as_ref() == name)?;
40                if index != 0 {
41                    entries.swap(0, index);
42                }
43                Some(entries[0].1.clone())
44            }
45            Self::Large(entries) => entries.get(name).cloned(),
46        }
47    }
48
49    fn insert(&mut self, name: Cow<'a, str>, value: Value<'a>) {
50        match self {
51            Self::Small(entries) => {
52                if entries.len() < SMALL_EXPANSION_CACHE_LIMIT {
53                    entries.push((name, value));
54                } else {
55                    let mut large = AHashMap::with_capacity(entries.len() + 1);
56                    for (key, value) in entries.drain(..) {
57                        large.insert(key, value);
58                    }
59                    large.insert(name, value);
60                    *self = Self::Large(large);
61                }
62            }
63            Self::Large(entries) => {
64                entries.insert(name, value);
65            }
66        }
67    }
68}
69
/// Small bounded cache mapping concatenation part-lists to their resolved
/// value (looked up structurally via `concat_parts_equal`).
struct ConcatCache<'a> {
    // Pairs of (concatenation parts, resolved value); capped at
    // CONCAT_CACHE_LIMIT by `insert`.
    entries: Vec<(Box<[Value<'a>]>, Value<'a>)>,
}
73
74impl<'a> ConcatCache<'a> {
75    const fn new() -> Self {
76        Self {
77            entries: Vec::new(),
78        }
79    }
80
81    fn get_cloned(&mut self, parts: &[Value<'a>]) -> Option<Value<'a>> {
82        let index = self
83            .entries
84            .iter()
85            .position(|(cached_parts, _)| concat_parts_equal(cached_parts, parts))?;
86        if index != 0 {
87            self.entries.swap(0, index);
88        }
89        Some(self.entries[0].1.clone())
90    }
91
92    fn insert(&mut self, parts: Box<[Value<'a>]>, value: Value<'a>) {
93        if self.entries.len() < CONCAT_CACHE_LIMIT {
94            self.entries.push((parts, value));
95        }
96    }
97}
98
99fn concat_parts_equal(left: &[Value<'_>], right: &[Value<'_>]) -> bool {
100    left.len() == right.len()
101        && left
102            .iter()
103            .zip(right)
104            .all(|(left, right)| cache_values_equal(left, right))
105}
106
107fn cache_values_equal(left: &Value<'_>, right: &Value<'_>) -> bool {
108    match (left, right) {
109        (Value::Literal(left), Value::Literal(right))
110        | (Value::Variable(left), Value::Variable(right)) => left.as_ref() == right.as_ref(),
111        (Value::Number(left), Value::Number(right)) => left == right,
112        (Value::Concat(left), Value::Concat(right)) => concat_parts_equal(left, right),
113        _ => false,
114    }
115}
116
/// Get month expansion for a given abbreviation (case-insensitive)
///
/// Returns None if the name is not a recognized month abbreviation.
/// This is used as a fallback when user-defined string variables are not found.
#[inline]
fn get_month_expansion(name: &str) -> Option<&'static str> {
    // Only exact three-byte names can be month abbreviations.
    let lowered = match name.as_bytes() {
        &[a, b, c] => [a | 0x20, b | 0x20, c | 0x20],
        _ => return None,
    };

    // ASCII-lowercase via the 0x20 bit, then match the three-letter key.
    match &lowered {
        b"jan" => Some("January"),
        b"feb" => Some("February"),
        b"mar" => Some("March"),
        b"apr" => Some("April"),
        b"may" => Some("May"),
        b"jun" => Some("June"),
        b"jul" => Some("July"),
        b"aug" => Some("August"),
        b"sep" => Some("September"),
        b"oct" => Some("October"),
        b"nov" => Some("November"),
        b"dec" => Some("December"),
        _ => None,
    }
}
148
149#[inline]
150fn get_string_value<'map, 'a>(
151    strings: &'map [StringDefinition<'a>],
152    string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
153    name: &str,
154) -> Option<&'map Value<'a>> {
155    get_string_definition(strings, string_lookup, name).map(|definition| &definition.value)
156}
157
158#[inline]
159fn get_string_definition<'map, 'a>(
160    strings: &'map [StringDefinition<'a>],
161    string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
162    name: &str,
163) -> Option<&'map StringDefinition<'a>> {
164    if strings.len() <= SMALL_STRING_LOOKUP_LIMIT {
165        strings
166            .iter()
167            .rev()
168            .find(|definition| definition.name.as_ref() == name)
169    } else {
170        string_lookup
171            .get(name)
172            .and_then(|&index| strings.get(index))
173    }
174}
175
176#[inline]
177fn user_strings_shadow_month_constants(strings: &[StringDefinition<'_>]) -> bool {
178    strings
179        .iter()
180        .any(|definition| get_month_expansion(definition.name.as_ref()).is_some())
181}
182
183/// Check if a value contains any variables
184#[inline]
185fn contains_variables(value: &Value) -> bool {
186    match value {
187        Value::Variable(_) => true,
188        Value::Concat(parts) => parts.iter().any(contains_variables),
189        _ => false,
190    }
191}
192
193/// Check if a value contains variables that might be month constants
194#[inline]
195fn contains_potential_month_variables(value: &Value) -> bool {
196    match value {
197        Value::Variable(name) => get_month_expansion(name).is_some(),
198        Value::Concat(parts) => parts.iter().any(contains_potential_month_variables),
199        _ => false,
200    }
201}
202
/// True for bytes allowed in BibTeX identifiers: ASCII alphanumerics plus
/// `_`, `-`, `:` and `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
210
/// Case-insensitively test whether `input` begins with `@<keyword>` where
/// the keyword ends at an identifier boundary (so `@string` matches but
/// `@strings` does not). `keyword` must be lowercase ASCII.
#[inline]
fn starts_with_at_keyword(input: &[u8], keyword: &[u8]) -> bool {
    let rest = match input.split_first() {
        Some((&b'@', rest)) if rest.len() >= keyword.len() => rest,
        _ => return false,
    };

    let keyword_matches = rest
        .iter()
        .zip(keyword)
        .all(|(&actual, &expected)| (actual | 0x20) == expected);
    if !keyword_matches {
        return false;
    }

    // The byte after the keyword must not extend the identifier
    // (this set must mirror `is_identifier_char`).
    match rest.get(keyword.len()) {
        Some(&next) => {
            !matches!(next, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' | b':' | b'.')
        }
        None => true,
    }
}
229
/// Result of the cheap pre-scan over raw input (see [`scan_input`]).
#[derive(Debug, Clone, Copy)]
struct InputScan {
    // True if any `@string` keyword was spotted; conservative — the scan
    // cannot tell whether the `@` sits inside surrounding comment text.
    may_contain_string_definition: bool,
    // Number of `@` bytes seen; used as a capacity estimate for block vectors.
    at_count: usize,
}
235
236/// Fast pre-scan to detect `@string` entries and estimate block capacity.
237fn scan_input(input: &str) -> InputScan {
238    let bytes = input.as_bytes();
239    let mut pos = 0;
240    let mut at_count = 0;
241    let mut may_contain_string_definition = false;
242
243    while pos < bytes.len() {
244        if let Some(offset) = memchr(b'@', &bytes[pos..]) {
245            let at = pos + offset;
246            at_count += 1;
247            if starts_with_at_keyword(&bytes[at..], b"string") {
248                may_contain_string_definition = true;
249            }
250            pos = at + 1;
251        } else {
252            break;
253        }
254    }
255
256    InputScan {
257        may_contain_string_definition,
258        at_count,
259    }
260}
261
262/// Detect whether a `@string` may appear after a regular entry.
263///
264/// False positives are acceptable (we take the conservative slow path), but
265/// false negatives would be incorrect, so keyword matching mirrors parser rules.
266fn input_may_have_late_string_definition(input: &str) -> bool {
267    let bytes = input.as_bytes();
268    let mut pos = 0;
269    let mut saw_regular_entry = false;
270
271    while pos < bytes.len() {
272        if let Some(offset) = memchr(b'@', &bytes[pos..]) {
273            let at = pos + offset;
274            let tail = &bytes[at..];
275
276            if starts_with_at_keyword(tail, b"string") {
277                if saw_regular_entry {
278                    return true;
279                }
280            } else if !saw_regular_entry
281                && !starts_with_at_keyword(tail, b"preamble")
282                && !starts_with_at_keyword(tail, b"comment")
283            {
284                // Anything else that looks like `@<identifier>` is treated as a regular entry.
285                saw_regular_entry = true;
286            }
287
288            pos = at + 1;
289        } else {
290            break;
291        }
292    }
293
294    false
295}
296
297fn next_recovery_boundary(input: &str, start: usize) -> usize {
298    let bytes = input.as_bytes();
299    let mut pos = start.saturating_add(1);
300    while pos < bytes.len() {
301        if bytes[pos] == b'@' && line_prefix_is_whitespace(bytes, pos) {
302            return pos;
303        }
304        pos += 1;
305    }
306    input.len()
307}
308
/// True when everything between the start of `pos`'s line and `pos` itself
/// is spaces or tabs (i.e. `pos` is the first non-blank byte on its line).
fn line_prefix_is_whitespace(bytes: &[u8], pos: usize) -> bool {
    let mut index = pos;
    while index > 0 {
        match bytes[index - 1] {
            // Reached the previous line break: the whole prefix was blank.
            b'\n' | b'\r' => return true,
            b' ' | b'\t' => index -= 1,
            _ => return false,
        }
    }
    true
}
319
/// Accumulate one source's streaming counters into a corpus-wide total.
///
/// All counters are summed; `stopped` is sticky (OR-ed) so a stop in any
/// source marks the merged summary as stopped.
fn merge_streaming_summary(total: &mut StreamingSummary, source: StreamingSummary) {
    total.entries += source.entries;
    total.strings += source.strings;
    total.preambles += source.preambles;
    total.comments += source.comments;
    total.failed_blocks += source.failed_blocks;
    total.warnings += source.warnings;
    total.errors += source.errors;
    total.infos += source.infos;
    total.recovered_blocks += source.recovered_blocks;
    total.stopped |= source.stopped;
}
332
/// Parser configuration.
#[derive(Debug, Default, Clone)]
pub struct Parser {
    // Worker-thread count for multi-file parsing; `None` = use all available.
    threads: Option<usize>,
    // Continue past malformed blocks, collecting diagnostics instead of failing.
    tolerant: bool,
    // Options that only affect parsed-document output.
    document: DocumentOptions,
}
340
/// Output options for the parsed-document model.
#[derive(Debug, Default, Clone, Copy)]
struct DocumentOptions {
    // Record source spans for blocks.
    capture_source: bool,
    // Keep exact raw source text alongside parsed blocks.
    preserve_raw: bool,
    // Populate expanded value text after parsing.
    expand_values: bool,
}
347
348impl Parser {
    /// Create a new parser.
    ///
    /// All options start disabled; configure via the builder methods
    /// ([`Self::tolerant`], [`Self::capture_source`], …).
    #[must_use]
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }
355
356    /// Set number of threads (None = use all available)
357    #[must_use]
358    #[inline]
359    pub fn threads(mut self, threads: impl Into<Option<usize>>) -> Self {
360        self.threads = threads.into();
361        self
362    }
363
    /// Continue after malformed blocks and collect diagnostics.
    ///
    /// In tolerant mode, unparsable spans become failed blocks or recovered
    /// partial entries instead of aborting the whole parse.
    #[must_use]
    #[inline]
    pub const fn tolerant(mut self) -> Self {
        self.tolerant = true;
        self
    }
371
    /// Capture source spans for blocks.
    ///
    /// Selects the span-aware parsing path (see [`Self::parse`]).
    #[must_use]
    #[inline]
    pub const fn capture_source(mut self) -> Self {
        self.document.capture_source = true;
        self
    }
379
    /// Preserve exact raw source text in parsed-document output.
    ///
    /// Only affects the parsed-document APIs; [`Self::parse`] does not read it.
    #[must_use]
    #[inline]
    pub const fn preserve_raw(mut self) -> Self {
        self.document.preserve_raw = true;
        self
    }
387
    /// Populate expanded value text in parsed-document output.
    ///
    /// When set, document parsing runs value expansion and therefore can fail
    /// on undefined variables or circular references that would otherwise be
    /// tolerated.
    #[must_use]
    #[inline]
    pub const fn expand_values(mut self) -> Self {
        self.document.expand_values = true;
        self
    }
395
396    /// Parse a single input string.
397    #[inline]
398    pub fn parse<'a>(&self, input: &'a str) -> Result<Library<'a>> {
399        if self.tolerant {
400            Library::parse_tolerant(input, self.document.capture_source)
401        } else if self.document.capture_source {
402            Library::parse_with_spans(input)
403        } else {
404            Library::parse_sequential(input)
405        }
406    }
407
    /// Parse a single input string into the parsed document model.
    ///
    /// Use this when a caller needs source-order blocks, diagnostics, raw-text
    /// slots, or partial parse results. Use [`Self::parse`] for the compact
    /// [`Library`] API.
    ///
    /// The input is registered as source id 0 with no source name.
    #[inline]
    pub fn parse_document<'a>(&self, input: &'a str) -> Result<ParsedDocument<'a>> {
        self.parse_document_with_source_id(SourceId::new(0), None, input)
    }
417
    /// Parse a named source into the parsed document model.
    ///
    /// The parser does not read files itself; callers provide the source name
    /// or path-like label together with the already-loaded input text.
    ///
    /// The input is registered as source id 0.
    #[inline]
    pub fn parse_source<'a>(
        &self,
        source_name: impl Into<Cow<'a, str>>,
        input: &'a str,
    ) -> Result<ParsedDocument<'a>> {
        self.parse_document_with_source_id(SourceId::new(0), Some(source_name.into()), input)
    }
430
431    /// Parse multiple named in-memory sources into a corpus result.
432    pub fn parse_sources<'a>(&self, sources: &[CorpusSource<'a>]) -> Result<ParsedCorpus<'a>> {
433        let mut documents = Vec::with_capacity(sources.len());
434        for (index, source) in sources.iter().enumerate() {
435            documents.push(self.parse_document_with_source_id(
436                SourceId::new(index),
437                Some(Cow::Borrowed(source.name)),
438                source.input,
439            )?);
440        }
441
442        Ok(ParsedCorpus::from_documents(documents))
443    }
444
    /// Stream parsed source-order events to a callback.
    ///
    /// Strict mode returns an error on the first malformed block. Tolerant mode
    /// emits recovered partial entries or failed blocks with diagnostics and
    /// continues. The callback can return [`ParseFlow::Stop`] to stop after the
    /// current event; the returned summary then has `stopped = true`.
    ///
    /// The input is registered as source id 0 with no source name.
    #[inline]
    pub fn parse_events<'a, F>(&self, input: &'a str, on_event: F) -> Result<StreamingSummary>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        self.parse_source_events_with_source(SourceId::new(0), None, input, on_event)
    }
458
    /// Stream parsed source-order events from a named source.
    ///
    /// Behaves like [`Self::parse_events`]; the given name is attached to the
    /// source map used to resolve spans.
    #[inline]
    pub fn parse_source_events<'a, F>(
        &self,
        source_name: impl Into<Cow<'a, str>>,
        input: &'a str,
        on_event: F,
    ) -> Result<StreamingSummary>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        self.parse_source_events_with_source(
            SourceId::new(0),
            Some(source_name.into()),
            input,
            on_event,
        )
    }
477
    /// Stream events from multiple named in-memory sources in corpus order.
    ///
    /// Each source is bracketed by [`CorpusEvent::SourceStart`] and
    /// [`CorpusEvent::SourceEnd`], with per-source parse events wrapped in
    /// [`CorpusEvent::Event`]. A [`ParseFlow::Stop`] from the callback stops
    /// the whole corpus; per-source summaries are merged into the returned
    /// total.
    pub fn parse_corpus_events<'a, F>(
        &self,
        sources: &[CorpusSource<'a>],
        mut on_event: F,
    ) -> Result<StreamingSummary>
    where
        F: FnMut(CorpusEvent<'a>) -> Result<ParseFlow>,
    {
        let mut summary = StreamingSummary::default();

        for (index, source) in sources.iter().enumerate() {
            // A stop requested while streaming the previous source ends the corpus.
            if summary.stopped {
                break;
            }

            let source_id = SourceId::new(index);
            let parsed_source = ParsedSource {
                id: source_id,
                name: Some(Cow::Borrowed(source.name)),
            };
            if on_event(CorpusEvent::SourceStart(parsed_source.clone()))? == ParseFlow::Stop {
                summary.stopped = true;
                break;
            }

            let source_summary = self.parse_source_events_with_source(
                source_id,
                Some(Cow::Borrowed(source.name)),
                source.input,
                |event| {
                    on_event(CorpusEvent::Event {
                        source: source_id,
                        event: Box::new(event),
                    })
                },
            )?;
            merge_streaming_summary(&mut summary, source_summary);

            // SourceEnd is still emitted even when the inner stream stopped.
            if on_event(CorpusEvent::SourceEnd(parsed_source))? == ParseFlow::Stop {
                summary.stopped = true;
            }
        }

        summary.finalize_status();
        Ok(summary)
    }
525
    /// Shared driver for single-source event streaming.
    ///
    /// Builds a [`SourceMap`] for span resolution, runs either the tolerant
    /// recovery loop or the strict streaming parser, and finalizes the
    /// summary's overall status before returning it.
    fn parse_source_events_with_source<'a, F>(
        &self,
        source_id: SourceId,
        source_name: Option<Cow<'a, str>>,
        input: &'a str,
        mut on_event: F,
    ) -> Result<StreamingSummary>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        let source_map = SourceMap::new(Some(source_id), source_name, input);
        let mut summary = StreamingSummary::default();

        if self.tolerant {
            self.parse_tolerant_events(input, &source_map, &mut summary, &mut on_event)?;
        } else {
            // Strict path: the parser reports byte spans which are resolved
            // through the source map before emitting.
            crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
                let source = source_map.span(span.byte_start, span.byte_end);
                self.emit_parsed_event(item, source, raw, &source_map, &mut summary, &mut on_event)
            })?;
        }

        summary.finalize_status();
        Ok(summary)
    }
551
    /// Tolerant streaming loop: parse block by block, recovering at the next
    /// plausible `@` boundary after a failure.
    ///
    /// A failed span is re-emitted either as a recovered partial entry (when
    /// enough structure survives) or as a failed block carrying diagnostics.
    fn parse_tolerant_events<'a, F>(
        &self,
        input: &'a str,
        source_map: &SourceMap<'a>,
        summary: &mut StreamingSummary,
        on_event: &mut F,
    ) -> Result<()>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        let mut remaining = input;

        loop {
            crate::parser::lexer::skip_whitespace(&mut remaining);
            if remaining.is_empty() || summary.stopped {
                break;
            }

            // Byte offset of the current block within the full input.
            let start = input.len() - remaining.len();
            match crate::parser::parse_item(&mut remaining) {
                Ok(item) => {
                    let end = input.len() - remaining.len();
                    let source = source_map.span(start, end);
                    self.emit_parsed_event(
                        item,
                        source,
                        &input[start..end],
                        source_map,
                        summary,
                        on_event,
                    )?;
                }
                Err(err) => {
                    // Skip ahead to the next line-leading `@` and treat the
                    // skipped span as one failed block.
                    let end = next_recovery_boundary(input, start);
                    let failed = FailedBlock {
                        raw: Cow::Borrowed(&input[start..end]),
                        error: format!("Failed to parse entry: {err}"),
                        source: Some(source_map.span(start, end)),
                    };
                    let failed_index = summary.failed_blocks;
                    let failed = ParsedFailedBlock::from_failed_block(
                        failed_index,
                        failed,
                        Some(source_map),
                    );
                    // Prefer salvaging a partial entry over reporting failure.
                    if let Some(partial) = crate::document::recover_partial_stream_entry(
                        &failed,
                        source_map,
                        summary.entries,
                        self.document.preserve_raw,
                    ) {
                        Self::emit_event(ParseEvent::Entry(partial), summary, on_event)?;
                    } else {
                        Self::emit_event(ParseEvent::Failed(failed), summary, on_event)?;
                    }
                    remaining = &input[end..];
                }
            }
        }

        Ok(())
    }
614
615    fn emit_parsed_event<'a, F>(
616        &self,
617        item: crate::parser::ParsedItem<'a>,
618        source: SourceSpan,
619        raw: &'a str,
620        source_map: &SourceMap<'a>,
621        summary: &mut StreamingSummary,
622        on_event: &mut F,
623    ) -> Result<()>
624    where
625        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
626    {
627        if summary.stopped {
628            return Ok(());
629        }
630
631        let event = match item {
632            crate::parser::ParsedItem::Entry(entry) => {
633                ParseEvent::Entry(ParsedEntry::from_stream_entry(
634                    entry,
635                    source,
636                    raw,
637                    source_map,
638                    self.document.preserve_raw,
639                ))
640            }
641            crate::parser::ParsedItem::String(name, value) => {
642                ParseEvent::String(ParsedString::from_stream_definition(
643                    name,
644                    value,
645                    source,
646                    raw,
647                    self.document.preserve_raw,
648                ))
649            }
650            crate::parser::ParsedItem::Preamble(value) => {
651                ParseEvent::Preamble(ParsedPreamble::from_stream_preamble(
652                    value,
653                    source,
654                    raw,
655                    self.document.preserve_raw,
656                ))
657            }
658            crate::parser::ParsedItem::Comment(text) => ParseEvent::Comment(
659                ParsedComment::from_stream_comment(text, source, raw, self.document.preserve_raw),
660            ),
661        };
662
663        Self::emit_event(event, summary, on_event)
664    }
665
    /// Forward one event to the callback, updating summary counters and then
    /// re-emitting any attached diagnostics as separate events.
    ///
    /// Once the callback returns [`ParseFlow::Stop`], `summary.stopped` is set
    /// and further events — including pending diagnostics — are suppressed.
    fn emit_event<'a, F>(
        event: ParseEvent<'a>,
        summary: &mut StreamingSummary,
        on_event: &mut F,
    ) -> Result<()>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        if summary.stopped {
            return Ok(());
        }

        // Count the event and collect diagnostics to re-emit afterwards.
        let diagnostics = match &event {
            ParseEvent::Entry(entry) => {
                summary.entries += 1;
                // Partial entries come from tolerant-mode recovery.
                if entry.status == crate::ParsedEntryStatus::Partial {
                    summary.recovered_blocks += 1;
                }
                entry.diagnostics.clone()
            }
            ParseEvent::String(_) => {
                summary.strings += 1;
                Vec::new()
            }
            ParseEvent::Preamble(_) => {
                summary.preambles += 1;
                Vec::new()
            }
            ParseEvent::Comment(_) => {
                summary.comments += 1;
                Vec::new()
            }
            ParseEvent::Failed(failed) => {
                summary.failed_blocks += 1;
                failed.diagnostics.clone()
            }
            ParseEvent::Diagnostic(diagnostic) => {
                summary.count_diagnostic(diagnostic);
                Vec::new()
            }
        };
        // Diagnostics are counted up front, even if the callback stops on the
        // main event below and they are never delivered.
        for diagnostic in &diagnostics {
            summary.count_diagnostic(diagnostic);
        }

        if on_event(event)? == ParseFlow::Stop {
            summary.stopped = true;
            return Ok(());
        }

        for diagnostic in diagnostics {
            if on_event(ParseEvent::Diagnostic(diagnostic))? == ParseFlow::Stop {
                summary.stopped = true;
                break;
            }
        }

        Ok(())
    }
725
    /// Build a [`ParsedDocument`] for one source: raw-item parsing, library
    /// construction, span/location attachment, optional raw preservation,
    /// tolerant recovery, and optional value expansion.
    fn parse_document_with_source_id<'a>(
        &self,
        source_id: SourceId,
        source_name: Option<Cow<'a, str>>,
        input: &'a str,
    ) -> Result<ParsedDocument<'a>> {
        let source_map = SourceMap::new(Some(source_id), source_name.clone(), input);
        let sources = vec![ParsedSource {
            id: source_id,
            name: source_name,
        }];
        let raw_items = if self.tolerant {
            Library::parse_tolerant_raw_items(input, true, &source_map)
        } else {
            match Library::parse_raw_items_with_source(input, &source_map) {
                Ok(raw_items) => raw_items,
                Err(error) => {
                    // Strict mode still returns Ok: the failure is surfaced
                    // as a failed document so callers keep span information.
                    return Ok(ParsedDocument::failed_from_error(
                        sources,
                        &source_map,
                        &error,
                    ));
                }
            }
        };
        let library = match Library::from_raw_items(raw_items.clone()) {
            Ok(library) => library,
            // Unless expanded values were explicitly requested, tolerate
            // unresolvable string references by keeping values unexpanded.
            Err(Error::UndefinedVariable(_) | Error::CircularReference(_))
                if !self.document.expand_values =>
            {
                Library::from_raw_items_unexpanded(raw_items.clone())
            }
            Err(error) => return Err(error),
        };
        let mut document =
            ParsedDocument::from_library_with_source_map(library, sources, Some(&source_map));
        // Attach per-entry locations in source order; only entry items
        // advance the entry index.
        let mut entry_index = 0;
        for raw_item in &raw_items {
            if let RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, raw) = raw_item {
                document.apply_entry_locations(
                    entry_index,
                    raw,
                    &source_map,
                    self.document.preserve_raw,
                );
                entry_index += 1;
            }
        }
        document.apply_parsed_values(&raw_items);
        if self.document.preserve_raw {
            document.apply_raw_items(&raw_items);
        }
        if self.tolerant {
            document.recover_partial_entries(&source_map, self.document.preserve_raw);
        }
        if self.document.expand_values {
            document.populate_expanded_values(crate::ExpansionOptions::default())?;
        }
        Ok(document)
    }
786
    /// Parse into an owned (`'static`) document without span tracking.
    ///
    /// Blocks are recorded in source order, but no source map is built and
    /// block `source` fields stay `None`; all borrowed text is copied into
    /// owned storage.
    pub(crate) fn parse_compact_document_owned(
        &self,
        source_name: Option<String>,
        input: &str,
    ) -> Result<ParsedDocument<'static>> {
        let source_name = source_name.map(Cow::Owned);
        let sources = vec![ParsedSource {
            id: SourceId::new(0),
            name: source_name,
        }];
        // Pre-scan only for a capacity estimate (one `@` per block, roughly).
        let input_scan = scan_input(input);
        let mut entries = Vec::with_capacity(input_scan.at_count);
        let mut strings = Vec::new();
        let mut preambles = Vec::new();
        let mut comments = Vec::new();
        let mut blocks = Vec::with_capacity(input_scan.at_count);

        crate::parser::parse_bibtex_stream(input, |item| {
            // Each block records the index of its item within its typed list.
            match item {
                crate::parser::ParsedItem::Entry(entry) => {
                    let index = entries.len();
                    entries.push(ParsedEntry::from_entry_owned(entry, None));
                    blocks.push(ParsedBlock::Entry(index));
                }
                crate::parser::ParsedItem::String(name, value) => {
                    let index = strings.len();
                    strings.push(ParsedString::from_definition(StringDefinition {
                        name: Cow::Owned(name.to_string()),
                        value: value.into_owned(),
                        source: None,
                    }));
                    blocks.push(ParsedBlock::String(index));
                }
                crate::parser::ParsedItem::Preamble(value) => {
                    let index = preambles.len();
                    preambles.push(ParsedPreamble::from_preamble(Preamble::new(
                        value.into_owned(),
                    )));
                    blocks.push(ParsedBlock::Preamble(index));
                }
                crate::parser::ParsedItem::Comment(text) => {
                    let index = comments.len();
                    comments.push(ParsedComment::from_comment(Comment {
                        text: Cow::Owned(text.to_string()),
                        source: None,
                    }));
                    blocks.push(ParsedBlock::Comment(index));
                }
            }
            Ok(())
        })?;

        let mut document = ParsedDocument::from_parsed_parts(
            Library::new(),
            sources,
            entries,
            strings,
            preambles,
            comments,
            blocks,
        );
        if self.document.expand_values {
            document.populate_expanded_values(crate::ExpansionOptions::default())?;
        }
        Ok(document)
    }
853
    /// Parse into an owned (`'static`) document with span tracking.
    ///
    /// Like [`Self::parse_compact_document_owned`], but byte ranges reported
    /// by the parser are resolved through a [`SourceMap`] cursor so blocks
    /// carry source locations.
    pub(crate) fn parse_source_document_owned(
        &self,
        source_name: Option<String>,
        input: &str,
    ) -> Result<ParsedDocument<'static>> {
        let source_name = source_name.map(Cow::Owned);
        let source_id = SourceId::new(0);
        let source_map = SourceMap::new(Some(source_id), source_name.clone(), input);
        let sources = vec![ParsedSource {
            id: source_id,
            name: source_name,
        }];
        // Pre-scan only for a capacity estimate (one `@` per block, roughly).
        let input_scan = scan_input(input);
        let mut entries = Vec::with_capacity(input_scan.at_count);
        let mut strings = Vec::new();
        let mut preambles = Vec::new();
        let mut comments = Vec::new();
        let mut blocks = Vec::with_capacity(input_scan.at_count);
        let mut span_cursor = source_map.cursor();

        crate::parser::parse_bibtex_stream_with_entry_locations(input, |item, start, end, raw| {
            // Resolve the block's byte range to a span once, up front.
            let source = span_cursor.span(start, end);
            match item {
                crate::parser::LocatedParsedItem::Entry(entry) => {
                    let index = entries.len();
                    entries.push(ParsedEntry::from_located_stream_entry_owned(
                        entry,
                        source,
                        &mut span_cursor,
                    ));
                    blocks.push(ParsedBlock::Entry(index));
                }
                crate::parser::LocatedParsedItem::String(name, value) => {
                    let index = strings.len();
                    strings.push(ParsedString::from_stream_definition_owned(
                        name, value, source, raw,
                    ));
                    blocks.push(ParsedBlock::String(index));
                }
                crate::parser::LocatedParsedItem::Preamble(value) => {
                    let index = preambles.len();
                    preambles.push(ParsedPreamble::from_stream_preamble_owned(
                        value, source, raw,
                    ));
                    blocks.push(ParsedBlock::Preamble(index));
                }
                crate::parser::LocatedParsedItem::Comment(text) => {
                    let index = comments.len();
                    comments.push(ParsedComment::from_stream_comment_owned(text, source));
                    blocks.push(ParsedBlock::Comment(index));
                }
            }
            Ok(())
        })?;

        let mut document = ParsedDocument::from_parsed_parts(
            Library::new(),
            sources,
            entries,
            strings,
            preambles,
            comments,
            blocks,
        );
        if self.document.expand_values {
            document.populate_expanded_values(crate::ExpansionOptions::default())?;
        }
        Ok(document)
    }
923
924    /// Parse multiple files in parallel
925    pub fn parse_files<P: AsRef<Path> + Sync>(&self, paths: &[P]) -> Result<Library<'static>> {
926        #[cfg(feature = "parallel")]
927        {
928            if let Some(threads) = self.threads {
929                if threads <= 1 {
930                    return Self::parse_files_sequential(paths);
931                }
932            }
933
934            let pool = self.build_thread_pool()?;
935
936            let libraries: Result<Vec<_>> = pool.install(|| {
937                paths
938                    .par_iter()
939                    .map(|path| {
940                        let content = std::fs::read_to_string(path)?;
941                        let library = Library::parse_sequential(&content)?;
942                        Ok(library.into_owned())
943                    })
944                    .collect()
945            });
946
947            let libraries = libraries?;
948            Ok(Library::merge_libraries_parallel(libraries))
949        }
950
951        #[cfg(not(feature = "parallel"))]
952        {
953            Self::parse_files_sequential(paths)
954        }
955    }
956
957    /// Sequential file parsing fallback
958    fn parse_files_sequential<P: AsRef<Path>>(paths: &[P]) -> Result<Library<'static>> {
959        let mut result = Library::new();
960        for path in paths {
961            let content = std::fs::read_to_string(path)?;
962            let library = Library::parse_sequential(&content)?;
963            result.merge(library.into_owned());
964        }
965        Ok(result)
966    }
967
968    #[cfg(feature = "parallel")]
969    fn build_thread_pool(&self) -> Result<rayon::ThreadPool> {
970        let mut builder = rayon::ThreadPoolBuilder::new();
971
972        if let Some(threads) = self.threads {
973            builder = builder.num_threads(threads);
974        }
975
976        builder
977            .build()
978            .map_err(|e| Error::WinnowError(e.to_string()))
979    }
980}
981
/// A high-level block in a parsed BibTeX library.
///
/// Every variant borrows its payload from the library (`'lib`), which may in
/// turn borrow from the parsed input (`'a`).
#[derive(Debug, Clone, Copy)]
pub enum Block<'lib, 'a> {
    /// A regular bibliography entry, with its source span when one was captured.
    Entry(&'lib Entry<'a>, Option<SourceSpan>),
    /// A string definition.
    String(&'lib StringDefinition<'a>),
    /// A preamble block.
    Preamble(&'lib Preamble<'a>),
    /// A comment block.
    Comment(&'lib Comment<'a>),
    /// A malformed block retained by tolerant parsing.
    Failed(&'lib FailedBlock<'a>),
}
996
/// Index of a block into one of the per-kind vectors of a [`Library`].
///
/// `Library::block_order` stores these to remember the original interleaving
/// of blocks in the parsed input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockKind {
    /// Index into the entries vector.
    Entry(usize),
    /// Index into the string-definitions vector.
    String(usize),
    /// Index into the preambles vector.
    Preamble(usize),
    /// Index into the comments vector.
    Comment(usize),
    /// Index into the failed-blocks vector.
    Failed(usize),
}
1005
/// Intermediate item produced while building a [`Library`] from raw input.
#[derive(Debug, Clone)]
pub enum RawBuildItem<'a> {
    /// A successfully parsed item, its source span, and the raw input slice.
    Parsed(crate::parser::ParsedItem<'a>, SourceSpan, &'a str),
    /// A block that failed to parse, retained by tolerant parsing.
    Failed(FailedBlock<'a>),
}
1011
/// A BibTeX string definition.
///
/// The value is kept unexpanded so the original definition can be preserved
/// for serialization; expansion happens against a [`Library`]'s full set of
/// definitions.
#[derive(Debug, Clone, PartialEq)]
pub struct StringDefinition<'a> {
    /// String variable name.
    pub name: Cow<'a, str>,
    /// Unexpanded string value.
    pub value: Value<'a>,
    /// Optional source location.
    pub source: Option<SourceSpan>,
}
1022
1023impl<'a> StringDefinition<'a> {
1024    /// Create a string definition.
1025    #[must_use]
1026    pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1027        Self {
1028            name: Cow::Borrowed(name),
1029            value,
1030            source: None,
1031        }
1032    }
1033
1034    /// Return the string name.
1035    #[must_use]
1036    pub fn name(&self) -> &str {
1037        &self.name
1038    }
1039
1040    /// Return the string value.
1041    #[must_use]
1042    pub const fn value(&self) -> &Value<'a> {
1043        &self.value
1044    }
1045
1046    /// Convert to an owned definition.
1047    #[must_use]
1048    pub fn into_owned(self) -> StringDefinition<'static> {
1049        StringDefinition {
1050            name: Cow::Owned(self.name.into_owned()),
1051            value: self.value.into_owned(),
1052            source: self.source,
1053        }
1054    }
1055}
1056
/// A BibTeX preamble block.
///
/// Stores the preamble's value (already expanded during parsing) and, when
/// captured, its location in the source.
#[derive(Debug, Clone, PartialEq)]
pub struct Preamble<'a> {
    /// Expanded preamble value.
    pub value: Value<'a>,
    /// Optional source location.
    pub source: Option<SourceSpan>,
}
1065
1066impl<'a> Preamble<'a> {
1067    /// Create a preamble block.
1068    #[must_use]
1069    pub const fn new(value: Value<'a>) -> Self {
1070        Self {
1071            value,
1072            source: None,
1073        }
1074    }
1075
1076    /// Return the preamble value.
1077    #[must_use]
1078    pub const fn value(&self) -> &Value<'a> {
1079        &self.value
1080    }
1081
1082    /// Convert to an owned preamble.
1083    #[must_use]
1084    pub fn into_owned(self) -> Preamble<'static> {
1085        Preamble {
1086            value: self.value.into_owned(),
1087            source: self.source,
1088        }
1089    }
1090}
1091
// Lets a `Preamble` be used directly wherever its `Value` is expected.
impl<'a> Deref for Preamble<'a> {
    type Target = Value<'a>;

    fn deref(&self) -> &Self::Target {
        &self.value
    }
}
1099
/// A BibTeX comment block.
///
/// Holds the comment's text and, when captured, its source span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment<'a> {
    /// Comment text.
    pub text: Cow<'a, str>,
    /// Optional source location.
    pub source: Option<SourceSpan>,
}
1108
1109impl<'a> Comment<'a> {
1110    /// Create a comment block.
1111    #[must_use]
1112    pub const fn new(text: &'a str) -> Self {
1113        Self {
1114            text: Cow::Borrowed(text),
1115            source: None,
1116        }
1117    }
1118
1119    /// Return the comment text.
1120    #[must_use]
1121    pub fn text(&self) -> &str {
1122        &self.text
1123    }
1124
1125    /// Convert to an owned comment.
1126    #[must_use]
1127    pub fn into_owned(self) -> Comment<'static> {
1128        Comment {
1129            text: Cow::Owned(self.text.into_owned()),
1130            source: self.source,
1131        }
1132    }
1133}
1134
// Lets a `Comment` be used directly wherever `&str` is expected.
impl Deref for Comment<'_> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.text
    }
}
1142
/// A malformed block retained by tolerant parsing.
///
/// Keeps the raw source text that failed to parse together with the parser's
/// error message, so callers can inspect or report what was skipped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FailedBlock<'a> {
    /// Raw source for the malformed block.
    pub raw: Cow<'a, str>,
    /// Parse error message.
    pub error: String,
    /// Optional source location.
    pub source: Option<SourceSpan>,
}
1153
1154impl FailedBlock<'_> {
1155    /// Convert to an owned failed block.
1156    #[must_use]
1157    pub fn into_owned(self) -> FailedBlock<'static> {
1158        FailedBlock {
1159            raw: Cow::Owned(self.raw.into_owned()),
1160            error: self.error,
1161            source: self.source,
1162        }
1163    }
1164}
1165
/// Month rendering style used by month normalization.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MonthStyle {
    /// Full English month names such as `January` (the default).
    #[default]
    Long,
    /// Three-letter lowercase BibTeX abbreviations such as `jan`.
    Abbrev,
    /// One-based month numbers such as `1`.
    Number,
}
1177
/// Entry and field ordering options.
///
/// Both flags default to `false`, leaving the original order untouched.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SortOptions {
    /// Sort regular entries by citation key.
    pub entries_by_key: bool,
    /// Sort fields inside each entry by field name.
    pub fields_by_name: bool,
}
1186
/// Field-name casing policy for field normalization.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameCase {
    /// Preserve existing field names (the default).
    #[default]
    Preserve,
    /// Convert field names to lowercase ASCII.
    Lowercase,
}
1196
/// Field normalization options.
///
/// Defaults to preserving names and leaving BibLaTeX aliases untouched.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FieldNormalizeOptions {
    /// Field-name casing policy.
    pub name_case: FieldNameCase,
    /// Normalize common BibLaTeX aliases to classic BibTeX field names.
    pub biblatex_aliases: bool,
}
1205
/// A parsed BibTeX library.
///
/// Blocks are stored per kind in separate vectors; `block_order` remembers
/// the original interleaving so the input's block sequence can be replayed.
#[derive(Debug, Clone, Default)]
pub struct Library<'a> {
    /// Bibliography entries
    entries: Vec<Entry<'a>>,
    /// Optional entry source spans, parallel to `entries`; stays `None`
    /// until the first span is recorded
    entry_sources: Option<Vec<Option<SourceSpan>>>,
    /// String definitions
    strings: Vec<StringDefinition<'a>>,
    /// Latest string definition by name (a redefinition overwrites the index)
    string_lookup: AHashMap<Cow<'a, str>, usize>,
    /// Preambles
    preambles: Vec<Preamble<'a>>,
    /// Comments
    comments: Vec<Comment<'a>>,
    /// Failed blocks retained during tolerant parsing
    failed_blocks: Vec<FailedBlock<'a>>,
    /// Original block order
    block_order: Vec<BlockKind>,
}
1226
1227impl<'a> Library<'a> {
1228    fn push_entry_with_source(&mut self, entry: Entry<'a>, source: Option<SourceSpan>) {
1229        let index = self.entries.len();
1230        self.entries.push(entry);
1231        if let Some(sources) = &mut self.entry_sources {
1232            sources.push(source);
1233        } else if source.is_some() {
1234            let mut sources = vec![None; index];
1235            sources.push(source);
1236            self.entry_sources = Some(sources);
1237        }
1238        self.block_order.push(BlockKind::Entry(index));
1239    }
1240
1241    fn register_string_definition(
1242        &mut self,
1243        name: Cow<'a, str>,
1244        value: Value<'a>,
1245        source: Option<SourceSpan>,
1246    ) -> usize {
1247        let index = self.strings.len();
1248        self.string_lookup.insert(name.clone(), index);
1249        self.strings.push(StringDefinition {
1250            name,
1251            value,
1252            source,
1253        });
1254        index
1255    }
1256
1257    fn push_string_with_source(
1258        &mut self,
1259        name: Cow<'a, str>,
1260        value: Value<'a>,
1261        source: Option<SourceSpan>,
1262    ) {
1263        let index = self.register_string_definition(name, value, source);
1264        self.block_order.push(BlockKind::String(index));
1265    }
1266
1267    fn push_preamble_with_source(&mut self, value: Value<'a>, source: Option<SourceSpan>) -> usize {
1268        let index = self.preambles.len();
1269        self.preambles.push(Preamble { value, source });
1270        self.block_order.push(BlockKind::Preamble(index));
1271        index
1272    }
1273
1274    fn push_comment_with_source(&mut self, text: Cow<'a, str>, source: Option<SourceSpan>) {
1275        let index = self.comments.len();
1276        self.comments.push(Comment { text, source });
1277        self.block_order.push(BlockKind::Comment(index));
1278    }
1279
1280    fn push_failed_block(&mut self, failed: FailedBlock<'a>) {
1281        let index = self.failed_blocks.len();
1282        self.failed_blocks.push(failed);
1283        self.block_order.push(BlockKind::Failed(index));
1284    }
1285
    /// Expand `value` in place during parsing, resolving built-in month
    /// constants and user-defined `@string` variables.
    ///
    /// The caches (`expanded_variables`, `expansion_stack`, `concat_cache`)
    /// are owned by the caller and reused across many values within one
    /// parse so repeated variables/concatenations expand only once.
    #[inline]
    fn expand_value_for_parse(
        &self,
        value: &mut Value<'a>,
        has_user_strings: bool,
        month_constants_shadowed: bool,
        expanded_variables: &mut ExpansionCache<'a>,
        expansion_stack: &mut Vec<Cow<'a, str>>,
        concat_cache: &mut ConcatCache<'a>,
    ) -> Result<()> {
        match value {
            // Literals and numbers are already fully expanded.
            Value::Literal(_) | Value::Number(_) => Ok(()),
            Value::Variable(name) => {
                // Built-in month constants apply unless user @string
                // definitions shadow a month name.
                if !has_user_strings || !month_constants_shadowed {
                    if let Some(month_value) = get_month_expansion(name.as_ref()) {
                        *value = Value::Literal(Cow::Borrowed(month_value));
                        return Ok(());
                    }
                }

                if has_user_strings {
                    // Cache hit: reuse a previously expanded variable.
                    if let Some(expanded) = expanded_variables.get_cloned(name.as_ref()) {
                        *value = expanded;
                        return Ok(());
                    }

                    // Cache miss: delegate to the caching expander.
                    // NOTE(review): `expansion_stack` presumably tracks
                    // in-flight names to detect cycles — confirm in
                    // `smart_expand_value_cached`.
                    let old_value = std::mem::take(value);
                    *value = self.smart_expand_value_cached(
                        old_value,
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    )?;
                }
                // Without user strings, an unknown variable is left as-is.

                Ok(())
            }
            Value::Concat(parts) => {
                if has_user_strings {
                    // Whole-concatenation cache lookup first.
                    if let Some(expanded) = concat_cache.get_cloned(parts) {
                        *value = expanded;
                        return Ok(());
                    }
                }

                // Only expand when something could actually change: any
                // variable when user strings exist, otherwise only
                // potential month names.
                let needs_expansion = if has_user_strings {
                    parts.iter().any(contains_variables)
                } else {
                    parts.iter().any(contains_potential_month_variables)
                };

                if needs_expansion {
                    if !has_user_strings {
                        // The cache lookup was skipped above; check it now
                        // that we know expansion is required.
                        if let Some(expanded) = concat_cache.get_cloned(parts) {
                            *value = expanded;
                            return Ok(());
                        }
                    }

                    let old_value = std::mem::take(value);
                    *value = self.smart_expand_value_cached(
                        old_value,
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    )?;
                }

                Ok(())
            }
        }
    }
1358
1359    /// Create a new empty library
1360    #[must_use]
1361    #[inline]
1362    pub fn new() -> Self {
1363        Self::default()
1364    }
1365
    /// Create a parser with options
    ///
    /// # Parallel Processing
    ///
    /// The `threads` option only affects `parse_files()`. Single file
    /// parsing with `parse()` is always sequential, regardless of the
    /// configured thread count.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use bibtex_parser::Library;
    /// // Parse multiple files in parallel
    /// let library = Library::parser()
    ///     .threads(4)
    ///     .parse_files(&["file1.bib", "file2.bib"]).unwrap();
    ///
    /// // Single-file parsing stays sequential
    /// let content = "@article{demo, title=\"Demo\"}";
    /// let library = Library::parser()
    ///     .threads(4)
    ///     .parse(content).unwrap();
    /// ```
    #[must_use]
    #[inline]
    pub fn parser() -> Parser {
        Parser::new()
    }
1393
1394    /// Parse a BibTeX library from a string with default strict settings.
1395    pub fn parse(input: &'a str) -> Result<Self> {
1396        Self::parser().parse(input)
1397    }
1398
1399    /// Parse a BibTeX library from a file into owned data.
1400    pub fn parse_file(path: impl AsRef<Path>) -> Result<Library<'static>> {
1401        let content = std::fs::read_to_string(path)?;
1402        Library::parser().parse(&content).map(Library::into_owned)
1403    }
1404
    /// Serialize this library to BibTeX.
    ///
    /// # Errors
    ///
    /// Propagates any error from the writer.
    pub fn to_bibtex(&self) -> Result<String> {
        crate::writer::to_string(self)
    }
1409
    /// Serialize this library to a BibTeX file.
    ///
    /// # Errors
    ///
    /// Propagates any I/O or serialization error from the writer.
    pub fn write_file(&self, path: impl AsRef<Path>) -> Result<()> {
        crate::writer::to_file(self, path)
    }
1414
    /// Parse a BibTeX library from a string (single-threaded implementation)
    ///
    /// Chooses one of three strategies based on a cheap pre-scan of `input`:
    ///
    /// 1. No `@string` definitions at all: expand eagerly while streaming.
    /// 2. All `@string` definitions precede the regular entries: still a
    ///    single pass, but preamble expansion is deferred until every
    ///    string has been seen.
    /// 3. Strings may appear after entries: buffer everything first, then
    ///    expand in a second pass once all definitions are registered.
    #[allow(clippy::too_many_lines)]
    pub(crate) fn parse_sequential(input: &'a str) -> Result<Self> {
        let mut library = Self::new();
        let input_scan = scan_input(input);

        // Fast path for common corpora (like tugboat) with no user-defined strings.
        // This avoids buffering all entries before expansion.
        if !input_scan.may_contain_string_definition {
            library.entries.reserve(input_scan.at_count);
            library.block_order.reserve(input_scan.at_count);
            let has_user_strings = false;
            let month_constants_shadowed = false;
            let mut expanded_variables = ExpansionCache::with_capacity(0);
            let mut expansion_stack = Vec::new();
            let mut concat_cache = ConcatCache::new();

            crate::parser::parse_bibtex_stream(input, |item| {
                match item {
                    crate::parser::ParsedItem::Entry(mut entry) => {
                        // Only month constants can expand here.
                        for field in &mut entry.fields {
                            library.expand_value_for_parse(
                                &mut field.value,
                                has_user_strings,
                                month_constants_shadowed,
                                &mut expanded_variables,
                                &mut expansion_stack,
                                &mut concat_cache,
                            )?;
                        }
                        library.push_entry_with_source(entry, None);
                    }
                    crate::parser::ParsedItem::Preamble(value) => {
                        let mut expanded = value;
                        library.expand_value_for_parse(
                            &mut expanded,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                        library.push_preamble_with_source(expanded, None);
                    }
                    crate::parser::ParsedItem::Comment(text) => {
                        library.push_comment_with_source(Cow::Borrowed(text), None);
                    }
                    crate::parser::ParsedItem::String(name, value) => {
                        // Defensive fallback for scanner false negatives.
                        library.push_string_with_source(Cow::Borrowed(name), value, None);
                    }
                }
                Ok(())
            })?;

            return Ok(library);
        }

        library.block_order.reserve(input_scan.at_count);

        // Single-pass path when all @string definitions appear before regular
        // entries. This keeps correctness while avoiding buffering entries and
        // a full second pass over them.
        if !input_may_have_late_string_definition(input) {
            let mut pending_preambles = Vec::new();
            let mut expanded_variables = ExpansionCache::with_capacity(0);
            let mut expansion_stack = Vec::new();
            let mut concat_cache = ConcatCache::new();
            // Computed lazily on the first entry, once every preceding
            // @string definition has been registered.
            let mut month_constants_shadowed = None;

            crate::parser::parse_bibtex_stream(input, |item| {
                match item {
                    crate::parser::ParsedItem::Entry(mut entry) => {
                        let has_user_strings = !library.strings.is_empty();
                        let month_constants_shadowed = *month_constants_shadowed
                            .get_or_insert_with(|| {
                                has_user_strings
                                    && user_strings_shadow_month_constants(&library.strings)
                            });
                        for field in &mut entry.fields {
                            library.expand_value_for_parse(
                                &mut field.value,
                                has_user_strings,
                                month_constants_shadowed,
                                &mut expanded_variables,
                                &mut expansion_stack,
                                &mut concat_cache,
                            )?;
                        }
                        library.push_entry_with_source(entry, None);
                    }
                    crate::parser::ParsedItem::Preamble(value) => {
                        // Preambles may precede the strings they reference;
                        // expansion is deferred to after the stream ends.
                        let index = library.push_preamble_with_source(value, None);
                        pending_preambles.push(index);
                    }
                    crate::parser::ParsedItem::String(name, value) => {
                        library.push_string_with_source(Cow::Borrowed(name), value, None);
                    }
                    crate::parser::ParsedItem::Comment(text) => {
                        library.push_comment_with_source(Cow::Borrowed(text), None);
                    }
                }
                Ok(())
            })?;

            // Expand the deferred preambles now that all strings are known.
            let has_user_strings = !library.strings.is_empty();
            let month_constants_shadowed =
                has_user_strings && user_strings_shadow_month_constants(&library.strings);
            for index in pending_preambles {
                let mut expanded = std::mem::take(&mut library.preambles[index].value);
                library.expand_value_for_parse(
                    &mut expanded,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.preambles[index].value = expanded;
            }

            return Ok(library);
        }

        // General path: buffer everything, expand afterwards.
        let mut entry_indices = Vec::new();
        let mut preamble_indices = Vec::new();

        crate::parser::parse_bibtex_stream(input, |item| {
            match item {
                crate::parser::ParsedItem::Entry(entry) => {
                    let index = library.entries.len();
                    library.push_entry_with_source(entry, None);
                    entry_indices.push(index);
                }
                crate::parser::ParsedItem::Preamble(value) => {
                    let index = library.push_preamble_with_source(value, None);
                    preamble_indices.push(index);
                }
                crate::parser::ParsedItem::String(name, value) => {
                    library.push_string_with_source(Cow::Borrowed(name), value, None);
                }
                crate::parser::ParsedItem::Comment(text) => {
                    library.push_comment_with_source(Cow::Borrowed(text), None);
                }
            }
            Ok(())
        })?;

        // Expand after parsing so all @string definitions are available globally.
        let has_user_strings = !library.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&library.strings);
        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();

        for entry_index in entry_indices {
            let field_count = library.entries[entry_index].fields.len();
            for field_index in 0..field_count {
                // Take the value out so `library` can be borrowed by the
                // expander while the field slot is temporarily defaulted.
                let mut value =
                    std::mem::take(&mut library.entries[entry_index].fields[field_index].value);
                library.expand_value_for_parse(
                    &mut value,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.entries[entry_index].fields[field_index].value = value;
            }
        }

        for preamble_index in preamble_indices {
            let mut expanded = std::mem::take(&mut library.preambles[preamble_index].value);
            library.expand_value_for_parse(
                &mut expanded,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            library.preambles[preamble_index].value = expanded;
        }

        Ok(library)
    }
1603
1604    fn parse_with_spans(input: &'a str) -> Result<Self> {
1605        let source_map = SourceMap::anonymous(input);
1606        let raw_items = Self::parse_raw_items_with_source(input, &source_map)?;
1607        Self::from_raw_items(raw_items)
1608    }
1609
1610    fn parse_tolerant(input: &'a str, capture_source: bool) -> Result<Self> {
1611        let source_map = SourceMap::anonymous(input);
1612        let raw_items = Self::parse_tolerant_raw_items(input, capture_source, &source_map);
1613        Self::from_raw_items(raw_items)
1614    }
1615
1616    fn parse_raw_items_with_source(
1617        input: &'a str,
1618        source_map: &SourceMap<'_>,
1619    ) -> Result<Vec<RawBuildItem<'a>>> {
1620        let mut raw_items = Vec::new();
1621        crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
1622            let span = if source_map.source_id().is_some() {
1623                source_map.span(span.byte_start, span.byte_end)
1624            } else {
1625                span
1626            };
1627            raw_items.push(RawBuildItem::Parsed(item, span, raw));
1628            Ok(())
1629        })?;
1630        Ok(raw_items)
1631    }
1632
    /// Scan `input` item by item, turning parse failures into
    /// [`FailedBlock`]s instead of aborting the whole parse.
    ///
    /// Spans for failed blocks are recorded only when `capture_source` is
    /// set; spans for successfully parsed items are always recorded.
    fn parse_tolerant_raw_items(
        input: &'a str,
        capture_source: bool,
        source_map: &SourceMap<'_>,
    ) -> Vec<RawBuildItem<'a>> {
        let mut raw_items = Vec::new();
        let mut remaining = input;

        loop {
            crate::parser::lexer::skip_whitespace(&mut remaining);
            if remaining.is_empty() {
                break;
            }

            // Byte offset of the next item within the original input.
            let start = input.len() - remaining.len();
            match crate::parser::parse_item(&mut remaining) {
                Ok(item) => {
                    let end = input.len() - remaining.len();
                    raw_items.push(RawBuildItem::Parsed(
                        item,
                        source_map.span(start, end),
                        &input[start..end],
                    ));
                }
                Err(err) => {
                    // Skip forward to the next recovery boundary and retain
                    // the rejected text as a failed block.
                    let end = next_recovery_boundary(input, start);
                    let source = capture_source.then(|| source_map.span(start, end));
                    raw_items.push(RawBuildItem::Failed(FailedBlock {
                        raw: Cow::Borrowed(&input[start..end]),
                        error: format!("Failed to parse entry: {err}"),
                        source,
                    }));
                    remaining = &input[end..];
                }
            }
        }

        raw_items
    }
1672
    /// Build a library from raw items, expanding entry fields and preambles.
    ///
    /// Runs two passes: the first registers every `@string` definition so
    /// that expansion can see all of them regardless of where they appear in
    /// the input; the second expands values and restores the original block
    /// order.
    fn from_raw_items(raw_items: Vec<RawBuildItem<'a>>) -> Result<Self> {
        let mut library = Self::new();

        // Pass 1: register strings only (block order is recorded in pass 2).
        for raw_item in &raw_items {
            if let RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) =
                raw_item
            {
                library.register_string_definition(Cow::Borrowed(name), value.clone(), Some(*span));
            }
        }

        let has_user_strings = !library.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&library.strings);
        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();
        // Replays, in order, the indices of the strings registered in pass 1.
        let mut string_index = 0;

        // Pass 2: expand values and rebuild the original block order.
        for raw_item in raw_items {
            match raw_item {
                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(mut entry), span, _) => {
                    for field in &mut entry.fields {
                        library.expand_value_for_parse(
                            &mut field.value,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                    }
                    library.push_entry_with_source(entry, Some(span));
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, _) => {
                    // Already registered in pass 1; only record its position.
                    library.block_order.push(BlockKind::String(string_index));
                    string_index += 1;
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(mut value), span, _) => {
                    library.expand_value_for_parse(
                        &mut value,
                        has_user_strings,
                        month_constants_shadowed,
                        &mut expanded_variables,
                        &mut expansion_stack,
                        &mut concat_cache,
                    )?;
                    library.push_preamble_with_source(value, Some(span));
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
                    library.push_comment_with_source(Cow::Borrowed(text), Some(span));
                }
                RawBuildItem::Failed(failed) => library.push_failed_block(failed),
            }
        }

        Ok(library)
    }
1731
1732    fn from_raw_items_unexpanded(raw_items: Vec<RawBuildItem<'a>>) -> Self {
1733        let mut library = Self::new();
1734
1735        for raw_item in raw_items {
1736            match raw_item {
1737                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(entry), span, _) => {
1738                    library.push_entry_with_source(entry, Some(span));
1739                }
1740                RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) => {
1741                    library.push_string_with_source(Cow::Borrowed(name), value, Some(span));
1742                }
1743                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), span, _) => {
1744                    library.push_preamble_with_source(value, Some(span));
1745                }
1746                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
1747                    library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1748                }
1749                RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1750            }
1751        }
1752
1753        library
1754    }
1755
    /// Merge another library into this one
    ///
    /// Entries, strings, preambles, comments and failed blocks from `other`
    /// are appended after this library's existing items, and `other`'s block
    /// order is re-based onto the new indices so its source order survives.
    pub fn merge(&mut self, other: Self) {
        // Current lengths become the index offsets used to re-base
        // `other.block_order` after everything is appended.
        let entry_offset = self.entries.len();
        let string_offset = self.strings.len();
        let preamble_offset = self.preambles.len();
        let comment_offset = self.comments.len();
        let failed_offset = self.failed_blocks.len();
        let other_entry_count = other.entries.len();
        let other_entry_sources = other.entry_sources;

        self.entries.extend(other.entries);
        // Keep `entry_sources` (when either side tracks spans) aligned
        // one-to-one with `entries`.
        match (&mut self.entry_sources, other_entry_sources) {
            (Some(sources), Some(other_sources)) => sources.extend(other_sources),
            (Some(sources), None) => {
                // `other` tracked no spans: pad with `None` for its entries.
                sources.extend(std::iter::repeat(None).take(other_entry_count));
            }
            (None, Some(other_sources)) => {
                // Only `other` tracked spans: backfill `None` for our own
                // pre-existing entries, then append the incoming spans.
                let mut sources = vec![None; entry_offset];
                sources.extend(other_sources);
                self.entry_sources = Some(sources);
            }
            (None, None) => {}
        }
        self.preambles.extend(other.preambles);
        self.comments.extend(other.comments);
        self.failed_blocks.extend(other.failed_blocks);

        // Strings also refresh the name lookup; a name already present keeps
        // its old definition in `strings` but the lookup now points at the
        // incoming one (last definition wins for lookups).
        for definition in other.strings {
            let index = self.strings.len();
            self.string_lookup.insert(definition.name.clone(), index);
            self.strings.push(definition);
        }

        // Shift `other`'s block indices past the items already present here.
        self.block_order
            .extend(other.block_order.into_iter().map(|kind| match kind {
                BlockKind::Entry(index) => BlockKind::Entry(entry_offset + index),
                BlockKind::String(index) => BlockKind::String(string_offset + index),
                BlockKind::Preamble(index) => BlockKind::Preamble(preamble_offset + index),
                BlockKind::Comment(index) => BlockKind::Comment(comment_offset + index),
                BlockKind::Failed(index) => BlockKind::Failed(failed_offset + index),
            }));
    }
1798
1799    #[cfg(feature = "parallel")]
1800    fn merge_libraries_parallel(libraries: Vec<Library<'static>>) -> Library<'static> {
1801        let mut result = Library::new();
1802        for library in libraries {
1803            result.merge(library);
1804        }
1805        result
1806    }
1807
1808    /// Get all entries
1809    #[must_use]
1810    pub fn entries(&self) -> &[Entry<'a>] {
1811        &self.entries
1812    }
1813
    /// Get mutable access to all entries
    ///
    /// NOTE(review): `block_order` refers to entries by index; inserting or
    /// removing entries through this handle may desynchronize `blocks()` —
    /// confirm before relying on it for structural edits.
    #[must_use]
    pub fn entries_mut(&mut self) -> &mut Vec<Entry<'a>> {
        &mut self.entries
    }
1819
1820    /// Get all string definitions
1821    #[must_use]
1822    pub fn strings(&self) -> &[StringDefinition<'a>] {
1823        &self.strings
1824    }
1825
    /// Get a string definition by name.
    ///
    /// Delegates to `get_string_definition`, which consults the
    /// `string_lookup` index over `self.strings`.
    #[must_use]
    pub fn string(&self, name: &str) -> Option<&StringDefinition<'a>> {
        get_string_definition(&self.strings, &self.string_lookup, name)
    }
1831
1832    /// Get a string definition value by name.
1833    #[must_use]
1834    pub fn string_value(&self, name: &str) -> Option<&Value<'a>> {
1835        self.string(name).map(|definition| &definition.value)
1836    }
1837
1838    /// Get all preambles
1839    #[must_use]
1840    pub fn preambles(&self) -> &[Preamble<'a>] {
1841        &self.preambles
1842    }
1843
    /// Get mutable access to preambles
    ///
    /// NOTE(review): `block_order` indexes preambles by position; changing
    /// the list length here may desynchronize `blocks()` — confirm.
    #[must_use]
    pub fn preambles_mut(&mut self) -> &mut Vec<Preamble<'a>> {
        &mut self.preambles
    }
1849
1850    /// Get all comments
1851    #[must_use]
1852    pub fn comments(&self) -> &[Comment<'a>] {
1853        &self.comments
1854    }
1855
    /// Get mutable access to comments
    ///
    /// NOTE(review): `block_order` indexes comments by position; changing
    /// the list length here may desynchronize `blocks()` — confirm.
    #[must_use]
    pub fn comments_mut(&mut self) -> &mut Vec<Comment<'a>> {
        &mut self.comments
    }
1861
1862    /// Get malformed blocks retained by tolerant parsing.
1863    #[must_use]
1864    pub fn failed_blocks(&self) -> &[FailedBlock<'a>] {
1865        &self.failed_blocks
1866    }
1867
1868    /// Return blocks in source order.
1869    #[must_use]
1870    pub fn blocks(&self) -> Vec<Block<'_, 'a>> {
1871        self.block_order
1872            .iter()
1873            .map(|kind| match *kind {
1874                BlockKind::Entry(index) => Block::Entry(
1875                    &self.entries[index],
1876                    self.entry_sources
1877                        .as_ref()
1878                        .and_then(|sources| sources.get(index).copied().flatten()),
1879                ),
1880                BlockKind::String(index) => Block::String(&self.strings[index]),
1881                BlockKind::Preamble(index) => Block::Preamble(&self.preambles[index]),
1882                BlockKind::Comment(index) => Block::Comment(&self.comments[index]),
1883                BlockKind::Failed(index) => Block::Failed(&self.failed_blocks[index]),
1884            })
1885            .collect()
1886    }
1887
1888    #[must_use]
1889    pub(crate) fn entry_source(&self, index: usize) -> Option<SourceSpan> {
1890        self.entry_sources
1891            .as_ref()
1892            .and_then(|sources| sources.get(index).copied().flatten())
1893    }
1894
1895    #[must_use]
1896    pub(crate) fn block_kinds(&self) -> &[BlockKind] {
1897        &self.block_order
1898    }
1899
1900    /// Find entries by key
1901    #[must_use]
1902    pub fn find_by_key(&self, key: &str) -> Option<&Entry<'a>> {
1903        self.entries.iter().find(|e| e.key == key)
1904    }
1905
1906    /// Find entries by key, ignoring ASCII case.
1907    #[must_use]
1908    pub fn find_by_key_ignore_case(&self, key: &str) -> Option<&Entry<'a>> {
1909        self.entries
1910            .iter()
1911            .find(|entry| entry.key.eq_ignore_ascii_case(key))
1912    }
1913
1914    /// Return `true` when the library contains `key`.
1915    #[must_use]
1916    pub fn contains_key(&self, key: &str) -> bool {
1917        self.find_by_key(key).is_some()
1918    }
1919
1920    /// Find entries by type
1921    #[must_use]
1922    pub fn find_by_type(&self, ty: &str) -> Vec<&Entry<'a>> {
1923        self.entries
1924            .iter()
1925            .filter(|e| e.ty.canonical_name().eq_ignore_ascii_case(ty))
1926            .collect()
1927    }
1928
1929    /// Find entries by field value
1930    #[must_use]
1931    pub fn find_by_field(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1932        self.entries
1933            .iter()
1934            .filter(|e| {
1935                e.get_as_string(field)
1936                    .as_ref()
1937                    .is_some_and(|v| v.contains(value))
1938            })
1939            .collect()
1940    }
1941
1942    /// Find entries by field value, ignoring ASCII case for the field name and value.
1943    #[must_use]
1944    pub fn find_by_field_ignore_case(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1945        self.entries
1946            .iter()
1947            .filter(|entry| {
1948                entry
1949                    .get_as_string_ignore_case(field)
1950                    .as_ref()
1951                    .is_some_and(|field_value| contains_case_insensitive(field_value, value))
1952            })
1953            .collect()
1954    }
1955
1956    /// Find entries whose normalized DOI matches `doi`.
1957    #[must_use]
1958    pub fn find_by_doi(&self, doi: &str) -> Vec<&Entry<'a>> {
1959        let Some(needle) = normalize_doi(doi) else {
1960            return Vec::new();
1961        };
1962
1963        self.entries
1964            .iter()
1965            .filter(|entry| entry.doi().as_ref().is_some_and(|value| value == &needle))
1966            .collect()
1967    }
1968
    /// Smart expansion with memoization for repeated variable references.
    ///
    /// `expanded_variables` caches fully-expanded variable values,
    /// `expansion_stack` holds the chain of variables currently being
    /// expanded (used for cycle detection), and `concat_cache` memoizes
    /// concatenation results keyed by their unexpanded parts.
    ///
    /// Errors with `Error::CircularReference` when a variable chain loops
    /// back on itself, and `Error::UndefinedVariable` when a variable is
    /// neither a user-defined string nor a month constant.
    fn smart_expand_value_cached(
        &self,
        value: Value<'a>,
        expanded_variables: &mut ExpansionCache<'a>,
        expansion_stack: &mut Vec<Cow<'a, str>>,
        concat_cache: &mut ConcatCache<'a>,
    ) -> Result<Value<'a>> {
        match value {
            // Simple literals and numbers stay as-is (zero-copy!)
            Value::Literal(_) | Value::Number(_) => Ok(value),

            // Variables need to be resolved
            Value::Variable(name) => {
                let name_text = name.as_ref();
                // Cache hit: this variable was fully expanded earlier.
                if let Some(expanded) = expanded_variables.get_cloned(name_text) {
                    return Ok(expanded);
                }

                // The variable is already somewhere on the expansion stack:
                // expanding it again would recurse forever. Report the whole
                // chain ("a -> b -> a") for diagnosis.
                if expansion_stack.iter().any(|v| v.as_ref() == name_text) {
                    let mut cycle = expansion_stack
                        .iter()
                        .map(std::convert::AsRef::as_ref)
                        .collect::<Vec<_>>()
                        .join(" -> ");
                    if !cycle.is_empty() {
                        cycle.push_str(" -> ");
                    }
                    cycle.push_str(name_text);
                    return Err(Error::CircularReference(cycle));
                }

                // User-defined @string definitions take precedence over the
                // built-in month constants.
                if let Some(user_value) =
                    get_string_value(&self.strings, &self.string_lookup, name_text)
                {
                    // Recursively expand the variable's value and cache the result.
                    expansion_stack.push(name.clone());
                    let expanded = self.smart_expand_value_cached(
                        user_value.clone(),
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    );
                    // Pop before propagating any error so the stack stays
                    // consistent for sibling expansions.
                    expansion_stack.pop();

                    let expanded = expanded?;
                    expanded_variables.insert(name, expanded.clone());
                    Ok(expanded)
                } else {
                    // Check month abbreviations as fallback
                    get_month_expansion(name_text).map_or_else(
                        || {
                            // Variable not found in either user strings or month constants
                            Err(Error::UndefinedVariable(name_text.to_string()))
                        },
                        |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
                    )
                }
            }

            // Concatenations need special handling
            Value::Concat(parts) => {
                // Memoized: the same concatenation expands to the same value.
                if let Some(expanded) = concat_cache.get_cloned(&parts) {
                    return Ok(expanded);
                }

                let cache_key = parts.clone();
                let expanded = self.expand_concatenation_cached(
                    parts.into_vec(),
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
                concat_cache.insert(cache_key, expanded.clone());
                Ok(expanded)
            }
        }
    }
2047
2048    /// Alternative expansion that works with references (requires cloning for variables)
2049    pub fn expand_value_ref(&self, value: &Value<'a>) -> Result<Value<'a>> {
2050        match value {
2051            // Simple literals and numbers can be cloned cheaply
2052            Value::Literal(_) | Value::Number(_) => Ok(value.clone()),
2053
2054            // Variables need to be resolved
2055            Value::Variable(name) => {
2056                // First check user-defined strings
2057                get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
2058                    || {
2059                        // Check month abbreviations as fallback
2060                        get_month_expansion(name.as_ref()).map_or_else(
2061                            || {
2062                                // Variable not found in either user strings or month constants
2063                                Err(Error::UndefinedVariable(name.as_ref().to_string()))
2064                            },
2065                            |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
2066                        )
2067                    },
2068                    |user_value| self.expand_value_ref(user_value),
2069                )
2070            }
2071
2072            // Concatenations need cloning
2073            Value::Concat(parts) => {
2074                let cloned_parts = parts.to_vec();
2075                self.expand_concatenation(cloned_parts)
2076            }
2077        }
2078    }
2079
2080    /// Expand a concatenation, only converting to owned when necessary
2081    fn expand_concatenation(&self, parts: Vec<Value<'a>>) -> Result<Value<'a>> {
2082        let mut expanded_variables = ExpansionCache::with_capacity(0);
2083        let mut expansion_stack = Vec::new();
2084        let mut concat_cache = ConcatCache::new();
2085        self.expand_concatenation_cached(
2086            parts,
2087            &mut expanded_variables,
2088            &mut expansion_stack,
2089            &mut concat_cache,
2090        )
2091    }
2092
2093    /// Cached concatenation expansion used by hot parsing paths.
2094    fn expand_concatenation_cached(
2095        &self,
2096        parts: Vec<Value<'a>>,
2097        expanded_variables: &mut ExpansionCache<'a>,
2098        expansion_stack: &mut Vec<Cow<'a, str>>,
2099        concat_cache: &mut ConcatCache<'a>,
2100    ) -> Result<Value<'a>> {
2101        let mut expanded_parts = Vec::with_capacity(parts.len());
2102
2103        // First, expand all parts
2104        for part in parts {
2105            let expanded = self.smart_expand_value_cached(
2106                part,
2107                expanded_variables,
2108                expansion_stack,
2109                concat_cache,
2110            )?;
2111            expanded_parts.push(expanded);
2112        }
2113
2114        // If all parts are literals or numbers, we can flatten to a single string
2115        if expanded_parts
2116            .iter()
2117            .all(|p| matches!(p, Value::Literal(_) | Value::Number(_)))
2118        {
2119            let combined = concatenate_simple_values(&expanded_parts);
2120            Ok(Value::Literal(Cow::Owned(combined)))
2121        } else {
2122            Ok(Value::Concat(expanded_parts.into_boxed_slice()))
2123        }
2124    }
2125
2126    /// Get a fully expanded string value.
2127    pub fn get_expanded_string(&self, value: &Value<'a>) -> Result<String> {
2128        match value {
2129            Value::Literal(s) => Ok(s.to_string()),
2130            Value::Number(n) => Ok(n.to_string()),
2131            Value::Variable(name) => {
2132                // First check user-defined strings
2133                get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
2134                    || {
2135                        // Check month abbreviations as fallback
2136                        get_month_expansion(name.as_ref()).map_or_else(
2137                            || {
2138                                // Variable not found in either user strings or month constants
2139                                Err(Error::UndefinedVariable(name.as_ref().to_string()))
2140                            },
2141                            |month_value| Ok(month_value.to_string()),
2142                        )
2143                    },
2144                    |user_value| self.get_expanded_string(user_value),
2145                )
2146            }
2147            Value::Concat(parts) => {
2148                let mut result = String::new();
2149                for part in parts.iter() {
2150                    result.push_str(&self.get_expanded_string(part)?);
2151                }
2152                Ok(result)
2153            }
2154        }
2155    }
2156
2157    /// Convert to owned version (no borrowed data)
2158    #[must_use]
2159    pub fn into_owned(self) -> Library<'static> {
2160        let strings = self
2161            .strings
2162            .into_iter()
2163            .map(StringDefinition::into_owned)
2164            .collect::<Vec<_>>();
2165        let mut string_lookup = AHashMap::with_capacity(strings.len());
2166        for (index, definition) in strings.iter().enumerate() {
2167            string_lookup.insert(Cow::Owned(definition.name.to_string()), index);
2168        }
2169
2170        Library {
2171            entries: self.entries.into_iter().map(Entry::into_owned).collect(),
2172            entry_sources: self.entry_sources,
2173            strings,
2174            string_lookup,
2175            preambles: self
2176                .preambles
2177                .into_iter()
2178                .map(Preamble::into_owned)
2179                .collect(),
2180            comments: self.comments.into_iter().map(Comment::into_owned).collect(),
2181            failed_blocks: self
2182                .failed_blocks
2183                .into_iter()
2184                .map(FailedBlock::into_owned)
2185                .collect(),
2186            block_order: self.block_order,
2187        }
2188    }
2189
    /// Add a string definition (useful for building libraries programmatically)
    ///
    /// The definition carries no source span, unlike parsed definitions.
    pub fn add_string(&mut self, name: &'a str, value: Value<'a>) {
        self.push_string_with_source(Cow::Borrowed(name), value, None);
    }
2194
    /// Add an entry programmatically (no source span attached).
    pub fn add_entry(&mut self, entry: Entry<'a>) {
        self.push_entry_with_source(entry, None);
    }
2199
    /// Add a preamble programmatically (no source span attached).
    pub fn add_preamble(&mut self, value: Value<'a>) {
        self.push_preamble_with_source(value, None);
    }
2204
    /// Add a comment programmatically (no source span attached).
    pub fn add_comment(&mut self, comment: &'a str) {
        self.push_comment_with_source(Cow::Borrowed(comment), None);
    }
2209
    /// Resolve string variables and concatenations in entries and preambles in place.
    ///
    /// Errors from expansion (undefined variables, circular references) are
    /// propagated; values processed before the failure remain expanded.
    pub fn resolve_strings(&mut self) -> Result<()> {
        let has_user_strings = !self.strings.is_empty();
        // Shadow-checking month constants is only needed when the user
        // actually defined any @string entries at all.
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&self.strings);
        // Caches are shared across every field and preamble in this pass.
        let mut expanded_variables = ExpansionCache::with_capacity(self.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();

        // Index-based loops plus `mem::take` let us move each value out,
        // call the expansion helper on `self`, and write the result back
        // without holding a long-lived borrow of the entry.
        for entry_index in 0..self.entries.len() {
            let field_count = self.entries[entry_index].fields.len();
            for field_index in 0..field_count {
                let mut value =
                    std::mem::take(&mut self.entries[entry_index].fields[field_index].value);
                self.expand_value_for_parse(
                    &mut value,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                self.entries[entry_index].fields[field_index].value = value;
            }
        }

        // Preambles get the same treatment as entry fields.
        for preamble_index in 0..self.preambles.len() {
            let mut value = std::mem::take(&mut self.preambles[preamble_index].value);
            self.expand_value_for_parse(
                &mut value,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            self.preambles[preamble_index].value = value;
        }

        Ok(())
    }
2251
2252    /// Normalize DOI fields to lowercase `10.x/...` form when recognizable.
2253    pub fn normalize_doi_fields(&mut self) {
2254        for entry in &mut self.entries {
2255            for field in &mut entry.fields {
2256                if field.name.eq_ignore_ascii_case("doi") {
2257                    if let Some(normalized) = normalize_doi(&field.value.to_plain_string()) {
2258                        field.value = Value::Literal(Cow::Owned(normalized));
2259                    }
2260                }
2261            }
2262        }
2263    }
2264
2265    /// Normalize month fields to a chosen representation.
2266    pub fn normalize_months(&mut self, style: MonthStyle) {
2267        for entry in &mut self.entries {
2268            for field in &mut entry.fields {
2269                if field.name.eq_ignore_ascii_case("month") {
2270                    if let Some(month) =
2271                        normalize_month_value(&field.value.to_plain_string(), style)
2272                    {
2273                        field.value = month;
2274                    }
2275                }
2276            }
2277        }
2278    }
2279
2280    /// Normalize field names and common BibLaTeX aliases.
2281    pub fn normalize_fields(&mut self, options: FieldNormalizeOptions) {
2282        for entry in &mut self.entries {
2283            for field in &mut entry.fields {
2284                let mut name = if options.biblatex_aliases {
2285                    canonical_biblatex_field_alias(&field.name)
2286                        .unwrap_or_else(|| field.name.as_ref())
2287                        .to_string()
2288                } else {
2289                    field.name.to_string()
2290                };
2291
2292                if options.name_case == FieldNameCase::Lowercase {
2293                    name.make_ascii_lowercase();
2294                }
2295
2296                if name != field.name {
2297                    field.name = Cow::Owned(name);
2298                }
2299            }
2300        }
2301    }
2302
    /// Sort entries and/or fields in place.
    ///
    /// When entries are reordered, `block_order` is rebuilt grouped by kind,
    /// so the original interleaving of blocks is not preserved.
    pub fn sort(&mut self, options: SortOptions) {
        if options.fields_by_name {
            for entry in &mut self.entries {
                entry
                    .fields
                    .sort_by(|left, right| left.name.cmp(&right.name));
            }
        }

        if options.entries_by_key {
            if let Some(sources) = self.entry_sources.take() {
                // Sort entries and their source spans together so each span
                // stays attached to its entry.
                let mut entries = self.entries.drain(..).zip(sources).collect::<Vec<_>>();
                entries.sort_by(|(left, _), (right, _)| left.key.cmp(&right.key));
                let (sorted_entries, sorted_sources): (Vec<_>, Vec<_>) =
                    entries.into_iter().unzip();
                self.entries = sorted_entries;
                self.entry_sources = Some(sorted_sources);
            } else {
                self.entries.sort_by(|left, right| left.key.cmp(&right.key));
            }
            self.rebuild_grouped_block_order();
        }
    }
2327
2328    fn rebuild_grouped_block_order(&mut self) {
2329        self.block_order.clear();
2330        self.block_order
2331            .extend((0..self.strings.len()).map(BlockKind::String));
2332        self.block_order
2333            .extend((0..self.preambles.len()).map(BlockKind::Preamble));
2334        self.block_order
2335            .extend((0..self.comments.len()).map(BlockKind::Comment));
2336        self.block_order
2337            .extend((0..self.entries.len()).map(BlockKind::Entry));
2338        self.block_order
2339            .extend((0..self.failed_blocks.len()).map(BlockKind::Failed));
2340    }
2341
2342    /// Validate all entries in the library
2343    /// Returns a list of entries with their indices and validation errors
2344    #[must_use]
2345    pub fn validate(
2346        &self,
2347        level: ValidationLevel,
2348    ) -> Vec<(usize, &Entry<'a>, Vec<ValidationError>)> {
2349        let mut invalid_entries = Vec::new();
2350
2351        for (index, entry) in self.entries.iter().enumerate() {
2352            if let Err(errors) = entry.validate(level) {
2353                invalid_entries.push((index, entry, errors));
2354            }
2355        }
2356
2357        invalid_entries
2358    }
2359
2360    /// Check for duplicate citation keys
2361    /// Returns a list of duplicate keys (each key appears once in the list even if it has multiple duplicates)
2362    #[must_use]
2363    pub fn find_duplicate_keys(&self) -> Vec<&str> {
2364        let mut seen = std::collections::HashSet::new();
2365        let mut duplicates = std::collections::HashSet::new();
2366
2367        for entry in &self.entries {
2368            if !seen.insert(entry.key()) {
2369                duplicates.insert(entry.key());
2370            }
2371        }
2372
2373        duplicates.into_iter().collect()
2374    }
2375
2376    /// Check for duplicate citation keys, ignoring ASCII case.
2377    #[must_use]
2378    pub fn find_duplicate_keys_ignore_case(&self) -> Vec<String> {
2379        let mut seen = std::collections::HashSet::new();
2380        let mut duplicates = std::collections::HashSet::new();
2381
2382        for entry in &self.entries {
2383            let normalized_key = entry.key().to_ascii_lowercase();
2384            if !seen.insert(normalized_key.clone()) {
2385                duplicates.insert(normalized_key);
2386            }
2387        }
2388
2389        duplicates.into_iter().collect()
2390    }
2391
2392    /// Find duplicate DOI groups using normalized DOI values.
2393    #[must_use]
2394    pub fn find_duplicate_dois(&self) -> Vec<(String, Vec<&Entry<'a>>)> {
2395        let mut groups: AHashMap<String, Vec<&Entry<'a>>> = AHashMap::new();
2396        for entry in &self.entries {
2397            if let Some(doi) = entry.doi() {
2398                groups.entry(doi).or_default().push(entry);
2399            }
2400        }
2401
2402        groups
2403            .into_iter()
2404            .filter(|(_, entries)| entries.len() > 1)
2405            .collect()
2406    }
2407
2408    /// Validate all entries and return a comprehensive validation report
2409    #[must_use]
2410    pub fn validate_comprehensive(&self, level: ValidationLevel) -> ValidationReport<'_> {
2411        let invalid_entries = self.validate(level);
2412        let duplicate_keys = self.find_duplicate_keys();
2413        let empty_entries = self.find_empty_entries();
2414
2415        ValidationReport {
2416            invalid_entries,
2417            duplicate_keys,
2418            empty_entries,
2419            total_entries: self.entries.len(),
2420            validation_level: level,
2421        }
2422    }
2423
2424    /// Find entries with no fields (only key and type)
2425    fn find_empty_entries(&self) -> Vec<(usize, &Entry<'a>)> {
2426        self.entries
2427            .iter()
2428            .enumerate()
2429            .filter(|(_, entry)| entry.fields().is_empty())
2430            .collect()
2431    }
2432
2433    /// Get statistics about the library
2434    #[must_use]
2435    pub fn stats(&self) -> LibraryStats {
2436        let mut type_counts = AHashMap::new();
2437        for entry in &self.entries {
2438            *type_counts.entry(entry.ty.to_string()).or_insert(0) += 1;
2439        }
2440
2441        LibraryStats {
2442            total_entries: self.entries.len(),
2443            total_strings: self.strings.len(),
2444            total_preambles: self.preambles.len(),
2445            total_comments: self.comments.len(),
2446            entries_by_type: type_counts,
2447        }
2448    }
2449}
2450
/// Statistics about a library, as produced by [`Library::stats`].
#[derive(Debug, Clone)]
pub struct LibraryStats {
    /// Total number of entries
    pub total_entries: usize,
    /// Total number of string definitions
    pub total_strings: usize,
    /// Total number of preambles
    pub total_preambles: usize,
    /// Total number of comments
    pub total_comments: usize,
    /// Entry counts keyed by the entry type's string form
    pub entries_by_type: AHashMap<String, usize>,
}
2465
/// Comprehensive validation report for a library, as produced by
/// [`Library::validate_comprehensive`].
#[derive(Debug, Clone)]
pub struct ValidationReport<'a> {
    /// Entries that failed validation with their errors
    pub invalid_entries: Vec<(usize, &'a Entry<'a>, Vec<ValidationError>)>,
    /// Duplicate citation keys (each duplicated key listed once)
    pub duplicate_keys: Vec<&'a str>,
    /// Entries with no fields
    pub empty_entries: Vec<(usize, &'a Entry<'a>)>,
    /// Total number of entries in the library
    pub total_entries: usize,
    /// Validation level used
    pub validation_level: ValidationLevel,
}
2480
2481impl ValidationReport<'_> {
2482    /// Check if the library is completely valid
2483    #[must_use]
2484    pub fn is_valid(&self) -> bool {
2485        self.invalid_entries.is_empty()
2486            && self.duplicate_keys.is_empty()
2487            && self.empty_entries.is_empty()
2488    }
2489
2490    /// Get total number of issues found
2491    #[must_use]
2492    pub fn total_issues(&self) -> usize {
2493        self.invalid_entries.len() + self.duplicate_keys.len() + self.empty_entries.len()
2494    }
2495
2496    /// Get a summary of issues by severity
2497    #[must_use]
2498    pub fn issue_summary(&self) -> IssueSummary {
2499        let mut errors = 0;
2500        let mut warnings = 0;
2501        let mut infos = 0;
2502
2503        for (_, _, validation_errors) in &self.invalid_entries {
2504            for error in validation_errors {
2505                match error.severity {
2506                    crate::model::ValidationSeverity::Error => errors += 1,
2507                    crate::model::ValidationSeverity::Warning => warnings += 1,
2508                    crate::model::ValidationSeverity::Info => infos += 1,
2509                }
2510            }
2511        }
2512
2513        // Duplicate keys and empty entries are considered errors
2514        errors += self.duplicate_keys.len() + self.empty_entries.len();
2515
2516        IssueSummary {
2517            errors,
2518            warnings,
2519            infos,
2520        }
2521    }
2522}
2523
/// Summary of validation issues by severity, as produced by
/// [`ValidationReport::issue_summary`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssueSummary {
    /// Number of error-level issues (includes duplicate keys and empty entries)
    pub errors: usize,
    /// Number of warning-level issues
    pub warnings: usize,
    /// Number of info-level issues
    pub infos: usize,
}
2534
2535/// Concatenate simple values (literals and numbers) into a single string
2536fn concatenate_simple_values(values: &[Value]) -> String {
2537    let mut result = String::new();
2538
2539    // Pre-calculate capacity for efficiency
2540    let capacity: usize = values
2541        .iter()
2542        .map(|v| match v {
2543            Value::Literal(s) => s.len(),
2544            Value::Number(n) => n.to_string().len(),
2545            _ => 0,
2546        })
2547        .sum();
2548
2549    result.reserve(capacity);
2550
2551    for value in values {
2552        match value {
2553            Value::Literal(s) => result.push_str(s),
2554            Value::Number(n) => result.push_str(&n.to_string()),
2555            _ => {} // Should not happen given the precondition
2556        }
2557    }
2558
2559    result
2560}
2561
/// Case-insensitive substring test.
///
/// An empty needle is always considered contained, mirroring `str::contains`.
/// Both sides are lowercased with full Unicode rules before matching.
fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
    needle.is_empty() || haystack.to_lowercase().contains(&needle.to_lowercase())
}
2569
2570fn normalize_month_value(input: &str, style: MonthStyle) -> Option<Value<'static>> {
2571    let normalized = input.trim().trim_matches(['{', '}']).to_ascii_lowercase();
2572    let month_index = match normalized.as_str() {
2573        "jan" | "january" | "1" | "01" => 1,
2574        "feb" | "february" | "2" | "02" => 2,
2575        "mar" | "march" | "3" | "03" => 3,
2576        "apr" | "april" | "4" | "04" => 4,
2577        "may" | "5" | "05" => 5,
2578        "jun" | "june" | "6" | "06" => 6,
2579        "jul" | "july" | "7" | "07" => 7,
2580        "aug" | "august" | "8" | "08" => 8,
2581        "sep" | "september" | "9" | "09" => 9,
2582        "oct" | "october" | "10" => 10,
2583        "nov" | "november" | "11" => 11,
2584        "dec" | "december" | "12" => 12,
2585        _ => return None,
2586    };
2587
2588    let text = match style {
2589        MonthStyle::Long => month_long_name(month_index),
2590        MonthStyle::Abbrev => month_abbreviation(month_index),
2591        MonthStyle::Number => return Some(Value::Number(month_index)),
2592    };
2593
2594    Some(Value::Literal(Cow::Borrowed(text)))
2595}
2596
/// Full English month name for a 1-based month index; empty string when
/// the index is out of range.
const fn month_long_name(month: i64) -> &'static str {
    const NAMES: [&str; 12] = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ];
    if 1 <= month && month <= 12 {
        NAMES[(month - 1) as usize]
    } else {
        ""
    }
}
2614
/// Lowercase three-letter BibTeX abbreviation for a 1-based month index;
/// empty string when the index is out of range.
const fn month_abbreviation(month: i64) -> &'static str {
    const ABBREVS: [&str; 12] = [
        "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    ];
    if 1 <= month && month <= 12 {
        ABBREVS[(month - 1) as usize]
    } else {
        ""
    }
}
2632
/// Builder for creating libraries programmatically
///
/// Collects entries, string definitions, preambles, and comments through
/// chained calls; `build` yields the assembled `Library`.
#[derive(Debug, Default)]
pub struct LibraryBuilder<'a> {
    // Library under construction; returned as-is by `build`.
    library: Library<'a>,
}
2638
2639impl<'a> LibraryBuilder<'a> {
2640    /// Create a new builder
2641    #[must_use]
2642    pub fn new() -> Self {
2643        Self::default()
2644    }
2645
2646    /// Add an entry
2647    #[must_use]
2648    pub fn entry(mut self, entry: Entry<'a>) -> Self {
2649        self.library.add_entry(entry);
2650        self
2651    }
2652
2653    /// Add a string definition
2654    #[must_use]
2655    pub fn string(mut self, name: &'a str, value: Value<'a>) -> Self {
2656        self.library.add_string(name, value);
2657        self
2658    }
2659
2660    /// Add a preamble
2661    #[must_use]
2662    pub fn preamble(mut self, value: Value<'a>) -> Self {
2663        self.library.add_preamble(value);
2664        self
2665    }
2666
2667    /// Add a comment
2668    #[must_use]
2669    pub fn comment(mut self, text: &'a str) -> Self {
2670        self.library.add_comment(text);
2671        self
2672    }
2673
2674    /// Build the library
2675    #[must_use]
2676    pub fn build(self) -> Library<'a> {
2677        self.library
2678    }
2679}
2680
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{EntryType, Field};

    #[test]
    fn test_library_parse() {
        let input = r#"
            @string{me = "John Doe"}

            @article{test2023,
                author = me,
                title = "Test Article",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);

        let entry = &library.entries()[0];
        // Use get_as_string since the value might be a variable reference
        assert_eq!(entry.get_as_string("author").unwrap(), "John Doe");
    }

    #[test]
    fn test_zero_copy_preservation() {
        let input = r#"
            @article{test,
                title = "This is borrowed",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        // The title should still be borrowed from the input
        if let Some(Value::Literal(cow)) = entry
            .fields
            .iter()
            .find(|f| f.name == "title")
            .map(|f| &f.value)
        {
            assert!(matches!(cow, Cow::Borrowed(_)));
        }
    }

    #[test]
    fn test_concatenation_creates_owned() {
        let input = r#"
            @string{first = "Hello"}
            @string{second = "World"}

            @article{test,
                title = first # ", " # second
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        // Concatenation should create an owned string
        assert_eq!(entry.get_as_string("title").unwrap(), "Hello, World");
    }

    #[test]
    fn test_boxed_concat_memory_optimization() {
        // Verify that boxing the concat variant keeps Value at 32 bytes or
        // less (the comment previously claimed 24, contradicting the assert).
        assert!(
            std::mem::size_of::<Value>() <= 32,
            "Value enum is {} bytes, should be 32 or less",
            std::mem::size_of::<Value>()
        );
    }

    #[test]
    fn test_field_vec_capacity_bounded() {
        let input = r#"
            @article{test,
                a = "1", b = "2", c = "3", d = "4", e = "5",
                f = "6", g = "7", h = "8", i = "9", j = "10"
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        assert_eq!(entry.fields.len(), 10);
        assert!(
            entry.fields.capacity() <= 17,
            "Unexpected field Vec growth: len={}, capacity={}",
            entry.fields.len(),
            entry.fields.capacity()
        );
    }

    #[test]
    fn test_library_builder() {
        let library = LibraryBuilder::new()
            .string("me", Value::Literal(Cow::Borrowed("John Doe")))
            .entry(Entry {
                ty: EntryType::Article,
                key: Cow::Borrowed("test2023"),
                fields: vec![
                    Field::new("author", Value::Variable(Cow::Borrowed("me"))),
                    Field::new("title", Value::Literal(Cow::Borrowed("Test"))),
                ],
            })
            .build();

        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);
    }

    #[test]
    fn test_library_stats() {
        let input = r#"
            @string{ieee = "IEEE"}
            @preamble{"Test preamble"}
            % This is a percent comment that now works properly
            @comment{This is a formal comment that works}
            @article{a1, title = "Article 1"}
            @article{a2, title = "Article 2"}
            @book{b1, title = "Book 1"}
        "#;

        let library = Library::parser().parse(input).unwrap();
        let stats = library.stats();

        assert_eq!(stats.total_entries, 3);
        assert_eq!(stats.total_strings, 1);
        assert_eq!(stats.total_preambles, 1);
        assert_eq!(stats.total_comments, 2); // Both % and @comment should work
        assert_eq!(stats.entries_by_type.get("article"), Some(&2));
        assert_eq!(stats.entries_by_type.get("book"), Some(&1));
    }

    #[test]
    fn test_parse_files_parallel() {
        use std::fs::write;
        use std::path::PathBuf;

        let dir = std::env::temp_dir();
        // Include the process id so concurrent test runs don't clobber each
        // other's fixture files (consistent with test_builder_pattern_api).
        let path1 = dir.join(format!("parallel_test1-{}.bib", std::process::id()));
        let path2 = dir.join(format!("parallel_test2-{}.bib", std::process::id()));

        write(&path1, "@article{a1,title=\"A\"}").unwrap();
        write(&path2, "@article{a2,title=\"B\"}").unwrap();

        let paths: Vec<PathBuf> = vec![path1.clone(), path2.clone()];

        let library = Library::parser().threads(2).parse_files(&paths).unwrap();

        assert_eq!(library.entries().len(), 2);

        let _ = std::fs::remove_file(path1);
        let _ = std::fs::remove_file(path2);
    }

    #[test]
    fn test_builder_pattern_api() {
        let input = "@article{test, title = \"Test\"}";

        // Single-threaded (default)
        let db1 = Library::parser().parse(input).unwrap();
        assert_eq!(db1.entries().len(), 1);

        // Using parser builder
        let library2 = Library::parser().threads(1).parse(input).unwrap();
        assert_eq!(library2.entries().len(), 1);

        #[cfg(feature = "parallel")]
        {
            use std::fs::write;

            // Parallel only works for multiple files
            let db3 = Library::parser().threads(4).parse(input).unwrap();
            assert_eq!(db3.entries().len(), 1);

            // Multi-file parallel processing
            let dir = std::env::temp_dir();
            let path1 = dir.join(format!("bibtex-parser-test1-{}.bib", std::process::id()));
            let path2 = dir.join(format!("bibtex-parser-test2-{}.bib", std::process::id()));
            write(&path1, "@article{a1, title=\"A\"}").unwrap();
            write(&path2, "@article{a2, title=\"B\"}").unwrap();

            let db4 = Library::parser()
                .threads(2)
                .parse_files(&[path1.as_path(), path2.as_path()])
                .unwrap();
            assert_eq!(db4.entries().len(), 2);

            let _ = std::fs::remove_file(path1);
            let _ = std::fs::remove_file(path2);
        }
    }
}