Skip to main content

bibtex_parser/
library.rs

1//! BibTeX library representation
2
3use crate::{
4    canonical_biblatex_field_alias, normalize_doi, CorpusEvent, CorpusSource, Entry, Error,
5    ParseEvent, ParseFlow, ParsedComment, ParsedCorpus, ParsedDocument, ParsedEntry,
6    ParsedFailedBlock, ParsedPreamble, ParsedSource, ParsedString, Result, SourceId, SourceMap,
7    SourceSpan, StreamingSummary, ValidationError, ValidationLevel, Value,
8};
9use ahash::AHashMap;
10use memchr::memchr;
11use std::borrow::Cow;
12use std::ops::Deref;
13use std::path::Path;
14
15#[cfg(feature = "parallel")]
16use rayon::prelude::*;
17
/// Largest capacity for which `ExpansionCache` uses its linear `Small`
/// representation; it is also the entry count at which a `Small` cache is
/// promoted to the hash-map representation on insert.
const SMALL_EXPANSION_CACHE_LIMIT: usize = 16;
/// Largest number of string definitions that `get_string_definition` resolves
/// with a linear scan instead of the hash index.
const SMALL_STRING_LOOKUP_LIMIT: usize = 16;
/// Maximum number of entries `ConcatCache` retains; inserts beyond this are
/// silently dropped.
const CONCAT_CACHE_LIMIT: usize = 16;
21
22enum ExpansionCache<'a> {
23    Small(Vec<(Cow<'a, str>, Value<'a>)>),
24    Large(AHashMap<Cow<'a, str>, Value<'a>>),
25}
26
27impl<'a> ExpansionCache<'a> {
28    fn with_capacity(capacity: usize) -> Self {
29        if capacity <= SMALL_EXPANSION_CACHE_LIMIT {
30            Self::Small(Vec::with_capacity(capacity))
31        } else {
32            Self::Large(AHashMap::with_capacity(capacity))
33        }
34    }
35
36    fn get_cloned(&mut self, name: &str) -> Option<Value<'a>> {
37        match self {
38            Self::Small(entries) => {
39                let index = entries.iter().position(|(key, _)| key.as_ref() == name)?;
40                if index != 0 {
41                    entries.swap(0, index);
42                }
43                Some(entries[0].1.clone())
44            }
45            Self::Large(entries) => entries.get(name).cloned(),
46        }
47    }
48
49    fn insert(&mut self, name: Cow<'a, str>, value: Value<'a>) {
50        match self {
51            Self::Small(entries) => {
52                if entries.len() < SMALL_EXPANSION_CACHE_LIMIT {
53                    entries.push((name, value));
54                } else {
55                    let mut large = AHashMap::with_capacity(entries.len() + 1);
56                    for (key, value) in entries.drain(..) {
57                        large.insert(key, value);
58                    }
59                    large.insert(name, value);
60                    *self = Self::Large(large);
61                }
62            }
63            Self::Large(entries) => {
64                entries.insert(name, value);
65            }
66        }
67    }
68}
69
70struct ConcatCache<'a> {
71    entries: Vec<(Box<[Value<'a>]>, Value<'a>)>,
72}
73
74impl<'a> ConcatCache<'a> {
75    const fn new() -> Self {
76        Self {
77            entries: Vec::new(),
78        }
79    }
80
81    fn get_cloned(&mut self, parts: &[Value<'a>]) -> Option<Value<'a>> {
82        let index = self
83            .entries
84            .iter()
85            .position(|(cached_parts, _)| concat_parts_equal(cached_parts, parts))?;
86        if index != 0 {
87            self.entries.swap(0, index);
88        }
89        Some(self.entries[0].1.clone())
90    }
91
92    fn insert(&mut self, parts: Box<[Value<'a>]>, value: Value<'a>) {
93        if self.entries.len() < CONCAT_CACHE_LIMIT {
94            self.entries.push((parts, value));
95        }
96    }
97}
98
99fn concat_parts_equal(left: &[Value<'_>], right: &[Value<'_>]) -> bool {
100    left.len() == right.len()
101        && left
102            .iter()
103            .zip(right)
104            .all(|(left, right)| cache_values_equal(left, right))
105}
106
107fn cache_values_equal(left: &Value<'_>, right: &Value<'_>) -> bool {
108    match (left, right) {
109        (Value::Literal(left), Value::Literal(right))
110        | (Value::Variable(left), Value::Variable(right)) => left.as_ref() == right.as_ref(),
111        (Value::Number(left), Value::Number(right)) => left == right,
112        (Value::Concat(left), Value::Concat(right)) => concat_parts_equal(left, right),
113        _ => false,
114    }
115}
116
/// Look up the full English month name for a three-letter abbreviation.
///
/// Matching is ASCII case-insensitive (`jan`, `Jan` and `JAN` all resolve to
/// `"January"`). Any name that is not exactly three bytes long, or that is not
/// one of the twelve abbreviations, yields `None`.
/// This is used as a fallback when user-defined string variables are not found.
#[inline]
fn get_month_expansion(name: &str) -> Option<&'static str> {
    // Only names of exactly three bytes can be month abbreviations.
    let lowered = match *name.as_bytes() {
        [first, second, third] => [
            first.to_ascii_lowercase(),
            second.to_ascii_lowercase(),
            third.to_ascii_lowercase(),
        ],
        _ => return None,
    };

    match &lowered {
        b"jan" => Some("January"),
        b"feb" => Some("February"),
        b"mar" => Some("March"),
        b"apr" => Some("April"),
        b"may" => Some("May"),
        b"jun" => Some("June"),
        b"jul" => Some("July"),
        b"aug" => Some("August"),
        b"sep" => Some("September"),
        b"oct" => Some("October"),
        b"nov" => Some("November"),
        b"dec" => Some("December"),
        _ => None,
    }
}
148
149#[inline]
150fn get_string_value<'map, 'a>(
151    strings: &'map [StringDefinition<'a>],
152    string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
153    name: &str,
154) -> Option<&'map Value<'a>> {
155    get_string_definition(strings, string_lookup, name).map(|definition| &definition.value)
156}
157
158#[inline]
159fn get_string_definition<'map, 'a>(
160    strings: &'map [StringDefinition<'a>],
161    string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
162    name: &str,
163) -> Option<&'map StringDefinition<'a>> {
164    if strings.len() <= SMALL_STRING_LOOKUP_LIMIT {
165        strings
166            .iter()
167            .rev()
168            .find(|definition| definition.name.as_ref() == name)
169    } else {
170        string_lookup
171            .get(name)
172            .and_then(|&index| strings.get(index))
173    }
174}
175
176#[inline]
177fn user_strings_shadow_month_constants(strings: &[StringDefinition<'_>]) -> bool {
178    strings
179        .iter()
180        .any(|definition| get_month_expansion(definition.name.as_ref()).is_some())
181}
182
183/// Check if a value contains any variables
184#[inline]
185fn contains_variables(value: &Value) -> bool {
186    match value {
187        Value::Variable(_) => true,
188        Value::Concat(parts) => parts.iter().any(contains_variables),
189        _ => false,
190    }
191}
192
193/// Check if a value contains variables that might be month constants
194#[inline]
195fn contains_potential_month_variables(value: &Value) -> bool {
196    match value {
197        Value::Variable(name) => get_month_expansion(name).is_some(),
198        Value::Concat(parts) => parts.iter().any(contains_potential_month_variables),
199        _ => false,
200    }
201}
202
/// Report whether `byte` may appear in an identifier: an ASCII letter or
/// digit, or one of `_`, `-`, `:` and `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
210
211#[inline]
212fn starts_with_at_keyword(input: &[u8], keyword: &[u8]) -> bool {
213    if input.first() != Some(&b'@') || input.len() < keyword.len() + 1 {
214        return false;
215    }
216
217    for (offset, &expected) in keyword.iter().enumerate() {
218        if (input[offset + 1] | 0x20) != expected {
219            return false;
220        }
221    }
222
223    if input.len() == keyword.len() + 1 {
224        return true;
225    }
226
227    !is_identifier_char(input[keyword.len() + 1])
228}
229
/// Result of the pre-scan performed by `scan_input`.
#[derive(Clone, Copy, Debug)]
struct InputScan {
    /// Set when at least one `@` in the input is followed by the `string` keyword.
    may_contain_string_definition: bool,
    /// Number of `@` bytes found in the input.
    at_count: usize,
}
235
236/// Fast pre-scan to detect `@string` entries and estimate block capacity.
237fn scan_input(input: &str) -> InputScan {
238    let bytes = input.as_bytes();
239    let mut pos = 0;
240    let mut at_count = 0;
241    let mut may_contain_string_definition = false;
242
243    while pos < bytes.len() {
244        if let Some(offset) = memchr(b'@', &bytes[pos..]) {
245            let at = pos + offset;
246            at_count += 1;
247            if starts_with_at_keyword(&bytes[at..], b"string") {
248                may_contain_string_definition = true;
249            }
250            pos = at + 1;
251        } else {
252            break;
253        }
254    }
255
256    InputScan {
257        may_contain_string_definition,
258        at_count,
259    }
260}
261
262/// Detect whether a `@string` may appear after a regular entry.
263///
264/// False positives are acceptable (we take the conservative slow path), but
265/// false negatives would be incorrect, so keyword matching mirrors parser rules.
266fn input_may_have_late_string_definition(input: &str) -> bool {
267    let bytes = input.as_bytes();
268    let mut pos = 0;
269    let mut saw_regular_entry = false;
270
271    while pos < bytes.len() {
272        if let Some(offset) = memchr(b'@', &bytes[pos..]) {
273            let at = pos + offset;
274            let tail = &bytes[at..];
275
276            if starts_with_at_keyword(tail, b"string") {
277                if saw_regular_entry {
278                    return true;
279                }
280            } else if !saw_regular_entry
281                && !starts_with_at_keyword(tail, b"preamble")
282                && !starts_with_at_keyword(tail, b"comment")
283            {
284                // Anything else that looks like `@<identifier>` is treated as a regular entry.
285                saw_regular_entry = true;
286            }
287
288            pos = at + 1;
289        } else {
290            break;
291        }
292    }
293
294    false
295}
296
297fn next_recovery_boundary(input: &str, start: usize) -> usize {
298    let bytes = input.as_bytes();
299    let mut pos = start.saturating_add(1);
300    while pos < bytes.len() {
301        if bytes[pos] == b'@' && line_prefix_is_whitespace(bytes, pos) {
302            return pos;
303        }
304        pos += 1;
305    }
306    input.len()
307}
308
/// Report whether everything between the start of the line and `pos` is blank.
///
/// The line starts after the closest `\n` or `\r` before `pos` (or at offset 0),
/// and "blank" means only spaces and tabs. An empty prefix counts as blank.
///
/// The prefix is scanned backwards once and the scan stops at the first byte
/// that is not a space or tab, so the cost is bounded by the run of blanks
/// before `pos` rather than by the length of the line. This keeps recovery
/// scanning linear on inputs with very long lines that contain many `@`.
///
/// # Panics
///
/// Panics if `pos` is greater than `bytes.len()`.
fn line_prefix_is_whitespace(bytes: &[u8], pos: usize) -> bool {
    // The nearest non-blank byte before `pos` decides the answer: a line break
    // (or no such byte at all) means the prefix is blank.
    bytes[..pos]
        .iter()
        .rfind(|&&byte| !matches!(byte, b' ' | b'\t'))
        .map_or(true, |&byte| matches!(byte, b'\n' | b'\r'))
}
319
320fn merge_streaming_summary(total: &mut StreamingSummary, source: StreamingSummary) {
321    total.entries += source.entries;
322    total.strings += source.strings;
323    total.preambles += source.preambles;
324    total.comments += source.comments;
325    total.failed_blocks += source.failed_blocks;
326    total.warnings += source.warnings;
327    total.errors += source.errors;
328    total.infos += source.infos;
329    total.recovered_blocks += source.recovered_blocks;
330    total.stopped |= source.stopped;
331}
332
333/// Parser configuration.
334#[derive(Debug, Default, Clone)]
335pub struct Parser {
336    threads: Option<usize>,
337    tolerant: bool,
338    document: DocumentOptions,
339}
340
/// Output options for the parsed-document and streaming APIs. All default to `false`.
#[derive(Clone, Copy, Debug, Default)]
struct DocumentOptions {
    /// Capture source spans for blocks.
    capture_source: bool,
    /// Preserve exact raw source text in the output.
    preserve_raw: bool,
    /// Populate expanded value text in parsed-document output.
    expand_values: bool,
}
347
348impl Parser {
349    /// Create a new parser.
350    #[must_use]
351    #[inline]
352    pub fn new() -> Self {
353        Self::default()
354    }
355
356    /// Set number of threads (None = use all available)
357    #[must_use]
358    #[inline]
359    pub fn threads(mut self, threads: impl Into<Option<usize>>) -> Self {
360        self.threads = threads.into();
361        self
362    }
363
364    /// Continue after malformed blocks and collect diagnostics.
365    #[must_use]
366    #[inline]
367    pub const fn tolerant(mut self) -> Self {
368        self.tolerant = true;
369        self
370    }
371
372    /// Capture source spans for blocks.
373    #[must_use]
374    #[inline]
375    pub const fn capture_source(mut self) -> Self {
376        self.document.capture_source = true;
377        self
378    }
379
380    /// Preserve exact raw source text in parsed-document output.
381    #[must_use]
382    #[inline]
383    pub const fn preserve_raw(mut self) -> Self {
384        self.document.preserve_raw = true;
385        self
386    }
387
388    /// Populate expanded value text in parsed-document output.
389    #[must_use]
390    #[inline]
391    pub const fn expand_values(mut self) -> Self {
392        self.document.expand_values = true;
393        self
394    }
395
396    /// Parse a single input string.
397    #[inline]
398    pub fn parse<'a>(&self, input: &'a str) -> Result<Library<'a>> {
399        if self.tolerant {
400            Library::parse_tolerant(input, self.document.capture_source)
401        } else if self.document.capture_source {
402            Library::parse_with_spans(input)
403        } else {
404            Library::parse_sequential(input)
405        }
406    }
407
408    /// Parse a single input string into the parsed document model.
409    ///
410    /// Use this when a caller needs source-order blocks, diagnostics, raw-text
411    /// slots, or partial parse results. Use [`Self::parse`] for the compact
412    /// [`Library`] API.
413    #[inline]
414    pub fn parse_document<'a>(&self, input: &'a str) -> Result<ParsedDocument<'a>> {
415        self.parse_document_with_source_id(SourceId::new(0), None, input)
416    }
417
418    /// Parse a named source into the parsed document model.
419    ///
420    /// The parser does not read files itself; callers provide the source name
421    /// or path-like label together with the already-loaded input text.
422    #[inline]
423    pub fn parse_source<'a>(
424        &self,
425        source_name: impl Into<Cow<'a, str>>,
426        input: &'a str,
427    ) -> Result<ParsedDocument<'a>> {
428        self.parse_document_with_source_id(SourceId::new(0), Some(source_name.into()), input)
429    }
430
431    /// Parse multiple named in-memory sources into a corpus result.
432    pub fn parse_sources<'a>(&self, sources: &[CorpusSource<'a>]) -> Result<ParsedCorpus<'a>> {
433        let mut documents = Vec::with_capacity(sources.len());
434        for (index, source) in sources.iter().enumerate() {
435            documents.push(self.parse_document_with_source_id(
436                SourceId::new(index),
437                Some(Cow::Borrowed(source.name)),
438                source.input,
439            )?);
440        }
441
442        Ok(ParsedCorpus::from_documents(documents))
443    }
444
445    /// Stream parsed source-order events to a callback.
446    ///
447    /// Strict mode returns an error on the first malformed block. Tolerant mode
448    /// emits recovered partial entries or failed blocks with diagnostics and
449    /// continues. The callback can return [`ParseFlow::Stop`] to stop after the
450    /// current event; the returned summary then has `stopped = true`.
451    #[inline]
452    pub fn parse_events<'a, F>(&self, input: &'a str, on_event: F) -> Result<StreamingSummary>
453    where
454        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
455    {
456        self.parse_source_events_with_source(SourceId::new(0), None, input, on_event)
457    }
458
459    /// Stream parsed source-order events from a named source.
460    #[inline]
461    pub fn parse_source_events<'a, F>(
462        &self,
463        source_name: impl Into<Cow<'a, str>>,
464        input: &'a str,
465        on_event: F,
466    ) -> Result<StreamingSummary>
467    where
468        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
469    {
470        self.parse_source_events_with_source(
471            SourceId::new(0),
472            Some(source_name.into()),
473            input,
474            on_event,
475        )
476    }
477
478    /// Stream events from multiple named in-memory sources in corpus order.
479    pub fn parse_corpus_events<'a, F>(
480        &self,
481        sources: &[CorpusSource<'a>],
482        mut on_event: F,
483    ) -> Result<StreamingSummary>
484    where
485        F: FnMut(CorpusEvent<'a>) -> Result<ParseFlow>,
486    {
487        let mut summary = StreamingSummary::default();
488
489        for (index, source) in sources.iter().enumerate() {
490            if summary.stopped {
491                break;
492            }
493
494            let source_id = SourceId::new(index);
495            let parsed_source = ParsedSource {
496                id: source_id,
497                name: Some(Cow::Borrowed(source.name)),
498            };
499            if on_event(CorpusEvent::SourceStart(parsed_source.clone()))? == ParseFlow::Stop {
500                summary.stopped = true;
501                break;
502            }
503
504            let source_summary = self.parse_source_events_with_source(
505                source_id,
506                Some(Cow::Borrowed(source.name)),
507                source.input,
508                |event| {
509                    on_event(CorpusEvent::Event {
510                        source: source_id,
511                        event: Box::new(event),
512                    })
513                },
514            )?;
515            merge_streaming_summary(&mut summary, source_summary);
516
517            if on_event(CorpusEvent::SourceEnd(parsed_source))? == ParseFlow::Stop {
518                summary.stopped = true;
519            }
520        }
521
522        summary.finalize_status();
523        Ok(summary)
524    }
525
526    fn parse_source_events_with_source<'a, F>(
527        &self,
528        source_id: SourceId,
529        source_name: Option<Cow<'a, str>>,
530        input: &'a str,
531        mut on_event: F,
532    ) -> Result<StreamingSummary>
533    where
534        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
535    {
536        let source_map = SourceMap::new(Some(source_id), source_name, input);
537        let mut summary = StreamingSummary::default();
538
539        if self.tolerant {
540            self.parse_tolerant_events(input, &source_map, &mut summary, &mut on_event)?;
541        } else {
542            crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
543                let source = source_map.span(span.byte_start, span.byte_end);
544                self.emit_parsed_event(item, source, raw, &source_map, &mut summary, &mut on_event)
545            })?;
546        }
547
548        summary.finalize_status();
549        Ok(summary)
550    }
551
552    fn parse_tolerant_events<'a, F>(
553        &self,
554        input: &'a str,
555        source_map: &SourceMap<'a>,
556        summary: &mut StreamingSummary,
557        on_event: &mut F,
558    ) -> Result<()>
559    where
560        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
561    {
562        let mut remaining = input;
563
564        loop {
565            crate::parser::lexer::skip_whitespace(&mut remaining);
566            if remaining.is_empty() || summary.stopped {
567                break;
568            }
569
570            let start = input.len() - remaining.len();
571            match crate::parser::parse_item(&mut remaining) {
572                Ok(item) => {
573                    let end = input.len() - remaining.len();
574                    let source = source_map.span(start, end);
575                    self.emit_parsed_event(
576                        item,
577                        source,
578                        &input[start..end],
579                        source_map,
580                        summary,
581                        on_event,
582                    )?;
583                }
584                Err(err) => {
585                    let end = next_recovery_boundary(input, start);
586                    let failed = FailedBlock {
587                        raw: Cow::Borrowed(&input[start..end]),
588                        error: format!("Failed to parse entry: {err}"),
589                        source: Some(source_map.span(start, end)),
590                    };
591                    let failed_index = summary.failed_blocks;
592                    let failed = ParsedFailedBlock::from_failed_block(
593                        failed_index,
594                        failed,
595                        Some(source_map),
596                    );
597                    if let Some(partial) = crate::document::recover_partial_stream_entry(
598                        &failed,
599                        source_map,
600                        summary.entries,
601                        self.document.preserve_raw,
602                    ) {
603                        Self::emit_event(ParseEvent::Entry(partial), summary, on_event)?;
604                    } else {
605                        Self::emit_event(ParseEvent::Failed(failed), summary, on_event)?;
606                    }
607                    remaining = &input[end..];
608                }
609            }
610        }
611
612        Ok(())
613    }
614
615    fn emit_parsed_event<'a, F>(
616        &self,
617        item: crate::parser::ParsedItem<'a>,
618        source: SourceSpan,
619        raw: &'a str,
620        source_map: &SourceMap<'a>,
621        summary: &mut StreamingSummary,
622        on_event: &mut F,
623    ) -> Result<()>
624    where
625        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
626    {
627        if summary.stopped {
628            return Ok(());
629        }
630
631        let event = match item {
632            crate::parser::ParsedItem::Entry(entry) => {
633                ParseEvent::Entry(ParsedEntry::from_stream_entry(
634                    entry,
635                    source,
636                    raw,
637                    source_map,
638                    self.document.preserve_raw,
639                ))
640            }
641            crate::parser::ParsedItem::String(name, value) => {
642                ParseEvent::String(ParsedString::from_stream_definition(
643                    name,
644                    value,
645                    source,
646                    raw,
647                    self.document.preserve_raw,
648                ))
649            }
650            crate::parser::ParsedItem::Preamble(value) => {
651                ParseEvent::Preamble(ParsedPreamble::from_stream_preamble(
652                    value,
653                    source,
654                    raw,
655                    self.document.preserve_raw,
656                ))
657            }
658            crate::parser::ParsedItem::Comment(text) => ParseEvent::Comment(
659                ParsedComment::from_stream_comment(text, source, raw, self.document.preserve_raw),
660            ),
661        };
662
663        Self::emit_event(event, summary, on_event)
664    }
665
666    fn emit_event<'a, F>(
667        event: ParseEvent<'a>,
668        summary: &mut StreamingSummary,
669        on_event: &mut F,
670    ) -> Result<()>
671    where
672        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
673    {
674        if summary.stopped {
675            return Ok(());
676        }
677
678        let diagnostics = match &event {
679            ParseEvent::Entry(entry) => {
680                summary.entries += 1;
681                if entry.status == crate::ParsedEntryStatus::Partial {
682                    summary.recovered_blocks += 1;
683                }
684                entry.diagnostics.clone()
685            }
686            ParseEvent::String(_) => {
687                summary.strings += 1;
688                Vec::new()
689            }
690            ParseEvent::Preamble(_) => {
691                summary.preambles += 1;
692                Vec::new()
693            }
694            ParseEvent::Comment(_) => {
695                summary.comments += 1;
696                Vec::new()
697            }
698            ParseEvent::Failed(failed) => {
699                summary.failed_blocks += 1;
700                failed.diagnostics.clone()
701            }
702            ParseEvent::Diagnostic(diagnostic) => {
703                summary.count_diagnostic(diagnostic);
704                Vec::new()
705            }
706        };
707        for diagnostic in &diagnostics {
708            summary.count_diagnostic(diagnostic);
709        }
710
711        if on_event(event)? == ParseFlow::Stop {
712            summary.stopped = true;
713            return Ok(());
714        }
715
716        for diagnostic in diagnostics {
717            if on_event(ParseEvent::Diagnostic(diagnostic))? == ParseFlow::Stop {
718                summary.stopped = true;
719                break;
720            }
721        }
722
723        Ok(())
724    }
725
726    fn parse_document_with_source_id<'a>(
727        &self,
728        source_id: SourceId,
729        source_name: Option<Cow<'a, str>>,
730        input: &'a str,
731    ) -> Result<ParsedDocument<'a>> {
732        let source_map = SourceMap::new(Some(source_id), source_name.clone(), input);
733        let sources = vec![ParsedSource {
734            id: source_id,
735            name: source_name,
736        }];
737        let raw_items = if self.tolerant {
738            Library::parse_tolerant_raw_items(input, true, &source_map)
739        } else {
740            match Library::parse_raw_items_with_source(input, &source_map) {
741                Ok(raw_items) => raw_items,
742                Err(error) => {
743                    return Ok(ParsedDocument::failed_from_error(
744                        sources,
745                        &source_map,
746                        &error,
747                    ));
748                }
749            }
750        };
751        let library = match Library::from_raw_items(raw_items.clone()) {
752            Ok(library) => library,
753            Err(Error::UndefinedVariable(_) | Error::CircularReference(_))
754                if !self.document.expand_values =>
755            {
756                Library::from_raw_items_unexpanded(raw_items.clone())
757            }
758            Err(error) => return Err(error),
759        };
760        let mut document =
761            ParsedDocument::from_library_with_source_map(library, sources, Some(&source_map));
762        let mut entry_index = 0;
763        for raw_item in &raw_items {
764            if let RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, raw) = raw_item {
765                document.apply_entry_locations(
766                    entry_index,
767                    raw,
768                    &source_map,
769                    self.document.preserve_raw,
770                );
771                entry_index += 1;
772            }
773        }
774        document.apply_parsed_values(&raw_items);
775        if self.document.preserve_raw {
776            document.apply_raw_items(&raw_items);
777        }
778        if self.tolerant {
779            document.recover_partial_entries(&source_map, self.document.preserve_raw);
780        }
781        if self.document.expand_values {
782            document.populate_expanded_values(crate::ExpansionOptions::default())?;
783        }
784        Ok(document)
785    }
786
787    /// Parse multiple files in parallel
788    pub fn parse_files<P: AsRef<Path> + Sync>(&self, paths: &[P]) -> Result<Library<'static>> {
789        #[cfg(feature = "parallel")]
790        {
791            if let Some(threads) = self.threads {
792                if threads <= 1 {
793                    return Self::parse_files_sequential(paths);
794                }
795            }
796
797            let pool = self.build_thread_pool()?;
798
799            let libraries: Result<Vec<_>> = pool.install(|| {
800                paths
801                    .par_iter()
802                    .map(|path| {
803                        let content = std::fs::read_to_string(path)?;
804                        let library = Library::parse_sequential(&content)?;
805                        Ok(library.into_owned())
806                    })
807                    .collect()
808            });
809
810            let libraries = libraries?;
811            Ok(Library::merge_libraries_parallel(libraries))
812        }
813
814        #[cfg(not(feature = "parallel"))]
815        {
816            Self::parse_files_sequential(paths)
817        }
818    }
819
820    /// Sequential file parsing fallback
821    fn parse_files_sequential<P: AsRef<Path>>(paths: &[P]) -> Result<Library<'static>> {
822        let mut result = Library::new();
823        for path in paths {
824            let content = std::fs::read_to_string(path)?;
825            let library = Library::parse_sequential(&content)?;
826            result.merge(library.into_owned());
827        }
828        Ok(result)
829    }
830
831    #[cfg(feature = "parallel")]
832    fn build_thread_pool(&self) -> Result<rayon::ThreadPool> {
833        let mut builder = rayon::ThreadPoolBuilder::new();
834
835        if let Some(threads) = self.threads {
836            builder = builder.num_threads(threads);
837        }
838
839        builder
840            .build()
841            .map_err(|e| Error::WinnowError(e.to_string()))
842    }
843}
844
845/// A high-level block in a parsed BibTeX library.
846#[derive(Debug, Clone, Copy)]
847pub enum Block<'lib, 'a> {
848    /// A regular bibliography entry.
849    Entry(&'lib Entry<'a>, Option<SourceSpan>),
850    /// A string definition.
851    String(&'lib StringDefinition<'a>),
852    /// A preamble block.
853    Preamble(&'lib Preamble<'a>),
854    /// A comment block.
855    Comment(&'lib Comment<'a>),
856    /// A malformed block retained by tolerant parsing.
857    Failed(&'lib FailedBlock<'a>),
858}
859
/// Kind of a block together with a `usize` payload.
///
/// NOTE(review): the payload is presumably the index of the block within the
/// collection for its kind — confirm against the code that builds these.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BlockKind {
    /// A regular bibliography entry.
    Entry(usize),
    /// A string definition.
    String(usize),
    /// A preamble block.
    Preamble(usize),
    /// A comment block.
    Comment(usize),
    /// A malformed block.
    Failed(usize),
}
868
869#[derive(Debug, Clone)]
870pub enum RawBuildItem<'a> {
871    Parsed(crate::parser::ParsedItem<'a>, SourceSpan, &'a str),
872    Failed(FailedBlock<'a>),
873}
874
875/// A BibTeX string definition.
876#[derive(Debug, Clone, PartialEq)]
877pub struct StringDefinition<'a> {
878    /// String variable name.
879    pub name: Cow<'a, str>,
880    /// Unexpanded string value.
881    pub value: Value<'a>,
882    /// Optional source location.
883    pub source: Option<SourceSpan>,
884}
885
886impl<'a> StringDefinition<'a> {
887    /// Create a string definition.
888    #[must_use]
889    pub const fn new(name: &'a str, value: Value<'a>) -> Self {
890        Self {
891            name: Cow::Borrowed(name),
892            value,
893            source: None,
894        }
895    }
896
897    /// Return the string name.
898    #[must_use]
899    pub fn name(&self) -> &str {
900        &self.name
901    }
902
903    /// Return the string value.
904    #[must_use]
905    pub const fn value(&self) -> &Value<'a> {
906        &self.value
907    }
908
909    /// Convert to an owned definition.
910    #[must_use]
911    pub fn into_owned(self) -> StringDefinition<'static> {
912        StringDefinition {
913            name: Cow::Owned(self.name.into_owned()),
914            value: self.value.into_owned(),
915            source: self.source,
916        }
917    }
918}
919
920/// A BibTeX preamble block.
921#[derive(Debug, Clone, PartialEq)]
922pub struct Preamble<'a> {
923    /// Expanded preamble value.
924    pub value: Value<'a>,
925    /// Optional source location.
926    pub source: Option<SourceSpan>,
927}
928
929impl<'a> Preamble<'a> {
930    /// Create a preamble block.
931    #[must_use]
932    pub const fn new(value: Value<'a>) -> Self {
933        Self {
934            value,
935            source: None,
936        }
937    }
938
939    /// Return the preamble value.
940    #[must_use]
941    pub const fn value(&self) -> &Value<'a> {
942        &self.value
943    }
944
945    /// Convert to an owned preamble.
946    #[must_use]
947    pub fn into_owned(self) -> Preamble<'static> {
948        Preamble {
949            value: self.value.into_owned(),
950            source: self.source,
951        }
952    }
953}
954
955impl<'a> Deref for Preamble<'a> {
956    type Target = Value<'a>;
957
958    fn deref(&self) -> &Self::Target {
959        &self.value
960    }
961}
962
963/// A BibTeX comment block.
964#[derive(Debug, Clone, PartialEq, Eq)]
965pub struct Comment<'a> {
966    /// Comment text.
967    pub text: Cow<'a, str>,
968    /// Optional source location.
969    pub source: Option<SourceSpan>,
970}
971
972impl<'a> Comment<'a> {
973    /// Create a comment block.
974    #[must_use]
975    pub const fn new(text: &'a str) -> Self {
976        Self {
977            text: Cow::Borrowed(text),
978            source: None,
979        }
980    }
981
982    /// Return the comment text.
983    #[must_use]
984    pub fn text(&self) -> &str {
985        &self.text
986    }
987
988    /// Convert to an owned comment.
989    #[must_use]
990    pub fn into_owned(self) -> Comment<'static> {
991        Comment {
992            text: Cow::Owned(self.text.into_owned()),
993            source: self.source,
994        }
995    }
996}
997
998impl Deref for Comment<'_> {
999    type Target = str;
1000
1001    fn deref(&self) -> &Self::Target {
1002        &self.text
1003    }
1004}
1005
1006/// A malformed block retained by tolerant parsing.
1007#[derive(Debug, Clone, PartialEq, Eq)]
1008pub struct FailedBlock<'a> {
1009    /// Raw source for the malformed block.
1010    pub raw: Cow<'a, str>,
1011    /// Parse error message.
1012    pub error: String,
1013    /// Optional source location.
1014    pub source: Option<SourceSpan>,
1015}
1016
1017impl FailedBlock<'_> {
1018    /// Convert to an owned failed block.
1019    #[must_use]
1020    pub fn into_owned(self) -> FailedBlock<'static> {
1021        FailedBlock {
1022            raw: Cow::Owned(self.raw.into_owned()),
1023            error: self.error,
1024            source: self.source,
1025        }
1026    }
1027}
1028
/// Month rendering style used by month normalization.
///
/// `Long` is the `Default` variant.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MonthStyle {
    /// Full English month names such as `January` (the default).
    #[default]
    Long,
    /// Three-letter lowercase BibTeX abbreviations such as `jan`.
    Abbrev,
    /// One-based month numbers such as `1`.
    Number,
}
1040
/// Entry and field ordering options.
///
/// Both switches are `false` in the derived `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SortOptions {
    /// Sort regular entries by citation key.
    pub entries_by_key: bool,
    /// Sort fields inside each entry by field name.
    pub fields_by_name: bool,
}
1049
/// Field-name casing policy for field normalization.
///
/// `Preserve` is the `Default` variant.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameCase {
    /// Preserve existing field names (the default).
    #[default]
    Preserve,
    /// Convert field names to lowercase ASCII.
    Lowercase,
}
1059
/// Field normalization options.
///
/// The derived `Default` preserves field-name case and leaves alias
/// normalization switched off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FieldNormalizeOptions {
    /// Field-name casing policy.
    pub name_case: FieldNameCase,
    /// Normalize common BibLaTeX aliases to classic BibTeX field names.
    pub biblatex_aliases: bool,
}
1068
/// A parsed BibTeX library.
///
/// Each kind of block lives in its own vector; `block_order` records how the
/// blocks were interleaved so they can be replayed in their original order.
#[derive(Debug, Clone, Default)]
pub struct Library<'a> {
    /// Bibliography entries
    entries: Vec<Entry<'a>>,
    /// Optional entry source spans, indexed like `entries`. Stays `None`
    /// until an entry is added together with a span.
    entry_sources: Option<Vec<Option<SourceSpan>>>,
    /// String definitions
    strings: Vec<StringDefinition<'a>>,
    /// Latest string definition by name, stored as an index into `strings`
    string_lookup: AHashMap<Cow<'a, str>, usize>,
    /// Preambles
    preambles: Vec<Preamble<'a>>,
    /// Comments
    comments: Vec<Comment<'a>>,
    /// Failed blocks retained during tolerant parsing
    failed_blocks: Vec<FailedBlock<'a>>,
    /// Original block order; each item holds an index into the matching vector
    block_order: Vec<BlockKind>,
}
1089
1090impl<'a> Library<'a> {
1091    fn push_entry_with_source(&mut self, entry: Entry<'a>, source: Option<SourceSpan>) {
1092        let index = self.entries.len();
1093        self.entries.push(entry);
1094        if let Some(sources) = &mut self.entry_sources {
1095            sources.push(source);
1096        } else if source.is_some() {
1097            let mut sources = vec![None; index];
1098            sources.push(source);
1099            self.entry_sources = Some(sources);
1100        }
1101        self.block_order.push(BlockKind::Entry(index));
1102    }
1103
1104    fn register_string_definition(
1105        &mut self,
1106        name: Cow<'a, str>,
1107        value: Value<'a>,
1108        source: Option<SourceSpan>,
1109    ) -> usize {
1110        let index = self.strings.len();
1111        self.string_lookup.insert(name.clone(), index);
1112        self.strings.push(StringDefinition {
1113            name,
1114            value,
1115            source,
1116        });
1117        index
1118    }
1119
1120    fn push_string_with_source(
1121        &mut self,
1122        name: Cow<'a, str>,
1123        value: Value<'a>,
1124        source: Option<SourceSpan>,
1125    ) {
1126        let index = self.register_string_definition(name, value, source);
1127        self.block_order.push(BlockKind::String(index));
1128    }
1129
1130    fn push_preamble_with_source(&mut self, value: Value<'a>, source: Option<SourceSpan>) -> usize {
1131        let index = self.preambles.len();
1132        self.preambles.push(Preamble { value, source });
1133        self.block_order.push(BlockKind::Preamble(index));
1134        index
1135    }
1136
1137    fn push_comment_with_source(&mut self, text: Cow<'a, str>, source: Option<SourceSpan>) {
1138        let index = self.comments.len();
1139        self.comments.push(Comment { text, source });
1140        self.block_order.push(BlockKind::Comment(index));
1141    }
1142
1143    fn push_failed_block(&mut self, failed: FailedBlock<'a>) {
1144        let index = self.failed_blocks.len();
1145        self.failed_blocks.push(failed);
1146        self.block_order.push(BlockKind::Failed(index));
1147    }
1148
1149    #[inline]
1150    fn expand_value_for_parse(
1151        &self,
1152        value: &mut Value<'a>,
1153        has_user_strings: bool,
1154        month_constants_shadowed: bool,
1155        expanded_variables: &mut ExpansionCache<'a>,
1156        expansion_stack: &mut Vec<Cow<'a, str>>,
1157        concat_cache: &mut ConcatCache<'a>,
1158    ) -> Result<()> {
1159        match value {
1160            Value::Literal(_) | Value::Number(_) => Ok(()),
1161            Value::Variable(name) => {
1162                if !has_user_strings || !month_constants_shadowed {
1163                    if let Some(month_value) = get_month_expansion(name.as_ref()) {
1164                        *value = Value::Literal(Cow::Borrowed(month_value));
1165                        return Ok(());
1166                    }
1167                }
1168
1169                if has_user_strings {
1170                    if let Some(expanded) = expanded_variables.get_cloned(name.as_ref()) {
1171                        *value = expanded;
1172                        return Ok(());
1173                    }
1174
1175                    let old_value = std::mem::take(value);
1176                    *value = self.smart_expand_value_cached(
1177                        old_value,
1178                        expanded_variables,
1179                        expansion_stack,
1180                        concat_cache,
1181                    )?;
1182                }
1183
1184                Ok(())
1185            }
1186            Value::Concat(parts) => {
1187                if has_user_strings {
1188                    if let Some(expanded) = concat_cache.get_cloned(parts) {
1189                        *value = expanded;
1190                        return Ok(());
1191                    }
1192                }
1193
1194                let needs_expansion = if has_user_strings {
1195                    parts.iter().any(contains_variables)
1196                } else {
1197                    parts.iter().any(contains_potential_month_variables)
1198                };
1199
1200                if needs_expansion {
1201                    if !has_user_strings {
1202                        if let Some(expanded) = concat_cache.get_cloned(parts) {
1203                            *value = expanded;
1204                            return Ok(());
1205                        }
1206                    }
1207
1208                    let old_value = std::mem::take(value);
1209                    *value = self.smart_expand_value_cached(
1210                        old_value,
1211                        expanded_variables,
1212                        expansion_stack,
1213                        concat_cache,
1214                    )?;
1215                }
1216
1217                Ok(())
1218            }
1219        }
1220    }
1221
1222    /// Create a new empty library
1223    #[must_use]
1224    #[inline]
1225    pub fn new() -> Self {
1226        Self::default()
1227    }
1228
    /// Create a configurable parser.
    ///
    /// This returns `Parser::new()`; chain option setters on it before calling
    /// one of its parse methods.
    ///
    /// # Parallel Processing
    ///
    /// The `threads` option only affects `parse_files()`. Single file
    /// parsing with `parse()` is sequential.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use bibtex_parser::Library;
    /// // Parse multiple files in parallel
    /// let library = Library::parser()
    ///     .threads(4)
    ///     .parse_files(&["file1.bib", "file2.bib"]).unwrap();
    ///
    /// // Single-file parsing stays sequential
    /// let content = "@article{demo, title=\"Demo\"}";
    /// let library = Library::parser()
    ///     .threads(4)
    ///     .parse(content).unwrap();
    /// ```
    #[must_use]
    #[inline]
    pub fn parser() -> Parser {
        Parser::new()
    }
1256
1257    /// Parse a BibTeX library from a string with default strict settings.
1258    pub fn parse(input: &'a str) -> Result<Self> {
1259        Self::parser().parse(input)
1260    }
1261
1262    /// Parse a BibTeX library from a file into owned data.
1263    pub fn parse_file(path: impl AsRef<Path>) -> Result<Library<'static>> {
1264        let content = std::fs::read_to_string(path)?;
1265        Library::parser().parse(&content).map(Library::into_owned)
1266    }
1267
    /// Serialize this library to a BibTeX string.
    ///
    /// Delegates to `crate::writer::to_string`.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the writer.
    pub fn to_bibtex(&self) -> Result<String> {
        crate::writer::to_string(self)
    }
1272
    /// Serialize this library to a BibTeX file at `path`.
    ///
    /// Delegates to `crate::writer::to_file`.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the writer.
    pub fn write_file(&self, path: impl AsRef<Path>) -> Result<()> {
        crate::writer::to_file(self, path)
    }
1277
    /// Parse a BibTeX library from a string (single-threaded implementation)
    ///
    /// No source spans are recorded on this path. One of three strategies is
    /// chosen from a scan of the input:
    ///
    /// 1. If the scan finds no possible `@string` definition, every item is
    ///    expanded as it streams in, with user-string expansion switched off.
    /// 2. If `input_may_have_late_string_definition` returns `false`, entries
    ///    are expanded as they stream in and only preambles are deferred until
    ///    the stream has finished.
    /// 3. Otherwise everything is collected first and expanded afterwards, once
    ///    every `@string` definition is known.
    ///
    /// # Errors
    ///
    /// Propagates errors from `parse_bibtex_stream` and from value expansion.
    // The three strategies share local state; kept in one function on purpose.
    #[allow(clippy::too_many_lines)]
    pub(crate) fn parse_sequential(input: &'a str) -> Result<Self> {
        let mut library = Self::new();
        let input_scan = scan_input(input);

        // Fast path for common corpora (like tugboat) with no user-defined strings.
        // This avoids buffering all entries before expansion.
        if !input_scan.may_contain_string_definition {
            library.entries.reserve(input_scan.at_count);
            library.block_order.reserve(input_scan.at_count);
            // Fixed for the whole pass: the scan saw no `@string` definition.
            let has_user_strings = false;
            let month_constants_shadowed = false;
            let mut expanded_variables = ExpansionCache::with_capacity(0);
            let mut expansion_stack = Vec::new();
            let mut concat_cache = ConcatCache::new();

            crate::parser::parse_bibtex_stream(input, |item| {
                match item {
                    crate::parser::ParsedItem::Entry(mut entry) => {
                        for field in &mut entry.fields {
                            library.expand_value_for_parse(
                                &mut field.value,
                                has_user_strings,
                                month_constants_shadowed,
                                &mut expanded_variables,
                                &mut expansion_stack,
                                &mut concat_cache,
                            )?;
                        }
                        library.push_entry_with_source(entry, None);
                    }
                    crate::parser::ParsedItem::Preamble(value) => {
                        let mut expanded = value;
                        library.expand_value_for_parse(
                            &mut expanded,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                        library.push_preamble_with_source(expanded, None);
                    }
                    crate::parser::ParsedItem::Comment(text) => {
                        library.push_comment_with_source(Cow::Borrowed(text), None);
                    }
                    crate::parser::ParsedItem::String(name, value) => {
                        // Defensive fallback for scanner false negatives.
                        // The definition is stored, but `has_user_strings` stays
                        // `false` for the rest of this pass.
                        library.push_string_with_source(Cow::Borrowed(name), value, None);
                    }
                }
                Ok(())
            })?;

            return Ok(library);
        }

        library.block_order.reserve(input_scan.at_count);

        // Single-pass path when all @string definitions appear before regular
        // entries. This keeps correctness while avoiding buffering entries and
        // a full second pass over them.
        if !input_may_have_late_string_definition(input) {
            // Indices into `library.preambles` that still need expansion.
            let mut pending_preambles = Vec::new();
            let mut expanded_variables = ExpansionCache::with_capacity(0);
            let mut expansion_stack = Vec::new();
            let mut concat_cache = ConcatCache::new();
            // Computed lazily at the first entry and reused for later entries.
            let mut month_constants_shadowed = None;

            crate::parser::parse_bibtex_stream(input, |item| {
                match item {
                    crate::parser::ParsedItem::Entry(mut entry) => {
                        let has_user_strings = !library.strings.is_empty();
                        let month_constants_shadowed = *month_constants_shadowed
                            .get_or_insert_with(|| {
                                has_user_strings
                                    && user_strings_shadow_month_constants(&library.strings)
                            });
                        for field in &mut entry.fields {
                            library.expand_value_for_parse(
                                &mut field.value,
                                has_user_strings,
                                month_constants_shadowed,
                                &mut expanded_variables,
                                &mut expansion_stack,
                                &mut concat_cache,
                            )?;
                        }
                        library.push_entry_with_source(entry, None);
                    }
                    crate::parser::ParsedItem::Preamble(value) => {
                        // Stored unexpanded for now; expanded after the stream ends.
                        let index = library.push_preamble_with_source(value, None);
                        pending_preambles.push(index);
                    }
                    crate::parser::ParsedItem::String(name, value) => {
                        library.push_string_with_source(Cow::Borrowed(name), value, None);
                    }
                    crate::parser::ParsedItem::Comment(text) => {
                        library.push_comment_with_source(Cow::Borrowed(text), None);
                    }
                }
                Ok(())
            })?;

            // Recomputed from the final string table for the deferred preambles.
            let has_user_strings = !library.strings.is_empty();
            let month_constants_shadowed =
                has_user_strings && user_strings_shadow_month_constants(&library.strings);
            for index in pending_preambles {
                // Take the value out so `library` can be borrowed for the expansion.
                let mut expanded = std::mem::take(&mut library.preambles[index].value);
                library.expand_value_for_parse(
                    &mut expanded,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.preambles[index].value = expanded;
            }

            return Ok(library);
        }

        // General path: collect every block first, remembering which entries and
        // preambles still need expansion.
        let mut entry_indices = Vec::new();
        let mut preamble_indices = Vec::new();

        crate::parser::parse_bibtex_stream(input, |item| {
            match item {
                crate::parser::ParsedItem::Entry(entry) => {
                    let index = library.entries.len();
                    library.push_entry_with_source(entry, None);
                    entry_indices.push(index);
                }
                crate::parser::ParsedItem::Preamble(value) => {
                    let index = library.push_preamble_with_source(value, None);
                    preamble_indices.push(index);
                }
                crate::parser::ParsedItem::String(name, value) => {
                    library.push_string_with_source(Cow::Borrowed(name), value, None);
                }
                crate::parser::ParsedItem::Comment(text) => {
                    library.push_comment_with_source(Cow::Borrowed(text), None);
                }
            }
            Ok(())
        })?;

        // Expand after parsing so all @string definitions are available globally.
        let has_user_strings = !library.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&library.strings);
        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();

        for entry_index in entry_indices {
            let field_count = library.entries[entry_index].fields.len();
            for field_index in 0..field_count {
                // Take the value out so `library` can be borrowed for the expansion.
                let mut value =
                    std::mem::take(&mut library.entries[entry_index].fields[field_index].value);
                library.expand_value_for_parse(
                    &mut value,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.entries[entry_index].fields[field_index].value = value;
            }
        }

        for preamble_index in preamble_indices {
            let mut expanded = std::mem::take(&mut library.preambles[preamble_index].value);
            library.expand_value_for_parse(
                &mut expanded,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            library.preambles[preamble_index].value = expanded;
        }

        Ok(library)
    }
1466
1467    fn parse_with_spans(input: &'a str) -> Result<Self> {
1468        let source_map = SourceMap::anonymous(input);
1469        let raw_items = Self::parse_raw_items_with_source(input, &source_map)?;
1470        Self::from_raw_items(raw_items)
1471    }
1472
1473    fn parse_tolerant(input: &'a str, capture_source: bool) -> Result<Self> {
1474        let source_map = SourceMap::anonymous(input);
1475        let raw_items = Self::parse_tolerant_raw_items(input, capture_source, &source_map);
1476        Self::from_raw_items(raw_items)
1477    }
1478
1479    fn parse_raw_items_with_source(
1480        input: &'a str,
1481        source_map: &SourceMap<'_>,
1482    ) -> Result<Vec<RawBuildItem<'a>>> {
1483        let mut raw_items = Vec::new();
1484        crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
1485            let span = if source_map.source_id().is_some() {
1486                source_map.span(span.byte_start, span.byte_end)
1487            } else {
1488                span
1489            };
1490            raw_items.push(RawBuildItem::Parsed(item, span, raw));
1491            Ok(())
1492        })?;
1493        Ok(raw_items)
1494    }
1495
1496    fn parse_tolerant_raw_items(
1497        input: &'a str,
1498        capture_source: bool,
1499        source_map: &SourceMap<'_>,
1500    ) -> Vec<RawBuildItem<'a>> {
1501        let mut raw_items = Vec::new();
1502        let mut remaining = input;
1503
1504        loop {
1505            crate::parser::lexer::skip_whitespace(&mut remaining);
1506            if remaining.is_empty() {
1507                break;
1508            }
1509
1510            let start = input.len() - remaining.len();
1511            match crate::parser::parse_item(&mut remaining) {
1512                Ok(item) => {
1513                    let end = input.len() - remaining.len();
1514                    raw_items.push(RawBuildItem::Parsed(
1515                        item,
1516                        source_map.span(start, end),
1517                        &input[start..end],
1518                    ));
1519                }
1520                Err(err) => {
1521                    let end = next_recovery_boundary(input, start);
1522                    let source = capture_source.then(|| source_map.span(start, end));
1523                    raw_items.push(RawBuildItem::Failed(FailedBlock {
1524                        raw: Cow::Borrowed(&input[start..end]),
1525                        error: format!("Failed to parse entry: {err}"),
1526                        source,
1527                    }));
1528                    remaining = &input[end..];
1529                }
1530            }
1531        }
1532
1533        raw_items
1534    }
1535
1536    fn from_raw_items(raw_items: Vec<RawBuildItem<'a>>) -> Result<Self> {
1537        let mut library = Self::new();
1538
1539        for raw_item in &raw_items {
1540            if let RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) =
1541                raw_item
1542            {
1543                library.register_string_definition(Cow::Borrowed(name), value.clone(), Some(*span));
1544            }
1545        }
1546
1547        let has_user_strings = !library.strings.is_empty();
1548        let month_constants_shadowed =
1549            has_user_strings && user_strings_shadow_month_constants(&library.strings);
1550        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
1551        let mut expansion_stack = Vec::new();
1552        let mut concat_cache = ConcatCache::new();
1553        let mut string_index = 0;
1554
1555        for raw_item in raw_items {
1556            match raw_item {
1557                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(mut entry), span, _) => {
1558                    for field in &mut entry.fields {
1559                        library.expand_value_for_parse(
1560                            &mut field.value,
1561                            has_user_strings,
1562                            month_constants_shadowed,
1563                            &mut expanded_variables,
1564                            &mut expansion_stack,
1565                            &mut concat_cache,
1566                        )?;
1567                    }
1568                    library.push_entry_with_source(entry, Some(span));
1569                }
1570                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, _) => {
1571                    library.block_order.push(BlockKind::String(string_index));
1572                    string_index += 1;
1573                }
1574                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(mut value), span, _) => {
1575                    library.expand_value_for_parse(
1576                        &mut value,
1577                        has_user_strings,
1578                        month_constants_shadowed,
1579                        &mut expanded_variables,
1580                        &mut expansion_stack,
1581                        &mut concat_cache,
1582                    )?;
1583                    library.push_preamble_with_source(value, Some(span));
1584                }
1585                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
1586                    library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1587                }
1588                RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1589            }
1590        }
1591
1592        Ok(library)
1593    }
1594
1595    fn from_raw_items_unexpanded(raw_items: Vec<RawBuildItem<'a>>) -> Self {
1596        let mut library = Self::new();
1597
1598        for raw_item in raw_items {
1599            match raw_item {
1600                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(entry), span, _) => {
1601                    library.push_entry_with_source(entry, Some(span));
1602                }
1603                RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) => {
1604                    library.push_string_with_source(Cow::Borrowed(name), value, Some(span));
1605                }
1606                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), span, _) => {
1607                    library.push_preamble_with_source(value, Some(span));
1608                }
1609                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
1610                    library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1611                }
1612                RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1613            }
1614        }
1615
1616        library
1617    }
1618
1619    /// Merge another library into this one
1620    pub fn merge(&mut self, other: Self) {
1621        let entry_offset = self.entries.len();
1622        let string_offset = self.strings.len();
1623        let preamble_offset = self.preambles.len();
1624        let comment_offset = self.comments.len();
1625        let failed_offset = self.failed_blocks.len();
1626        let other_entry_count = other.entries.len();
1627        let other_entry_sources = other.entry_sources;
1628
1629        self.entries.extend(other.entries);
1630        match (&mut self.entry_sources, other_entry_sources) {
1631            (Some(sources), Some(other_sources)) => sources.extend(other_sources),
1632            (Some(sources), None) => {
1633                sources.extend(std::iter::repeat(None).take(other_entry_count));
1634            }
1635            (None, Some(other_sources)) => {
1636                let mut sources = vec![None; entry_offset];
1637                sources.extend(other_sources);
1638                self.entry_sources = Some(sources);
1639            }
1640            (None, None) => {}
1641        }
1642        self.preambles.extend(other.preambles);
1643        self.comments.extend(other.comments);
1644        self.failed_blocks.extend(other.failed_blocks);
1645
1646        for definition in other.strings {
1647            let index = self.strings.len();
1648            self.string_lookup.insert(definition.name.clone(), index);
1649            self.strings.push(definition);
1650        }
1651
1652        self.block_order
1653            .extend(other.block_order.into_iter().map(|kind| match kind {
1654                BlockKind::Entry(index) => BlockKind::Entry(entry_offset + index),
1655                BlockKind::String(index) => BlockKind::String(string_offset + index),
1656                BlockKind::Preamble(index) => BlockKind::Preamble(preamble_offset + index),
1657                BlockKind::Comment(index) => BlockKind::Comment(comment_offset + index),
1658                BlockKind::Failed(index) => BlockKind::Failed(failed_offset + index),
1659            }));
1660    }
1661
1662    #[cfg(feature = "parallel")]
1663    fn merge_libraries_parallel(libraries: Vec<Library<'static>>) -> Library<'static> {
1664        let mut result = Library::new();
1665        for library in libraries {
1666            result.merge(library);
1667        }
1668        result
1669    }
1670
1671    /// Get all entries
1672    #[must_use]
1673    pub fn entries(&self) -> &[Entry<'a>] {
1674        &self.entries
1675    }
1676
    /// Get mutable access to all entries
    ///
    /// This hands out the entry vector only; the recorded entry source spans
    /// and block order are not updated when entries are added or removed
    /// through it.
    #[must_use]
    pub fn entries_mut(&mut self) -> &mut Vec<Entry<'a>> {
        &mut self.entries
    }
1682
1683    /// Get all string definitions
1684    #[must_use]
1685    pub fn strings(&self) -> &[StringDefinition<'a>] {
1686        &self.strings
1687    }
1688
    /// Get a string definition by name.
    ///
    /// The lookup is delegated to `get_string_definition`, which receives both
    /// the definition list and the name-to-index table.
    #[must_use]
    pub fn string(&self, name: &str) -> Option<&StringDefinition<'a>> {
        get_string_definition(&self.strings, &self.string_lookup, name)
    }
1694
1695    /// Get a string definition value by name.
1696    #[must_use]
1697    pub fn string_value(&self, name: &str) -> Option<&Value<'a>> {
1698        self.string(name).map(|definition| &definition.value)
1699    }
1700
1701    /// Get all preambles
1702    #[must_use]
1703    pub fn preambles(&self) -> &[Preamble<'a>] {
1704        &self.preambles
1705    }
1706
    /// Get mutable access to preambles
    ///
    /// This hands out the preamble vector only; the recorded block order is
    /// not updated when preambles are added or removed through it.
    #[must_use]
    pub fn preambles_mut(&mut self) -> &mut Vec<Preamble<'a>> {
        &mut self.preambles
    }
1712
1713    /// Get all comments
1714    #[must_use]
1715    pub fn comments(&self) -> &[Comment<'a>] {
1716        &self.comments
1717    }
1718
    /// Get mutable access to comments
    ///
    /// This hands out the comment vector only; the recorded block order is not
    /// updated when comments are added or removed through it.
    #[must_use]
    pub fn comments_mut(&mut self) -> &mut Vec<Comment<'a>> {
        &mut self.comments
    }
1724
1725    /// Get malformed blocks retained by tolerant parsing.
1726    #[must_use]
1727    pub fn failed_blocks(&self) -> &[FailedBlock<'a>] {
1728        &self.failed_blocks
1729    }
1730
1731    /// Return blocks in source order.
1732    #[must_use]
1733    pub fn blocks(&self) -> Vec<Block<'_, 'a>> {
1734        self.block_order
1735            .iter()
1736            .map(|kind| match *kind {
1737                BlockKind::Entry(index) => Block::Entry(
1738                    &self.entries[index],
1739                    self.entry_sources
1740                        .as_ref()
1741                        .and_then(|sources| sources.get(index).copied().flatten()),
1742                ),
1743                BlockKind::String(index) => Block::String(&self.strings[index]),
1744                BlockKind::Preamble(index) => Block::Preamble(&self.preambles[index]),
1745                BlockKind::Comment(index) => Block::Comment(&self.comments[index]),
1746                BlockKind::Failed(index) => Block::Failed(&self.failed_blocks[index]),
1747            })
1748            .collect()
1749    }
1750
1751    #[must_use]
1752    pub(crate) fn entry_source(&self, index: usize) -> Option<SourceSpan> {
1753        self.entry_sources
1754            .as_ref()
1755            .and_then(|sources| sources.get(index).copied().flatten())
1756    }
1757
1758    #[must_use]
1759    pub(crate) fn block_kinds(&self) -> &[BlockKind] {
1760        &self.block_order
1761    }
1762
1763    /// Find entries by key
1764    #[must_use]
1765    pub fn find_by_key(&self, key: &str) -> Option<&Entry<'a>> {
1766        self.entries.iter().find(|e| e.key == key)
1767    }
1768
1769    /// Find entries by key, ignoring ASCII case.
1770    #[must_use]
1771    pub fn find_by_key_ignore_case(&self, key: &str) -> Option<&Entry<'a>> {
1772        self.entries
1773            .iter()
1774            .find(|entry| entry.key.eq_ignore_ascii_case(key))
1775    }
1776
1777    /// Return `true` when the library contains `key`.
1778    #[must_use]
1779    pub fn contains_key(&self, key: &str) -> bool {
1780        self.find_by_key(key).is_some()
1781    }
1782
1783    /// Find entries by type
1784    #[must_use]
1785    pub fn find_by_type(&self, ty: &str) -> Vec<&Entry<'a>> {
1786        self.entries
1787            .iter()
1788            .filter(|e| e.ty.canonical_name().eq_ignore_ascii_case(ty))
1789            .collect()
1790    }
1791
1792    /// Find entries by field value
1793    #[must_use]
1794    pub fn find_by_field(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1795        self.entries
1796            .iter()
1797            .filter(|e| {
1798                e.get_as_string(field)
1799                    .as_ref()
1800                    .is_some_and(|v| v.contains(value))
1801            })
1802            .collect()
1803    }
1804
1805    /// Find entries by field value, ignoring ASCII case for the field name and value.
1806    #[must_use]
1807    pub fn find_by_field_ignore_case(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1808        self.entries
1809            .iter()
1810            .filter(|entry| {
1811                entry
1812                    .get_as_string_ignore_case(field)
1813                    .as_ref()
1814                    .is_some_and(|field_value| contains_case_insensitive(field_value, value))
1815            })
1816            .collect()
1817    }
1818
1819    /// Find entries whose normalized DOI matches `doi`.
1820    #[must_use]
1821    pub fn find_by_doi(&self, doi: &str) -> Vec<&Entry<'a>> {
1822        let Some(needle) = normalize_doi(doi) else {
1823            return Vec::new();
1824        };
1825
1826        self.entries
1827            .iter()
1828            .filter(|entry| entry.doi().as_ref().is_some_and(|value| value == &needle))
1829            .collect()
1830    }
1831
1832    /// Smart expansion with memoization for repeated variable references.
1833    fn smart_expand_value_cached(
1834        &self,
1835        value: Value<'a>,
1836        expanded_variables: &mut ExpansionCache<'a>,
1837        expansion_stack: &mut Vec<Cow<'a, str>>,
1838        concat_cache: &mut ConcatCache<'a>,
1839    ) -> Result<Value<'a>> {
1840        match value {
1841            // Simple literals and numbers stay as-is (zero-copy!)
1842            Value::Literal(_) | Value::Number(_) => Ok(value),
1843
1844            // Variables need to be resolved
1845            Value::Variable(name) => {
1846                let name_text = name.as_ref();
1847                if let Some(expanded) = expanded_variables.get_cloned(name_text) {
1848                    return Ok(expanded);
1849                }
1850
1851                if expansion_stack.iter().any(|v| v.as_ref() == name_text) {
1852                    let mut cycle = expansion_stack
1853                        .iter()
1854                        .map(std::convert::AsRef::as_ref)
1855                        .collect::<Vec<_>>()
1856                        .join(" -> ");
1857                    if !cycle.is_empty() {
1858                        cycle.push_str(" -> ");
1859                    }
1860                    cycle.push_str(name_text);
1861                    return Err(Error::CircularReference(cycle));
1862                }
1863
1864                if let Some(user_value) =
1865                    get_string_value(&self.strings, &self.string_lookup, name_text)
1866                {
1867                    // Recursively expand the variable's value and cache the result.
1868                    expansion_stack.push(name.clone());
1869                    let expanded = self.smart_expand_value_cached(
1870                        user_value.clone(),
1871                        expanded_variables,
1872                        expansion_stack,
1873                        concat_cache,
1874                    );
1875                    expansion_stack.pop();
1876
1877                    let expanded = expanded?;
1878                    expanded_variables.insert(name, expanded.clone());
1879                    Ok(expanded)
1880                } else {
1881                    // Check month abbreviations as fallback
1882                    get_month_expansion(name_text).map_or_else(
1883                        || {
1884                            // Variable not found in either user strings or month constants
1885                            Err(Error::UndefinedVariable(name_text.to_string()))
1886                        },
1887                        |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
1888                    )
1889                }
1890            }
1891
1892            // Concatenations need special handling
1893            Value::Concat(parts) => {
1894                if let Some(expanded) = concat_cache.get_cloned(&parts) {
1895                    return Ok(expanded);
1896                }
1897
1898                let cache_key = parts.clone();
1899                let expanded = self.expand_concatenation_cached(
1900                    parts.into_vec(),
1901                    expanded_variables,
1902                    expansion_stack,
1903                    concat_cache,
1904                )?;
1905                concat_cache.insert(cache_key, expanded.clone());
1906                Ok(expanded)
1907            }
1908        }
1909    }
1910
1911    /// Alternative expansion that works with references (requires cloning for variables)
1912    pub fn expand_value_ref(&self, value: &Value<'a>) -> Result<Value<'a>> {
1913        match value {
1914            // Simple literals and numbers can be cloned cheaply
1915            Value::Literal(_) | Value::Number(_) => Ok(value.clone()),
1916
1917            // Variables need to be resolved
1918            Value::Variable(name) => {
1919                // First check user-defined strings
1920                get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1921                    || {
1922                        // Check month abbreviations as fallback
1923                        get_month_expansion(name.as_ref()).map_or_else(
1924                            || {
1925                                // Variable not found in either user strings or month constants
1926                                Err(Error::UndefinedVariable(name.as_ref().to_string()))
1927                            },
1928                            |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
1929                        )
1930                    },
1931                    |user_value| self.expand_value_ref(user_value),
1932                )
1933            }
1934
1935            // Concatenations need cloning
1936            Value::Concat(parts) => {
1937                let cloned_parts = parts.to_vec();
1938                self.expand_concatenation(cloned_parts)
1939            }
1940        }
1941    }
1942
1943    /// Expand a concatenation, only converting to owned when necessary
1944    fn expand_concatenation(&self, parts: Vec<Value<'a>>) -> Result<Value<'a>> {
1945        let mut expanded_variables = ExpansionCache::with_capacity(0);
1946        let mut expansion_stack = Vec::new();
1947        let mut concat_cache = ConcatCache::new();
1948        self.expand_concatenation_cached(
1949            parts,
1950            &mut expanded_variables,
1951            &mut expansion_stack,
1952            &mut concat_cache,
1953        )
1954    }
1955
1956    /// Cached concatenation expansion used by hot parsing paths.
1957    fn expand_concatenation_cached(
1958        &self,
1959        parts: Vec<Value<'a>>,
1960        expanded_variables: &mut ExpansionCache<'a>,
1961        expansion_stack: &mut Vec<Cow<'a, str>>,
1962        concat_cache: &mut ConcatCache<'a>,
1963    ) -> Result<Value<'a>> {
1964        let mut expanded_parts = Vec::with_capacity(parts.len());
1965
1966        // First, expand all parts
1967        for part in parts {
1968            let expanded = self.smart_expand_value_cached(
1969                part,
1970                expanded_variables,
1971                expansion_stack,
1972                concat_cache,
1973            )?;
1974            expanded_parts.push(expanded);
1975        }
1976
1977        // If all parts are literals or numbers, we can flatten to a single string
1978        if expanded_parts
1979            .iter()
1980            .all(|p| matches!(p, Value::Literal(_) | Value::Number(_)))
1981        {
1982            let combined = concatenate_simple_values(&expanded_parts);
1983            Ok(Value::Literal(Cow::Owned(combined)))
1984        } else {
1985            Ok(Value::Concat(expanded_parts.into_boxed_slice()))
1986        }
1987    }
1988
1989    /// Get a fully expanded string value.
1990    pub fn get_expanded_string(&self, value: &Value<'a>) -> Result<String> {
1991        match value {
1992            Value::Literal(s) => Ok(s.to_string()),
1993            Value::Number(n) => Ok(n.to_string()),
1994            Value::Variable(name) => {
1995                // First check user-defined strings
1996                get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1997                    || {
1998                        // Check month abbreviations as fallback
1999                        get_month_expansion(name.as_ref()).map_or_else(
2000                            || {
2001                                // Variable not found in either user strings or month constants
2002                                Err(Error::UndefinedVariable(name.as_ref().to_string()))
2003                            },
2004                            |month_value| Ok(month_value.to_string()),
2005                        )
2006                    },
2007                    |user_value| self.get_expanded_string(user_value),
2008                )
2009            }
2010            Value::Concat(parts) => {
2011                let mut result = String::new();
2012                for part in parts.iter() {
2013                    result.push_str(&self.get_expanded_string(part)?);
2014                }
2015                Ok(result)
2016            }
2017        }
2018    }
2019
2020    /// Convert to owned version (no borrowed data)
2021    #[must_use]
2022    pub fn into_owned(self) -> Library<'static> {
2023        let strings = self
2024            .strings
2025            .into_iter()
2026            .map(StringDefinition::into_owned)
2027            .collect::<Vec<_>>();
2028        let mut string_lookup = AHashMap::with_capacity(strings.len());
2029        for (index, definition) in strings.iter().enumerate() {
2030            string_lookup.insert(Cow::Owned(definition.name.to_string()), index);
2031        }
2032
2033        Library {
2034            entries: self.entries.into_iter().map(Entry::into_owned).collect(),
2035            entry_sources: self.entry_sources,
2036            strings,
2037            string_lookup,
2038            preambles: self
2039                .preambles
2040                .into_iter()
2041                .map(Preamble::into_owned)
2042                .collect(),
2043            comments: self.comments.into_iter().map(Comment::into_owned).collect(),
2044            failed_blocks: self
2045                .failed_blocks
2046                .into_iter()
2047                .map(FailedBlock::into_owned)
2048                .collect(),
2049            block_order: self.block_order,
2050        }
2051    }
2052
2053    /// Add a string definition (useful for building libraries programmatically)
2054    pub fn add_string(&mut self, name: &'a str, value: Value<'a>) {
2055        self.push_string_with_source(Cow::Borrowed(name), value, None);
2056    }
2057
2058    /// Add an entry
2059    pub fn add_entry(&mut self, entry: Entry<'a>) {
2060        self.push_entry_with_source(entry, None);
2061    }
2062
2063    /// Add a preamble
2064    pub fn add_preamble(&mut self, value: Value<'a>) {
2065        self.push_preamble_with_source(value, None);
2066    }
2067
2068    /// Add a comment
2069    pub fn add_comment(&mut self, comment: &'a str) {
2070        self.push_comment_with_source(Cow::Borrowed(comment), None);
2071    }
2072
2073    /// Resolve string variables and concatenations in entries and preambles in place.
2074    pub fn resolve_strings(&mut self) -> Result<()> {
2075        let has_user_strings = !self.strings.is_empty();
2076        let month_constants_shadowed =
2077            has_user_strings && user_strings_shadow_month_constants(&self.strings);
2078        let mut expanded_variables = ExpansionCache::with_capacity(self.strings.len());
2079        let mut expansion_stack = Vec::new();
2080        let mut concat_cache = ConcatCache::new();
2081
2082        for entry_index in 0..self.entries.len() {
2083            let field_count = self.entries[entry_index].fields.len();
2084            for field_index in 0..field_count {
2085                let mut value =
2086                    std::mem::take(&mut self.entries[entry_index].fields[field_index].value);
2087                self.expand_value_for_parse(
2088                    &mut value,
2089                    has_user_strings,
2090                    month_constants_shadowed,
2091                    &mut expanded_variables,
2092                    &mut expansion_stack,
2093                    &mut concat_cache,
2094                )?;
2095                self.entries[entry_index].fields[field_index].value = value;
2096            }
2097        }
2098
2099        for preamble_index in 0..self.preambles.len() {
2100            let mut value = std::mem::take(&mut self.preambles[preamble_index].value);
2101            self.expand_value_for_parse(
2102                &mut value,
2103                has_user_strings,
2104                month_constants_shadowed,
2105                &mut expanded_variables,
2106                &mut expansion_stack,
2107                &mut concat_cache,
2108            )?;
2109            self.preambles[preamble_index].value = value;
2110        }
2111
2112        Ok(())
2113    }
2114
2115    /// Normalize DOI fields to lowercase `10.x/...` form when recognizable.
2116    pub fn normalize_doi_fields(&mut self) {
2117        for entry in &mut self.entries {
2118            for field in &mut entry.fields {
2119                if field.name.eq_ignore_ascii_case("doi") {
2120                    if let Some(normalized) = normalize_doi(&field.value.to_plain_string()) {
2121                        field.value = Value::Literal(Cow::Owned(normalized));
2122                    }
2123                }
2124            }
2125        }
2126    }
2127
2128    /// Normalize month fields to a chosen representation.
2129    pub fn normalize_months(&mut self, style: MonthStyle) {
2130        for entry in &mut self.entries {
2131            for field in &mut entry.fields {
2132                if field.name.eq_ignore_ascii_case("month") {
2133                    if let Some(month) =
2134                        normalize_month_value(&field.value.to_plain_string(), style)
2135                    {
2136                        field.value = month;
2137                    }
2138                }
2139            }
2140        }
2141    }
2142
2143    /// Normalize field names and common BibLaTeX aliases.
2144    pub fn normalize_fields(&mut self, options: FieldNormalizeOptions) {
2145        for entry in &mut self.entries {
2146            for field in &mut entry.fields {
2147                let mut name = if options.biblatex_aliases {
2148                    canonical_biblatex_field_alias(&field.name)
2149                        .unwrap_or_else(|| field.name.as_ref())
2150                        .to_string()
2151                } else {
2152                    field.name.to_string()
2153                };
2154
2155                if options.name_case == FieldNameCase::Lowercase {
2156                    name.make_ascii_lowercase();
2157                }
2158
2159                if name != field.name {
2160                    field.name = Cow::Owned(name);
2161                }
2162            }
2163        }
2164    }
2165
2166    /// Sort entries and/or fields in place.
2167    pub fn sort(&mut self, options: SortOptions) {
2168        if options.fields_by_name {
2169            for entry in &mut self.entries {
2170                entry
2171                    .fields
2172                    .sort_by(|left, right| left.name.cmp(&right.name));
2173            }
2174        }
2175
2176        if options.entries_by_key {
2177            if let Some(sources) = self.entry_sources.take() {
2178                let mut entries = self.entries.drain(..).zip(sources).collect::<Vec<_>>();
2179                entries.sort_by(|(left, _), (right, _)| left.key.cmp(&right.key));
2180                let (sorted_entries, sorted_sources): (Vec<_>, Vec<_>) =
2181                    entries.into_iter().unzip();
2182                self.entries = sorted_entries;
2183                self.entry_sources = Some(sorted_sources);
2184            } else {
2185                self.entries.sort_by(|left, right| left.key.cmp(&right.key));
2186            }
2187            self.rebuild_grouped_block_order();
2188        }
2189    }
2190
2191    fn rebuild_grouped_block_order(&mut self) {
2192        self.block_order.clear();
2193        self.block_order
2194            .extend((0..self.strings.len()).map(BlockKind::String));
2195        self.block_order
2196            .extend((0..self.preambles.len()).map(BlockKind::Preamble));
2197        self.block_order
2198            .extend((0..self.comments.len()).map(BlockKind::Comment));
2199        self.block_order
2200            .extend((0..self.entries.len()).map(BlockKind::Entry));
2201        self.block_order
2202            .extend((0..self.failed_blocks.len()).map(BlockKind::Failed));
2203    }
2204
2205    /// Validate all entries in the library
2206    /// Returns a list of entries with their indices and validation errors
2207    #[must_use]
2208    pub fn validate(
2209        &self,
2210        level: ValidationLevel,
2211    ) -> Vec<(usize, &Entry<'a>, Vec<ValidationError>)> {
2212        let mut invalid_entries = Vec::new();
2213
2214        for (index, entry) in self.entries.iter().enumerate() {
2215            if let Err(errors) = entry.validate(level) {
2216                invalid_entries.push((index, entry, errors));
2217            }
2218        }
2219
2220        invalid_entries
2221    }
2222
2223    /// Check for duplicate citation keys
2224    /// Returns a list of duplicate keys (each key appears once in the list even if it has multiple duplicates)
2225    #[must_use]
2226    pub fn find_duplicate_keys(&self) -> Vec<&str> {
2227        let mut seen = std::collections::HashSet::new();
2228        let mut duplicates = std::collections::HashSet::new();
2229
2230        for entry in &self.entries {
2231            if !seen.insert(entry.key()) {
2232                duplicates.insert(entry.key());
2233            }
2234        }
2235
2236        duplicates.into_iter().collect()
2237    }
2238
2239    /// Check for duplicate citation keys, ignoring ASCII case.
2240    #[must_use]
2241    pub fn find_duplicate_keys_ignore_case(&self) -> Vec<String> {
2242        let mut seen = std::collections::HashSet::new();
2243        let mut duplicates = std::collections::HashSet::new();
2244
2245        for entry in &self.entries {
2246            let normalized_key = entry.key().to_ascii_lowercase();
2247            if !seen.insert(normalized_key.clone()) {
2248                duplicates.insert(normalized_key);
2249            }
2250        }
2251
2252        duplicates.into_iter().collect()
2253    }
2254
2255    /// Find duplicate DOI groups using normalized DOI values.
2256    #[must_use]
2257    pub fn find_duplicate_dois(&self) -> Vec<(String, Vec<&Entry<'a>>)> {
2258        let mut groups: AHashMap<String, Vec<&Entry<'a>>> = AHashMap::new();
2259        for entry in &self.entries {
2260            if let Some(doi) = entry.doi() {
2261                groups.entry(doi).or_default().push(entry);
2262            }
2263        }
2264
2265        groups
2266            .into_iter()
2267            .filter(|(_, entries)| entries.len() > 1)
2268            .collect()
2269    }
2270
2271    /// Validate all entries and return a comprehensive validation report
2272    #[must_use]
2273    pub fn validate_comprehensive(&self, level: ValidationLevel) -> ValidationReport<'_> {
2274        let invalid_entries = self.validate(level);
2275        let duplicate_keys = self.find_duplicate_keys();
2276        let empty_entries = self.find_empty_entries();
2277
2278        ValidationReport {
2279            invalid_entries,
2280            duplicate_keys,
2281            empty_entries,
2282            total_entries: self.entries.len(),
2283            validation_level: level,
2284        }
2285    }
2286
2287    /// Find entries with no fields (only key and type)
2288    fn find_empty_entries(&self) -> Vec<(usize, &Entry<'a>)> {
2289        self.entries
2290            .iter()
2291            .enumerate()
2292            .filter(|(_, entry)| entry.fields().is_empty())
2293            .collect()
2294    }
2295
2296    /// Get statistics about the library
2297    #[must_use]
2298    pub fn stats(&self) -> LibraryStats {
2299        let mut type_counts = AHashMap::new();
2300        for entry in &self.entries {
2301            *type_counts.entry(entry.ty.to_string()).or_insert(0) += 1;
2302        }
2303
2304        LibraryStats {
2305            total_entries: self.entries.len(),
2306            total_strings: self.strings.len(),
2307            total_preambles: self.preambles.len(),
2308            total_comments: self.comments.len(),
2309            entries_by_type: type_counts,
2310        }
2311    }
2312}
2313
2314/// Statistics about a library
2315#[derive(Debug, Clone)]
2316pub struct LibraryStats {
2317    /// Total number of entries
2318    pub total_entries: usize,
2319    /// Total number of string definitions
2320    pub total_strings: usize,
2321    /// Total number of preambles
2322    pub total_preambles: usize,
2323    /// Total number of comments
2324    pub total_comments: usize,
2325    /// Entry counts by type
2326    pub entries_by_type: AHashMap<String, usize>,
2327}
2328
2329/// Comprehensive validation report for a library
2330#[derive(Debug, Clone)]
2331pub struct ValidationReport<'a> {
2332    /// Entries that failed validation with their errors
2333    pub invalid_entries: Vec<(usize, &'a Entry<'a>, Vec<ValidationError>)>,
2334    /// Duplicate citation keys
2335    pub duplicate_keys: Vec<&'a str>,
2336    /// Entries with no fields
2337    pub empty_entries: Vec<(usize, &'a Entry<'a>)>,
2338    /// Total number of entries in the library
2339    pub total_entries: usize,
2340    /// Validation level used
2341    pub validation_level: ValidationLevel,
2342}
2343
2344impl ValidationReport<'_> {
2345    /// Check if the library is completely valid
2346    #[must_use]
2347    pub fn is_valid(&self) -> bool {
2348        self.invalid_entries.is_empty()
2349            && self.duplicate_keys.is_empty()
2350            && self.empty_entries.is_empty()
2351    }
2352
2353    /// Get total number of issues found
2354    #[must_use]
2355    pub fn total_issues(&self) -> usize {
2356        self.invalid_entries.len() + self.duplicate_keys.len() + self.empty_entries.len()
2357    }
2358
2359    /// Get a summary of issues by severity
2360    #[must_use]
2361    pub fn issue_summary(&self) -> IssueSummary {
2362        let mut errors = 0;
2363        let mut warnings = 0;
2364        let mut infos = 0;
2365
2366        for (_, _, validation_errors) in &self.invalid_entries {
2367            for error in validation_errors {
2368                match error.severity {
2369                    crate::model::ValidationSeverity::Error => errors += 1,
2370                    crate::model::ValidationSeverity::Warning => warnings += 1,
2371                    crate::model::ValidationSeverity::Info => infos += 1,
2372                }
2373            }
2374        }
2375
2376        // Duplicate keys and empty entries are considered errors
2377        errors += self.duplicate_keys.len() + self.empty_entries.len();
2378
2379        IssueSummary {
2380            errors,
2381            warnings,
2382            infos,
2383        }
2384    }
2385}
2386
/// Validation issue counts, split by severity.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IssueSummary {
    /// Count of issues at error severity.
    pub errors: usize,
    /// Count of issues at warning severity.
    pub warnings: usize,
    /// Count of issues at info severity.
    pub infos: usize,
}
2397
2398/// Concatenate simple values (literals and numbers) into a single string
2399fn concatenate_simple_values(values: &[Value]) -> String {
2400    let mut result = String::new();
2401
2402    // Pre-calculate capacity for efficiency
2403    let capacity: usize = values
2404        .iter()
2405        .map(|v| match v {
2406            Value::Literal(s) => s.len(),
2407            Value::Number(n) => n.to_string().len(),
2408            _ => 0,
2409        })
2410        .sum();
2411
2412    result.reserve(capacity);
2413
2414    for value in values {
2415        match value {
2416            Value::Literal(s) => result.push_str(s),
2417            Value::Number(n) => result.push_str(&n.to_string()),
2418            _ => {} // Should not happen given the precondition
2419        }
2420    }
2421
2422    result
2423}
2424
/// Report whether `needle` occurs in `haystack` once both are lowercased.
///
/// An empty `needle` always matches.
fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
    needle.is_empty()
        || haystack
            .to_lowercase()
            .contains(needle.to_lowercase().as_str())
}
2432
2433fn normalize_month_value(input: &str, style: MonthStyle) -> Option<Value<'static>> {
2434    let normalized = input.trim().trim_matches(['{', '}']).to_ascii_lowercase();
2435    let month_index = match normalized.as_str() {
2436        "jan" | "january" | "1" | "01" => 1,
2437        "feb" | "february" | "2" | "02" => 2,
2438        "mar" | "march" | "3" | "03" => 3,
2439        "apr" | "april" | "4" | "04" => 4,
2440        "may" | "5" | "05" => 5,
2441        "jun" | "june" | "6" | "06" => 6,
2442        "jul" | "july" | "7" | "07" => 7,
2443        "aug" | "august" | "8" | "08" => 8,
2444        "sep" | "september" | "9" | "09" => 9,
2445        "oct" | "october" | "10" => 10,
2446        "nov" | "november" | "11" => 11,
2447        "dec" | "december" | "12" => 12,
2448        _ => return None,
2449    };
2450
2451    let text = match style {
2452        MonthStyle::Long => month_long_name(month_index),
2453        MonthStyle::Abbrev => month_abbreviation(month_index),
2454        MonthStyle::Number => return Some(Value::Number(month_index)),
2455    };
2456
2457    Some(Value::Literal(Cow::Borrowed(text)))
2458}
2459
/// Full English name of month `month` (1 = January … 12 = December).
///
/// Any number outside `1..=12` yields the empty string.
const fn month_long_name(month: i64) -> &'static str {
    const LONG_NAMES: [(i64, &str); 12] = [
        (1, "January"),
        (2, "February"),
        (3, "March"),
        (4, "April"),
        (5, "May"),
        (6, "June"),
        (7, "July"),
        (8, "August"),
        (9, "September"),
        (10, "October"),
        (11, "November"),
        (12, "December"),
    ];

    // A `while` loop is used because iterators are not available in `const fn`.
    let mut slot = 0;
    while slot < LONG_NAMES.len() {
        if LONG_NAMES[slot].0 == month {
            return LONG_NAMES[slot].1;
        }
        slot += 1;
    }

    ""
}
2477
/// Lowercase three-letter abbreviation of month `month` (1 = `jan` … 12 = `dec`).
///
/// Any number outside `1..=12` yields the empty string.
const fn month_abbreviation(month: i64) -> &'static str {
    const ABBREVIATIONS: [(i64, &str); 12] = [
        (1, "jan"),
        (2, "feb"),
        (3, "mar"),
        (4, "apr"),
        (5, "may"),
        (6, "jun"),
        (7, "jul"),
        (8, "aug"),
        (9, "sep"),
        (10, "oct"),
        (11, "nov"),
        (12, "dec"),
    ];

    // A `while` loop is used because iterators are not available in `const fn`.
    let mut slot = 0;
    while slot < ABBREVIATIONS.len() {
        if ABBREVIATIONS[slot].0 == month {
            return ABBREVIATIONS[slot].1;
        }
        slot += 1;
    }

    ""
}
2495
2496/// Builder for creating libraries programmatically
2497#[derive(Debug, Default)]
2498pub struct LibraryBuilder<'a> {
2499    library: Library<'a>,
2500}
2501
2502impl<'a> LibraryBuilder<'a> {
2503    /// Create a new builder
2504    #[must_use]
2505    pub fn new() -> Self {
2506        Self::default()
2507    }
2508
2509    /// Add an entry
2510    #[must_use]
2511    pub fn entry(mut self, entry: Entry<'a>) -> Self {
2512        self.library.add_entry(entry);
2513        self
2514    }
2515
2516    /// Add a string definition
2517    #[must_use]
2518    pub fn string(mut self, name: &'a str, value: Value<'a>) -> Self {
2519        self.library.add_string(name, value);
2520        self
2521    }
2522
2523    /// Add a preamble
2524    #[must_use]
2525    pub fn preamble(mut self, value: Value<'a>) -> Self {
2526        self.library.add_preamble(value);
2527        self
2528    }
2529
2530    /// Add a comment
2531    #[must_use]
2532    pub fn comment(mut self, text: &'a str) -> Self {
2533        self.library.add_comment(text);
2534        self
2535    }
2536
2537    /// Build the library
2538    #[must_use]
2539    pub fn build(self) -> Library<'a> {
2540        self.library
2541    }
2542}
2543
2544#[cfg(test)]
2545mod tests {
2546    use super::*;
2547    use crate::model::{EntryType, Field};
2548
2549    #[test]
2550    fn test_library_parse() {
2551        let input = r#"
2552            @string{me = "John Doe"}
2553            
2554            @article{test2023,
2555                author = me,
2556                title = "Test Article",
2557                year = 2023
2558            }
2559        "#;
2560
2561        let library = Library::parser().parse(input).unwrap();
2562        assert_eq!(library.entries().len(), 1);
2563        assert_eq!(library.strings().len(), 1);
2564
2565        let entry = &library.entries()[0];
2566        // Use get_as_string since the value might be a variable reference
2567        assert_eq!(entry.get_as_string("author").unwrap(), "John Doe");
2568    }
2569
2570    #[test]
2571    fn test_zero_copy_preservation() {
2572        let input = r#"
2573            @article{test,
2574                title = "This is borrowed",
2575                year = 2023
2576            }
2577        "#;
2578
2579        let library = Library::parser().parse(input).unwrap();
2580        let entry = &library.entries()[0];
2581
2582        // The title should still be borrowed from the input
2583        if let Some(Value::Literal(cow)) = entry
2584            .fields
2585            .iter()
2586            .find(|f| f.name == "title")
2587            .map(|f| &f.value)
2588        {
2589            assert!(matches!(cow, Cow::Borrowed(_)));
2590        }
2591    }
2592
2593    #[test]
2594    fn test_concatenation_creates_owned() {
2595        let input = r#"
2596            @string{first = "Hello"}
2597            @string{second = "World"}
2598            
2599            @article{test,
2600                title = first # ", " # second
2601            }
2602        "#;
2603
2604        let library = Library::parser().parse(input).unwrap();
2605        let entry = &library.entries()[0];
2606
2607        // Concatenation should create an owned string
2608        assert_eq!(entry.get_as_string("title").unwrap(), "Hello, World");
2609    }
2610
2611    #[test]
2612    fn test_boxed_concat_memory_optimization() {
2613        // Verify that Value enum is 24 bytes or less (was 32 before optimization)
2614        assert!(
2615            std::mem::size_of::<Value>() <= 32,
2616            "Value enum is {} bytes, should be 32 or less",
2617            std::mem::size_of::<Value>()
2618        );
2619    }
2620
2621    #[test]
2622    fn test_field_vec_capacity_bounded() {
2623        let input = r#"
2624            @article{test,
2625                a = "1", b = "2", c = "3", d = "4", e = "5",
2626                f = "6", g = "7", h = "8", i = "9", j = "10"
2627            }
2628        "#;
2629
2630        let library = Library::parser().parse(input).unwrap();
2631        let entry = &library.entries()[0];
2632
2633        assert_eq!(entry.fields.len(), 10);
2634        assert!(
2635            entry.fields.capacity() <= 17,
2636            "Unexpected field Vec growth: len={}, capacity={}",
2637            entry.fields.len(),
2638            entry.fields.capacity()
2639        );
2640    }
2641
2642    #[test]
2643    fn test_library_builder() {
2644        let library = LibraryBuilder::new()
2645            .string("me", Value::Literal(Cow::Borrowed("John Doe")))
2646            .entry(Entry {
2647                ty: EntryType::Article,
2648                key: Cow::Borrowed("test2023"),
2649                fields: vec![
2650                    Field::new("author", Value::Variable(Cow::Borrowed("me"))),
2651                    Field::new("title", Value::Literal(Cow::Borrowed("Test"))),
2652                ],
2653            })
2654            .build();
2655
2656        assert_eq!(library.entries().len(), 1);
2657        assert_eq!(library.strings().len(), 1);
2658    }
2659
2660    #[test]
2661    fn test_library_stats() {
2662        let input = r#"
2663            @string{ieee = "IEEE"}
2664            @preamble{"Test preamble"}
2665            % This is a percent comment that now works properly
2666            @comment{This is a formal comment that works}
2667            @article{a1, title = "Article 1"}
2668            @article{a2, title = "Article 2"}
2669            @book{b1, title = "Book 1"}
2670        "#;
2671
2672        let library = Library::parser().parse(input).unwrap();
2673        let stats = library.stats();
2674
2675        assert_eq!(stats.total_entries, 3);
2676        assert_eq!(stats.total_strings, 1);
2677        assert_eq!(stats.total_preambles, 1);
2678        assert_eq!(stats.total_comments, 2); // Both % and @comment should work
2679        assert_eq!(stats.entries_by_type.get("article"), Some(&2));
2680        assert_eq!(stats.entries_by_type.get("book"), Some(&1));
2681    }
2682
2683    #[test]
2684    fn test_parse_files_parallel() {
2685        use std::fs::write;
2686        use std::path::PathBuf;
2687
2688        let dir = std::env::temp_dir();
2689        let path1 = dir.join("parallel_test1.bib");
2690        let path2 = dir.join("parallel_test2.bib");
2691
2692        write(&path1, "@article{a1,title=\"A\"}").unwrap();
2693        write(&path2, "@article{a2,title=\"B\"}").unwrap();
2694
2695        let paths: Vec<PathBuf> = vec![path1.clone(), path2.clone()];
2696
2697        let library = Library::parser().threads(2).parse_files(&paths).unwrap();
2698
2699        assert_eq!(library.entries().len(), 2);
2700
2701        let _ = std::fs::remove_file(path1);
2702        let _ = std::fs::remove_file(path2);
2703    }
2704
2705    #[test]
2706    fn test_builder_pattern_api() {
2707        let input = "@article{test, title = \"Test\"}";
2708
2709        // Single-threaded (default)
2710        let db1 = Library::parser().parse(input).unwrap();
2711        assert_eq!(db1.entries().len(), 1);
2712
2713        // Using parser builder
2714        let library2 = Library::parser().threads(1).parse(input).unwrap();
2715        assert_eq!(library2.entries().len(), 1);
2716
2717        #[cfg(feature = "parallel")]
2718        {
2719            use std::fs::write;
2720
2721            // Parallel only works for multiple files
2722            let db3 = Library::parser().threads(4).parse(input).unwrap();
2723            assert_eq!(db3.entries().len(), 1);
2724
2725            // Multi-file parallel processing
2726            let path1 = "/tmp/test1.bib";
2727            let path2 = "/tmp/test2.bib";
2728            write(path1, "@article{a1, title=\"A\"}").unwrap();
2729            write(path2, "@article{a2, title=\"B\"}").unwrap();
2730
2731            let db4 = Library::parser()
2732                .threads(2)
2733                .parse_files(&[path1, path2])
2734                .unwrap();
2735            assert_eq!(db4.entries().len(), 2);
2736
2737            let _ = std::fs::remove_file(path1);
2738            let _ = std::fs::remove_file(path2);
2739        }
2740    }
2741}