1use crate::{
4 canonical_biblatex_field_alias, normalize_doi, CorpusEvent, CorpusSource, Entry, Error,
5 ParseEvent, ParseFlow, ParsedBlock, ParsedComment, ParsedCorpus, ParsedDocument, ParsedEntry,
6 ParsedFailedBlock, ParsedPreamble, ParsedSource, ParsedString, Result, SourceId, SourceMap,
7 SourceSpan, StreamingSummary, ValidationError, ValidationLevel, Value,
8};
9use ahash::AHashMap;
10use memchr::memchr;
11use std::borrow::Cow;
12use std::ops::Deref;
13use std::path::Path;
14
15#[cfg(feature = "parallel")]
16use rayon::prelude::*;
17
/// Largest entry count for which `ExpansionCache` keeps its linear `Small` representation;
/// one more insert promotes it to the hash-map-backed `Large` form.
const SMALL_EXPANSION_CACHE_LIMIT: usize = 16;
/// Largest number of string definitions that `get_string_definition` searches linearly
/// instead of consulting the hash index.
const SMALL_STRING_LOOKUP_LIMIT: usize = 16;
/// Maximum number of entries `ConcatCache` stores; inserts beyond this are dropped.
const CONCAT_CACHE_LIMIT: usize = 16;
21
/// Cache of already-expanded string variables, keyed by variable name.
///
/// Starts as a small vector searched linearly and is promoted to a hash map
/// once it outgrows `SMALL_EXPANSION_CACHE_LIMIT`.
enum ExpansionCache<'a> {
    /// Linear list of `(name, expanded value)` pairs.
    Small(Vec<(Cow<'a, str>, Value<'a>)>),
    /// Hash map from name to expanded value.
    Large(AHashMap<Cow<'a, str>, Value<'a>>),
}
26
27impl<'a> ExpansionCache<'a> {
28 fn with_capacity(capacity: usize) -> Self {
29 if capacity <= SMALL_EXPANSION_CACHE_LIMIT {
30 Self::Small(Vec::with_capacity(capacity))
31 } else {
32 Self::Large(AHashMap::with_capacity(capacity))
33 }
34 }
35
36 fn get_cloned(&mut self, name: &str) -> Option<Value<'a>> {
37 match self {
38 Self::Small(entries) => {
39 let index = entries.iter().position(|(key, _)| key.as_ref() == name)?;
40 if index != 0 {
41 entries.swap(0, index);
42 }
43 Some(entries[0].1.clone())
44 }
45 Self::Large(entries) => entries.get(name).cloned(),
46 }
47 }
48
49 fn insert(&mut self, name: Cow<'a, str>, value: Value<'a>) {
50 match self {
51 Self::Small(entries) => {
52 if entries.len() < SMALL_EXPANSION_CACHE_LIMIT {
53 entries.push((name, value));
54 } else {
55 let mut large = AHashMap::with_capacity(entries.len() + 1);
56 for (key, value) in entries.drain(..) {
57 large.insert(key, value);
58 }
59 large.insert(name, value);
60 *self = Self::Large(large);
61 }
62 }
63 Self::Large(entries) => {
64 entries.insert(name, value);
65 }
66 }
67 }
68}
69
/// Small bounded cache mapping the parts of a concatenation to a stored result value.
struct ConcatCache<'a> {
    /// `(parts, result)` pairs, searched linearly.
    entries: Vec<(Box<[Value<'a>]>, Value<'a>)>,
}
73
74impl<'a> ConcatCache<'a> {
75 const fn new() -> Self {
76 Self {
77 entries: Vec::new(),
78 }
79 }
80
81 fn get_cloned(&mut self, parts: &[Value<'a>]) -> Option<Value<'a>> {
82 let index = self
83 .entries
84 .iter()
85 .position(|(cached_parts, _)| concat_parts_equal(cached_parts, parts))?;
86 if index != 0 {
87 self.entries.swap(0, index);
88 }
89 Some(self.entries[0].1.clone())
90 }
91
92 fn insert(&mut self, parts: Box<[Value<'a>]>, value: Value<'a>) {
93 if self.entries.len() < CONCAT_CACHE_LIMIT {
94 self.entries.push((parts, value));
95 }
96 }
97}
98
99fn concat_parts_equal(left: &[Value<'_>], right: &[Value<'_>]) -> bool {
100 left.len() == right.len()
101 && left
102 .iter()
103 .zip(right)
104 .all(|(left, right)| cache_values_equal(left, right))
105}
106
107fn cache_values_equal(left: &Value<'_>, right: &Value<'_>) -> bool {
108 match (left, right) {
109 (Value::Literal(left), Value::Literal(right))
110 | (Value::Variable(left), Value::Variable(right)) => left.as_ref() == right.as_ref(),
111 (Value::Number(left), Value::Number(right)) => left == right,
112 (Value::Concat(left), Value::Concat(right)) => concat_parts_equal(left, right),
113 _ => false,
114 }
115}
116
/// Maps a three-letter month abbreviation (matched ASCII case-insensitively)
/// to the full English month name.
///
/// Returns `None` for any name that is not exactly three bytes long or is not
/// one of the twelve abbreviations.
#[inline]
fn get_month_expansion(name: &str) -> Option<&'static str> {
    // Setting bit 0x20 lower-cases ASCII letters; only letters can map onto the
    // lowercase keys below, so other bytes never produce a false match.
    let folded = match *name.as_bytes() {
        [first, second, third] => [first | 0x20, second | 0x20, third | 0x20],
        _ => return None,
    };

    match &folded {
        b"jan" => Some("January"),
        b"feb" => Some("February"),
        b"mar" => Some("March"),
        b"apr" => Some("April"),
        b"may" => Some("May"),
        b"jun" => Some("June"),
        b"jul" => Some("July"),
        b"aug" => Some("August"),
        b"sep" => Some("September"),
        b"oct" => Some("October"),
        b"nov" => Some("November"),
        b"dec" => Some("December"),
        _ => None,
    }
}
148
149#[inline]
150fn get_string_value<'map, 'a>(
151 strings: &'map [StringDefinition<'a>],
152 string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
153 name: &str,
154) -> Option<&'map Value<'a>> {
155 get_string_definition(strings, string_lookup, name).map(|definition| &definition.value)
156}
157
158#[inline]
159fn get_string_definition<'map, 'a>(
160 strings: &'map [StringDefinition<'a>],
161 string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
162 name: &str,
163) -> Option<&'map StringDefinition<'a>> {
164 if strings.len() <= SMALL_STRING_LOOKUP_LIMIT {
165 strings
166 .iter()
167 .rev()
168 .find(|definition| definition.name.as_ref() == name)
169 } else {
170 string_lookup
171 .get(name)
172 .and_then(|&index| strings.get(index))
173 }
174}
175
176#[inline]
177fn user_strings_shadow_month_constants(strings: &[StringDefinition<'_>]) -> bool {
178 strings
179 .iter()
180 .any(|definition| get_month_expansion(definition.name.as_ref()).is_some())
181}
182
183#[inline]
185fn contains_variables(value: &Value) -> bool {
186 match value {
187 Value::Variable(_) => true,
188 Value::Concat(parts) => parts.iter().any(contains_variables),
189 _ => false,
190 }
191}
192
193#[inline]
195fn contains_potential_month_variables(value: &Value) -> bool {
196 match value {
197 Value::Variable(name) => get_month_expansion(name).is_some(),
198 Value::Concat(parts) => parts.iter().any(contains_potential_month_variables),
199 _ => false,
200 }
201}
202
/// Returns `true` for bytes treated as identifier characters: ASCII letters and
/// digits plus `_`, `-`, `:` and `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
210
211#[inline]
212fn starts_with_at_keyword(input: &[u8], keyword: &[u8]) -> bool {
213 if input.first() != Some(&b'@') || input.len() < keyword.len() + 1 {
214 return false;
215 }
216
217 for (offset, &expected) in keyword.iter().enumerate() {
218 if (input[offset + 1] | 0x20) != expected {
219 return false;
220 }
221 }
222
223 if input.len() == keyword.len() + 1 {
224 return true;
225 }
226
227 !is_identifier_char(input[keyword.len() + 1])
228}
229
/// Result of the cheap byte-level pre-scan performed by `scan_input`.
#[derive(Debug, Clone, Copy)]
struct InputScan {
    /// `true` if at least one `@string` keyword (any ASCII case) was seen.
    may_contain_string_definition: bool,
    /// Number of `@` bytes found in the input.
    at_count: usize,
}
235
236fn scan_input(input: &str) -> InputScan {
238 let bytes = input.as_bytes();
239 let mut pos = 0;
240 let mut at_count = 0;
241 let mut may_contain_string_definition = false;
242
243 while pos < bytes.len() {
244 if let Some(offset) = memchr(b'@', &bytes[pos..]) {
245 let at = pos + offset;
246 at_count += 1;
247 if starts_with_at_keyword(&bytes[at..], b"string") {
248 may_contain_string_definition = true;
249 }
250 pos = at + 1;
251 } else {
252 break;
253 }
254 }
255
256 InputScan {
257 may_contain_string_definition,
258 at_count,
259 }
260}
261
262fn input_may_have_late_string_definition(input: &str) -> bool {
267 let bytes = input.as_bytes();
268 let mut pos = 0;
269 let mut saw_regular_entry = false;
270
271 while pos < bytes.len() {
272 if let Some(offset) = memchr(b'@', &bytes[pos..]) {
273 let at = pos + offset;
274 let tail = &bytes[at..];
275
276 if starts_with_at_keyword(tail, b"string") {
277 if saw_regular_entry {
278 return true;
279 }
280 } else if !saw_regular_entry
281 && !starts_with_at_keyword(tail, b"preamble")
282 && !starts_with_at_keyword(tail, b"comment")
283 {
284 saw_regular_entry = true;
286 }
287
288 pos = at + 1;
289 } else {
290 break;
291 }
292 }
293
294 false
295}
296
/// Finds where error recovery should resume after a block that started at
/// byte offset `start`.
///
/// Returns the offset of the first `@` strictly after `start` that is preceded
/// on its line only by spaces or tabs, or `input.len()` when there is none.
///
/// The scan is a single forward pass that tracks whether the current line has
/// contained only blanks so far. The line prefix is examined once, for the
/// starting position, instead of once per `@`, which keeps recovery linear
/// even on very long lines containing many `@` characters.
fn next_recovery_boundary(input: &str, start: usize) -> usize {
    let bytes = input.as_bytes();
    let first = start.saturating_add(1);
    if first >= bytes.len() {
        return input.len();
    }

    // The scan may begin mid-line, so seed the state from what precedes it.
    let mut prefix_is_blank = line_prefix_is_whitespace(bytes, first);
    for (offset, &byte) in bytes[first..].iter().enumerate() {
        match byte {
            b'@' if prefix_is_blank => return first + offset,
            b'\n' | b'\r' => prefix_is_blank = true,
            b' ' | b'\t' => {}
            // Any other byte (including an `@` that is not at line start)
            // disqualifies the remainder of this line.
            _ => prefix_is_blank = false,
        }
    }

    input.len()
}

/// Returns `true` if every byte between the start of the line containing
/// `pos` and `pos` itself is a space or a tab.
///
/// A line starts right after the closest preceding `\n` or `\r`, or at offset
/// 0 when there is none. `pos` must not exceed `bytes.len()`.
fn line_prefix_is_whitespace(bytes: &[u8], pos: usize) -> bool {
    bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| !matches!(byte, b'\n' | b'\r'))
        .all(|&byte| matches!(byte, b' ' | b'\t'))
}
319
320fn merge_streaming_summary(total: &mut StreamingSummary, source: StreamingSummary) {
321 total.entries += source.entries;
322 total.strings += source.strings;
323 total.preambles += source.preambles;
324 total.comments += source.comments;
325 total.failed_blocks += source.failed_blocks;
326 total.warnings += source.warnings;
327 total.errors += source.errors;
328 total.infos += source.infos;
329 total.recovered_blocks += source.recovered_blocks;
330 total.stopped |= source.stopped;
331}
332
/// Configurable parser front-end; build one with `Parser::new()` and the
/// chained option methods, then call one of the `parse*` methods.
#[derive(Debug, Default, Clone)]
pub struct Parser {
    /// Thread-count setting consulted by `parse_files` when the `parallel`
    /// feature is enabled; `None` leaves the thread pool at its default.
    threads: Option<usize>,
    /// Selects the tolerant (error-recovering) parsing paths when `true`.
    tolerant: bool,
    /// Options that affect how documents and events are produced.
    document: DocumentOptions,
}
340
/// Flags that control document/event construction; all default to `false`.
#[derive(Debug, Default, Clone, Copy)]
struct DocumentOptions {
    /// Makes `Parser::parse` use the span-tracking parse path (or forwards the
    /// flag to the tolerant parser).
    capture_source: bool,
    /// Forwarded to the parsed-item constructors and raw-item passes.
    /// NOTE(review): presumably keeps the original source text of each block — confirm.
    preserve_raw: bool,
    /// When set, `populate_expanded_values` is run on the finished document.
    expand_values: bool,
}
347
348impl Parser {
/// Creates a parser with default settings: no thread override, strict
/// (non-tolerant) parsing and all document options disabled.
#[must_use]
#[inline]
pub fn new() -> Self {
    Self::default()
}
355
356 #[must_use]
358 #[inline]
359 pub fn threads(mut self, threads: impl Into<Option<usize>>) -> Self {
360 self.threads = threads.into();
361 self
362 }
363
364 #[must_use]
366 #[inline]
367 pub const fn tolerant(mut self) -> Self {
368 self.tolerant = true;
369 self
370 }
371
372 #[must_use]
374 #[inline]
375 pub const fn capture_source(mut self) -> Self {
376 self.document.capture_source = true;
377 self
378 }
379
380 #[must_use]
382 #[inline]
383 pub const fn preserve_raw(mut self) -> Self {
384 self.document.preserve_raw = true;
385 self
386 }
387
388 #[must_use]
390 #[inline]
391 pub const fn expand_values(mut self) -> Self {
392 self.document.expand_values = true;
393 self
394 }
395
396 #[inline]
398 pub fn parse<'a>(&self, input: &'a str) -> Result<Library<'a>> {
399 if self.tolerant {
400 Library::parse_tolerant(input, self.document.capture_source)
401 } else if self.document.capture_source {
402 Library::parse_with_spans(input)
403 } else {
404 Library::parse_sequential(input)
405 }
406 }
407
408 #[inline]
414 pub fn parse_document<'a>(&self, input: &'a str) -> Result<ParsedDocument<'a>> {
415 self.parse_document_with_source_id(SourceId::new(0), None, input)
416 }
417
418 #[inline]
423 pub fn parse_source<'a>(
424 &self,
425 source_name: impl Into<Cow<'a, str>>,
426 input: &'a str,
427 ) -> Result<ParsedDocument<'a>> {
428 self.parse_document_with_source_id(SourceId::new(0), Some(source_name.into()), input)
429 }
430
431 pub fn parse_sources<'a>(&self, sources: &[CorpusSource<'a>]) -> Result<ParsedCorpus<'a>> {
433 let mut documents = Vec::with_capacity(sources.len());
434 for (index, source) in sources.iter().enumerate() {
435 documents.push(self.parse_document_with_source_id(
436 SourceId::new(index),
437 Some(Cow::Borrowed(source.name)),
438 source.input,
439 )?);
440 }
441
442 Ok(ParsedCorpus::from_documents(documents))
443 }
444
445 #[inline]
452 pub fn parse_events<'a, F>(&self, input: &'a str, on_event: F) -> Result<StreamingSummary>
453 where
454 F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
455 {
456 self.parse_source_events_with_source(SourceId::new(0), None, input, on_event)
457 }
458
459 #[inline]
461 pub fn parse_source_events<'a, F>(
462 &self,
463 source_name: impl Into<Cow<'a, str>>,
464 input: &'a str,
465 on_event: F,
466 ) -> Result<StreamingSummary>
467 where
468 F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
469 {
470 self.parse_source_events_with_source(
471 SourceId::new(0),
472 Some(source_name.into()),
473 input,
474 on_event,
475 )
476 }
477
478 pub fn parse_corpus_events<'a, F>(
480 &self,
481 sources: &[CorpusSource<'a>],
482 mut on_event: F,
483 ) -> Result<StreamingSummary>
484 where
485 F: FnMut(CorpusEvent<'a>) -> Result<ParseFlow>,
486 {
487 let mut summary = StreamingSummary::default();
488
489 for (index, source) in sources.iter().enumerate() {
490 if summary.stopped {
491 break;
492 }
493
494 let source_id = SourceId::new(index);
495 let parsed_source = ParsedSource {
496 id: source_id,
497 name: Some(Cow::Borrowed(source.name)),
498 };
499 if on_event(CorpusEvent::SourceStart(parsed_source.clone()))? == ParseFlow::Stop {
500 summary.stopped = true;
501 break;
502 }
503
504 let source_summary = self.parse_source_events_with_source(
505 source_id,
506 Some(Cow::Borrowed(source.name)),
507 source.input,
508 |event| {
509 on_event(CorpusEvent::Event {
510 source: source_id,
511 event: Box::new(event),
512 })
513 },
514 )?;
515 merge_streaming_summary(&mut summary, source_summary);
516
517 if on_event(CorpusEvent::SourceEnd(parsed_source))? == ParseFlow::Stop {
518 summary.stopped = true;
519 }
520 }
521
522 summary.finalize_status();
523 Ok(summary)
524 }
525
526 fn parse_source_events_with_source<'a, F>(
527 &self,
528 source_id: SourceId,
529 source_name: Option<Cow<'a, str>>,
530 input: &'a str,
531 mut on_event: F,
532 ) -> Result<StreamingSummary>
533 where
534 F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
535 {
536 let source_map = SourceMap::new(Some(source_id), source_name, input);
537 let mut summary = StreamingSummary::default();
538
539 if self.tolerant {
540 self.parse_tolerant_events(input, &source_map, &mut summary, &mut on_event)?;
541 } else {
542 crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
543 let source = source_map.span(span.byte_start, span.byte_end);
544 self.emit_parsed_event(item, source, raw, &source_map, &mut summary, &mut on_event)
545 })?;
546 }
547
548 summary.finalize_status();
549 Ok(summary)
550 }
551
552 fn parse_tolerant_events<'a, F>(
553 &self,
554 input: &'a str,
555 source_map: &SourceMap<'a>,
556 summary: &mut StreamingSummary,
557 on_event: &mut F,
558 ) -> Result<()>
559 where
560 F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
561 {
562 let mut remaining = input;
563
564 loop {
565 crate::parser::lexer::skip_whitespace(&mut remaining);
566 if remaining.is_empty() || summary.stopped {
567 break;
568 }
569
570 let start = input.len() - remaining.len();
571 match crate::parser::parse_item(&mut remaining) {
572 Ok(item) => {
573 let end = input.len() - remaining.len();
574 let source = source_map.span(start, end);
575 self.emit_parsed_event(
576 item,
577 source,
578 &input[start..end],
579 source_map,
580 summary,
581 on_event,
582 )?;
583 }
584 Err(err) => {
585 let end = next_recovery_boundary(input, start);
586 let failed = FailedBlock {
587 raw: Cow::Borrowed(&input[start..end]),
588 error: format!("Failed to parse entry: {err}"),
589 source: Some(source_map.span(start, end)),
590 };
591 let failed_index = summary.failed_blocks;
592 let failed = ParsedFailedBlock::from_failed_block(
593 failed_index,
594 failed,
595 Some(source_map),
596 );
597 if let Some(partial) = crate::document::recover_partial_stream_entry(
598 &failed,
599 source_map,
600 summary.entries,
601 self.document.preserve_raw,
602 ) {
603 Self::emit_event(ParseEvent::Entry(partial), summary, on_event)?;
604 } else {
605 Self::emit_event(ParseEvent::Failed(failed), summary, on_event)?;
606 }
607 remaining = &input[end..];
608 }
609 }
610 }
611
612 Ok(())
613 }
614
615 fn emit_parsed_event<'a, F>(
616 &self,
617 item: crate::parser::ParsedItem<'a>,
618 source: SourceSpan,
619 raw: &'a str,
620 source_map: &SourceMap<'a>,
621 summary: &mut StreamingSummary,
622 on_event: &mut F,
623 ) -> Result<()>
624 where
625 F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
626 {
627 if summary.stopped {
628 return Ok(());
629 }
630
631 let event = match item {
632 crate::parser::ParsedItem::Entry(entry) => {
633 ParseEvent::Entry(ParsedEntry::from_stream_entry(
634 entry,
635 source,
636 raw,
637 source_map,
638 self.document.preserve_raw,
639 ))
640 }
641 crate::parser::ParsedItem::String(name, value) => {
642 ParseEvent::String(ParsedString::from_stream_definition(
643 name,
644 value,
645 source,
646 raw,
647 self.document.preserve_raw,
648 ))
649 }
650 crate::parser::ParsedItem::Preamble(value) => {
651 ParseEvent::Preamble(ParsedPreamble::from_stream_preamble(
652 value,
653 source,
654 raw,
655 self.document.preserve_raw,
656 ))
657 }
658 crate::parser::ParsedItem::Comment(text) => ParseEvent::Comment(
659 ParsedComment::from_stream_comment(text, source, raw, self.document.preserve_raw),
660 ),
661 };
662
663 Self::emit_event(event, summary, on_event)
664 }
665
666 fn emit_event<'a, F>(
667 event: ParseEvent<'a>,
668 summary: &mut StreamingSummary,
669 on_event: &mut F,
670 ) -> Result<()>
671 where
672 F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
673 {
674 if summary.stopped {
675 return Ok(());
676 }
677
678 let diagnostics = match &event {
679 ParseEvent::Entry(entry) => {
680 summary.entries += 1;
681 if entry.status == crate::ParsedEntryStatus::Partial {
682 summary.recovered_blocks += 1;
683 }
684 entry.diagnostics.clone()
685 }
686 ParseEvent::String(_) => {
687 summary.strings += 1;
688 Vec::new()
689 }
690 ParseEvent::Preamble(_) => {
691 summary.preambles += 1;
692 Vec::new()
693 }
694 ParseEvent::Comment(_) => {
695 summary.comments += 1;
696 Vec::new()
697 }
698 ParseEvent::Failed(failed) => {
699 summary.failed_blocks += 1;
700 failed.diagnostics.clone()
701 }
702 ParseEvent::Diagnostic(diagnostic) => {
703 summary.count_diagnostic(diagnostic);
704 Vec::new()
705 }
706 };
707 for diagnostic in &diagnostics {
708 summary.count_diagnostic(diagnostic);
709 }
710
711 if on_event(event)? == ParseFlow::Stop {
712 summary.stopped = true;
713 return Ok(());
714 }
715
716 for diagnostic in diagnostics {
717 if on_event(ParseEvent::Diagnostic(diagnostic))? == ParseFlow::Stop {
718 summary.stopped = true;
719 break;
720 }
721 }
722
723 Ok(())
724 }
725
/// Shared implementation behind the document-producing entry points.
///
/// Parses `input` into raw items (tolerantly or strictly), builds a library
/// from them, wraps it in a [`ParsedDocument`] and then runs the
/// post-processing passes selected by the parser options.
///
/// In strict mode a raw-parse failure is not returned as `Err`; it is turned
/// into a document via `ParsedDocument::failed_from_error`.
///
/// # Errors
///
/// Returns library-construction errors, except that undefined-variable and
/// circular-reference errors fall back to an unexpanded library when
/// `expand_values` is off. Also propagates errors from value expansion.
fn parse_document_with_source_id<'a>(
    &self,
    source_id: SourceId,
    source_name: Option<Cow<'a, str>>,
    input: &'a str,
) -> Result<ParsedDocument<'a>> {
    let source_map = SourceMap::new(Some(source_id), source_name.clone(), input);
    let sources = vec![ParsedSource {
        id: source_id,
        name: source_name,
    }];
    let raw_items = if self.tolerant {
        Library::parse_tolerant_raw_items(input, true, &source_map)
    } else {
        match Library::parse_raw_items_with_source(input, &source_map) {
            Ok(raw_items) => raw_items,
            Err(error) => {
                return Ok(ParsedDocument::failed_from_error(
                    sources,
                    &source_map,
                    &error,
                ));
            }
        }
    };
    // `raw_items` is cloned because it is needed again by the passes below.
    let library = match Library::from_raw_items(raw_items.clone()) {
        Ok(library) => library,
        // Expansion was not requested, so unresolved strings are tolerated by
        // building the library without expanding values.
        Err(Error::UndefinedVariable(_) | Error::CircularReference(_))
            if !self.document.expand_values =>
        {
            Library::from_raw_items_unexpanded(raw_items.clone())
        }
        Err(error) => return Err(error),
    };
    let mut document =
        ParsedDocument::from_library_with_source_map(library, sources, Some(&source_map));
    // Entries are numbered in the order they appear among the raw items.
    let mut entry_index = 0;
    for raw_item in &raw_items {
        if let RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, raw) = raw_item {
            document.apply_entry_locations(
                entry_index,
                raw,
                &source_map,
                self.document.preserve_raw,
            );
            entry_index += 1;
        }
    }
    document.apply_parsed_values(&raw_items);
    if self.document.preserve_raw {
        document.apply_raw_items(&raw_items);
    }
    if self.tolerant {
        document.recover_partial_entries(&source_map, self.document.preserve_raw);
    }
    if self.document.expand_values {
        document.populate_expanded_values(crate::ExpansionOptions::default())?;
    }
    Ok(document)
}
786
787 pub(crate) fn parse_compact_document_owned(
788 &self,
789 source_name: Option<String>,
790 input: &str,
791 ) -> Result<ParsedDocument<'static>> {
792 let source_name = source_name.map(Cow::Owned);
793 let sources = vec![ParsedSource {
794 id: SourceId::new(0),
795 name: source_name,
796 }];
797 let input_scan = scan_input(input);
798 let mut entries = Vec::with_capacity(input_scan.at_count);
799 let mut strings = Vec::new();
800 let mut preambles = Vec::new();
801 let mut comments = Vec::new();
802 let mut blocks = Vec::with_capacity(input_scan.at_count);
803
804 crate::parser::parse_bibtex_stream(input, |item| {
805 match item {
806 crate::parser::ParsedItem::Entry(entry) => {
807 let index = entries.len();
808 entries.push(ParsedEntry::from_entry_owned(entry, None));
809 blocks.push(ParsedBlock::Entry(index));
810 }
811 crate::parser::ParsedItem::String(name, value) => {
812 let index = strings.len();
813 strings.push(ParsedString::from_definition(StringDefinition {
814 name: Cow::Owned(name.to_string()),
815 value: value.into_owned(),
816 source: None,
817 }));
818 blocks.push(ParsedBlock::String(index));
819 }
820 crate::parser::ParsedItem::Preamble(value) => {
821 let index = preambles.len();
822 preambles.push(ParsedPreamble::from_preamble(Preamble::new(
823 value.into_owned(),
824 )));
825 blocks.push(ParsedBlock::Preamble(index));
826 }
827 crate::parser::ParsedItem::Comment(text) => {
828 let index = comments.len();
829 comments.push(ParsedComment::from_comment(Comment {
830 text: Cow::Owned(text.to_string()),
831 source: None,
832 }));
833 blocks.push(ParsedBlock::Comment(index));
834 }
835 }
836 Ok(())
837 })?;
838
839 let mut document = ParsedDocument::from_parsed_parts(
840 Library::new(),
841 sources,
842 entries,
843 strings,
844 preambles,
845 comments,
846 blocks,
847 );
848 if self.document.expand_values {
849 document.populate_expanded_values(crate::ExpansionOptions::default())?;
850 }
851 Ok(document)
852 }
853
/// Parses `input` into an owned (`'static`) document while recording source
/// spans for each block.
///
/// Items are collected per kind while `blocks` records their original order.
/// The document is assembled around an empty [`Library`], and expanded values
/// are populated when `expand_values` is set.
///
/// # Errors
///
/// Propagates streaming-parse errors and value-expansion errors.
pub(crate) fn parse_source_document_owned(
    &self,
    source_name: Option<String>,
    input: &str,
) -> Result<ParsedDocument<'static>> {
    let source_name = source_name.map(Cow::Owned);
    let source_id = SourceId::new(0);
    let source_map = SourceMap::new(Some(source_id), source_name.clone(), input);
    let sources = vec![ParsedSource {
        id: source_id,
        name: source_name,
    }];
    // The number of `@` bytes is used as the capacity hint for entries and blocks.
    let input_scan = scan_input(input);
    let mut entries = Vec::with_capacity(input_scan.at_count);
    let mut strings = Vec::new();
    let mut preambles = Vec::new();
    let mut comments = Vec::new();
    let mut blocks = Vec::with_capacity(input_scan.at_count);
    // One cursor is shared by every span lookup in the callback below.
    let mut span_cursor = source_map.cursor();

    crate::parser::parse_bibtex_stream_with_entry_locations(input, |item, start, end, raw| {
        // The block span is resolved first; the entry constructor then
        // receives the same cursor for its own lookups.
        let source = span_cursor.span(start, end);
        match item {
            crate::parser::LocatedParsedItem::Entry(entry) => {
                let index = entries.len();
                entries.push(ParsedEntry::from_located_stream_entry_owned(
                    entry,
                    source,
                    &mut span_cursor,
                ));
                blocks.push(ParsedBlock::Entry(index));
            }
            crate::parser::LocatedParsedItem::String(name, value) => {
                let index = strings.len();
                strings.push(ParsedString::from_stream_definition_owned(
                    name, value, source, raw,
                ));
                blocks.push(ParsedBlock::String(index));
            }
            crate::parser::LocatedParsedItem::Preamble(value) => {
                let index = preambles.len();
                preambles.push(ParsedPreamble::from_stream_preamble_owned(
                    value, source, raw,
                ));
                blocks.push(ParsedBlock::Preamble(index));
            }
            crate::parser::LocatedParsedItem::Comment(text) => {
                let index = comments.len();
                comments.push(ParsedComment::from_stream_comment_owned(text, source));
                blocks.push(ParsedBlock::Comment(index));
            }
        }
        Ok(())
    })?;

    let mut document = ParsedDocument::from_parsed_parts(
        Library::new(),
        sources,
        entries,
        strings,
        preambles,
        comments,
        blocks,
    );
    if self.document.expand_values {
        document.populate_expanded_values(crate::ExpansionOptions::default())?;
    }
    Ok(document)
}
923
924 pub fn parse_files<P: AsRef<Path> + Sync>(&self, paths: &[P]) -> Result<Library<'static>> {
926 #[cfg(feature = "parallel")]
927 {
928 if let Some(threads) = self.threads {
929 if threads <= 1 {
930 return Self::parse_files_sequential(paths);
931 }
932 }
933
934 let pool = self.build_thread_pool()?;
935
936 let libraries: Result<Vec<_>> = pool.install(|| {
937 paths
938 .par_iter()
939 .map(|path| {
940 let content = std::fs::read_to_string(path)?;
941 let library = Library::parse_sequential(&content)?;
942 Ok(library.into_owned())
943 })
944 .collect()
945 });
946
947 let libraries = libraries?;
948 Ok(Library::merge_libraries_parallel(libraries))
949 }
950
951 #[cfg(not(feature = "parallel"))]
952 {
953 Self::parse_files_sequential(paths)
954 }
955 }
956
957 fn parse_files_sequential<P: AsRef<Path>>(paths: &[P]) -> Result<Library<'static>> {
959 let mut result = Library::new();
960 for path in paths {
961 let content = std::fs::read_to_string(path)?;
962 let library = Library::parse_sequential(&content)?;
963 result.merge(library.into_owned());
964 }
965 Ok(result)
966 }
967
/// Builds a rayon thread pool, applying the configured thread count if one
/// was set. Build failures are reported as `Error::WinnowError` carrying the
/// failure message.
#[cfg(feature = "parallel")]
fn build_thread_pool(&self) -> Result<rayon::ThreadPool> {
    let builder = rayon::ThreadPoolBuilder::new();
    let builder = match self.threads {
        Some(threads) => builder.num_threads(threads),
        None => builder,
    };

    builder
        .build()
        .map_err(|err| Error::WinnowError(err.to_string()))
}
980}
981
/// Borrowed view of a single block, one variant per block kind.
#[derive(Debug, Clone, Copy)]
pub enum Block<'lib, 'a> {
    /// A bibliography entry together with its source span, when one is available.
    Entry(&'lib Entry<'a>, Option<SourceSpan>),
    /// A string definition.
    String(&'lib StringDefinition<'a>),
    /// A preamble.
    Preamble(&'lib Preamble<'a>),
    /// A comment.
    Comment(&'lib Comment<'a>),
    /// A block that failed to parse.
    Failed(&'lib FailedBlock<'a>),
}
996
/// Identifies a block by its kind plus its index within the collection that
/// stores blocks of that kind (as recorded in `Library::block_order`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockKind {
    /// Index into the entries.
    Entry(usize),
    /// Index into the string definitions.
    String(usize),
    /// Index into the preambles.
    Preamble(usize),
    /// Index into the comments.
    Comment(usize),
    /// Index into the failed blocks.
    Failed(usize),
}
1005
/// Intermediate item produced by raw parsing, before a library is built from it.
#[derive(Debug, Clone)]
pub enum RawBuildItem<'a> {
    /// A successfully parsed item with its source span and a string slice
    /// (NOTE(review): presumably the raw source text of the block — confirm).
    Parsed(crate::parser::ParsedItem<'a>, SourceSpan, &'a str),
    /// A block that could not be parsed.
    Failed(FailedBlock<'a>),
}
1011
/// A named string definition: a name bound to a value.
#[derive(Debug, Clone, PartialEq)]
pub struct StringDefinition<'a> {
    /// Name of the string.
    pub name: Cow<'a, str>,
    /// Value bound to the name.
    pub value: Value<'a>,
    /// Source location of the definition, when one was recorded.
    pub source: Option<SourceSpan>,
}
1022
1023impl<'a> StringDefinition<'a> {
1024 #[must_use]
1026 pub const fn new(name: &'a str, value: Value<'a>) -> Self {
1027 Self {
1028 name: Cow::Borrowed(name),
1029 value,
1030 source: None,
1031 }
1032 }
1033
1034 #[must_use]
1036 pub fn name(&self) -> &str {
1037 &self.name
1038 }
1039
1040 #[must_use]
1042 pub const fn value(&self) -> &Value<'a> {
1043 &self.value
1044 }
1045
1046 #[must_use]
1048 pub fn into_owned(self) -> StringDefinition<'static> {
1049 StringDefinition {
1050 name: Cow::Owned(self.name.into_owned()),
1051 value: self.value.into_owned(),
1052 source: self.source,
1053 }
1054 }
1055}
1056
/// A preamble block holding a single value.
#[derive(Debug, Clone, PartialEq)]
pub struct Preamble<'a> {
    /// The preamble's value.
    pub value: Value<'a>,
    /// Source location of the preamble, when one was recorded.
    pub source: Option<SourceSpan>,
}
1065
1066impl<'a> Preamble<'a> {
1067 #[must_use]
1069 pub const fn new(value: Value<'a>) -> Self {
1070 Self {
1071 value,
1072 source: None,
1073 }
1074 }
1075
1076 #[must_use]
1078 pub const fn value(&self) -> &Value<'a> {
1079 &self.value
1080 }
1081
1082 #[must_use]
1084 pub fn into_owned(self) -> Preamble<'static> {
1085 Preamble {
1086 value: self.value.into_owned(),
1087 source: self.source,
1088 }
1089 }
1090}
1091
1092impl<'a> Deref for Preamble<'a> {
1093 type Target = Value<'a>;
1094
1095 fn deref(&self) -> &Self::Target {
1096 &self.value
1097 }
1098}
1099
/// A comment block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment<'a> {
    /// Text of the comment.
    pub text: Cow<'a, str>,
    /// Source location of the comment, when one was recorded.
    pub source: Option<SourceSpan>,
}
1108
1109impl<'a> Comment<'a> {
1110 #[must_use]
1112 pub const fn new(text: &'a str) -> Self {
1113 Self {
1114 text: Cow::Borrowed(text),
1115 source: None,
1116 }
1117 }
1118
1119 #[must_use]
1121 pub fn text(&self) -> &str {
1122 &self.text
1123 }
1124
1125 #[must_use]
1127 pub fn into_owned(self) -> Comment<'static> {
1128 Comment {
1129 text: Cow::Owned(self.text.into_owned()),
1130 source: self.source,
1131 }
1132 }
1133}
1134
1135impl Deref for Comment<'_> {
1136 type Target = str;
1137
1138 fn deref(&self) -> &Self::Target {
1139 &self.text
1140 }
1141}
1142
/// A region of input that could not be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FailedBlock<'a> {
    /// The unparsed input text of the region.
    pub raw: Cow<'a, str>,
    /// Human-readable description of the parse failure.
    pub error: String,
    /// Source location of the region, when one was recorded.
    pub source: Option<SourceSpan>,
}
1153
1154impl FailedBlock<'_> {
1155 #[must_use]
1157 pub fn into_owned(self) -> FailedBlock<'static> {
1158 FailedBlock {
1159 raw: Cow::Owned(self.raw.into_owned()),
1160 error: self.error,
1161 source: self.source,
1162 }
1163 }
1164}
1165
/// Selects a month representation; `Long` is the default.
///
/// NOTE(review): the variant descriptions below are inferred from the names
/// only — confirm against the code that consumes this enum.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MonthStyle {
    /// Presumably the full month name.
    #[default]
    Long,
    /// Presumably the abbreviated month name.
    Abbrev,
    /// Presumably the numeric month.
    Number,
}
1177
/// Sorting switches; both default to `false`.
///
/// NOTE(review): field meanings are inferred from their names — confirm
/// against the code that consumes these options.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SortOptions {
    /// Presumably requests sorting entries by citation key.
    pub entries_by_key: bool,
    /// Presumably requests sorting each entry's fields by field name.
    pub fields_by_name: bool,
}
1186
/// Selects a field-name casing policy; `Preserve` is the default.
///
/// NOTE(review): variant meanings are inferred from their names — confirm
/// against the code that consumes this enum.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameCase {
    /// Presumably leaves field names as written.
    #[default]
    Preserve,
    /// Presumably converts field names to lowercase.
    Lowercase,
}
1196
/// Field normalization settings; the default preserves name case and leaves
/// `biblatex_aliases` off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FieldNormalizeOptions {
    /// Casing policy for field names.
    pub name_case: FieldNameCase,
    /// NOTE(review): presumably enables mapping biblatex field aliases to
    /// their canonical names — confirm against the normalization code.
    pub biblatex_aliases: bool,
}
1205
/// In-memory bibliography: entries, string definitions, preambles, comments
/// and failed blocks, plus the order in which the blocks were added.
#[derive(Debug, Clone, Default)]
pub struct Library<'a> {
    /// Entries, in insertion order.
    entries: Vec<Entry<'a>>,
    /// Source span per entry (parallel to `entries`); allocated only once an
    /// entry with a known span has been added.
    entry_sources: Option<Vec<Option<SourceSpan>>>,
    /// String definitions, in insertion order.
    strings: Vec<StringDefinition<'a>>,
    /// Maps a string name to the index in `strings` of the definition
    /// registered most recently under that name.
    string_lookup: AHashMap<Cow<'a, str>, usize>,
    /// Preambles, in insertion order.
    preambles: Vec<Preamble<'a>>,
    /// Comments, in insertion order.
    comments: Vec<Comment<'a>>,
    /// Blocks that failed to parse, in insertion order.
    failed_blocks: Vec<FailedBlock<'a>>,
    /// Kind and per-kind index of every block, in the order blocks were added.
    block_order: Vec<BlockKind>,
}
1226
1227impl<'a> Library<'a> {
1228 fn push_entry_with_source(&mut self, entry: Entry<'a>, source: Option<SourceSpan>) {
1229 let index = self.entries.len();
1230 self.entries.push(entry);
1231 if let Some(sources) = &mut self.entry_sources {
1232 sources.push(source);
1233 } else if source.is_some() {
1234 let mut sources = vec![None; index];
1235 sources.push(source);
1236 self.entry_sources = Some(sources);
1237 }
1238 self.block_order.push(BlockKind::Entry(index));
1239 }
1240
1241 fn register_string_definition(
1242 &mut self,
1243 name: Cow<'a, str>,
1244 value: Value<'a>,
1245 source: Option<SourceSpan>,
1246 ) -> usize {
1247 let index = self.strings.len();
1248 self.string_lookup.insert(name.clone(), index);
1249 self.strings.push(StringDefinition {
1250 name,
1251 value,
1252 source,
1253 });
1254 index
1255 }
1256
1257 fn push_string_with_source(
1258 &mut self,
1259 name: Cow<'a, str>,
1260 value: Value<'a>,
1261 source: Option<SourceSpan>,
1262 ) {
1263 let index = self.register_string_definition(name, value, source);
1264 self.block_order.push(BlockKind::String(index));
1265 }
1266
1267 fn push_preamble_with_source(&mut self, value: Value<'a>, source: Option<SourceSpan>) -> usize {
1268 let index = self.preambles.len();
1269 self.preambles.push(Preamble { value, source });
1270 self.block_order.push(BlockKind::Preamble(index));
1271 index
1272 }
1273
1274 fn push_comment_with_source(&mut self, text: Cow<'a, str>, source: Option<SourceSpan>) {
1275 let index = self.comments.len();
1276 self.comments.push(Comment { text, source });
1277 self.block_order.push(BlockKind::Comment(index));
1278 }
1279
1280 fn push_failed_block(&mut self, failed: FailedBlock<'a>) {
1281 let index = self.failed_blocks.len();
1282 self.failed_blocks.push(failed);
1283 self.block_order.push(BlockKind::Failed(index));
1284 }
1285
/// Expands `value` in place during parsing.
///
/// Literals and numbers are left untouched. Variables and concatenations are
/// resolved through the built-in month names, the two caches and, as a last
/// resort, `smart_expand_value_cached`.
///
/// * `has_user_strings` – the caller's indication that user string definitions exist.
/// * `month_constants_shadowed` – the caller's indication that a user string
///   reuses a month abbreviation as its name.
/// * `expanded_variables`, `expansion_stack`, `concat_cache` – scratch state
///   shared across calls.
///
/// # Errors
///
/// Propagates errors from `smart_expand_value_cached`.
#[inline]
fn expand_value_for_parse(
    &self,
    value: &mut Value<'a>,
    has_user_strings: bool,
    month_constants_shadowed: bool,
    expanded_variables: &mut ExpansionCache<'a>,
    expansion_stack: &mut Vec<Cow<'a, str>>,
    concat_cache: &mut ConcatCache<'a>,
) -> Result<()> {
    match value {
        Value::Literal(_) | Value::Number(_) => Ok(()),
        Value::Variable(name) => {
            // Built-in month names win unless a user string shadows one of them.
            if !has_user_strings || !month_constants_shadowed {
                if let Some(month_value) = get_month_expansion(name.as_ref()) {
                    *value = Value::Literal(Cow::Borrowed(month_value));
                    return Ok(());
                }
            }

            if has_user_strings {
                // Reuse an earlier expansion of the same variable if there is one.
                if let Some(expanded) = expanded_variables.get_cloned(name.as_ref()) {
                    *value = expanded;
                    return Ok(());
                }

                // `take` moves the value out so it can be passed by value.
                let old_value = std::mem::take(value);
                *value = self.smart_expand_value_cached(
                    old_value,
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
            }

            // Without user strings, a non-month variable is left as it is.
            Ok(())
        }
        Value::Concat(parts) => {
            // With user strings the concat cache is consulted before anything else.
            if has_user_strings {
                if let Some(expanded) = concat_cache.get_cloned(parts) {
                    *value = expanded;
                    return Ok(());
                }
            }

            // Without user strings only month variables can be expanded.
            let needs_expansion = if has_user_strings {
                parts.iter().any(contains_variables)
            } else {
                parts.iter().any(contains_potential_month_variables)
            };

            if needs_expansion {
                // Without user strings the cache lookup is deferred until it
                // is known that expansion is required.
                if !has_user_strings {
                    if let Some(expanded) = concat_cache.get_cloned(parts) {
                        *value = expanded;
                        return Ok(());
                    }
                }

                let old_value = std::mem::take(value);
                *value = self.smart_expand_value_cached(
                    old_value,
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
            }

            Ok(())
        }
    }
}
1358
    /// Creates a library via `Self::default()`.
    #[must_use]
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }
1365
    /// Returns a fresh [`Parser`] created with `Parser::new()`.
    #[must_use]
    #[inline]
    pub fn parser() -> Parser {
        Parser::new()
    }
1393
    /// Parses `input` with the parser returned by [`Library::parser`].
    ///
    /// # Errors
    ///
    /// Returns whatever error the parser reports for `input`.
    pub fn parse(input: &'a str) -> Result<Self> {
        Self::parser().parse(input)
    }
1398
1399 pub fn parse_file(path: impl AsRef<Path>) -> Result<Library<'static>> {
1401 let content = std::fs::read_to_string(path)?;
1402 Library::parser().parse(&content).map(Library::into_owned)
1403 }
1404
    /// Serializes this library to a string via `crate::writer::to_string`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the writer reports.
    pub fn to_bibtex(&self) -> Result<String> {
        crate::writer::to_string(self)
    }
1409
    /// Writes this library to `path` via `crate::writer::to_file`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the writer reports.
    pub fn write_file(&self, path: impl AsRef<Path>) -> Result<()> {
        crate::writer::to_file(self, path)
    }
1414
    /// Parses `input` on the current thread and expands values as it goes.
    ///
    /// Three strategies are used, chosen from cheap pre-scans of the input:
    ///
    /// 1. When the scan reports that no string definition can be present,
    ///    every entry and preamble is expanded immediately as if there were
    ///    no user strings.
    /// 2. When `input_may_have_late_string_definition` returns `false`,
    ///    entries are expanded as they stream in against the strings
    ///    registered so far, and preambles are expanded after the stream ends.
    /// 3. Otherwise everything is collected first and entries and preambles
    ///    are expanded afterwards against the complete string table.
    ///
    /// No source spans are recorded on this path (`None` is passed to every
    /// `push_*_with_source` call).
    ///
    /// # Errors
    ///
    /// Propagates errors from `parse_bibtex_stream` and from value expansion.
    #[allow(clippy::too_many_lines)]
    pub(crate) fn parse_sequential(input: &'a str) -> Result<Self> {
        let mut library = Self::new();
        let input_scan = scan_input(input);

        // Strategy 1: the scan ruled out string definitions.
        if !input_scan.may_contain_string_definition {
            // NOTE(review): `at_count` is presumably the number of `@` blocks
            // seen by the scan and is used only as a capacity hint - confirm.
            library.entries.reserve(input_scan.at_count);
            library.block_order.reserve(input_scan.at_count);
            let has_user_strings = false;
            let month_constants_shadowed = false;
            let mut expanded_variables = ExpansionCache::with_capacity(0);
            let mut expansion_stack = Vec::new();
            let mut concat_cache = ConcatCache::new();

            crate::parser::parse_bibtex_stream(input, |item| {
                match item {
                    crate::parser::ParsedItem::Entry(mut entry) => {
                        for field in &mut entry.fields {
                            library.expand_value_for_parse(
                                &mut field.value,
                                has_user_strings,
                                month_constants_shadowed,
                                &mut expanded_variables,
                                &mut expansion_stack,
                                &mut concat_cache,
                            )?;
                        }
                        library.push_entry_with_source(entry, None);
                    }
                    crate::parser::ParsedItem::Preamble(value) => {
                        let mut expanded = value;
                        library.expand_value_for_parse(
                            &mut expanded,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                        library.push_preamble_with_source(expanded, None);
                    }
                    crate::parser::ParsedItem::Comment(text) => {
                        library.push_comment_with_source(Cow::Borrowed(text), None);
                    }
                    // Still handled in case the scan's answer was conservative
                    // in the other direction; the definition is simply stored.
                    crate::parser::ParsedItem::String(name, value) => {
                        library.push_string_with_source(Cow::Borrowed(name), value, None);
                    }
                }
                Ok(())
            })?;

            return Ok(library);
        }

        library.block_order.reserve(input_scan.at_count);

        // Strategy 2: strings may exist, but none is reported as "late".
        if !input_may_have_late_string_definition(input) {
            let mut pending_preambles = Vec::new();
            let mut expanded_variables = ExpansionCache::with_capacity(0);
            let mut expansion_stack = Vec::new();
            let mut concat_cache = ConcatCache::new();
            // Computed lazily on the first entry and then reused.
            let mut month_constants_shadowed = None;

            crate::parser::parse_bibtex_stream(input, |item| {
                match item {
                    crate::parser::ParsedItem::Entry(mut entry) => {
                        let has_user_strings = !library.strings.is_empty();
                        // The local shadows the outer `Option`; the right-hand
                        // side still refers to the outer cache.
                        let month_constants_shadowed = *month_constants_shadowed
                            .get_or_insert_with(|| {
                                has_user_strings
                                    && user_strings_shadow_month_constants(&library.strings)
                            });
                        for field in &mut entry.fields {
                            library.expand_value_for_parse(
                                &mut field.value,
                                has_user_strings,
                                month_constants_shadowed,
                                &mut expanded_variables,
                                &mut expansion_stack,
                                &mut concat_cache,
                            )?;
                        }
                        library.push_entry_with_source(entry, None);
                    }
                    crate::parser::ParsedItem::Preamble(value) => {
                        // Stored unexpanded; expanded once the stream is done.
                        let index = library.push_preamble_with_source(value, None);
                        pending_preambles.push(index);
                    }
                    crate::parser::ParsedItem::String(name, value) => {
                        library.push_string_with_source(Cow::Borrowed(name), value, None);
                    }
                    crate::parser::ParsedItem::Comment(text) => {
                        library.push_comment_with_source(Cow::Borrowed(text), None);
                    }
                }
                Ok(())
            })?;

            // Expand the deferred preambles against the final string table.
            let has_user_strings = !library.strings.is_empty();
            let month_constants_shadowed =
                has_user_strings && user_strings_shadow_month_constants(&library.strings);
            for index in pending_preambles {
                // Take the value out so `library` can be borrowed immutably
                // for the expansion call.
                let mut expanded = std::mem::take(&mut library.preambles[index].value);
                library.expand_value_for_parse(
                    &mut expanded,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.preambles[index].value = expanded;
            }

            return Ok(library);
        }

        // Strategy 3: collect every block first, expand afterwards.
        let mut entry_indices = Vec::new();
        let mut preamble_indices = Vec::new();

        crate::parser::parse_bibtex_stream(input, |item| {
            match item {
                crate::parser::ParsedItem::Entry(entry) => {
                    let index = library.entries.len();
                    library.push_entry_with_source(entry, None);
                    entry_indices.push(index);
                }
                crate::parser::ParsedItem::Preamble(value) => {
                    let index = library.push_preamble_with_source(value, None);
                    preamble_indices.push(index);
                }
                crate::parser::ParsedItem::String(name, value) => {
                    library.push_string_with_source(Cow::Borrowed(name), value, None);
                }
                crate::parser::ParsedItem::Comment(text) => {
                    library.push_comment_with_source(Cow::Borrowed(text), None);
                }
            }
            Ok(())
        })?;

        let has_user_strings = !library.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&library.strings);
        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();

        for entry_index in entry_indices {
            let field_count = library.entries[entry_index].fields.len();
            for field_index in 0..field_count {
                // Index-based access plus `mem::take` keeps the mutable borrow
                // of the field from overlapping the `&self` expansion call.
                let mut value =
                    std::mem::take(&mut library.entries[entry_index].fields[field_index].value);
                library.expand_value_for_parse(
                    &mut value,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.entries[entry_index].fields[field_index].value = value;
            }
        }

        for preamble_index in preamble_indices {
            let mut expanded = std::mem::take(&mut library.preambles[preamble_index].value);
            library.expand_value_for_parse(
                &mut expanded,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            library.preambles[preamble_index].value = expanded;
        }

        Ok(library)
    }
1603
1604 fn parse_with_spans(input: &'a str) -> Result<Self> {
1605 let source_map = SourceMap::anonymous(input);
1606 let raw_items = Self::parse_raw_items_with_source(input, &source_map)?;
1607 Self::from_raw_items(raw_items)
1608 }
1609
1610 fn parse_tolerant(input: &'a str, capture_source: bool) -> Result<Self> {
1611 let source_map = SourceMap::anonymous(input);
1612 let raw_items = Self::parse_tolerant_raw_items(input, capture_source, &source_map);
1613 Self::from_raw_items(raw_items)
1614 }
1615
1616 fn parse_raw_items_with_source(
1617 input: &'a str,
1618 source_map: &SourceMap<'_>,
1619 ) -> Result<Vec<RawBuildItem<'a>>> {
1620 let mut raw_items = Vec::new();
1621 crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
1622 let span = if source_map.source_id().is_some() {
1623 source_map.span(span.byte_start, span.byte_end)
1624 } else {
1625 span
1626 };
1627 raw_items.push(RawBuildItem::Parsed(item, span, raw));
1628 Ok(())
1629 })?;
1630 Ok(raw_items)
1631 }
1632
1633 fn parse_tolerant_raw_items(
1634 input: &'a str,
1635 capture_source: bool,
1636 source_map: &SourceMap<'_>,
1637 ) -> Vec<RawBuildItem<'a>> {
1638 let mut raw_items = Vec::new();
1639 let mut remaining = input;
1640
1641 loop {
1642 crate::parser::lexer::skip_whitespace(&mut remaining);
1643 if remaining.is_empty() {
1644 break;
1645 }
1646
1647 let start = input.len() - remaining.len();
1648 match crate::parser::parse_item(&mut remaining) {
1649 Ok(item) => {
1650 let end = input.len() - remaining.len();
1651 raw_items.push(RawBuildItem::Parsed(
1652 item,
1653 source_map.span(start, end),
1654 &input[start..end],
1655 ));
1656 }
1657 Err(err) => {
1658 let end = next_recovery_boundary(input, start);
1659 let source = capture_source.then(|| source_map.span(start, end));
1660 raw_items.push(RawBuildItem::Failed(FailedBlock {
1661 raw: Cow::Borrowed(&input[start..end]),
1662 error: format!("Failed to parse entry: {err}"),
1663 source,
1664 }));
1665 remaining = &input[end..];
1666 }
1667 }
1668 }
1669
1670 raw_items
1671 }
1672
1673 fn from_raw_items(raw_items: Vec<RawBuildItem<'a>>) -> Result<Self> {
1674 let mut library = Self::new();
1675
1676 for raw_item in &raw_items {
1677 if let RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) =
1678 raw_item
1679 {
1680 library.register_string_definition(Cow::Borrowed(name), value.clone(), Some(*span));
1681 }
1682 }
1683
1684 let has_user_strings = !library.strings.is_empty();
1685 let month_constants_shadowed =
1686 has_user_strings && user_strings_shadow_month_constants(&library.strings);
1687 let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
1688 let mut expansion_stack = Vec::new();
1689 let mut concat_cache = ConcatCache::new();
1690 let mut string_index = 0;
1691
1692 for raw_item in raw_items {
1693 match raw_item {
1694 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(mut entry), span, _) => {
1695 for field in &mut entry.fields {
1696 library.expand_value_for_parse(
1697 &mut field.value,
1698 has_user_strings,
1699 month_constants_shadowed,
1700 &mut expanded_variables,
1701 &mut expansion_stack,
1702 &mut concat_cache,
1703 )?;
1704 }
1705 library.push_entry_with_source(entry, Some(span));
1706 }
1707 RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, _) => {
1708 library.block_order.push(BlockKind::String(string_index));
1709 string_index += 1;
1710 }
1711 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(mut value), span, _) => {
1712 library.expand_value_for_parse(
1713 &mut value,
1714 has_user_strings,
1715 month_constants_shadowed,
1716 &mut expanded_variables,
1717 &mut expansion_stack,
1718 &mut concat_cache,
1719 )?;
1720 library.push_preamble_with_source(value, Some(span));
1721 }
1722 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
1723 library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1724 }
1725 RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1726 }
1727 }
1728
1729 Ok(library)
1730 }
1731
1732 fn from_raw_items_unexpanded(raw_items: Vec<RawBuildItem<'a>>) -> Self {
1733 let mut library = Self::new();
1734
1735 for raw_item in raw_items {
1736 match raw_item {
1737 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(entry), span, _) => {
1738 library.push_entry_with_source(entry, Some(span));
1739 }
1740 RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) => {
1741 library.push_string_with_source(Cow::Borrowed(name), value, Some(span));
1742 }
1743 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), span, _) => {
1744 library.push_preamble_with_source(value, Some(span));
1745 }
1746 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
1747 library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1748 }
1749 RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1750 }
1751 }
1752
1753 library
1754 }
1755
1756 pub fn merge(&mut self, other: Self) {
1758 let entry_offset = self.entries.len();
1759 let string_offset = self.strings.len();
1760 let preamble_offset = self.preambles.len();
1761 let comment_offset = self.comments.len();
1762 let failed_offset = self.failed_blocks.len();
1763 let other_entry_count = other.entries.len();
1764 let other_entry_sources = other.entry_sources;
1765
1766 self.entries.extend(other.entries);
1767 match (&mut self.entry_sources, other_entry_sources) {
1768 (Some(sources), Some(other_sources)) => sources.extend(other_sources),
1769 (Some(sources), None) => {
1770 sources.extend(std::iter::repeat(None).take(other_entry_count));
1771 }
1772 (None, Some(other_sources)) => {
1773 let mut sources = vec![None; entry_offset];
1774 sources.extend(other_sources);
1775 self.entry_sources = Some(sources);
1776 }
1777 (None, None) => {}
1778 }
1779 self.preambles.extend(other.preambles);
1780 self.comments.extend(other.comments);
1781 self.failed_blocks.extend(other.failed_blocks);
1782
1783 for definition in other.strings {
1784 let index = self.strings.len();
1785 self.string_lookup.insert(definition.name.clone(), index);
1786 self.strings.push(definition);
1787 }
1788
1789 self.block_order
1790 .extend(other.block_order.into_iter().map(|kind| match kind {
1791 BlockKind::Entry(index) => BlockKind::Entry(entry_offset + index),
1792 BlockKind::String(index) => BlockKind::String(string_offset + index),
1793 BlockKind::Preamble(index) => BlockKind::Preamble(preamble_offset + index),
1794 BlockKind::Comment(index) => BlockKind::Comment(comment_offset + index),
1795 BlockKind::Failed(index) => BlockKind::Failed(failed_offset + index),
1796 }));
1797 }
1798
1799 #[cfg(feature = "parallel")]
1800 fn merge_libraries_parallel(libraries: Vec<Library<'static>>) -> Library<'static> {
1801 let mut result = Library::new();
1802 for library in libraries {
1803 result.merge(library);
1804 }
1805 result
1806 }
1807
1808 #[must_use]
1810 pub fn entries(&self) -> &[Entry<'a>] {
1811 &self.entries
1812 }
1813
1814 #[must_use]
1816 pub fn entries_mut(&mut self) -> &mut Vec<Entry<'a>> {
1817 &mut self.entries
1818 }
1819
1820 #[must_use]
1822 pub fn strings(&self) -> &[StringDefinition<'a>] {
1823 &self.strings
1824 }
1825
    /// Looks up the string definition called `name` via
    /// `get_string_definition`; returns `None` when that lookup finds nothing.
    #[must_use]
    pub fn string(&self, name: &str) -> Option<&StringDefinition<'a>> {
        get_string_definition(&self.strings, &self.string_lookup, name)
    }
1831
1832 #[must_use]
1834 pub fn string_value(&self, name: &str) -> Option<&Value<'a>> {
1835 self.string(name).map(|definition| &definition.value)
1836 }
1837
1838 #[must_use]
1840 pub fn preambles(&self) -> &[Preamble<'a>] {
1841 &self.preambles
1842 }
1843
1844 #[must_use]
1846 pub fn preambles_mut(&mut self) -> &mut Vec<Preamble<'a>> {
1847 &mut self.preambles
1848 }
1849
1850 #[must_use]
1852 pub fn comments(&self) -> &[Comment<'a>] {
1853 &self.comments
1854 }
1855
1856 #[must_use]
1858 pub fn comments_mut(&mut self) -> &mut Vec<Comment<'a>> {
1859 &mut self.comments
1860 }
1861
1862 #[must_use]
1864 pub fn failed_blocks(&self) -> &[FailedBlock<'a>] {
1865 &self.failed_blocks
1866 }
1867
1868 #[must_use]
1870 pub fn blocks(&self) -> Vec<Block<'_, 'a>> {
1871 self.block_order
1872 .iter()
1873 .map(|kind| match *kind {
1874 BlockKind::Entry(index) => Block::Entry(
1875 &self.entries[index],
1876 self.entry_sources
1877 .as_ref()
1878 .and_then(|sources| sources.get(index).copied().flatten()),
1879 ),
1880 BlockKind::String(index) => Block::String(&self.strings[index]),
1881 BlockKind::Preamble(index) => Block::Preamble(&self.preambles[index]),
1882 BlockKind::Comment(index) => Block::Comment(&self.comments[index]),
1883 BlockKind::Failed(index) => Block::Failed(&self.failed_blocks[index]),
1884 })
1885 .collect()
1886 }
1887
1888 #[must_use]
1889 pub(crate) fn entry_source(&self, index: usize) -> Option<SourceSpan> {
1890 self.entry_sources
1891 .as_ref()
1892 .and_then(|sources| sources.get(index).copied().flatten())
1893 }
1894
1895 #[must_use]
1896 pub(crate) fn block_kinds(&self) -> &[BlockKind] {
1897 &self.block_order
1898 }
1899
1900 #[must_use]
1902 pub fn find_by_key(&self, key: &str) -> Option<&Entry<'a>> {
1903 self.entries.iter().find(|e| e.key == key)
1904 }
1905
1906 #[must_use]
1908 pub fn find_by_key_ignore_case(&self, key: &str) -> Option<&Entry<'a>> {
1909 self.entries
1910 .iter()
1911 .find(|entry| entry.key.eq_ignore_ascii_case(key))
1912 }
1913
1914 #[must_use]
1916 pub fn contains_key(&self, key: &str) -> bool {
1917 self.find_by_key(key).is_some()
1918 }
1919
1920 #[must_use]
1922 pub fn find_by_type(&self, ty: &str) -> Vec<&Entry<'a>> {
1923 self.entries
1924 .iter()
1925 .filter(|e| e.ty.canonical_name().eq_ignore_ascii_case(ty))
1926 .collect()
1927 }
1928
1929 #[must_use]
1931 pub fn find_by_field(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1932 self.entries
1933 .iter()
1934 .filter(|e| {
1935 e.get_as_string(field)
1936 .as_ref()
1937 .is_some_and(|v| v.contains(value))
1938 })
1939 .collect()
1940 }
1941
1942 #[must_use]
1944 pub fn find_by_field_ignore_case(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1945 self.entries
1946 .iter()
1947 .filter(|entry| {
1948 entry
1949 .get_as_string_ignore_case(field)
1950 .as_ref()
1951 .is_some_and(|field_value| contains_case_insensitive(field_value, value))
1952 })
1953 .collect()
1954 }
1955
1956 #[must_use]
1958 pub fn find_by_doi(&self, doi: &str) -> Vec<&Entry<'a>> {
1959 let Some(needle) = normalize_doi(doi) else {
1960 return Vec::new();
1961 };
1962
1963 self.entries
1964 .iter()
1965 .filter(|entry| entry.doi().as_ref().is_some_and(|value| value == &needle))
1966 .collect()
1967 }
1968
1969 fn smart_expand_value_cached(
1971 &self,
1972 value: Value<'a>,
1973 expanded_variables: &mut ExpansionCache<'a>,
1974 expansion_stack: &mut Vec<Cow<'a, str>>,
1975 concat_cache: &mut ConcatCache<'a>,
1976 ) -> Result<Value<'a>> {
1977 match value {
1978 Value::Literal(_) | Value::Number(_) => Ok(value),
1980
1981 Value::Variable(name) => {
1983 let name_text = name.as_ref();
1984 if let Some(expanded) = expanded_variables.get_cloned(name_text) {
1985 return Ok(expanded);
1986 }
1987
1988 if expansion_stack.iter().any(|v| v.as_ref() == name_text) {
1989 let mut cycle = expansion_stack
1990 .iter()
1991 .map(std::convert::AsRef::as_ref)
1992 .collect::<Vec<_>>()
1993 .join(" -> ");
1994 if !cycle.is_empty() {
1995 cycle.push_str(" -> ");
1996 }
1997 cycle.push_str(name_text);
1998 return Err(Error::CircularReference(cycle));
1999 }
2000
2001 if let Some(user_value) =
2002 get_string_value(&self.strings, &self.string_lookup, name_text)
2003 {
2004 expansion_stack.push(name.clone());
2006 let expanded = self.smart_expand_value_cached(
2007 user_value.clone(),
2008 expanded_variables,
2009 expansion_stack,
2010 concat_cache,
2011 );
2012 expansion_stack.pop();
2013
2014 let expanded = expanded?;
2015 expanded_variables.insert(name, expanded.clone());
2016 Ok(expanded)
2017 } else {
2018 get_month_expansion(name_text).map_or_else(
2020 || {
2021 Err(Error::UndefinedVariable(name_text.to_string()))
2023 },
2024 |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
2025 )
2026 }
2027 }
2028
2029 Value::Concat(parts) => {
2031 if let Some(expanded) = concat_cache.get_cloned(&parts) {
2032 return Ok(expanded);
2033 }
2034
2035 let cache_key = parts.clone();
2036 let expanded = self.expand_concatenation_cached(
2037 parts.into_vec(),
2038 expanded_variables,
2039 expansion_stack,
2040 concat_cache,
2041 )?;
2042 concat_cache.insert(cache_key, expanded.clone());
2043 Ok(expanded)
2044 }
2045 }
2046 }
2047
2048 pub fn expand_value_ref(&self, value: &Value<'a>) -> Result<Value<'a>> {
2050 match value {
2051 Value::Literal(_) | Value::Number(_) => Ok(value.clone()),
2053
2054 Value::Variable(name) => {
2056 get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
2058 || {
2059 get_month_expansion(name.as_ref()).map_or_else(
2061 || {
2062 Err(Error::UndefinedVariable(name.as_ref().to_string()))
2064 },
2065 |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
2066 )
2067 },
2068 |user_value| self.expand_value_ref(user_value),
2069 )
2070 }
2071
2072 Value::Concat(parts) => {
2074 let cloned_parts = parts.to_vec();
2075 self.expand_concatenation(cloned_parts)
2076 }
2077 }
2078 }
2079
2080 fn expand_concatenation(&self, parts: Vec<Value<'a>>) -> Result<Value<'a>> {
2082 let mut expanded_variables = ExpansionCache::with_capacity(0);
2083 let mut expansion_stack = Vec::new();
2084 let mut concat_cache = ConcatCache::new();
2085 self.expand_concatenation_cached(
2086 parts,
2087 &mut expanded_variables,
2088 &mut expansion_stack,
2089 &mut concat_cache,
2090 )
2091 }
2092
2093 fn expand_concatenation_cached(
2095 &self,
2096 parts: Vec<Value<'a>>,
2097 expanded_variables: &mut ExpansionCache<'a>,
2098 expansion_stack: &mut Vec<Cow<'a, str>>,
2099 concat_cache: &mut ConcatCache<'a>,
2100 ) -> Result<Value<'a>> {
2101 let mut expanded_parts = Vec::with_capacity(parts.len());
2102
2103 for part in parts {
2105 let expanded = self.smart_expand_value_cached(
2106 part,
2107 expanded_variables,
2108 expansion_stack,
2109 concat_cache,
2110 )?;
2111 expanded_parts.push(expanded);
2112 }
2113
2114 if expanded_parts
2116 .iter()
2117 .all(|p| matches!(p, Value::Literal(_) | Value::Number(_)))
2118 {
2119 let combined = concatenate_simple_values(&expanded_parts);
2120 Ok(Value::Literal(Cow::Owned(combined)))
2121 } else {
2122 Ok(Value::Concat(expanded_parts.into_boxed_slice()))
2123 }
2124 }
2125
2126 pub fn get_expanded_string(&self, value: &Value<'a>) -> Result<String> {
2128 match value {
2129 Value::Literal(s) => Ok(s.to_string()),
2130 Value::Number(n) => Ok(n.to_string()),
2131 Value::Variable(name) => {
2132 get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
2134 || {
2135 get_month_expansion(name.as_ref()).map_or_else(
2137 || {
2138 Err(Error::UndefinedVariable(name.as_ref().to_string()))
2140 },
2141 |month_value| Ok(month_value.to_string()),
2142 )
2143 },
2144 |user_value| self.get_expanded_string(user_value),
2145 )
2146 }
2147 Value::Concat(parts) => {
2148 let mut result = String::new();
2149 for part in parts.iter() {
2150 result.push_str(&self.get_expanded_string(part)?);
2151 }
2152 Ok(result)
2153 }
2154 }
2155 }
2156
2157 #[must_use]
2159 pub fn into_owned(self) -> Library<'static> {
2160 let strings = self
2161 .strings
2162 .into_iter()
2163 .map(StringDefinition::into_owned)
2164 .collect::<Vec<_>>();
2165 let mut string_lookup = AHashMap::with_capacity(strings.len());
2166 for (index, definition) in strings.iter().enumerate() {
2167 string_lookup.insert(Cow::Owned(definition.name.to_string()), index);
2168 }
2169
2170 Library {
2171 entries: self.entries.into_iter().map(Entry::into_owned).collect(),
2172 entry_sources: self.entry_sources,
2173 strings,
2174 string_lookup,
2175 preambles: self
2176 .preambles
2177 .into_iter()
2178 .map(Preamble::into_owned)
2179 .collect(),
2180 comments: self.comments.into_iter().map(Comment::into_owned).collect(),
2181 failed_blocks: self
2182 .failed_blocks
2183 .into_iter()
2184 .map(FailedBlock::into_owned)
2185 .collect(),
2186 block_order: self.block_order,
2187 }
2188 }
2189
    /// Adds a string definition called `name` with the given `value`; no
    /// source span is recorded.
    pub fn add_string(&mut self, name: &'a str, value: Value<'a>) {
        self.push_string_with_source(Cow::Borrowed(name), value, None);
    }
2194
    /// Adds `entry` to the library; no source span is recorded.
    pub fn add_entry(&mut self, entry: Entry<'a>) {
        self.push_entry_with_source(entry, None);
    }
2199
    /// Adds a preamble with the given `value`; no source span is recorded and
    /// the index returned by the underlying push is discarded.
    pub fn add_preamble(&mut self, value: Value<'a>) {
        self.push_preamble_with_source(value, None);
    }
2204
    /// Adds a comment with the given text; no source span is recorded.
    pub fn add_comment(&mut self, comment: &'a str) {
        self.push_comment_with_source(Cow::Borrowed(comment), None);
    }
2209
2210 pub fn resolve_strings(&mut self) -> Result<()> {
2212 let has_user_strings = !self.strings.is_empty();
2213 let month_constants_shadowed =
2214 has_user_strings && user_strings_shadow_month_constants(&self.strings);
2215 let mut expanded_variables = ExpansionCache::with_capacity(self.strings.len());
2216 let mut expansion_stack = Vec::new();
2217 let mut concat_cache = ConcatCache::new();
2218
2219 for entry_index in 0..self.entries.len() {
2220 let field_count = self.entries[entry_index].fields.len();
2221 for field_index in 0..field_count {
2222 let mut value =
2223 std::mem::take(&mut self.entries[entry_index].fields[field_index].value);
2224 self.expand_value_for_parse(
2225 &mut value,
2226 has_user_strings,
2227 month_constants_shadowed,
2228 &mut expanded_variables,
2229 &mut expansion_stack,
2230 &mut concat_cache,
2231 )?;
2232 self.entries[entry_index].fields[field_index].value = value;
2233 }
2234 }
2235
2236 for preamble_index in 0..self.preambles.len() {
2237 let mut value = std::mem::take(&mut self.preambles[preamble_index].value);
2238 self.expand_value_for_parse(
2239 &mut value,
2240 has_user_strings,
2241 month_constants_shadowed,
2242 &mut expanded_variables,
2243 &mut expansion_stack,
2244 &mut concat_cache,
2245 )?;
2246 self.preambles[preamble_index].value = value;
2247 }
2248
2249 Ok(())
2250 }
2251
2252 pub fn normalize_doi_fields(&mut self) {
2254 for entry in &mut self.entries {
2255 for field in &mut entry.fields {
2256 if field.name.eq_ignore_ascii_case("doi") {
2257 if let Some(normalized) = normalize_doi(&field.value.to_plain_string()) {
2258 field.value = Value::Literal(Cow::Owned(normalized));
2259 }
2260 }
2261 }
2262 }
2263 }
2264
2265 pub fn normalize_months(&mut self, style: MonthStyle) {
2267 for entry in &mut self.entries {
2268 for field in &mut entry.fields {
2269 if field.name.eq_ignore_ascii_case("month") {
2270 if let Some(month) =
2271 normalize_month_value(&field.value.to_plain_string(), style)
2272 {
2273 field.value = month;
2274 }
2275 }
2276 }
2277 }
2278 }
2279
2280 pub fn normalize_fields(&mut self, options: FieldNormalizeOptions) {
2282 for entry in &mut self.entries {
2283 for field in &mut entry.fields {
2284 let mut name = if options.biblatex_aliases {
2285 canonical_biblatex_field_alias(&field.name)
2286 .unwrap_or_else(|| field.name.as_ref())
2287 .to_string()
2288 } else {
2289 field.name.to_string()
2290 };
2291
2292 if options.name_case == FieldNameCase::Lowercase {
2293 name.make_ascii_lowercase();
2294 }
2295
2296 if name != field.name {
2297 field.name = Cow::Owned(name);
2298 }
2299 }
2300 }
2301 }
2302
2303 pub fn sort(&mut self, options: SortOptions) {
2305 if options.fields_by_name {
2306 for entry in &mut self.entries {
2307 entry
2308 .fields
2309 .sort_by(|left, right| left.name.cmp(&right.name));
2310 }
2311 }
2312
2313 if options.entries_by_key {
2314 if let Some(sources) = self.entry_sources.take() {
2315 let mut entries = self.entries.drain(..).zip(sources).collect::<Vec<_>>();
2316 entries.sort_by(|(left, _), (right, _)| left.key.cmp(&right.key));
2317 let (sorted_entries, sorted_sources): (Vec<_>, Vec<_>) =
2318 entries.into_iter().unzip();
2319 self.entries = sorted_entries;
2320 self.entry_sources = Some(sorted_sources);
2321 } else {
2322 self.entries.sort_by(|left, right| left.key.cmp(&right.key));
2323 }
2324 self.rebuild_grouped_block_order();
2325 }
2326 }
2327
2328 fn rebuild_grouped_block_order(&mut self) {
2329 self.block_order.clear();
2330 self.block_order
2331 .extend((0..self.strings.len()).map(BlockKind::String));
2332 self.block_order
2333 .extend((0..self.preambles.len()).map(BlockKind::Preamble));
2334 self.block_order
2335 .extend((0..self.comments.len()).map(BlockKind::Comment));
2336 self.block_order
2337 .extend((0..self.entries.len()).map(BlockKind::Entry));
2338 self.block_order
2339 .extend((0..self.failed_blocks.len()).map(BlockKind::Failed));
2340 }
2341
2342 #[must_use]
2345 pub fn validate(
2346 &self,
2347 level: ValidationLevel,
2348 ) -> Vec<(usize, &Entry<'a>, Vec<ValidationError>)> {
2349 let mut invalid_entries = Vec::new();
2350
2351 for (index, entry) in self.entries.iter().enumerate() {
2352 if let Err(errors) = entry.validate(level) {
2353 invalid_entries.push((index, entry, errors));
2354 }
2355 }
2356
2357 invalid_entries
2358 }
2359
2360 #[must_use]
2363 pub fn find_duplicate_keys(&self) -> Vec<&str> {
2364 let mut seen = std::collections::HashSet::new();
2365 let mut duplicates = std::collections::HashSet::new();
2366
2367 for entry in &self.entries {
2368 if !seen.insert(entry.key()) {
2369 duplicates.insert(entry.key());
2370 }
2371 }
2372
2373 duplicates.into_iter().collect()
2374 }
2375
2376 #[must_use]
2378 pub fn find_duplicate_keys_ignore_case(&self) -> Vec<String> {
2379 let mut seen = std::collections::HashSet::new();
2380 let mut duplicates = std::collections::HashSet::new();
2381
2382 for entry in &self.entries {
2383 let normalized_key = entry.key().to_ascii_lowercase();
2384 if !seen.insert(normalized_key.clone()) {
2385 duplicates.insert(normalized_key);
2386 }
2387 }
2388
2389 duplicates.into_iter().collect()
2390 }
2391
2392 #[must_use]
2394 pub fn find_duplicate_dois(&self) -> Vec<(String, Vec<&Entry<'a>>)> {
2395 let mut groups: AHashMap<String, Vec<&Entry<'a>>> = AHashMap::new();
2396 for entry in &self.entries {
2397 if let Some(doi) = entry.doi() {
2398 groups.entry(doi).or_default().push(entry);
2399 }
2400 }
2401
2402 groups
2403 .into_iter()
2404 .filter(|(_, entries)| entries.len() > 1)
2405 .collect()
2406 }
2407
2408 #[must_use]
2410 pub fn validate_comprehensive(&self, level: ValidationLevel) -> ValidationReport<'_> {
2411 let invalid_entries = self.validate(level);
2412 let duplicate_keys = self.find_duplicate_keys();
2413 let empty_entries = self.find_empty_entries();
2414
2415 ValidationReport {
2416 invalid_entries,
2417 duplicate_keys,
2418 empty_entries,
2419 total_entries: self.entries.len(),
2420 validation_level: level,
2421 }
2422 }
2423
2424 fn find_empty_entries(&self) -> Vec<(usize, &Entry<'a>)> {
2426 self.entries
2427 .iter()
2428 .enumerate()
2429 .filter(|(_, entry)| entry.fields().is_empty())
2430 .collect()
2431 }
2432
2433 #[must_use]
2435 pub fn stats(&self) -> LibraryStats {
2436 let mut type_counts = AHashMap::new();
2437 for entry in &self.entries {
2438 *type_counts.entry(entry.ty.to_string()).or_insert(0) += 1;
2439 }
2440
2441 LibraryStats {
2442 total_entries: self.entries.len(),
2443 total_strings: self.strings.len(),
2444 total_preambles: self.preambles.len(),
2445 total_comments: self.comments.len(),
2446 entries_by_type: type_counts,
2447 }
2448 }
2449}
2450
/// Summary counts for a library, as produced by `Library::stats`.
#[derive(Debug, Clone)]
pub struct LibraryStats {
    /// Number of entries in the library.
    pub total_entries: usize,
    /// Number of string definitions in the library.
    pub total_strings: usize,
    /// Number of preambles in the library.
    pub total_preambles: usize,
    /// Number of comments in the library.
    pub total_comments: usize,
    /// Number of entries per entry type, keyed by the type rendered as a
    /// string.
    pub entries_by_type: AHashMap<String, usize>,
}
2465
/// Combined result of `Library::validate_comprehensive`.
#[derive(Debug, Clone)]
pub struct ValidationReport<'a> {
    /// Entries that failed validation: index, entry, and reported errors.
    pub invalid_entries: Vec<(usize, &'a Entry<'a>, Vec<ValidationError>)>,
    /// Keys used by more than one entry.
    pub duplicate_keys: Vec<&'a str>,
    /// Entries that have no fields: index and entry.
    pub empty_entries: Vec<(usize, &'a Entry<'a>)>,
    /// Total number of entries in the library that was validated.
    pub total_entries: usize,
    /// Validation level the report was produced with.
    pub validation_level: ValidationLevel,
}
2480
2481impl ValidationReport<'_> {
2482 #[must_use]
2484 pub fn is_valid(&self) -> bool {
2485 self.invalid_entries.is_empty()
2486 && self.duplicate_keys.is_empty()
2487 && self.empty_entries.is_empty()
2488 }
2489
2490 #[must_use]
2492 pub fn total_issues(&self) -> usize {
2493 self.invalid_entries.len() + self.duplicate_keys.len() + self.empty_entries.len()
2494 }
2495
2496 #[must_use]
2498 pub fn issue_summary(&self) -> IssueSummary {
2499 let mut errors = 0;
2500 let mut warnings = 0;
2501 let mut infos = 0;
2502
2503 for (_, _, validation_errors) in &self.invalid_entries {
2504 for error in validation_errors {
2505 match error.severity {
2506 crate::model::ValidationSeverity::Error => errors += 1,
2507 crate::model::ValidationSeverity::Warning => warnings += 1,
2508 crate::model::ValidationSeverity::Info => infos += 1,
2509 }
2510 }
2511 }
2512
2513 errors += self.duplicate_keys.len() + self.empty_entries.len();
2515
2516 IssueSummary {
2517 errors,
2518 warnings,
2519 infos,
2520 }
2521 }
2522}
2523
/// Issue counts grouped by severity, as produced by
/// `ValidationReport::issue_summary`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssueSummary {
    /// Findings with `Error` severity, plus one for every duplicate key and
    /// every empty entry in the report.
    pub errors: usize,
    /// Findings with `Warning` severity.
    pub warnings: usize,
    /// Findings with `Info` severity.
    pub infos: usize,
}
2534
2535fn concatenate_simple_values(values: &[Value]) -> String {
2537 let mut result = String::new();
2538
2539 let capacity: usize = values
2541 .iter()
2542 .map(|v| match v {
2543 Value::Literal(s) => s.len(),
2544 Value::Number(n) => n.to_string().len(),
2545 _ => 0,
2546 })
2547 .sum();
2548
2549 result.reserve(capacity);
2550
2551 for value in values {
2552 match value {
2553 Value::Literal(s) => result.push_str(s),
2554 Value::Number(n) => result.push_str(&n.to_string()),
2555 _ => {} }
2557 }
2558
2559 result
2560}
2561
/// Reports whether `needle` occurs in `haystack` when case is ignored.
///
/// Both strings are lowercased with `str::to_lowercase` before the substring
/// search. An empty `needle` matches every haystack.
fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
    needle.is_empty() || {
        let folded_haystack = haystack.to_lowercase();
        let folded_needle = needle.to_lowercase();
        folded_haystack.contains(folded_needle.as_str())
    }
}
2569
2570fn normalize_month_value(input: &str, style: MonthStyle) -> Option<Value<'static>> {
2571 let normalized = input.trim().trim_matches(['{', '}']).to_ascii_lowercase();
2572 let month_index = match normalized.as_str() {
2573 "jan" | "january" | "1" | "01" => 1,
2574 "feb" | "february" | "2" | "02" => 2,
2575 "mar" | "march" | "3" | "03" => 3,
2576 "apr" | "april" | "4" | "04" => 4,
2577 "may" | "5" | "05" => 5,
2578 "jun" | "june" | "6" | "06" => 6,
2579 "jul" | "july" | "7" | "07" => 7,
2580 "aug" | "august" | "8" | "08" => 8,
2581 "sep" | "september" | "9" | "09" => 9,
2582 "oct" | "october" | "10" => 10,
2583 "nov" | "november" | "11" => 11,
2584 "dec" | "december" | "12" => 12,
2585 _ => return None,
2586 };
2587
2588 let text = match style {
2589 MonthStyle::Long => month_long_name(month_index),
2590 MonthStyle::Abbrev => month_abbreviation(month_index),
2591 MonthStyle::Number => return Some(Value::Number(month_index)),
2592 };
2593
2594 Some(Value::Literal(Cow::Borrowed(text)))
2595}
2596
/// Returns the capitalized English name of month number `month`
/// (`1` = January … `12` = December).
///
/// Any value outside `1..=12` yields an empty string.
const fn month_long_name(month: i64) -> &'static str {
    const NAMES: [(i64, &str); 12] = [
        (1, "January"),
        (2, "February"),
        (3, "March"),
        (4, "April"),
        (5, "May"),
        (6, "June"),
        (7, "July"),
        (8, "August"),
        (9, "September"),
        (10, "October"),
        (11, "November"),
        (12, "December"),
    ];

    // A `while` loop keeps the lookup usable in a `const fn`.
    let mut slot = 0;
    while slot < NAMES.len() {
        let (number, name) = NAMES[slot];
        if number == month {
            return name;
        }
        slot += 1;
    }
    ""
}
2614
/// Returns the lowercase three-letter abbreviation of month number `month`
/// (`1` = `"jan"` … `12` = `"dec"`).
///
/// Any value outside `1..=12` yields an empty string.
const fn month_abbreviation(month: i64) -> &'static str {
    const ABBREVIATIONS: [(i64, &str); 12] = [
        (1, "jan"),
        (2, "feb"),
        (3, "mar"),
        (4, "apr"),
        (5, "may"),
        (6, "jun"),
        (7, "jul"),
        (8, "aug"),
        (9, "sep"),
        (10, "oct"),
        (11, "nov"),
        (12, "dec"),
    ];

    // A `while` loop keeps the lookup usable in a `const fn`.
    let mut slot = 0;
    while slot < ABBREVIATIONS.len() {
        let (number, abbreviation) = ABBREVIATIONS[slot];
        if number == month {
            return abbreviation;
        }
        slot += 1;
    }
    ""
}
2632
/// Chainable builder that assembles a `Library` one item at a time.
///
/// Each builder method takes the builder by value, adds one item to the
/// wrapped library and hands the builder back; `build` returns the library.
#[derive(Debug, Default)]
pub struct LibraryBuilder<'a> {
    /// Library under construction.
    library: Library<'a>,
}
2638
2639impl<'a> LibraryBuilder<'a> {
2640 #[must_use]
2642 pub fn new() -> Self {
2643 Self::default()
2644 }
2645
2646 #[must_use]
2648 pub fn entry(mut self, entry: Entry<'a>) -> Self {
2649 self.library.add_entry(entry);
2650 self
2651 }
2652
2653 #[must_use]
2655 pub fn string(mut self, name: &'a str, value: Value<'a>) -> Self {
2656 self.library.add_string(name, value);
2657 self
2658 }
2659
2660 #[must_use]
2662 pub fn preamble(mut self, value: Value<'a>) -> Self {
2663 self.library.add_preamble(value);
2664 self
2665 }
2666
2667 #[must_use]
2669 pub fn comment(mut self, text: &'a str) -> Self {
2670 self.library.add_comment(text);
2671 self
2672 }
2673
2674 #[must_use]
2676 pub fn build(self) -> Library<'a> {
2677 self.library
2678 }
2679}
2680
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{EntryType, Field};

    /// A `@string` definition is recorded, and a field that references it
    /// reads back as the definition's text.
    #[test]
    fn test_library_parse() {
        let input = r#"
            @string{me = "John Doe"}

            @article{test2023,
                author = me,
                title = "Test Article",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);

        let entry = &library.entries()[0];
        assert_eq!(entry.get_as_string("author").unwrap(), "John Doe");
    }

    /// A plain quoted field value must stay borrowed from the input.
    #[test]
    fn test_zero_copy_preservation() {
        let input = r#"
            @article{test,
                title = "This is borrowed",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        let title = entry
            .fields
            .iter()
            .find(|f| f.name == "title")
            .map(|f| &f.value);

        // Fail loudly when the field is missing or is not a literal; a bare
        // `if let` would let this test pass without checking anything.
        if let Some(Value::Literal(cow)) = title {
            assert!(matches!(cow, Cow::Borrowed(_)));
        } else {
            panic!("expected `title` to be parsed as a literal value");
        }
    }

    /// Concatenating string variables and a literal yields the joined text.
    #[test]
    fn test_concatenation_creates_owned() {
        let input = r#"
            @string{first = "Hello"}
            @string{second = "World"}

            @article{test,
                title = first # ", " # second
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        assert_eq!(entry.get_as_string("title").unwrap(), "Hello, World");
    }

    /// Guards the size of `Value`: it must not grow beyond 32 bytes.
    #[test]
    fn test_boxed_concat_memory_optimization() {
        assert!(
            std::mem::size_of::<Value>() <= 32,
            "Value enum is {} bytes, should be 32 or less",
            std::mem::size_of::<Value>()
        );
    }

    /// Parsing ten fields must not leave the field vector with more than 17
    /// slots of capacity.
    #[test]
    fn test_field_vec_capacity_bounded() {
        let input = r#"
            @article{test,
                a = "1", b = "2", c = "3", d = "4", e = "5",
                f = "6", g = "7", h = "8", i = "9", j = "10"
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        assert_eq!(entry.fields.len(), 10);
        assert!(
            entry.fields.capacity() <= 17,
            "Unexpected field Vec growth: len={}, capacity={}",
            entry.fields.len(),
            entry.fields.capacity()
        );
    }

    /// The builder records the strings and entries handed to it.
    #[test]
    fn test_library_builder() {
        let library = LibraryBuilder::new()
            .string("me", Value::Literal(Cow::Borrowed("John Doe")))
            .entry(Entry {
                ty: EntryType::Article,
                key: Cow::Borrowed("test2023"),
                fields: vec![
                    Field::new("author", Value::Variable(Cow::Borrowed("me"))),
                    Field::new("title", Value::Literal(Cow::Borrowed("Test"))),
                ],
            })
            .build();

        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);
    }

    /// `stats` counts entries, strings, preambles and comments, and groups
    /// entries by type.
    #[test]
    fn test_library_stats() {
        let input = r#"
            @string{ieee = "IEEE"}
            @preamble{"Test preamble"}
            % This is a percent comment that now works properly
            @comment{This is a formal comment that works}
            @article{a1, title = "Article 1"}
            @article{a2, title = "Article 2"}
            @book{b1, title = "Book 1"}
        "#;

        let library = Library::parser().parse(input).unwrap();
        let stats = library.stats();

        assert_eq!(stats.total_entries, 3);
        assert_eq!(stats.total_strings, 1);
        assert_eq!(stats.total_preambles, 1);
        // The `%` line and the `@comment` block are both counted.
        assert_eq!(stats.total_comments, 2);
        assert_eq!(stats.entries_by_type.get("article"), Some(&2));
        assert_eq!(stats.entries_by_type.get("book"), Some(&1));
    }

    /// Entries from several files are merged into one library.
    #[test]
    fn test_parse_files_parallel() {
        use std::fs::write;
        use std::path::PathBuf;

        // Include the process id so concurrent test runs sharing the temp
        // directory cannot overwrite or delete each other's fixtures.
        let dir = std::env::temp_dir();
        let path1 = dir.join(format!(
            "bibtex-parser-parallel-test1-{}.bib",
            std::process::id()
        ));
        let path2 = dir.join(format!(
            "bibtex-parser-parallel-test2-{}.bib",
            std::process::id()
        ));

        write(&path1, "@article{a1,title=\"A\"}").unwrap();
        write(&path2, "@article{a2,title=\"B\"}").unwrap();

        let paths: Vec<PathBuf> = vec![path1.clone(), path2.clone()];

        let library = Library::parser().threads(2).parse_files(&paths).unwrap();

        assert_eq!(library.entries().len(), 2);

        // Best-effort cleanup; a leftover temp file is not a test failure.
        let _ = std::fs::remove_file(path1);
        let _ = std::fs::remove_file(path2);
    }

    /// Exercises the parser builder with and without an explicit thread
    /// count and, when the `parallel` feature is on, with multiple files.
    #[test]
    fn test_builder_pattern_api() {
        let input = "@article{test, title = \"Test\"}";

        let db1 = Library::parser().parse(input).unwrap();
        assert_eq!(db1.entries().len(), 1);

        let library2 = Library::parser().threads(1).parse(input).unwrap();
        assert_eq!(library2.entries().len(), 1);

        #[cfg(feature = "parallel")]
        {
            use std::fs::write;

            let db3 = Library::parser().threads(4).parse(input).unwrap();
            assert_eq!(db3.entries().len(), 1);

            let dir = std::env::temp_dir();
            let path1 = dir.join(format!("bibtex-parser-test1-{}.bib", std::process::id()));
            let path2 = dir.join(format!("bibtex-parser-test2-{}.bib", std::process::id()));
            write(&path1, "@article{a1, title=\"A\"}").unwrap();
            write(&path2, "@article{a2, title=\"B\"}").unwrap();

            let db4 = Library::parser()
                .threads(2)
                .parse_files(&[path1.as_path(), path2.as_path()])
                .unwrap();
            assert_eq!(db4.entries().len(), 2);

            // Best-effort cleanup; a leftover temp file is not a test failure.
            let _ = std::fs::remove_file(path1);
            let _ = std::fs::remove_file(path2);
        }
    }
}