1use crate::{
4 canonical_biblatex_field_alias, normalize_doi, CorpusEvent, CorpusSource, Entry, Error,
5 ParseEvent, ParseFlow, ParsedComment, ParsedCorpus, ParsedDocument, ParsedEntry,
6 ParsedFailedBlock, ParsedPreamble, ParsedSource, ParsedString, Result, SourceId, SourceMap,
7 SourceSpan, StreamingSummary, ValidationError, ValidationLevel, Value,
8};
9use ahash::AHashMap;
10use memchr::memchr;
11use std::borrow::Cow;
12use std::ops::Deref;
13use std::path::Path;
14
15#[cfg(feature = "parallel")]
16use rayon::prelude::*;
17
/// Max entries held by the vector-backed variant of `ExpansionCache`
/// before an insert upgrades it to a hash map.
const SMALL_EXPANSION_CACHE_LIMIT: usize = 16;
/// String tables at or below this length are searched linearly instead of
/// through the `string_lookup` hash map.
const SMALL_STRING_LOOKUP_LIMIT: usize = 16;
/// Hard cap on cached concat expansions; inserts beyond this are dropped.
const CONCAT_CACHE_LIMIT: usize = 16;
21
/// Cache of already-expanded `@string` variables, keyed by variable name.
///
/// Starts as a small vector (linear scan with move-to-front) and is promoted
/// to a hash map once it outgrows `SMALL_EXPANSION_CACHE_LIMIT`.
enum ExpansionCache<'a> {
    /// Vector-backed cache for few entries; scanned linearly.
    Small(Vec<(Cow<'a, str>, Value<'a>)>),
    /// Hash-map-backed cache for many entries.
    Large(AHashMap<Cow<'a, str>, Value<'a>>),
}
26
27impl<'a> ExpansionCache<'a> {
28 fn with_capacity(capacity: usize) -> Self {
29 if capacity <= SMALL_EXPANSION_CACHE_LIMIT {
30 Self::Small(Vec::with_capacity(capacity))
31 } else {
32 Self::Large(AHashMap::with_capacity(capacity))
33 }
34 }
35
36 fn get_cloned(&mut self, name: &str) -> Option<Value<'a>> {
37 match self {
38 Self::Small(entries) => {
39 let index = entries.iter().position(|(key, _)| key.as_ref() == name)?;
40 if index != 0 {
41 entries.swap(0, index);
42 }
43 Some(entries[0].1.clone())
44 }
45 Self::Large(entries) => entries.get(name).cloned(),
46 }
47 }
48
49 fn insert(&mut self, name: Cow<'a, str>, value: Value<'a>) {
50 match self {
51 Self::Small(entries) => {
52 if entries.len() < SMALL_EXPANSION_CACHE_LIMIT {
53 entries.push((name, value));
54 } else {
55 let mut large = AHashMap::with_capacity(entries.len() + 1);
56 for (key, value) in entries.drain(..) {
57 large.insert(key, value);
58 }
59 large.insert(name, value);
60 *self = Self::Large(large);
61 }
62 }
63 Self::Large(entries) => {
64 entries.insert(name, value);
65 }
66 }
67 }
68}
69
/// Memoizes expansions of concatenated values, keyed by the original part
/// list. Bounded by `CONCAT_CACHE_LIMIT`.
struct ConcatCache<'a> {
    // (original parts, fully-expanded result) pairs; scanned linearly.
    entries: Vec<(Box<[Value<'a>]>, Value<'a>)>,
}
73
74impl<'a> ConcatCache<'a> {
75 const fn new() -> Self {
76 Self {
77 entries: Vec::new(),
78 }
79 }
80
81 fn get_cloned(&mut self, parts: &[Value<'a>]) -> Option<Value<'a>> {
82 let index = self
83 .entries
84 .iter()
85 .position(|(cached_parts, _)| concat_parts_equal(cached_parts, parts))?;
86 if index != 0 {
87 self.entries.swap(0, index);
88 }
89 Some(self.entries[0].1.clone())
90 }
91
92 fn insert(&mut self, parts: Box<[Value<'a>]>, value: Value<'a>) {
93 if self.entries.len() < CONCAT_CACHE_LIMIT {
94 self.entries.push((parts, value));
95 }
96 }
97}
98
99fn concat_parts_equal(left: &[Value<'_>], right: &[Value<'_>]) -> bool {
100 left.len() == right.len()
101 && left
102 .iter()
103 .zip(right)
104 .all(|(left, right)| cache_values_equal(left, right))
105}
106
/// Structural equality for cached `Value`s.
///
/// Only same-variant pairs are compared; any other pairing conservatively
/// reports "not equal", which at worst causes a cache miss (never a wrong
/// cache hit).
fn cache_values_equal(left: &Value<'_>, right: &Value<'_>) -> bool {
    match (left, right) {
        (Value::Literal(left), Value::Literal(right))
        | (Value::Variable(left), Value::Variable(right)) => left.as_ref() == right.as_ref(),
        (Value::Number(left), Value::Number(right)) => left == right,
        (Value::Concat(left), Value::Concat(right)) => concat_parts_equal(left, right),
        _ => false,
    }
}
116
/// Maps a three-letter month abbreviation (any ASCII case) to its full
/// English month name; returns `None` for anything else.
#[inline]
fn get_month_expansion(name: &str) -> Option<&'static str> {
    // ASCII-lowercase exactly three bytes; any other length is not a month.
    let lower: [u8; 3] = match name.as_bytes() {
        &[a, b, c] => [a | 0x20, b | 0x20, c | 0x20],
        _ => return None,
    };

    match &lower {
        b"jan" => Some("January"),
        b"feb" => Some("February"),
        b"mar" => Some("March"),
        b"apr" => Some("April"),
        b"may" => Some("May"),
        b"jun" => Some("June"),
        b"jul" => Some("July"),
        b"aug" => Some("August"),
        b"sep" => Some("September"),
        b"oct" => Some("October"),
        b"nov" => Some("November"),
        b"dec" => Some("December"),
        _ => None,
    }
}
148
149#[inline]
150fn get_string_value<'map, 'a>(
151 strings: &'map [StringDefinition<'a>],
152 string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
153 name: &str,
154) -> Option<&'map Value<'a>> {
155 get_string_definition(strings, string_lookup, name).map(|definition| &definition.value)
156}
157
158#[inline]
159fn get_string_definition<'map, 'a>(
160 strings: &'map [StringDefinition<'a>],
161 string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
162 name: &str,
163) -> Option<&'map StringDefinition<'a>> {
164 if strings.len() <= SMALL_STRING_LOOKUP_LIMIT {
165 strings
166 .iter()
167 .rev()
168 .find(|definition| definition.name.as_ref() == name)
169 } else {
170 string_lookup
171 .get(name)
172 .and_then(|&index| strings.get(index))
173 }
174}
175
176#[inline]
177fn user_strings_shadow_month_constants(strings: &[StringDefinition<'_>]) -> bool {
178 strings
179 .iter()
180 .any(|definition| get_month_expansion(definition.name.as_ref()).is_some())
181}
182
183#[inline]
185fn contains_variables(value: &Value) -> bool {
186 match value {
187 Value::Variable(_) => true,
188 Value::Concat(parts) => parts.iter().any(contains_variables),
189 _ => false,
190 }
191}
192
193#[inline]
195fn contains_potential_month_variables(value: &Value) -> bool {
196 match value {
197 Value::Variable(name) => get_month_expansion(name).is_some(),
198 Value::Concat(parts) => parts.iter().any(contains_potential_month_variables),
199 _ => false,
200 }
201}
202
/// Returns true for bytes allowed inside a BibTeX identifier:
/// ASCII alphanumerics plus `_`, `-`, `:` and `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
210
/// Returns true if `input` starts with `@` followed (case-insensitively) by
/// `keyword`, and the keyword is not continued by another identifier byte.
/// `keyword` must be ASCII lowercase.
#[inline]
fn starts_with_at_keyword(input: &[u8], keyword: &[u8]) -> bool {
    if input.first() != Some(&b'@') {
        return false;
    }
    let rest = &input[1..];
    if rest.len() < keyword.len() {
        return false;
    }

    let prefix_matches = rest[..keyword.len()]
        .iter()
        .zip(keyword)
        .all(|(&byte, &expected)| (byte | 0x20) == expected);

    // End of input counts as a valid keyword boundary; otherwise the next
    // byte must not extend the identifier.
    prefix_matches
        && rest.get(keyword.len()).map_or(true, |&byte| {
            !matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' | b':' | b'.')
        })
}
229
/// Cheap pre-scan facts about an input buffer, used to pick a parse strategy
/// and pre-reserve storage.
#[derive(Debug, Clone, Copy)]
struct InputScan {
    // True if any `@string` keyword was seen (case-insensitive).
    may_contain_string_definition: bool,
    // Number of `@` bytes in the input; upper bound on the block count.
    at_count: usize,
}
235
236fn scan_input(input: &str) -> InputScan {
238 let bytes = input.as_bytes();
239 let mut pos = 0;
240 let mut at_count = 0;
241 let mut may_contain_string_definition = false;
242
243 while pos < bytes.len() {
244 if let Some(offset) = memchr(b'@', &bytes[pos..]) {
245 let at = pos + offset;
246 at_count += 1;
247 if starts_with_at_keyword(&bytes[at..], b"string") {
248 may_contain_string_definition = true;
249 }
250 pos = at + 1;
251 } else {
252 break;
253 }
254 }
255
256 InputScan {
257 may_contain_string_definition,
258 at_count,
259 }
260}
261
262fn input_may_have_late_string_definition(input: &str) -> bool {
267 let bytes = input.as_bytes();
268 let mut pos = 0;
269 let mut saw_regular_entry = false;
270
271 while pos < bytes.len() {
272 if let Some(offset) = memchr(b'@', &bytes[pos..]) {
273 let at = pos + offset;
274 let tail = &bytes[at..];
275
276 if starts_with_at_keyword(tail, b"string") {
277 if saw_regular_entry {
278 return true;
279 }
280 } else if !saw_regular_entry
281 && !starts_with_at_keyword(tail, b"preamble")
282 && !starts_with_at_keyword(tail, b"comment")
283 {
284 saw_regular_entry = true;
286 }
287
288 pos = at + 1;
289 } else {
290 break;
291 }
292 }
293
294 false
295}
296
297fn next_recovery_boundary(input: &str, start: usize) -> usize {
298 let bytes = input.as_bytes();
299 let mut pos = start.saturating_add(1);
300 while pos < bytes.len() {
301 if bytes[pos] == b'@' && line_prefix_is_whitespace(bytes, pos) {
302 return pos;
303 }
304 pos += 1;
305 }
306 input.len()
307}
308
/// Returns true when every byte between the start of the current line and
/// `pos` is a space or tab (i.e. `pos` is effectively at line start).
fn line_prefix_is_whitespace(bytes: &[u8], pos: usize) -> bool {
    // Walk backwards: a newline means we cleared the prefix; any non-blank
    // byte disqualifies; reaching the buffer start also succeeds.
    let mut index = pos;
    while index > 0 {
        match bytes[index - 1] {
            b'\n' | b'\r' => return true,
            b' ' | b'\t' => index -= 1,
            _ => return false,
        }
    }
    true
}
319
/// Accumulates per-source streaming counters from `source` into `total`.
/// All counters are additive; `stopped` is sticky across sources.
fn merge_streaming_summary(total: &mut StreamingSummary, source: StreamingSummary) {
    total.entries += source.entries;
    total.strings += source.strings;
    total.preambles += source.preambles;
    total.comments += source.comments;
    total.failed_blocks += source.failed_blocks;
    total.warnings += source.warnings;
    total.errors += source.errors;
    total.infos += source.infos;
    total.recovered_blocks += source.recovered_blocks;
    total.stopped |= source.stopped;
}
332
/// Builder-style configuration for parsing BibTeX input.
///
/// Construct with [`Parser::new`], chain option methods, then call one of the
/// `parse*` methods.
#[derive(Debug, Default, Clone)]
pub struct Parser {
    // Thread count for multi-file parsing; `None` uses the pool default.
    threads: Option<usize>,
    // Recover from malformed blocks instead of failing the whole parse.
    tolerant: bool,
    document: DocumentOptions,
}

/// Options affecting document-producing parse modes.
#[derive(Debug, Default, Clone, Copy)]
struct DocumentOptions {
    // Record source spans on parsed items.
    capture_source: bool,
    // Retain each block's raw text (passed through to the Parsed* builders).
    preserve_raw: bool,
    // Eagerly expand `@string` variables into field values.
    expand_values: bool,
}
347
348impl Parser {
349 #[must_use]
351 #[inline]
352 pub fn new() -> Self {
353 Self::default()
354 }
355
356 #[must_use]
358 #[inline]
359 pub fn threads(mut self, threads: impl Into<Option<usize>>) -> Self {
360 self.threads = threads.into();
361 self
362 }
363
    /// Enables tolerant mode: malformed blocks are recovered as failed
    /// blocks or partial entries instead of aborting the parse.
    #[must_use]
    #[inline]
    pub const fn tolerant(mut self) -> Self {
        self.tolerant = true;
        self
    }

    /// Records source spans on parsed items.
    #[must_use]
    #[inline]
    pub const fn capture_source(mut self) -> Self {
        self.document.capture_source = true;
        self
    }

    /// Retains each block's raw text on the parsed representation.
    #[must_use]
    #[inline]
    pub const fn preserve_raw(mut self) -> Self {
        self.document.preserve_raw = true;
        self
    }

    /// Eagerly expands `@string` variables when building documents.
    #[must_use]
    #[inline]
    pub const fn expand_values(mut self) -> Self {
        self.document.expand_values = true;
        self
    }
395
396 #[inline]
398 pub fn parse<'a>(&self, input: &'a str) -> Result<Library<'a>> {
399 if self.tolerant {
400 Library::parse_tolerant(input, self.document.capture_source)
401 } else if self.document.capture_source {
402 Library::parse_with_spans(input)
403 } else {
404 Library::parse_sequential(input)
405 }
406 }
407
    /// Parses `input` into a [`ParsedDocument`] with an anonymous source
    /// (source id 0, no name).
    ///
    /// # Errors
    /// Syntax failures are captured inside the returned document; `Err` is
    /// reserved for value-expansion failures (see `expand_values`).
    #[inline]
    pub fn parse_document<'a>(&self, input: &'a str) -> Result<ParsedDocument<'a>> {
        self.parse_document_with_source_id(SourceId::new(0), None, input)
    }

    /// Parses `input` into a [`ParsedDocument`] labeled with `source_name`.
    ///
    /// # Errors
    /// Same error behavior as [`Self::parse_document`].
    #[inline]
    pub fn parse_source<'a>(
        &self,
        source_name: impl Into<Cow<'a, str>>,
        input: &'a str,
    ) -> Result<ParsedDocument<'a>> {
        self.parse_document_with_source_id(SourceId::new(0), Some(source_name.into()), input)
    }
430
431 pub fn parse_sources<'a>(&self, sources: &[CorpusSource<'a>]) -> Result<ParsedCorpus<'a>> {
433 let mut documents = Vec::with_capacity(sources.len());
434 for (index, source) in sources.iter().enumerate() {
435 documents.push(self.parse_document_with_source_id(
436 SourceId::new(index),
437 Some(Cow::Borrowed(source.name)),
438 source.input,
439 )?);
440 }
441
442 Ok(ParsedCorpus::from_documents(documents))
443 }
444
    /// Streams parse events for anonymous input (source id 0, no name).
    ///
    /// # Errors
    /// Propagates parser errors and any error returned by `on_event`.
    #[inline]
    pub fn parse_events<'a, F>(&self, input: &'a str, on_event: F) -> Result<StreamingSummary>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        self.parse_source_events_with_source(SourceId::new(0), None, input, on_event)
    }

    /// Streams parse events for a single named source.
    ///
    /// # Errors
    /// Propagates parser errors and any error returned by `on_event`.
    #[inline]
    pub fn parse_source_events<'a, F>(
        &self,
        source_name: impl Into<Cow<'a, str>>,
        input: &'a str,
        on_event: F,
    ) -> Result<StreamingSummary>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        self.parse_source_events_with_source(
            SourceId::new(0),
            Some(source_name.into()),
            input,
            on_event,
        )
    }
477
    /// Streams events for a whole corpus, bracketing each source's events
    /// with `SourceStart`/`SourceEnd` markers and merging the per-source
    /// summaries.
    ///
    /// # Errors
    /// Propagates errors from the per-source parses and from `on_event`.
    pub fn parse_corpus_events<'a, F>(
        &self,
        sources: &[CorpusSource<'a>],
        mut on_event: F,
    ) -> Result<StreamingSummary>
    where
        F: FnMut(CorpusEvent<'a>) -> Result<ParseFlow>,
    {
        let mut summary = StreamingSummary::default();

        for (index, source) in sources.iter().enumerate() {
            // A `Stop` from any callback halts the remaining sources too.
            if summary.stopped {
                break;
            }

            let source_id = SourceId::new(index);
            let parsed_source = ParsedSource {
                id: source_id,
                name: Some(Cow::Borrowed(source.name)),
            };
            if on_event(CorpusEvent::SourceStart(parsed_source.clone()))? == ParseFlow::Stop {
                summary.stopped = true;
                break;
            }

            // Per-source events are wrapped in `CorpusEvent::Event` tagged
            // with the source id.
            let source_summary = self.parse_source_events_with_source(
                source_id,
                Some(Cow::Borrowed(source.name)),
                source.input,
                |event| {
                    on_event(CorpusEvent::Event {
                        source: source_id,
                        event: Box::new(event),
                    })
                },
            )?;
            merge_streaming_summary(&mut summary, source_summary);

            // SourceEnd is emitted even if the inner stream stopped; a Stop
            // here only prevents further sources from starting.
            if on_event(CorpusEvent::SourceEnd(parsed_source))? == ParseFlow::Stop {
                summary.stopped = true;
            }
        }

        summary.finalize_status();
        Ok(summary)
    }

    /// Parses one source and forwards every [`ParseEvent`] to `on_event`,
    /// choosing the tolerant or strict streaming pipeline per configuration.
    fn parse_source_events_with_source<'a, F>(
        &self,
        source_id: SourceId,
        source_name: Option<Cow<'a, str>>,
        input: &'a str,
        mut on_event: F,
    ) -> Result<StreamingSummary>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        let source_map = SourceMap::new(Some(source_id), source_name, input);
        let mut summary = StreamingSummary::default();

        if self.tolerant {
            self.parse_tolerant_events(input, &source_map, &mut summary, &mut on_event)?;
        } else {
            crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
                let source = source_map.span(span.byte_start, span.byte_end);
                self.emit_parsed_event(item, source, raw, &source_map, &mut summary, &mut on_event)
            })?;
        }

        summary.finalize_status();
        Ok(summary)
    }
551
    /// Tolerant streaming loop: parses item by item and converts each parse
    /// failure into a recovered partial entry or a `Failed` event instead of
    /// aborting.
    fn parse_tolerant_events<'a, F>(
        &self,
        input: &'a str,
        source_map: &SourceMap<'a>,
        summary: &mut StreamingSummary,
        on_event: &mut F,
    ) -> Result<()>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        let mut remaining = input;

        loop {
            crate::parser::lexer::skip_whitespace(&mut remaining);
            if remaining.is_empty() || summary.stopped {
                break;
            }

            // Byte offsets are recovered from the shrinking tail slice.
            let start = input.len() - remaining.len();
            match crate::parser::parse_item(&mut remaining) {
                Ok(item) => {
                    let end = input.len() - remaining.len();
                    let source = source_map.span(start, end);
                    self.emit_parsed_event(
                        item,
                        source,
                        &input[start..end],
                        source_map,
                        summary,
                        on_event,
                    )?;
                }
                Err(err) => {
                    // Skip to the next plausible block start ('@' at the
                    // beginning of a line) and report the skipped span.
                    let end = next_recovery_boundary(input, start);
                    let failed = FailedBlock {
                        raw: Cow::Borrowed(&input[start..end]),
                        error: format!("Failed to parse entry: {err}"),
                        source: Some(source_map.span(start, end)),
                    };
                    let failed_index = summary.failed_blocks;
                    let failed = ParsedFailedBlock::from_failed_block(
                        failed_index,
                        failed,
                        Some(source_map),
                    );
                    // Prefer salvaging a partial entry over reporting an
                    // opaque failure.
                    if let Some(partial) = crate::document::recover_partial_stream_entry(
                        &failed,
                        source_map,
                        summary.entries,
                        self.document.preserve_raw,
                    ) {
                        Self::emit_event(ParseEvent::Entry(partial), summary, on_event)?;
                    } else {
                        Self::emit_event(ParseEvent::Failed(failed), summary, on_event)?;
                    }
                    remaining = &input[end..];
                }
            }
        }

        Ok(())
    }

    /// Wraps a successfully parsed item in the matching [`ParseEvent`] and
    /// forwards it through [`Self::emit_event`].
    fn emit_parsed_event<'a, F>(
        &self,
        item: crate::parser::ParsedItem<'a>,
        source: SourceSpan,
        raw: &'a str,
        source_map: &SourceMap<'a>,
        summary: &mut StreamingSummary,
        on_event: &mut F,
    ) -> Result<()>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        // Once a callback requested Stop, silently drop further items.
        if summary.stopped {
            return Ok(());
        }

        let event = match item {
            crate::parser::ParsedItem::Entry(entry) => {
                ParseEvent::Entry(ParsedEntry::from_stream_entry(
                    entry,
                    source,
                    raw,
                    source_map,
                    self.document.preserve_raw,
                ))
            }
            crate::parser::ParsedItem::String(name, value) => {
                ParseEvent::String(ParsedString::from_stream_definition(
                    name,
                    value,
                    source,
                    raw,
                    self.document.preserve_raw,
                ))
            }
            crate::parser::ParsedItem::Preamble(value) => {
                ParseEvent::Preamble(ParsedPreamble::from_stream_preamble(
                    value,
                    source,
                    raw,
                    self.document.preserve_raw,
                ))
            }
            crate::parser::ParsedItem::Comment(text) => ParseEvent::Comment(
                ParsedComment::from_stream_comment(text, source, raw, self.document.preserve_raw),
            ),
        };

        Self::emit_event(event, summary, on_event)
    }
665
    /// Delivers one event to the callback: updates the summary counters
    /// first, then delivers the event, then re-delivers its attached
    /// diagnostics as standalone `Diagnostic` events.
    fn emit_event<'a, F>(
        event: ParseEvent<'a>,
        summary: &mut StreamingSummary,
        on_event: &mut F,
    ) -> Result<()>
    where
        F: FnMut(ParseEvent<'a>) -> Result<ParseFlow>,
    {
        if summary.stopped {
            return Ok(());
        }

        // Collect attached diagnostics; the clone is needed because the
        // event itself is moved into the callback below.
        let diagnostics = match &event {
            ParseEvent::Entry(entry) => {
                summary.entries += 1;
                if entry.status == crate::ParsedEntryStatus::Partial {
                    summary.recovered_blocks += 1;
                }
                entry.diagnostics.clone()
            }
            ParseEvent::String(_) => {
                summary.strings += 1;
                Vec::new()
            }
            ParseEvent::Preamble(_) => {
                summary.preambles += 1;
                Vec::new()
            }
            ParseEvent::Comment(_) => {
                summary.comments += 1;
                Vec::new()
            }
            ParseEvent::Failed(failed) => {
                summary.failed_blocks += 1;
                failed.diagnostics.clone()
            }
            ParseEvent::Diagnostic(diagnostic) => {
                summary.count_diagnostic(diagnostic);
                Vec::new()
            }
        };
        for diagnostic in &diagnostics {
            summary.count_diagnostic(diagnostic);
        }

        if on_event(event)? == ParseFlow::Stop {
            summary.stopped = true;
            return Ok(());
        }

        // Attached diagnostics are also surfaced as standalone events so
        // consumers need only one code path for diagnostics.
        for diagnostic in diagnostics {
            if on_event(ParseEvent::Diagnostic(diagnostic))? == ParseFlow::Stop {
                summary.stopped = true;
                break;
            }
        }

        Ok(())
    }

    /// Full document pipeline for one source: raw items -> library ->
    /// [`ParsedDocument`] with locations, raw text, recovery and optional
    /// value expansion applied per the configured options.
    fn parse_document_with_source_id<'a>(
        &self,
        source_id: SourceId,
        source_name: Option<Cow<'a, str>>,
        input: &'a str,
    ) -> Result<ParsedDocument<'a>> {
        let source_map = SourceMap::new(Some(source_id), source_name.clone(), input);
        let sources = vec![ParsedSource {
            id: source_id,
            name: source_name,
        }];
        let raw_items = if self.tolerant {
            Library::parse_tolerant_raw_items(input, true, &source_map)
        } else {
            match Library::parse_raw_items_with_source(input, &source_map) {
                Ok(raw_items) => raw_items,
                Err(error) => {
                    // Strict-mode syntax errors become a "failed" document
                    // rather than an Err, so callers always get a document.
                    return Ok(ParsedDocument::failed_from_error(
                        sources,
                        &source_map,
                        &error,
                    ));
                }
            }
        };
        // NOTE(review): `raw_items.clone()` deep-clones all items; kept
        // because the items are re-walked below for locations/raw text.
        let library = match Library::from_raw_items(raw_items.clone()) {
            Ok(library) => library,
            // Unless eager expansion was requested, unresolved or circular
            // variables are not fatal: fall back to unexpanded values.
            Err(Error::UndefinedVariable(_) | Error::CircularReference(_))
                if !self.document.expand_values =>
            {
                Library::from_raw_items_unexpanded(raw_items.clone())
            }
            Err(error) => return Err(error),
        };
        let mut document =
            ParsedDocument::from_library_with_source_map(library, sources, Some(&source_map));
        // Attach per-entry source locations in original block order.
        let mut entry_index = 0;
        for raw_item in &raw_items {
            if let RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, raw) = raw_item {
                document.apply_entry_locations(
                    entry_index,
                    raw,
                    &source_map,
                    self.document.preserve_raw,
                );
                entry_index += 1;
            }
        }
        document.apply_parsed_values(&raw_items);
        if self.document.preserve_raw {
            document.apply_raw_items(&raw_items);
        }
        if self.tolerant {
            document.recover_partial_entries(&source_map, self.document.preserve_raw);
        }
        if self.document.expand_values {
            document.populate_expanded_values(crate::ExpansionOptions::default())?;
        }
        Ok(document)
    }
786
    /// Parses multiple files and merges them into one owned [`Library`].
    ///
    /// With the `parallel` feature the files are parsed on a rayon pool,
    /// unless `threads` was explicitly set to 0 or 1, which forces the
    /// sequential path. Note that this path always uses the strict
    /// sequential parser; the `tolerant`/document options do not apply here.
    ///
    /// # Errors
    /// Returns I/O errors from reading any file, parse errors, or pool
    /// construction failures.
    pub fn parse_files<P: AsRef<Path> + Sync>(&self, paths: &[P]) -> Result<Library<'static>> {
        #[cfg(feature = "parallel")]
        {
            // An explicit request for <= 1 thread means no pool is needed.
            if let Some(threads) = self.threads {
                if threads <= 1 {
                    return Self::parse_files_sequential(paths);
                }
            }

            let pool = self.build_thread_pool()?;

            let libraries: Result<Vec<_>> = pool.install(|| {
                paths
                    .par_iter()
                    .map(|path| {
                        let content = std::fs::read_to_string(path)?;
                        let library = Library::parse_sequential(&content)?;
                        // Owned data is required because `content` is dropped
                        // at the end of this closure.
                        Ok(library.into_owned())
                    })
                    .collect()
            });

            let libraries = libraries?;
            Ok(Library::merge_libraries_parallel(libraries))
        }

        #[cfg(not(feature = "parallel"))]
        {
            Self::parse_files_sequential(paths)
        }
    }
819
820 fn parse_files_sequential<P: AsRef<Path>>(paths: &[P]) -> Result<Library<'static>> {
822 let mut result = Library::new();
823 for path in paths {
824 let content = std::fs::read_to_string(path)?;
825 let library = Library::parse_sequential(&content)?;
826 result.merge(library.into_owned());
827 }
828 Ok(result)
829 }
830
    /// Builds a rayon thread pool honoring the configured thread count
    /// (pool default when `threads` is `None`).
    ///
    /// # Errors
    /// Pool construction failures are surfaced as `Error::WinnowError`,
    /// reusing the crate's existing string-error variant.
    #[cfg(feature = "parallel")]
    fn build_thread_pool(&self) -> Result<rayon::ThreadPool> {
        let mut builder = rayon::ThreadPoolBuilder::new();

        if let Some(threads) = self.threads {
            builder = builder.num_threads(threads);
        }

        builder
            .build()
            .map_err(|e| Error::WinnowError(e.to_string()))
    }
843}
844
/// A borrowed view of one block of a [`Library`], in original file order.
#[derive(Debug, Clone, Copy)]
pub enum Block<'lib, 'a> {
    /// A bibliography entry plus its source span, if one was captured.
    Entry(&'lib Entry<'a>, Option<SourceSpan>),
    /// A `@string` definition.
    String(&'lib StringDefinition<'a>),
    /// A `@preamble` block.
    Preamble(&'lib Preamble<'a>),
    /// A `@comment` block.
    Comment(&'lib Comment<'a>),
    /// A block that could not be parsed (tolerant mode).
    Failed(&'lib FailedBlock<'a>),
}

/// Index into the per-kind storage vectors of a [`Library`], recording the
/// original interleaving of blocks in the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockKind {
    Entry(usize),
    String(usize),
    Preamble(usize),
    Comment(usize),
    Failed(usize),
}

/// Intermediate build item: either a successfully parsed item with its span
/// and raw text, or a block that failed to parse.
#[derive(Debug, Clone)]
pub enum RawBuildItem<'a> {
    Parsed(crate::parser::ParsedItem<'a>, SourceSpan, &'a str),
    Failed(FailedBlock<'a>),
}

/// A `@string` definition: a named, reusable value.
#[derive(Debug, Clone, PartialEq)]
pub struct StringDefinition<'a> {
    /// The variable name.
    pub name: Cow<'a, str>,
    /// The defined value (may itself reference other variables).
    pub value: Value<'a>,
    /// Where the definition appeared, if spans were captured.
    pub source: Option<SourceSpan>,
}
885
886impl<'a> StringDefinition<'a> {
887 #[must_use]
889 pub const fn new(name: &'a str, value: Value<'a>) -> Self {
890 Self {
891 name: Cow::Borrowed(name),
892 value,
893 source: None,
894 }
895 }
896
897 #[must_use]
899 pub fn name(&self) -> &str {
900 &self.name
901 }
902
903 #[must_use]
905 pub const fn value(&self) -> &Value<'a> {
906 &self.value
907 }
908
909 #[must_use]
911 pub fn into_owned(self) -> StringDefinition<'static> {
912 StringDefinition {
913 name: Cow::Owned(self.name.into_owned()),
914 value: self.value.into_owned(),
915 source: self.source,
916 }
917 }
918}
919
/// A `@preamble` block's value.
#[derive(Debug, Clone, PartialEq)]
pub struct Preamble<'a> {
    /// The preamble value.
    pub value: Value<'a>,
    /// Where the preamble appeared, if spans were captured.
    pub source: Option<SourceSpan>,
}
928
929impl<'a> Preamble<'a> {
930 #[must_use]
932 pub const fn new(value: Value<'a>) -> Self {
933 Self {
934 value,
935 source: None,
936 }
937 }
938
939 #[must_use]
941 pub const fn value(&self) -> &Value<'a> {
942 &self.value
943 }
944
945 #[must_use]
947 pub fn into_owned(self) -> Preamble<'static> {
948 Preamble {
949 value: self.value.into_owned(),
950 source: self.source,
951 }
952 }
953}
954
// Lets a `Preamble` be used anywhere a `&Value` is expected.
impl<'a> Deref for Preamble<'a> {
    type Target = Value<'a>;

    fn deref(&self) -> &Self::Target {
        &self.value
    }
}

/// A `@comment` block's text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment<'a> {
    /// The comment contents.
    pub text: Cow<'a, str>,
    /// Where the comment appeared, if spans were captured.
    pub source: Option<SourceSpan>,
}
971
972impl<'a> Comment<'a> {
973 #[must_use]
975 pub const fn new(text: &'a str) -> Self {
976 Self {
977 text: Cow::Borrowed(text),
978 source: None,
979 }
980 }
981
982 #[must_use]
984 pub fn text(&self) -> &str {
985 &self.text
986 }
987
988 #[must_use]
990 pub fn into_owned(self) -> Comment<'static> {
991 Comment {
992 text: Cow::Owned(self.text.into_owned()),
993 source: self.source,
994 }
995 }
996}
997
// Lets a `Comment` be used anywhere a `&str` is expected.
impl Deref for Comment<'_> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.text
    }
}

/// A block that could not be parsed, retained for diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FailedBlock<'a> {
    /// The raw text of the unparseable region.
    pub raw: Cow<'a, str>,
    /// Human-readable description of the parse failure.
    pub error: String,
    /// Where the block appeared, if spans were captured.
    pub source: Option<SourceSpan>,
}
1016
1017impl FailedBlock<'_> {
1018 #[must_use]
1020 pub fn into_owned(self) -> FailedBlock<'static> {
1021 FailedBlock {
1022 raw: Cow::Owned(self.raw.into_owned()),
1023 error: self.error,
1024 source: self.source,
1025 }
1026 }
1027}
1028
/// Rendering style for month values (consumed by code outside this file —
/// presumably the writer; TODO confirm).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MonthStyle {
    /// Full month name.
    #[default]
    Long,
    /// Three-letter abbreviation.
    Abbrev,
    /// Numeric month.
    Number,
}

/// Sorting options (consumed outside this file; TODO confirm call sites).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SortOptions {
    /// Sort entries by key.
    pub entries_by_key: bool,
    /// Sort each entry's fields by name.
    pub fields_by_name: bool,
}

/// Case handling for field names during normalization.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameCase {
    /// Keep field names as written.
    #[default]
    Preserve,
    /// Lowercase field names.
    Lowercase,
}

/// Options for normalizing entry fields.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FieldNormalizeOptions {
    /// Field-name case policy.
    pub name_case: FieldNameCase,
    /// Map biblatex alias field names onto canonical names.
    pub biblatex_aliases: bool,
}

/// An in-memory BibTeX library: entries, `@string` definitions, preambles,
/// comments, failed blocks, and the original order they appeared in.
#[derive(Debug, Clone, Default)]
pub struct Library<'a> {
    entries: Vec<Entry<'a>>,
    // Per-entry source spans; allocated lazily — stays `None` until some
    // entry actually carries a span.
    entry_sources: Option<Vec<Option<SourceSpan>>>,
    strings: Vec<StringDefinition<'a>>,
    // Name -> index into `strings`; consulted once the table outgrows
    // `SMALL_STRING_LOOKUP_LIMIT`.
    string_lookup: AHashMap<Cow<'a, str>, usize>,
    preambles: Vec<Preamble<'a>>,
    comments: Vec<Comment<'a>>,
    failed_blocks: Vec<FailedBlock<'a>>,
    // Original interleaving of all block kinds.
    block_order: Vec<BlockKind>,
}
1089
1090impl<'a> Library<'a> {
1091 fn push_entry_with_source(&mut self, entry: Entry<'a>, source: Option<SourceSpan>) {
1092 let index = self.entries.len();
1093 self.entries.push(entry);
1094 if let Some(sources) = &mut self.entry_sources {
1095 sources.push(source);
1096 } else if source.is_some() {
1097 let mut sources = vec![None; index];
1098 sources.push(source);
1099 self.entry_sources = Some(sources);
1100 }
1101 self.block_order.push(BlockKind::Entry(index));
1102 }
1103
    /// Registers a `@string` definition and returns its index.
    ///
    /// The lookup map always records the newest index for a name, so later
    /// definitions shadow earlier ones (matching the reverse linear scan in
    /// `get_string_definition`).
    fn register_string_definition(
        &mut self,
        name: Cow<'a, str>,
        value: Value<'a>,
        source: Option<SourceSpan>,
    ) -> usize {
        let index = self.strings.len();
        self.string_lookup.insert(name.clone(), index);
        self.strings.push(StringDefinition {
            name,
            value,
            source,
        });
        index
    }

    /// Appends a `@string` definition and records it in the block order.
    fn push_string_with_source(
        &mut self,
        name: Cow<'a, str>,
        value: Value<'a>,
        source: Option<SourceSpan>,
    ) {
        let index = self.register_string_definition(name, value, source);
        self.block_order.push(BlockKind::String(index));
    }

    /// Appends a preamble and returns its index into `preambles`.
    fn push_preamble_with_source(&mut self, value: Value<'a>, source: Option<SourceSpan>) -> usize {
        let index = self.preambles.len();
        self.preambles.push(Preamble { value, source });
        self.block_order.push(BlockKind::Preamble(index));
        index
    }

    /// Appends a comment block.
    fn push_comment_with_source(&mut self, text: Cow<'a, str>, source: Option<SourceSpan>) {
        let index = self.comments.len();
        self.comments.push(Comment { text, source });
        self.block_order.push(BlockKind::Comment(index));
    }

    /// Appends a block that failed to parse.
    fn push_failed_block(&mut self, failed: FailedBlock<'a>) {
        let index = self.failed_blocks.len();
        self.failed_blocks.push(failed);
        self.block_order.push(BlockKind::Failed(index));
    }
1148
    /// Expands `@string` variables (and built-in month abbreviations) inside
    /// `value` in place, memoizing results in the caches.
    ///
    /// `has_user_strings`/`month_constants_shadowed` are precomputed by the
    /// caller so the common "no @string blocks" case stays cheap.
    #[inline]
    fn expand_value_for_parse(
        &self,
        value: &mut Value<'a>,
        has_user_strings: bool,
        month_constants_shadowed: bool,
        expanded_variables: &mut ExpansionCache<'a>,
        expansion_stack: &mut Vec<Cow<'a, str>>,
        concat_cache: &mut ConcatCache<'a>,
    ) -> Result<()> {
        match value {
            Value::Literal(_) | Value::Number(_) => Ok(()),
            Value::Variable(name) => {
                // Built-in month constants apply unless a user @string
                // redefines one of them.
                if !has_user_strings || !month_constants_shadowed {
                    if let Some(month_value) = get_month_expansion(name.as_ref()) {
                        *value = Value::Literal(Cow::Borrowed(month_value));
                        return Ok(());
                    }
                }

                if has_user_strings {
                    if let Some(expanded) = expanded_variables.get_cloned(name.as_ref()) {
                        *value = expanded;
                        return Ok(());
                    }

                    // Cache miss: expand through the string table.
                    // `expansion_stack` presumably tracks in-progress names
                    // for cycle detection — see `smart_expand_value_cached`
                    // (not in this file) to confirm.
                    let old_value = std::mem::take(value);
                    *value = self.smart_expand_value_cached(
                        old_value,
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    )?;
                }

                Ok(())
            }
            Value::Concat(parts) => {
                if has_user_strings {
                    if let Some(expanded) = concat_cache.get_cloned(parts) {
                        *value = expanded;
                        return Ok(());
                    }
                }

                // Without user strings only month variables can expand, so a
                // cheaper scan decides whether any work is needed at all.
                let needs_expansion = if has_user_strings {
                    parts.iter().any(contains_variables)
                } else {
                    parts.iter().any(contains_potential_month_variables)
                };

                if needs_expansion {
                    // The no-user-strings path consults the concat cache only
                    // once expansion is known to be needed.
                    if !has_user_strings {
                        if let Some(expanded) = concat_cache.get_cloned(parts) {
                            *value = expanded;
                            return Ok(());
                        }
                    }

                    let old_value = std::mem::take(value);
                    *value = self.smart_expand_value_cached(
                        old_value,
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    )?;
                }

                Ok(())
            }
        }
    }
1221
    /// Creates an empty library.
    #[must_use]
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns a [`Parser`] builder for configuring parse behavior.
    #[must_use]
    #[inline]
    pub fn parser() -> Parser {
        Parser::new()
    }

    /// Parses `input` with default parser options.
    ///
    /// # Errors
    /// Returns any error from the default (strict) parser.
    pub fn parse(input: &'a str) -> Result<Self> {
        Self::parser().parse(input)
    }

    /// Reads and parses a file, returning an owned library.
    ///
    /// # Errors
    /// Returns I/O errors from reading the file as well as parse errors.
    pub fn parse_file(path: impl AsRef<Path>) -> Result<Library<'static>> {
        let content = std::fs::read_to_string(path)?;
        Library::parser().parse(&content).map(Library::into_owned)
    }

    /// Serializes the library back to BibTeX text.
    ///
    /// # Errors
    /// Propagates errors from the writer.
    pub fn to_bibtex(&self) -> Result<String> {
        crate::writer::to_string(self)
    }

    /// Writes the library to a file in BibTeX format.
    ///
    /// # Errors
    /// Propagates I/O and writer errors.
    pub fn write_file(&self, path: impl AsRef<Path>) -> Result<()> {
        crate::writer::to_file(self, path)
    }
1277
1278 #[allow(clippy::too_many_lines)]
1280 pub(crate) fn parse_sequential(input: &'a str) -> Result<Self> {
1281 let mut library = Self::new();
1282 let input_scan = scan_input(input);
1283
1284 if !input_scan.may_contain_string_definition {
1287 library.entries.reserve(input_scan.at_count);
1288 library.block_order.reserve(input_scan.at_count);
1289 let has_user_strings = false;
1290 let month_constants_shadowed = false;
1291 let mut expanded_variables = ExpansionCache::with_capacity(0);
1292 let mut expansion_stack = Vec::new();
1293 let mut concat_cache = ConcatCache::new();
1294
1295 crate::parser::parse_bibtex_stream(input, |item| {
1296 match item {
1297 crate::parser::ParsedItem::Entry(mut entry) => {
1298 for field in &mut entry.fields {
1299 library.expand_value_for_parse(
1300 &mut field.value,
1301 has_user_strings,
1302 month_constants_shadowed,
1303 &mut expanded_variables,
1304 &mut expansion_stack,
1305 &mut concat_cache,
1306 )?;
1307 }
1308 library.push_entry_with_source(entry, None);
1309 }
1310 crate::parser::ParsedItem::Preamble(value) => {
1311 let mut expanded = value;
1312 library.expand_value_for_parse(
1313 &mut expanded,
1314 has_user_strings,
1315 month_constants_shadowed,
1316 &mut expanded_variables,
1317 &mut expansion_stack,
1318 &mut concat_cache,
1319 )?;
1320 library.push_preamble_with_source(expanded, None);
1321 }
1322 crate::parser::ParsedItem::Comment(text) => {
1323 library.push_comment_with_source(Cow::Borrowed(text), None);
1324 }
1325 crate::parser::ParsedItem::String(name, value) => {
1326 library.push_string_with_source(Cow::Borrowed(name), value, None);
1328 }
1329 }
1330 Ok(())
1331 })?;
1332
1333 return Ok(library);
1334 }
1335
1336 library.block_order.reserve(input_scan.at_count);
1337
1338 if !input_may_have_late_string_definition(input) {
1342 let mut pending_preambles = Vec::new();
1343 let mut expanded_variables = ExpansionCache::with_capacity(0);
1344 let mut expansion_stack = Vec::new();
1345 let mut concat_cache = ConcatCache::new();
1346 let mut month_constants_shadowed = None;
1347
1348 crate::parser::parse_bibtex_stream(input, |item| {
1349 match item {
1350 crate::parser::ParsedItem::Entry(mut entry) => {
1351 let has_user_strings = !library.strings.is_empty();
1352 let month_constants_shadowed = *month_constants_shadowed
1353 .get_or_insert_with(|| {
1354 has_user_strings
1355 && user_strings_shadow_month_constants(&library.strings)
1356 });
1357 for field in &mut entry.fields {
1358 library.expand_value_for_parse(
1359 &mut field.value,
1360 has_user_strings,
1361 month_constants_shadowed,
1362 &mut expanded_variables,
1363 &mut expansion_stack,
1364 &mut concat_cache,
1365 )?;
1366 }
1367 library.push_entry_with_source(entry, None);
1368 }
1369 crate::parser::ParsedItem::Preamble(value) => {
1370 let index = library.push_preamble_with_source(value, None);
1371 pending_preambles.push(index);
1372 }
1373 crate::parser::ParsedItem::String(name, value) => {
1374 library.push_string_with_source(Cow::Borrowed(name), value, None);
1375 }
1376 crate::parser::ParsedItem::Comment(text) => {
1377 library.push_comment_with_source(Cow::Borrowed(text), None);
1378 }
1379 }
1380 Ok(())
1381 })?;
1382
1383 let has_user_strings = !library.strings.is_empty();
1384 let month_constants_shadowed =
1385 has_user_strings && user_strings_shadow_month_constants(&library.strings);
1386 for index in pending_preambles {
1387 let mut expanded = std::mem::take(&mut library.preambles[index].value);
1388 library.expand_value_for_parse(
1389 &mut expanded,
1390 has_user_strings,
1391 month_constants_shadowed,
1392 &mut expanded_variables,
1393 &mut expansion_stack,
1394 &mut concat_cache,
1395 )?;
1396 library.preambles[index].value = expanded;
1397 }
1398
1399 return Ok(library);
1400 }
1401
1402 let mut entry_indices = Vec::new();
1403 let mut preamble_indices = Vec::new();
1404
1405 crate::parser::parse_bibtex_stream(input, |item| {
1406 match item {
1407 crate::parser::ParsedItem::Entry(entry) => {
1408 let index = library.entries.len();
1409 library.push_entry_with_source(entry, None);
1410 entry_indices.push(index);
1411 }
1412 crate::parser::ParsedItem::Preamble(value) => {
1413 let index = library.push_preamble_with_source(value, None);
1414 preamble_indices.push(index);
1415 }
1416 crate::parser::ParsedItem::String(name, value) => {
1417 library.push_string_with_source(Cow::Borrowed(name), value, None);
1418 }
1419 crate::parser::ParsedItem::Comment(text) => {
1420 library.push_comment_with_source(Cow::Borrowed(text), None);
1421 }
1422 }
1423 Ok(())
1424 })?;
1425
1426 let has_user_strings = !library.strings.is_empty();
1428 let month_constants_shadowed =
1429 has_user_strings && user_strings_shadow_month_constants(&library.strings);
1430 let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
1431 let mut expansion_stack = Vec::new();
1432 let mut concat_cache = ConcatCache::new();
1433
1434 for entry_index in entry_indices {
1435 let field_count = library.entries[entry_index].fields.len();
1436 for field_index in 0..field_count {
1437 let mut value =
1438 std::mem::take(&mut library.entries[entry_index].fields[field_index].value);
1439 library.expand_value_for_parse(
1440 &mut value,
1441 has_user_strings,
1442 month_constants_shadowed,
1443 &mut expanded_variables,
1444 &mut expansion_stack,
1445 &mut concat_cache,
1446 )?;
1447 library.entries[entry_index].fields[field_index].value = value;
1448 }
1449 }
1450
1451 for preamble_index in preamble_indices {
1452 let mut expanded = std::mem::take(&mut library.preambles[preamble_index].value);
1453 library.expand_value_for_parse(
1454 &mut expanded,
1455 has_user_strings,
1456 month_constants_shadowed,
1457 &mut expanded_variables,
1458 &mut expansion_stack,
1459 &mut concat_cache,
1460 )?;
1461 library.preambles[preamble_index].value = expanded;
1462 }
1463
1464 Ok(library)
1465 }
1466
    /// Parse `input` while recording a source span for every block.
    ///
    /// Uses an anonymous [`SourceMap`] (no file identity attached).
    ///
    /// # Errors
    /// Returns the first parse or string-expansion error encountered.
    fn parse_with_spans(input: &'a str) -> Result<Self> {
        let source_map = SourceMap::anonymous(input);
        let raw_items = Self::parse_raw_items_with_source(input, &source_map)?;
        Self::from_raw_items(raw_items)
    }
1472
    /// Parse `input`, turning unparsable regions into failed blocks instead
    /// of aborting. `capture_source` controls whether failed blocks keep a
    /// span pointing at the offending text.
    ///
    /// # Errors
    /// Expansion of string variables can still fail even in tolerant mode.
    fn parse_tolerant(input: &'a str, capture_source: bool) -> Result<Self> {
        let source_map = SourceMap::anonymous(input);
        let raw_items = Self::parse_tolerant_raw_items(input, capture_source, &source_map);
        Self::from_raw_items(raw_items)
    }
1478
    /// Run the span-tracking parser over `input` and collect every parsed
    /// item together with its span and raw source text.
    ///
    /// # Errors
    /// Propagates the first parser error.
    fn parse_raw_items_with_source(
        input: &'a str,
        source_map: &SourceMap<'_>,
    ) -> Result<Vec<RawBuildItem<'a>>> {
        let mut raw_items = Vec::new();
        crate::parser::parse_bibtex_stream_with_spans(input, |item, span, raw| {
            // Re-home the raw byte span onto the map's source id when one
            // exists; anonymous maps keep the parser's span unchanged.
            let span = if source_map.source_id().is_some() {
                source_map.span(span.byte_start, span.byte_end)
            } else {
                span
            };
            raw_items.push(RawBuildItem::Parsed(item, span, raw));
            Ok(())
        })?;
        Ok(raw_items)
    }
1495
    /// Tolerant item collection: parse items one at a time and, on failure,
    /// record a [`FailedBlock`] covering the text up to the next recovery
    /// boundary, then resume parsing from there.
    fn parse_tolerant_raw_items(
        input: &'a str,
        capture_source: bool,
        source_map: &SourceMap<'_>,
    ) -> Vec<RawBuildItem<'a>> {
        let mut raw_items = Vec::new();
        let mut remaining = input;

        loop {
            crate::parser::lexer::skip_whitespace(&mut remaining);
            if remaining.is_empty() {
                break;
            }

            // Byte offset of the current parse position within `input`.
            let start = input.len() - remaining.len();
            match crate::parser::parse_item(&mut remaining) {
                Ok(item) => {
                    let end = input.len() - remaining.len();
                    raw_items.push(RawBuildItem::Parsed(
                        item,
                        source_map.span(start, end),
                        &input[start..end],
                    ));
                }
                Err(err) => {
                    // Skip ahead to a safe restart point (e.g. the next `@`)
                    // so one bad block doesn't poison the rest of the file.
                    let end = next_recovery_boundary(input, start);
                    let source = capture_source.then(|| source_map.span(start, end));
                    raw_items.push(RawBuildItem::Failed(FailedBlock {
                        raw: Cow::Borrowed(&input[start..end]),
                        error: format!("Failed to parse entry: {err}"),
                        source,
                    }));
                    remaining = &input[end..];
                }
            }
        }

        raw_items
    }
1535
    /// Build a [`Library`] from raw parsed items, expanding string variables
    /// in entry fields and preambles.
    ///
    /// Two passes are required: the first registers every `@string`
    /// definition so that forward references resolve, the second expands
    /// values and records block order.
    ///
    /// # Errors
    /// Returns expansion errors (undefined variables, circular references).
    fn from_raw_items(raw_items: Vec<RawBuildItem<'a>>) -> Result<Self> {
        let mut library = Self::new();

        // Pass 1: register all string definitions up front.
        for raw_item in &raw_items {
            if let RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) =
                raw_item
            {
                library.register_string_definition(Cow::Borrowed(name), value.clone(), Some(*span));
            }
        }

        let has_user_strings = !library.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&library.strings);
        let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();
        // Strings were already registered; this only tracks their position
        // in the overall block order.
        let mut string_index = 0;

        // Pass 2: expand values and record block order.
        for raw_item in raw_items {
            match raw_item {
                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(mut entry), span, _) => {
                    for field in &mut entry.fields {
                        library.expand_value_for_parse(
                            &mut field.value,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                    }
                    library.push_entry_with_source(entry, Some(span));
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, _) => {
                    library.block_order.push(BlockKind::String(string_index));
                    string_index += 1;
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(mut value), span, _) => {
                    library.expand_value_for_parse(
                        &mut value,
                        has_user_strings,
                        month_constants_shadowed,
                        &mut expanded_variables,
                        &mut expansion_stack,
                        &mut concat_cache,
                    )?;
                    library.push_preamble_with_source(value, Some(span));
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
                    library.push_comment_with_source(Cow::Borrowed(text), Some(span));
                }
                RawBuildItem::Failed(failed) => library.push_failed_block(failed),
            }
        }

        Ok(library)
    }
1594
1595 fn from_raw_items_unexpanded(raw_items: Vec<RawBuildItem<'a>>) -> Self {
1596 let mut library = Self::new();
1597
1598 for raw_item in raw_items {
1599 match raw_item {
1600 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(entry), span, _) => {
1601 library.push_entry_with_source(entry, Some(span));
1602 }
1603 RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span, _) => {
1604 library.push_string_with_source(Cow::Borrowed(name), value, Some(span));
1605 }
1606 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), span, _) => {
1607 library.push_preamble_with_source(value, Some(span));
1608 }
1609 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span, _) => {
1610 library.push_comment_with_source(Cow::Borrowed(text), Some(span));
1611 }
1612 RawBuildItem::Failed(failed) => library.push_failed_block(failed),
1613 }
1614 }
1615
1616 library
1617 }
1618
    /// Append every block of `other` onto `self`, keeping `other`'s internal
    /// block order after all of `self`'s blocks.
    pub fn merge(&mut self, other: Self) {
        // Offsets used to rebase `other`'s block indices into our arenas.
        let entry_offset = self.entries.len();
        let string_offset = self.strings.len();
        let preamble_offset = self.preambles.len();
        let comment_offset = self.comments.len();
        let failed_offset = self.failed_blocks.len();
        let other_entry_count = other.entries.len();
        let other_entry_sources = other.entry_sources;

        self.entries.extend(other.entries);
        // Keep entry_sources aligned with entries: pad with `None` wherever
        // one side tracked sources and the other did not.
        match (&mut self.entry_sources, other_entry_sources) {
            (Some(sources), Some(other_sources)) => sources.extend(other_sources),
            (Some(sources), None) => {
                sources.extend(std::iter::repeat(None).take(other_entry_count));
            }
            (None, Some(other_sources)) => {
                let mut sources = vec![None; entry_offset];
                sources.extend(other_sources);
                self.entry_sources = Some(sources);
            }
            (None, None) => {}
        }
        self.preambles.extend(other.preambles);
        self.comments.extend(other.comments);
        self.failed_blocks.extend(other.failed_blocks);

        // NOTE(review): if `other` defines a string name that `self` already
        // has, the lookup is re-pointed at the later definition (last-wins)
        // while both definitions remain stored — confirm this is intended.
        for definition in other.strings {
            let index = self.strings.len();
            self.string_lookup.insert(definition.name.clone(), index);
            self.strings.push(definition);
        }

        // Rebase `other`'s block order onto our (now larger) arenas.
        self.block_order
            .extend(other.block_order.into_iter().map(|kind| match kind {
                BlockKind::Entry(index) => BlockKind::Entry(entry_offset + index),
                BlockKind::String(index) => BlockKind::String(string_offset + index),
                BlockKind::Preamble(index) => BlockKind::Preamble(preamble_offset + index),
                BlockKind::Comment(index) => BlockKind::Comment(comment_offset + index),
                BlockKind::Failed(index) => BlockKind::Failed(failed_offset + index),
            }));
    }
1661
1662 #[cfg(feature = "parallel")]
1663 fn merge_libraries_parallel(libraries: Vec<Library<'static>>) -> Library<'static> {
1664 let mut result = Library::new();
1665 for library in libraries {
1666 result.merge(library);
1667 }
1668 result
1669 }
1670
1671 #[must_use]
1673 pub fn entries(&self) -> &[Entry<'a>] {
1674 &self.entries
1675 }
1676
    /// Mutable access to the entry list.
    #[must_use]
    pub fn entries_mut(&mut self) -> &mut Vec<Entry<'a>> {
        &mut self.entries
    }
1682
1683 #[must_use]
1685 pub fn strings(&self) -> &[StringDefinition<'a>] {
1686 &self.strings
1687 }
1688
    /// Look up a `@string` definition by name.
    #[must_use]
    pub fn string(&self, name: &str) -> Option<&StringDefinition<'a>> {
        get_string_definition(&self.strings, &self.string_lookup, name)
    }
1694
1695 #[must_use]
1697 pub fn string_value(&self, name: &str) -> Option<&Value<'a>> {
1698 self.string(name).map(|definition| &definition.value)
1699 }
1700
1701 #[must_use]
1703 pub fn preambles(&self) -> &[Preamble<'a>] {
1704 &self.preambles
1705 }
1706
    /// Mutable access to the preamble list.
    #[must_use]
    pub fn preambles_mut(&mut self) -> &mut Vec<Preamble<'a>> {
        &mut self.preambles
    }
1712
1713 #[must_use]
1715 pub fn comments(&self) -> &[Comment<'a>] {
1716 &self.comments
1717 }
1718
    /// Mutable access to the comment list.
    #[must_use]
    pub fn comments_mut(&mut self) -> &mut Vec<Comment<'a>> {
        &mut self.comments
    }
1724
1725 #[must_use]
1727 pub fn failed_blocks(&self) -> &[FailedBlock<'a>] {
1728 &self.failed_blocks
1729 }
1730
    /// All blocks in original document order, as borrowed views.
    #[must_use]
    pub fn blocks(&self) -> Vec<Block<'_, 'a>> {
        self.block_order
            .iter()
            .map(|kind| match *kind {
                BlockKind::Entry(index) => Block::Entry(
                    &self.entries[index],
                    // Entry spans are optional per entry and per library.
                    self.entry_sources
                        .as_ref()
                        .and_then(|sources| sources.get(index).copied().flatten()),
                ),
                BlockKind::String(index) => Block::String(&self.strings[index]),
                BlockKind::Preamble(index) => Block::Preamble(&self.preambles[index]),
                BlockKind::Comment(index) => Block::Comment(&self.comments[index]),
                BlockKind::Failed(index) => Block::Failed(&self.failed_blocks[index]),
            })
            .collect()
    }
1750
1751 #[must_use]
1752 pub(crate) fn entry_source(&self, index: usize) -> Option<SourceSpan> {
1753 self.entry_sources
1754 .as_ref()
1755 .and_then(|sources| sources.get(index).copied().flatten())
1756 }
1757
1758 #[must_use]
1759 pub(crate) fn block_kinds(&self) -> &[BlockKind] {
1760 &self.block_order
1761 }
1762
1763 #[must_use]
1765 pub fn find_by_key(&self, key: &str) -> Option<&Entry<'a>> {
1766 self.entries.iter().find(|e| e.key == key)
1767 }
1768
1769 #[must_use]
1771 pub fn find_by_key_ignore_case(&self, key: &str) -> Option<&Entry<'a>> {
1772 self.entries
1773 .iter()
1774 .find(|entry| entry.key.eq_ignore_ascii_case(key))
1775 }
1776
1777 #[must_use]
1779 pub fn contains_key(&self, key: &str) -> bool {
1780 self.find_by_key(key).is_some()
1781 }
1782
1783 #[must_use]
1785 pub fn find_by_type(&self, ty: &str) -> Vec<&Entry<'a>> {
1786 self.entries
1787 .iter()
1788 .filter(|e| e.ty.canonical_name().eq_ignore_ascii_case(ty))
1789 .collect()
1790 }
1791
1792 #[must_use]
1794 pub fn find_by_field(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1795 self.entries
1796 .iter()
1797 .filter(|e| {
1798 e.get_as_string(field)
1799 .as_ref()
1800 .is_some_and(|v| v.contains(value))
1801 })
1802 .collect()
1803 }
1804
1805 #[must_use]
1807 pub fn find_by_field_ignore_case(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1808 self.entries
1809 .iter()
1810 .filter(|entry| {
1811 entry
1812 .get_as_string_ignore_case(field)
1813 .as_ref()
1814 .is_some_and(|field_value| contains_case_insensitive(field_value, value))
1815 })
1816 .collect()
1817 }
1818
1819 #[must_use]
1821 pub fn find_by_doi(&self, doi: &str) -> Vec<&Entry<'a>> {
1822 let Some(needle) = normalize_doi(doi) else {
1823 return Vec::new();
1824 };
1825
1826 self.entries
1827 .iter()
1828 .filter(|entry| entry.doi().as_ref().is_some_and(|value| value == &needle))
1829 .collect()
1830 }
1831
    /// Expand a value using memoization caches shared across a parse run.
    ///
    /// `expanded_variables` caches fully expanded `@string` variables,
    /// `expansion_stack` tracks the chain of variables currently being
    /// expanded (for cycle detection), and `concat_cache` memoizes whole
    /// concatenation results.
    ///
    /// # Errors
    /// [`Error::CircularReference`] on a variable cycle and
    /// [`Error::UndefinedVariable`] when a name is neither a user string nor
    /// a month constant.
    fn smart_expand_value_cached(
        &self,
        value: Value<'a>,
        expanded_variables: &mut ExpansionCache<'a>,
        expansion_stack: &mut Vec<Cow<'a, str>>,
        concat_cache: &mut ConcatCache<'a>,
    ) -> Result<Value<'a>> {
        match value {
            // Already fully expanded.
            Value::Literal(_) | Value::Number(_) => Ok(value),

            Value::Variable(name) => {
                let name_text = name.as_ref();
                if let Some(expanded) = expanded_variables.get_cloned(name_text) {
                    return Ok(expanded);
                }

                // Cycle check: seeing a name already on the stack means the
                // definitions reference each other.
                if expansion_stack.iter().any(|v| v.as_ref() == name_text) {
                    let mut cycle = expansion_stack
                        .iter()
                        .map(std::convert::AsRef::as_ref)
                        .collect::<Vec<_>>()
                        .join(" -> ");
                    if !cycle.is_empty() {
                        cycle.push_str(" -> ");
                    }
                    cycle.push_str(name_text);
                    return Err(Error::CircularReference(cycle));
                }

                if let Some(user_value) =
                    get_string_value(&self.strings, &self.string_lookup, name_text)
                {
                    // Push before recursing so nested expansions see this
                    // name on the stack; always pop before returning.
                    expansion_stack.push(name.clone());
                    let expanded = self.smart_expand_value_cached(
                        user_value.clone(),
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    );
                    expansion_stack.pop();

                    let expanded = expanded?;
                    expanded_variables.insert(name, expanded.clone());
                    Ok(expanded)
                } else {
                    // Fall back to the built-in month constants (jan..dec).
                    get_month_expansion(name_text).map_or_else(
                        || {
                            Err(Error::UndefinedVariable(name_text.to_string()))
                        },
                        |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
                    )
                }
            }

            Value::Concat(parts) => {
                if let Some(expanded) = concat_cache.get_cloned(&parts) {
                    return Ok(expanded);
                }

                // Key by the unexpanded parts; value is the expanded result.
                let cache_key = parts.clone();
                let expanded = self.expand_concatenation_cached(
                    parts.into_vec(),
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
                concat_cache.insert(cache_key, expanded.clone());
                Ok(expanded)
            }
        }
    }
1910
1911 pub fn expand_value_ref(&self, value: &Value<'a>) -> Result<Value<'a>> {
1913 match value {
1914 Value::Literal(_) | Value::Number(_) => Ok(value.clone()),
1916
1917 Value::Variable(name) => {
1919 get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1921 || {
1922 get_month_expansion(name.as_ref()).map_or_else(
1924 || {
1925 Err(Error::UndefinedVariable(name.as_ref().to_string()))
1927 },
1928 |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
1929 )
1930 },
1931 |user_value| self.expand_value_ref(user_value),
1932 )
1933 }
1934
1935 Value::Concat(parts) => {
1937 let cloned_parts = parts.to_vec();
1938 self.expand_concatenation(cloned_parts)
1939 }
1940 }
1941 }
1942
1943 fn expand_concatenation(&self, parts: Vec<Value<'a>>) -> Result<Value<'a>> {
1945 let mut expanded_variables = ExpansionCache::with_capacity(0);
1946 let mut expansion_stack = Vec::new();
1947 let mut concat_cache = ConcatCache::new();
1948 self.expand_concatenation_cached(
1949 parts,
1950 &mut expanded_variables,
1951 &mut expansion_stack,
1952 &mut concat_cache,
1953 )
1954 }
1955
    /// Expand each part of a concatenation, then collapse the result to a
    /// single literal when every part became a literal or number.
    ///
    /// # Errors
    /// Propagates expansion errors from individual parts.
    fn expand_concatenation_cached(
        &self,
        parts: Vec<Value<'a>>,
        expanded_variables: &mut ExpansionCache<'a>,
        expansion_stack: &mut Vec<Cow<'a, str>>,
        concat_cache: &mut ConcatCache<'a>,
    ) -> Result<Value<'a>> {
        let mut expanded_parts = Vec::with_capacity(parts.len());

        for part in parts {
            let expanded = self.smart_expand_value_cached(
                part,
                expanded_variables,
                expansion_stack,
                concat_cache,
            )?;
            expanded_parts.push(expanded);
        }

        // Fully resolved concatenations fold into one owned literal;
        // otherwise keep the (partially expanded) concat structure.
        if expanded_parts
            .iter()
            .all(|p| matches!(p, Value::Literal(_) | Value::Number(_)))
        {
            let combined = concatenate_simple_values(&expanded_parts);
            Ok(Value::Literal(Cow::Owned(combined)))
        } else {
            Ok(Value::Concat(expanded_parts.into_boxed_slice()))
        }
    }
1988
1989 pub fn get_expanded_string(&self, value: &Value<'a>) -> Result<String> {
1991 match value {
1992 Value::Literal(s) => Ok(s.to_string()),
1993 Value::Number(n) => Ok(n.to_string()),
1994 Value::Variable(name) => {
1995 get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1997 || {
1998 get_month_expansion(name.as_ref()).map_or_else(
2000 || {
2001 Err(Error::UndefinedVariable(name.as_ref().to_string()))
2003 },
2004 |month_value| Ok(month_value.to_string()),
2005 )
2006 },
2007 |user_value| self.get_expanded_string(user_value),
2008 )
2009 }
2010 Value::Concat(parts) => {
2011 let mut result = String::new();
2012 for part in parts.iter() {
2013 result.push_str(&self.get_expanded_string(part)?);
2014 }
2015 Ok(result)
2016 }
2017 }
2018 }
2019
    /// Convert the library into one with `'static` lifetime by taking
    /// ownership of every borrowed string.
    #[must_use]
    pub fn into_owned(self) -> Library<'static> {
        let strings = self
            .strings
            .into_iter()
            .map(StringDefinition::into_owned)
            .collect::<Vec<_>>();
        // The lookup must be rebuilt because its keys now have to be owned.
        let mut string_lookup = AHashMap::with_capacity(strings.len());
        for (index, definition) in strings.iter().enumerate() {
            string_lookup.insert(Cow::Owned(definition.name.to_string()), index);
        }

        Library {
            entries: self.entries.into_iter().map(Entry::into_owned).collect(),
            entry_sources: self.entry_sources,
            strings,
            string_lookup,
            preambles: self
                .preambles
                .into_iter()
                .map(Preamble::into_owned)
                .collect(),
            comments: self.comments.into_iter().map(Comment::into_owned).collect(),
            failed_blocks: self
                .failed_blocks
                .into_iter()
                .map(FailedBlock::into_owned)
                .collect(),
            block_order: self.block_order,
        }
    }
2052
    /// Append a `@string` definition (no source span).
    pub fn add_string(&mut self, name: &'a str, value: Value<'a>) {
        self.push_string_with_source(Cow::Borrowed(name), value, None);
    }
2057
    /// Append an entry (no source span).
    pub fn add_entry(&mut self, entry: Entry<'a>) {
        self.push_entry_with_source(entry, None);
    }
2062
    /// Append a `@preamble` block (no source span).
    pub fn add_preamble(&mut self, value: Value<'a>) {
        self.push_preamble_with_source(value, None);
    }
2067
    /// Append a `@comment` block (no source span).
    pub fn add_comment(&mut self, comment: &'a str) {
        self.push_comment_with_source(Cow::Borrowed(comment), None);
    }
2072
    /// Expand string variables in every entry field and preamble in place.
    ///
    /// Values are moved out with `mem::take` and written back because
    /// `expand_value_for_parse` needs `&self` access to the string table
    /// while the value is being rewritten.
    ///
    /// # Errors
    /// Returns the first undefined-variable or circular-reference error.
    pub fn resolve_strings(&mut self) -> Result<()> {
        let has_user_strings = !self.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&self.strings);
        let mut expanded_variables = ExpansionCache::with_capacity(self.strings.len());
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();

        // Index-based loops avoid holding borrows of `self` across the
        // `expand_value_for_parse` call.
        for entry_index in 0..self.entries.len() {
            let field_count = self.entries[entry_index].fields.len();
            for field_index in 0..field_count {
                let mut value =
                    std::mem::take(&mut self.entries[entry_index].fields[field_index].value);
                self.expand_value_for_parse(
                    &mut value,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                self.entries[entry_index].fields[field_index].value = value;
            }
        }

        for preamble_index in 0..self.preambles.len() {
            let mut value = std::mem::take(&mut self.preambles[preamble_index].value);
            self.expand_value_for_parse(
                &mut value,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            self.preambles[preamble_index].value = value;
        }

        Ok(())
    }
2114
2115 pub fn normalize_doi_fields(&mut self) {
2117 for entry in &mut self.entries {
2118 for field in &mut entry.fields {
2119 if field.name.eq_ignore_ascii_case("doi") {
2120 if let Some(normalized) = normalize_doi(&field.value.to_plain_string()) {
2121 field.value = Value::Literal(Cow::Owned(normalized));
2122 }
2123 }
2124 }
2125 }
2126 }
2127
2128 pub fn normalize_months(&mut self, style: MonthStyle) {
2130 for entry in &mut self.entries {
2131 for field in &mut entry.fields {
2132 if field.name.eq_ignore_ascii_case("month") {
2133 if let Some(month) =
2134 normalize_month_value(&field.value.to_plain_string(), style)
2135 {
2136 field.value = month;
2137 }
2138 }
2139 }
2140 }
2141 }
2142
2143 pub fn normalize_fields(&mut self, options: FieldNormalizeOptions) {
2145 for entry in &mut self.entries {
2146 for field in &mut entry.fields {
2147 let mut name = if options.biblatex_aliases {
2148 canonical_biblatex_field_alias(&field.name)
2149 .unwrap_or_else(|| field.name.as_ref())
2150 .to_string()
2151 } else {
2152 field.name.to_string()
2153 };
2154
2155 if options.name_case == FieldNameCase::Lowercase {
2156 name.make_ascii_lowercase();
2157 }
2158
2159 if name != field.name {
2160 field.name = Cow::Owned(name);
2161 }
2162 }
2163 }
2164 }
2165
    /// Sort the library per `options`: fields within each entry by name,
    /// and/or entries by citation key.
    pub fn sort(&mut self, options: SortOptions) {
        if options.fields_by_name {
            for entry in &mut self.entries {
                entry
                    .fields
                    .sort_by(|left, right| left.name.cmp(&right.name));
            }
        }

        if options.entries_by_key {
            // Entry spans must be permuted together with the entries they
            // describe, so sort (entry, span) pairs when spans exist.
            if let Some(sources) = self.entry_sources.take() {
                let mut entries = self.entries.drain(..).zip(sources).collect::<Vec<_>>();
                entries.sort_by(|(left, _), (right, _)| left.key.cmp(&right.key));
                let (sorted_entries, sorted_sources): (Vec<_>, Vec<_>) =
                    entries.into_iter().unzip();
                self.entries = sorted_entries;
                self.entry_sources = Some(sorted_sources);
            } else {
                self.entries.sort_by(|left, right| left.key.cmp(&right.key));
            }
            // Original document order is meaningless after sorting; regroup
            // blocks by kind instead.
            self.rebuild_grouped_block_order();
        }
    }
2190
2191 fn rebuild_grouped_block_order(&mut self) {
2192 self.block_order.clear();
2193 self.block_order
2194 .extend((0..self.strings.len()).map(BlockKind::String));
2195 self.block_order
2196 .extend((0..self.preambles.len()).map(BlockKind::Preamble));
2197 self.block_order
2198 .extend((0..self.comments.len()).map(BlockKind::Comment));
2199 self.block_order
2200 .extend((0..self.entries.len()).map(BlockKind::Entry));
2201 self.block_order
2202 .extend((0..self.failed_blocks.len()).map(BlockKind::Failed));
2203 }
2204
2205 #[must_use]
2208 pub fn validate(
2209 &self,
2210 level: ValidationLevel,
2211 ) -> Vec<(usize, &Entry<'a>, Vec<ValidationError>)> {
2212 let mut invalid_entries = Vec::new();
2213
2214 for (index, entry) in self.entries.iter().enumerate() {
2215 if let Err(errors) = entry.validate(level) {
2216 invalid_entries.push((index, entry, errors));
2217 }
2218 }
2219
2220 invalid_entries
2221 }
2222
2223 #[must_use]
2226 pub fn find_duplicate_keys(&self) -> Vec<&str> {
2227 let mut seen = std::collections::HashSet::new();
2228 let mut duplicates = std::collections::HashSet::new();
2229
2230 for entry in &self.entries {
2231 if !seen.insert(entry.key()) {
2232 duplicates.insert(entry.key());
2233 }
2234 }
2235
2236 duplicates.into_iter().collect()
2237 }
2238
2239 #[must_use]
2241 pub fn find_duplicate_keys_ignore_case(&self) -> Vec<String> {
2242 let mut seen = std::collections::HashSet::new();
2243 let mut duplicates = std::collections::HashSet::new();
2244
2245 for entry in &self.entries {
2246 let normalized_key = entry.key().to_ascii_lowercase();
2247 if !seen.insert(normalized_key.clone()) {
2248 duplicates.insert(normalized_key);
2249 }
2250 }
2251
2252 duplicates.into_iter().collect()
2253 }
2254
2255 #[must_use]
2257 pub fn find_duplicate_dois(&self) -> Vec<(String, Vec<&Entry<'a>>)> {
2258 let mut groups: AHashMap<String, Vec<&Entry<'a>>> = AHashMap::new();
2259 for entry in &self.entries {
2260 if let Some(doi) = entry.doi() {
2261 groups.entry(doi).or_default().push(entry);
2262 }
2263 }
2264
2265 groups
2266 .into_iter()
2267 .filter(|(_, entries)| entries.len() > 1)
2268 .collect()
2269 }
2270
2271 #[must_use]
2273 pub fn validate_comprehensive(&self, level: ValidationLevel) -> ValidationReport<'_> {
2274 let invalid_entries = self.validate(level);
2275 let duplicate_keys = self.find_duplicate_keys();
2276 let empty_entries = self.find_empty_entries();
2277
2278 ValidationReport {
2279 invalid_entries,
2280 duplicate_keys,
2281 empty_entries,
2282 total_entries: self.entries.len(),
2283 validation_level: level,
2284 }
2285 }
2286
2287 fn find_empty_entries(&self) -> Vec<(usize, &Entry<'a>)> {
2289 self.entries
2290 .iter()
2291 .enumerate()
2292 .filter(|(_, entry)| entry.fields().is_empty())
2293 .collect()
2294 }
2295
2296 #[must_use]
2298 pub fn stats(&self) -> LibraryStats {
2299 let mut type_counts = AHashMap::new();
2300 for entry in &self.entries {
2301 *type_counts.entry(entry.ty.to_string()).or_insert(0) += 1;
2302 }
2303
2304 LibraryStats {
2305 total_entries: self.entries.len(),
2306 total_strings: self.strings.len(),
2307 total_preambles: self.preambles.len(),
2308 total_comments: self.comments.len(),
2309 entries_by_type: type_counts,
2310 }
2311 }
2312}
2313
/// Aggregate counts describing a library's contents.
#[derive(Debug, Clone)]
pub struct LibraryStats {
    /// Number of entries.
    pub total_entries: usize,
    /// Number of `@string` definitions.
    pub total_strings: usize,
    /// Number of `@preamble` blocks.
    pub total_preambles: usize,
    /// Number of `@comment` blocks.
    pub total_comments: usize,
    /// Entry count keyed by entry type name.
    pub entries_by_type: AHashMap<String, usize>,
}
2328
/// Result of `validate_comprehensive`: all findings plus run metadata.
#[derive(Debug, Clone)]
pub struct ValidationReport<'a> {
    /// Entries that failed validation, with index and their errors.
    pub invalid_entries: Vec<(usize, &'a Entry<'a>, Vec<ValidationError>)>,
    /// Citation keys used by more than one entry.
    pub duplicate_keys: Vec<&'a str>,
    /// Entries that have no fields, with their index.
    pub empty_entries: Vec<(usize, &'a Entry<'a>)>,
    /// Total number of entries examined.
    pub total_entries: usize,
    /// The validation level the report was produced at.
    pub validation_level: ValidationLevel,
}
2343
2344impl ValidationReport<'_> {
2345 #[must_use]
2347 pub fn is_valid(&self) -> bool {
2348 self.invalid_entries.is_empty()
2349 && self.duplicate_keys.is_empty()
2350 && self.empty_entries.is_empty()
2351 }
2352
2353 #[must_use]
2355 pub fn total_issues(&self) -> usize {
2356 self.invalid_entries.len() + self.duplicate_keys.len() + self.empty_entries.len()
2357 }
2358
2359 #[must_use]
2361 pub fn issue_summary(&self) -> IssueSummary {
2362 let mut errors = 0;
2363 let mut warnings = 0;
2364 let mut infos = 0;
2365
2366 for (_, _, validation_errors) in &self.invalid_entries {
2367 for error in validation_errors {
2368 match error.severity {
2369 crate::model::ValidationSeverity::Error => errors += 1,
2370 crate::model::ValidationSeverity::Warning => warnings += 1,
2371 crate::model::ValidationSeverity::Info => infos += 1,
2372 }
2373 }
2374 }
2375
2376 errors += self.duplicate_keys.len() + self.empty_entries.len();
2378
2379 IssueSummary {
2380 errors,
2381 warnings,
2382 infos,
2383 }
2384 }
2385}
2386
/// Per-severity counts produced by `ValidationReport::issue_summary`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssueSummary {
    /// Error-severity findings (includes duplicate keys and empty entries).
    pub errors: usize,
    /// Warning-severity findings.
    pub warnings: usize,
    /// Informational findings.
    pub infos: usize,
}
2397
2398fn concatenate_simple_values(values: &[Value]) -> String {
2400 let mut result = String::new();
2401
2402 let capacity: usize = values
2404 .iter()
2405 .map(|v| match v {
2406 Value::Literal(s) => s.len(),
2407 Value::Number(n) => n.to_string().len(),
2408 _ => 0,
2409 })
2410 .sum();
2411
2412 result.reserve(capacity);
2413
2414 for value in values {
2415 match value {
2416 Value::Literal(s) => result.push_str(s),
2417 Value::Number(n) => result.push_str(&n.to_string()),
2418 _ => {} }
2420 }
2421
2422 result
2423}
2424
/// Unicode case-insensitive substring test. An empty needle always matches
/// (short-circuits before any allocation).
fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
    needle.is_empty() || haystack.to_lowercase().contains(&needle.to_lowercase())
}
2432
2433fn normalize_month_value(input: &str, style: MonthStyle) -> Option<Value<'static>> {
2434 let normalized = input.trim().trim_matches(['{', '}']).to_ascii_lowercase();
2435 let month_index = match normalized.as_str() {
2436 "jan" | "january" | "1" | "01" => 1,
2437 "feb" | "february" | "2" | "02" => 2,
2438 "mar" | "march" | "3" | "03" => 3,
2439 "apr" | "april" | "4" | "04" => 4,
2440 "may" | "5" | "05" => 5,
2441 "jun" | "june" | "6" | "06" => 6,
2442 "jul" | "july" | "7" | "07" => 7,
2443 "aug" | "august" | "8" | "08" => 8,
2444 "sep" | "september" | "9" | "09" => 9,
2445 "oct" | "october" | "10" => 10,
2446 "nov" | "november" | "11" => 11,
2447 "dec" | "december" | "12" => 12,
2448 _ => return None,
2449 };
2450
2451 let text = match style {
2452 MonthStyle::Long => month_long_name(month_index),
2453 MonthStyle::Abbrev => month_abbreviation(month_index),
2454 MonthStyle::Number => return Some(Value::Number(month_index)),
2455 };
2456
2457 Some(Value::Literal(Cow::Borrowed(text)))
2458}
2459
/// Full English month name for `month` in 1..=12, or `""` out of range.
const fn month_long_name(month: i64) -> &'static str {
    const NAMES: [&str; 12] = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ];
    if 1 <= month && month <= 12 {
        NAMES[(month - 1) as usize]
    } else {
        ""
    }
}
2477
/// BibTeX month abbreviation for `month` in 1..=12, or `""` out of range.
const fn month_abbreviation(month: i64) -> &'static str {
    const ABBREVS: [&str; 12] = [
        "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    ];
    if 1 <= month && month <= 12 {
        ABBREVS[(month - 1) as usize]
    } else {
        ""
    }
}
2495
/// Fluent builder for assembling a [`Library`] programmatically.
#[derive(Debug, Default)]
pub struct LibraryBuilder<'a> {
    // The library being assembled; returned by `build`.
    library: Library<'a>,
}
2501
impl<'a> LibraryBuilder<'a> {
    /// Create an empty builder.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Append an entry.
    #[must_use]
    pub fn entry(mut self, entry: Entry<'a>) -> Self {
        self.library.add_entry(entry);
        self
    }

    /// Append a `@string` definition.
    #[must_use]
    pub fn string(mut self, name: &'a str, value: Value<'a>) -> Self {
        self.library.add_string(name, value);
        self
    }

    /// Append a `@preamble` block.
    #[must_use]
    pub fn preamble(mut self, value: Value<'a>) -> Self {
        self.library.add_preamble(value);
        self
    }

    /// Append a `@comment` block.
    #[must_use]
    pub fn comment(mut self, text: &'a str) -> Self {
        self.library.add_comment(text);
        self
    }

    /// Finish building and return the assembled library.
    #[must_use]
    pub fn build(self) -> Library<'a> {
        self.library
    }
}
2543
2544#[cfg(test)]
2545mod tests {
2546 use super::*;
2547 use crate::model::{EntryType, Field};
2548
    // End-to-end parse: one @string plus one entry; the `author` variable
    // reference must be expanded to the string's value during parsing.
    #[test]
    fn test_library_parse() {
        let input = r#"
        @string{me = "John Doe"}

        @article{test2023,
            author = me,
            title = "Test Article",
            year = 2023
        }
        "#;

        let library = Library::parser().parse(input).unwrap();
        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);

        let entry = &library.entries()[0];
        assert_eq!(entry.get_as_string("author").unwrap(), "John Doe");
    }
2569
2570 #[test]
2571 fn test_zero_copy_preservation() {
2572 let input = r#"
2573 @article{test,
2574 title = "This is borrowed",
2575 year = 2023
2576 }
2577 "#;
2578
2579 let library = Library::parser().parse(input).unwrap();
2580 let entry = &library.entries()[0];
2581
2582 if let Some(Value::Literal(cow)) = entry
2584 .fields
2585 .iter()
2586 .find(|f| f.name == "title")
2587 .map(|f| &f.value)
2588 {
2589 assert!(matches!(cow, Cow::Borrowed(_)));
2590 }
2591 }
2592
2593 #[test]
2594 fn test_concatenation_creates_owned() {
2595 let input = r#"
2596 @string{first = "Hello"}
2597 @string{second = "World"}
2598
2599 @article{test,
2600 title = first # ", " # second
2601 }
2602 "#;
2603
2604 let library = Library::parser().parse(input).unwrap();
2605 let entry = &library.entries()[0];
2606
2607 assert_eq!(entry.get_as_string("title").unwrap(), "Hello, World");
2609 }
2610
2611 #[test]
2612 fn test_boxed_concat_memory_optimization() {
2613 assert!(
2615 std::mem::size_of::<Value>() <= 32,
2616 "Value enum is {} bytes, should be 32 or less",
2617 std::mem::size_of::<Value>()
2618 );
2619 }
2620
2621 #[test]
2622 fn test_field_vec_capacity_bounded() {
2623 let input = r#"
2624 @article{test,
2625 a = "1", b = "2", c = "3", d = "4", e = "5",
2626 f = "6", g = "7", h = "8", i = "9", j = "10"
2627 }
2628 "#;
2629
2630 let library = Library::parser().parse(input).unwrap();
2631 let entry = &library.entries()[0];
2632
2633 assert_eq!(entry.fields.len(), 10);
2634 assert!(
2635 entry.fields.capacity() <= 17,
2636 "Unexpected field Vec growth: len={}, capacity={}",
2637 entry.fields.len(),
2638 entry.fields.capacity()
2639 );
2640 }
2641
2642 #[test]
2643 fn test_library_builder() {
2644 let library = LibraryBuilder::new()
2645 .string("me", Value::Literal(Cow::Borrowed("John Doe")))
2646 .entry(Entry {
2647 ty: EntryType::Article,
2648 key: Cow::Borrowed("test2023"),
2649 fields: vec![
2650 Field::new("author", Value::Variable(Cow::Borrowed("me"))),
2651 Field::new("title", Value::Literal(Cow::Borrowed("Test"))),
2652 ],
2653 })
2654 .build();
2655
2656 assert_eq!(library.entries().len(), 1);
2657 assert_eq!(library.strings().len(), 1);
2658 }
2659
2660 #[test]
2661 fn test_library_stats() {
2662 let input = r#"
2663 @string{ieee = "IEEE"}
2664 @preamble{"Test preamble"}
2665 % This is a percent comment that now works properly
2666 @comment{This is a formal comment that works}
2667 @article{a1, title = "Article 1"}
2668 @article{a2, title = "Article 2"}
2669 @book{b1, title = "Book 1"}
2670 "#;
2671
2672 let library = Library::parser().parse(input).unwrap();
2673 let stats = library.stats();
2674
2675 assert_eq!(stats.total_entries, 3);
2676 assert_eq!(stats.total_strings, 1);
2677 assert_eq!(stats.total_preambles, 1);
2678 assert_eq!(stats.total_comments, 2); assert_eq!(stats.entries_by_type.get("article"), Some(&2));
2680 assert_eq!(stats.entries_by_type.get("book"), Some(&1));
2681 }
2682
2683 #[test]
2684 fn test_parse_files_parallel() {
2685 use std::fs::write;
2686 use std::path::PathBuf;
2687
2688 let dir = std::env::temp_dir();
2689 let path1 = dir.join("parallel_test1.bib");
2690 let path2 = dir.join("parallel_test2.bib");
2691
2692 write(&path1, "@article{a1,title=\"A\"}").unwrap();
2693 write(&path2, "@article{a2,title=\"B\"}").unwrap();
2694
2695 let paths: Vec<PathBuf> = vec![path1.clone(), path2.clone()];
2696
2697 let library = Library::parser().threads(2).parse_files(&paths).unwrap();
2698
2699 assert_eq!(library.entries().len(), 2);
2700
2701 let _ = std::fs::remove_file(path1);
2702 let _ = std::fs::remove_file(path2);
2703 }
2704
2705 #[test]
2706 fn test_builder_pattern_api() {
2707 let input = "@article{test, title = \"Test\"}";
2708
2709 let db1 = Library::parser().parse(input).unwrap();
2711 assert_eq!(db1.entries().len(), 1);
2712
2713 let library2 = Library::parser().threads(1).parse(input).unwrap();
2715 assert_eq!(library2.entries().len(), 1);
2716
2717 #[cfg(feature = "parallel")]
2718 {
2719 use std::fs::write;
2720
2721 let db3 = Library::parser().threads(4).parse(input).unwrap();
2723 assert_eq!(db3.entries().len(), 1);
2724
2725 let path1 = "/tmp/test1.bib";
2727 let path2 = "/tmp/test2.bib";
2728 write(path1, "@article{a1, title=\"A\"}").unwrap();
2729 write(path2, "@article{a2, title=\"B\"}").unwrap();
2730
2731 let db4 = Library::parser()
2732 .threads(2)
2733 .parse_files(&[path1, path2])
2734 .unwrap();
2735 assert_eq!(db4.entries().len(), 2);
2736
2737 let _ = std::fs::remove_file(path1);
2738 let _ = std::fs::remove_file(path2);
2739 }
2740 }
2741}