1use crate::{
4 normalize_doi, Entry, Error, Result, SourceSpan, ValidationError, ValidationLevel, Value,
5};
6use ahash::AHashMap;
7use memchr::memchr;
8use std::borrow::Cow;
9use std::ops::Deref;
10use std::path::Path;
11
12#[cfg(feature = "parallel")]
13use rayon::prelude::*;
14
/// Largest number of entries `ExpansionCache` keeps in its linear `Vec` form
/// before it is promoted to a hash map.
const SMALL_EXPANSION_CACHE_LIMIT: usize = 16;
/// Up to this many string definitions, lookups by name scan the definition
/// list linearly instead of going through the hash index.
const SMALL_STRING_LOOKUP_LIMIT: usize = 16;
/// Maximum number of concatenation results `ConcatCache` retains; further
/// inserts are dropped.
const CONCAT_CACHE_LIMIT: usize = 16;
18
19enum ExpansionCache<'a> {
20 Small(Vec<(Cow<'a, str>, Value<'a>)>),
21 Large(AHashMap<Cow<'a, str>, Value<'a>>),
22}
23
24impl<'a> ExpansionCache<'a> {
25 fn with_capacity(capacity: usize) -> Self {
26 if capacity <= SMALL_EXPANSION_CACHE_LIMIT {
27 Self::Small(Vec::with_capacity(capacity))
28 } else {
29 Self::Large(AHashMap::with_capacity(capacity))
30 }
31 }
32
33 fn get_cloned(&mut self, name: &str) -> Option<Value<'a>> {
34 match self {
35 Self::Small(entries) => {
36 let index = entries.iter().position(|(key, _)| key.as_ref() == name)?;
37 if index != 0 {
38 entries.swap(0, index);
39 }
40 Some(entries[0].1.clone())
41 }
42 Self::Large(entries) => entries.get(name).cloned(),
43 }
44 }
45
46 fn insert(&mut self, name: Cow<'a, str>, value: Value<'a>) {
47 match self {
48 Self::Small(entries) => {
49 if entries.len() < SMALL_EXPANSION_CACHE_LIMIT {
50 entries.push((name, value));
51 } else {
52 let mut large = AHashMap::with_capacity(entries.len() + 1);
53 for (key, value) in entries.drain(..) {
54 large.insert(key, value);
55 }
56 large.insert(name, value);
57 *self = Self::Large(large);
58 }
59 }
60 Self::Large(entries) => {
61 entries.insert(name, value);
62 }
63 }
64 }
65}
66
67struct ConcatCache<'a> {
68 entries: Vec<(Box<[Value<'a>]>, Value<'a>)>,
69}
70
71impl<'a> ConcatCache<'a> {
72 const fn new() -> Self {
73 Self {
74 entries: Vec::new(),
75 }
76 }
77
78 fn get_cloned(&mut self, parts: &[Value<'a>]) -> Option<Value<'a>> {
79 let index = self
80 .entries
81 .iter()
82 .position(|(cached_parts, _)| concat_parts_equal(cached_parts, parts))?;
83 if index != 0 {
84 self.entries.swap(0, index);
85 }
86 Some(self.entries[0].1.clone())
87 }
88
89 fn insert(&mut self, parts: Box<[Value<'a>]>, value: Value<'a>) {
90 if self.entries.len() < CONCAT_CACHE_LIMIT {
91 self.entries.push((parts, value));
92 }
93 }
94}
95
96fn concat_parts_equal(left: &[Value<'_>], right: &[Value<'_>]) -> bool {
97 left.len() == right.len()
98 && left
99 .iter()
100 .zip(right)
101 .all(|(left, right)| cache_values_equal(left, right))
102}
103
104fn cache_values_equal(left: &Value<'_>, right: &Value<'_>) -> bool {
105 match (left, right) {
106 (Value::Literal(left), Value::Literal(right))
107 | (Value::Variable(left), Value::Variable(right)) => left.as_ref() == right.as_ref(),
108 (Value::Number(left), Value::Number(right)) => left == right,
109 (Value::Concat(left), Value::Concat(right)) => concat_parts_equal(left, right),
110 _ => false,
111 }
112}
113
/// Maps a three-letter month abbreviation to the full English month name.
///
/// Matching ignores ASCII case. Any name that is not exactly three bytes
/// long, or is not one of the twelve abbreviations, yields `None`.
#[inline]
fn get_month_expansion(name: &str) -> Option<&'static str> {
    let &[first, second, third] = name.as_bytes() else {
        return None;
    };

    let folded = [
        first.to_ascii_lowercase(),
        second.to_ascii_lowercase(),
        third.to_ascii_lowercase(),
    ];

    let month = match &folded {
        b"jan" => "January",
        b"feb" => "February",
        b"mar" => "March",
        b"apr" => "April",
        b"may" => "May",
        b"jun" => "June",
        b"jul" => "July",
        b"aug" => "August",
        b"sep" => "September",
        b"oct" => "October",
        b"nov" => "November",
        b"dec" => "December",
        _ => return None,
    };
    Some(month)
}
145
146#[inline]
147fn get_string_value<'map, 'a>(
148 strings: &'map [StringDefinition<'a>],
149 string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
150 name: &str,
151) -> Option<&'map Value<'a>> {
152 get_string_definition(strings, string_lookup, name).map(|definition| &definition.value)
153}
154
155#[inline]
156fn get_string_definition<'map, 'a>(
157 strings: &'map [StringDefinition<'a>],
158 string_lookup: &'map AHashMap<Cow<'a, str>, usize>,
159 name: &str,
160) -> Option<&'map StringDefinition<'a>> {
161 if strings.len() <= SMALL_STRING_LOOKUP_LIMIT {
162 strings
163 .iter()
164 .rev()
165 .find(|definition| definition.name.as_ref() == name)
166 } else {
167 string_lookup
168 .get(name)
169 .and_then(|&index| strings.get(index))
170 }
171}
172
173#[inline]
174fn user_strings_shadow_month_constants(strings: &[StringDefinition<'_>]) -> bool {
175 strings
176 .iter()
177 .any(|definition| get_month_expansion(definition.name.as_ref()).is_some())
178}
179
180#[inline]
182fn contains_variables(value: &Value) -> bool {
183 match value {
184 Value::Variable(_) => true,
185 Value::Concat(parts) => parts.iter().any(contains_variables),
186 _ => false,
187 }
188}
189
190#[inline]
192fn contains_potential_month_variables(value: &Value) -> bool {
193 match value {
194 Value::Variable(name) => get_month_expansion(name).is_some(),
195 Value::Concat(parts) => parts.iter().any(contains_potential_month_variables),
196 _ => false,
197 }
198}
199
/// Returns `true` for bytes accepted inside an identifier: ASCII letters and
/// digits plus `_`, `-`, `:` and `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
207
208#[inline]
209fn starts_with_at_keyword(input: &[u8], keyword: &[u8]) -> bool {
210 if input.first() != Some(&b'@') || input.len() < keyword.len() + 1 {
211 return false;
212 }
213
214 for (offset, &expected) in keyword.iter().enumerate() {
215 if (input[offset + 1] | 0x20) != expected {
216 return false;
217 }
218 }
219
220 if input.len() == keyword.len() + 1 {
221 return true;
222 }
223
224 !is_identifier_char(input[keyword.len() + 1])
225}
226
/// Summary produced by a single pre-parse pass over the raw input bytes.
#[derive(Debug, Clone, Copy)]
struct InputScan {
    /// Set when at least one `@` is directly followed by the keyword `string`.
    may_contain_string_definition: bool,
    /// Number of `@` bytes found anywhere in the input.
    at_count: usize,
}
232
233fn scan_input(input: &str) -> InputScan {
235 let bytes = input.as_bytes();
236 let mut pos = 0;
237 let mut at_count = 0;
238 let mut may_contain_string_definition = false;
239
240 while pos < bytes.len() {
241 if let Some(offset) = memchr(b'@', &bytes[pos..]) {
242 let at = pos + offset;
243 at_count += 1;
244 if starts_with_at_keyword(&bytes[at..], b"string") {
245 may_contain_string_definition = true;
246 }
247 pos = at + 1;
248 } else {
249 break;
250 }
251 }
252
253 InputScan {
254 may_contain_string_definition,
255 at_count,
256 }
257}
258
259fn input_may_have_late_string_definition(input: &str) -> bool {
264 let bytes = input.as_bytes();
265 let mut pos = 0;
266 let mut saw_regular_entry = false;
267
268 while pos < bytes.len() {
269 if let Some(offset) = memchr(b'@', &bytes[pos..]) {
270 let at = pos + offset;
271 let tail = &bytes[at..];
272
273 if starts_with_at_keyword(tail, b"string") {
274 if saw_regular_entry {
275 return true;
276 }
277 } else if !saw_regular_entry
278 && !starts_with_at_keyword(tail, b"preamble")
279 && !starts_with_at_keyword(tail, b"comment")
280 {
281 saw_regular_entry = true;
283 }
284
285 pos = at + 1;
286 } else {
287 break;
288 }
289 }
290
291 false
292}
293
294fn source_span(input: &str, byte_start: usize, byte_end: usize) -> SourceSpan {
295 let (line, column) = source_position(input, byte_start);
296 SourceSpan::new(byte_start, byte_end, line, column)
297}
298
/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// Every character that starts before `pos` is counted: `'\n'` starts a new
/// line and resets the column to 1, any other character advances the column
/// by one. Columns therefore count characters, not bytes. An offset past the
/// end of `input` gives the position just after the last character.
fn source_position(input: &str, pos: usize) -> (usize, usize) {
    input
        .char_indices()
        .take_while(|&(byte_index, _)| byte_index < pos)
        .fold((1, 1), |(line, column), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
317
318fn next_recovery_boundary(input: &str, start: usize) -> usize {
319 let bytes = input.as_bytes();
320 let mut pos = start.saturating_add(1);
321 while pos < bytes.len() {
322 if bytes[pos] == b'@' && line_prefix_is_whitespace(bytes, pos) {
323 return pos;
324 }
325 pos += 1;
326 }
327 input.len()
328}
329
/// Returns `true` when everything between the start of the line and `pos`
/// consists only of spaces and tabs.
///
/// The line starts after the nearest preceding `\n` or `\r`, or at offset 0.
fn line_prefix_is_whitespace(bytes: &[u8], pos: usize) -> bool {
    bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte != b'\n' && byte != b'\r')
        .all(|&byte| byte == b' ' || byte == b'\t')
}
340
341#[derive(Debug, Default, Clone)]
343pub struct Parser {
344 threads: Option<usize>,
345 tolerant: bool,
346 capture_source: bool,
347}
348
349impl Parser {
350 #[must_use]
352 #[inline]
353 pub fn new() -> Self {
354 Self::default()
355 }
356
357 #[must_use]
359 #[inline]
360 pub fn threads(mut self, threads: impl Into<Option<usize>>) -> Self {
361 self.threads = threads.into();
362 self
363 }
364
365 #[must_use]
367 #[inline]
368 pub const fn tolerant(mut self) -> Self {
369 self.tolerant = true;
370 self
371 }
372
373 #[must_use]
375 #[inline]
376 pub const fn capture_source(mut self) -> Self {
377 self.capture_source = true;
378 self
379 }
380
381 #[inline]
383 pub fn parse<'a>(&self, input: &'a str) -> Result<Library<'a>> {
384 if self.tolerant {
385 Library::parse_tolerant(input, self.capture_source)
386 } else if self.capture_source {
387 Library::parse_with_spans(input)
388 } else {
389 Library::parse_sequential(input)
390 }
391 }
392
393 pub fn parse_files<P: AsRef<Path> + Sync>(&self, paths: &[P]) -> Result<Library<'static>> {
395 #[cfg(feature = "parallel")]
396 {
397 if let Some(threads) = self.threads {
398 if threads <= 1 {
399 return Self::parse_files_sequential(paths);
400 }
401 }
402
403 let pool = self.build_thread_pool()?;
404
405 let libraries: Result<Vec<_>> = pool.install(|| {
406 paths
407 .par_iter()
408 .map(|path| {
409 let content = std::fs::read_to_string(path)?;
410 let library = Library::parse_sequential(&content)?;
411 Ok(library.into_owned())
412 })
413 .collect()
414 });
415
416 let libraries = libraries?;
417 Ok(Library::merge_libraries_parallel(libraries))
418 }
419
420 #[cfg(not(feature = "parallel"))]
421 {
422 Self::parse_files_sequential(paths)
423 }
424 }
425
426 fn parse_files_sequential<P: AsRef<Path>>(paths: &[P]) -> Result<Library<'static>> {
428 let mut result = Library::new();
429 for path in paths {
430 let content = std::fs::read_to_string(path)?;
431 let library = Library::parse_sequential(&content)?;
432 result.merge(library.into_owned());
433 }
434 Ok(result)
435 }
436
437 #[cfg(feature = "parallel")]
438 fn build_thread_pool(&self) -> Result<rayon::ThreadPool> {
439 let mut builder = rayon::ThreadPoolBuilder::new();
440
441 if let Some(threads) = self.threads {
442 builder = builder.num_threads(threads);
443 }
444
445 builder
446 .build()
447 .map_err(|e| Error::WinnowError(e.to_string()))
448 }
449}
450
/// Borrowed view of one block stored in a `Library`.
#[derive(Debug, Clone, Copy)]
pub enum Block<'lib, 'a> {
    /// A regular entry, with its source span when one was recorded.
    Entry(&'lib Entry<'a>, Option<SourceSpan>),
    /// A string definition.
    String(&'lib StringDefinition<'a>),
    /// A preamble.
    Preamble(&'lib Preamble<'a>),
    /// A comment.
    Comment(&'lib Comment<'a>),
    /// A block that could not be parsed.
    Failed(&'lib FailedBlock<'a>),
}
465
/// Tag recording which per-kind vector of a `Library` holds a block, together
/// with the block's index inside that vector.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockKind {
    /// Index into the entry list.
    Entry(usize),
    /// Index into the string definition list.
    String(usize),
    /// Index into the preamble list.
    Preamble(usize),
    /// Index into the comment list.
    Comment(usize),
    /// Index into the failed-block list.
    Failed(usize),
}
474
/// Intermediate item collected while parsing, before a `Library` is assembled.
#[derive(Debug)]
enum RawBuildItem<'a> {
    /// A successfully parsed item and the span it was read from.
    Parsed(crate::parser::ParsedItem<'a>, SourceSpan),
    /// A block that failed to parse.
    Failed(FailedBlock<'a>),
}
480
481#[derive(Debug, Clone, PartialEq)]
483pub struct StringDefinition<'a> {
484 pub name: Cow<'a, str>,
486 pub value: Value<'a>,
488 pub source: Option<SourceSpan>,
490}
491
492impl<'a> StringDefinition<'a> {
493 #[must_use]
495 pub const fn new(name: &'a str, value: Value<'a>) -> Self {
496 Self {
497 name: Cow::Borrowed(name),
498 value,
499 source: None,
500 }
501 }
502
503 #[must_use]
505 pub fn name(&self) -> &str {
506 &self.name
507 }
508
509 #[must_use]
511 pub const fn value(&self) -> &Value<'a> {
512 &self.value
513 }
514
515 #[must_use]
517 pub fn into_owned(self) -> StringDefinition<'static> {
518 StringDefinition {
519 name: Cow::Owned(self.name.into_owned()),
520 value: self.value.into_owned(),
521 source: self.source,
522 }
523 }
524}
525
526#[derive(Debug, Clone, PartialEq)]
528pub struct Preamble<'a> {
529 pub value: Value<'a>,
531 pub source: Option<SourceSpan>,
533}
534
535impl<'a> Preamble<'a> {
536 #[must_use]
538 pub const fn new(value: Value<'a>) -> Self {
539 Self {
540 value,
541 source: None,
542 }
543 }
544
545 #[must_use]
547 pub const fn value(&self) -> &Value<'a> {
548 &self.value
549 }
550
551 #[must_use]
553 pub fn into_owned(self) -> Preamble<'static> {
554 Preamble {
555 value: self.value.into_owned(),
556 source: self.source,
557 }
558 }
559}
560
561impl<'a> Deref for Preamble<'a> {
562 type Target = Value<'a>;
563
564 fn deref(&self) -> &Self::Target {
565 &self.value
566 }
567}
568
569#[derive(Debug, Clone, PartialEq, Eq)]
571pub struct Comment<'a> {
572 pub text: Cow<'a, str>,
574 pub source: Option<SourceSpan>,
576}
577
578impl<'a> Comment<'a> {
579 #[must_use]
581 pub const fn new(text: &'a str) -> Self {
582 Self {
583 text: Cow::Borrowed(text),
584 source: None,
585 }
586 }
587
588 #[must_use]
590 pub fn text(&self) -> &str {
591 &self.text
592 }
593
594 #[must_use]
596 pub fn into_owned(self) -> Comment<'static> {
597 Comment {
598 text: Cow::Owned(self.text.into_owned()),
599 source: self.source,
600 }
601 }
602}
603
604impl Deref for Comment<'_> {
605 type Target = str;
606
607 fn deref(&self) -> &Self::Target {
608 &self.text
609 }
610}
611
612#[derive(Debug, Clone, PartialEq, Eq)]
614pub struct FailedBlock<'a> {
615 pub raw: Cow<'a, str>,
617 pub error: String,
619 pub source: Option<SourceSpan>,
621}
622
623impl FailedBlock<'_> {
624 #[must_use]
626 pub fn into_owned(self) -> FailedBlock<'static> {
627 FailedBlock {
628 raw: Cow::Owned(self.raw.into_owned()),
629 error: self.error,
630 source: self.source,
631 }
632 }
633}
634
/// Selects how a month is rendered; `Long` is the default.
///
/// NOTE(review): the variant descriptions below are inferred from the names
/// only; confirm against the code that consumes this enum.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MonthStyle {
    /// Presumably the full month name.
    #[default]
    Long,
    /// Presumably the abbreviated month name.
    Abbrev,
    /// Presumably the month number.
    Number,
}
646
/// Sorting switches; both default to `false`.
///
/// NOTE(review): field meanings are inferred from their names; confirm
/// against the code that consumes these options.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SortOptions {
    /// Presumably requests sorting entries by citation key.
    pub entries_by_key: bool,
    /// Presumably requests sorting each entry's fields by name.
    pub fields_by_name: bool,
}
655
/// Case policy for field names; `Preserve` is the default.
///
/// NOTE(review): variant meanings are inferred from their names; confirm
/// against the normalisation code.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameCase {
    /// Presumably leaves field names untouched.
    #[default]
    Preserve,
    /// Presumably converts field names to lowercase.
    Lowercase,
}
665
/// Options for field normalisation; the defaults are `FieldNameCase::Preserve`
/// and `false`.
///
/// NOTE(review): field meanings are inferred from their names; confirm
/// against the normalisation code.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FieldNormalizeOptions {
    /// Case policy applied to field names.
    pub name_case: FieldNameCase,
    /// Presumably enables handling of BibLaTeX field-name aliases.
    pub biblatex_aliases: bool,
}
674
/// In-memory collection of parsed blocks, stored per kind together with the
/// order in which the blocks were added.
#[derive(Debug, Clone, Default)]
pub struct Library<'a> {
    /// Regular entries.
    entries: Vec<Entry<'a>>,
    /// Source spans aligned by index with `entries`; `None` until the first
    /// entry with a span is pushed.
    entry_sources: Option<Vec<Option<SourceSpan>>>,
    /// String definitions in the order they were registered.
    strings: Vec<StringDefinition<'a>>,
    /// Maps a string name to the index of its latest definition in `strings`.
    string_lookup: AHashMap<Cow<'a, str>, usize>,
    /// Preamble blocks.
    preambles: Vec<Preamble<'a>>,
    /// Comment blocks.
    comments: Vec<Comment<'a>>,
    /// Blocks that failed to parse.
    failed_blocks: Vec<FailedBlock<'a>>,
    /// Kind and per-kind index of every block, in insertion order.
    block_order: Vec<BlockKind>,
}
695
696impl<'a> Library<'a> {
697 fn push_entry_with_source(&mut self, entry: Entry<'a>, source: Option<SourceSpan>) {
698 let index = self.entries.len();
699 self.entries.push(entry);
700 if let Some(sources) = &mut self.entry_sources {
701 sources.push(source);
702 } else if source.is_some() {
703 let mut sources = vec![None; index];
704 sources.push(source);
705 self.entry_sources = Some(sources);
706 }
707 self.block_order.push(BlockKind::Entry(index));
708 }
709
710 fn register_string_definition(
711 &mut self,
712 name: Cow<'a, str>,
713 value: Value<'a>,
714 source: Option<SourceSpan>,
715 ) -> usize {
716 let index = self.strings.len();
717 self.string_lookup.insert(name.clone(), index);
718 self.strings.push(StringDefinition {
719 name,
720 value,
721 source,
722 });
723 index
724 }
725
726 fn push_string_with_source(
727 &mut self,
728 name: Cow<'a, str>,
729 value: Value<'a>,
730 source: Option<SourceSpan>,
731 ) {
732 let index = self.register_string_definition(name, value, source);
733 self.block_order.push(BlockKind::String(index));
734 }
735
736 fn push_preamble_with_source(&mut self, value: Value<'a>, source: Option<SourceSpan>) -> usize {
737 let index = self.preambles.len();
738 self.preambles.push(Preamble { value, source });
739 self.block_order.push(BlockKind::Preamble(index));
740 index
741 }
742
743 fn push_comment_with_source(&mut self, text: Cow<'a, str>, source: Option<SourceSpan>) {
744 let index = self.comments.len();
745 self.comments.push(Comment { text, source });
746 self.block_order.push(BlockKind::Comment(index));
747 }
748
749 fn push_failed_block(&mut self, failed: FailedBlock<'a>) {
750 let index = self.failed_blocks.len();
751 self.failed_blocks.push(failed);
752 self.block_order.push(BlockKind::Failed(index));
753 }
754
/// Expands `value` in place while a library is being built.
///
/// Literals and numbers are left alone. Variables and concatenations are
/// resolved through the month table and, when `has_user_strings` is set,
/// through `smart_expand_value_cached`. The two flags are computed by the
/// caller; the caches and the expansion stack are shared across calls.
///
/// # Errors
///
/// Propagates any error returned by `smart_expand_value_cached`.
#[inline]
fn expand_value_for_parse(
    &self,
    value: &mut Value<'a>,
    has_user_strings: bool,
    month_constants_shadowed: bool,
    expanded_variables: &mut ExpansionCache<'a>,
    expansion_stack: &mut Vec<Cow<'a, str>>,
    concat_cache: &mut ConcatCache<'a>,
) -> Result<()> {
    match value {
        // Already in final form.
        Value::Literal(_) | Value::Number(_) => Ok(()),
        Value::Variable(name) => {
            // Take the month shortcut unless user strings exist and one of
            // them is named like a month abbreviation.
            if !has_user_strings || !month_constants_shadowed {
                if let Some(month_value) = get_month_expansion(name.as_ref()) {
                    *value = Value::Literal(Cow::Borrowed(month_value));
                    return Ok(());
                }
            }

            if has_user_strings {
                // Reuse a previous expansion of the same variable if cached.
                if let Some(expanded) = expanded_variables.get_cloned(name.as_ref()) {
                    *value = expanded;
                    return Ok(());
                }

                // Move the value out so the expander can consume it.
                let old_value = std::mem::take(value);
                *value = self.smart_expand_value_cached(
                    old_value,
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
            }

            // Without user strings a non-month variable stays as it is.
            Ok(())
        }
        Value::Concat(parts) => {
            // With user strings the cache is consulted before scanning the
            // parts.
            if has_user_strings {
                if let Some(expanded) = concat_cache.get_cloned(parts) {
                    *value = expanded;
                    return Ok(());
                }
            }

            // With user strings any variable needs expansion; without them
            // only month abbreviations can be resolved.
            let needs_expansion = if has_user_strings {
                parts.iter().any(contains_variables)
            } else {
                parts.iter().any(contains_potential_month_variables)
            };

            if needs_expansion {
                // Without user strings the cache is consulted only once
                // expansion is known to be needed.
                if !has_user_strings {
                    if let Some(expanded) = concat_cache.get_cloned(parts) {
                        *value = expanded;
                        return Ok(());
                    }
                }

                let old_value = std::mem::take(value);
                *value = self.smart_expand_value_cached(
                    old_value,
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
            }

            Ok(())
        }
    }
}
827
/// Creates an empty library.
#[must_use]
#[inline]
pub fn new() -> Self {
    Self::default()
}
834
/// Returns a `Parser` with default settings, ready to be configured.
#[must_use]
#[inline]
pub fn parser() -> Parser {
    Parser::new()
}
862
/// Parses `input` with a default-configured `Parser`.
///
/// # Errors
///
/// Returns the error reported by `Parser::parse`.
pub fn parse(input: &'a str) -> Result<Self> {
    Self::parser().parse(input)
}
867
868 pub fn parse_file(path: impl AsRef<Path>) -> Result<Library<'static>> {
870 let content = std::fs::read_to_string(path)?;
871 Library::parser().parse(&content).map(Library::into_owned)
872 }
873
/// Serialises the library to a string via `crate::writer::to_string`.
///
/// # Errors
///
/// Returns whatever error the writer reports.
pub fn to_bibtex(&self) -> Result<String> {
    crate::writer::to_string(self)
}
878
/// Writes the library to `path` via `crate::writer::to_file`.
///
/// # Errors
///
/// Returns whatever error the writer reports.
pub fn write_file(&self, path: impl AsRef<Path>) -> Result<()> {
    crate::writer::to_file(self, path)
}
883
/// Parses `input` without recording source spans, expanding variables as it
/// goes.
///
/// One of three strategies is chosen from a textual pre-scan of the input:
///
/// 1. No `@string` keyword seen: items are expanded while streaming, and only
///    month abbreviations can be resolved.
/// 2. `@string` keywords seen, none after the first regular entry: entries
///    are expanded while streaming against the strings collected so far, and
///    preambles are expanded after the stream ends.
/// 3. Otherwise: everything is collected first, then entries and preambles
///    are expanded once all strings are known.
///
/// # Errors
///
/// Propagates errors from `parse_bibtex_stream` and from value expansion.
#[allow(clippy::too_many_lines)]
pub(crate) fn parse_sequential(input: &'a str) -> Result<Self> {
    let mut db = Self::new();
    let input_scan = scan_input(input);

    // Strategy 1: the scan found no `@string` keyword.
    // NOTE(review): `scan_input` only matches `@` directly followed by
    // `string`; confirm the tokenizer cannot produce a string item from any
    // other spelling, because such an item would be stored here but never
    // used for expansion.
    if !input_scan.may_contain_string_definition {
        // The number of `@` bytes is used as a capacity hint.
        db.entries.reserve(input_scan.at_count);
        db.block_order.reserve(input_scan.at_count);
        let has_user_strings = false;
        let month_constants_shadowed = false;
        let mut expanded_variables = ExpansionCache::with_capacity(0);
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();

        crate::parser::parse_bibtex_stream(input, |item| {
            match item {
                crate::parser::ParsedItem::Entry(mut entry) => {
                    for field in &mut entry.fields {
                        db.expand_value_for_parse(
                            &mut field.value,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                    }
                    db.push_entry_with_source(entry, None);
                }
                crate::parser::ParsedItem::Preamble(value) => {
                    let mut expanded = value;
                    db.expand_value_for_parse(
                        &mut expanded,
                        has_user_strings,
                        month_constants_shadowed,
                        &mut expanded_variables,
                        &mut expansion_stack,
                        &mut concat_cache,
                    )?;
                    db.push_preamble_with_source(expanded, None);
                }
                crate::parser::ParsedItem::Comment(text) => {
                    db.push_comment_with_source(Cow::Borrowed(text), None);
                }
                crate::parser::ParsedItem::String(name, value) => {
                    db.push_string_with_source(Cow::Borrowed(name), value, None);
                }
            }
            Ok(())
        })?;

        return Ok(db);
    }

    db.block_order.reserve(input_scan.at_count);

    // Strategy 2: no `@string` keyword follows the first regular entry.
    if !input_may_have_late_string_definition(input) {
        let mut pending_preambles = Vec::new();
        let mut expanded_variables = ExpansionCache::with_capacity(0);
        let mut expansion_stack = Vec::new();
        let mut concat_cache = ConcatCache::new();
        // Computed when the first entry arrives and reused afterwards.
        let mut month_constants_shadowed = None;

        crate::parser::parse_bibtex_stream(input, |item| {
            match item {
                crate::parser::ParsedItem::Entry(mut entry) => {
                    let has_user_strings = !db.strings.is_empty();
                    let month_constants_shadowed = *month_constants_shadowed
                        .get_or_insert_with(|| {
                            has_user_strings && user_strings_shadow_month_constants(&db.strings)
                        });
                    for field in &mut entry.fields {
                        db.expand_value_for_parse(
                            &mut field.value,
                            has_user_strings,
                            month_constants_shadowed,
                            &mut expanded_variables,
                            &mut expansion_stack,
                            &mut concat_cache,
                        )?;
                    }
                    db.push_entry_with_source(entry, None);
                }
                crate::parser::ParsedItem::Preamble(value) => {
                    // Stored unexpanded for now; expanded after the stream.
                    let index = db.push_preamble_with_source(value, None);
                    pending_preambles.push(index);
                }
                crate::parser::ParsedItem::String(name, value) => {
                    db.push_string_with_source(Cow::Borrowed(name), value, None);
                }
                crate::parser::ParsedItem::Comment(text) => {
                    db.push_comment_with_source(Cow::Borrowed(text), None);
                }
            }
            Ok(())
        })?;

        // Expand the deferred preambles against the complete string set.
        let has_user_strings = !db.strings.is_empty();
        let month_constants_shadowed =
            has_user_strings && user_strings_shadow_month_constants(&db.strings);
        for index in pending_preambles {
            // Move the value out so `db` can be borrowed for the expansion.
            let mut expanded = std::mem::take(&mut db.preambles[index].value);
            db.expand_value_for_parse(
                &mut expanded,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            db.preambles[index].value = expanded;
        }

        return Ok(db);
    }

    // Strategy 3: collect every item first, expand afterwards.
    let mut entry_indices = Vec::new();
    let mut preamble_indices = Vec::new();

    crate::parser::parse_bibtex_stream(input, |item| {
        match item {
            crate::parser::ParsedItem::Entry(entry) => {
                let index = db.entries.len();
                db.push_entry_with_source(entry, None);
                entry_indices.push(index);
            }
            crate::parser::ParsedItem::Preamble(value) => {
                let index = db.push_preamble_with_source(value, None);
                preamble_indices.push(index);
            }
            crate::parser::ParsedItem::String(name, value) => {
                db.push_string_with_source(Cow::Borrowed(name), value, None);
            }
            crate::parser::ParsedItem::Comment(text) => {
                db.push_comment_with_source(Cow::Borrowed(text), None);
            }
        }
        Ok(())
    })?;

    let has_user_strings = !db.strings.is_empty();
    let month_constants_shadowed =
        has_user_strings && user_strings_shadow_month_constants(&db.strings);
    let mut expanded_variables = ExpansionCache::with_capacity(db.strings.len());
    let mut expansion_stack = Vec::new();
    let mut concat_cache = ConcatCache::new();

    for entry_index in entry_indices {
        let field_count = db.entries[entry_index].fields.len();
        for field_index in 0..field_count {
            // Move the value out so `db` can be borrowed for the expansion.
            let mut value =
                std::mem::take(&mut db.entries[entry_index].fields[field_index].value);
            db.expand_value_for_parse(
                &mut value,
                has_user_strings,
                month_constants_shadowed,
                &mut expanded_variables,
                &mut expansion_stack,
                &mut concat_cache,
            )?;
            db.entries[entry_index].fields[field_index].value = value;
        }
    }

    for preamble_index in preamble_indices {
        let mut expanded = std::mem::take(&mut db.preambles[preamble_index].value);
        db.expand_value_for_parse(
            &mut expanded,
            has_user_strings,
            month_constants_shadowed,
            &mut expanded_variables,
            &mut expansion_stack,
            &mut concat_cache,
        )?;
        db.preambles[preamble_index].value = expanded;
    }

    Ok(db)
}
1071
/// Parses `input` strictly while keeping the source span of every item.
///
/// All items are collected first and then handed to `from_raw_items`.
///
/// # Errors
///
/// Propagates errors from the span-aware stream parser and from
/// `from_raw_items`.
fn parse_with_spans(input: &'a str) -> Result<Self> {
    let mut raw_items = Vec::new();
    crate::parser::parse_bibtex_stream_with_spans(input, |item, span, _raw| {
        raw_items.push(RawBuildItem::Parsed(item, span));
        Ok(())
    })?;
    Self::from_raw_items(raw_items)
}
1080
1081 fn parse_tolerant(input: &'a str, capture_source: bool) -> Result<Self> {
1082 let mut raw_items = Vec::new();
1083 let mut remaining = input;
1084
1085 loop {
1086 crate::parser::lexer::skip_whitespace(&mut remaining);
1087 if remaining.is_empty() {
1088 break;
1089 }
1090
1091 let start = input.len() - remaining.len();
1092 match crate::parser::parse_item(&mut remaining) {
1093 Ok(item) => {
1094 let end = input.len() - remaining.len();
1095 raw_items.push(RawBuildItem::Parsed(item, source_span(input, start, end)));
1096 }
1097 Err(err) => {
1098 let end = next_recovery_boundary(input, start);
1099 let source = capture_source.then(|| source_span(input, start, end));
1100 raw_items.push(RawBuildItem::Failed(FailedBlock {
1101 raw: Cow::Borrowed(&input[start..end]),
1102 error: format!("Failed to parse entry: {err}"),
1103 source,
1104 }));
1105 remaining = &input[end..];
1106 }
1107 }
1108 }
1109
1110 Self::from_raw_items(raw_items)
1111 }
1112
/// Assembles a library from items collected by a span-aware or tolerant
/// parse.
///
/// The work is done in two passes: every string definition is registered
/// first, so that entries and preambles can be expanded against the complete
/// set regardless of where the definitions appear in the input. The second
/// pass then adds all blocks in their original order.
///
/// # Errors
///
/// Propagates errors from value expansion.
fn from_raw_items(raw_items: Vec<RawBuildItem<'a>>) -> Result<Self> {
    let mut library = Self::new();

    // Pass 1: register string definitions only; block order is filled later.
    for raw_item in &raw_items {
        if let RawBuildItem::Parsed(crate::parser::ParsedItem::String(name, value), span) =
            raw_item
        {
            library.register_string_definition(Cow::Borrowed(name), value.clone(), Some(*span));
        }
    }

    let has_user_strings = !library.strings.is_empty();
    let month_constants_shadowed =
        has_user_strings && user_strings_shadow_month_constants(&library.strings);
    let mut expanded_variables = ExpansionCache::with_capacity(library.strings.len());
    let mut expansion_stack = Vec::new();
    let mut concat_cache = ConcatCache::new();
    // Index of the next string definition in registration order.
    let mut string_index = 0;

    // Pass 2: add every block in input order, expanding values on the way.
    for raw_item in raw_items {
        match raw_item {
            RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(mut entry), span) => {
                for field in &mut entry.fields {
                    library.expand_value_for_parse(
                        &mut field.value,
                        has_user_strings,
                        month_constants_shadowed,
                        &mut expanded_variables,
                        &mut expansion_stack,
                        &mut concat_cache,
                    )?;
                }
                library.push_entry_with_source(entry, Some(span));
            }
            RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _) => {
                // Already stored in pass 1; only its position is recorded.
                library.block_order.push(BlockKind::String(string_index));
                string_index += 1;
            }
            RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(mut value), span) => {
                library.expand_value_for_parse(
                    &mut value,
                    has_user_strings,
                    month_constants_shadowed,
                    &mut expanded_variables,
                    &mut expansion_stack,
                    &mut concat_cache,
                )?;
                library.push_preamble_with_source(value, Some(span));
            }
            RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(text), span) => {
                library.push_comment_with_source(Cow::Borrowed(text), Some(span));
            }
            RawBuildItem::Failed(failed) => library.push_failed_block(failed),
        }
    }

    Ok(library)
}
1171
1172 pub fn merge(&mut self, other: Self) {
1174 let entry_offset = self.entries.len();
1175 let string_offset = self.strings.len();
1176 let preamble_offset = self.preambles.len();
1177 let comment_offset = self.comments.len();
1178 let failed_offset = self.failed_blocks.len();
1179 let other_entry_count = other.entries.len();
1180 let other_entry_sources = other.entry_sources;
1181
1182 self.entries.extend(other.entries);
1183 match (&mut self.entry_sources, other_entry_sources) {
1184 (Some(sources), Some(other_sources)) => sources.extend(other_sources),
1185 (Some(sources), None) => {
1186 sources.extend(std::iter::repeat(None).take(other_entry_count));
1187 }
1188 (None, Some(other_sources)) => {
1189 let mut sources = vec![None; entry_offset];
1190 sources.extend(other_sources);
1191 self.entry_sources = Some(sources);
1192 }
1193 (None, None) => {}
1194 }
1195 self.preambles.extend(other.preambles);
1196 self.comments.extend(other.comments);
1197 self.failed_blocks.extend(other.failed_blocks);
1198
1199 for definition in other.strings {
1200 let index = self.strings.len();
1201 self.string_lookup.insert(definition.name.clone(), index);
1202 self.strings.push(definition);
1203 }
1204
1205 self.block_order
1206 .extend(other.block_order.into_iter().map(|kind| match kind {
1207 BlockKind::Entry(index) => BlockKind::Entry(entry_offset + index),
1208 BlockKind::String(index) => BlockKind::String(string_offset + index),
1209 BlockKind::Preamble(index) => BlockKind::Preamble(preamble_offset + index),
1210 BlockKind::Comment(index) => BlockKind::Comment(comment_offset + index),
1211 BlockKind::Failed(index) => BlockKind::Failed(failed_offset + index),
1212 }));
1213 }
1214
/// Merges the per-file libraries into one, in the order given.
#[cfg(feature = "parallel")]
fn merge_libraries_parallel(libraries: Vec<Library<'static>>) -> Library<'static> {
    libraries
        .into_iter()
        .fold(Library::new(), |mut merged, library| {
            merged.merge(library);
            merged
        })
}
1223
1224 #[must_use]
1226 pub fn entries(&self) -> &[Entry<'a>] {
1227 &self.entries
1228 }
1229
/// Returns the entry list for in-place modification.
///
/// NOTE(review): the block order and the span list are indexed by position
/// in this vector and are not updated here; confirm callers only edit
/// entries rather than adding or removing them.
#[must_use]
pub fn entries_mut(&mut self) -> &mut Vec<Entry<'a>> {
    &mut self.entries
}
1235
1236 #[must_use]
1238 pub fn strings(&self) -> &[StringDefinition<'a>] {
1239 &self.strings
1240 }
1241
/// Looks up the string definition called `name`, if any.
#[must_use]
pub fn string(&self, name: &str) -> Option<&StringDefinition<'a>> {
    get_string_definition(&self.strings, &self.string_lookup, name)
}
1247
1248 #[must_use]
1250 pub fn string_value(&self, name: &str) -> Option<&Value<'a>> {
1251 self.string(name).map(|definition| &definition.value)
1252 }
1253
1254 #[must_use]
1256 pub fn preambles(&self) -> &[Preamble<'a>] {
1257 &self.preambles
1258 }
1259
/// Returns the preamble list for in-place modification.
///
/// NOTE(review): the block order refers to preambles by position and is not
/// updated here; confirm callers do not add or remove elements.
#[must_use]
pub fn preambles_mut(&mut self) -> &mut Vec<Preamble<'a>> {
    &mut self.preambles
}
1265
1266 #[must_use]
1268 pub fn comments(&self) -> &[Comment<'a>] {
1269 &self.comments
1270 }
1271
/// Returns the comment list for in-place modification.
///
/// NOTE(review): the block order refers to comments by position and is not
/// updated here; confirm callers do not add or remove elements.
#[must_use]
pub fn comments_mut(&mut self) -> &mut Vec<Comment<'a>> {
    &mut self.comments
}
1277
1278 #[must_use]
1280 pub fn failed_blocks(&self) -> &[FailedBlock<'a>] {
1281 &self.failed_blocks
1282 }
1283
1284 #[must_use]
1286 pub fn blocks(&self) -> Vec<Block<'_, 'a>> {
1287 self.block_order
1288 .iter()
1289 .map(|kind| match *kind {
1290 BlockKind::Entry(index) => Block::Entry(
1291 &self.entries[index],
1292 self.entry_sources
1293 .as_ref()
1294 .and_then(|sources| sources.get(index).copied().flatten()),
1295 ),
1296 BlockKind::String(index) => Block::String(&self.strings[index]),
1297 BlockKind::Preamble(index) => Block::Preamble(&self.preambles[index]),
1298 BlockKind::Comment(index) => Block::Comment(&self.comments[index]),
1299 BlockKind::Failed(index) => Block::Failed(&self.failed_blocks[index]),
1300 })
1301 .collect()
1302 }
1303
1304 #[must_use]
1306 pub fn find_by_key(&self, key: &str) -> Option<&Entry<'a>> {
1307 self.entries.iter().find(|e| e.key == key)
1308 }
1309
1310 #[must_use]
1312 pub fn find_by_key_ignore_case(&self, key: &str) -> Option<&Entry<'a>> {
1313 self.entries
1314 .iter()
1315 .find(|entry| entry.key.eq_ignore_ascii_case(key))
1316 }
1317
1318 #[must_use]
1320 pub fn contains_key(&self, key: &str) -> bool {
1321 self.find_by_key(key).is_some()
1322 }
1323
1324 #[must_use]
1326 pub fn find_by_type(&self, ty: &str) -> Vec<&Entry<'a>> {
1327 self.entries
1328 .iter()
1329 .filter(|e| e.ty.canonical_name().eq_ignore_ascii_case(ty))
1330 .collect()
1331 }
1332
1333 #[must_use]
1335 pub fn find_by_field(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1336 self.entries
1337 .iter()
1338 .filter(|e| {
1339 e.get_as_string(field)
1340 .as_ref()
1341 .is_some_and(|v| v.contains(value))
1342 })
1343 .collect()
1344 }
1345
1346 #[must_use]
1348 pub fn find_by_field_ignore_case(&self, field: &str, value: &str) -> Vec<&Entry<'a>> {
1349 self.entries
1350 .iter()
1351 .filter(|entry| {
1352 entry
1353 .get_as_string_ignore_case(field)
1354 .as_ref()
1355 .is_some_and(|field_value| contains_case_insensitive(field_value, value))
1356 })
1357 .collect()
1358 }
1359
1360 #[must_use]
1362 pub fn find_by_doi(&self, doi: &str) -> Vec<&Entry<'a>> {
1363 let Some(needle) = normalize_doi(doi) else {
1364 return Vec::new();
1365 };
1366
1367 self.entries
1368 .iter()
1369 .filter(|entry| entry.doi().as_ref().is_some_and(|value| value == &needle))
1370 .collect()
1371 }
1372
    /// Recursively expands `value`, memoizing intermediate results in the
    /// caller-supplied caches.
    ///
    /// * `expanded_variables` – cache of already expanded user string names.
    /// * `expansion_stack` – names currently being expanded; used to detect
    ///   circular definitions.
    /// * `concat_cache` – cache of already expanded concatenations, keyed by
    ///   their unexpanded parts.
    ///
    /// # Errors
    ///
    /// * `Error::CircularReference` when a variable is reached again while it
    ///   is still on `expansion_stack`; the message lists the chain joined by
    ///   `" -> "`.
    /// * `Error::UndefinedVariable` when a name is neither a user string nor
    ///   something `get_month_expansion` recognizes.
    fn smart_expand_value_cached(
        &self,
        value: Value<'a>,
        expanded_variables: &mut ExpansionCache<'a>,
        expansion_stack: &mut Vec<Cow<'a, str>>,
        concat_cache: &mut ConcatCache<'a>,
    ) -> Result<Value<'a>> {
        match value {
            // Nothing to expand: hand the value back untouched.
            Value::Literal(_) | Value::Number(_) => Ok(value),

            Value::Variable(name) => {
                let name_text = name.as_ref();
                // Fast path: this name was fully expanded earlier.
                if let Some(expanded) = expanded_variables.get_cloned(name_text) {
                    return Ok(expanded);
                }

                // Seeing the name while it is still being expanded means the
                // definitions form a cycle; report the whole chain.
                if expansion_stack.iter().any(|v| v.as_ref() == name_text) {
                    let mut cycle = expansion_stack
                        .iter()
                        .map(std::convert::AsRef::as_ref)
                        .collect::<Vec<_>>()
                        .join(" -> ");
                    if !cycle.is_empty() {
                        cycle.push_str(" -> ");
                    }
                    cycle.push_str(name_text);
                    return Err(Error::CircularReference(cycle));
                }

                // User-defined strings are consulted before month expansions.
                if let Some(user_value) =
                    get_string_value(&self.strings, &self.string_lookup, name_text)
                {
                    expansion_stack.push(name.clone());
                    let expanded = self.smart_expand_value_cached(
                        user_value.clone(),
                        expanded_variables,
                        expansion_stack,
                        concat_cache,
                    );
                    // Pop before propagating any error so the stack stays
                    // balanced for the caller.
                    expansion_stack.pop();

                    let expanded = expanded?;
                    expanded_variables.insert(name, expanded.clone());
                    Ok(expanded)
                } else {
                    // Not a user string: fall back to the month expansion.
                    // These results are not stored in `expanded_variables`.
                    get_month_expansion(name_text).map_or_else(
                        || {
                            Err(Error::UndefinedVariable(name_text.to_string()))
                        },
                        |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
                    )
                }
            }

            Value::Concat(parts) => {
                // Identical concatenations are expanded only once.
                if let Some(expanded) = concat_cache.get_cloned(&parts) {
                    return Ok(expanded);
                }

                // `parts` is consumed below, so keep a copy to use as cache key.
                let cache_key = parts.clone();
                let expanded = self.expand_concatenation_cached(
                    parts.into_vec(),
                    expanded_variables,
                    expansion_stack,
                    concat_cache,
                )?;
                concat_cache.insert(cache_key, expanded.clone());
                Ok(expanded)
            }
        }
    }
1451
1452 pub fn expand_value_ref(&self, value: &Value<'a>) -> Result<Value<'a>> {
1454 match value {
1455 Value::Literal(_) | Value::Number(_) => Ok(value.clone()),
1457
1458 Value::Variable(name) => {
1460 get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1462 || {
1463 get_month_expansion(name.as_ref()).map_or_else(
1465 || {
1466 Err(Error::UndefinedVariable(name.as_ref().to_string()))
1468 },
1469 |month_value| Ok(Value::Literal(Cow::Borrowed(month_value))),
1470 )
1471 },
1472 |user_value| self.expand_value_ref(user_value),
1473 )
1474 }
1475
1476 Value::Concat(parts) => {
1478 let cloned_parts = parts.to_vec();
1479 self.expand_concatenation(cloned_parts)
1480 }
1481 }
1482 }
1483
1484 fn expand_concatenation(&self, parts: Vec<Value<'a>>) -> Result<Value<'a>> {
1486 let mut expanded_variables = ExpansionCache::with_capacity(0);
1487 let mut expansion_stack = Vec::new();
1488 let mut concat_cache = ConcatCache::new();
1489 self.expand_concatenation_cached(
1490 parts,
1491 &mut expanded_variables,
1492 &mut expansion_stack,
1493 &mut concat_cache,
1494 )
1495 }
1496
1497 fn expand_concatenation_cached(
1499 &self,
1500 parts: Vec<Value<'a>>,
1501 expanded_variables: &mut ExpansionCache<'a>,
1502 expansion_stack: &mut Vec<Cow<'a, str>>,
1503 concat_cache: &mut ConcatCache<'a>,
1504 ) -> Result<Value<'a>> {
1505 let mut expanded_parts = Vec::with_capacity(parts.len());
1506
1507 for part in parts {
1509 let expanded = self.smart_expand_value_cached(
1510 part,
1511 expanded_variables,
1512 expansion_stack,
1513 concat_cache,
1514 )?;
1515 expanded_parts.push(expanded);
1516 }
1517
1518 if expanded_parts
1520 .iter()
1521 .all(|p| matches!(p, Value::Literal(_) | Value::Number(_)))
1522 {
1523 let combined = concatenate_simple_values(&expanded_parts);
1524 Ok(Value::Literal(Cow::Owned(combined)))
1525 } else {
1526 Ok(Value::Concat(expanded_parts.into_boxed_slice()))
1527 }
1528 }
1529
1530 pub fn get_expanded_string(&self, value: &Value<'a>) -> Result<String> {
1532 match value {
1533 Value::Literal(s) => Ok(s.to_string()),
1534 Value::Number(n) => Ok(n.to_string()),
1535 Value::Variable(name) => {
1536 get_string_value(&self.strings, &self.string_lookup, name.as_ref()).map_or_else(
1538 || {
1539 get_month_expansion(name.as_ref()).map_or_else(
1541 || {
1542 Err(Error::UndefinedVariable(name.as_ref().to_string()))
1544 },
1545 |month_value| Ok(month_value.to_string()),
1546 )
1547 },
1548 |user_value| self.get_expanded_string(user_value),
1549 )
1550 }
1551 Value::Concat(parts) => {
1552 let mut result = String::new();
1553 for part in parts.iter() {
1554 result.push_str(&self.get_expanded_string(part)?);
1555 }
1556 Ok(result)
1557 }
1558 }
1559 }
1560
1561 #[must_use]
1563 pub fn into_owned(self) -> Library<'static> {
1564 let strings = self
1565 .strings
1566 .into_iter()
1567 .map(StringDefinition::into_owned)
1568 .collect::<Vec<_>>();
1569 let mut string_lookup = AHashMap::with_capacity(strings.len());
1570 for (index, definition) in strings.iter().enumerate() {
1571 string_lookup.insert(Cow::Owned(definition.name.to_string()), index);
1572 }
1573
1574 Library {
1575 entries: self.entries.into_iter().map(Entry::into_owned).collect(),
1576 entry_sources: self.entry_sources,
1577 strings,
1578 string_lookup,
1579 preambles: self
1580 .preambles
1581 .into_iter()
1582 .map(Preamble::into_owned)
1583 .collect(),
1584 comments: self.comments.into_iter().map(Comment::into_owned).collect(),
1585 failed_blocks: self
1586 .failed_blocks
1587 .into_iter()
1588 .map(FailedBlock::into_owned)
1589 .collect(),
1590 block_order: self.block_order,
1591 }
1592 }
1593
    /// Adds a string definition named `name` with the given `value`.
    ///
    /// Forwards to `push_string_with_source` with a borrowed name and `None`
    /// as the source.
    pub fn add_string(&mut self, name: &'a str, value: Value<'a>) {
        self.push_string_with_source(Cow::Borrowed(name), value, None);
    }
1598
    /// Adds `entry` to the library.
    ///
    /// Forwards to `push_entry_with_source` with `None` as the source.
    pub fn add_entry(&mut self, entry: Entry<'a>) {
        self.push_entry_with_source(entry, None);
    }
1603
    /// Adds a preamble holding `value` to the library.
    ///
    /// Forwards to `push_preamble_with_source` with `None` as the source.
    pub fn add_preamble(&mut self, value: Value<'a>) {
        self.push_preamble_with_source(value, None);
    }
1608
    /// Adds a comment with the given text to the library.
    ///
    /// Forwards to `push_comment_with_source` with the text borrowed and
    /// `None` as the source.
    pub fn add_comment(&mut self, comment: &'a str) {
        self.push_comment_with_source(Cow::Borrowed(comment), None);
    }
1613
1614 pub fn resolve_strings(&mut self) -> Result<()> {
1616 let has_user_strings = !self.strings.is_empty();
1617 let month_constants_shadowed =
1618 has_user_strings && user_strings_shadow_month_constants(&self.strings);
1619 let mut expanded_variables = ExpansionCache::with_capacity(self.strings.len());
1620 let mut expansion_stack = Vec::new();
1621 let mut concat_cache = ConcatCache::new();
1622
1623 for entry_index in 0..self.entries.len() {
1624 let field_count = self.entries[entry_index].fields.len();
1625 for field_index in 0..field_count {
1626 let mut value =
1627 std::mem::take(&mut self.entries[entry_index].fields[field_index].value);
1628 self.expand_value_for_parse(
1629 &mut value,
1630 has_user_strings,
1631 month_constants_shadowed,
1632 &mut expanded_variables,
1633 &mut expansion_stack,
1634 &mut concat_cache,
1635 )?;
1636 self.entries[entry_index].fields[field_index].value = value;
1637 }
1638 }
1639
1640 for preamble_index in 0..self.preambles.len() {
1641 let mut value = std::mem::take(&mut self.preambles[preamble_index].value);
1642 self.expand_value_for_parse(
1643 &mut value,
1644 has_user_strings,
1645 month_constants_shadowed,
1646 &mut expanded_variables,
1647 &mut expansion_stack,
1648 &mut concat_cache,
1649 )?;
1650 self.preambles[preamble_index].value = value;
1651 }
1652
1653 Ok(())
1654 }
1655
1656 pub fn normalize_doi_fields(&mut self) {
1658 for entry in &mut self.entries {
1659 for field in &mut entry.fields {
1660 if field.name.eq_ignore_ascii_case("doi") {
1661 if let Some(normalized) = normalize_doi(&value_to_plain_string(&field.value)) {
1662 field.value = Value::Literal(Cow::Owned(normalized));
1663 }
1664 }
1665 }
1666 }
1667 }
1668
1669 pub fn normalize_months(&mut self, style: MonthStyle) {
1671 for entry in &mut self.entries {
1672 for field in &mut entry.fields {
1673 if field.name.eq_ignore_ascii_case("month") {
1674 if let Some(month) =
1675 normalize_month_value(&value_to_plain_string(&field.value), style)
1676 {
1677 field.value = month;
1678 }
1679 }
1680 }
1681 }
1682 }
1683
1684 pub fn normalize_fields(&mut self, options: FieldNormalizeOptions) {
1686 for entry in &mut self.entries {
1687 for field in &mut entry.fields {
1688 let mut name = if options.biblatex_aliases {
1689 canonical_field_alias(&field.name)
1690 .unwrap_or_else(|| field.name.as_ref())
1691 .to_string()
1692 } else {
1693 field.name.to_string()
1694 };
1695
1696 if options.name_case == FieldNameCase::Lowercase {
1697 name.make_ascii_lowercase();
1698 }
1699
1700 if name != field.name {
1701 field.name = Cow::Owned(name);
1702 }
1703 }
1704 }
1705 }
1706
1707 pub fn sort(&mut self, options: SortOptions) {
1709 if options.fields_by_name {
1710 for entry in &mut self.entries {
1711 entry
1712 .fields
1713 .sort_by(|left, right| left.name.cmp(&right.name));
1714 }
1715 }
1716
1717 if options.entries_by_key {
1718 if let Some(sources) = self.entry_sources.take() {
1719 let mut entries = self.entries.drain(..).zip(sources).collect::<Vec<_>>();
1720 entries.sort_by(|(left, _), (right, _)| left.key.cmp(&right.key));
1721 let (sorted_entries, sorted_sources): (Vec<_>, Vec<_>) =
1722 entries.into_iter().unzip();
1723 self.entries = sorted_entries;
1724 self.entry_sources = Some(sorted_sources);
1725 } else {
1726 self.entries.sort_by(|left, right| left.key.cmp(&right.key));
1727 }
1728 self.rebuild_grouped_block_order();
1729 }
1730 }
1731
1732 fn rebuild_grouped_block_order(&mut self) {
1733 self.block_order.clear();
1734 self.block_order
1735 .extend((0..self.strings.len()).map(BlockKind::String));
1736 self.block_order
1737 .extend((0..self.preambles.len()).map(BlockKind::Preamble));
1738 self.block_order
1739 .extend((0..self.comments.len()).map(BlockKind::Comment));
1740 self.block_order
1741 .extend((0..self.entries.len()).map(BlockKind::Entry));
1742 self.block_order
1743 .extend((0..self.failed_blocks.len()).map(BlockKind::Failed));
1744 }
1745
1746 #[must_use]
1749 pub fn validate(
1750 &self,
1751 level: ValidationLevel,
1752 ) -> Vec<(usize, &Entry<'a>, Vec<ValidationError>)> {
1753 let mut invalid_entries = Vec::new();
1754
1755 for (index, entry) in self.entries.iter().enumerate() {
1756 if let Err(errors) = entry.validate(level) {
1757 invalid_entries.push((index, entry, errors));
1758 }
1759 }
1760
1761 invalid_entries
1762 }
1763
1764 #[must_use]
1767 pub fn find_duplicate_keys(&self) -> Vec<&str> {
1768 let mut seen = std::collections::HashSet::new();
1769 let mut duplicates = std::collections::HashSet::new();
1770
1771 for entry in &self.entries {
1772 if !seen.insert(entry.key()) {
1773 duplicates.insert(entry.key());
1774 }
1775 }
1776
1777 duplicates.into_iter().collect()
1778 }
1779
1780 #[must_use]
1782 pub fn find_duplicate_keys_ignore_case(&self) -> Vec<String> {
1783 let mut seen = std::collections::HashSet::new();
1784 let mut duplicates = std::collections::HashSet::new();
1785
1786 for entry in &self.entries {
1787 let normalized_key = entry.key().to_ascii_lowercase();
1788 if !seen.insert(normalized_key.clone()) {
1789 duplicates.insert(normalized_key);
1790 }
1791 }
1792
1793 duplicates.into_iter().collect()
1794 }
1795
1796 #[must_use]
1798 pub fn find_duplicate_dois(&self) -> Vec<(String, Vec<&Entry<'a>>)> {
1799 let mut groups: AHashMap<String, Vec<&Entry<'a>>> = AHashMap::new();
1800 for entry in &self.entries {
1801 if let Some(doi) = entry.doi() {
1802 groups.entry(doi).or_default().push(entry);
1803 }
1804 }
1805
1806 groups
1807 .into_iter()
1808 .filter(|(_, entries)| entries.len() > 1)
1809 .collect()
1810 }
1811
1812 #[must_use]
1814 pub fn validate_comprehensive(&self, level: ValidationLevel) -> ValidationReport<'_> {
1815 let invalid_entries = self.validate(level);
1816 let duplicate_keys = self.find_duplicate_keys();
1817 let empty_entries = self.find_empty_entries();
1818
1819 ValidationReport {
1820 invalid_entries,
1821 duplicate_keys,
1822 empty_entries,
1823 total_entries: self.entries.len(),
1824 validation_level: level,
1825 }
1826 }
1827
1828 fn find_empty_entries(&self) -> Vec<(usize, &Entry<'a>)> {
1830 self.entries
1831 .iter()
1832 .enumerate()
1833 .filter(|(_, entry)| entry.fields().is_empty())
1834 .collect()
1835 }
1836
1837 #[must_use]
1839 pub fn stats(&self) -> LibraryStats {
1840 let mut type_counts = AHashMap::new();
1841 for entry in &self.entries {
1842 *type_counts.entry(entry.ty.to_string()).or_insert(0) += 1;
1843 }
1844
1845 LibraryStats {
1846 total_entries: self.entries.len(),
1847 total_strings: self.strings.len(),
1848 total_preambles: self.preambles.len(),
1849 total_comments: self.comments.len(),
1850 entries_by_type: type_counts,
1851 }
1852 }
1853}
1854
/// Summary counts for a library, as produced by `Library::stats`.
#[derive(Debug, Clone)]
pub struct LibraryStats {
    /// Number of entries in the library.
    pub total_entries: usize,
    /// Number of string definitions in the library.
    pub total_strings: usize,
    /// Number of preamble blocks in the library.
    pub total_preambles: usize,
    /// Number of comment blocks in the library.
    pub total_comments: usize,
    /// Number of entries per entry type, keyed by the type's `to_string()` form.
    pub entries_by_type: AHashMap<String, usize>,
}
1869
/// Combined result of the library-level checks run by
/// `Library::validate_comprehensive`.
#[derive(Debug, Clone)]
pub struct ValidationReport<'a> {
    /// `(entry index, entry, errors)` for each entry that failed validation.
    pub invalid_entries: Vec<(usize, &'a Entry<'a>, Vec<ValidationError>)>,
    /// Citation keys that are used by more than one entry.
    pub duplicate_keys: Vec<&'a str>,
    /// `(entry index, entry)` for each entry that has no fields.
    pub empty_entries: Vec<(usize, &'a Entry<'a>)>,
    /// Total number of entries in the library at the time of the check.
    pub total_entries: usize,
    /// Validation level the per-entry checks were run with.
    pub validation_level: ValidationLevel,
}
1884
1885impl ValidationReport<'_> {
1886 #[must_use]
1888 pub fn is_valid(&self) -> bool {
1889 self.invalid_entries.is_empty()
1890 && self.duplicate_keys.is_empty()
1891 && self.empty_entries.is_empty()
1892 }
1893
1894 #[must_use]
1896 pub fn total_issues(&self) -> usize {
1897 self.invalid_entries.len() + self.duplicate_keys.len() + self.empty_entries.len()
1898 }
1899
1900 #[must_use]
1902 pub fn issue_summary(&self) -> IssueSummary {
1903 let mut errors = 0;
1904 let mut warnings = 0;
1905 let mut infos = 0;
1906
1907 for (_, _, validation_errors) in &self.invalid_entries {
1908 for error in validation_errors {
1909 match error.severity {
1910 crate::model::ValidationSeverity::Error => errors += 1,
1911 crate::model::ValidationSeverity::Warning => warnings += 1,
1912 crate::model::ValidationSeverity::Info => infos += 1,
1913 }
1914 }
1915 }
1916
1917 errors += self.duplicate_keys.len() + self.empty_entries.len();
1919
1920 IssueSummary {
1921 errors,
1922 warnings,
1923 infos,
1924 }
1925 }
1926}
1927
/// Issue counts grouped by severity, as produced by
/// `ValidationReport::issue_summary`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssueSummary {
    /// Number of error-level issues. When produced by
    /// `ValidationReport::issue_summary` this also includes one per duplicate
    /// key and one per empty entry.
    pub errors: usize,
    /// Number of warning-level issues.
    pub warnings: usize,
    /// Number of info-level issues.
    pub infos: usize,
}
1938
1939fn concatenate_simple_values(values: &[Value]) -> String {
1941 let mut result = String::new();
1942
1943 let capacity: usize = values
1945 .iter()
1946 .map(|v| match v {
1947 Value::Literal(s) => s.len(),
1948 Value::Number(n) => n.to_string().len(),
1949 _ => 0,
1950 })
1951 .sum();
1952
1953 result.reserve(capacity);
1954
1955 for value in values {
1956 match value {
1957 Value::Literal(s) => result.push_str(s),
1958 Value::Number(n) => result.push_str(&n.to_string()),
1959 _ => {} }
1961 }
1962
1963 result
1964}
1965
/// Reports whether `haystack` contains `needle` when both are compared in
/// their Unicode-lowercased form.
///
/// An empty `needle` matches every haystack, including the empty one.
fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
    needle.is_empty() || {
        let folded_needle = needle.to_lowercase();
        haystack.to_lowercase().contains(folded_needle.as_str())
    }
}
1973
1974fn value_to_plain_string(value: &Value<'_>) -> String {
1975 match value {
1976 Value::Literal(text) => text.to_string(),
1977 Value::Number(number) => number.to_string(),
1978 Value::Variable(name) => name.to_string(),
1979 Value::Concat(parts) => parts.iter().map(value_to_plain_string).collect(),
1980 }
1981}
1982
1983fn normalize_month_value(input: &str, style: MonthStyle) -> Option<Value<'static>> {
1984 let normalized = input.trim().trim_matches(['{', '}']).to_ascii_lowercase();
1985 let month_index = match normalized.as_str() {
1986 "jan" | "january" | "1" | "01" => 1,
1987 "feb" | "february" | "2" | "02" => 2,
1988 "mar" | "march" | "3" | "03" => 3,
1989 "apr" | "april" | "4" | "04" => 4,
1990 "may" | "5" | "05" => 5,
1991 "jun" | "june" | "6" | "06" => 6,
1992 "jul" | "july" | "7" | "07" => 7,
1993 "aug" | "august" | "8" | "08" => 8,
1994 "sep" | "september" | "9" | "09" => 9,
1995 "oct" | "october" | "10" => 10,
1996 "nov" | "november" | "11" => 11,
1997 "dec" | "december" | "12" => 12,
1998 _ => return None,
1999 };
2000
2001 let text = match style {
2002 MonthStyle::Long => month_long_name(month_index),
2003 MonthStyle::Abbrev => month_abbreviation(month_index),
2004 MonthStyle::Number => return Some(Value::Number(month_index)),
2005 };
2006
2007 Some(Value::Literal(Cow::Borrowed(text)))
2008}
2009
/// Returns the full English name for month number `month` (1 = January …
/// 12 = December), or the empty string for any number outside `1..=12`.
const fn month_long_name(month: i64) -> &'static str {
    const LONG_NAMES: [(i64, &str); 12] = [
        (1, "January"),
        (2, "February"),
        (3, "March"),
        (4, "April"),
        (5, "May"),
        (6, "June"),
        (7, "July"),
        (8, "August"),
        (9, "September"),
        (10, "October"),
        (11, "November"),
        (12, "December"),
    ];

    // A plain `while` keeps this usable in const contexts, where iterator
    // adaptors are not available.
    let mut slot = 0;
    while slot < LONG_NAMES.len() {
        if LONG_NAMES[slot].0 == month {
            return LONG_NAMES[slot].1;
        }
        slot += 1;
    }
    ""
}
2027
/// Returns the lowercase three-letter abbreviation for month number `month`
/// (1 = `jan` … 12 = `dec`), or the empty string for any number outside
/// `1..=12`.
const fn month_abbreviation(month: i64) -> &'static str {
    const ABBREVIATIONS: [(i64, &str); 12] = [
        (1, "jan"),
        (2, "feb"),
        (3, "mar"),
        (4, "apr"),
        (5, "may"),
        (6, "jun"),
        (7, "jul"),
        (8, "aug"),
        (9, "sep"),
        (10, "oct"),
        (11, "nov"),
        (12, "dec"),
    ];

    // A plain `while` keeps this usable in const contexts, where iterator
    // adaptors are not available.
    let mut slot = 0;
    while slot < ABBREVIATIONS.len() {
        if ABBREVIATIONS[slot].0 == month {
            return ABBREVIATIONS[slot].1;
        }
        slot += 1;
    }
    ""
}
2045
/// Maps an alias field name to the canonical name used by this module.
///
/// Matching ignores ASCII letter case. Known aliases are `journaltitle` →
/// `journal`, `date` → `year`, `institution` → `school` and `location` →
/// `address`; every other name yields `None`.
fn canonical_field_alias(name: &str) -> Option<&'static str> {
    const ALIASES: [(&str, &str); 4] = [
        ("journaltitle", "journal"),
        ("date", "year"),
        ("institution", "school"),
        ("location", "address"),
    ];

    ALIASES
        .iter()
        .find(|(alias, _)| name.eq_ignore_ascii_case(alias))
        .map(|&(_, canonical)| canonical)
}
2059
/// Chainable builder that assembles a [`Library`] block by block.
///
/// Starts from `Library::default()` (via the derived `Default`) and hands the
/// finished library over in `build`.
#[derive(Debug, Default)]
pub struct LibraryBuilder<'a> {
    /// Library under construction.
    db: Library<'a>,
}
2065
2066impl<'a> LibraryBuilder<'a> {
2067 #[must_use]
2069 pub fn new() -> Self {
2070 Self::default()
2071 }
2072
2073 #[must_use]
2075 pub fn entry(mut self, entry: Entry<'a>) -> Self {
2076 self.db.add_entry(entry);
2077 self
2078 }
2079
2080 #[must_use]
2082 pub fn string(mut self, name: &'a str, value: Value<'a>) -> Self {
2083 self.db.add_string(name, value);
2084 self
2085 }
2086
2087 #[must_use]
2089 pub fn preamble(mut self, value: Value<'a>) -> Self {
2090 self.db.add_preamble(value);
2091 self
2092 }
2093
2094 #[must_use]
2096 pub fn comment(mut self, text: &'a str) -> Self {
2097 self.db.add_comment(text);
2098 self
2099 }
2100
2101 #[must_use]
2103 pub fn build(self) -> Library<'a> {
2104 self.db
2105 }
2106}
2107
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{EntryType, Field};

    /// Builds a path for a scratch `.bib` file inside the platform's temporary
    /// directory. The process id is part of the file name so that concurrent
    /// test runs on the same machine do not overwrite each other's files.
    fn temp_bib_path(label: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!("library_test_{}_{label}.bib", std::process::id()))
    }

    /// A string definition referenced from a field is resolved during parsing.
    #[test]
    fn test_library_parse() {
        let input = r#"
            @string{me = "John Doe"}

            @article{test2023,
                author = me,
                title = "Test Article",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);

        let entry = &library.entries()[0];
        assert_eq!(entry.get_as_string("author").unwrap(), "John Doe");
    }

    /// A plain quoted value must stay borrowed from the input.
    #[test]
    fn test_zero_copy_preservation() {
        let input = r#"
            @article{test,
                title = "This is borrowed",
                year = 2023
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        // Fail loudly if the field is missing or not a literal; an `if let`
        // here would let the test pass without checking anything.
        let Some(Value::Literal(cow)) = entry
            .fields
            .iter()
            .find(|f| f.name == "title")
            .map(|f| &f.value)
        else {
            panic!("title field should be present and parsed as a literal");
        };
        assert!(matches!(cow, Cow::Borrowed(_)));
    }

    /// Concatenating string definitions yields the joined text.
    #[test]
    fn test_concatenation_creates_owned() {
        let input = r#"
            @string{first = "Hello"}
            @string{second = "World"}

            @article{test,
                title = first # ", " # second
            }
        "#;

        let library = Library::parser().parse(input).unwrap();
        let entry = &library.entries()[0];

        assert_eq!(entry.get_as_string("title").unwrap(), "Hello, World");
    }

    /// Guards the in-memory size of `Value`.
    #[test]
    fn test_boxed_concat_memory_optimization() {
        assert!(
            std::mem::size_of::<Value>() <= 32,
            "Value enum is {} bytes, should be 32 or less",
            std::mem::size_of::<Value>()
        );
    }

    /// The field vector of a parsed entry must not over-allocate.
    #[test]
    fn test_field_vec_capacity_bounded() {
        let input = r#"
            @article{test,
                a = "1", b = "2", c = "3", d = "4", e = "5",
                f = "6", g = "7", h = "8", i = "9", j = "10"
            }
        "#;

        let db = Library::parser().parse(input).unwrap();
        let entry = &db.entries()[0];

        assert_eq!(entry.fields.len(), 10);
        assert!(
            entry.fields.capacity() <= 17,
            "Unexpected field Vec growth: len={}, capacity={}",
            entry.fields.len(),
            entry.fields.capacity()
        );
    }

    /// The builder records the blocks it is given.
    #[test]
    fn test_library_builder() {
        let library = LibraryBuilder::new()
            .string("me", Value::Literal(Cow::Borrowed("John Doe")))
            .entry(Entry {
                ty: EntryType::Article,
                key: Cow::Borrowed("test2023"),
                fields: vec![
                    Field::new("author", Value::Variable(Cow::Borrowed("me"))),
                    Field::new("title", Value::Literal(Cow::Borrowed("Test"))),
                ],
            })
            .build();

        assert_eq!(library.entries().len(), 1);
        assert_eq!(library.strings().len(), 1);
    }

    /// `stats` counts each kind of block and groups entries by type.
    #[test]
    fn test_library_stats() {
        let input = r#"
            @string{ieee = "IEEE"}
            @preamble{"Test preamble"}
            % This is a percent comment that now works properly
            @comment{This is a formal comment that works}
            @article{a1, title = "Article 1"}
            @article{a2, title = "Article 2"}
            @book{b1, title = "Book 1"}
        "#;

        let library = Library::parser().parse(input).unwrap();
        let stats = library.stats();

        assert_eq!(stats.total_entries, 3);
        assert_eq!(stats.total_strings, 1);
        assert_eq!(stats.total_preambles, 1);
        // Both the `%` line and the `@comment` block are counted.
        assert_eq!(stats.total_comments, 2);
        assert_eq!(stats.entries_by_type.get("article"), Some(&2));
        assert_eq!(stats.entries_by_type.get("book"), Some(&1));
    }

    /// Parsing several files merges their entries into one library.
    #[test]
    fn test_parse_files_parallel() {
        use std::fs::write;

        let path1 = temp_bib_path("parallel_1");
        let path2 = temp_bib_path("parallel_2");

        write(&path1, "@article{a1,title=\"A\"}").unwrap();
        write(&path2, "@article{a2,title=\"B\"}").unwrap();

        let paths = vec![path1.clone(), path2.clone()];

        let db = Library::parser().threads(2).parse_files(&paths).unwrap();

        assert_eq!(db.entries().len(), 2);

        let _ = std::fs::remove_file(path1);
        let _ = std::fs::remove_file(path2);
    }

    /// The parser builder works with and without explicit thread counts.
    #[test]
    fn test_builder_pattern_api() {
        let input = "@article{test, title = \"Test\"}";

        let db1 = Library::parser().parse(input).unwrap();
        assert_eq!(db1.entries().len(), 1);

        let db2 = Library::parser().threads(1).parse(input).unwrap();
        assert_eq!(db2.entries().len(), 1);

        #[cfg(feature = "parallel")]
        {
            use std::fs::write;

            let db3 = Library::parser().threads(4).parse(input).unwrap();
            assert_eq!(db3.entries().len(), 1);

            // Use the platform temp directory instead of a hard-coded `/tmp`,
            // which does not exist on every supported platform.
            let path1 = temp_bib_path("builder_api_1");
            let path2 = temp_bib_path("builder_api_2");
            write(&path1, "@article{a1, title=\"A\"}").unwrap();
            write(&path2, "@article{a2, title=\"B\"}").unwrap();

            let paths = vec![path1.clone(), path2.clone()];
            let db4 = Library::parser().threads(2).parse_files(&paths).unwrap();
            assert_eq!(db4.entries().len(), 2);

            let _ = std::fs::remove_file(path1);
            let _ = std::fs::remove_file(path2);
        }
    }
}