biblatex/
lib.rs

1/*!
2A crate for parsing Bib(La)TeX files.
3
4The main API entrypoint is the [`Bibliography`] struct.
5
6# Example
7
8Finding out the author of a work.
9```
10# use biblatex::Bibliography;
11# fn main() -> std::io::Result<()> {
12let src = "@book{tolkien1937, author = {J. R. R. Tolkien}}";
13let bibliography = Bibliography::parse(src).unwrap();
14let entry = bibliography.get("tolkien1937").unwrap();
15let author = entry.author().unwrap();
16assert_eq!(author[0].name, "Tolkien");
17# Ok(())
18# }
19```
20*/
21
22#![deny(missing_docs)]
23
24mod chunk;
25mod macros;
26mod mechanics;
27mod raw;
28mod resolve;
29mod types;
30
31pub use chunk::{Chunk, Chunks, ChunksExt, ChunksRef};
32pub use mechanics::EntryType;
33pub use raw::{
34    Field, Pair, ParseError, ParseErrorKind, RawBibliography, RawChunk, RawEntry, Token,
35};
36pub use types::*;
37
38use std::collections::BTreeMap;
39use std::fmt;
40use std::fmt::{Debug, Display, Formatter, Write};
41
42use macros::*;
43use mechanics::{is_verbatim_field, AuthorMode, PagesChapterMode};
44
45use paste::paste;
46
47#[cfg(feature = "serde")]
48use serde::{Deserialize, Serialize};
49
/// A fully parsed bibliography.
///
/// Entries are kept in insertion order in a vector; a separate map resolves
/// citation keys (and aliases registered for them) to positions in that
/// vector.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Bibliography {
    /// The bibliography entries.
    entries: Vec<Entry>,
    /// Maps from citation keys (including aliases) to indices in `entries`.
    keys: BTreeMap<String, usize>,
}
59
/// A bibliography entry containing chunk fields, which can be parsed into more
/// specific types on demand.
///
/// Field names are expected to be lowercase: [`Entry::set`] lowercases the
/// name before storing it, while [`Entry::get`] looks the name up as given.
#[derive(Debug, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Entry {
    /// The citation key.
    pub key: String,
    /// Denotes the type of bibliography item (e.g., `Article`).
    pub entry_type: EntryType,
    /// Maps from field names to their associated chunk vectors.
    pub fields: BTreeMap<String, Chunks>,
}
72
/// Errors that can occur when retrieving a field of an [`Entry`].
#[derive(Debug, Clone, PartialEq)]
pub enum RetrievalError {
    /// The entry has no field with this name. The payload is the name of the
    /// field that was requested.
    Missing(String),
    /// The field is present but contains malformed data; the payload is the
    /// error produced while parsing it.
    TypeError(TypeError),
}
81
82impl Display for RetrievalError {
83    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
84        match self {
85            Self::Missing(s) => write!(f, "field {} is missing", s),
86            Self::TypeError(err) => write!(f, "{}", err),
87        }
88    }
89}
90
91impl std::error::Error for RetrievalError {
92    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
93        match self {
94            Self::Missing(_) => None,
95            Self::TypeError(err) => Some(err),
96        }
97    }
98}
99
100impl From<TypeError> for RetrievalError {
101    fn from(err: TypeError) -> Self {
102        Self::TypeError(err)
103    }
104}
105
106fn convert_result<T>(err: Result<T, RetrievalError>) -> Result<Option<T>, TypeError> {
107    match err {
108        Ok(val) => Ok(Some(val)),
109        Err(RetrievalError::Missing(_)) => Ok(None),
110        Err(RetrievalError::TypeError(err)) => Err(err),
111    }
112}
113
114impl Bibliography {
115    /// Create a new, empty bibliography.
116    pub fn new() -> Self {
117        Self::default()
118    }
119
120    /// Parse a bibliography from a source string.
121    pub fn parse(src: &str) -> Result<Self, ParseError> {
122        Self::from_raw(RawBibliography::parse(src)?)
123    }
124
125    /// Construct a bibliography from a raw bibliography, with the `xdata` and
126    /// `crossref` links resolved.
127    pub fn from_raw(raw: RawBibliography) -> Result<Self, ParseError> {
128        let mut res = Self::new();
129        let abbr = &raw.abbreviations;
130
131        for entry in raw.entries {
132            // Check that the key is not repeated
133            if res.get(entry.v.key.v).is_some() {
134                return Err(ParseError::new(
135                    entry.span,
136                    ParseErrorKind::DuplicateKey(entry.v.key.v.to_string()),
137                ));
138            }
139
140            let mut fields: BTreeMap<String, Vec<Spanned<Chunk>>> = BTreeMap::new();
141            for spanned_field in entry.v.fields.into_iter() {
142                let field_key = spanned_field.key.v.to_string().to_ascii_lowercase();
143                let parsed =
144                    resolve::parse_field(&field_key, &spanned_field.value.v, abbr)?;
145                fields.insert(field_key, parsed);
146            }
147            res.insert(Entry {
148                key: entry.v.key.v.to_string(),
149                entry_type: EntryType::new(entry.v.kind.v),
150                fields,
151            });
152        }
153
154        let mut entries = res.entries.clone();
155        for entry in &mut entries {
156            entry.resolve_crossrefs(&res).map_err(|e| {
157                ParseError::new(e.span, ParseErrorKind::ResolutionError(e.kind))
158            })?;
159        }
160        res.entries = entries;
161
162        Ok(res)
163    }
164
165    /// The number of bibliography entries.
166    pub fn len(&self) -> usize {
167        self.entries.len()
168    }
169
170    /// Whether the bibliography is empty.
171    pub fn is_empty(&self) -> bool {
172        self.entries.is_empty()
173    }
174
175    /// Returns the entry with the given cite key.
176    pub fn get(&self, key: &str) -> Option<&Entry> {
177        let index = *self.keys.get(key)?;
178        self.entries.get(index)
179    }
180
181    /// Returns a mutable reference to the entry with the given cite key.
182    pub fn get_mut(&mut self, key: &str) -> Option<&mut Entry> {
183        let index = *self.keys.get(key)?;
184        self.entries.get_mut(index)
185    }
186
187    /// Insert an entry into the bibliography.
188    ///
189    /// If an entry with the same cite key is already present, the entry is
190    /// updated and the old entry is returned.
191    pub fn insert(&mut self, entry: Entry) -> Option<Entry> {
192        if let Some(prev) = self.get_mut(&entry.key) {
193            Some(std::mem::replace(prev, entry))
194        } else {
195            let index = self.entries.len();
196            self.keys.insert(entry.key.clone(), index);
197            if let Some(ids) = convert_result(entry.get_as::<Vec<String>>("ids")).unwrap()
198            {
199                for alias in ids {
200                    self.keys.insert(alias, index);
201                }
202            }
203            self.entries.push(entry);
204            None
205        }
206    }
207
208    /// Remove the entry with the given cite key.
209    pub fn remove(&mut self, key: &str) -> Option<Entry> {
210        let index = *self.keys.get(key)?;
211        let entry = self.entries.remove(index);
212
213        // Remove equal indices and update later indices.
214        self.keys.retain(|_, v| {
215            if *v > index {
216                *v -= 1;
217                true
218            } else {
219                *v != index
220            }
221        });
222
223        Some(entry)
224    }
225
226    /// Add an alias for a cite key.
227    ///
228    /// Does nothing if no entry with the given cite key exists.
229    pub fn alias(&mut self, key: &str, alias: impl Into<String>) {
230        if let Some(&index) = self.keys.get(key) {
231            self.keys.insert(alias.into(), index);
232        }
233    }
234
235    /// An iterator over the bibliography's entries.
236    pub fn iter(&'_ self) -> std::slice::Iter<'_, Entry> {
237        self.entries.iter()
238    }
239
240    /// A mutable iterator over the bibliography's entries.
241    pub fn iter_mut(&'_ mut self) -> std::slice::IterMut<'_, Entry> {
242        self.entries.iter_mut()
243    }
244
245    /// An iterator over the bibliography's entry keys.
246    pub fn keys(&self) -> impl Iterator<Item = &str> {
247        self.entries.iter().map(|entry| &*entry.key)
248    }
249
250    /// Consume this struct and return a vector of the bibliography's entries.
251    pub fn into_vec(self) -> Vec<Entry> {
252        self.entries
253    }
254
255    /// Write the entry into a writer in the BibLaTeX format.
256    pub fn write_biblatex(&self, mut sink: impl Write) -> fmt::Result {
257        let mut first = true;
258        for entry in &self.entries {
259            if !first {
260                writeln!(sink)?;
261            }
262            writeln!(sink, "{}", entry.to_biblatex_string())?;
263            first = false;
264        }
265        Ok(())
266    }
267
268    /// Serialize this bibliography into a BibLaTeX string.
269    pub fn to_biblatex_string(&self) -> String {
270        let mut biblatex = String::new();
271        self.write_biblatex(&mut biblatex).unwrap();
272        biblatex
273    }
274
275    /// Write the entry into a writer in the BibTeX format.
276    pub fn write_bibtex(&self, mut sink: impl Write) -> fmt::Result {
277        let mut first = true;
278        for entry in &self.entries {
279            if !first {
280                writeln!(sink)?;
281            }
282            writeln!(sink, "{}", entry.to_bibtex_string().map_err(|_| fmt::Error)?)?;
283            first = false;
284        }
285        Ok(())
286    }
287
288    /// Serialize this bibliography into a BibTeX string.
289    pub fn to_bibtex_string(&self) -> String {
290        let mut bibtex = String::new();
291        self.write_bibtex(&mut bibtex).unwrap();
292        bibtex
293    }
294}
295
296impl IntoIterator for Bibliography {
297    type Item = Entry;
298    type IntoIter = std::vec::IntoIter<Entry>;
299
300    fn into_iter(self) -> Self::IntoIter {
301        self.entries.into_iter()
302    }
303}
304
305impl Entry {
306    /// Construct new, empty entry.
307    pub fn new(key: String, entry_type: EntryType) -> Self {
308        Self { key, entry_type, fields: BTreeMap::new() }
309    }
310
311    /// Get the chunk slice of a field.
312    ///
313    /// The field key must be lowercase.
314    pub fn get(&'_ self, key: &str) -> Option<ChunksRef<'_>> {
315        self.fields.get(key).map(AsRef::as_ref)
316    }
317
318    /// Parse the value of a field into a specific type.
319    ///
320    /// The field key must be lowercase.
321    pub fn get_as<T: Type>(&self, key: &str) -> Result<T, RetrievalError> {
322        self.get(key)
323            .ok_or_else(|| RetrievalError::Missing(key.to_string()))?
324            .parse::<T>()
325            .map_err(Into::into)
326    }
327
328    /// Set the chunk slice for a field.
329    ///
330    /// The field key is lowercase before insertion.
331    pub fn set(&mut self, key: &str, chunks: Chunks) {
332        self.fields.insert(key.to_lowercase(), chunks);
333    }
334
335    /// Set the value of a field as a specific type.
336    ///
337    /// The field key is lowercase before insertion.
338    pub fn set_as<T: Type>(&mut self, key: &str, value: &T) {
339        self.set(key, value.to_chunks());
340    }
341
342    /// Remove a field from the entry.
343    pub fn remove(&mut self, key: &str) -> Option<Chunks> {
344        self.fields.remove(key)
345    }
346
347    /// The parents of an entry in a semantic sense (`crossref` and `xref`).
348    pub fn parents(&self) -> Result<Vec<String>, TypeError> {
349        let mut parents = vec![];
350
351        if let Some(crossref) = convert_result(self.get_as::<String>("crossref"))? {
352            parents.push(crossref);
353        }
354
355        if let Some(xrefs) = convert_result(self.get_as::<Vec<String>>("xref"))? {
356            parents.extend(xrefs);
357        }
358
359        Ok(parents)
360    }
361
362    /// Verify if the entry has the appropriate fields for its [`EntryType`].
363    pub fn verify(&self) -> Report {
364        let reqs = self.entry_type.requirements();
365        let mut missing = vec![];
366        let mut superfluous = vec![];
367
368        for field in reqs.required {
369            match field {
370                "journaltitle" => {
371                    if self
372                        .get_non_empty(field)
373                        .or_else(|| self.get_non_empty("journal"))
374                        .is_none()
375                    {
376                        missing.push(field);
377                    }
378                }
379                "location" => {
380                    if self
381                        .get_non_empty(field)
382                        .or_else(|| self.get_non_empty("address"))
383                        .is_none()
384                    {
385                        missing.push(field);
386                    }
387                }
388                "school"
389                    if self.entry_type == EntryType::Thesis
390                        || self.entry_type == EntryType::MastersThesis
391                        || self.entry_type == EntryType::PhdThesis =>
392                {
393                    if self
394                        .get_non_empty(field)
395                        .or_else(|| self.get_non_empty("institution"))
396                        .is_none()
397                    {
398                        missing.push(field);
399                    }
400                }
401                _ => {
402                    if self.get_non_empty(field).is_none() {
403                        missing.push(field);
404                    }
405                }
406            }
407        }
408
409        for field in reqs.forbidden {
410            if self.get_non_empty(field).is_some() {
411                superfluous.push(field);
412            }
413        }
414
415        match reqs.author_eds_field {
416            AuthorMode::OneRequired => {
417                if self.author().is_err() && self.editors().unwrap_or_default().is_empty()
418                {
419                    missing.push("author");
420                }
421            }
422            AuthorMode::BothRequired => {
423                if self.editors().unwrap_or_default().is_empty() {
424                    missing.push("editor");
425                }
426                if self.author().is_err() {
427                    missing.push("author");
428                }
429            }
430            AuthorMode::AuthorRequired | AuthorMode::AuthorRequiredEditorOptional => {
431                if self.author().is_err() {
432                    missing.push("author");
433                }
434            }
435            AuthorMode::EditorRequiredAuthorForbidden => {
436                if self.editors().unwrap_or_default().is_empty() {
437                    missing.push("editor");
438                }
439                if self.author().is_ok() {
440                    superfluous.push("author");
441                }
442            }
443            _ => {}
444        }
445
446        match reqs.page_chapter_field {
447            PagesChapterMode::OneRequired => {
448                if self.pages().is_err() && self.chapter().is_err() {
449                    missing.push("pages");
450                }
451            }
452            PagesChapterMode::BothForbidden => {
453                if self.pages().is_ok() {
454                    superfluous.push("pages");
455                }
456                if self.chapter().is_ok() {
457                    superfluous.push("chapter");
458                }
459            }
460            PagesChapterMode::PagesRequired => {
461                if self.pages().is_err() {
462                    missing.push("pages");
463                }
464            }
465            _ => {}
466        }
467
468        let mut malformed = vec![];
469
470        for (key, chunks) in &self.fields {
471            let error = match key.as_str() {
472                "edition" => chunks.parse::<PermissiveType<i64>>().err(),
473                "organization" => chunks.parse::<Vec<Chunks>>().err(),
474                "pages" => chunks.parse::<Vec<std::ops::Range<u32>>>().err(),
475                "publisher" => chunks.parse::<Vec<Chunks>>().err(),
476                "volume" => chunks.parse::<i64>().err(),
477                "bookpagination" => chunks.parse::<Pagination>().err(),
478                "pagination" => chunks.parse::<Pagination>().err(),
479                "volumes" => chunks.parse::<i64>().err(),
480                "gender" => chunks.parse::<Gender>().err(),
481                "editortype" => chunks.parse::<EditorType>().err(),
482                "editoratype" => chunks.parse::<EditorType>().err(),
483                "editorbtype" => chunks.parse::<EditorType>().err(),
484                "editorctype" => chunks.parse::<EditorType>().err(),
485                "xref" => chunks.parse::<Vec<String>>().err(),
486                "xdata" => chunks.parse::<Vec<String>>().err(),
487                "ids" => chunks.parse::<Vec<String>>().err(),
488                _ => continue,
489            };
490
491            if let Some(err) = error {
492                malformed.push((key.clone(), err))
493            }
494        }
495
496        for (key, err) in [
497            ("date", self.date().err()),
498            ("urldate", self.url_date().err()),
499            ("origdate", self.orig_date().err()),
500            ("eventdate", self.event_date().err()),
501        ] {
502            if let Some(RetrievalError::TypeError(t)) = err {
503                malformed.push((key.to_string(), t));
504            }
505        }
506
507        if reqs.needs_date {
508            if let Err(RetrievalError::Missing(_)) = self.date() {
509                missing.push("year");
510            }
511        }
512
513        Report { missing, superfluous, malformed }
514    }
515
516    /// Serialize this entry into a BibLaTeX string.
517    pub fn to_biblatex_string(&self) -> String {
518        let mut biblatex = String::new();
519        let ty = self.entry_type.to_biblatex();
520
521        writeln!(biblatex, "@{}{{{},", ty, self.key).unwrap();
522
523        for (key, value) in &self.fields {
524            let key = match key.as_ref() {
525                "journal" => "journaltitle",
526                "address" => "location",
527                "school" => "institution",
528                k => k,
529            };
530
531            writeln!(
532                biblatex,
533                "{} = {},",
534                key,
535                value.to_biblatex_string(is_verbatim_field(key))
536            )
537            .unwrap();
538        }
539
540        biblatex.push('}');
541        biblatex
542    }
543
544    /// Serialize this entry into a BibTeX string.
545    ///
546    /// This function can return an error if there is a malformed date field.
547    pub fn to_bibtex_string(&self) -> Result<String, TypeError> {
548        let mut bibtex = String::new();
549        let ty = self.entry_type.to_bibtex();
550        let thesis = matches!(ty, EntryType::PhdThesis | EntryType::MastersThesis);
551
552        writeln!(bibtex, "@{}{{{},", ty, self.key).unwrap();
553
554        for (key, value) in &self.fields {
555            if key == "date" {
556                if let Some(date) = convert_result(self.date())? {
557                    if let PermissiveType::Typed(date) = date {
558                        for (key, value) in date.to_fieldset() {
559                            let v = [Spanned::zero(Chunk::Normal(value))]
560                                .to_biblatex_string(false);
561                            writeln!(bibtex, "{} = {},", key, v).unwrap();
562                        }
563                        continue;
564                    }
565                } else {
566                    continue;
567                }
568            }
569
570            let key = match key.as_ref() {
571                "journaltitle" => "journal",
572                "location" => "address",
573                "institution" if thesis => "school",
574                k => k,
575            };
576
577            writeln!(
578                bibtex,
579                "{} = {},",
580                key,
581                value.to_biblatex_string(is_verbatim_field(key))
582            )
583            .unwrap();
584        }
585
586        bibtex.push('}');
587        Ok(bibtex)
588    }
589
590    /// Get an entry but return None for empty chunk slices.
591    fn get_non_empty(&'_ self, key: &str) -> Option<ChunksRef<'_>> {
592        let entry = self.get(key)?;
593        if !entry.is_empty() {
594            Some(entry)
595        } else {
596            None
597        }
598    }
599
600    /// Resolves all data dependencies defined by `crossref` and `xdata` fields.
601    fn resolve_crossrefs(&mut self, bib: &Bibliography) -> Result<(), TypeError> {
602        let mut refs = vec![];
603
604        if let Some(crossref) = convert_result(self.get_as::<String>("crossref"))? {
605            refs.extend(bib.get(&crossref).cloned());
606        }
607
608        if let Some(keys) = convert_result(self.get_as::<Vec<String>>("xdata"))? {
609            for key in keys {
610                refs.extend(bib.get(&key).cloned());
611            }
612        }
613
614        for mut crossref in refs {
615            crossref.resolve_crossrefs(bib)?;
616            self.resolve_single_crossref(crossref)?;
617        }
618
619        self.remove("xdata");
620
621        Ok(())
622    }
623
624    /// Resolve data dependencies using another entry.
625    fn resolve_single_crossref(&mut self, crossref: Entry) -> Result<(), TypeError> {
626        let req = self.entry_type.requirements();
627
628        let mut relevant = req.required;
629        relevant.extend(req.optional);
630        relevant.extend(req.page_chapter_field.possible());
631        relevant.extend(req.author_eds_field.possible());
632
633        if self.entry_type == EntryType::XData {
634            for f in crossref.fields.keys() {
635                relevant.push(f);
636            }
637        }
638
639        for f in relevant {
640            if self.get(f).is_some() {
641                continue;
642            }
643
644            match f {
645                "journaltitle" | "journalsubtitle"
646                    if crossref.entry_type == EntryType::Periodical =>
647                {
648                    let key = if f.contains('s') { "subtitle" } else { "title" };
649
650                    if let Some(item) = crossref.get(key) {
651                        self.set(f, item.to_vec())
652                    }
653                }
654                "booktitle" | "booksubtitle" | "booktitleaddon"
655                    if crossref.entry_type.is_collection() =>
656                {
657                    let key = if f.contains('s') {
658                        "subtitle"
659                    } else if f.contains('a') {
660                        "titleaddon"
661                    } else {
662                        "title"
663                    };
664
665                    if let Some(item) = crossref.get(key) {
666                        self.set(f, item.to_vec())
667                    }
668                }
669                "maintitle" | "mainsubtitle" | "maintitleaddon"
670                    if crossref.entry_type.is_multi_volume() =>
671                {
672                    let key = if f.contains('s') {
673                        "subtitle"
674                    } else if f.contains('a') {
675                        "titleaddon"
676                    } else {
677                        "title"
678                    };
679
680                    if let Some(item) = crossref.get(key) {
681                        self.set(f, item.to_vec())
682                    }
683                }
684                "address" => {
685                    if let Some(item) =
686                        crossref.get(f).or_else(|| crossref.get("location"))
687                    {
688                        self.set(f, item.to_vec())
689                    }
690                }
691                "institution" => {
692                    if let Some(item) = crossref.get(f).or_else(|| crossref.get("school"))
693                    {
694                        self.set(f, item.to_vec())
695                    }
696                }
697                "school" => {
698                    if let Some(item) =
699                        crossref.get(f).or_else(|| crossref.get("institution"))
700                    {
701                        self.set(f, item.to_vec())
702                    }
703                }
704                "journaltitle" => {
705                    if let Some(item) =
706                        crossref.get(f).or_else(|| crossref.get("journal"))
707                    {
708                        self.set(f, item.to_vec())
709                    }
710                }
711                "title" | "addendum" | "note" => {}
712                _ => {
713                    if let Some(item) = crossref.get(f) {
714                        self.set(f, item.to_vec())
715                    }
716                }
717            }
718        }
719
720        if self.entry_type == EntryType::XData {
721            return Ok(());
722        }
723
724        if req.needs_date {
725            if let Some(date) = convert_result(crossref.date())? {
726                self.set_date(date);
727            }
728        }
729
730        Ok(())
731    }
732}
733
734/// A report of the validity of an `Entry`. Can be obtained by calling [`Entry::verify`].
735pub struct Report {
736    /// These fields were missing, although they are required for the entry type.
737    pub missing: Vec<&'static str>,
738    /// These fields were present but are not allowed for the entry type.
739    pub superfluous: Vec<&'static str>,
740    /// These fields were present but contained malformed data.
741    pub malformed: Vec<(String, TypeError)>,
742}
743
744impl Report {
745    /// Whether the report is empty and contains no errors.
746    pub fn is_ok(&self) -> bool {
747        self.missing.is_empty()
748            && self.superfluous.is_empty()
749            && self.malformed.is_empty()
750    }
751}
752
// Typed accessors for the individual fields of an entry. The `fields!`,
// `alias_fields!` and `date_fields!` macros are imported from the `macros`
// module and are not visible here; each table row names an accessor, the
// field(s) it is associated with and, optionally, the type it is parsed into.
impl Entry {
    // BibTeX fields.
    fields! {
        // Fields without a specified return type simply return `ChunksRef`.
        author: "author" => Vec<Person>,
        book_title: "booktitle",
        chapter: "chapter",
        edition: "edition" => PermissiveType<i64>,
        how_published: "howpublished",
        note: "note",
        number: "number",
        organization: "organization" => Vec<Chunks>,
        pages: "pages" => PermissiveType<Vec<std::ops::Range<u32>>>,
        publisher: "publisher" => Vec<Chunks>,
        series: "series",
        title: "title",
        type_: "type" => String,
        volume: "volume" => PermissiveType<i64>,
    }

    // Fields that are known under two names.
    // NOTE(review): presumably the second name is consulted when the first
    // one is absent -- confirm against the `alias_fields!` definition.
    alias_fields! {
        address: "address" | "location",
        location: "location" | "address",
        annotation: "annotation" | "annote",
        eprint_type: "eprinttype" | "archiveprefix",
        eprint_class: "eprintclass" | "primaryclass",
        journal: "journal" | "journaltitle",
        journal_title: "journaltitle" | "journal",
        sort_key: "key" | "sortkey" => String,
        file: "file" | "pdf" => String,
        school: "school" | "institution",
        institution: "institution" | "school",
    }

    // Date accessors.
    // NOTE(review): the string looks like the prefix of the corresponding
    // `*date` field (`""` for `date`, `"url"` for `urldate`) -- confirm
    // against the `date_fields!` definition.
    date_fields! {
        date: "",
        event_date: "event",
        orig_date: "orig",
        url_date: "url",
    }

    /// Get the `editor` and `editora` through `editorc` fields and their
    /// respective `editortype` annotation fields, returning a vector with zero
    /// to four entries, one for each editorial role.
    ///
    /// The default `EditorType::Editor` is assumed if the type field is empty.
    ///
    /// # Errors
    ///
    /// Returns a [`TypeError`] if a name field or a type field that is present
    /// cannot be parsed.
    pub fn editors(&self) -> Result<Vec<(Vec<Person>, EditorType)>, TypeError> {
        let mut editors = vec![];

        // Appends the persons of one name field together with their role; a
        // name field that is absent contributes nothing.
        let mut parse = |name_field: &str, editor_field: &str| -> Result<(), TypeError> {
            if let Some(persons) = convert_result(self.get_as::<Vec<Person>>(name_field))?
            {
                let editor_type = self
                    .get(editor_field)
                    .map(|chunks| chunks.parse::<EditorType>())
                    .transpose()?
                    .unwrap_or(EditorType::Editor);
                editors.push((persons, editor_type));
            }

            Ok(())
        };

        parse("editor", "editortype")?;
        parse("editora", "editoratype")?;
        parse("editorb", "editorbtype")?;
        parse("editorc", "editorctype")?;

        Ok(editors)
    }

    // BibLaTeX supplemental fields.
    fields! {
        abstract_: "abstract",
        addendum: "addendum",
        afterword: "afterword" => Vec<Person>,
        annotator: "annotator" => Vec<Person>,
        author_type: "authortype" => String,
        book_author: "bookauthor" => Vec<Person>,
        book_pagination: "bookpagination" => Pagination,
        book_subtitle: "booksubtitle",
        book_title_addon: "booktitleaddon",
        commentator: "commentator" => Vec<Person>,
        doi: "doi" => String,
        eid: "eid",
        entry_subtype: "entrysubtype",
        eprint: "eprint" => String,
        eventtitle: "eventtitle",
        eventtitle_addon: "eventtitleaddon",
        foreword: "foreword" => Vec<Person>,
        holder: "holder" => Vec<Person>,
        index_title: "indextitle",
        introduction: "introduction" => Vec<Person>,
        isan: "isan",
        isbn: "isbn",
        ismn: "ismn",
        isrn: "isrn",
        issn: "issn",
        issue: "issue",
        issue_subtitle: "issuesubtitle",
        issue_title: "issuetitle",
        issue_title_addon: "issuetitleaddon",
        iswc: "iswc",
        journal_subtitle: "journalsubtitle",
        journal_title_addon: "journaltitleaddon",
        keywords: "keywords",
        label: "label",
        langid: "langid" => PermissiveType<Language>,
        language: "language" => Vec<PermissiveType<Language>>,
        library: "library",
        main_subtitle: "mainsubtitle",
        main_title: "maintitle",
        main_title_addon: "maintitleaddon",
        name_addon: "nameaddon",
        options: "options",
        orig_language: "origlanguage" => Vec<String>,
        orig_location: "origlocation" => Vec<Chunks>,
        page_total: "pagetotal",
        pagination: "pagination" => Pagination,
        part: "part",
        pubstate: "pubstate",
        reprint_title: "reprinttitle",
        short_author: "shortauthor" => Vec<Person>,
        short_editor: "shorteditor" => Vec<Person>,
        shorthand: "shorthand",
        shorthand_intro: "shorthandintro",
        short_journal: "shortjournal",
        short_series: "shortseries",
        short_title: "shorttitle",
        subtitle: "subtitle",
        title_addon: "titleaddon",
        translator: "translator" => Vec<Person>,
        url: "url" => String,
        venue: "venue",
        version: "version",
        volumes: "volumes" => i64,
        gender: "gender" => Gender,
    }
}
892
type Span = std::ops::Range<usize>;

/// A value together with the span it corresponds to in the source code.
///
/// A span can be _detached_, which means that it deliberately does not point
/// into the source code. Such spans are created when manually setting fields
/// with an empty bibliography or after parsing a file. Detached spans do not
/// indicate valid index ranges in the source files and must not be used as
/// such. A spanned item can be checked for detachment by calling
/// [`Self::is_detached`].
#[derive(Clone, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Spanned<T> {
    /// The spanned value.
    pub v: T,
    /// The location in source code of the value.
    pub span: Span,
}

impl<T> Spanned<T> {
    /// Create a new instance from a value and its span.
    pub fn new(v: T, span: Span) -> Self {
        Spanned { v, span }
    }

    /// Create a new instance with a value and a zero-length span.
    pub fn zero(v: T) -> Self {
        Self::new(v, 0..0)
    }

    /// Create a new instance with a detached span.
    pub fn detached(v: T) -> Self {
        Self::new(v, usize::MAX..usize::MAX)
    }

    /// Whether the span is detached.
    pub fn is_detached(&self) -> bool {
        // Only the start of the span is inspected.
        usize::MAX == self.span.start
    }

    /// Convert from `&Spanned<T>` to `Spanned<&T>`
    pub fn as_ref(&self) -> Spanned<&T> {
        Spanned::new(&self.v, self.span.clone())
    }

    /// Map the value using a function keeping the span.
    pub fn map<F, U>(self, f: F) -> Spanned<U>
    where
        F: FnOnce(T) -> U,
    {
        let Spanned { v, span } = self;
        Spanned::new(f(v), span)
    }
}

impl<T: Debug> Debug for Spanned<T> {
    /// Prints only the value; the alternate form (`{:#?}`) additionally
    /// appends the span in angle brackets.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        Debug::fmt(&self.v, f)?;
        if !f.alternate() {
            return Ok(());
        }

        f.write_str(" <")?;
        Debug::fmt(&self.span, f)?;
        f.write_str(">")
    }
}
958
959#[cfg(test)]
960mod tests {
961    use std::fs;
962
963    use super::*;
964    use crate::raw::Token;
965
966    #[test]
967    fn test_correct_bib() {
968        let contents = fs::read_to_string("tests/gral.bib").unwrap();
969        let bibliography = Bibliography::parse(&contents).unwrap();
970        assert_eq!(bibliography.entries.len(), 83)
971    }
972
973    #[test]
974    fn test_repeated_key() {
975        let contents = fs::read_to_string("tests/gral_rep_key.bib").unwrap();
976        let bibliography = Bibliography::parse(&contents);
977        match bibliography {
978            Ok(_) => panic!("Should return Err"),
979            Err(s) => {
980                assert_eq!(s.kind, ParseErrorKind::DuplicateKey("ishihara2012".into()));
981            }
982        };
983    }
984
985    #[test]
986    fn test_parse_incorrect_result() {
987        let contents = fs::read_to_string("tests/incorrect_syntax.bib").unwrap();
988
989        let bibliography = Bibliography::parse(&contents);
990        match bibliography {
991            Ok(_) => {
992                panic!("Should return Err")
993            }
994            Err(s) => {
995                assert_eq!(
996                    s,
997                    ParseError::new(369..369, ParseErrorKind::Expected(Token::Equals))
998                );
999            }
1000        };
1001    }
1002
1003    #[test]
1004    fn test_parse_incorrect_types() {
1005        let contents = fs::read_to_string("tests/incorrect_data.bib").unwrap();
1006
1007        let bibliography = Bibliography::parse(&contents).unwrap();
1008        let rashid = bibliography.get("rashid2016").unwrap();
1009        match rashid.pagination() {
1010            Err(RetrievalError::TypeError(s)) => {
1011                assert_eq!(s, TypeError::new(352..359, TypeErrorKind::UnknownPagination));
1012            }
1013            _ => {
1014                panic!()
1015            }
1016        };
1017    }
1018
1019    #[test]
1020    fn test_keys() {
1021        let contents = fs::read_to_string("tests/editortypes.bib").unwrap();
1022
1023        let bibliography = Bibliography::parse(&contents).unwrap();
1024
1025        assert_eq!(
1026            bibliography.keys().collect::<Vec<_>>(),
1027            &["acerolaThisDifferenceGaussians2022", "mozart_KV183_1773", "Smith2018"]
1028        );
1029    }
1030
1031    #[test]
1032    fn test_gral_paper() {
1033        dump_debug("tests/gral.bib");
1034    }
1035
1036    #[test]
1037    fn test_ds_report() {
1038        dump_debug("tests/ds.bib");
1039    }
1040
1041    #[test]
1042    fn test_libra_paper() {
1043        dump_author_title("tests/libra.bib");
1044    }
1045
1046    #[test]
1047    fn test_rass_report() {
1048        dump_author_title("tests/rass.bib");
1049    }
1050
1051    #[test]
1052    fn test_polar_report() {
1053        dump_author_title("tests/polaritons.bib");
1054    }
1055
1056    #[test]
1057    fn test_comments() {
1058        let contents = fs::read_to_string("tests/comments.bib").unwrap();
1059
1060        let bibliography = Bibliography::parse(&contents).unwrap();
1061
1062        assert_eq!(
1063            bibliography.keys().collect::<Vec<_>>(),
1064            &[
1065                "mcelreath2007mathematical",
1066                "fischer2022equivalence",
1067                "roes2003belief",
1068                "wong2016null",
1069            ]
1070        );
1071
1072        assert_eq!(
1073            bibliography
1074                .get("wong2016null")
1075                .unwrap()
1076                .title()
1077                .unwrap()
1078                .format_verbatim(),
1079            "Null hypothesis testing (I)-5% significance level"
1080        );
1081    }
1082
1083    #[test]
1084    fn test_extended_name_format() {
1085        dump_author_title("tests/extended_name_format.bib");
1086    }
1087
1088    #[test]
1089    fn test_alias() {
1090        let contents = fs::read_to_string("tests/cross.bib").unwrap();
1091        let mut bibliography = Bibliography::parse(&contents).unwrap();
1092
1093        assert_eq!(bibliography.get("issue201"), bibliography.get("github"));
1094        bibliography.alias("issue201", "crap");
1095        assert_eq!(bibliography.get("crap"), bibliography.get("unstable"));
1096        bibliography.remove("crap").unwrap();
1097
1098        let entry = bibliography.get("cannonfodder").unwrap();
1099        assert_eq!(entry.key, "cannonfodder");
1100        assert_eq!(entry.entry_type, EntryType::Misc);
1101    }
1102
1103    #[test]
1104    fn test_bibtex_conversion() {
1105        let contents = fs::read_to_string("tests/cross.bib").unwrap();
1106        let mut bibliography = Bibliography::parse(&contents).unwrap();
1107
1108        let biblatex = bibliography.get_mut("haug2019").unwrap().to_biblatex_string();
1109        assert!(biblatex.contains("institution = {Technische Universität Berlin},"));
1110
1111        let bibtex =
1112            bibliography.get_mut("haug2019").unwrap().to_bibtex_string().unwrap();
1113        assert!(bibtex.contains("school = {Technische Universität Berlin},"));
1114        assert!(bibtex.contains("year = {2019},"));
1115        assert!(bibtex.contains("month = {10},"));
1116        assert!(!bibtex.contains("institution"));
1117        assert!(!bibtex.contains("date"));
1118    }
1119
1120    #[test]
1121    fn test_verify() {
1122        let mut contents = fs::read_to_string("tests/gral.bib").unwrap();
1123        let mut bibliography = Bibliography::parse(&contents).unwrap();
1124        assert!(bibliography.get_mut("lin_sida:_2007").unwrap().verify().is_ok());
1125
1126        contents = fs::read_to_string("tests/cross.bib").unwrap();
1127        bibliography = Bibliography::parse(&contents).unwrap();
1128
1129        assert!(bibliography.get_mut("haug2019").unwrap().verify().is_ok());
1130        assert!(bibliography.get_mut("cannonfodder").unwrap().verify().is_ok());
1131
1132        let ill = bibliography.get("ill-defined").unwrap();
1133        let report = ill.verify();
1134        assert_eq!(report.missing.len(), 3);
1135        assert_eq!(report.superfluous.len(), 3);
1136        assert_eq!(report.malformed.len(), 1);
1137        assert!(report.missing.contains(&"title"));
1138        assert!(report.missing.contains(&"year"));
1139        assert!(report.missing.contains(&"editor"));
1140        assert!(report.superfluous.contains(&"maintitle"));
1141        assert!(report.superfluous.contains(&"author"));
1142        assert!(report.superfluous.contains(&"chapter"));
1143        assert_eq!(report.malformed[0].0.as_str(), "gender");
1144    }
1145
1146    #[test]
1147    fn test_crossref() {
1148        let contents = fs::read_to_string("tests/cross.bib").unwrap();
1149        let bibliography = Bibliography::parse(&contents).unwrap();
1150
1151        let e = bibliography.get("macmillan").unwrap();
1152        assert_eq!(e.publisher().unwrap()[0].format_verbatim(), "Macmillan");
1153        assert_eq!(e.location().unwrap().format_verbatim(), "New York and London");
1154
1155        let book = bibliography.get("recursive").unwrap();
1156        assert_eq!(book.publisher().unwrap()[0].format_verbatim(), "Macmillan");
1157        assert_eq!(book.location().unwrap().format_verbatim(), "New York and London");
1158        assert_eq!(
1159            book.title().unwrap().format_verbatim(),
1160            "Recursive shennenigans and other important stuff"
1161        );
1162
1163        assert_eq!(
1164            bibliography.get("arrgh").unwrap().parents().unwrap(),
1165            vec!["polecon".to_string()]
1166        );
1167        let arrgh = bibliography.get("arrgh").unwrap();
1168        assert_eq!(arrgh.entry_type, EntryType::Article);
1169        assert_eq!(arrgh.volume().unwrap(), PermissiveType::Typed(115));
1170        assert_eq!(arrgh.editors().unwrap()[0].0[0].name, "Uhlig");
1171        assert_eq!(arrgh.number().unwrap().format_verbatim(), "6");
1172        assert_eq!(
1173            arrgh.journal().unwrap().format_verbatim(),
1174            "Journal of Political Economy"
1175        );
1176        assert_eq!(
1177            arrgh.title().unwrap().format_verbatim(),
1178            "An‐arrgh‐chy: The Law and Economics of Pirate Organization"
1179        );
1180    }
1181
1182    fn dump_debug(file: &str) {
1183        let contents = fs::read_to_string(file).unwrap();
1184        let bibliography = Bibliography::parse(&contents).unwrap();
1185        println!("{:#?}", bibliography);
1186    }
1187
1188    fn dump_author_title(file: &str) {
1189        let contents = fs::read_to_string(file).unwrap();
1190        let bibliography = Bibliography::parse(&contents).unwrap();
1191
1192        println!("{}", bibliography.to_biblatex_string());
1193
1194        for x in bibliography {
1195            let authors = x.author().unwrap_or_default();
1196            for a in authors {
1197                print!("{}, ", a);
1198            }
1199            println!("\"{}\".", x.title().unwrap().format_sentence());
1200        }
1201    }
1202
1203    #[test]
1204    fn linebreak_field() {
1205        let contents = r#"@book{key, title = {Hello
1206Martin}}"#;
1207        let bibliography = Bibliography::parse(contents).unwrap();
1208        let entry = bibliography.get("key").unwrap();
1209        assert_eq!(entry.title().unwrap().format_verbatim(), "Hello Martin");
1210    }
1211
1212    #[test]
1213    fn test_verbatim_fields() {
1214        let contents = fs::read_to_string("tests/libra.bib").unwrap();
1215        let bibliography = Bibliography::parse(&contents).unwrap();
1216
1217        // Import an entry/field with escaped colons
1218        let e = bibliography.get("dierksmeierJustHODLMoral2018").unwrap();
1219        assert_eq!(e.doi().unwrap(), "10.1007/s41463-018-0036-z");
1220        assert_eq!(
1221            e.file().unwrap(),
1222            "C:\\Users\\mhaug\\Zotero\\storage\\DTPR7TES\\Dierksmeier - 2018 - Just HODL On the Moral Claims of Bitcoin and Ripp.pdf"
1223        );
1224
1225        // Import an entry/field with unescaped colons
1226        let e = bibliography.get("LibraAssociationIndependent").unwrap();
1227        assert_eq!(e.url().unwrap(), "https://libra.org/association/");
1228
1229        // Test export of entry (not escaping colons)
1230        let e = bibliography.get("finextraFedGovernorChallenges2019").unwrap();
1231        assert_eq!(
1232            e.to_biblatex_string(),
1233            "@online{finextraFedGovernorChallenges2019,\nauthor = {FinExtra},\ndate = {2019-12-18},\nfile = {C:\\\\Users\\\\mhaug\\\\Zotero\\\\storage\\\\VY9LAKFE\\\\fed-governor-challenges-facebooks-libra-project.html},\ntitle = {Fed {Governor} Challenges {Facebook}'s {Libra} Project},\nurl = {https://www.finextra.com/newsarticle/34986/fed-governor-challenges-facebooks-libra-project},\nurldate = {2020-08-22},\n}"
1234        );
1235
1236        // Test URLs with math and backslashes
1237        let e = bibliography.get("weirdUrl2023").unwrap();
1238        assert_eq!(e.url().unwrap(), r#"example.com?A=$B\%\{}"#);
1239        assert_eq!(e.doi().unwrap(), r#"example.com?A=$B\%\{}"#);
1240    }
1241
1242    #[test]
1243    fn test_synthesized_entry() {
1244        let mut e = Entry::new("Test123".to_owned(), EntryType::Article);
1245        let brian = vec![Person {
1246            name: "Monroe".to_string(),
1247            given_name: "Brian Albert".to_string(),
1248            prefix: "".to_string(),
1249            suffix: "".to_string(),
1250        }];
1251
1252        e.set_author(brian.clone());
1253
1254        assert_eq!(Ok(brian), e.author());
1255    }
1256
1257    #[test]
1258    fn test_case_sensitivity() {
1259        let contents = fs::read_to_string("tests/case.bib").unwrap();
1260        let bibliography = Bibliography::parse(&contents).unwrap();
1261
1262        let entry = bibliography.get("biblatex2023").unwrap();
1263        let author = entry.author();
1264
1265        match author {
1266            Ok(a) => assert_eq!(a[0].name, "Kime"),
1267            Err(RetrievalError::Missing(_)) => {
1268                panic!("Tags should be case insensitive.");
1269            }
1270            _ => panic!(),
1271        }
1272    }
1273
1274    #[test]
1275    fn test_whitespace_collapse() {
1276        let raw = r#"@article{aksin,
1277            title        = {Effect of immobilization on catalytic characteristics of
1278                            saturated {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck}
1279                            reactions},
1280          }"#;
1281
1282        let bibliography = Bibliography::parse(raw).unwrap();
1283        let entry = bibliography.get("aksin").unwrap();
1284        assert_eq!(
1285            entry.title().unwrap().first().map(|s| s.as_ref().v),
1286            Some(Chunk::Normal(
1287                "Effect of immobilization on catalytic characteristics of saturated "
1288                    .to_string()
1289            ))
1290            .as_ref()
1291        );
1292    }
1293
1294    #[test]
1295    fn test_empty_date_fields() {
1296        let raw = r#"@article{test,
1297            year        = 2000,
1298            day         = {},
1299            month    = {},
1300          }"#;
1301
1302        let bibliography = Bibliography::parse(raw).unwrap();
1303        assert_eq!(
1304            bibliography.get("test").unwrap().date(),
1305            Err(TypeError::new(74..74, TypeErrorKind::MissingNumber).into())
1306        );
1307    }
1308
1309    #[test]
1310    #[allow(clippy::single_range_in_vec_init)]
1311    fn test_page_ranges() {
1312        let raw = r#"@article{test,
1313            pages = {1---2},
1314          }
1315          @article{test1,
1316            pages = {2--3},
1317          }
1318          @article{test2,
1319            pages = {1},
1320          }"#;
1321
1322        let bibliography = Bibliography::parse(raw).unwrap();
1323        assert_eq!(
1324            bibliography.get("test").unwrap().pages(),
1325            Ok(PermissiveType::Typed(vec![1..2]))
1326        );
1327        assert_eq!(
1328            bibliography.get("test1").unwrap().pages(),
1329            Ok(PermissiveType::Typed(vec![2..3]))
1330        );
1331        assert_eq!(
1332            bibliography.get("test2").unwrap().pages(),
1333            Ok(PermissiveType::Typed(vec![1..1]))
1334        );
1335    }
1336
1337    #[test]
1338    fn test_editor_types() {
1339        let contents = fs::read_to_string("tests/editortypes.bib").unwrap();
1340        let bibliography = Bibliography::parse(&contents).unwrap();
1341        let video = bibliography.get("acerolaThisDifferenceGaussians2022").unwrap();
1342        assert_eq!(
1343            video.editors(),
1344            Ok(vec![(
1345                vec![Person {
1346                    name: "Acerola".into(),
1347                    given_name: "".into(),
1348                    prefix: "".into(),
1349                    suffix: "".into()
1350                }],
1351                EditorType::Director
1352            )])
1353        );
1354
1355        let music = bibliography.get("mozart_KV183_1773").unwrap();
1356        assert_eq!(
1357            music.editors(),
1358            Ok(vec![(
1359                vec![Person {
1360                    name: "Mozart".into(),
1361                    given_name: "Wolfgang Amadeus".into(),
1362                    prefix: "".into(),
1363                    suffix: "".into()
1364                }],
1365                EditorType::Unknown("pianist".into()),
1366            )])
1367        );
1368
1369        let audio = bibliography.get("Smith2018").unwrap();
1370        assert_eq!(
1371            audio.editors(),
1372            Ok(vec![
1373                (
1374                    vec![Person {
1375                        name: "Smith".into(),
1376                        given_name: "Stacey Vanek".into(),
1377                        prefix: "".into(),
1378                        suffix: "".into()
1379                    }],
1380                    EditorType::Unknown("host".into()),
1381                ),
1382                (
1383                    vec![Person {
1384                        name: "Plotkin".into(),
1385                        given_name: "Stanley".into(),
1386                        prefix: "".into(),
1387                        suffix: "".into()
1388                    }],
1389                    EditorType::Unknown("participant".into()),
1390                )
1391            ])
1392        );
1393    }
1394}