Skip to main content

bibtex_parser/
lib.rs

1#![deny(clippy::all)]
2//! # bibtex-parser
3//!
4//! BibTeX parser for Rust.
5//!
6//! `bibtex-parser` supports strict parsing by default, explicit tolerant
7//! recovery for malformed input, string and month expansion, comments and
8//! preambles, validation, query/edit helpers, and configurable writing.
9//!
10//! ## Features
11//!
12//! - Borrowed values where possible for low-allocation parsing.
13//! - String variables, concatenation, and standard month constants.
14//! - Entries, strings, preambles, comments, and tolerant failures in source order.
15//! - Opt-in source-span capture.
16//! - DOI normalization, duplicate detection, validation, sorting, and field normalization.
17//! - Configurable writer for formatting and file output.
18//! - Optional `parallel` feature for parsing multiple files concurrently.
19//! - Optional `latex_to_unicode` feature for LaTeX accent conversion helpers.
20//!
21//! ## Parse
22//!
23//! ```
24//! use bibtex_parser::Library;
25//!
26//! let input = r#"
27//!     @string{venue = "VLDB"}
28//!     @article{paper,
29//!         author = "Jane Doe and John Smith",
30//!         title = "Example Paper",
31//!         journal = venue,
32//!         year = 2026
33//!     }
34//! "#;
35//!
36//! let library = Library::parse(input)?;
37//! let entry = library.find_by_key("paper").unwrap();
38//!
39//! assert_eq!(entry.get("journal"), Some("VLDB"));
40//! assert_eq!(entry.year(), Some("2026".to_string()));
41//! assert_eq!(entry.authors().len(), 2);
42//! # Ok::<(), bibtex_parser::Error>(())
43//! ```
44//!
45//! ## Tolerant Recovery
46//!
47//! ```
48//! use bibtex_parser::{Block, Library};
49//!
50//! let library = Library::parser()
51//!     .tolerant()
52//!     .capture_source()
53//!     .parse(r#"
54//!         @article{ok, title = "Good"}
55//!         @article{bad, title = "Missing close"
56//!         @book{recovered, title = "Recovered"}
57//!     "#)?;
58//!
59//! assert_eq!(library.entries().len(), 2);
60//! assert_eq!(library.failed_blocks().len(), 1);
61//!
62//! let has_failure_span = library.blocks().iter().any(|block| {
63//!     matches!(block, Block::Failed(failed) if failed.source.is_some())
64//! });
65//! assert!(has_failure_span);
66//! # Ok::<(), bibtex_parser::Error>(())
67//! ```
68//!
69//! ## Write
70//!
71//! ```
72//! use bibtex_parser::{Library, Writer, WriterConfig};
73//!
74//! let library = Library::parse(r#"@article{paper, title = "Example Paper"}"#)?;
75//! let mut output = Vec::new();
76//! let config = WriterConfig {
77//!     align_values: true,
78//!     ..Default::default()
79//! };
80//!
81//! Writer::with_config(&mut output, config).write_library(&library)?;
82//! assert!(String::from_utf8(output).unwrap().contains("@article{paper"));
83//! # Ok::<(), bibtex_parser::Error>(())
84//! ```
85//!
86//! ## `Library` Versus `ParsedDocument`
87//!
88//! Use [`Library`] when application code wants structured bibliography data.
89//! Use [`ParsedDocument`] when tooling needs source-order blocks, diagnostics,
90//! partial results, or source-preserving metadata.
91//!
92//! ```
93//! use bibtex_parser::{ParsedBlock, Parser};
94//!
95//! let input = r#"
96//!     % retained comment
97//!     @article{paper, title = "Example Paper"}
98//! "#;
99//!
100//! let document = Parser::new()
101//!     .capture_source()
102//!     .parse_document(input)?;
103//!
104//! assert_eq!(document.library().entries().len(), 1);
105//! assert_eq!(document.entries()[0].key(), "paper");
106//! assert!(matches!(document.blocks()[0], ParsedBlock::Comment(0)));
107//! assert!(document.entries()[0].source.is_some());
108//! # Ok::<(), bibtex_parser::Error>(())
109//! ```
110
// Crate-wide lint policy: `unsafe` is forbidden outright, the broad clippy groups plus the
// documentation/Debug lints are reported as warnings, and a few of those lints are then
// switched off again below.
#![forbid(unsafe_code)]
// NOTE(review): `clippy::all` is also set to `deny` by the crate attribute at the very top of
// this file. With both attributes present the later `warn` appears to take precedence, which
// would make the earlier `deny` ineffective — confirm which level is intended and drop the other.
#![warn(
    clippy::all,
    clippy::pedantic,
    clippy::nursery,
    clippy::cargo,
    missing_docs,
    missing_debug_implementations
)]
// Lints from the groups enabled above that are silenced for the whole crate.
// NOTE(review): the rationale for each exemption is not recorded here — consider adding it.
#![allow(
    clippy::module_name_repetitions,
    clippy::missing_errors_doc,
    clippy::missing_panics_doc,
    clippy::multiple_crate_versions
)]
126
127pub mod corpus;
128pub mod document;
129pub mod error;
130pub mod model;
131pub mod parser;
132#[cfg(feature = "python")]
133mod python;
134pub mod source;
135
136#[cfg(feature = "latex_to_unicode")]
137pub mod latex_unicode;
138
139mod library;
140mod writer;
141
// Registers mimalloc as the global allocator, but only when the `python-extension` feature is
// enabled and the target is anything other than Linux on aarch64.
// NOTE(review): the reason for excluding linux/aarch64 is not visible in this file, and this
// item is gated on `python-extension` while the `python` module and `_native` entry point are
// gated on `python` — presumably one feature implies the other; confirm in Cargo.toml.
#[cfg(all(
    feature = "python-extension",
    not(all(target_os = "linux", target_arch = "aarch64"))
))]
#[global_allocator]
static PYTHON_EXTENSION_ALLOCATOR: mimalloc::MiMalloc = mimalloc::MiMalloc;
148
149pub use corpus::{
150    CorpusEvent, CorpusSource, DuplicateKeyGroup, DuplicateKeyOccurrence, ParsedCorpus,
151};
152pub use document::{
153    Diagnostic, DiagnosticCode, DiagnosticSeverity, DiagnosticTarget, EntryDelimiter,
154    ExpansionOptions, ParseEvent, ParseFlow, ParseStatus, ParseSummary, ParsedBlock, ParsedComment,
155    ParsedDocument, ParsedEntry, ParsedEntryStatus, ParsedFailedBlock, ParsedField, ParsedPreamble,
156    ParsedSource, ParsedString, ParsedValue, StreamingSummary, UnresolvedVariablePolicy,
157    ValueDelimiter,
158};
159pub use error::{Error, Result, SourceId, SourceSpan};
160pub use library::{
161    Block, Comment, FailedBlock, FieldNameCase, FieldNormalizeOptions, IssueSummary, Library,
162    LibraryBuilder, LibraryStats, MonthStyle, Parser, Preamble, SortOptions, StringDefinition,
163    ValidationReport,
164};
165pub use model::{
166    canonical_biblatex_field_alias, classify_resource_field, normalize_biblatex_field_name,
167    normalize_doi, normalize_field_name_ascii, parse_date_parts, parse_names, DateParseError,
168    DateParts, Entry, EntryType, Field, PersonName, ResourceField, ResourceKind, ValidationError,
169    ValidationLevel, ValidationSeverity, Value,
170};
171pub use parser::{parse_bibtex, ParsedItem};
172pub use source::SourceMap;
173pub use writer::{
174    document_to_string, selected_entries_to_string, to_file, to_string, RawWriteMode,
175    TrailingComma, Writer, WriterConfig,
176};
177
178/// Re-export of common parser functions
179pub mod prelude {
180    pub use crate::{
181        canonical_biblatex_field_alias, classify_resource_field, document_to_string,
182        normalize_biblatex_field_name, normalize_doi, normalize_field_name_ascii, parse_bibtex,
183        parse_date_parts, parse_names, selected_entries_to_string, Block, Comment, CorpusEvent,
184        CorpusSource, DateParseError, DateParts, Diagnostic, DiagnosticCode, DiagnosticSeverity,
185        DiagnosticTarget, DuplicateKeyGroup, DuplicateKeyOccurrence, Entry, EntryDelimiter,
186        EntryType, Error, ExpansionOptions, FailedBlock, Field, FieldNameCase,
187        FieldNormalizeOptions, IssueSummary, Library, LibraryBuilder, LibraryStats, MonthStyle,
188        ParseEvent, ParseFlow, ParseStatus, ParseSummary, ParsedBlock, ParsedComment, ParsedCorpus,
189        ParsedDocument, ParsedEntry, ParsedEntryStatus, ParsedFailedBlock, ParsedField, ParsedItem,
190        ParsedPreamble, ParsedSource, ParsedString, ParsedValue, Parser, PersonName, Preamble,
191        RawWriteMode, ResourceField, ResourceKind, Result, SortOptions, SourceId, SourceMap,
192        SourceSpan, StreamingSummary, StringDefinition, TrailingComma, UnresolvedVariablePolicy,
193        ValidationError, ValidationLevel, ValidationReport, ValidationSeverity, Value,
194        ValueDelimiter, Writer, WriterConfig,
195    };
196}
197
198/// Parse a BibTeX library from a string.
199pub fn parse(input: &str) -> Result<Library<'_>> {
200    Library::parser().parse(input)
201}
202
203/// Parse a BibTeX library from a file.
204pub fn parse_file(path: impl AsRef<std::path::Path>) -> Result<Library<'static>> {
205    let content = std::fs::read_to_string(path)?;
206    parse(&content).map(Library::into_owned)
207}
208
// Initialiser for the `_native` Python extension module; compiled only with the `python`
// feature. All registration work is delegated to `python::register`.
//
// Plain `//` comments are used on purpose: pyo3 applies `///` doc comments on a `#[pymodule]`
// function as the Python module's docstring, so adding one would change what Python sees.
#[cfg(feature = "python")]
#[pyo3::pymodule]
fn _native(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> {
    python::register(m)?;
    Ok(())
}
213}