Skip to main content

bibtex_parser/
lib.rs

1#![deny(clippy::all)]
2//! # bibtex-parser
3//!
4//! BibTeX parser for Rust.
5//!
6//! `bibtex-parser` supports strict parsing by default, explicit tolerant
7//! recovery for malformed input, string and month expansion, comments and
8//! preambles, validation, query/edit helpers, and configurable writing.
9//!
10//! ## Features
11//!
12//! - Borrowed values where possible for low-allocation parsing.
13//! - String variables, concatenation, and standard month constants.
14//! - Entries, strings, preambles, comments, and tolerant failures in source order.
15//! - Opt-in source-span capture.
16//! - DOI normalization, duplicate detection, validation, sorting, and field normalization.
17//! - Configurable writer for formatting and file output.
18//! - Optional `parallel` feature for parsing multiple files concurrently.
19//! - Optional `latex_to_unicode` feature for LaTeX accent conversion helpers.
20//!
21//! ## Parse
22//!
23//! ```
24//! use bibtex_parser::Library;
25//!
26//! let input = r#"
27//!     @string{venue = "VLDB"}
28//!     @article{paper,
29//!         author = "Jane Doe and John Smith",
30//!         title = "Example Paper",
31//!         journal = venue,
32//!         year = 2026
33//!     }
34//! "#;
35//!
36//! let library = Library::parse(input)?;
37//! let entry = library.find_by_key("paper").unwrap();
38//!
39//! assert_eq!(entry.get("journal"), Some("VLDB"));
40//! assert_eq!(entry.year(), Some("2026".to_string()));
41//! assert_eq!(entry.authors().len(), 2);
42//! # Ok::<(), bibtex_parser::Error>(())
43//! ```
44//!
45//! ## Tolerant Recovery
46//!
47//! ```
48//! use bibtex_parser::{Block, Library};
49//!
50//! let library = Library::parser()
51//!     .tolerant()
52//!     .capture_source()
53//!     .parse(r#"
54//!         @article{ok, title = "Good"}
55//!         @article{bad, title = "Missing close"
56//!         @book{recovered, title = "Recovered"}
57//!     "#)?;
58//!
59//! assert_eq!(library.entries().len(), 2);
60//! assert_eq!(library.failed_blocks().len(), 1);
61//!
62//! let has_failure_span = library.blocks().iter().any(|block| {
63//!     matches!(block, Block::Failed(failed) if failed.source.is_some())
64//! });
65//! assert!(has_failure_span);
66//! # Ok::<(), bibtex_parser::Error>(())
67//! ```
68//!
69//! ## Write
70//!
71//! ```
72//! use bibtex_parser::{Library, Writer, WriterConfig};
73//!
74//! let library = Library::parse(r#"@article{paper, title = "Example Paper"}"#)?;
75//! let mut output = Vec::new();
76//! let config = WriterConfig {
77//!     align_values: true,
78//!     ..Default::default()
79//! };
80//!
81//! Writer::with_config(&mut output, config).write_library(&library)?;
82//! assert!(String::from_utf8(output).unwrap().contains("@article{paper"));
83//! # Ok::<(), bibtex_parser::Error>(())
84//! ```
85//!
86//! ## `Library` Versus `ParsedDocument`
87//!
88//! Use [`Library`] when application code wants structured bibliography data.
89//! Use [`ParsedDocument`] when tooling needs source-order blocks, diagnostics,
90//! partial results, or source-preserving metadata.
91//!
92//! ```
93//! use bibtex_parser::{ParsedBlock, Parser};
94//!
95//! let input = r#"
96//!     % retained comment
97//!     @article{paper, title = "Example Paper"}
98//! "#;
99//!
100//! let document = Parser::new()
101//!     .capture_source()
102//!     .parse_document(input)?;
103//!
104//! assert_eq!(document.library().entries().len(), 1);
105//! assert_eq!(document.entries()[0].key(), "paper");
106//! assert!(matches!(document.blocks()[0], ParsedBlock::Comment(0)));
107//! assert!(document.entries()[0].source.is_some());
108//! # Ok::<(), bibtex_parser::Error>(())
109//! ```
110
// Crate-wide lint configuration.
// NOTE(review): `clippy::all` is set to `deny` by the first attribute in this
// file and to `warn` again in the list below; the later attribute presumably
// takes precedence, leaving `clippy::all` at warn level — confirm which level
// is intended and drop the redundant one.
111#![forbid(unsafe_code)]
// Silences `dead_code` whenever the `python` feature is disabled.
// NOTE(review): presumably some items are only reached from the Python
// bindings — confirm.
112#![cfg_attr(not(feature = "python"), allow(dead_code))]
113#![warn(
114    clippy::all,
115    clippy::pedantic,
116    clippy::nursery,
117    clippy::cargo,
118    missing_docs,
119    missing_debug_implementations
120)]
// Opt-outs from the lint groups enabled above.
121#![allow(
122    clippy::module_name_repetitions,
123    clippy::missing_errors_doc,
124    clippy::missing_panics_doc,
125    clippy::multiple_crate_versions
126)]
127
128pub mod corpus;
129pub mod document;
130pub mod error;
131pub mod model;
132pub mod parser;
133#[cfg(feature = "python")]
134mod python;
135pub mod source;
136
137#[cfg(feature = "latex_to_unicode")]
138pub mod latex_unicode;
139
140mod library;
141mod writer;
142
// Installs mimalloc as the global allocator when the `python-extension`
// feature is enabled, on every target except Linux/aarch64.
// NOTE(review): the reason for excluding Linux/aarch64 is not visible in this
// file — confirm and document it.
143#[cfg(all(
144    feature = "python-extension",
145    not(all(target_os = "linux", target_arch = "aarch64"))
146))]
147#[global_allocator]
148static PYTHON_EXTENSION_ALLOCATOR: mimalloc::MiMalloc = mimalloc::MiMalloc;
149
150pub use corpus::{
151    CorpusEvent, CorpusSource, DuplicateKeyGroup, DuplicateKeyOccurrence, ParsedCorpus,
152};
153pub use document::{
154    Diagnostic, DiagnosticCode, DiagnosticSeverity, DiagnosticTarget, EntryDelimiter,
155    ExpansionOptions, ParseEvent, ParseFlow, ParseStatus, ParseSummary, ParsedBlock, ParsedComment,
156    ParsedDocument, ParsedEntry, ParsedEntryStatus, ParsedFailedBlock, ParsedField, ParsedPreamble,
157    ParsedSource, ParsedString, ParsedValue, StreamingSummary, UnresolvedVariablePolicy,
158    ValueDelimiter,
159};
160pub use error::{Error, Result, SourceId, SourceSpan};
161pub use library::{
162    Block, Comment, FailedBlock, FieldNameCase, FieldNormalizeOptions, IssueSummary, Library,
163    LibraryBuilder, LibraryStats, MonthStyle, Parser, Preamble, SortOptions, StringDefinition,
164    ValidationReport,
165};
166pub use model::{
167    canonical_biblatex_field_alias, classify_resource_field, normalize_biblatex_field_name,
168    normalize_doi, normalize_field_name_ascii, parse_date_parts, parse_names, DateParseError,
169    DateParts, Entry, EntryType, Field, PersonName, ResourceField, ResourceKind, ValidationError,
170    ValidationLevel, ValidationSeverity, Value,
171};
172pub use parser::{parse_bibtex, ParsedItem};
173pub use source::SourceMap;
174pub use writer::{
175    document_to_string, selected_entries_to_string, to_file, to_string, RawWriteMode,
176    TrailingComma, Writer, WriterConfig,
177};
178
179/// Re-export of common parser functions
180pub mod prelude {
181    pub use crate::{
182        canonical_biblatex_field_alias, classify_resource_field, document_to_string,
183        normalize_biblatex_field_name, normalize_doi, normalize_field_name_ascii, parse_bibtex,
184        parse_date_parts, parse_names, selected_entries_to_string, Block, Comment, CorpusEvent,
185        CorpusSource, DateParseError, DateParts, Diagnostic, DiagnosticCode, DiagnosticSeverity,
186        DiagnosticTarget, DuplicateKeyGroup, DuplicateKeyOccurrence, Entry, EntryDelimiter,
187        EntryType, Error, ExpansionOptions, FailedBlock, Field, FieldNameCase,
188        FieldNormalizeOptions, IssueSummary, Library, LibraryBuilder, LibraryStats, MonthStyle,
189        ParseEvent, ParseFlow, ParseStatus, ParseSummary, ParsedBlock, ParsedComment, ParsedCorpus,
190        ParsedDocument, ParsedEntry, ParsedEntryStatus, ParsedFailedBlock, ParsedField, ParsedItem,
191        ParsedPreamble, ParsedSource, ParsedString, ParsedValue, Parser, PersonName, Preamble,
192        RawWriteMode, ResourceField, ResourceKind, Result, SortOptions, SourceId, SourceMap,
193        SourceSpan, StreamingSummary, StringDefinition, TrailingComma, UnresolvedVariablePolicy,
194        ValidationError, ValidationLevel, ValidationReport, ValidationSeverity, Value,
195        ValueDelimiter, Writer, WriterConfig,
196    };
197}
198
199/// Parse a BibTeX library from a string.
200pub fn parse(input: &str) -> Result<Library<'_>> {
201    Library::parser().parse(input)
202}
203
204/// Parse a BibTeX library from a file.
205pub fn parse_file(path: impl AsRef<std::path::Path>) -> Result<Library<'static>> {
206    let content = std::fs::read_to_string(path)?;
207    parse(&content).map(Library::into_owned)
208}
209
// PyO3 module initializer, compiled only when the `python` feature is enabled.
// All registration work is delegated to `python::register`.
// Plain `//` comments are used on purpose: a `///` doc comment on a
// `#[pymodule]` function becomes the Python module's docstring.
#[cfg(feature = "python")]
#[pyo3::pymodule]
fn _native(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> {
    python::register(m)?;
    Ok(())
}