bibtex_parser/lib.rs
1#![deny(clippy::all)]
2//! # bibtex-parser
3//!
4//! BibTeX parser for Rust.
5//!
6//! `bibtex-parser` supports strict parsing by default, explicit tolerant
7//! recovery for malformed input, string and month expansion, comments and
8//! preambles, validation, query/edit helpers, and configurable writing.
9//!
10//! ## Features
11//!
12//! - Borrowed values where possible for low-allocation parsing.
13//! - String variables, concatenation, and standard month constants.
14//! - Entries, strings, preambles, comments, and tolerant failures in source order.
15//! - Opt-in source-span capture.
16//! - DOI normalization, duplicate detection, validation, sorting, and field normalization.
17//! - Configurable writer for formatting and file output.
18//! - Optional `parallel` feature for parsing multiple files concurrently.
19//! - Optional `latex_to_unicode` feature for LaTeX accent conversion helpers.
20//!
21//! ## Parse
22//!
23//! ```
24//! use bibtex_parser::Library;
25//!
26//! let input = r#"
27//! @string{venue = "VLDB"}
28//! @article{paper,
29//! author = "Jane Doe and John Smith",
30//! title = "Example Paper",
31//! journal = venue,
32//! year = 2026
33//! }
34//! "#;
35//!
36//! let library = Library::parse(input)?;
37//! let entry = library.find_by_key("paper").unwrap();
38//!
39//! assert_eq!(entry.get("journal"), Some("VLDB"));
40//! assert_eq!(entry.year(), Some("2026".to_string()));
41//! assert_eq!(entry.authors().len(), 2);
42//! # Ok::<(), bibtex_parser::Error>(())
43//! ```
44//!
45//! ## Tolerant Recovery
46//!
47//! ```
48//! use bibtex_parser::{Block, Library};
49//!
50//! let library = Library::parser()
51//! .tolerant()
52//! .capture_source()
53//! .parse(r#"
54//! @article{ok, title = "Good"}
55//! @article{bad, title = "Missing close"
56//! @book{recovered, title = "Recovered"}
57//! "#)?;
58//!
59//! assert_eq!(library.entries().len(), 2);
60//! assert_eq!(library.failed_blocks().len(), 1);
61//!
62//! let has_failure_span = library.blocks().iter().any(|block| {
63//! matches!(block, Block::Failed(failed) if failed.source.is_some())
64//! });
65//! assert!(has_failure_span);
66//! # Ok::<(), bibtex_parser::Error>(())
67//! ```
68//!
69//! ## Write
70//!
71//! ```
72//! use bibtex_parser::{Library, Writer, WriterConfig};
73//!
74//! let library = Library::parse(r#"@article{paper, title = "Example Paper"}"#)?;
75//! let mut output = Vec::new();
76//! let config = WriterConfig {
77//! align_values: true,
78//! ..Default::default()
79//! };
80//!
81//! Writer::with_config(&mut output, config).write_library(&library)?;
82//! assert!(String::from_utf8(output).unwrap().contains("@article{paper"));
83//! # Ok::<(), bibtex_parser::Error>(())
84//! ```
85//!
86//! ## `Library` Versus `ParsedDocument`
87//!
88//! Use [`Library`] when application code wants structured bibliography data.
89//! Use [`ParsedDocument`] when tooling needs source-order blocks, diagnostics,
90//! partial results, or source-preserving metadata.
91//!
92//! ```
93//! use bibtex_parser::{ParsedBlock, Parser};
94//!
95//! let input = r#"
96//! % retained comment
97//! @article{paper, title = "Example Paper"}
98//! "#;
99//!
100//! let document = Parser::new()
101//! .capture_source()
102//! .parse_document(input)?;
103//!
104//! assert_eq!(document.library().entries().len(), 1);
105//! assert_eq!(document.entries()[0].key(), "paper");
106//! assert!(matches!(document.blocks()[0], ParsedBlock::Comment(0)));
107//! assert!(document.entries()[0].source.is_some());
108//! # Ok::<(), bibtex_parser::Error>(())
109//! ```
110
// Crate-wide lint policy: `unsafe` code is rejected outright, the listed
// Clippy groups plus `missing_docs` and `missing_debug_implementations` are
// reported as warnings, and the `allow` list switches individual Clippy lints
// back off.
//
// NOTE(review): `clippy::all` is also set to `deny` by the very first
// attribute of this file. Repeating it under `warn` here may override that
// earlier `deny` for the same lint group — confirm which level is intended.
#![forbid(unsafe_code)]
#![warn(
    clippy::all,
    clippy::pedantic,
    clippy::nursery,
    clippy::cargo,
    missing_docs,
    missing_debug_implementations
)]
#![allow(
    clippy::module_name_repetitions,
    clippy::missing_errors_doc,
    clippy::missing_panics_doc,
    clippy::multiple_crate_versions
)]
126
127pub mod corpus;
128pub mod document;
129pub mod error;
130pub mod model;
131pub mod parser;
132#[cfg(feature = "python")]
133mod python;
134pub mod source;
135
136#[cfg(feature = "latex_to_unicode")]
137pub mod latex_unicode;
138
139mod library;
140mod writer;
141
// Registers mimalloc as the global allocator, but only when the
// `python-extension` feature is enabled and the target is not Linux on
// AArch64.
// NOTE(review): the reason for excluding linux/aarch64 is not recorded here —
// presumably a mimalloc build or runtime problem on that target; confirm.
// NOTE(review): this item is gated on `python-extension`, whereas the
// `python` module and the `_native` entry point in this file are gated on
// `python` — confirm the two feature names are intentionally distinct.
#[cfg(all(
    feature = "python-extension",
    not(all(target_os = "linux", target_arch = "aarch64"))
))]
#[global_allocator]
static PYTHON_EXTENSION_ALLOCATOR: mimalloc::MiMalloc = mimalloc::MiMalloc;
148
149pub use corpus::{
150 CorpusEvent, CorpusSource, DuplicateKeyGroup, DuplicateKeyOccurrence, ParsedCorpus,
151};
152pub use document::{
153 Diagnostic, DiagnosticCode, DiagnosticSeverity, DiagnosticTarget, EntryDelimiter,
154 ExpansionOptions, ParseEvent, ParseFlow, ParseStatus, ParseSummary, ParsedBlock, ParsedComment,
155 ParsedDocument, ParsedEntry, ParsedEntryStatus, ParsedFailedBlock, ParsedField, ParsedPreamble,
156 ParsedSource, ParsedString, ParsedValue, StreamingSummary, UnresolvedVariablePolicy,
157 ValueDelimiter,
158};
159pub use error::{Error, Result, SourceId, SourceSpan};
160pub use library::{
161 Block, Comment, FailedBlock, FieldNameCase, FieldNormalizeOptions, IssueSummary, Library,
162 LibraryBuilder, LibraryStats, MonthStyle, Parser, Preamble, SortOptions, StringDefinition,
163 ValidationReport,
164};
165pub use model::{
166 canonical_biblatex_field_alias, classify_resource_field, normalize_biblatex_field_name,
167 normalize_doi, normalize_field_name_ascii, parse_date_parts, parse_names, DateParseError,
168 DateParts, Entry, EntryType, Field, PersonName, ResourceField, ResourceKind, ValidationError,
169 ValidationLevel, ValidationSeverity, Value,
170};
171pub use parser::{parse_bibtex, ParsedItem};
172pub use source::SourceMap;
173pub use writer::{
174 document_to_string, selected_entries_to_string, to_file, to_string, RawWriteMode,
175 TrailingComma, Writer, WriterConfig,
176};
177
/// Convenience re-exports of the crate's most commonly used items.
///
/// Bundles the library, document, corpus, diagnostic, model, and writer types
/// together with the free helper functions (for example `parse_bibtex`,
/// `normalize_doi`, and `parse_names`) so that they can all be brought into
/// scope with a single `use bibtex_parser::prelude::*;`.
///
/// The crate-level `parse` and `parse_file` functions and the writer helpers
/// `to_string` and `to_file` are not re-exported here.
pub mod prelude {
    pub use crate::{
        canonical_biblatex_field_alias, classify_resource_field, document_to_string,
        normalize_biblatex_field_name, normalize_doi, normalize_field_name_ascii, parse_bibtex,
        parse_date_parts, parse_names, selected_entries_to_string, Block, Comment, CorpusEvent,
        CorpusSource, DateParseError, DateParts, Diagnostic, DiagnosticCode, DiagnosticSeverity,
        DiagnosticTarget, DuplicateKeyGroup, DuplicateKeyOccurrence, Entry, EntryDelimiter,
        EntryType, Error, ExpansionOptions, FailedBlock, Field, FieldNameCase,
        FieldNormalizeOptions, IssueSummary, Library, LibraryBuilder, LibraryStats, MonthStyle,
        ParseEvent, ParseFlow, ParseStatus, ParseSummary, ParsedBlock, ParsedComment, ParsedCorpus,
        ParsedDocument, ParsedEntry, ParsedEntryStatus, ParsedFailedBlock, ParsedField, ParsedItem,
        ParsedPreamble, ParsedSource, ParsedString, ParsedValue, Parser, PersonName, Preamble,
        RawWriteMode, ResourceField, ResourceKind, Result, SortOptions, SourceId, SourceMap,
        SourceSpan, StreamingSummary, StringDefinition, TrailingComma, UnresolvedVariablePolicy,
        ValidationError, ValidationLevel, ValidationReport, ValidationSeverity, Value,
        ValueDelimiter, Writer, WriterConfig,
    };
}
197
/// Parse a BibTeX library from a string.
///
/// Shorthand for `Library::parser().parse(input)`: the input is handed to the
/// parser returned by [`Library::parser`] without any further configuration.
/// The returned [`Library`] is tied to the lifetime of `input`.
///
/// # Errors
///
/// Returns whatever error the underlying parser reports for `input`.
pub fn parse(input: &str) -> Result<Library<'_>> {
    Library::parser().parse(input)
}
202
203/// Parse a BibTeX library from a file.
204pub fn parse_file(path: impl AsRef<std::path::Path>) -> Result<Library<'static>> {
205 let content = std::fs::read_to_string(path)?;
206 parse(&content).map(Library::into_owned)
207}
208
// Entry point of the `_native` Python extension module; compiled only when
// the `python` feature is enabled. All registration work is delegated to
// `python::register`.
// NOTE(review): this is deliberately a plain `//` comment — pyo3 is
// understood to turn `///` docs on a `#[pymodule]` function into the Python
// module docstring, so switching to `///` could change runtime-visible text;
// confirm before converting.
#[cfg(feature = "python")]
#[pyo3::pymodule]
fn _native(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> {
    python::register(m)
}