Skip to main content

bibtex_parser/
writer.rs

1//! BibTeX writer for serializing libraries
2
3use crate::{Block, Entry, Library, ParsedBlock, ParsedDocument, ParsedEntry, Result, Value};
4use std::borrow::Cow;
5use std::io::{self, Write};
6
/// Configuration for writing BibTeX.
///
/// The defaults quoted on each field are the values produced by
/// `WriterConfig::default()`.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Indentation string written before each field line (default: "  ")
    pub indent: String,
    /// Whether to pad field names with spaces so the `=` signs of one entry
    /// line up (default: false)
    pub align_values: bool,
    /// Maximum line length for wrapping (default: 80)
    ///
    /// NOTE(review): no writer code visible in this file reads this value —
    /// confirm whether wrapping is implemented elsewhere.
    pub max_line_length: usize,
    /// Whether to sort entries by key (default: false)
    pub sort_entries: bool,
    /// Whether to sort fields by name within entries (default: false)
    pub sort_fields: bool,
    /// Raw-backed document writing behavior (default: `RawWriteMode::Preserve`).
    pub raw_write_mode: RawWriteMode,
    /// Trailing comma behavior for structured entry writing
    /// (default: `TrailingComma::Omit`).
    pub trailing_comma: TrailingComma,
    /// Separator written between document blocks (default: "\n").
    pub entry_separator: String,
}
27
/// Raw-backed document writing behavior.
///
/// Consulted when writing parsed documents: in `Preserve` mode the writer
/// first tries to emit the retained source text of a block and only falls
/// back to structured output when no raw text is available.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RawWriteMode {
    /// Reuse retained raw text where possible.
    Preserve,
    /// Ignore retained raw text and write normalized structured data.
    Normalize,
}
36
/// Trailing comma behavior for structured entry writing.
///
/// Applies to the last field of an entry written from structured data, and
/// to the last field appended to a preserved raw entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrailingComma {
    /// Omit a trailing comma after the final field.
    Omit,
    /// Add a trailing comma after the final field.
    Always,
}
45
46impl Default for WriterConfig {
47    fn default() -> Self {
48        Self {
49            indent: "  ".to_string(),
50            align_values: false,
51            max_line_length: 80,
52            sort_entries: false,
53            sort_fields: false,
54            raw_write_mode: RawWriteMode::Preserve,
55            trailing_comma: TrailingComma::Omit,
56            entry_separator: "\n".to_string(),
57        }
58    }
59}
60
/// BibTeX writer.
///
/// Wraps any `Write` sink together with the `WriterConfig` that controls how
/// entries, strings, preambles and comments are serialized into it.
#[derive(Debug)]
pub struct Writer<W: Write> {
    /// Destination that receives the serialized bytes.
    writer: W,
    /// Formatting options applied by the write methods.
    config: WriterConfig,
}
67
68impl<W: Write> Writer<W> {
69    /// Create a new writer with default configuration
70    pub fn new(writer: W) -> Self {
71        Self {
72            writer,
73            config: WriterConfig::default(),
74        }
75    }
76
    /// Create a new writer with custom configuration.
    ///
    /// Takes ownership of both the sink and the configuration; use
    /// `into_inner` to get the sink back.
    pub const fn with_config(writer: W, config: WriterConfig) -> Self {
        Self { writer, config }
    }
81
    /// Access the writer configuration mutably.
    ///
    /// Changes made through the returned reference affect subsequent write
    /// calls on this writer.
    #[must_use]
    pub fn config_mut(&mut self) -> &mut WriterConfig {
        &mut self.config
    }
87
    /// Consume the writer and return the underlying writer.
    ///
    /// The configuration is dropped. This method does not flush the sink.
    #[must_use]
    pub fn into_inner(self) -> W {
        self.writer
    }
93
94    /// Write a complete library.
95    pub fn write_library(&mut self, library: &Library) -> io::Result<()> {
96        if self.config.sort_entries {
97            return self.write_library_sorted(library);
98        }
99
100        for (index, block) in library.blocks().into_iter().enumerate() {
101            if index > 0 {
102                writeln!(self.writer)?;
103            }
104            match block {
105                Block::Entry(entry, _) => self.write_entry(entry)?,
106                Block::String(definition) => {
107                    self.write_string(&definition.name, &definition.value)?;
108                }
109                Block::Preamble(preamble) => self.write_preamble(&preamble.value)?,
110                Block::Comment(comment) => self.write_comment(comment.text())?,
111                Block::Failed(failed) => self.writer.write_all(failed.raw.as_bytes())?,
112            }
113        }
114
115        Ok(())
116    }
117
    /// Write a parsed document, reusing retained raw blocks when configured.
    ///
    /// Equivalent to `write_document_with_raw_source` with no external source
    /// text, so only raw text retained on the blocks themselves can be reused.
    ///
    /// # Errors
    ///
    /// Returns any I/O error reported by the underlying writer.
    pub fn write_document(&mut self, document: &ParsedDocument) -> io::Result<()> {
        self.write_document_with_raw_source(document, None)
    }
122
123    pub(crate) fn write_document_with_raw_source(
124        &mut self,
125        document: &ParsedDocument,
126        raw_source: Option<&str>,
127    ) -> io::Result<()> {
128        for (index, block) in document.blocks().iter().copied().enumerate() {
129            if index > 0 {
130                self.writer
131                    .write_all(self.config.entry_separator.as_bytes())?;
132            }
133
134            match block {
135                ParsedBlock::Entry(entry_index) => {
136                    self.write_parsed_entry_with_raw_source(
137                        &document.entries()[entry_index],
138                        raw_source,
139                    )?;
140                }
141                ParsedBlock::String(string_index) => {
142                    let string = &document.strings()[string_index];
143                    if self.config.raw_write_mode == RawWriteMode::Preserve {
144                        if let Some(raw) =
145                            raw_text_with_source(string.raw.as_deref(), raw_source, string.source)
146                        {
147                            self.writer.write_all(raw.as_bytes())?;
148                            continue;
149                        }
150                    }
151                    self.write_string(&string.name, &string.value.value)?;
152                }
153                ParsedBlock::Preamble(preamble_index) => {
154                    let preamble = &document.preambles()[preamble_index];
155                    if self.config.raw_write_mode == RawWriteMode::Preserve {
156                        if let Some(raw) = raw_text_with_source(
157                            preamble.raw.as_deref(),
158                            raw_source,
159                            preamble.source,
160                        ) {
161                            self.writer.write_all(raw.as_bytes())?;
162                            continue;
163                        }
164                    }
165                    self.write_preamble(&preamble.value.value)?;
166                }
167                ParsedBlock::Comment(comment_index) => {
168                    let comment = &document.comments()[comment_index];
169                    if self.config.raw_write_mode == RawWriteMode::Preserve {
170                        if let Some(raw) =
171                            raw_text_with_source(comment.raw.as_deref(), raw_source, comment.source)
172                        {
173                            self.writer.write_all(raw.as_bytes())?;
174                            continue;
175                        }
176                    }
177                    self.write_comment(&comment.text)?;
178                }
179                ParsedBlock::Failed(failed_index) => {
180                    self.writer
181                        .write_all(document.failed_blocks()[failed_index].raw.as_bytes())?;
182                }
183            }
184        }
185
186        Ok(())
187    }
188
    /// Write selected parsed-document entries in source order.
    ///
    /// Non-entry blocks are skipped. Duplicate keys in `keys` do not duplicate
    /// output entries. Written entries are separated by `entry_separator`.
    ///
    /// # Errors
    ///
    /// Returns any I/O error reported by the underlying writer.
    pub fn write_selected_entries(
        &mut self,
        document: &ParsedDocument,
        keys: &[&str],
    ) -> io::Result<()> {
        self.write_selected_entries_with_raw_source(document, keys, None)
    }
200
201    pub(crate) fn write_selected_entries_with_raw_source(
202        &mut self,
203        document: &ParsedDocument,
204        keys: &[&str],
205        raw_source: Option<&str>,
206    ) -> io::Result<()> {
207        let mut written = 0usize;
208        for block in document.blocks().iter().copied() {
209            let ParsedBlock::Entry(entry_index) = block else {
210                continue;
211            };
212            let entry = &document.entries()[entry_index];
213            if !keys.iter().any(|key| *key == entry.key()) {
214                continue;
215            }
216            if written > 0 {
217                self.writer
218                    .write_all(self.config.entry_separator.as_bytes())?;
219            }
220            self.write_parsed_entry_with_raw_source(entry, raw_source)?;
221            written += 1;
222        }
223
224        Ok(())
225    }
226
227    fn write_library_sorted(&mut self, library: &Library) -> io::Result<()> {
228        // Write preambles
229        for preamble in library.preambles() {
230            self.write_preamble(&preamble.value)?;
231            writeln!(self.writer)?;
232        }
233
234        // Write strings
235        let mut strings: Vec<_> = library.strings().iter().collect();
236        if self.config.sort_entries {
237            strings.sort_by(|a, b| a.name.cmp(&b.name));
238        }
239
240        for definition in strings {
241            self.write_string(&definition.name, &definition.value)?;
242            writeln!(self.writer)?;
243        }
244
245        // Write entries
246        let mut entries = library.entries().iter().collect::<Vec<_>>();
247        if self.config.sort_entries {
248            entries.sort_by(|a, b| a.key.cmp(&b.key));
249        }
250
251        for (i, entry) in entries.iter().enumerate() {
252            if i > 0 {
253                writeln!(self.writer)?;
254            }
255            self.write_entry(entry)?;
256        }
257
258        Ok(())
259    }
260
261    /// Write a single entry
262    pub fn write_entry(&mut self, entry: &Entry) -> io::Result<()> {
263        writeln!(self.writer, "@{}{{{},", entry.ty, entry.key)?;
264
265        let mut fields = entry.fields().to_vec();
266        if self.config.sort_fields {
267            fields.sort_by(|a, b| a.name.cmp(&b.name));
268        }
269
270        // Calculate alignment if needed
271        let max_name_len = if self.config.align_values {
272            fields.iter().map(|f| f.name.len()).max().unwrap_or(0)
273        } else {
274            0
275        };
276
277        for (i, field) in fields.iter().enumerate() {
278            write!(self.writer, "{}", self.config.indent)?;
279            write!(self.writer, "{}", field.name)?;
280
281            if self.config.align_values {
282                let padding = max_name_len - field.name.len();
283                write!(self.writer, "{}", " ".repeat(padding))?;
284            }
285
286            write!(self.writer, " = ")?;
287            self.write_value(&field.value)?;
288
289            if i < fields.len() - 1 || self.config.trailing_comma == TrailingComma::Always {
290                writeln!(self.writer, ",")?;
291            } else {
292                writeln!(self.writer)?;
293            }
294        }
295
296        writeln!(self.writer, "}}")?;
297        Ok(())
298    }
299
300    fn write_parsed_entry_with_raw_source(
301        &mut self,
302        entry: &ParsedEntry,
303        raw_source: Option<&str>,
304    ) -> io::Result<()> {
305        if self.config.raw_write_mode == RawWriteMode::Preserve {
306            if let Some(raw) = patched_entry_raw(entry, raw_source, &self.config) {
307                self.writer.write_all(raw.as_bytes())?;
308                return Ok(());
309            }
310        }
311
312        self.write_entry(&entry.clone().into_entry())
313    }
314
315    /// Write a string definition
316    fn write_string(&mut self, name: &str, value: &Value) -> io::Result<()> {
317        write!(self.writer, "@string{{{name} = ")?;
318        self.write_value(value)?;
319        writeln!(self.writer, "}}")?;
320        Ok(())
321    }
322
323    /// Write a preamble
324    fn write_preamble(&mut self, value: &Value) -> io::Result<()> {
325        write!(self.writer, "@preamble{{")?;
326        self.write_value(value)?;
327        writeln!(self.writer, "}}")?;
328        Ok(())
329    }
330
331    /// Write a comment.
332    fn write_comment(&mut self, text: &str) -> io::Result<()> {
333        let trimmed = text.trim_start();
334        if trimmed.starts_with('%') || trimmed.starts_with('@') {
335            self.writer.write_all(text.as_bytes())?;
336            if !text.ends_with('\n') {
337                writeln!(self.writer)?;
338            }
339        } else {
340            writeln!(self.writer, "@comment{{{text}}}")?;
341        }
342        Ok(())
343    }
344
345    /// Write a value
346    fn write_value(&mut self, value: &Value) -> io::Result<()> {
347        self.writer.write_all(value.to_bibtex_source().as_bytes())?;
348        Ok(())
349    }
350}
351
352fn raw_text_with_source<'a>(
353    raw: Option<&'a str>,
354    raw_source: Option<&'a str>,
355    span: Option<crate::SourceSpan>,
356) -> Option<&'a str> {
357    raw.or_else(|| source_slice(raw_source, span?))
358}
359
360fn source_slice(raw_source: Option<&str>, span: crate::SourceSpan) -> Option<&str> {
361    let raw_source = raw_source?;
362    raw_source.get(span.byte_start..span.byte_end)
363}
364
365fn patched_entry_raw<'entry>(
366    entry: &'entry ParsedEntry<'_>,
367    raw_source: Option<&'entry str>,
368    config: &WriterConfig,
369) -> Option<Cow<'entry, str>> {
370    let source = entry.source?;
371    let raw = raw_text_with_source(entry.raw.as_deref(), raw_source, Some(source))?;
372    let mut replacements = Vec::new();
373
374    push_token_replacement(
375        &mut replacements,
376        raw,
377        source.byte_start,
378        entry.entry_type_source,
379        &entry.ty.to_string(),
380        |raw_type| crate::EntryType::parse(raw_type) == entry.ty,
381    )?;
382    push_token_replacement(
383        &mut replacements,
384        raw,
385        source.byte_start,
386        entry.key_source,
387        &entry.key,
388        |raw_key| raw_key == entry.key,
389    )?;
390
391    for removed in entry.removed_field_sources() {
392        let start = removed.byte_start.checked_sub(source.byte_start)?;
393        let end = removed.byte_end.checked_sub(source.byte_start)?;
394        replacements.push((start, end, String::new()));
395    }
396
397    let mut added_fields = Vec::new();
398    for field in &entry.fields {
399        if field.source.is_none()
400            && field.name_source.is_none()
401            && field.value_source.is_none()
402            && field.raw.is_none()
403            && field.value.raw.is_none()
404        {
405            added_fields.push(field);
406            continue;
407        }
408
409        push_token_replacement(
410            &mut replacements,
411            raw,
412            source.byte_start,
413            field.name_source,
414            &field.name,
415            |raw_name| raw_name == field.name,
416        )?;
417
418        if field.value.raw.is_none() {
419            let value_source = field.value_source?;
420            if source_slice(raw_source, value_source).is_none() {
421                let start = value_source.byte_start.checked_sub(source.byte_start)?;
422                let end = value_source.byte_end.checked_sub(source.byte_start)?;
423                replacements.push((start, end, field.value.value.to_bibtex_source()));
424            }
425        }
426    }
427
428    if !entry.removed_field_sources().is_empty() && !added_fields.is_empty() {
429        return None;
430    }
431
432    if !added_fields.is_empty() {
433        let close = entry_close_offset(raw)?;
434        let (comma_insert, added_source) = render_added_fields(raw, close, &added_fields, config)?;
435        if let Some(comma_insert) = comma_insert {
436            replacements.push((comma_insert, comma_insert, ",".to_string()));
437        }
438        replacements.push((close, close, added_source));
439    }
440
441    if replacements.is_empty() {
442        return Some(Cow::Borrowed(raw));
443    }
444
445    replacements.sort_by_key(|(start, _, _)| *start);
446    let mut output = String::with_capacity(raw.len());
447    let mut cursor = 0;
448    for (start, end, replacement) in replacements {
449        if start < cursor || end > raw.len() {
450            return None;
451        }
452        output.push_str(&raw[cursor..start]);
453        output.push_str(&replacement);
454        cursor = end;
455    }
456    output.push_str(&raw[cursor..]);
457    Some(Cow::Owned(output))
458}
459
/// Locate the entry's closing delimiter in `raw`.
///
/// Ignores trailing whitespace and returns the byte offset of the final
/// character when it is `}` or `)`; otherwise returns `None`.
fn entry_close_offset(raw: &str) -> Option<usize> {
    let body = raw.trim_end();
    match body.as_bytes().last() {
        Some(b'}' | b')') => Some(body.len() - 1),
        _ => None,
    }
}
465
466fn render_added_fields(
467    raw: &str,
468    close: usize,
469    fields: &[&crate::ParsedField<'_>],
470    config: &WriterConfig,
471) -> Option<(Option<usize>, String)> {
472    let prefix = raw.get(..close)?;
473    let previous = prefix
474        .bytes()
475        .enumerate()
476        .rfind(|(_, byte)| !byte.is_ascii_whitespace());
477    let mut output = String::new();
478
479    let comma_insert = if matches!(previous, Some((_, b',' | b'{'))) {
480        None
481    } else if prefix.ends_with('\n') {
482        Some(previous?.0 + 1)
483    } else {
484        output.push(',');
485        None
486    };
487    if !prefix.ends_with('\n') {
488        output.push('\n');
489    }
490
491    for (index, field) in fields.iter().enumerate() {
492        output.push_str(&config.indent);
493        output.push_str(&field.name);
494        output.push_str(" = ");
495        output.push_str(&field.value.value.to_bibtex_source());
496        if index < fields.len() - 1 || config.trailing_comma == TrailingComma::Always {
497            output.push(',');
498        }
499        output.push('\n');
500    }
501
502    Some((comma_insert, output))
503}
504
505fn push_token_replacement(
506    replacements: &mut Vec<(usize, usize, String)>,
507    raw: &str,
508    base: usize,
509    span: Option<crate::SourceSpan>,
510    replacement: &str,
511    unchanged: impl FnOnce(&str) -> bool,
512) -> Option<()> {
513    let span = span?;
514    let start = span.byte_start.checked_sub(base)?;
515    let end = span.byte_end.checked_sub(base)?;
516    let original = raw.get(start..end)?;
517    if !unchanged(original) {
518        replacements.push((start, end, replacement.to_string()));
519    }
520    Some(())
521}
522
523/// Convenience function to write a library to a string.
524#[must_use = "Check the result to detect serialization errors"]
525pub fn to_string(library: &Library) -> Result<String> {
526    let mut buf = Vec::new();
527    let mut writer = Writer::new(&mut buf);
528    writer.write_library(library)?;
529    Ok(String::from_utf8(buf).expect("valid UTF-8"))
530}
531
532/// Convenience function to write a parsed document to a string.
533#[must_use = "Check the result to detect serialization errors"]
534pub fn document_to_string(document: &ParsedDocument) -> Result<String> {
535    let mut buf = Vec::new();
536    let mut writer = Writer::new(&mut buf);
537    writer.write_document(document)?;
538    Ok(String::from_utf8(buf).expect("valid UTF-8"))
539}
540
541/// Convenience function to write selected parsed-document entries to a string.
542#[must_use = "Check the result to detect serialization errors"]
543pub fn selected_entries_to_string(document: &ParsedDocument, keys: &[&str]) -> Result<String> {
544    let mut buf = Vec::new();
545    let mut writer = Writer::new(&mut buf);
546    writer.write_selected_entries(document, keys)?;
547    Ok(String::from_utf8(buf).expect("valid UTF-8"))
548}
549
550/// Convenience function to write a library to a file.
551#[must_use = "Check the result to detect IO or serialization errors"]
552pub fn to_file(library: &Library, path: impl AsRef<std::path::Path>) -> Result<()> {
553    let file = std::fs::File::create(path)?;
554    let mut writer = Writer::new(file);
555    writer.write_library(library)?;
556    Ok(())
557}
558
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{EntryType, Field};
    use std::borrow::Cow;

    /// A default-configured writer renders the entry header and each field
    /// as `name = value`.
    #[test]
    fn test_write_entry() {
        let fields = vec![
            Field::new("author", Value::Literal(Cow::Borrowed("John Doe"))),
            Field::new("title", Value::Literal(Cow::Borrowed("Test Article"))),
            Field::new("year", Value::Number(2023)),
        ];
        let entry = Entry {
            ty: EntryType::Article,
            key: Cow::Borrowed("test2023"),
            fields,
        };

        let mut writer = Writer::new(Vec::new());
        writer.write_entry(&entry).unwrap();
        let rendered = String::from_utf8(writer.into_inner()).unwrap();

        let expected_fragments = [
            "@article{test2023,",
            "author = {John Doe}",
            "title = {Test Article}",
            "year = 2023",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment));
        }
    }
}