Skip to main content

bibtex_parser/
writer.rs

1//! BibTeX writer for serializing libraries
2
3use crate::{Block, Entry, Library, ParsedBlock, ParsedDocument, ParsedEntry, Result, Value};
4use std::borrow::Cow;
5use std::io::{self, Write};
6
/// Configuration for writing BibTeX.
///
/// Every field is public, so a value can be built with struct-update syntax on
/// top of [`WriterConfig::default`] or adjusted later through
/// [`Writer::config_mut`].
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Indentation string written before each field name (default: "  ")
    pub indent: String,
    /// Whether to pad field names with spaces so that the `=` signs of one
    /// entry line up (default: false)
    pub align_values: bool,
    /// Maximum line length for wrapping (default: 80)
    ///
    /// NOTE(review): nothing in this file reads this value; confirm whether
    /// wrapping is implemented elsewhere.
    pub max_line_length: usize,
    /// Whether to sort entries by key (default: false)
    ///
    /// When set, `write_library` emits preambles first, then string
    /// definitions sorted by name, then entries sorted by key.
    pub sort_entries: bool,
    /// Whether to sort fields within entries by field name (default: false)
    pub sort_fields: bool,
    /// Raw-backed document writing behavior (default: `Preserve`).
    pub raw_write_mode: RawWriteMode,
    /// Trailing comma behavior for structured entry writing (default: `Omit`).
    pub trailing_comma: TrailingComma,
    /// Separator written between document blocks (default: "\n").
    ///
    /// Used by the parsed-document writers; `write_library` separates blocks
    /// with a plain newline instead.
    pub entry_separator: String,
}
27
/// Raw-backed document writing behavior.
///
/// Consulted when writing parsed documents: it decides whether retained source
/// text is copied through or the structured data is re-serialized.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RawWriteMode {
    /// Reuse retained raw text where possible; blocks without usable raw text
    /// fall back to structured writing.
    Preserve,
    /// Ignore retained raw text and write normalized structured data.
    Normalize,
}
36
/// Trailing comma behavior for structured entry writing.
///
/// Only affects the comma after the last field of an entry; every earlier
/// field is always followed by a comma.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrailingComma {
    /// Omit a trailing comma after the final field.
    Omit,
    /// Add a trailing comma after the final field.
    Always,
}
45
46impl Default for WriterConfig {
47    fn default() -> Self {
48        Self {
49            indent: "  ".to_string(),
50            align_values: false,
51            max_line_length: 80,
52            sort_entries: false,
53            sort_fields: false,
54            raw_write_mode: RawWriteMode::Preserve,
55            trailing_comma: TrailingComma::Omit,
56            entry_separator: "\n".to_string(),
57        }
58    }
59}
60
/// BibTeX writer.
///
/// Pairs an output sink implementing [`Write`] with the [`WriterConfig`] that
/// controls formatting. The sink can be recovered with `into_inner`.
#[derive(Debug)]
pub struct Writer<W: Write> {
    /// Destination that receives the serialized bytes.
    writer: W,
    /// Formatting options consulted by the write methods.
    config: WriterConfig,
}
67
68impl<W: Write> Writer<W> {
69    /// Create a new writer with default configuration
70    pub fn new(writer: W) -> Self {
71        Self {
72            writer,
73            config: WriterConfig::default(),
74        }
75    }
76
77    /// Create a new writer with custom configuration
78    pub const fn with_config(writer: W, config: WriterConfig) -> Self {
79        Self { writer, config }
80    }
81
82    /// Access the writer configuration mutably
83    #[must_use]
84    pub fn config_mut(&mut self) -> &mut WriterConfig {
85        &mut self.config
86    }
87
88    /// Consume the writer and return the underlying writer
89    #[must_use]
90    pub fn into_inner(self) -> W {
91        self.writer
92    }
93
94    /// Write a complete library.
95    pub fn write_library(&mut self, library: &Library) -> io::Result<()> {
96        if self.config.sort_entries {
97            return self.write_library_sorted(library);
98        }
99
100        for (index, block) in library.blocks().into_iter().enumerate() {
101            if index > 0 {
102                writeln!(self.writer)?;
103            }
104            match block {
105                Block::Entry(entry, _) => self.write_entry(entry)?,
106                Block::String(definition) => {
107                    self.write_string(&definition.name, &definition.value)?;
108                }
109                Block::Preamble(preamble) => self.write_preamble(&preamble.value)?,
110                Block::Comment(comment) => self.write_comment(comment.text())?,
111                Block::Failed(failed) => self.writer.write_all(failed.raw.as_bytes())?,
112            }
113        }
114
115        Ok(())
116    }
117
118    /// Write a parsed document, reusing retained raw blocks when configured.
119    pub fn write_document(&mut self, document: &ParsedDocument) -> io::Result<()> {
120        self.write_document_with_raw_source(document, None)
121    }
122
123    pub(crate) fn write_document_with_raw_source(
124        &mut self,
125        document: &ParsedDocument,
126        raw_source: Option<&str>,
127    ) -> io::Result<()> {
128        for (index, block) in document.blocks().iter().copied().enumerate() {
129            if index > 0 {
130                self.writer
131                    .write_all(self.config.entry_separator.as_bytes())?;
132            }
133
134            match block {
135                ParsedBlock::Entry(entry_index) => {
136                    self.write_parsed_entry_with_raw_source(
137                        &document.entries()[entry_index],
138                        raw_source,
139                    )?;
140                }
141                ParsedBlock::String(string_index) => {
142                    let string = &document.strings()[string_index];
143                    if self.config.raw_write_mode == RawWriteMode::Preserve {
144                        if let Some(raw) =
145                            raw_text_with_source(string.raw.as_deref(), raw_source, string.source)
146                        {
147                            self.writer.write_all(raw.as_bytes())?;
148                            continue;
149                        }
150                    }
151                    self.write_string(&string.name, &string.value.value)?;
152                }
153                ParsedBlock::Preamble(preamble_index) => {
154                    let preamble = &document.preambles()[preamble_index];
155                    if self.config.raw_write_mode == RawWriteMode::Preserve {
156                        if let Some(raw) = raw_text_with_source(
157                            preamble.raw.as_deref(),
158                            raw_source,
159                            preamble.source,
160                        ) {
161                            self.writer.write_all(raw.as_bytes())?;
162                            continue;
163                        }
164                    }
165                    self.write_preamble(&preamble.value.value)?;
166                }
167                ParsedBlock::Comment(comment_index) => {
168                    let comment = &document.comments()[comment_index];
169                    if self.config.raw_write_mode == RawWriteMode::Preserve {
170                        if let Some(raw) =
171                            raw_text_with_source(comment.raw.as_deref(), raw_source, comment.source)
172                        {
173                            self.writer.write_all(raw.as_bytes())?;
174                            continue;
175                        }
176                    }
177                    self.write_comment(&comment.text)?;
178                }
179                ParsedBlock::Failed(failed_index) => {
180                    self.writer
181                        .write_all(document.failed_blocks()[failed_index].raw.as_bytes())?;
182                }
183            }
184        }
185
186        Ok(())
187    }
188
189    /// Write selected parsed-document entries in source order.
190    ///
191    /// Non-entry blocks are skipped. Duplicate keys in `keys` do not duplicate
192    /// output entries.
193    pub fn write_selected_entries(
194        &mut self,
195        document: &ParsedDocument,
196        keys: &[&str],
197    ) -> io::Result<()> {
198        self.write_selected_entries_with_raw_source(document, keys, None)
199    }
200
201    pub(crate) fn write_selected_entries_with_raw_source(
202        &mut self,
203        document: &ParsedDocument,
204        keys: &[&str],
205        raw_source: Option<&str>,
206    ) -> io::Result<()> {
207        let mut written = 0usize;
208        for block in document.blocks().iter().copied() {
209            let ParsedBlock::Entry(entry_index) = block else {
210                continue;
211            };
212            let entry = &document.entries()[entry_index];
213            if !keys.iter().any(|key| *key == entry.key()) {
214                continue;
215            }
216            if written > 0 {
217                self.writer
218                    .write_all(self.config.entry_separator.as_bytes())?;
219            }
220            self.write_parsed_entry_with_raw_source(entry, raw_source)?;
221            written += 1;
222        }
223
224        Ok(())
225    }
226
227    fn write_library_sorted(&mut self, library: &Library) -> io::Result<()> {
228        // Write preambles
229        for preamble in library.preambles() {
230            self.write_preamble(&preamble.value)?;
231            writeln!(self.writer)?;
232        }
233
234        // Write strings
235        let mut strings: Vec<_> = library.strings().iter().collect();
236        if self.config.sort_entries {
237            strings.sort_by(|a, b| a.name.cmp(&b.name));
238        }
239
240        for definition in strings {
241            self.write_string(&definition.name, &definition.value)?;
242            writeln!(self.writer)?;
243        }
244
245        // Write entries
246        let mut entries = library.entries().iter().collect::<Vec<_>>();
247        if self.config.sort_entries {
248            entries.sort_by(|a, b| a.key.cmp(&b.key));
249        }
250
251        for (i, entry) in entries.iter().enumerate() {
252            if i > 0 {
253                writeln!(self.writer)?;
254            }
255            self.write_entry(entry)?;
256        }
257
258        Ok(())
259    }
260
261    /// Write a single entry
262    pub fn write_entry(&mut self, entry: &Entry) -> io::Result<()> {
263        writeln!(self.writer, "@{}{{{},", entry.ty, entry.key)?;
264
265        let mut fields = entry.fields().to_vec();
266        if self.config.sort_fields {
267            fields.sort_by(|a, b| a.name.cmp(&b.name));
268        }
269
270        // Calculate alignment if needed
271        let max_name_len = if self.config.align_values {
272            fields.iter().map(|f| f.name.len()).max().unwrap_or(0)
273        } else {
274            0
275        };
276
277        for (i, field) in fields.iter().enumerate() {
278            write!(self.writer, "{}", self.config.indent)?;
279            write!(self.writer, "{}", field.name)?;
280
281            if self.config.align_values {
282                let padding = max_name_len - field.name.len();
283                write!(self.writer, "{}", " ".repeat(padding))?;
284            }
285
286            write!(self.writer, " = ")?;
287            self.write_value(&field.value)?;
288
289            if i < fields.len() - 1 || self.config.trailing_comma == TrailingComma::Always {
290                writeln!(self.writer, ",")?;
291            } else {
292                writeln!(self.writer)?;
293            }
294        }
295
296        writeln!(self.writer, "}}")?;
297        Ok(())
298    }
299
300    fn write_parsed_entry_with_raw_source(
301        &mut self,
302        entry: &ParsedEntry,
303        raw_source: Option<&str>,
304    ) -> io::Result<()> {
305        if self.config.raw_write_mode == RawWriteMode::Preserve {
306            if let Some(raw) = patched_entry_raw(entry, raw_source) {
307                self.writer.write_all(raw.as_bytes())?;
308                return Ok(());
309            }
310        }
311
312        self.write_entry(&entry.clone().into_entry())
313    }
314
315    /// Write a string definition
316    fn write_string(&mut self, name: &str, value: &Value) -> io::Result<()> {
317        write!(self.writer, "@string{{{name} = ")?;
318        self.write_value(value)?;
319        writeln!(self.writer, "}}")?;
320        Ok(())
321    }
322
323    /// Write a preamble
324    fn write_preamble(&mut self, value: &Value) -> io::Result<()> {
325        write!(self.writer, "@preamble{{")?;
326        self.write_value(value)?;
327        writeln!(self.writer, "}}")?;
328        Ok(())
329    }
330
331    /// Write a comment.
332    fn write_comment(&mut self, text: &str) -> io::Result<()> {
333        let trimmed = text.trim_start();
334        if trimmed.starts_with('%') || trimmed.starts_with('@') {
335            self.writer.write_all(text.as_bytes())?;
336            if !text.ends_with('\n') {
337                writeln!(self.writer)?;
338            }
339        } else {
340            writeln!(self.writer, "@comment{{{text}}}")?;
341        }
342        Ok(())
343    }
344
345    /// Write a value
346    fn write_value(&mut self, value: &Value) -> io::Result<()> {
347        match value {
348            Value::Literal(s) => {
349                // Quote if contains special characters
350                if needs_quoting(s) {
351                    write!(self.writer, "\"{}\"", escape_quotes(s))?;
352                } else {
353                    write!(self.writer, "{{{s}}}")?;
354                }
355            }
356            Value::Number(n) => write!(self.writer, "{n}")?,
357            Value::Variable(name) => write!(self.writer, "{name}")?,
358            Value::Concat(parts) => {
359                for (i, part) in parts.iter().enumerate() {
360                    if i > 0 {
361                        write!(self.writer, " # ")?;
362                    }
363                    self.write_value(part)?;
364                }
365            }
366        }
367        Ok(())
368    }
369}
370
/// Report whether `s` contains any of `{`, `}`, `,` or `=`, the characters
/// that make the writer pick the quote-delimited form for a literal.
#[must_use]
fn needs_quoting(s: &str) -> bool {
    s.chars().any(|ch| matches!(ch, '{' | '}' | ',' | '='))
}
376
/// Make `s` safe to place between the double quotes of a BibTeX string.
///
/// BibTeX has no backslash escape for `"`: inside a quote-delimited value a
/// double quote is only legal within braces, and `\"` is the TeX umlaut
/// accent. Quotes at brace depth zero are therefore wrapped as `{"}`, while
/// quotes already enclosed in braces (for example `{\"o}`) are left untouched
/// so accent commands survive.
#[must_use]
fn escape_quotes(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let mut depth = 0usize;

    for ch in s.chars() {
        match ch {
            '{' => {
                depth += 1;
                escaped.push(ch);
            }
            '}' => {
                // Tolerate unbalanced input instead of underflowing.
                depth = depth.saturating_sub(1);
                escaped.push(ch);
            }
            '"' if depth == 0 => escaped.push_str("{\"}"),
            _ => escaped.push(ch),
        }
    }

    escaped
}
382
383fn raw_text_with_source<'a>(
384    raw: Option<&'a str>,
385    raw_source: Option<&'a str>,
386    span: Option<crate::SourceSpan>,
387) -> Option<&'a str> {
388    raw.or_else(|| source_slice(raw_source, span?))
389}
390
391fn source_slice(raw_source: Option<&str>, span: crate::SourceSpan) -> Option<&str> {
392    let raw_source = raw_source?;
393    raw_source.get(span.byte_start..span.byte_end)
394}
395
396fn patched_entry_raw<'entry>(
397    entry: &'entry ParsedEntry<'_>,
398    raw_source: Option<&'entry str>,
399) -> Option<Cow<'entry, str>> {
400    let source = entry.source?;
401    let raw = raw_text_with_source(entry.raw.as_deref(), raw_source, Some(source))?;
402    let mut replacements = Vec::new();
403
404    push_token_replacement(
405        &mut replacements,
406        raw,
407        source.byte_start,
408        entry.entry_type_source,
409        &entry.ty.to_string(),
410        |raw_type| crate::EntryType::parse(raw_type) == entry.ty,
411    )?;
412    push_token_replacement(
413        &mut replacements,
414        raw,
415        source.byte_start,
416        entry.key_source,
417        &entry.key,
418        |raw_key| raw_key == entry.key,
419    )?;
420
421    for field in &entry.fields {
422        push_token_replacement(
423            &mut replacements,
424            raw,
425            source.byte_start,
426            field.name_source,
427            &field.name,
428            |raw_name| raw_name == field.name,
429        )?;
430
431        if field.value.raw.is_none() {
432            let value_source = field.value_source?;
433            if source_slice(raw_source, value_source).is_none() {
434                let start = value_source.byte_start.checked_sub(source.byte_start)?;
435                let end = value_source.byte_end.checked_sub(source.byte_start)?;
436                replacements.push((start, end, field.value.value.to_bibtex_source()));
437            }
438        }
439    }
440
441    if replacements.is_empty() {
442        return Some(Cow::Borrowed(raw));
443    }
444
445    replacements.sort_by_key(|(start, _, _)| *start);
446    let mut output = String::with_capacity(raw.len());
447    let mut cursor = 0;
448    for (start, end, replacement) in replacements {
449        if start < cursor || end > raw.len() {
450            return None;
451        }
452        output.push_str(&raw[cursor..start]);
453        output.push_str(&replacement);
454        cursor = end;
455    }
456    output.push_str(&raw[cursor..]);
457    Some(Cow::Owned(output))
458}
459
460fn push_token_replacement(
461    replacements: &mut Vec<(usize, usize, String)>,
462    raw: &str,
463    base: usize,
464    span: Option<crate::SourceSpan>,
465    replacement: &str,
466    unchanged: impl FnOnce(&str) -> bool,
467) -> Option<()> {
468    let span = span?;
469    let start = span.byte_start.checked_sub(base)?;
470    let end = span.byte_end.checked_sub(base)?;
471    let original = raw.get(start..end)?;
472    if !unchanged(original) {
473        replacements.push((start, end, replacement.to_string()));
474    }
475    Some(())
476}
477
478/// Convenience function to write a library to a string.
479#[must_use = "Check the result to detect serialization errors"]
480pub fn to_string(library: &Library) -> Result<String> {
481    let mut buf = Vec::new();
482    let mut writer = Writer::new(&mut buf);
483    writer.write_library(library)?;
484    Ok(String::from_utf8(buf).expect("valid UTF-8"))
485}
486
487/// Convenience function to write a parsed document to a string.
488#[must_use = "Check the result to detect serialization errors"]
489pub fn document_to_string(document: &ParsedDocument) -> Result<String> {
490    let mut buf = Vec::new();
491    let mut writer = Writer::new(&mut buf);
492    writer.write_document(document)?;
493    Ok(String::from_utf8(buf).expect("valid UTF-8"))
494}
495
496/// Convenience function to write selected parsed-document entries to a string.
497#[must_use = "Check the result to detect serialization errors"]
498pub fn selected_entries_to_string(document: &ParsedDocument, keys: &[&str]) -> Result<String> {
499    let mut buf = Vec::new();
500    let mut writer = Writer::new(&mut buf);
501    writer.write_selected_entries(document, keys)?;
502    Ok(String::from_utf8(buf).expect("valid UTF-8"))
503}
504
505/// Convenience function to write a library to a file.
506#[must_use = "Check the result to detect IO or serialization errors"]
507pub fn to_file(library: &Library, path: impl AsRef<std::path::Path>) -> Result<()> {
508    let file = std::fs::File::create(path)?;
509    let mut writer = Writer::new(file);
510    writer.write_library(library)?;
511    Ok(())
512}
513
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{EntryType, Field};
    use std::borrow::Cow;

    /// A three-field article written with the default configuration contains
    /// the entry header and each field in `name = value` form.
    #[test]
    fn test_write_entry() {
        let fields = vec![
            Field::new("author", Value::Literal(Cow::Borrowed("John Doe"))),
            Field::new("title", Value::Literal(Cow::Borrowed("Test Article"))),
            Field::new("year", Value::Number(2023)),
        ];
        let entry = Entry {
            ty: EntryType::Article,
            key: Cow::Borrowed("test2023"),
            fields,
        };

        let mut writer = Writer::new(Vec::<u8>::new());
        writer.write_entry(&entry).unwrap();
        let rendered = String::from_utf8(writer.into_inner()).unwrap();

        let expected_fragments = [
            "@article{test2023,",
            "author = {John Doe}",
            "title = {Test Article}",
            "year = 2023",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment));
        }
    }
}