Skip to main content

bibtex_parser/
document.rs

1//! Parsed bibliography model with source metadata.
2//!
3//! [`Library`] is the compact API for bibliography data. [`ParsedDocument`]
4//! contains source-order blocks, per-item metadata, retained raw text,
5//! diagnostics, and partial parse results.
6
7use crate::library::BlockKind;
8use crate::library::RawBuildItem;
9use crate::model::normalize_text_projection;
10use crate::source::SourceCursor;
11use crate::{
12    normalize_doi, Comment, DateParseError, DateParts, Entry, EntryType, FailedBlock, Field,
13    Library, PersonName, Preamble, ResourceField, SourceId, SourceMap, SourceSpan,
14    StringDefinition, Value,
15};
16use std::borrow::Cow;
17use std::fmt;
18
/// Overall outcome of parsing a bibliography document.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
    /// Parsing finished without diagnostics that affect recovered content.
    Ok,
    /// Useful data was parsed, alongside recovered or failed blocks.
    Partial,
    /// No meaningful bibliography data could be produced.
    Failed,
}
29
/// How serious a diagnostic is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Some requested parse result cannot be valid because of this problem.
    Error,
    /// The problem was recoverable, but callers may want to show or test it.
    Warning,
    /// Extra parse information that does not indicate a problem.
    Info,
}
40
/// Stable, machine-readable identifier for a diagnostic.
///
/// The code is stored as a [`Cow`] so the predefined codes borrow static
/// strings while [`DiagnosticCode::custom`] codes own their text. Two codes
/// compare equal whenever their text is equal, regardless of how they are
/// stored.
///
/// Besides the generic `parse-error`, the initial parser diagnostic codes are:
/// `missing-entry-key`, `missing-field-separator`, `expected-field-name`,
/// `empty-field-value`, `expected-value-atom`, `bad-field-boundary`,
/// `bad-value-boundary`, `unclosed-entry`, `unclosed-braced-value`, and
/// `unclosed-quoted-value`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DiagnosticCode(Cow<'static, str>);

impl DiagnosticCode {
    /// Generic parse error code used before finer-grained recovery classifies a failure.
    pub const PARSE_ERROR: Self = Self::borrowed("parse-error");
    /// The entry body had no citation key.
    pub const MISSING_ENTRY_KEY: Self = Self::borrowed("missing-entry-key");
    /// A comma after an entry key, or `=` after a field name, was expected.
    pub const MISSING_FIELD_SEPARATOR: Self = Self::borrowed("missing-field-separator");
    /// A field name was expected inside an entry body.
    pub const EXPECTED_FIELD_NAME: Self = Self::borrowed("expected-field-name");
    /// The field separator was present but no value followed it.
    pub const EMPTY_FIELD_VALUE: Self = Self::borrowed("empty-field-value");
    /// A literal, number, variable, quoted value, or braced value was expected.
    pub const EXPECTED_VALUE_ATOM: Self = Self::borrowed("expected-value-atom");
    /// A comma or entry close was expected after a field value.
    pub const BAD_FIELD_BOUNDARY: Self = Self::borrowed("bad-field-boundary");
    /// A value atom was expected after a concatenation operator.
    pub const BAD_VALUE_BOUNDARY: Self = Self::borrowed("bad-value-boundary");
    /// The entry ended before its closing delimiter was found.
    pub const UNCLOSED_ENTRY: Self = Self::borrowed("unclosed-entry");
    /// A braced field value ended before its closing brace was found.
    pub const UNCLOSED_BRACED_VALUE: Self = Self::borrowed("unclosed-braced-value");
    /// A quoted field value ended before its closing quote was found.
    pub const UNCLOSED_QUOTED_VALUE: Self = Self::borrowed("unclosed-quoted-value");

    /// Build a diagnostic code that borrows a static string.
    #[must_use]
    pub const fn borrowed(code: &'static str) -> Self {
        Self(Cow::Borrowed(code))
    }

    /// Build a diagnostic code that owns its text.
    #[must_use]
    pub fn custom(code: impl Into<String>) -> Self {
        let owned: String = code.into();
        Self(Cow::Owned(owned))
    }

    /// View the diagnostic code as a string slice.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_ref()
    }
}

impl fmt::Display for DiagnosticCode {
    /// Write the bare code text; formatter width and padding flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
99
/// The bibliography object a diagnostic points at.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticTarget {
    /// The input file or source as a whole.
    File,
    /// A source-order block, identified by its index.
    Block(usize),
    /// An entry, identified by its parsed-entry index.
    Entry(usize),
    /// A field, identified by parsed-entry index and field index.
    Field {
        /// Index of the parsed entry.
        entry: usize,
        /// Index of the field within that parsed entry.
        field: usize,
    },
    /// A value, identified by parsed-entry index and field index.
    Value {
        /// Index of the parsed entry.
        entry: usize,
        /// Index of the field within that parsed entry.
        field: usize,
    },
    /// A failed block, identified by its failed-block index.
    FailedBlock(usize),
}
126
127/// Structured diagnostic emitted while building a parsed document.
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct Diagnostic {
130    /// Diagnostic severity.
131    pub severity: DiagnosticSeverity,
132    /// Stable machine-readable code.
133    pub code: DiagnosticCode,
134    /// Human-readable message.
135    pub message: String,
136    /// Bibliography object targeted by this diagnostic.
137    pub target: DiagnosticTarget,
138    /// Source location, when available.
139    pub source: Option<SourceSpan>,
140    /// Short source context suitable for display, when available.
141    pub snippet: Option<String>,
142}
143
144impl Diagnostic {
145    /// Create an error diagnostic.
146    #[must_use]
147    pub fn error(
148        code: DiagnosticCode,
149        message: impl Into<String>,
150        target: DiagnosticTarget,
151        source: Option<SourceSpan>,
152    ) -> Self {
153        Self {
154            severity: DiagnosticSeverity::Error,
155            code,
156            message: message.into(),
157            target,
158            source,
159            snippet: None,
160        }
161    }
162
163    /// Attach source context to this diagnostic.
164    #[must_use]
165    pub fn with_snippet(mut self, snippet: impl Into<String>) -> Self {
166        self.snippet = Some(snippet.into());
167        self
168    }
169}
170
171/// Summary counts for a parsed document.
172#[derive(Debug, Clone, Copy, PartialEq, Eq)]
173pub struct ParseSummary {
174    /// File-level parse status.
175    pub status: ParseStatus,
176    /// Number of parsed entries.
177    pub entries: usize,
178    /// Number of warning diagnostics.
179    pub warnings: usize,
180    /// Number of error diagnostics.
181    pub errors: usize,
182    /// Number of informational diagnostics.
183    pub infos: usize,
184    /// Number of failed blocks.
185    pub failed_blocks: usize,
186    /// Number of entries recovered as partial entries.
187    pub recovered_blocks: usize,
188}
189
190/// Source metadata associated with a parsed document.
191#[derive(Debug, Clone, PartialEq, Eq)]
192pub struct ParsedSource<'a> {
193    /// Source index inside the document.
194    pub id: SourceId,
195    /// Human-readable source name or path, when known.
196    pub name: Option<Cow<'a, str>>,
197}
198
199impl ParsedSource<'_> {
200    /// Return `true` when this source has no caller-provided name.
201    #[must_use]
202    pub const fn is_anonymous(&self) -> bool {
203        self.name.is_none()
204    }
205
206    /// Convert this source metadata into an owned value.
207    #[must_use]
208    pub fn into_owned(self) -> ParsedSource<'static> {
209        ParsedSource {
210            id: self.id,
211            name: self.name.map(|name| Cow::Owned(name.into_owned())),
212        }
213    }
214}
215
/// One block of a parsed document, in source order.
///
/// Each variant carries an index into the collection for its block kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedBlock {
    /// Regular or partial bibliography entry, by parsed-entry index.
    Entry(usize),
    /// String definition, by parsed-string index.
    String(usize),
    /// Preamble, by parsed-preamble index.
    Preamble(usize),
    /// Comment, by parsed-comment index.
    Comment(usize),
    /// Failed block, by failed-block index.
    Failed(usize),
}
230
231/// Source-order event emitted by streaming parsing.
232#[derive(Debug, Clone, PartialEq)]
233pub enum ParseEvent<'a> {
234    /// A regular or recovered bibliography entry.
235    Entry(ParsedEntry<'a>),
236    /// A string definition.
237    String(ParsedString<'a>),
238    /// A preamble block.
239    Preamble(ParsedPreamble<'a>),
240    /// A comment block.
241    Comment(ParsedComment<'a>),
242    /// A malformed block retained by tolerant parsing.
243    Failed(ParsedFailedBlock<'a>),
244    /// A structured diagnostic associated with a preceding event.
245    Diagnostic(Diagnostic),
246}
247
/// Control value that streaming parse handlers return to the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseFlow {
    /// Keep parsing.
    Continue,
    /// Stop once the current event has been handled.
    Stop,
}
256
257/// Summary returned after streaming parsing.
258#[derive(Debug, Clone, Copy, PartialEq, Eq)]
259pub struct StreamingSummary {
260    /// File-level status for processed events.
261    pub status: ParseStatus,
262    /// Number of emitted entries.
263    pub entries: usize,
264    /// Number of emitted string definitions.
265    pub strings: usize,
266    /// Number of emitted preambles.
267    pub preambles: usize,
268    /// Number of emitted comments.
269    pub comments: usize,
270    /// Number of emitted failed blocks.
271    pub failed_blocks: usize,
272    /// Number of warning diagnostics.
273    pub warnings: usize,
274    /// Number of error diagnostics.
275    pub errors: usize,
276    /// Number of informational diagnostics.
277    pub infos: usize,
278    /// Number of recovered partial entries.
279    pub recovered_blocks: usize,
280    /// `true` when the callback requested early stop.
281    pub stopped: bool,
282}
283
284impl Default for StreamingSummary {
285    fn default() -> Self {
286        Self {
287            status: ParseStatus::Ok,
288            entries: 0,
289            strings: 0,
290            preambles: 0,
291            comments: 0,
292            failed_blocks: 0,
293            warnings: 0,
294            errors: 0,
295            infos: 0,
296            recovered_blocks: 0,
297            stopped: false,
298        }
299    }
300}
301
302impl StreamingSummary {
303    pub(crate) fn finalize_status(&mut self) {
304        self.status = if self.errors == 0 {
305            ParseStatus::Ok
306        } else if self.entries == 0 && self.strings == 0 && self.preambles == 0 {
307            ParseStatus::Failed
308        } else {
309            ParseStatus::Partial
310        };
311    }
312
313    pub(crate) fn count_diagnostic(&mut self, diagnostic: &Diagnostic) {
314        match diagnostic.severity {
315            DiagnosticSeverity::Error => self.errors += 1,
316            DiagnosticSeverity::Warning => self.warnings += 1,
317            DiagnosticSeverity::Info => self.infos += 1,
318        }
319    }
320}
321
/// Whether a parsed entry is complete or was recovered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedEntryStatus {
    /// The entry was parsed in full.
    Complete,
    /// The entry has a recovered type or key plus at least some usable content.
    Partial,
}
330
/// The delimiter pair that encloses a BibTeX entry body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryDelimiter {
    /// The entry body was written as `{ ... }`.
    Braces,
    /// The entry body was written as `( ... )`.
    Parentheses,
}
339
/// The delimiter or source shape a BibTeX value was written with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueDelimiter {
    /// The value was written as `{ ... }`.
    Braces,
    /// The value was written as `" ... "`.
    Quotes,
    /// The value was a bare number or identifier.
    Bare,
    /// The value contained one or more `#` concatenation separators.
    Concatenation,
}
352
/// What value expansion does with a variable it cannot resolve.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnresolvedVariablePolicy {
    /// Leave the variable name in place as ordinary text.
    Preserve,
    /// Render the unresolved variable as `{undefined:name}`.
    Placeholder,
    /// Report an error for the first unresolved variable.
    Error,
}
363
364/// Options for expanding parsed values.
365#[derive(Debug, Clone, Copy, PartialEq, Eq)]
366pub struct ExpansionOptions {
367    /// Expand user `@string` definitions.
368    pub expand_strings: bool,
369    /// Expand standard three-letter BibTeX month variables.
370    pub expand_months: bool,
371    /// Behavior when a variable cannot be resolved.
372    pub unresolved_variables: UnresolvedVariablePolicy,
373}
374
375impl Default for ExpansionOptions {
376    fn default() -> Self {
377        Self {
378            expand_strings: true,
379            expand_months: true,
380            unresolved_variables: UnresolvedVariablePolicy::Error,
381        }
382    }
383}
384
385/// Parsed BibTeX value plus optional source-preserving metadata.
386#[derive(Debug, Clone, PartialEq)]
387pub struct ParsedValue<'a> {
388    /// Structured value.
389    pub value: Value<'a>,
390    /// Exact raw value text, when retained by the parser mode.
391    pub raw: Option<Cow<'a, str>>,
392    /// Source location for the value, when available.
393    pub source: Option<SourceSpan>,
394    /// Expanded text projection, when a parser mode computes it separately.
395    pub expanded: Option<Cow<'a, str>>,
396    /// Original value delimiter or source shape, when retained.
397    pub delimiter: Option<ValueDelimiter>,
398}
399
400impl<'a> ParsedValue<'a> {
401    /// Create parsed-value metadata from a structured value.
402    #[must_use]
403    pub const fn new(value: Value<'a>) -> Self {
404        Self {
405            value,
406            raw: None,
407            source: None,
408            expanded: None,
409            delimiter: None,
410        }
411    }
412
413    /// Convert this parsed value into the structured value.
414    #[must_use]
415    pub fn into_value(self) -> Value<'a> {
416        self.value
417    }
418
419    /// Return the structured parsed value.
420    #[must_use]
421    pub const fn parsed(&self) -> &Value<'a> {
422        &self.value
423    }
424
425    /// Return exact raw value text when raw preservation was requested.
426    #[must_use]
427    pub fn raw_text(&self) -> Option<&str> {
428        self.raw.as_deref()
429    }
430
431    /// Return requested expanded text when the parser populated it.
432    #[must_use]
433    pub fn expanded_text(&self) -> Option<&str> {
434        self.expanded.as_deref()
435    }
436
437    /// Return an ordinary text projection of the parsed value.
438    #[must_use]
439    pub fn plain_text(&self) -> String {
440        self.value.to_plain_string()
441    }
442
443    /// Return a display-oriented projection of the parsed value.
444    #[must_use]
445    pub fn lossy_text(&self) -> String {
446        self.value.to_lossy_string()
447    }
448
449    /// Return a Unicode-normalized plain-text projection.
450    #[cfg(feature = "latex_to_unicode")]
451    #[must_use]
452    pub fn unicode_plain_text(&self) -> String {
453        self.value.to_unicode_plain_string()
454    }
455
456    /// Convert this parsed value into an owned value.
457    #[must_use]
458    pub fn into_owned(self) -> ParsedValue<'static> {
459        ParsedValue {
460            value: self.value.into_owned(),
461            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
462            source: self.source,
463            expanded: self
464                .expanded
465                .map(|expanded| Cow::Owned(expanded.into_owned())),
466            delimiter: self.delimiter,
467        }
468    }
469
470    pub(crate) fn from_owned_value(
471        value: Value<'a>,
472        source: Option<SourceSpan>,
473        delimiter: Option<ValueDelimiter>,
474    ) -> ParsedValue<'static> {
475        ParsedValue {
476            value: value.into_owned(),
477            raw: None,
478            source,
479            expanded: None,
480            delimiter,
481        }
482    }
483}
484
485/// Parsed field plus optional source-preserving metadata.
486#[derive(Debug, Clone, PartialEq)]
487pub struct ParsedField<'a> {
488    /// Field name as it appeared after parsing.
489    pub name: Cow<'a, str>,
490    /// Parsed field value.
491    pub value: ParsedValue<'a>,
492    /// Exact raw field text, when retained by the parser mode.
493    pub raw: Option<Cow<'a, str>>,
494    /// Source location for the whole field, when available.
495    pub source: Option<SourceSpan>,
496    /// Source location for the field name, when available.
497    pub name_source: Option<SourceSpan>,
498    /// Source location for the field value, when available.
499    pub value_source: Option<SourceSpan>,
500}
501
502impl<'a> ParsedField<'a> {
503    /// Create parsed-field metadata from a structured field.
504    #[must_use]
505    pub fn from_field(field: Field<'a>) -> Self {
506        Self {
507            name: field.name,
508            value: ParsedValue::new(field.value),
509            raw: None,
510            source: None,
511            name_source: None,
512            value_source: None,
513        }
514    }
515
516    /// Convert this parsed field into the structured field.
517    #[must_use]
518    pub fn into_field(self) -> Field<'a> {
519        Field {
520            name: self.name,
521            value: self.value.into_value(),
522        }
523    }
524
525    /// Convert this parsed field into an owned value.
526    #[must_use]
527    pub fn into_owned(self) -> ParsedField<'static> {
528        ParsedField {
529            name: owned_field_name(self.name),
530            value: self.value.into_owned(),
531            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
532            source: self.source,
533            name_source: self.name_source,
534            value_source: self.value_source,
535        }
536    }
537
538    pub(crate) fn from_owned_field(field: Field<'a>) -> ParsedField<'static> {
539        ParsedField {
540            name: owned_field_name(field.name),
541            value: ParsedValue::from_owned_value(field.value, None, None),
542            raw: None,
543            source: None,
544            name_source: None,
545            value_source: None,
546        }
547    }
548}
549
550/// Parsed entry plus optional source-preserving metadata.
551#[derive(Debug, Clone, PartialEq)]
552pub struct ParsedEntry<'a> {
553    /// Entry type.
554    pub ty: EntryType<'a>,
555    /// Citation key.
556    pub key: Cow<'a, str>,
557    /// Parsed fields in source order.
558    pub fields: Vec<ParsedField<'a>>,
559    /// Whether the entry is complete or recovered.
560    pub status: ParsedEntryStatus,
561    /// Source location for the whole entry, when available.
562    pub source: Option<SourceSpan>,
563    /// Source location for the entry type token, when available.
564    pub entry_type_source: Option<SourceSpan>,
565    /// Source location for the citation key token, when available.
566    pub key_source: Option<SourceSpan>,
567    /// Entry body delimiter, when retained.
568    pub delimiter: Option<EntryDelimiter>,
569    /// Exact raw entry text, when retained by the parser mode.
570    pub raw: Option<Cow<'a, str>>,
571    /// Diagnostics attached to this entry.
572    pub diagnostics: Vec<Diagnostic>,
573}
574
575impl<'a> ParsedEntry<'a> {
576    /// Create parsed-entry metadata from a structured entry.
577    #[must_use]
578    pub fn from_entry(entry: Entry<'a>, source: Option<SourceSpan>) -> Self {
579        Self {
580            ty: entry.ty,
581            key: entry.key,
582            fields: entry
583                .fields
584                .into_iter()
585                .map(ParsedField::from_field)
586                .collect(),
587            status: ParsedEntryStatus::Complete,
588            source,
589            entry_type_source: None,
590            key_source: None,
591            delimiter: None,
592            raw: None,
593            diagnostics: Vec::new(),
594        }
595    }
596
597    pub(crate) fn from_entry_owned(
598        entry: Entry<'a>,
599        source: Option<SourceSpan>,
600    ) -> ParsedEntry<'static> {
601        ParsedEntry {
602            ty: entry.ty.into_owned(),
603            key: Cow::Owned(entry.key.into_owned()),
604            fields: entry
605                .fields
606                .into_iter()
607                .map(ParsedField::from_owned_field)
608                .collect(),
609            status: ParsedEntryStatus::Complete,
610            source,
611            entry_type_source: None,
612            key_source: None,
613            delimiter: None,
614            raw: None,
615            diagnostics: Vec::new(),
616        }
617    }
618
619    pub(crate) fn from_stream_entry(
620        entry: Entry<'a>,
621        source: SourceSpan,
622        raw: &'a str,
623        source_map: &SourceMap<'a>,
624        preserve_raw: bool,
625    ) -> Self {
626        let mut parsed = Self::from_entry(entry, Some(source));
627        parsed.apply_locations(raw, source_map, preserve_raw);
628        parsed
629    }
630
631    pub(crate) fn from_located_stream_entry_owned(
632        located: crate::parser::entry::LocatedEntry<'a>,
633        source: SourceSpan,
634        span_cursor: &mut SourceCursor<'_, 'a>,
635    ) -> ParsedEntry<'static> {
636        let entry = located.entry;
637        let entry_type_source = span_cursor.span(located.entry_type.0, located.entry_type.1);
638        let key_source = span_cursor.span(located.key.0, located.key.1);
639        let fields = entry
640            .fields
641            .into_iter()
642            .zip(located.fields)
643            .map(|(field, location)| {
644                let field_source = span_cursor.span(location.whole.0, location.whole.1);
645                let name_source = span_cursor.span(location.name.0, location.name.1);
646                let value_source = span_cursor.span(location.value.0, location.value.1);
647                ParsedField {
648                    name: owned_field_name(field.name),
649                    value: ParsedValue::from_owned_value(
650                        field.value,
651                        Some(value_source),
652                        Some(location.value_delimiter),
653                    ),
654                    raw: None,
655                    source: Some(field_source),
656                    name_source: Some(name_source),
657                    value_source: Some(value_source),
658                }
659            })
660            .collect();
661
662        ParsedEntry {
663            ty: entry.ty.into_owned(),
664            key: Cow::Owned(entry.key.into_owned()),
665            fields,
666            status: ParsedEntryStatus::Complete,
667            source: Some(source),
668            entry_type_source: Some(entry_type_source),
669            key_source: Some(key_source),
670            delimiter: Some(located.delimiter),
671            raw: None,
672            diagnostics: Vec::new(),
673        }
674    }
675
676    fn apply_locations(&mut self, raw: &'a str, source_map: &SourceMap<'a>, preserve_raw: bool) {
677        let Some(entry_span) = self.source else {
678            return;
679        };
680        let Some(locations) = locate_entry(raw, entry_span.byte_start, self.fields.len()) else {
681            return;
682        };
683
684        self.entry_type_source =
685            Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
686        self.key_source = Some(source_map.span(locations.key.0, locations.key.1));
687        self.delimiter = Some(locations.delimiter);
688        if preserve_raw {
689            self.raw = Some(Cow::Borrowed(raw));
690        }
691
692        for (field, location) in self.fields.iter_mut().zip(locations.fields) {
693            field.source = Some(source_map.span(location.whole.0, location.whole.1));
694            field.name_source = Some(source_map.span(location.name.0, location.name.1));
695            field.value.source = Some(source_map.span(location.value.0, location.value.1));
696            field.value_source = field.value.source;
697            field.value.delimiter = Some(location.value_delimiter);
698
699            if preserve_raw {
700                if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
701                    field.raw = Some(Cow::Borrowed(source));
702                }
703                if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
704                    field.value.raw = Some(Cow::Borrowed(source));
705                }
706            }
707        }
708    }
709
710    /// Return the citation key.
711    #[must_use]
712    pub fn key(&self) -> &str {
713        &self.key
714    }
715
716    /// Convert this parsed entry into the structured entry.
717    #[must_use]
718    pub fn into_entry(self) -> Entry<'a> {
719        Entry {
720            ty: self.ty,
721            key: self.key,
722            fields: self
723                .fields
724                .into_iter()
725                .map(ParsedField::into_field)
726                .collect(),
727        }
728    }
729
730    /// Rename the citation key.
731    pub fn rename_key(&mut self, key: impl Into<Cow<'a, str>>) {
732        self.key = key.into();
733    }
734
735    /// Replace the entry type.
736    pub fn set_entry_type(&mut self, ty: EntryType<'a>) {
737        self.ty = ty;
738    }
739
740    /// Add a field and switch this entry to structured writing.
741    pub fn add_field(&mut self, name: impl Into<Cow<'a, str>>, value: Value<'a>) {
742        self.fields.push(ParsedField {
743            name: name.into(),
744            value: ParsedValue::new(value),
745            raw: None,
746            source: None,
747            name_source: None,
748            value_source: None,
749        });
750        self.raw = None;
751    }
752
753    /// Replace the first field value whose name matches exactly.
754    #[must_use]
755    pub fn replace_field_value(&mut self, name: &str, value: Value<'a>) -> bool {
756        self.replace_field_value_at(name, 0, value)
757    }
758
759    /// Replace a specific duplicate field occurrence by zero-based occurrence index.
760    #[must_use]
761    pub fn replace_field_value_at(
762        &mut self,
763        name: &str,
764        occurrence: usize,
765        value: Value<'a>,
766    ) -> bool {
767        let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
768            return false;
769        };
770        let field = &mut self.fields[index];
771        field.value.value = value;
772        field.value.raw = None;
773        field.raw = None;
774        field.value.expanded = None;
775        true
776    }
777
778    /// Rename all fields whose name matches exactly.
779    #[must_use]
780    pub fn rename_field(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> usize {
781        let new = new.into();
782        let mut renamed = 0;
783        for field in &mut self.fields {
784            if field.name == old {
785                field.name.clone_from(&new);
786                field.raw = None;
787                renamed += 1;
788            }
789        }
790        renamed
791    }
792
793    /// Remove all fields whose name matches exactly.
794    #[must_use]
795    pub fn remove_field(&mut self, name: &str) -> usize {
796        let original_len = self.fields.len();
797        self.fields.retain(|field| field.name != name);
798        let removed = original_len - self.fields.len();
799        if removed > 0 {
800            self.raw = None;
801        }
802        removed
803    }
804
805    /// Remove a specific duplicate field occurrence by zero-based occurrence index.
806    #[must_use]
807    pub fn remove_field_at(&mut self, name: &str, occurrence: usize) -> bool {
808        let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
809            return false;
810        };
811        self.fields.remove(index);
812        self.raw = None;
813        true
814    }
815
816    /// Remove configured export-only fields from this entry.
817    #[must_use]
818    pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
819        let original_len = self.fields.len();
820        self.fields.retain(|field| {
821            !names
822                .iter()
823                .any(|name| field.name.eq_ignore_ascii_case(name))
824        });
825        let removed = original_len - self.fields.len();
826        if removed > 0 {
827            self.raw = None;
828        }
829        removed
830    }
831
832    /// Return the first field matching `name`, ignoring ASCII case.
833    #[must_use]
834    pub fn field_ignore_case(&self, name: &str) -> Option<&ParsedField<'a>> {
835        self.fields
836            .iter()
837            .find(|field| field.name.eq_ignore_ascii_case(name))
838    }
839
840    /// Return a field value as ordinary text, ignoring ASCII case.
841    #[must_use]
842    pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
843        self.field_ignore_case(name)
844            .map(|field| field.value.plain_text())
845    }
846
847    /// Return the normalized DOI, if the entry has a recognizable DOI field.
848    #[must_use]
849    pub fn doi(&self) -> Option<String> {
850        self.get_as_string_ignore_case("doi")
851            .and_then(|doi| normalize_doi(&doi))
852    }
853
854    /// Parse the `author` field into structured BibTeX names.
855    #[must_use]
856    pub fn authors(&self) -> Vec<PersonName> {
857        self.get_as_string_ignore_case("author")
858            .map_or_else(Vec::new, |authors| crate::parse_names(&authors))
859    }
860
861    /// Parse the `editor` field into structured BibTeX names.
862    #[must_use]
863    pub fn editors(&self) -> Vec<PersonName> {
864        self.get_as_string_ignore_case("editor")
865            .map_or_else(Vec::new, |editors| crate::parse_names(&editors))
866    }
867
868    /// Parse the `translator` field into structured BibTeX names.
869    #[must_use]
870    pub fn translators(&self) -> Vec<PersonName> {
871        self.get_as_string_ignore_case("translator")
872            .map_or_else(Vec::new, |translators| crate::parse_names(&translators))
873    }
874
875    /// Parse a specific date-like field into date parts.
876    #[must_use]
877    pub fn date_parts_for(
878        &self,
879        field: &str,
880    ) -> Option<std::result::Result<DateParts, DateParseError>> {
881        self.get_as_string_ignore_case(field)
882            .map(|value| crate::parse_date_parts(&value))
883    }
884
885    /// Return issued date parts for this entry.
886    #[must_use]
887    pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
888        self.clone().into_entry().date_parts()
889    }
890
891    /// Return classified resource and identifier fields in source order.
892    #[must_use]
893    pub fn resource_fields(&self) -> Vec<ResourceField> {
894        self.clone().into_entry().resource_fields()
895    }
896
897    /// Convert this parsed entry into an owned value.
898    #[must_use]
899    pub fn into_owned(self) -> ParsedEntry<'static> {
900        ParsedEntry {
901            ty: self.ty.into_owned(),
902            key: Cow::Owned(self.key.into_owned()),
903            fields: self
904                .fields
905                .into_iter()
906                .map(ParsedField::into_owned)
907                .collect(),
908            status: self.status,
909            source: self.source,
910            entry_type_source: self.entry_type_source,
911            key_source: self.key_source,
912            delimiter: self.delimiter,
913            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
914            diagnostics: self.diagnostics,
915        }
916    }
917}
918
919fn owned_field_name(name: Cow<'_, str>) -> Cow<'static, str> {
920    static_field_name(&name).map_or_else(|| Cow::Owned(name.into_owned()), Cow::Borrowed)
921}
922
/// Look up the `'static` spelling of a well-known field name.
///
/// The comparison is exact (case-sensitive, no trimming). Names outside the
/// table yield `None`.
fn static_field_name(name: &str) -> Option<&'static str> {
    const KNOWN_FIELD_NAMES: &[&str] = &[
        "abstract",
        "address",
        "archiveprefix",
        "author",
        "booktitle",
        "chapter",
        "copyright",
        "crossref",
        "date",
        "doi",
        "edition",
        "editor",
        "eprint",
        "eventdate",
        "file",
        "institution",
        "isbn",
        "issn",
        "journal",
        "keywords",
        "language",
        "month",
        "note",
        "number",
        "organization",
        "origdate",
        "pages",
        "pmcid",
        "pmid",
        "primaryclass",
        "publisher",
        "school",
        "series",
        "timestamp",
        "title",
        "translator",
        "type",
        "url",
        "urldate",
        "volume",
        "year",
    ];

    KNOWN_FIELD_NAMES
        .iter()
        .copied()
        .find(|known| *known == name)
}
969
970fn nth_field_index(fields: &[ParsedField<'_>], name: &str, occurrence: usize) -> Option<usize> {
971    fields
972        .iter()
973        .enumerate()
974        .filter(|(_, field)| field.name == name)
975        .nth(occurrence)
976        .map(|(index, _)| index)
977}
978
979/// Parsed string definition plus optional source-preserving metadata.
980#[derive(Debug, Clone, PartialEq)]
981pub struct ParsedString<'a> {
982    /// String variable name.
983    pub name: Cow<'a, str>,
984    /// Parsed string value.
985    pub value: ParsedValue<'a>,
986    /// Source location for the definition, when available.
987    pub source: Option<SourceSpan>,
988    /// Exact raw string-definition text, when retained by the parser mode.
989    pub raw: Option<Cow<'a, str>>,
990}
991
992impl<'a> ParsedString<'a> {
993    /// Create parsed-string metadata from a structured string definition.
994    #[must_use]
995    pub fn from_definition(definition: StringDefinition<'a>) -> Self {
996        Self {
997            name: definition.name,
998            value: ParsedValue::new(definition.value),
999            source: definition.source,
1000            raw: None,
1001        }
1002    }
1003
1004    pub(crate) fn from_stream_definition(
1005        name: &'a str,
1006        value: Value<'a>,
1007        source: SourceSpan,
1008        raw: &'a str,
1009        preserve_raw: bool,
1010    ) -> Self {
1011        let value_raw = locate_definition_value(raw);
1012        Self {
1013            name: Cow::Borrowed(name),
1014            value: ParsedValue {
1015                value,
1016                raw: if preserve_raw {
1017                    value_raw.map(Cow::Borrowed)
1018                } else {
1019                    None
1020                },
1021                source: None,
1022                expanded: None,
1023                delimiter: value_raw.map(value_delimiter),
1024            },
1025            source: Some(source),
1026            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1027        }
1028    }
1029
1030    /// Convert this parsed string definition into an owned value.
1031    #[must_use]
1032    pub fn into_owned(self) -> ParsedString<'static> {
1033        ParsedString {
1034            name: Cow::Owned(self.name.into_owned()),
1035            value: self.value.into_owned(),
1036            source: self.source,
1037            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1038        }
1039    }
1040
1041    pub(crate) fn from_stream_definition_owned(
1042        name: &'a str,
1043        value: Value<'a>,
1044        source: SourceSpan,
1045        raw: &'a str,
1046    ) -> ParsedString<'static> {
1047        let value_raw = locate_definition_value(raw);
1048        ParsedString {
1049            name: Cow::Owned(name.to_string()),
1050            value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1051            source: Some(source),
1052            raw: None,
1053        }
1054    }
1055}
1056
1057/// Parsed preamble plus optional source-preserving metadata.
1058#[derive(Debug, Clone, PartialEq)]
1059pub struct ParsedPreamble<'a> {
1060    /// Parsed preamble value.
1061    pub value: ParsedValue<'a>,
1062    /// Source location for the preamble, when available.
1063    pub source: Option<SourceSpan>,
1064    /// Exact raw preamble text, when retained by the parser mode.
1065    pub raw: Option<Cow<'a, str>>,
1066}
1067
1068impl<'a> ParsedPreamble<'a> {
1069    /// Create parsed-preamble metadata from a structured preamble.
1070    #[must_use]
1071    pub fn from_preamble(preamble: Preamble<'a>) -> Self {
1072        Self {
1073            value: ParsedValue::new(preamble.value),
1074            source: preamble.source,
1075            raw: None,
1076        }
1077    }
1078
1079    pub(crate) fn from_stream_preamble(
1080        value: Value<'a>,
1081        source: SourceSpan,
1082        raw: &'a str,
1083        preserve_raw: bool,
1084    ) -> Self {
1085        let value_raw = locate_preamble_value(raw);
1086        Self {
1087            value: ParsedValue {
1088                value,
1089                raw: if preserve_raw {
1090                    value_raw.map(Cow::Borrowed)
1091                } else {
1092                    None
1093                },
1094                source: None,
1095                expanded: None,
1096                delimiter: value_raw.map(value_delimiter),
1097            },
1098            source: Some(source),
1099            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1100        }
1101    }
1102
1103    /// Convert this parsed preamble into an owned value.
1104    #[must_use]
1105    pub fn into_owned(self) -> ParsedPreamble<'static> {
1106        ParsedPreamble {
1107            value: self.value.into_owned(),
1108            source: self.source,
1109            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1110        }
1111    }
1112
1113    pub(crate) fn from_stream_preamble_owned(
1114        value: Value<'a>,
1115        source: SourceSpan,
1116        raw: &'a str,
1117    ) -> ParsedPreamble<'static> {
1118        let value_raw = locate_preamble_value(raw);
1119        ParsedPreamble {
1120            value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1121            source: Some(source),
1122            raw: None,
1123        }
1124    }
1125}
1126
1127/// Parsed comment plus optional source-preserving metadata.
1128#[derive(Debug, Clone, PartialEq, Eq)]
1129pub struct ParsedComment<'a> {
1130    /// Comment text.
1131    pub text: Cow<'a, str>,
1132    /// Source location for the comment, when available.
1133    pub source: Option<SourceSpan>,
1134    /// Exact raw comment text, when retained by the parser mode.
1135    pub raw: Option<Cow<'a, str>>,
1136}
1137
1138impl<'a> ParsedComment<'a> {
1139    /// Create parsed-comment metadata from a structured comment.
1140    #[must_use]
1141    pub fn from_comment(comment: Comment<'a>) -> Self {
1142        Self {
1143            text: comment.text,
1144            source: comment.source,
1145            raw: None,
1146        }
1147    }
1148
1149    pub(crate) fn from_stream_comment(
1150        text: &'a str,
1151        source: SourceSpan,
1152        raw: &'a str,
1153        preserve_raw: bool,
1154    ) -> Self {
1155        Self {
1156            text: Cow::Borrowed(text),
1157            source: Some(source),
1158            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1159        }
1160    }
1161
1162    /// Convert this parsed comment into an owned value.
1163    #[must_use]
1164    pub fn into_owned(self) -> ParsedComment<'static> {
1165        ParsedComment {
1166            text: Cow::Owned(self.text.into_owned()),
1167            source: self.source,
1168            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1169        }
1170    }
1171
1172    pub(crate) fn from_stream_comment_owned(
1173        text: &'a str,
1174        source: SourceSpan,
1175    ) -> ParsedComment<'static> {
1176        ParsedComment {
1177            text: Cow::Owned(text.to_string()),
1178            source: Some(source),
1179            raw: None,
1180        }
1181    }
1182}
1183
1184/// Failed block retained by a tolerant parse.
1185#[derive(Debug, Clone, PartialEq, Eq)]
1186pub struct ParsedFailedBlock<'a> {
1187    /// Raw source text for the failed block.
1188    pub raw: Cow<'a, str>,
1189    /// Human-readable parse error.
1190    pub error: String,
1191    /// Source location for the failed block, when available.
1192    pub source: Option<SourceSpan>,
1193    /// Diagnostics attached to this failed block.
1194    pub diagnostics: Vec<Diagnostic>,
1195}
1196
1197impl<'a> ParsedFailedBlock<'a> {
1198    /// Create failed-block metadata from a retained failed block.
1199    #[must_use]
1200    pub fn from_failed_block(
1201        index: usize,
1202        failed: FailedBlock<'a>,
1203        source_map: Option<&SourceMap<'_>>,
1204    ) -> Self {
1205        let diagnostic = diagnostic_for_failed_block(index, &failed, source_map);
1206
1207        Self {
1208            raw: failed.raw,
1209            error: failed.error,
1210            source: failed.source,
1211            diagnostics: vec![diagnostic],
1212        }
1213    }
1214
1215    /// Convert this failed block into an owned value.
1216    #[must_use]
1217    pub fn into_owned(self) -> ParsedFailedBlock<'static> {
1218        ParsedFailedBlock {
1219            raw: Cow::Owned(self.raw.into_owned()),
1220            error: self.error,
1221            source: self.source,
1222            diagnostics: self.diagnostics,
1223        }
1224    }
1225}
1226
1227/// Rich parsed document for tooling-grade bibliography workflows.
1228#[derive(Debug, Clone)]
1229pub struct ParsedDocument<'a> {
1230    library: Library<'a>,
1231    sources: Vec<ParsedSource<'a>>,
1232    entries: Vec<ParsedEntry<'a>>,
1233    strings: Vec<ParsedString<'a>>,
1234    preambles: Vec<ParsedPreamble<'a>>,
1235    comments: Vec<ParsedComment<'a>>,
1236    failed_blocks: Vec<ParsedFailedBlock<'a>>,
1237    blocks: Vec<ParsedBlock>,
1238    diagnostics: Vec<Diagnostic>,
1239    status: ParseStatus,
1240}
1241
1242impl<'a> ParsedDocument<'a> {
1243    /// Build a parsed document from the existing structured library model.
1244    #[must_use]
1245    pub fn from_library(library: Library<'a>) -> Self {
1246        Self::from_library_with_sources(
1247            library,
1248            vec![ParsedSource {
1249                id: SourceId::new(0),
1250                name: None,
1251            }],
1252        )
1253    }
1254
1255    pub(crate) fn from_library_with_sources(
1256        library: Library<'a>,
1257        sources: Vec<ParsedSource<'a>>,
1258    ) -> Self {
1259        Self::from_library_with_source_map(library, sources, None)
1260    }
1261
1262    pub(crate) fn from_library_with_source_map(
1263        library: Library<'a>,
1264        sources: Vec<ParsedSource<'a>>,
1265        source_map: Option<&SourceMap<'_>>,
1266    ) -> Self {
1267        let entries: Vec<ParsedEntry<'a>> = library
1268            .entries()
1269            .iter()
1270            .cloned()
1271            .enumerate()
1272            .map(|(index, entry)| ParsedEntry::from_entry(entry, library.entry_source(index)))
1273            .collect();
1274        let strings: Vec<ParsedString<'a>> = library
1275            .strings()
1276            .iter()
1277            .cloned()
1278            .map(ParsedString::from_definition)
1279            .collect();
1280        let preambles: Vec<ParsedPreamble<'a>> = library
1281            .preambles()
1282            .iter()
1283            .cloned()
1284            .map(ParsedPreamble::from_preamble)
1285            .collect();
1286        let comments = library
1287            .comments()
1288            .iter()
1289            .cloned()
1290            .map(ParsedComment::from_comment)
1291            .collect();
1292        let failed_blocks = library
1293            .failed_blocks()
1294            .iter()
1295            .cloned()
1296            .enumerate()
1297            .map(|(index, failed)| ParsedFailedBlock::from_failed_block(index, failed, source_map))
1298            .collect::<Vec<_>>();
1299        let diagnostics = failed_blocks
1300            .iter()
1301            .flat_map(|failed| failed.diagnostics.iter().cloned())
1302            .collect::<Vec<_>>();
1303        let blocks = library
1304            .block_kinds()
1305            .iter()
1306            .map(|kind| match *kind {
1307                BlockKind::Entry(index) => ParsedBlock::Entry(index),
1308                BlockKind::String(index) => ParsedBlock::String(index),
1309                BlockKind::Preamble(index) => ParsedBlock::Preamble(index),
1310                BlockKind::Comment(index) => ParsedBlock::Comment(index),
1311                BlockKind::Failed(index) => ParsedBlock::Failed(index),
1312            })
1313            .collect();
1314        let status = if failed_blocks.is_empty() {
1315            ParseStatus::Ok
1316        } else if entries.is_empty() && strings.is_empty() && preambles.is_empty() {
1317            ParseStatus::Failed
1318        } else {
1319            ParseStatus::Partial
1320        };
1321
1322        Self {
1323            library,
1324            sources,
1325            entries,
1326            strings,
1327            preambles,
1328            comments,
1329            failed_blocks,
1330            blocks,
1331            diagnostics,
1332            status,
1333        }
1334    }
1335
1336    pub(crate) const fn from_parsed_parts(
1337        library: Library<'a>,
1338        sources: Vec<ParsedSource<'a>>,
1339        entries: Vec<ParsedEntry<'a>>,
1340        strings: Vec<ParsedString<'a>>,
1341        preambles: Vec<ParsedPreamble<'a>>,
1342        comments: Vec<ParsedComment<'a>>,
1343        blocks: Vec<ParsedBlock>,
1344    ) -> Self {
1345        Self {
1346            library,
1347            sources,
1348            entries,
1349            strings,
1350            preambles,
1351            comments,
1352            failed_blocks: Vec::new(),
1353            blocks,
1354            diagnostics: Vec::new(),
1355            status: ParseStatus::Ok,
1356        }
1357    }
1358
1359    pub(crate) fn apply_entry_locations(
1360        &mut self,
1361        entry_index: usize,
1362        raw: &'a str,
1363        source_map: &SourceMap<'a>,
1364        preserve_raw: bool,
1365    ) {
1366        let Some(entry) = self.entries.get_mut(entry_index) else {
1367            return;
1368        };
1369        let Some(entry_span) = entry.source else {
1370            return;
1371        };
1372        let Some(locations) = locate_entry(raw, entry_span.byte_start, entry.fields.len()) else {
1373            return;
1374        };
1375
1376        entry.entry_type_source =
1377            Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
1378        entry.key_source = Some(source_map.span(locations.key.0, locations.key.1));
1379        entry.delimiter = Some(locations.delimiter);
1380        if preserve_raw {
1381            entry.raw = Some(Cow::Borrowed(raw));
1382        }
1383
1384        for (field, location) in entry.fields.iter_mut().zip(locations.fields) {
1385            field.source = Some(source_map.span(location.whole.0, location.whole.1));
1386            field.name_source = Some(source_map.span(location.name.0, location.name.1));
1387            field.value.source = Some(source_map.span(location.value.0, location.value.1));
1388            field.value_source = field.value.source;
1389            field.value.delimiter = Some(location.value_delimiter);
1390
1391            if preserve_raw {
1392                if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
1393                    field.raw = Some(Cow::Borrowed(source));
1394                }
1395                if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
1396                    field.value.raw = Some(Cow::Borrowed(source));
1397                }
1398            }
1399        }
1400    }
1401
1402    pub(crate) fn apply_raw_items(&mut self, raw_items: &[RawBuildItem<'a>]) {
1403        let mut string_index = 0;
1404        let mut preamble_index = 0;
1405        let mut comment_index = 0;
1406
1407        for raw_item in raw_items {
1408            match raw_item {
1409                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, raw) => {
1410                    if let Some(parsed) = self.strings.get_mut(string_index) {
1411                        parsed.raw = Some(Cow::Borrowed(raw));
1412                        if let Some(value_raw) = locate_definition_value(raw) {
1413                            parsed.value.raw = Some(Cow::Borrowed(value_raw));
1414                            parsed.value.delimiter = Some(value_delimiter(value_raw));
1415                        }
1416                    }
1417                    string_index += 1;
1418                }
1419                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(_), _, raw) => {
1420                    if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1421                        parsed.raw = Some(Cow::Borrowed(raw));
1422                        if let Some(value_raw) = locate_preamble_value(raw) {
1423                            parsed.value.raw = Some(Cow::Borrowed(value_raw));
1424                            parsed.value.delimiter = Some(value_delimiter(value_raw));
1425                        }
1426                    }
1427                    preamble_index += 1;
1428                }
1429                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, raw) => {
1430                    if let Some(parsed) = self.comments.get_mut(comment_index) {
1431                        parsed.raw = Some(Cow::Borrowed(raw));
1432                    }
1433                    comment_index += 1;
1434                }
1435                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, _)
1436                | RawBuildItem::Failed(_) => {}
1437            }
1438        }
1439    }
1440
1441    pub(crate) fn apply_parsed_values(&mut self, raw_items: &[RawBuildItem<'a>]) {
1442        let mut entry_index = 0;
1443        let mut string_index = 0;
1444        let mut preamble_index = 0;
1445
1446        for raw_item in raw_items {
1447            match raw_item {
1448                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(raw_entry), _, _) => {
1449                    if let Some(entry) = self.entries.get_mut(entry_index) {
1450                        for (field, raw_field) in entry.fields.iter_mut().zip(&raw_entry.fields) {
1451                            field.value.value = raw_field.value.clone();
1452                            field.value.expanded = None;
1453                        }
1454                    }
1455                    entry_index += 1;
1456                }
1457                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, value), _, _) => {
1458                    if let Some(parsed) = self.strings.get_mut(string_index) {
1459                        parsed.value.value = value.clone();
1460                        parsed.value.expanded = None;
1461                    }
1462                    string_index += 1;
1463                }
1464                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), _, _) => {
1465                    if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1466                        parsed.value.value = value.clone();
1467                        parsed.value.expanded = None;
1468                    }
1469                    preamble_index += 1;
1470                }
1471                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, _)
1472                | RawBuildItem::Failed(_) => {}
1473            }
1474        }
1475    }
1476
1477    pub(crate) fn populate_expanded_values(
1478        &mut self,
1479        options: ExpansionOptions,
1480    ) -> crate::Result<()> {
1481        let strings = &self.strings;
1482        for entry in &mut self.entries {
1483            for field in &mut entry.fields {
1484                field.value.expanded = Some(Cow::Owned(expand_value_with_options(
1485                    &field.value.value,
1486                    strings,
1487                    options,
1488                    &mut Vec::new(),
1489                )?));
1490            }
1491        }
1492        for preamble in &mut self.preambles {
1493            preamble.value.expanded = Some(Cow::Owned(expand_value_with_options(
1494                &preamble.value.value,
1495                strings,
1496                options,
1497                &mut Vec::new(),
1498            )?));
1499        }
1500        Ok(())
1501    }
1502
1503    pub(crate) fn recover_partial_entries(
1504        &mut self,
1505        source_map: &SourceMap<'a>,
1506        preserve_raw: bool,
1507    ) {
1508        let old_entries = std::mem::take(&mut self.entries);
1509        let old_failed_blocks = std::mem::take(&mut self.failed_blocks);
1510        let old_blocks = std::mem::take(&mut self.blocks);
1511        let mut new_entries = Vec::with_capacity(old_entries.len());
1512        let mut new_failed_blocks = Vec::new();
1513        let mut new_blocks = Vec::with_capacity(old_blocks.len());
1514
1515        for block in old_blocks {
1516            match block {
1517                ParsedBlock::Entry(index) => {
1518                    let new_index = new_entries.len();
1519                    if let Some(entry) = old_entries.get(index) {
1520                        new_entries.push(entry.clone());
1521                        new_blocks.push(ParsedBlock::Entry(new_index));
1522                    }
1523                }
1524                ParsedBlock::Failed(index) => {
1525                    let Some(failed) = old_failed_blocks.get(index) else {
1526                        continue;
1527                    };
1528                    let new_index = new_entries.len();
1529                    if let Some(partial) =
1530                        recover_partial_entry(failed, source_map, new_index, preserve_raw)
1531                    {
1532                        new_entries.push(partial);
1533                        new_blocks.push(ParsedBlock::Entry(new_index));
1534                    } else {
1535                        let failed_index = new_failed_blocks.len();
1536                        new_failed_blocks.push(failed.clone());
1537                        new_blocks.push(ParsedBlock::Failed(failed_index));
1538                    }
1539                }
1540                ParsedBlock::String(index) => new_blocks.push(ParsedBlock::String(index)),
1541                ParsedBlock::Preamble(index) => new_blocks.push(ParsedBlock::Preamble(index)),
1542                ParsedBlock::Comment(index) => new_blocks.push(ParsedBlock::Comment(index)),
1543            }
1544        }
1545
1546        self.entries = new_entries;
1547        self.failed_blocks = new_failed_blocks;
1548        self.blocks = new_blocks;
1549        self.rebuild_diagnostics_and_status();
1550    }
1551
1552    fn rebuild_diagnostics_and_status(&mut self) {
1553        self.diagnostics.clear();
1554        self.diagnostics.extend(
1555            self.entries
1556                .iter()
1557                .flat_map(|entry| entry.diagnostics.iter().cloned()),
1558        );
1559        self.diagnostics.extend(
1560            self.failed_blocks
1561                .iter()
1562                .flat_map(|failed| failed.diagnostics.iter().cloned()),
1563        );
1564
1565        self.status = if self.diagnostics.is_empty() {
1566            ParseStatus::Ok
1567        } else if self.entries.is_empty() && self.strings.is_empty() && self.preambles.is_empty() {
1568            ParseStatus::Failed
1569        } else {
1570            ParseStatus::Partial
1571        };
1572    }
1573
1574    pub(crate) fn failed_from_error(
1575        sources: Vec<ParsedSource<'a>>,
1576        source_map: &SourceMap<'a>,
1577        error: &crate::Error,
1578    ) -> Self {
1579        let (byte, message, fallback_snippet) = match error {
1580            crate::Error::ParseError {
1581                line,
1582                column,
1583                message,
1584                snippet,
1585            } => (
1586                source_map.byte_at_line_column(*line, *column).unwrap_or(0),
1587                message.clone(),
1588                snippet.clone(),
1589            ),
1590            other => (0, other.to_string(), None),
1591        };
1592        let raw = source_map.input().get(byte..).unwrap_or_default();
1593        let failed_source = source_map.span(byte, source_map.len());
1594        let failed = FailedBlock {
1595            raw: Cow::Borrowed(raw),
1596            error: message.clone(),
1597            source: Some(failed_source),
1598        };
1599        let diagnostic = diagnostic_for_raw_failure(
1600            0,
1601            raw,
1602            message,
1603            Some(failed_source),
1604            Some(source_map),
1605            byte,
1606            fallback_snippet,
1607        );
1608        let failed_block = ParsedFailedBlock {
1609            raw: failed.raw,
1610            error: failed.error,
1611            source: failed.source,
1612            diagnostics: vec![diagnostic.clone()],
1613        };
1614
1615        Self {
1616            library: Library::new(),
1617            sources,
1618            entries: Vec::new(),
1619            strings: Vec::new(),
1620            preambles: Vec::new(),
1621            comments: Vec::new(),
1622            failed_blocks: vec![failed_block],
1623            blocks: vec![ParsedBlock::Failed(0)],
1624            diagnostics: vec![diagnostic],
1625            status: ParseStatus::Failed,
1626        }
1627    }
1628
1629    /// Return the compact structured library view.
1630    #[must_use]
1631    pub const fn library(&self) -> &Library<'a> {
1632        &self.library
1633    }
1634
1635    /// Consume this document and return the compact structured library view.
1636    #[must_use]
1637    pub fn into_library(self) -> Library<'a> {
1638        self.library
1639    }
1640
1641    /// Return source metadata.
1642    #[must_use]
1643    pub fn sources(&self) -> &[ParsedSource<'a>] {
1644        &self.sources
1645    }
1646
1647    /// Return parsed entries.
1648    #[must_use]
1649    pub fn entries(&self) -> &[ParsedEntry<'a>] {
1650        &self.entries
1651    }
1652
1653    /// Return mutable parsed entries.
1654    #[must_use]
1655    pub fn entries_mut(&mut self) -> &mut [ParsedEntry<'a>] {
1656        &mut self.entries
1657    }
1658
1659    /// Return a mutable entry by citation key.
1660    #[must_use]
1661    pub fn entry_mut_by_key(&mut self, key: &str) -> Option<&mut ParsedEntry<'a>> {
1662        self.entries.iter_mut().find(|entry| entry.key == key)
1663    }
1664
1665    /// Rename a citation key.
1666    #[must_use]
1667    pub fn rename_key(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> bool {
1668        let Some(entry) = self.entry_mut_by_key(old) else {
1669            return false;
1670        };
1671        entry.rename_key(new);
1672        true
1673    }
1674
1675    /// Remove configured export-only fields from all entries.
1676    #[must_use]
1677    pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
1678        self.entries
1679            .iter_mut()
1680            .map(|entry| entry.remove_export_fields(names))
1681            .sum()
1682    }
1683
1684    /// Return parsed string definitions.
1685    #[must_use]
1686    pub fn strings(&self) -> &[ParsedString<'a>] {
1687        &self.strings
1688    }
1689
1690    /// Return parsed preambles.
1691    #[must_use]
1692    pub fn preambles(&self) -> &[ParsedPreamble<'a>] {
1693        &self.preambles
1694    }
1695
1696    /// Return parsed comments.
1697    #[must_use]
1698    pub fn comments(&self) -> &[ParsedComment<'a>] {
1699        &self.comments
1700    }
1701
1702    /// Return failed blocks retained by tolerant parsing.
1703    #[must_use]
1704    pub fn failed_blocks(&self) -> &[ParsedFailedBlock<'a>] {
1705        &self.failed_blocks
1706    }
1707
1708    /// Return source-order blocks.
1709    #[must_use]
1710    pub fn blocks(&self) -> &[ParsedBlock] {
1711        &self.blocks
1712    }
1713
1714    /// Return document diagnostics.
1715    #[must_use]
1716    pub fn diagnostics(&self) -> &[Diagnostic] {
1717        &self.diagnostics
1718    }
1719
1720    /// Return the document parse status.
1721    #[must_use]
1722    pub const fn status(&self) -> ParseStatus {
1723        self.status
1724    }
1725
1726    /// Return summary counts for the parsed document.
1727    #[must_use]
1728    pub fn summary(&self) -> ParseSummary {
1729        let mut warnings = 0;
1730        let mut errors = 0;
1731        let mut infos = 0;
1732
1733        for diagnostic in &self.diagnostics {
1734            match diagnostic.severity {
1735                DiagnosticSeverity::Error => errors += 1,
1736                DiagnosticSeverity::Warning => warnings += 1,
1737                DiagnosticSeverity::Info => infos += 1,
1738            }
1739        }
1740
1741        ParseSummary {
1742            status: self.status,
1743            entries: self.entries.len(),
1744            warnings,
1745            errors,
1746            infos,
1747            failed_blocks: self.failed_blocks.len(),
1748            recovered_blocks: self
1749                .entries
1750                .iter()
1751                .filter(|entry| entry.status == ParsedEntryStatus::Partial)
1752                .count(),
1753        }
1754    }
1755
1756    /// Expand a parsed value using this document's string definitions.
1757    ///
1758    /// This allocates the expanded text. The structured value itself remains
1759    /// unchanged, and unresolved-variable behavior follows `options`.
1760    pub fn expand_value(
1761        &self,
1762        value: &Value<'a>,
1763        options: ExpansionOptions,
1764    ) -> crate::Result<String> {
1765        expand_value_with_options(value, &self.strings, options, &mut Vec::new())
1766    }
1767
1768    /// Convert this parsed document into an owned value.
1769    #[must_use]
1770    pub fn into_owned(self) -> ParsedDocument<'static> {
1771        ParsedDocument {
1772            library: self.library.into_owned(),
1773            sources: self
1774                .sources
1775                .into_iter()
1776                .map(ParsedSource::into_owned)
1777                .collect(),
1778            entries: self
1779                .entries
1780                .into_iter()
1781                .map(ParsedEntry::into_owned)
1782                .collect(),
1783            strings: self
1784                .strings
1785                .into_iter()
1786                .map(ParsedString::into_owned)
1787                .collect(),
1788            preambles: self
1789                .preambles
1790                .into_iter()
1791                .map(ParsedPreamble::into_owned)
1792                .collect(),
1793            comments: self
1794                .comments
1795                .into_iter()
1796                .map(ParsedComment::into_owned)
1797                .collect(),
1798            failed_blocks: self
1799                .failed_blocks
1800                .into_iter()
1801                .map(ParsedFailedBlock::into_owned)
1802                .collect(),
1803            blocks: self.blocks,
1804            diagnostics: self.diagnostics,
1805            status: self.status,
1806        }
1807    }
1808}
1809
1810impl ParsedDocument<'static> {
1811    pub(crate) fn apply_raw_from_source(&mut self, source: &str) {
1812        for entry in &mut self.entries {
1813            if entry.raw.is_none() {
1814                entry.raw = owned_source_slice(source, entry.source);
1815            }
1816            for field in &mut entry.fields {
1817                if field.raw.is_none() {
1818                    field.raw = owned_source_slice(source, field.source);
1819                }
1820                if field.value.raw.is_none() {
1821                    field.value.raw = owned_source_slice(source, field.value_source);
1822                }
1823            }
1824        }
1825
1826        for string in &mut self.strings {
1827            if string.raw.is_none() {
1828                string.raw = owned_source_slice(source, string.source);
1829            }
1830        }
1831        for preamble in &mut self.preambles {
1832            if preamble.raw.is_none() {
1833                preamble.raw = owned_source_slice(source, preamble.source);
1834            }
1835        }
1836        for comment in &mut self.comments {
1837            if comment.raw.is_none() {
1838                comment.raw = owned_source_slice(source, comment.source);
1839            }
1840        }
1841    }
1842}
1843
1844fn owned_source_slice(source: &str, span: Option<SourceSpan>) -> Option<Cow<'static, str>> {
1845    let span = span?;
1846    source
1847        .get(span.byte_start..span.byte_end)
1848        .map(|raw| Cow::Owned(raw.to_string()))
1849}
1850
1851fn expand_value_with_options(
1852    value: &Value<'_>,
1853    strings: &[ParsedString<'_>],
1854    options: ExpansionOptions,
1855    stack: &mut Vec<String>,
1856) -> crate::Result<String> {
1857    match value {
1858        Value::Literal(text) => Ok(normalize_text_projection(text)),
1859        Value::Number(number) => Ok(number.to_string()),
1860        Value::Concat(parts) => {
1861            let mut expanded = String::new();
1862            for part in parts.iter() {
1863                expanded.push_str(&expand_value_with_options(part, strings, options, stack)?);
1864            }
1865            Ok(expanded)
1866        }
1867        Value::Variable(name) => expand_variable(name, strings, options, stack),
1868    }
1869}
1870
1871fn expand_variable(
1872    name: &str,
1873    strings: &[ParsedString<'_>],
1874    options: ExpansionOptions,
1875    stack: &mut Vec<String>,
1876) -> crate::Result<String> {
1877    if options.expand_strings {
1878        if let Some(definition) = strings
1879            .iter()
1880            .rev()
1881            .find(|definition| definition.name.as_ref() == name)
1882        {
1883            if stack.iter().any(|active| active == name) {
1884                return Err(crate::Error::CircularReference(name.to_string()));
1885            }
1886            stack.push(name.to_string());
1887            let expanded =
1888                expand_value_with_options(&definition.value.value, strings, options, stack);
1889            stack.pop();
1890            return expanded;
1891        }
1892    }
1893
1894    if options.expand_months {
1895        if let Some(month) = month_expansion(name) {
1896            return Ok(month.to_string());
1897        }
1898    }
1899
1900    match options.unresolved_variables {
1901        UnresolvedVariablePolicy::Preserve => Ok(name.to_string()),
1902        UnresolvedVariablePolicy::Placeholder => Ok(format!("{{undefined:{name}}}")),
1903        UnresolvedVariablePolicy::Error => Err(crate::Error::UndefinedVariable(name.to_string())),
1904    }
1905}
1906
/// Map a three-letter month abbreviation to the full English month name.
///
/// Matching ignores ASCII case (`jan`, `Jan`, and `JAN` all give
/// `"January"`). Any other input, including longer spellings such as `sept`,
/// returns `None`.
fn month_expansion(name: &str) -> Option<&'static str> {
    const MONTHS: [(&str, &str); 12] = [
        ("jan", "January"),
        ("feb", "February"),
        ("mar", "March"),
        ("apr", "April"),
        ("may", "May"),
        ("jun", "June"),
        ("jul", "July"),
        ("aug", "August"),
        ("sep", "September"),
        ("oct", "October"),
        ("nov", "November"),
        ("dec", "December"),
    ];

    // `eq_ignore_ascii_case` compares in place, so no lowercase copy of
    // `name` is allocated; it also rejects inputs of a different length.
    MONTHS
        .iter()
        .find(|(abbreviation, _)| name.eq_ignore_ascii_case(abbreviation))
        .map(|&(_, full_name)| full_name)
}
1928
/// Byte ranges located for one entry's header and fields.
///
/// `locate_entry` builds every range as a `(start, end)` pair with its
/// `absolute_start` argument already added.
#[derive(Debug, Clone)]
struct EntryLocations {
    /// Range of the entry type identifier that follows `@`.
    entry_type: (usize, usize),
    /// Range of the citation key.
    key: (usize, usize),
    /// Delimiter that opens the entry body (braces or parentheses).
    delimiter: EntryDelimiter,
    /// Per-field ranges, in the order the fields were scanned.
    fields: Vec<FieldLocations>,
}
1936
/// Byte ranges located for a single field inside an entry.
///
/// Each range is a `(start, end)` pair as produced by `locate_entry`.
#[derive(Debug, Clone, Copy)]
struct FieldLocations {
    /// The whole field, from the name through the trailing comma when one
    /// follows the value.
    whole: (usize, usize),
    /// The field name.
    name: (usize, usize),
    /// The field value with trailing ASCII whitespace trimmed.
    value: (usize, usize),
    /// How the value text is delimited, as reported by `value_delimiter`.
    value_delimiter: ValueDelimiter,
}
1944
/// Result of classifying why a raw block failed to parse.
#[derive(Debug, Clone)]
struct FailureClassification {
    /// Diagnostic code chosen for the failure.
    code: DiagnosticCode,
    /// Byte range `(start, end)` relative to the start of the raw block;
    /// `diagnostic_for_raw_failure` adds the block's absolute offset.
    range: (usize, usize),
}
1950
1951fn diagnostic_for_failed_block(
1952    index: usize,
1953    failed: &FailedBlock<'_>,
1954    source_map: Option<&SourceMap<'_>>,
1955) -> Diagnostic {
1956    let absolute_start = failed.source.map_or(0, |source| source.byte_start);
1957    diagnostic_for_raw_failure(
1958        index,
1959        &failed.raw,
1960        failed.error.clone(),
1961        failed.source,
1962        source_map,
1963        absolute_start,
1964        None,
1965    )
1966}
1967
1968fn diagnostic_for_raw_failure(
1969    index: usize,
1970    raw: &str,
1971    fallback_message: String,
1972    fallback_source: Option<SourceSpan>,
1973    source_map: Option<&SourceMap<'_>>,
1974    absolute_start: usize,
1975    fallback_snippet: Option<String>,
1976) -> Diagnostic {
1977    let classification = classify_failure(raw);
1978    let source = source_map
1979        .map(|map| {
1980            map.span(
1981                absolute_start + classification.range.0,
1982                absolute_start + classification.range.1,
1983            )
1984        })
1985        .or(fallback_source);
1986    let snippet = source
1987        .and_then(|span| source_map.and_then(|map| map.snippet(span, 160)))
1988        .or(fallback_snippet)
1989        .or_else(|| Some(raw.chars().take(160).collect()));
1990
1991    let mut diagnostic = Diagnostic::error(
1992        classification.code.clone(),
1993        diagnostic_message(&classification.code, fallback_message),
1994        DiagnosticTarget::FailedBlock(index),
1995        source,
1996    );
1997    diagnostic.snippet = snippet;
1998    diagnostic
1999}
2000
2001fn recover_partial_entry<'a>(
2002    failed: &ParsedFailedBlock<'a>,
2003    source_map: &SourceMap<'a>,
2004    entry_index: usize,
2005    preserve_raw: bool,
2006) -> Option<ParsedEntry<'a>> {
2007    let raw: &'a str = match &failed.raw {
2008        Cow::Borrowed(raw) => raw,
2009        Cow::Owned(_) => return None,
2010    };
2011    let absolute_start = failed.source?.byte_start;
2012    let header = parse_partial_header(raw, source_map, absolute_start)?;
2013    let fields = recover_partial_fields(
2014        raw,
2015        source_map,
2016        absolute_start,
2017        header.field_start,
2018        header.closing,
2019        preserve_raw,
2020    );
2021    if fields.is_empty() {
2022        return None;
2023    }
2024
2025    let diagnostic = diagnostic_for_partial_entry(entry_index, failed, source_map);
2026
2027    Some(ParsedEntry {
2028        ty: header.ty,
2029        key: header.key,
2030        fields,
2031        status: ParsedEntryStatus::Partial,
2032        source: failed.source,
2033        entry_type_source: header.entry_type_source,
2034        key_source: header.key_source,
2035        delimiter: Some(header.delimiter),
2036        raw: preserve_raw.then(|| failed.raw.clone()),
2037        diagnostics: vec![diagnostic],
2038    })
2039}
2040
/// Crate-visible entry point for partial-entry recovery.
///
/// Forwards all arguments unchanged to `recover_partial_entry` and returns
/// its result.
pub(crate) fn recover_partial_stream_entry<'a>(
    failed: &ParsedFailedBlock<'a>,
    source_map: &SourceMap<'a>,
    entry_index: usize,
    preserve_raw: bool,
) -> Option<ParsedEntry<'a>> {
    recover_partial_entry(failed, source_map, entry_index, preserve_raw)
}
2049
/// Header information read from the start of a failed entry block by
/// `parse_partial_header`.
struct PartialHeader<'a> {
    /// Entry type parsed from the identifier after `@`.
    ty: EntryType<'a>,
    /// Citation key, borrowed from the raw block.
    key: Cow<'a, str>,
    /// Source span of the entry type identifier.
    entry_type_source: Option<SourceSpan>,
    /// Source span of the citation key.
    key_source: Option<SourceSpan>,
    /// Delimiter that opens the entry body.
    delimiter: EntryDelimiter,
    /// Offset into the raw block just past the comma that follows the key.
    field_start: usize,
    /// Byte that closes the entry body (`}` or `)`).
    closing: u8,
}
2059
2060fn parse_partial_header<'a>(
2061    raw: &'a str,
2062    source_map: &SourceMap<'a>,
2063    absolute_start: usize,
2064) -> Option<PartialHeader<'a>> {
2065    let bytes = raw.as_bytes();
2066    let mut pos = bytes.iter().position(|byte| *byte == b'@')? + 1;
2067
2068    let entry_type_start = pos;
2069    pos += scan_identifier(&bytes[pos..]);
2070    if pos == entry_type_start {
2071        return None;
2072    }
2073    let ty = EntryType::parse(&raw[entry_type_start..pos]);
2074    let entry_type_source =
2075        Some(source_map.span(absolute_start + entry_type_start, absolute_start + pos));
2076
2077    pos = skip_ascii_whitespace(bytes, pos);
2078    let (delimiter, closing) = match *bytes.get(pos)? {
2079        b'{' => (EntryDelimiter::Braces, b'}'),
2080        b'(' => (EntryDelimiter::Parentheses, b')'),
2081        _ => return None,
2082    };
2083    pos += 1;
2084    pos = skip_ascii_whitespace(bytes, pos);
2085
2086    let key_start = pos;
2087    pos += scan_identifier(&bytes[pos..]);
2088    if pos == key_start {
2089        return None;
2090    }
2091    let key = Cow::Borrowed(&raw[key_start..pos]);
2092    let key_source = Some(source_map.span(absolute_start + key_start, absolute_start + pos));
2093
2094    pos = skip_ascii_whitespace(bytes, pos);
2095    if bytes.get(pos) != Some(&b',') {
2096        return None;
2097    }
2098
2099    Some(PartialHeader {
2100        ty,
2101        key,
2102        entry_type_source,
2103        key_source,
2104        delimiter,
2105        field_start: pos + 1,
2106        closing,
2107    })
2108}
2109
2110fn recover_partial_fields<'a>(
2111    raw: &'a str,
2112    source_map: &SourceMap<'a>,
2113    absolute_start: usize,
2114    mut pos: usize,
2115    closing: u8,
2116    preserve_raw: bool,
2117) -> Vec<ParsedField<'a>> {
2118    let bytes = raw.as_bytes();
2119    let mut fields = Vec::new();
2120
2121    loop {
2122        pos = skip_ascii_whitespace(bytes, pos);
2123        let Some(&byte) = bytes.get(pos) else {
2124            break;
2125        };
2126        if byte == closing || byte == b'@' {
2127            break;
2128        }
2129
2130        let field_start = pos;
2131        let name_start = pos;
2132        pos += scan_identifier(&bytes[pos..]);
2133        if pos == name_start {
2134            break;
2135        }
2136        let name_end = pos;
2137        let name = Cow::Borrowed(&raw[name_start..name_end]);
2138
2139        pos = skip_ascii_whitespace(bytes, pos);
2140        if bytes.get(pos) != Some(&b'=') {
2141            break;
2142        }
2143        pos += 1;
2144        pos = skip_ascii_whitespace(bytes, pos);
2145
2146        let value_start = pos;
2147        let tail = &raw[value_start..];
2148        let mut value_input = tail;
2149        let Ok(value) = crate::parser::value::parse_value_field(&mut value_input) else {
2150            break;
2151        };
2152        let consumed = tail.len() - value_input.len();
2153        let value_end = trim_ascii_whitespace_end(bytes, value_start, value_start + consumed);
2154        let boundary = value_start + consumed;
2155        let field_end = match bytes.get(boundary) {
2156            Some(b',') => boundary + 1,
2157            Some(byte) if *byte == closing => boundary,
2158            Some(_) | None => boundary,
2159        };
2160
2161        let field_source =
2162            source_map.span(absolute_start + field_start, absolute_start + field_end);
2163        let value_source =
2164            source_map.span(absolute_start + value_start, absolute_start + value_end);
2165        fields.push(ParsedField {
2166            name,
2167            value: ParsedValue {
2168                value,
2169                raw: preserve_raw.then(|| Cow::Borrowed(&raw[value_start..value_end])),
2170                source: Some(value_source),
2171                expanded: None,
2172                delimiter: Some(value_delimiter(&raw[value_start..value_end])),
2173            },
2174            raw: preserve_raw.then(|| Cow::Borrowed(&raw[field_start..field_end])),
2175            source: Some(field_source),
2176            name_source: Some(
2177                source_map.span(absolute_start + name_start, absolute_start + name_end),
2178            ),
2179            value_source: Some(value_source),
2180        });
2181
2182        match bytes.get(boundary) {
2183            Some(b',') => pos = boundary + 1,
2184            Some(byte) if *byte == closing => break,
2185            _ => break,
2186        }
2187    }
2188
2189    fields
2190}
2191
2192fn diagnostic_for_partial_entry(
2193    entry_index: usize,
2194    failed: &ParsedFailedBlock<'_>,
2195    source_map: &SourceMap<'_>,
2196) -> Diagnostic {
2197    let absolute_start = failed.source.map_or(0, |source| source.byte_start);
2198    let mut diagnostic = diagnostic_for_raw_failure(
2199        entry_index,
2200        &failed.raw,
2201        failed.error.clone(),
2202        failed.source,
2203        Some(source_map),
2204        absolute_start,
2205        None,
2206    );
2207    diagnostic.target = DiagnosticTarget::Entry(entry_index);
2208    diagnostic
2209}
2210
2211fn diagnostic_message(code: &DiagnosticCode, fallback: String) -> String {
2212    match code.as_str() {
2213        "missing-entry-key" => "missing citation key".to_string(),
2214        "missing-field-separator" => "missing field separator".to_string(),
2215        "expected-field-name" => "expected field name".to_string(),
2216        "empty-field-value" => "empty field value".to_string(),
2217        "expected-value-atom" => "expected value atom".to_string(),
2218        "bad-field-boundary" => "expected comma or entry close after field value".to_string(),
2219        "bad-value-boundary" => "expected value after concatenation operator".to_string(),
2220        "unclosed-entry" => "entry ended before its closing delimiter".to_string(),
2221        "unclosed-braced-value" => "braced value ended before its closing brace".to_string(),
2222        "unclosed-quoted-value" => "quoted value ended before its closing quote".to_string(),
2223        _ => fallback,
2224    }
2225}
2226
2227fn classify_failure(raw: &str) -> FailureClassification {
2228    classify_failure_inner(raw).unwrap_or_else(|| FailureClassification {
2229        code: DiagnosticCode::PARSE_ERROR,
2230        range: empty_range(0),
2231    })
2232}
2233
2234fn classify_failure_inner(raw: &str) -> Option<FailureClassification> {
2235    let bytes = raw.as_bytes();
2236    let header = match parse_failure_header(bytes)? {
2237        Ok(header) => header,
2238        Err(classification) => return Some(classification),
2239    };
2240
2241    classify_failure_fields(bytes, header.pos, header.closing)
2242}
2243
/// Position information produced by `parse_failure_header` once the entry
/// header has been scanned successfully.
#[derive(Debug, Clone, Copy)]
struct FailureHeader {
    /// Offset just past the comma that follows the citation key.
    pos: usize,
    /// Byte that closes the entry body (`}` or `)`).
    closing: u8,
}
2249
2250fn parse_failure_header(bytes: &[u8]) -> Option<Result<FailureHeader, FailureClassification>> {
2251    let mut pos = bytes.iter().position(|byte| *byte == b'@')?;
2252    pos += 1;
2253    pos += scan_identifier(&bytes[pos..]);
2254    pos = skip_ascii_whitespace(bytes, pos);
2255
2256    let opening = *bytes.get(pos)?;
2257    let closing = match opening {
2258        b'{' => b'}',
2259        b'(' => b')',
2260        _ => {
2261            return Some(Err(classification(
2262                DiagnosticCode::UNCLOSED_ENTRY,
2263                pos,
2264                bytes.len(),
2265            )));
2266        }
2267    };
2268    pos += 1;
2269    pos = skip_ascii_whitespace(bytes, pos);
2270
2271    let key_len = scan_identifier(&bytes[pos..]);
2272    if key_len == 0 {
2273        return Some(Err(classification(
2274            DiagnosticCode::MISSING_ENTRY_KEY,
2275            pos,
2276            bytes.len(),
2277        )));
2278    }
2279    pos += key_len;
2280    pos = skip_ascii_whitespace(bytes, pos);
2281    if bytes.get(pos) != Some(&b',') {
2282        return Some(Err(classification(
2283            DiagnosticCode::MISSING_FIELD_SEPARATOR,
2284            pos,
2285            bytes.len(),
2286        )));
2287    }
2288    pos += 1;
2289
2290    Some(Ok(FailureHeader { pos, closing }))
2291}
2292
2293fn classify_failure_fields(
2294    bytes: &[u8],
2295    mut pos: usize,
2296    closing: u8,
2297) -> Option<FailureClassification> {
2298    loop {
2299        pos = skip_ascii_whitespace(bytes, pos);
2300        let Some(&byte) = bytes.get(pos) else {
2301            return Some(classification(
2302                DiagnosticCode::UNCLOSED_ENTRY,
2303                pos,
2304                bytes.len(),
2305            ));
2306        };
2307        if byte == closing {
2308            return None;
2309        }
2310        if byte == b'@' {
2311            return Some(classification(
2312                DiagnosticCode::UNCLOSED_ENTRY,
2313                pos,
2314                bytes.len(),
2315            ));
2316        }
2317
2318        let field_name_len = scan_identifier(&bytes[pos..]);
2319        if field_name_len == 0 {
2320            return Some(classification(
2321                DiagnosticCode::EXPECTED_FIELD_NAME,
2322                pos,
2323                bytes.len(),
2324            ));
2325        }
2326        pos += field_name_len;
2327        pos = skip_ascii_whitespace(bytes, pos);
2328        if bytes.get(pos) != Some(&b'=') {
2329            return Some(classification(
2330                DiagnosticCode::MISSING_FIELD_SEPARATOR,
2331                pos,
2332                bytes.len(),
2333            ));
2334        }
2335        pos += 1;
2336        pos = skip_ascii_whitespace(bytes, pos);
2337
2338        let Some(&value_start) = bytes.get(pos) else {
2339            return Some(classification(
2340                DiagnosticCode::EMPTY_FIELD_VALUE,
2341                pos,
2342                bytes.len(),
2343            ));
2344        };
2345        if value_start == b',' || value_start == closing {
2346            return Some(classification(
2347                DiagnosticCode::EMPTY_FIELD_VALUE,
2348                pos,
2349                bytes.len(),
2350            ));
2351        }
2352        if value_start == b'#' {
2353            return Some(classification(
2354                DiagnosticCode::EXPECTED_VALUE_ATOM,
2355                pos,
2356                bytes.len(),
2357            ));
2358        }
2359
2360        match scan_value_sequence(bytes, pos, closing) {
2361            Ok(next_pos) => pos = next_pos,
2362            Err(classification) => return Some(classification),
2363        }
2364    }
2365}
2366
2367fn scan_value_sequence(
2368    bytes: &[u8],
2369    mut pos: usize,
2370    closing: u8,
2371) -> Result<usize, FailureClassification> {
2372    loop {
2373        pos = skip_ascii_whitespace(bytes, pos);
2374        let atom_start = pos;
2375        let Some(&byte) = bytes.get(pos) else {
2376            return Err(classification(
2377                DiagnosticCode::EXPECTED_VALUE_ATOM,
2378                pos,
2379                bytes.len(),
2380            ));
2381        };
2382
2383        match byte {
2384            b'"' => {
2385                pos = skip_quoted_checked(bytes, pos + 1).ok_or_else(|| {
2386                    classification(
2387                        DiagnosticCode::UNCLOSED_QUOTED_VALUE,
2388                        atom_start,
2389                        bytes.len(),
2390                    )
2391                })?;
2392            }
2393            b'{' => {
2394                pos = skip_braced_checked(bytes, pos + 1).ok_or_else(|| {
2395                    classification(
2396                        DiagnosticCode::UNCLOSED_BRACED_VALUE,
2397                        atom_start,
2398                        bytes.len(),
2399                    )
2400                })?;
2401            }
2402            b',' => {
2403                return Err(classification(
2404                    DiagnosticCode::EMPTY_FIELD_VALUE,
2405                    pos,
2406                    bytes.len(),
2407                ));
2408            }
2409            b if b == closing => {
2410                return Err(classification(
2411                    DiagnosticCode::EMPTY_FIELD_VALUE,
2412                    pos,
2413                    bytes.len(),
2414                ));
2415            }
2416            b'#' => {
2417                return Err(classification(
2418                    DiagnosticCode::EXPECTED_VALUE_ATOM,
2419                    pos,
2420                    bytes.len(),
2421                ));
2422            }
2423            _ => {
2424                let identifier_len = scan_identifier(&bytes[pos..]);
2425                if identifier_len == 0 {
2426                    return Err(classification(
2427                        DiagnosticCode::EXPECTED_VALUE_ATOM,
2428                        pos,
2429                        bytes.len(),
2430                    ));
2431                }
2432                pos += identifier_len;
2433            }
2434        }
2435
2436        pos = skip_ascii_whitespace(bytes, pos);
2437        let Some(&boundary) = bytes.get(pos) else {
2438            return Err(classification(
2439                DiagnosticCode::UNCLOSED_ENTRY,
2440                pos,
2441                bytes.len(),
2442            ));
2443        };
2444
2445        match boundary {
2446            b'#' => {
2447                let hash = pos;
2448                pos += 1;
2449                pos = skip_ascii_whitespace(bytes, pos);
2450                if matches!(bytes.get(pos), None | Some(b',' | b'#'))
2451                    || bytes.get(pos) == Some(&closing)
2452                {
2453                    return Err(classification(
2454                        DiagnosticCode::BAD_VALUE_BOUNDARY,
2455                        hash,
2456                        bytes.len(),
2457                    ));
2458                }
2459            }
2460            b',' => return Ok(pos + 1),
2461            b if b == closing => return Ok(pos),
2462            _ => {
2463                return Err(classification(
2464                    DiagnosticCode::BAD_FIELD_BOUNDARY,
2465                    pos,
2466                    bytes.len(),
2467                ));
2468            }
2469        }
2470    }
2471}
2472
2473fn classification(code: DiagnosticCode, pos: usize, len: usize) -> FailureClassification {
2474    FailureClassification {
2475        code,
2476        range: single_byte_range(pos, len),
2477    }
2478}
2479
/// Return the zero-length range `(pos, pos)`.
const fn empty_range(pos: usize) -> (usize, usize) {
    (pos, pos)
}
2483
/// Return a range covering the single byte at `pos`, clamped to `len`.
///
/// When `pos` is at or past `len`, the result is the empty range
/// `(len, len)`.
fn single_byte_range(pos: usize, len: usize) -> (usize, usize) {
    let start = if pos < len { pos } else { len };
    let end = if start < len { start + 1 } else { len };
    (start, end)
}
2488
2489fn locate_entry(raw: &str, absolute_start: usize, field_count: usize) -> Option<EntryLocations> {
2490    let bytes = raw.as_bytes();
2491    let mut pos = 0;
2492    if bytes.get(pos) != Some(&b'@') {
2493        return None;
2494    }
2495    pos += 1;
2496
2497    let entry_type_start = pos;
2498    pos += scan_identifier(&bytes[pos..]);
2499    if pos == entry_type_start {
2500        return None;
2501    }
2502    let entry_type = (absolute_start + entry_type_start, absolute_start + pos);
2503
2504    pos = skip_ascii_whitespace(bytes, pos);
2505    let opening = *bytes.get(pos)?;
2506    let (delimiter, closing) = match opening {
2507        b'{' => (EntryDelimiter::Braces, b'}'),
2508        b'(' => (EntryDelimiter::Parentheses, b')'),
2509        _ => return None,
2510    };
2511    pos += 1;
2512    pos = skip_ascii_whitespace(bytes, pos);
2513
2514    let key_start = pos;
2515    pos += scan_identifier(&bytes[pos..]);
2516    if pos == key_start {
2517        return None;
2518    }
2519    let key = (absolute_start + key_start, absolute_start + pos);
2520
2521    pos = skip_ascii_whitespace(bytes, pos);
2522    if bytes.get(pos) != Some(&b',') {
2523        return Some(EntryLocations {
2524            entry_type,
2525            key,
2526            delimiter,
2527            fields: Vec::new(),
2528        });
2529    }
2530    pos += 1;
2531
2532    let mut fields = Vec::with_capacity(field_count);
2533    while fields.len() < field_count {
2534        pos = skip_ascii_whitespace(bytes, pos);
2535        if bytes.get(pos) == Some(&closing) || pos >= bytes.len() {
2536            break;
2537        }
2538
2539        let field_start = pos;
2540        let name_start = pos;
2541        pos += scan_identifier(&bytes[pos..]);
2542        if pos == name_start {
2543            break;
2544        }
2545        let name_end = pos;
2546
2547        pos = skip_ascii_whitespace(bytes, pos);
2548        if bytes.get(pos) != Some(&b'=') {
2549            break;
2550        }
2551        pos += 1;
2552        pos = skip_ascii_whitespace(bytes, pos);
2553
2554        let value_start = pos;
2555        let boundary = find_value_boundary(bytes, pos, closing);
2556        let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2557        let mut whole_end = value_end;
2558        pos = boundary;
2559        if bytes.get(pos) == Some(&b',') {
2560            whole_end = pos + 1;
2561            pos += 1;
2562        }
2563
2564        fields.push(FieldLocations {
2565            whole: (absolute_start + field_start, absolute_start + whole_end),
2566            name: (absolute_start + name_start, absolute_start + name_end),
2567            value: (absolute_start + value_start, absolute_start + value_end),
2568            value_delimiter: value_delimiter(&raw[value_start..value_end]),
2569        });
2570    }
2571
2572    Some(EntryLocations {
2573        entry_type,
2574        key,
2575        delimiter,
2576        fields,
2577    })
2578}
2579
2580fn value_delimiter(raw_value: &str) -> ValueDelimiter {
2581    let trimmed = raw_value.trim_start();
2582    if has_top_level_concat(trimmed.as_bytes()) {
2583        return ValueDelimiter::Concatenation;
2584    }
2585
2586    match trimmed.as_bytes().first() {
2587        Some(b'{') => ValueDelimiter::Braces,
2588        Some(b'"') => ValueDelimiter::Quotes,
2589        _ => ValueDelimiter::Bare,
2590    }
2591}
2592
2593fn locate_definition_value(raw: &str) -> Option<&str> {
2594    let bytes = raw.as_bytes();
2595    let equals = bytes.iter().position(|byte| *byte == b'=')?;
2596    let value_start = skip_ascii_whitespace(bytes, equals + 1);
2597    let closing = enclosing_close_byte(bytes)?;
2598    let boundary = find_value_boundary(bytes, value_start, closing);
2599    let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2600    raw.get(value_start..value_end)
2601}
2602
2603fn locate_preamble_value(raw: &str) -> Option<&str> {
2604    let bytes = raw.as_bytes();
2605    let opening = bytes.iter().position(|byte| matches!(byte, b'{' | b'('))?;
2606    let closing = match bytes[opening] {
2607        b'{' => b'}',
2608        b'(' => b')',
2609        _ => return None,
2610    };
2611    let value_start = skip_ascii_whitespace(bytes, opening + 1);
2612    let boundary = find_value_boundary(bytes, value_start, closing);
2613    let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2614    raw.get(value_start..value_end)
2615}
2616
/// Return the closing byte that pairs with the first `{` or `(` in `bytes`.
///
/// Returns `None` when neither opening delimiter occurs.
fn enclosing_close_byte(bytes: &[u8]) -> Option<u8> {
    bytes.iter().find_map(|byte| match byte {
        b'{' => Some(b'}'),
        b'(' => Some(b')'),
        _ => None,
    })
}
2625
2626fn has_top_level_concat(bytes: &[u8]) -> bool {
2627    let mut pos = 0;
2628    while let Some(&byte) = bytes.get(pos) {
2629        match byte {
2630            b'{' => pos = skip_braced(bytes, pos + 1),
2631            b'"' => pos = skip_quoted(bytes, pos + 1),
2632            b'#' => return true,
2633            _ => pos += 1,
2634        }
2635    }
2636    false
2637}
2638
/// Advance `pos` past any run of space, tab, line feed, or carriage return.
///
/// A `pos` at or beyond the end of `bytes` is returned unchanged.
fn skip_ascii_whitespace(bytes: &[u8], pos: usize) -> usize {
    let run = bytes.get(pos..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
            .count()
    });
    pos + run
}
2645
/// Move `end` backwards over trailing space, tab, line feed, or carriage
/// return, never going below `start`.
///
/// An `end` that points past the end of `bytes` is returned unchanged.
fn trim_ascii_whitespace_end(bytes: &[u8], start: usize, end: usize) -> usize {
    let mut trimmed = end;
    loop {
        if trimmed <= start {
            return trimmed;
        }
        match bytes.get(trimmed - 1) {
            Some(b' ' | b'\t' | b'\n' | b'\r') => trimmed -= 1,
            _ => return trimmed,
        }
    }
}
2652
2653fn scan_identifier(bytes: &[u8]) -> usize {
2654    bytes
2655        .iter()
2656        .position(|byte| !is_identifier_byte(*byte))
2657        .unwrap_or(bytes.len())
2658}
2659
/// Report whether `byte` may appear in an identifier: an ASCII letter or
/// digit, or one of `_`, `-`, `:`, `.`.
const fn is_identifier_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
2666
2667fn find_value_boundary(bytes: &[u8], mut pos: usize, closing: u8) -> usize {
2668    while let Some(&byte) = bytes.get(pos) {
2669        match byte {
2670            b'{' => pos = skip_braced(bytes, pos + 1),
2671            b'"' => pos = skip_quoted(bytes, pos + 1),
2672            b',' => break,
2673            b if b == closing => break,
2674            _ => pos += 1,
2675        }
2676    }
2677    pos
2678}
2679
/// Skip the remainder of a braced group; `pos` is just past its opening `{`.
///
/// Nested braces are balanced, and a backslash skips the byte that follows
/// it. Returns the offset just past the matching `}`, or the position where
/// scanning ran out of input when the group is never closed.
fn skip_braced(bytes: &[u8], mut pos: usize) -> usize {
    let mut nested = 0usize;
    loop {
        let Some(byte) = bytes.get(pos).copied() else {
            return pos;
        };
        pos = match byte {
            // An escape consumes two bytes but never steps past the end.
            b'\\' => bytes.len().min(pos + 2),
            b'}' if nested == 0 => return pos + 1,
            b'}' => {
                nested -= 1;
                pos + 1
            }
            b'{' => {
                nested += 1;
                pos + 1
            }
            _ => pos + 1,
        };
    }
}
2699
/// Skip the remainder of a braced group; `pos` is just past its opening `{`.
///
/// Nested braces are balanced, and a backslash skips the byte that follows
/// it. Returns the offset just past the matching `}`, or `None` when the
/// input ends before the group is closed.
fn skip_braced_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    let mut nested = 0usize;
    let mut cursor = pos;
    while cursor < bytes.len() {
        match bytes[cursor] {
            b'\\' => {
                // An escape consumes two bytes but never steps past the end.
                cursor = (cursor + 2).min(bytes.len());
                continue;
            }
            b'{' => nested += 1,
            b'}' => {
                if nested == 0 {
                    return Some(cursor + 1);
                }
                nested -= 1;
            }
            _ => {}
        }
        cursor += 1;
    }
    None
}
2719
/// Skip the remainder of a quoted string; `pos` is just past its opening `"`.
///
/// A backslash skips the byte that follows it. Returns the offset just past
/// the closing quote, or the position where scanning ran out of input when
/// the string is never closed.
fn skip_quoted(bytes: &[u8], mut pos: usize) -> usize {
    loop {
        pos = match bytes.get(pos) {
            None => return pos,
            Some(b'"') => return pos + 1,
            // An escape consumes two bytes but never steps past the end.
            Some(b'\\') => bytes.len().min(pos + 2),
            Some(_) => pos + 1,
        };
    }
}
2730
/// Skip the remainder of a quoted string; `pos` is just past its opening `"`.
///
/// A backslash skips the byte that follows it. Returns the offset just past
/// the closing quote, or `None` when the input ends before the string is
/// closed.
fn skip_quoted_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    let mut cursor = pos;
    while cursor < bytes.len() {
        match bytes[cursor] {
            b'"' => return Some(cursor + 1),
            // An escape consumes two bytes but never steps past the end.
            b'\\' => cursor = (cursor + 2).min(bytes.len()),
            _ => cursor += 1,
        }
    }
    None
}