Skip to main content

bibtex_parser/
document.rs

1//! Parsed bibliography model with source metadata.
2//!
3//! [`Library`] is the compact API for bibliography data. [`ParsedDocument`]
4//! contains source-order blocks, per-item metadata, retained raw text,
5//! diagnostics, and partial parse results.
6
7use crate::library::BlockKind;
8use crate::library::RawBuildItem;
9use crate::model::normalize_text_projection;
10use crate::source::SourceCursor;
11use crate::{
12    normalize_doi, Comment, DateParseError, DateParts, Entry, EntryType, FailedBlock, Field,
13    Library, PersonName, Preamble, ResourceField, SourceId, SourceMap, SourceSpan,
14    StringDefinition, Value,
15};
16use std::borrow::Cow;
17use std::fmt;
18
/// Parse status for a parsed bibliography document.
///
/// This is the file-level outcome carried in the `status` field of
/// [`ParseSummary`] and [`StreamingSummary`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
    /// The document parsed without diagnostics that affect recovered content.
    Ok,
    /// The document contains useful parsed data plus recovered or failed blocks.
    Partial,
    /// The document could not produce meaningful bibliography data.
    Failed,
}
29
/// Diagnostic severity.
///
/// Stored in [`Diagnostic::severity`]; each variant is tallied into its own
/// counter by the summary types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// A problem that prevents some requested parse result from being valid.
    Error,
    /// A recoverable problem that callers may want to show or test.
    Warning,
    /// Additional parse information that is not itself a problem.
    Info,
}
40
/// Stable machine-readable diagnostic code.
///
/// A code is a short kebab-case string. Built-in codes borrow a static
/// string; caller-defined codes own their text. Equality and hashing compare
/// the text only, so a borrowed and an owned code with the same text are equal.
///
/// The initial parser diagnostic codes are:
/// `missing-entry-key`, `missing-field-separator`, `expected-field-name`,
/// `empty-field-value`, `expected-value-atom`, `bad-field-boundary`,
/// `bad-value-boundary`, `unclosed-entry`, `unclosed-braced-value`, and
/// `unclosed-quoted-value`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DiagnosticCode(Cow<'static, str>);

impl DiagnosticCode {
    /// Generic parse error code used before finer-grained recovery classifies a failure.
    pub const PARSE_ERROR: Self = Self::borrowed("parse-error");
    /// Entry body did not contain a citation key.
    pub const MISSING_ENTRY_KEY: Self = Self::borrowed("missing-entry-key");
    /// Expected a comma after an entry key or `=` after a field name.
    pub const MISSING_FIELD_SEPARATOR: Self = Self::borrowed("missing-field-separator");
    /// Expected a field name inside an entry body.
    pub const EXPECTED_FIELD_NAME: Self = Self::borrowed("expected-field-name");
    /// Field separator was present but no value was provided.
    pub const EMPTY_FIELD_VALUE: Self = Self::borrowed("empty-field-value");
    /// Expected a literal, number, variable, quoted value, or braced value.
    pub const EXPECTED_VALUE_ATOM: Self = Self::borrowed("expected-value-atom");
    /// Expected a comma or entry close after a field value.
    pub const BAD_FIELD_BOUNDARY: Self = Self::borrowed("bad-field-boundary");
    /// Expected a value atom after a concatenation operator.
    pub const BAD_VALUE_BOUNDARY: Self = Self::borrowed("bad-value-boundary");
    /// Entry ended before its closing delimiter was found.
    pub const UNCLOSED_ENTRY: Self = Self::borrowed("unclosed-entry");
    /// Braced field value ended before its closing brace was found.
    pub const UNCLOSED_BRACED_VALUE: Self = Self::borrowed("unclosed-braced-value");
    /// Quoted field value ended before its closing quote was found.
    pub const UNCLOSED_QUOTED_VALUE: Self = Self::borrowed("unclosed-quoted-value");

    /// Wrap a static string as a diagnostic code without allocating.
    #[must_use]
    pub const fn borrowed(code: &'static str) -> Self {
        Self(Cow::Borrowed(code))
    }

    /// Build a diagnostic code that owns its text.
    #[must_use]
    pub fn custom(code: impl Into<String>) -> Self {
        let text: String = code.into();
        Self(Cow::Owned(text))
    }

    /// Borrow the code text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        match &self.0 {
            Cow::Borrowed(text) => text,
            Cow::Owned(text) => text.as_str(),
        }
    }
}

impl fmt::Display for DiagnosticCode {
    /// Write the bare code text; formatter width and padding are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
99
/// Location target for a diagnostic.
///
/// Every index-bearing variant stores plain `usize` positions; which
/// collection each index refers to is stated on the variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticTarget {
    /// The whole input file or source.
    File,
    /// A source-order block by index.
    Block(usize),
    /// An entry by parsed-entry index.
    Entry(usize),
    /// A field by parsed-entry and field index.
    Field {
        /// Parsed-entry index.
        entry: usize,
        /// Field index inside the parsed entry.
        field: usize,
    },
    /// A value by parsed-entry and field index.
    Value {
        /// Parsed-entry index.
        entry: usize,
        /// Field index inside the parsed entry.
        field: usize,
    },
    /// A failed block by failed-block index.
    FailedBlock(usize),
}
126
/// Structured diagnostic emitted while building a parsed document.
///
/// Combines a severity, a machine-readable code, and a human-readable
/// message with the object the diagnostic refers to. `source` and `snippet`
/// are optional and may both be `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Diagnostic {
    /// Diagnostic severity.
    pub severity: DiagnosticSeverity,
    /// Stable machine-readable code.
    pub code: DiagnosticCode,
    /// Human-readable message.
    pub message: String,
    /// Bibliography object targeted by this diagnostic.
    pub target: DiagnosticTarget,
    /// Source location, when available.
    pub source: Option<SourceSpan>,
    /// Short source context suitable for display, when available.
    pub snippet: Option<String>,
}
143
144impl Diagnostic {
145    /// Create an error diagnostic.
146    #[must_use]
147    pub fn error(
148        code: DiagnosticCode,
149        message: impl Into<String>,
150        target: DiagnosticTarget,
151        source: Option<SourceSpan>,
152    ) -> Self {
153        Self {
154            severity: DiagnosticSeverity::Error,
155            code,
156            message: message.into(),
157            target,
158            source,
159            snippet: None,
160        }
161    }
162
163    /// Attach source context to this diagnostic.
164    #[must_use]
165    pub fn with_snippet(mut self, snippet: impl Into<String>) -> Self {
166        self.snippet = Some(snippet.into());
167        self
168    }
169}
170
/// Summary counts for a parsed document.
///
/// All counters are plain `usize` totals; `status` is the overall
/// [`ParseStatus`] for the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseSummary {
    /// File-level parse status.
    pub status: ParseStatus,
    /// Number of parsed entries.
    pub entries: usize,
    /// Number of warning diagnostics.
    pub warnings: usize,
    /// Number of error diagnostics.
    pub errors: usize,
    /// Number of informational diagnostics.
    pub infos: usize,
    /// Number of failed blocks.
    pub failed_blocks: usize,
    /// Number of entries recovered as partial entries.
    pub recovered_blocks: usize,
}
189
/// Source metadata associated with a parsed document.
///
/// The name may borrow from caller input for lifetime `'a` or be owned.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedSource<'a> {
    /// Source index inside the document.
    pub id: SourceId,
    /// Human-readable source name or path, when known.
    pub name: Option<Cow<'a, str>>,
}
198
199impl ParsedSource<'_> {
200    /// Return `true` when this source has no caller-provided name.
201    #[must_use]
202    pub const fn is_anonymous(&self) -> bool {
203        self.name.is_none()
204    }
205
206    /// Convert this source metadata into an owned value.
207    #[must_use]
208    pub fn into_owned(self) -> ParsedSource<'static> {
209        ParsedSource {
210            id: self.id,
211            name: self.name.map(|name| Cow::Owned(name.into_owned())),
212        }
213    }
214}
215
/// Source-order block in a parsed document.
///
/// Each variant holds a `usize` index into the collection named on that
/// variant, so the block itself stays `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedBlock {
    /// A regular or partial bibliography entry by parsed-entry index.
    Entry(usize),
    /// A string definition by parsed-string index.
    String(usize),
    /// A preamble by parsed-preamble index.
    Preamble(usize),
    /// A comment by parsed-comment index.
    Comment(usize),
    /// A failed block by failed-block index.
    Failed(usize),
}
230
/// Source-order event emitted by streaming parsing.
///
/// Unlike [`ParsedBlock`], each variant carries the parsed item itself
/// rather than an index.
#[derive(Debug, Clone, PartialEq)]
pub enum ParseEvent<'a> {
    /// A regular or recovered bibliography entry.
    Entry(ParsedEntry<'a>),
    /// A string definition.
    String(ParsedString<'a>),
    /// A preamble block.
    Preamble(ParsedPreamble<'a>),
    /// A comment block.
    Comment(ParsedComment<'a>),
    /// A malformed block retained by tolerant parsing.
    Failed(ParsedFailedBlock<'a>),
    /// A structured diagnostic associated with a preceding event.
    Diagnostic(Diagnostic),
}
247
/// Callback control returned from streaming parse handlers.
///
/// [`StreamingSummary::stopped`] records whether a handler asked to stop.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseFlow {
    /// Continue parsing.
    Continue,
    /// Stop after the current event.
    Stop,
}
256
/// Summary returned after streaming parsing.
///
/// Holds one counter per emitted item kind and per diagnostic severity,
/// plus the overall status and whether parsing was stopped early.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StreamingSummary {
    /// File-level status for processed events.
    pub status: ParseStatus,
    /// Number of emitted entries.
    pub entries: usize,
    /// Number of emitted string definitions.
    pub strings: usize,
    /// Number of emitted preambles.
    pub preambles: usize,
    /// Number of emitted comments.
    pub comments: usize,
    /// Number of emitted failed blocks.
    pub failed_blocks: usize,
    /// Number of warning diagnostics.
    pub warnings: usize,
    /// Number of error diagnostics.
    pub errors: usize,
    /// Number of informational diagnostics.
    pub infos: usize,
    /// Number of recovered partial entries.
    pub recovered_blocks: usize,
    /// `true` when the callback requested early stop.
    pub stopped: bool,
}
283
284impl Default for StreamingSummary {
285    fn default() -> Self {
286        Self {
287            status: ParseStatus::Ok,
288            entries: 0,
289            strings: 0,
290            preambles: 0,
291            comments: 0,
292            failed_blocks: 0,
293            warnings: 0,
294            errors: 0,
295            infos: 0,
296            recovered_blocks: 0,
297            stopped: false,
298        }
299    }
300}
301
302impl StreamingSummary {
303    pub(crate) fn finalize_status(&mut self) {
304        self.status = if self.errors == 0 {
305            ParseStatus::Ok
306        } else if self.entries == 0 && self.strings == 0 && self.preambles == 0 {
307            ParseStatus::Failed
308        } else {
309            ParseStatus::Partial
310        };
311    }
312
313    pub(crate) fn count_diagnostic(&mut self, diagnostic: &Diagnostic) {
314        match diagnostic.severity {
315            DiagnosticSeverity::Error => self.errors += 1,
316            DiagnosticSeverity::Warning => self.warnings += 1,
317            DiagnosticSeverity::Info => self.infos += 1,
318        }
319    }
320}
321
/// Status of a parsed entry.
///
/// Stored in [`ParsedEntry::status`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedEntryStatus {
    /// Entry parsed completely.
    Complete,
    /// Entry has a recovered type or key plus at least some usable content.
    Partial,
}
330
/// Source spans of fields that were removed from a parsed entry.
///
/// The single-span case is stored inline; a `Vec` is only used once more
/// than one span has been recorded.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum RemovedFieldSources {
    /// Exactly one recorded span.
    One(SourceSpan),
    /// Recorded spans in the order they were pushed.
    Many(Vec<SourceSpan>),
}
336
337impl RemovedFieldSources {
338    fn push(&mut self, source: SourceSpan) {
339        match self {
340            Self::One(first) => {
341                *self = Self::Many(vec![*first, source]);
342            }
343            Self::Many(sources) => sources.push(source),
344        }
345    }
346
347    fn as_slice(&self) -> &[SourceSpan] {
348        match self {
349            Self::One(source) => std::slice::from_ref(source),
350            Self::Many(sources) => sources.as_slice(),
351        }
352    }
353}
354
/// Delimiter used by a BibTeX entry body.
///
/// Stored in [`ParsedEntry::delimiter`] when the parser retains it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryDelimiter {
    /// Entry used `{ ... }`.
    Braces,
    /// Entry used `( ... )`.
    Parentheses,
}
363
/// Delimiter or source shape used by a BibTeX value.
///
/// Stored in [`ParsedValue::delimiter`] when the parser retains it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueDelimiter {
    /// Value used `{ ... }`.
    Braces,
    /// Value used `" ... "`.
    Quotes,
    /// Value was a bare number or identifier.
    Bare,
    /// Value used one or more `#` concatenation separators.
    Concatenation,
}
376
/// Policy for variables that cannot be resolved during value expansion.
///
/// Selected through [`ExpansionOptions::unresolved_variables`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnresolvedVariablePolicy {
    /// Keep the variable name as ordinary text.
    Preserve,
    /// Render unresolved variables as `{undefined:name}`.
    Placeholder,
    /// Return an error for the first unresolved variable.
    Error,
}
387
/// Options for expanding parsed values.
///
/// The `Default` implementation enables both expansions and uses
/// [`UnresolvedVariablePolicy::Error`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExpansionOptions {
    /// Expand user `@string` definitions.
    pub expand_strings: bool,
    /// Expand standard three-letter BibTeX month variables.
    pub expand_months: bool,
    /// Behavior when a variable cannot be resolved.
    pub unresolved_variables: UnresolvedVariablePolicy,
}
398
399impl Default for ExpansionOptions {
400    fn default() -> Self {
401        Self {
402            expand_strings: true,
403            expand_months: true,
404            unresolved_variables: UnresolvedVariablePolicy::Error,
405        }
406    }
407}
408
/// Parsed BibTeX value plus optional source-preserving metadata.
///
/// Only `value` is always present; every other field is `None` unless the
/// code that built the value filled it in.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedValue<'a> {
    /// Structured value.
    pub value: Value<'a>,
    /// Exact raw value text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
    /// Source location for the value, when available.
    pub source: Option<SourceSpan>,
    /// Expanded text projection, when a parser mode computes it separately.
    pub expanded: Option<Cow<'a, str>>,
    /// Original value delimiter or source shape, when retained.
    pub delimiter: Option<ValueDelimiter>,
}
423
424impl<'a> ParsedValue<'a> {
425    /// Create parsed-value metadata from a structured value.
426    #[must_use]
427    pub const fn new(value: Value<'a>) -> Self {
428        Self {
429            value,
430            raw: None,
431            source: None,
432            expanded: None,
433            delimiter: None,
434        }
435    }
436
437    /// Convert this parsed value into the structured value.
438    #[must_use]
439    pub fn into_value(self) -> Value<'a> {
440        self.value
441    }
442
443    /// Return the structured parsed value.
444    #[must_use]
445    pub const fn parsed(&self) -> &Value<'a> {
446        &self.value
447    }
448
449    /// Return exact raw value text when raw preservation was requested.
450    #[must_use]
451    pub fn raw_text(&self) -> Option<&str> {
452        self.raw.as_deref()
453    }
454
455    /// Return requested expanded text when the parser populated it.
456    #[must_use]
457    pub fn expanded_text(&self) -> Option<&str> {
458        self.expanded.as_deref()
459    }
460
461    /// Return an ordinary text projection of the parsed value.
462    #[must_use]
463    pub fn plain_text(&self) -> String {
464        self.value.to_plain_string()
465    }
466
467    /// Return a display-oriented projection of the parsed value.
468    #[must_use]
469    pub fn lossy_text(&self) -> String {
470        self.value.to_lossy_string()
471    }
472
473    /// Return a Unicode-normalized plain-text projection.
474    #[cfg(feature = "latex_to_unicode")]
475    #[must_use]
476    pub fn unicode_plain_text(&self) -> String {
477        self.value.to_unicode_plain_string()
478    }
479
480    /// Convert this parsed value into an owned value.
481    #[must_use]
482    pub fn into_owned(self) -> ParsedValue<'static> {
483        ParsedValue {
484            value: self.value.into_owned(),
485            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
486            source: self.source,
487            expanded: self
488                .expanded
489                .map(|expanded| Cow::Owned(expanded.into_owned())),
490            delimiter: self.delimiter,
491        }
492    }
493
494    pub(crate) fn from_owned_value(
495        value: Value<'a>,
496        source: Option<SourceSpan>,
497        delimiter: Option<ValueDelimiter>,
498    ) -> ParsedValue<'static> {
499        ParsedValue {
500            value: value.into_owned(),
501            raw: None,
502            source,
503            expanded: None,
504            delimiter,
505        }
506    }
507}
508
/// Parsed field plus optional source-preserving metadata.
///
/// `name` and `value` are always present; the raw text and the three spans
/// are `None` unless the code that built the field filled them in.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedField<'a> {
    /// Field name as it appeared after parsing.
    pub name: Cow<'a, str>,
    /// Parsed field value.
    pub value: ParsedValue<'a>,
    /// Exact raw field text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
    /// Source location for the whole field, when available.
    pub source: Option<SourceSpan>,
    /// Source location for the field name, when available.
    pub name_source: Option<SourceSpan>,
    /// Source location for the field value, when available.
    pub value_source: Option<SourceSpan>,
}
525
526impl<'a> ParsedField<'a> {
527    /// Create parsed-field metadata from a structured field.
528    #[must_use]
529    pub fn from_field(field: Field<'a>) -> Self {
530        Self {
531            name: field.name,
532            value: ParsedValue::new(field.value),
533            raw: None,
534            source: None,
535            name_source: None,
536            value_source: None,
537        }
538    }
539
540    /// Convert this parsed field into the structured field.
541    #[must_use]
542    pub fn into_field(self) -> Field<'a> {
543        Field {
544            name: self.name,
545            value: self.value.into_value(),
546        }
547    }
548
549    /// Convert this parsed field into an owned value.
550    #[must_use]
551    pub fn into_owned(self) -> ParsedField<'static> {
552        ParsedField {
553            name: owned_field_name(self.name),
554            value: self.value.into_owned(),
555            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
556            source: self.source,
557            name_source: self.name_source,
558            value_source: self.value_source,
559        }
560    }
561
562    pub(crate) fn from_owned_field(field: Field<'a>) -> ParsedField<'static> {
563        ParsedField {
564            name: owned_field_name(field.name),
565            value: ParsedValue::from_owned_value(field.value, None, None),
566            raw: None,
567            source: None,
568            name_source: None,
569            value_source: None,
570        }
571    }
572}
573
/// Parsed entry plus optional source-preserving metadata.
///
/// The type, key, fields, and status are always present. Spans, the
/// delimiter, and raw text are optional and `None` unless filled in by the
/// code that built the entry.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedEntry<'a> {
    /// Entry type.
    pub ty: EntryType<'a>,
    /// Citation key.
    pub key: Cow<'a, str>,
    /// Parsed fields in source order.
    pub fields: Vec<ParsedField<'a>>,
    /// Whether the entry is complete or recovered.
    pub status: ParsedEntryStatus,
    /// Source location for the whole entry, when available.
    pub source: Option<SourceSpan>,
    /// Source location for the entry type token, when available.
    pub entry_type_source: Option<SourceSpan>,
    /// Source location for the citation key token, when available.
    pub key_source: Option<SourceSpan>,
    /// Entry body delimiter, when retained.
    pub delimiter: Option<EntryDelimiter>,
    /// Exact raw entry text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
    // Spans of fields removed after parsing; `None` until the first removal
    // of a field that had a recorded span.
    pub(crate) removed_field_sources: Option<Box<RemovedFieldSources>>,
    /// Diagnostics attached to this entry.
    pub diagnostics: Vec<Diagnostic>,
}
599
600impl<'a> ParsedEntry<'a> {
601    /// Create parsed-entry metadata from a structured entry.
602    #[must_use]
603    pub fn from_entry(entry: Entry<'a>, source: Option<SourceSpan>) -> Self {
604        Self {
605            ty: entry.ty,
606            key: entry.key,
607            fields: entry
608                .fields
609                .into_iter()
610                .map(ParsedField::from_field)
611                .collect(),
612            status: ParsedEntryStatus::Complete,
613            source,
614            entry_type_source: None,
615            key_source: None,
616            delimiter: None,
617            raw: None,
618            removed_field_sources: None,
619            diagnostics: Vec::new(),
620        }
621    }
622
623    pub(crate) fn from_entry_owned(
624        entry: Entry<'a>,
625        source: Option<SourceSpan>,
626    ) -> ParsedEntry<'static> {
627        ParsedEntry {
628            ty: entry.ty.into_owned(),
629            key: Cow::Owned(entry.key.into_owned()),
630            fields: entry
631                .fields
632                .into_iter()
633                .map(ParsedField::from_owned_field)
634                .collect(),
635            status: ParsedEntryStatus::Complete,
636            source,
637            entry_type_source: None,
638            key_source: None,
639            delimiter: None,
640            raw: None,
641            removed_field_sources: None,
642            diagnostics: Vec::new(),
643        }
644    }
645
646    pub(crate) fn from_stream_entry(
647        entry: Entry<'a>,
648        source: SourceSpan,
649        raw: &'a str,
650        source_map: &SourceMap<'a>,
651        preserve_raw: bool,
652    ) -> Self {
653        let mut parsed = Self::from_entry(entry, Some(source));
654        parsed.apply_locations(raw, source_map, preserve_raw);
655        parsed
656    }
657
    /// Build an owned, complete parsed entry from an entry whose token
    /// locations were already recorded by the parser.
    ///
    /// Every recorded location pair is turned into a `SourceSpan` through
    /// `span_cursor`. Raw text is not retained on the entry or its fields.
    // NOTE(review): `span_cursor` is taken by `&mut`, so lookups may depend on
    // call order (entry type, key, then each field's whole/name/value) —
    // confirm before reordering these calls.
    pub(crate) fn from_located_stream_entry_owned(
        located: crate::parser::entry::LocatedEntry<'a>,
        source: SourceSpan,
        span_cursor: &mut SourceCursor<'_, 'a>,
    ) -> ParsedEntry<'static> {
        let entry = located.entry;
        let entry_type_source = span_cursor.span(located.entry_type.0, located.entry_type.1);
        let key_source = span_cursor.span(located.key.0, located.key.1);
        // Pair each field with its recorded location; `zip` stops at the
        // shorter of the two sequences.
        let fields = entry
            .fields
            .into_iter()
            .zip(located.fields)
            .map(|(field, location)| {
                let field_source = span_cursor.span(location.whole.0, location.whole.1);
                let name_source = span_cursor.span(location.name.0, location.name.1);
                let value_source = span_cursor.span(location.value.0, location.value.1);
                ParsedField {
                    name: owned_field_name(field.name),
                    value: ParsedValue::from_owned_value(
                        field.value,
                        Some(value_source),
                        Some(location.value_delimiter),
                    ),
                    raw: None,
                    source: Some(field_source),
                    name_source: Some(name_source),
                    // Same span as the one stored on the value itself.
                    value_source: Some(value_source),
                }
            })
            .collect();

        ParsedEntry {
            ty: entry.ty.into_owned(),
            key: Cow::Owned(entry.key.into_owned()),
            fields,
            status: ParsedEntryStatus::Complete,
            source: Some(source),
            entry_type_source: Some(entry_type_source),
            key_source: Some(key_source),
            delimiter: Some(located.delimiter),
            raw: None,
            removed_field_sources: None,
            diagnostics: Vec::new(),
        }
    }
703
704    fn apply_locations(&mut self, raw: &'a str, source_map: &SourceMap<'a>, preserve_raw: bool) {
705        let Some(entry_span) = self.source else {
706            return;
707        };
708        let Some(locations) = locate_entry(raw, entry_span.byte_start, self.fields.len()) else {
709            return;
710        };
711
712        self.entry_type_source =
713            Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
714        self.key_source = Some(source_map.span(locations.key.0, locations.key.1));
715        self.delimiter = Some(locations.delimiter);
716        if preserve_raw {
717            self.raw = Some(Cow::Borrowed(raw));
718        }
719
720        for (field, location) in self.fields.iter_mut().zip(locations.fields) {
721            field.source = Some(source_map.span(location.whole.0, location.whole.1));
722            field.name_source = Some(source_map.span(location.name.0, location.name.1));
723            field.value.source = Some(source_map.span(location.value.0, location.value.1));
724            field.value_source = field.value.source;
725            field.value.delimiter = Some(location.value_delimiter);
726
727            if preserve_raw {
728                if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
729                    field.raw = Some(Cow::Borrowed(source));
730                }
731                if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
732                    field.value.raw = Some(Cow::Borrowed(source));
733                }
734            }
735        }
736    }
737
738    /// Return the citation key.
739    #[must_use]
740    pub fn key(&self) -> &str {
741        &self.key
742    }
743
744    /// Convert this parsed entry into the structured entry.
745    #[must_use]
746    pub fn into_entry(self) -> Entry<'a> {
747        Entry {
748            ty: self.ty,
749            key: self.key,
750            fields: self
751                .fields
752                .into_iter()
753                .map(ParsedField::into_field)
754                .collect(),
755        }
756    }
757
758    /// Rename the citation key.
759    pub fn rename_key(&mut self, key: impl Into<Cow<'a, str>>) {
760        self.key = key.into();
761    }
762
763    /// Replace the entry type.
764    pub fn set_entry_type(&mut self, ty: EntryType<'a>) {
765        self.ty = ty;
766    }
767
768    /// Add a field while preserving surrounding source text when possible.
769    pub fn add_field(&mut self, name: impl Into<Cow<'a, str>>, value: Value<'a>) {
770        self.fields.push(ParsedField {
771            name: name.into(),
772            value: ParsedValue::new(value),
773            raw: None,
774            source: None,
775            name_source: None,
776            value_source: None,
777        });
778    }
779
780    /// Replace the first field value whose name matches exactly.
781    #[must_use]
782    pub fn replace_field_value(&mut self, name: &str, value: Value<'a>) -> bool {
783        self.replace_field_value_at(name, 0, value)
784    }
785
786    /// Replace a specific duplicate field occurrence by zero-based occurrence index.
787    #[must_use]
788    pub fn replace_field_value_at(
789        &mut self,
790        name: &str,
791        occurrence: usize,
792        value: Value<'a>,
793    ) -> bool {
794        let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
795            return false;
796        };
797        let field = &mut self.fields[index];
798        field.value.value = value;
799        field.value.raw = None;
800        field.raw = None;
801        field.value.expanded = None;
802        true
803    }
804
805    /// Rename all fields whose name matches exactly.
806    #[must_use]
807    pub fn rename_field(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> usize {
808        let new = new.into();
809        let mut renamed = 0;
810        for field in &mut self.fields {
811            if field.name == old {
812                field.name.clone_from(&new);
813                field.raw = None;
814                renamed += 1;
815            }
816        }
817        renamed
818    }
819
820    /// Remove all fields whose name matches exactly.
821    #[must_use]
822    pub fn remove_field(&mut self, name: &str) -> usize {
823        let mut removed = 0usize;
824        let mut index = 0usize;
825        while index < self.fields.len() {
826            if self.fields[index].name == name {
827                self.remove_field_index(index);
828                removed += 1;
829            } else {
830                index += 1;
831            }
832        }
833        removed
834    }
835
836    /// Remove a specific duplicate field occurrence by zero-based occurrence index.
837    #[must_use]
838    pub fn remove_field_at(&mut self, name: &str, occurrence: usize) -> bool {
839        let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
840            return false;
841        };
842        self.remove_field_index(index);
843        true
844    }
845
846    /// Remove a field by absolute field index.
847    #[must_use]
848    pub fn remove_field_by_index(&mut self, index: usize) -> bool {
849        if index >= self.fields.len() {
850            return false;
851        }
852        self.remove_field_index(index);
853        true
854    }
855
856    /// Remove configured export-only fields from this entry.
857    #[must_use]
858    pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
859        let mut removed = 0usize;
860        let mut index = 0usize;
861        while index < self.fields.len() {
862            if names
863                .iter()
864                .any(|name| self.fields[index].name.eq_ignore_ascii_case(name))
865            {
866                self.remove_field_index(index);
867                removed += 1;
868            } else {
869                index += 1;
870            }
871        }
872        removed
873    }
874
875    fn remove_field_index(&mut self, index: usize) {
876        let field = self.fields.remove(index);
877        if let Some(source) = field.source {
878            match &mut self.removed_field_sources {
879                Some(sources) => sources.push(source),
880                None => {
881                    self.removed_field_sources = Some(Box::new(RemovedFieldSources::One(source)));
882                }
883            }
884        } else {
885            self.raw = None;
886        }
887    }
888
889    pub(crate) fn removed_field_sources(&self) -> &[SourceSpan] {
890        self.removed_field_sources
891            .as_deref()
892            .map_or(&[], RemovedFieldSources::as_slice)
893    }
894
895    /// Return the first field matching `name`, ignoring ASCII case.
896    #[must_use]
897    pub fn field_ignore_case(&self, name: &str) -> Option<&ParsedField<'a>> {
898        self.fields
899            .iter()
900            .find(|field| field.name.eq_ignore_ascii_case(name))
901    }
902
903    /// Return a field value as ordinary text, ignoring ASCII case.
904    #[must_use]
905    pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
906        self.field_ignore_case(name)
907            .map(|field| field.value.plain_text())
908    }
909
910    /// Return the normalized DOI, if the entry has a recognizable DOI field.
911    #[must_use]
912    pub fn doi(&self) -> Option<String> {
913        self.get_as_string_ignore_case("doi")
914            .and_then(|doi| normalize_doi(&doi))
915    }
916
917    /// Parse the `author` field into structured BibTeX names.
918    #[must_use]
919    pub fn authors(&self) -> Vec<PersonName> {
920        self.get_as_string_ignore_case("author")
921            .map_or_else(Vec::new, |authors| crate::parse_names(&authors))
922    }
923
924    /// Parse the `editor` field into structured BibTeX names.
925    #[must_use]
926    pub fn editors(&self) -> Vec<PersonName> {
927        self.get_as_string_ignore_case("editor")
928            .map_or_else(Vec::new, |editors| crate::parse_names(&editors))
929    }
930
931    /// Parse the `translator` field into structured BibTeX names.
932    #[must_use]
933    pub fn translators(&self) -> Vec<PersonName> {
934        self.get_as_string_ignore_case("translator")
935            .map_or_else(Vec::new, |translators| crate::parse_names(&translators))
936    }
937
938    /// Parse a specific date-like field into date parts.
939    #[must_use]
940    pub fn date_parts_for(
941        &self,
942        field: &str,
943    ) -> Option<std::result::Result<DateParts, DateParseError>> {
944        self.get_as_string_ignore_case(field)
945            .map(|value| crate::parse_date_parts(&value))
946    }
947
948    /// Return issued date parts for this entry.
949    #[must_use]
950    pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
951        self.clone().into_entry().date_parts()
952    }
953
954    /// Return classified resource and identifier fields in source order.
955    #[must_use]
956    pub fn resource_fields(&self) -> Vec<ResourceField> {
957        self.clone().into_entry().resource_fields()
958    }
959
960    /// Convert this parsed entry into an owned value.
961    #[must_use]
962    pub fn into_owned(self) -> ParsedEntry<'static> {
963        ParsedEntry {
964            ty: self.ty.into_owned(),
965            key: Cow::Owned(self.key.into_owned()),
966            fields: self
967                .fields
968                .into_iter()
969                .map(ParsedField::into_owned)
970                .collect(),
971            status: self.status,
972            source: self.source,
973            entry_type_source: self.entry_type_source,
974            key_source: self.key_source,
975            delimiter: self.delimiter,
976            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
977            removed_field_sources: self.removed_field_sources,
978            diagnostics: self.diagnostics,
979        }
980    }
981}
982
983fn owned_field_name(name: Cow<'_, str>) -> Cow<'static, str> {
984    static_field_name(&name).map_or_else(|| Cow::Owned(name.into_owned()), Cow::Borrowed)
985}
986
/// Return the `'static` spelling of a well-known field name.
///
/// Matching is exact and case-sensitive: only the lowercase names listed
/// below are recognized. Any other input yields `None`.
fn static_field_name(name: &str) -> Option<&'static str> {
    const KNOWN_FIELD_NAMES: &[&str] = &[
        "abstract",
        "address",
        "archiveprefix",
        "author",
        "booktitle",
        "chapter",
        "copyright",
        "crossref",
        "date",
        "doi",
        "edition",
        "editor",
        "eprint",
        "eventdate",
        "file",
        "institution",
        "isbn",
        "issn",
        "journal",
        "keywords",
        "language",
        "month",
        "note",
        "number",
        "organization",
        "origdate",
        "pages",
        "pmcid",
        "pmid",
        "primaryclass",
        "publisher",
        "school",
        "series",
        "timestamp",
        "title",
        "translator",
        "type",
        "url",
        "urldate",
        "volume",
        "year",
    ];

    KNOWN_FIELD_NAMES
        .iter()
        .copied()
        .find(|known| *known == name)
}
1033
1034fn nth_field_index(fields: &[ParsedField<'_>], name: &str, occurrence: usize) -> Option<usize> {
1035    fields
1036        .iter()
1037        .enumerate()
1038        .filter(|(_, field)| field.name == name)
1039        .nth(occurrence)
1040        .map(|(index, _)| index)
1041}
1042
/// Parsed string definition plus optional source-preserving metadata.
///
/// Instances are built either from a structured [`StringDefinition`]
/// (which never carries raw text) or by the streaming constructors, which
/// can retain the raw definition text on request.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedString<'a> {
    /// String variable name.
    pub name: Cow<'a, str>,
    /// Parsed string value.
    pub value: ParsedValue<'a>,
    /// Source location for the definition, when available.
    pub source: Option<SourceSpan>,
    /// Exact raw string-definition text, when retained by the parser mode.
    ///
    /// `None` when raw text was not preserved.
    pub raw: Option<Cow<'a, str>>,
}
1055
1056impl<'a> ParsedString<'a> {
1057    /// Create parsed-string metadata from a structured string definition.
1058    #[must_use]
1059    pub fn from_definition(definition: StringDefinition<'a>) -> Self {
1060        Self {
1061            name: definition.name,
1062            value: ParsedValue::new(definition.value),
1063            source: definition.source,
1064            raw: None,
1065        }
1066    }
1067
1068    pub(crate) fn from_stream_definition(
1069        name: &'a str,
1070        value: Value<'a>,
1071        source: SourceSpan,
1072        raw: &'a str,
1073        preserve_raw: bool,
1074    ) -> Self {
1075        let value_raw = locate_definition_value(raw);
1076        Self {
1077            name: Cow::Borrowed(name),
1078            value: ParsedValue {
1079                value,
1080                raw: if preserve_raw {
1081                    value_raw.map(Cow::Borrowed)
1082                } else {
1083                    None
1084                },
1085                source: None,
1086                expanded: None,
1087                delimiter: value_raw.map(value_delimiter),
1088            },
1089            source: Some(source),
1090            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1091        }
1092    }
1093
1094    /// Convert this parsed string definition into an owned value.
1095    #[must_use]
1096    pub fn into_owned(self) -> ParsedString<'static> {
1097        ParsedString {
1098            name: Cow::Owned(self.name.into_owned()),
1099            value: self.value.into_owned(),
1100            source: self.source,
1101            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1102        }
1103    }
1104
1105    pub(crate) fn from_stream_definition_owned(
1106        name: &'a str,
1107        value: Value<'a>,
1108        source: SourceSpan,
1109        raw: &'a str,
1110    ) -> ParsedString<'static> {
1111        let value_raw = locate_definition_value(raw);
1112        ParsedString {
1113            name: Cow::Owned(name.to_string()),
1114            value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1115            source: Some(source),
1116            raw: None,
1117        }
1118    }
1119}
1120
/// Parsed preamble plus optional source-preserving metadata.
///
/// Instances are built either from a structured [`Preamble`] (which never
/// carries raw text) or by the streaming constructors, which can retain the
/// raw preamble text on request.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedPreamble<'a> {
    /// Parsed preamble value.
    pub value: ParsedValue<'a>,
    /// Source location for the preamble, when available.
    pub source: Option<SourceSpan>,
    /// Exact raw preamble text, when retained by the parser mode.
    ///
    /// `None` when raw text was not preserved.
    pub raw: Option<Cow<'a, str>>,
}
1131
1132impl<'a> ParsedPreamble<'a> {
1133    /// Create parsed-preamble metadata from a structured preamble.
1134    #[must_use]
1135    pub fn from_preamble(preamble: Preamble<'a>) -> Self {
1136        Self {
1137            value: ParsedValue::new(preamble.value),
1138            source: preamble.source,
1139            raw: None,
1140        }
1141    }
1142
1143    pub(crate) fn from_stream_preamble(
1144        value: Value<'a>,
1145        source: SourceSpan,
1146        raw: &'a str,
1147        preserve_raw: bool,
1148    ) -> Self {
1149        let value_raw = locate_preamble_value(raw);
1150        Self {
1151            value: ParsedValue {
1152                value,
1153                raw: if preserve_raw {
1154                    value_raw.map(Cow::Borrowed)
1155                } else {
1156                    None
1157                },
1158                source: None,
1159                expanded: None,
1160                delimiter: value_raw.map(value_delimiter),
1161            },
1162            source: Some(source),
1163            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1164        }
1165    }
1166
1167    /// Convert this parsed preamble into an owned value.
1168    #[must_use]
1169    pub fn into_owned(self) -> ParsedPreamble<'static> {
1170        ParsedPreamble {
1171            value: self.value.into_owned(),
1172            source: self.source,
1173            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1174        }
1175    }
1176
1177    pub(crate) fn from_stream_preamble_owned(
1178        value: Value<'a>,
1179        source: SourceSpan,
1180        raw: &'a str,
1181    ) -> ParsedPreamble<'static> {
1182        let value_raw = locate_preamble_value(raw);
1183        ParsedPreamble {
1184            value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1185            source: Some(source),
1186            raw: None,
1187        }
1188    }
1189}
1190
/// Parsed comment plus optional source-preserving metadata.
///
/// Instances are built either from a structured [`Comment`] (which never
/// carries raw text) or by the streaming constructors, which can retain the
/// raw comment text on request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedComment<'a> {
    /// Comment text.
    pub text: Cow<'a, str>,
    /// Source location for the comment, when available.
    pub source: Option<SourceSpan>,
    /// Exact raw comment text, when retained by the parser mode.
    ///
    /// `None` when raw text was not preserved.
    pub raw: Option<Cow<'a, str>>,
}
1201
1202impl<'a> ParsedComment<'a> {
1203    /// Create parsed-comment metadata from a structured comment.
1204    #[must_use]
1205    pub fn from_comment(comment: Comment<'a>) -> Self {
1206        Self {
1207            text: comment.text,
1208            source: comment.source,
1209            raw: None,
1210        }
1211    }
1212
1213    pub(crate) fn from_stream_comment(
1214        text: &'a str,
1215        source: SourceSpan,
1216        raw: &'a str,
1217        preserve_raw: bool,
1218    ) -> Self {
1219        Self {
1220            text: Cow::Borrowed(text),
1221            source: Some(source),
1222            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1223        }
1224    }
1225
1226    /// Convert this parsed comment into an owned value.
1227    #[must_use]
1228    pub fn into_owned(self) -> ParsedComment<'static> {
1229        ParsedComment {
1230            text: Cow::Owned(self.text.into_owned()),
1231            source: self.source,
1232            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1233        }
1234    }
1235
1236    pub(crate) fn from_stream_comment_owned(
1237        text: &'a str,
1238        source: SourceSpan,
1239    ) -> ParsedComment<'static> {
1240        ParsedComment {
1241            text: Cow::Owned(text.to_string()),
1242            source: Some(source),
1243            raw: None,
1244        }
1245    }
1246}
1247
/// Failed block retained by a tolerant parse.
///
/// Unlike the other parsed block types, the raw text is always present,
/// since it is the only representation of a block that could not be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFailedBlock<'a> {
    /// Raw source text for the failed block.
    pub raw: Cow<'a, str>,
    /// Human-readable parse error.
    pub error: String,
    /// Source location for the failed block, when available.
    pub source: Option<SourceSpan>,
    /// Diagnostics attached to this failed block.
    pub diagnostics: Vec<Diagnostic>,
}
1260
1261impl<'a> ParsedFailedBlock<'a> {
1262    /// Create failed-block metadata from a retained failed block.
1263    #[must_use]
1264    pub fn from_failed_block(
1265        index: usize,
1266        failed: FailedBlock<'a>,
1267        source_map: Option<&SourceMap<'_>>,
1268    ) -> Self {
1269        let diagnostic = diagnostic_for_failed_block(index, &failed, source_map);
1270
1271        Self {
1272            raw: failed.raw,
1273            error: failed.error,
1274            source: failed.source,
1275            diagnostics: vec![diagnostic],
1276        }
1277    }
1278
1279    /// Convert this failed block into an owned value.
1280    #[must_use]
1281    pub fn into_owned(self) -> ParsedFailedBlock<'static> {
1282        ParsedFailedBlock {
1283            raw: Cow::Owned(self.raw.into_owned()),
1284            error: self.error,
1285            source: self.source,
1286            diagnostics: self.diagnostics,
1287        }
1288    }
1289}
1290
/// Rich parsed document for tooling-grade bibliography workflows.
///
/// Items are stored per kind in separate vectors; `blocks` records the
/// source order by referring to those vectors by index.
#[derive(Debug, Clone)]
pub struct ParsedDocument<'a> {
    /// Compact structured library view exposed by `library()`.
    library: Library<'a>,
    /// Metadata for the sources this document was parsed from.
    sources: Vec<ParsedSource<'a>>,
    /// Parsed entries, indexed by `ParsedBlock::Entry`.
    entries: Vec<ParsedEntry<'a>>,
    /// Parsed string definitions, indexed by `ParsedBlock::String`.
    strings: Vec<ParsedString<'a>>,
    /// Parsed preambles, indexed by `ParsedBlock::Preamble`.
    preambles: Vec<ParsedPreamble<'a>>,
    /// Parsed comments, indexed by `ParsedBlock::Comment`.
    comments: Vec<ParsedComment<'a>>,
    /// Blocks that could not be parsed, indexed by `ParsedBlock::Failed`.
    failed_blocks: Vec<ParsedFailedBlock<'a>>,
    /// Source-order list of blocks.
    blocks: Vec<ParsedBlock>,
    /// Document-level diagnostics.
    diagnostics: Vec<Diagnostic>,
    /// Overall parse status.
    status: ParseStatus,
}
1305
1306impl<'a> ParsedDocument<'a> {
1307    /// Build a parsed document from the existing structured library model.
1308    #[must_use]
1309    pub fn from_library(library: Library<'a>) -> Self {
1310        Self::from_library_with_sources(
1311            library,
1312            vec![ParsedSource {
1313                id: SourceId::new(0),
1314                name: None,
1315            }],
1316        )
1317    }
1318
1319    pub(crate) fn from_library_with_sources(
1320        library: Library<'a>,
1321        sources: Vec<ParsedSource<'a>>,
1322    ) -> Self {
1323        Self::from_library_with_source_map(library, sources, None)
1324    }
1325
1326    pub(crate) fn from_library_with_source_map(
1327        library: Library<'a>,
1328        sources: Vec<ParsedSource<'a>>,
1329        source_map: Option<&SourceMap<'_>>,
1330    ) -> Self {
1331        let entries: Vec<ParsedEntry<'a>> = library
1332            .entries()
1333            .iter()
1334            .cloned()
1335            .enumerate()
1336            .map(|(index, entry)| ParsedEntry::from_entry(entry, library.entry_source(index)))
1337            .collect();
1338        let strings: Vec<ParsedString<'a>> = library
1339            .strings()
1340            .iter()
1341            .cloned()
1342            .map(ParsedString::from_definition)
1343            .collect();
1344        let preambles: Vec<ParsedPreamble<'a>> = library
1345            .preambles()
1346            .iter()
1347            .cloned()
1348            .map(ParsedPreamble::from_preamble)
1349            .collect();
1350        let comments = library
1351            .comments()
1352            .iter()
1353            .cloned()
1354            .map(ParsedComment::from_comment)
1355            .collect();
1356        let failed_blocks = library
1357            .failed_blocks()
1358            .iter()
1359            .cloned()
1360            .enumerate()
1361            .map(|(index, failed)| ParsedFailedBlock::from_failed_block(index, failed, source_map))
1362            .collect::<Vec<_>>();
1363        let diagnostics = failed_blocks
1364            .iter()
1365            .flat_map(|failed| failed.diagnostics.iter().cloned())
1366            .collect::<Vec<_>>();
1367        let blocks = library
1368            .block_kinds()
1369            .iter()
1370            .map(|kind| match *kind {
1371                BlockKind::Entry(index) => ParsedBlock::Entry(index),
1372                BlockKind::String(index) => ParsedBlock::String(index),
1373                BlockKind::Preamble(index) => ParsedBlock::Preamble(index),
1374                BlockKind::Comment(index) => ParsedBlock::Comment(index),
1375                BlockKind::Failed(index) => ParsedBlock::Failed(index),
1376            })
1377            .collect();
1378        let status = if failed_blocks.is_empty() {
1379            ParseStatus::Ok
1380        } else if entries.is_empty() && strings.is_empty() && preambles.is_empty() {
1381            ParseStatus::Failed
1382        } else {
1383            ParseStatus::Partial
1384        };
1385
1386        Self {
1387            library,
1388            sources,
1389            entries,
1390            strings,
1391            preambles,
1392            comments,
1393            failed_blocks,
1394            blocks,
1395            diagnostics,
1396            status,
1397        }
1398    }
1399
    /// Assemble a document from parts that were already parsed.
    ///
    /// The parts are stored as given. The resulting document has no failed
    /// blocks, no diagnostics, and status [`ParseStatus::Ok`].
    pub(crate) const fn from_parsed_parts(
        library: Library<'a>,
        sources: Vec<ParsedSource<'a>>,
        entries: Vec<ParsedEntry<'a>>,
        strings: Vec<ParsedString<'a>>,
        preambles: Vec<ParsedPreamble<'a>>,
        comments: Vec<ParsedComment<'a>>,
        blocks: Vec<ParsedBlock>,
    ) -> Self {
        Self {
            library,
            sources,
            entries,
            strings,
            preambles,
            comments,
            // This constructor takes no failure information.
            failed_blocks: Vec::new(),
            blocks,
            diagnostics: Vec::new(),
            status: ParseStatus::Ok,
        }
    }
1422
1423    pub(crate) fn apply_entry_locations(
1424        &mut self,
1425        entry_index: usize,
1426        raw: &'a str,
1427        source_map: &SourceMap<'a>,
1428        preserve_raw: bool,
1429    ) {
1430        let Some(entry) = self.entries.get_mut(entry_index) else {
1431            return;
1432        };
1433        let Some(entry_span) = entry.source else {
1434            return;
1435        };
1436        let Some(locations) = locate_entry(raw, entry_span.byte_start, entry.fields.len()) else {
1437            return;
1438        };
1439
1440        entry.entry_type_source =
1441            Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
1442        entry.key_source = Some(source_map.span(locations.key.0, locations.key.1));
1443        entry.delimiter = Some(locations.delimiter);
1444        if preserve_raw {
1445            entry.raw = Some(Cow::Borrowed(raw));
1446        }
1447
1448        for (field, location) in entry.fields.iter_mut().zip(locations.fields) {
1449            field.source = Some(source_map.span(location.whole.0, location.whole.1));
1450            field.name_source = Some(source_map.span(location.name.0, location.name.1));
1451            field.value.source = Some(source_map.span(location.value.0, location.value.1));
1452            field.value_source = field.value.source;
1453            field.value.delimiter = Some(location.value_delimiter);
1454
1455            if preserve_raw {
1456                if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
1457                    field.raw = Some(Cow::Borrowed(source));
1458                }
1459                if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
1460                    field.value.raw = Some(Cow::Borrowed(source));
1461                }
1462            }
1463        }
1464    }
1465
1466    pub(crate) fn apply_raw_items(&mut self, raw_items: &[RawBuildItem<'a>]) {
1467        let mut string_index = 0;
1468        let mut preamble_index = 0;
1469        let mut comment_index = 0;
1470
1471        for raw_item in raw_items {
1472            match raw_item {
1473                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, raw) => {
1474                    if let Some(parsed) = self.strings.get_mut(string_index) {
1475                        parsed.raw = Some(Cow::Borrowed(raw));
1476                        if let Some(value_raw) = locate_definition_value(raw) {
1477                            parsed.value.raw = Some(Cow::Borrowed(value_raw));
1478                            parsed.value.delimiter = Some(value_delimiter(value_raw));
1479                        }
1480                    }
1481                    string_index += 1;
1482                }
1483                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(_), _, raw) => {
1484                    if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1485                        parsed.raw = Some(Cow::Borrowed(raw));
1486                        if let Some(value_raw) = locate_preamble_value(raw) {
1487                            parsed.value.raw = Some(Cow::Borrowed(value_raw));
1488                            parsed.value.delimiter = Some(value_delimiter(value_raw));
1489                        }
1490                    }
1491                    preamble_index += 1;
1492                }
1493                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, raw) => {
1494                    if let Some(parsed) = self.comments.get_mut(comment_index) {
1495                        parsed.raw = Some(Cow::Borrowed(raw));
1496                    }
1497                    comment_index += 1;
1498                }
1499                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, _)
1500                | RawBuildItem::Failed(_) => {}
1501            }
1502        }
1503    }
1504
1505    pub(crate) fn apply_parsed_values(&mut self, raw_items: &[RawBuildItem<'a>]) {
1506        let mut entry_index = 0;
1507        let mut string_index = 0;
1508        let mut preamble_index = 0;
1509
1510        for raw_item in raw_items {
1511            match raw_item {
1512                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(raw_entry), _, _) => {
1513                    if let Some(entry) = self.entries.get_mut(entry_index) {
1514                        for (field, raw_field) in entry.fields.iter_mut().zip(&raw_entry.fields) {
1515                            field.value.value = raw_field.value.clone();
1516                            field.value.expanded = None;
1517                        }
1518                    }
1519                    entry_index += 1;
1520                }
1521                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, value), _, _) => {
1522                    if let Some(parsed) = self.strings.get_mut(string_index) {
1523                        parsed.value.value = value.clone();
1524                        parsed.value.expanded = None;
1525                    }
1526                    string_index += 1;
1527                }
1528                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), _, _) => {
1529                    if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1530                        parsed.value.value = value.clone();
1531                        parsed.value.expanded = None;
1532                    }
1533                    preamble_index += 1;
1534                }
1535                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, _)
1536                | RawBuildItem::Failed(_) => {}
1537            }
1538        }
1539    }
1540
1541    pub(crate) fn populate_expanded_values(
1542        &mut self,
1543        options: ExpansionOptions,
1544    ) -> crate::Result<()> {
1545        let strings = &self.strings;
1546        for entry in &mut self.entries {
1547            for field in &mut entry.fields {
1548                field.value.expanded = Some(Cow::Owned(expand_value_with_options(
1549                    &field.value.value,
1550                    strings,
1551                    options,
1552                    &mut Vec::new(),
1553                )?));
1554            }
1555        }
1556        for preamble in &mut self.preambles {
1557            preamble.value.expanded = Some(Cow::Owned(expand_value_with_options(
1558                &preamble.value.value,
1559                strings,
1560                options,
1561                &mut Vec::new(),
1562            )?));
1563        }
1564        Ok(())
1565    }
1566
1567    pub(crate) fn recover_partial_entries(
1568        &mut self,
1569        source_map: &SourceMap<'a>,
1570        preserve_raw: bool,
1571    ) {
1572        let old_entries = std::mem::take(&mut self.entries);
1573        let old_failed_blocks = std::mem::take(&mut self.failed_blocks);
1574        let old_blocks = std::mem::take(&mut self.blocks);
1575        let mut new_entries = Vec::with_capacity(old_entries.len());
1576        let mut new_failed_blocks = Vec::new();
1577        let mut new_blocks = Vec::with_capacity(old_blocks.len());
1578
1579        for block in old_blocks {
1580            match block {
1581                ParsedBlock::Entry(index) => {
1582                    let new_index = new_entries.len();
1583                    if let Some(entry) = old_entries.get(index) {
1584                        new_entries.push(entry.clone());
1585                        new_blocks.push(ParsedBlock::Entry(new_index));
1586                    }
1587                }
1588                ParsedBlock::Failed(index) => {
1589                    let Some(failed) = old_failed_blocks.get(index) else {
1590                        continue;
1591                    };
1592                    let new_index = new_entries.len();
1593                    if let Some(partial) =
1594                        recover_partial_entry(failed, source_map, new_index, preserve_raw)
1595                    {
1596                        new_entries.push(partial);
1597                        new_blocks.push(ParsedBlock::Entry(new_index));
1598                    } else {
1599                        let failed_index = new_failed_blocks.len();
1600                        new_failed_blocks.push(failed.clone());
1601                        new_blocks.push(ParsedBlock::Failed(failed_index));
1602                    }
1603                }
1604                ParsedBlock::String(index) => new_blocks.push(ParsedBlock::String(index)),
1605                ParsedBlock::Preamble(index) => new_blocks.push(ParsedBlock::Preamble(index)),
1606                ParsedBlock::Comment(index) => new_blocks.push(ParsedBlock::Comment(index)),
1607            }
1608        }
1609
1610        self.entries = new_entries;
1611        self.failed_blocks = new_failed_blocks;
1612        self.blocks = new_blocks;
1613        self.rebuild_diagnostics_and_status();
1614    }
1615
1616    fn rebuild_diagnostics_and_status(&mut self) {
1617        self.diagnostics.clear();
1618        self.diagnostics.extend(
1619            self.entries
1620                .iter()
1621                .flat_map(|entry| entry.diagnostics.iter().cloned()),
1622        );
1623        self.diagnostics.extend(
1624            self.failed_blocks
1625                .iter()
1626                .flat_map(|failed| failed.diagnostics.iter().cloned()),
1627        );
1628
1629        self.status = if self.diagnostics.is_empty() {
1630            ParseStatus::Ok
1631        } else if self.entries.is_empty() && self.strings.is_empty() && self.preambles.is_empty() {
1632            ParseStatus::Failed
1633        } else {
1634            ParseStatus::Partial
1635        };
1636    }
1637
1638    pub(crate) fn failed_from_error(
1639        sources: Vec<ParsedSource<'a>>,
1640        source_map: &SourceMap<'a>,
1641        error: &crate::Error,
1642    ) -> Self {
1643        let (byte, message, fallback_snippet) = match error {
1644            crate::Error::ParseError {
1645                line,
1646                column,
1647                message,
1648                snippet,
1649            } => (
1650                source_map.byte_at_line_column(*line, *column).unwrap_or(0),
1651                message.clone(),
1652                snippet.clone(),
1653            ),
1654            other => (0, other.to_string(), None),
1655        };
1656        let raw = source_map.input().get(byte..).unwrap_or_default();
1657        let failed_source = source_map.span(byte, source_map.len());
1658        let failed = FailedBlock {
1659            raw: Cow::Borrowed(raw),
1660            error: message.clone(),
1661            source: Some(failed_source),
1662        };
1663        let diagnostic = diagnostic_for_raw_failure(
1664            0,
1665            raw,
1666            message,
1667            Some(failed_source),
1668            Some(source_map),
1669            byte,
1670            fallback_snippet,
1671        );
1672        let failed_block = ParsedFailedBlock {
1673            raw: failed.raw,
1674            error: failed.error,
1675            source: failed.source,
1676            diagnostics: vec![diagnostic.clone()],
1677        };
1678
1679        Self {
1680            library: Library::new(),
1681            sources,
1682            entries: Vec::new(),
1683            strings: Vec::new(),
1684            preambles: Vec::new(),
1685            comments: Vec::new(),
1686            failed_blocks: vec![failed_block],
1687            blocks: vec![ParsedBlock::Failed(0)],
1688            diagnostics: vec![diagnostic],
1689            status: ParseStatus::Failed,
1690        }
1691    }
1692
    /// Return the compact structured library view.
    ///
    /// Borrows the [`Library`] stored in this document; nothing is cloned.
    #[must_use]
    pub const fn library(&self) -> &Library<'a> {
        &self.library
    }
1698
    /// Consume this document and return the compact structured library view.
    ///
    /// The remaining document data (parsed items, blocks, diagnostics,
    /// status) is dropped.
    #[must_use]
    pub fn into_library(self) -> Library<'a> {
        self.library
    }
1704
1705    /// Return source metadata.
1706    #[must_use]
1707    pub fn sources(&self) -> &[ParsedSource<'a>] {
1708        &self.sources
1709    }
1710
1711    /// Return parsed entries.
1712    #[must_use]
1713    pub fn entries(&self) -> &[ParsedEntry<'a>] {
1714        &self.entries
1715    }
1716
1717    /// Return mutable parsed entries.
1718    #[must_use]
1719    pub fn entries_mut(&mut self) -> &mut [ParsedEntry<'a>] {
1720        &mut self.entries
1721    }
1722
1723    /// Append an entry at the end of the document block order.
1724    pub fn push_entry(&mut self, entry: ParsedEntry<'a>) {
1725        let index = self.entries.len();
1726        self.entries.push(entry);
1727        self.blocks.push(ParsedBlock::Entry(index));
1728    }
1729
1730    /// Return a mutable entry by citation key.
1731    #[must_use]
1732    pub fn entry_mut_by_key(&mut self, key: &str) -> Option<&mut ParsedEntry<'a>> {
1733        self.entries.iter_mut().find(|entry| entry.key == key)
1734    }
1735
1736    /// Rename a citation key.
1737    #[must_use]
1738    pub fn rename_key(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> bool {
1739        let Some(entry) = self.entry_mut_by_key(old) else {
1740            return false;
1741        };
1742        entry.rename_key(new);
1743        true
1744    }
1745
1746    /// Remove configured export-only fields from all entries.
1747    #[must_use]
1748    pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
1749        self.entries
1750            .iter_mut()
1751            .map(|entry| entry.remove_export_fields(names))
1752            .sum()
1753    }
1754
1755    /// Return parsed string definitions.
1756    #[must_use]
1757    pub fn strings(&self) -> &[ParsedString<'a>] {
1758        &self.strings
1759    }
1760
1761    /// Return parsed preambles.
1762    #[must_use]
1763    pub fn preambles(&self) -> &[ParsedPreamble<'a>] {
1764        &self.preambles
1765    }
1766
1767    /// Return parsed comments.
1768    #[must_use]
1769    pub fn comments(&self) -> &[ParsedComment<'a>] {
1770        &self.comments
1771    }
1772
1773    /// Return failed blocks retained by tolerant parsing.
1774    #[must_use]
1775    pub fn failed_blocks(&self) -> &[ParsedFailedBlock<'a>] {
1776        &self.failed_blocks
1777    }
1778
1779    /// Return source-order blocks.
1780    #[must_use]
1781    pub fn blocks(&self) -> &[ParsedBlock] {
1782        &self.blocks
1783    }
1784
1785    /// Return document diagnostics.
1786    #[must_use]
1787    pub fn diagnostics(&self) -> &[Diagnostic] {
1788        &self.diagnostics
1789    }
1790
1791    /// Return the document parse status.
1792    #[must_use]
1793    pub const fn status(&self) -> ParseStatus {
1794        self.status
1795    }
1796
1797    /// Return summary counts for the parsed document.
1798    #[must_use]
1799    pub fn summary(&self) -> ParseSummary {
1800        let mut warnings = 0;
1801        let mut errors = 0;
1802        let mut infos = 0;
1803
1804        for diagnostic in &self.diagnostics {
1805            match diagnostic.severity {
1806                DiagnosticSeverity::Error => errors += 1,
1807                DiagnosticSeverity::Warning => warnings += 1,
1808                DiagnosticSeverity::Info => infos += 1,
1809            }
1810        }
1811
1812        ParseSummary {
1813            status: self.status,
1814            entries: self.entries.len(),
1815            warnings,
1816            errors,
1817            infos,
1818            failed_blocks: self.failed_blocks.len(),
1819            recovered_blocks: self
1820                .entries
1821                .iter()
1822                .filter(|entry| entry.status == ParsedEntryStatus::Partial)
1823                .count(),
1824        }
1825    }
1826
1827    /// Expand a parsed value using this document's string definitions.
1828    ///
1829    /// This allocates the expanded text. The structured value itself remains
1830    /// unchanged, and unresolved-variable behavior follows `options`.
1831    pub fn expand_value(
1832        &self,
1833        value: &Value<'a>,
1834        options: ExpansionOptions,
1835    ) -> crate::Result<String> {
1836        expand_value_with_options(value, &self.strings, options, &mut Vec::new())
1837    }
1838
1839    /// Convert this parsed document into an owned value.
1840    #[must_use]
1841    pub fn into_owned(self) -> ParsedDocument<'static> {
1842        ParsedDocument {
1843            library: self.library.into_owned(),
1844            sources: self
1845                .sources
1846                .into_iter()
1847                .map(ParsedSource::into_owned)
1848                .collect(),
1849            entries: self
1850                .entries
1851                .into_iter()
1852                .map(ParsedEntry::into_owned)
1853                .collect(),
1854            strings: self
1855                .strings
1856                .into_iter()
1857                .map(ParsedString::into_owned)
1858                .collect(),
1859            preambles: self
1860                .preambles
1861                .into_iter()
1862                .map(ParsedPreamble::into_owned)
1863                .collect(),
1864            comments: self
1865                .comments
1866                .into_iter()
1867                .map(ParsedComment::into_owned)
1868                .collect(),
1869            failed_blocks: self
1870                .failed_blocks
1871                .into_iter()
1872                .map(ParsedFailedBlock::into_owned)
1873                .collect(),
1874            blocks: self.blocks,
1875            diagnostics: self.diagnostics,
1876            status: self.status,
1877        }
1878    }
1879}
1880
1881impl ParsedDocument<'static> {
1882    pub(crate) fn apply_raw_from_source(&mut self, source: &str) {
1883        for entry in &mut self.entries {
1884            if entry.raw.is_none() {
1885                entry.raw = owned_source_slice(source, entry.source);
1886            }
1887            for field in &mut entry.fields {
1888                if field.raw.is_none() {
1889                    field.raw = owned_source_slice(source, field.source);
1890                }
1891                if field.value.raw.is_none() {
1892                    field.value.raw = owned_source_slice(source, field.value_source);
1893                }
1894            }
1895        }
1896
1897        for string in &mut self.strings {
1898            if string.raw.is_none() {
1899                string.raw = owned_source_slice(source, string.source);
1900            }
1901        }
1902        for preamble in &mut self.preambles {
1903            if preamble.raw.is_none() {
1904                preamble.raw = owned_source_slice(source, preamble.source);
1905            }
1906        }
1907        for comment in &mut self.comments {
1908            if comment.raw.is_none() {
1909                comment.raw = owned_source_slice(source, comment.source);
1910            }
1911        }
1912    }
1913}
1914
1915fn owned_source_slice(source: &str, span: Option<SourceSpan>) -> Option<Cow<'static, str>> {
1916    let span = span?;
1917    source
1918        .get(span.byte_start..span.byte_end)
1919        .map(|raw| Cow::Owned(raw.to_string()))
1920}
1921
1922fn expand_value_with_options(
1923    value: &Value<'_>,
1924    strings: &[ParsedString<'_>],
1925    options: ExpansionOptions,
1926    stack: &mut Vec<String>,
1927) -> crate::Result<String> {
1928    match value {
1929        Value::Literal(text) => Ok(normalize_text_projection(text)),
1930        Value::Number(number) => Ok(number.to_string()),
1931        Value::Concat(parts) => {
1932            let mut expanded = String::new();
1933            for part in parts.iter() {
1934                expanded.push_str(&expand_value_with_options(part, strings, options, stack)?);
1935            }
1936            Ok(expanded)
1937        }
1938        Value::Variable(name) => expand_variable(name, strings, options, stack),
1939    }
1940}
1941
1942fn expand_variable(
1943    name: &str,
1944    strings: &[ParsedString<'_>],
1945    options: ExpansionOptions,
1946    stack: &mut Vec<String>,
1947) -> crate::Result<String> {
1948    if options.expand_strings {
1949        if let Some(definition) = strings
1950            .iter()
1951            .rev()
1952            .find(|definition| definition.name.as_ref() == name)
1953        {
1954            if stack.iter().any(|active| active == name) {
1955                return Err(crate::Error::CircularReference(name.to_string()));
1956            }
1957            stack.push(name.to_string());
1958            let expanded =
1959                expand_value_with_options(&definition.value.value, strings, options, stack);
1960            stack.pop();
1961            return expanded;
1962        }
1963    }
1964
1965    if options.expand_months {
1966        if let Some(month) = month_expansion(name) {
1967            return Ok(month.to_string());
1968        }
1969    }
1970
1971    match options.unresolved_variables {
1972        UnresolvedVariablePolicy::Preserve => Ok(name.to_string()),
1973        UnresolvedVariablePolicy::Placeholder => Ok(format!("{{undefined:{name}}}")),
1974        UnresolvedVariablePolicy::Error => Err(crate::Error::UndefinedVariable(name.to_string())),
1975    }
1976}
1977
/// Map a three-letter month abbreviation to the full English month name.
///
/// Matching ignores ASCII case (`jan`, `Jan`, and `JAN` all match). Any
/// other input, including longer forms such as `sept`, returns `None`.
///
/// The lookup compares against a static table with
/// `str::eq_ignore_ascii_case`, so no temporary lowercase `String` is
/// allocated per call.
fn month_expansion(name: &str) -> Option<&'static str> {
    const MONTHS: &[(&str, &str)] = &[
        ("jan", "January"),
        ("feb", "February"),
        ("mar", "March"),
        ("apr", "April"),
        ("may", "May"),
        ("jun", "June"),
        ("jul", "July"),
        ("aug", "August"),
        ("sep", "September"),
        ("oct", "October"),
        ("nov", "November"),
        ("dec", "December"),
    ];

    // `eq_ignore_ascii_case` also requires equal length, which subsumes the
    // explicit three-byte length check.
    MONTHS
        .iter()
        .find(|(abbreviation, _)| abbreviation.eq_ignore_ascii_case(name))
        .map(|&(_, full_name)| full_name)
}
1999
2000#[derive(Debug, Clone)]
2001struct EntryLocations {
2002    entry_type: (usize, usize),
2003    key: (usize, usize),
2004    delimiter: EntryDelimiter,
2005    fields: Vec<FieldLocations>,
2006}
2007
2008#[derive(Debug, Clone, Copy)]
2009struct FieldLocations {
2010    whole: (usize, usize),
2011    name: (usize, usize),
2012    value: (usize, usize),
2013    value_delimiter: ValueDelimiter,
2014}
2015
2016#[derive(Debug, Clone)]
2017struct FailureClassification {
2018    code: DiagnosticCode,
2019    range: (usize, usize),
2020}
2021
2022fn diagnostic_for_failed_block(
2023    index: usize,
2024    failed: &FailedBlock<'_>,
2025    source_map: Option<&SourceMap<'_>>,
2026) -> Diagnostic {
2027    let absolute_start = failed.source.map_or(0, |source| source.byte_start);
2028    diagnostic_for_raw_failure(
2029        index,
2030        &failed.raw,
2031        failed.error.clone(),
2032        failed.source,
2033        source_map,
2034        absolute_start,
2035        None,
2036    )
2037}
2038
2039fn diagnostic_for_raw_failure(
2040    index: usize,
2041    raw: &str,
2042    fallback_message: String,
2043    fallback_source: Option<SourceSpan>,
2044    source_map: Option<&SourceMap<'_>>,
2045    absolute_start: usize,
2046    fallback_snippet: Option<String>,
2047) -> Diagnostic {
2048    let classification = classify_failure(raw);
2049    let source = source_map
2050        .map(|map| {
2051            map.span(
2052                absolute_start + classification.range.0,
2053                absolute_start + classification.range.1,
2054            )
2055        })
2056        .or(fallback_source);
2057    let snippet = source
2058        .and_then(|span| source_map.and_then(|map| map.snippet(span, 160)))
2059        .or(fallback_snippet)
2060        .or_else(|| Some(raw.chars().take(160).collect()));
2061
2062    let mut diagnostic = Diagnostic::error(
2063        classification.code.clone(),
2064        diagnostic_message(&classification.code, fallback_message),
2065        DiagnosticTarget::FailedBlock(index),
2066        source,
2067    );
2068    diagnostic.snippet = snippet;
2069    diagnostic
2070}
2071
2072fn recover_partial_entry<'a>(
2073    failed: &ParsedFailedBlock<'a>,
2074    source_map: &SourceMap<'a>,
2075    entry_index: usize,
2076    preserve_raw: bool,
2077) -> Option<ParsedEntry<'a>> {
2078    let raw: &'a str = match &failed.raw {
2079        Cow::Borrowed(raw) => raw,
2080        Cow::Owned(_) => return None,
2081    };
2082    let absolute_start = failed.source?.byte_start;
2083    let header = parse_partial_header(raw, source_map, absolute_start)?;
2084    let fields = recover_partial_fields(
2085        raw,
2086        source_map,
2087        absolute_start,
2088        header.field_start,
2089        header.closing,
2090        preserve_raw,
2091    );
2092    if fields.is_empty() {
2093        return None;
2094    }
2095
2096    let diagnostic = diagnostic_for_partial_entry(entry_index, failed, source_map);
2097
2098    Some(ParsedEntry {
2099        ty: header.ty,
2100        key: header.key,
2101        fields,
2102        status: ParsedEntryStatus::Partial,
2103        source: failed.source,
2104        entry_type_source: header.entry_type_source,
2105        key_source: header.key_source,
2106        delimiter: Some(header.delimiter),
2107        raw: preserve_raw.then(|| failed.raw.clone()),
2108        removed_field_sources: None,
2109        diagnostics: vec![diagnostic],
2110    })
2111}
2112
/// Crate-visible entry point for partial-entry recovery.
///
/// Forwards all arguments unchanged to `recover_partial_entry` and returns
/// its result.
pub(crate) fn recover_partial_stream_entry<'a>(
    failed: &ParsedFailedBlock<'a>,
    source_map: &SourceMap<'a>,
    entry_index: usize,
    preserve_raw: bool,
) -> Option<ParsedEntry<'a>> {
    recover_partial_entry(failed, source_map, entry_index, preserve_raw)
}
2121
/// Header of an entry recovered from a failed block (`@type{key,`).
struct PartialHeader<'a> {
    /// Entry type parsed from the identifier after `@`.
    ty: EntryType<'a>,
    /// Citation key, borrowed from the raw block text.
    key: Cow<'a, str>,
    /// Source span of the entry type identifier.
    entry_type_source: Option<SourceSpan>,
    /// Source span of the citation key.
    key_source: Option<SourceSpan>,
    /// Whether the entry body opens with a brace or a parenthesis.
    delimiter: EntryDelimiter,
    /// Byte offset in the raw text just past the comma that follows the key.
    field_start: usize,
    /// Closing byte matching the opening delimiter (`}` or `)`).
    closing: u8,
}
2131
2132fn parse_partial_header<'a>(
2133    raw: &'a str,
2134    source_map: &SourceMap<'a>,
2135    absolute_start: usize,
2136) -> Option<PartialHeader<'a>> {
2137    let bytes = raw.as_bytes();
2138    let mut pos = bytes.iter().position(|byte| *byte == b'@')? + 1;
2139
2140    let entry_type_start = pos;
2141    pos += scan_identifier(&bytes[pos..]);
2142    if pos == entry_type_start {
2143        return None;
2144    }
2145    let ty = EntryType::parse(&raw[entry_type_start..pos]);
2146    let entry_type_source =
2147        Some(source_map.span(absolute_start + entry_type_start, absolute_start + pos));
2148
2149    pos = skip_ascii_whitespace(bytes, pos);
2150    let (delimiter, closing) = match *bytes.get(pos)? {
2151        b'{' => (EntryDelimiter::Braces, b'}'),
2152        b'(' => (EntryDelimiter::Parentheses, b')'),
2153        _ => return None,
2154    };
2155    pos += 1;
2156    pos = skip_ascii_whitespace(bytes, pos);
2157
2158    let key_start = pos;
2159    pos += scan_identifier(&bytes[pos..]);
2160    if pos == key_start {
2161        return None;
2162    }
2163    let key = Cow::Borrowed(&raw[key_start..pos]);
2164    let key_source = Some(source_map.span(absolute_start + key_start, absolute_start + pos));
2165
2166    pos = skip_ascii_whitespace(bytes, pos);
2167    if bytes.get(pos) != Some(&b',') {
2168        return None;
2169    }
2170
2171    Some(PartialHeader {
2172        ty,
2173        key,
2174        entry_type_source,
2175        key_source,
2176        delimiter,
2177        field_start: pos + 1,
2178        closing,
2179    })
2180}
2181
2182fn recover_partial_fields<'a>(
2183    raw: &'a str,
2184    source_map: &SourceMap<'a>,
2185    absolute_start: usize,
2186    mut pos: usize,
2187    closing: u8,
2188    preserve_raw: bool,
2189) -> Vec<ParsedField<'a>> {
2190    let bytes = raw.as_bytes();
2191    let mut fields = Vec::new();
2192
2193    loop {
2194        pos = skip_ascii_whitespace(bytes, pos);
2195        let Some(&byte) = bytes.get(pos) else {
2196            break;
2197        };
2198        if byte == closing || byte == b'@' {
2199            break;
2200        }
2201
2202        let field_start = pos;
2203        let name_start = pos;
2204        pos += scan_identifier(&bytes[pos..]);
2205        if pos == name_start {
2206            break;
2207        }
2208        let name_end = pos;
2209        let name = Cow::Borrowed(&raw[name_start..name_end]);
2210
2211        pos = skip_ascii_whitespace(bytes, pos);
2212        if bytes.get(pos) != Some(&b'=') {
2213            break;
2214        }
2215        pos += 1;
2216        pos = skip_ascii_whitespace(bytes, pos);
2217
2218        let value_start = pos;
2219        let tail = &raw[value_start..];
2220        let mut value_input = tail;
2221        let Ok(value) = crate::parser::value::parse_value_field(&mut value_input) else {
2222            break;
2223        };
2224        let consumed = tail.len() - value_input.len();
2225        let value_end = trim_ascii_whitespace_end(bytes, value_start, value_start + consumed);
2226        let boundary = value_start + consumed;
2227        let field_end = match bytes.get(boundary) {
2228            Some(b',') => boundary + 1,
2229            Some(byte) if *byte == closing => boundary,
2230            Some(_) | None => boundary,
2231        };
2232
2233        let field_source =
2234            source_map.span(absolute_start + field_start, absolute_start + field_end);
2235        let value_source =
2236            source_map.span(absolute_start + value_start, absolute_start + value_end);
2237        fields.push(ParsedField {
2238            name,
2239            value: ParsedValue {
2240                value,
2241                raw: preserve_raw.then(|| Cow::Borrowed(&raw[value_start..value_end])),
2242                source: Some(value_source),
2243                expanded: None,
2244                delimiter: Some(value_delimiter(&raw[value_start..value_end])),
2245            },
2246            raw: preserve_raw.then(|| Cow::Borrowed(&raw[field_start..field_end])),
2247            source: Some(field_source),
2248            name_source: Some(
2249                source_map.span(absolute_start + name_start, absolute_start + name_end),
2250            ),
2251            value_source: Some(value_source),
2252        });
2253
2254        match bytes.get(boundary) {
2255            Some(b',') => pos = boundary + 1,
2256            Some(byte) if *byte == closing => break,
2257            _ => break,
2258        }
2259    }
2260
2261    fields
2262}
2263
2264fn diagnostic_for_partial_entry(
2265    entry_index: usize,
2266    failed: &ParsedFailedBlock<'_>,
2267    source_map: &SourceMap<'_>,
2268) -> Diagnostic {
2269    let absolute_start = failed.source.map_or(0, |source| source.byte_start);
2270    let mut diagnostic = diagnostic_for_raw_failure(
2271        entry_index,
2272        &failed.raw,
2273        failed.error.clone(),
2274        failed.source,
2275        Some(source_map),
2276        absolute_start,
2277        None,
2278    );
2279    diagnostic.target = DiagnosticTarget::Entry(entry_index);
2280    diagnostic
2281}
2282
2283fn diagnostic_message(code: &DiagnosticCode, fallback: String) -> String {
2284    match code.as_str() {
2285        "missing-entry-key" => "missing citation key".to_string(),
2286        "missing-field-separator" => "missing field separator".to_string(),
2287        "expected-field-name" => "expected field name".to_string(),
2288        "empty-field-value" => "empty field value".to_string(),
2289        "expected-value-atom" => "expected value atom".to_string(),
2290        "bad-field-boundary" => "expected comma or entry close after field value".to_string(),
2291        "bad-value-boundary" => "expected value after concatenation operator".to_string(),
2292        "unclosed-entry" => "entry ended before its closing delimiter".to_string(),
2293        "unclosed-braced-value" => "braced value ended before its closing brace".to_string(),
2294        "unclosed-quoted-value" => "quoted value ended before its closing quote".to_string(),
2295        _ => fallback,
2296    }
2297}
2298
2299fn classify_failure(raw: &str) -> FailureClassification {
2300    classify_failure_inner(raw).unwrap_or_else(|| FailureClassification {
2301        code: DiagnosticCode::PARSE_ERROR,
2302        range: empty_range(0),
2303    })
2304}
2305
2306fn classify_failure_inner(raw: &str) -> Option<FailureClassification> {
2307    let bytes = raw.as_bytes();
2308    let header = match parse_failure_header(bytes)? {
2309        Ok(header) => header,
2310        Err(classification) => return Some(classification),
2311    };
2312
2313    classify_failure_fields(bytes, header.pos, header.closing)
2314}
2315
/// Scan state after a well-formed entry header during failure classification.
#[derive(Clone, Copy, Debug)]
struct FailureHeader {
    /// Byte offset just past the comma that follows the citation key.
    pos: usize,
    /// Closing byte matching the entry's opening delimiter (`}` or `)`).
    closing: u8,
}
2321
2322fn parse_failure_header(bytes: &[u8]) -> Option<Result<FailureHeader, FailureClassification>> {
2323    let mut pos = bytes.iter().position(|byte| *byte == b'@')?;
2324    pos += 1;
2325    pos += scan_identifier(&bytes[pos..]);
2326    pos = skip_ascii_whitespace(bytes, pos);
2327
2328    let opening = *bytes.get(pos)?;
2329    let closing = match opening {
2330        b'{' => b'}',
2331        b'(' => b')',
2332        _ => {
2333            return Some(Err(classification(
2334                DiagnosticCode::UNCLOSED_ENTRY,
2335                pos,
2336                bytes.len(),
2337            )));
2338        }
2339    };
2340    pos += 1;
2341    pos = skip_ascii_whitespace(bytes, pos);
2342
2343    let key_len = scan_identifier(&bytes[pos..]);
2344    if key_len == 0 {
2345        return Some(Err(classification(
2346            DiagnosticCode::MISSING_ENTRY_KEY,
2347            pos,
2348            bytes.len(),
2349        )));
2350    }
2351    pos += key_len;
2352    pos = skip_ascii_whitespace(bytes, pos);
2353    if bytes.get(pos) != Some(&b',') {
2354        return Some(Err(classification(
2355            DiagnosticCode::MISSING_FIELD_SEPARATOR,
2356            pos,
2357            bytes.len(),
2358        )));
2359    }
2360    pos += 1;
2361
2362    Some(Ok(FailureHeader { pos, closing }))
2363}
2364
2365fn classify_failure_fields(
2366    bytes: &[u8],
2367    mut pos: usize,
2368    closing: u8,
2369) -> Option<FailureClassification> {
2370    loop {
2371        pos = skip_ascii_whitespace(bytes, pos);
2372        let Some(&byte) = bytes.get(pos) else {
2373            return Some(classification(
2374                DiagnosticCode::UNCLOSED_ENTRY,
2375                pos,
2376                bytes.len(),
2377            ));
2378        };
2379        if byte == closing {
2380            return None;
2381        }
2382        if byte == b'@' {
2383            return Some(classification(
2384                DiagnosticCode::UNCLOSED_ENTRY,
2385                pos,
2386                bytes.len(),
2387            ));
2388        }
2389
2390        let field_name_len = scan_identifier(&bytes[pos..]);
2391        if field_name_len == 0 {
2392            return Some(classification(
2393                DiagnosticCode::EXPECTED_FIELD_NAME,
2394                pos,
2395                bytes.len(),
2396            ));
2397        }
2398        pos += field_name_len;
2399        pos = skip_ascii_whitespace(bytes, pos);
2400        if bytes.get(pos) != Some(&b'=') {
2401            return Some(classification(
2402                DiagnosticCode::MISSING_FIELD_SEPARATOR,
2403                pos,
2404                bytes.len(),
2405            ));
2406        }
2407        pos += 1;
2408        pos = skip_ascii_whitespace(bytes, pos);
2409
2410        let Some(&value_start) = bytes.get(pos) else {
2411            return Some(classification(
2412                DiagnosticCode::EMPTY_FIELD_VALUE,
2413                pos,
2414                bytes.len(),
2415            ));
2416        };
2417        if value_start == b',' || value_start == closing {
2418            return Some(classification(
2419                DiagnosticCode::EMPTY_FIELD_VALUE,
2420                pos,
2421                bytes.len(),
2422            ));
2423        }
2424        if value_start == b'#' {
2425            return Some(classification(
2426                DiagnosticCode::EXPECTED_VALUE_ATOM,
2427                pos,
2428                bytes.len(),
2429            ));
2430        }
2431
2432        match scan_value_sequence(bytes, pos, closing) {
2433            Ok(next_pos) => pos = next_pos,
2434            Err(classification) => return Some(classification),
2435        }
2436    }
2437}
2438
2439fn scan_value_sequence(
2440    bytes: &[u8],
2441    mut pos: usize,
2442    closing: u8,
2443) -> Result<usize, FailureClassification> {
2444    loop {
2445        pos = skip_ascii_whitespace(bytes, pos);
2446        let atom_start = pos;
2447        let Some(&byte) = bytes.get(pos) else {
2448            return Err(classification(
2449                DiagnosticCode::EXPECTED_VALUE_ATOM,
2450                pos,
2451                bytes.len(),
2452            ));
2453        };
2454
2455        match byte {
2456            b'"' => {
2457                pos = skip_quoted_checked(bytes, pos + 1).ok_or_else(|| {
2458                    classification(
2459                        DiagnosticCode::UNCLOSED_QUOTED_VALUE,
2460                        atom_start,
2461                        bytes.len(),
2462                    )
2463                })?;
2464            }
2465            b'{' => {
2466                pos = skip_braced_checked(bytes, pos + 1).ok_or_else(|| {
2467                    classification(
2468                        DiagnosticCode::UNCLOSED_BRACED_VALUE,
2469                        atom_start,
2470                        bytes.len(),
2471                    )
2472                })?;
2473            }
2474            b',' => {
2475                return Err(classification(
2476                    DiagnosticCode::EMPTY_FIELD_VALUE,
2477                    pos,
2478                    bytes.len(),
2479                ));
2480            }
2481            b if b == closing => {
2482                return Err(classification(
2483                    DiagnosticCode::EMPTY_FIELD_VALUE,
2484                    pos,
2485                    bytes.len(),
2486                ));
2487            }
2488            b'#' => {
2489                return Err(classification(
2490                    DiagnosticCode::EXPECTED_VALUE_ATOM,
2491                    pos,
2492                    bytes.len(),
2493                ));
2494            }
2495            _ => {
2496                let identifier_len = scan_identifier(&bytes[pos..]);
2497                if identifier_len == 0 {
2498                    return Err(classification(
2499                        DiagnosticCode::EXPECTED_VALUE_ATOM,
2500                        pos,
2501                        bytes.len(),
2502                    ));
2503                }
2504                pos += identifier_len;
2505            }
2506        }
2507
2508        pos = skip_ascii_whitespace(bytes, pos);
2509        let Some(&boundary) = bytes.get(pos) else {
2510            return Err(classification(
2511                DiagnosticCode::UNCLOSED_ENTRY,
2512                pos,
2513                bytes.len(),
2514            ));
2515        };
2516
2517        match boundary {
2518            b'#' => {
2519                let hash = pos;
2520                pos += 1;
2521                pos = skip_ascii_whitespace(bytes, pos);
2522                if matches!(bytes.get(pos), None | Some(b',' | b'#'))
2523                    || bytes.get(pos) == Some(&closing)
2524                {
2525                    return Err(classification(
2526                        DiagnosticCode::BAD_VALUE_BOUNDARY,
2527                        hash,
2528                        bytes.len(),
2529                    ));
2530                }
2531            }
2532            b',' => return Ok(pos + 1),
2533            b if b == closing => return Ok(pos),
2534            _ => {
2535                return Err(classification(
2536                    DiagnosticCode::BAD_FIELD_BOUNDARY,
2537                    pos,
2538                    bytes.len(),
2539                ));
2540            }
2541        }
2542    }
2543}
2544
2545fn classification(code: DiagnosticCode, pos: usize, len: usize) -> FailureClassification {
2546    FailureClassification {
2547        code,
2548        range: single_byte_range(pos, len),
2549    }
2550}
2551
/// Return the zero-length range that starts and ends at `pos`.
const fn empty_range(pos: usize) -> (usize, usize) {
    let offset = pos;
    (offset, offset)
}
2555
/// Return the range covering the single byte at `pos`, clamped to `len`.
///
/// When `pos` is at or beyond `len`, the result is the empty range
/// `(len, len)`.
fn single_byte_range(pos: usize, len: usize) -> (usize, usize) {
    let start = Ord::min(pos, len);
    let end = Ord::min(start + 1, len);
    (start, end)
}
2560
2561fn locate_entry(raw: &str, absolute_start: usize, field_count: usize) -> Option<EntryLocations> {
2562    let bytes = raw.as_bytes();
2563    let mut pos = 0;
2564    if bytes.get(pos) != Some(&b'@') {
2565        return None;
2566    }
2567    pos += 1;
2568
2569    let entry_type_start = pos;
2570    pos += scan_identifier(&bytes[pos..]);
2571    if pos == entry_type_start {
2572        return None;
2573    }
2574    let entry_type = (absolute_start + entry_type_start, absolute_start + pos);
2575
2576    pos = skip_ascii_whitespace(bytes, pos);
2577    let opening = *bytes.get(pos)?;
2578    let (delimiter, closing) = match opening {
2579        b'{' => (EntryDelimiter::Braces, b'}'),
2580        b'(' => (EntryDelimiter::Parentheses, b')'),
2581        _ => return None,
2582    };
2583    pos += 1;
2584    pos = skip_ascii_whitespace(bytes, pos);
2585
2586    let key_start = pos;
2587    pos += scan_identifier(&bytes[pos..]);
2588    if pos == key_start {
2589        return None;
2590    }
2591    let key = (absolute_start + key_start, absolute_start + pos);
2592
2593    pos = skip_ascii_whitespace(bytes, pos);
2594    if bytes.get(pos) != Some(&b',') {
2595        return Some(EntryLocations {
2596            entry_type,
2597            key,
2598            delimiter,
2599            fields: Vec::new(),
2600        });
2601    }
2602    pos += 1;
2603
2604    let mut fields = Vec::with_capacity(field_count);
2605    while fields.len() < field_count {
2606        pos = skip_ascii_whitespace(bytes, pos);
2607        if bytes.get(pos) == Some(&closing) || pos >= bytes.len() {
2608            break;
2609        }
2610
2611        let field_start = pos;
2612        let name_start = pos;
2613        pos += scan_identifier(&bytes[pos..]);
2614        if pos == name_start {
2615            break;
2616        }
2617        let name_end = pos;
2618
2619        pos = skip_ascii_whitespace(bytes, pos);
2620        if bytes.get(pos) != Some(&b'=') {
2621            break;
2622        }
2623        pos += 1;
2624        pos = skip_ascii_whitespace(bytes, pos);
2625
2626        let value_start = pos;
2627        let boundary = find_value_boundary(bytes, pos, closing);
2628        let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2629        let mut whole_end = value_end;
2630        pos = boundary;
2631        if bytes.get(pos) == Some(&b',') {
2632            whole_end = pos + 1;
2633            pos += 1;
2634        }
2635
2636        fields.push(FieldLocations {
2637            whole: (absolute_start + field_start, absolute_start + whole_end),
2638            name: (absolute_start + name_start, absolute_start + name_end),
2639            value: (absolute_start + value_start, absolute_start + value_end),
2640            value_delimiter: value_delimiter(&raw[value_start..value_end]),
2641        });
2642    }
2643
2644    Some(EntryLocations {
2645        entry_type,
2646        key,
2647        delimiter,
2648        fields,
2649    })
2650}
2651
2652fn value_delimiter(raw_value: &str) -> ValueDelimiter {
2653    let trimmed = raw_value.trim_start();
2654    if has_top_level_concat(trimmed.as_bytes()) {
2655        return ValueDelimiter::Concatenation;
2656    }
2657
2658    match trimmed.as_bytes().first() {
2659        Some(b'{') => ValueDelimiter::Braces,
2660        Some(b'"') => ValueDelimiter::Quotes,
2661        _ => ValueDelimiter::Bare,
2662    }
2663}
2664
2665fn locate_definition_value(raw: &str) -> Option<&str> {
2666    let bytes = raw.as_bytes();
2667    let equals = bytes.iter().position(|byte| *byte == b'=')?;
2668    let value_start = skip_ascii_whitespace(bytes, equals + 1);
2669    let closing = enclosing_close_byte(bytes)?;
2670    let boundary = find_value_boundary(bytes, value_start, closing);
2671    let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2672    raw.get(value_start..value_end)
2673}
2674
2675fn locate_preamble_value(raw: &str) -> Option<&str> {
2676    let bytes = raw.as_bytes();
2677    let opening = bytes.iter().position(|byte| matches!(byte, b'{' | b'('))?;
2678    let closing = match bytes[opening] {
2679        b'{' => b'}',
2680        b'(' => b')',
2681        _ => return None,
2682    };
2683    let value_start = skip_ascii_whitespace(bytes, opening + 1);
2684    let boundary = find_value_boundary(bytes, value_start, closing);
2685    let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2686    raw.get(value_start..value_end)
2687}
2688
/// Return the closing delimiter that matches the first `{` or `(` in `bytes`.
///
/// Yields `}` for `{`, `)` for `(`, and `None` when neither opener occurs.
fn enclosing_close_byte(bytes: &[u8]) -> Option<u8> {
    bytes.iter().find_map(|byte| match byte {
        b'{' => Some(b'}'),
        b'(' => Some(b')'),
        _ => None,
    })
}
2697
2698fn has_top_level_concat(bytes: &[u8]) -> bool {
2699    let mut pos = 0;
2700    while let Some(&byte) = bytes.get(pos) {
2701        match byte {
2702            b'{' => pos = skip_braced(bytes, pos + 1),
2703            b'"' => pos = skip_quoted(bytes, pos + 1),
2704            b'#' => return true,
2705            _ => pos += 1,
2706        }
2707    }
2708    false
2709}
2710
/// Advance `pos` past any run of space, tab, line feed, or carriage return.
///
/// Returns the offset of the first byte that is not one of those four, or
/// the input length when only such bytes remain. A `pos` beyond the end of
/// `bytes` is returned unchanged.
fn skip_ascii_whitespace(bytes: &[u8], pos: usize) -> usize {
    let Some(rest) = bytes.get(pos..) else {
        return pos;
    };
    let blanks = rest
        .iter()
        .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    pos + blanks
}
2717
/// Moves `end` backwards over trailing space, tab, line feed, or carriage
/// return bytes, never going below `start`.
///
/// Returns the adjusted end index. When `start..end` is not a valid range of
/// `bytes` (inverted, or `end` past the slice), `end` is returned unchanged.
fn trim_ascii_whitespace_end(bytes: &[u8], start: usize, end: usize) -> usize {
    bytes.get(start..end).map_or(end, |window| {
        let trailing = window
            .iter()
            .rev()
            .take_while(|candidate| matches!(candidate, b' ' | b'\t' | b'\n' | b'\r'))
            .count();
        end - trailing
    })
}
2724
2725fn scan_identifier(bytes: &[u8]) -> usize {
2726    bytes
2727        .iter()
2728        .position(|byte| !is_identifier_byte(*byte))
2729        .unwrap_or(bytes.len())
2730}
2731
/// Reports whether `byte` may appear in an identifier.
///
/// Accepted bytes are ASCII letters, ASCII digits, and the punctuation
/// `_`, `-`, `:`, and `.`.
const fn is_identifier_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
2738
2739fn find_value_boundary(bytes: &[u8], mut pos: usize, closing: u8) -> usize {
2740    while let Some(&byte) = bytes.get(pos) {
2741        match byte {
2742            b'{' => pos = skip_braced(bytes, pos + 1),
2743            b'"' => pos = skip_quoted(bytes, pos + 1),
2744            b',' => break,
2745            b if b == closing => break,
2746            _ => pos += 1,
2747        }
2748    }
2749    pos
2750}
2751
/// Steps over the remainder of a braced span.
///
/// `pos` is the index just after the opening `{`. Nested braces are tracked,
/// and a backslash causes the byte after it to be skipped without
/// inspection. Returns the index just past the matching `}`; if the input
/// ends first, returns the index where scanning stopped.
fn skip_braced(bytes: &[u8], pos: usize) -> usize {
    let mut cursor = pos;
    // Counts braces still awaiting a closer, including the one already consumed.
    let mut open = 1usize;
    while let Some(&current) = bytes.get(cursor) {
        if current == b'\\' {
            cursor = bytes.len().min(cursor + 2);
            continue;
        }
        cursor += 1;
        match current {
            b'{' => open += 1,
            b'}' => {
                open -= 1;
                if open == 0 {
                    return cursor;
                }
            }
            _ => {}
        }
    }
    cursor
}
2771
/// Steps over the remainder of a braced span, reporting unterminated input.
///
/// `pos` is the index just after the opening `{`. Nested braces are tracked,
/// and a backslash causes the byte after it to be skipped without
/// inspection. Returns `Some(index)` just past the matching `}`, or `None`
/// when the input ends before that brace is found.
fn skip_braced_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    let mut cursor = pos;
    let mut nested = 0usize;
    loop {
        match bytes.get(cursor).copied()? {
            b'\\' => cursor = bytes.len().min(cursor + 2),
            b'}' if nested == 0 => return Some(cursor + 1),
            b'}' => {
                nested -= 1;
                cursor += 1;
            }
            b'{' => {
                nested += 1;
                cursor += 1;
            }
            _ => cursor += 1,
        }
    }
}
2791
/// Steps over the remainder of a double-quoted span.
///
/// `pos` is the index just after the opening `"`. A backslash causes the
/// byte after it to be skipped without inspection. Returns the index just
/// past the closing `"`; if the input ends first, returns the index where
/// scanning stopped.
fn skip_quoted(bytes: &[u8], pos: usize) -> usize {
    let mut cursor = pos;
    loop {
        match bytes.get(cursor) {
            None => return cursor,
            Some(b'"') => return cursor + 1,
            Some(b'\\') => cursor = bytes.len().min(cursor + 2),
            Some(_) => cursor += 1,
        }
    }
}
2802
/// Steps over the remainder of a double-quoted span, reporting unterminated
/// input.
///
/// `pos` is the index just after the opening `"`. A backslash causes the
/// byte after it to be skipped without inspection. Returns `Some(index)`
/// just past the closing `"`, or `None` when the input ends before a closing
/// quote is found.
fn skip_quoted_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    let mut cursor = pos;
    while let Some(&current) = bytes.get(cursor) {
        if current == b'"' {
            return Some(cursor + 1);
        }
        cursor = if current == b'\\' {
            bytes.len().min(cursor + 2)
        } else {
            cursor + 1
        };
    }
    None
}