Skip to main content

bibtex_parser/
document.rs

1//! Parsed bibliography model with source metadata.
2//!
3//! [`Library`] is the compact API for bibliography data. [`ParsedDocument`]
4//! contains source-order blocks, per-item metadata, retained raw text,
5//! diagnostics, and partial parse results.
6
7use crate::library::BlockKind;
8use crate::library::RawBuildItem;
9use crate::model::normalize_text_projection;
10use crate::{
11    normalize_doi, Comment, DateParseError, DateParts, Entry, EntryType, FailedBlock, Field,
12    Library, PersonName, Preamble, ResourceField, SourceId, SourceMap, SourceSpan,
13    StringDefinition, Value,
14};
15use std::borrow::Cow;
16use std::fmt;
17
/// Parse status for a parsed bibliography document.
///
/// Derived from the accumulated diagnostics: no error diagnostics yields
/// `Ok`; errors with no recovered entries, strings, or preambles yields
/// `Failed`; errors alongside recovered content yields `Partial`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
    /// The document parsed without diagnostics that affect recovered content.
    Ok,
    /// The document contains useful parsed data plus recovered or failed blocks.
    Partial,
    /// The document could not produce meaningful bibliography data.
    Failed,
}
28
/// Diagnostic severity.
///
/// Severity determines which counter a diagnostic increments in summaries
/// and whether it can affect the file-level [`ParseStatus`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// A problem that prevents some requested parse result from being valid.
    Error,
    /// A recoverable problem that callers may want to show or test.
    Warning,
    /// Additional parse information that is not itself a problem.
    Info,
}
39
/// Stable machine-readable diagnostic code.
///
/// Codes compare and hash by their string content, so borrowed and owned
/// codes with the same text are equal. Callers may introduce additional
/// codes via [`DiagnosticCode::custom`].
///
/// The initial parser diagnostic codes are:
/// `missing-entry-key`, `missing-field-separator`, `expected-field-name`,
/// `empty-field-value`, `expected-value-atom`, `bad-field-boundary`,
/// `bad-value-boundary`, `unclosed-entry`, `unclosed-braced-value`, and
/// `unclosed-quoted-value`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DiagnosticCode(Cow<'static, str>);
49
50impl DiagnosticCode {
51    /// Generic parse error code used before finer-grained recovery classifies a failure.
52    pub const PARSE_ERROR: Self = Self(Cow::Borrowed("parse-error"));
53    /// Entry body did not contain a citation key.
54    pub const MISSING_ENTRY_KEY: Self = Self(Cow::Borrowed("missing-entry-key"));
55    /// Expected a comma after an entry key or `=` after a field name.
56    pub const MISSING_FIELD_SEPARATOR: Self = Self(Cow::Borrowed("missing-field-separator"));
57    /// Expected a field name inside an entry body.
58    pub const EXPECTED_FIELD_NAME: Self = Self(Cow::Borrowed("expected-field-name"));
59    /// Field separator was present but no value was provided.
60    pub const EMPTY_FIELD_VALUE: Self = Self(Cow::Borrowed("empty-field-value"));
61    /// Expected a literal, number, variable, quoted value, or braced value.
62    pub const EXPECTED_VALUE_ATOM: Self = Self(Cow::Borrowed("expected-value-atom"));
63    /// Expected a comma or entry close after a field value.
64    pub const BAD_FIELD_BOUNDARY: Self = Self(Cow::Borrowed("bad-field-boundary"));
65    /// Expected a value atom after a concatenation operator.
66    pub const BAD_VALUE_BOUNDARY: Self = Self(Cow::Borrowed("bad-value-boundary"));
67    /// Entry ended before its closing delimiter was found.
68    pub const UNCLOSED_ENTRY: Self = Self(Cow::Borrowed("unclosed-entry"));
69    /// Braced field value ended before its closing brace was found.
70    pub const UNCLOSED_BRACED_VALUE: Self = Self(Cow::Borrowed("unclosed-braced-value"));
71    /// Quoted field value ended before its closing quote was found.
72    pub const UNCLOSED_QUOTED_VALUE: Self = Self(Cow::Borrowed("unclosed-quoted-value"));
73
74    /// Create a borrowed static diagnostic code.
75    #[must_use]
76    pub const fn borrowed(code: &'static str) -> Self {
77        Self(Cow::Borrowed(code))
78    }
79
80    /// Create an owned diagnostic code.
81    #[must_use]
82    pub fn custom(code: impl Into<String>) -> Self {
83        Self(Cow::Owned(code.into()))
84    }
85
86    /// Return the diagnostic code as a string.
87    #[must_use]
88    pub fn as_str(&self) -> &str {
89        &self.0
90    }
91}
92
93impl fmt::Display for DiagnosticCode {
94    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95        f.write_str(self.as_str())
96    }
97}
98
/// Location target for a diagnostic.
///
/// Indices are zero-based positions into the parsed document's
/// corresponding collections (blocks, parsed entries, fields, failed
/// blocks), as produced in source order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticTarget {
    /// The whole input file or source.
    File,
    /// A source-order block by index.
    Block(usize),
    /// An entry by parsed-entry index.
    Entry(usize),
    /// A field by parsed-entry and field index.
    Field {
        /// Parsed-entry index.
        entry: usize,
        /// Field index inside the parsed entry.
        field: usize,
    },
    /// A value by parsed-entry and field index.
    Value {
        /// Parsed-entry index.
        entry: usize,
        /// Field index inside the parsed entry.
        field: usize,
    },
    /// A failed block by failed-block index.
    FailedBlock(usize),
}
125
/// Structured diagnostic emitted while building a parsed document.
///
/// Construct error diagnostics with [`Diagnostic::error`]; attach optional
/// display context with [`Diagnostic::with_snippet`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Diagnostic {
    /// Diagnostic severity.
    pub severity: DiagnosticSeverity,
    /// Stable machine-readable code.
    pub code: DiagnosticCode,
    /// Human-readable message.
    pub message: String,
    /// Bibliography object targeted by this diagnostic.
    pub target: DiagnosticTarget,
    /// Source location, when available.
    pub source: Option<SourceSpan>,
    /// Short source context suitable for display, when available.
    pub snippet: Option<String>,
}
142
143impl Diagnostic {
144    /// Create an error diagnostic.
145    #[must_use]
146    pub fn error(
147        code: DiagnosticCode,
148        message: impl Into<String>,
149        target: DiagnosticTarget,
150        source: Option<SourceSpan>,
151    ) -> Self {
152        Self {
153            severity: DiagnosticSeverity::Error,
154            code,
155            message: message.into(),
156            target,
157            source,
158            snippet: None,
159        }
160    }
161
162    /// Attach source context to this diagnostic.
163    #[must_use]
164    pub fn with_snippet(mut self, snippet: impl Into<String>) -> Self {
165        self.snippet = Some(snippet.into());
166        self
167    }
168}
169
/// Summary counts for a parsed document.
///
/// Counter semantics mirror [`StreamingSummary`], but this summary describes
/// a fully built document rather than a stream of events.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseSummary {
    /// File-level parse status.
    pub status: ParseStatus,
    /// Number of parsed entries.
    pub entries: usize,
    /// Number of warning diagnostics.
    pub warnings: usize,
    /// Number of error diagnostics.
    pub errors: usize,
    /// Number of informational diagnostics.
    pub infos: usize,
    /// Number of failed blocks.
    pub failed_blocks: usize,
    /// Number of entries recovered as partial entries.
    pub recovered_blocks: usize,
}
188
/// Source metadata associated with a parsed document.
///
/// The lifetime `'a` borrows the caller-provided source name; use
/// [`ParsedSource::into_owned`] to detach it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedSource<'a> {
    /// Source index inside the document.
    pub id: SourceId,
    /// Human-readable source name or path, when known.
    pub name: Option<Cow<'a, str>>,
}
197
198impl ParsedSource<'_> {
199    /// Return `true` when this source has no caller-provided name.
200    #[must_use]
201    pub const fn is_anonymous(&self) -> bool {
202        self.name.is_none()
203    }
204
205    /// Convert this source metadata into an owned value.
206    #[must_use]
207    pub fn into_owned(self) -> ParsedSource<'static> {
208        ParsedSource {
209            id: self.id,
210            name: self.name.map(|name| Cow::Owned(name.into_owned())),
211        }
212    }
213}
214
/// Source-order block in a parsed document.
///
/// Each variant holds a zero-based index into the document's collection of
/// the corresponding kind, so block order can be reconstructed without
/// duplicating the parsed data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedBlock {
    /// A regular or partial bibliography entry by parsed-entry index.
    Entry(usize),
    /// A string definition by parsed-string index.
    String(usize),
    /// A preamble by parsed-preamble index.
    Preamble(usize),
    /// A comment by parsed-comment index.
    Comment(usize),
    /// A failed block by failed-block index.
    Failed(usize),
}
229
/// Source-order event emitted by streaming parsing.
///
/// Events carry the parsed payload by value; `Diagnostic` events follow the
/// event they describe.
#[derive(Debug, Clone, PartialEq)]
pub enum ParseEvent<'a> {
    /// A regular or recovered bibliography entry.
    Entry(ParsedEntry<'a>),
    /// A string definition.
    String(ParsedString<'a>),
    /// A preamble block.
    Preamble(ParsedPreamble<'a>),
    /// A comment block.
    Comment(ParsedComment<'a>),
    /// A malformed block retained by tolerant parsing.
    Failed(ParsedFailedBlock<'a>),
    /// A structured diagnostic associated with a preceding event.
    Diagnostic(Diagnostic),
}
246
/// Callback control returned from streaming parse handlers.
///
/// Returning `Stop` ends the stream early; the resulting
/// [`StreamingSummary`] records this in its `stopped` flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseFlow {
    /// Continue parsing.
    Continue,
    /// Stop after the current event.
    Stop,
}
255
/// Summary returned after streaming parsing.
///
/// Counters accumulate per emitted event; `status` is recomputed from the
/// counters when the stream finishes (errors plus no recovered entries,
/// strings, or preambles means `Failed`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StreamingSummary {
    /// File-level status for processed events.
    pub status: ParseStatus,
    /// Number of emitted entries.
    pub entries: usize,
    /// Number of emitted string definitions.
    pub strings: usize,
    /// Number of emitted preambles.
    pub preambles: usize,
    /// Number of emitted comments.
    pub comments: usize,
    /// Number of emitted failed blocks.
    pub failed_blocks: usize,
    /// Number of warning diagnostics.
    pub warnings: usize,
    /// Number of error diagnostics.
    pub errors: usize,
    /// Number of informational diagnostics.
    pub infos: usize,
    /// Number of recovered partial entries.
    pub recovered_blocks: usize,
    /// `true` when the callback requested early stop.
    pub stopped: bool,
}
282
283impl Default for StreamingSummary {
284    fn default() -> Self {
285        Self {
286            status: ParseStatus::Ok,
287            entries: 0,
288            strings: 0,
289            preambles: 0,
290            comments: 0,
291            failed_blocks: 0,
292            warnings: 0,
293            errors: 0,
294            infos: 0,
295            recovered_blocks: 0,
296            stopped: false,
297        }
298    }
299}
300
301impl StreamingSummary {
302    pub(crate) fn finalize_status(&mut self) {
303        self.status = if self.errors == 0 {
304            ParseStatus::Ok
305        } else if self.entries == 0 && self.strings == 0 && self.preambles == 0 {
306            ParseStatus::Failed
307        } else {
308            ParseStatus::Partial
309        };
310    }
311
312    pub(crate) fn count_diagnostic(&mut self, diagnostic: &Diagnostic) {
313        match diagnostic.severity {
314            DiagnosticSeverity::Error => self.errors += 1,
315            DiagnosticSeverity::Warning => self.warnings += 1,
316            DiagnosticSeverity::Info => self.infos += 1,
317        }
318    }
319}
320
/// Status of a parsed entry.
///
/// Entries built from fully structured data are `Complete`; tolerant
/// parsing may instead produce `Partial` entries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedEntryStatus {
    /// Entry parsed completely.
    Complete,
    /// Entry has a recovered type or key plus at least some usable content.
    Partial,
}
329
/// Delimiter used by a BibTeX entry body.
///
/// Recorded on [`ParsedEntry`] when entry locations are retained.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryDelimiter {
    /// Entry used `{ ... }`.
    Braces,
    /// Entry used `( ... )`.
    Parentheses,
}
338
/// Delimiter or source shape used by a BibTeX value.
///
/// Recorded on [`ParsedValue`] when value locations are retained.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueDelimiter {
    /// Value used `{ ... }`.
    Braces,
    /// Value used `" ... "`.
    Quotes,
    /// Value was a bare number or identifier.
    Bare,
    /// Value used one or more `#` concatenation separators.
    Concatenation,
}
351
/// Policy for variables that cannot be resolved during value expansion.
///
/// Selected via [`ExpansionOptions::unresolved_variables`]; the default is
/// `Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnresolvedVariablePolicy {
    /// Keep the variable name as ordinary text.
    Preserve,
    /// Render unresolved variables as `{undefined:name}`.
    Placeholder,
    /// Return an error for the first unresolved variable.
    Error,
}
362
/// Options for expanding parsed values.
///
/// The default expands both user strings and month macros and errors on
/// the first unresolved variable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExpansionOptions {
    /// Expand user `@string` definitions.
    pub expand_strings: bool,
    /// Expand standard three-letter BibTeX month variables.
    pub expand_months: bool,
    /// Behavior when a variable cannot be resolved.
    pub unresolved_variables: UnresolvedVariablePolicy,
}
373
374impl Default for ExpansionOptions {
375    fn default() -> Self {
376        Self {
377            expand_strings: true,
378            expand_months: true,
379            unresolved_variables: UnresolvedVariablePolicy::Error,
380        }
381    }
382}
383
/// Parsed BibTeX value plus optional source-preserving metadata.
///
/// The optional fields are `None` unless the parser mode that produced this
/// value retained them; `value` is always present.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedValue<'a> {
    /// Structured value.
    pub value: Value<'a>,
    /// Exact raw value text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
    /// Source location for the value, when available.
    pub source: Option<SourceSpan>,
    /// Expanded text projection, when a parser mode computes it separately.
    pub expanded: Option<Cow<'a, str>>,
    /// Original value delimiter or source shape, when retained.
    pub delimiter: Option<ValueDelimiter>,
}
398
399impl<'a> ParsedValue<'a> {
400    /// Create parsed-value metadata from a structured value.
401    #[must_use]
402    pub const fn new(value: Value<'a>) -> Self {
403        Self {
404            value,
405            raw: None,
406            source: None,
407            expanded: None,
408            delimiter: None,
409        }
410    }
411
412    /// Convert this parsed value into the structured value.
413    #[must_use]
414    pub fn into_value(self) -> Value<'a> {
415        self.value
416    }
417
418    /// Return the structured parsed value.
419    #[must_use]
420    pub const fn parsed(&self) -> &Value<'a> {
421        &self.value
422    }
423
424    /// Return exact raw value text when raw preservation was requested.
425    #[must_use]
426    pub fn raw_text(&self) -> Option<&str> {
427        self.raw.as_deref()
428    }
429
430    /// Return requested expanded text when the parser populated it.
431    #[must_use]
432    pub fn expanded_text(&self) -> Option<&str> {
433        self.expanded.as_deref()
434    }
435
436    /// Return an ordinary text projection of the parsed value.
437    #[must_use]
438    pub fn plain_text(&self) -> String {
439        self.value.to_plain_string()
440    }
441
442    /// Return a display-oriented projection of the parsed value.
443    #[must_use]
444    pub fn lossy_text(&self) -> String {
445        self.value.to_lossy_string()
446    }
447
448    /// Return a Unicode-normalized plain-text projection.
449    #[cfg(feature = "latex_to_unicode")]
450    #[must_use]
451    pub fn unicode_plain_text(&self) -> String {
452        self.value.to_unicode_plain_string()
453    }
454
455    /// Convert this parsed value into an owned value.
456    #[must_use]
457    pub fn into_owned(self) -> ParsedValue<'static> {
458        ParsedValue {
459            value: self.value.into_owned(),
460            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
461            source: self.source,
462            expanded: self
463                .expanded
464                .map(|expanded| Cow::Owned(expanded.into_owned())),
465            delimiter: self.delimiter,
466        }
467    }
468}
469
/// Parsed field plus optional source-preserving metadata.
///
/// The span and raw fields are `None` unless the parser mode that produced
/// this field retained them.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedField<'a> {
    /// Field name as it appeared after parsing.
    pub name: Cow<'a, str>,
    /// Parsed field value.
    pub value: ParsedValue<'a>,
    /// Exact raw field text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
    /// Source location for the whole field, when available.
    pub source: Option<SourceSpan>,
    /// Source location for the field name, when available.
    pub name_source: Option<SourceSpan>,
    /// Source location for the field value, when available.
    pub value_source: Option<SourceSpan>,
}
486
487impl<'a> ParsedField<'a> {
488    /// Create parsed-field metadata from a structured field.
489    #[must_use]
490    pub fn from_field(field: Field<'a>) -> Self {
491        Self {
492            name: field.name,
493            value: ParsedValue::new(field.value),
494            raw: None,
495            source: None,
496            name_source: None,
497            value_source: None,
498        }
499    }
500
501    /// Convert this parsed field into the structured field.
502    #[must_use]
503    pub fn into_field(self) -> Field<'a> {
504        Field {
505            name: self.name,
506            value: self.value.into_value(),
507        }
508    }
509
510    /// Convert this parsed field into an owned value.
511    #[must_use]
512    pub fn into_owned(self) -> ParsedField<'static> {
513        ParsedField {
514            name: Cow::Owned(self.name.into_owned()),
515            value: self.value.into_owned(),
516            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
517            source: self.source,
518            name_source: self.name_source,
519            value_source: self.value_source,
520        }
521    }
522}
523
/// Parsed entry plus optional source-preserving metadata.
///
/// Spans, delimiter, and raw text are `None` unless the parser mode that
/// produced this entry retained them.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedEntry<'a> {
    /// Entry type.
    pub ty: EntryType<'a>,
    /// Citation key.
    pub key: Cow<'a, str>,
    /// Parsed fields in source order.
    pub fields: Vec<ParsedField<'a>>,
    /// Whether the entry is complete or recovered.
    pub status: ParsedEntryStatus,
    /// Source location for the whole entry, when available.
    pub source: Option<SourceSpan>,
    /// Source location for the entry type token, when available.
    pub entry_type_source: Option<SourceSpan>,
    /// Source location for the citation key token, when available.
    pub key_source: Option<SourceSpan>,
    /// Entry body delimiter, when retained.
    pub delimiter: Option<EntryDelimiter>,
    /// Exact raw entry text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
    /// Diagnostics attached to this entry.
    pub diagnostics: Vec<Diagnostic>,
}
548
549impl<'a> ParsedEntry<'a> {
550    /// Create parsed-entry metadata from a structured entry.
551    #[must_use]
552    pub fn from_entry(entry: Entry<'a>, source: Option<SourceSpan>) -> Self {
553        Self {
554            ty: entry.ty,
555            key: entry.key,
556            fields: entry
557                .fields
558                .into_iter()
559                .map(ParsedField::from_field)
560                .collect(),
561            status: ParsedEntryStatus::Complete,
562            source,
563            entry_type_source: None,
564            key_source: None,
565            delimiter: None,
566            raw: None,
567            diagnostics: Vec::new(),
568        }
569    }
570
571    pub(crate) fn from_stream_entry(
572        entry: Entry<'a>,
573        source: SourceSpan,
574        raw: &'a str,
575        source_map: &SourceMap<'a>,
576        preserve_raw: bool,
577    ) -> Self {
578        let mut parsed = Self::from_entry(entry, Some(source));
579        parsed.apply_locations(raw, source_map, preserve_raw);
580        parsed
581    }
582
583    fn apply_locations(&mut self, raw: &'a str, source_map: &SourceMap<'a>, preserve_raw: bool) {
584        let Some(entry_span) = self.source else {
585            return;
586        };
587        let Some(locations) = locate_entry(raw, entry_span.byte_start, self.fields.len()) else {
588            return;
589        };
590
591        self.entry_type_source =
592            Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
593        self.key_source = Some(source_map.span(locations.key.0, locations.key.1));
594        self.delimiter = Some(locations.delimiter);
595        if preserve_raw {
596            self.raw = Some(Cow::Borrowed(raw));
597        }
598
599        for (field, location) in self.fields.iter_mut().zip(locations.fields) {
600            field.source = Some(source_map.span(location.whole.0, location.whole.1));
601            field.name_source = Some(source_map.span(location.name.0, location.name.1));
602            field.value.source = Some(source_map.span(location.value.0, location.value.1));
603            field.value_source = field.value.source;
604            field.value.delimiter = Some(location.value_delimiter);
605
606            if preserve_raw {
607                if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
608                    field.raw = Some(Cow::Borrowed(source));
609                }
610                if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
611                    field.value.raw = Some(Cow::Borrowed(source));
612                }
613            }
614        }
615    }
616
617    /// Return the citation key.
618    #[must_use]
619    pub fn key(&self) -> &str {
620        &self.key
621    }
622
623    /// Convert this parsed entry into the structured entry.
624    #[must_use]
625    pub fn into_entry(self) -> Entry<'a> {
626        Entry {
627            ty: self.ty,
628            key: self.key,
629            fields: self
630                .fields
631                .into_iter()
632                .map(ParsedField::into_field)
633                .collect(),
634        }
635    }
636
637    /// Rename the citation key.
638    pub fn rename_key(&mut self, key: impl Into<Cow<'a, str>>) {
639        self.key = key.into();
640    }
641
642    /// Replace the entry type.
643    pub fn set_entry_type(&mut self, ty: EntryType<'a>) {
644        self.ty = ty;
645    }
646
647    /// Add a field and switch this entry to structured writing.
648    pub fn add_field(&mut self, name: impl Into<Cow<'a, str>>, value: Value<'a>) {
649        self.fields.push(ParsedField {
650            name: name.into(),
651            value: ParsedValue::new(value),
652            raw: None,
653            source: None,
654            name_source: None,
655            value_source: None,
656        });
657        self.raw = None;
658    }
659
660    /// Replace the first field value whose name matches exactly.
661    #[must_use]
662    pub fn replace_field_value(&mut self, name: &str, value: Value<'a>) -> bool {
663        self.replace_field_value_at(name, 0, value)
664    }
665
666    /// Replace a specific duplicate field occurrence by zero-based occurrence index.
667    #[must_use]
668    pub fn replace_field_value_at(
669        &mut self,
670        name: &str,
671        occurrence: usize,
672        value: Value<'a>,
673    ) -> bool {
674        let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
675            return false;
676        };
677        let field = &mut self.fields[index];
678        field.value.value = value;
679        field.value.raw = None;
680        field.raw = None;
681        field.value.expanded = None;
682        true
683    }
684
685    /// Rename all fields whose name matches exactly.
686    #[must_use]
687    pub fn rename_field(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> usize {
688        let new = new.into();
689        let mut renamed = 0;
690        for field in &mut self.fields {
691            if field.name == old {
692                field.name.clone_from(&new);
693                field.raw = None;
694                renamed += 1;
695            }
696        }
697        renamed
698    }
699
700    /// Remove all fields whose name matches exactly.
701    #[must_use]
702    pub fn remove_field(&mut self, name: &str) -> usize {
703        let original_len = self.fields.len();
704        self.fields.retain(|field| field.name != name);
705        let removed = original_len - self.fields.len();
706        if removed > 0 {
707            self.raw = None;
708        }
709        removed
710    }
711
712    /// Remove a specific duplicate field occurrence by zero-based occurrence index.
713    #[must_use]
714    pub fn remove_field_at(&mut self, name: &str, occurrence: usize) -> bool {
715        let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
716            return false;
717        };
718        self.fields.remove(index);
719        self.raw = None;
720        true
721    }
722
723    /// Remove configured export-only fields from this entry.
724    #[must_use]
725    pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
726        let original_len = self.fields.len();
727        self.fields.retain(|field| {
728            !names
729                .iter()
730                .any(|name| field.name.eq_ignore_ascii_case(name))
731        });
732        let removed = original_len - self.fields.len();
733        if removed > 0 {
734            self.raw = None;
735        }
736        removed
737    }
738
739    /// Return the first field matching `name`, ignoring ASCII case.
740    #[must_use]
741    pub fn field_ignore_case(&self, name: &str) -> Option<&ParsedField<'a>> {
742        self.fields
743            .iter()
744            .find(|field| field.name.eq_ignore_ascii_case(name))
745    }
746
747    /// Return a field value as ordinary text, ignoring ASCII case.
748    #[must_use]
749    pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
750        self.field_ignore_case(name)
751            .map(|field| field.value.plain_text())
752    }
753
754    /// Return the normalized DOI, if the entry has a recognizable DOI field.
755    #[must_use]
756    pub fn doi(&self) -> Option<String> {
757        self.get_as_string_ignore_case("doi")
758            .and_then(|doi| normalize_doi(&doi))
759    }
760
761    /// Parse the `author` field into structured BibTeX names.
762    #[must_use]
763    pub fn authors(&self) -> Vec<PersonName> {
764        self.get_as_string_ignore_case("author")
765            .map_or_else(Vec::new, |authors| crate::parse_names(&authors))
766    }
767
768    /// Parse the `editor` field into structured BibTeX names.
769    #[must_use]
770    pub fn editors(&self) -> Vec<PersonName> {
771        self.get_as_string_ignore_case("editor")
772            .map_or_else(Vec::new, |editors| crate::parse_names(&editors))
773    }
774
775    /// Parse the `translator` field into structured BibTeX names.
776    #[must_use]
777    pub fn translators(&self) -> Vec<PersonName> {
778        self.get_as_string_ignore_case("translator")
779            .map_or_else(Vec::new, |translators| crate::parse_names(&translators))
780    }
781
782    /// Parse a specific date-like field into date parts.
783    #[must_use]
784    pub fn date_parts_for(
785        &self,
786        field: &str,
787    ) -> Option<std::result::Result<DateParts, DateParseError>> {
788        self.get_as_string_ignore_case(field)
789            .map(|value| crate::parse_date_parts(&value))
790    }
791
792    /// Return issued date parts for this entry.
793    #[must_use]
794    pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
795        self.clone().into_entry().date_parts()
796    }
797
798    /// Return classified resource and identifier fields in source order.
799    #[must_use]
800    pub fn resource_fields(&self) -> Vec<ResourceField> {
801        self.clone().into_entry().resource_fields()
802    }
803
804    /// Convert this parsed entry into an owned value.
805    #[must_use]
806    pub fn into_owned(self) -> ParsedEntry<'static> {
807        ParsedEntry {
808            ty: self.ty.into_owned(),
809            key: Cow::Owned(self.key.into_owned()),
810            fields: self
811                .fields
812                .into_iter()
813                .map(ParsedField::into_owned)
814                .collect(),
815            status: self.status,
816            source: self.source,
817            entry_type_source: self.entry_type_source,
818            key_source: self.key_source,
819            delimiter: self.delimiter,
820            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
821            diagnostics: self.diagnostics,
822        }
823    }
824}
825
826fn nth_field_index(fields: &[ParsedField<'_>], name: &str, occurrence: usize) -> Option<usize> {
827    fields
828        .iter()
829        .enumerate()
830        .filter(|(_, field)| field.name == name)
831        .nth(occurrence)
832        .map(|(index, _)| index)
833}
834
/// Parsed string definition plus optional source-preserving metadata.
///
/// Span and raw text are `None` unless the parser mode that produced this
/// definition retained them.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedString<'a> {
    /// String variable name.
    pub name: Cow<'a, str>,
    /// Parsed string value.
    pub value: ParsedValue<'a>,
    /// Source location for the definition, when available.
    pub source: Option<SourceSpan>,
    /// Exact raw string-definition text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
}
847
848impl<'a> ParsedString<'a> {
849    /// Create parsed-string metadata from a structured string definition.
850    #[must_use]
851    pub fn from_definition(definition: StringDefinition<'a>) -> Self {
852        Self {
853            name: definition.name,
854            value: ParsedValue::new(definition.value),
855            source: definition.source,
856            raw: None,
857        }
858    }
859
860    pub(crate) fn from_stream_definition(
861        name: &'a str,
862        value: Value<'a>,
863        source: SourceSpan,
864        raw: &'a str,
865        preserve_raw: bool,
866    ) -> Self {
867        let value_raw = locate_definition_value(raw);
868        Self {
869            name: Cow::Borrowed(name),
870            value: ParsedValue {
871                value,
872                raw: if preserve_raw {
873                    value_raw.map(Cow::Borrowed)
874                } else {
875                    None
876                },
877                source: None,
878                expanded: None,
879                delimiter: value_raw.map(value_delimiter),
880            },
881            source: Some(source),
882            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
883        }
884    }
885
886    /// Convert this parsed string definition into an owned value.
887    #[must_use]
888    pub fn into_owned(self) -> ParsedString<'static> {
889        ParsedString {
890            name: Cow::Owned(self.name.into_owned()),
891            value: self.value.into_owned(),
892            source: self.source,
893            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
894        }
895    }
896}
897
/// Parsed preamble plus optional source-preserving metadata.
///
/// Span and raw text are `None` unless the parser mode that produced this
/// preamble retained them.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedPreamble<'a> {
    /// Parsed preamble value.
    pub value: ParsedValue<'a>,
    /// Source location for the preamble, when available.
    pub source: Option<SourceSpan>,
    /// Exact raw preamble text, when retained by the parser mode.
    pub raw: Option<Cow<'a, str>>,
}
908
909impl<'a> ParsedPreamble<'a> {
910    /// Create parsed-preamble metadata from a structured preamble.
911    #[must_use]
912    pub fn from_preamble(preamble: Preamble<'a>) -> Self {
913        Self {
914            value: ParsedValue::new(preamble.value),
915            source: preamble.source,
916            raw: None,
917        }
918    }
919
920    pub(crate) fn from_stream_preamble(
921        value: Value<'a>,
922        source: SourceSpan,
923        raw: &'a str,
924        preserve_raw: bool,
925    ) -> Self {
926        let value_raw = locate_preamble_value(raw);
927        Self {
928            value: ParsedValue {
929                value,
930                raw: if preserve_raw {
931                    value_raw.map(Cow::Borrowed)
932                } else {
933                    None
934                },
935                source: None,
936                expanded: None,
937                delimiter: value_raw.map(value_delimiter),
938            },
939            source: Some(source),
940            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
941        }
942    }
943
944    /// Convert this parsed preamble into an owned value.
945    #[must_use]
946    pub fn into_owned(self) -> ParsedPreamble<'static> {
947        ParsedPreamble {
948            value: self.value.into_owned(),
949            source: self.source,
950            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
951        }
952    }
953}
954
/// Parsed comment plus optional source-preserving metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedComment<'a> {
    /// Comment text.
    pub text: Cow<'a, str>,
    /// Source location for the comment, when available.
    pub source: Option<SourceSpan>,
    /// Exact raw comment text, when retained by the parser mode.
    /// `None` when the document was built without raw preservation.
    pub raw: Option<Cow<'a, str>>,
}
965
966impl<'a> ParsedComment<'a> {
967    /// Create parsed-comment metadata from a structured comment.
968    #[must_use]
969    pub fn from_comment(comment: Comment<'a>) -> Self {
970        Self {
971            text: comment.text,
972            source: comment.source,
973            raw: None,
974        }
975    }
976
977    pub(crate) fn from_stream_comment(
978        text: &'a str,
979        source: SourceSpan,
980        raw: &'a str,
981        preserve_raw: bool,
982    ) -> Self {
983        Self {
984            text: Cow::Borrowed(text),
985            source: Some(source),
986            raw: preserve_raw.then_some(Cow::Borrowed(raw)),
987        }
988    }
989
990    /// Convert this parsed comment into an owned value.
991    #[must_use]
992    pub fn into_owned(self) -> ParsedComment<'static> {
993        ParsedComment {
994            text: Cow::Owned(self.text.into_owned()),
995            source: self.source,
996            raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
997        }
998    }
999}
1000
/// Failed block retained by a tolerant parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFailedBlock<'a> {
    /// Raw source text for the failed block.
    pub raw: Cow<'a, str>,
    /// Human-readable parse error.
    pub error: String,
    /// Source location for the failed block, when available.
    pub source: Option<SourceSpan>,
    /// Diagnostics attached to this failed block.
    /// Populated with a classification diagnostic at construction time.
    pub diagnostics: Vec<Diagnostic>,
}
1013
1014impl<'a> ParsedFailedBlock<'a> {
1015    /// Create failed-block metadata from a retained failed block.
1016    #[must_use]
1017    pub fn from_failed_block(
1018        index: usize,
1019        failed: FailedBlock<'a>,
1020        source_map: Option<&SourceMap<'_>>,
1021    ) -> Self {
1022        let diagnostic = diagnostic_for_failed_block(index, &failed, source_map);
1023
1024        Self {
1025            raw: failed.raw,
1026            error: failed.error,
1027            source: failed.source,
1028            diagnostics: vec![diagnostic],
1029        }
1030    }
1031
1032    /// Convert this failed block into an owned value.
1033    #[must_use]
1034    pub fn into_owned(self) -> ParsedFailedBlock<'static> {
1035        ParsedFailedBlock {
1036            raw: Cow::Owned(self.raw.into_owned()),
1037            error: self.error,
1038            source: self.source,
1039            diagnostics: self.diagnostics,
1040        }
1041    }
1042}
1043
/// Rich parsed document for tooling-grade bibliography workflows.
#[derive(Debug, Clone)]
pub struct ParsedDocument<'a> {
    // Compact structured view of the same bibliography data.
    library: Library<'a>,
    // Metadata for each input source.
    sources: Vec<ParsedSource<'a>>,
    // Parsed entries, including entries recovered from failed blocks.
    entries: Vec<ParsedEntry<'a>>,
    // Parsed @string definitions.
    strings: Vec<ParsedString<'a>>,
    // Parsed @preamble blocks.
    preambles: Vec<ParsedPreamble<'a>>,
    // Parsed comments.
    comments: Vec<ParsedComment<'a>>,
    // Blocks that could not be parsed or recovered.
    failed_blocks: Vec<ParsedFailedBlock<'a>>,
    // Source-order index into the per-kind vectors above.
    blocks: Vec<ParsedBlock>,
    // Flattened document-level diagnostics.
    diagnostics: Vec<Diagnostic>,
    // Overall parse outcome derived from the collections above.
    status: ParseStatus,
}
1058
impl<'a> ParsedDocument<'a> {
    /// Build a parsed document from the existing structured library model.
    #[must_use]
    pub fn from_library(library: Library<'a>) -> Self {
        Self::from_library_with_sources(
            library,
            vec![ParsedSource {
                id: SourceId::new(0),
                name: None,
            }],
        )
    }

    /// Build a parsed document with explicit source metadata but no source map.
    pub(crate) fn from_library_with_sources(
        library: Library<'a>,
        sources: Vec<ParsedSource<'a>>,
    ) -> Self {
        Self::from_library_with_source_map(library, sources, None)
    }

    /// Build a parsed document from a structured library, deriving parsed
    /// entries, strings, preambles, comments, failed-block diagnostics,
    /// source-order blocks, and the overall parse status.
    pub(crate) fn from_library_with_source_map(
        library: Library<'a>,
        sources: Vec<ParsedSource<'a>>,
        source_map: Option<&SourceMap<'_>>,
    ) -> Self {
        let entries: Vec<ParsedEntry<'a>> = library
            .entries()
            .iter()
            .cloned()
            .enumerate()
            .map(|(index, entry)| ParsedEntry::from_entry(entry, library.entry_source(index)))
            .collect();
        let strings: Vec<ParsedString<'a>> = library
            .strings()
            .iter()
            .cloned()
            .map(ParsedString::from_definition)
            .collect();
        let preambles: Vec<ParsedPreamble<'a>> = library
            .preambles()
            .iter()
            .cloned()
            .map(ParsedPreamble::from_preamble)
            .collect();
        let comments = library
            .comments()
            .iter()
            .cloned()
            .map(ParsedComment::from_comment)
            .collect();
        let failed_blocks = library
            .failed_blocks()
            .iter()
            .cloned()
            .enumerate()
            .map(|(index, failed)| ParsedFailedBlock::from_failed_block(index, failed, source_map))
            .collect::<Vec<_>>();
        // Document-level diagnostics mirror the per-failed-block diagnostics.
        let diagnostics = failed_blocks
            .iter()
            .flat_map(|failed| failed.diagnostics.iter().cloned())
            .collect::<Vec<_>>();
        let blocks = library
            .block_kinds()
            .iter()
            .map(|kind| match *kind {
                BlockKind::Entry(index) => ParsedBlock::Entry(index),
                BlockKind::String(index) => ParsedBlock::String(index),
                BlockKind::Preamble(index) => ParsedBlock::Preamble(index),
                BlockKind::Comment(index) => ParsedBlock::Comment(index),
                BlockKind::Failed(index) => ParsedBlock::Failed(index),
            })
            .collect();
        // No failed blocks: Ok. Failed blocks with no recovered data: Failed.
        // Failed blocks alongside useful data: Partial.
        let status = if failed_blocks.is_empty() {
            ParseStatus::Ok
        } else if entries.is_empty() && strings.is_empty() && preambles.is_empty() {
            ParseStatus::Failed
        } else {
            ParseStatus::Partial
        };

        Self {
            library,
            sources,
            entries,
            strings,
            preambles,
            comments,
            failed_blocks,
            blocks,
            diagnostics,
            status,
        }
    }

    /// Attach precise sub-spans (entry type, key, delimiter, per-field spans)
    /// and optionally raw text to the entry at `entry_index`, using offsets
    /// located within `raw`. Silently returns when the entry, its span, or
    /// the locations cannot be determined.
    pub(crate) fn apply_entry_locations(
        &mut self,
        entry_index: usize,
        raw: &'a str,
        source_map: &SourceMap<'a>,
        preserve_raw: bool,
    ) {
        let Some(entry) = self.entries.get_mut(entry_index) else {
            return;
        };
        let Some(entry_span) = entry.source else {
            return;
        };
        let Some(locations) = locate_entry(raw, entry_span.byte_start, entry.fields.len()) else {
            return;
        };

        entry.entry_type_source =
            Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
        entry.key_source = Some(source_map.span(locations.key.0, locations.key.1));
        entry.delimiter = Some(locations.delimiter);
        if preserve_raw {
            entry.raw = Some(Cow::Borrowed(raw));
        }

        // Pair each parsed field with its located spans in source order.
        for (field, location) in entry.fields.iter_mut().zip(locations.fields) {
            field.source = Some(source_map.span(location.whole.0, location.whole.1));
            field.name_source = Some(source_map.span(location.name.0, location.name.1));
            field.value.source = Some(source_map.span(location.value.0, location.value.1));
            field.value_source = field.value.source;
            field.value.delimiter = Some(location.value_delimiter);

            if preserve_raw {
                if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
                    field.raw = Some(Cow::Borrowed(source));
                }
                if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
                    field.value.raw = Some(Cow::Borrowed(source));
                }
            }
        }
    }

    /// Attach retained raw text (and value delimiters) to strings, preambles,
    /// and comments by walking `raw_items` in source order.
    pub(crate) fn apply_raw_items(&mut self, raw_items: &[RawBuildItem<'a>]) {
        // Raw items arrive in source order, so a running per-kind counter
        // matches each raw item with its parsed counterpart.
        let mut string_index = 0;
        let mut preamble_index = 0;
        let mut comment_index = 0;

        for raw_item in raw_items {
            match raw_item {
                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, raw) => {
                    if let Some(parsed) = self.strings.get_mut(string_index) {
                        parsed.raw = Some(Cow::Borrowed(raw));
                        if let Some(value_raw) = locate_definition_value(raw) {
                            parsed.value.raw = Some(Cow::Borrowed(value_raw));
                            parsed.value.delimiter = Some(value_delimiter(value_raw));
                        }
                    }
                    string_index += 1;
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(_), _, raw) => {
                    if let Some(parsed) = self.preambles.get_mut(preamble_index) {
                        parsed.raw = Some(Cow::Borrowed(raw));
                        if let Some(value_raw) = locate_preamble_value(raw) {
                            parsed.value.raw = Some(Cow::Borrowed(value_raw));
                            parsed.value.delimiter = Some(value_delimiter(value_raw));
                        }
                    }
                    preamble_index += 1;
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, raw) => {
                    if let Some(parsed) = self.comments.get_mut(comment_index) {
                        parsed.raw = Some(Cow::Borrowed(raw));
                    }
                    comment_index += 1;
                }
                // Entry raw text is handled by `apply_entry_locations`;
                // failed blocks already retain their raw text.
                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, _)
                | RawBuildItem::Failed(_) => {}
            }
        }
    }

    /// Replace parsed values with the unexpanded stream values and clear any
    /// cached expansions, walking `raw_items` in source order.
    pub(crate) fn apply_parsed_values(&mut self, raw_items: &[RawBuildItem<'a>]) {
        // Same source-order counter scheme as `apply_raw_items`.
        let mut entry_index = 0;
        let mut string_index = 0;
        let mut preamble_index = 0;

        for raw_item in raw_items {
            match raw_item {
                RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(raw_entry), _, _) => {
                    if let Some(entry) = self.entries.get_mut(entry_index) {
                        for (field, raw_field) in entry.fields.iter_mut().zip(&raw_entry.fields) {
                            field.value.value = raw_field.value.clone();
                            field.value.expanded = None;
                        }
                    }
                    entry_index += 1;
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, value), _, _) => {
                    if let Some(parsed) = self.strings.get_mut(string_index) {
                        parsed.value.value = value.clone();
                        parsed.value.expanded = None;
                    }
                    string_index += 1;
                }
                RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), _, _) => {
                    if let Some(parsed) = self.preambles.get_mut(preamble_index) {
                        parsed.value.value = value.clone();
                        parsed.value.expanded = None;
                    }
                    preamble_index += 1;
                }
                // Comments carry no value; failed blocks have none to apply.
                RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, _)
                | RawBuildItem::Failed(_) => {}
            }
        }
    }

    /// Compute and cache expanded text for every entry field and preamble,
    /// using this document's string definitions.
    ///
    /// Returns the first expansion error encountered (e.g. a circular or
    /// undefined variable, depending on `options`).
    pub(crate) fn populate_expanded_values(
        &mut self,
        options: ExpansionOptions,
    ) -> crate::Result<()> {
        let strings = &self.strings;
        for entry in &mut self.entries {
            for field in &mut entry.fields {
                field.value.expanded = Some(Cow::Owned(expand_value_with_options(
                    &field.value.value,
                    strings,
                    options,
                    &mut Vec::new(),
                )?));
            }
        }
        for preamble in &mut self.preambles {
            preamble.value.expanded = Some(Cow::Owned(expand_value_with_options(
                &preamble.value.value,
                strings,
                options,
                &mut Vec::new(),
            )?));
        }
        Ok(())
    }

    /// Re-scan failed blocks and promote recoverable ones to partial entries,
    /// rebuilding the block list, diagnostics, and status accordingly.
    pub(crate) fn recover_partial_entries(
        &mut self,
        source_map: &SourceMap<'a>,
        preserve_raw: bool,
    ) {
        // Take the old collections so they can be rebuilt in source order;
        // block indices must be reassigned as items move between kinds.
        let old_entries = std::mem::take(&mut self.entries);
        let old_failed_blocks = std::mem::take(&mut self.failed_blocks);
        let old_blocks = std::mem::take(&mut self.blocks);
        let mut new_entries = Vec::with_capacity(old_entries.len());
        let mut new_failed_blocks = Vec::new();
        let mut new_blocks = Vec::with_capacity(old_blocks.len());

        for block in old_blocks {
            match block {
                ParsedBlock::Entry(index) => {
                    let new_index = new_entries.len();
                    if let Some(entry) = old_entries.get(index) {
                        new_entries.push(entry.clone());
                        new_blocks.push(ParsedBlock::Entry(new_index));
                    }
                }
                ParsedBlock::Failed(index) => {
                    let Some(failed) = old_failed_blocks.get(index) else {
                        continue;
                    };
                    let new_index = new_entries.len();
                    // A recoverable failed block becomes a partial entry;
                    // otherwise it stays failed under a fresh index.
                    if let Some(partial) =
                        recover_partial_entry(failed, source_map, new_index, preserve_raw)
                    {
                        new_entries.push(partial);
                        new_blocks.push(ParsedBlock::Entry(new_index));
                    } else {
                        let failed_index = new_failed_blocks.len();
                        new_failed_blocks.push(failed.clone());
                        new_blocks.push(ParsedBlock::Failed(failed_index));
                    }
                }
                ParsedBlock::String(index) => new_blocks.push(ParsedBlock::String(index)),
                ParsedBlock::Preamble(index) => new_blocks.push(ParsedBlock::Preamble(index)),
                ParsedBlock::Comment(index) => new_blocks.push(ParsedBlock::Comment(index)),
            }
        }

        self.entries = new_entries;
        self.failed_blocks = new_failed_blocks;
        self.blocks = new_blocks;
        self.rebuild_diagnostics_and_status();
    }

    /// Recollect diagnostics from entries and failed blocks, then recompute
    /// the status. Unlike initial construction, status here keys off the
    /// diagnostics list, so partial-entry diagnostics also count.
    fn rebuild_diagnostics_and_status(&mut self) {
        self.diagnostics.clear();
        self.diagnostics.extend(
            self.entries
                .iter()
                .flat_map(|entry| entry.diagnostics.iter().cloned()),
        );
        self.diagnostics.extend(
            self.failed_blocks
                .iter()
                .flat_map(|failed| failed.diagnostics.iter().cloned()),
        );

        self.status = if self.diagnostics.is_empty() {
            ParseStatus::Ok
        } else if self.entries.is_empty() && self.strings.is_empty() && self.preambles.is_empty() {
            ParseStatus::Failed
        } else {
            ParseStatus::Partial
        };
    }

    /// Build a `Failed` document whose single failed block covers the input
    /// from the error position to the end of the source.
    pub(crate) fn failed_from_error(
        sources: Vec<ParsedSource<'a>>,
        source_map: &SourceMap<'a>,
        error: &crate::Error,
    ) -> Self {
        // Parse errors carry a position and snippet; other errors fall back
        // to byte 0 with their display text.
        let (byte, message, fallback_snippet) = match error {
            crate::Error::ParseError {
                line,
                column,
                message,
                snippet,
            } => (
                source_map.byte_at_line_column(*line, *column).unwrap_or(0),
                message.clone(),
                snippet.clone(),
            ),
            other => (0, other.to_string(), None),
        };
        let raw = source_map.input().get(byte..).unwrap_or_default();
        let failed_source = source_map.span(byte, source_map.len());
        let failed = FailedBlock {
            raw: Cow::Borrowed(raw),
            error: message.clone(),
            source: Some(failed_source),
        };
        let diagnostic = diagnostic_for_raw_failure(
            0,
            raw,
            message,
            Some(failed_source),
            Some(source_map),
            byte,
            fallback_snippet,
        );
        let failed_block = ParsedFailedBlock {
            raw: failed.raw,
            error: failed.error,
            source: failed.source,
            diagnostics: vec![diagnostic.clone()],
        };

        Self {
            library: Library::new(),
            sources,
            entries: Vec::new(),
            strings: Vec::new(),
            preambles: Vec::new(),
            comments: Vec::new(),
            failed_blocks: vec![failed_block],
            blocks: vec![ParsedBlock::Failed(0)],
            diagnostics: vec![diagnostic],
            status: ParseStatus::Failed,
        }
    }

    /// Return the compact structured library view.
    #[must_use]
    pub const fn library(&self) -> &Library<'a> {
        &self.library
    }

    /// Consume this document and return the compact structured library view.
    #[must_use]
    pub fn into_library(self) -> Library<'a> {
        self.library
    }

    /// Return source metadata.
    #[must_use]
    pub fn sources(&self) -> &[ParsedSource<'a>] {
        &self.sources
    }

    /// Return parsed entries.
    #[must_use]
    pub fn entries(&self) -> &[ParsedEntry<'a>] {
        &self.entries
    }

    /// Return mutable parsed entries.
    #[must_use]
    pub fn entries_mut(&mut self) -> &mut [ParsedEntry<'a>] {
        &mut self.entries
    }

    /// Return a mutable entry by citation key.
    #[must_use]
    pub fn entry_mut_by_key(&mut self, key: &str) -> Option<&mut ParsedEntry<'a>> {
        self.entries.iter_mut().find(|entry| entry.key == key)
    }

    /// Rename a citation key.
    ///
    /// Returns `false` when no entry with key `old` exists.
    #[must_use]
    pub fn rename_key(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> bool {
        let Some(entry) = self.entry_mut_by_key(old) else {
            return false;
        };
        entry.rename_key(new);
        true
    }

    /// Remove configured export-only fields from all entries.
    ///
    /// Returns the total number of removed fields across entries.
    #[must_use]
    pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
        self.entries
            .iter_mut()
            .map(|entry| entry.remove_export_fields(names))
            .sum()
    }

    /// Return parsed string definitions.
    #[must_use]
    pub fn strings(&self) -> &[ParsedString<'a>] {
        &self.strings
    }

    /// Return parsed preambles.
    #[must_use]
    pub fn preambles(&self) -> &[ParsedPreamble<'a>] {
        &self.preambles
    }

    /// Return parsed comments.
    #[must_use]
    pub fn comments(&self) -> &[ParsedComment<'a>] {
        &self.comments
    }

    /// Return failed blocks retained by tolerant parsing.
    #[must_use]
    pub fn failed_blocks(&self) -> &[ParsedFailedBlock<'a>] {
        &self.failed_blocks
    }

    /// Return source-order blocks.
    #[must_use]
    pub fn blocks(&self) -> &[ParsedBlock] {
        &self.blocks
    }

    /// Return document diagnostics.
    #[must_use]
    pub fn diagnostics(&self) -> &[Diagnostic] {
        &self.diagnostics
    }

    /// Return the document parse status.
    #[must_use]
    pub const fn status(&self) -> ParseStatus {
        self.status
    }

    /// Return summary counts for the parsed document.
    #[must_use]
    pub fn summary(&self) -> ParseSummary {
        let mut warnings = 0;
        let mut errors = 0;
        let mut infos = 0;

        for diagnostic in &self.diagnostics {
            match diagnostic.severity {
                DiagnosticSeverity::Error => errors += 1,
                DiagnosticSeverity::Warning => warnings += 1,
                DiagnosticSeverity::Info => infos += 1,
            }
        }

        ParseSummary {
            status: self.status,
            entries: self.entries.len(),
            warnings,
            errors,
            infos,
            failed_blocks: self.failed_blocks.len(),
            // Recovered blocks are entries promoted from failed blocks.
            recovered_blocks: self
                .entries
                .iter()
                .filter(|entry| entry.status == ParsedEntryStatus::Partial)
                .count(),
        }
    }

    /// Expand a parsed value using this document's string definitions.
    ///
    /// This allocates the expanded text. The structured value itself remains
    /// unchanged, and unresolved-variable behavior follows `options`.
    pub fn expand_value(
        &self,
        value: &Value<'a>,
        options: ExpansionOptions,
    ) -> crate::Result<String> {
        expand_value_with_options(value, &self.strings, options, &mut Vec::new())
    }

    /// Convert this parsed document into an owned value.
    #[must_use]
    pub fn into_owned(self) -> ParsedDocument<'static> {
        ParsedDocument {
            library: self.library.into_owned(),
            sources: self
                .sources
                .into_iter()
                .map(ParsedSource::into_owned)
                .collect(),
            entries: self
                .entries
                .into_iter()
                .map(ParsedEntry::into_owned)
                .collect(),
            strings: self
                .strings
                .into_iter()
                .map(ParsedString::into_owned)
                .collect(),
            preambles: self
                .preambles
                .into_iter()
                .map(ParsedPreamble::into_owned)
                .collect(),
            comments: self
                .comments
                .into_iter()
                .map(ParsedComment::into_owned)
                .collect(),
            failed_blocks: self
                .failed_blocks
                .into_iter()
                .map(ParsedFailedBlock::into_owned)
                .collect(),
            blocks: self.blocks,
            diagnostics: self.diagnostics,
            status: self.status,
        }
    }
}
1603
1604fn expand_value_with_options(
1605    value: &Value<'_>,
1606    strings: &[ParsedString<'_>],
1607    options: ExpansionOptions,
1608    stack: &mut Vec<String>,
1609) -> crate::Result<String> {
1610    match value {
1611        Value::Literal(text) => Ok(normalize_text_projection(text)),
1612        Value::Number(number) => Ok(number.to_string()),
1613        Value::Concat(parts) => {
1614            let mut expanded = String::new();
1615            for part in parts.iter() {
1616                expanded.push_str(&expand_value_with_options(part, strings, options, stack)?);
1617            }
1618            Ok(expanded)
1619        }
1620        Value::Variable(name) => expand_variable(name, strings, options, stack),
1621    }
1622}
1623
1624fn expand_variable(
1625    name: &str,
1626    strings: &[ParsedString<'_>],
1627    options: ExpansionOptions,
1628    stack: &mut Vec<String>,
1629) -> crate::Result<String> {
1630    if options.expand_strings {
1631        if let Some(definition) = strings
1632            .iter()
1633            .rev()
1634            .find(|definition| definition.name.as_ref() == name)
1635        {
1636            if stack.iter().any(|active| active == name) {
1637                return Err(crate::Error::CircularReference(name.to_string()));
1638            }
1639            stack.push(name.to_string());
1640            let expanded =
1641                expand_value_with_options(&definition.value.value, strings, options, stack);
1642            stack.pop();
1643            return expanded;
1644        }
1645    }
1646
1647    if options.expand_months {
1648        if let Some(month) = month_expansion(name) {
1649            return Ok(month.to_string());
1650        }
1651    }
1652
1653    match options.unresolved_variables {
1654        UnresolvedVariablePolicy::Preserve => Ok(name.to_string()),
1655        UnresolvedVariablePolicy::Placeholder => Ok(format!("{{undefined:{name}}}")),
1656        UnresolvedVariablePolicy::Error => Err(crate::Error::UndefinedVariable(name.to_string())),
1657    }
1658}
1659
/// Map a three-letter month abbreviation (case-insensitive) to its full
/// English month name, or `None` for anything else.
fn month_expansion(name: &str) -> Option<&'static str> {
    // Month macros are exactly three ASCII letters; reject other lengths fast.
    if name.len() != 3 {
        return None;
    }

    const MONTHS: [(&str, &str); 12] = [
        ("jan", "January"),
        ("feb", "February"),
        ("mar", "March"),
        ("apr", "April"),
        ("may", "May"),
        ("jun", "June"),
        ("jul", "July"),
        ("aug", "August"),
        ("sep", "September"),
        ("oct", "October"),
        ("nov", "November"),
        ("dec", "December"),
    ];

    let lowered = name.to_ascii_lowercase();
    MONTHS
        .iter()
        .find(|(abbreviation, _)| *abbreviation == lowered.as_str())
        .map(|(_, full)| *full)
}
1681
/// Byte ranges located within one raw entry, used to build `SourceSpan`s
/// for the entry header and each field (see `apply_entry_locations`).
#[derive(Debug, Clone)]
struct EntryLocations {
    /// (start, end) byte offsets of the entry-type token.
    entry_type: (usize, usize),
    /// (start, end) byte offsets of the citation key.
    key: (usize, usize),
    /// Bracket style that opened the entry body.
    delimiter: EntryDelimiter,
    /// Per-field locations, in source order.
    fields: Vec<FieldLocations>,
}
1689
/// Byte ranges for a single located field within a raw entry.
#[derive(Debug, Clone, Copy)]
struct FieldLocations {
    /// (start, end) byte offsets of the whole `name = value` span.
    whole: (usize, usize),
    /// (start, end) byte offsets of the field name.
    name: (usize, usize),
    /// (start, end) byte offsets of the field value.
    value: (usize, usize),
    /// Delimiter style wrapping the field value.
    value_delimiter: ValueDelimiter,
}
1697
/// Result of classifying a failed block's raw text.
#[derive(Debug, Clone)]
struct FailureClassification {
    /// Stable diagnostic code identifying the failure kind.
    code: DiagnosticCode,
    /// (start, end) byte range of the problem, relative to the raw block.
    range: (usize, usize),
}
1703
1704fn diagnostic_for_failed_block(
1705    index: usize,
1706    failed: &FailedBlock<'_>,
1707    source_map: Option<&SourceMap<'_>>,
1708) -> Diagnostic {
1709    let absolute_start = failed.source.map_or(0, |source| source.byte_start);
1710    diagnostic_for_raw_failure(
1711        index,
1712        &failed.raw,
1713        failed.error.clone(),
1714        failed.source,
1715        source_map,
1716        absolute_start,
1717        None,
1718    )
1719}
1720
1721fn diagnostic_for_raw_failure(
1722    index: usize,
1723    raw: &str,
1724    fallback_message: String,
1725    fallback_source: Option<SourceSpan>,
1726    source_map: Option<&SourceMap<'_>>,
1727    absolute_start: usize,
1728    fallback_snippet: Option<String>,
1729) -> Diagnostic {
1730    let classification = classify_failure(raw);
1731    let source = source_map
1732        .map(|map| {
1733            map.span(
1734                absolute_start + classification.range.0,
1735                absolute_start + classification.range.1,
1736            )
1737        })
1738        .or(fallback_source);
1739    let snippet = source
1740        .and_then(|span| source_map.and_then(|map| map.snippet(span, 160)))
1741        .or(fallback_snippet)
1742        .or_else(|| Some(raw.chars().take(160).collect()));
1743
1744    let mut diagnostic = Diagnostic::error(
1745        classification.code.clone(),
1746        diagnostic_message(&classification.code, fallback_message),
1747        DiagnosticTarget::FailedBlock(index),
1748        source,
1749    );
1750    diagnostic.snippet = snippet;
1751    diagnostic
1752}
1753
/// Attempt to rebuild a partial entry from a failed block.
///
/// Returns `None` when the raw text is not a borrowed slice of the input,
/// the block has no source span, its header cannot be parsed, or no fields
/// can be recovered.
fn recover_partial_entry<'a>(
    failed: &ParsedFailedBlock<'a>,
    source_map: &SourceMap<'a>,
    entry_index: usize,
    preserve_raw: bool,
) -> Option<ParsedEntry<'a>> {
    // Recovery needs borrowed raw text: the recovered entry borrows slices
    // with lifetime 'a, which an owned buffer cannot provide.
    let raw: &'a str = match &failed.raw {
        Cow::Borrowed(raw) => raw,
        Cow::Owned(_) => return None,
    };
    let absolute_start = failed.source?.byte_start;
    let header = parse_partial_header(raw, source_map, absolute_start)?;
    let fields = recover_partial_fields(
        raw,
        source_map,
        absolute_start,
        header.field_start,
        header.closing,
        preserve_raw,
    );
    // An entry with no recovered fields adds nothing; keep the block failed.
    if fields.is_empty() {
        return None;
    }

    let diagnostic = diagnostic_for_partial_entry(entry_index, failed, source_map);

    Some(ParsedEntry {
        ty: header.ty,
        key: header.key,
        fields,
        status: ParsedEntryStatus::Partial,
        source: failed.source,
        entry_type_source: header.entry_type_source,
        key_source: header.key_source,
        delimiter: Some(header.delimiter),
        raw: preserve_raw.then(|| failed.raw.clone()),
        diagnostics: vec![diagnostic],
    })
}
1793
/// Streaming-mode wrapper around [`recover_partial_entry`].
///
/// Lets the streaming parser surface partially recovered entries from
/// failed blocks the same way the whole-document parser does. Returns
/// `None` under the same conditions as [`recover_partial_entry`].
pub(crate) fn recover_partial_stream_entry<'a>(
    failed: &ParsedFailedBlock<'a>,
    source_map: &SourceMap<'a>,
    entry_index: usize,
    preserve_raw: bool,
) -> Option<ParsedEntry<'a>> {
    recover_partial_entry(failed, source_map, entry_index, preserve_raw)
}
1802
/// Header data (`@type{key,`) scanned from a failed entry block.
struct PartialHeader<'a> {
    /// Parsed entry type (e.g. `article`).
    ty: EntryType<'a>,
    /// Citation key, borrowed from the raw block text.
    key: Cow<'a, str>,
    /// Source span covering the entry-type identifier.
    entry_type_source: Option<SourceSpan>,
    /// Source span covering the citation key.
    key_source: Option<SourceSpan>,
    /// Which delimiter pair (`{}` or `()`) opened the entry body.
    delimiter: EntryDelimiter,
    /// Byte offset (within the raw block) just past the key's comma,
    /// where field recovery should begin.
    field_start: usize,
    /// Closing delimiter byte (`}` or `)`) matching the opener.
    closing: u8,
}
1812
1813fn parse_partial_header<'a>(
1814    raw: &'a str,
1815    source_map: &SourceMap<'a>,
1816    absolute_start: usize,
1817) -> Option<PartialHeader<'a>> {
1818    let bytes = raw.as_bytes();
1819    let mut pos = bytes.iter().position(|byte| *byte == b'@')? + 1;
1820
1821    let entry_type_start = pos;
1822    pos += scan_identifier(&bytes[pos..]);
1823    if pos == entry_type_start {
1824        return None;
1825    }
1826    let ty = EntryType::parse(&raw[entry_type_start..pos]);
1827    let entry_type_source =
1828        Some(source_map.span(absolute_start + entry_type_start, absolute_start + pos));
1829
1830    pos = skip_ascii_whitespace(bytes, pos);
1831    let (delimiter, closing) = match *bytes.get(pos)? {
1832        b'{' => (EntryDelimiter::Braces, b'}'),
1833        b'(' => (EntryDelimiter::Parentheses, b')'),
1834        _ => return None,
1835    };
1836    pos += 1;
1837    pos = skip_ascii_whitespace(bytes, pos);
1838
1839    let key_start = pos;
1840    pos += scan_identifier(&bytes[pos..]);
1841    if pos == key_start {
1842        return None;
1843    }
1844    let key = Cow::Borrowed(&raw[key_start..pos]);
1845    let key_source = Some(source_map.span(absolute_start + key_start, absolute_start + pos));
1846
1847    pos = skip_ascii_whitespace(bytes, pos);
1848    if bytes.get(pos) != Some(&b',') {
1849        return None;
1850    }
1851
1852    Some(PartialHeader {
1853        ty,
1854        key,
1855        entry_type_source,
1856        key_source,
1857        delimiter,
1858        field_start: pos + 1,
1859        closing,
1860    })
1861}
1862
/// Recovers as many well-formed `name = value` fields as possible from a
/// failed block's raw text, starting at `pos` (just past the key's comma).
///
/// Scanning stops silently at the first malformed field, at the entry's
/// closing delimiter, at a stray `@`, or at end of input; everything
/// recovered up to that point is returned. All spans are absolute,
/// produced by offsetting block-relative positions with `absolute_start`.
fn recover_partial_fields<'a>(
    raw: &'a str,
    source_map: &SourceMap<'a>,
    absolute_start: usize,
    mut pos: usize,
    closing: u8,
    preserve_raw: bool,
) -> Vec<ParsedField<'a>> {
    let bytes = raw.as_bytes();
    let mut fields = Vec::new();

    loop {
        pos = skip_ascii_whitespace(bytes, pos);
        let Some(&byte) = bytes.get(pos) else {
            break;
        };
        // Closing delimiter ends the entry; a stray `@` means the next
        // block has started without this one closing.
        if byte == closing || byte == b'@' {
            break;
        }

        // Field name.
        let field_start = pos;
        let name_start = pos;
        pos += scan_identifier(&bytes[pos..]);
        if pos == name_start {
            break;
        }
        let name_end = pos;
        let name = Cow::Borrowed(&raw[name_start..name_end]);

        // `=` separator.
        pos = skip_ascii_whitespace(bytes, pos);
        if bytes.get(pos) != Some(&b'=') {
            break;
        }
        pos += 1;
        pos = skip_ascii_whitespace(bytes, pos);

        // Field value: delegate to the real value parser; `value_input`
        // is advanced past whatever it consumed.
        let value_start = pos;
        let tail = &raw[value_start..];
        let mut value_input = tail;
        let Ok(value) = crate::parser::value::parse_value_field(&mut value_input) else {
            break;
        };
        let consumed = tail.len() - value_input.len();
        // Value span excludes trailing whitespace; `boundary` is where
        // the separator (`,` or closing delimiter) is expected.
        let value_end = trim_ascii_whitespace_end(bytes, value_start, value_start + consumed);
        let boundary = value_start + consumed;
        // Field span includes a trailing comma when present.
        let field_end = match bytes.get(boundary) {
            Some(b',') => boundary + 1,
            Some(byte) if *byte == closing => boundary,
            Some(_) | None => boundary,
        };

        let field_source =
            source_map.span(absolute_start + field_start, absolute_start + field_end);
        let value_source =
            source_map.span(absolute_start + value_start, absolute_start + value_end);
        fields.push(ParsedField {
            name,
            value: ParsedValue {
                value,
                raw: preserve_raw.then(|| Cow::Borrowed(&raw[value_start..value_end])),
                source: Some(value_source),
                expanded: None,
                delimiter: Some(value_delimiter(&raw[value_start..value_end])),
            },
            raw: preserve_raw.then(|| Cow::Borrowed(&raw[field_start..field_end])),
            source: Some(field_source),
            name_source: Some(
                source_map.span(absolute_start + name_start, absolute_start + name_end),
            ),
            value_source: Some(value_source),
        });

        // Continue past a comma; anything else ends recovery.
        match bytes.get(boundary) {
            Some(b',') => pos = boundary + 1,
            Some(byte) if *byte == closing => break,
            _ => break,
        }
    }

    fields
}
1944
1945fn diagnostic_for_partial_entry(
1946    entry_index: usize,
1947    failed: &ParsedFailedBlock<'_>,
1948    source_map: &SourceMap<'_>,
1949) -> Diagnostic {
1950    let absolute_start = failed.source.map_or(0, |source| source.byte_start);
1951    let mut diagnostic = diagnostic_for_raw_failure(
1952        entry_index,
1953        &failed.raw,
1954        failed.error.clone(),
1955        failed.source,
1956        Some(source_map),
1957        absolute_start,
1958        None,
1959    );
1960    diagnostic.target = DiagnosticTarget::Entry(entry_index);
1961    diagnostic
1962}
1963
1964fn diagnostic_message(code: &DiagnosticCode, fallback: String) -> String {
1965    match code.as_str() {
1966        "missing-entry-key" => "missing citation key".to_string(),
1967        "missing-field-separator" => "missing field separator".to_string(),
1968        "expected-field-name" => "expected field name".to_string(),
1969        "empty-field-value" => "empty field value".to_string(),
1970        "expected-value-atom" => "expected value atom".to_string(),
1971        "bad-field-boundary" => "expected comma or entry close after field value".to_string(),
1972        "bad-value-boundary" => "expected value after concatenation operator".to_string(),
1973        "unclosed-entry" => "entry ended before its closing delimiter".to_string(),
1974        "unclosed-braced-value" => "braced value ended before its closing brace".to_string(),
1975        "unclosed-quoted-value" => "quoted value ended before its closing quote".to_string(),
1976        _ => fallback,
1977    }
1978}
1979
1980fn classify_failure(raw: &str) -> FailureClassification {
1981    classify_failure_inner(raw).unwrap_or_else(|| FailureClassification {
1982        code: DiagnosticCode::PARSE_ERROR,
1983        range: empty_range(0),
1984    })
1985}
1986
1987fn classify_failure_inner(raw: &str) -> Option<FailureClassification> {
1988    let bytes = raw.as_bytes();
1989    let header = match parse_failure_header(bytes)? {
1990        Ok(header) => header,
1991        Err(classification) => return Some(classification),
1992    };
1993
1994    classify_failure_fields(bytes, header.pos, header.closing)
1995}
1996
/// Successfully scanned `@type{key,` prefix of a failed block.
#[derive(Debug, Clone, Copy)]
struct FailureHeader {
    /// Byte offset just past the key's trailing comma (start of the fields).
    pos: usize,
    /// Closing delimiter byte (`}` or `)`) matching the opener.
    closing: u8,
}
2002
2003fn parse_failure_header(bytes: &[u8]) -> Option<Result<FailureHeader, FailureClassification>> {
2004    let mut pos = bytes.iter().position(|byte| *byte == b'@')?;
2005    pos += 1;
2006    pos += scan_identifier(&bytes[pos..]);
2007    pos = skip_ascii_whitespace(bytes, pos);
2008
2009    let opening = *bytes.get(pos)?;
2010    let closing = match opening {
2011        b'{' => b'}',
2012        b'(' => b')',
2013        _ => {
2014            return Some(Err(classification(
2015                DiagnosticCode::UNCLOSED_ENTRY,
2016                pos,
2017                bytes.len(),
2018            )));
2019        }
2020    };
2021    pos += 1;
2022    pos = skip_ascii_whitespace(bytes, pos);
2023
2024    let key_len = scan_identifier(&bytes[pos..]);
2025    if key_len == 0 {
2026        return Some(Err(classification(
2027            DiagnosticCode::MISSING_ENTRY_KEY,
2028            pos,
2029            bytes.len(),
2030        )));
2031    }
2032    pos += key_len;
2033    pos = skip_ascii_whitespace(bytes, pos);
2034    if bytes.get(pos) != Some(&b',') {
2035        return Some(Err(classification(
2036            DiagnosticCode::MISSING_FIELD_SEPARATOR,
2037            pos,
2038            bytes.len(),
2039        )));
2040    }
2041    pos += 1;
2042
2043    Some(Ok(FailureHeader { pos, closing }))
2044}
2045
/// Walks the field list of a failed block looking for the first defect.
///
/// Returns `None` when the closing delimiter is reached with nothing
/// wrong (the failure must then be elsewhere), otherwise the
/// classification of the first problem found: a truncated entry, a
/// malformed field name, a missing `=`, an empty value, or a misplaced
/// concatenation operator.
fn classify_failure_fields(
    bytes: &[u8],
    mut pos: usize,
    closing: u8,
) -> Option<FailureClassification> {
    loop {
        pos = skip_ascii_whitespace(bytes, pos);
        // Input ran out before the entry closed.
        let Some(&byte) = bytes.get(pos) else {
            return Some(classification(
                DiagnosticCode::UNCLOSED_ENTRY,
                pos,
                bytes.len(),
            ));
        };
        // Entry closed cleanly: the fields are not the problem.
        if byte == closing {
            return None;
        }
        // A new block started before this one closed.
        if byte == b'@' {
            return Some(classification(
                DiagnosticCode::UNCLOSED_ENTRY,
                pos,
                bytes.len(),
            ));
        }

        // Field name.
        let field_name_len = scan_identifier(&bytes[pos..]);
        if field_name_len == 0 {
            return Some(classification(
                DiagnosticCode::EXPECTED_FIELD_NAME,
                pos,
                bytes.len(),
            ));
        }
        pos += field_name_len;
        pos = skip_ascii_whitespace(bytes, pos);
        // `=` separator.
        if bytes.get(pos) != Some(&b'=') {
            return Some(classification(
                DiagnosticCode::MISSING_FIELD_SEPARATOR,
                pos,
                bytes.len(),
            ));
        }
        pos += 1;
        pos = skip_ascii_whitespace(bytes, pos);

        // Peek at the first value byte to catch empty values and a
        // leading concatenation operator before scanning the value.
        let Some(&value_start) = bytes.get(pos) else {
            return Some(classification(
                DiagnosticCode::EMPTY_FIELD_VALUE,
                pos,
                bytes.len(),
            ));
        };
        if value_start == b',' || value_start == closing {
            return Some(classification(
                DiagnosticCode::EMPTY_FIELD_VALUE,
                pos,
                bytes.len(),
            ));
        }
        if value_start == b'#' {
            return Some(classification(
                DiagnosticCode::EXPECTED_VALUE_ATOM,
                pos,
                bytes.len(),
            ));
        }

        match scan_value_sequence(bytes, pos, closing) {
            Ok(next_pos) => pos = next_pos,
            Err(classification) => return Some(classification),
        }
    }
}
2119
/// Scans one field value — a `#`-separated sequence of atoms (quoted
/// string, braced group, or bare identifier) — for classification.
///
/// Returns `Ok` with the position after the value's terminating comma
/// (past the comma) or at the closing delimiter (on it); returns `Err`
/// with a classification for the first defect: a missing/unclosed atom,
/// an empty value, a dangling `#`, or a bad boundary after the value.
fn scan_value_sequence(
    bytes: &[u8],
    mut pos: usize,
    closing: u8,
) -> Result<usize, FailureClassification> {
    loop {
        pos = skip_ascii_whitespace(bytes, pos);
        let atom_start = pos;
        // Each loop iteration must start with an atom.
        let Some(&byte) = bytes.get(pos) else {
            return Err(classification(
                DiagnosticCode::EXPECTED_VALUE_ATOM,
                pos,
                bytes.len(),
            ));
        };

        match byte {
            b'"' => {
                pos = skip_quoted_checked(bytes, pos + 1).ok_or_else(|| {
                    classification(
                        DiagnosticCode::UNCLOSED_QUOTED_VALUE,
                        atom_start,
                        bytes.len(),
                    )
                })?;
            }
            b'{' => {
                pos = skip_braced_checked(bytes, pos + 1).ok_or_else(|| {
                    classification(
                        DiagnosticCode::UNCLOSED_BRACED_VALUE,
                        atom_start,
                        bytes.len(),
                    )
                })?;
            }
            // A separator where an atom should be means the value is empty.
            b',' => {
                return Err(classification(
                    DiagnosticCode::EMPTY_FIELD_VALUE,
                    pos,
                    bytes.len(),
                ));
            }
            b if b == closing => {
                return Err(classification(
                    DiagnosticCode::EMPTY_FIELD_VALUE,
                    pos,
                    bytes.len(),
                ));
            }
            // `#` with no atom before it.
            b'#' => {
                return Err(classification(
                    DiagnosticCode::EXPECTED_VALUE_ATOM,
                    pos,
                    bytes.len(),
                ));
            }
            _ => {
                // Bare atom (macro name or number).
                let identifier_len = scan_identifier(&bytes[pos..]);
                if identifier_len == 0 {
                    return Err(classification(
                        DiagnosticCode::EXPECTED_VALUE_ATOM,
                        pos,
                        bytes.len(),
                    ));
                }
                pos += identifier_len;
            }
        }

        // After an atom: either another `#`-joined atom, a comma, or the
        // closing delimiter. Running out of input means the entry never
        // closed.
        pos = skip_ascii_whitespace(bytes, pos);
        let Some(&boundary) = bytes.get(pos) else {
            return Err(classification(
                DiagnosticCode::UNCLOSED_ENTRY,
                pos,
                bytes.len(),
            ));
        };

        match boundary {
            b'#' => {
                let hash = pos;
                pos += 1;
                pos = skip_ascii_whitespace(bytes, pos);
                // A `#` must be followed by another atom, not a
                // terminator or another `#`.
                if matches!(bytes.get(pos), None | Some(b',' | b'#'))
                    || bytes.get(pos) == Some(&closing)
                {
                    return Err(classification(
                        DiagnosticCode::BAD_VALUE_BOUNDARY,
                        hash,
                        bytes.len(),
                    ));
                }
            }
            b',' => return Ok(pos + 1),
            b if b == closing => return Ok(pos),
            _ => {
                return Err(classification(
                    DiagnosticCode::BAD_FIELD_BOUNDARY,
                    pos,
                    bytes.len(),
                ));
            }
        }
    }
}
2225
2226fn classification(code: DiagnosticCode, pos: usize, len: usize) -> FailureClassification {
2227    FailureClassification {
2228        code,
2229        range: single_byte_range(pos, len),
2230    }
2231}
2232
/// Zero-width byte range anchored at `pos`.
const fn empty_range(pos: usize) -> (usize, usize) {
    (pos, pos)
}
2236
/// One-byte range starting at `pos`, clamped so that neither endpoint
/// exceeds `len` (degenerates to an empty range at the end of input).
fn single_byte_range(pos: usize, len: usize) -> (usize, usize) {
    let start = if pos < len { pos } else { len };
    let end = usize::min(start + 1, len);
    (start, end)
}
2241
/// Re-scans a successfully parsed entry's raw text to recover the source
/// locations of its type, key, and up to `field_count` fields.
///
/// All returned offsets are absolute (block-relative positions shifted by
/// `absolute_start`). Returns `None` when the text does not start with a
/// recognizable `@type{key` header. Field scanning is positional only —
/// values are skipped with the same boundary rules the parser used, so
/// the recovered spans line up with the already-parsed fields.
fn locate_entry(raw: &str, absolute_start: usize, field_count: usize) -> Option<EntryLocations> {
    let bytes = raw.as_bytes();
    let mut pos = 0;
    if bytes.get(pos) != Some(&b'@') {
        return None;
    }
    pos += 1;

    // Entry type.
    let entry_type_start = pos;
    pos += scan_identifier(&bytes[pos..]);
    if pos == entry_type_start {
        return None;
    }
    let entry_type = (absolute_start + entry_type_start, absolute_start + pos);

    // Opening delimiter decides the closing byte used below.
    pos = skip_ascii_whitespace(bytes, pos);
    let opening = *bytes.get(pos)?;
    let (delimiter, closing) = match opening {
        b'{' => (EntryDelimiter::Braces, b'}'),
        b'(' => (EntryDelimiter::Parentheses, b')'),
        _ => return None,
    };
    pos += 1;
    pos = skip_ascii_whitespace(bytes, pos);

    // Citation key.
    let key_start = pos;
    pos += scan_identifier(&bytes[pos..]);
    if pos == key_start {
        return None;
    }
    let key = (absolute_start + key_start, absolute_start + pos);

    // No comma after the key means no fields; header locations alone.
    pos = skip_ascii_whitespace(bytes, pos);
    if bytes.get(pos) != Some(&b',') {
        return Some(EntryLocations {
            entry_type,
            key,
            delimiter,
            fields: Vec::new(),
        });
    }
    pos += 1;

    // Locate at most `field_count` fields (matching the parsed entry).
    let mut fields = Vec::with_capacity(field_count);
    while fields.len() < field_count {
        pos = skip_ascii_whitespace(bytes, pos);
        if bytes.get(pos) == Some(&closing) || pos >= bytes.len() {
            break;
        }

        // Field name.
        let field_start = pos;
        let name_start = pos;
        pos += scan_identifier(&bytes[pos..]);
        if pos == name_start {
            break;
        }
        let name_end = pos;

        // `=` separator.
        pos = skip_ascii_whitespace(bytes, pos);
        if bytes.get(pos) != Some(&b'=') {
            break;
        }
        pos += 1;
        pos = skip_ascii_whitespace(bytes, pos);

        // Value span excludes trailing whitespace; the whole-field span
        // includes a trailing comma when present.
        let value_start = pos;
        let boundary = find_value_boundary(bytes, pos, closing);
        let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
        let mut whole_end = value_end;
        pos = boundary;
        if bytes.get(pos) == Some(&b',') {
            whole_end = pos + 1;
            pos += 1;
        }

        fields.push(FieldLocations {
            whole: (absolute_start + field_start, absolute_start + whole_end),
            name: (absolute_start + name_start, absolute_start + name_end),
            value: (absolute_start + value_start, absolute_start + value_end),
            value_delimiter: value_delimiter(&raw[value_start..value_end]),
        });
    }

    Some(EntryLocations {
        entry_type,
        key,
        delimiter,
        fields,
    })
}
2332
2333fn value_delimiter(raw_value: &str) -> ValueDelimiter {
2334    let trimmed = raw_value.trim_start();
2335    if has_top_level_concat(trimmed.as_bytes()) {
2336        return ValueDelimiter::Concatenation;
2337    }
2338
2339    match trimmed.as_bytes().first() {
2340        Some(b'{') => ValueDelimiter::Braces,
2341        Some(b'"') => ValueDelimiter::Quotes,
2342        _ => ValueDelimiter::Bare,
2343    }
2344}
2345
2346fn locate_definition_value(raw: &str) -> Option<&str> {
2347    let bytes = raw.as_bytes();
2348    let equals = bytes.iter().position(|byte| *byte == b'=')?;
2349    let value_start = skip_ascii_whitespace(bytes, equals + 1);
2350    let closing = enclosing_close_byte(bytes)?;
2351    let boundary = find_value_boundary(bytes, value_start, closing);
2352    let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2353    raw.get(value_start..value_end)
2354}
2355
2356fn locate_preamble_value(raw: &str) -> Option<&str> {
2357    let bytes = raw.as_bytes();
2358    let opening = bytes.iter().position(|byte| matches!(byte, b'{' | b'('))?;
2359    let closing = match bytes[opening] {
2360        b'{' => b'}',
2361        b'(' => b')',
2362        _ => return None,
2363    };
2364    let value_start = skip_ascii_whitespace(bytes, opening + 1);
2365    let boundary = find_value_boundary(bytes, value_start, closing);
2366    let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2367    raw.get(value_start..value_end)
2368}
2369
/// Finds the first `{` or `(` and returns the matching closing byte.
fn enclosing_close_byte(bytes: &[u8]) -> Option<u8> {
    bytes.iter().find_map(|&byte| match byte {
        b'{' => Some(b'}'),
        b'(' => Some(b')'),
        _ => None,
    })
}
2378
2379fn has_top_level_concat(bytes: &[u8]) -> bool {
2380    let mut pos = 0;
2381    while let Some(&byte) = bytes.get(pos) {
2382        match byte {
2383            b'{' => pos = skip_braced(bytes, pos + 1),
2384            b'"' => pos = skip_quoted(bytes, pos + 1),
2385            b'#' => return true,
2386            _ => pos += 1,
2387        }
2388    }
2389    false
2390}
2391
/// Advances `pos` past any run of ASCII whitespace (space, tab, LF, CR).
fn skip_ascii_whitespace(bytes: &[u8], mut pos: usize) -> usize {
    while let Some(b' ' | b'\t' | b'\n' | b'\r') = bytes.get(pos) {
        pos += 1;
    }
    pos
}
2398
/// Moves `end` backwards past trailing ASCII whitespace, never retreating
/// before `start`. Positions beyond the slice are left untouched.
fn trim_ascii_whitespace_end(bytes: &[u8], start: usize, mut end: usize) -> usize {
    while end > start {
        match bytes.get(end - 1) {
            Some(b' ' | b'\t' | b'\n' | b'\r') => end -= 1,
            _ => break,
        }
    }
    end
}
2405
/// Length of the identifier prefix of `bytes` (0 when the first byte is
/// not an identifier byte).
fn scan_identifier(bytes: &[u8]) -> usize {
    bytes.iter().take_while(|&&byte| is_identifier_byte(byte)).count()
}

/// Bytes allowed in BibTeX identifiers (entry types, keys, field names):
/// ASCII alphanumerics plus `_`, `-`, `:`, and `.`.
const fn is_identifier_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
2419
2420fn find_value_boundary(bytes: &[u8], mut pos: usize, closing: u8) -> usize {
2421    while let Some(&byte) = bytes.get(pos) {
2422        match byte {
2423            b'{' => pos = skip_braced(bytes, pos + 1),
2424            b'"' => pos = skip_quoted(bytes, pos + 1),
2425            b',' => break,
2426            b if b == closing => break,
2427            _ => pos += 1,
2428        }
2429    }
2430    pos
2431}
2432
/// Skips past a braced group, starting just after its opening `{`.
///
/// Tracks nesting depth, treats `\` as escaping the next byte, and
/// returns the position just past the matching `}` — or the end of input
/// if the group never closes.
fn skip_braced(bytes: &[u8], start: usize) -> usize {
    let mut pos = start;
    let mut depth = 0usize;
    loop {
        match bytes.get(pos) {
            None => return pos,
            Some(b'\\') => pos = (pos + 2).min(bytes.len()),
            Some(b'{') => {
                depth += 1;
                pos += 1;
            }
            Some(b'}') => {
                if depth == 0 {
                    return pos + 1;
                }
                depth -= 1;
                pos += 1;
            }
            Some(_) => pos += 1,
        }
    }
}
2452
/// Like `skip_braced`, starting just after the opening `{`, but returns
/// `None` instead of the end position when the group never closes.
fn skip_braced_checked(bytes: &[u8], start: usize) -> Option<usize> {
    let mut pos = start;
    let mut depth = 0usize;
    while pos < bytes.len() {
        match bytes[pos] {
            b'\\' => pos = (pos + 2).min(bytes.len()),
            b'{' => {
                depth += 1;
                pos += 1;
            }
            b'}' => {
                if depth == 0 {
                    return Some(pos + 1);
                }
                depth -= 1;
                pos += 1;
            }
            _ => pos += 1,
        }
    }
    None
}
2472
/// Skips past a quoted string, starting just after its opening `"`.
///
/// Treats `\` as escaping the next byte; returns the position just past
/// the closing quote, or the end of input if the string never closes.
fn skip_quoted(bytes: &[u8], start: usize) -> usize {
    let mut pos = start;
    loop {
        match bytes.get(pos) {
            None => return pos,
            Some(b'\\') => pos = (pos + 2).min(bytes.len()),
            Some(b'"') => return pos + 1,
            Some(_) => pos += 1,
        }
    }
}
2483
/// Like `skip_quoted`, starting just after the opening `"`, but returns
/// `None` instead of the end position when the string never closes.
fn skip_quoted_checked(bytes: &[u8], start: usize) -> Option<usize> {
    let mut pos = start;
    while pos < bytes.len() {
        match bytes[pos] {
            b'\\' => pos = (pos + 2).min(bytes.len()),
            b'"' => return Some(pos + 1),
            _ => pos += 1,
        }
    }
    None
}