1use crate::library::BlockKind;
8use crate::library::RawBuildItem;
9use crate::model::normalize_text_projection;
10use crate::source::SourceCursor;
11use crate::{
12 normalize_doi, Comment, DateParseError, DateParts, Entry, EntryType, FailedBlock, Field,
13 Library, PersonName, Preamble, ResourceField, SourceId, SourceMap, SourceSpan,
14 StringDefinition, Value,
15};
16use std::borrow::Cow;
17use std::fmt;
18
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Overall outcome of a parse run.
pub enum ParseStatus {
    /// The input was parsed without recorded problems.
    Ok,
    /// Problems were recorded, but entries, strings, or preambles were still produced.
    Partial,
    /// Problems were recorded and no entries, strings, or preambles were produced.
    Failed,
}
29
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Severity level attached to a [`Diagnostic`].
pub enum DiagnosticSeverity {
    /// Error-level diagnostic.
    Error,
    /// Warning-level diagnostic.
    Warning,
    /// Informational diagnostic.
    Info,
}
40
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Machine-readable identifier attached to a diagnostic.
///
/// Holds either a `'static` string (the built-in codes below and
/// [`DiagnosticCode::borrowed`]) or an owned string created through
/// [`DiagnosticCode::custom`]. Equality and hashing follow the text itself.
pub struct DiagnosticCode(Cow<'static, str>);

impl DiagnosticCode {
    /// Built-in code `parse-error`.
    pub const PARSE_ERROR: Self = Self::borrowed("parse-error");
    /// Built-in code `missing-entry-key`.
    pub const MISSING_ENTRY_KEY: Self = Self::borrowed("missing-entry-key");
    /// Built-in code `missing-field-separator`.
    pub const MISSING_FIELD_SEPARATOR: Self = Self::borrowed("missing-field-separator");
    /// Built-in code `expected-field-name`.
    pub const EXPECTED_FIELD_NAME: Self = Self::borrowed("expected-field-name");
    /// Built-in code `empty-field-value`.
    pub const EMPTY_FIELD_VALUE: Self = Self::borrowed("empty-field-value");
    /// Built-in code `expected-value-atom`.
    pub const EXPECTED_VALUE_ATOM: Self = Self::borrowed("expected-value-atom");
    /// Built-in code `bad-field-boundary`.
    pub const BAD_FIELD_BOUNDARY: Self = Self::borrowed("bad-field-boundary");
    /// Built-in code `bad-value-boundary`.
    pub const BAD_VALUE_BOUNDARY: Self = Self::borrowed("bad-value-boundary");
    /// Built-in code `unclosed-entry`.
    pub const UNCLOSED_ENTRY: Self = Self::borrowed("unclosed-entry");
    /// Built-in code `unclosed-braced-value`.
    pub const UNCLOSED_BRACED_VALUE: Self = Self::borrowed("unclosed-braced-value");
    /// Built-in code `unclosed-quoted-value`.
    pub const UNCLOSED_QUOTED_VALUE: Self = Self::borrowed("unclosed-quoted-value");

    #[must_use]
    /// Wraps a `'static` string as a code without allocating.
    pub const fn borrowed(code: &'static str) -> Self {
        Self(Cow::Borrowed(code))
    }

    #[must_use]
    /// Creates a code from any value convertible into an owned `String`.
    pub fn custom(code: impl Into<String>) -> Self {
        let owned: String = code.into();
        Self(Cow::Owned(owned))
    }

    #[must_use]
    /// Returns the code text.
    pub fn as_str(&self) -> &str {
        self.0.as_ref()
    }
}

impl fmt::Display for DiagnosticCode {
    /// Writes the code text verbatim; width and padding flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
99
#[derive(Debug, Clone, PartialEq, Eq)]
/// What a [`Diagnostic`] refers to.
///
/// The `usize` payloads are indices; NOTE(review): presumably into the
/// corresponding lists of the parsed document — confirm against the producers.
pub enum DiagnosticTarget {
    /// The file as a whole.
    File,
    /// A block, identified by index.
    Block(usize),
    /// An entry, identified by index.
    Entry(usize),
    /// A field of an entry.
    Field {
        /// Index of the entry.
        entry: usize,
        /// Index of the field.
        field: usize,
    },
    /// The value of a field of an entry.
    Value {
        /// Index of the entry.
        entry: usize,
        /// Index of the field.
        field: usize,
    },
    /// A failed block, identified by index.
    FailedBlock(usize),
}
126
127#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct Diagnostic {
130 pub severity: DiagnosticSeverity,
132 pub code: DiagnosticCode,
134 pub message: String,
136 pub target: DiagnosticTarget,
138 pub source: Option<SourceSpan>,
140 pub snippet: Option<String>,
142}
143
144impl Diagnostic {
145 #[must_use]
147 pub fn error(
148 code: DiagnosticCode,
149 message: impl Into<String>,
150 target: DiagnosticTarget,
151 source: Option<SourceSpan>,
152 ) -> Self {
153 Self {
154 severity: DiagnosticSeverity::Error,
155 code,
156 message: message.into(),
157 target,
158 source,
159 snippet: None,
160 }
161 }
162
163 #[must_use]
165 pub fn with_snippet(mut self, snippet: impl Into<String>) -> Self {
166 self.snippet = Some(snippet.into());
167 self
168 }
169}
170
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Aggregate counters describing a parse result.
pub struct ParseSummary {
    /// Overall status.
    pub status: ParseStatus,
    /// Number of entries.
    pub entries: usize,
    /// Number of warning diagnostics.
    pub warnings: usize,
    /// Number of error diagnostics.
    pub errors: usize,
    /// Number of informational diagnostics.
    pub infos: usize,
    /// Number of failed blocks.
    pub failed_blocks: usize,
    /// Number of recovered blocks (the recovery criteria are defined by whoever fills this in).
    pub recovered_blocks: usize,
}
189
190#[derive(Debug, Clone, PartialEq, Eq)]
192pub struct ParsedSource<'a> {
193 pub id: SourceId,
195 pub name: Option<Cow<'a, str>>,
197}
198
199impl ParsedSource<'_> {
200 #[must_use]
202 pub const fn is_anonymous(&self) -> bool {
203 self.name.is_none()
204 }
205
206 #[must_use]
208 pub fn into_owned(self) -> ParsedSource<'static> {
209 ParsedSource {
210 id: self.id,
211 name: self.name.map(|name| Cow::Owned(name.into_owned())),
212 }
213 }
214}
215
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Kind of a block in a parsed document, carrying an index.
///
/// NOTE(review): the index presumably addresses the matching list of the
/// parsed document (entries, strings, ...) — confirm against the accessors.
pub enum ParsedBlock {
    /// An entry block.
    Entry(usize),
    /// A string-definition block.
    String(usize),
    /// A preamble block.
    Preamble(usize),
    /// A comment block.
    Comment(usize),
    /// A block that failed to parse.
    Failed(usize),
}
230
#[derive(Debug, Clone, PartialEq)]
/// One item produced while parsing: a parsed block or a diagnostic.
pub enum ParseEvent<'a> {
    /// A parsed entry.
    Entry(ParsedEntry<'a>),
    /// A parsed string definition.
    String(ParsedString<'a>),
    /// A parsed preamble.
    Preamble(ParsedPreamble<'a>),
    /// A parsed comment.
    Comment(ParsedComment<'a>),
    /// A block that failed to parse.
    Failed(ParsedFailedBlock<'a>),
    /// A diagnostic.
    Diagnostic(Diagnostic),
}
247
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Flow-control signal with two states.
///
/// NOTE(review): presumably returned by event consumers to steer streaming
/// parsing — confirm against the call sites.
pub enum ParseFlow {
    /// Keep going.
    Continue,
    /// Stop.
    Stop,
}
256
257#[derive(Debug, Clone, Copy, PartialEq, Eq)]
259pub struct StreamingSummary {
260 pub status: ParseStatus,
262 pub entries: usize,
264 pub strings: usize,
266 pub preambles: usize,
268 pub comments: usize,
270 pub failed_blocks: usize,
272 pub warnings: usize,
274 pub errors: usize,
276 pub infos: usize,
278 pub recovered_blocks: usize,
280 pub stopped: bool,
282}
283
284impl Default for StreamingSummary {
285 fn default() -> Self {
286 Self {
287 status: ParseStatus::Ok,
288 entries: 0,
289 strings: 0,
290 preambles: 0,
291 comments: 0,
292 failed_blocks: 0,
293 warnings: 0,
294 errors: 0,
295 infos: 0,
296 recovered_blocks: 0,
297 stopped: false,
298 }
299 }
300}
301
302impl StreamingSummary {
303 pub(crate) fn finalize_status(&mut self) {
304 self.status = if self.errors == 0 {
305 ParseStatus::Ok
306 } else if self.entries == 0 && self.strings == 0 && self.preambles == 0 {
307 ParseStatus::Failed
308 } else {
309 ParseStatus::Partial
310 };
311 }
312
313 pub(crate) fn count_diagnostic(&mut self, diagnostic: &Diagnostic) {
314 match diagnostic.severity {
315 DiagnosticSeverity::Error => self.errors += 1,
316 DiagnosticSeverity::Warning => self.warnings += 1,
317 DiagnosticSeverity::Info => self.infos += 1,
318 }
319 }
320}
321
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Completeness marker stored on a parsed entry.
pub enum ParsedEntryStatus {
    /// The entry is complete.
    Complete,
    /// The entry is only partially available.
    Partial,
}
330
331#[derive(Debug, Clone, PartialEq)]
332pub(crate) enum RemovedFieldSources {
333 One(SourceSpan),
334 Many(Vec<SourceSpan>),
335}
336
337impl RemovedFieldSources {
338 fn push(&mut self, source: SourceSpan) {
339 match self {
340 Self::One(first) => {
341 *self = Self::Many(vec![*first, source]);
342 }
343 Self::Many(sources) => sources.push(source),
344 }
345 }
346
347 fn as_slice(&self) -> &[SourceSpan] {
348 match self {
349 Self::One(source) => std::slice::from_ref(source),
350 Self::Many(sources) => sources.as_slice(),
351 }
352 }
353}
354
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Delimiter style recorded for an entry body.
pub enum EntryDelimiter {
    /// Braces were used.
    Braces,
    /// Parentheses were used.
    Parentheses,
}
363
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Delimiter style recorded for a value.
pub enum ValueDelimiter {
    /// Braces were used.
    Braces,
    /// Quotes were used.
    Quotes,
    /// The value was written bare.
    Bare,
    /// The value is a concatenation.
    Concatenation,
}
376
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Policy choice for variables that cannot be resolved.
///
/// NOTE(review): the exact effect of each variant is implemented elsewhere;
/// the descriptions below restate the names only.
pub enum UnresolvedVariablePolicy {
    /// Preserve the unresolved variable.
    Preserve,
    /// Use a placeholder.
    Placeholder,
    /// Treat it as an error.
    Error,
}
387
388#[derive(Debug, Clone, Copy, PartialEq, Eq)]
390pub struct ExpansionOptions {
391 pub expand_strings: bool,
393 pub expand_months: bool,
395 pub unresolved_variables: UnresolvedVariablePolicy,
397}
398
399impl Default for ExpansionOptions {
400 fn default() -> Self {
401 Self {
402 expand_strings: true,
403 expand_months: true,
404 unresolved_variables: UnresolvedVariablePolicy::Error,
405 }
406 }
407}
408
409#[derive(Debug, Clone, PartialEq)]
411pub struct ParsedValue<'a> {
412 pub value: Value<'a>,
414 pub raw: Option<Cow<'a, str>>,
416 pub source: Option<SourceSpan>,
418 pub expanded: Option<Cow<'a, str>>,
420 pub delimiter: Option<ValueDelimiter>,
422}
423
424impl<'a> ParsedValue<'a> {
425 #[must_use]
427 pub const fn new(value: Value<'a>) -> Self {
428 Self {
429 value,
430 raw: None,
431 source: None,
432 expanded: None,
433 delimiter: None,
434 }
435 }
436
437 #[must_use]
439 pub fn into_value(self) -> Value<'a> {
440 self.value
441 }
442
443 #[must_use]
445 pub const fn parsed(&self) -> &Value<'a> {
446 &self.value
447 }
448
449 #[must_use]
451 pub fn raw_text(&self) -> Option<&str> {
452 self.raw.as_deref()
453 }
454
455 #[must_use]
457 pub fn expanded_text(&self) -> Option<&str> {
458 self.expanded.as_deref()
459 }
460
461 #[must_use]
463 pub fn plain_text(&self) -> String {
464 self.value.to_plain_string()
465 }
466
467 #[must_use]
469 pub fn lossy_text(&self) -> String {
470 self.value.to_lossy_string()
471 }
472
473 #[cfg(feature = "latex_to_unicode")]
475 #[must_use]
476 pub fn unicode_plain_text(&self) -> String {
477 self.value.to_unicode_plain_string()
478 }
479
480 #[must_use]
482 pub fn into_owned(self) -> ParsedValue<'static> {
483 ParsedValue {
484 value: self.value.into_owned(),
485 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
486 source: self.source,
487 expanded: self
488 .expanded
489 .map(|expanded| Cow::Owned(expanded.into_owned())),
490 delimiter: self.delimiter,
491 }
492 }
493
494 pub(crate) fn from_owned_value(
495 value: Value<'a>,
496 source: Option<SourceSpan>,
497 delimiter: Option<ValueDelimiter>,
498 ) -> ParsedValue<'static> {
499 ParsedValue {
500 value: value.into_owned(),
501 raw: None,
502 source,
503 expanded: None,
504 delimiter,
505 }
506 }
507}
508
509#[derive(Debug, Clone, PartialEq)]
511pub struct ParsedField<'a> {
512 pub name: Cow<'a, str>,
514 pub value: ParsedValue<'a>,
516 pub raw: Option<Cow<'a, str>>,
518 pub source: Option<SourceSpan>,
520 pub name_source: Option<SourceSpan>,
522 pub value_source: Option<SourceSpan>,
524}
525
526impl<'a> ParsedField<'a> {
527 #[must_use]
529 pub fn from_field(field: Field<'a>) -> Self {
530 Self {
531 name: field.name,
532 value: ParsedValue::new(field.value),
533 raw: None,
534 source: None,
535 name_source: None,
536 value_source: None,
537 }
538 }
539
540 #[must_use]
542 pub fn into_field(self) -> Field<'a> {
543 Field {
544 name: self.name,
545 value: self.value.into_value(),
546 }
547 }
548
549 #[must_use]
551 pub fn into_owned(self) -> ParsedField<'static> {
552 ParsedField {
553 name: owned_field_name(self.name),
554 value: self.value.into_owned(),
555 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
556 source: self.source,
557 name_source: self.name_source,
558 value_source: self.value_source,
559 }
560 }
561
562 pub(crate) fn from_owned_field(field: Field<'a>) -> ParsedField<'static> {
563 ParsedField {
564 name: owned_field_name(field.name),
565 value: ParsedValue::from_owned_value(field.value, None, None),
566 raw: None,
567 source: None,
568 name_source: None,
569 value_source: None,
570 }
571 }
572}
573
#[derive(Debug, Clone, PartialEq)]
/// An entry together with optional source metadata and diagnostics.
pub struct ParsedEntry<'a> {
    /// Entry type.
    pub ty: EntryType<'a>,
    /// Citation key.
    pub key: Cow<'a, str>,
    /// Fields in their stored order.
    pub fields: Vec<ParsedField<'a>>,
    /// Completeness marker.
    pub status: ParsedEntryStatus,
    /// Span of the whole entry, when known.
    pub source: Option<SourceSpan>,
    /// Span of the entry type, when known.
    pub entry_type_source: Option<SourceSpan>,
    /// Span of the key, when known.
    pub key_source: Option<SourceSpan>,
    /// Delimiter style recorded for the entry, when known.
    pub delimiter: Option<EntryDelimiter>,
    /// Raw source text of the entry, when it was preserved.
    pub raw: Option<Cow<'a, str>>,
    // Spans of fields that were removed after parsing; filled by the
    // field-removal methods for fields that carried a source span.
    pub(crate) removed_field_sources: Option<Box<RemovedFieldSources>>,
    /// Diagnostics attached to this entry.
    pub diagnostics: Vec<Diagnostic>,
}
599
600impl<'a> ParsedEntry<'a> {
601 #[must_use]
603 pub fn from_entry(entry: Entry<'a>, source: Option<SourceSpan>) -> Self {
604 Self {
605 ty: entry.ty,
606 key: entry.key,
607 fields: entry
608 .fields
609 .into_iter()
610 .map(ParsedField::from_field)
611 .collect(),
612 status: ParsedEntryStatus::Complete,
613 source,
614 entry_type_source: None,
615 key_source: None,
616 delimiter: None,
617 raw: None,
618 removed_field_sources: None,
619 diagnostics: Vec::new(),
620 }
621 }
622
623 pub(crate) fn from_entry_owned(
624 entry: Entry<'a>,
625 source: Option<SourceSpan>,
626 ) -> ParsedEntry<'static> {
627 ParsedEntry {
628 ty: entry.ty.into_owned(),
629 key: Cow::Owned(entry.key.into_owned()),
630 fields: entry
631 .fields
632 .into_iter()
633 .map(ParsedField::from_owned_field)
634 .collect(),
635 status: ParsedEntryStatus::Complete,
636 source,
637 entry_type_source: None,
638 key_source: None,
639 delimiter: None,
640 raw: None,
641 removed_field_sources: None,
642 diagnostics: Vec::new(),
643 }
644 }
645
646 pub(crate) fn from_stream_entry(
647 entry: Entry<'a>,
648 source: SourceSpan,
649 raw: &'a str,
650 source_map: &SourceMap<'a>,
651 preserve_raw: bool,
652 ) -> Self {
653 let mut parsed = Self::from_entry(entry, Some(source));
654 parsed.apply_locations(raw, source_map, preserve_raw);
655 parsed
656 }
657
658 pub(crate) fn from_located_stream_entry_owned(
659 located: crate::parser::entry::LocatedEntry<'a>,
660 source: SourceSpan,
661 span_cursor: &mut SourceCursor<'_, 'a>,
662 ) -> ParsedEntry<'static> {
663 let entry = located.entry;
664 let entry_type_source = span_cursor.span(located.entry_type.0, located.entry_type.1);
665 let key_source = span_cursor.span(located.key.0, located.key.1);
666 let fields = entry
667 .fields
668 .into_iter()
669 .zip(located.fields)
670 .map(|(field, location)| {
671 let field_source = span_cursor.span(location.whole.0, location.whole.1);
672 let name_source = span_cursor.span(location.name.0, location.name.1);
673 let value_source = span_cursor.span(location.value.0, location.value.1);
674 ParsedField {
675 name: owned_field_name(field.name),
676 value: ParsedValue::from_owned_value(
677 field.value,
678 Some(value_source),
679 Some(location.value_delimiter),
680 ),
681 raw: None,
682 source: Some(field_source),
683 name_source: Some(name_source),
684 value_source: Some(value_source),
685 }
686 })
687 .collect();
688
689 ParsedEntry {
690 ty: entry.ty.into_owned(),
691 key: Cow::Owned(entry.key.into_owned()),
692 fields,
693 status: ParsedEntryStatus::Complete,
694 source: Some(source),
695 entry_type_source: Some(entry_type_source),
696 key_source: Some(key_source),
697 delimiter: Some(located.delimiter),
698 raw: None,
699 removed_field_sources: None,
700 diagnostics: Vec::new(),
701 }
702 }
703
704 fn apply_locations(&mut self, raw: &'a str, source_map: &SourceMap<'a>, preserve_raw: bool) {
705 let Some(entry_span) = self.source else {
706 return;
707 };
708 let Some(locations) = locate_entry(raw, entry_span.byte_start, self.fields.len()) else {
709 return;
710 };
711
712 self.entry_type_source =
713 Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
714 self.key_source = Some(source_map.span(locations.key.0, locations.key.1));
715 self.delimiter = Some(locations.delimiter);
716 if preserve_raw {
717 self.raw = Some(Cow::Borrowed(raw));
718 }
719
720 for (field, location) in self.fields.iter_mut().zip(locations.fields) {
721 field.source = Some(source_map.span(location.whole.0, location.whole.1));
722 field.name_source = Some(source_map.span(location.name.0, location.name.1));
723 field.value.source = Some(source_map.span(location.value.0, location.value.1));
724 field.value_source = field.value.source;
725 field.value.delimiter = Some(location.value_delimiter);
726
727 if preserve_raw {
728 if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
729 field.raw = Some(Cow::Borrowed(source));
730 }
731 if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
732 field.value.raw = Some(Cow::Borrowed(source));
733 }
734 }
735 }
736 }
737
738 #[must_use]
740 pub fn key(&self) -> &str {
741 &self.key
742 }
743
744 #[must_use]
746 pub fn into_entry(self) -> Entry<'a> {
747 Entry {
748 ty: self.ty,
749 key: self.key,
750 fields: self
751 .fields
752 .into_iter()
753 .map(ParsedField::into_field)
754 .collect(),
755 }
756 }
757
758 pub fn rename_key(&mut self, key: impl Into<Cow<'a, str>>) {
760 self.key = key.into();
761 }
762
763 pub fn set_entry_type(&mut self, ty: EntryType<'a>) {
765 self.ty = ty;
766 }
767
768 pub fn add_field(&mut self, name: impl Into<Cow<'a, str>>, value: Value<'a>) {
770 self.fields.push(ParsedField {
771 name: name.into(),
772 value: ParsedValue::new(value),
773 raw: None,
774 source: None,
775 name_source: None,
776 value_source: None,
777 });
778 }
779
780 #[must_use]
782 pub fn replace_field_value(&mut self, name: &str, value: Value<'a>) -> bool {
783 self.replace_field_value_at(name, 0, value)
784 }
785
786 #[must_use]
788 pub fn replace_field_value_at(
789 &mut self,
790 name: &str,
791 occurrence: usize,
792 value: Value<'a>,
793 ) -> bool {
794 let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
795 return false;
796 };
797 let field = &mut self.fields[index];
798 field.value.value = value;
799 field.value.raw = None;
800 field.raw = None;
801 field.value.expanded = None;
802 true
803 }
804
805 #[must_use]
807 pub fn rename_field(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> usize {
808 let new = new.into();
809 let mut renamed = 0;
810 for field in &mut self.fields {
811 if field.name == old {
812 field.name.clone_from(&new);
813 field.raw = None;
814 renamed += 1;
815 }
816 }
817 renamed
818 }
819
820 #[must_use]
822 pub fn remove_field(&mut self, name: &str) -> usize {
823 let mut removed = 0usize;
824 let mut index = 0usize;
825 while index < self.fields.len() {
826 if self.fields[index].name == name {
827 self.remove_field_index(index);
828 removed += 1;
829 } else {
830 index += 1;
831 }
832 }
833 removed
834 }
835
836 #[must_use]
838 pub fn remove_field_at(&mut self, name: &str, occurrence: usize) -> bool {
839 let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
840 return false;
841 };
842 self.remove_field_index(index);
843 true
844 }
845
846 #[must_use]
848 pub fn remove_field_by_index(&mut self, index: usize) -> bool {
849 if index >= self.fields.len() {
850 return false;
851 }
852 self.remove_field_index(index);
853 true
854 }
855
856 #[must_use]
858 pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
859 let mut removed = 0usize;
860 let mut index = 0usize;
861 while index < self.fields.len() {
862 if names
863 .iter()
864 .any(|name| self.fields[index].name.eq_ignore_ascii_case(name))
865 {
866 self.remove_field_index(index);
867 removed += 1;
868 } else {
869 index += 1;
870 }
871 }
872 removed
873 }
874
875 fn remove_field_index(&mut self, index: usize) {
876 let field = self.fields.remove(index);
877 if let Some(source) = field.source {
878 match &mut self.removed_field_sources {
879 Some(sources) => sources.push(source),
880 None => {
881 self.removed_field_sources = Some(Box::new(RemovedFieldSources::One(source)));
882 }
883 }
884 } else {
885 self.raw = None;
886 }
887 }
888
889 pub(crate) fn removed_field_sources(&self) -> &[SourceSpan] {
890 self.removed_field_sources
891 .as_deref()
892 .map_or(&[], RemovedFieldSources::as_slice)
893 }
894
895 #[must_use]
897 pub fn field_ignore_case(&self, name: &str) -> Option<&ParsedField<'a>> {
898 self.fields
899 .iter()
900 .find(|field| field.name.eq_ignore_ascii_case(name))
901 }
902
903 #[must_use]
905 pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
906 self.field_ignore_case(name)
907 .map(|field| field.value.plain_text())
908 }
909
910 #[must_use]
912 pub fn doi(&self) -> Option<String> {
913 self.get_as_string_ignore_case("doi")
914 .and_then(|doi| normalize_doi(&doi))
915 }
916
917 #[must_use]
919 pub fn authors(&self) -> Vec<PersonName> {
920 self.get_as_string_ignore_case("author")
921 .map_or_else(Vec::new, |authors| crate::parse_names(&authors))
922 }
923
924 #[must_use]
926 pub fn editors(&self) -> Vec<PersonName> {
927 self.get_as_string_ignore_case("editor")
928 .map_or_else(Vec::new, |editors| crate::parse_names(&editors))
929 }
930
931 #[must_use]
933 pub fn translators(&self) -> Vec<PersonName> {
934 self.get_as_string_ignore_case("translator")
935 .map_or_else(Vec::new, |translators| crate::parse_names(&translators))
936 }
937
938 #[must_use]
940 pub fn date_parts_for(
941 &self,
942 field: &str,
943 ) -> Option<std::result::Result<DateParts, DateParseError>> {
944 self.get_as_string_ignore_case(field)
945 .map(|value| crate::parse_date_parts(&value))
946 }
947
948 #[must_use]
950 pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
951 self.clone().into_entry().date_parts()
952 }
953
954 #[must_use]
956 pub fn resource_fields(&self) -> Vec<ResourceField> {
957 self.clone().into_entry().resource_fields()
958 }
959
960 #[must_use]
962 pub fn into_owned(self) -> ParsedEntry<'static> {
963 ParsedEntry {
964 ty: self.ty.into_owned(),
965 key: Cow::Owned(self.key.into_owned()),
966 fields: self
967 .fields
968 .into_iter()
969 .map(ParsedField::into_owned)
970 .collect(),
971 status: self.status,
972 source: self.source,
973 entry_type_source: self.entry_type_source,
974 key_source: self.key_source,
975 delimiter: self.delimiter,
976 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
977 removed_field_sources: self.removed_field_sources,
978 diagnostics: self.diagnostics,
979 }
980 }
981}
982
983fn owned_field_name(name: Cow<'_, str>) -> Cow<'static, str> {
984 static_field_name(&name).map_or_else(|| Cow::Owned(name.into_owned()), Cow::Borrowed)
985}
986
/// Returns the `'static` spelling of `name` when it is one of the listed
/// field names, matched exactly (case-sensitive); `None` otherwise.
fn static_field_name(name: &str) -> Option<&'static str> {
    const KNOWN_FIELD_NAMES: &[&str] = &[
        "abstract",
        "address",
        "archiveprefix",
        "author",
        "booktitle",
        "chapter",
        "copyright",
        "crossref",
        "date",
        "doi",
        "edition",
        "editor",
        "eprint",
        "eventdate",
        "file",
        "institution",
        "isbn",
        "issn",
        "journal",
        "keywords",
        "language",
        "month",
        "note",
        "number",
        "organization",
        "origdate",
        "pages",
        "pmcid",
        "pmid",
        "primaryclass",
        "publisher",
        "school",
        "series",
        "timestamp",
        "title",
        "translator",
        "type",
        "url",
        "urldate",
        "volume",
        "year",
    ];

    KNOWN_FIELD_NAMES
        .iter()
        .copied()
        .find(|known| *known == name)
}
1033
1034fn nth_field_index(fields: &[ParsedField<'_>], name: &str, occurrence: usize) -> Option<usize> {
1035 fields
1036 .iter()
1037 .enumerate()
1038 .filter(|(_, field)| field.name == name)
1039 .nth(occurrence)
1040 .map(|(index, _)| index)
1041}
1042
1043#[derive(Debug, Clone, PartialEq)]
1045pub struct ParsedString<'a> {
1046 pub name: Cow<'a, str>,
1048 pub value: ParsedValue<'a>,
1050 pub source: Option<SourceSpan>,
1052 pub raw: Option<Cow<'a, str>>,
1054}
1055
1056impl<'a> ParsedString<'a> {
1057 #[must_use]
1059 pub fn from_definition(definition: StringDefinition<'a>) -> Self {
1060 Self {
1061 name: definition.name,
1062 value: ParsedValue::new(definition.value),
1063 source: definition.source,
1064 raw: None,
1065 }
1066 }
1067
1068 pub(crate) fn from_stream_definition(
1069 name: &'a str,
1070 value: Value<'a>,
1071 source: SourceSpan,
1072 raw: &'a str,
1073 preserve_raw: bool,
1074 ) -> Self {
1075 let value_raw = locate_definition_value(raw);
1076 Self {
1077 name: Cow::Borrowed(name),
1078 value: ParsedValue {
1079 value,
1080 raw: if preserve_raw {
1081 value_raw.map(Cow::Borrowed)
1082 } else {
1083 None
1084 },
1085 source: None,
1086 expanded: None,
1087 delimiter: value_raw.map(value_delimiter),
1088 },
1089 source: Some(source),
1090 raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1091 }
1092 }
1093
1094 #[must_use]
1096 pub fn into_owned(self) -> ParsedString<'static> {
1097 ParsedString {
1098 name: Cow::Owned(self.name.into_owned()),
1099 value: self.value.into_owned(),
1100 source: self.source,
1101 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1102 }
1103 }
1104
1105 pub(crate) fn from_stream_definition_owned(
1106 name: &'a str,
1107 value: Value<'a>,
1108 source: SourceSpan,
1109 raw: &'a str,
1110 ) -> ParsedString<'static> {
1111 let value_raw = locate_definition_value(raw);
1112 ParsedString {
1113 name: Cow::Owned(name.to_string()),
1114 value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1115 source: Some(source),
1116 raw: None,
1117 }
1118 }
1119}
1120
1121#[derive(Debug, Clone, PartialEq)]
1123pub struct ParsedPreamble<'a> {
1124 pub value: ParsedValue<'a>,
1126 pub source: Option<SourceSpan>,
1128 pub raw: Option<Cow<'a, str>>,
1130}
1131
1132impl<'a> ParsedPreamble<'a> {
1133 #[must_use]
1135 pub fn from_preamble(preamble: Preamble<'a>) -> Self {
1136 Self {
1137 value: ParsedValue::new(preamble.value),
1138 source: preamble.source,
1139 raw: None,
1140 }
1141 }
1142
1143 pub(crate) fn from_stream_preamble(
1144 value: Value<'a>,
1145 source: SourceSpan,
1146 raw: &'a str,
1147 preserve_raw: bool,
1148 ) -> Self {
1149 let value_raw = locate_preamble_value(raw);
1150 Self {
1151 value: ParsedValue {
1152 value,
1153 raw: if preserve_raw {
1154 value_raw.map(Cow::Borrowed)
1155 } else {
1156 None
1157 },
1158 source: None,
1159 expanded: None,
1160 delimiter: value_raw.map(value_delimiter),
1161 },
1162 source: Some(source),
1163 raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1164 }
1165 }
1166
1167 #[must_use]
1169 pub fn into_owned(self) -> ParsedPreamble<'static> {
1170 ParsedPreamble {
1171 value: self.value.into_owned(),
1172 source: self.source,
1173 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1174 }
1175 }
1176
1177 pub(crate) fn from_stream_preamble_owned(
1178 value: Value<'a>,
1179 source: SourceSpan,
1180 raw: &'a str,
1181 ) -> ParsedPreamble<'static> {
1182 let value_raw = locate_preamble_value(raw);
1183 ParsedPreamble {
1184 value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1185 source: Some(source),
1186 raw: None,
1187 }
1188 }
1189}
1190
1191#[derive(Debug, Clone, PartialEq, Eq)]
1193pub struct ParsedComment<'a> {
1194 pub text: Cow<'a, str>,
1196 pub source: Option<SourceSpan>,
1198 pub raw: Option<Cow<'a, str>>,
1200}
1201
1202impl<'a> ParsedComment<'a> {
1203 #[must_use]
1205 pub fn from_comment(comment: Comment<'a>) -> Self {
1206 Self {
1207 text: comment.text,
1208 source: comment.source,
1209 raw: None,
1210 }
1211 }
1212
1213 pub(crate) fn from_stream_comment(
1214 text: &'a str,
1215 source: SourceSpan,
1216 raw: &'a str,
1217 preserve_raw: bool,
1218 ) -> Self {
1219 Self {
1220 text: Cow::Borrowed(text),
1221 source: Some(source),
1222 raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1223 }
1224 }
1225
1226 #[must_use]
1228 pub fn into_owned(self) -> ParsedComment<'static> {
1229 ParsedComment {
1230 text: Cow::Owned(self.text.into_owned()),
1231 source: self.source,
1232 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1233 }
1234 }
1235
1236 pub(crate) fn from_stream_comment_owned(
1237 text: &'a str,
1238 source: SourceSpan,
1239 ) -> ParsedComment<'static> {
1240 ParsedComment {
1241 text: Cow::Owned(text.to_string()),
1242 source: Some(source),
1243 raw: None,
1244 }
1245 }
1246}
1247
1248#[derive(Debug, Clone, PartialEq, Eq)]
1250pub struct ParsedFailedBlock<'a> {
1251 pub raw: Cow<'a, str>,
1253 pub error: String,
1255 pub source: Option<SourceSpan>,
1257 pub diagnostics: Vec<Diagnostic>,
1259}
1260
1261impl<'a> ParsedFailedBlock<'a> {
1262 #[must_use]
1264 pub fn from_failed_block(
1265 index: usize,
1266 failed: FailedBlock<'a>,
1267 source_map: Option<&SourceMap<'_>>,
1268 ) -> Self {
1269 let diagnostic = diagnostic_for_failed_block(index, &failed, source_map);
1270
1271 Self {
1272 raw: failed.raw,
1273 error: failed.error,
1274 source: failed.source,
1275 diagnostics: vec![diagnostic],
1276 }
1277 }
1278
1279 #[must_use]
1281 pub fn into_owned(self) -> ParsedFailedBlock<'static> {
1282 ParsedFailedBlock {
1283 raw: Cow::Owned(self.raw.into_owned()),
1284 error: self.error,
1285 source: self.source,
1286 diagnostics: self.diagnostics,
1287 }
1288 }
1289}
1290
#[derive(Debug, Clone)]
/// A parsed library together with per-block metadata and diagnostics.
pub struct ParsedDocument<'a> {
    // The underlying library the document was built from.
    library: Library<'a>,
    // Input sources of the document.
    sources: Vec<ParsedSource<'a>>,
    // Parsed entries.
    entries: Vec<ParsedEntry<'a>>,
    // Parsed string definitions.
    strings: Vec<ParsedString<'a>>,
    // Parsed preambles.
    preambles: Vec<ParsedPreamble<'a>>,
    // Parsed comments.
    comments: Vec<ParsedComment<'a>>,
    // Blocks that failed to parse.
    failed_blocks: Vec<ParsedFailedBlock<'a>>,
    // Block kinds with their indices, as reported by the library.
    blocks: Vec<ParsedBlock>,
    // Document-level diagnostics.
    diagnostics: Vec<Diagnostic>,
    // Overall parse status.
    status: ParseStatus,
}
1305
1306impl<'a> ParsedDocument<'a> {
1307 #[must_use]
1309 pub fn from_library(library: Library<'a>) -> Self {
1310 Self::from_library_with_sources(
1311 library,
1312 vec![ParsedSource {
1313 id: SourceId::new(0),
1314 name: None,
1315 }],
1316 )
1317 }
1318
1319 pub(crate) fn from_library_with_sources(
1320 library: Library<'a>,
1321 sources: Vec<ParsedSource<'a>>,
1322 ) -> Self {
1323 Self::from_library_with_source_map(library, sources, None)
1324 }
1325
1326 pub(crate) fn from_library_with_source_map(
1327 library: Library<'a>,
1328 sources: Vec<ParsedSource<'a>>,
1329 source_map: Option<&SourceMap<'_>>,
1330 ) -> Self {
1331 let entries: Vec<ParsedEntry<'a>> = library
1332 .entries()
1333 .iter()
1334 .cloned()
1335 .enumerate()
1336 .map(|(index, entry)| ParsedEntry::from_entry(entry, library.entry_source(index)))
1337 .collect();
1338 let strings: Vec<ParsedString<'a>> = library
1339 .strings()
1340 .iter()
1341 .cloned()
1342 .map(ParsedString::from_definition)
1343 .collect();
1344 let preambles: Vec<ParsedPreamble<'a>> = library
1345 .preambles()
1346 .iter()
1347 .cloned()
1348 .map(ParsedPreamble::from_preamble)
1349 .collect();
1350 let comments = library
1351 .comments()
1352 .iter()
1353 .cloned()
1354 .map(ParsedComment::from_comment)
1355 .collect();
1356 let failed_blocks = library
1357 .failed_blocks()
1358 .iter()
1359 .cloned()
1360 .enumerate()
1361 .map(|(index, failed)| ParsedFailedBlock::from_failed_block(index, failed, source_map))
1362 .collect::<Vec<_>>();
1363 let diagnostics = failed_blocks
1364 .iter()
1365 .flat_map(|failed| failed.diagnostics.iter().cloned())
1366 .collect::<Vec<_>>();
1367 let blocks = library
1368 .block_kinds()
1369 .iter()
1370 .map(|kind| match *kind {
1371 BlockKind::Entry(index) => ParsedBlock::Entry(index),
1372 BlockKind::String(index) => ParsedBlock::String(index),
1373 BlockKind::Preamble(index) => ParsedBlock::Preamble(index),
1374 BlockKind::Comment(index) => ParsedBlock::Comment(index),
1375 BlockKind::Failed(index) => ParsedBlock::Failed(index),
1376 })
1377 .collect();
1378 let status = if failed_blocks.is_empty() {
1379 ParseStatus::Ok
1380 } else if entries.is_empty() && strings.is_empty() && preambles.is_empty() {
1381 ParseStatus::Failed
1382 } else {
1383 ParseStatus::Partial
1384 };
1385
1386 Self {
1387 library,
1388 sources,
1389 entries,
1390 strings,
1391 preambles,
1392 comments,
1393 failed_blocks,
1394 blocks,
1395 diagnostics,
1396 status,
1397 }
1398 }
1399
1400 pub(crate) const fn from_parsed_parts(
1401 library: Library<'a>,
1402 sources: Vec<ParsedSource<'a>>,
1403 entries: Vec<ParsedEntry<'a>>,
1404 strings: Vec<ParsedString<'a>>,
1405 preambles: Vec<ParsedPreamble<'a>>,
1406 comments: Vec<ParsedComment<'a>>,
1407 blocks: Vec<ParsedBlock>,
1408 ) -> Self {
1409 Self {
1410 library,
1411 sources,
1412 entries,
1413 strings,
1414 preambles,
1415 comments,
1416 failed_blocks: Vec::new(),
1417 blocks,
1418 diagnostics: Vec::new(),
1419 status: ParseStatus::Ok,
1420 }
1421 }
1422
1423 pub(crate) fn apply_entry_locations(
1424 &mut self,
1425 entry_index: usize,
1426 raw: &'a str,
1427 source_map: &SourceMap<'a>,
1428 preserve_raw: bool,
1429 ) {
1430 let Some(entry) = self.entries.get_mut(entry_index) else {
1431 return;
1432 };
1433 let Some(entry_span) = entry.source else {
1434 return;
1435 };
1436 let Some(locations) = locate_entry(raw, entry_span.byte_start, entry.fields.len()) else {
1437 return;
1438 };
1439
1440 entry.entry_type_source =
1441 Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
1442 entry.key_source = Some(source_map.span(locations.key.0, locations.key.1));
1443 entry.delimiter = Some(locations.delimiter);
1444 if preserve_raw {
1445 entry.raw = Some(Cow::Borrowed(raw));
1446 }
1447
1448 for (field, location) in entry.fields.iter_mut().zip(locations.fields) {
1449 field.source = Some(source_map.span(location.whole.0, location.whole.1));
1450 field.name_source = Some(source_map.span(location.name.0, location.name.1));
1451 field.value.source = Some(source_map.span(location.value.0, location.value.1));
1452 field.value_source = field.value.source;
1453 field.value.delimiter = Some(location.value_delimiter);
1454
1455 if preserve_raw {
1456 if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
1457 field.raw = Some(Cow::Borrowed(source));
1458 }
1459 if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
1460 field.value.raw = Some(Cow::Borrowed(source));
1461 }
1462 }
1463 }
1464 }
1465
1466 pub(crate) fn apply_raw_items(&mut self, raw_items: &[RawBuildItem<'a>]) {
1467 let mut string_index = 0;
1468 let mut preamble_index = 0;
1469 let mut comment_index = 0;
1470
1471 for raw_item in raw_items {
1472 match raw_item {
1473 RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, raw) => {
1474 if let Some(parsed) = self.strings.get_mut(string_index) {
1475 parsed.raw = Some(Cow::Borrowed(raw));
1476 if let Some(value_raw) = locate_definition_value(raw) {
1477 parsed.value.raw = Some(Cow::Borrowed(value_raw));
1478 parsed.value.delimiter = Some(value_delimiter(value_raw));
1479 }
1480 }
1481 string_index += 1;
1482 }
1483 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(_), _, raw) => {
1484 if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1485 parsed.raw = Some(Cow::Borrowed(raw));
1486 if let Some(value_raw) = locate_preamble_value(raw) {
1487 parsed.value.raw = Some(Cow::Borrowed(value_raw));
1488 parsed.value.delimiter = Some(value_delimiter(value_raw));
1489 }
1490 }
1491 preamble_index += 1;
1492 }
1493 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, raw) => {
1494 if let Some(parsed) = self.comments.get_mut(comment_index) {
1495 parsed.raw = Some(Cow::Borrowed(raw));
1496 }
1497 comment_index += 1;
1498 }
1499 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, _)
1500 | RawBuildItem::Failed(_) => {}
1501 }
1502 }
1503 }
1504
1505 pub(crate) fn apply_parsed_values(&mut self, raw_items: &[RawBuildItem<'a>]) {
1506 let mut entry_index = 0;
1507 let mut string_index = 0;
1508 let mut preamble_index = 0;
1509
1510 for raw_item in raw_items {
1511 match raw_item {
1512 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(raw_entry), _, _) => {
1513 if let Some(entry) = self.entries.get_mut(entry_index) {
1514 for (field, raw_field) in entry.fields.iter_mut().zip(&raw_entry.fields) {
1515 field.value.value = raw_field.value.clone();
1516 field.value.expanded = None;
1517 }
1518 }
1519 entry_index += 1;
1520 }
1521 RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, value), _, _) => {
1522 if let Some(parsed) = self.strings.get_mut(string_index) {
1523 parsed.value.value = value.clone();
1524 parsed.value.expanded = None;
1525 }
1526 string_index += 1;
1527 }
1528 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), _, _) => {
1529 if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1530 parsed.value.value = value.clone();
1531 parsed.value.expanded = None;
1532 }
1533 preamble_index += 1;
1534 }
1535 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, _)
1536 | RawBuildItem::Failed(_) => {}
1537 }
1538 }
1539 }
1540
1541 pub(crate) fn populate_expanded_values(
1542 &mut self,
1543 options: ExpansionOptions,
1544 ) -> crate::Result<()> {
1545 let strings = &self.strings;
1546 for entry in &mut self.entries {
1547 for field in &mut entry.fields {
1548 field.value.expanded = Some(Cow::Owned(expand_value_with_options(
1549 &field.value.value,
1550 strings,
1551 options,
1552 &mut Vec::new(),
1553 )?));
1554 }
1555 }
1556 for preamble in &mut self.preambles {
1557 preamble.value.expanded = Some(Cow::Owned(expand_value_with_options(
1558 &preamble.value.value,
1559 strings,
1560 options,
1561 &mut Vec::new(),
1562 )?));
1563 }
1564 Ok(())
1565 }
1566
1567 pub(crate) fn recover_partial_entries(
1568 &mut self,
1569 source_map: &SourceMap<'a>,
1570 preserve_raw: bool,
1571 ) {
1572 let old_entries = std::mem::take(&mut self.entries);
1573 let old_failed_blocks = std::mem::take(&mut self.failed_blocks);
1574 let old_blocks = std::mem::take(&mut self.blocks);
1575 let mut new_entries = Vec::with_capacity(old_entries.len());
1576 let mut new_failed_blocks = Vec::new();
1577 let mut new_blocks = Vec::with_capacity(old_blocks.len());
1578
1579 for block in old_blocks {
1580 match block {
1581 ParsedBlock::Entry(index) => {
1582 let new_index = new_entries.len();
1583 if let Some(entry) = old_entries.get(index) {
1584 new_entries.push(entry.clone());
1585 new_blocks.push(ParsedBlock::Entry(new_index));
1586 }
1587 }
1588 ParsedBlock::Failed(index) => {
1589 let Some(failed) = old_failed_blocks.get(index) else {
1590 continue;
1591 };
1592 let new_index = new_entries.len();
1593 if let Some(partial) =
1594 recover_partial_entry(failed, source_map, new_index, preserve_raw)
1595 {
1596 new_entries.push(partial);
1597 new_blocks.push(ParsedBlock::Entry(new_index));
1598 } else {
1599 let failed_index = new_failed_blocks.len();
1600 new_failed_blocks.push(failed.clone());
1601 new_blocks.push(ParsedBlock::Failed(failed_index));
1602 }
1603 }
1604 ParsedBlock::String(index) => new_blocks.push(ParsedBlock::String(index)),
1605 ParsedBlock::Preamble(index) => new_blocks.push(ParsedBlock::Preamble(index)),
1606 ParsedBlock::Comment(index) => new_blocks.push(ParsedBlock::Comment(index)),
1607 }
1608 }
1609
1610 self.entries = new_entries;
1611 self.failed_blocks = new_failed_blocks;
1612 self.blocks = new_blocks;
1613 self.rebuild_diagnostics_and_status();
1614 }
1615
1616 fn rebuild_diagnostics_and_status(&mut self) {
1617 self.diagnostics.clear();
1618 self.diagnostics.extend(
1619 self.entries
1620 .iter()
1621 .flat_map(|entry| entry.diagnostics.iter().cloned()),
1622 );
1623 self.diagnostics.extend(
1624 self.failed_blocks
1625 .iter()
1626 .flat_map(|failed| failed.diagnostics.iter().cloned()),
1627 );
1628
1629 self.status = if self.diagnostics.is_empty() {
1630 ParseStatus::Ok
1631 } else if self.entries.is_empty() && self.strings.is_empty() && self.preambles.is_empty() {
1632 ParseStatus::Failed
1633 } else {
1634 ParseStatus::Partial
1635 };
1636 }
1637
1638 pub(crate) fn failed_from_error(
1639 sources: Vec<ParsedSource<'a>>,
1640 source_map: &SourceMap<'a>,
1641 error: &crate::Error,
1642 ) -> Self {
1643 let (byte, message, fallback_snippet) = match error {
1644 crate::Error::ParseError {
1645 line,
1646 column,
1647 message,
1648 snippet,
1649 } => (
1650 source_map.byte_at_line_column(*line, *column).unwrap_or(0),
1651 message.clone(),
1652 snippet.clone(),
1653 ),
1654 other => (0, other.to_string(), None),
1655 };
1656 let raw = source_map.input().get(byte..).unwrap_or_default();
1657 let failed_source = source_map.span(byte, source_map.len());
1658 let failed = FailedBlock {
1659 raw: Cow::Borrowed(raw),
1660 error: message.clone(),
1661 source: Some(failed_source),
1662 };
1663 let diagnostic = diagnostic_for_raw_failure(
1664 0,
1665 raw,
1666 message,
1667 Some(failed_source),
1668 Some(source_map),
1669 byte,
1670 fallback_snippet,
1671 );
1672 let failed_block = ParsedFailedBlock {
1673 raw: failed.raw,
1674 error: failed.error,
1675 source: failed.source,
1676 diagnostics: vec![diagnostic.clone()],
1677 };
1678
1679 Self {
1680 library: Library::new(),
1681 sources,
1682 entries: Vec::new(),
1683 strings: Vec::new(),
1684 preambles: Vec::new(),
1685 comments: Vec::new(),
1686 failed_blocks: vec![failed_block],
1687 blocks: vec![ParsedBlock::Failed(0)],
1688 diagnostics: vec![diagnostic],
1689 status: ParseStatus::Failed,
1690 }
1691 }
1692
    /// Returns a reference to the [`Library`] stored in this document.
    #[must_use]
    pub const fn library(&self) -> &Library<'a> {
        &self.library
    }
1698
    /// Consumes the document and returns its [`Library`], dropping everything else.
    #[must_use]
    pub fn into_library(self) -> Library<'a> {
        self.library
    }
1704
1705 #[must_use]
1707 pub fn sources(&self) -> &[ParsedSource<'a>] {
1708 &self.sources
1709 }
1710
1711 #[must_use]
1713 pub fn entries(&self) -> &[ParsedEntry<'a>] {
1714 &self.entries
1715 }
1716
1717 #[must_use]
1719 pub fn entries_mut(&mut self) -> &mut [ParsedEntry<'a>] {
1720 &mut self.entries
1721 }
1722
1723 pub fn push_entry(&mut self, entry: ParsedEntry<'a>) {
1725 let index = self.entries.len();
1726 self.entries.push(entry);
1727 self.blocks.push(ParsedBlock::Entry(index));
1728 }
1729
1730 #[must_use]
1732 pub fn entry_mut_by_key(&mut self, key: &str) -> Option<&mut ParsedEntry<'a>> {
1733 self.entries.iter_mut().find(|entry| entry.key == key)
1734 }
1735
1736 #[must_use]
1738 pub fn rename_key(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> bool {
1739 let Some(entry) = self.entry_mut_by_key(old) else {
1740 return false;
1741 };
1742 entry.rename_key(new);
1743 true
1744 }
1745
1746 #[must_use]
1748 pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
1749 self.entries
1750 .iter_mut()
1751 .map(|entry| entry.remove_export_fields(names))
1752 .sum()
1753 }
1754
1755 #[must_use]
1757 pub fn strings(&self) -> &[ParsedString<'a>] {
1758 &self.strings
1759 }
1760
1761 #[must_use]
1763 pub fn preambles(&self) -> &[ParsedPreamble<'a>] {
1764 &self.preambles
1765 }
1766
1767 #[must_use]
1769 pub fn comments(&self) -> &[ParsedComment<'a>] {
1770 &self.comments
1771 }
1772
1773 #[must_use]
1775 pub fn failed_blocks(&self) -> &[ParsedFailedBlock<'a>] {
1776 &self.failed_blocks
1777 }
1778
1779 #[must_use]
1781 pub fn blocks(&self) -> &[ParsedBlock] {
1782 &self.blocks
1783 }
1784
1785 #[must_use]
1787 pub fn diagnostics(&self) -> &[Diagnostic] {
1788 &self.diagnostics
1789 }
1790
    /// Returns the overall [`ParseStatus`] stored for this document.
    #[must_use]
    pub const fn status(&self) -> ParseStatus {
        self.status
    }
1796
1797 #[must_use]
1799 pub fn summary(&self) -> ParseSummary {
1800 let mut warnings = 0;
1801 let mut errors = 0;
1802 let mut infos = 0;
1803
1804 for diagnostic in &self.diagnostics {
1805 match diagnostic.severity {
1806 DiagnosticSeverity::Error => errors += 1,
1807 DiagnosticSeverity::Warning => warnings += 1,
1808 DiagnosticSeverity::Info => infos += 1,
1809 }
1810 }
1811
1812 ParseSummary {
1813 status: self.status,
1814 entries: self.entries.len(),
1815 warnings,
1816 errors,
1817 infos,
1818 failed_blocks: self.failed_blocks.len(),
1819 recovered_blocks: self
1820 .entries
1821 .iter()
1822 .filter(|entry| entry.status == ParsedEntryStatus::Partial)
1823 .count(),
1824 }
1825 }
1826
1827 pub fn expand_value(
1832 &self,
1833 value: &Value<'a>,
1834 options: ExpansionOptions,
1835 ) -> crate::Result<String> {
1836 expand_value_with_options(value, &self.strings, options, &mut Vec::new())
1837 }
1838
1839 #[must_use]
1841 pub fn into_owned(self) -> ParsedDocument<'static> {
1842 ParsedDocument {
1843 library: self.library.into_owned(),
1844 sources: self
1845 .sources
1846 .into_iter()
1847 .map(ParsedSource::into_owned)
1848 .collect(),
1849 entries: self
1850 .entries
1851 .into_iter()
1852 .map(ParsedEntry::into_owned)
1853 .collect(),
1854 strings: self
1855 .strings
1856 .into_iter()
1857 .map(ParsedString::into_owned)
1858 .collect(),
1859 preambles: self
1860 .preambles
1861 .into_iter()
1862 .map(ParsedPreamble::into_owned)
1863 .collect(),
1864 comments: self
1865 .comments
1866 .into_iter()
1867 .map(ParsedComment::into_owned)
1868 .collect(),
1869 failed_blocks: self
1870 .failed_blocks
1871 .into_iter()
1872 .map(ParsedFailedBlock::into_owned)
1873 .collect(),
1874 blocks: self.blocks,
1875 diagnostics: self.diagnostics,
1876 status: self.status,
1877 }
1878 }
1879}
1880
1881impl ParsedDocument<'static> {
1882 pub(crate) fn apply_raw_from_source(&mut self, source: &str) {
1883 for entry in &mut self.entries {
1884 if entry.raw.is_none() {
1885 entry.raw = owned_source_slice(source, entry.source);
1886 }
1887 for field in &mut entry.fields {
1888 if field.raw.is_none() {
1889 field.raw = owned_source_slice(source, field.source);
1890 }
1891 if field.value.raw.is_none() {
1892 field.value.raw = owned_source_slice(source, field.value_source);
1893 }
1894 }
1895 }
1896
1897 for string in &mut self.strings {
1898 if string.raw.is_none() {
1899 string.raw = owned_source_slice(source, string.source);
1900 }
1901 }
1902 for preamble in &mut self.preambles {
1903 if preamble.raw.is_none() {
1904 preamble.raw = owned_source_slice(source, preamble.source);
1905 }
1906 }
1907 for comment in &mut self.comments {
1908 if comment.raw.is_none() {
1909 comment.raw = owned_source_slice(source, comment.source);
1910 }
1911 }
1912 }
1913}
1914
1915fn owned_source_slice(source: &str, span: Option<SourceSpan>) -> Option<Cow<'static, str>> {
1916 let span = span?;
1917 source
1918 .get(span.byte_start..span.byte_end)
1919 .map(|raw| Cow::Owned(raw.to_string()))
1920}
1921
1922fn expand_value_with_options(
1923 value: &Value<'_>,
1924 strings: &[ParsedString<'_>],
1925 options: ExpansionOptions,
1926 stack: &mut Vec<String>,
1927) -> crate::Result<String> {
1928 match value {
1929 Value::Literal(text) => Ok(normalize_text_projection(text)),
1930 Value::Number(number) => Ok(number.to_string()),
1931 Value::Concat(parts) => {
1932 let mut expanded = String::new();
1933 for part in parts.iter() {
1934 expanded.push_str(&expand_value_with_options(part, strings, options, stack)?);
1935 }
1936 Ok(expanded)
1937 }
1938 Value::Variable(name) => expand_variable(name, strings, options, stack),
1939 }
1940}
1941
1942fn expand_variable(
1943 name: &str,
1944 strings: &[ParsedString<'_>],
1945 options: ExpansionOptions,
1946 stack: &mut Vec<String>,
1947) -> crate::Result<String> {
1948 if options.expand_strings {
1949 if let Some(definition) = strings
1950 .iter()
1951 .rev()
1952 .find(|definition| definition.name.as_ref() == name)
1953 {
1954 if stack.iter().any(|active| active == name) {
1955 return Err(crate::Error::CircularReference(name.to_string()));
1956 }
1957 stack.push(name.to_string());
1958 let expanded =
1959 expand_value_with_options(&definition.value.value, strings, options, stack);
1960 stack.pop();
1961 return expanded;
1962 }
1963 }
1964
1965 if options.expand_months {
1966 if let Some(month) = month_expansion(name) {
1967 return Ok(month.to_string());
1968 }
1969 }
1970
1971 match options.unresolved_variables {
1972 UnresolvedVariablePolicy::Preserve => Ok(name.to_string()),
1973 UnresolvedVariablePolicy::Placeholder => Ok(format!("{{undefined:{name}}}")),
1974 UnresolvedVariablePolicy::Error => Err(crate::Error::UndefinedVariable(name.to_string())),
1975 }
1976}
1977
/// Maps a three-letter month abbreviation to the full English month name.
///
/// The comparison ignores ASCII case (`"jan"`, `"JAN"` and `"Jan"` all yield
/// `"January"`). Anything that is not exactly one of the twelve abbreviations
/// returns `None`.
fn month_expansion(name: &str) -> Option<&'static str> {
    const MONTHS: [(&str, &str); 12] = [
        ("jan", "January"),
        ("feb", "February"),
        ("mar", "March"),
        ("apr", "April"),
        ("may", "May"),
        ("jun", "June"),
        ("jul", "July"),
        ("aug", "August"),
        ("sep", "September"),
        ("oct", "October"),
        ("nov", "November"),
        ("dec", "December"),
    ];

    // `eq_ignore_ascii_case` also compares lengths, so no separate length
    // check and no lowercase copy of `name` are needed.
    MONTHS
        .iter()
        .find(|(abbreviation, _)| name.eq_ignore_ascii_case(abbreviation))
        .map(|&(_, full_name)| full_name)
}
1999
/// Byte locations found for an entry by `locate_entry`.
///
/// Ranges are `(start, end)` byte offsets that already include the absolute
/// start offset passed to `locate_entry`.
#[derive(Debug, Clone)]
struct EntryLocations {
    /// Range of the entry type identifier following `@`.
    entry_type: (usize, usize),
    /// Range of the citation key.
    key: (usize, usize),
    /// Delimiter style that opens the entry body (braces or parentheses).
    delimiter: EntryDelimiter,
    /// Locations of the fields that could be located, in order.
    fields: Vec<FieldLocations>,
}
2007
/// Byte locations of a single field inside an entry, as `(start, end)` offsets.
#[derive(Debug, Clone, Copy)]
struct FieldLocations {
    /// Range of the whole field, including the trailing comma when one follows the value.
    whole: (usize, usize),
    /// Range of the field name.
    name: (usize, usize),
    /// Range of the field value with trailing ASCII whitespace trimmed.
    value: (usize, usize),
    /// Delimiter style detected for the value text.
    value_delimiter: ValueDelimiter,
}
2015
/// Result of classifying why a raw block failed to parse.
#[derive(Debug, Clone)]
struct FailureClassification {
    /// Diagnostic code describing the kind of failure.
    code: DiagnosticCode,
    /// `(start, end)` byte range of the offending position, relative to the
    /// start of the raw block that was classified.
    range: (usize, usize),
}
2021
2022fn diagnostic_for_failed_block(
2023 index: usize,
2024 failed: &FailedBlock<'_>,
2025 source_map: Option<&SourceMap<'_>>,
2026) -> Diagnostic {
2027 let absolute_start = failed.source.map_or(0, |source| source.byte_start);
2028 diagnostic_for_raw_failure(
2029 index,
2030 &failed.raw,
2031 failed.error.clone(),
2032 failed.source,
2033 source_map,
2034 absolute_start,
2035 None,
2036 )
2037}
2038
2039fn diagnostic_for_raw_failure(
2040 index: usize,
2041 raw: &str,
2042 fallback_message: String,
2043 fallback_source: Option<SourceSpan>,
2044 source_map: Option<&SourceMap<'_>>,
2045 absolute_start: usize,
2046 fallback_snippet: Option<String>,
2047) -> Diagnostic {
2048 let classification = classify_failure(raw);
2049 let source = source_map
2050 .map(|map| {
2051 map.span(
2052 absolute_start + classification.range.0,
2053 absolute_start + classification.range.1,
2054 )
2055 })
2056 .or(fallback_source);
2057 let snippet = source
2058 .and_then(|span| source_map.and_then(|map| map.snippet(span, 160)))
2059 .or(fallback_snippet)
2060 .or_else(|| Some(raw.chars().take(160).collect()));
2061
2062 let mut diagnostic = Diagnostic::error(
2063 classification.code.clone(),
2064 diagnostic_message(&classification.code, fallback_message),
2065 DiagnosticTarget::FailedBlock(index),
2066 source,
2067 );
2068 diagnostic.snippet = snippet;
2069 diagnostic
2070}
2071
2072fn recover_partial_entry<'a>(
2073 failed: &ParsedFailedBlock<'a>,
2074 source_map: &SourceMap<'a>,
2075 entry_index: usize,
2076 preserve_raw: bool,
2077) -> Option<ParsedEntry<'a>> {
2078 let raw: &'a str = match &failed.raw {
2079 Cow::Borrowed(raw) => raw,
2080 Cow::Owned(_) => return None,
2081 };
2082 let absolute_start = failed.source?.byte_start;
2083 let header = parse_partial_header(raw, source_map, absolute_start)?;
2084 let fields = recover_partial_fields(
2085 raw,
2086 source_map,
2087 absolute_start,
2088 header.field_start,
2089 header.closing,
2090 preserve_raw,
2091 );
2092 if fields.is_empty() {
2093 return None;
2094 }
2095
2096 let diagnostic = diagnostic_for_partial_entry(entry_index, failed, source_map);
2097
2098 Some(ParsedEntry {
2099 ty: header.ty,
2100 key: header.key,
2101 fields,
2102 status: ParsedEntryStatus::Partial,
2103 source: failed.source,
2104 entry_type_source: header.entry_type_source,
2105 key_source: header.key_source,
2106 delimiter: Some(header.delimiter),
2107 raw: preserve_raw.then(|| failed.raw.clone()),
2108 removed_field_sources: None,
2109 diagnostics: vec![diagnostic],
2110 })
2111}
2112
/// Crate-visible entry point for partial-entry recovery.
///
/// Forwards all arguments unchanged to `recover_partial_entry` and returns
/// its result.
pub(crate) fn recover_partial_stream_entry<'a>(
    failed: &ParsedFailedBlock<'a>,
    source_map: &SourceMap<'a>,
    entry_index: usize,
    preserve_raw: bool,
) -> Option<ParsedEntry<'a>> {
    recover_partial_entry(failed, source_map, entry_index, preserve_raw)
}
2121
/// Header information recovered from the start of a failed entry block.
struct PartialHeader<'a> {
    /// Entry type parsed from the identifier following `@`.
    ty: EntryType<'a>,
    /// Citation key, borrowed from the raw block.
    key: Cow<'a, str>,
    /// Source span of the entry type identifier.
    entry_type_source: Option<SourceSpan>,
    /// Source span of the citation key.
    key_source: Option<SourceSpan>,
    /// Delimiter style that opens the entry body.
    delimiter: EntryDelimiter,
    /// Byte offset (relative to the raw block) just past the comma after the key.
    field_start: usize,
    /// Byte that closes the entry body (`}` or `)`).
    closing: u8,
}
2131
2132fn parse_partial_header<'a>(
2133 raw: &'a str,
2134 source_map: &SourceMap<'a>,
2135 absolute_start: usize,
2136) -> Option<PartialHeader<'a>> {
2137 let bytes = raw.as_bytes();
2138 let mut pos = bytes.iter().position(|byte| *byte == b'@')? + 1;
2139
2140 let entry_type_start = pos;
2141 pos += scan_identifier(&bytes[pos..]);
2142 if pos == entry_type_start {
2143 return None;
2144 }
2145 let ty = EntryType::parse(&raw[entry_type_start..pos]);
2146 let entry_type_source =
2147 Some(source_map.span(absolute_start + entry_type_start, absolute_start + pos));
2148
2149 pos = skip_ascii_whitespace(bytes, pos);
2150 let (delimiter, closing) = match *bytes.get(pos)? {
2151 b'{' => (EntryDelimiter::Braces, b'}'),
2152 b'(' => (EntryDelimiter::Parentheses, b')'),
2153 _ => return None,
2154 };
2155 pos += 1;
2156 pos = skip_ascii_whitespace(bytes, pos);
2157
2158 let key_start = pos;
2159 pos += scan_identifier(&bytes[pos..]);
2160 if pos == key_start {
2161 return None;
2162 }
2163 let key = Cow::Borrowed(&raw[key_start..pos]);
2164 let key_source = Some(source_map.span(absolute_start + key_start, absolute_start + pos));
2165
2166 pos = skip_ascii_whitespace(bytes, pos);
2167 if bytes.get(pos) != Some(&b',') {
2168 return None;
2169 }
2170
2171 Some(PartialHeader {
2172 ty,
2173 key,
2174 entry_type_source,
2175 key_source,
2176 delimiter,
2177 field_start: pos + 1,
2178 closing,
2179 })
2180}
2181
2182fn recover_partial_fields<'a>(
2183 raw: &'a str,
2184 source_map: &SourceMap<'a>,
2185 absolute_start: usize,
2186 mut pos: usize,
2187 closing: u8,
2188 preserve_raw: bool,
2189) -> Vec<ParsedField<'a>> {
2190 let bytes = raw.as_bytes();
2191 let mut fields = Vec::new();
2192
2193 loop {
2194 pos = skip_ascii_whitespace(bytes, pos);
2195 let Some(&byte) = bytes.get(pos) else {
2196 break;
2197 };
2198 if byte == closing || byte == b'@' {
2199 break;
2200 }
2201
2202 let field_start = pos;
2203 let name_start = pos;
2204 pos += scan_identifier(&bytes[pos..]);
2205 if pos == name_start {
2206 break;
2207 }
2208 let name_end = pos;
2209 let name = Cow::Borrowed(&raw[name_start..name_end]);
2210
2211 pos = skip_ascii_whitespace(bytes, pos);
2212 if bytes.get(pos) != Some(&b'=') {
2213 break;
2214 }
2215 pos += 1;
2216 pos = skip_ascii_whitespace(bytes, pos);
2217
2218 let value_start = pos;
2219 let tail = &raw[value_start..];
2220 let mut value_input = tail;
2221 let Ok(value) = crate::parser::value::parse_value_field(&mut value_input) else {
2222 break;
2223 };
2224 let consumed = tail.len() - value_input.len();
2225 let value_end = trim_ascii_whitespace_end(bytes, value_start, value_start + consumed);
2226 let boundary = value_start + consumed;
2227 let field_end = match bytes.get(boundary) {
2228 Some(b',') => boundary + 1,
2229 Some(byte) if *byte == closing => boundary,
2230 Some(_) | None => boundary,
2231 };
2232
2233 let field_source =
2234 source_map.span(absolute_start + field_start, absolute_start + field_end);
2235 let value_source =
2236 source_map.span(absolute_start + value_start, absolute_start + value_end);
2237 fields.push(ParsedField {
2238 name,
2239 value: ParsedValue {
2240 value,
2241 raw: preserve_raw.then(|| Cow::Borrowed(&raw[value_start..value_end])),
2242 source: Some(value_source),
2243 expanded: None,
2244 delimiter: Some(value_delimiter(&raw[value_start..value_end])),
2245 },
2246 raw: preserve_raw.then(|| Cow::Borrowed(&raw[field_start..field_end])),
2247 source: Some(field_source),
2248 name_source: Some(
2249 source_map.span(absolute_start + name_start, absolute_start + name_end),
2250 ),
2251 value_source: Some(value_source),
2252 });
2253
2254 match bytes.get(boundary) {
2255 Some(b',') => pos = boundary + 1,
2256 Some(byte) if *byte == closing => break,
2257 _ => break,
2258 }
2259 }
2260
2261 fields
2262}
2263
2264fn diagnostic_for_partial_entry(
2265 entry_index: usize,
2266 failed: &ParsedFailedBlock<'_>,
2267 source_map: &SourceMap<'_>,
2268) -> Diagnostic {
2269 let absolute_start = failed.source.map_or(0, |source| source.byte_start);
2270 let mut diagnostic = diagnostic_for_raw_failure(
2271 entry_index,
2272 &failed.raw,
2273 failed.error.clone(),
2274 failed.source,
2275 Some(source_map),
2276 absolute_start,
2277 None,
2278 );
2279 diagnostic.target = DiagnosticTarget::Entry(entry_index);
2280 diagnostic
2281}
2282
2283fn diagnostic_message(code: &DiagnosticCode, fallback: String) -> String {
2284 match code.as_str() {
2285 "missing-entry-key" => "missing citation key".to_string(),
2286 "missing-field-separator" => "missing field separator".to_string(),
2287 "expected-field-name" => "expected field name".to_string(),
2288 "empty-field-value" => "empty field value".to_string(),
2289 "expected-value-atom" => "expected value atom".to_string(),
2290 "bad-field-boundary" => "expected comma or entry close after field value".to_string(),
2291 "bad-value-boundary" => "expected value after concatenation operator".to_string(),
2292 "unclosed-entry" => "entry ended before its closing delimiter".to_string(),
2293 "unclosed-braced-value" => "braced value ended before its closing brace".to_string(),
2294 "unclosed-quoted-value" => "quoted value ended before its closing quote".to_string(),
2295 _ => fallback,
2296 }
2297}
2298
2299fn classify_failure(raw: &str) -> FailureClassification {
2300 classify_failure_inner(raw).unwrap_or_else(|| FailureClassification {
2301 code: DiagnosticCode::PARSE_ERROR,
2302 range: empty_range(0),
2303 })
2304}
2305
2306fn classify_failure_inner(raw: &str) -> Option<FailureClassification> {
2307 let bytes = raw.as_bytes();
2308 let header = match parse_failure_header(bytes)? {
2309 Ok(header) => header,
2310 Err(classification) => return Some(classification),
2311 };
2312
2313 classify_failure_fields(bytes, header.pos, header.closing)
2314}
2315
/// Position information produced by a successfully scanned entry header.
#[derive(Debug, Clone, Copy)]
struct FailureHeader {
    /// Byte offset just past the comma that follows the citation key.
    pos: usize,
    /// Byte that closes the entry body (`}` or `)`).
    closing: u8,
}
2321
2322fn parse_failure_header(bytes: &[u8]) -> Option<Result<FailureHeader, FailureClassification>> {
2323 let mut pos = bytes.iter().position(|byte| *byte == b'@')?;
2324 pos += 1;
2325 pos += scan_identifier(&bytes[pos..]);
2326 pos = skip_ascii_whitespace(bytes, pos);
2327
2328 let opening = *bytes.get(pos)?;
2329 let closing = match opening {
2330 b'{' => b'}',
2331 b'(' => b')',
2332 _ => {
2333 return Some(Err(classification(
2334 DiagnosticCode::UNCLOSED_ENTRY,
2335 pos,
2336 bytes.len(),
2337 )));
2338 }
2339 };
2340 pos += 1;
2341 pos = skip_ascii_whitespace(bytes, pos);
2342
2343 let key_len = scan_identifier(&bytes[pos..]);
2344 if key_len == 0 {
2345 return Some(Err(classification(
2346 DiagnosticCode::MISSING_ENTRY_KEY,
2347 pos,
2348 bytes.len(),
2349 )));
2350 }
2351 pos += key_len;
2352 pos = skip_ascii_whitespace(bytes, pos);
2353 if bytes.get(pos) != Some(&b',') {
2354 return Some(Err(classification(
2355 DiagnosticCode::MISSING_FIELD_SEPARATOR,
2356 pos,
2357 bytes.len(),
2358 )));
2359 }
2360 pos += 1;
2361
2362 Some(Ok(FailureHeader { pos, closing }))
2363}
2364
2365fn classify_failure_fields(
2366 bytes: &[u8],
2367 mut pos: usize,
2368 closing: u8,
2369) -> Option<FailureClassification> {
2370 loop {
2371 pos = skip_ascii_whitespace(bytes, pos);
2372 let Some(&byte) = bytes.get(pos) else {
2373 return Some(classification(
2374 DiagnosticCode::UNCLOSED_ENTRY,
2375 pos,
2376 bytes.len(),
2377 ));
2378 };
2379 if byte == closing {
2380 return None;
2381 }
2382 if byte == b'@' {
2383 return Some(classification(
2384 DiagnosticCode::UNCLOSED_ENTRY,
2385 pos,
2386 bytes.len(),
2387 ));
2388 }
2389
2390 let field_name_len = scan_identifier(&bytes[pos..]);
2391 if field_name_len == 0 {
2392 return Some(classification(
2393 DiagnosticCode::EXPECTED_FIELD_NAME,
2394 pos,
2395 bytes.len(),
2396 ));
2397 }
2398 pos += field_name_len;
2399 pos = skip_ascii_whitespace(bytes, pos);
2400 if bytes.get(pos) != Some(&b'=') {
2401 return Some(classification(
2402 DiagnosticCode::MISSING_FIELD_SEPARATOR,
2403 pos,
2404 bytes.len(),
2405 ));
2406 }
2407 pos += 1;
2408 pos = skip_ascii_whitespace(bytes, pos);
2409
2410 let Some(&value_start) = bytes.get(pos) else {
2411 return Some(classification(
2412 DiagnosticCode::EMPTY_FIELD_VALUE,
2413 pos,
2414 bytes.len(),
2415 ));
2416 };
2417 if value_start == b',' || value_start == closing {
2418 return Some(classification(
2419 DiagnosticCode::EMPTY_FIELD_VALUE,
2420 pos,
2421 bytes.len(),
2422 ));
2423 }
2424 if value_start == b'#' {
2425 return Some(classification(
2426 DiagnosticCode::EXPECTED_VALUE_ATOM,
2427 pos,
2428 bytes.len(),
2429 ));
2430 }
2431
2432 match scan_value_sequence(bytes, pos, closing) {
2433 Ok(next_pos) => pos = next_pos,
2434 Err(classification) => return Some(classification),
2435 }
2436 }
2437}
2438
2439fn scan_value_sequence(
2440 bytes: &[u8],
2441 mut pos: usize,
2442 closing: u8,
2443) -> Result<usize, FailureClassification> {
2444 loop {
2445 pos = skip_ascii_whitespace(bytes, pos);
2446 let atom_start = pos;
2447 let Some(&byte) = bytes.get(pos) else {
2448 return Err(classification(
2449 DiagnosticCode::EXPECTED_VALUE_ATOM,
2450 pos,
2451 bytes.len(),
2452 ));
2453 };
2454
2455 match byte {
2456 b'"' => {
2457 pos = skip_quoted_checked(bytes, pos + 1).ok_or_else(|| {
2458 classification(
2459 DiagnosticCode::UNCLOSED_QUOTED_VALUE,
2460 atom_start,
2461 bytes.len(),
2462 )
2463 })?;
2464 }
2465 b'{' => {
2466 pos = skip_braced_checked(bytes, pos + 1).ok_or_else(|| {
2467 classification(
2468 DiagnosticCode::UNCLOSED_BRACED_VALUE,
2469 atom_start,
2470 bytes.len(),
2471 )
2472 })?;
2473 }
2474 b',' => {
2475 return Err(classification(
2476 DiagnosticCode::EMPTY_FIELD_VALUE,
2477 pos,
2478 bytes.len(),
2479 ));
2480 }
2481 b if b == closing => {
2482 return Err(classification(
2483 DiagnosticCode::EMPTY_FIELD_VALUE,
2484 pos,
2485 bytes.len(),
2486 ));
2487 }
2488 b'#' => {
2489 return Err(classification(
2490 DiagnosticCode::EXPECTED_VALUE_ATOM,
2491 pos,
2492 bytes.len(),
2493 ));
2494 }
2495 _ => {
2496 let identifier_len = scan_identifier(&bytes[pos..]);
2497 if identifier_len == 0 {
2498 return Err(classification(
2499 DiagnosticCode::EXPECTED_VALUE_ATOM,
2500 pos,
2501 bytes.len(),
2502 ));
2503 }
2504 pos += identifier_len;
2505 }
2506 }
2507
2508 pos = skip_ascii_whitespace(bytes, pos);
2509 let Some(&boundary) = bytes.get(pos) else {
2510 return Err(classification(
2511 DiagnosticCode::UNCLOSED_ENTRY,
2512 pos,
2513 bytes.len(),
2514 ));
2515 };
2516
2517 match boundary {
2518 b'#' => {
2519 let hash = pos;
2520 pos += 1;
2521 pos = skip_ascii_whitespace(bytes, pos);
2522 if matches!(bytes.get(pos), None | Some(b',' | b'#'))
2523 || bytes.get(pos) == Some(&closing)
2524 {
2525 return Err(classification(
2526 DiagnosticCode::BAD_VALUE_BOUNDARY,
2527 hash,
2528 bytes.len(),
2529 ));
2530 }
2531 }
2532 b',' => return Ok(pos + 1),
2533 b if b == closing => return Ok(pos),
2534 _ => {
2535 return Err(classification(
2536 DiagnosticCode::BAD_FIELD_BOUNDARY,
2537 pos,
2538 bytes.len(),
2539 ));
2540 }
2541 }
2542 }
2543}
2544
2545fn classification(code: DiagnosticCode, pos: usize, len: usize) -> FailureClassification {
2546 FailureClassification {
2547 code,
2548 range: single_byte_range(pos, len),
2549 }
2550}
2551
/// Returns the empty byte range that starts and ends at `pos`.
const fn empty_range(pos: usize) -> (usize, usize) {
    let (start, end) = (pos, pos);
    (start, end)
}
2555
/// Returns the range covering the single byte at `pos`, clamped to `len`.
///
/// Positions inside the input give `(pos, pos + 1)`; positions at or past the
/// end give the empty range `(len, len)`.
fn single_byte_range(pos: usize, len: usize) -> (usize, usize) {
    if pos < len {
        (pos, pos + 1)
    } else {
        (len, len)
    }
}
2560
2561fn locate_entry(raw: &str, absolute_start: usize, field_count: usize) -> Option<EntryLocations> {
2562 let bytes = raw.as_bytes();
2563 let mut pos = 0;
2564 if bytes.get(pos) != Some(&b'@') {
2565 return None;
2566 }
2567 pos += 1;
2568
2569 let entry_type_start = pos;
2570 pos += scan_identifier(&bytes[pos..]);
2571 if pos == entry_type_start {
2572 return None;
2573 }
2574 let entry_type = (absolute_start + entry_type_start, absolute_start + pos);
2575
2576 pos = skip_ascii_whitespace(bytes, pos);
2577 let opening = *bytes.get(pos)?;
2578 let (delimiter, closing) = match opening {
2579 b'{' => (EntryDelimiter::Braces, b'}'),
2580 b'(' => (EntryDelimiter::Parentheses, b')'),
2581 _ => return None,
2582 };
2583 pos += 1;
2584 pos = skip_ascii_whitespace(bytes, pos);
2585
2586 let key_start = pos;
2587 pos += scan_identifier(&bytes[pos..]);
2588 if pos == key_start {
2589 return None;
2590 }
2591 let key = (absolute_start + key_start, absolute_start + pos);
2592
2593 pos = skip_ascii_whitespace(bytes, pos);
2594 if bytes.get(pos) != Some(&b',') {
2595 return Some(EntryLocations {
2596 entry_type,
2597 key,
2598 delimiter,
2599 fields: Vec::new(),
2600 });
2601 }
2602 pos += 1;
2603
2604 let mut fields = Vec::with_capacity(field_count);
2605 while fields.len() < field_count {
2606 pos = skip_ascii_whitespace(bytes, pos);
2607 if bytes.get(pos) == Some(&closing) || pos >= bytes.len() {
2608 break;
2609 }
2610
2611 let field_start = pos;
2612 let name_start = pos;
2613 pos += scan_identifier(&bytes[pos..]);
2614 if pos == name_start {
2615 break;
2616 }
2617 let name_end = pos;
2618
2619 pos = skip_ascii_whitespace(bytes, pos);
2620 if bytes.get(pos) != Some(&b'=') {
2621 break;
2622 }
2623 pos += 1;
2624 pos = skip_ascii_whitespace(bytes, pos);
2625
2626 let value_start = pos;
2627 let boundary = find_value_boundary(bytes, pos, closing);
2628 let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2629 let mut whole_end = value_end;
2630 pos = boundary;
2631 if bytes.get(pos) == Some(&b',') {
2632 whole_end = pos + 1;
2633 pos += 1;
2634 }
2635
2636 fields.push(FieldLocations {
2637 whole: (absolute_start + field_start, absolute_start + whole_end),
2638 name: (absolute_start + name_start, absolute_start + name_end),
2639 value: (absolute_start + value_start, absolute_start + value_end),
2640 value_delimiter: value_delimiter(&raw[value_start..value_end]),
2641 });
2642 }
2643
2644 Some(EntryLocations {
2645 entry_type,
2646 key,
2647 delimiter,
2648 fields,
2649 })
2650}
2651
2652fn value_delimiter(raw_value: &str) -> ValueDelimiter {
2653 let trimmed = raw_value.trim_start();
2654 if has_top_level_concat(trimmed.as_bytes()) {
2655 return ValueDelimiter::Concatenation;
2656 }
2657
2658 match trimmed.as_bytes().first() {
2659 Some(b'{') => ValueDelimiter::Braces,
2660 Some(b'"') => ValueDelimiter::Quotes,
2661 _ => ValueDelimiter::Bare,
2662 }
2663}
2664
2665fn locate_definition_value(raw: &str) -> Option<&str> {
2666 let bytes = raw.as_bytes();
2667 let equals = bytes.iter().position(|byte| *byte == b'=')?;
2668 let value_start = skip_ascii_whitespace(bytes, equals + 1);
2669 let closing = enclosing_close_byte(bytes)?;
2670 let boundary = find_value_boundary(bytes, value_start, closing);
2671 let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2672 raw.get(value_start..value_end)
2673}
2674
2675fn locate_preamble_value(raw: &str) -> Option<&str> {
2676 let bytes = raw.as_bytes();
2677 let opening = bytes.iter().position(|byte| matches!(byte, b'{' | b'('))?;
2678 let closing = match bytes[opening] {
2679 b'{' => b'}',
2680 b'(' => b')',
2681 _ => return None,
2682 };
2683 let value_start = skip_ascii_whitespace(bytes, opening + 1);
2684 let boundary = find_value_boundary(bytes, value_start, closing);
2685 let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2686 raw.get(value_start..value_end)
2687}
2688
/// Returns the closing byte that pairs with the first `{` or `(` found in `bytes`.
///
/// Yields `}` for `{`, `)` for `(`, and `None` when neither opening byte is present.
fn enclosing_close_byte(bytes: &[u8]) -> Option<u8> {
    bytes.iter().find_map(|&byte| match byte {
        b'{' => Some(b'}'),
        b'(' => Some(b')'),
        _ => None,
    })
}
2697
2698fn has_top_level_concat(bytes: &[u8]) -> bool {
2699 let mut pos = 0;
2700 while let Some(&byte) = bytes.get(pos) {
2701 match byte {
2702 b'{' => pos = skip_braced(bytes, pos + 1),
2703 b'"' => pos = skip_quoted(bytes, pos + 1),
2704 b'#' => return true,
2705 _ => pos += 1,
2706 }
2707 }
2708 false
2709}
2710
/// Returns the first offset at or after `pos` that is not a space, tab, line feed or
/// carriage return.
///
/// `pos` is returned unchanged when it is already at or beyond the end of `bytes`.
fn skip_ascii_whitespace(bytes: &[u8], pos: usize) -> usize {
    let run = bytes.get(pos..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
            .count()
    });
    pos + run
}
2717
/// Moves `end` backwards over trailing spaces, tabs, line feeds and carriage returns,
/// never going below `start`.
///
/// `end` is returned unchanged when the byte before it is out of bounds or not whitespace.
fn trim_ascii_whitespace_end(bytes: &[u8], start: usize, mut end: usize) -> usize {
    loop {
        if end <= start {
            return end;
        }
        match bytes.get(end - 1) {
            Some(b' ' | b'\t' | b'\n' | b'\r') => end -= 1,
            _ => return end,
        }
    }
}
2724
2725fn scan_identifier(bytes: &[u8]) -> usize {
2726 bytes
2727 .iter()
2728 .position(|byte| !is_identifier_byte(*byte))
2729 .unwrap_or(bytes.len())
2730}
2731
/// Reports whether `byte` may appear in an identifier: an ASCII letter or digit, or one of
/// `_`, `-`, `:` and `.`.
const fn is_identifier_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
2738
2739fn find_value_boundary(bytes: &[u8], mut pos: usize, closing: u8) -> usize {
2740 while let Some(&byte) = bytes.get(pos) {
2741 match byte {
2742 b'{' => pos = skip_braced(bytes, pos + 1),
2743 b'"' => pos = skip_quoted(bytes, pos + 1),
2744 b',' => break,
2745 b if b == closing => break,
2746 _ => pos += 1,
2747 }
2748 }
2749 pos
2750}
2751
/// Returns the offset just past the `}` that closes a braced run whose opening `{` lies
/// immediately before `pos`.
///
/// Nested braces are balanced and a backslash skips the byte after it. If the run is never
/// closed, the position where scanning stopped is returned instead.
fn skip_braced(bytes: &[u8], mut pos: usize) -> usize {
    // Counts the opening brace before `pos` as well, so the run ends when this reaches zero.
    let mut open = 1usize;
    while pos < bytes.len() {
        let step = match bytes[pos] {
            b'\\' => 2,
            b'{' => {
                open += 1;
                1
            }
            b'}' => {
                open -= 1;
                if open == 0 {
                    return pos + 1;
                }
                1
            }
            _ => 1,
        };
        // A trailing backslash must not push the position past the end of input.
        pos = (pos + step).min(bytes.len());
    }
    pos
}
2771
/// Returns the offset just past the `}` that closes a braced run whose opening `{` lies
/// immediately before `pos`, or `None` when the input ends first.
///
/// Nested braces are balanced and a backslash skips the byte after it.
fn skip_braced_checked(bytes: &[u8], mut pos: usize) -> Option<usize> {
    let mut nested = 0usize;
    loop {
        // Running out of input before the closing brace means the run is unclosed.
        match *bytes.get(pos)? {
            b'}' if nested == 0 => return Some(pos + 1),
            b'}' => nested -= 1,
            b'{' => nested += 1,
            b'\\' => {
                pos = (pos + 2).min(bytes.len());
                continue;
            }
            _ => {}
        }
        pos += 1;
    }
}
2791
/// Returns the offset just past the `"` that closes a quoted run whose opening quote lies
/// immediately before `pos`.
///
/// A backslash skips the byte after it. If the run is never closed, the position where
/// scanning stopped is returned instead.
fn skip_quoted(bytes: &[u8], mut pos: usize) -> usize {
    loop {
        match bytes.get(pos) {
            None => return pos,
            Some(b'"') => return pos + 1,
            // A trailing backslash must not push the position past the end of input.
            Some(b'\\') => pos = (pos + 2).min(bytes.len()),
            Some(_) => pos += 1,
        }
    }
}
2802
/// Returns the offset just past the `"` that closes a quoted run whose opening quote lies
/// immediately before `pos`, or `None` when the input ends first.
///
/// A backslash skips the byte after it.
fn skip_quoted_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    let mut escaped = false;
    for (offset, &byte) in bytes.get(pos..)?.iter().enumerate() {
        if escaped {
            // The byte after a backslash is never treated as a closing quote.
            escaped = false;
            continue;
        }
        match byte {
            b'\\' => escaped = true,
            b'"' => return Some(pos + offset + 1),
            _ => {}
        }
    }
    None
}