1use crate::library::BlockKind;
8use crate::library::RawBuildItem;
9use crate::model::normalize_text_projection;
10use crate::source::SourceCursor;
11use crate::{
12 normalize_doi, Comment, DateParseError, DateParts, Entry, EntryType, FailedBlock, Field,
13 Library, PersonName, Preamble, ResourceField, SourceId, SourceMap, SourceSpan,
14 StringDefinition, Value,
15};
16use std::borrow::Cow;
17use std::fmt;
18
/// Overall outcome of a parse run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
    /// The input was processed without any recorded failure.
    Ok,
    /// Failures were recorded, but some content was still produced.
    Partial,
    /// Failures were recorded and no entry, string, or preamble was produced.
    Failed,
}
29
/// How serious a [`Diagnostic`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// A problem counted towards the error total of a summary.
    Error,
    /// A problem counted towards the warning total of a summary.
    Warning,
    /// An informational note counted towards the info total of a summary.
    Info,
}
40
/// Machine-readable identifier of a diagnostic.
///
/// The code is stored as a `Cow<'static, str>`: the built-in constants borrow
/// string literals, while [`DiagnosticCode::custom`] owns a runtime string.
/// Equality and hashing depend only on the text, not on how it is stored.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DiagnosticCode(Cow<'static, str>);

impl DiagnosticCode {
    /// Code text `parse-error`.
    pub const PARSE_ERROR: Self = Self::borrowed("parse-error");
    /// Code text `missing-entry-key`.
    pub const MISSING_ENTRY_KEY: Self = Self::borrowed("missing-entry-key");
    /// Code text `missing-field-separator`.
    pub const MISSING_FIELD_SEPARATOR: Self = Self::borrowed("missing-field-separator");
    /// Code text `expected-field-name`.
    pub const EXPECTED_FIELD_NAME: Self = Self::borrowed("expected-field-name");
    /// Code text `empty-field-value`.
    pub const EMPTY_FIELD_VALUE: Self = Self::borrowed("empty-field-value");
    /// Code text `expected-value-atom`.
    pub const EXPECTED_VALUE_ATOM: Self = Self::borrowed("expected-value-atom");
    /// Code text `bad-field-boundary`.
    pub const BAD_FIELD_BOUNDARY: Self = Self::borrowed("bad-field-boundary");
    /// Code text `bad-value-boundary`.
    pub const BAD_VALUE_BOUNDARY: Self = Self::borrowed("bad-value-boundary");
    /// Code text `unclosed-entry`.
    pub const UNCLOSED_ENTRY: Self = Self::borrowed("unclosed-entry");
    /// Code text `unclosed-braced-value`.
    pub const UNCLOSED_BRACED_VALUE: Self = Self::borrowed("unclosed-braced-value");
    /// Code text `unclosed-quoted-value`.
    pub const UNCLOSED_QUOTED_VALUE: Self = Self::borrowed("unclosed-quoted-value");

    /// Creates a code from a string literal without allocating.
    #[must_use]
    pub const fn borrowed(code: &'static str) -> Self {
        Self(Cow::Borrowed(code))
    }

    /// Creates a code that owns its text.
    #[must_use]
    pub fn custom(code: impl Into<String>) -> Self {
        let owned: String = code.into();
        Self(Cow::Owned(owned))
    }

    /// Returns the code text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_ref()
    }
}

impl fmt::Display for DiagnosticCode {
    /// Writes the raw code text; width and alignment flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
99
/// The part of the parse result a [`Diagnostic`] refers to.
///
/// The `usize` payloads are positional references; presumably they index the
/// corresponding collections of the parsed document (confirm against callers).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticTarget {
    /// The input as a whole.
    File,
    /// A block, identified by position.
    Block(usize),
    /// An entry, identified by position.
    Entry(usize),
    /// A field of an entry.
    Field {
        /// Position of the owning entry.
        entry: usize,
        /// Position of the field within that entry.
        field: usize,
    },
    /// The value of a field of an entry.
    Value {
        /// Position of the owning entry.
        entry: usize,
        /// Position of the field within that entry.
        field: usize,
    },
    /// A block that failed to parse, identified by position.
    FailedBlock(usize),
}
126
127#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct Diagnostic {
130 pub severity: DiagnosticSeverity,
132 pub code: DiagnosticCode,
134 pub message: String,
136 pub target: DiagnosticTarget,
138 pub source: Option<SourceSpan>,
140 pub snippet: Option<String>,
142}
143
144impl Diagnostic {
145 #[must_use]
147 pub fn error(
148 code: DiagnosticCode,
149 message: impl Into<String>,
150 target: DiagnosticTarget,
151 source: Option<SourceSpan>,
152 ) -> Self {
153 Self {
154 severity: DiagnosticSeverity::Error,
155 code,
156 message: message.into(),
157 target,
158 source,
159 snippet: None,
160 }
161 }
162
163 #[must_use]
165 pub fn with_snippet(mut self, snippet: impl Into<String>) -> Self {
166 self.snippet = Some(snippet.into());
167 self
168 }
169}
170
171#[derive(Debug, Clone, Copy, PartialEq, Eq)]
173pub struct ParseSummary {
174 pub status: ParseStatus,
176 pub entries: usize,
178 pub warnings: usize,
180 pub errors: usize,
182 pub infos: usize,
184 pub failed_blocks: usize,
186 pub recovered_blocks: usize,
188}
189
190#[derive(Debug, Clone, PartialEq, Eq)]
192pub struct ParsedSource<'a> {
193 pub id: SourceId,
195 pub name: Option<Cow<'a, str>>,
197}
198
199impl ParsedSource<'_> {
200 #[must_use]
202 pub const fn is_anonymous(&self) -> bool {
203 self.name.is_none()
204 }
205
206 #[must_use]
208 pub fn into_owned(self) -> ParsedSource<'static> {
209 ParsedSource {
210 id: self.id,
211 name: self.name.map(|name| Cow::Owned(name.into_owned())),
212 }
213 }
214}
215
/// Reference to one block of a parsed document, by kind and position.
///
/// Each variant carries the index copied from the matching `BlockKind`
/// variant when a document is built from a library.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedBlock {
    /// An entry block.
    Entry(usize),
    /// A string-definition block.
    String(usize),
    /// A preamble block.
    Preamble(usize),
    /// A comment block.
    Comment(usize),
    /// A block that failed to parse.
    Failed(usize),
}
230
231#[derive(Debug, Clone, PartialEq)]
233pub enum ParseEvent<'a> {
234 Entry(ParsedEntry<'a>),
236 String(ParsedString<'a>),
238 Preamble(ParsedPreamble<'a>),
240 Comment(ParsedComment<'a>),
242 Failed(ParsedFailedBlock<'a>),
244 Diagnostic(Diagnostic),
246}
247
/// Control signal with two states: keep going or stop.
///
/// Presumably returned by streaming callbacks to steer the parser (confirm
/// against the streaming API, which is not part of this section).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseFlow {
    /// Keep processing.
    Continue,
    /// Stop processing.
    Stop,
}
256
257#[derive(Debug, Clone, Copy, PartialEq, Eq)]
259pub struct StreamingSummary {
260 pub status: ParseStatus,
262 pub entries: usize,
264 pub strings: usize,
266 pub preambles: usize,
268 pub comments: usize,
270 pub failed_blocks: usize,
272 pub warnings: usize,
274 pub errors: usize,
276 pub infos: usize,
278 pub recovered_blocks: usize,
280 pub stopped: bool,
282}
283
284impl Default for StreamingSummary {
285 fn default() -> Self {
286 Self {
287 status: ParseStatus::Ok,
288 entries: 0,
289 strings: 0,
290 preambles: 0,
291 comments: 0,
292 failed_blocks: 0,
293 warnings: 0,
294 errors: 0,
295 infos: 0,
296 recovered_blocks: 0,
297 stopped: false,
298 }
299 }
300}
301
302impl StreamingSummary {
303 pub(crate) fn finalize_status(&mut self) {
304 self.status = if self.errors == 0 {
305 ParseStatus::Ok
306 } else if self.entries == 0 && self.strings == 0 && self.preambles == 0 {
307 ParseStatus::Failed
308 } else {
309 ParseStatus::Partial
310 };
311 }
312
313 pub(crate) fn count_diagnostic(&mut self, diagnostic: &Diagnostic) {
314 match diagnostic.severity {
315 DiagnosticSeverity::Error => self.errors += 1,
316 DiagnosticSeverity::Warning => self.warnings += 1,
317 DiagnosticSeverity::Info => self.infos += 1,
318 }
319 }
320}
321
/// Completeness marker stored on a parsed entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedEntryStatus {
    /// The entry is complete; this is the status set by every constructor in this file.
    Complete,
    /// The entry is only partially available.
    Partial,
}
330
/// Delimiter style recorded for an entry body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryDelimiter {
    /// Brace-delimited body.
    Braces,
    /// Parenthesis-delimited body.
    Parentheses,
}
339
/// Syntactic form recorded for a value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueDelimiter {
    /// Brace-delimited value.
    Braces,
    /// Quote-delimited value.
    Quotes,
    /// Undelimited value.
    Bare,
    /// Value formed by concatenation.
    Concatenation,
}
352
/// Strategy selector for variables that cannot be resolved during expansion.
///
/// The expansion code that interprets these variants is outside this section;
/// the per-variant notes are inferred from the names and should be confirmed there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnresolvedVariablePolicy {
    /// Presumably keeps the unresolved reference as written.
    Preserve,
    /// Presumably substitutes a placeholder.
    Placeholder,
    /// Presumably reports an error; this is the default in [`ExpansionOptions`].
    Error,
}
363
364#[derive(Debug, Clone, Copy, PartialEq, Eq)]
366pub struct ExpansionOptions {
367 pub expand_strings: bool,
369 pub expand_months: bool,
371 pub unresolved_variables: UnresolvedVariablePolicy,
373}
374
375impl Default for ExpansionOptions {
376 fn default() -> Self {
377 Self {
378 expand_strings: true,
379 expand_months: true,
380 unresolved_variables: UnresolvedVariablePolicy::Error,
381 }
382 }
383}
384
385#[derive(Debug, Clone, PartialEq)]
387pub struct ParsedValue<'a> {
388 pub value: Value<'a>,
390 pub raw: Option<Cow<'a, str>>,
392 pub source: Option<SourceSpan>,
394 pub expanded: Option<Cow<'a, str>>,
396 pub delimiter: Option<ValueDelimiter>,
398}
399
400impl<'a> ParsedValue<'a> {
401 #[must_use]
403 pub const fn new(value: Value<'a>) -> Self {
404 Self {
405 value,
406 raw: None,
407 source: None,
408 expanded: None,
409 delimiter: None,
410 }
411 }
412
413 #[must_use]
415 pub fn into_value(self) -> Value<'a> {
416 self.value
417 }
418
419 #[must_use]
421 pub const fn parsed(&self) -> &Value<'a> {
422 &self.value
423 }
424
425 #[must_use]
427 pub fn raw_text(&self) -> Option<&str> {
428 self.raw.as_deref()
429 }
430
431 #[must_use]
433 pub fn expanded_text(&self) -> Option<&str> {
434 self.expanded.as_deref()
435 }
436
437 #[must_use]
439 pub fn plain_text(&self) -> String {
440 self.value.to_plain_string()
441 }
442
443 #[must_use]
445 pub fn lossy_text(&self) -> String {
446 self.value.to_lossy_string()
447 }
448
449 #[cfg(feature = "latex_to_unicode")]
451 #[must_use]
452 pub fn unicode_plain_text(&self) -> String {
453 self.value.to_unicode_plain_string()
454 }
455
456 #[must_use]
458 pub fn into_owned(self) -> ParsedValue<'static> {
459 ParsedValue {
460 value: self.value.into_owned(),
461 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
462 source: self.source,
463 expanded: self
464 .expanded
465 .map(|expanded| Cow::Owned(expanded.into_owned())),
466 delimiter: self.delimiter,
467 }
468 }
469
470 pub(crate) fn from_owned_value(
471 value: Value<'a>,
472 source: Option<SourceSpan>,
473 delimiter: Option<ValueDelimiter>,
474 ) -> ParsedValue<'static> {
475 ParsedValue {
476 value: value.into_owned(),
477 raw: None,
478 source,
479 expanded: None,
480 delimiter,
481 }
482 }
483}
484
485#[derive(Debug, Clone, PartialEq)]
487pub struct ParsedField<'a> {
488 pub name: Cow<'a, str>,
490 pub value: ParsedValue<'a>,
492 pub raw: Option<Cow<'a, str>>,
494 pub source: Option<SourceSpan>,
496 pub name_source: Option<SourceSpan>,
498 pub value_source: Option<SourceSpan>,
500}
501
502impl<'a> ParsedField<'a> {
503 #[must_use]
505 pub fn from_field(field: Field<'a>) -> Self {
506 Self {
507 name: field.name,
508 value: ParsedValue::new(field.value),
509 raw: None,
510 source: None,
511 name_source: None,
512 value_source: None,
513 }
514 }
515
516 #[must_use]
518 pub fn into_field(self) -> Field<'a> {
519 Field {
520 name: self.name,
521 value: self.value.into_value(),
522 }
523 }
524
525 #[must_use]
527 pub fn into_owned(self) -> ParsedField<'static> {
528 ParsedField {
529 name: owned_field_name(self.name),
530 value: self.value.into_owned(),
531 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
532 source: self.source,
533 name_source: self.name_source,
534 value_source: self.value_source,
535 }
536 }
537
538 pub(crate) fn from_owned_field(field: Field<'a>) -> ParsedField<'static> {
539 ParsedField {
540 name: owned_field_name(field.name),
541 value: ParsedValue::from_owned_value(field.value, None, None),
542 raw: None,
543 source: None,
544 name_source: None,
545 value_source: None,
546 }
547 }
548}
549
550#[derive(Debug, Clone, PartialEq)]
552pub struct ParsedEntry<'a> {
553 pub ty: EntryType<'a>,
555 pub key: Cow<'a, str>,
557 pub fields: Vec<ParsedField<'a>>,
559 pub status: ParsedEntryStatus,
561 pub source: Option<SourceSpan>,
563 pub entry_type_source: Option<SourceSpan>,
565 pub key_source: Option<SourceSpan>,
567 pub delimiter: Option<EntryDelimiter>,
569 pub raw: Option<Cow<'a, str>>,
571 pub diagnostics: Vec<Diagnostic>,
573}
574
575impl<'a> ParsedEntry<'a> {
576 #[must_use]
578 pub fn from_entry(entry: Entry<'a>, source: Option<SourceSpan>) -> Self {
579 Self {
580 ty: entry.ty,
581 key: entry.key,
582 fields: entry
583 .fields
584 .into_iter()
585 .map(ParsedField::from_field)
586 .collect(),
587 status: ParsedEntryStatus::Complete,
588 source,
589 entry_type_source: None,
590 key_source: None,
591 delimiter: None,
592 raw: None,
593 diagnostics: Vec::new(),
594 }
595 }
596
597 pub(crate) fn from_entry_owned(
598 entry: Entry<'a>,
599 source: Option<SourceSpan>,
600 ) -> ParsedEntry<'static> {
601 ParsedEntry {
602 ty: entry.ty.into_owned(),
603 key: Cow::Owned(entry.key.into_owned()),
604 fields: entry
605 .fields
606 .into_iter()
607 .map(ParsedField::from_owned_field)
608 .collect(),
609 status: ParsedEntryStatus::Complete,
610 source,
611 entry_type_source: None,
612 key_source: None,
613 delimiter: None,
614 raw: None,
615 diagnostics: Vec::new(),
616 }
617 }
618
619 pub(crate) fn from_stream_entry(
620 entry: Entry<'a>,
621 source: SourceSpan,
622 raw: &'a str,
623 source_map: &SourceMap<'a>,
624 preserve_raw: bool,
625 ) -> Self {
626 let mut parsed = Self::from_entry(entry, Some(source));
627 parsed.apply_locations(raw, source_map, preserve_raw);
628 parsed
629 }
630
631 pub(crate) fn from_located_stream_entry_owned(
632 located: crate::parser::entry::LocatedEntry<'a>,
633 source: SourceSpan,
634 span_cursor: &mut SourceCursor<'_, 'a>,
635 ) -> ParsedEntry<'static> {
636 let entry = located.entry;
637 let entry_type_source = span_cursor.span(located.entry_type.0, located.entry_type.1);
638 let key_source = span_cursor.span(located.key.0, located.key.1);
639 let fields = entry
640 .fields
641 .into_iter()
642 .zip(located.fields)
643 .map(|(field, location)| {
644 let field_source = span_cursor.span(location.whole.0, location.whole.1);
645 let name_source = span_cursor.span(location.name.0, location.name.1);
646 let value_source = span_cursor.span(location.value.0, location.value.1);
647 ParsedField {
648 name: owned_field_name(field.name),
649 value: ParsedValue::from_owned_value(
650 field.value,
651 Some(value_source),
652 Some(location.value_delimiter),
653 ),
654 raw: None,
655 source: Some(field_source),
656 name_source: Some(name_source),
657 value_source: Some(value_source),
658 }
659 })
660 .collect();
661
662 ParsedEntry {
663 ty: entry.ty.into_owned(),
664 key: Cow::Owned(entry.key.into_owned()),
665 fields,
666 status: ParsedEntryStatus::Complete,
667 source: Some(source),
668 entry_type_source: Some(entry_type_source),
669 key_source: Some(key_source),
670 delimiter: Some(located.delimiter),
671 raw: None,
672 diagnostics: Vec::new(),
673 }
674 }
675
676 fn apply_locations(&mut self, raw: &'a str, source_map: &SourceMap<'a>, preserve_raw: bool) {
677 let Some(entry_span) = self.source else {
678 return;
679 };
680 let Some(locations) = locate_entry(raw, entry_span.byte_start, self.fields.len()) else {
681 return;
682 };
683
684 self.entry_type_source =
685 Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
686 self.key_source = Some(source_map.span(locations.key.0, locations.key.1));
687 self.delimiter = Some(locations.delimiter);
688 if preserve_raw {
689 self.raw = Some(Cow::Borrowed(raw));
690 }
691
692 for (field, location) in self.fields.iter_mut().zip(locations.fields) {
693 field.source = Some(source_map.span(location.whole.0, location.whole.1));
694 field.name_source = Some(source_map.span(location.name.0, location.name.1));
695 field.value.source = Some(source_map.span(location.value.0, location.value.1));
696 field.value_source = field.value.source;
697 field.value.delimiter = Some(location.value_delimiter);
698
699 if preserve_raw {
700 if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
701 field.raw = Some(Cow::Borrowed(source));
702 }
703 if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
704 field.value.raw = Some(Cow::Borrowed(source));
705 }
706 }
707 }
708 }
709
710 #[must_use]
712 pub fn key(&self) -> &str {
713 &self.key
714 }
715
716 #[must_use]
718 pub fn into_entry(self) -> Entry<'a> {
719 Entry {
720 ty: self.ty,
721 key: self.key,
722 fields: self
723 .fields
724 .into_iter()
725 .map(ParsedField::into_field)
726 .collect(),
727 }
728 }
729
730 pub fn rename_key(&mut self, key: impl Into<Cow<'a, str>>) {
732 self.key = key.into();
733 }
734
735 pub fn set_entry_type(&mut self, ty: EntryType<'a>) {
737 self.ty = ty;
738 }
739
740 pub fn add_field(&mut self, name: impl Into<Cow<'a, str>>, value: Value<'a>) {
742 self.fields.push(ParsedField {
743 name: name.into(),
744 value: ParsedValue::new(value),
745 raw: None,
746 source: None,
747 name_source: None,
748 value_source: None,
749 });
750 self.raw = None;
751 }
752
753 #[must_use]
755 pub fn replace_field_value(&mut self, name: &str, value: Value<'a>) -> bool {
756 self.replace_field_value_at(name, 0, value)
757 }
758
759 #[must_use]
761 pub fn replace_field_value_at(
762 &mut self,
763 name: &str,
764 occurrence: usize,
765 value: Value<'a>,
766 ) -> bool {
767 let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
768 return false;
769 };
770 let field = &mut self.fields[index];
771 field.value.value = value;
772 field.value.raw = None;
773 field.raw = None;
774 field.value.expanded = None;
775 true
776 }
777
778 #[must_use]
780 pub fn rename_field(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> usize {
781 let new = new.into();
782 let mut renamed = 0;
783 for field in &mut self.fields {
784 if field.name == old {
785 field.name.clone_from(&new);
786 field.raw = None;
787 renamed += 1;
788 }
789 }
790 renamed
791 }
792
793 #[must_use]
795 pub fn remove_field(&mut self, name: &str) -> usize {
796 let original_len = self.fields.len();
797 self.fields.retain(|field| field.name != name);
798 let removed = original_len - self.fields.len();
799 if removed > 0 {
800 self.raw = None;
801 }
802 removed
803 }
804
805 #[must_use]
807 pub fn remove_field_at(&mut self, name: &str, occurrence: usize) -> bool {
808 let Some(index) = nth_field_index(&self.fields, name, occurrence) else {
809 return false;
810 };
811 self.fields.remove(index);
812 self.raw = None;
813 true
814 }
815
816 #[must_use]
818 pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
819 let original_len = self.fields.len();
820 self.fields.retain(|field| {
821 !names
822 .iter()
823 .any(|name| field.name.eq_ignore_ascii_case(name))
824 });
825 let removed = original_len - self.fields.len();
826 if removed > 0 {
827 self.raw = None;
828 }
829 removed
830 }
831
832 #[must_use]
834 pub fn field_ignore_case(&self, name: &str) -> Option<&ParsedField<'a>> {
835 self.fields
836 .iter()
837 .find(|field| field.name.eq_ignore_ascii_case(name))
838 }
839
840 #[must_use]
842 pub fn get_as_string_ignore_case(&self, name: &str) -> Option<String> {
843 self.field_ignore_case(name)
844 .map(|field| field.value.plain_text())
845 }
846
847 #[must_use]
849 pub fn doi(&self) -> Option<String> {
850 self.get_as_string_ignore_case("doi")
851 .and_then(|doi| normalize_doi(&doi))
852 }
853
854 #[must_use]
856 pub fn authors(&self) -> Vec<PersonName> {
857 self.get_as_string_ignore_case("author")
858 .map_or_else(Vec::new, |authors| crate::parse_names(&authors))
859 }
860
861 #[must_use]
863 pub fn editors(&self) -> Vec<PersonName> {
864 self.get_as_string_ignore_case("editor")
865 .map_or_else(Vec::new, |editors| crate::parse_names(&editors))
866 }
867
868 #[must_use]
870 pub fn translators(&self) -> Vec<PersonName> {
871 self.get_as_string_ignore_case("translator")
872 .map_or_else(Vec::new, |translators| crate::parse_names(&translators))
873 }
874
875 #[must_use]
877 pub fn date_parts_for(
878 &self,
879 field: &str,
880 ) -> Option<std::result::Result<DateParts, DateParseError>> {
881 self.get_as_string_ignore_case(field)
882 .map(|value| crate::parse_date_parts(&value))
883 }
884
885 #[must_use]
887 pub fn date_parts(&self) -> Option<std::result::Result<DateParts, DateParseError>> {
888 self.clone().into_entry().date_parts()
889 }
890
891 #[must_use]
893 pub fn resource_fields(&self) -> Vec<ResourceField> {
894 self.clone().into_entry().resource_fields()
895 }
896
897 #[must_use]
899 pub fn into_owned(self) -> ParsedEntry<'static> {
900 ParsedEntry {
901 ty: self.ty.into_owned(),
902 key: Cow::Owned(self.key.into_owned()),
903 fields: self
904 .fields
905 .into_iter()
906 .map(ParsedField::into_owned)
907 .collect(),
908 status: self.status,
909 source: self.source,
910 entry_type_source: self.entry_type_source,
911 key_source: self.key_source,
912 delimiter: self.delimiter,
913 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
914 diagnostics: self.diagnostics,
915 }
916 }
917}
918
919fn owned_field_name(name: Cow<'_, str>) -> Cow<'static, str> {
920 static_field_name(&name).map_or_else(|| Cow::Owned(name.into_owned()), Cow::Borrowed)
921}
922
/// Returns the `'static` literal equal to `name` when it is a well-known field name.
///
/// The comparison is exact and case-sensitive; any other input yields `None`.
fn static_field_name(name: &str) -> Option<&'static str> {
    const KNOWN_FIELD_NAMES: [&str; 41] = [
        "abstract",
        "address",
        "archiveprefix",
        "author",
        "booktitle",
        "chapter",
        "copyright",
        "crossref",
        "date",
        "doi",
        "edition",
        "editor",
        "eprint",
        "eventdate",
        "file",
        "institution",
        "isbn",
        "issn",
        "journal",
        "keywords",
        "language",
        "month",
        "note",
        "number",
        "organization",
        "origdate",
        "pages",
        "pmcid",
        "pmid",
        "primaryclass",
        "publisher",
        "school",
        "series",
        "timestamp",
        "title",
        "translator",
        "type",
        "url",
        "urldate",
        "volume",
        "year",
    ];

    KNOWN_FIELD_NAMES
        .iter()
        .copied()
        .find(|&known| known == name)
}
969
970fn nth_field_index(fields: &[ParsedField<'_>], name: &str, occurrence: usize) -> Option<usize> {
971 fields
972 .iter()
973 .enumerate()
974 .filter(|(_, field)| field.name == name)
975 .nth(occurrence)
976 .map(|(index, _)| index)
977}
978
979#[derive(Debug, Clone, PartialEq)]
981pub struct ParsedString<'a> {
982 pub name: Cow<'a, str>,
984 pub value: ParsedValue<'a>,
986 pub source: Option<SourceSpan>,
988 pub raw: Option<Cow<'a, str>>,
990}
991
992impl<'a> ParsedString<'a> {
993 #[must_use]
995 pub fn from_definition(definition: StringDefinition<'a>) -> Self {
996 Self {
997 name: definition.name,
998 value: ParsedValue::new(definition.value),
999 source: definition.source,
1000 raw: None,
1001 }
1002 }
1003
1004 pub(crate) fn from_stream_definition(
1005 name: &'a str,
1006 value: Value<'a>,
1007 source: SourceSpan,
1008 raw: &'a str,
1009 preserve_raw: bool,
1010 ) -> Self {
1011 let value_raw = locate_definition_value(raw);
1012 Self {
1013 name: Cow::Borrowed(name),
1014 value: ParsedValue {
1015 value,
1016 raw: if preserve_raw {
1017 value_raw.map(Cow::Borrowed)
1018 } else {
1019 None
1020 },
1021 source: None,
1022 expanded: None,
1023 delimiter: value_raw.map(value_delimiter),
1024 },
1025 source: Some(source),
1026 raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1027 }
1028 }
1029
1030 #[must_use]
1032 pub fn into_owned(self) -> ParsedString<'static> {
1033 ParsedString {
1034 name: Cow::Owned(self.name.into_owned()),
1035 value: self.value.into_owned(),
1036 source: self.source,
1037 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1038 }
1039 }
1040
1041 pub(crate) fn from_stream_definition_owned(
1042 name: &'a str,
1043 value: Value<'a>,
1044 source: SourceSpan,
1045 raw: &'a str,
1046 ) -> ParsedString<'static> {
1047 let value_raw = locate_definition_value(raw);
1048 ParsedString {
1049 name: Cow::Owned(name.to_string()),
1050 value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1051 source: Some(source),
1052 raw: None,
1053 }
1054 }
1055}
1056
1057#[derive(Debug, Clone, PartialEq)]
1059pub struct ParsedPreamble<'a> {
1060 pub value: ParsedValue<'a>,
1062 pub source: Option<SourceSpan>,
1064 pub raw: Option<Cow<'a, str>>,
1066}
1067
1068impl<'a> ParsedPreamble<'a> {
1069 #[must_use]
1071 pub fn from_preamble(preamble: Preamble<'a>) -> Self {
1072 Self {
1073 value: ParsedValue::new(preamble.value),
1074 source: preamble.source,
1075 raw: None,
1076 }
1077 }
1078
1079 pub(crate) fn from_stream_preamble(
1080 value: Value<'a>,
1081 source: SourceSpan,
1082 raw: &'a str,
1083 preserve_raw: bool,
1084 ) -> Self {
1085 let value_raw = locate_preamble_value(raw);
1086 Self {
1087 value: ParsedValue {
1088 value,
1089 raw: if preserve_raw {
1090 value_raw.map(Cow::Borrowed)
1091 } else {
1092 None
1093 },
1094 source: None,
1095 expanded: None,
1096 delimiter: value_raw.map(value_delimiter),
1097 },
1098 source: Some(source),
1099 raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1100 }
1101 }
1102
1103 #[must_use]
1105 pub fn into_owned(self) -> ParsedPreamble<'static> {
1106 ParsedPreamble {
1107 value: self.value.into_owned(),
1108 source: self.source,
1109 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1110 }
1111 }
1112
1113 pub(crate) fn from_stream_preamble_owned(
1114 value: Value<'a>,
1115 source: SourceSpan,
1116 raw: &'a str,
1117 ) -> ParsedPreamble<'static> {
1118 let value_raw = locate_preamble_value(raw);
1119 ParsedPreamble {
1120 value: ParsedValue::from_owned_value(value, None, value_raw.map(value_delimiter)),
1121 source: Some(source),
1122 raw: None,
1123 }
1124 }
1125}
1126
1127#[derive(Debug, Clone, PartialEq, Eq)]
1129pub struct ParsedComment<'a> {
1130 pub text: Cow<'a, str>,
1132 pub source: Option<SourceSpan>,
1134 pub raw: Option<Cow<'a, str>>,
1136}
1137
1138impl<'a> ParsedComment<'a> {
1139 #[must_use]
1141 pub fn from_comment(comment: Comment<'a>) -> Self {
1142 Self {
1143 text: comment.text,
1144 source: comment.source,
1145 raw: None,
1146 }
1147 }
1148
1149 pub(crate) fn from_stream_comment(
1150 text: &'a str,
1151 source: SourceSpan,
1152 raw: &'a str,
1153 preserve_raw: bool,
1154 ) -> Self {
1155 Self {
1156 text: Cow::Borrowed(text),
1157 source: Some(source),
1158 raw: preserve_raw.then_some(Cow::Borrowed(raw)),
1159 }
1160 }
1161
1162 #[must_use]
1164 pub fn into_owned(self) -> ParsedComment<'static> {
1165 ParsedComment {
1166 text: Cow::Owned(self.text.into_owned()),
1167 source: self.source,
1168 raw: self.raw.map(|raw| Cow::Owned(raw.into_owned())),
1169 }
1170 }
1171
1172 pub(crate) fn from_stream_comment_owned(
1173 text: &'a str,
1174 source: SourceSpan,
1175 ) -> ParsedComment<'static> {
1176 ParsedComment {
1177 text: Cow::Owned(text.to_string()),
1178 source: Some(source),
1179 raw: None,
1180 }
1181 }
1182}
1183
1184#[derive(Debug, Clone, PartialEq, Eq)]
1186pub struct ParsedFailedBlock<'a> {
1187 pub raw: Cow<'a, str>,
1189 pub error: String,
1191 pub source: Option<SourceSpan>,
1193 pub diagnostics: Vec<Diagnostic>,
1195}
1196
1197impl<'a> ParsedFailedBlock<'a> {
1198 #[must_use]
1200 pub fn from_failed_block(
1201 index: usize,
1202 failed: FailedBlock<'a>,
1203 source_map: Option<&SourceMap<'_>>,
1204 ) -> Self {
1205 let diagnostic = diagnostic_for_failed_block(index, &failed, source_map);
1206
1207 Self {
1208 raw: failed.raw,
1209 error: failed.error,
1210 source: failed.source,
1211 diagnostics: vec![diagnostic],
1212 }
1213 }
1214
1215 #[must_use]
1217 pub fn into_owned(self) -> ParsedFailedBlock<'static> {
1218 ParsedFailedBlock {
1219 raw: Cow::Owned(self.raw.into_owned()),
1220 error: self.error,
1221 source: self.source,
1222 diagnostics: self.diagnostics,
1223 }
1224 }
1225}
1226
1227#[derive(Debug, Clone)]
1229pub struct ParsedDocument<'a> {
1230 library: Library<'a>,
1231 sources: Vec<ParsedSource<'a>>,
1232 entries: Vec<ParsedEntry<'a>>,
1233 strings: Vec<ParsedString<'a>>,
1234 preambles: Vec<ParsedPreamble<'a>>,
1235 comments: Vec<ParsedComment<'a>>,
1236 failed_blocks: Vec<ParsedFailedBlock<'a>>,
1237 blocks: Vec<ParsedBlock>,
1238 diagnostics: Vec<Diagnostic>,
1239 status: ParseStatus,
1240}
1241
1242impl<'a> ParsedDocument<'a> {
1243 #[must_use]
1245 pub fn from_library(library: Library<'a>) -> Self {
1246 Self::from_library_with_sources(
1247 library,
1248 vec![ParsedSource {
1249 id: SourceId::new(0),
1250 name: None,
1251 }],
1252 )
1253 }
1254
1255 pub(crate) fn from_library_with_sources(
1256 library: Library<'a>,
1257 sources: Vec<ParsedSource<'a>>,
1258 ) -> Self {
1259 Self::from_library_with_source_map(library, sources, None)
1260 }
1261
1262 pub(crate) fn from_library_with_source_map(
1263 library: Library<'a>,
1264 sources: Vec<ParsedSource<'a>>,
1265 source_map: Option<&SourceMap<'_>>,
1266 ) -> Self {
1267 let entries: Vec<ParsedEntry<'a>> = library
1268 .entries()
1269 .iter()
1270 .cloned()
1271 .enumerate()
1272 .map(|(index, entry)| ParsedEntry::from_entry(entry, library.entry_source(index)))
1273 .collect();
1274 let strings: Vec<ParsedString<'a>> = library
1275 .strings()
1276 .iter()
1277 .cloned()
1278 .map(ParsedString::from_definition)
1279 .collect();
1280 let preambles: Vec<ParsedPreamble<'a>> = library
1281 .preambles()
1282 .iter()
1283 .cloned()
1284 .map(ParsedPreamble::from_preamble)
1285 .collect();
1286 let comments = library
1287 .comments()
1288 .iter()
1289 .cloned()
1290 .map(ParsedComment::from_comment)
1291 .collect();
1292 let failed_blocks = library
1293 .failed_blocks()
1294 .iter()
1295 .cloned()
1296 .enumerate()
1297 .map(|(index, failed)| ParsedFailedBlock::from_failed_block(index, failed, source_map))
1298 .collect::<Vec<_>>();
1299 let diagnostics = failed_blocks
1300 .iter()
1301 .flat_map(|failed| failed.diagnostics.iter().cloned())
1302 .collect::<Vec<_>>();
1303 let blocks = library
1304 .block_kinds()
1305 .iter()
1306 .map(|kind| match *kind {
1307 BlockKind::Entry(index) => ParsedBlock::Entry(index),
1308 BlockKind::String(index) => ParsedBlock::String(index),
1309 BlockKind::Preamble(index) => ParsedBlock::Preamble(index),
1310 BlockKind::Comment(index) => ParsedBlock::Comment(index),
1311 BlockKind::Failed(index) => ParsedBlock::Failed(index),
1312 })
1313 .collect();
1314 let status = if failed_blocks.is_empty() {
1315 ParseStatus::Ok
1316 } else if entries.is_empty() && strings.is_empty() && preambles.is_empty() {
1317 ParseStatus::Failed
1318 } else {
1319 ParseStatus::Partial
1320 };
1321
1322 Self {
1323 library,
1324 sources,
1325 entries,
1326 strings,
1327 preambles,
1328 comments,
1329 failed_blocks,
1330 blocks,
1331 diagnostics,
1332 status,
1333 }
1334 }
1335
1336 pub(crate) const fn from_parsed_parts(
1337 library: Library<'a>,
1338 sources: Vec<ParsedSource<'a>>,
1339 entries: Vec<ParsedEntry<'a>>,
1340 strings: Vec<ParsedString<'a>>,
1341 preambles: Vec<ParsedPreamble<'a>>,
1342 comments: Vec<ParsedComment<'a>>,
1343 blocks: Vec<ParsedBlock>,
1344 ) -> Self {
1345 Self {
1346 library,
1347 sources,
1348 entries,
1349 strings,
1350 preambles,
1351 comments,
1352 failed_blocks: Vec::new(),
1353 blocks,
1354 diagnostics: Vec::new(),
1355 status: ParseStatus::Ok,
1356 }
1357 }
1358
1359 pub(crate) fn apply_entry_locations(
1360 &mut self,
1361 entry_index: usize,
1362 raw: &'a str,
1363 source_map: &SourceMap<'a>,
1364 preserve_raw: bool,
1365 ) {
1366 let Some(entry) = self.entries.get_mut(entry_index) else {
1367 return;
1368 };
1369 let Some(entry_span) = entry.source else {
1370 return;
1371 };
1372 let Some(locations) = locate_entry(raw, entry_span.byte_start, entry.fields.len()) else {
1373 return;
1374 };
1375
1376 entry.entry_type_source =
1377 Some(source_map.span(locations.entry_type.0, locations.entry_type.1));
1378 entry.key_source = Some(source_map.span(locations.key.0, locations.key.1));
1379 entry.delimiter = Some(locations.delimiter);
1380 if preserve_raw {
1381 entry.raw = Some(Cow::Borrowed(raw));
1382 }
1383
1384 for (field, location) in entry.fields.iter_mut().zip(locations.fields) {
1385 field.source = Some(source_map.span(location.whole.0, location.whole.1));
1386 field.name_source = Some(source_map.span(location.name.0, location.name.1));
1387 field.value.source = Some(source_map.span(location.value.0, location.value.1));
1388 field.value_source = field.value.source;
1389 field.value.delimiter = Some(location.value_delimiter);
1390
1391 if preserve_raw {
1392 if let Some(source) = field.source.and_then(|span| source_map.slice(span)) {
1393 field.raw = Some(Cow::Borrowed(source));
1394 }
1395 if let Some(source) = field.value_source.and_then(|span| source_map.slice(span)) {
1396 field.value.raw = Some(Cow::Borrowed(source));
1397 }
1398 }
1399 }
1400 }
1401
1402 pub(crate) fn apply_raw_items(&mut self, raw_items: &[RawBuildItem<'a>]) {
1403 let mut string_index = 0;
1404 let mut preamble_index = 0;
1405 let mut comment_index = 0;
1406
1407 for raw_item in raw_items {
1408 match raw_item {
1409 RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, _), _, raw) => {
1410 if let Some(parsed) = self.strings.get_mut(string_index) {
1411 parsed.raw = Some(Cow::Borrowed(raw));
1412 if let Some(value_raw) = locate_definition_value(raw) {
1413 parsed.value.raw = Some(Cow::Borrowed(value_raw));
1414 parsed.value.delimiter = Some(value_delimiter(value_raw));
1415 }
1416 }
1417 string_index += 1;
1418 }
1419 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(_), _, raw) => {
1420 if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1421 parsed.raw = Some(Cow::Borrowed(raw));
1422 if let Some(value_raw) = locate_preamble_value(raw) {
1423 parsed.value.raw = Some(Cow::Borrowed(value_raw));
1424 parsed.value.delimiter = Some(value_delimiter(value_raw));
1425 }
1426 }
1427 preamble_index += 1;
1428 }
1429 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, raw) => {
1430 if let Some(parsed) = self.comments.get_mut(comment_index) {
1431 parsed.raw = Some(Cow::Borrowed(raw));
1432 }
1433 comment_index += 1;
1434 }
1435 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(_), _, _)
1436 | RawBuildItem::Failed(_) => {}
1437 }
1438 }
1439 }
1440
1441 pub(crate) fn apply_parsed_values(&mut self, raw_items: &[RawBuildItem<'a>]) {
1442 let mut entry_index = 0;
1443 let mut string_index = 0;
1444 let mut preamble_index = 0;
1445
1446 for raw_item in raw_items {
1447 match raw_item {
1448 RawBuildItem::Parsed(crate::parser::ParsedItem::Entry(raw_entry), _, _) => {
1449 if let Some(entry) = self.entries.get_mut(entry_index) {
1450 for (field, raw_field) in entry.fields.iter_mut().zip(&raw_entry.fields) {
1451 field.value.value = raw_field.value.clone();
1452 field.value.expanded = None;
1453 }
1454 }
1455 entry_index += 1;
1456 }
1457 RawBuildItem::Parsed(crate::parser::ParsedItem::String(_, value), _, _) => {
1458 if let Some(parsed) = self.strings.get_mut(string_index) {
1459 parsed.value.value = value.clone();
1460 parsed.value.expanded = None;
1461 }
1462 string_index += 1;
1463 }
1464 RawBuildItem::Parsed(crate::parser::ParsedItem::Preamble(value), _, _) => {
1465 if let Some(parsed) = self.preambles.get_mut(preamble_index) {
1466 parsed.value.value = value.clone();
1467 parsed.value.expanded = None;
1468 }
1469 preamble_index += 1;
1470 }
1471 RawBuildItem::Parsed(crate::parser::ParsedItem::Comment(_), _, _)
1472 | RawBuildItem::Failed(_) => {}
1473 }
1474 }
1475 }
1476
1477 pub(crate) fn populate_expanded_values(
1478 &mut self,
1479 options: ExpansionOptions,
1480 ) -> crate::Result<()> {
1481 let strings = &self.strings;
1482 for entry in &mut self.entries {
1483 for field in &mut entry.fields {
1484 field.value.expanded = Some(Cow::Owned(expand_value_with_options(
1485 &field.value.value,
1486 strings,
1487 options,
1488 &mut Vec::new(),
1489 )?));
1490 }
1491 }
1492 for preamble in &mut self.preambles {
1493 preamble.value.expanded = Some(Cow::Owned(expand_value_with_options(
1494 &preamble.value.value,
1495 strings,
1496 options,
1497 &mut Vec::new(),
1498 )?));
1499 }
1500 Ok(())
1501 }
1502
1503 pub(crate) fn recover_partial_entries(
1504 &mut self,
1505 source_map: &SourceMap<'a>,
1506 preserve_raw: bool,
1507 ) {
1508 let old_entries = std::mem::take(&mut self.entries);
1509 let old_failed_blocks = std::mem::take(&mut self.failed_blocks);
1510 let old_blocks = std::mem::take(&mut self.blocks);
1511 let mut new_entries = Vec::with_capacity(old_entries.len());
1512 let mut new_failed_blocks = Vec::new();
1513 let mut new_blocks = Vec::with_capacity(old_blocks.len());
1514
1515 for block in old_blocks {
1516 match block {
1517 ParsedBlock::Entry(index) => {
1518 let new_index = new_entries.len();
1519 if let Some(entry) = old_entries.get(index) {
1520 new_entries.push(entry.clone());
1521 new_blocks.push(ParsedBlock::Entry(new_index));
1522 }
1523 }
1524 ParsedBlock::Failed(index) => {
1525 let Some(failed) = old_failed_blocks.get(index) else {
1526 continue;
1527 };
1528 let new_index = new_entries.len();
1529 if let Some(partial) =
1530 recover_partial_entry(failed, source_map, new_index, preserve_raw)
1531 {
1532 new_entries.push(partial);
1533 new_blocks.push(ParsedBlock::Entry(new_index));
1534 } else {
1535 let failed_index = new_failed_blocks.len();
1536 new_failed_blocks.push(failed.clone());
1537 new_blocks.push(ParsedBlock::Failed(failed_index));
1538 }
1539 }
1540 ParsedBlock::String(index) => new_blocks.push(ParsedBlock::String(index)),
1541 ParsedBlock::Preamble(index) => new_blocks.push(ParsedBlock::Preamble(index)),
1542 ParsedBlock::Comment(index) => new_blocks.push(ParsedBlock::Comment(index)),
1543 }
1544 }
1545
1546 self.entries = new_entries;
1547 self.failed_blocks = new_failed_blocks;
1548 self.blocks = new_blocks;
1549 self.rebuild_diagnostics_and_status();
1550 }
1551
1552 fn rebuild_diagnostics_and_status(&mut self) {
1553 self.diagnostics.clear();
1554 self.diagnostics.extend(
1555 self.entries
1556 .iter()
1557 .flat_map(|entry| entry.diagnostics.iter().cloned()),
1558 );
1559 self.diagnostics.extend(
1560 self.failed_blocks
1561 .iter()
1562 .flat_map(|failed| failed.diagnostics.iter().cloned()),
1563 );
1564
1565 self.status = if self.diagnostics.is_empty() {
1566 ParseStatus::Ok
1567 } else if self.entries.is_empty() && self.strings.is_empty() && self.preambles.is_empty() {
1568 ParseStatus::Failed
1569 } else {
1570 ParseStatus::Partial
1571 };
1572 }
1573
1574 pub(crate) fn failed_from_error(
1575 sources: Vec<ParsedSource<'a>>,
1576 source_map: &SourceMap<'a>,
1577 error: &crate::Error,
1578 ) -> Self {
1579 let (byte, message, fallback_snippet) = match error {
1580 crate::Error::ParseError {
1581 line,
1582 column,
1583 message,
1584 snippet,
1585 } => (
1586 source_map.byte_at_line_column(*line, *column).unwrap_or(0),
1587 message.clone(),
1588 snippet.clone(),
1589 ),
1590 other => (0, other.to_string(), None),
1591 };
1592 let raw = source_map.input().get(byte..).unwrap_or_default();
1593 let failed_source = source_map.span(byte, source_map.len());
1594 let failed = FailedBlock {
1595 raw: Cow::Borrowed(raw),
1596 error: message.clone(),
1597 source: Some(failed_source),
1598 };
1599 let diagnostic = diagnostic_for_raw_failure(
1600 0,
1601 raw,
1602 message,
1603 Some(failed_source),
1604 Some(source_map),
1605 byte,
1606 fallback_snippet,
1607 );
1608 let failed_block = ParsedFailedBlock {
1609 raw: failed.raw,
1610 error: failed.error,
1611 source: failed.source,
1612 diagnostics: vec![diagnostic.clone()],
1613 };
1614
1615 Self {
1616 library: Library::new(),
1617 sources,
1618 entries: Vec::new(),
1619 strings: Vec::new(),
1620 preambles: Vec::new(),
1621 comments: Vec::new(),
1622 failed_blocks: vec![failed_block],
1623 blocks: vec![ParsedBlock::Failed(0)],
1624 diagnostics: vec![diagnostic],
1625 status: ParseStatus::Failed,
1626 }
1627 }
1628
    /// Returns a reference to the library held by this document.
    #[must_use]
    pub const fn library(&self) -> &Library<'a> {
        &self.library
    }
1634
    /// Consumes the document and returns its library, discarding everything
    /// else the document holds.
    #[must_use]
    pub fn into_library(self) -> Library<'a> {
        self.library
    }
1640
1641 #[must_use]
1643 pub fn sources(&self) -> &[ParsedSource<'a>] {
1644 &self.sources
1645 }
1646
1647 #[must_use]
1649 pub fn entries(&self) -> &[ParsedEntry<'a>] {
1650 &self.entries
1651 }
1652
1653 #[must_use]
1655 pub fn entries_mut(&mut self) -> &mut [ParsedEntry<'a>] {
1656 &mut self.entries
1657 }
1658
1659 #[must_use]
1661 pub fn entry_mut_by_key(&mut self, key: &str) -> Option<&mut ParsedEntry<'a>> {
1662 self.entries.iter_mut().find(|entry| entry.key == key)
1663 }
1664
1665 #[must_use]
1667 pub fn rename_key(&mut self, old: &str, new: impl Into<Cow<'a, str>>) -> bool {
1668 let Some(entry) = self.entry_mut_by_key(old) else {
1669 return false;
1670 };
1671 entry.rename_key(new);
1672 true
1673 }
1674
1675 #[must_use]
1677 pub fn remove_export_fields(&mut self, names: &[&str]) -> usize {
1678 self.entries
1679 .iter_mut()
1680 .map(|entry| entry.remove_export_fields(names))
1681 .sum()
1682 }
1683
1684 #[must_use]
1686 pub fn strings(&self) -> &[ParsedString<'a>] {
1687 &self.strings
1688 }
1689
1690 #[must_use]
1692 pub fn preambles(&self) -> &[ParsedPreamble<'a>] {
1693 &self.preambles
1694 }
1695
1696 #[must_use]
1698 pub fn comments(&self) -> &[ParsedComment<'a>] {
1699 &self.comments
1700 }
1701
1702 #[must_use]
1704 pub fn failed_blocks(&self) -> &[ParsedFailedBlock<'a>] {
1705 &self.failed_blocks
1706 }
1707
1708 #[must_use]
1710 pub fn blocks(&self) -> &[ParsedBlock] {
1711 &self.blocks
1712 }
1713
1714 #[must_use]
1716 pub fn diagnostics(&self) -> &[Diagnostic] {
1717 &self.diagnostics
1718 }
1719
    /// Returns the overall parse status of the document.
    #[must_use]
    pub const fn status(&self) -> ParseStatus {
        self.status
    }
1725
1726 #[must_use]
1728 pub fn summary(&self) -> ParseSummary {
1729 let mut warnings = 0;
1730 let mut errors = 0;
1731 let mut infos = 0;
1732
1733 for diagnostic in &self.diagnostics {
1734 match diagnostic.severity {
1735 DiagnosticSeverity::Error => errors += 1,
1736 DiagnosticSeverity::Warning => warnings += 1,
1737 DiagnosticSeverity::Info => infos += 1,
1738 }
1739 }
1740
1741 ParseSummary {
1742 status: self.status,
1743 entries: self.entries.len(),
1744 warnings,
1745 errors,
1746 infos,
1747 failed_blocks: self.failed_blocks.len(),
1748 recovered_blocks: self
1749 .entries
1750 .iter()
1751 .filter(|entry| entry.status == ParsedEntryStatus::Partial)
1752 .count(),
1753 }
1754 }
1755
1756 pub fn expand_value(
1761 &self,
1762 value: &Value<'a>,
1763 options: ExpansionOptions,
1764 ) -> crate::Result<String> {
1765 expand_value_with_options(value, &self.strings, options, &mut Vec::new())
1766 }
1767
1768 #[must_use]
1770 pub fn into_owned(self) -> ParsedDocument<'static> {
1771 ParsedDocument {
1772 library: self.library.into_owned(),
1773 sources: self
1774 .sources
1775 .into_iter()
1776 .map(ParsedSource::into_owned)
1777 .collect(),
1778 entries: self
1779 .entries
1780 .into_iter()
1781 .map(ParsedEntry::into_owned)
1782 .collect(),
1783 strings: self
1784 .strings
1785 .into_iter()
1786 .map(ParsedString::into_owned)
1787 .collect(),
1788 preambles: self
1789 .preambles
1790 .into_iter()
1791 .map(ParsedPreamble::into_owned)
1792 .collect(),
1793 comments: self
1794 .comments
1795 .into_iter()
1796 .map(ParsedComment::into_owned)
1797 .collect(),
1798 failed_blocks: self
1799 .failed_blocks
1800 .into_iter()
1801 .map(ParsedFailedBlock::into_owned)
1802 .collect(),
1803 blocks: self.blocks,
1804 diagnostics: self.diagnostics,
1805 status: self.status,
1806 }
1807 }
1808}
1809
1810impl ParsedDocument<'static> {
1811 pub(crate) fn apply_raw_from_source(&mut self, source: &str) {
1812 for entry in &mut self.entries {
1813 if entry.raw.is_none() {
1814 entry.raw = owned_source_slice(source, entry.source);
1815 }
1816 for field in &mut entry.fields {
1817 if field.raw.is_none() {
1818 field.raw = owned_source_slice(source, field.source);
1819 }
1820 if field.value.raw.is_none() {
1821 field.value.raw = owned_source_slice(source, field.value_source);
1822 }
1823 }
1824 }
1825
1826 for string in &mut self.strings {
1827 if string.raw.is_none() {
1828 string.raw = owned_source_slice(source, string.source);
1829 }
1830 }
1831 for preamble in &mut self.preambles {
1832 if preamble.raw.is_none() {
1833 preamble.raw = owned_source_slice(source, preamble.source);
1834 }
1835 }
1836 for comment in &mut self.comments {
1837 if comment.raw.is_none() {
1838 comment.raw = owned_source_slice(source, comment.source);
1839 }
1840 }
1841 }
1842}
1843
1844fn owned_source_slice(source: &str, span: Option<SourceSpan>) -> Option<Cow<'static, str>> {
1845 let span = span?;
1846 source
1847 .get(span.byte_start..span.byte_end)
1848 .map(|raw| Cow::Owned(raw.to_string()))
1849}
1850
1851fn expand_value_with_options(
1852 value: &Value<'_>,
1853 strings: &[ParsedString<'_>],
1854 options: ExpansionOptions,
1855 stack: &mut Vec<String>,
1856) -> crate::Result<String> {
1857 match value {
1858 Value::Literal(text) => Ok(normalize_text_projection(text)),
1859 Value::Number(number) => Ok(number.to_string()),
1860 Value::Concat(parts) => {
1861 let mut expanded = String::new();
1862 for part in parts.iter() {
1863 expanded.push_str(&expand_value_with_options(part, strings, options, stack)?);
1864 }
1865 Ok(expanded)
1866 }
1867 Value::Variable(name) => expand_variable(name, strings, options, stack),
1868 }
1869}
1870
1871fn expand_variable(
1872 name: &str,
1873 strings: &[ParsedString<'_>],
1874 options: ExpansionOptions,
1875 stack: &mut Vec<String>,
1876) -> crate::Result<String> {
1877 if options.expand_strings {
1878 if let Some(definition) = strings
1879 .iter()
1880 .rev()
1881 .find(|definition| definition.name.as_ref() == name)
1882 {
1883 if stack.iter().any(|active| active == name) {
1884 return Err(crate::Error::CircularReference(name.to_string()));
1885 }
1886 stack.push(name.to_string());
1887 let expanded =
1888 expand_value_with_options(&definition.value.value, strings, options, stack);
1889 stack.pop();
1890 return expanded;
1891 }
1892 }
1893
1894 if options.expand_months {
1895 if let Some(month) = month_expansion(name) {
1896 return Ok(month.to_string());
1897 }
1898 }
1899
1900 match options.unresolved_variables {
1901 UnresolvedVariablePolicy::Preserve => Ok(name.to_string()),
1902 UnresolvedVariablePolicy::Placeholder => Ok(format!("{{undefined:{name}}}")),
1903 UnresolvedVariablePolicy::Error => Err(crate::Error::UndefinedVariable(name.to_string())),
1904 }
1905}
1906
/// Maps a three-letter English month abbreviation to the full month name.
///
/// The comparison ignores ASCII case, so `"jan"`, `"Jan"` and `"JAN"` all
/// yield `"January"`. Any other input, including longer spellings such as
/// `"june"`, yields `None`.
///
/// The lookup compares in place and does not allocate.
fn month_expansion(name: &str) -> Option<&'static str> {
    const MONTHS: [(&str, &str); 12] = [
        ("jan", "January"),
        ("feb", "February"),
        ("mar", "March"),
        ("apr", "April"),
        ("may", "May"),
        ("jun", "June"),
        ("jul", "July"),
        ("aug", "August"),
        ("sep", "September"),
        ("oct", "October"),
        ("nov", "November"),
        ("dec", "December"),
    ];

    // `eq_ignore_ascii_case` also requires equal length, which replaces the
    // explicit three-byte check.
    MONTHS
        .iter()
        .find(|(abbreviation, _)| name.eq_ignore_ascii_case(abbreviation))
        .map(|&(_, full)| full)
}
1928
/// Byte locations of the parts of an entry, as produced by `locate_entry`.
///
/// Each `(usize, usize)` pair is a `(start, end)` byte range offset by the
/// `absolute_start` passed to `locate_entry`.
#[derive(Debug, Clone)]
struct EntryLocations {
    /// Range of the entry type identifier following `@`.
    entry_type: (usize, usize),
    /// Range of the citation key.
    key: (usize, usize),
    /// Whether the entry body is opened with a brace or a parenthesis.
    delimiter: EntryDelimiter,
    /// Locations of the fields that could be located, in source order.
    fields: Vec<FieldLocations>,
}
1936
/// Byte locations of one field inside an entry, as produced by
/// `locate_entry`. Each pair is a `(start, end)` byte range.
#[derive(Debug, Clone, Copy)]
struct FieldLocations {
    /// Range of the whole field, including a trailing comma when present.
    whole: (usize, usize),
    /// Range of the field name.
    name: (usize, usize),
    /// Range of the field value with trailing ASCII whitespace trimmed.
    value: (usize, usize),
    /// Delimiter style detected for the value text.
    value_delimiter: ValueDelimiter,
}
1944
/// Result of classifying a failed block: the diagnostic code together with
/// the byte range it applies to.
#[derive(Debug, Clone)]
struct FailureClassification {
    /// Diagnostic code describing the failure.
    code: DiagnosticCode,
    /// `(start, end)` byte range relative to the start of the classified raw
    /// text.
    range: (usize, usize),
}
1950
1951fn diagnostic_for_failed_block(
1952 index: usize,
1953 failed: &FailedBlock<'_>,
1954 source_map: Option<&SourceMap<'_>>,
1955) -> Diagnostic {
1956 let absolute_start = failed.source.map_or(0, |source| source.byte_start);
1957 diagnostic_for_raw_failure(
1958 index,
1959 &failed.raw,
1960 failed.error.clone(),
1961 failed.source,
1962 source_map,
1963 absolute_start,
1964 None,
1965 )
1966}
1967
1968fn diagnostic_for_raw_failure(
1969 index: usize,
1970 raw: &str,
1971 fallback_message: String,
1972 fallback_source: Option<SourceSpan>,
1973 source_map: Option<&SourceMap<'_>>,
1974 absolute_start: usize,
1975 fallback_snippet: Option<String>,
1976) -> Diagnostic {
1977 let classification = classify_failure(raw);
1978 let source = source_map
1979 .map(|map| {
1980 map.span(
1981 absolute_start + classification.range.0,
1982 absolute_start + classification.range.1,
1983 )
1984 })
1985 .or(fallback_source);
1986 let snippet = source
1987 .and_then(|span| source_map.and_then(|map| map.snippet(span, 160)))
1988 .or(fallback_snippet)
1989 .or_else(|| Some(raw.chars().take(160).collect()));
1990
1991 let mut diagnostic = Diagnostic::error(
1992 classification.code.clone(),
1993 diagnostic_message(&classification.code, fallback_message),
1994 DiagnosticTarget::FailedBlock(index),
1995 source,
1996 );
1997 diagnostic.snippet = snippet;
1998 diagnostic
1999}
2000
2001fn recover_partial_entry<'a>(
2002 failed: &ParsedFailedBlock<'a>,
2003 source_map: &SourceMap<'a>,
2004 entry_index: usize,
2005 preserve_raw: bool,
2006) -> Option<ParsedEntry<'a>> {
2007 let raw: &'a str = match &failed.raw {
2008 Cow::Borrowed(raw) => raw,
2009 Cow::Owned(_) => return None,
2010 };
2011 let absolute_start = failed.source?.byte_start;
2012 let header = parse_partial_header(raw, source_map, absolute_start)?;
2013 let fields = recover_partial_fields(
2014 raw,
2015 source_map,
2016 absolute_start,
2017 header.field_start,
2018 header.closing,
2019 preserve_raw,
2020 );
2021 if fields.is_empty() {
2022 return None;
2023 }
2024
2025 let diagnostic = diagnostic_for_partial_entry(entry_index, failed, source_map);
2026
2027 Some(ParsedEntry {
2028 ty: header.ty,
2029 key: header.key,
2030 fields,
2031 status: ParsedEntryStatus::Partial,
2032 source: failed.source,
2033 entry_type_source: header.entry_type_source,
2034 key_source: header.key_source,
2035 delimiter: Some(header.delimiter),
2036 raw: preserve_raw.then(|| failed.raw.clone()),
2037 diagnostics: vec![diagnostic],
2038 })
2039}
2040
/// Crate-visible wrapper around `recover_partial_entry`: forwards all
/// arguments unchanged and returns its result.
pub(crate) fn recover_partial_stream_entry<'a>(
    failed: &ParsedFailedBlock<'a>,
    source_map: &SourceMap<'a>,
    entry_index: usize,
    preserve_raw: bool,
) -> Option<ParsedEntry<'a>> {
    recover_partial_entry(failed, source_map, entry_index, preserve_raw)
}
2049
/// Header of a partially recoverable entry, as produced by
/// `parse_partial_header`.
struct PartialHeader<'a> {
    /// Entry type parsed from the identifier following `@`.
    ty: EntryType<'a>,
    /// Citation key, borrowed from the raw text.
    key: Cow<'a, str>,
    /// Span of the entry type identifier.
    entry_type_source: Option<SourceSpan>,
    /// Span of the citation key.
    key_source: Option<SourceSpan>,
    /// Whether the entry body is opened with a brace or a parenthesis.
    delimiter: EntryDelimiter,
    /// Byte offset, relative to the raw text, just past the comma that
    /// follows the key.
    field_start: usize,
    /// Closing byte matching the opening delimiter (`}` or `)`).
    closing: u8,
}
2059
2060fn parse_partial_header<'a>(
2061 raw: &'a str,
2062 source_map: &SourceMap<'a>,
2063 absolute_start: usize,
2064) -> Option<PartialHeader<'a>> {
2065 let bytes = raw.as_bytes();
2066 let mut pos = bytes.iter().position(|byte| *byte == b'@')? + 1;
2067
2068 let entry_type_start = pos;
2069 pos += scan_identifier(&bytes[pos..]);
2070 if pos == entry_type_start {
2071 return None;
2072 }
2073 let ty = EntryType::parse(&raw[entry_type_start..pos]);
2074 let entry_type_source =
2075 Some(source_map.span(absolute_start + entry_type_start, absolute_start + pos));
2076
2077 pos = skip_ascii_whitespace(bytes, pos);
2078 let (delimiter, closing) = match *bytes.get(pos)? {
2079 b'{' => (EntryDelimiter::Braces, b'}'),
2080 b'(' => (EntryDelimiter::Parentheses, b')'),
2081 _ => return None,
2082 };
2083 pos += 1;
2084 pos = skip_ascii_whitespace(bytes, pos);
2085
2086 let key_start = pos;
2087 pos += scan_identifier(&bytes[pos..]);
2088 if pos == key_start {
2089 return None;
2090 }
2091 let key = Cow::Borrowed(&raw[key_start..pos]);
2092 let key_source = Some(source_map.span(absolute_start + key_start, absolute_start + pos));
2093
2094 pos = skip_ascii_whitespace(bytes, pos);
2095 if bytes.get(pos) != Some(&b',') {
2096 return None;
2097 }
2098
2099 Some(PartialHeader {
2100 ty,
2101 key,
2102 entry_type_source,
2103 key_source,
2104 delimiter,
2105 field_start: pos + 1,
2106 closing,
2107 })
2108}
2109
2110fn recover_partial_fields<'a>(
2111 raw: &'a str,
2112 source_map: &SourceMap<'a>,
2113 absolute_start: usize,
2114 mut pos: usize,
2115 closing: u8,
2116 preserve_raw: bool,
2117) -> Vec<ParsedField<'a>> {
2118 let bytes = raw.as_bytes();
2119 let mut fields = Vec::new();
2120
2121 loop {
2122 pos = skip_ascii_whitespace(bytes, pos);
2123 let Some(&byte) = bytes.get(pos) else {
2124 break;
2125 };
2126 if byte == closing || byte == b'@' {
2127 break;
2128 }
2129
2130 let field_start = pos;
2131 let name_start = pos;
2132 pos += scan_identifier(&bytes[pos..]);
2133 if pos == name_start {
2134 break;
2135 }
2136 let name_end = pos;
2137 let name = Cow::Borrowed(&raw[name_start..name_end]);
2138
2139 pos = skip_ascii_whitespace(bytes, pos);
2140 if bytes.get(pos) != Some(&b'=') {
2141 break;
2142 }
2143 pos += 1;
2144 pos = skip_ascii_whitespace(bytes, pos);
2145
2146 let value_start = pos;
2147 let tail = &raw[value_start..];
2148 let mut value_input = tail;
2149 let Ok(value) = crate::parser::value::parse_value_field(&mut value_input) else {
2150 break;
2151 };
2152 let consumed = tail.len() - value_input.len();
2153 let value_end = trim_ascii_whitespace_end(bytes, value_start, value_start + consumed);
2154 let boundary = value_start + consumed;
2155 let field_end = match bytes.get(boundary) {
2156 Some(b',') => boundary + 1,
2157 Some(byte) if *byte == closing => boundary,
2158 Some(_) | None => boundary,
2159 };
2160
2161 let field_source =
2162 source_map.span(absolute_start + field_start, absolute_start + field_end);
2163 let value_source =
2164 source_map.span(absolute_start + value_start, absolute_start + value_end);
2165 fields.push(ParsedField {
2166 name,
2167 value: ParsedValue {
2168 value,
2169 raw: preserve_raw.then(|| Cow::Borrowed(&raw[value_start..value_end])),
2170 source: Some(value_source),
2171 expanded: None,
2172 delimiter: Some(value_delimiter(&raw[value_start..value_end])),
2173 },
2174 raw: preserve_raw.then(|| Cow::Borrowed(&raw[field_start..field_end])),
2175 source: Some(field_source),
2176 name_source: Some(
2177 source_map.span(absolute_start + name_start, absolute_start + name_end),
2178 ),
2179 value_source: Some(value_source),
2180 });
2181
2182 match bytes.get(boundary) {
2183 Some(b',') => pos = boundary + 1,
2184 Some(byte) if *byte == closing => break,
2185 _ => break,
2186 }
2187 }
2188
2189 fields
2190}
2191
2192fn diagnostic_for_partial_entry(
2193 entry_index: usize,
2194 failed: &ParsedFailedBlock<'_>,
2195 source_map: &SourceMap<'_>,
2196) -> Diagnostic {
2197 let absolute_start = failed.source.map_or(0, |source| source.byte_start);
2198 let mut diagnostic = diagnostic_for_raw_failure(
2199 entry_index,
2200 &failed.raw,
2201 failed.error.clone(),
2202 failed.source,
2203 Some(source_map),
2204 absolute_start,
2205 None,
2206 );
2207 diagnostic.target = DiagnosticTarget::Entry(entry_index);
2208 diagnostic
2209}
2210
2211fn diagnostic_message(code: &DiagnosticCode, fallback: String) -> String {
2212 match code.as_str() {
2213 "missing-entry-key" => "missing citation key".to_string(),
2214 "missing-field-separator" => "missing field separator".to_string(),
2215 "expected-field-name" => "expected field name".to_string(),
2216 "empty-field-value" => "empty field value".to_string(),
2217 "expected-value-atom" => "expected value atom".to_string(),
2218 "bad-field-boundary" => "expected comma or entry close after field value".to_string(),
2219 "bad-value-boundary" => "expected value after concatenation operator".to_string(),
2220 "unclosed-entry" => "entry ended before its closing delimiter".to_string(),
2221 "unclosed-braced-value" => "braced value ended before its closing brace".to_string(),
2222 "unclosed-quoted-value" => "quoted value ended before its closing quote".to_string(),
2223 _ => fallback,
2224 }
2225}
2226
2227fn classify_failure(raw: &str) -> FailureClassification {
2228 classify_failure_inner(raw).unwrap_or_else(|| FailureClassification {
2229 code: DiagnosticCode::PARSE_ERROR,
2230 range: empty_range(0),
2231 })
2232}
2233
2234fn classify_failure_inner(raw: &str) -> Option<FailureClassification> {
2235 let bytes = raw.as_bytes();
2236 let header = match parse_failure_header(bytes)? {
2237 Ok(header) => header,
2238 Err(classification) => return Some(classification),
2239 };
2240
2241 classify_failure_fields(bytes, header.pos, header.closing)
2242}
2243
/// Result of successfully scanning the header of a failed block.
#[derive(Debug, Clone, Copy)]
struct FailureHeader {
    /// Byte offset just past the comma that follows the citation key.
    pos: usize,
    /// Closing byte matching the opening delimiter (`}` or `)`).
    closing: u8,
}
2249
2250fn parse_failure_header(bytes: &[u8]) -> Option<Result<FailureHeader, FailureClassification>> {
2251 let mut pos = bytes.iter().position(|byte| *byte == b'@')?;
2252 pos += 1;
2253 pos += scan_identifier(&bytes[pos..]);
2254 pos = skip_ascii_whitespace(bytes, pos);
2255
2256 let opening = *bytes.get(pos)?;
2257 let closing = match opening {
2258 b'{' => b'}',
2259 b'(' => b')',
2260 _ => {
2261 return Some(Err(classification(
2262 DiagnosticCode::UNCLOSED_ENTRY,
2263 pos,
2264 bytes.len(),
2265 )));
2266 }
2267 };
2268 pos += 1;
2269 pos = skip_ascii_whitespace(bytes, pos);
2270
2271 let key_len = scan_identifier(&bytes[pos..]);
2272 if key_len == 0 {
2273 return Some(Err(classification(
2274 DiagnosticCode::MISSING_ENTRY_KEY,
2275 pos,
2276 bytes.len(),
2277 )));
2278 }
2279 pos += key_len;
2280 pos = skip_ascii_whitespace(bytes, pos);
2281 if bytes.get(pos) != Some(&b',') {
2282 return Some(Err(classification(
2283 DiagnosticCode::MISSING_FIELD_SEPARATOR,
2284 pos,
2285 bytes.len(),
2286 )));
2287 }
2288 pos += 1;
2289
2290 Some(Ok(FailureHeader { pos, closing }))
2291}
2292
2293fn classify_failure_fields(
2294 bytes: &[u8],
2295 mut pos: usize,
2296 closing: u8,
2297) -> Option<FailureClassification> {
2298 loop {
2299 pos = skip_ascii_whitespace(bytes, pos);
2300 let Some(&byte) = bytes.get(pos) else {
2301 return Some(classification(
2302 DiagnosticCode::UNCLOSED_ENTRY,
2303 pos,
2304 bytes.len(),
2305 ));
2306 };
2307 if byte == closing {
2308 return None;
2309 }
2310 if byte == b'@' {
2311 return Some(classification(
2312 DiagnosticCode::UNCLOSED_ENTRY,
2313 pos,
2314 bytes.len(),
2315 ));
2316 }
2317
2318 let field_name_len = scan_identifier(&bytes[pos..]);
2319 if field_name_len == 0 {
2320 return Some(classification(
2321 DiagnosticCode::EXPECTED_FIELD_NAME,
2322 pos,
2323 bytes.len(),
2324 ));
2325 }
2326 pos += field_name_len;
2327 pos = skip_ascii_whitespace(bytes, pos);
2328 if bytes.get(pos) != Some(&b'=') {
2329 return Some(classification(
2330 DiagnosticCode::MISSING_FIELD_SEPARATOR,
2331 pos,
2332 bytes.len(),
2333 ));
2334 }
2335 pos += 1;
2336 pos = skip_ascii_whitespace(bytes, pos);
2337
2338 let Some(&value_start) = bytes.get(pos) else {
2339 return Some(classification(
2340 DiagnosticCode::EMPTY_FIELD_VALUE,
2341 pos,
2342 bytes.len(),
2343 ));
2344 };
2345 if value_start == b',' || value_start == closing {
2346 return Some(classification(
2347 DiagnosticCode::EMPTY_FIELD_VALUE,
2348 pos,
2349 bytes.len(),
2350 ));
2351 }
2352 if value_start == b'#' {
2353 return Some(classification(
2354 DiagnosticCode::EXPECTED_VALUE_ATOM,
2355 pos,
2356 bytes.len(),
2357 ));
2358 }
2359
2360 match scan_value_sequence(bytes, pos, closing) {
2361 Ok(next_pos) => pos = next_pos,
2362 Err(classification) => return Some(classification),
2363 }
2364 }
2365}
2366
2367fn scan_value_sequence(
2368 bytes: &[u8],
2369 mut pos: usize,
2370 closing: u8,
2371) -> Result<usize, FailureClassification> {
2372 loop {
2373 pos = skip_ascii_whitespace(bytes, pos);
2374 let atom_start = pos;
2375 let Some(&byte) = bytes.get(pos) else {
2376 return Err(classification(
2377 DiagnosticCode::EXPECTED_VALUE_ATOM,
2378 pos,
2379 bytes.len(),
2380 ));
2381 };
2382
2383 match byte {
2384 b'"' => {
2385 pos = skip_quoted_checked(bytes, pos + 1).ok_or_else(|| {
2386 classification(
2387 DiagnosticCode::UNCLOSED_QUOTED_VALUE,
2388 atom_start,
2389 bytes.len(),
2390 )
2391 })?;
2392 }
2393 b'{' => {
2394 pos = skip_braced_checked(bytes, pos + 1).ok_or_else(|| {
2395 classification(
2396 DiagnosticCode::UNCLOSED_BRACED_VALUE,
2397 atom_start,
2398 bytes.len(),
2399 )
2400 })?;
2401 }
2402 b',' => {
2403 return Err(classification(
2404 DiagnosticCode::EMPTY_FIELD_VALUE,
2405 pos,
2406 bytes.len(),
2407 ));
2408 }
2409 b if b == closing => {
2410 return Err(classification(
2411 DiagnosticCode::EMPTY_FIELD_VALUE,
2412 pos,
2413 bytes.len(),
2414 ));
2415 }
2416 b'#' => {
2417 return Err(classification(
2418 DiagnosticCode::EXPECTED_VALUE_ATOM,
2419 pos,
2420 bytes.len(),
2421 ));
2422 }
2423 _ => {
2424 let identifier_len = scan_identifier(&bytes[pos..]);
2425 if identifier_len == 0 {
2426 return Err(classification(
2427 DiagnosticCode::EXPECTED_VALUE_ATOM,
2428 pos,
2429 bytes.len(),
2430 ));
2431 }
2432 pos += identifier_len;
2433 }
2434 }
2435
2436 pos = skip_ascii_whitespace(bytes, pos);
2437 let Some(&boundary) = bytes.get(pos) else {
2438 return Err(classification(
2439 DiagnosticCode::UNCLOSED_ENTRY,
2440 pos,
2441 bytes.len(),
2442 ));
2443 };
2444
2445 match boundary {
2446 b'#' => {
2447 let hash = pos;
2448 pos += 1;
2449 pos = skip_ascii_whitespace(bytes, pos);
2450 if matches!(bytes.get(pos), None | Some(b',' | b'#'))
2451 || bytes.get(pos) == Some(&closing)
2452 {
2453 return Err(classification(
2454 DiagnosticCode::BAD_VALUE_BOUNDARY,
2455 hash,
2456 bytes.len(),
2457 ));
2458 }
2459 }
2460 b',' => return Ok(pos + 1),
2461 b if b == closing => return Ok(pos),
2462 _ => {
2463 return Err(classification(
2464 DiagnosticCode::BAD_FIELD_BOUNDARY,
2465 pos,
2466 bytes.len(),
2467 ));
2468 }
2469 }
2470 }
2471}
2472
2473fn classification(code: DiagnosticCode, pos: usize, len: usize) -> FailureClassification {
2474 FailureClassification {
2475 code,
2476 range: single_byte_range(pos, len),
2477 }
2478}
2479
/// Returns the empty range that starts and ends at `pos`.
const fn empty_range(pos: usize) -> (usize, usize) {
    let anchor = pos;
    (anchor, anchor)
}
2483
/// Returns the one-byte range starting at `pos`, clamped to `len`.
///
/// A position at or beyond `len` yields the empty range `(len, len)`.
fn single_byte_range(pos: usize, len: usize) -> (usize, usize) {
    if pos >= len {
        (len, len)
    } else {
        (pos, pos + 1)
    }
}
2488
2489fn locate_entry(raw: &str, absolute_start: usize, field_count: usize) -> Option<EntryLocations> {
2490 let bytes = raw.as_bytes();
2491 let mut pos = 0;
2492 if bytes.get(pos) != Some(&b'@') {
2493 return None;
2494 }
2495 pos += 1;
2496
2497 let entry_type_start = pos;
2498 pos += scan_identifier(&bytes[pos..]);
2499 if pos == entry_type_start {
2500 return None;
2501 }
2502 let entry_type = (absolute_start + entry_type_start, absolute_start + pos);
2503
2504 pos = skip_ascii_whitespace(bytes, pos);
2505 let opening = *bytes.get(pos)?;
2506 let (delimiter, closing) = match opening {
2507 b'{' => (EntryDelimiter::Braces, b'}'),
2508 b'(' => (EntryDelimiter::Parentheses, b')'),
2509 _ => return None,
2510 };
2511 pos += 1;
2512 pos = skip_ascii_whitespace(bytes, pos);
2513
2514 let key_start = pos;
2515 pos += scan_identifier(&bytes[pos..]);
2516 if pos == key_start {
2517 return None;
2518 }
2519 let key = (absolute_start + key_start, absolute_start + pos);
2520
2521 pos = skip_ascii_whitespace(bytes, pos);
2522 if bytes.get(pos) != Some(&b',') {
2523 return Some(EntryLocations {
2524 entry_type,
2525 key,
2526 delimiter,
2527 fields: Vec::new(),
2528 });
2529 }
2530 pos += 1;
2531
2532 let mut fields = Vec::with_capacity(field_count);
2533 while fields.len() < field_count {
2534 pos = skip_ascii_whitespace(bytes, pos);
2535 if bytes.get(pos) == Some(&closing) || pos >= bytes.len() {
2536 break;
2537 }
2538
2539 let field_start = pos;
2540 let name_start = pos;
2541 pos += scan_identifier(&bytes[pos..]);
2542 if pos == name_start {
2543 break;
2544 }
2545 let name_end = pos;
2546
2547 pos = skip_ascii_whitespace(bytes, pos);
2548 if bytes.get(pos) != Some(&b'=') {
2549 break;
2550 }
2551 pos += 1;
2552 pos = skip_ascii_whitespace(bytes, pos);
2553
2554 let value_start = pos;
2555 let boundary = find_value_boundary(bytes, pos, closing);
2556 let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2557 let mut whole_end = value_end;
2558 pos = boundary;
2559 if bytes.get(pos) == Some(&b',') {
2560 whole_end = pos + 1;
2561 pos += 1;
2562 }
2563
2564 fields.push(FieldLocations {
2565 whole: (absolute_start + field_start, absolute_start + whole_end),
2566 name: (absolute_start + name_start, absolute_start + name_end),
2567 value: (absolute_start + value_start, absolute_start + value_end),
2568 value_delimiter: value_delimiter(&raw[value_start..value_end]),
2569 });
2570 }
2571
2572 Some(EntryLocations {
2573 entry_type,
2574 key,
2575 delimiter,
2576 fields,
2577 })
2578}
2579
2580fn value_delimiter(raw_value: &str) -> ValueDelimiter {
2581 let trimmed = raw_value.trim_start();
2582 if has_top_level_concat(trimmed.as_bytes()) {
2583 return ValueDelimiter::Concatenation;
2584 }
2585
2586 match trimmed.as_bytes().first() {
2587 Some(b'{') => ValueDelimiter::Braces,
2588 Some(b'"') => ValueDelimiter::Quotes,
2589 _ => ValueDelimiter::Bare,
2590 }
2591}
2592
2593fn locate_definition_value(raw: &str) -> Option<&str> {
2594 let bytes = raw.as_bytes();
2595 let equals = bytes.iter().position(|byte| *byte == b'=')?;
2596 let value_start = skip_ascii_whitespace(bytes, equals + 1);
2597 let closing = enclosing_close_byte(bytes)?;
2598 let boundary = find_value_boundary(bytes, value_start, closing);
2599 let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2600 raw.get(value_start..value_end)
2601}
2602
2603fn locate_preamble_value(raw: &str) -> Option<&str> {
2604 let bytes = raw.as_bytes();
2605 let opening = bytes.iter().position(|byte| matches!(byte, b'{' | b'('))?;
2606 let closing = match bytes[opening] {
2607 b'{' => b'}',
2608 b'(' => b')',
2609 _ => return None,
2610 };
2611 let value_start = skip_ascii_whitespace(bytes, opening + 1);
2612 let boundary = find_value_boundary(bytes, value_start, closing);
2613 let value_end = trim_ascii_whitespace_end(bytes, value_start, boundary);
2614 raw.get(value_start..value_end)
2615}
2616
/// Returns the closing delimiter that pairs with the first `{` or `(` in
/// `bytes`, or `None` when neither opener is present.
fn enclosing_close_byte(bytes: &[u8]) -> Option<u8> {
    bytes.iter().find_map(|byte| match byte {
        b'{' => Some(b'}'),
        b'(' => Some(b')'),
        _ => None,
    })
}
2625
2626fn has_top_level_concat(bytes: &[u8]) -> bool {
2627 let mut pos = 0;
2628 while let Some(&byte) = bytes.get(pos) {
2629 match byte {
2630 b'{' => pos = skip_braced(bytes, pos + 1),
2631 b'"' => pos = skip_quoted(bytes, pos + 1),
2632 b'#' => return true,
2633 _ => pos += 1,
2634 }
2635 }
2636 false
2637}
2638
/// Advances `pos` past any run of space, tab, line feed, or carriage return
/// bytes and returns the new offset.
///
/// `pos` is returned unchanged when it lies beyond the end of `bytes`.
fn skip_ascii_whitespace(bytes: &[u8], pos: usize) -> usize {
    let run = bytes.get(pos..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
            .count()
    });
    pos + run
}
2645
/// Moves `end` backwards over trailing space, tab, line feed, or carriage
/// return bytes, never going below `start`, and returns the new end offset.
///
/// `end` is returned unchanged when `start..end` is not a valid range into
/// `bytes` (for example when `end` exceeds the slice length).
fn trim_ascii_whitespace_end(bytes: &[u8], start: usize, end: usize) -> usize {
    bytes.get(start..end).map_or(end, |window| {
        let trailing = window
            .iter()
            .rev()
            .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
            .count();
        end - trailing
    })
}
2652
2653fn scan_identifier(bytes: &[u8]) -> usize {
2654 bytes
2655 .iter()
2656 .position(|byte| !is_identifier_byte(*byte))
2657 .unwrap_or(bytes.len())
2658}
2659
/// Returns `true` for bytes allowed in an identifier: ASCII letters, ASCII
/// digits, and the punctuation `_`, `-`, `:`, `.`.
const fn is_identifier_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
2666
2667fn find_value_boundary(bytes: &[u8], mut pos: usize, closing: u8) -> usize {
2668 while let Some(&byte) = bytes.get(pos) {
2669 match byte {
2670 b'{' => pos = skip_braced(bytes, pos + 1),
2671 b'"' => pos = skip_quoted(bytes, pos + 1),
2672 b',' => break,
2673 b if b == closing => break,
2674 _ => pos += 1,
2675 }
2676 }
2677 pos
2678}
2679
/// Returns the offset just past the `}` that closes a braced group whose
/// opening `{` precedes `pos`.
///
/// Nested `{ ... }` pairs are balanced, and a backslash causes the byte after
/// it to be skipped without inspection. When the group is never closed, the
/// scan stops at the end of input and that offset is returned; a `pos`
/// already beyond the input is returned unchanged.
fn skip_braced(bytes: &[u8], pos: usize) -> usize {
    let mut cursor = pos;
    // Counts unclosed braces, including the opener before `pos`.
    let mut open = 1usize;
    while cursor < bytes.len() {
        let step = match bytes[cursor] {
            b'\\' => 2,
            b'{' => {
                open += 1;
                1
            }
            b'}' => {
                open -= 1;
                if open == 0 {
                    return cursor + 1;
                }
                1
            }
            _ => 1,
        };
        // A trailing backslash must not push the cursor past the input.
        cursor = (cursor + step).min(bytes.len());
    }
    cursor
}
2699
/// Returns the offset just past the `}` that closes a braced group whose
/// opening `{` precedes `pos`, or `None` when the input ends first.
///
/// Nested `{ ... }` pairs are balanced, and a backslash causes the byte after
/// it to be skipped without inspection.
fn skip_braced_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    let mut cursor = pos;
    // Counts unclosed braces, including the opener before `pos`.
    let mut open = 1usize;
    while cursor < bytes.len() {
        let step = match bytes[cursor] {
            b'\\' => 2,
            b'{' => {
                open += 1;
                1
            }
            b'}' => {
                open -= 1;
                if open == 0 {
                    return Some(cursor + 1);
                }
                1
            }
            _ => 1,
        };
        // A trailing backslash must not push the cursor past the input.
        cursor = (cursor + step).min(bytes.len());
    }
    None
}
2719
/// Returns the offset just past the `"` that closes a quoted group whose
/// opening quote precedes `pos`.
///
/// A backslash causes the byte after it to be skipped without inspection, so
/// `\"` does not end the group. When no closing quote is found the end of
/// input is returned; a `pos` already beyond the input is returned unchanged.
fn skip_quoted(bytes: &[u8], pos: usize) -> usize {
    let rest = match bytes.get(pos..) {
        Some(rest) => rest,
        None => return pos,
    };
    // Set after a backslash so the following byte is passed over blindly.
    let mut escaped = false;
    for (offset, &byte) in rest.iter().enumerate() {
        if escaped {
            escaped = false;
        } else if byte == b'\\' {
            escaped = true;
        } else if byte == b'"' {
            return pos + offset + 1;
        }
    }
    bytes.len()
}
2730
/// Returns the offset just past the `"` that closes a quoted group whose
/// opening quote precedes `pos`, or `None` when the input ends first.
///
/// A backslash causes the byte after it to be skipped without inspection, so
/// `\"` does not end the group.
fn skip_quoted_checked(bytes: &[u8], pos: usize) -> Option<usize> {
    // Set after a backslash so the following byte is passed over blindly.
    let mut escaped = false;
    for (offset, &byte) in bytes.get(pos..)?.iter().enumerate() {
        if escaped {
            escaped = false;
        } else if byte == b'\\' {
            escaped = true;
        } else if byte == b'"' {
            return Some(pos + offset + 1);
        }
    }
    None
}