1use crate::{
4 Diagnostic, ParseEvent, ParseStatus, ParsedDocument, ParsedEntry, ParsedSource, SourceId,
5 SourceSpan,
6};
7use std::borrow::Cow;
8use std::collections::BTreeMap;
9
/// A named piece of input text, borrowed for the lifetime `'a`.
///
/// Both fields are plain string slices, so the value is `Copy` and cheap to
/// pass around.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CorpusSource<'a> {
    /// Name attached to this source.
    pub name: &'a str,
    /// The borrowed input text of this source.
    pub input: &'a str,
}
18
/// An event tagged with the source it belongs to.
///
/// NOTE(review): the producer of these events is not visible in this part of
/// the file; presumably they are emitted in `SourceStart`, `Event`*,
/// `SourceEnd` order per source — confirm against the emitter.
#[derive(Debug, Clone, PartialEq)]
pub enum CorpusEvent<'a> {
    /// Carries the [`ParsedSource`] descriptor for a source that is starting.
    SourceStart(ParsedSource<'a>),
    /// A single parse event attributed to one source.
    Event {
        /// Identifier of the source the event belongs to.
        source: SourceId,
        /// The wrapped parse event, stored behind a `Box`.
        event: Box<ParseEvent<'a>>,
    },
    /// Carries the [`ParsedSource`] descriptor for a source that has ended.
    SourceEnd(ParsedSource<'a>),
}
34
impl<'a> CorpusSource<'a> {
    /// Creates a source from a `name` and its `input` text.
    ///
    /// This is a `const fn` that only stores the two borrowed slices; nothing
    /// is copied or validated.
    #[must_use]
    pub const fn new(name: &'a str, input: &'a str) -> Self {
        Self { name, input }
    }
}
42
/// The result of parsing several documents, plus data derived across them.
///
/// Everything except `documents` is computed once in
/// `ParsedCorpus::from_documents` and then only read by the accessors.
#[derive(Debug, Clone)]
pub struct ParsedCorpus<'a> {
    // The parsed documents, in the order they were supplied.
    documents: Vec<ParsedDocument<'a>>,
    // Clones of every document's sources, flattened in document order.
    sources: Vec<ParsedSource<'a>>,
    // Keys that occur in two or more entries anywhere in the corpus.
    duplicate_keys: Vec<DuplicateKeyGroup>,
    // Aggregate status derived from the per-document statuses.
    status: ParseStatus,
}
51
52impl<'a> ParsedCorpus<'a> {
53 pub(crate) fn from_documents(documents: Vec<ParsedDocument<'a>>) -> Self {
54 let sources = documents
55 .iter()
56 .flat_map(|document| document.sources().iter().cloned())
57 .collect::<Vec<_>>();
58 let duplicate_keys = find_duplicate_keys(&documents);
59 let status = corpus_status(&documents);
60
61 Self {
62 documents,
63 sources,
64 duplicate_keys,
65 status,
66 }
67 }
68
69 #[must_use]
71 pub fn documents(&self) -> &[ParsedDocument<'a>] {
72 &self.documents
73 }
74
75 #[must_use]
77 pub fn sources(&self) -> &[ParsedSource<'a>] {
78 &self.sources
79 }
80
81 #[must_use]
83 pub fn source(&self, id: SourceId) -> Option<&ParsedSource<'a>> {
84 self.sources.iter().find(|source| source.id == id)
85 }
86
87 pub fn entries(&self) -> impl Iterator<Item = &ParsedEntry<'a>> + '_ {
89 self.documents
90 .iter()
91 .flat_map(|document| document.entries().iter())
92 }
93
94 pub fn diagnostics(&self) -> impl Iterator<Item = &Diagnostic> + '_ {
96 self.documents
97 .iter()
98 .flat_map(|document| document.diagnostics().iter())
99 }
100
101 #[must_use]
103 pub fn duplicate_keys(&self) -> &[DuplicateKeyGroup] {
104 &self.duplicate_keys
105 }
106
107 #[must_use]
109 pub const fn status(&self) -> ParseStatus {
110 self.status
111 }
112}
113
/// All occurrences of one entry key that appears more than once in a corpus.
///
/// Groups are built by `find_duplicate_keys`, which only emits a group when a
/// key has at least two occurrences.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicateKeyGroup {
    /// The duplicated key, as an owned string.
    pub key: String,
    /// Every place the key occurs, in document order and then entry order.
    pub occurrences: Vec<DuplicateKeyOccurrence>,
    /// `true` when the occurrences do not all share the same source id.
    pub cross_source: bool,
}
124
impl DuplicateKeyGroup {
    /// Returns `true` when the group is not flagged as `cross_source`.
    ///
    /// This is simply the negation of the `cross_source` field.
    #[must_use]
    pub const fn is_same_source(&self) -> bool {
        !self.cross_source
    }
}
132
/// One location at which a duplicated key was found.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicateKeyOccurrence {
    /// Source the entry is attributed to: the source id from the entry's span
    /// when present, otherwise an id built from the document index.
    pub source: SourceId,
    /// Name of that source, when the owning document lists a source with a
    /// matching id and that source has a name.
    pub source_name: Option<String>,
    /// Zero-based position of the owning document within the corpus.
    pub document_index: usize,
    /// Zero-based position of the entry within its document.
    pub entry_index: usize,
    /// The entry's `key_source` span, copied as-is (may be absent).
    pub key_source: Option<SourceSpan>,
}
147
148fn find_duplicate_keys(documents: &[ParsedDocument<'_>]) -> Vec<DuplicateKeyGroup> {
149 let mut groups: BTreeMap<String, Vec<DuplicateKeyOccurrence>> = BTreeMap::new();
150
151 for (document_index, document) in documents.iter().enumerate() {
152 for (entry_index, entry) in document.entries().iter().enumerate() {
153 let source = entry
154 .source
155 .and_then(|span| span.source)
156 .unwrap_or_else(|| SourceId::new(document_index));
157 let source_name = document
158 .sources()
159 .iter()
160 .find(|parsed_source| parsed_source.id == source)
161 .and_then(|parsed_source| parsed_source.name.as_ref())
162 .map(Cow::as_ref)
163 .map(ToOwned::to_owned);
164
165 groups
166 .entry(entry.key().to_string())
167 .or_default()
168 .push(DuplicateKeyOccurrence {
169 source,
170 source_name,
171 document_index,
172 entry_index,
173 key_source: entry.key_source,
174 });
175 }
176 }
177
178 groups
179 .into_iter()
180 .filter_map(|(key, occurrences)| {
181 if occurrences.len() < 2 {
182 return None;
183 }
184 let first_source = occurrences[0].source;
185 let cross_source = occurrences
186 .iter()
187 .any(|occurrence| occurrence.source != first_source);
188 Some(DuplicateKeyGroup {
189 key,
190 occurrences,
191 cross_source,
192 })
193 })
194 .collect()
195}
196
197fn corpus_status(documents: &[ParsedDocument<'_>]) -> ParseStatus {
198 let has_content = documents.iter().any(|document| {
199 !document.entries().is_empty()
200 || !document.strings().is_empty()
201 || !document.preambles().is_empty()
202 });
203 let has_problem = documents
204 .iter()
205 .any(|document| document.status() != ParseStatus::Ok);
206
207 if !has_problem {
208 ParseStatus::Ok
209 } else if has_content {
210 ParseStatus::Partial
211 } else {
212 ParseStatus::Failed
213 }
214}