use crate::{
Diagnostic, ParseEvent, ParseStatus, ParsedDocument, ParsedEntry, ParsedSource, SourceId,
SourceSpan,
};
use std::borrow::Cow;
use std::collections::BTreeMap;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CorpusSource<'a> {
pub name: &'a str,
pub input: &'a str,
}
#[derive(Debug, Clone, PartialEq)]
pub enum CorpusEvent<'a> {
SourceStart(ParsedSource<'a>),
Event {
source: SourceId,
event: Box<ParseEvent<'a>>,
},
SourceEnd(ParsedSource<'a>),
}
impl<'a> CorpusSource<'a> {
#[must_use]
pub const fn new(name: &'a str, input: &'a str) -> Self {
Self { name, input }
}
}
#[derive(Debug, Clone)]
pub struct ParsedCorpus<'a> {
documents: Vec<ParsedDocument<'a>>,
sources: Vec<ParsedSource<'a>>,
duplicate_keys: Vec<DuplicateKeyGroup>,
status: ParseStatus,
}
impl<'a> ParsedCorpus<'a> {
pub(crate) fn from_documents(documents: Vec<ParsedDocument<'a>>) -> Self {
let sources = documents
.iter()
.flat_map(|document| document.sources().iter().cloned())
.collect::<Vec<_>>();
let duplicate_keys = find_duplicate_keys(&documents);
let status = corpus_status(&documents);
Self {
documents,
sources,
duplicate_keys,
status,
}
}
#[must_use]
pub fn documents(&self) -> &[ParsedDocument<'a>] {
&self.documents
}
#[must_use]
pub fn sources(&self) -> &[ParsedSource<'a>] {
&self.sources
}
#[must_use]
pub fn source(&self, id: SourceId) -> Option<&ParsedSource<'a>> {
self.sources.iter().find(|source| source.id == id)
}
pub fn entries(&self) -> impl Iterator<Item = &ParsedEntry<'a>> + '_ {
self.documents
.iter()
.flat_map(|document| document.entries().iter())
}
pub fn diagnostics(&self) -> impl Iterator<Item = &Diagnostic> + '_ {
self.documents
.iter()
.flat_map(|document| document.diagnostics().iter())
}
#[must_use]
pub fn duplicate_keys(&self) -> &[DuplicateKeyGroup] {
&self.duplicate_keys
}
#[must_use]
pub const fn status(&self) -> ParseStatus {
self.status
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicateKeyGroup {
pub key: String,
pub occurrences: Vec<DuplicateKeyOccurrence>,
pub cross_source: bool,
}
impl DuplicateKeyGroup {
#[must_use]
pub const fn is_same_source(&self) -> bool {
!self.cross_source
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicateKeyOccurrence {
pub source: SourceId,
pub source_name: Option<String>,
pub document_index: usize,
pub entry_index: usize,
pub key_source: Option<SourceSpan>,
}
fn find_duplicate_keys(documents: &[ParsedDocument<'_>]) -> Vec<DuplicateKeyGroup> {
let mut groups: BTreeMap<String, Vec<DuplicateKeyOccurrence>> = BTreeMap::new();
for (document_index, document) in documents.iter().enumerate() {
for (entry_index, entry) in document.entries().iter().enumerate() {
let source = entry
.source
.and_then(|span| span.source)
.unwrap_or_else(|| SourceId::new(document_index));
let source_name = document
.sources()
.iter()
.find(|parsed_source| parsed_source.id == source)
.and_then(|parsed_source| parsed_source.name.as_ref())
.map(Cow::as_ref)
.map(ToOwned::to_owned);
groups
.entry(entry.key().to_string())
.or_default()
.push(DuplicateKeyOccurrence {
source,
source_name,
document_index,
entry_index,
key_source: entry.key_source,
});
}
}
groups
.into_iter()
.filter_map(|(key, occurrences)| {
if occurrences.len() < 2 {
return None;
}
let first_source = occurrences[0].source;
let cross_source = occurrences
.iter()
.any(|occurrence| occurrence.source != first_source);
Some(DuplicateKeyGroup {
key,
occurrences,
cross_source,
})
})
.collect()
}
fn corpus_status(documents: &[ParsedDocument<'_>]) -> ParseStatus {
let has_content = documents.iter().any(|document| {
!document.entries().is_empty()
|| !document.strings().is_empty()
|| !document.preambles().is_empty()
});
let has_problem = documents
.iter()
.any(|document| document.status() != ParseStatus::Ok);
if !has_problem {
ParseStatus::Ok
} else if has_content {
ParseStatus::Partial
} else {
ParseStatus::Failed
}
}