use std::{
collections::HashMap,
fs::File,
io::{BufRead, BufReader},
path::{Path, PathBuf},
string::FromUtf8Error,
};
use audec::auto_decompress;
use log::debug;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::{
compression::Compression,
event::{Event, Weights},
formats::FileFormat,
hepmc2::HepMCParser,
progress_bar::{Progress, ProgressBar},
traits::{Rewind, TryConvert, UpdateWeights},
util::trim_ascii_start,
};
#[cfg(feature = "lhef")]
use crate::lhef::LHEFParser;
#[cfg(feature = "ntuple")]
use crate::ntuple::NTupleConverter;
#[cfg(feature = "stripper-xml")]
use crate::stripper_xml::StripperXmlParser;
/// Leading bytes that identify an input file as a ROOT file during format
/// detection.
const ROOT_MAGIC_BYTES: [u8; 4] = *b"root";
/// Reader for a single event file.
///
/// Thin wrapper around a boxed [`EventFileReader`] trait object. The concrete
/// reader is selected from the detected file format in
/// [`FileReader::try_new`].
pub struct FileReader(Box<dyn EventFileReader>);
impl FileReader {
pub fn try_new(infile: PathBuf) -> Result<Self, CreateError> {
let format = detect_event_file_format(&infile)?;
debug!("Read {infile:?} as {format:?} file");
let reader: Box<dyn EventFileReader> = match format {
FileFormat::HepMC2 => {
use crate::hepmc2::FileReader as HepMCReader;
Box::new(HepMCReader::try_new(infile)?)
}
#[cfg(feature = "lhef")]
FileFormat::Lhef => {
use crate::lhef::FileReader as LhefReader;
Box::new(LhefReader::try_new(infile)?)
}
#[cfg(feature = "ntuple")]
FileFormat::BlackHatNtuple => {
use crate::ntuple::FileReader as NTupleReader;
Box::new(NTupleReader::try_new(infile)?)
}
#[cfg(feature = "stripper-xml")]
FileFormat::StripperXml => {
use crate::stripper_xml::FileReader as XMLReader;
Box::new(XMLReader::try_new(infile)?)
}
};
Ok(Self(reader))
}
}
impl EventFileReader for FileReader {
    /// Forward to the wrapped reader's `path`.
    fn path(&self) -> &Path {
        let Self(inner) = self;
        inner.path()
    }

    /// Forward to the wrapped reader's `header`.
    fn header(&self) -> &[u8] {
        let Self(inner) = self;
        inner.header()
    }
}
impl Rewind for FileReader {
    type Error = CreateError;

    /// Rewind the wrapped reader, passing its result through unchanged.
    fn rewind(&mut self) -> Result<(), Self::Error> {
        let Self(inner) = self;
        inner.rewind()
    }
}
impl Iterator for FileReader {
type Item = Result<EventRecord, ReadError>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
/// Event I/O for a single pair of input and output files.
///
/// Thin wrapper around a boxed [`EventFileIO`] trait object, i.e. something
/// that iterates over event records, can be rewound, and supports weight
/// updates. Instances are created with [`IOBuilder::build_from_files`].
pub struct FileIO(Box<dyn EventFileIO>);
impl Rewind for FileIO {
    type Error = FileIOError;

    /// Rewind the wrapped I/O object, passing its result through unchanged.
    fn rewind(&mut self) -> Result<(), Self::Error> {
        let Self(inner) = self;
        inner.rewind()
    }
}
impl Iterator for FileIO {
type Item = Result<EventRecord, FileIOError>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
/// Builder for [`FileIO`] and [`CombinedFileIO`] objects.
///
/// Collects the settings that are forwarded to the format-specific I/O
/// constructors.
#[derive(Clone, Debug, Default)]
pub struct IOBuilder {
// Weight scaling; only forwarded to the STRIPPER XML backend.
// NOTE(review): presumably keyed by file or channel name - confirm against
// `stripper_xml::extract_scaling`.
scaling: HashMap<String, f64>,
// Compression setting forwarded to the format-specific I/O constructors
// (not passed to the ntuple backend).
compression: Option<Compression>,
// Weight names forwarded to every format-specific I/O constructor.
weight_names: Vec<String>,
}
impl IOBuilder {
pub fn compression(
&mut self,
compression: Option<Compression>,
) -> &mut Self {
self.compression = compression;
self
}
pub fn weight_names(&mut self, weight_names: Vec<String>) -> &mut Self {
self.weight_names = weight_names;
self
}
pub fn build_from_files(
self,
infile: PathBuf,
outfile: PathBuf,
) -> Result<FileIO, CreateError> {
let IOBuilder {
scaling,
compression,
weight_names,
} = self;
let _scaling = scaling;
let format = detect_event_file_format(&infile)?;
debug!("Read {infile:?} as {format:?} file");
let io: Box<dyn EventFileIO> = match format {
FileFormat::HepMC2 => {
use crate::hepmc2::FileIO as HepMCIO;
Box::new(HepMCIO::try_new(
infile,
outfile,
compression,
weight_names,
)?)
}
#[cfg(feature = "lhef")]
FileFormat::Lhef => {
use crate::lhef::FileIO as LHEFIO;
Box::new(LHEFIO::try_new(
infile,
outfile,
compression,
weight_names,
)?)
}
#[cfg(feature = "ntuple")]
FileFormat::BlackHatNtuple => {
use crate::ntuple::FileIO as NTupleIO;
Box::new(NTupleIO::try_new(infile, outfile, weight_names)?)
}
#[cfg(feature = "stripper-xml")]
FileFormat::StripperXml => {
use crate::stripper_xml::FileIO as XMLIO;
Box::new(XMLIO::try_new(
infile,
outfile,
compression,
weight_names,
&_scaling,
)?)
}
};
Ok(FileIO(io))
}
pub fn build_from_files_iter<I, P, Q>(
self,
files: I,
) -> Result<CombinedFileIO, CombinedBuildError>
where
I: IntoIterator<Item = (P, Q)>,
P: AsRef<Path>,
Q: AsRef<Path>,
{
#[cfg(feature = "stripper-xml")]
{
let (files, scaling) = crate::stripper_xml::extract_scaling(files)?;
let mut builder = self;
builder.scaling = scaling;
Ok(builder.build_from_files_iter_known_scaling(files)?)
}
#[cfg(not(feature = "stripper-xml"))]
Ok(self.build_from_files_iter_known_scaling(files)?)
}
fn build_from_files_iter_known_scaling<I, P, Q>(
self,
files: I,
) -> Result<CombinedFileIO, FileIOError>
where
I: IntoIterator<Item = (P, Q)>,
P: AsRef<Path>,
Q: AsRef<Path>,
{
let files = Vec::from_iter(files.into_iter().map(|(source, sink)| {
let infile = source.as_ref().to_path_buf();
let outfile = sink.as_ref().to_path_buf();
IOFiles { infile, outfile }
}));
CombinedFileIO::new(files, self)
}
}
pub fn detect_event_file_format(
infile: &Path,
) -> Result<FileFormat, CreateError> {
use CreateError::*;
use FileFormat::*;
let file = File::open(infile).map_err(OpenInput)?;
let mut r = auto_decompress(BufReader::new(file));
let Ok(bytes) = r.fill_buf() else {
return Ok(HepMC2);
};
if bytes.starts_with(&ROOT_MAGIC_BYTES) {
#[cfg(not(feature = "ntuple"))]
return Err(RootUnsupported);
#[cfg(feature = "ntuple")]
return Ok(BlackHatNtuple);
}
if trim_ascii_start(bytes).starts_with(b"<?xml") {
#[cfg(not(feature = "stripper-xml"))]
return Err(XMLUnsupported);
#[cfg(feature = "stripper-xml")]
return Ok(StripperXml);
}
#[cfg(feature = "lhef")]
if bytes.starts_with(b"<LesHouchesEvents") {
return Ok(Lhef);
}
Ok(HepMC2)
}
impl UpdateWeights for FileIO {
    type Error = FileIOError;

    /// Forward to the wrapped I/O object's `update_all_weights`.
    fn update_all_weights(
        &mut self,
        weights: &[Weights],
    ) -> Result<usize, Self::Error> {
        let Self(inner) = self;
        inner.update_all_weights(weights)
    }

    /// Forward to the wrapped I/O object's `update_next_weights`.
    fn update_next_weights(
        &mut self,
        weights: &Weights,
    ) -> Result<bool, Self::Error> {
        let Self(inner) = self;
        inner.update_next_weights(weights)
    }

    /// Forward to the wrapped I/O object's `finish_weight_update`.
    fn finish_weight_update(&mut self) -> Result<(), Self::Error> {
        let Self(inner) = self;
        inner.finish_weight_update()
    }
}
/// Error returned by [`IOBuilder::build_from_files_iter`].
#[derive(Debug, Error)]
pub enum CombinedBuildError {
/// The combined I/O object could not be created.
#[error("Failed to build file-based event I/O object")]
FileIO(#[from] FileIOError),
/// The weight scaling could not be extracted from the input files.
/// Only present with the `stripper-xml` feature.
#[cfg(feature = "stripper-xml")]
#[error("Failed to extract weight scaling")]
WeightScaling(#[from] CreateError),
}
/// An I/O error together with the input and output files it relates to.
#[derive(Debug, Error)]
#[error("Error in event I/O reading from {infile} and writing to {outfile}")]
pub struct FileIOError {
// File that events are read from.
infile: PathBuf,
// File that events are written to.
outfile: PathBuf,
// The underlying error.
source: ErrorKind,
}
impl FileIOError {
pub fn new(infile: PathBuf, outfile: PathBuf, source: ErrorKind) -> Self {
Self {
infile,
outfile,
source,
}
}
pub fn infile(&self) -> &PathBuf {
&self.infile
}
pub fn outfile(&self) -> &PathBuf {
&self.outfile
}
}
/// The stage of event I/O at which an error occurred.
#[derive(Debug, Error)]
pub enum ErrorKind {
/// An I/O object could not be created.
#[error("Failed to create event I/O object")]
Create(#[from] CreateError),
/// An event could not be read.
#[error("Failed to read event")]
Read(#[from] ReadError),
/// An event could not be written.
#[error("Failed to write event")]
Write(#[from] WriteError),
}
#[derive(Debug, Error)]
pub enum CreateError {
#[error("Failed to open input file")]
OpenInput(#[source] std::io::Error),
#[error("Failed to read from input file")]
Read(#[source] std::io::Error),
#[error("Failed to create target file")]
CreateTarget(#[source] std::io::Error),
#[error("Failed to compress target file")]
CompressTarget(#[source] std::io::Error),
#[error("Failed to compress target file")]
Write(#[source] std::io::Error),
#[error("UTF8 error")]
Utf8(#[from] Utf8Error),
#[cfg(not(feature = "ntuple"))]
#[error("Support for ROOT ntuple format is not enabled. Reinstall cres with `cargo install cres --features=ntuple`")]
RootUnsupported,
#[cfg(not(feature = "stripper-xml"))]
#[error("Support for STRIPPER XML format is not enabled. Reinstall cres with `cargo install cres --features=stripper-xml`")]
XMLUnsupported,
#[cfg(feature = "ntuple")]
#[error("{0}")]
NTuple(String),
#[cfg(feature = "stripper-xml")]
#[error("XML Error in input file")]
XMLError(#[from] crate::stripper_xml::Error),
}
/// Common wrapper for the two UTF-8 error types of the standard library.
#[derive(Debug, Error)]
pub enum Utf8Error {
/// Wraps [`std::str::Utf8Error`].
#[error("UTF8 error")]
Utf8(#[from] std::str::Utf8Error),
/// Wraps [`std::string::FromUtf8Error`].
#[error("UTF8 error")]
FromUtf8(#[from] FromUtf8Error),
}
#[derive(Debug, Error)]
pub enum ReadError {
#[error("I/O error")]
IO(#[from] std::io::Error),
#[error("Failed to find {0} in {1}")]
FindEntry(&'static str, String),
#[error("Failed to find weight\"{0}\": Event has weights {1}")]
FindWeight(String, String),
#[error("{value} is not a valid value for {entry} in {record}")]
InvalidEntry {
value: String,
entry: &'static str,
record: String,
},
#[error("Failed to parse {0} in {1}")]
ParseEntry(&'static str, String),
#[error("Failed to recognise {0} in {1}")]
UnrecognisedEntry(&'static str, String),
#[error("UTF8 error")]
Utf8(#[from] Utf8Error),
#[cfg(feature = "ntuple")]
#[error("Failed to read NTuple record")]
NTuple(#[from] ntuple::reader::ReadError),
#[cfg(feature = "stripper-xml")]
#[error("XML Error in input file")]
XMLError(#[from] crate::stripper_xml::Error),
}
/// Error while writing an event.
#[derive(Debug, Error)]
pub enum WriteError {
/// The underlying I/O operation failed.
#[error("I/O error")]
IO(#[from] std::io::Error),
}
/// A pair of input and output file paths handled by [`CombinedFileIO`].
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
struct IOFiles {
// File that events are read from.
infile: PathBuf,
// File that events are written to.
outfile: PathBuf,
}
/// Event I/O over a sequence of input/output file pairs.
///
/// The pairs are processed one after the other; only one [`FileIO`] is kept
/// open at a time.
pub struct CombinedFileIO {
// All input/output pairs, in processing order.
files: Vec<IOFiles>,
// I/O object for the most recently opened pair; `None` if there are no
// files or after a rewind.
current: Option<FileIO>,
// Index into `files` of the most recently opened pair.
current_file_idx: usize,
// Settings; cloned each time a file pair is opened.
builder: IOBuilder,
// Number of items returned by the iterator since construction or the last
// rewind.
nevents_read: usize,
// Sum of the size hints of all file pairs, computed once during
// construction.
total_size_hint: (usize, Option<usize>),
}
impl CombinedFileIO {
    /// Create combined I/O over `files`, using `builder` to open each pair.
    ///
    /// Every pair is opened once up front to accumulate the total size hint.
    fn new(
        files: Vec<IOFiles>,
        builder: IOBuilder,
    ) -> Result<Self, FileIOError> {
        let mut combined = Self {
            files,
            builder,
            current: None,
            current_file_idx: 0,
            nevents_read: 0,
            total_size_hint: (0, Some(0)),
        };
        combined.init()?;
        Ok(combined)
    }

    /// Open the file pair at position `idx` and make it the current one.
    ///
    /// On failure the previously opened I/O object and index are left
    /// untouched, and the error is tagged with the paths of the pair that
    /// failed to open.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of range for the list of files.
    fn open(&mut self, idx: usize) -> Result<(), FileIOError> {
        let IOFiles { infile, outfile } = self.files[idx].clone();
        let io = match self.builder.clone().build_from_files(infile, outfile)
        {
            Ok(io) => io,
            Err(source) => {
                // The paths were moved into the builder call, so take a
                // fresh copy for the error report.
                let IOFiles { infile, outfile } = self.files[idx].clone();
                return Err(FileIOError {
                    infile,
                    outfile,
                    source: source.into(),
                });
            }
        };
        self.current = Some(io);
        self.current_file_idx = idx;
        Ok(())
    }

    /// Accumulate the size hints of all file pairs, then open the first one.
    ///
    /// Does nothing if there are no files.
    fn init(&mut self) -> Result<(), FileIOError> {
        let nfiles = self.files.len();
        if nfiles == 0 {
            return Ok(());
        }
        for idx in 0..nfiles {
            self.open(idx)?;
            // `open` has just succeeded, so `current` is populated.
            let hint = self.current.as_ref().unwrap().size_hint();
            self.total_size_hint =
                combine_size_hints(self.total_size_hint, hint);
        }
        self.open(0)
    }
}
/// Add up two iterator size hints, as for two iterators that are consumed
/// one after the other.
///
/// The lower bounds are added with saturation at `usize::MAX`. The upper
/// bound is the sum of both upper bounds, or `None` if either is unknown or
/// the sum does not fit into a `usize`. Overflow therefore degrades the hint
/// instead of panicking (debug builds) or wrapping around (release builds).
fn combine_size_hints(
    h: (usize, Option<usize>),
    g: (usize, Option<usize>),
) -> (usize, Option<usize>) {
    let lower = h.0.saturating_add(g.0);
    let upper = match (h.1, g.1) {
        (Some(h_max), Some(g_max)) => h_max.checked_add(g_max),
        _ => None,
    };
    (lower, upper)
}
impl Rewind for CombinedFileIO {
    type Error = FileIOError;

    /// Go back to the start of the event sequence.
    ///
    /// The currently open I/O object is dropped and the event counter is
    /// reset; the first file pair is reopened by the next call to
    /// [`Iterator::next`]. This never fails.
    fn rewind(&mut self) -> Result<(), Self::Error> {
        self.nevents_read = 0;
        drop(self.current.take());
        Ok(())
    }
}
impl Iterator for CombinedFileIO {
    type Item = <FileIO as Iterator>::Item;

    /// Return the next event record, moving on to the next file pair
    /// whenever the current one is exhausted.
    ///
    /// Returns `None` once the last file pair is exhausted or if there are
    /// no files. A failure to open a file pair is returned as an error item.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Work out which file pair has to be opened before an item can
            // be produced; return directly if no opening is needed.
            let idx_to_open = match self.current.as_mut() {
                Some(current) => {
                    if let Some(item) = current.next() {
                        self.nevents_read += 1;
                        return Some(item);
                    }
                    let following = self.current_file_idx + 1;
                    if following == self.files.len() {
                        return None;
                    }
                    following
                }
                None => {
                    if self.files.is_empty() {
                        return None;
                    }
                    0
                }
            };
            if let Err(err) = self.open(idx_to_open) {
                return Some(Err(err));
            }
        }
    }

    /// The total size hint determined at construction, reduced by the number
    /// of items already returned.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let consumed = self.nevents_read;
        let (lower, upper) = self.total_size_hint;
        (
            lower.saturating_sub(consumed),
            upper.map(|upper| upper.saturating_sub(consumed)),
        )
    }
}
impl UpdateWeights for CombinedFileIO {
    type Error = FileIOError;

    /// Update the weights of all events, starting from the first file pair.
    ///
    /// Rewinds, then opens every file pair in turn and passes consecutive
    /// entries of `weights` to it until that pair reports that it has no
    /// further event or `weights` is used up. The weight update is finished
    /// for every pair, including those opened after `weights` ran out.
    ///
    /// Returns the number of weights that were consumed.
    fn update_all_weights(
        &mut self,
        weights: &[Weights],
    ) -> Result<usize, Self::Error> {
        self.rewind()?;
        let mut nevent = 0;
        let progress =
            ProgressBar::new(weights.len() as u64, "events written:");
        for idx in 0..self.files.len() {
            self.open(idx)?;
            let current = self
                .current
                .as_mut()
                .expect("`open` sets `current` on success");
            while let Some(weight) = weights.get(nevent) {
                if !current.update_next_weights(weight)? {
                    break;
                }
                progress.inc(1);
                nevent += 1;
            }
            current.finish_weight_update()?;
        }
        progress.finish();
        Ok(nevent)
    }

    /// Update the weights of the next event.
    ///
    /// If the current file pair has no further event, its weight update is
    /// finished and the following pair is opened. If no file pair is
    /// currently open (there are no files, or the object was rewound),
    /// processing starts with the first pair.
    ///
    /// Returns `Ok(true)` if an event was updated and `Ok(false)` if there
    /// are no files or the last file pair has no further event.
    fn update_next_weights(
        &mut self,
        weights: &Weights,
    ) -> Result<bool, Self::Error> {
        loop {
            let Some(current) = self.current.as_mut() else {
                // Nothing is open: either there is nothing to do at all, or
                // we have to start over with the first file pair.
                if self.files.is_empty() {
                    return Ok(false);
                }
                self.open(0)?;
                continue;
            };
            if current.update_next_weights(weights)? {
                return Ok(true);
            }
            current.finish_weight_update()?;
            // Only advance while there is a file pair left; opening past
            // the end would index out of bounds.
            let next_idx = self.current_file_idx + 1;
            if next_idx >= self.files.len() {
                return Ok(false);
            }
            self.open(next_idx)?;
        }
    }
}
/// Reader for event files.
///
/// Implementors iterate over event records and can be rewound.
pub trait EventFileReader:
Iterator<Item = Result<EventRecord, ReadError>> + Rewind<Error = CreateError>
{
/// Path associated with this reader.
// NOTE(review): presumably the path of the file being read - confirm
// against the implementors.
fn path(&self) -> &Path;
/// Header bytes.
// NOTE(review): presumably the raw file content preceding the first event
// record - confirm against the implementors.
fn header(&self) -> &[u8];
}
/// Combined event file reader and weight updater.
///
/// This trait has no methods of its own; it bundles iteration over event
/// records, rewinding, and weight updates, all with [`FileIOError`] as the
/// error type, so that implementors can be used as trait objects inside
/// [`FileIO`].
pub trait EventFileIO:
Iterator<Item = Result<EventRecord, FileIOError>>
+ Rewind<Error = FileIOError>
+ UpdateWeights<Error = FileIOError>
{
}
// Marker implementations for the format-specific I/O types, so that each of
// them can be boxed as `dyn EventFileIO`. All formats except HepMC2 are
// gated behind their respective cargo feature.
impl EventFileIO for crate::hepmc2::FileIO {}
#[cfg(feature = "lhef")]
impl EventFileIO for crate::lhef::FileIO {}
#[cfg(feature = "ntuple")]
impl EventFileIO for crate::ntuple::FileIO {}
#[cfg(feature = "stripper-xml")]
impl EventFileIO for crate::stripper_xml::FileIO {}
/// A single event record as returned by the file readers, before conversion
/// to an [`Event`].
///
/// Which variants exist depends on the enabled cargo features.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq)]
pub enum EventRecord {
/// Record from a HepMC file, held as a string.
HepMC(String),
/// Record from a Les Houches Event File, held as a string.
#[cfg(feature = "lhef")]
LHEF(String),
/// Event from a ROOT ntuple file.
#[cfg(feature = "ntuple")]
NTuple(Box<ntuple::Event>),
/// Record from a STRIPPER XML file, held as a string.
#[cfg(feature = "stripper-xml")]
StripperXml(String),
}
impl TryFrom<EventRecord> for String {
type Error = EventRecord;
fn try_from(e: EventRecord) -> Result<Self, Self::Error> {
use EventRecord::*;
match e {
HepMC(s) => Ok(s),
#[cfg(feature = "lhef")]
LHEF(s) => Ok(s),
#[cfg(feature = "ntuple")]
ev @ NTuple(_) => Err(ev),
#[cfg(feature = "stripper-xml")]
StripperXml(s) => Ok(s),
}
}
}
/// Converter from [`EventRecord`] to [`Event`].
#[derive(
Deserialize,
Serialize,
Clone,
Debug,
Default,
Eq,
PartialEq,
Ord,
PartialOrd,
Hash,
)]
pub struct Converter {
// Weight names; only present with the `multiweight` feature.
// NOTE(review): presumably the names of the additional weights to extract
// during conversion - confirm against the format-specific parsers.
#[cfg(feature = "multiweight")]
weight_names: Vec<String>,
}
impl Converter {
    /// Create a converter with default settings.
    pub fn new() -> Self {
        Default::default()
    }

    /// Create a converter with the given weight names.
    #[cfg(feature = "multiweight")]
    pub fn with_weights(weight_names: Vec<String>) -> Self {
        Converter { weight_names }
    }

    /// The weight names this converter was set up with.
    #[cfg(feature = "multiweight")]
    pub fn weight_names(&self) -> &[String] {
        self.weight_names.as_slice()
    }
}
impl TryConvert<EventRecord, Event> for Converter {
    type Error = ErrorKind;

    /// Convert an event record into an [`Event`].
    ///
    /// Dispatches on the record variant to the matching format-specific
    /// parser or converter; their errors are converted into [`ErrorKind`].
    fn try_convert(&self, record: EventRecord) -> Result<Event, Self::Error> {
        match record {
            EventRecord::HepMC(text) => Ok(self.parse_hepmc(&text)?),
            #[cfg(feature = "lhef")]
            EventRecord::LHEF(text) => Ok(self.parse_lhef(&text)?),
            #[cfg(feature = "ntuple")]
            EventRecord::NTuple(boxed) => Ok(self.convert_ntuple(*boxed)?),
            #[cfg(feature = "stripper-xml")]
            EventRecord::StripperXml(text) => {
                Ok(self.parse_stripper_xml(&text)?)
            }
        }
    }
}