use crate::model::{Atom, Comp, Error as ModelError, Field, Message, Rep, Segment};
use crate::parser::parse;
use thiserror::Error;
/// Errors reported while parsing batch (`BHS`/`BTS`) and file (`FHS`/`FTS`) envelopes.
#[derive(Debug, Error, Clone)]
pub enum BatchError {
/// The input is not shaped like batch data: it is not valid UTF-8, it is empty,
/// its first segment is not recognised, or a segment line is too short.
#[error("Invalid batch structure: {0}")]
InvalidStructure(String),
/// A required header or trailer segment is absent; the payload names the segment.
#[error("Missing required segment: {0}")]
MissingSegment(String),
/// Header and trailer segments do not pair up.
// NOTE(review): nothing in the visible part of this file constructs this variant.
#[error("Mismatched batch headers/trailers")]
MismatchedHeaders,
/// A contained message failed to parse; the payload is the stringified model error.
#[error("Parse error: {0}")]
ParseError(String),
/// The message count recorded for a batch differs from the number of messages parsed.
#[error("Count mismatch: expected {expected}, got {actual}")]
CountMismatch {
/// Count recorded in the batch info (taken from the trailer when it carries one).
expected: usize,
/// Number of messages actually parsed.
actual: usize,
},
}
impl From<ModelError> for BatchError {
fn from(e: ModelError) -> Self {
BatchError::ParseError(e.to_string())
}
}
/// Kind of envelope a [`BatchInfo`] describes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BatchType {
/// A single batch; this is the value `BatchInfo::default()` uses.
Single,
/// A file-level envelope; this is the value `FileBatch::new()` uses.
File,
}
/// Metadata extracted from the header and trailer segments of a batch or file.
///
/// Every field except `batch_type` is optional and stays `None` when the
/// corresponding position is absent from the segment line.
#[derive(Debug, Clone, PartialEq)]
pub struct BatchInfo {
/// Whether this describes a single batch or a file envelope.
pub batch_type: BatchType,
/// Field separator: the fourth character of the header/trailer line.
pub field_separator: Option<char>,
/// Encoding characters: the first field after the separator of a header line.
pub encoding_characters: Option<String>,
/// Sending application named in the header.
pub sending_application: Option<String>,
/// Sending facility named in the header.
pub sending_facility: Option<String>,
/// Receiving application named in the header.
pub receiving_application: Option<String>,
/// Receiving facility named in the header.
pub receiving_facility: Option<String>,
/// Creation date/time text; only populated from an `FHS` header.
pub file_creation_time: Option<String>,
/// Security field of the header.
pub security: Option<String>,
/// Name field of the header.
pub batch_name: Option<String>,
/// Comment field of the header.
pub batch_comment: Option<String>,
/// Count from the first trailer field when it parses as an unsigned integer;
/// the parsers in this file otherwise fill in the number of messages they parsed.
pub message_count: Option<usize>,
/// Second field of the trailer segment.
pub trailer_comment: Option<String>,
}
impl Default for BatchInfo {
    /// Returns metadata for a [`BatchType::Single`] batch with every optional field unset.
    fn default() -> Self {
        // `BatchType` does not derive `Default`, so the kind is chosen explicitly here.
        let batch_type = BatchType::Single;
        Self {
            batch_type,
            // Header-derived values.
            field_separator: None,
            encoding_characters: None,
            sending_application: None,
            sending_facility: None,
            receiving_application: None,
            receiving_facility: None,
            file_creation_time: None,
            security: None,
            batch_name: None,
            batch_comment: None,
            // Trailer-derived values.
            message_count: None,
            trailer_comment: None,
        }
    }
}
/// A group of messages, optionally wrapped by a `BHS` header and a `BTS` trailer.
#[derive(Debug, Clone, PartialEq)]
pub struct Batch {
/// Parsed `BHS` segment, or `None` when the messages had no batch header.
pub header: Option<Segment>,
/// Messages contained in the batch, in input order.
pub messages: Vec<Message>,
/// Parsed `BTS` segment, or `None` when the messages had no batch trailer.
pub trailer: Option<Segment>,
/// Metadata extracted from the header and trailer.
pub info: BatchInfo,
}
impl Batch {
    /// Creates an empty batch: no header, no trailer, no messages and default metadata.
    pub fn new() -> Self {
        let info = BatchInfo::default();
        Self {
            header: None,
            trailer: None,
            messages: Vec::new(),
            info,
        }
    }

    /// Appends `message` after the messages already held.
    pub fn add_message(&mut self, message: Message) {
        self.messages.push(message);
    }

    /// Number of messages actually held.
    ///
    /// This counts `self.messages`; it does not read `info.message_count`.
    pub fn message_count(&self) -> usize {
        let held = &self.messages;
        held.len()
    }

    /// Iterates over the held messages in insertion order.
    pub fn iter_messages(&self) -> impl Iterator<Item = &Message> {
        self.messages.iter()
    }
}
impl Default for Batch {
fn default() -> Self {
Self::new()
}
}
/// A collection of batches, optionally wrapped by an `FHS` header and an `FTS` trailer.
///
/// `parse_batch` always returns this type, even for input that has no file envelope.
#[derive(Debug, Clone, PartialEq)]
pub struct FileBatch {
/// Parsed `FHS` segment, or `None` when the input had no file header.
pub header: Option<Segment>,
/// Batches in input order.
pub batches: Vec<Batch>,
/// Parsed `FTS` segment, or `None` when the input had no file trailer.
pub trailer: Option<Segment>,
/// Metadata for the file envelope.
pub info: BatchInfo,
}
impl FileBatch {
    /// Creates an empty file batch whose metadata is marked [`BatchType::File`].
    pub fn new() -> Self {
        let info = BatchInfo {
            batch_type: BatchType::File,
            ..BatchInfo::default()
        };
        Self {
            header: None,
            trailer: None,
            batches: Vec::new(),
            info,
        }
    }

    /// Appends `batch` after the batches already held.
    pub fn add_batch(&mut self, batch: Batch) {
        self.batches.push(batch);
    }

    /// Total number of messages across every contained batch.
    pub fn total_message_count(&self) -> usize {
        self.batches
            .iter()
            .map(|batch| batch.messages.len())
            .sum()
    }

    /// Iterates over every message of every batch, batch by batch, in order.
    pub fn iter_all_messages(&self) -> impl Iterator<Item = &Message> {
        self.batches.iter().flat_map(|batch| &batch.messages)
    }
}
impl Default for FileBatch {
fn default() -> Self {
Self::new()
}
}
pub fn parse_batch(data: &[u8]) -> Result<FileBatch, BatchError> {
let text = std::str::from_utf8(data)
.map_err(|_err| BatchError::InvalidStructure("Invalid UTF-8 data".to_string()))?;
let lines: Vec<&str> = text.split(['\r', '\n']).filter(|l| !l.is_empty()).collect();
if lines.is_empty() {
return Err(BatchError::InvalidStructure("Empty batch data".to_string()));
}
let Some(first_line) = lines.first().copied() else {
return Err(BatchError::InvalidStructure("Empty batch data".to_string()));
};
if first_line.starts_with("FHS") {
parse_file_batch(&lines)
} else if first_line.starts_with("BHS") {
let batch = parse_single_batch(&lines)?;
let mut file_batch = FileBatch::new();
file_batch.info.batch_type = BatchType::Single;
file_batch.info.field_separator = batch.info.field_separator;
file_batch.info.encoding_characters = batch.info.encoding_characters.clone();
file_batch.info.sending_application = batch.info.sending_application.clone();
file_batch.info.sending_facility = batch.info.sending_facility.clone();
file_batch.info.receiving_application = batch.info.receiving_application.clone();
file_batch.info.receiving_facility = batch.info.receiving_facility.clone();
file_batch.info.security = batch.info.security.clone();
file_batch.info.batch_name = batch.info.batch_name.clone();
file_batch.info.batch_comment = batch.info.batch_comment.clone();
file_batch.info.message_count = batch.info.message_count;
file_batch.info.trailer_comment = batch.info.trailer_comment.clone();
file_batch.add_batch(batch);
Ok(file_batch)
} else if first_line.starts_with("MSH") {
let messages = parse_messages(&lines)?;
let batch = Batch {
header: None,
messages,
trailer: None,
info: BatchInfo::default(),
};
let mut file_batch = FileBatch::new();
file_batch.add_batch(batch);
Ok(file_batch)
} else {
Err(BatchError::InvalidStructure(format!(
"Unknown first segment: {}",
segment_prefix(first_line)
)))
}
}
/// Parses segment lines that form an `FHS` … `FTS` file envelope.
///
/// * `FHS` fills the file header and the header-derived parts of `info`.
/// * `FTS` fills the file trailer, `info.message_count` and `info.trailer_comment`.
/// * Lines from a `BHS` up to and including the matching `BTS` are collected and
///   handed to `parse_single_batch`.
/// * A message that sits outside any `BHS`/`BTS` pair becomes its own header-less
///   batch. The segments that follow its `MSH` line (up to the next `MSH` or
///   envelope segment) are parsed as part of that message.
/// * Any other line that appears outside a batch before the first `MSH` is ignored.
///
/// When the trailer carries no usable count, `info.message_count` is set to the
/// total number of messages parsed.
///
/// # Errors
///
/// * [`BatchError::MissingSegment`] with `"FHS"` when no file header was seen, or
///   with `"BTS"` when a batch opened by `BHS` is never closed.
/// * Any error produced while parsing a segment, a batch or a message.
fn parse_file_batch(lines: &[&str]) -> Result<FileBatch, BatchError> {
    let mut file_batch = FileBatch::new();
    let mut current_batch_lines: Vec<&str> = Vec::new();
    let mut loose_message_lines: Vec<&str> = Vec::new();
    let mut in_batch = false;
    let mut has_fhs = false;

    for &line in lines {
        let tag = line.get(..3);

        // Every envelope segment and every `MSH` ends the loose message collected so
        // far. The buffer is always empty while a batch is open, so this is a no-op there.
        if matches!(tag, Some("FHS" | "FTS" | "BHS" | "BTS" | "MSH")) {
            flush_loose_message(&mut file_batch, &mut loose_message_lines)?;
        }

        match tag {
            Some("FHS") => {
                has_fhs = true;
                file_batch.header = Some(parse_segment(line)?);
                let info = extract_batch_info(line, "FHS")?;
                // Copy only header-derived values; batch type and trailer data stay untouched.
                file_batch.info.field_separator = info.field_separator;
                file_batch.info.encoding_characters = info.encoding_characters;
                file_batch.info.sending_application = info.sending_application;
                file_batch.info.sending_facility = info.sending_facility;
                file_batch.info.receiving_application = info.receiving_application;
                file_batch.info.receiving_facility = info.receiving_facility;
                file_batch.info.file_creation_time = info.file_creation_time;
                file_batch.info.security = info.security;
                file_batch.info.batch_name = info.batch_name;
                file_batch.info.batch_comment = info.batch_comment;
            }
            Some("FTS") => {
                file_batch.trailer = Some(parse_segment(line)?);
                let info = extract_batch_info(line, "FTS")?;
                file_batch.info.message_count = info.message_count;
                file_batch.info.trailer_comment = info.trailer_comment;
            }
            Some("BHS") => {
                in_batch = true;
                current_batch_lines.push(line);
            }
            Some("BTS") => {
                current_batch_lines.push(line);
                let batch = parse_single_batch(&current_batch_lines)?;
                file_batch.add_batch(batch);
                current_batch_lines.clear();
                in_batch = false;
            }
            _ if in_batch => current_batch_lines.push(line),
            // Outside a batch: `MSH` opens a loose message and later segments extend it.
            _ if tag == Some("MSH") || !loose_message_lines.is_empty() => {
                loose_message_lines.push(line);
            }
            // Stray segment before any message: ignored.
            _ => {}
        }
    }
    flush_loose_message(&mut file_batch, &mut loose_message_lines)?;

    if !has_fhs {
        return Err(BatchError::MissingSegment("FHS".to_string()));
    }
    // A batch that was opened but never closed must not vanish silently; report it
    // the same way `parse_single_batch` reports a batch without a trailer.
    if in_batch {
        return Err(BatchError::MissingSegment("BTS".to_string()));
    }
    if file_batch.info.message_count.is_none() {
        file_batch.info.message_count = Some(file_batch.total_message_count());
    }
    Ok(file_batch)
}

/// Parses the buffered lines of a message found outside any `BHS`/`BTS` pair, stores
/// the result in `file_batch` as a header-less batch and empties the buffer.
/// Does nothing when the buffer is empty.
fn flush_loose_message(
    file_batch: &mut FileBatch,
    pending: &mut Vec<&str>,
) -> Result<(), BatchError> {
    if pending.is_empty() {
        return Ok(());
    }
    let messages = parse_messages(pending.as_slice())?;
    pending.clear();
    file_batch.add_batch(Batch {
        header: None,
        messages,
        trailer: None,
        info: BatchInfo::default(),
    });
    Ok(())
}
fn parse_single_batch(lines: &[&str]) -> Result<Batch, BatchError> {
let mut batch = Batch::new();
let mut message_lines: Vec<&str> = Vec::new();
let mut has_bhs = false;
let mut has_bts = false;
for line in lines {
if line.starts_with("BHS") {
has_bhs = true;
batch.header = Some(parse_segment(line)?);
batch.info = extract_batch_info(line, "BHS")?;
} else if line.starts_with("BTS") {
has_bts = true;
batch.trailer = Some(parse_segment(line)?);
let info = extract_batch_info(line, "BTS")?;
batch.info.message_count = info.message_count;
batch.info.trailer_comment = info.trailer_comment;
} else if line.starts_with("MSH") {
if !message_lines.is_empty() {
let msg_text = message_lines.join("\r");
let msg = parse(msg_text.as_bytes())?;
batch.add_message(msg);
message_lines.clear();
}
message_lines.push(line);
} else {
message_lines.push(line);
}
}
if !message_lines.is_empty() {
let msg_text = message_lines.join("\r");
let msg = parse(msg_text.as_bytes())?;
batch.add_message(msg);
}
if !has_bhs && (has_bts || !batch.messages.is_empty()) {
return Err(BatchError::MissingSegment("BHS".to_string()));
}
if !has_bts && (has_bhs || !batch.messages.is_empty()) {
return Err(BatchError::MissingSegment("BTS".to_string()));
}
if batch.info.message_count.is_none() {
batch.info.message_count = Some(batch.message_count());
}
if let Some(expected) = batch.info.message_count
&& expected != batch.message_count()
{
return Err(BatchError::CountMismatch {
expected,
actual: batch.message_count(),
});
}
Ok(batch)
}
/// Splits `lines` into messages at each `MSH` line and parses every group.
///
/// Lines that precede the first `MSH` form a group of their own. Each group is
/// joined with `\r` before being handed to `parse`. An empty slice yields an empty
/// vector.
///
/// # Errors
///
/// Returns the converted error of the first group that `parse` rejects.
fn parse_messages(lines: &[&str]) -> Result<Vec<Message>, BatchError> {
    let mut messages = Vec::new();
    // Index of the first line of the group currently being collected.
    let mut group_start = 0;

    for (index, line) in lines.iter().enumerate() {
        // An `MSH` closes the running group, unless that group is still empty.
        if index > group_start && line.starts_with("MSH") {
            let joined = lines[group_start..index].join("\r");
            messages.push(parse(joined.as_bytes())?);
            group_start = index;
        }
    }

    if group_start < lines.len() {
        let joined = lines[group_start..].join("\r");
        messages.push(parse(joined.as_bytes())?);
    }
    Ok(messages)
}
/// Builds a [`Segment`] from one envelope line without interpreting its contents.
///
/// The first three bytes become the segment id. The fourth character is taken as
/// the field separator (`|` when the line has no fourth character). The text
/// returned by `fields_after_separator` is split on that separator, and every piece
/// is stored verbatim as a field holding one repetition, one component and one
/// text atom.
///
/// # Errors
///
/// Returns [`BatchError::InvalidStructure`] when the line is shorter than three bytes.
fn parse_segment(line: &str) -> Result<Segment, BatchError> {
    let id = line
        .as_bytes()
        .get(..3)
        .and_then(|prefix| <[u8; 3]>::try_from(prefix).ok())
        .ok_or_else(|| BatchError::InvalidStructure(format!("Segment too short: {line}")))?;

    let separator = line.chars().nth(3).unwrap_or('|');
    let fields: Vec<Field> = fields_after_separator(line)
        .split(separator)
        .map(|raw| {
            let atom = Atom::Text(raw.to_string());
            let comp = Comp { subs: vec![atom] };
            let rep = Rep { comps: vec![comp] };
            Field { reps: vec![rep] }
        })
        .collect();

    Ok(Segment { id, fields })
}
/// Extracts [`BatchInfo`] metadata from one header or trailer line.
///
/// `segment_type` is the segment id of `line`: `"FHS"`, `"BHS"`, `"FTS"` or `"BTS"`.
/// Lines shorter than four bytes yield `BatchInfo::default()`.
///
/// The fields after the separator are numbered from zero, so index `i` is field
/// `i + 2` of the segment (field 1 being the separator itself):
///
/// * Trailers (`FTS`/`BTS`): index 0 is the count (kept only when it parses as
///   `usize`), index 1 the trailer comment.
/// * Headers (`FHS`/`BHS`): index 0 encoding characters, 1–4 sending/receiving
///   application and facility, 5 creation date/time (stored for `FHS` only),
///   6 security, 7 name/ID, 8 comment.
///
/// Absent positions stay `None`; present-but-empty positions become `Some("")`.
///
/// # Errors
///
/// Currently never fails; the `Result` is kept for the callers' `?` chains.
fn extract_batch_info(line: &str, segment_type: &str) -> Result<BatchInfo, BatchError> {
    let mut info = BatchInfo::default();
    if line.len() < 4 {
        return Ok(info);
    }

    let field_sep = line.chars().nth(3).unwrap_or('|');
    info.field_separator = Some(field_sep);
    let fields: Vec<&str> = fields_after_separator(line).split(field_sep).collect();
    // Owned copy of the field at `index`, or `None` when the line is too short.
    let field = |index: usize| fields.get(index).map(|raw| (*raw).to_string());

    if matches!(segment_type, "FTS" | "BTS") {
        info.message_count = fields.first().and_then(|raw| raw.parse::<usize>().ok());
        info.trailer_comment = field(1);
        return Ok(info);
    }

    info.encoding_characters = field(0);
    info.sending_application = field(1);
    info.sending_facility = field(2);
    info.receiving_application = field(3);
    info.receiving_facility = field(4);
    // `BatchInfo` only has a slot for the file-level creation time.
    if segment_type == "FHS" {
        info.file_creation_time = field(5);
    }
    info.security = field(6);
    // Field 9 (name/ID) is index 7 and field 10 (comment) is index 8. Reading
    // indices 8 and 9 would store the comment as the name and the control ID as
    // the comment.
    info.batch_name = field(7);
    info.batch_comment = field(8);
    Ok(info)
}
/// Returns the part of `line` that follows the three-byte segment id and the
/// single field-separator character.
///
/// The separator is skipped as one `char`, so a separator that occupies more than
/// one byte in UTF-8 is handled too. Returns `""` when the line ends at or before
/// the separator, or when byte offset 3 is not a character boundary.
fn fields_after_separator(line: &str) -> &str {
    let mut rest = line.get(3..).unwrap_or_default().chars();
    // Drop the separator itself, however many bytes it takes.
    rest.next();
    rest.as_str()
}
/// Returns the first three bytes of `line` for use in diagnostics.
///
/// Falls back to the whole line when it is shorter than three bytes or when byte
/// offset 3 does not fall on a character boundary.
fn segment_prefix(line: &str) -> &str {
    match line.get(..3) {
        Some(prefix) => prefix,
        None => line,
    }
}