use cyanea_core::{CyaneaError, Result};
/// A decoded ABIF trace file, as returned by [`parse_abi_bytes`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AbiRecord {
/// Called bases: the first `num_elements` bytes of the `PBAS` tag payload.
pub sequence: Vec<u8>,
/// Quality values: the first `num_elements` bytes of the `PCON` tag payload.
pub quality: Vec<u8>,
/// Trace samples for the four bases, read from tags `DATA` 9 through 12.
pub traces: AbiTraces,
/// Sample name from tag `SMPL` 1, decoded as lossy UTF-8; empty when the tag is absent.
pub sample_name: String,
/// Peak positions decoded from the `PLOC` tag payload as big-endian `u16` values.
pub peak_positions: Vec<u16>,
}
/// Trace sample vectors, one per base.
///
/// [`parse_abi_bytes`] assigns each of the `DATA` 9-12 channels to one of these
/// fields according to the `FWO_` channel order (falling back to `ACGT`). A base
/// that the channel order does not name keeps an empty vector.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AbiTraces {
/// Samples of the channel mapped to `A`.
pub a: Vec<i16>,
/// Samples of the channel mapped to `C`.
pub c: Vec<i16>,
/// Samples of the channel mapped to `G`.
pub g: Vec<i16>,
/// Samples of the channel mapped to `T`.
pub t: Vec<i16>,
}
/// Magic bytes expected in the first four bytes of an ABIF file.
const ABIF_MAGIC: &[u8; 4] = b"ABIF";
/// Size in bytes of one directory entry; also the stride between consecutive entries.
const DIR_ENTRY_SIZE: usize = 28;
/// One ABIF directory entry as decoded by `parse_dir_entry`.
///
/// All integer fields are read big-endian from the on-disk entry.
#[derive(Debug)]
struct DirEntry {
/// Four-byte tag name (entry bytes 0..4), e.g. `PBAS` or `DATA`.
tag_name: [u8; 4],
/// Tag number (entry bytes 4..8); distinguishes entries that share a tag name.
tag_number: i32,
/// Element type code (entry bytes 8..10); decoded but currently unused by the parser.
_element_type: i16,
/// Element size (entry bytes 10..12); decoded but currently unused by the parser.
_element_size: i16,
/// Number of elements in the payload (entry bytes 12..16).
num_elements: i32,
/// Payload size in bytes (entry bytes 16..20).
data_size: i32,
/// Payload offset from the start of the file (entry bytes 20..24). When
/// `data_size` is at most four, `get_entry_data` reads the payload from these
/// bytes of the entry instead of following the offset.
data_offset: i32,
}
/// Reads a big-endian `i16` from `data` starting at byte `offset`.
///
/// # Errors
///
/// Returns [`CyaneaError::Parse`] when fewer than two bytes are available at
/// `offset`, including the case where `offset + 2` would overflow `usize`.
fn read_i16_be(data: &[u8], offset: usize) -> Result<i16> {
    // `checked_add` keeps a huge offset from wrapping around the bounds check.
    let bytes = offset
        .checked_add(2)
        .and_then(|end| data.get(offset..end))
        .ok_or_else(|| {
            CyaneaError::Parse(format!(
                "ABI: unexpected EOF reading i16 at offset {}",
                offset
            ))
        })?;
    Ok(i16::from_be_bytes([bytes[0], bytes[1]]))
}
/// Reads a big-endian `i32` from `data` starting at byte `offset`.
///
/// # Errors
///
/// Returns [`CyaneaError::Parse`] when fewer than four bytes are available at
/// `offset`, including the case where `offset + 4` would overflow `usize`.
fn read_i32_be(data: &[u8], offset: usize) -> Result<i32> {
    // `checked_add` keeps a huge offset from wrapping around the bounds check.
    let bytes = offset
        .checked_add(4)
        .and_then(|end| data.get(offset..end))
        .ok_or_else(|| {
            CyaneaError::Parse(format!(
                "ABI: unexpected EOF reading i32 at offset {}",
                offset
            ))
        })?;
    Ok(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
}
/// Decodes the directory entry that starts at byte `offset` of `data`.
///
/// The entry occupies `DIR_ENTRY_SIZE` bytes. Its fields are read big-endian in
/// this order: tag name (4 bytes), tag number (`i32`), element type (`i16`),
/// element size (`i16`), element count (`i32`), data size (`i32`) and data
/// offset (`i32`). The remaining bytes of the entry are not decoded.
///
/// # Errors
///
/// Returns [`CyaneaError::Parse`] when the entry does not fit inside `data`,
/// including the case where `offset + DIR_ENTRY_SIZE` would overflow `usize`.
fn parse_dir_entry(data: &[u8], offset: usize) -> Result<DirEntry> {
    // Slice the whole entry once; every field read below is then relative to it
    // and cannot run past the end of the file.
    let raw = offset
        .checked_add(DIR_ENTRY_SIZE)
        .and_then(|end| data.get(offset..end))
        .ok_or_else(|| {
            CyaneaError::Parse(format!(
                "ABI: directory entry at offset {} exceeds file size",
                offset
            ))
        })?;

    let mut tag_name = [0u8; 4];
    tag_name.copy_from_slice(&raw[..4]);

    Ok(DirEntry {
        tag_name,
        tag_number: read_i32_be(raw, 4)?,
        _element_type: read_i16_be(raw, 8)?,
        _element_size: read_i16_be(raw, 10)?,
        num_elements: read_i32_be(raw, 12)?,
        data_size: read_i32_be(raw, 16)?,
        data_offset: read_i32_be(raw, 20)?,
    })
}
/// Returns the payload bytes that a directory entry refers to.
///
/// Payloads of at most four bytes are read inline, from byte 20 of the entry
/// itself (`entry_file_offset + 20`). Larger payloads are read from
/// `entry.data_offset`, measured from the start of `file_data`.
///
/// # Errors
///
/// Returns [`CyaneaError::Parse`] when `data_size` is negative, when an
/// out-of-line payload has a negative `data_offset`, or when the payload range
/// does not lie inside `file_data`.
fn get_entry_data<'a>(
    file_data: &'a [u8],
    entry: &DirEntry,
    entry_file_offset: usize,
) -> Result<&'a [u8]> {
    // A negative size would become an enormous `usize` under `as`; reject it
    // before any arithmetic is done with it.
    if entry.data_size < 0 {
        return Err(CyaneaError::Parse(format!(
            "ABI: negative data size {}",
            entry.data_size
        )));
    }
    let size = entry.data_size as usize;

    if size <= 4 {
        let inline = entry_file_offset.checked_add(20).and_then(|start| {
            let end = start.checked_add(size)?;
            file_data.get(start..end)
        });
        return inline
            .ok_or_else(|| CyaneaError::Parse("ABI: inline data exceeds file size".to_string()));
    }

    if entry.data_offset < 0 {
        return Err(CyaneaError::Parse(format!(
            "ABI: negative data offset {}",
            entry.data_offset
        )));
    }
    let offset = entry.data_offset as usize;

    offset
        .checked_add(size)
        .and_then(|end| file_data.get(offset..end))
        .ok_or_else(|| {
            CyaneaError::Parse(format!(
                "ABI: data at offset {} with size {} exceeds file size {}",
                offset,
                size,
                file_data.len()
            ))
        })
}
/// Decodes the first `count` big-endian `i16` values from `data`.
///
/// Bytes beyond `count * 2` are ignored.
///
/// # Errors
///
/// Returns [`CyaneaError::Parse`] when `data` holds fewer than `count * 2`
/// bytes, including the case where `count * 2` would overflow `usize`.
fn read_i16_array(data: &[u8], count: usize) -> Result<Vec<i16>> {
    let byte_len = match count.checked_mul(2) {
        Some(n) if n <= data.len() => n,
        _ => {
            return Err(CyaneaError::Parse(format!(
                "ABI: expected {} bytes for {} i16 values, got {}",
                // Saturate so that reporting an absurd count cannot itself overflow.
                count.saturating_mul(2),
                count,
                data.len()
            )));
        }
    };

    Ok(data[..byte_len]
        .chunks_exact(2)
        .map(|pair| i16::from_be_bytes([pair[0], pair[1]]))
        .collect())
}
/// Decodes the first `count` big-endian `u16` values from `data`.
///
/// Bytes beyond `count * 2` are ignored.
///
/// # Errors
///
/// Returns [`CyaneaError::Parse`] when `data` holds fewer than `count * 2`
/// bytes, including the case where `count * 2` would overflow `usize`.
fn read_u16_array(data: &[u8], count: usize) -> Result<Vec<u16>> {
    let byte_len = match count.checked_mul(2) {
        Some(n) if n <= data.len() => n,
        _ => {
            return Err(CyaneaError::Parse(format!(
                "ABI: expected {} bytes for {} u16 values, got {}",
                // Saturate so that reporting an absurd count cannot itself overflow.
                count.saturating_mul(2),
                count,
                data.len()
            )));
        }
    };

    Ok(data[..byte_len]
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
        .collect())
}
pub fn parse_abi_bytes(data: &[u8]) -> Result<AbiRecord> {
if data.len() < 128 {
return Err(CyaneaError::Parse(
"ABI: file too small for ABIF header".to_string(),
));
}
if &data[0..4] != ABIF_MAGIC {
return Err(CyaneaError::Parse(format!(
"ABI: invalid magic bytes (expected 'ABIF', got '{}')",
String::from_utf8_lossy(&data[0..4])
)));
}
let version = read_i16_be(data, 4)?;
if version < 100 {
return Err(CyaneaError::Parse(format!(
"ABI: unsupported version {} (expected >= 100)",
version
)));
}
let num_entries = read_i32_be(data, 18)? as usize;
let dir_offset = read_i32_be(data, 26)? as usize;
if dir_offset + num_entries * DIR_ENTRY_SIZE > data.len() {
return Err(CyaneaError::Parse(
"ABI: directory extends beyond file".to_string(),
));
}
let mut entries = Vec::with_capacity(num_entries);
for i in 0..num_entries {
let offset = dir_offset + i * DIR_ENTRY_SIZE;
entries.push((offset, parse_dir_entry(data, offset)?));
}
let find_entry = |tag: &[u8; 4], number: i32| -> Option<usize> {
entries
.iter()
.position(|(_, e)| &e.tag_name == tag && e.tag_number == number)
};
let channel_order: [u8; 4] = if let Some(idx) = find_entry(b"FWO_", 1) {
let (entry_offset, ref entry) = entries[idx];
let fwo_data = get_entry_data(data, entry, entry_offset)?;
if fwo_data.len() >= 4 {
[fwo_data[0], fwo_data[1], fwo_data[2], fwo_data[3]]
} else {
*b"ACGT"
}
} else {
*b"ACGT"
};
let pbas_idx = find_entry(b"PBAS", 2)
.or_else(|| find_entry(b"PBAS", 1))
.ok_or_else(|| CyaneaError::Parse("ABI: missing PBAS (called bases) tag".to_string()))?;
let (pbas_offset, ref pbas_entry) = entries[pbas_idx];
let pbas_data = get_entry_data(data, pbas_entry, pbas_offset)?;
let sequence = pbas_data[..pbas_entry.num_elements as usize].to_vec();
let pcon_idx = find_entry(b"PCON", 2)
.or_else(|| find_entry(b"PCON", 1))
.ok_or_else(|| {
CyaneaError::Parse("ABI: missing PCON (quality values) tag".to_string())
})?;
let (pcon_offset, ref pcon_entry) = entries[pcon_idx];
let pcon_data = get_entry_data(data, pcon_entry, pcon_offset)?;
let quality = pcon_data[..pcon_entry.num_elements as usize].to_vec();
let mut trace_channels: [Vec<i16>; 4] = [Vec::new(), Vec::new(), Vec::new(), Vec::new()];
for (i, data_num) in [9i32, 10, 11, 12].iter().enumerate() {
let idx = find_entry(b"DATA", *data_num).ok_or_else(|| {
CyaneaError::Parse(format!("ABI: missing DATA.{} (trace channel) tag", data_num))
})?;
let (entry_offset, ref entry) = entries[idx];
let channel_data = get_entry_data(data, entry, entry_offset)?;
trace_channels[i] = read_i16_array(channel_data, entry.num_elements as usize)?;
}
let mut trace_a = Vec::new();
let mut trace_c = Vec::new();
let mut trace_g = Vec::new();
let mut trace_t = Vec::new();
for (i, &base) in channel_order.iter().enumerate() {
match base {
b'A' | b'a' => trace_a = trace_channels[i].clone(),
b'C' | b'c' => trace_c = trace_channels[i].clone(),
b'G' | b'g' => trace_g = trace_channels[i].clone(),
b'T' | b't' => trace_t = trace_channels[i].clone(),
_ => {}
}
}
let ploc_idx = find_entry(b"PLOC", 2)
.or_else(|| find_entry(b"PLOC", 1))
.ok_or_else(|| {
CyaneaError::Parse("ABI: missing PLOC (peak positions) tag".to_string())
})?;
let (ploc_offset, ref ploc_entry) = entries[ploc_idx];
let ploc_data = get_entry_data(data, ploc_entry, ploc_offset)?;
let peak_positions = read_u16_array(ploc_data, ploc_entry.num_elements as usize)?;
let sample_name = if let Some(idx) = find_entry(b"SMPL", 1) {
let (entry_offset, ref entry) = entries[idx];
let smpl_data = get_entry_data(data, entry, entry_offset)?;
String::from_utf8_lossy(&smpl_data[..entry.num_elements as usize]).to_string()
} else {
String::new()
};
Ok(AbiRecord {
sequence,
quality,
traces: AbiTraces {
a: trace_a,
c: trace_c,
g: trace_g,
t: trace_t,
},
sample_name,
peak_positions,
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Size of the fixed header region; out-of-line payloads start right after it.
    const HEADER_SIZE: usize = 128;
    /// Number of samples written to each synthetic trace channel.
    const TRACE_LEN: usize = 50;

    /// One tagged item of the synthetic file: directory fields plus payload bytes.
    struct Item {
        tag: [u8; 4],
        number: i32,
        elem_type: i16,
        elem_size: i16,
        num_elems: usize,
        payload: Vec<u8>,
    }

    /// Builds a minimal ABIF file containing `FWO_`, `PBAS`, `PCON`, `DATA` 9-12,
    /// `PLOC` and `SMPL` entries.
    ///
    /// Payloads of at most four bytes are stored inline in the directory entry,
    /// which is where the parser reads them from; larger payloads are laid out
    /// back to back after the header and referenced by offset.
    fn build_test_abi(sequence: &[u8], quality: &[u8], sample_name: &str) -> Vec<u8> {
        let seq_len = sequence.len();
        assert_eq!(
            quality.len(),
            seq_len,
            "fixture needs one quality value per base"
        );

        let mut items = vec![
            Item {
                tag: *b"FWO_",
                number: 1,
                elem_type: 2,
                elem_size: 1,
                num_elems: 4,
                payload: b"ACGT".to_vec(),
            },
            Item {
                tag: *b"PBAS",
                number: 2,
                elem_type: 2,
                elem_size: 1,
                num_elems: seq_len,
                payload: sequence.to_vec(),
            },
            Item {
                tag: *b"PCON",
                number: 2,
                elem_type: 1,
                elem_size: 1,
                num_elems: seq_len,
                payload: quality.to_vec(),
            },
        ];

        // Channel `ch` holds the ramp (ch + 1) * 100 + i, so channels are distinguishable.
        for ch in 0..4i16 {
            let mut payload = Vec::with_capacity(TRACE_LEN * 2);
            for i in 0..TRACE_LEN {
                let value = (ch + 1) * 100 + i as i16;
                payload.extend_from_slice(&value.to_be_bytes());
            }
            items.push(Item {
                tag: *b"DATA",
                number: 9 + i32::from(ch),
                elem_type: 4,
                elem_size: 2,
                num_elems: TRACE_LEN,
                payload,
            });
        }

        // Peaks are spread evenly across the trace.
        let mut peaks = Vec::with_capacity(seq_len * 2);
        for i in 0..seq_len {
            let pos = ((i * TRACE_LEN) / seq_len.max(1)) as u16;
            peaks.extend_from_slice(&pos.to_be_bytes());
        }
        items.push(Item {
            tag: *b"PLOC",
            number: 2,
            elem_type: 5,
            elem_size: 2,
            num_elems: seq_len,
            payload: peaks,
        });

        items.push(Item {
            tag: *b"SMPL",
            number: 1,
            elem_type: 18,
            elem_size: 1,
            num_elems: sample_name.len(),
            payload: sample_name.as_bytes().to_vec(),
        });

        // Only payloads longer than four bytes occupy space in the data area.
        let data_len: usize = items
            .iter()
            .map(|item| item.payload.len())
            .filter(|&len| len > 4)
            .sum();
        let dir_offset = HEADER_SIZE + data_len;
        let mut buf = vec![0u8; dir_offset + items.len() * DIR_ENTRY_SIZE];

        // Header: magic, version, then the root `tdir` entry describing the directory.
        buf[0..4].copy_from_slice(b"ABIF");
        buf[4..6].copy_from_slice(&101i16.to_be_bytes());
        buf[6..10].copy_from_slice(b"tdir");
        buf[10..14].copy_from_slice(&1i32.to_be_bytes());
        buf[14..16].copy_from_slice(&1023i16.to_be_bytes());
        buf[16..18].copy_from_slice(&28i16.to_be_bytes());
        buf[18..22].copy_from_slice(&(items.len() as i32).to_be_bytes());
        buf[22..26].copy_from_slice(&((items.len() * DIR_ENTRY_SIZE) as i32).to_be_bytes());
        buf[26..30].copy_from_slice(&(dir_offset as i32).to_be_bytes());

        let mut cursor = HEADER_SIZE;
        for (idx, item) in items.iter().enumerate() {
            let base = dir_offset + idx * DIR_ENTRY_SIZE;
            let size = item.payload.len();
            buf[base..base + 4].copy_from_slice(&item.tag);
            buf[base + 4..base + 8].copy_from_slice(&item.number.to_be_bytes());
            buf[base + 8..base + 10].copy_from_slice(&item.elem_type.to_be_bytes());
            buf[base + 10..base + 12].copy_from_slice(&item.elem_size.to_be_bytes());
            buf[base + 12..base + 16].copy_from_slice(&(item.num_elems as i32).to_be_bytes());
            buf[base + 16..base + 20].copy_from_slice(&(size as i32).to_be_bytes());
            if size <= 4 {
                // Inline payload: stored left-justified in the offset field.
                buf[base + 20..base + 20 + size].copy_from_slice(&item.payload);
            } else {
                buf[base + 20..base + 24].copy_from_slice(&(cursor as i32).to_be_bytes());
                buf[cursor..cursor + size].copy_from_slice(&item.payload);
                cursor += size;
            }
        }
        buf
    }

    #[test]
    fn abi_valid_sequence() {
        let seq = b"ACGTACGT";
        let qual = vec![40u8; 8];
        let data = build_test_abi(seq, &qual, "TestSample");
        let record = parse_abi_bytes(&data).unwrap();
        assert_eq!(record.sequence, b"ACGTACGT");
    }

    #[test]
    fn abi_quality_values() {
        let seq = b"ACGTNN";
        let qual = vec![30, 35, 40, 45, 10, 5];
        let data = build_test_abi(seq, &qual, "QualTest");
        let record = parse_abi_bytes(&data).unwrap();
        assert_eq!(record.quality.len(), seq.len());
        assert_eq!(record.quality, qual);
    }

    #[test]
    fn abi_trace_consistency() {
        let seq = b"ATGC";
        let qual = vec![40u8; 4];
        let data = build_test_abi(seq, &qual, "TraceTest");
        let record = parse_abi_bytes(&data).unwrap();
        assert_eq!(record.traces.a.len(), 50);
        assert_eq!(record.traces.c.len(), 50);
        assert_eq!(record.traces.g.len(), 50);
        assert_eq!(record.traces.t.len(), 50);
        assert!(record.traces.a.iter().any(|&v| v > 0));
        assert!(record.traces.c.iter().any(|&v| v > 0));
        assert!(record.traces.g.iter().any(|&v| v > 0));
        assert!(record.traces.t.iter().any(|&v| v > 0));
    }

    #[test]
    fn abi_sample_name() {
        let seq = b"ACGT";
        let qual = vec![40u8; 4];
        let data = build_test_abi(seq, &qual, "MySample_2024");
        let record = parse_abi_bytes(&data).unwrap();
        assert_eq!(record.sample_name, "MySample_2024");
    }

    #[test]
    fn abi_short_payloads_are_inline() {
        // Every one-byte-per-element payload here is at most four bytes long,
        // so all of them exercise the inline-data path.
        let qual = vec![10u8, 20, 30];
        let data = build_test_abi(b"ACG", &qual, "S1");
        let record = parse_abi_bytes(&data).unwrap();
        assert_eq!(record.sequence, b"ACG");
        assert_eq!(record.quality, qual);
        assert_eq!(record.sample_name, "S1");
        assert_eq!(record.peak_positions, vec![0u16, 16, 33]);
    }

    #[test]
    fn abi_invalid_magic() {
        let mut data = vec![0u8; 256];
        data[0..4].copy_from_slice(b"NOPE");
        let result = parse_abi_bytes(&data);
        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(err_msg.contains("magic"));
    }
}