use arrow::array::StringArray;
use bytes::Bytes;
use datafusion::datasource::file_format::write::BatchSerializer;
use noodles::fasta::{
record::{Definition, Sequence},
Record,
};
use super::columns_from_batch::get_array_column;
/// Stateless [`BatchSerializer`] that renders record batches as FASTA text.
///
/// The struct carries no fields; all behavior lives in its `BatchSerializer`
/// implementation.
#[derive(Debug, Default)]
pub(crate) struct FASTASerializer {}
impl BatchSerializer for FASTASerializer {
    /// Serializes every row of `batch` into FASTA-formatted bytes.
    ///
    /// The batch must expose string columns named `id`, `description`, and
    /// `sequence`. Each row becomes one FASTA record whose definition line is
    /// built from `id` and (when present) `description`, followed by the
    /// `sequence` residues.
    ///
    /// A null `description` yields a definition without a description, rather
    /// than an empty description. Null `id` or `sequence` values are written
    /// as empty strings.
    ///
    /// `_initial` is ignored: the output does not depend on whether this is
    /// the first batch.
    ///
    /// # Errors
    ///
    /// Returns an error if any of the three columns cannot be obtained as a
    /// `StringArray` (propagated from `get_array_column`), or if writing a
    /// record to the in-memory buffer fails.
    fn serialize(
        &self,
        batch: arrow::array::RecordBatch,
        _initial: bool,
    ) -> datafusion::error::Result<bytes::Bytes> {
        let ids = get_array_column::<StringArray>(&batch, "id")?;
        let descriptions = get_array_column::<StringArray>(&batch, "description")?;
        let sequences = get_array_column::<StringArray>(&batch, "sequence")?;

        let mut fasta_writer = noodles::fasta::writer::Writer::new(Vec::new());

        // The array iterators consult the null bitmap and yield `Option<&str>`,
        // unlike `value(i)`, which returns the slot's contents even when the
        // slot is null.
        let rows = ids.iter().zip(descriptions.iter()).zip(sequences.iter());

        for ((id, description), sequence) in rows {
            // Map a null description to `None` so the definition carries no
            // description at all instead of an empty one.
            let description = description.map(|text| text.as_bytes().to_vec());
            let definition = Definition::new(id.unwrap_or_default(), description);

            let sequence = Sequence::from(sequence.unwrap_or_default().as_bytes().to_vec());

            let record = Record::new(definition, sequence);
            fasta_writer.write_record(&record)?;
        }

        Ok(Bytes::from(fasta_writer.get_ref().to_vec()))
    }
}