use arrow::array::StringArray;
use bytes::Bytes;
use datafusion::datasource::file_format::write::BatchSerializer;
use super::columns_from_batch::get_array_column;
/// Serializer that turns Arrow record batches into FASTQ-formatted bytes.
///
/// The struct has no fields; all of the work happens in its
/// `BatchSerializer` implementation.
#[derive(Debug, Default)]
pub(crate) struct FASTQSerializer {}
impl BatchSerializer for FASTQSerializer {
    /// Encodes every row of `batch` as one FASTQ record and returns the
    /// concatenated output.
    ///
    /// The batch is read through four string columns: `name`, `description`,
    /// `sequence` and `quality_scores`. The `_initial` flag is not used.
    ///
    /// # Errors
    ///
    /// Propagates any error from `get_array_column` (presumably a missing or
    /// non-string column; confirm against that helper) and any I/O error
    /// reported by the FASTQ writer.
    fn serialize(
        &self,
        batch: arrow::array::RecordBatch,
        _initial: bool,
    ) -> datafusion::error::Result<bytes::Bytes> {
        use noodles::fastq::{io::Writer, record::Definition, Record};

        let name_col = get_array_column::<StringArray>(&batch, "name")?;
        let description_col = get_array_column::<StringArray>(&batch, "description")?;
        let sequence_col = get_array_column::<StringArray>(&batch, "sequence")?;
        let quality_col = get_array_column::<StringArray>(&batch, "quality_scores")?;

        // Records accumulate in an in-memory buffer owned by the writer.
        let mut fastq_writer = Writer::new(Vec::<u8>::new());

        // NOTE(review): `value(row)` is used without a null check, so null
        // slots are written as whatever the array stores for them — confirm
        // that upstream guarantees non-null columns.
        (0..batch.num_rows()).try_for_each(|row| {
            let header = Definition::new(name_col.value(row), description_col.value(row));
            let entry = Record::new(header, sequence_col.value(row), quality_col.value(row));
            fastq_writer.write_record(&entry)
        })?;

        // Copy the buffered bytes out of the writer and hand them to `Bytes`.
        let encoded = fastq_writer.get_ref().to_vec();
        Ok(Bytes::from(encoded))
    }
}