use anyhow::{Context, Result};
use bytes::Bytes;
use datafusion::parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
use micromegas_tracing::prelude::*;
#[allow(deprecated)]
use parquet::format::FileMetaData as ThriftFileMetaData;
use parquet::thrift::TSerializable;
use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TOutputProtocol};
/// Re-encodes legacy Parquet file metadata so the current metadata reader can decode it.
///
/// `metadata_bytes` is read as a Thrift compact-protocol `FileMetaData` struct. When the
/// decoded `num_rows` field is `0`, it is overwritten with the caller-supplied `num_rows`;
/// a non-zero stored value is left untouched. The struct is then serialized back to the
/// compact protocol and passed to [`ParquetMetaDataReader::decode_metadata`].
///
/// # Errors
///
/// Returns an error if `metadata_bytes` cannot be parsed as Thrift `FileMetaData`, if
/// re-serializing or flushing the corrected struct fails, or if the re-encoded bytes are
/// rejected by `ParquetMetaDataReader`.
// The Thrift `FileMetaData` type is imported under `allow(deprecated)`; using it here
// needs the same allowance.
#[allow(deprecated)]
pub fn parse_legacy_and_upgrade(metadata_bytes: &[u8], num_rows: i64) -> Result<ParquetMetaData> {
    // Stage the input in an in-memory channel sized to hold exactly the legacy payload.
    let mut transport = thrift::transport::TBufferChannel::with_capacity(metadata_bytes.len(), 0);
    transport.set_readable_bytes(metadata_bytes);
    let mut protocol = TCompactInputProtocol::new(transport);
    let mut thrift_meta = ThriftFileMetaData::read_from_in_protocol(&mut protocol)
        .context("parsing legacy metadata with thrift")?;

    // Only a zero row count is treated as missing; a non-zero stored value wins.
    if thrift_meta.num_rows == 0 {
        trace!("injecting num_rows={} into legacy metadata", num_rows);
        thrift_meta.num_rows = num_rows;
    }

    let mut corrected_bytes: Vec<u8> = Vec::with_capacity(metadata_bytes.len() * 2);
    {
        // Scope the writer so its mutable borrow of the buffer visibly ends before the
        // buffer is moved into `Bytes` below.
        let mut out_protocol = TCompactOutputProtocol::new(&mut corrected_bytes);
        thrift_meta
            .write_to_out_protocol(&mut out_protocol)
            .context("serializing corrected thrift metadata")?;
        out_protocol
            .flush()
            .context("flushing corrected thrift metadata")?;
    }

    // Hand the buffer over by value instead of copying it a second time.
    let corrected_bytes = Bytes::from(corrected_bytes);
    ParquetMetaDataReader::decode_metadata(&corrected_bytes)
        .context("re-parsing with Arrow 57.0")
}