import tempfile
import pytest
import cryo
import polars as pl
# Queries shared by every test in this module: each entry is a set of keyword
# arguments for cryo, here a single 100-block window of the `blocks` datatype.
queries = [
    dict(
        datatype=['blocks'],
        start_block=17_000_000,
        end_block=17_000_100,
    ),
]
# [extension, reader] pairs: each file extension under test together with the
# polars function (`pl.read_<extension>`) used to load a file of that type.
file_formats = [
    [extension, getattr(pl, 'read_' + extension)]
    for extension in ('parquet', 'csv', 'json')
]
@pytest.mark.parametrize('query', queries)
@pytest.mark.parametrize('format', file_formats)
def test_file_output(query, format):
    """Check that data written by ``cryo.freeze`` matches ``cryo.collect``.

    For every datatype in the query, the single file reported by ``freeze``
    is read back with the format's reader and compared against the frame
    returned by ``collect`` for the same query.

    Args:
        query: keyword arguments for cryo; must contain a ``'datatype'`` list.
        format: ``[extension, reader]`` pair, where ``reader`` loads a file
            with that extension into a frame.
    """
    extension, reader = format

    # Non-parquet formats are selected by passing `<extension>=True`; no flag
    # is added for parquet (presumably cryo's default output -- confirm).
    if extension != 'parquet':
        query = dict(query, **{extension: True})

    # `collect` takes the datatype positionally, so strip it from the kwargs.
    collect_kwargs = {
        key: value for key, value in query.items() if key != 'datatype'
    }

    # The context manager removes the output directory once the comparison is
    # done, so repeated runs do not accumulate temporary directories.
    with tempfile.TemporaryDirectory() as output_dir:
        result = cryo.freeze(output_dir=output_dir, **query)

        for datatype in query['datatype']:
            paths = result['paths'][datatype]
            assert isinstance(paths, list)
            assert len(paths) == 1

            df_freeze = reader(paths[0])
            df_collect = cryo.collect(datatype, **collect_kwargs)

            # NOTE(review): newer polars releases rename `frame_equal` to
            # `equals`; confirm against the polars version pinned here.
            assert df_freeze.frame_equal(df_collect)
# Names of the in-memory output formats passed as `output_format` to
# `cryo.collect` by the Python-output test.
python_formats = ['polars', 'pandas', 'list', 'dict']
def _expected_output_type(output_format):
    """Return the type this test expects ``cryo.collect`` to return.

    NOTE(review): the expected types are inferred from the format names;
    confirm them against ``cryo.collect``.

    Raises:
        KeyError: if ``output_format`` is not one of the known format names.
    """
    if output_format == 'pandas':
        # pandas is not imported by this module; skip the case instead of
        # erroring when it is not installed.
        return pytest.importorskip('pandas').DataFrame
    return {'polars': pl.DataFrame, 'list': list, 'dict': dict}[output_format]


@pytest.mark.parametrize('query', queries)
@pytest.mark.parametrize('format', python_formats)
def test_python_output_python_formats(query, format):
    """Check that ``cryo.collect`` returns the requested Python type.

    Args:
        query: keyword arguments for cryo; must contain a ``'datatype'`` list.
        format: name of the output format, passed to ``cryo.collect`` as
            ``output_format``.
    """
    # `format` is a plain string, so the expected type is looked up from the
    # name rather than unpacked from it.
    expected_type = _expected_output_type(format)

    # Same calling convention as the file-output test: one datatype at a
    # time, passed positionally, with the remaining query keys as kwargs.
    collect_kwargs = {
        key: value for key, value in query.items() if key != 'datatype'
    }
    for datatype in query['datatype']:
        result = cryo.collect(datatype, output_format=format, **collect_kwargs)
        assert isinstance(result, expected_type)


# Backward-compatible alias for the previous name, which lacked the `test_`
# prefix and was therefore never collected by pytest.
python_output_python_formats = test_python_output_python_formats