tantivy_columnar/column/
serialize.rs

1use std::io;
2use std::io::Write;
3use std::sync::Arc;
4
5use common::OwnedBytes;
6use sstable::Dictionary;
7
8use crate::column::{BytesColumn, Column};
9use crate::column_index::{serialize_column_index, SerializableColumnIndex};
10use crate::column_values::{
11    load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
12    CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
13};
14use crate::iterable::Iterable;
15use crate::{StrColumn, Version};
16
17pub fn serialize_column_mappable_to_u128<T: MonotonicallyMappableToU128>(
18    column_index: SerializableColumnIndex<'_>,
19    iterable: &dyn Iterable<T>,
20    output: &mut impl Write,
21) -> io::Result<()> {
22    let column_index_num_bytes = serialize_column_index(column_index, output)?;
23    serialize_column_values_u128(iterable, output)?;
24    output.write_all(&column_index_num_bytes.to_le_bytes())?;
25    Ok(())
26}
27
28pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
29    column_index: SerializableColumnIndex<'_>,
30    column_values: &impl Iterable<T>,
31    output: &mut impl Write,
32) -> io::Result<()> {
33    let column_index_num_bytes = serialize_column_index(column_index, output)?;
34    serialize_u64_based_column_values(
35        column_values,
36        &[CodecType::Bitpacked, CodecType::BlockwiseLinear],
37        output,
38    )?;
39    output.write_all(&column_index_num_bytes.to_le_bytes())?;
40    Ok(())
41}
42
43pub fn open_column_u64<T: MonotonicallyMappableToU64>(
44    bytes: OwnedBytes,
45    format_version: Version,
46) -> io::Result<Column<T>> {
47    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
48    let column_index_num_bytes = u32::from_le_bytes(
49        column_index_num_bytes_payload
50            .as_slice()
51            .try_into()
52            .unwrap(),
53    );
54    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
55    let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
56    let column_values = load_u64_based_column_values(column_values_data)?;
57    Ok(Column {
58        index: column_index,
59        values: column_values,
60    })
61}
62
63pub fn open_column_u128<T: MonotonicallyMappableToU128>(
64    bytes: OwnedBytes,
65    format_version: Version,
66) -> io::Result<Column<T>> {
67    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
68    let column_index_num_bytes = u32::from_le_bytes(
69        column_index_num_bytes_payload
70            .as_slice()
71            .try_into()
72            .unwrap(),
73    );
74    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
75    let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
76    let column_values = crate::column_values::open_u128_mapped(column_values_data)?;
77    Ok(Column {
78        index: column_index,
79        values: column_values,
80    })
81}
82
83/// Open the column as u64.
84///
85/// See [`open_u128_as_compact_u64`] for more details.
86pub fn open_column_u128_as_compact_u64(
87    bytes: OwnedBytes,
88    format_version: Version,
89) -> io::Result<Column<u64>> {
90    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
91    let column_index_num_bytes = u32::from_le_bytes(
92        column_index_num_bytes_payload
93            .as_slice()
94            .try_into()
95            .unwrap(),
96    );
97    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
98    let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
99    let column_values = crate::column_values::open_u128_as_compact_u64(column_values_data)?;
100    Ok(Column {
101        index: column_index,
102        values: column_values,
103    })
104}
105
106pub fn open_column_bytes(data: OwnedBytes, format_version: Version) -> io::Result<BytesColumn> {
107    let (body, dictionary_len_bytes) = data.rsplit(4);
108    let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap());
109    let (dictionary_bytes, column_bytes) = body.split(dictionary_len as usize);
110    let dictionary = Arc::new(Dictionary::from_bytes(dictionary_bytes)?);
111    let term_ord_column = crate::column::open_column_u64::<u64>(column_bytes, format_version)?;
112    Ok(BytesColumn {
113        dictionary,
114        term_ord_column,
115    })
116}
117
118pub fn open_column_str(data: OwnedBytes, format_version: Version) -> io::Result<StrColumn> {
119    let bytes_column = open_column_bytes(data, format_version)?;
120    Ok(StrColumn::wrap(bytes_column))
121}