tantivy_columnar/column/
serialize.rs1use std::io;
2use std::io::Write;
3use std::sync::Arc;
4
5use common::OwnedBytes;
6use sstable::Dictionary;
7
8use crate::column::{BytesColumn, Column};
9use crate::column_index::{serialize_column_index, SerializableColumnIndex};
10use crate::column_values::{
11 load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
12 CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
13};
14use crate::iterable::Iterable;
15use crate::{StrColumn, Version};
16
17pub fn serialize_column_mappable_to_u128<T: MonotonicallyMappableToU128>(
18 column_index: SerializableColumnIndex<'_>,
19 iterable: &dyn Iterable<T>,
20 output: &mut impl Write,
21) -> io::Result<()> {
22 let column_index_num_bytes = serialize_column_index(column_index, output)?;
23 serialize_column_values_u128(iterable, output)?;
24 output.write_all(&column_index_num_bytes.to_le_bytes())?;
25 Ok(())
26}
27
28pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
29 column_index: SerializableColumnIndex<'_>,
30 column_values: &impl Iterable<T>,
31 output: &mut impl Write,
32) -> io::Result<()> {
33 let column_index_num_bytes = serialize_column_index(column_index, output)?;
34 serialize_u64_based_column_values(
35 column_values,
36 &[CodecType::Bitpacked, CodecType::BlockwiseLinear],
37 output,
38 )?;
39 output.write_all(&column_index_num_bytes.to_le_bytes())?;
40 Ok(())
41}
42
43pub fn open_column_u64<T: MonotonicallyMappableToU64>(
44 bytes: OwnedBytes,
45 format_version: Version,
46) -> io::Result<Column<T>> {
47 let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
48 let column_index_num_bytes = u32::from_le_bytes(
49 column_index_num_bytes_payload
50 .as_slice()
51 .try_into()
52 .unwrap(),
53 );
54 let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
55 let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
56 let column_values = load_u64_based_column_values(column_values_data)?;
57 Ok(Column {
58 index: column_index,
59 values: column_values,
60 })
61}
62
63pub fn open_column_u128<T: MonotonicallyMappableToU128>(
64 bytes: OwnedBytes,
65 format_version: Version,
66) -> io::Result<Column<T>> {
67 let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
68 let column_index_num_bytes = u32::from_le_bytes(
69 column_index_num_bytes_payload
70 .as_slice()
71 .try_into()
72 .unwrap(),
73 );
74 let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
75 let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
76 let column_values = crate::column_values::open_u128_mapped(column_values_data)?;
77 Ok(Column {
78 index: column_index,
79 values: column_values,
80 })
81}
82
83pub fn open_column_u128_as_compact_u64(
87 bytes: OwnedBytes,
88 format_version: Version,
89) -> io::Result<Column<u64>> {
90 let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
91 let column_index_num_bytes = u32::from_le_bytes(
92 column_index_num_bytes_payload
93 .as_slice()
94 .try_into()
95 .unwrap(),
96 );
97 let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
98 let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
99 let column_values = crate::column_values::open_u128_as_compact_u64(column_values_data)?;
100 Ok(Column {
101 index: column_index,
102 values: column_values,
103 })
104}
105
106pub fn open_column_bytes(data: OwnedBytes, format_version: Version) -> io::Result<BytesColumn> {
107 let (body, dictionary_len_bytes) = data.rsplit(4);
108 let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap());
109 let (dictionary_bytes, column_bytes) = body.split(dictionary_len as usize);
110 let dictionary = Arc::new(Dictionary::from_bytes(dictionary_bytes)?);
111 let term_ord_column = crate::column::open_column_u64::<u64>(column_bytes, format_version)?;
112 Ok(BytesColumn {
113 dictionary,
114 term_ord_column,
115 })
116}
117
118pub fn open_column_str(data: OwnedBytes, format_version: Version) -> io::Result<StrColumn> {
119 let bytes_column = open_column_bytes(data, format_version)?;
120 Ok(StrColumn::wrap(bytes_column))
121}