tantivy_columnar/
lib.rs

//! # Tantivy-Columnar
//!
//! `tantivy-columnar` provides columnar storage for tantivy.
//! The crate allows for efficient read operations on specific columns rather than entire records.
//!
//! ## Overview
//!
//! - **columnar**: Reading, writing, and merging multiple columns:
//!   - **[ColumnarWriter]**: Makes it possible to create a new columnar.
//!   - **[ColumnarReader]**: Makes it possible to access a set of columns
//!     associated with field names.
//!   - **[merge_columnar]**: Contains the functionality to merge multiple ColumnarReaders or
//!     segments into a single one.
//!
//! - **column**: A single column, which contains:
//!     - [column_index]: Resolves the rows for a document id. Manages the cardinality of the
//!       column.
//!     - [column_values]: Stores the values of a column in a dense format.
19
20#[cfg(test)]
21#[macro_use]
22extern crate more_asserts;
23
24use std::fmt::Display;
25use std::io;
26
27mod block_accessor;
28mod column;
29pub mod column_index;
30pub mod column_values;
31mod columnar;
32mod dictionary;
33mod dynamic_column;
34mod iterable;
35pub(crate) mod utils;
36mod value;
37
38pub use block_accessor::ColumnBlockAccessor;
39pub use column::{BytesColumn, Column, StrColumn};
40pub use column_index::ColumnIndex;
41pub use column_values::{
42    ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
43};
44pub use columnar::{
45    CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
46    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
47};
48use sstable::VoidSSTable;
49pub use value::{NumericalType, NumericalValue};
50
51pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};
52
/// Identifier of a row, represented as a `u32`.
pub type RowId = u32;
/// Identifier of a document, represented as a `u32`.
pub type DocId = u32;

/// Address of a row, expressed as a segment ordinal paired with a row id.
// NOTE(review): presumably `row_id` is relative to the segment designated by
// `segment_ord` -- confirm against the callers.
#[derive(Debug, Copy, Clone)]
pub struct RowAddr {
    /// Ordinal of the segment.
    pub segment_ord: u32,
    /// Id of the row.
    pub row_id: RowId,
}
61
62pub use sstable::Dictionary;
/// Alias for `sstable::Streamer` instantiated with `VoidSSTable`.
pub type Streamer<'a> = sstable::Streamer<'a, VoidSSTable>;
64
65pub use common::DateTime;
66
/// Payload-free marker error.
///
/// Converting it into an [`io::Error`] yields an error of kind
/// [`io::ErrorKind::InvalidData`] carrying the message `"Invalid data"`.
#[derive(Debug, Clone, Copy)]
pub struct InvalidData;

impl From<InvalidData> for io::Error {
    /// Builds the `io::Error` counterpart of [`InvalidData`].
    fn from(_invalid_data: InvalidData) -> io::Error {
        let kind = io::ErrorKind::InvalidData;
        io::Error::new(kind, "Invalid data")
    }
}
75
/// Number of values that may exist for each document (or, if you prefer, for each row).
///
/// The cardinality must fit on 2 bits.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum Cardinality {
    /// Every document holds exactly one value.
    ///
    /// Being the most strict cardinality, `Full` is the default when auto-detecting the
    /// cardinality.
    #[default]
    Full = 0,
    /// Every document holds at most one value.
    Optional = 1,
    /// Every document may hold any number of values.
    Multivalued = 2,
}

impl Display for Cardinality {
    /// Writes the short name of the cardinality: `full`, `opt` or `mult`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str(match *self {
            Cardinality::Full => "full",
            Cardinality::Optional => "opt",
            Cardinality::Multivalued => "mult",
        })
    }
}
103
104impl Cardinality {
105    pub fn is_optional(&self) -> bool {
106        matches!(self, Cardinality::Optional)
107    }
108    pub fn is_multivalue(&self) -> bool {
109        matches!(self, Cardinality::Multivalued)
110    }
111    pub fn is_full(&self) -> bool {
112        matches!(self, Cardinality::Full)
113    }
114    pub(crate) fn to_code(self) -> u8 {
115        self as u8
116    }
117    pub(crate) fn try_from_code(code: u8) -> Result<Cardinality, InvalidData> {
118        match code {
119            0 => Ok(Cardinality::Full),
120            1 => Ok(Cardinality::Optional),
121            2 => Ok(Cardinality::Multivalued),
122            _ => Err(InvalidData),
123        }
124    }
125}
126
127#[cfg(test)]
128mod tests;
129
130#[cfg(test)]
131mod compat_tests;