1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
//! # Tantivy-Columnar
//!
//! `tantivy-columnar`provides a columnar storage for tantivy.
//! The crate allows for efficient read operations on specific columns rather than entire records.
//!
//! ## Overview
//!
//! - **columnar**: Reading, writing, and merging multiple columns:
//!   - **[ColumnarWriter]**: Makes it possible to create a new columnar.
//!   - **[ColumnarReader]**: The ColumnarReader makes it possible to access a set of columns
//!     associated to field names.
//!   - **[merge_columnar]**: Contains the functionalities to merge multiple ColumnarReader or
//!     segments into a single one.
//!
//! - **column**: A single column, which contains
//!     - [column_index]: Resolves the rows for a document id. Manages the cardinality of the
//!       column.
//!     - [column_values]: Stores the values of a column in a dense format.

#![cfg_attr(all(feature = "unstable", test), feature(test))]

#[cfg(test)]
#[macro_use]
extern crate more_asserts;

#[cfg(all(test, feature = "unstable"))]
extern crate test;

use std::fmt::Display;
use std::io;

mod block_accessor;
mod column;
pub mod column_index;
pub mod column_values;
mod columnar;
mod dictionary;
mod dynamic_column;
mod iterable;
pub(crate) mod utils;
mod value;

pub use block_accessor::ColumnBlockAccessor;
pub use column::{BytesColumn, Column, StrColumn};
pub use column_index::ColumnIndex;
pub use column_values::{
    ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
};
pub use columnar::{
    merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
};
use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue};

pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};

pub type RowId = u32;
pub type DocId = u32;

#[derive(Clone, Copy, Debug)]
pub struct RowAddr {
    pub segment_ord: u32,
    pub row_id: RowId,
}

pub use sstable::Dictionary;
pub type Streamer<'a> = sstable::Streamer<'a, VoidSSTable>;

pub use common::DateTime;

#[derive(Copy, Clone, Debug)]
pub struct InvalidData;

impl From<InvalidData> for io::Error {
    fn from(_: InvalidData) -> Self {
        io::Error::new(io::ErrorKind::InvalidData, "Invalid data")
    }
}

/// Enum describing the number of values that can exist per document
/// (or per row if you will).
///
/// The cardinality must fit on 2 bits.
#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u8)]
pub enum Cardinality {
    /// All documents contain exactly one value.
    /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
    #[default]
    Full = 0,
    /// All documents contain at most one value.
    Optional = 1,
    /// All documents may contain any number of values.
    Multivalued = 2,
}

impl Display for Cardinality {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let short_str = match self {
            Cardinality::Full => "full",
            Cardinality::Optional => "opt",
            Cardinality::Multivalued => "mult",
        };
        write!(f, "{short_str}")
    }
}

impl Cardinality {
    pub fn is_optional(&self) -> bool {
        matches!(self, Cardinality::Optional)
    }
    pub fn is_multivalue(&self) -> bool {
        matches!(self, Cardinality::Multivalued)
    }
    pub fn is_full(&self) -> bool {
        matches!(self, Cardinality::Full)
    }
    pub(crate) fn to_code(self) -> u8 {
        self as u8
    }
    pub(crate) fn try_from_code(code: u8) -> Result<Cardinality, InvalidData> {
        match code {
            0 => Ok(Cardinality::Full),
            1 => Ok(Cardinality::Optional),
            2 => Ok(Cardinality::Multivalued),
            _ => Err(InvalidData),
        }
    }
}

#[cfg(test)]
mod tests;