// tantivy_columnar/column/dictionary_encoded.rs
use std::ops::Deref;
2use std::sync::Arc;
3use std::{fmt, io};
4
5use sstable::{Dictionary, VoidSSTable};
6
7use crate::column::Column;
8use crate::RowId;
9
10#[derive(Clone)]
19pub struct BytesColumn {
20 pub(crate) dictionary: Arc<Dictionary<VoidSSTable>>,
21 pub(crate) term_ord_column: Column<u64>,
22}
23
24impl fmt::Debug for BytesColumn {
25 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26 f.debug_struct("BytesColumn")
27 .field("term_ord_column", &self.term_ord_column)
28 .finish()
29 }
30}
31
32impl BytesColumn {
33 pub fn empty(num_docs: u32) -> BytesColumn {
34 BytesColumn {
35 dictionary: Arc::new(Dictionary::empty()),
36 term_ord_column: Column::build_empty_column(num_docs),
37 }
38 }
39
40 pub fn ord_to_bytes(&self, ord: u64, output: &mut Vec<u8>) -> io::Result<bool> {
45 self.dictionary.ord_to_term(ord, output)
46 }
47
48 pub fn num_rows(&self) -> RowId {
50 self.term_ord_column.num_docs()
51 }
52
53 pub fn term_ords(&self, row_id: RowId) -> impl Iterator<Item = u64> + '_ {
54 self.term_ord_column.values_for_doc(row_id)
55 }
56
57 pub fn ords(&self) -> &Column<u64> {
59 &self.term_ord_column
60 }
61
62 pub fn num_terms(&self) -> usize {
63 self.dictionary.num_terms()
64 }
65
66 pub fn dictionary(&self) -> &Dictionary<VoidSSTable> {
67 self.dictionary.as_ref()
68 }
69}
70
71#[derive(Clone)]
72pub struct StrColumn(BytesColumn);
73
74impl fmt::Debug for StrColumn {
75 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
76 write!(f, "{:?}", self.term_ord_column)
77 }
78}
79
80impl From<StrColumn> for BytesColumn {
81 fn from(str_column: StrColumn) -> BytesColumn {
82 str_column.0
83 }
84}
85
86impl StrColumn {
87 pub fn wrap(bytes_column: BytesColumn) -> StrColumn {
88 StrColumn(bytes_column)
89 }
90
91 pub fn dictionary(&self) -> &Dictionary<VoidSSTable> {
92 self.0.dictionary.as_ref()
93 }
94
95 pub fn ord_to_str(&self, term_ord: u64, output: &mut String) -> io::Result<bool> {
97 unsafe {
98 let buf = output.as_mut_vec();
99 if !self.0.dictionary.ord_to_term(term_ord, buf)? {
100 return Ok(false);
101 }
102 if std::str::from_utf8(buf.as_slice()).is_err() {
104 buf.clear();
105 return Err(io::Error::new(
106 io::ErrorKind::InvalidData,
107 "Not valid utf-8",
108 ));
109 }
110 }
111 Ok(true)
112 }
113}
114
115impl Deref for StrColumn {
116 type Target = BytesColumn;
117
118 fn deref(&self) -> &Self::Target {
119 &self.0
120 }
121}