use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DefaultOnError, DefaultOnNull};
use std::path::Path;
pub use tantivy::store::Compressor;
use tantivy::{
    query::QueryParser, schema::*, store::ZstdCompressor, tokenizer::TextAnalyzer, Index,
    TantivyError,
};
use tantivy_meta_tokenizer::{get_tokenizer, META_TOKENIZER};

pub mod index;
pub mod search;

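/// A single book record as stored in the search index.
///
/// The `serde_with` adapters make deserialization tolerant of dirty source
/// data: `DefaultOnNull` turns a JSON `null` into the field's default (an
/// empty string here), and `DefaultOnError` turns an unparsable number into
/// `0` instead of failing the whole record.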
#[serde_as]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Book {
    pub id: u64,

    pub title: String,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub author: String,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub publisher: String,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub extension: String,
    #[serde_as(deserialize_as = "DefaultOnError")]
    pub filesize: u64,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub language: String,
    #[serde_as(deserialize_as = "DefaultOnError")]
    pub year: u64,
    #[serde_as(deserialize_as = "DefaultOnError")]
    pub pages: u64,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub isbn: String,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub ipfs_cid: String,
}

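// Convert a stored tantivy `Document` back into a `Book`, resolving each
// field by name against the accompanying `Schema`.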
impl From<(&Schema, Document)> for Book {
    fn from((schema, doc): (&Schema, Document)) -> Self {
        // Extract the first text value of a named field, falling back to an
        // empty string if the value has a different type.
        macro_rules! get_field_text {
            ($field:expr) => {
                doc.get_first(schema.get_field($field).unwrap())
                    .unwrap()
                    .as_text()
                    .unwrap_or_default()
                    .to_owned()
            };
        }

        // Same as above, but for u64 fields, falling back to 0.
        macro_rules! get_field_u64 {
            ($field:expr) => {
                doc.get_first(schema.get_field($field).unwrap())
                    .unwrap()
                    .as_u64()
                    .unwrap_or_default()
            };
        }

        Book {
            id: get_field_u64!("id"),
            title: get_field_text!("title"),
            author: get_field_text!("author"),
            publisher: get_field_text!("publisher"),
            extension: get_field_text!("extension"),
            filesize: get_field_u64!("filesize"),
            language: get_field_text!("language"),
            year: get_field_u64!("year"),
            pages: get_field_u64!("pages"),
            isbn: get_field_text!("isbn"),
            ipfs_cid: get_field_text!("ipfs_cid"),
        }
    }
}

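/// Thin wrapper around a tantivy [`Index`]: holds the schema, the query
/// parser, the registered tokenizer, and a cached [`Field`] handle for each
/// schema field.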
#[derive(Clone)]
pub struct Searcher {
    pub compressor: Compressor,

    index: Index,
    schema: Schema,
    query_parser: QueryParser,
    tokenizer: TextAnalyzer,

    id: Field,
    title: Field,
    author: Field,
    publisher: Field,
    publisher_exist: Field,
    extension: Field,
    filesize: Field,
    language: Field,
    year: Field,
    pages: Field,
    isbn: Field,
    ipfs_cid: Field,
}

impl Searcher {
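    /// Open the index at `index_dir`, creating the directory and a fresh
    /// index there if none exists yet.
    ///
    /// A minimal usage sketch; the crate name `book_searcher_core` is an
    /// assumption and may differ in your tree:
    ///
    /// ```ignore
    /// use book_searcher_core::Searcher;
    ///
    /// let mut searcher = Searcher::new("/tmp/books-index");
    /// searcher.set_compressor("zstd");
    /// ```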
    pub fn new(index_dir: impl AsRef<Path>) -> Self {
        // Full-text fields share one indexing configuration: the meta
        // tokenizer plus term frequencies and positions, so phrase queries
        // work.
        let text_indexing = TextFieldIndexing::default()
            .set_tokenizer(META_TOKENIZER)
            .set_index_option(IndexRecordOption::WithFreqsAndPositions);
        let text_options = TextOptions::default()
            .set_indexing_options(text_indexing)
            .set_stored();

        let mut schema_builder = Schema::builder();
        let id = schema_builder.add_u64_field("id", INDEXED | STORED);
        let title = schema_builder.add_text_field("title", text_options.clone());
        let author = schema_builder.add_text_field("author", text_options.clone());
        let publisher = schema_builder.add_text_field("publisher", text_options);
        let publisher_exist = schema_builder.add_bool_field("publisher_exist", FAST);
        let extension = schema_builder.add_text_field("extension", STRING | STORED);
        let filesize = schema_builder.add_u64_field("filesize", STORED);
        let language = schema_builder.add_text_field("language", TEXT | STORED);
        let year = schema_builder.add_u64_field("year", STORED);
        let pages = schema_builder.add_u64_field("pages", STORED | FAST);
        let isbn = schema_builder.add_text_field("isbn", TEXT | STORED);
        let ipfs_cid = schema_builder.add_text_field("ipfs_cid", STORED);
        let schema = schema_builder.build();

        // Reuse an existing index if the directory already holds one;
        // otherwise create the directory and a new index inside it.
        let index_dir = index_dir.as_ref();
        let mut index = Index::open_in_dir(index_dir).unwrap_or_else(|err| {
            if let TantivyError::OpenDirectoryError(_) | TantivyError::OpenReadError(_) = err {
                std::fs::create_dir_all(index_dir).expect("create index directory");
                Index::create_in_dir(index_dir, schema.clone()).unwrap()
            } else {
                panic!("Error opening index: {err:?}")
            }
        });

        let tokenizer = get_tokenizer();
        index
            .tokenizers()
            .register(META_TOKENIZER, tokenizer.clone());
        _ = index.set_default_multithread_executor();

        // Queries without an explicit field search title/author/publisher/isbn,
        // and bare terms are ANDed together by default.
        let mut query_parser = QueryParser::for_index(&index, vec![title, author, publisher, isbn]);
        query_parser.set_conjunction_by_default();

        Self {
            compressor: Compressor::Brotli,

            index,
            schema,
            query_parser,
            tokenizer,

            id,
            title,
            author,
            publisher,
            publisher_exist,
            extension,
            filesize,
            language,
            year,
            pages,
            isbn,
            ipfs_cid,
        }
    }

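    /// Select the doc-store compressor by name. Accepted values are `none`,
    /// `lz4`, `brotli`, `snappy`, and anything starting with `zstd` (which
    /// currently maps to zstd with default settings); any other value prints
    /// the list of valid names and exits the process.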
    pub fn set_compressor(&mut self, compressor: &str) {
        let compressor = match compressor {
            "none" => Compressor::None,
            "lz4" => Compressor::Lz4,
            "brotli" => Compressor::Brotli,
            "snappy" => Compressor::Snappy,
            _ => {
                if compressor.starts_with("zstd") {
                    Compressor::Zstd(ZstdCompressor::default())
                } else {
                    eprintln!(
                        "invalid compressor, expected one of: {:?}",
                        ["none", "lz4", "brotli", "snappy", "zstd"]
                    );
                    std::process::exit(1);
                }
            }
        };

        self.index.settings_mut().docstore_compression = compressor.clone();
        // Keep the cached public field in sync with the index settings.
        self.compressor = compressor;
    }
}
190}