bing/
doc.rs

1use std::path::Path;
2
3use anyhow::Result;
4use tantivy::{
5  collector::{Count, TopDocs},
6  query::{BooleanQuery, QueryParser, TermQuery},
7  schema::{Field, IndexRecordOption, Schema, Value},
8  Index, IndexReader, IndexWriter, TantivyDocument, TantivyError, Term,
9};
10
/// A document as handed to the index by callers.
///
/// `Writer::add` removes any previously indexed document carrying the same
/// `id` before inserting this one, so `id` acts as the primary key.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Doc {
  /// Primary key of the document; search results are lists of these ids.
  pub id: u64,
  /// Timestamp of the document, filterable by range in `Searcher::search`.
  /// NOTE(review): the unit is presumably seconds — confirm against callers.
  pub ts: u64,
  /// Id of the owning user.
  pub uid: u64,
  /// Id of the owning organisation.
  pub org_id: u64,
  /// Id of the repository the document belongs to.
  pub repo_id: u64,
  /// Tags attached to the document; each tag is indexed as a separate value.
  pub tag_li: Vec<String>,
  /// Document title.
  pub title: String,
  /// Document body text.
  pub txt: String,
}
22
23pub struct DocFiled {
24  pub id: Field,
25  pub uid: Field,
26  pub org_id: Field,
27  pub repo_id: Field,
28  pub tag_li: Field,
29  pub ts: Field,
30  pub title: Field,
31  pub txt: Field,
32}
33
34impl DocFiled {
35  pub fn new(schema: &Schema) -> Self {
36    Self {
37      id: schema.get_field("id").unwrap(),
38      uid: schema.get_field("uid").unwrap(),
39      org_id: schema.get_field("org_id").unwrap(),
40      repo_id: schema.get_field("repo_id").unwrap(),
41      tag_li: schema.get_field("tag_li").unwrap(),
42      ts: schema.get_field("ts").unwrap(),
43      title: schema.get_field("title").unwrap(),
44      txt: schema.get_field("txt").unwrap(),
45    }
46  }
47
48  pub fn dump(&self, doc: Doc) -> TantivyDocument {
49    let mut tdoc = TantivyDocument::new();
50    tdoc.add_u64(self.id, doc.id);
51    tdoc.add_u64(self.uid, doc.uid);
52    tdoc.add_u64(self.org_id, doc.org_id);
53    tdoc.add_u64(self.repo_id, doc.repo_id);
54    for i in doc.tag_li.iter() {
55      tdoc.add_text(self.tag_li, i);
56    }
57    tdoc.add_u64(self.ts, doc.ts);
58    tdoc.add_text(self.title, doc.title);
59    tdoc.add_text(self.txt, doc.txt);
60    tdoc
61  }
62}
63
64#[static_init::dynamic]
65pub static FIELD: DocFiled = DocFiled::new(&crate::schema::DOC);
66
67pub struct Db {
68  pub index: Index,
69}
70
71pub fn open(path: impl AsRef<Path>) -> Result<Db> {
72  Ok(Db {
73    index: crate::open(path, &crate::schema::DOC)?,
74  })
75}
76
77impl Db {
78  pub fn searcher(&self) -> Result<Searcher> {
79    let index = &self.index;
80    Ok(Searcher::new(
81      index.reader()?,
82      QueryParser::for_index(index, vec![FIELD.title, FIELD.txt, FIELD.tag_li]),
83    ))
84  }
85
86  pub fn writer(&self) -> Result<Writer> {
87    Ok(Writer {
88      inner: self.index.writer(
89        2 << 24, // 32MB
90      )?,
91    })
92  }
93}
94
95// pub fn query(&self, query: impl AsRef<str>) -> Result<QueryParser> {
96//   let query_parser = QueryParser::for_index(&index, vec![FIELD.title, FIELD.txt, FIELD.tag_li]);
97// }
98
99pub struct Searcher {
100  pub reader: IndexReader,
101  pub parser: QueryParser,
102  pub ts: u64,
103}
104
/// One page of search results.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SearchResult {
  /// Ids of the matching documents on this page.
  pub li: Vec<u64>,
  /// Total number of matching documents. Only computed when the search was
  /// issued with `offset == 0`; it is `0` for every later page.
  pub count: usize,
}
111
112impl Searcher {
113  pub fn new(reader: IndexReader, parser: QueryParser) -> Self {
114    Self {
115      reader,
116      parser,
117      ts: sts::sec(),
118    }
119  }
120
121  pub fn search(
122    &mut self,
123    query: impl AsRef<str>,
124    uid: u64,
125    org_id: u64,
126    repo_id_li: impl IntoIterator<Item = u64>,
127    tag_li: impl IntoIterator<Item = String>,
128    ts_begin: Option<u64>,
129    ts_end: Option<u64>,
130    limit: usize,
131    offset: usize,
132  ) -> Result<SearchResult> {
133    let now = sts::sec();
134    if now > self.ts {
135      self.reader.reload()?;
136      self.ts = now;
137    }
138    let searcher = self.reader.searcher();
139    let mut query_li = vec![];
140
141    // 解析原始查询字符串
142    let query = query.as_ref();
143    if !query.is_empty() {
144      query_li.push(self.parser.parse_query(query)?);
145    }
146
147    // uid 和 org_id 过滤
148    for (val, field) in [(uid, FIELD.uid), (org_id, FIELD.org_id)] {
149      if val > 0 {
150        query_li.push(Box::new(TermQuery::new(
151          Term::from_field_u64(field, val),
152          IndexRecordOption::Basic,
153        )));
154      }
155    }
156
157    // repo_id_li 过滤 (OR 逻辑)
158    {
159      let mut repo_id_filter = vec![];
160      for repo_id_val in repo_id_li {
161        repo_id_filter.push(Box::new(TermQuery::new(
162          Term::from_field_u64(FIELD.repo_id, repo_id_val),
163          IndexRecordOption::Basic,
164        )) as Box<dyn tantivy::query::Query>);
165      }
166      if !repo_id_filter.is_empty() {
167        query_li.push(Box::new(BooleanQuery::union(repo_id_filter)));
168      }
169    }
170
171    // tag_li 过滤
172    for tag_val in tag_li {
173      query_li.push(Box::new(TermQuery::new(
174        Term::from_field_text(FIELD.tag_li, tag_val.as_ref()),
175        IndexRecordOption::Basic,
176      )));
177    }
178
179    // ts_begin 和 ts_end 过滤
180    if let (Some(ts_b), Some(ts_e)) = (ts_begin, ts_end) {
181      query_li.push(
182        self
183          .parser
184          .parse_query(&format!("ts:[{} TO {}]", ts_b, ts_e))?,
185      );
186    } else if let Some(ts_b) = ts_begin {
187      query_li.push(self.parser.parse_query(&format!("ts:[{} TO *]", ts_b))?);
188    } else if let Some(ts_e) = ts_end {
189      query_li.push(self.parser.parse_query(&format!("ts:[* TO {}]", ts_e))?);
190    }
191
192    let query = BooleanQuery::intersection(query_li);
193    let (top_li, count) = if offset == 0 {
194      searcher.search(&query, &(TopDocs::with_limit(limit), Count))?
195    } else {
196      (
197        searcher.search(&query, &TopDocs::with_limit(limit).and_offset(offset))?,
198        0,
199      )
200    };
201
202    let mut li = Vec::with_capacity(top_li.len());
203    for (_score, doc_address) in &top_li {
204      let doc = searcher.doc::<TantivyDocument>(*doc_address)?;
205      if let Some(id_val) = doc.get_first(FIELD.id)
206        && let Some(id) = id_val.as_u64()
207      {
208        li.push(id);
209      }
210    }
211    Ok(SearchResult { li, count })
212  }
213}
214
215pub struct Writer {
216  pub inner: IndexWriter<TantivyDocument>,
217}
218
219impl Writer {
220  pub fn rm(&mut self, id: u64) -> Result<(), TantivyError> {
221    let inner = &mut self.inner;
222    inner.delete_term(Term::from_field_u64(FIELD.id, id));
223    inner.commit()?;
224    Ok(())
225  }
226
227  pub fn add(&mut self, doc: Doc) -> Result<u64, TantivyError> {
228    let inner = &mut self.inner;
229    inner.delete_term(Term::from_field_u64(FIELD.id, doc.id));
230    let doc_id = inner.add_document(FIELD.dump(doc))?;
231    // https://fulmicoton.com/posts/behold-tantivy-part2/
232    inner.commit()?;
233    Ok(doc_id)
234  }
235}