cozo/fts/
ast.rs

/*
 * Copyright 2023, The Cozo Project Authors.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
 * If a copy of the MPL was not distributed with this file,
 * You can obtain one at https://mozilla.org/MPL/2.0/.
 */
8
9use ordered_float::OrderedFloat;
10use crate::fts::tokenizer::TextAnalyzer;
11use smartstring::{LazyCompact, SmartString};
12
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14pub(crate) struct FtsLiteral {
15    pub(crate) value: SmartString<LazyCompact>,
16    pub(crate) is_prefix: bool,
17    pub(crate) booster: OrderedFloat<f64>,
18}
19
20impl FtsLiteral {
21    pub(crate) fn tokenize(self, tokenizer: &TextAnalyzer, coll: &mut Vec<Self>) {
22        if self.is_prefix {
23            coll.push(self);
24            return;
25        }
26
27        let mut tokens = tokenizer.token_stream(&self.value);
28        while let Some(t) = tokens.next() {
29            coll.push(FtsLiteral {
30                value: SmartString::from(&t.text),
31                is_prefix: false,
32                booster: self.booster,
33            })
34        }
35    }
36}
37
38#[derive(Debug, Clone, PartialEq, Eq, Hash)]
39pub(crate) struct FtsNear {
40    pub(crate) literals: Vec<FtsLiteral>,
41    pub(crate) distance: u32,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Hash)]
45pub(crate) enum FtsExpr {
46    Literal(FtsLiteral),
47    Near(FtsNear),
48    And(Vec<FtsExpr>),
49    Or(Vec<FtsExpr>),
50    Not(Box<FtsExpr>, Box<FtsExpr>),
51}
52
53impl FtsExpr {
54    // pub(crate) fn needs_idf(&self) -> bool {
55    //     match self {
56    //         FtsExpr::Literal(_) => false,
57    //         FtsExpr::Near(_) => false,
58    //         FtsExpr::And(exprs) => exprs.iter().any(|e| e.needs_idf()),
59    //         FtsExpr::Or(_) => true,
60    //         FtsExpr::Not(lhs, _) => lhs.needs_idf(),
61    //     }
62    // }
63
64    pub(crate) fn tokenize(self, tokenizer: &TextAnalyzer) -> Self {
65        self.do_tokenize(tokenizer).flatten()
66    }
67
68    pub(crate) fn is_empty(&self) -> bool {
69        match self {
70            FtsExpr::Literal(l) => {
71                l.booster == 0. || l.value.is_empty()
72            },
73            FtsExpr::Near(FtsNear{ literals, .. }) => {literals.is_empty()}
74            FtsExpr::And(v) => {v.is_empty()}
75            FtsExpr::Or(v) => {v.is_empty()}
76            FtsExpr::Not(lhs, _) => {lhs.is_empty()}
77        }
78    }
79
80    pub(crate) fn flatten(self) -> Self {
81        match self {
82            FtsExpr::And(exprs) => {
83                let mut flattened = vec![];
84                for e in exprs {
85                    match e.flatten() {
86                        FtsExpr::And(es) => flattened.extend(es),
87                        e => {
88                            if !e.is_empty() {
89                                flattened.push(e)
90                            }
91                        },
92                    }
93                }
94                if flattened.len() == 1 {
95                    flattened.into_iter().next().unwrap()
96                } else {
97                    FtsExpr::And(flattened)
98                }
99            }
100            FtsExpr::Or(exprs) => {
101                let mut flattened = vec![];
102                for e in exprs {
103                    match e.flatten() {
104                        FtsExpr::Or(es) => flattened.extend(es),
105                        e => {
106                            if !e.is_empty() {
107                                flattened.push(e)
108                            }
109                        },                    }
110                }
111                if flattened.len() == 1 {
112                    flattened.into_iter().next().unwrap()
113                } else {
114                    FtsExpr::Or(flattened)
115                }
116            }
117            FtsExpr::Not(lhs, rhs) => {
118                let lhs = lhs.flatten();
119                let rhs = rhs.flatten();
120                if rhs.is_empty() {
121                    lhs
122                } else {
123                    FtsExpr::Not(Box::new(lhs), Box::new(rhs))
124                }
125            }
126            FtsExpr::Literal(l) => FtsExpr::Literal(l),
127            FtsExpr::Near(n) => FtsExpr::Near(n),
128        }
129    }
130
131    fn do_tokenize(self, tokenizer: &TextAnalyzer) -> Self {
132        match self {
133            FtsExpr::Literal(l) => {
134                let mut tokens = vec![];
135                l.tokenize(tokenizer, &mut tokens);
136                if tokens.len() == 1 {
137                    FtsExpr::Literal(tokens.into_iter().next().unwrap())
138                } else {
139                    FtsExpr::And(tokens.into_iter().map(FtsExpr::Literal).collect())
140                }
141            }
142            FtsExpr::Near(FtsNear { literals, distance }) => {
143                let mut tokens = vec![];
144                for l in literals {
145                    l.tokenize(tokenizer, &mut tokens);
146                }
147                FtsExpr::Near(FtsNear {
148                    literals: tokens,
149                    distance,
150                })
151            }
152            FtsExpr::And(exprs) => FtsExpr::And(
153                exprs
154                    .into_iter()
155                    .map(|e| e.do_tokenize(tokenizer))
156                    .collect(),
157            ),
158            FtsExpr::Or(exprs) => FtsExpr::Or(
159                exprs
160                    .into_iter()
161                    .map(|e| e.do_tokenize(tokenizer))
162                    .collect(),
163            ),
164            FtsExpr::Not(lhs, rhs) => FtsExpr::Not(
165                Box::new(lhs.do_tokenize(tokenizer)),
166                Box::new(rhs.do_tokenize(tokenizer)),
167            ),
168        }
169    }
170}