tantivy_common/
json_path_writer.rs

1use crate::replace_in_place;
2
/// Separates the different segments of a json path.
pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;
/// [`JSON_PATH_SEGMENT_SEP`] as a one-byte `&str`.
///
/// The conversion goes through the checked `from_utf8`, evaluated at compile
/// time: if the separator byte is ever changed to something that is not a
/// valid single-byte UTF-8 sequence, the build fails instead of silently
/// producing an invalid `&str`.
pub const JSON_PATH_SEGMENT_SEP_STR: &str = match std::str::from_utf8(&[JSON_PATH_SEGMENT_SEP]) {
    Ok(sep) => sep,
    Err(_) => panic!("JSON_PATH_SEGMENT_SEP must be a valid single-byte UTF-8 sequence"),
};
7
/// Separates the json path and the value in
/// a JSON term binary representation.
pub const JSON_END_OF_PATH: u8 = 0u8;
/// [`JSON_END_OF_PATH`] as a one-byte `&str`.
///
/// The conversion goes through the checked `from_utf8`, evaluated at compile
/// time: if the marker byte is ever changed to something that is not a valid
/// single-byte UTF-8 sequence, the build fails instead of silently producing
/// an invalid `&str`.
pub const JSON_END_OF_PATH_STR: &str = match std::str::from_utf8(&[JSON_END_OF_PATH]) {
    Ok(marker) => marker,
    Err(_) => panic!("JSON_END_OF_PATH must be a valid single-byte UTF-8 sequence"),
};
13
/// Incrementally builds flattened json paths for tantivy.
///
/// Segments are appended with `push` and removed with `pop`; consecutive
/// segments are joined with `JSON_PATH_SEGMENT_SEP`.
#[derive(Clone, Debug, Default)]
pub struct JsonPathWriter {
    /// The flattened path built so far.
    path: String,
    /// Byte length of `path` recorded at the start of each `push`.
    /// `pop` truncates `path` back to the most recently recorded length.
    indices: Vec<usize>,
    /// When true, `push` also turns the `.` bytes of the appended segment
    /// into `JSON_PATH_SEGMENT_SEP` (see `set_expand_dots`).
    expand_dots: bool,
}
21
22impl JsonPathWriter {
23    pub fn with_expand_dots(expand_dots: bool) -> Self {
24        JsonPathWriter {
25            path: String::new(),
26            indices: Vec::new(),
27            expand_dots,
28        }
29    }
30
31    pub fn new() -> Self {
32        JsonPathWriter {
33            path: String::new(),
34            indices: Vec::new(),
35            expand_dots: false,
36        }
37    }
38
39    /// When expand_dots is enabled, json object like
40    /// `{"k8s.node.id": 5}` is processed as if it was
41    /// `{"k8s": {"node": {"id": 5}}}`.
42    /// This option has the merit of allowing users to
43    /// write queries  like `k8s.node.id:5`.
44    /// On the other, enabling that feature can lead to
45    /// ambiguity.
46    #[inline]
47    pub fn set_expand_dots(&mut self, expand_dots: bool) {
48        self.expand_dots = expand_dots;
49    }
50
51    /// Push a new segment to the path.
52    #[inline]
53    pub fn push(&mut self, segment: &str) {
54        let len_path = self.path.len();
55        self.indices.push(len_path);
56        if self.indices.len() > 1 {
57            self.path.push(JSON_PATH_SEGMENT_SEP as char);
58        }
59        self.path.push_str(segment);
60        if self.expand_dots {
61            // This might include the separation byte, which is ok because it is not a dot.
62            let appended_segment = &mut self.path[len_path..];
63            // The unsafe below is safe as long as b'.' and JSON_PATH_SEGMENT_SEP are
64            // valid single byte ut8 strings.
65            // By utf-8 design, they cannot be part of another codepoint.
66            unsafe {
67                replace_in_place(b'.', JSON_PATH_SEGMENT_SEP, appended_segment.as_bytes_mut())
68            };
69        }
70    }
71
72    /// Set the end of JSON path marker.
73    #[inline]
74    pub fn set_end(&mut self) {
75        self.path.push_str(JSON_END_OF_PATH_STR);
76    }
77
78    /// Remove the last segment. Does nothing if the path is empty.
79    #[inline]
80    pub fn pop(&mut self) {
81        if let Some(last_idx) = self.indices.pop() {
82            self.path.truncate(last_idx);
83        }
84    }
85
86    /// Clear the path.
87    #[inline]
88    pub fn clear(&mut self) {
89        self.path.clear();
90        self.indices.clear();
91    }
92
93    /// Get the current path.
94    #[inline]
95    pub fn as_str(&self) -> &str {
96        &self.path
97    }
98}
99
100impl From<JsonPathWriter> for String {
101    #[inline]
102    fn from(value: JsonPathWriter) -> Self {
103        value.path
104    }
105}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises push/pop with dot expansion first disabled, then enabled.
    #[test]
    fn json_path_writer_test() {
        let mut path_writer = JsonPathWriter::new();
        path_writer.set_expand_dots(false);

        // The first segment is written without any separator.
        path_writer.push("root");
        assert_eq!(path_writer.as_str(), "root");

        // Later segments are joined with the segment separator.
        path_writer.push("child");
        assert_eq!(path_writer.as_str(), "root\u{1}child");

        path_writer.pop();
        assert_eq!(path_writer.as_str(), "root");

        // Dots are kept verbatim while expansion is disabled.
        path_writer.push("k8s.node.id");
        assert_eq!(path_writer.as_str(), "root\u{1}k8s.node.id");

        // Once enabled, pushing the same segment again splits it on dots.
        path_writer.set_expand_dots(true);
        path_writer.pop();
        path_writer.push("k8s.node.id");
        assert_eq!(path_writer.as_str(), "root\u{1}k8s\u{1}node\u{1}id");
    }

    /// A single pop removes a whole dot-expanded segment.
    #[test]
    fn test_json_path_expand_dots_enabled_pop_segment() {
        let mut expanding_writer = JsonPathWriter::with_expand_dots(true);
        expanding_writer.push("hello");
        assert_eq!(expanding_writer.as_str(), "hello");
        expanding_writer.push("color.hue");
        assert_eq!(expanding_writer.as_str(), "hello\x01color\x01hue");
        expanding_writer.pop();
        assert_eq!(expanding_writer.as_str(), "hello");
    }
}