// pharia_skill/csi/document_index.rs

use jiff::Timestamp;
use serde::{Deserialize, Serialize};
use serde_json::Value;
4
5/// Which documents you want to search in, and which type of index should be used
6#[derive(Clone, Debug, Default, Serialize)]
7pub struct IndexPath {
8    /// The namespace the collection belongs to
9    pub namespace: String,
10    /// The collection you want to search in
11    pub collection: String,
12    /// The search index you want to use for the collection
13    pub index: String,
14}
15
16impl IndexPath {
17    pub fn new(
18        namespace: impl Into<String>,
19        collection: impl Into<String>,
20        index: impl Into<String>,
21    ) -> Self {
22        Self {
23            namespace: namespace.into(),
24            collection: collection.into(),
25            index: index.into(),
26        }
27    }
28}
29
30/// Location of a document in the search engine
31#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
32pub struct DocumentPath {
33    /// The namespace the collection belongs to
34    pub namespace: String,
35    /// The collection you want to search in
36    pub collection: String,
37    /// The name of the document
38    pub name: String,
39}
40
41impl DocumentPath {
42    pub fn new(
43        namespace: impl Into<String>,
44        collection: impl Into<String>,
45        name: impl Into<String>,
46    ) -> Self {
47        Self {
48            namespace: namespace.into(),
49            collection: collection.into(),
50            name: name.into(),
51        }
52    }
53}
54
55#[derive(Debug, Serialize)]
56pub struct SearchRequest {
57    pub query: String,
58    pub index_path: IndexPath,
59    pub max_results: u32,
60    pub min_score: Option<f64>,
61    pub filters: Vec<SearchFilter>,
62}
63
64impl SearchRequest {
65    pub fn new(query: impl Into<String>, index_path: IndexPath) -> Self {
66        Self {
67            query: query.into(),
68            index_path,
69            max_results: 1,
70            min_score: None,
71            filters: Vec::new(),
72        }
73    }
74
75    #[must_use]
76    pub fn with_filters(mut self, filters: impl Into<Vec<SearchFilter>>) -> Self {
77        self.filters = filters.into();
78        self
79    }
80
81    #[must_use]
82    pub fn with_max_results(mut self, max_results: u32) -> Self {
83        self.max_results = max_results;
84        self
85    }
86
87    #[must_use]
88    pub fn with_min_score(mut self, min_score: Option<f64>) -> Self {
89        self.min_score = min_score;
90        self
91    }
92}
93
94/// Result to a search query
95#[derive(Clone, Debug, Deserialize, PartialEq)]
96pub struct SearchResult {
97    /// The path to the document that was found
98    pub document_path: DocumentPath,
99    /// The content of the document that was found
100    pub content: String,
101    /// How relevant the document is to the search query
102    pub score: f64,
103    pub start: TextCursor,
104    pub end: TextCursor,
105}
106
107#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
108pub struct TextCursor {
109    /// The index of the item in the document
110    pub item: u32,
111    /// The position of the cursor within the item
112    pub position: u32,
113}
114
115#[derive(Clone, Serialize, Debug)]
116#[serde(rename_all = "snake_case")]
117pub enum SearchFilter {
118    Without(Vec<FilterCondition>),
119    WithOneOf(Vec<FilterCondition>),
120    With(Vec<FilterCondition>),
121}
122
123#[derive(Clone, Serialize, Debug)]
124#[serde(rename_all = "snake_case")]
125pub enum FilterCondition {
126    Metadata(MetadataFilter),
127}
128
129#[derive(Copy, Clone, Serialize, Debug)]
130#[serde(rename_all = "snake_case")]
131pub enum ModalityType {
132    Text,
133}
134
135#[derive(Clone, Serialize, Debug)]
136pub struct MetadataFilter {
137    pub field: String,
138    #[serde(flatten)]
139    pub condition: MetadataFilterCondition,
140}
141
142#[derive(Clone, Serialize, Debug)]
143#[serde(rename_all = "snake_case")]
144pub enum MetadataFilterCondition {
145    GreaterThan(f64),
146    GreaterThanOrEqualTo(f64),
147    LessThan(f64),
148    LessThanOrEqualTo(f64),
149    After(Timestamp),
150    AtOrAfter(Timestamp),
151    Before(Timestamp),
152    AtOrBefore(Timestamp),
153    EqualTo(MetadataFieldValue),
154    IsNull(serde_bool::True),
155}
156
157#[derive(Clone, Serialize, Debug)]
158#[serde(untagged)]
159pub enum MetadataFieldValue {
160    String(String),
161    Integer(i64),
162    Boolean(bool),
163}
164
165#[derive(Clone, Debug, Deserialize, Serialize)]
166#[serde(rename_all = "snake_case", tag = "modality")]
167pub enum Modality {
168    Text { text: String },
169    Image,
170}
171
172#[derive(Clone, Debug, Deserialize, Serialize)]
173pub struct Document<Metadata = Value> {
174    pub path: DocumentPath,
175    pub contents: Vec<Modality>,
176    pub metadata: Option<Metadata>,
177}