Skip to main content

docbox_search/
models.rs

1//! # Search Models
2//!
3//! This file contains the models for serializing and deserializing
4//! search data
5
6use chrono::{DateTime, Utc};
7use docbox_database::models::{
8    document_box::{DocumentBoxScopeRaw, WithScope},
9    file::FileWithExtra,
10    folder::{FolderId, FolderWithExtra},
11    link::LinkWithExtra,
12    shared::FolderPathSegment,
13    user::{User, UserId},
14};
15use garde::Validate;
16use mime::Mime;
17use serde::{Deserialize, Serialize};
18use serde_with::{serde_as, skip_serializing_none};
19use utoipa::ToSchema;
20use uuid::Uuid;
21
22#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
23pub enum SearchIndexType {
24    File,
25    Folder,
26    Link,
27}
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct SearchIndexData {
31    /// Type of item the search index data is representing
32    #[serde(rename = "item_type")]
33    pub ty: SearchIndexType,
34
35    /// ID of the folder the indexed item is within.
36    ///
37    /// (For searching only withing a specific folder path)
38    pub folder_id: FolderId,
39    /// Document box scope that this item is within
40    ///
41    /// (For restricting search scope)
42    pub document_box: DocumentBoxScopeRaw,
43
44    /// Unique ID for the actual document
45    ///
46    /// this is to allow multiple page documents to be stored as
47    /// separate search index items without overriding each other
48    pub item_id: Uuid,
49    /// Name of this item
50    pub name: String,
51    /// Mime type when working with file items (Otherwise none)
52    pub mime: Option<String>,
53    /// For files this is the file content (With an associated page number)
54    /// For links this is the link value
55    pub content: Option<String>,
56    /// Creation date for the item
57    pub created_at: DateTime<Utc>,
58    /// User who created the item
59    pub created_by: Option<UserId>,
60    /// Optional pages of document content
61    pub pages: Option<Vec<DocumentPage>>,
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct DocumentPage {
66    pub page: u64,
67    pub content: String,
68}
69
70#[skip_serializing_none]
71#[derive(Debug, Serialize, Deserialize)]
72pub struct UpdateSearchIndexData {
73    pub folder_id: FolderId,
74    pub name: String,
75    pub content: Option<String>,
76    pub pages: Option<Vec<DocumentPage>>,
77}
78
79/// Search results scoped to a specific file
80#[derive(Debug)]
81pub struct FileSearchResults {
82    // Total number of hits against the item
83    pub total_hits: u64,
84    /// Matches within the contents
85    pub results: Vec<PageResult>,
86}
87
88#[derive(Debug)]
89pub struct SearchResults {
90    pub results: Vec<FlattenedItemResult>,
91    pub total_hits: u64,
92}
93
94/// Condensed version of a file result
95#[derive(Debug)]
96pub struct FlattenedItemResult {
97    /// Type of item being included in the search index
98    pub item_ty: SearchIndexType,
99    /// ID of the item itself
100    pub item_id: Uuid,
101    /// Scope the item is within
102    pub document_box: DocumentBoxScopeRaw,
103    /// Matches within the page content
104    pub page_matches: Vec<PageResult>,
105    // Total number of hits against the item
106    pub total_hits: u64,
107    // Score of the item (Sum of all the content scores)
108    pub score: SearchScore,
109
110    /// Whether the content matches
111    pub name_match: bool,
112
113    /// Whether the name matches
114    pub content_match: bool,
115}
116
117#[derive(Debug, Serialize, ToSchema)]
118#[serde(untagged)]
119pub enum SearchScore {
120    /// Typesense uses integer scoring
121    Integer(u64),
122    /// OpenSearch and database use float scoring
123    Float(f32),
124}
125
126#[derive(Debug, Serialize, ToSchema)]
127pub struct PageResult {
128    pub page: u64,
129    pub matches: Vec<String>,
130}
131
132/// Extended search request to search within multiple document
133/// boxes
134#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
135#[serde(default)]
136pub struct AdminSearchRequest {
137    #[garde(skip)]
138    #[schema(value_type = Vec<String>)]
139    pub scopes: Vec<DocumentBoxScopeRaw>,
140
141    #[serde(flatten)]
142    #[garde(dive)]
143    pub request: SearchRequest,
144}
145
146/// Request to search within a file
147#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
148#[serde(default)]
149pub struct FileSearchRequest {
150    /// The search query
151    #[garde(skip)]
152    pub query: Option<String>,
153
154    /// Offset to start returning results from
155    #[garde(skip)]
156    pub offset: Option<u64>,
157
158    /// Maximum number of results to return
159    #[garde(skip)]
160    pub limit: Option<u16>,
161}
162
163/// Wrapper around [Mime] to implement [Serialize] and [Deserialize]
164#[serde_as]
165#[derive(Debug, Clone, Deserialize, Serialize)]
166#[serde(transparent)]
167pub struct StringMime(#[serde_as(as = "serde_with::DisplayFromStr")] pub Mime);
168
169/// Request to search within a document box
170#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
171#[serde(default)]
172pub struct SearchRequest {
173    /// The search query
174    #[garde(skip)]
175    pub query: Option<String>,
176
177    /// Enable searching with AI
178    #[garde(skip)]
179    pub neural: bool,
180
181    /// Search only include a specific mime type
182    #[garde(skip)]
183    #[schema(value_type = Option<String>)]
184    pub mime: Option<StringMime>,
185
186    /// Whether to include document names
187    #[garde(skip)]
188    pub include_name: bool,
189
190    /// Whether to include document content
191    #[garde(skip)]
192    pub include_content: bool,
193
194    /// Creation date range search
195    #[garde(dive)]
196    pub created_at: Option<SearchRange>,
197
198    /// Search by a created user
199    #[garde(skip)]
200    pub created_by: Option<UserId>,
201
202    /// Enforce search to a specific folder, empty for all
203    /// folders
204    #[garde(skip)]
205    #[schema(value_type = Option<Uuid>)]
206    pub folder_id: Option<FolderId>,
207
208    /// Number of items to include in the response
209    #[garde(skip)]
210    pub size: Option<u16>,
211
212    /// Offset to start results from
213    #[garde(skip)]
214    pub offset: Option<u64>,
215
216    /// Maximum number of pages too return per file
217    #[garde(range(max = 100))]
218    #[schema(maximum = 100)]
219    pub max_pages: Option<u16>,
220
221    /// Offset to start at when aggregating page results
222    #[garde(skip)]
223    pub pages_offset: Option<u64>,
224}
225
226#[derive(Default, Debug, Deserialize, Serialize, ToSchema)]
227pub struct SearchRange {
228    pub start: Option<DateTime<Utc>>,
229    pub end: Option<DateTime<Utc>>,
230}
231
232impl Validate for SearchRange {
233    type Context = ();
234
235    fn validate_into(
236        &self,
237        _ctx: &Self::Context,
238        parent: &mut dyn FnMut() -> garde::Path,
239        report: &mut garde::Report,
240    ) {
241        match (&self.start, &self.end) {
242            (None, None) => report.append(
243                parent(),
244                garde::Error::new("date range must have a start or end point"),
245            ),
246            (Some(start), Some(end)) => {
247                if start > end {
248                    report.append(
249                        parent().join("start"),
250                        garde::Error::new("date range start cannot be after end"),
251                    )
252                }
253            }
254            (None, Some(_)) | (Some(_), None) => {}
255        }
256    }
257}
258
259#[derive(Debug, Serialize, ToSchema)]
260#[serde(tag = "type")]
261pub enum SearchResultData {
262    File(FileWithExtra),
263    Folder(FolderWithExtra),
264    Link(LinkWithExtra),
265}
266
267#[derive(Debug, Serialize, ToSchema)]
268pub struct SearchResultResponse {
269    pub total_hits: u64,
270    pub results: Vec<SearchResultItem>,
271}
272
273#[derive(Debug, Serialize, ToSchema)]
274pub struct FileSearchResultResponse {
275    pub total_hits: u64,
276    pub results: Vec<PageResult>,
277}
278
279#[derive(Debug, Serialize, ToSchema)]
280pub struct AdminSearchResultResponse {
281    pub total_hits: u64,
282    pub results: Vec<WithScope<SearchResultItem>>,
283}
284
285#[derive(Debug, Serialize, ToSchema)]
286pub struct SearchResultItem {
287    /// The result score
288    pub score: SearchScore,
289    /// Path to the search result item
290    pub path: Vec<FolderPathSegment>,
291    /// The item itself
292    #[serde(flatten)]
293    pub data: SearchResultData,
294
295    pub page_matches: Vec<PageResult>,
296    pub total_hits: u64,
297
298    pub name_match: bool,
299    pub content_match: bool,
300}
301
302/// Request to list users
303#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
304#[serde(default)]
305pub struct UsersRequest {
306    /// Offset to start returning results from
307    #[garde(skip)]
308    pub offset: Option<u64>,
309
310    /// Number of items to include in the response
311    #[garde(skip)]
312    pub size: Option<u16>,
313}
314
315#[derive(Debug, Serialize)]
316pub struct AdminUsersResults {
317    /// The users
318    pub results: Vec<User>,
319    /// The total number of users
320    pub total: i64,
321}