Skip to main content

docbox_search/
models.rs

1//! # Search Models
2//!
3//! This file contains the models for serializing and deserializing
4//! search data
5
6use chrono::{DateTime, Utc};
7use docbox_database::models::{
8    document_box::{DocumentBoxScopeRaw, WithScope},
9    file::FileWithExtra,
10    folder::{FolderId, FolderWithExtra},
11    link::LinkWithExtra,
12    shared::FolderPathSegment,
13    user::{User, UserId},
14};
15use garde::Validate;
16use mime::Mime;
17use serde::{Deserialize, Serialize};
18use serde_with::{serde_as, skip_serializing_none};
19use utoipa::ToSchema;
20use uuid::Uuid;
21
/// Kind of item that a search index entry represents.
22#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
23pub enum SearchIndexType {
24    File,
25    Folder,
26    Link,
27}
28
/// Data describing a single item as it is represented in the search index.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct SearchIndexData {
31    /// Type of item the search index data is representing
    ///
    /// (Serialized and deserialized under the key `item_type`)
32    #[serde(rename = "item_type")]
33    pub ty: SearchIndexType,
34
35    /// ID of the folder the indexed item is within.
36    ///
37    /// (For searching only within a specific folder path)
38    pub folder_id: FolderId,
39    /// Document box scope that this item is within
40    ///
41    /// (For restricting search scope)
42    pub document_box: DocumentBoxScopeRaw,
43
44    /// Unique ID for the actual document
45    ///
46    /// This allows multi-page documents to be stored as
47    /// separate search index items without overriding each other
48    pub item_id: Uuid,
49    /// Name of this item
50    pub name: String,
51    /// Mime type when working with file items (Otherwise none)
52    pub mime: Option<String>,
53    /// For files this is the file content (With an associated page number)
54    /// For links this is the link value
55    pub content: Option<String>,
56    /// Creation date for the item
57    pub created_at: DateTime<Utc>,
58    /// User who created the item
59    pub created_by: Option<UserId>,
60    /// Optional pages of document content
61    pub pages: Option<Vec<DocumentPage>>,
62}
63
/// A single page of document content stored alongside an indexed item.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct DocumentPage {
    /// Page number this content is associated with
66    pub page: u64,
    /// Text content of the page
67    pub content: String,
68}
69
/// Subset of the [SearchIndexData] fields, presumably used as the payload
/// when updating an existing search index entry (going by the name).
///
/// `#[skip_serializing_none]` leaves `Option` fields that are `None` out of
/// the serialized output entirely instead of writing them as `null`.
70#[skip_serializing_none]
71#[derive(Debug, Serialize, Deserialize)]
72pub struct UpdateSearchIndexData {
73    pub folder_id: FolderId,
74    pub name: String,
75    pub content: Option<String>,
76    pub pages: Option<Vec<DocumentPage>>,
77}
78
79/// Search results scoped to a specific file
///
/// NOTE(review): unlike [SearchResults] and [FlattenedItemResult] this
/// struct has no `#[derive(Debug)]` — confirm whether that is intentional.
80pub struct FileSearchResults {
81    /// Total number of hits against the item
82    pub total_hits: u64,
83    /// Matches within the contents
84    pub results: Vec<PageResult>,
85}
86
/// Outcome of a search: the matched items together with a hit count.
87#[derive(Debug)]
88pub struct SearchResults {
    /// The matched items in their condensed form
89    pub results: Vec<FlattenedItemResult>,
    /// Total number of hits for the search
90    pub total_hits: u64,
91}
92
93/// Condensed version of a search result for a single item
94#[derive(Debug)]
95pub struct FlattenedItemResult {
96    /// Type of item being included in the search index
97    pub item_ty: SearchIndexType,
98    /// ID of the item itself
99    pub item_id: Uuid,
100    /// Scope the item is within
101    pub document_box: DocumentBoxScopeRaw,
102    /// Matches within the page content
103    pub page_matches: Vec<PageResult>,
104    /// Total number of hits against the item
105    pub total_hits: u64,
106    /// Score of the item (Sum of all the content scores)
107    pub score: SearchScore,
108
109    /// Whether the name matches
110    pub name_match: bool,
111
112    /// Whether the content matches
113    pub content_match: bool,
114}
115
/// Score attached to a search result.
///
/// Serialized untagged, so the output is just the inner number without
/// any indication of which variant produced it.
116#[derive(Debug, Serialize, ToSchema)]
117#[serde(untagged)]
118pub enum SearchScore {
119    /// Typesense uses integer scoring
120    Integer(u64),
121    /// OpenSearch and database use float scoring
122    Float(f32),
123}
124
/// Matches found within a single page of an item's content.
125#[derive(Debug, Serialize, ToSchema)]
126pub struct PageResult {
    /// Page the matches belong to
127    pub page: u64,
    /// The matches on this page, as strings
128    pub matches: Vec<String>,
129}
130
131/// Extended search request to search within multiple document
132/// boxes
///
/// Because of `#[serde(default)]`, fields missing from the input fall back
/// to their `Default` values when deserializing.
133#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
134#[serde(default)]
135pub struct AdminSearchRequest {
    /// Document box scopes to search within
    ///
    /// (Not validated; described as a list of strings in the API schema)
136    #[garde(skip)]
137    #[schema(value_type = Vec<String>)]
138    pub scopes: Vec<DocumentBoxScopeRaw>,
139
    /// The underlying search request
    ///
    /// (Flattened, so its fields sit at the same level as `scopes`;
    /// validation descends into it)
140    #[serde(flatten)]
141    #[garde(dive)]
142    pub request: SearchRequest,
143}
144
145/// Request to search within a file
///
/// Every field is optional; because of `#[serde(default)]` any field
/// missing from the input deserializes to `None`. No field is validated
/// (all are marked `#[garde(skip)]`).
146#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
147#[serde(default)]
148pub struct FileSearchRequest {
149    /// The search query
150    #[garde(skip)]
151    pub query: Option<String>,
152
153    /// Offset to start returning results from
154    #[garde(skip)]
155    pub offset: Option<u64>,
156
157    /// Maximum number of results to return
158    #[garde(skip)]
159    pub limit: Option<u16>,
160}
161
162/// Wrapper around [Mime] to implement [Serialize] and [Deserialize]
///
/// The value is (de)serialized transparently as its string form, using
/// the `Display` and `FromStr` implementations of [Mime].
163#[serde_as]
164#[derive(Debug, Clone, Deserialize, Serialize)]
165#[serde(transparent)]
166pub struct StringMime(#[serde_as(as = "serde_with::DisplayFromStr")] pub Mime);
167
168/// Request to search within a document box
///
/// Because of `#[serde(default)]`, fields missing from the input fall back
/// to their `Default` values when deserializing (`false` for the boolean
/// flags, `None` for the optional fields).
169#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
170#[serde(default)]
171pub struct SearchRequest {
172    /// The search query
173    #[garde(skip)]
174    pub query: Option<String>,
175
176    /// Enable searching with AI
177    #[garde(skip)]
178    pub neural: bool,
179
180    /// Only include items with a specific mime type
181    #[garde(skip)]
182    #[schema(value_type = Option<String>)]
183    pub mime: Option<StringMime>,
184
185    /// Whether to include document names
186    #[garde(skip)]
187    pub include_name: bool,
188
189    /// Whether to include document content
190    #[garde(skip)]
191    pub include_content: bool,
192
193    /// Creation date range search
    ///
    /// (Validated: see the [Validate] implementation of [SearchRange])
194    #[garde(dive)]
195    pub created_at: Option<SearchRange>,
196
197    /// Search by a created user
198    #[garde(skip)]
199    pub created_by: Option<UserId>,
200
201    /// Enforce search to a specific folder, empty for all
202    /// folders
203    #[garde(skip)]
204    #[schema(value_type = Option<Uuid>)]
205    pub folder_id: Option<FolderId>,
206
207    /// Number of items to include in the response
208    #[garde(skip)]
209    pub size: Option<u16>,
210
211    /// Offset to start results from
212    #[garde(skip)]
213    pub offset: Option<u64>,
214
215    /// Maximum number of pages to return per file
    ///
    /// (Validation rejects values above 100)
216    #[garde(range(max = 100))]
217    #[schema(maximum = 100)]
218    pub max_pages: Option<u16>,
219
220    /// Offset to start at when aggregating page results
221    #[garde(skip)]
222    pub pages_offset: Option<u64>,
223}
224
/// Date range with an optional start and an optional end point.
///
/// [Validate] is implemented by hand for this type rather than derived.
225#[derive(Default, Debug, Deserialize, Serialize, ToSchema)]
226pub struct SearchRange {
    /// Start of the range, if any
227    pub start: Option<DateTime<Utc>>,
    /// End of the range, if any
228    pub end: Option<DateTime<Utc>>,
229}
230
/// Manual [Validate] implementation for [SearchRange].
///
/// Reports an error when neither bound is set, or when both are set and
/// `start` is later than `end`. A range with only one bound is accepted
/// without further checks.
231impl Validate for SearchRange {
    /// No validation context is required
232    type Context = ();
233
    /// Checks the range and appends any problem found to `report`.
    ///
    /// `parent` produces the path of this value within the structure being
    /// validated; it is only invoked when an error is actually reported.
234    fn validate_into(
235        &self,
236        _ctx: &Self::Context,
237        parent: &mut dyn FnMut() -> garde::Path,
238        report: &mut garde::Report,
239    ) {
240        match (&self.start, &self.end) {
            // Neither bound provided: reported against the range itself
241            (None, None) => report.append(
242                parent(),
243                garde::Error::new("date range must have a start or end point"),
244            ),
            // Both bounds provided: a start later than the end is reported
            // against the `start` field (equal bounds are accepted)
245            (Some(start), Some(end)) => {
246                if start > end {
247                    report.append(
248                        parent().join("start"),
249                        garde::Error::new("date range start cannot be after end"),
250                    )
251                }
252            }
            // Exactly one bound provided: nothing to check
253            (None, Some(_)) | (Some(_), None) => {}
254        }
255    }
256}
257
/// The item a search result refers to.
///
/// Serialized internally tagged: the variant name is written to a `type`
/// field next to the fields of the contained item.
258#[derive(Debug, Serialize, ToSchema)]
259#[serde(tag = "type")]
260pub enum SearchResultData {
261    File(FileWithExtra),
262    Folder(FolderWithExtra),
263    Link(LinkWithExtra),
264}
265
/// Serializable response for a search: hit count plus the result items.
266#[derive(Debug, Serialize, ToSchema)]
267pub struct SearchResultResponse {
    /// Total number of hits for the search
268    pub total_hits: u64,
    /// The result items
269    pub results: Vec<SearchResultItem>,
270}
271
/// Serializable response for a search within a file: hit count plus the
/// per-page matches.
272#[derive(Debug, Serialize, ToSchema)]
273pub struct FileSearchResultResponse {
    /// Total number of hits for the search
274    pub total_hits: u64,
    /// Matches grouped by page
275    pub results: Vec<PageResult>,
276}
277
/// Serializable response for an admin search: same shape as
/// [SearchResultResponse] except that every result item is wrapped in
/// [WithScope].
278#[derive(Debug, Serialize, ToSchema)]
279pub struct AdminSearchResultResponse {
    /// Total number of hits for the search
280    pub total_hits: u64,
    /// The result items, each wrapped in [WithScope]
281    pub results: Vec<WithScope<SearchResultItem>>,
282}
283
/// A single item within a search result response.
284#[derive(Debug, Serialize, ToSchema)]
285pub struct SearchResultItem {
286    /// The result score
287    pub score: SearchScore,
288    /// Path to the search result item
289    pub path: Vec<FolderPathSegment>,
290    /// The item itself
    ///
    /// (Flattened, so its fields are serialized inline in this object)
291    #[serde(flatten)]
292    pub data: SearchResultData,
293
    /// Matches within the page content
294    pub page_matches: Vec<PageResult>,
    /// Total number of hits against the item
295    pub total_hits: u64,
296
    /// Whether the name matches
297    pub name_match: bool,
    /// Whether the content matches
298    pub content_match: bool,
299}
300
301/// Request to list users
///
/// Both fields are optional; because of `#[serde(default)]` a field
/// missing from the input deserializes to `None`. Neither field is
/// validated (both are marked `#[garde(skip)]`).
302#[derive(Default, Debug, Validate, Deserialize, Serialize, ToSchema)]
303#[serde(default)]
304pub struct UsersRequest {
305    /// Offset to start returning results from
306    #[garde(skip)]
307    pub offset: Option<u64>,
308
309    /// Number of items to include in the response
310    #[garde(skip)]
311    pub size: Option<u16>,
312}
313
/// Serializable result of listing users: the users plus a total count.
314#[derive(Debug, Serialize)]
315pub struct AdminUsersResults {
316    /// The users
317    pub results: Vec<User>,
318    /// The total number of users
319    pub total: i64,
320}