prax_query/
search.rs

1//! Full-text search support across database backends.
2//!
3//! This module provides a unified API for full-text search across different
4//! database backends, abstracting over their specific implementations.
5//!
6//! # Supported Features
7//!
8//! | Feature          | PostgreSQL   | MySQL      | SQLite  | MSSQL   | MongoDB      |
9//! |------------------|--------------|------------|---------|---------|--------------|
10//! | Full-Text Index  | ✅ tsvector  | ✅ FULLTEXT| ✅ FTS5 | ✅      | ✅ Atlas     |
11//! | Search Ranking   | ✅ ts_rank   | ✅         | ✅ bm25 | ✅ RANK | ✅ score     |
12//! | Phrase Search    | ✅           | ✅         | ✅      | ✅      | ✅           |
13//! | Faceted Search   | ✅           | ❌         | ❌      | ❌      | ✅           |
14//! | Fuzzy Search     | ✅ pg_trgm   | ❌         | ❌      | ✅      | ✅           |
15//! | Highlighting     | ✅ ts_headline| ❌        | ✅ highlight| ❌  | ✅ highlight |
16//!
17//! # Example Usage
18//!
19//! ```rust,ignore
20//! use prax_query::search::{SearchQuery, SearchOptions};
21//!
22//! // Simple search
23//! let search = SearchQuery::new("rust async programming")
24//!     .columns(["title", "body"])
25//!     .with_ranking()
26//!     .build();
27//!
28//! // Generate SQL
29//! let sql = search.to_postgres_sql("posts")?;
30//! ```
31
32use std::borrow::Cow;
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{QueryError, QueryResult};
37use crate::sql::DatabaseType;
38
/// Full-text search mode/operator.
///
/// Selects how the whitespace-separated terms of a search string are combined.
/// Each SQL generator in this module translates the mode into its own backend
/// syntax, so the precise matching behavior is defined by the target database.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
pub enum SearchMode {
    /// Match any word (OR). This is the default mode.
    #[default]
    Any,
    /// Match all words (AND).
    All,
    /// Match exact phrase.
    Phrase,
    /// Boolean mode with operators (+, -, *).
    Boolean,
    /// Natural language mode.
    Natural,
}
54
55impl SearchMode {
56    /// Convert to PostgreSQL tsquery format.
57    pub fn to_postgres_operator(&self) -> &'static str {
58        match self {
59            Self::Any | Self::Natural => " | ",
60            Self::All | Self::Boolean => " & ",
61            Self::Phrase => " <-> ",
62        }
63    }
64}
65
/// Text search language/configuration.
///
/// Used to pick the PostgreSQL text search configuration and the SQLite FTS5
/// tokenizer when generating SQL.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SearchLanguage {
    /// Simple (no stemming).
    Simple,
    /// English.
    English,
    /// Spanish.
    Spanish,
    /// French.
    French,
    /// German.
    German,
    /// Custom language/configuration name, passed through as given.
    Custom(String),
}
82
83impl SearchLanguage {
84    /// Get the PostgreSQL text search configuration name.
85    pub fn to_postgres_config(&self) -> Cow<'static, str> {
86        match self {
87            Self::Simple => Cow::Borrowed("simple"),
88            Self::English => Cow::Borrowed("english"),
89            Self::Spanish => Cow::Borrowed("spanish"),
90            Self::French => Cow::Borrowed("french"),
91            Self::German => Cow::Borrowed("german"),
92            Self::Custom(name) => Cow::Owned(name.clone()),
93        }
94    }
95
96    /// Get the SQLite FTS5 tokenizer.
97    pub fn to_sqlite_tokenizer(&self) -> &'static str {
98        match self {
99            Self::Simple => "unicode61",
100            Self::English => "porter unicode61",
101            _ => "unicode61", // SQLite has limited language support
102        }
103    }
104}
105
106impl Default for SearchLanguage {
107    fn default() -> Self {
108        Self::English
109    }
110}
111
/// Ranking/scoring options for search results.
///
/// When `enabled` is set, the SQL generators add a relevance score column to
/// the result set under `score_alias` and return an `ORDER BY` fragment that
/// sorts by it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RankingOptions {
    /// Whether to include ranking score.
    pub enabled: bool,
    /// Column alias for the score. It is inserted into the generated SQL as an
    /// identifier, without quoting.
    pub score_alias: String,
    /// Normalization option (PostgreSQL-specific).
    pub normalization: u32,
    /// Field weights (field_name -> weight).
    // NOTE(review): none of the SQL generators visible in this file emit these
    // weights yet — confirm whether another consumer reads them.
    pub weights: Vec<(String, f32)>,
}
124
125impl Default for RankingOptions {
126    fn default() -> Self {
127        Self {
128            enabled: false,
129            score_alias: "search_score".to_string(),
130            normalization: 0,
131            weights: Vec::new(),
132        }
133    }
134}
135
136impl RankingOptions {
137    /// Enable ranking.
138    pub fn enabled(mut self) -> Self {
139        self.enabled = true;
140        self
141    }
142
143    /// Set the score column alias.
144    pub fn alias(mut self, alias: impl Into<String>) -> Self {
145        self.score_alias = alias.into();
146        self
147    }
148
149    /// Set PostgreSQL normalization option.
150    pub fn normalization(mut self, norm: u32) -> Self {
151        self.normalization = norm;
152        self
153    }
154
155    /// Add a field weight.
156    pub fn weight(mut self, field: impl Into<String>, weight: f32) -> Self {
157        self.weights.push((field.into(), weight));
158        self
159    }
160}
161
/// Highlighting options for search results.
///
/// When `enabled` is set, the PostgreSQL and SQLite generators in this module
/// add a `highlighted` column that wraps matches in `start_tag` / `end_tag`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct HighlightOptions {
    /// Whether to include highlights.
    pub enabled: bool,
    /// Start tag for highlights.
    pub start_tag: String,
    /// End tag for highlights.
    pub end_tag: String,
    /// Maximum length of highlighted text.
    // NOTE(review): the PostgreSQL generator passes this value as `MaxWords`,
    // i.e. it is counted in words there, not characters.
    pub max_length: Option<u32>,
    /// Number of fragments to return.
    pub max_fragments: Option<u32>,
    /// Fragment delimiter.
    // NOTE(review): not emitted by the SQL generators visible in this file.
    pub delimiter: String,
}
178
179impl Default for HighlightOptions {
180    fn default() -> Self {
181        Self {
182            enabled: false,
183            start_tag: "<b>".to_string(),
184            end_tag: "</b>".to_string(),
185            max_length: Some(150),
186            max_fragments: Some(3),
187            delimiter: " ... ".to_string(),
188        }
189    }
190}
191
192impl HighlightOptions {
193    /// Enable highlighting.
194    pub fn enabled(mut self) -> Self {
195        self.enabled = true;
196        self
197    }
198
199    /// Set highlight tags.
200    pub fn tags(mut self, start: impl Into<String>, end: impl Into<String>) -> Self {
201        self.start_tag = start.into();
202        self.end_tag = end.into();
203        self
204    }
205
206    /// Set maximum text length.
207    pub fn max_length(mut self, length: u32) -> Self {
208        self.max_length = Some(length);
209        self
210    }
211
212    /// Set maximum number of fragments.
213    pub fn max_fragments(mut self, count: u32) -> Self {
214        self.max_fragments = Some(count);
215        self
216    }
217
218    /// Set fragment delimiter.
219    pub fn delimiter(mut self, delimiter: impl Into<String>) -> Self {
220        self.delimiter = delimiter.into();
221        self
222    }
223}
224
/// Fuzzy search options.
// NOTE(review): none of the SQL generators visible in this file read these
// options — confirm where fuzzy matching is actually applied.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FuzzyOptions {
    /// Whether to enable fuzzy matching.
    pub enabled: bool,
    /// Maximum edit distance (Levenshtein).
    pub max_edits: u32,
    /// Prefix length that must match exactly.
    pub prefix_length: u32,
    /// Similarity threshold (0.0-1.0). The range is not enforced by this type.
    pub threshold: f32,
}
237
238impl Default for FuzzyOptions {
239    fn default() -> Self {
240        Self {
241            enabled: false,
242            max_edits: 2,
243            prefix_length: 0,
244            threshold: 0.3,
245        }
246    }
247}
248
249impl FuzzyOptions {
250    /// Enable fuzzy search.
251    pub fn enabled(mut self) -> Self {
252        self.enabled = true;
253        self
254    }
255
256    /// Set maximum edit distance.
257    pub fn max_edits(mut self, edits: u32) -> Self {
258        self.max_edits = edits;
259        self
260    }
261
262    /// Set prefix length.
263    pub fn prefix_length(mut self, length: u32) -> Self {
264        self.prefix_length = length;
265        self
266    }
267
268    /// Set similarity threshold.
269    pub fn threshold(mut self, threshold: f32) -> Self {
270        self.threshold = threshold;
271        self
272    }
273}
274
/// A full-text search query.
///
/// A backend-independent description of a search: the text to look for, the
/// columns to search and the optional ranking/highlighting behavior. Values are
/// normally created through [`SearchQuery::new`], which returns a builder, and
/// turned into SQL with one of the `to_*_sql` methods.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchQuery {
    /// Search terms, as one whitespace-separated string.
    pub query: String,
    /// Columns to search in.
    pub columns: Vec<String>,
    /// Search mode.
    pub mode: SearchMode,
    /// Language/configuration.
    pub language: SearchLanguage,
    /// Ranking options.
    pub ranking: RankingOptions,
    /// Highlighting options.
    pub highlight: HighlightOptions,
    /// Fuzzy search options.
    // NOTE(review): not read by the SQL generators visible in this file.
    pub fuzzy: FuzzyOptions,
    /// Minimum word length.
    // NOTE(review): not read by the SQL generators visible in this file.
    pub min_word_length: Option<u32>,
    /// Filter by category/field (for faceted search), as `(field, value)` pairs.
    // NOTE(review): not read by the SQL generators visible in this file.
    pub filters: Vec<(String, String)>,
}
297
298impl SearchQuery {
299    /// Create a new search query.
300    pub fn new(query: impl Into<String>) -> SearchQueryBuilder {
301        SearchQueryBuilder::new(query)
302    }
303
304    /// Generate PostgreSQL full-text search SQL.
305    pub fn to_postgres_sql(&self, table: &str) -> QueryResult<SearchSql> {
306        let config = self.language.to_postgres_config();
307
308        // Build tsvector expression
309        let tsvector = if self.columns.len() == 1 {
310            format!("to_tsvector('{}', {})", config, self.columns[0])
311        } else {
312            let concat_cols = self.columns.join(" || ' ' || ");
313            format!("to_tsvector('{}', {})", config, concat_cols)
314        };
315
316        // Build tsquery expression
317        let words: Vec<&str> = self.query.split_whitespace().collect();
318        let tsquery_parts: Vec<String> = words
319            .iter()
320            .map(|w| format!("'{}'", w.replace('\'', "''")))
321            .collect();
322        let tsquery = format!(
323            "to_tsquery('{}', '{}')",
324            config,
325            tsquery_parts.join(self.mode.to_postgres_operator())
326        );
327
328        // Build WHERE clause
329        let where_clause = format!("{} @@ {}", tsvector, tsquery);
330
331        // Build SELECT columns
332        let mut select_cols = vec!["*".to_string()];
333
334        // Add ranking
335        if self.ranking.enabled {
336            let weights = if self.ranking.weights.is_empty() {
337                String::new()
338            } else {
339                // PostgreSQL uses setweight for field weights
340                String::new()
341            };
342            select_cols.push(format!(
343                "ts_rank({}{}, {}) AS {}",
344                tsvector, weights, tsquery, self.ranking.score_alias
345            ));
346        }
347
348        // Add highlighting
349        if self.highlight.enabled && !self.columns.is_empty() {
350            let col = &self.columns[0];
351            select_cols.push(format!(
352                "ts_headline('{}', {}, {}, 'StartSel={}, StopSel={}, MaxWords={}, MaxFragments={}') AS highlighted",
353                config,
354                col,
355                tsquery,
356                self.highlight.start_tag,
357                self.highlight.end_tag,
358                self.highlight.max_length.unwrap_or(35),
359                self.highlight.max_fragments.unwrap_or(3)
360            ));
361        }
362
363        let sql = format!(
364            "SELECT {} FROM {} WHERE {}",
365            select_cols.join(", "),
366            table,
367            where_clause
368        );
369
370        let order_by = if self.ranking.enabled {
371            Some(format!("{} DESC", self.ranking.score_alias))
372        } else {
373            None
374        };
375
376        Ok(SearchSql {
377            sql,
378            order_by,
379            params: vec![],
380        })
381    }
382
383    /// Generate MySQL full-text search SQL.
384    pub fn to_mysql_sql(&self, table: &str) -> QueryResult<SearchSql> {
385        let columns = self.columns.join(", ");
386
387        // MySQL MATCH ... AGAINST syntax
388        let match_mode = match self.mode {
389            SearchMode::Natural | SearchMode::Any => "",
390            SearchMode::Boolean | SearchMode::All => " IN BOOLEAN MODE",
391            SearchMode::Phrase => " IN BOOLEAN MODE", // Use quotes for phrase
392        };
393
394        let search_query = if self.mode == SearchMode::Phrase {
395            format!("\"{}\"", self.query)
396        } else if self.mode == SearchMode::All {
397            // Add + prefix for required terms
398            self.query
399                .split_whitespace()
400                .map(|w| format!("+{}", w))
401                .collect::<Vec<_>>()
402                .join(" ")
403        } else {
404            self.query.clone()
405        };
406
407        let match_expr = format!("MATCH({}) AGAINST('{}'{}))", columns, search_query, match_mode);
408
409        let mut select_cols = vec!["*".to_string()];
410
411        // Add ranking (MySQL returns relevance from MATCH)
412        if self.ranking.enabled {
413            select_cols.push(format!("{} AS {}", match_expr, self.ranking.score_alias));
414        }
415
416        let sql = format!(
417            "SELECT {} FROM {} WHERE {}",
418            select_cols.join(", "),
419            table,
420            match_expr
421        );
422
423        let order_by = if self.ranking.enabled {
424            Some(format!("{} DESC", self.ranking.score_alias))
425        } else {
426            None
427        };
428
429        Ok(SearchSql {
430            sql,
431            order_by,
432            params: vec![],
433        })
434    }
435
436    /// Generate SQLite FTS5 search SQL.
437    pub fn to_sqlite_sql(&self, table: &str, fts_table: &str) -> QueryResult<SearchSql> {
438        let search_query = match self.mode {
439            SearchMode::Phrase => format!("\"{}\"", self.query),
440            SearchMode::All => self.query.split_whitespace().collect::<Vec<_>>().join(" AND "),
441            SearchMode::Any => self.query.split_whitespace().collect::<Vec<_>>().join(" OR "),
442            _ => self.query.clone(),
443        };
444
445        let mut select_cols = vec![format!("{}.*", table)];
446
447        // Add ranking (SQLite uses bm25)
448        if self.ranking.enabled {
449            select_cols.push(format!("bm25({}) AS {}", fts_table, self.ranking.score_alias));
450        }
451
452        // Add highlighting
453        if self.highlight.enabled && !self.columns.is_empty() {
454            select_cols.push(format!(
455                "highlight({}, 0, '{}', '{}') AS highlighted",
456                fts_table, self.highlight.start_tag, self.highlight.end_tag
457            ));
458        }
459
460        let sql = format!(
461            "SELECT {} FROM {} JOIN {} ON {}.rowid = {}.rowid WHERE {} MATCH '{}'",
462            select_cols.join(", "),
463            table,
464            fts_table,
465            table,
466            fts_table,
467            fts_table,
468            search_query
469        );
470
471        let order_by = if self.ranking.enabled {
472            Some(format!("{}", self.ranking.score_alias))
473        } else {
474            None
475        };
476
477        Ok(SearchSql {
478            sql,
479            order_by,
480            params: vec![],
481        })
482    }
483
484    /// Generate MSSQL full-text search SQL.
485    pub fn to_mssql_sql(&self, table: &str) -> QueryResult<SearchSql> {
486        let columns = self.columns.join(", ");
487
488        let contains_expr = match self.mode {
489            SearchMode::Phrase => format!("\"{}\"", self.query),
490            SearchMode::All => {
491                let terms: Vec<String> = self
492                    .query
493                    .split_whitespace()
494                    .map(|w| format!("\"{}\"", w))
495                    .collect();
496                terms.join(" AND ")
497            }
498            SearchMode::Any | SearchMode::Natural => {
499                let terms: Vec<String> = self
500                    .query
501                    .split_whitespace()
502                    .map(|w| format!("\"{}\"", w))
503                    .collect();
504                terms.join(" OR ")
505            }
506            SearchMode::Boolean => self.query.clone(),
507        };
508
509        let select_cols = vec!["*".to_string()];
510
511        // Add ranking (MSSQL uses CONTAINSTABLE for ranking)
512        if self.ranking.enabled {
513            let sql = format!(
514                "SELECT {}.*, ft.RANK AS {} FROM {} \
515                 INNER JOIN CONTAINSTABLE({}, ({}), '{}') AS ft \
516                 ON {}.id = ft.[KEY]",
517                table,
518                self.ranking.score_alias,
519                table,
520                table,
521                columns,
522                contains_expr,
523                table
524            );
525
526            return Ok(SearchSql {
527                sql,
528                order_by: Some(format!("{} DESC", self.ranking.score_alias)),
529                params: vec![],
530            });
531        }
532
533        let sql = format!(
534            "SELECT {} FROM {} WHERE CONTAINS(({}), '{}')",
535            select_cols.join(", "),
536            table,
537            columns,
538            contains_expr
539        );
540
541        Ok(SearchSql {
542            sql,
543            order_by: None,
544            params: vec![],
545        })
546    }
547
548    /// Generate search SQL for the specified database type.
549    pub fn to_sql(&self, table: &str, db_type: DatabaseType) -> QueryResult<SearchSql> {
550        match db_type {
551            DatabaseType::PostgreSQL => self.to_postgres_sql(table),
552            DatabaseType::MySQL => self.to_mysql_sql(table),
553            DatabaseType::SQLite => self.to_sqlite_sql(table, &format!("{}_fts", table)),
554            DatabaseType::MSSQL => self.to_mssql_sql(table),
555        }
556    }
557}
558
/// Builder for search queries.
///
/// Holds the same fields as [`SearchQuery`]; each setter consumes the builder
/// and returns it, and `build` moves the collected values into the finished
/// query.
#[derive(Debug, Clone)]
pub struct SearchQueryBuilder {
    // Search terms, as given to `new`.
    query: String,
    // Columns to search, in the order they were added.
    columns: Vec<String>,
    mode: SearchMode,
    language: SearchLanguage,
    ranking: RankingOptions,
    highlight: HighlightOptions,
    fuzzy: FuzzyOptions,
    min_word_length: Option<u32>,
    // `(field, value)` pairs for faceted search.
    filters: Vec<(String, String)>,
}
572
573impl SearchQueryBuilder {
574    /// Create a new search query builder.
575    pub fn new(query: impl Into<String>) -> Self {
576        Self {
577            query: query.into(),
578            columns: Vec::new(),
579            mode: SearchMode::default(),
580            language: SearchLanguage::default(),
581            ranking: RankingOptions::default(),
582            highlight: HighlightOptions::default(),
583            fuzzy: FuzzyOptions::default(),
584            min_word_length: None,
585            filters: Vec::new(),
586        }
587    }
588
589    /// Add a column to search.
590    pub fn column(mut self, column: impl Into<String>) -> Self {
591        self.columns.push(column.into());
592        self
593    }
594
595    /// Add multiple columns to search.
596    pub fn columns(mut self, columns: impl IntoIterator<Item = impl Into<String>>) -> Self {
597        self.columns.extend(columns.into_iter().map(Into::into));
598        self
599    }
600
601    /// Set the search mode.
602    pub fn mode(mut self, mode: SearchMode) -> Self {
603        self.mode = mode;
604        self
605    }
606
607    /// Set to match all words.
608    pub fn match_all(self) -> Self {
609        self.mode(SearchMode::All)
610    }
611
612    /// Set to match any word.
613    pub fn match_any(self) -> Self {
614        self.mode(SearchMode::Any)
615    }
616
617    /// Set to match exact phrase.
618    pub fn phrase(self) -> Self {
619        self.mode(SearchMode::Phrase)
620    }
621
622    /// Set to boolean mode.
623    pub fn boolean(self) -> Self {
624        self.mode(SearchMode::Boolean)
625    }
626
627    /// Set the search language.
628    pub fn language(mut self, language: SearchLanguage) -> Self {
629        self.language = language;
630        self
631    }
632
633    /// Enable ranking with default options.
634    pub fn with_ranking(mut self) -> Self {
635        self.ranking.enabled = true;
636        self
637    }
638
639    /// Configure ranking options.
640    pub fn ranking(mut self, options: RankingOptions) -> Self {
641        self.ranking = options;
642        self
643    }
644
645    /// Enable highlighting with default options.
646    pub fn with_highlight(mut self) -> Self {
647        self.highlight.enabled = true;
648        self
649    }
650
651    /// Configure highlighting options.
652    pub fn highlight(mut self, options: HighlightOptions) -> Self {
653        self.highlight = options;
654        self
655    }
656
657    /// Enable fuzzy matching with default options.
658    pub fn with_fuzzy(mut self) -> Self {
659        self.fuzzy.enabled = true;
660        self
661    }
662
663    /// Configure fuzzy search options.
664    pub fn fuzzy(mut self, options: FuzzyOptions) -> Self {
665        self.fuzzy = options;
666        self
667    }
668
669    /// Set minimum word length.
670    pub fn min_word_length(mut self, length: u32) -> Self {
671        self.min_word_length = Some(length);
672        self
673    }
674
675    /// Add a filter for faceted search.
676    pub fn filter(mut self, field: impl Into<String>, value: impl Into<String>) -> Self {
677        self.filters.push((field.into(), value.into()));
678        self
679    }
680
681    /// Build the search query.
682    pub fn build(self) -> SearchQuery {
683        SearchQuery {
684            query: self.query,
685            columns: self.columns,
686            mode: self.mode,
687            language: self.language,
688            ranking: self.ranking,
689            highlight: self.highlight,
690            fuzzy: self.fuzzy,
691            min_word_length: self.min_word_length,
692            filters: self.filters,
693        }
694    }
695}
696
/// Generated search SQL.
///
/// The `ORDER BY` part is returned separately from the statement so the caller
/// can decide whether and where to append it.
#[derive(Debug, Clone)]
pub struct SearchSql {
    /// The main SQL query, without any `ORDER BY` clause.
    pub sql: String,
    /// Optional ORDER BY clause: the ordering expression only (for example
    /// `search_score DESC`), without the `ORDER BY` keyword.
    pub order_by: Option<String>,
    /// Query parameters. The generators in this module inline all values and
    /// leave this empty.
    pub params: Vec<String>,
}
707
/// Full-text index definition.
///
/// A backend-independent description of a full-text index that can be turned
/// into the matching `CREATE` / `DROP` statements for each database.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FullTextIndex {
    /// Index name.
    pub name: String,
    /// Table name.
    pub table: String,
    /// Columns in the index.
    pub columns: Vec<String>,
    /// Language/configuration.
    pub language: SearchLanguage,
    /// Index type (for MySQL: FULLTEXT).
    // NOTE(review): not read by the SQL generators visible in this file, and
    // the builder shown here never sets it — confirm whether it is still used.
    pub index_type: Option<String>,
}
722
723impl FullTextIndex {
724    /// Create a new full-text index builder.
725    pub fn builder(name: impl Into<String>) -> FullTextIndexBuilder {
726        FullTextIndexBuilder::new(name)
727    }
728
729    /// Generate PostgreSQL CREATE INDEX SQL.
730    pub fn to_postgres_sql(&self) -> String {
731        let config = self.language.to_postgres_config();
732        let columns_expr = if self.columns.len() == 1 {
733            format!("to_tsvector('{}', {})", config, self.columns[0])
734        } else {
735            let concat = self.columns.join(" || ' ' || ");
736            format!("to_tsvector('{}', {})", config, concat)
737        };
738
739        format!(
740            "CREATE INDEX {} ON {} USING GIN ({});",
741            self.name, self.table, columns_expr
742        )
743    }
744
745    /// Generate MySQL CREATE INDEX SQL.
746    pub fn to_mysql_sql(&self) -> String {
747        format!(
748            "CREATE FULLTEXT INDEX {} ON {} ({});",
749            self.name,
750            self.table,
751            self.columns.join(", ")
752        )
753    }
754
755    /// Generate SQLite FTS5 virtual table SQL.
756    pub fn to_sqlite_sql(&self) -> String {
757        let tokenizer = self.language.to_sqlite_tokenizer();
758        format!(
759            "CREATE VIRTUAL TABLE {}_fts USING fts5({}, content='{}', tokenize='{}');",
760            self.table,
761            self.columns.join(", "),
762            self.table,
763            tokenizer
764        )
765    }
766
767    /// Generate MSSQL full-text catalog and index SQL.
768    pub fn to_mssql_sql(&self, catalog_name: &str) -> Vec<String> {
769        vec![
770            format!("CREATE FULLTEXT CATALOG {} AS DEFAULT;", catalog_name),
771            format!(
772                "CREATE FULLTEXT INDEX ON {} ({}) KEY INDEX PK_{} ON {};",
773                self.table,
774                self.columns.join(", "),
775                self.table,
776                catalog_name
777            ),
778        ]
779    }
780
781    /// Generate index SQL for the specified database type.
782    pub fn to_sql(&self, db_type: DatabaseType) -> QueryResult<Vec<String>> {
783        match db_type {
784            DatabaseType::PostgreSQL => Ok(vec![self.to_postgres_sql()]),
785            DatabaseType::MySQL => Ok(vec![self.to_mysql_sql()]),
786            DatabaseType::SQLite => Ok(vec![self.to_sqlite_sql()]),
787            DatabaseType::MSSQL => Ok(self.to_mssql_sql(&format!("{}_catalog", self.table))),
788        }
789    }
790
791    /// Generate DROP INDEX SQL.
792    pub fn to_drop_sql(&self, db_type: DatabaseType) -> QueryResult<String> {
793        match db_type {
794            DatabaseType::PostgreSQL => Ok(format!("DROP INDEX IF EXISTS {};", self.name)),
795            DatabaseType::MySQL => Ok(format!("DROP INDEX {} ON {};", self.name, self.table)),
796            DatabaseType::SQLite => Ok(format!("DROP TABLE IF EXISTS {}_fts;", self.table)),
797            DatabaseType::MSSQL => Ok(format!(
798                "DROP FULLTEXT INDEX ON {}; DROP FULLTEXT CATALOG {}_catalog;",
799                self.table, self.table
800            )),
801        }
802    }
803}
804
/// Builder for full-text indexes.
///
/// The table and at least one column must be supplied before `build` succeeds.
#[derive(Debug, Clone)]
pub struct FullTextIndexBuilder {
    // Index name, as given to `new`.
    name: String,
    // `None` until `on_table` is called; `build` fails while it is unset.
    table: Option<String>,
    // Indexed columns, in the order they were added.
    columns: Vec<String>,
    language: SearchLanguage,
    // Starts as `None`; no setter for it is visible in this part of the file.
    index_type: Option<String>,
}
814
815impl FullTextIndexBuilder {
816    /// Create a new builder.
817    pub fn new(name: impl Into<String>) -> Self {
818        Self {
819            name: name.into(),
820            table: None,
821            columns: Vec::new(),
822            language: SearchLanguage::default(),
823            index_type: None,
824        }
825    }
826
827    /// Set the table name.
828    pub fn on_table(mut self, table: impl Into<String>) -> Self {
829        self.table = Some(table.into());
830        self
831    }
832
833    /// Add a column.
834    pub fn column(mut self, column: impl Into<String>) -> Self {
835        self.columns.push(column.into());
836        self
837    }
838
839    /// Add multiple columns.
840    pub fn columns(mut self, columns: impl IntoIterator<Item = impl Into<String>>) -> Self {
841        self.columns.extend(columns.into_iter().map(Into::into));
842        self
843    }
844
845    /// Set the language.
846    pub fn language(mut self, language: SearchLanguage) -> Self {
847        self.language = language;
848        self
849    }
850
851    /// Build the index definition.
852    pub fn build(self) -> QueryResult<FullTextIndex> {
853        let table = self.table.ok_or_else(|| {
854            QueryError::invalid_input("table", "Must specify table with on_table()")
855        })?;
856
857        if self.columns.is_empty() {
858            return Err(QueryError::invalid_input(
859                "columns",
860                "Must specify at least one column",
861            ));
862        }
863
864        Ok(FullTextIndex {
865            name: self.name,
866            table,
867            columns: self.columns,
868            language: self.language,
869            index_type: self.index_type,
870        })
871    }
872}
873
874/// MongoDB Atlas Search support.
875pub mod mongodb {
876    use serde::{Deserialize, Serialize};
877
    /// Atlas Search index definition.
    ///
    /// Plain data describing an index: its name, the collection it covers, the
    /// analyzer name and the field mappings.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    pub struct AtlasSearchIndex {
        /// Index name.
        pub name: String,
        /// Collection name.
        pub collection: String,
        /// Analyzer to use, by name.
        pub analyzer: String,
        /// Field mappings.
        pub mappings: SearchMappings,
    }
890
    /// Field mappings for Atlas Search.
    ///
    /// The derived default has `dynamic` set to `false` and no explicit fields.
    #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
    pub struct SearchMappings {
        /// Whether to dynamically map fields.
        pub dynamic: bool,
        /// Explicit field definitions.
        pub fields: Vec<SearchField>,
    }
899
    /// A searchable field definition.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    pub struct SearchField {
        /// Field path.
        pub path: String,
        /// Field type.
        pub field_type: SearchFieldType,
        /// Analyzer for text fields; `None` when no field-specific analyzer is
        /// given.
        pub analyzer: Option<String>,
        /// Whether to store for faceting.
        pub facet: bool,
    }
912
    /// Atlas Search field types.
    ///
    /// `as_str` returns the type name string for each variant.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
    pub enum SearchFieldType {
        /// String/text field.
        String,
        /// Number field.
        Number,
        /// Date field.
        Date,
        /// Boolean field.
        Boolean,
        /// ObjectId field.
        ObjectId,
        /// Geo field.
        Geo,
        /// Autocomplete field.
        Autocomplete,
    }
931
932    impl SearchFieldType {
933        /// Get the Atlas Search type name.
934        pub fn as_str(&self) -> &'static str {
935            match self {
936                Self::String => "string",
937                Self::Number => "number",
938                Self::Date => "date",
939                Self::Boolean => "boolean",
940                Self::ObjectId => "objectId",
941                Self::Geo => "geo",
942                Self::Autocomplete => "autocomplete",
943            }
944        }
945    }
946
    /// Atlas Search query builder.
    ///
    /// The derived default is an empty query with no paths and no fuzzy, score
    /// or highlight configuration.
    #[derive(Debug, Clone, Default)]
    pub struct AtlasSearchQuery {
        /// Search text.
        pub query: String,
        /// Fields to search.
        pub path: Vec<String>,
        /// Fuzzy options; `None` leaves fuzzy matching unconfigured.
        pub fuzzy: Option<FuzzyConfig>,
        /// Score options; `None` leaves scoring unconfigured.
        pub score: Option<ScoreConfig>,
        /// Highlight options; `None` leaves highlighting unconfigured.
        pub highlight: Option<HighlightConfig>,
    }
961
    /// Fuzzy search configuration.
    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct FuzzyConfig {
        /// Maximum edits.
        pub max_edits: u32,
        /// Prefix length.
        // NOTE(review): presumably the number of leading characters that must
        // match exactly, as in the SQL-side fuzzy options — confirm.
        pub prefix_length: u32,
        /// Max expansions.
        pub max_expansions: u32,
    }
972
973    impl Default for FuzzyConfig {
974        fn default() -> Self {
975            Self {
976                max_edits: 2,
977                prefix_length: 0,
978                max_expansions: 50,
979            }
980        }
981    }
982
    /// Score configuration.
    ///
    /// The derived default sets neither a boost nor a function.
    #[derive(Debug, Clone, Default, Serialize, Deserialize)]
    pub struct ScoreConfig {
        /// Boost factor.
        pub boost: Option<f64>,
        /// Score function.
        pub function: Option<String>,
    }
991
    /// Highlight configuration.
    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct HighlightConfig {
        /// Path to highlight.
        pub path: String,
        /// Max characters per highlight.
        // NOTE(review): the field name suggests this is the maximum number of
        // characters to examine rather than a per-highlight length — confirm.
        pub max_chars_to_examine: u32,
        /// Max number of highlights.
        pub max_num_passages: u32,
    }
1002
1003    impl Default for HighlightConfig {
1004        fn default() -> Self {
1005            Self {
1006                path: String::new(),
1007                max_chars_to_examine: 500_000,
1008                max_num_passages: 5,
1009            }
1010        }
1011    }
1012
1013    impl AtlasSearchQuery {
1014        /// Create a new search query.
1015        pub fn new(query: impl Into<String>) -> Self {
1016            Self {
1017                query: query.into(),
1018                ..Default::default()
1019            }
1020        }
1021
1022        /// Add a field to search.
1023        pub fn path(mut self, path: impl Into<String>) -> Self {
1024            self.path.push(path.into());
1025            self
1026        }
1027
1028        /// Add multiple fields to search.
1029        pub fn paths(mut self, paths: impl IntoIterator<Item = impl Into<String>>) -> Self {
1030            self.path.extend(paths.into_iter().map(Into::into));
1031            self
1032        }
1033
1034        /// Enable fuzzy matching.
1035        pub fn fuzzy(mut self, config: FuzzyConfig) -> Self {
1036            self.fuzzy = Some(config);
1037            self
1038        }
1039
1040        /// Set score boost.
1041        pub fn boost(mut self, factor: f64) -> Self {
1042            self.score = Some(ScoreConfig {
1043                boost: Some(factor),
1044                function: None,
1045            });
1046            self
1047        }
1048
1049        /// Enable highlighting.
1050        pub fn highlight(mut self, path: impl Into<String>) -> Self {
1051            self.highlight = Some(HighlightConfig {
1052                path: path.into(),
1053                ..Default::default()
1054            });
1055            self
1056        }
1057
1058        /// Build the $search aggregation stage.
1059        pub fn to_search_stage(&self) -> serde_json::Value {
1060            let mut text = serde_json::json!({
1061                "query": self.query,
1062                "path": if self.path.len() == 1 {
1063                    serde_json::Value::String(self.path[0].clone())
1064                } else {
1065                    serde_json::Value::Array(self.path.iter().map(|p| serde_json::Value::String(p.clone())).collect())
1066                }
1067            });
1068
1069            if let Some(ref fuzzy) = self.fuzzy {
1070                text["fuzzy"] = serde_json::json!({
1071                    "maxEdits": fuzzy.max_edits,
1072                    "prefixLength": fuzzy.prefix_length,
1073                    "maxExpansions": fuzzy.max_expansions
1074                });
1075            }
1076
1077            let mut search = serde_json::json!({
1078                "$search": {
1079                    "text": text
1080                }
1081            });
1082
1083            if let Some(ref hl) = self.highlight {
1084                search["$search"]["highlight"] = serde_json::json!({
1085                    "path": hl.path,
1086                    "maxCharsToExamine": hl.max_chars_to_examine,
1087                    "maxNumPassages": hl.max_num_passages
1088                });
1089            }
1090
1091            search
1092        }
1093
1094        /// Build the aggregation pipeline for search.
1095        pub fn to_pipeline(&self) -> Vec<serde_json::Value> {
1096            let mut pipeline = vec![self.to_search_stage()];
1097
1098            // Add score metadata
1099            pipeline.push(serde_json::json!({
1100                "$addFields": {
1101                    "score": { "$meta": "searchScore" }
1102                }
1103            }));
1104
1105            // Add highlights if enabled
1106            if self.highlight.is_some() {
1107                pipeline.push(serde_json::json!({
1108                    "$addFields": {
1109                        "highlights": { "$meta": "searchHighlights" }
1110                    }
1111                }));
1112            }
1113
1114            pipeline
1115        }
1116    }
1117
1118    /// Builder for Atlas Search index.
1119    #[derive(Debug, Clone, Default)]
1120    pub struct AtlasSearchIndexBuilder {
1121        name: String,
1122        collection: Option<String>,
1123        analyzer: String,
1124        dynamic: bool,
1125        fields: Vec<SearchField>,
1126    }
1127
1128    impl AtlasSearchIndexBuilder {
1129        /// Create a new builder.
1130        pub fn new(name: impl Into<String>) -> Self {
1131            Self {
1132                name: name.into(),
1133                analyzer: "lucene.standard".to_string(),
1134                ..Default::default()
1135            }
1136        }
1137
1138        /// Set the collection.
1139        pub fn collection(mut self, collection: impl Into<String>) -> Self {
1140            self.collection = Some(collection.into());
1141            self
1142        }
1143
1144        /// Set the analyzer.
1145        pub fn analyzer(mut self, analyzer: impl Into<String>) -> Self {
1146            self.analyzer = analyzer.into();
1147            self
1148        }
1149
1150        /// Enable dynamic mapping.
1151        pub fn dynamic(mut self) -> Self {
1152            self.dynamic = true;
1153            self
1154        }
1155
1156        /// Add a text field.
1157        pub fn text_field(mut self, path: impl Into<String>) -> Self {
1158            self.fields.push(SearchField {
1159                path: path.into(),
1160                field_type: SearchFieldType::String,
1161                analyzer: None,
1162                facet: false,
1163            });
1164            self
1165        }
1166
1167        /// Add a faceted field.
1168        pub fn facet_field(mut self, path: impl Into<String>, field_type: SearchFieldType) -> Self {
1169            self.fields.push(SearchField {
1170                path: path.into(),
1171                field_type,
1172                analyzer: None,
1173                facet: true,
1174            });
1175            self
1176        }
1177
1178        /// Add an autocomplete field.
1179        pub fn autocomplete_field(mut self, path: impl Into<String>) -> Self {
1180            self.fields.push(SearchField {
1181                path: path.into(),
1182                field_type: SearchFieldType::Autocomplete,
1183                analyzer: None,
1184                facet: false,
1185            });
1186            self
1187        }
1188
1189        /// Build the index definition.
1190        pub fn build(self) -> serde_json::Value {
1191            let mut fields = serde_json::Map::new();
1192
1193            for field in &self.fields {
1194                let mut field_def = serde_json::json!({
1195                    "type": field.field_type.as_str()
1196                });
1197
1198                if let Some(ref analyzer) = field.analyzer {
1199                    field_def["analyzer"] = serde_json::Value::String(analyzer.clone());
1200                }
1201
1202                fields.insert(field.path.clone(), field_def);
1203            }
1204
1205            serde_json::json!({
1206                "name": self.name,
1207                "analyzer": self.analyzer,
1208                "mappings": {
1209                    "dynamic": self.dynamic,
1210                    "fields": fields
1211                }
1212            })
1213        }
1214    }
1215
1216    /// Helper to create a search query.
1217    pub fn search(query: impl Into<String>) -> AtlasSearchQuery {
1218        AtlasSearchQuery::new(query)
1219    }
1220
1221    /// Helper to create a search index builder.
1222    pub fn search_index(name: impl Into<String>) -> AtlasSearchIndexBuilder {
1223        AtlasSearchIndexBuilder::new(name)
1224    }
1225}
1226
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder records query text, columns, mode, and ranking.
    #[test]
    fn test_search_query_builder() {
        let search = SearchQuery::new("rust async")
            .columns(["title", "body"])
            .match_all()
            .with_ranking()
            .build();

        assert_eq!(search.query, "rust async");
        assert_eq!(search.columns, vec!["title", "body"]);
        assert_eq!(search.mode, SearchMode::All);
        assert!(search.ranking.enabled);
    }

    /// PostgreSQL output uses tsvector/tsquery matching and `ts_rank`.
    #[test]
    fn test_postgres_search_sql() {
        let search = SearchQuery::new("rust programming")
            .column("content")
            .with_ranking()
            .build();

        let sql = search.to_postgres_sql("posts").unwrap();
        assert!(sql.sql.contains("to_tsvector"));
        assert!(sql.sql.contains("to_tsquery"));
        assert!(sql.sql.contains("ts_rank"));
        assert!(sql.sql.contains("@@"));
    }

    /// MySQL output uses `MATCH ... AGAINST`.
    #[test]
    fn test_mysql_search_sql() {
        let search = SearchQuery::new("database performance")
            .columns(["title", "body"])
            .match_any()
            .build();

        let sql = search.to_mysql_sql("articles").unwrap();
        assert!(sql.sql.contains("MATCH"));
        assert!(sql.sql.contains("AGAINST"));
    }

    /// SQLite output uses `MATCH` and `bm25` ranking.
    #[test]
    fn test_sqlite_search_sql() {
        let search = SearchQuery::new("web development")
            .column("content")
            .with_ranking()
            .build();

        let sql = search.to_sqlite_sql("posts", "posts_fts").unwrap();
        assert!(sql.sql.contains("MATCH"));
        assert!(sql.sql.contains("bm25"));
    }

    /// MSSQL phrase search uses `CONTAINS`.
    #[test]
    fn test_mssql_search_sql() {
        let search = SearchQuery::new("machine learning")
            .columns(["title", "abstract"])
            .phrase()
            .build();

        let sql = search.to_mssql_sql("papers").unwrap();
        assert!(sql.sql.contains("CONTAINS"));
    }

    /// MSSQL ranked search uses `CONTAINSTABLE` and `RANK`.
    #[test]
    fn test_mssql_ranked_search() {
        let search = SearchQuery::new("neural network")
            .column("content")
            .with_ranking()
            .build();

        let sql = search.to_mssql_sql("papers").unwrap();
        assert!(sql.sql.contains("CONTAINSTABLE"));
        assert!(sql.sql.contains("RANK"));
    }

    /// PostgreSQL index DDL is a GIN index over `to_tsvector`.
    #[test]
    fn test_fulltext_index_postgres() {
        let index = FullTextIndex::builder("posts_search_idx")
            .on_table("posts")
            .columns(["title", "body"])
            .language(SearchLanguage::English)
            .build()
            .unwrap();

        let sql = index.to_postgres_sql();
        assert!(sql.contains("CREATE INDEX posts_search_idx"));
        assert!(sql.contains("USING GIN"));
        assert!(sql.contains("to_tsvector"));
    }

    /// MySQL index DDL is an exact `CREATE FULLTEXT INDEX` statement.
    #[test]
    fn test_fulltext_index_mysql() {
        let index = FullTextIndex::builder("posts_fulltext")
            .on_table("posts")
            .columns(["title", "body"])
            .build()
            .unwrap();

        let sql = index.to_mysql_sql();
        assert_eq!(sql, "CREATE FULLTEXT INDEX posts_fulltext ON posts (title, body);");
    }

    /// SQLite index DDL creates an FTS5 virtual table.
    #[test]
    fn test_fulltext_index_sqlite() {
        let index = FullTextIndex::builder("posts_fts")
            .on_table("posts")
            .columns(["title", "content"])
            .build()
            .unwrap();

        let sql = index.to_sqlite_sql();
        assert!(sql.contains("CREATE VIRTUAL TABLE"));
        assert!(sql.contains("USING fts5"));
    }

    /// Highlight options record the enabled flag, tags, and maximum length.
    #[test]
    fn test_highlight_options() {
        let opts = HighlightOptions::default()
            .enabled()
            .tags("<mark>", "</mark>")
            .max_length(200)
            .max_fragments(5);

        assert!(opts.enabled);
        assert_eq!(opts.start_tag, "<mark>");
        assert_eq!(opts.end_tag, "</mark>");
        assert_eq!(opts.max_length, Some(200));
    }

    /// Fuzzy options record the enabled flag, edit distance, and threshold.
    #[test]
    fn test_fuzzy_options() {
        let opts = FuzzyOptions::default()
            .enabled()
            .max_edits(1)
            .threshold(0.5);

        assert!(opts.enabled);
        assert_eq!(opts.max_edits, 1);
        assert_eq!(opts.threshold, 0.5);
    }

    /// Ranking options record the alias and one weight per column.
    #[test]
    fn test_ranking_with_weights() {
        let opts = RankingOptions::default()
            .enabled()
            .alias("relevance")
            .weight("title", 2.0)
            .weight("body", 1.0);

        assert_eq!(opts.score_alias, "relevance");
        assert_eq!(opts.weights.len(), 2);
    }

    mod mongodb_tests {
        use super::super::mongodb::*;

        /// The `$search` stage carries the query text, every path, and the
        /// fuzzy settings; the boost is recorded on the query.
        #[test]
        fn test_atlas_search_query() {
            let query = search("rust async")
                .paths(["title", "body"])
                .fuzzy(FuzzyConfig::default())
                .boost(2.0);

            assert_eq!(query.score.as_ref().and_then(|s| s.boost), Some(2.0));

            let stage = query.to_search_stage();
            let text = &stage["$search"]["text"];
            assert_eq!(text["query"].as_str(), Some("rust async"));
            // Two paths are emitted as an array, in insertion order.
            assert_eq!(text["path"], serde_json::json!(["title", "body"]));
            assert_eq!(text["fuzzy"]["maxEdits"].as_u64(), Some(2));
            assert_eq!(text["fuzzy"]["prefixLength"].as_u64(), Some(0));
            assert_eq!(text["fuzzy"]["maxExpansions"].as_u64(), Some(50));
        }

        /// With highlighting on, the pipeline is `$search`, then the score
        /// stage, then the highlights stage.
        #[test]
        fn test_atlas_search_pipeline() {
            let query = search("database")
                .path("content")
                .highlight("content");

            let pipeline = query.to_pipeline();
            assert_eq!(pipeline.len(), 3);
            assert!(pipeline[0]["$search"].is_object());
            // A single path is emitted as a bare string.
            assert_eq!(pipeline[0]["$search"]["text"]["path"].as_str(), Some("content"));
            assert_eq!(
                pipeline[0]["$search"]["highlight"]["path"].as_str(),
                Some("content")
            );
            assert_eq!(
                pipeline[1]["$addFields"]["score"]["$meta"].as_str(),
                Some("searchScore")
            );
            assert_eq!(
                pipeline[2]["$addFields"]["highlights"]["$meta"].as_str(),
                Some("searchHighlights")
            );
        }

        /// The index definition carries the name, analyzer, dynamic flag,
        /// and one mapping per added field.
        #[test]
        fn test_atlas_search_index_builder() {
            let index = search_index("default")
                .collection("posts")
                .analyzer("lucene.english")
                .dynamic()
                .text_field("title")
                .text_field("body")
                .facet_field("category", SearchFieldType::String)
                .build();

            assert_eq!(index["name"].as_str(), Some("default"));
            assert_eq!(index["analyzer"].as_str(), Some("lucene.english"));
            assert!(index["mappings"]["dynamic"].as_bool().unwrap());

            let fields = &index["mappings"]["fields"];
            assert_eq!(fields["title"]["type"].as_str(), Some("string"));
            assert_eq!(fields["body"]["type"].as_str(), Some("string"));
            assert!(fields["category"]["type"].is_string());
        }
    }
}
1425
1426