Skip to main content

prax_query/
search.rs

1//! Full-text search support across database backends.
2//!
3//! This module provides a unified API for full-text search across different
4//! database backends, abstracting over their specific implementations.
5//!
6//! # Supported Features
7//!
8//! | Feature          | PostgreSQL   | MySQL      | SQLite  | MSSQL   | MongoDB      |
9//! |------------------|--------------|------------|---------|---------|--------------|
10//! | Full-Text Index  | ✅ tsvector  | ✅ FULLTEXT| ✅ FTS5 | ✅      | ✅ Atlas     |
11//! | Search Ranking   | ✅ ts_rank   | ✅         | ✅ bm25 | ✅ RANK | ✅ score     |
12//! | Phrase Search    | ✅           | ✅         | ✅      | ✅      | ✅           |
13//! | Faceted Search   | ✅           | ❌         | ❌      | ❌      | ✅           |
14//! | Fuzzy Search     | ✅ pg_trgm   | ❌         | ❌      | ✅      | ✅           |
15//! | Highlighting     | ✅ ts_headline| ❌        | ✅ highlight| ❌  | ✅ highlight |
16//!
17//! # Example Usage
18//!
19//! ```rust,ignore
20//! use prax_query::search::{SearchQuery, SearchOptions};
21//!
22//! // Simple search
23//! let search = SearchQuery::new("rust async programming")
24//!     .columns(["title", "body"])
25//!     .with_ranking()
26//!     .build();
27//!
28//! // Generate SQL
29//! let sql = search.to_postgres_sql("posts")?;
30//! ```
31
32use std::borrow::Cow;
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{QueryError, QueryResult};
37use crate::sql::DatabaseType;
38
/// Full-text search mode/operator.
///
/// Controls how the terms of a search string are combined when a
/// backend-specific search expression is generated. [`SearchMode::Any`] is
/// the default mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
pub enum SearchMode {
    /// Match any word (OR). This is the `Default` variant.
    #[default]
    Any,
    /// Match all words (AND).
    All,
    /// Match exact phrase.
    Phrase,
    /// Boolean mode with operators (+, -, *).
    Boolean,
    /// Natural language mode.
    Natural,
}
54
55impl SearchMode {
56    /// Convert to PostgreSQL tsquery format.
57    pub fn to_postgres_operator(&self) -> &'static str {
58        match self {
59            Self::Any | Self::Natural => " | ",
60            Self::All | Self::Boolean => " & ",
61            Self::Phrase => " <-> ",
62        }
63    }
64}
65
/// Text search language/configuration.
///
/// Selects the language-specific text search configuration (PostgreSQL) or
/// tokenizer (SQLite FTS5) used when generating SQL.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SearchLanguage {
    /// Simple (no stemming).
    Simple,
    /// English.
    English,
    /// Spanish.
    Spanish,
    /// French.
    French,
    /// German.
    German,
    /// Custom language/configuration name, passed through unchanged as the
    /// PostgreSQL configuration name.
    Custom(String),
}
82
83impl SearchLanguage {
84    /// Get the PostgreSQL text search configuration name.
85    pub fn to_postgres_config(&self) -> Cow<'static, str> {
86        match self {
87            Self::Simple => Cow::Borrowed("simple"),
88            Self::English => Cow::Borrowed("english"),
89            Self::Spanish => Cow::Borrowed("spanish"),
90            Self::French => Cow::Borrowed("french"),
91            Self::German => Cow::Borrowed("german"),
92            Self::Custom(name) => Cow::Owned(name.clone()),
93        }
94    }
95
96    /// Get the SQLite FTS5 tokenizer.
97    pub fn to_sqlite_tokenizer(&self) -> &'static str {
98        match self {
99            Self::Simple => "unicode61",
100            Self::English => "porter unicode61",
101            _ => "unicode61", // SQLite has limited language support
102        }
103    }
104}
105
106impl Default for SearchLanguage {
107    fn default() -> Self {
108        Self::English
109    }
110}
111
/// Ranking/scoring options for search results.
///
/// The `Default` value has ranking disabled, `search_score` as the score
/// alias, a normalization of `0` and no field weights.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RankingOptions {
    /// Whether to include ranking score.
    pub enabled: bool,
    /// Column alias for the score; also used to build the ORDER BY clause
    /// returned alongside the generated SQL.
    pub score_alias: String,
    /// Normalization option (PostgreSQL-specific).
    pub normalization: u32,
    /// Field weights (field_name -> weight), in insertion order.
    pub weights: Vec<(String, f32)>,
}
124
125impl Default for RankingOptions {
126    fn default() -> Self {
127        Self {
128            enabled: false,
129            score_alias: "search_score".to_string(),
130            normalization: 0,
131            weights: Vec::new(),
132        }
133    }
134}
135
136impl RankingOptions {
137    /// Enable ranking.
138    pub fn enabled(mut self) -> Self {
139        self.enabled = true;
140        self
141    }
142
143    /// Set the score column alias.
144    pub fn alias(mut self, alias: impl Into<String>) -> Self {
145        self.score_alias = alias.into();
146        self
147    }
148
149    /// Set PostgreSQL normalization option.
150    pub fn normalization(mut self, norm: u32) -> Self {
151        self.normalization = norm;
152        self
153    }
154
155    /// Add a field weight.
156    pub fn weight(mut self, field: impl Into<String>, weight: f32) -> Self {
157        self.weights.push((field.into(), weight));
158        self
159    }
160}
161
/// Highlighting options for search results.
///
/// The `Default` value has highlighting disabled, wraps matches in
/// `<b>`/`</b>`, and uses a maximum length of 150, at most 3 fragments and
/// `" ... "` as the fragment delimiter.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct HighlightOptions {
    /// Whether to include highlights.
    pub enabled: bool,
    /// Start tag for highlights.
    pub start_tag: String,
    /// End tag for highlights.
    pub end_tag: String,
    /// Maximum length of highlighted text.
    pub max_length: Option<u32>,
    /// Number of fragments to return.
    pub max_fragments: Option<u32>,
    /// Fragment delimiter.
    pub delimiter: String,
}
178
179impl Default for HighlightOptions {
180    fn default() -> Self {
181        Self {
182            enabled: false,
183            start_tag: "<b>".to_string(),
184            end_tag: "</b>".to_string(),
185            max_length: Some(150),
186            max_fragments: Some(3),
187            delimiter: " ... ".to_string(),
188        }
189    }
190}
191
192impl HighlightOptions {
193    /// Enable highlighting.
194    pub fn enabled(mut self) -> Self {
195        self.enabled = true;
196        self
197    }
198
199    /// Set highlight tags.
200    pub fn tags(mut self, start: impl Into<String>, end: impl Into<String>) -> Self {
201        self.start_tag = start.into();
202        self.end_tag = end.into();
203        self
204    }
205
206    /// Set maximum text length.
207    pub fn max_length(mut self, length: u32) -> Self {
208        self.max_length = Some(length);
209        self
210    }
211
212    /// Set maximum number of fragments.
213    pub fn max_fragments(mut self, count: u32) -> Self {
214        self.max_fragments = Some(count);
215        self
216    }
217
218    /// Set fragment delimiter.
219    pub fn delimiter(mut self, delimiter: impl Into<String>) -> Self {
220        self.delimiter = delimiter.into();
221        self
222    }
223}
224
/// Fuzzy search options.
///
/// The `Default` value has fuzzy matching disabled, allows up to 2 edits,
/// requires no exact prefix and uses a similarity threshold of 0.3.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FuzzyOptions {
    /// Whether to enable fuzzy matching.
    pub enabled: bool,
    /// Maximum edit distance (Levenshtein).
    pub max_edits: u32,
    /// Prefix length that must match exactly.
    pub prefix_length: u32,
    /// Similarity threshold (0.0-1.0).
    pub threshold: f32,
}
237
238impl Default for FuzzyOptions {
239    fn default() -> Self {
240        Self {
241            enabled: false,
242            max_edits: 2,
243            prefix_length: 0,
244            threshold: 0.3,
245        }
246    }
247}
248
249impl FuzzyOptions {
250    /// Enable fuzzy search.
251    pub fn enabled(mut self) -> Self {
252        self.enabled = true;
253        self
254    }
255
256    /// Set maximum edit distance.
257    pub fn max_edits(mut self, edits: u32) -> Self {
258        self.max_edits = edits;
259        self
260    }
261
262    /// Set prefix length.
263    pub fn prefix_length(mut self, length: u32) -> Self {
264        self.prefix_length = length;
265        self
266    }
267
268    /// Set similarity threshold.
269    pub fn threshold(mut self, threshold: f32) -> Self {
270        self.threshold = threshold;
271        self
272    }
273}
274
/// A full-text search query.
///
/// Usually assembled through [`SearchQuery::new`], which returns a
/// [`SearchQueryBuilder`], and then turned into backend-specific SQL with one
/// of the `to_*_sql` methods.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchQuery {
    /// Search terms, as entered; the SQL generators split this on whitespace
    /// for the word-based modes.
    pub query: String,
    /// Columns to search in.
    pub columns: Vec<String>,
    /// Search mode.
    pub mode: SearchMode,
    /// Language/configuration.
    pub language: SearchLanguage,
    /// Ranking options.
    pub ranking: RankingOptions,
    /// Highlighting options.
    pub highlight: HighlightOptions,
    /// Fuzzy search options.
    pub fuzzy: FuzzyOptions,
    /// Minimum word length.
    pub min_word_length: Option<u32>,
    /// Filter by category/field (for faceted search), as `(field, value)` pairs.
    pub filters: Vec<(String, String)>,
}
297
298impl SearchQuery {
299    /// Create a new search query.
300    pub fn new(query: impl Into<String>) -> SearchQueryBuilder {
301        SearchQueryBuilder::new(query)
302    }
303
304    /// Generate PostgreSQL full-text search SQL.
305    pub fn to_postgres_sql(&self, table: &str) -> QueryResult<SearchSql> {
306        let config = self.language.to_postgres_config();
307
308        // Build tsvector expression
309        let tsvector = if self.columns.len() == 1 {
310            format!("to_tsvector('{}', {})", config, self.columns[0])
311        } else {
312            let concat_cols = self.columns.join(" || ' ' || ");
313            format!("to_tsvector('{}', {})", config, concat_cols)
314        };
315
316        // Build tsquery expression
317        let words: Vec<&str> = self.query.split_whitespace().collect();
318        let tsquery_parts: Vec<String> = words
319            .iter()
320            .map(|w| format!("'{}'", w.replace('\'', "''")))
321            .collect();
322        let tsquery = format!(
323            "to_tsquery('{}', '{}')",
324            config,
325            tsquery_parts.join(self.mode.to_postgres_operator())
326        );
327
328        // Build WHERE clause
329        let where_clause = format!("{} @@ {}", tsvector, tsquery);
330
331        // Build SELECT columns
332        let mut select_cols = vec!["*".to_string()];
333
334        // Add ranking
335        if self.ranking.enabled {
336            let weights = if self.ranking.weights.is_empty() {
337                String::new()
338            } else {
339                // PostgreSQL uses setweight for field weights
340                String::new()
341            };
342            select_cols.push(format!(
343                "ts_rank({}{}, {}) AS {}",
344                tsvector, weights, tsquery, self.ranking.score_alias
345            ));
346        }
347
348        // Add highlighting
349        if self.highlight.enabled && !self.columns.is_empty() {
350            let col = &self.columns[0];
351            select_cols.push(format!(
352                "ts_headline('{}', {}, {}, 'StartSel={}, StopSel={}, MaxWords={}, MaxFragments={}') AS highlighted",
353                config,
354                col,
355                tsquery,
356                self.highlight.start_tag,
357                self.highlight.end_tag,
358                self.highlight.max_length.unwrap_or(35),
359                self.highlight.max_fragments.unwrap_or(3)
360            ));
361        }
362
363        let sql = format!(
364            "SELECT {} FROM {} WHERE {}",
365            select_cols.join(", "),
366            table,
367            where_clause
368        );
369
370        let order_by = if self.ranking.enabled {
371            Some(format!("{} DESC", self.ranking.score_alias))
372        } else {
373            None
374        };
375
376        Ok(SearchSql {
377            sql,
378            order_by,
379            params: vec![],
380        })
381    }
382
383    /// Generate MySQL full-text search SQL.
384    pub fn to_mysql_sql(&self, table: &str) -> QueryResult<SearchSql> {
385        let columns = self.columns.join(", ");
386
387        // MySQL MATCH ... AGAINST syntax
388        let match_mode = match self.mode {
389            SearchMode::Natural | SearchMode::Any => "",
390            SearchMode::Boolean | SearchMode::All => " IN BOOLEAN MODE",
391            SearchMode::Phrase => " IN BOOLEAN MODE", // Use quotes for phrase
392        };
393
394        let search_query = if self.mode == SearchMode::Phrase {
395            format!("\"{}\"", self.query)
396        } else if self.mode == SearchMode::All {
397            // Add + prefix for required terms
398            self.query
399                .split_whitespace()
400                .map(|w| format!("+{}", w))
401                .collect::<Vec<_>>()
402                .join(" ")
403        } else {
404            self.query.clone()
405        };
406
407        let match_expr = format!(
408            "MATCH({}) AGAINST('{}'{}))",
409            columns, search_query, match_mode
410        );
411
412        let mut select_cols = vec!["*".to_string()];
413
414        // Add ranking (MySQL returns relevance from MATCH)
415        if self.ranking.enabled {
416            select_cols.push(format!("{} AS {}", match_expr, self.ranking.score_alias));
417        }
418
419        let sql = format!(
420            "SELECT {} FROM {} WHERE {}",
421            select_cols.join(", "),
422            table,
423            match_expr
424        );
425
426        let order_by = if self.ranking.enabled {
427            Some(format!("{} DESC", self.ranking.score_alias))
428        } else {
429            None
430        };
431
432        Ok(SearchSql {
433            sql,
434            order_by,
435            params: vec![],
436        })
437    }
438
439    /// Generate SQLite FTS5 search SQL.
440    pub fn to_sqlite_sql(&self, table: &str, fts_table: &str) -> QueryResult<SearchSql> {
441        let search_query = match self.mode {
442            SearchMode::Phrase => format!("\"{}\"", self.query),
443            SearchMode::All => self
444                .query
445                .split_whitespace()
446                .collect::<Vec<_>>()
447                .join(" AND "),
448            SearchMode::Any => self
449                .query
450                .split_whitespace()
451                .collect::<Vec<_>>()
452                .join(" OR "),
453            _ => self.query.clone(),
454        };
455
456        let mut select_cols = vec![format!("{}.*", table)];
457
458        // Add ranking (SQLite uses bm25)
459        if self.ranking.enabled {
460            select_cols.push(format!(
461                "bm25({}) AS {}",
462                fts_table, self.ranking.score_alias
463            ));
464        }
465
466        // Add highlighting
467        if self.highlight.enabled && !self.columns.is_empty() {
468            select_cols.push(format!(
469                "highlight({}, 0, '{}', '{}') AS highlighted",
470                fts_table, self.highlight.start_tag, self.highlight.end_tag
471            ));
472        }
473
474        let sql = format!(
475            "SELECT {} FROM {} JOIN {} ON {}.rowid = {}.rowid WHERE {} MATCH '{}'",
476            select_cols.join(", "),
477            table,
478            fts_table,
479            table,
480            fts_table,
481            fts_table,
482            search_query
483        );
484
485        let order_by = if self.ranking.enabled {
486            Some(format!("{}", self.ranking.score_alias))
487        } else {
488            None
489        };
490
491        Ok(SearchSql {
492            sql,
493            order_by,
494            params: vec![],
495        })
496    }
497
498    /// Generate MSSQL full-text search SQL.
499    pub fn to_mssql_sql(&self, table: &str) -> QueryResult<SearchSql> {
500        let columns = self.columns.join(", ");
501
502        let contains_expr = match self.mode {
503            SearchMode::Phrase => format!("\"{}\"", self.query),
504            SearchMode::All => {
505                let terms: Vec<String> = self
506                    .query
507                    .split_whitespace()
508                    .map(|w| format!("\"{}\"", w))
509                    .collect();
510                terms.join(" AND ")
511            }
512            SearchMode::Any | SearchMode::Natural => {
513                let terms: Vec<String> = self
514                    .query
515                    .split_whitespace()
516                    .map(|w| format!("\"{}\"", w))
517                    .collect();
518                terms.join(" OR ")
519            }
520            SearchMode::Boolean => self.query.clone(),
521        };
522
523        let select_cols = vec!["*".to_string()];
524
525        // Add ranking (MSSQL uses CONTAINSTABLE for ranking)
526        if self.ranking.enabled {
527            let sql = format!(
528                "SELECT {}.*, ft.RANK AS {} FROM {} \
529                 INNER JOIN CONTAINSTABLE({}, ({}), '{}') AS ft \
530                 ON {}.id = ft.[KEY]",
531                table, self.ranking.score_alias, table, table, columns, contains_expr, table
532            );
533
534            return Ok(SearchSql {
535                sql,
536                order_by: Some(format!("{} DESC", self.ranking.score_alias)),
537                params: vec![],
538            });
539        }
540
541        let sql = format!(
542            "SELECT {} FROM {} WHERE CONTAINS(({}), '{}')",
543            select_cols.join(", "),
544            table,
545            columns,
546            contains_expr
547        );
548
549        Ok(SearchSql {
550            sql,
551            order_by: None,
552            params: vec![],
553        })
554    }
555
556    /// Generate search SQL for the specified database type.
557    pub fn to_sql(&self, table: &str, db_type: DatabaseType) -> QueryResult<SearchSql> {
558        match db_type {
559            DatabaseType::PostgreSQL => self.to_postgres_sql(table),
560            DatabaseType::MySQL => self.to_mysql_sql(table),
561            DatabaseType::SQLite => self.to_sqlite_sql(table, &format!("{}_fts", table)),
562            DatabaseType::MSSQL => self.to_mssql_sql(table),
563        }
564    }
565}
566
/// Builder for search queries.
///
/// Mirrors the fields of [`SearchQuery`] one-to-one; `build()` moves each
/// field across unchanged.
#[derive(Debug, Clone)]
pub struct SearchQueryBuilder {
    // Search terms supplied to `new`.
    query: String,
    // Columns accumulated through `column` / `columns`.
    columns: Vec<String>,
    // Search mode; starts as `SearchMode::default()`.
    mode: SearchMode,
    // Language; starts as `SearchLanguage::default()`.
    language: SearchLanguage,
    // Ranking, highlighting and fuzzy options; each starts as its `Default`.
    ranking: RankingOptions,
    highlight: HighlightOptions,
    fuzzy: FuzzyOptions,
    // Minimum word length; unset until `min_word_length` is called.
    min_word_length: Option<u32>,
    // `(field, value)` filters accumulated through `filter`.
    filters: Vec<(String, String)>,
}
580
581impl SearchQueryBuilder {
582    /// Create a new search query builder.
583    pub fn new(query: impl Into<String>) -> Self {
584        Self {
585            query: query.into(),
586            columns: Vec::new(),
587            mode: SearchMode::default(),
588            language: SearchLanguage::default(),
589            ranking: RankingOptions::default(),
590            highlight: HighlightOptions::default(),
591            fuzzy: FuzzyOptions::default(),
592            min_word_length: None,
593            filters: Vec::new(),
594        }
595    }
596
597    /// Add a column to search.
598    pub fn column(mut self, column: impl Into<String>) -> Self {
599        self.columns.push(column.into());
600        self
601    }
602
603    /// Add multiple columns to search.
604    pub fn columns(mut self, columns: impl IntoIterator<Item = impl Into<String>>) -> Self {
605        self.columns.extend(columns.into_iter().map(Into::into));
606        self
607    }
608
609    /// Set the search mode.
610    pub fn mode(mut self, mode: SearchMode) -> Self {
611        self.mode = mode;
612        self
613    }
614
615    /// Set to match all words.
616    pub fn match_all(self) -> Self {
617        self.mode(SearchMode::All)
618    }
619
620    /// Set to match any word.
621    pub fn match_any(self) -> Self {
622        self.mode(SearchMode::Any)
623    }
624
625    /// Set to match exact phrase.
626    pub fn phrase(self) -> Self {
627        self.mode(SearchMode::Phrase)
628    }
629
630    /// Set to boolean mode.
631    pub fn boolean(self) -> Self {
632        self.mode(SearchMode::Boolean)
633    }
634
635    /// Set the search language.
636    pub fn language(mut self, language: SearchLanguage) -> Self {
637        self.language = language;
638        self
639    }
640
641    /// Enable ranking with default options.
642    pub fn with_ranking(mut self) -> Self {
643        self.ranking.enabled = true;
644        self
645    }
646
647    /// Configure ranking options.
648    pub fn ranking(mut self, options: RankingOptions) -> Self {
649        self.ranking = options;
650        self
651    }
652
653    /// Enable highlighting with default options.
654    pub fn with_highlight(mut self) -> Self {
655        self.highlight.enabled = true;
656        self
657    }
658
659    /// Configure highlighting options.
660    pub fn highlight(mut self, options: HighlightOptions) -> Self {
661        self.highlight = options;
662        self
663    }
664
665    /// Enable fuzzy matching with default options.
666    pub fn with_fuzzy(mut self) -> Self {
667        self.fuzzy.enabled = true;
668        self
669    }
670
671    /// Configure fuzzy search options.
672    pub fn fuzzy(mut self, options: FuzzyOptions) -> Self {
673        self.fuzzy = options;
674        self
675    }
676
677    /// Set minimum word length.
678    pub fn min_word_length(mut self, length: u32) -> Self {
679        self.min_word_length = Some(length);
680        self
681    }
682
683    /// Add a filter for faceted search.
684    pub fn filter(mut self, field: impl Into<String>, value: impl Into<String>) -> Self {
685        self.filters.push((field.into(), value.into()));
686        self
687    }
688
689    /// Build the search query.
690    pub fn build(self) -> SearchQuery {
691        SearchQuery {
692            query: self.query,
693            columns: self.columns,
694            mode: self.mode,
695            language: self.language,
696            ranking: self.ranking,
697            highlight: self.highlight,
698            fuzzy: self.fuzzy,
699            min_word_length: self.min_word_length,
700            filters: self.filters,
701        }
702    }
703}
704
/// Generated search SQL.
///
/// The ordering is returned separately from the statement so the caller can
/// decide whether and where to append it.
#[derive(Debug, Clone)]
pub struct SearchSql {
    /// The main SQL query (SELECT ... FROM ... WHERE/JOIN ...), without any
    /// ORDER BY clause.
    pub sql: String,
    /// Optional ordering expression (e.g. `search_score DESC`); it does not
    /// include the `ORDER BY` keyword itself.
    pub order_by: Option<String>,
    /// Query parameters.
    pub params: Vec<String>,
}
715
/// Full-text index definition.
///
/// Usually created through [`FullTextIndex::builder`], which checks that a
/// table and at least one column were supplied.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FullTextIndex {
    /// Index name.
    pub name: String,
    /// Table name.
    pub table: String,
    /// Columns in the index.
    pub columns: Vec<String>,
    /// Language/configuration.
    pub language: SearchLanguage,
    /// Index type (for MySQL: FULLTEXT).
    pub index_type: Option<String>,
}
730
731impl FullTextIndex {
732    /// Create a new full-text index builder.
733    pub fn builder(name: impl Into<String>) -> FullTextIndexBuilder {
734        FullTextIndexBuilder::new(name)
735    }
736
737    /// Generate PostgreSQL CREATE INDEX SQL.
738    pub fn to_postgres_sql(&self) -> String {
739        let config = self.language.to_postgres_config();
740        let columns_expr = if self.columns.len() == 1 {
741            format!("to_tsvector('{}', {})", config, self.columns[0])
742        } else {
743            let concat = self.columns.join(" || ' ' || ");
744            format!("to_tsvector('{}', {})", config, concat)
745        };
746
747        format!(
748            "CREATE INDEX {} ON {} USING GIN ({});",
749            self.name, self.table, columns_expr
750        )
751    }
752
753    /// Generate MySQL CREATE INDEX SQL.
754    pub fn to_mysql_sql(&self) -> String {
755        format!(
756            "CREATE FULLTEXT INDEX {} ON {} ({});",
757            self.name,
758            self.table,
759            self.columns.join(", ")
760        )
761    }
762
763    /// Generate SQLite FTS5 virtual table SQL.
764    pub fn to_sqlite_sql(&self) -> String {
765        let tokenizer = self.language.to_sqlite_tokenizer();
766        format!(
767            "CREATE VIRTUAL TABLE {}_fts USING fts5({}, content='{}', tokenize='{}');",
768            self.table,
769            self.columns.join(", "),
770            self.table,
771            tokenizer
772        )
773    }
774
775    /// Generate MSSQL full-text catalog and index SQL.
776    pub fn to_mssql_sql(&self, catalog_name: &str) -> Vec<String> {
777        vec![
778            format!("CREATE FULLTEXT CATALOG {} AS DEFAULT;", catalog_name),
779            format!(
780                "CREATE FULLTEXT INDEX ON {} ({}) KEY INDEX PK_{} ON {};",
781                self.table,
782                self.columns.join(", "),
783                self.table,
784                catalog_name
785            ),
786        ]
787    }
788
789    /// Generate index SQL for the specified database type.
790    pub fn to_sql(&self, db_type: DatabaseType) -> QueryResult<Vec<String>> {
791        match db_type {
792            DatabaseType::PostgreSQL => Ok(vec![self.to_postgres_sql()]),
793            DatabaseType::MySQL => Ok(vec![self.to_mysql_sql()]),
794            DatabaseType::SQLite => Ok(vec![self.to_sqlite_sql()]),
795            DatabaseType::MSSQL => Ok(self.to_mssql_sql(&format!("{}_catalog", self.table))),
796        }
797    }
798
799    /// Generate DROP INDEX SQL.
800    pub fn to_drop_sql(&self, db_type: DatabaseType) -> QueryResult<String> {
801        match db_type {
802            DatabaseType::PostgreSQL => Ok(format!("DROP INDEX IF EXISTS {};", self.name)),
803            DatabaseType::MySQL => Ok(format!("DROP INDEX {} ON {};", self.name, self.table)),
804            DatabaseType::SQLite => Ok(format!("DROP TABLE IF EXISTS {}_fts;", self.table)),
805            DatabaseType::MSSQL => Ok(format!(
806                "DROP FULLTEXT INDEX ON {}; DROP FULLTEXT CATALOG {}_catalog;",
807                self.table, self.table
808            )),
809        }
810    }
811}
812
/// Builder for full-text indexes.
///
/// `build()` fails unless a table was set with `on_table` and at least one
/// column was added.
#[derive(Debug, Clone)]
pub struct FullTextIndexBuilder {
    // Index name supplied to `new`.
    name: String,
    // Target table; `None` until `on_table` is called.
    table: Option<String>,
    // Columns accumulated through `column` / `columns`.
    columns: Vec<String>,
    // Language; starts as `SearchLanguage::default()`.
    language: SearchLanguage,
    // Index type; initialised to `None` by `new` and copied into the built index.
    index_type: Option<String>,
}
822
823impl FullTextIndexBuilder {
824    /// Create a new builder.
825    pub fn new(name: impl Into<String>) -> Self {
826        Self {
827            name: name.into(),
828            table: None,
829            columns: Vec::new(),
830            language: SearchLanguage::default(),
831            index_type: None,
832        }
833    }
834
835    /// Set the table name.
836    pub fn on_table(mut self, table: impl Into<String>) -> Self {
837        self.table = Some(table.into());
838        self
839    }
840
841    /// Add a column.
842    pub fn column(mut self, column: impl Into<String>) -> Self {
843        self.columns.push(column.into());
844        self
845    }
846
847    /// Add multiple columns.
848    pub fn columns(mut self, columns: impl IntoIterator<Item = impl Into<String>>) -> Self {
849        self.columns.extend(columns.into_iter().map(Into::into));
850        self
851    }
852
853    /// Set the language.
854    pub fn language(mut self, language: SearchLanguage) -> Self {
855        self.language = language;
856        self
857    }
858
859    /// Build the index definition.
860    pub fn build(self) -> QueryResult<FullTextIndex> {
861        let table = self.table.ok_or_else(|| {
862            QueryError::invalid_input("table", "Must specify table with on_table()")
863        })?;
864
865        if self.columns.is_empty() {
866            return Err(QueryError::invalid_input(
867                "columns",
868                "Must specify at least one column",
869            ));
870        }
871
872        Ok(FullTextIndex {
873            name: self.name,
874            table,
875            columns: self.columns,
876            language: self.language,
877            index_type: self.index_type,
878        })
879    }
880}
881
882/// MongoDB Atlas Search support.
883pub mod mongodb {
884    use serde::{Deserialize, Serialize};
885
    /// Atlas Search index definition.
    ///
    /// Plain data description of an index: its name, the collection it
    /// belongs to, the analyzer and the field mappings.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    pub struct AtlasSearchIndex {
        /// Index name.
        pub name: String,
        /// Collection name.
        pub collection: String,
        /// Analyzer to use.
        pub analyzer: String,
        /// Field mappings.
        pub mappings: SearchMappings,
    }
898
    /// Field mappings for Atlas Search.
    ///
    /// The derived `Default` is non-dynamic with no explicit fields.
    #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
    pub struct SearchMappings {
        /// Whether to dynamically map fields.
        pub dynamic: bool,
        /// Explicit field definitions.
        pub fields: Vec<SearchField>,
    }
907
    /// A searchable field definition.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    pub struct SearchField {
        /// Field path.
        pub path: String,
        /// Field type.
        pub field_type: SearchFieldType,
        /// Analyzer for text fields; `None` when no field-specific analyzer
        /// is set.
        pub analyzer: Option<String>,
        /// Whether to store for faceting.
        pub facet: bool,
    }
920
    /// Atlas Search field types.
    ///
    /// Use `as_str` to obtain the type name string for a variant.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
    pub enum SearchFieldType {
        /// String/text field.
        String,
        /// Number field.
        Number,
        /// Date field.
        Date,
        /// Boolean field.
        Boolean,
        /// ObjectId field.
        ObjectId,
        /// Geo field.
        Geo,
        /// Autocomplete field.
        Autocomplete,
    }
939
940    impl SearchFieldType {
941        /// Get the Atlas Search type name.
942        pub fn as_str(&self) -> &'static str {
943            match self {
944                Self::String => "string",
945                Self::Number => "number",
946                Self::Date => "date",
947                Self::Boolean => "boolean",
948                Self::ObjectId => "objectId",
949                Self::Geo => "geo",
950                Self::Autocomplete => "autocomplete",
951            }
952        }
953    }
954
    /// Atlas Search query builder.
    ///
    /// The derived `Default` has an empty query, no paths and no fuzzy, score
    /// or highlight configuration.
    #[derive(Debug, Clone, Default)]
    pub struct AtlasSearchQuery {
        /// Search text.
        pub query: String,
        /// Fields to search.
        pub path: Vec<String>,
        /// Fuzzy options; `None` when fuzzy matching is not configured.
        pub fuzzy: Option<FuzzyConfig>,
        /// Score options; `None` when no score configuration is set.
        pub score: Option<ScoreConfig>,
        /// Highlight options; `None` when highlighting is not configured.
        pub highlight: Option<HighlightConfig>,
    }
969
    /// Fuzzy search configuration.
    ///
    /// The `Default` value allows 2 edits, requires no exact prefix and
    /// permits up to 50 expansions.
    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct FuzzyConfig {
        /// Maximum edits.
        pub max_edits: u32,
        /// Prefix length.
        pub prefix_length: u32,
        /// Max expansions.
        pub max_expansions: u32,
    }
980
981    impl Default for FuzzyConfig {
982        fn default() -> Self {
983            Self {
984                max_edits: 2,
985                prefix_length: 0,
986                max_expansions: 50,
987            }
988        }
989    }
990
    /// Score configuration.
    ///
    /// The derived `Default` sets neither a boost nor a score function.
    #[derive(Debug, Clone, Default, Serialize, Deserialize)]
    pub struct ScoreConfig {
        /// Boost factor; `None` when no boost is applied.
        pub boost: Option<f64>,
        /// Score function; `None` when no function is set.
        pub function: Option<String>,
    }
999
1000    /// Highlight configuration.
1001    #[derive(Debug, Clone, Serialize, Deserialize)]
1002    pub struct HighlightConfig {
1003        /// Path to highlight.
1004        pub path: String,
1005        /// Max characters per highlight.
1006        pub max_chars_to_examine: u32,
1007        /// Max number of highlights.
1008        pub max_num_passages: u32,
1009    }
1010
1011    impl Default for HighlightConfig {
1012        fn default() -> Self {
1013            Self {
1014                path: String::new(),
1015                max_chars_to_examine: 500_000,
1016                max_num_passages: 5,
1017            }
1018        }
1019    }
1020
1021    impl AtlasSearchQuery {
1022        /// Create a new search query.
1023        pub fn new(query: impl Into<String>) -> Self {
1024            Self {
1025                query: query.into(),
1026                ..Default::default()
1027            }
1028        }
1029
1030        /// Add a field to search.
1031        pub fn path(mut self, path: impl Into<String>) -> Self {
1032            self.path.push(path.into());
1033            self
1034        }
1035
1036        /// Add multiple fields to search.
1037        pub fn paths(mut self, paths: impl IntoIterator<Item = impl Into<String>>) -> Self {
1038            self.path.extend(paths.into_iter().map(Into::into));
1039            self
1040        }
1041
1042        /// Enable fuzzy matching.
1043        pub fn fuzzy(mut self, config: FuzzyConfig) -> Self {
1044            self.fuzzy = Some(config);
1045            self
1046        }
1047
1048        /// Set score boost.
1049        pub fn boost(mut self, factor: f64) -> Self {
1050            self.score = Some(ScoreConfig {
1051                boost: Some(factor),
1052                function: None,
1053            });
1054            self
1055        }
1056
1057        /// Enable highlighting.
1058        pub fn highlight(mut self, path: impl Into<String>) -> Self {
1059            self.highlight = Some(HighlightConfig {
1060                path: path.into(),
1061                ..Default::default()
1062            });
1063            self
1064        }
1065
1066        /// Build the $search aggregation stage.
1067        pub fn to_search_stage(&self) -> serde_json::Value {
1068            let mut text = serde_json::json!({
1069                "query": self.query,
1070                "path": if self.path.len() == 1 {
1071                    serde_json::Value::String(self.path[0].clone())
1072                } else {
1073                    serde_json::Value::Array(self.path.iter().map(|p| serde_json::Value::String(p.clone())).collect())
1074                }
1075            });
1076
1077            if let Some(ref fuzzy) = self.fuzzy {
1078                text["fuzzy"] = serde_json::json!({
1079                    "maxEdits": fuzzy.max_edits,
1080                    "prefixLength": fuzzy.prefix_length,
1081                    "maxExpansions": fuzzy.max_expansions
1082                });
1083            }
1084
1085            let mut search = serde_json::json!({
1086                "$search": {
1087                    "text": text
1088                }
1089            });
1090
1091            if let Some(ref hl) = self.highlight {
1092                search["$search"]["highlight"] = serde_json::json!({
1093                    "path": hl.path,
1094                    "maxCharsToExamine": hl.max_chars_to_examine,
1095                    "maxNumPassages": hl.max_num_passages
1096                });
1097            }
1098
1099            search
1100        }
1101
1102        /// Build the aggregation pipeline for search.
1103        pub fn to_pipeline(&self) -> Vec<serde_json::Value> {
1104            let mut pipeline = vec![self.to_search_stage()];
1105
1106            // Add score metadata
1107            pipeline.push(serde_json::json!({
1108                "$addFields": {
1109                    "score": { "$meta": "searchScore" }
1110                }
1111            }));
1112
1113            // Add highlights if enabled
1114            if self.highlight.is_some() {
1115                pipeline.push(serde_json::json!({
1116                    "$addFields": {
1117                        "highlights": { "$meta": "searchHighlights" }
1118                    }
1119                }));
1120            }
1121
1122            pipeline
1123        }
1124    }
1125
1126    /// Builder for Atlas Search index.
1127    #[derive(Debug, Clone, Default)]
1128    pub struct AtlasSearchIndexBuilder {
1129        name: String,
1130        collection: Option<String>,
1131        analyzer: String,
1132        dynamic: bool,
1133        fields: Vec<SearchField>,
1134    }
1135
1136    impl AtlasSearchIndexBuilder {
1137        /// Create a new builder.
1138        pub fn new(name: impl Into<String>) -> Self {
1139            Self {
1140                name: name.into(),
1141                analyzer: "lucene.standard".to_string(),
1142                ..Default::default()
1143            }
1144        }
1145
1146        /// Set the collection.
1147        pub fn collection(mut self, collection: impl Into<String>) -> Self {
1148            self.collection = Some(collection.into());
1149            self
1150        }
1151
1152        /// Set the analyzer.
1153        pub fn analyzer(mut self, analyzer: impl Into<String>) -> Self {
1154            self.analyzer = analyzer.into();
1155            self
1156        }
1157
1158        /// Enable dynamic mapping.
1159        pub fn dynamic(mut self) -> Self {
1160            self.dynamic = true;
1161            self
1162        }
1163
1164        /// Add a text field.
1165        pub fn text_field(mut self, path: impl Into<String>) -> Self {
1166            self.fields.push(SearchField {
1167                path: path.into(),
1168                field_type: SearchFieldType::String,
1169                analyzer: None,
1170                facet: false,
1171            });
1172            self
1173        }
1174
1175        /// Add a faceted field.
1176        pub fn facet_field(mut self, path: impl Into<String>, field_type: SearchFieldType) -> Self {
1177            self.fields.push(SearchField {
1178                path: path.into(),
1179                field_type,
1180                analyzer: None,
1181                facet: true,
1182            });
1183            self
1184        }
1185
1186        /// Add an autocomplete field.
1187        pub fn autocomplete_field(mut self, path: impl Into<String>) -> Self {
1188            self.fields.push(SearchField {
1189                path: path.into(),
1190                field_type: SearchFieldType::Autocomplete,
1191                analyzer: None,
1192                facet: false,
1193            });
1194            self
1195        }
1196
1197        /// Build the index definition.
1198        pub fn build(self) -> serde_json::Value {
1199            let mut fields = serde_json::Map::new();
1200
1201            for field in &self.fields {
1202                let mut field_def = serde_json::json!({
1203                    "type": field.field_type.as_str()
1204                });
1205
1206                if let Some(ref analyzer) = field.analyzer {
1207                    field_def["analyzer"] = serde_json::Value::String(analyzer.clone());
1208                }
1209
1210                fields.insert(field.path.clone(), field_def);
1211            }
1212
1213            serde_json::json!({
1214                "name": self.name,
1215                "analyzer": self.analyzer,
1216                "mappings": {
1217                    "dynamic": self.dynamic,
1218                    "fields": fields
1219                }
1220            })
1221        }
1222    }
1223
1224    /// Helper to create a search query.
1225    pub fn search(query: impl Into<String>) -> AtlasSearchQuery {
1226        AtlasSearchQuery::new(query)
1227    }
1228
1229    /// Helper to create a search index builder.
1230    pub fn search_index(name: impl Into<String>) -> AtlasSearchIndexBuilder {
1231        AtlasSearchIndexBuilder::new(name)
1232    }
1233}
1234
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder carries query text, columns, mode and ranking into the
    /// built query.
    #[test]
    fn test_search_query_builder() {
        let built = SearchQuery::new("rust async")
            .columns(["title", "body"])
            .match_all()
            .with_ranking()
            .build();

        assert_eq!(built.query, "rust async");
        assert_eq!(built.columns, vec!["title", "body"]);
        assert_eq!(built.mode, SearchMode::All);
        assert!(built.ranking.enabled);
    }

    /// Ranked PostgreSQL search uses tsvector/tsquery matching and `ts_rank`.
    #[test]
    fn test_postgres_search_sql() {
        let query = SearchQuery::new("rust programming")
            .column("content")
            .with_ranking()
            .build();

        let generated = query.to_postgres_sql("posts").unwrap();
        for fragment in ["to_tsvector", "to_tsquery", "ts_rank", "@@"] {
            assert!(generated.sql.contains(fragment), "missing {:?}", fragment);
        }
    }

    /// MySQL search is expressed through `MATCH ... AGAINST`.
    #[test]
    fn test_mysql_search_sql() {
        let query = SearchQuery::new("database performance")
            .columns(["title", "body"])
            .match_any()
            .build();

        let generated = query.to_mysql_sql("articles").unwrap();
        for fragment in ["MATCH", "AGAINST"] {
            assert!(generated.sql.contains(fragment), "missing {:?}", fragment);
        }
    }

    /// Ranked SQLite search uses `MATCH` together with `bm25`.
    #[test]
    fn test_sqlite_search_sql() {
        let query = SearchQuery::new("web development")
            .column("content")
            .with_ranking()
            .build();

        let generated = query.to_sqlite_sql("posts", "posts_fts").unwrap();
        for fragment in ["MATCH", "bm25"] {
            assert!(generated.sql.contains(fragment), "missing {:?}", fragment);
        }
    }

    /// Unranked MSSQL phrase search uses `CONTAINS`.
    #[test]
    fn test_mssql_search_sql() {
        let query = SearchQuery::new("machine learning")
            .columns(["title", "abstract"])
            .phrase()
            .build();

        let generated = query.to_mssql_sql("papers").unwrap();
        assert!(generated.sql.contains("CONTAINS"));
    }

    /// Ranked MSSQL search switches to `CONTAINSTABLE` and exposes `RANK`.
    #[test]
    fn test_mssql_ranked_search() {
        let query = SearchQuery::new("neural network")
            .column("content")
            .with_ranking()
            .build();

        let generated = query.to_mssql_sql("papers").unwrap();
        for fragment in ["CONTAINSTABLE", "RANK"] {
            assert!(generated.sql.contains(fragment), "missing {:?}", fragment);
        }
    }

    /// PostgreSQL full-text indexes are GIN indexes over `to_tsvector`.
    #[test]
    fn test_fulltext_index_postgres() {
        let definition = FullTextIndex::builder("posts_search_idx")
            .on_table("posts")
            .columns(["title", "body"])
            .language(SearchLanguage::English)
            .build()
            .unwrap();

        let ddl = definition.to_postgres_sql();
        for fragment in ["CREATE INDEX posts_search_idx", "USING GIN", "to_tsvector"] {
            assert!(ddl.contains(fragment), "missing {:?}", fragment);
        }
    }

    /// MySQL full-text index DDL is generated verbatim.
    #[test]
    fn test_fulltext_index_mysql() {
        let definition = FullTextIndex::builder("posts_fulltext")
            .on_table("posts")
            .columns(["title", "body"])
            .build()
            .unwrap();

        let ddl = definition.to_mysql_sql();
        assert_eq!(
            ddl,
            "CREATE FULLTEXT INDEX posts_fulltext ON posts (title, body);"
        );
    }

    /// SQLite full-text indexes are FTS5 virtual tables.
    #[test]
    fn test_fulltext_index_sqlite() {
        let definition = FullTextIndex::builder("posts_fts")
            .on_table("posts")
            .columns(["title", "content"])
            .build()
            .unwrap();

        let ddl = definition.to_sqlite_sql();
        for fragment in ["CREATE VIRTUAL TABLE", "USING fts5"] {
            assert!(ddl.contains(fragment), "missing {:?}", fragment);
        }
    }

    /// Highlight setters store the enabled flag, tags and maximum length.
    #[test]
    fn test_highlight_options() {
        let highlight = HighlightOptions::default()
            .enabled()
            .tags("<mark>", "</mark>")
            .max_length(200)
            .max_fragments(5);

        assert!(highlight.enabled);
        assert_eq!(highlight.start_tag, "<mark>");
        assert_eq!(highlight.end_tag, "</mark>");
        assert_eq!(highlight.max_length, Some(200));
    }

    /// Fuzzy setters store the enabled flag, edit distance and threshold.
    #[test]
    fn test_fuzzy_options() {
        let fuzzy = FuzzyOptions::default()
            .enabled()
            .max_edits(1)
            .threshold(0.5);

        assert!(fuzzy.enabled);
        assert_eq!(fuzzy.max_edits, 1);
        assert_eq!(fuzzy.threshold, 0.5);
    }

    /// Ranking options keep the alias and one weight per weighted column.
    #[test]
    fn test_ranking_with_weights() {
        let ranking = RankingOptions::default()
            .enabled()
            .alias("relevance")
            .weight("title", 2.0)
            .weight("body", 1.0);

        assert_eq!(ranking.score_alias, "relevance");
        assert_eq!(ranking.weights.len(), 2);
    }

    mod mongodb_tests {
        use super::super::mongodb::*;

        /// The `$search` stage carries the query text under `text.query`.
        #[test]
        fn test_atlas_search_query() {
            let atlas_query = search("rust async")
                .paths(["title", "body"])
                .fuzzy(FuzzyConfig::default())
                .boost(2.0);

            let search_stage = atlas_query.to_search_stage();
            assert!(search_stage["$search"]["text"]["query"].is_string());
        }

        /// The pipeline starts with `$search` and has at least one more stage.
        #[test]
        fn test_atlas_search_pipeline() {
            let atlas_query = search("database").path("content").highlight("content");

            let stages = atlas_query.to_pipeline();
            assert!(stages.len() >= 2);
            assert!(stages[0]["$search"].is_object());
        }

        /// The index definition exposes its name and the dynamic-mapping flag.
        #[test]
        fn test_atlas_search_index_builder() {
            let definition = search_index("default")
                .collection("posts")
                .analyzer("lucene.english")
                .dynamic()
                .text_field("title")
                .text_field("body")
                .facet_field("category", SearchFieldType::String)
                .build();

            assert!(definition["name"].is_string());
            assert!(definition["mappings"]["dynamic"].as_bool().unwrap());
        }
    }
}