Skip to main content

prax_query/
search.rs

1//! Full-text search support across database backends.
2//!
3//! This module provides a unified API for full-text search across different
4//! database backends, abstracting over their specific implementations.
5//!
6//! # Supported Features
7//!
8//! | Feature          | PostgreSQL   | MySQL      | SQLite  | MSSQL   | MongoDB      |
9//! |------------------|--------------|------------|---------|---------|--------------|
10//! | Full-Text Index  | ✅ tsvector  | ✅ FULLTEXT| ✅ FTS5 | ✅      | ✅ Atlas     |
11//! | Search Ranking   | ✅ ts_rank   | ✅         | ✅ bm25 | ✅ RANK | ✅ score     |
12//! | Phrase Search    | ✅           | ✅         | ✅      | ✅      | ✅           |
13//! | Faceted Search   | ✅           | ❌         | ❌      | ❌      | ✅           |
14//! | Fuzzy Search     | ✅ pg_trgm   | ❌         | ❌      | ✅      | ✅           |
15//! | Highlighting     | ✅ ts_headline| ❌        | ✅ highlight| ❌  | ✅ highlight |
16//!
17//! # Example Usage
18//!
19//! ```rust,ignore
20//! use prax_query::search::{SearchQuery, SearchOptions};
21//!
22//! // Simple search
23//! let search = SearchQuery::new("rust async programming")
24//!     .columns(["title", "body"])
25//!     .with_ranking()
26//!     .build();
27//!
28//! // Generate SQL
29//! let sql = search.to_postgres_sql("posts")?;
30//! ```
31
32use std::borrow::Cow;
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{QueryError, QueryResult};
37use crate::sql::DatabaseType;
38
39/// Full-text search mode/operator.
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
41pub enum SearchMode {
42    /// Match any word (OR).
43    #[default]
44    Any,
45    /// Match all words (AND).
46    All,
47    /// Match exact phrase.
48    Phrase,
49    /// Boolean mode with operators (+, -, *).
50    Boolean,
51    /// Natural language mode.
52    Natural,
53}
54
55impl SearchMode {
56    /// Convert to PostgreSQL tsquery format.
57    pub fn to_postgres_operator(&self) -> &'static str {
58        match self {
59            Self::Any | Self::Natural => " | ",
60            Self::All | Self::Boolean => " & ",
61            Self::Phrase => " <-> ",
62        }
63    }
64}
65
66/// Text search language/configuration.
67#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
68pub enum SearchLanguage {
69    /// Simple (no stemming).
70    Simple,
71    /// English.
72    #[default]
73    English,
74    /// Spanish.
75    Spanish,
76    /// French.
77    French,
78    /// German.
79    German,
80    /// Custom language/configuration name.
81    Custom(String),
82}
83
84impl SearchLanguage {
85    /// Get the PostgreSQL text search configuration name.
86    pub fn to_postgres_config(&self) -> Cow<'static, str> {
87        match self {
88            Self::Simple => Cow::Borrowed("simple"),
89            Self::English => Cow::Borrowed("english"),
90            Self::Spanish => Cow::Borrowed("spanish"),
91            Self::French => Cow::Borrowed("french"),
92            Self::German => Cow::Borrowed("german"),
93            Self::Custom(name) => Cow::Owned(name.clone()),
94        }
95    }
96
97    /// Get the SQLite FTS5 tokenizer.
98    pub fn to_sqlite_tokenizer(&self) -> &'static str {
99        match self {
100            Self::Simple => "unicode61",
101            Self::English => "porter unicode61",
102            _ => "unicode61", // SQLite has limited language support
103        }
104    }
105}
106
107/// Ranking/scoring options for search results.
108#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
109pub struct RankingOptions {
110    /// Whether to include ranking score.
111    pub enabled: bool,
112    /// Column alias for the score.
113    pub score_alias: String,
114    /// Normalization option (PostgreSQL-specific).
115    pub normalization: u32,
116    /// Field weights (field_name -> weight).
117    pub weights: Vec<(String, f32)>,
118}
119
120impl Default for RankingOptions {
121    fn default() -> Self {
122        Self {
123            enabled: false,
124            score_alias: "search_score".to_string(),
125            normalization: 0,
126            weights: Vec::new(),
127        }
128    }
129}
130
131impl RankingOptions {
132    /// Enable ranking.
133    pub fn enabled(mut self) -> Self {
134        self.enabled = true;
135        self
136    }
137
138    /// Set the score column alias.
139    pub fn alias(mut self, alias: impl Into<String>) -> Self {
140        self.score_alias = alias.into();
141        self
142    }
143
144    /// Set PostgreSQL normalization option.
145    pub fn normalization(mut self, norm: u32) -> Self {
146        self.normalization = norm;
147        self
148    }
149
150    /// Add a field weight.
151    pub fn weight(mut self, field: impl Into<String>, weight: f32) -> Self {
152        self.weights.push((field.into(), weight));
153        self
154    }
155}
156
157/// Highlighting options for search results.
158#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
159pub struct HighlightOptions {
160    /// Whether to include highlights.
161    pub enabled: bool,
162    /// Start tag for highlights.
163    pub start_tag: String,
164    /// End tag for highlights.
165    pub end_tag: String,
166    /// Maximum length of highlighted text.
167    pub max_length: Option<u32>,
168    /// Number of fragments to return.
169    pub max_fragments: Option<u32>,
170    /// Fragment delimiter.
171    pub delimiter: String,
172}
173
174impl Default for HighlightOptions {
175    fn default() -> Self {
176        Self {
177            enabled: false,
178            start_tag: "<b>".to_string(),
179            end_tag: "</b>".to_string(),
180            max_length: Some(150),
181            max_fragments: Some(3),
182            delimiter: " ... ".to_string(),
183        }
184    }
185}
186
187impl HighlightOptions {
188    /// Enable highlighting.
189    pub fn enabled(mut self) -> Self {
190        self.enabled = true;
191        self
192    }
193
194    /// Set highlight tags.
195    pub fn tags(mut self, start: impl Into<String>, end: impl Into<String>) -> Self {
196        self.start_tag = start.into();
197        self.end_tag = end.into();
198        self
199    }
200
201    /// Set maximum text length.
202    pub fn max_length(mut self, length: u32) -> Self {
203        self.max_length = Some(length);
204        self
205    }
206
207    /// Set maximum number of fragments.
208    pub fn max_fragments(mut self, count: u32) -> Self {
209        self.max_fragments = Some(count);
210        self
211    }
212
213    /// Set fragment delimiter.
214    pub fn delimiter(mut self, delimiter: impl Into<String>) -> Self {
215        self.delimiter = delimiter.into();
216        self
217    }
218}
219
220/// Fuzzy search options.
221#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
222pub struct FuzzyOptions {
223    /// Whether to enable fuzzy matching.
224    pub enabled: bool,
225    /// Maximum edit distance (Levenshtein).
226    pub max_edits: u32,
227    /// Prefix length that must match exactly.
228    pub prefix_length: u32,
229    /// Similarity threshold (0.0-1.0).
230    pub threshold: f32,
231}
232
233impl Default for FuzzyOptions {
234    fn default() -> Self {
235        Self {
236            enabled: false,
237            max_edits: 2,
238            prefix_length: 0,
239            threshold: 0.3,
240        }
241    }
242}
243
244impl FuzzyOptions {
245    /// Enable fuzzy search.
246    pub fn enabled(mut self) -> Self {
247        self.enabled = true;
248        self
249    }
250
251    /// Set maximum edit distance.
252    pub fn max_edits(mut self, edits: u32) -> Self {
253        self.max_edits = edits;
254        self
255    }
256
257    /// Set prefix length.
258    pub fn prefix_length(mut self, length: u32) -> Self {
259        self.prefix_length = length;
260        self
261    }
262
263    /// Set similarity threshold.
264    pub fn threshold(mut self, threshold: f32) -> Self {
265        self.threshold = threshold;
266        self
267    }
268}
269
270/// A full-text search query.
271#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
272pub struct SearchQuery {
273    /// Search terms.
274    pub query: String,
275    /// Columns to search in.
276    pub columns: Vec<String>,
277    /// Search mode.
278    pub mode: SearchMode,
279    /// Language/configuration.
280    pub language: SearchLanguage,
281    /// Ranking options.
282    pub ranking: RankingOptions,
283    /// Highlighting options.
284    pub highlight: HighlightOptions,
285    /// Fuzzy search options.
286    pub fuzzy: FuzzyOptions,
287    /// Minimum word length.
288    pub min_word_length: Option<u32>,
289    /// Filter by category/field (for faceted search).
290    pub filters: Vec<(String, String)>,
291}
292
293impl SearchQuery {
294    /// Create a new search query.
295    pub fn new(query: impl Into<String>) -> SearchQueryBuilder {
296        SearchQueryBuilder::new(query)
297    }
298
299    /// Generate PostgreSQL full-text search SQL.
300    pub fn to_postgres_sql(&self, table: &str) -> QueryResult<SearchSql> {
301        let config = self.language.to_postgres_config();
302
303        // Build tsvector expression
304        let tsvector = if self.columns.len() == 1 {
305            format!("to_tsvector('{}', {})", config, self.columns[0])
306        } else {
307            let concat_cols = self.columns.join(" || ' ' || ");
308            format!("to_tsvector('{}', {})", config, concat_cols)
309        };
310
311        // Build tsquery expression
312        let words: Vec<&str> = self.query.split_whitespace().collect();
313        let tsquery_parts: Vec<String> = words
314            .iter()
315            .map(|w| format!("'{}'", w.replace('\'', "''")))
316            .collect();
317        let tsquery = format!(
318            "to_tsquery('{}', '{}')",
319            config,
320            tsquery_parts.join(self.mode.to_postgres_operator())
321        );
322
323        // Build WHERE clause
324        let where_clause = format!("{} @@ {}", tsvector, tsquery);
325
326        // Build SELECT columns
327        let mut select_cols = vec!["*".to_string()];
328
329        // Add ranking
330        if self.ranking.enabled {
331            let weights = if self.ranking.weights.is_empty() {
332                String::new()
333            } else {
334                // PostgreSQL uses setweight for field weights
335                String::new()
336            };
337            select_cols.push(format!(
338                "ts_rank({}{}, {}) AS {}",
339                tsvector, weights, tsquery, self.ranking.score_alias
340            ));
341        }
342
343        // Add highlighting
344        if self.highlight.enabled && !self.columns.is_empty() {
345            let col = &self.columns[0];
346            select_cols.push(format!(
347                "ts_headline('{}', {}, {}, 'StartSel={}, StopSel={}, MaxWords={}, MaxFragments={}') AS highlighted",
348                config,
349                col,
350                tsquery,
351                self.highlight.start_tag,
352                self.highlight.end_tag,
353                self.highlight.max_length.unwrap_or(35),
354                self.highlight.max_fragments.unwrap_or(3)
355            ));
356        }
357
358        let sql = format!(
359            "SELECT {} FROM {} WHERE {}",
360            select_cols.join(", "),
361            table,
362            where_clause
363        );
364
365        let order_by = if self.ranking.enabled {
366            Some(format!("{} DESC", self.ranking.score_alias))
367        } else {
368            None
369        };
370
371        Ok(SearchSql {
372            sql,
373            order_by,
374            params: vec![],
375        })
376    }
377
378    /// Generate MySQL full-text search SQL.
379    pub fn to_mysql_sql(&self, table: &str) -> QueryResult<SearchSql> {
380        let columns = self.columns.join(", ");
381
382        // MySQL MATCH ... AGAINST syntax
383        let match_mode = match self.mode {
384            SearchMode::Natural | SearchMode::Any => "",
385            SearchMode::Boolean | SearchMode::All => " IN BOOLEAN MODE",
386            SearchMode::Phrase => " IN BOOLEAN MODE", // Use quotes for phrase
387        };
388
389        let search_query = if self.mode == SearchMode::Phrase {
390            format!("\"{}\"", self.query)
391        } else if self.mode == SearchMode::All {
392            // Add + prefix for required terms
393            self.query
394                .split_whitespace()
395                .map(|w| format!("+{}", w))
396                .collect::<Vec<_>>()
397                .join(" ")
398        } else {
399            self.query.clone()
400        };
401
402        let match_expr = format!(
403            "MATCH({}) AGAINST('{}'{}))",
404            columns, search_query, match_mode
405        );
406
407        let mut select_cols = vec!["*".to_string()];
408
409        // Add ranking (MySQL returns relevance from MATCH)
410        if self.ranking.enabled {
411            select_cols.push(format!("{} AS {}", match_expr, self.ranking.score_alias));
412        }
413
414        let sql = format!(
415            "SELECT {} FROM {} WHERE {}",
416            select_cols.join(", "),
417            table,
418            match_expr
419        );
420
421        let order_by = if self.ranking.enabled {
422            Some(format!("{} DESC", self.ranking.score_alias))
423        } else {
424            None
425        };
426
427        Ok(SearchSql {
428            sql,
429            order_by,
430            params: vec![],
431        })
432    }
433
434    /// Generate SQLite FTS5 search SQL.
435    pub fn to_sqlite_sql(&self, table: &str, fts_table: &str) -> QueryResult<SearchSql> {
436        let search_query = match self.mode {
437            SearchMode::Phrase => format!("\"{}\"", self.query),
438            SearchMode::All => self
439                .query
440                .split_whitespace()
441                .collect::<Vec<_>>()
442                .join(" AND "),
443            SearchMode::Any => self
444                .query
445                .split_whitespace()
446                .collect::<Vec<_>>()
447                .join(" OR "),
448            _ => self.query.clone(),
449        };
450
451        let mut select_cols = vec![format!("{}.*", table)];
452
453        // Add ranking (SQLite uses bm25)
454        if self.ranking.enabled {
455            select_cols.push(format!(
456                "bm25({}) AS {}",
457                fts_table, self.ranking.score_alias
458            ));
459        }
460
461        // Add highlighting
462        if self.highlight.enabled && !self.columns.is_empty() {
463            select_cols.push(format!(
464                "highlight({}, 0, '{}', '{}') AS highlighted",
465                fts_table, self.highlight.start_tag, self.highlight.end_tag
466            ));
467        }
468
469        let sql = format!(
470            "SELECT {} FROM {} JOIN {} ON {}.rowid = {}.rowid WHERE {} MATCH '{}'",
471            select_cols.join(", "),
472            table,
473            fts_table,
474            table,
475            fts_table,
476            fts_table,
477            search_query
478        );
479
480        let order_by = if self.ranking.enabled {
481            Some(self.ranking.score_alias.to_string())
482        } else {
483            None
484        };
485
486        Ok(SearchSql {
487            sql,
488            order_by,
489            params: vec![],
490        })
491    }
492
493    /// Generate MSSQL full-text search SQL.
494    pub fn to_mssql_sql(&self, table: &str) -> QueryResult<SearchSql> {
495        let columns = self.columns.join(", ");
496
497        let contains_expr = match self.mode {
498            SearchMode::Phrase => format!("\"{}\"", self.query),
499            SearchMode::All => {
500                let terms: Vec<String> = self
501                    .query
502                    .split_whitespace()
503                    .map(|w| format!("\"{}\"", w))
504                    .collect();
505                terms.join(" AND ")
506            }
507            SearchMode::Any | SearchMode::Natural => {
508                let terms: Vec<String> = self
509                    .query
510                    .split_whitespace()
511                    .map(|w| format!("\"{}\"", w))
512                    .collect();
513                terms.join(" OR ")
514            }
515            SearchMode::Boolean => self.query.clone(),
516        };
517
518        let select_cols = ["*".to_string()];
519
520        // Add ranking (MSSQL uses CONTAINSTABLE for ranking)
521        if self.ranking.enabled {
522            let sql = format!(
523                "SELECT {}.*, ft.RANK AS {} FROM {} \
524                 INNER JOIN CONTAINSTABLE({}, ({}), '{}') AS ft \
525                 ON {}.id = ft.[KEY]",
526                table, self.ranking.score_alias, table, table, columns, contains_expr, table
527            );
528
529            return Ok(SearchSql {
530                sql,
531                order_by: Some(format!("{} DESC", self.ranking.score_alias)),
532                params: vec![],
533            });
534        }
535
536        let sql = format!(
537            "SELECT {} FROM {} WHERE CONTAINS(({}), '{}')",
538            select_cols.join(", "),
539            table,
540            columns,
541            contains_expr
542        );
543
544        Ok(SearchSql {
545            sql,
546            order_by: None,
547            params: vec![],
548        })
549    }
550
551    /// Generate search SQL for the specified database type.
552    pub fn to_sql(&self, table: &str, db_type: DatabaseType) -> QueryResult<SearchSql> {
553        match db_type {
554            DatabaseType::PostgreSQL => self.to_postgres_sql(table),
555            DatabaseType::MySQL => self.to_mysql_sql(table),
556            DatabaseType::SQLite => self.to_sqlite_sql(table, &format!("{}_fts", table)),
557            DatabaseType::MSSQL => self.to_mssql_sql(table),
558        }
559    }
560}
561
562/// Builder for search queries.
563#[derive(Debug, Clone)]
564pub struct SearchQueryBuilder {
565    query: String,
566    columns: Vec<String>,
567    mode: SearchMode,
568    language: SearchLanguage,
569    ranking: RankingOptions,
570    highlight: HighlightOptions,
571    fuzzy: FuzzyOptions,
572    min_word_length: Option<u32>,
573    filters: Vec<(String, String)>,
574}
575
576impl SearchQueryBuilder {
577    /// Create a new search query builder.
578    pub fn new(query: impl Into<String>) -> Self {
579        Self {
580            query: query.into(),
581            columns: Vec::new(),
582            mode: SearchMode::default(),
583            language: SearchLanguage::default(),
584            ranking: RankingOptions::default(),
585            highlight: HighlightOptions::default(),
586            fuzzy: FuzzyOptions::default(),
587            min_word_length: None,
588            filters: Vec::new(),
589        }
590    }
591
592    /// Add a column to search.
593    pub fn column(mut self, column: impl Into<String>) -> Self {
594        self.columns.push(column.into());
595        self
596    }
597
598    /// Add multiple columns to search.
599    pub fn columns(mut self, columns: impl IntoIterator<Item = impl Into<String>>) -> Self {
600        self.columns.extend(columns.into_iter().map(Into::into));
601        self
602    }
603
604    /// Set the search mode.
605    pub fn mode(mut self, mode: SearchMode) -> Self {
606        self.mode = mode;
607        self
608    }
609
610    /// Set to match all words.
611    pub fn match_all(self) -> Self {
612        self.mode(SearchMode::All)
613    }
614
615    /// Set to match any word.
616    pub fn match_any(self) -> Self {
617        self.mode(SearchMode::Any)
618    }
619
620    /// Set to match exact phrase.
621    pub fn phrase(self) -> Self {
622        self.mode(SearchMode::Phrase)
623    }
624
625    /// Set to boolean mode.
626    pub fn boolean(self) -> Self {
627        self.mode(SearchMode::Boolean)
628    }
629
630    /// Set the search language.
631    pub fn language(mut self, language: SearchLanguage) -> Self {
632        self.language = language;
633        self
634    }
635
636    /// Enable ranking with default options.
637    pub fn with_ranking(mut self) -> Self {
638        self.ranking.enabled = true;
639        self
640    }
641
642    /// Configure ranking options.
643    pub fn ranking(mut self, options: RankingOptions) -> Self {
644        self.ranking = options;
645        self
646    }
647
648    /// Enable highlighting with default options.
649    pub fn with_highlight(mut self) -> Self {
650        self.highlight.enabled = true;
651        self
652    }
653
654    /// Configure highlighting options.
655    pub fn highlight(mut self, options: HighlightOptions) -> Self {
656        self.highlight = options;
657        self
658    }
659
660    /// Enable fuzzy matching with default options.
661    pub fn with_fuzzy(mut self) -> Self {
662        self.fuzzy.enabled = true;
663        self
664    }
665
666    /// Configure fuzzy search options.
667    pub fn fuzzy(mut self, options: FuzzyOptions) -> Self {
668        self.fuzzy = options;
669        self
670    }
671
672    /// Set minimum word length.
673    pub fn min_word_length(mut self, length: u32) -> Self {
674        self.min_word_length = Some(length);
675        self
676    }
677
678    /// Add a filter for faceted search.
679    pub fn filter(mut self, field: impl Into<String>, value: impl Into<String>) -> Self {
680        self.filters.push((field.into(), value.into()));
681        self
682    }
683
684    /// Build the search query.
685    pub fn build(self) -> SearchQuery {
686        SearchQuery {
687            query: self.query,
688            columns: self.columns,
689            mode: self.mode,
690            language: self.language,
691            ranking: self.ranking,
692            highlight: self.highlight,
693            fuzzy: self.fuzzy,
694            min_word_length: self.min_word_length,
695            filters: self.filters,
696        }
697    }
698}
699
/// The SQL produced for a search, split into the pieces a caller assembles.
#[derive(Debug, Clone)]
pub struct SearchSql {
    /// The `SELECT ... FROM ... WHERE ...` statement, without any ordering.
    pub sql: String,
    /// Ordering expression to append after `ORDER BY`, if ranking calls for one.
    pub order_by: Option<String>,
    /// Query parameters.
    pub params: Vec<String>,
}
710
711/// Full-text index definition.
712#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
713pub struct FullTextIndex {
714    /// Index name.
715    pub name: String,
716    /// Table name.
717    pub table: String,
718    /// Columns in the index.
719    pub columns: Vec<String>,
720    /// Language/configuration.
721    pub language: SearchLanguage,
722    /// Index type (for MySQL: FULLTEXT).
723    pub index_type: Option<String>,
724}
725
726impl FullTextIndex {
727    /// Create a new full-text index builder.
728    pub fn builder(name: impl Into<String>) -> FullTextIndexBuilder {
729        FullTextIndexBuilder::new(name)
730    }
731
732    /// Generate PostgreSQL CREATE INDEX SQL.
733    pub fn to_postgres_sql(&self) -> String {
734        let config = self.language.to_postgres_config();
735        let columns_expr = if self.columns.len() == 1 {
736            format!("to_tsvector('{}', {})", config, self.columns[0])
737        } else {
738            let concat = self.columns.join(" || ' ' || ");
739            format!("to_tsvector('{}', {})", config, concat)
740        };
741
742        format!(
743            "CREATE INDEX {} ON {} USING GIN ({});",
744            self.name, self.table, columns_expr
745        )
746    }
747
748    /// Generate MySQL CREATE INDEX SQL.
749    pub fn to_mysql_sql(&self) -> String {
750        format!(
751            "CREATE FULLTEXT INDEX {} ON {} ({});",
752            self.name,
753            self.table,
754            self.columns.join(", ")
755        )
756    }
757
758    /// Generate SQLite FTS5 virtual table SQL.
759    pub fn to_sqlite_sql(&self) -> String {
760        let tokenizer = self.language.to_sqlite_tokenizer();
761        format!(
762            "CREATE VIRTUAL TABLE {}_fts USING fts5({}, content='{}', tokenize='{}');",
763            self.table,
764            self.columns.join(", "),
765            self.table,
766            tokenizer
767        )
768    }
769
770    /// Generate MSSQL full-text catalog and index SQL.
771    pub fn to_mssql_sql(&self, catalog_name: &str) -> Vec<String> {
772        vec![
773            format!("CREATE FULLTEXT CATALOG {} AS DEFAULT;", catalog_name),
774            format!(
775                "CREATE FULLTEXT INDEX ON {} ({}) KEY INDEX PK_{} ON {};",
776                self.table,
777                self.columns.join(", "),
778                self.table,
779                catalog_name
780            ),
781        ]
782    }
783
784    /// Generate index SQL for the specified database type.
785    pub fn to_sql(&self, db_type: DatabaseType) -> QueryResult<Vec<String>> {
786        match db_type {
787            DatabaseType::PostgreSQL => Ok(vec![self.to_postgres_sql()]),
788            DatabaseType::MySQL => Ok(vec![self.to_mysql_sql()]),
789            DatabaseType::SQLite => Ok(vec![self.to_sqlite_sql()]),
790            DatabaseType::MSSQL => Ok(self.to_mssql_sql(&format!("{}_catalog", self.table))),
791        }
792    }
793
794    /// Generate DROP INDEX SQL.
795    pub fn to_drop_sql(&self, db_type: DatabaseType) -> QueryResult<String> {
796        match db_type {
797            DatabaseType::PostgreSQL => Ok(format!("DROP INDEX IF EXISTS {};", self.name)),
798            DatabaseType::MySQL => Ok(format!("DROP INDEX {} ON {};", self.name, self.table)),
799            DatabaseType::SQLite => Ok(format!("DROP TABLE IF EXISTS {}_fts;", self.table)),
800            DatabaseType::MSSQL => Ok(format!(
801                "DROP FULLTEXT INDEX ON {}; DROP FULLTEXT CATALOG {}_catalog;",
802                self.table, self.table
803            )),
804        }
805    }
806}
807
808/// Builder for full-text indexes.
809#[derive(Debug, Clone)]
810pub struct FullTextIndexBuilder {
811    name: String,
812    table: Option<String>,
813    columns: Vec<String>,
814    language: SearchLanguage,
815    index_type: Option<String>,
816}
817
818impl FullTextIndexBuilder {
819    /// Create a new builder.
820    pub fn new(name: impl Into<String>) -> Self {
821        Self {
822            name: name.into(),
823            table: None,
824            columns: Vec::new(),
825            language: SearchLanguage::default(),
826            index_type: None,
827        }
828    }
829
830    /// Set the table name.
831    pub fn on_table(mut self, table: impl Into<String>) -> Self {
832        self.table = Some(table.into());
833        self
834    }
835
836    /// Add a column.
837    pub fn column(mut self, column: impl Into<String>) -> Self {
838        self.columns.push(column.into());
839        self
840    }
841
842    /// Add multiple columns.
843    pub fn columns(mut self, columns: impl IntoIterator<Item = impl Into<String>>) -> Self {
844        self.columns.extend(columns.into_iter().map(Into::into));
845        self
846    }
847
848    /// Set the language.
849    pub fn language(mut self, language: SearchLanguage) -> Self {
850        self.language = language;
851        self
852    }
853
854    /// Build the index definition.
855    pub fn build(self) -> QueryResult<FullTextIndex> {
856        let table = self.table.ok_or_else(|| {
857            QueryError::invalid_input("table", "Must specify table with on_table()")
858        })?;
859
860        if self.columns.is_empty() {
861            return Err(QueryError::invalid_input(
862                "columns",
863                "Must specify at least one column",
864            ));
865        }
866
867        Ok(FullTextIndex {
868            name: self.name,
869            table,
870            columns: self.columns,
871            language: self.language,
872            index_type: self.index_type,
873        })
874    }
875}
876
877/// MongoDB Atlas Search support.
878pub mod mongodb {
879    use serde::{Deserialize, Serialize};
880
881    /// Atlas Search index definition.
882    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
883    pub struct AtlasSearchIndex {
884        /// Index name.
885        pub name: String,
886        /// Collection name.
887        pub collection: String,
888        /// Analyzer to use.
889        pub analyzer: String,
890        /// Field mappings.
891        pub mappings: SearchMappings,
892    }
893
894    /// Field mappings for Atlas Search.
895    #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
896    pub struct SearchMappings {
897        /// Whether to dynamically map fields.
898        pub dynamic: bool,
899        /// Explicit field definitions.
900        pub fields: Vec<SearchField>,
901    }
902
903    /// A searchable field definition.
904    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
905    pub struct SearchField {
906        /// Field path.
907        pub path: String,
908        /// Field type.
909        pub field_type: SearchFieldType,
910        /// Analyzer for text fields.
911        pub analyzer: Option<String>,
912        /// Whether to store for faceting.
913        pub facet: bool,
914    }
915
916    /// Atlas Search field types.
917    #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
918    pub enum SearchFieldType {
919        /// String/text field.
920        String,
921        /// Number field.
922        Number,
923        /// Date field.
924        Date,
925        /// Boolean field.
926        Boolean,
927        /// ObjectId field.
928        ObjectId,
929        /// Geo field.
930        Geo,
931        /// Autocomplete field.
932        Autocomplete,
933    }
934
935    impl SearchFieldType {
936        /// Get the Atlas Search type name.
937        pub fn as_str(&self) -> &'static str {
938            match self {
939                Self::String => "string",
940                Self::Number => "number",
941                Self::Date => "date",
942                Self::Boolean => "boolean",
943                Self::ObjectId => "objectId",
944                Self::Geo => "geo",
945                Self::Autocomplete => "autocomplete",
946            }
947        }
948    }
949
950    /// Atlas Search query builder.
951    #[derive(Debug, Clone, Default)]
952    pub struct AtlasSearchQuery {
953        /// Search text.
954        pub query: String,
955        /// Fields to search.
956        pub path: Vec<String>,
957        /// Fuzzy options.
958        pub fuzzy: Option<FuzzyConfig>,
959        /// Score options.
960        pub score: Option<ScoreConfig>,
961        /// Highlight options.
962        pub highlight: Option<HighlightConfig>,
963    }
964
965    /// Fuzzy search configuration.
966    #[derive(Debug, Clone, Serialize, Deserialize)]
967    pub struct FuzzyConfig {
968        /// Maximum edits.
969        pub max_edits: u32,
970        /// Prefix length.
971        pub prefix_length: u32,
972        /// Max expansions.
973        pub max_expansions: u32,
974    }
975
976    impl Default for FuzzyConfig {
977        fn default() -> Self {
978            Self {
979                max_edits: 2,
980                prefix_length: 0,
981                max_expansions: 50,
982            }
983        }
984    }
985
986    /// Score configuration.
987    #[derive(Debug, Clone, Default, Serialize, Deserialize)]
988    pub struct ScoreConfig {
989        /// Boost factor.
990        pub boost: Option<f64>,
991        /// Score function.
992        pub function: Option<String>,
993    }
994
995    /// Highlight configuration.
996    #[derive(Debug, Clone, Serialize, Deserialize)]
997    pub struct HighlightConfig {
998        /// Path to highlight.
999        pub path: String,
1000        /// Max characters per highlight.
1001        pub max_chars_to_examine: u32,
1002        /// Max number of highlights.
1003        pub max_num_passages: u32,
1004    }
1005
1006    impl Default for HighlightConfig {
1007        fn default() -> Self {
1008            Self {
1009                path: String::new(),
1010                max_chars_to_examine: 500_000,
1011                max_num_passages: 5,
1012            }
1013        }
1014    }
1015
1016    impl AtlasSearchQuery {
1017        /// Create a new search query.
1018        pub fn new(query: impl Into<String>) -> Self {
1019            Self {
1020                query: query.into(),
1021                ..Default::default()
1022            }
1023        }
1024
1025        /// Add a field to search.
1026        pub fn path(mut self, path: impl Into<String>) -> Self {
1027            self.path.push(path.into());
1028            self
1029        }
1030
1031        /// Add multiple fields to search.
1032        pub fn paths(mut self, paths: impl IntoIterator<Item = impl Into<String>>) -> Self {
1033            self.path.extend(paths.into_iter().map(Into::into));
1034            self
1035        }
1036
1037        /// Enable fuzzy matching.
1038        pub fn fuzzy(mut self, config: FuzzyConfig) -> Self {
1039            self.fuzzy = Some(config);
1040            self
1041        }
1042
1043        /// Set score boost.
1044        pub fn boost(mut self, factor: f64) -> Self {
1045            self.score = Some(ScoreConfig {
1046                boost: Some(factor),
1047                function: None,
1048            });
1049            self
1050        }
1051
1052        /// Enable highlighting.
1053        pub fn highlight(mut self, path: impl Into<String>) -> Self {
1054            self.highlight = Some(HighlightConfig {
1055                path: path.into(),
1056                ..Default::default()
1057            });
1058            self
1059        }
1060
1061        /// Build the $search aggregation stage.
1062        pub fn to_search_stage(&self) -> serde_json::Value {
1063            let mut text = serde_json::json!({
1064                "query": self.query,
1065                "path": if self.path.len() == 1 {
1066                    serde_json::Value::String(self.path[0].clone())
1067                } else {
1068                    serde_json::Value::Array(self.path.iter().map(|p| serde_json::Value::String(p.clone())).collect())
1069                }
1070            });
1071
1072            if let Some(ref fuzzy) = self.fuzzy {
1073                text["fuzzy"] = serde_json::json!({
1074                    "maxEdits": fuzzy.max_edits,
1075                    "prefixLength": fuzzy.prefix_length,
1076                    "maxExpansions": fuzzy.max_expansions
1077                });
1078            }
1079
1080            let mut search = serde_json::json!({
1081                "$search": {
1082                    "text": text
1083                }
1084            });
1085
1086            if let Some(ref hl) = self.highlight {
1087                search["$search"]["highlight"] = serde_json::json!({
1088                    "path": hl.path,
1089                    "maxCharsToExamine": hl.max_chars_to_examine,
1090                    "maxNumPassages": hl.max_num_passages
1091                });
1092            }
1093
1094            search
1095        }
1096
1097        /// Build the aggregation pipeline for search.
1098        pub fn to_pipeline(&self) -> Vec<serde_json::Value> {
1099            let mut pipeline = vec![self.to_search_stage()];
1100
1101            // Add score metadata
1102            pipeline.push(serde_json::json!({
1103                "$addFields": {
1104                    "score": { "$meta": "searchScore" }
1105                }
1106            }));
1107
1108            // Add highlights if enabled
1109            if self.highlight.is_some() {
1110                pipeline.push(serde_json::json!({
1111                    "$addFields": {
1112                        "highlights": { "$meta": "searchHighlights" }
1113                    }
1114                }));
1115            }
1116
1117            pipeline
1118        }
1119    }
1120
1121    /// Builder for Atlas Search index.
1122    #[derive(Debug, Clone, Default)]
1123    pub struct AtlasSearchIndexBuilder {
1124        name: String,
1125        collection: Option<String>,
1126        analyzer: String,
1127        dynamic: bool,
1128        fields: Vec<SearchField>,
1129    }
1130
1131    impl AtlasSearchIndexBuilder {
1132        /// Create a new builder.
1133        pub fn new(name: impl Into<String>) -> Self {
1134            Self {
1135                name: name.into(),
1136                analyzer: "lucene.standard".to_string(),
1137                ..Default::default()
1138            }
1139        }
1140
1141        /// Set the collection.
1142        pub fn collection(mut self, collection: impl Into<String>) -> Self {
1143            self.collection = Some(collection.into());
1144            self
1145        }
1146
1147        /// Set the analyzer.
1148        pub fn analyzer(mut self, analyzer: impl Into<String>) -> Self {
1149            self.analyzer = analyzer.into();
1150            self
1151        }
1152
1153        /// Enable dynamic mapping.
1154        pub fn dynamic(mut self) -> Self {
1155            self.dynamic = true;
1156            self
1157        }
1158
1159        /// Add a text field.
1160        pub fn text_field(mut self, path: impl Into<String>) -> Self {
1161            self.fields.push(SearchField {
1162                path: path.into(),
1163                field_type: SearchFieldType::String,
1164                analyzer: None,
1165                facet: false,
1166            });
1167            self
1168        }
1169
1170        /// Add a faceted field.
1171        pub fn facet_field(mut self, path: impl Into<String>, field_type: SearchFieldType) -> Self {
1172            self.fields.push(SearchField {
1173                path: path.into(),
1174                field_type,
1175                analyzer: None,
1176                facet: true,
1177            });
1178            self
1179        }
1180
1181        /// Add an autocomplete field.
1182        pub fn autocomplete_field(mut self, path: impl Into<String>) -> Self {
1183            self.fields.push(SearchField {
1184                path: path.into(),
1185                field_type: SearchFieldType::Autocomplete,
1186                analyzer: None,
1187                facet: false,
1188            });
1189            self
1190        }
1191
1192        /// Build the index definition.
1193        pub fn build(self) -> serde_json::Value {
1194            let mut fields = serde_json::Map::new();
1195
1196            for field in &self.fields {
1197                let mut field_def = serde_json::json!({
1198                    "type": field.field_type.as_str()
1199                });
1200
1201                if let Some(ref analyzer) = field.analyzer {
1202                    field_def["analyzer"] = serde_json::Value::String(analyzer.clone());
1203                }
1204
1205                fields.insert(field.path.clone(), field_def);
1206            }
1207
1208            serde_json::json!({
1209                "name": self.name,
1210                "analyzer": self.analyzer,
1211                "mappings": {
1212                    "dynamic": self.dynamic,
1213                    "fields": fields
1214                }
1215            })
1216        }
1217    }
1218
1219    /// Helper to create a search query.
1220    pub fn search(query: impl Into<String>) -> AtlasSearchQuery {
1221        AtlasSearchQuery::new(query)
1222    }
1223
1224    /// Helper to create a search index builder.
1225    pub fn search_index(name: impl Into<String>) -> AtlasSearchIndexBuilder {
1226        AtlasSearchIndexBuilder::new(name)
1227    }
1228}
1229
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder records query text, columns, mode and ranking.
    #[test]
    fn test_search_query_builder() {
        let built = SearchQuery::new("rust async")
            .columns(["title", "body"])
            .match_all()
            .with_ranking()
            .build();

        assert_eq!(built.query, "rust async");
        assert_eq!(built.columns, vec!["title", "body"]);
        assert_eq!(built.mode, SearchMode::All);
        assert!(built.ranking.enabled);
    }

    /// Ranked PostgreSQL search uses the tsvector/tsquery machinery.
    #[test]
    fn test_postgres_search_sql() {
        let query = SearchQuery::new("rust programming")
            .column("content")
            .with_ranking()
            .build();

        let generated = query.to_postgres_sql("posts").unwrap();
        for fragment in ["to_tsvector", "to_tsquery", "ts_rank", "@@"] {
            assert!(generated.sql.contains(fragment));
        }
    }

    /// MySQL search is expressed as `MATCH ... AGAINST`.
    #[test]
    fn test_mysql_search_sql() {
        let query = SearchQuery::new("database performance")
            .columns(["title", "body"])
            .match_any()
            .build();

        let generated = query.to_mysql_sql("articles").unwrap();
        for fragment in ["MATCH", "AGAINST"] {
            assert!(generated.sql.contains(fragment));
        }
    }

    /// Ranked SQLite search uses `MATCH` together with `bm25`.
    #[test]
    fn test_sqlite_search_sql() {
        let query = SearchQuery::new("web development")
            .column("content")
            .with_ranking()
            .build();

        let generated = query.to_sqlite_sql("posts", "posts_fts").unwrap();
        for fragment in ["MATCH", "bm25"] {
            assert!(generated.sql.contains(fragment));
        }
    }

    /// Unranked MSSQL phrase search uses `CONTAINS`.
    #[test]
    fn test_mssql_search_sql() {
        let query = SearchQuery::new("machine learning")
            .columns(["title", "abstract"])
            .phrase()
            .build();

        let generated = query.to_mssql_sql("papers").unwrap();
        assert!(generated.sql.contains("CONTAINS"));
    }

    /// Ranked MSSQL search uses `CONTAINSTABLE` and exposes `RANK`.
    #[test]
    fn test_mssql_ranked_search() {
        let query = SearchQuery::new("neural network")
            .column("content")
            .with_ranking()
            .build();

        let generated = query.to_mssql_sql("papers").unwrap();
        for fragment in ["CONTAINSTABLE", "RANK"] {
            assert!(generated.sql.contains(fragment));
        }
    }

    /// PostgreSQL full-text indexes are GIN indexes over `to_tsvector`.
    #[test]
    fn test_fulltext_index_postgres() {
        let idx = FullTextIndex::builder("posts_search_idx")
            .on_table("posts")
            .columns(["title", "body"])
            .language(SearchLanguage::English)
            .build()
            .unwrap();

        let ddl = idx.to_postgres_sql();
        for fragment in ["CREATE INDEX posts_search_idx", "USING GIN", "to_tsvector"] {
            assert!(ddl.contains(fragment));
        }
    }

    /// MySQL full-text index DDL is matched exactly.
    #[test]
    fn test_fulltext_index_mysql() {
        let idx = FullTextIndex::builder("posts_fulltext")
            .on_table("posts")
            .columns(["title", "body"])
            .build()
            .unwrap();

        let ddl = idx.to_mysql_sql();
        assert_eq!(
            ddl,
            "CREATE FULLTEXT INDEX posts_fulltext ON posts (title, body);"
        );
    }

    /// SQLite full-text indexes are FTS5 virtual tables.
    #[test]
    fn test_fulltext_index_sqlite() {
        let idx = FullTextIndex::builder("posts_fts")
            .on_table("posts")
            .columns(["title", "content"])
            .build()
            .unwrap();

        let ddl = idx.to_sqlite_sql();
        for fragment in ["CREATE VIRTUAL TABLE", "USING fts5"] {
            assert!(ddl.contains(fragment));
        }
    }

    /// Highlight options keep the values passed to their setters.
    #[test]
    fn test_highlight_options() {
        let highlight = HighlightOptions::default()
            .enabled()
            .tags("<mark>", "</mark>")
            .max_length(200)
            .max_fragments(5);

        assert!(highlight.enabled);
        assert_eq!(highlight.start_tag, "<mark>");
        assert_eq!(highlight.end_tag, "</mark>");
        assert_eq!(highlight.max_length, Some(200));
    }

    /// Fuzzy options keep the values passed to their setters.
    #[test]
    fn test_fuzzy_options() {
        let fuzzy = FuzzyOptions::default()
            .enabled()
            .max_edits(1)
            .threshold(0.5);

        assert!(fuzzy.enabled);
        assert_eq!(fuzzy.max_edits, 1);
        assert_eq!(fuzzy.threshold, 0.5);
    }

    /// Ranking options record the alias and one weight per column.
    #[test]
    fn test_ranking_with_weights() {
        let ranking = RankingOptions::default()
            .enabled()
            .alias("relevance")
            .weight("title", 2.0)
            .weight("body", 1.0);

        assert_eq!(ranking.score_alias, "relevance");
        assert_eq!(ranking.weights.len(), 2);
    }

    mod mongodb_tests {
        use super::super::mongodb::*;

        /// The `$search` stage carries the query text under the `text` operator.
        #[test]
        fn test_atlas_search_query() {
            let atlas_query = search("rust async")
                .paths(["title", "body"])
                .fuzzy(FuzzyConfig::default())
                .boost(2.0);

            let search_stage = atlas_query.to_search_stage();
            assert!(search_stage["$search"]["text"]["query"].is_string());
        }

        /// The pipeline starts with `$search` and has at least one more stage.
        #[test]
        fn test_atlas_search_pipeline() {
            let atlas_query = search("database").path("content").highlight("content");

            let stages = atlas_query.to_pipeline();
            assert!(stages.len() >= 2);
            assert!(stages[0]["$search"].is_object());
        }

        /// The built index definition has a name and reflects dynamic mapping.
        #[test]
        fn test_atlas_search_index_builder() {
            let definition = search_index("default")
                .collection("posts")
                .analyzer("lucene.english")
                .dynamic()
                .text_field("title")
                .text_field("body")
                .facet_field("category", SearchFieldType::String)
                .build();

            assert!(definition["name"].is_string());
            assert!(definition["mappings"]["dynamic"].as_bool().unwrap());
        }
    }
}