Skip to main content

idb/innodb/
fts.rs

//! Full-text search (FTS) auxiliary table detection and metadata extraction.
//!
//! InnoDB FTS indexes create auxiliary tables with filenames following the pattern:
//! `FTS_<table_id_hex>_<type>.ibd` where type is one of:
//! - `CONFIG` — FTS configuration
//! - `<index_id_hex>_INDEX_<N>` — inverted index shards (N = 0-5)
//! - `BEING_DELETED` / `BEING_DELETED_CACHE` — deletion bookkeeping
//! - `DELETED` / `DELETED_CACHE` — deleted document IDs
9
10use serde::Serialize;
11
12/// Type of FTS auxiliary file.
13///
14/// # Examples
15///
16/// ```
17/// use idb::innodb::fts::FtsFileType;
18///
19/// let ft = FtsFileType::Config;
20/// assert_eq!(format!("{ft}"), "CONFIG");
21///
22/// let ft = FtsFileType::Index(3);
23/// assert_eq!(format!("{ft}"), "INDEX_3");
24/// ```
25#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
26pub enum FtsFileType {
27    /// FTS configuration table.
28    Config,
29    /// Inverted index shard (0-5).
30    Index(u8),
31    /// Deleted document IDs.
32    Delete,
33    /// Deleted document IDs (cache).
34    DeleteCache,
35    /// Documents being deleted.
36    BeingDeleted,
37    /// Documents being deleted (cache).
38    BeingDeletedCache,
39}
40
41impl std::fmt::Display for FtsFileType {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        match self {
44            FtsFileType::Config => write!(f, "CONFIG"),
45            FtsFileType::Index(n) => write!(f, "INDEX_{n}"),
46            FtsFileType::Delete => write!(f, "DELETED"),
47            FtsFileType::DeleteCache => write!(f, "DELETED_CACHE"),
48            FtsFileType::BeingDeleted => write!(f, "BEING_DELETED"),
49            FtsFileType::BeingDeletedCache => write!(f, "BEING_DELETED_CACHE"),
50        }
51    }
52}
53
54/// Parsed metadata from an FTS auxiliary filename.
55///
56/// # Examples
57///
58/// ```
59/// use idb::innodb::fts::{parse_fts_filename, FtsFileType};
60///
61/// let info = parse_fts_filename("FTS_0000000000000437_CONFIG.ibd").unwrap();
62/// assert_eq!(info.table_id_hex, "0000000000000437");
63/// assert_eq!(info.file_type, FtsFileType::Config);
64///
65/// let info = parse_fts_filename("FTS_0000000000000437_00000000000004a2_INDEX_1.ibd").unwrap();
66/// assert_eq!(info.table_id_hex, "0000000000000437");
67/// assert_eq!(info.index_id_hex, Some("00000000000004a2".to_string()));
68/// assert_eq!(info.file_type, FtsFileType::Index(1));
69/// ```
70#[derive(Debug, Clone, Serialize)]
71pub struct FtsFileInfo {
72    /// Hex-encoded table ID from the filename.
73    pub table_id_hex: String,
74    /// Hex-encoded index ID (for INDEX files only).
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub index_id_hex: Option<String>,
77    /// Type of FTS auxiliary file.
78    pub file_type: FtsFileType,
79}
80
81/// Check if a filename matches the FTS auxiliary table naming pattern.
82///
83/// # Examples
84///
85/// ```
86/// use idb::innodb::fts::is_fts_auxiliary;
87///
88/// assert!(is_fts_auxiliary("FTS_0000000000000437_00000000000004a2_INDEX_1.ibd"));
89/// assert!(is_fts_auxiliary("FTS_0000000000000437_CONFIG.ibd"));
90/// assert!(is_fts_auxiliary("FTS_0000000000000100_DELETED.ibd"));
91/// assert!(!is_fts_auxiliary("users.ibd"));
92/// assert!(!is_fts_auxiliary("FTS_bad.ibd"));
93/// ```
94pub fn is_fts_auxiliary(filename: &str) -> bool {
95    parse_fts_filename(filename).is_some()
96}
97
98/// Parse an FTS auxiliary filename into structured metadata.
99///
100/// Returns `None` if the filename doesn't match the FTS naming pattern.
101pub fn parse_fts_filename(filename: &str) -> Option<FtsFileInfo> {
102    // Strip path prefix — only look at the filename itself
103    let name = filename
104        .rsplit('/')
105        .next()
106        .unwrap_or(filename)
107        .rsplit('\\')
108        .next()
109        .unwrap_or(filename);
110
111    // Must start with "FTS_" and end with ".ibd"
112    let stripped = name.strip_prefix("FTS_")?.strip_suffix(".ibd")?;
113
114    // Table ID is always the first 16 hex chars
115    if stripped.len() < 16 {
116        return None;
117    }
118
119    let table_id_hex = &stripped[..16];
120    if !table_id_hex.chars().all(|c| c.is_ascii_hexdigit()) {
121        return None;
122    }
123
124    let rest = &stripped[16..];
125    if rest.is_empty() {
126        return None;
127    }
128
129    // Rest starts with underscore separator
130    let rest = rest.strip_prefix('_')?;
131
132    // Try to match known suffixes
133    if rest == "CONFIG" {
134        return Some(FtsFileInfo {
135            table_id_hex: table_id_hex.to_string(),
136            index_id_hex: None,
137            file_type: FtsFileType::Config,
138        });
139    }
140
141    if rest == "DELETED" {
142        return Some(FtsFileInfo {
143            table_id_hex: table_id_hex.to_string(),
144            index_id_hex: None,
145            file_type: FtsFileType::Delete,
146        });
147    }
148
149    if rest == "DELETED_CACHE" {
150        return Some(FtsFileInfo {
151            table_id_hex: table_id_hex.to_string(),
152            index_id_hex: None,
153            file_type: FtsFileType::DeleteCache,
154        });
155    }
156
157    if rest == "BEING_DELETED" {
158        return Some(FtsFileInfo {
159            table_id_hex: table_id_hex.to_string(),
160            index_id_hex: None,
161            file_type: FtsFileType::BeingDeleted,
162        });
163    }
164
165    if rest == "BEING_DELETED_CACHE" {
166        return Some(FtsFileInfo {
167            table_id_hex: table_id_hex.to_string(),
168            index_id_hex: None,
169            file_type: FtsFileType::BeingDeletedCache,
170        });
171    }
172
173    // Try INDEX pattern: <16-char index_id>_INDEX_<N>
174    if rest.len() >= 24 {
175        let index_id_hex = &rest[..16];
176        if index_id_hex.chars().all(|c| c.is_ascii_hexdigit()) {
177            let suffix = &rest[16..];
178            if let Some(n_str) = suffix.strip_prefix("_INDEX_") {
179                if let Ok(n) = n_str.parse::<u8>() {
180                    return Some(FtsFileInfo {
181                        table_id_hex: table_id_hex.to_string(),
182                        index_id_hex: Some(index_id_hex.to_string()),
183                        file_type: FtsFileType::Index(n),
184                    });
185                }
186            }
187        }
188    }
189
190    None
191}
192
193/// Summary of FTS auxiliary files for a single table.
194#[derive(Debug, Clone, Serialize)]
195pub struct FtsTableSummary {
196    /// Hex-encoded table ID.
197    pub table_id: String,
198    /// Number of index shard files found.
199    pub index_count: usize,
200    /// Whether a CONFIG file was found.
201    pub has_config: bool,
202    /// Whether DELETED/BEING_DELETED files were found.
203    pub has_delete: bool,
204}
205
206/// Group a list of FTS file infos by table ID into summaries.
207///
208/// # Examples
209///
210/// ```
211/// use idb::innodb::fts::{parse_fts_filename, summarize_fts_files};
212///
213/// let files = vec![
214///     "FTS_0000000000000437_CONFIG.ibd",
215///     "FTS_0000000000000437_00000000000004a2_INDEX_0.ibd",
216///     "FTS_0000000000000437_00000000000004a2_INDEX_1.ibd",
217///     "FTS_0000000000000437_DELETED.ibd",
218/// ];
219///
220/// let infos: Vec<_> = files.iter().filter_map(|f| parse_fts_filename(f)).collect();
221/// let summaries = summarize_fts_files(&infos);
222/// assert_eq!(summaries.len(), 1);
223/// assert_eq!(summaries[0].index_count, 2);
224/// assert!(summaries[0].has_config);
225/// assert!(summaries[0].has_delete);
226/// ```
227pub fn summarize_fts_files(infos: &[FtsFileInfo]) -> Vec<FtsTableSummary> {
228    use std::collections::HashMap;
229
230    let mut tables: HashMap<&str, (usize, bool, bool)> = HashMap::new();
231
232    for info in infos {
233        let entry = tables
234            .entry(&info.table_id_hex)
235            .or_insert((0, false, false));
236
237        match &info.file_type {
238            FtsFileType::Index(_) => entry.0 += 1,
239            FtsFileType::Config => entry.1 = true,
240            FtsFileType::Delete
241            | FtsFileType::DeleteCache
242            | FtsFileType::BeingDeleted
243            | FtsFileType::BeingDeletedCache => entry.2 = true,
244        }
245    }
246
247    let mut summaries: Vec<FtsTableSummary> = tables
248        .into_iter()
249        .map(
250            |(tid, (idx_count, has_config, has_delete))| FtsTableSummary {
251                table_id: tid.to_string(),
252                index_count: idx_count,
253                has_config,
254                has_delete,
255            },
256        )
257        .collect();
258
259    summaries.sort_by(|a, b| a.table_id.cmp(&b.table_id));
260    summaries
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `name`, failing the test with the offending name if it is rejected.
    fn parsed(name: &str) -> FtsFileInfo {
        parse_fts_filename(name).unwrap_or_else(|| panic!("{name} should parse"))
    }

    #[test]
    fn test_is_fts_auxiliary() {
        let accepted = [
            "FTS_0000000000000437_00000000000004a2_INDEX_1.ibd",
            "FTS_0000000000000437_CONFIG.ibd",
            "FTS_0000000000000100_DELETED.ibd",
            "FTS_0000000000000100_BEING_DELETED.ibd",
        ];
        for name in accepted {
            assert!(is_fts_auxiliary(name), "{name} should be accepted");
        }

        let rejected = ["users.ibd", "FTS_bad.ibd", "FTS_.ibd"];
        for name in rejected {
            assert!(!is_fts_auxiliary(name), "{name} should be rejected");
        }
    }

    #[test]
    fn test_parse_fts_config() {
        let info = parsed("FTS_0000000000000437_CONFIG.ibd");
        assert_eq!(info.table_id_hex, "0000000000000437");
        assert_eq!(info.file_type, FtsFileType::Config);
        assert!(info.index_id_hex.is_none());
    }

    #[test]
    fn test_parse_fts_index() {
        let info = parsed("FTS_0000000000000437_00000000000004a2_INDEX_3.ibd");
        assert_eq!(info.table_id_hex, "0000000000000437");
        assert_eq!(info.index_id_hex, Some("00000000000004a2".to_string()));
        assert_eq!(info.file_type, FtsFileType::Index(3));
    }

    #[test]
    fn test_parse_fts_deleted() {
        let cases = [
            ("FTS_0000000000000100_DELETED.ibd", FtsFileType::Delete),
            (
                "FTS_0000000000000100_DELETED_CACHE.ibd",
                FtsFileType::DeleteCache,
            ),
        ];
        for (name, expected) in cases {
            assert_eq!(parsed(name).file_type, expected);
        }
    }

    #[test]
    fn test_parse_fts_being_deleted() {
        let cases = [
            (
                "FTS_0000000000000100_BEING_DELETED.ibd",
                FtsFileType::BeingDeleted,
            ),
            (
                "FTS_0000000000000100_BEING_DELETED_CACHE.ibd",
                FtsFileType::BeingDeletedCache,
            ),
        ];
        for (name, expected) in cases {
            assert_eq!(parsed(name).file_type, expected);
        }
    }

    #[test]
    fn test_parse_fts_invalid() {
        let rejected = [
            "users.ibd",
            "FTS_.ibd",
            "FTS_GGGG000000000437_CONFIG.ibd",
            "FTS_0000000000000437_UNKNOWN.ibd",
        ];
        for name in rejected {
            assert!(parse_fts_filename(name).is_none(), "{name} should be rejected");
        }
    }

    #[test]
    fn test_parse_fts_with_path() {
        let info = parsed("/var/lib/mysql/test/FTS_0000000000000437_CONFIG.ibd");
        assert_eq!(info.table_id_hex, "0000000000000437");
        assert_eq!(info.file_type, FtsFileType::Config);
    }

    #[test]
    fn test_summarize_fts_files() {
        let files = [
            "FTS_0000000000000437_CONFIG.ibd",
            "FTS_0000000000000437_00000000000004a2_INDEX_0.ibd",
            "FTS_0000000000000437_00000000000004a2_INDEX_1.ibd",
            "FTS_0000000000000437_DELETED.ibd",
            "FTS_0000000000000100_CONFIG.ibd",
        ];

        let infos: Vec<_> = files.iter().filter_map(|f| parse_fts_filename(f)).collect();
        let summaries = summarize_fts_files(&infos);
        assert_eq!(summaries.len(), 2);

        // Look summaries up by table ID rather than relying on position.
        let summary_for = |table_id: &str| {
            summaries
                .iter()
                .find(|s| s.table_id == table_id)
                .unwrap()
        };

        let s437 = summary_for("0000000000000437");
        assert_eq!(s437.index_count, 2);
        assert!(s437.has_config);
        assert!(s437.has_delete);

        let s100 = summary_for("0000000000000100");
        assert_eq!(s100.index_count, 0);
        assert!(s100.has_config);
        assert!(!s100.has_delete);
    }

    #[test]
    fn test_fts_file_type_display() {
        let cases = [
            (FtsFileType::Config, "CONFIG"),
            (FtsFileType::Index(3), "INDEX_3"),
            (FtsFileType::Delete, "DELETED"),
            (FtsFileType::BeingDeletedCache, "BEING_DELETED_CACHE"),
        ];
        for (file_type, expected) in cases {
            assert_eq!(format!("{}", file_type), expected);
        }
    }
}