Skip to main content

idb/innodb/
schema.rs

1//! Schema extraction and DDL reconstruction from SDI metadata.
2//!
3//! MySQL 8.0+ embeds complete table definitions (columns, indexes, foreign keys)
4//! as SDI JSON inside every `.ibd` file. This module parses that JSON into typed
5//! Rust structs and reconstructs human-readable `CREATE TABLE` DDL.
6//!
7//! For pre-8.0 tablespaces without SDI, a best-effort inference from INDEX page
8//! structure provides basic information about detected indexes.
9//!
10//! # Usage
11//!
12//! ```rust,ignore
13//! use idb::innodb::tablespace::Tablespace;
14//! use idb::innodb::sdi::{find_sdi_pages, extract_sdi_from_pages};
15//! use idb::innodb::schema::extract_schema_from_sdi;
16//!
17//! let mut ts = Tablespace::open("table.ibd").unwrap();
18//! let sdi_pages = find_sdi_pages(&mut ts).unwrap();
19//! let records = extract_sdi_from_pages(&mut ts, &sdi_pages).unwrap();
20//! for rec in &records {
21//!     if rec.sdi_type == 1 {
22//!         let schema = extract_schema_from_sdi(&rec.data).unwrap();
23//!         println!("{}", schema.ddl);
24//!     }
25//! }
26//! ```
27
28use serde::{Deserialize, Serialize};
29use std::collections::HashMap;
30
31use crate::IdbError;
32
33// ---------------------------------------------------------------------------
34// SDI JSON deserialization structs
35// ---------------------------------------------------------------------------
36
37/// Top-level SDI envelope wrapping a dd_object.
38#[derive(Debug, Deserialize)]
39pub struct SdiEnvelope {
40    /// MySQL server version ID (e.g., 90001 for 9.0.1).
41    #[serde(default)]
42    pub mysqld_version_id: u64,
43    /// Object type: "Table" or "Tablespace".
44    #[serde(default)]
45    pub dd_object_type: String,
46    /// The data dictionary object.
47    #[serde(default)]
48    pub dd_object: DdTable,
49}
50
51/// Data dictionary table definition.
52#[derive(Debug, Default, Deserialize)]
53pub struct DdTable {
54    /// Table name.
55    #[serde(default)]
56    pub name: String,
57    /// Schema (database) name.
58    #[serde(default)]
59    pub schema_ref: String,
60    /// Storage engine name.
61    #[serde(default)]
62    pub engine: String,
63    /// Default collation ID.
64    #[serde(default)]
65    pub collation_id: u64,
66    /// Row format code (1=FIXED, 2=DYNAMIC, 3=COMPRESSED, etc.).
67    #[serde(default)]
68    pub row_format: u64,
69    /// Table comment.
70    #[serde(default)]
71    pub comment: String,
72    /// Column definitions.
73    #[serde(default)]
74    pub columns: Vec<DdColumn>,
75    /// Index definitions.
76    #[serde(default)]
77    pub indexes: Vec<DdIndex>,
78    /// Foreign key definitions.
79    #[serde(default)]
80    pub foreign_keys: Vec<DdForeignKey>,
81    /// MySQL server version ID.
82    #[serde(default)]
83    pub mysql_version_id: u64,
84    /// Storage engine private data (e.g., "instant_col=3;version_added=1;").
85    #[serde(default)]
86    pub se_private_data: Option<String>,
87    /// InnoDB internal table ID (from `se_private_id` in the data dictionary).
88    #[serde(default)]
89    pub se_private_id: u64,
90}
91
92/// Data dictionary column definition.
93#[derive(Debug, Default, Deserialize)]
94pub struct DdColumn {
95    /// Column name.
96    #[serde(default)]
97    pub name: String,
98    /// dd_type code (internal MySQL type enumeration).
99    #[serde(rename = "type", default)]
100    pub dd_type: u64,
101    /// SQL type string from MySQL (e.g., "varchar(255)", "int unsigned").
102    #[serde(default)]
103    pub column_type_utf8: String,
104    /// Position in the column list (1-based).
105    #[serde(default)]
106    pub ordinal_position: u64,
107    /// Hidden flag: 1=visible, 2=SE-hidden (DB_TRX_ID, DB_ROLL_PTR, DB_ROW_ID),
108    /// 3=SQL-hidden (functional index backing columns), 4=user INVISIBLE (MySQL 8.0.23+).
109    #[serde(default)]
110    pub hidden: u64,
111    /// Whether the column allows NULL.
112    #[serde(default)]
113    pub is_nullable: bool,
114    /// Whether the column is unsigned.
115    #[serde(default)]
116    pub is_unsigned: bool,
117    /// Whether the column is AUTO_INCREMENT.
118    #[serde(default)]
119    pub is_auto_increment: bool,
120    /// Whether the column is virtual (generated).
121    #[serde(default)]
122    pub is_virtual: bool,
123    /// Character length.
124    #[serde(default)]
125    pub char_length: u64,
126    /// Numeric precision.
127    #[serde(default)]
128    pub numeric_precision: u64,
129    /// Numeric scale.
130    #[serde(default)]
131    pub numeric_scale: u64,
132    /// Datetime fractional seconds precision.
133    #[serde(default)]
134    pub datetime_precision: u64,
135    /// Collation ID for this column.
136    #[serde(default)]
137    pub collation_id: u64,
138    /// Default value as UTF-8 string.
139    #[serde(default)]
140    pub default_value_utf8: String,
141    /// Whether default_value_utf8 is NULL.
142    #[serde(default)]
143    pub default_value_utf8_null: bool,
144    /// Whether the column has no default.
145    #[serde(default)]
146    pub has_no_default: bool,
147    /// Default option (e.g., "CURRENT_TIMESTAMP").
148    #[serde(default)]
149    pub default_option: String,
150    /// Update option (e.g., "CURRENT_TIMESTAMP").
151    #[serde(default)]
152    pub update_option: String,
153    /// Generation expression (raw).
154    #[serde(default)]
155    pub generation_expression: String,
156    /// Generation expression as UTF-8.
157    #[serde(default)]
158    pub generation_expression_utf8: String,
159    /// ENUM/SET value elements.
160    #[serde(default)]
161    pub elements: Vec<DdColumnElement>,
162    /// Column comment.
163    #[serde(default)]
164    pub comment: String,
165    /// Whether the column is zerofill.
166    #[serde(default)]
167    pub is_zerofill: bool,
168    /// Storage engine private data (e.g., "version_added=1;physical_pos=5;").
169    #[serde(default)]
170    pub se_private_data: Option<String>,
171}
172
173/// ENUM or SET value element.
174#[derive(Debug, Default, Deserialize)]
175pub struct DdColumnElement {
176    /// The element name (value string).
177    #[serde(default)]
178    pub name: String,
179}
180
181/// Data dictionary index definition.
182#[derive(Debug, Default, Deserialize)]
183pub struct DdIndex {
184    /// Index name.
185    #[serde(default)]
186    pub name: String,
187    /// Index type: 1=PRIMARY, 2=UNIQUE, 3=MULTIPLE (non-unique), 4=FULLTEXT, 5=SPATIAL.
188    #[serde(rename = "type", default)]
189    pub index_type: u64,
190    /// Algorithm code (1=SE_SPECIFIC, 2=BTREE, 3=RTREE, 4=HASH, 5=FULLTEXT).
191    #[serde(default)]
192    pub algorithm: u64,
193    /// Whether the index is hidden.
194    #[serde(default)]
195    pub hidden: bool,
196    /// Index elements (columns).
197    #[serde(default)]
198    pub elements: Vec<DdIndexElement>,
199    /// Index comment.
200    #[serde(default)]
201    pub comment: String,
202    /// Whether the index is visible.
203    #[serde(default)]
204    pub is_visible: bool,
205}
206
207/// Data dictionary index element (column reference).
208#[derive(Debug, Default, Deserialize)]
209pub struct DdIndexElement {
210    /// 0-based index into the columns array as ordered in the SDI JSON.
211    #[serde(default)]
212    pub column_opx: u64,
213    /// Prefix length (4294967295 = full column).
214    #[serde(default)]
215    pub length: u64,
216    /// Sort order: 2=ASC, 1=DESC.
217    #[serde(default)]
218    pub order: u64,
219    /// Whether this element is hidden (internal).
220    #[serde(default)]
221    pub hidden: bool,
222}
223
224/// Data dictionary foreign key definition.
225#[derive(Debug, Default, Deserialize)]
226pub struct DdForeignKey {
227    /// Constraint name.
228    #[serde(default)]
229    pub name: String,
230    /// Referenced table's schema name.
231    #[serde(default)]
232    pub referenced_table_schema_name: String,
233    /// Referenced table name.
234    #[serde(default)]
235    pub referenced_table_name: String,
236    /// ON UPDATE rule (0=NO ACTION, 1=RESTRICT, 2=CASCADE, 3=SET NULL, 4=SET DEFAULT).
237    #[serde(default)]
238    pub update_rule: u64,
239    /// ON DELETE rule (same codes as update_rule).
240    #[serde(default)]
241    pub delete_rule: u64,
242    /// Foreign key elements (column mappings).
243    #[serde(default)]
244    pub elements: Vec<DdForeignKeyElement>,
245}
246
247/// Data dictionary foreign key element (column mapping).
248#[derive(Debug, Default, Deserialize)]
249pub struct DdForeignKeyElement {
250    /// 0-based index into the table's columns array as ordered in the SDI JSON.
251    #[serde(default)]
252    pub column_opx: u64,
253    /// Name of the referenced column.
254    #[serde(default)]
255    pub referenced_column_name: String,
256}
257
258// ---------------------------------------------------------------------------
259// Output structs (for text rendering and JSON serialization)
260// ---------------------------------------------------------------------------
261
262/// Reconstructed table schema from SDI or page inference.
263#[derive(Debug, Clone, Serialize)]
264pub struct TableSchema {
265    /// Database/schema name.
266    #[serde(skip_serializing_if = "Option::is_none")]
267    pub schema_name: Option<String>,
268    /// Table name.
269    pub table_name: String,
270    /// Storage engine.
271    pub engine: String,
272    /// Row format (DYNAMIC, COMPRESSED, etc.).
273    #[serde(skip_serializing_if = "Option::is_none")]
274    pub row_format: Option<String>,
275    /// Table collation name.
276    #[serde(skip_serializing_if = "Option::is_none")]
277    pub collation: Option<String>,
278    /// Character set name.
279    #[serde(skip_serializing_if = "Option::is_none")]
280    pub charset: Option<String>,
281    /// Table comment.
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub comment: Option<String>,
284    /// MySQL version string.
285    #[serde(skip_serializing_if = "Option::is_none")]
286    pub mysql_version: Option<String>,
287    /// Schema source: "sdi" or "inferred".
288    pub source: String,
289    /// Column definitions.
290    pub columns: Vec<ColumnDef>,
291    /// Index definitions.
292    pub indexes: Vec<IndexDef>,
293    /// Foreign key definitions.
294    #[serde(skip_serializing_if = "Vec::is_empty")]
295    pub foreign_keys: Vec<ForeignKeyDef>,
296    /// Reconstructed CREATE TABLE DDL.
297    pub ddl: String,
298}
299
300/// Column definition for output.
301#[derive(Debug, Clone, Serialize)]
302pub struct ColumnDef {
303    /// Column name.
304    pub name: String,
305    /// SQL type string (e.g., "varchar(100)").
306    pub column_type: String,
307    /// Whether the column allows NULL.
308    pub is_nullable: bool,
309    /// Default value expression (None if no default).
310    #[serde(skip_serializing_if = "Option::is_none")]
311    pub default_value: Option<String>,
312    /// Whether the column is AUTO_INCREMENT.
313    #[serde(skip_serializing_if = "is_false")]
314    pub is_auto_increment: bool,
315    /// Generation expression for virtual/stored generated columns.
316    #[serde(skip_serializing_if = "Option::is_none")]
317    pub generation_expression: Option<String>,
318    /// Whether the generated column is virtual (true) or stored (false).
319    #[serde(skip_serializing_if = "Option::is_none")]
320    pub is_virtual: Option<bool>,
321    /// Whether the column is INVISIBLE (MySQL 8.0.23+).
322    #[serde(skip_serializing_if = "is_false")]
323    pub is_invisible: bool,
324    /// Column comment.
325    #[serde(skip_serializing_if = "Option::is_none")]
326    pub comment: Option<String>,
327    /// Instant ADD COLUMN version (MySQL 8.0.29+).
328    #[serde(skip_serializing_if = "Option::is_none")]
329    pub version_added: Option<u32>,
330    /// Instant DROP COLUMN version (MySQL 8.0.29+).
331    #[serde(skip_serializing_if = "Option::is_none")]
332    pub version_dropped: Option<u32>,
333}
334
335fn is_false(v: &bool) -> bool {
336    !v
337}
338
339/// Index definition for output.
340#[derive(Debug, Clone, Serialize)]
341pub struct IndexDef {
342    /// Index name.
343    pub name: String,
344    /// Index type: "PRIMARY KEY", "UNIQUE KEY", "KEY", "FULLTEXT KEY", "SPATIAL KEY".
345    pub index_type: String,
346    /// Index columns with optional prefix length and sort order.
347    pub columns: Vec<IndexColumnDef>,
348    /// Index comment.
349    #[serde(skip_serializing_if = "Option::is_none")]
350    pub comment: Option<String>,
351    /// Whether the index is visible.
352    #[serde(skip_serializing_if = "is_true")]
353    pub is_visible: bool,
354}
355
356fn is_true(v: &bool) -> bool {
357    *v
358}
359
360/// Column reference within an index.
361#[derive(Debug, Clone, Serialize)]
362pub struct IndexColumnDef {
363    /// Column name.
364    pub name: String,
365    /// Prefix length (None for full column).
366    #[serde(skip_serializing_if = "Option::is_none")]
367    pub prefix_length: Option<u64>,
368    /// Sort order ("ASC" or "DESC"). Omitted if ASC (default).
369    #[serde(skip_serializing_if = "Option::is_none")]
370    pub order: Option<String>,
371}
372
373/// Foreign key definition for output.
374#[derive(Debug, Clone, Serialize)]
375pub struct ForeignKeyDef {
376    /// Constraint name.
377    pub name: String,
378    /// Columns in this table.
379    pub columns: Vec<String>,
380    /// Referenced schema.table.
381    pub referenced_table: String,
382    /// Referenced columns.
383    pub referenced_columns: Vec<String>,
384    /// ON UPDATE action.
385    pub on_update: String,
386    /// ON DELETE action.
387    pub on_delete: String,
388}
389
390/// Inferred schema from pre-8.0 tablespaces (no SDI).
391#[derive(Debug, Clone, Serialize)]
392pub struct InferredSchema {
393    /// Source description.
394    pub source: String,
395    /// Record format: "COMPACT" or "REDUNDANT".
396    pub record_format: String,
397    /// Detected indexes.
398    pub indexes: Vec<InferredIndex>,
399}
400
401/// Inferred index from page scanning.
402#[derive(Debug, Clone, Serialize)]
403pub struct InferredIndex {
404    /// Index ID from the INDEX page header.
405    pub index_id: u64,
406    /// Number of leaf pages.
407    pub leaf_pages: u64,
408    /// Maximum non-leaf level seen.
409    pub max_level: u16,
410}
411
412// ---------------------------------------------------------------------------
413// Collation and type mapping
414// ---------------------------------------------------------------------------
415
416/// Map common MySQL collation IDs to collation names.
417///
418/// Only covers the most widely-used collations. Returns None for unknown IDs.
419///
420/// # Examples
421///
422/// ```
423/// use idb::innodb::schema::collation_name;
424///
425/// assert_eq!(collation_name(255), Some("utf8mb4_0900_ai_ci"));
426/// assert_eq!(collation_name(63), Some("binary"));
427/// assert_eq!(collation_name(45), Some("utf8mb4_general_ci"));
428/// assert_eq!(collation_name(99999), None);
429/// ```
430pub fn collation_name(id: u64) -> Option<&'static str> {
431    match id {
432        2 => Some("latin2_czech_cs"),
433        8 => Some("latin1_swedish_ci"),
434        11 => Some("ascii_general_ci"),
435        33 => Some("utf8mb3_general_ci"),
436        45 => Some("utf8mb4_general_ci"),
437        46 => Some("utf8mb4_bin"),
438        47 => Some("latin1_bin"),
439        48 => Some("latin1_general_ci"),
440        63 => Some("binary"),
441        83 => Some("utf8mb3_bin"),
442        224 => Some("utf8mb4_unicode_ci"),
443        255 => Some("utf8mb4_0900_ai_ci"),
444        _ => None,
445    }
446}
447
448/// Map collation ID to character set name.
449///
450/// # Examples
451///
452/// ```
453/// use idb::innodb::schema::charset_from_collation;
454///
455/// assert_eq!(charset_from_collation(255), Some("utf8mb4"));
456/// assert_eq!(charset_from_collation(63), Some("binary"));
457/// assert_eq!(charset_from_collation(8), Some("latin1"));
458/// assert_eq!(charset_from_collation(99999), None);
459/// ```
460pub fn charset_from_collation(id: u64) -> Option<&'static str> {
461    match id {
462        2 => Some("latin2"),
463        8 | 47 | 48 => Some("latin1"),
464        11 => Some("ascii"),
465        33 | 83 => Some("utf8mb3"),
466        45 | 46 | 224 | 255 => Some("utf8mb4"),
467        63 => Some("binary"),
468        _ => None,
469    }
470}
471
472/// Map row_format code to name.
473///
474/// # Examples
475///
476/// ```
477/// use idb::innodb::schema::row_format_name;
478///
479/// assert_eq!(row_format_name(1), "FIXED");
480/// assert_eq!(row_format_name(2), "DYNAMIC");
481/// assert_eq!(row_format_name(3), "COMPRESSED");
482/// ```
483pub fn row_format_name(id: u64) -> &'static str {
484    match id {
485        1 => "FIXED",
486        2 => "DYNAMIC",
487        3 => "COMPRESSED",
488        4 => "REDUNDANT",
489        5 => "COMPACT",
490        _ => "UNKNOWN",
491    }
492}
493
494/// Map foreign key rule code to SQL action string.
495///
496/// # Examples
497///
498/// ```
499/// use idb::innodb::schema::fk_rule_name;
500///
501/// assert_eq!(fk_rule_name(0), "NO ACTION");
502/// assert_eq!(fk_rule_name(2), "CASCADE");
503/// ```
504pub fn fk_rule_name(rule: u64) -> &'static str {
505    match rule {
506        0 => "NO ACTION",
507        1 => "RESTRICT",
508        2 => "CASCADE",
509        3 => "SET NULL",
510        4 => "SET DEFAULT",
511        _ => "NO ACTION",
512    }
513}
514
515/// Fallback type mapping from dd_type code when `column_type_utf8` is empty.
516///
517/// This should rarely be needed — `column_type_utf8` is the authoritative source.
518///
519/// # Examples
520///
521/// ```
522/// use idb::innodb::schema::{DdColumn, dd_type_to_sql};
523///
524/// let col = DdColumn { dd_type: 4, numeric_precision: 10, ..Default::default() };
525/// assert_eq!(dd_type_to_sql(&col), "int");
526///
527/// let col = DdColumn { dd_type: 16, char_length: 400, collation_id: 255, ..Default::default() };
528/// assert_eq!(dd_type_to_sql(&col), "varchar(100)");
529/// ```
530pub fn dd_type_to_sql(col: &DdColumn) -> String {
531    match col.dd_type {
532        1 => "tinyint".to_string(),
533        2 => "smallint".to_string(),
534        3 => "mediumint".to_string(),
535        4 => "int".to_string(),
536        5 => "bigint".to_string(),
537        6 => format_decimal(col),
538        7 => "float".to_string(),
539        8 => "double".to_string(),
540        9 | 10 => "binary".to_string(), // internal types (ROLL_PTR, TRX_ID)
541        11 => "year".to_string(),
542        12 => "date".to_string(),
543        13 => "time".to_string(),
544        14 => "datetime".to_string(),
545        15 => "timestamp".to_string(),
546        16 => format_varchar(col),
547        17 => format_char(col),
548        18 => "bit".to_string(),
549        19 => "enum".to_string(),
550        20 => "set".to_string(),
551        23 => "tinyblob".to_string(),
552        24 => "mediumblob".to_string(),
553        25 => "longblob".to_string(),
554        26 => "blob".to_string(),
555        27 => format_text(col),
556        28 => "varbinary".to_string(),
557        29 => "binary".to_string(),
558        30 => "geometry".to_string(),
559        31 => "json".to_string(),
560        _ => format!("unknown_type({})", col.dd_type),
561    }
562}
563
564fn format_decimal(col: &DdColumn) -> String {
565    if col.numeric_precision > 0 {
566        if col.numeric_scale > 0 {
567            format!("decimal({},{})", col.numeric_precision, col.numeric_scale)
568        } else {
569            format!("decimal({})", col.numeric_precision)
570        }
571    } else {
572        "decimal".to_string()
573    }
574}
575
576fn format_varchar(col: &DdColumn) -> String {
577    // char_length is in bytes; divide by max bytes per char for the charset
578    let max_bytes_per_char = charset_max_bytes(col.collation_id);
579    let char_len = if max_bytes_per_char > 0 {
580        col.char_length / max_bytes_per_char
581    } else {
582        col.char_length
583    };
584    format!("varchar({})", char_len)
585}
586
587fn format_char(col: &DdColumn) -> String {
588    let max_bytes_per_char = charset_max_bytes(col.collation_id);
589    let char_len = if max_bytes_per_char > 0 {
590        col.char_length / max_bytes_per_char
591    } else {
592        col.char_length
593    };
594    format!("char({})", char_len.max(1))
595}
596
597fn format_text(col: &DdColumn) -> String {
598    match col.char_length {
599        0..=255 => "tinytext".to_string(),
600        256..=65535 => "text".to_string(),
601        65536..=16777215 => "mediumtext".to_string(),
602        _ => "longtext".to_string(),
603    }
604}
605
606/// Returns max bytes per character for a collation ID.
607fn charset_max_bytes(collation_id: u64) -> u64 {
608    match collation_id {
609        2 | 8 | 11 | 47 | 48 => 1, // latin1, latin2, ascii
610        33 | 83 => 3,              // utf8mb3
611        45 | 46 | 224 | 255 => 4,  // utf8mb4
612        63 => 1,                   // binary
613        _ => 4,                    // default to utf8mb4
614    }
615}
616
617/// Format MySQL version from version_id (e.g., 90001 -> "9.0.1").
618fn format_mysql_version(version_id: u64) -> String {
619    if version_id == 0 {
620        return "unknown".to_string();
621    }
622    let major = version_id / 10000;
623    let minor = (version_id % 10000) / 100;
624    let patch = version_id % 100;
625    format!("{}.{}.{}", major, minor, patch)
626}
627
628// ---------------------------------------------------------------------------
629// Schema extraction
630// ---------------------------------------------------------------------------
631
632/// Extract a [`TableSchema`] from raw SDI JSON (type=1 "Table" record).
633///
634/// Parses the JSON into typed structs, filters out hidden columns, builds
635/// column/index/FK definitions, and generates `CREATE TABLE` DDL.
636///
637/// # Examples
638///
639/// ```
640/// use idb::innodb::schema::extract_schema_from_sdi;
641///
642/// let json = r#"{
643///   "mysqld_version_id": 90001,
644///   "dd_object_type": "Table",
645///   "dd_object": {
646///     "name": "test_table",
647///     "schema_ref": "mydb",
648///     "engine": "InnoDB",
649///     "collation_id": 255,
650///     "row_format": 2,
651///     "columns": [
652///       {
653///         "name": "id",
654///         "type": 4,
655///         "column_type_utf8": "int",
656///         "ordinal_position": 1,
657///         "hidden": 1,
658///         "is_nullable": false,
659///         "is_auto_increment": true
660///       },
661///       {
662///         "name": "DB_TRX_ID",
663///         "type": 10,
664///         "ordinal_position": 2,
665///         "hidden": 2
666///       },
667///       {
668///         "name": "DB_ROLL_PTR",
669///         "type": 9,
670///         "ordinal_position": 3,
671///         "hidden": 2
672///       }
673///     ],
674///     "indexes": [
675///       {
676///         "name": "PRIMARY",
677///         "type": 1,
678///         "hidden": false,
679///         "is_visible": true,
680///         "elements": [
681///           { "column_opx": 0, "hidden": false, "length": 4, "order": 2 }
682///         ]
683///       }
684///     ],
685///     "foreign_keys": []
686///   }
687/// }"#;
688///
689/// let schema = extract_schema_from_sdi(json).unwrap();
690/// assert_eq!(schema.table_name, "test_table");
691/// assert_eq!(schema.columns.len(), 1); // DB_TRX_ID and DB_ROLL_PTR filtered
692/// assert!(schema.ddl.contains("CREATE TABLE"));
693/// ```
694pub fn extract_schema_from_sdi(sdi_json: &str) -> Result<TableSchema, IdbError> {
695    let envelope: SdiEnvelope = serde_json::from_str(sdi_json)
696        .map_err(|e| IdbError::Parse(format!("Failed to parse SDI JSON: {}", e)))?;
697
698    let dd = &envelope.dd_object;
699
700    // Build column name lookup (all columns, including hidden)
701    let all_columns: Vec<&DdColumn> = {
702        let mut cols: Vec<&DdColumn> = dd.columns.iter().collect();
703        cols.sort_by_key(|c| c.ordinal_position);
704        cols
705    };
706
707    // Build lookup from JSON array position to column (for column_opx lookups).
708    // column_opx is a 0-based index into the columns array as ordered in the JSON.
709    let column_by_index: HashMap<u64, &DdColumn> = dd
710        .columns
711        .iter()
712        .enumerate()
713        .map(|(i, c)| (i as u64, c))
714        .collect();
715
716    // Filter user-facing columns:
717    //   hidden=1 (HT_VISIBLE) — normal visible columns
718    //   hidden=4 (HT_HIDDEN_USER) — user INVISIBLE columns (MySQL 8.0.23+)
719    // Excludes hidden=2 (HT_HIDDEN_SE: DB_TRX_ID, DB_ROLL_PTR, DB_ROW_ID)
720    //      and hidden=3 (HT_HIDDEN_SQL: functional index backing columns)
721    let visible_columns: Vec<&DdColumn> = {
722        let mut cols: Vec<&DdColumn> = all_columns
723            .iter()
724            .copied()
725            .filter(|c| c.hidden == 1 || c.hidden == 4)
726            .collect();
727        cols.sort_by_key(|c| c.ordinal_position);
728        cols
729    };
730
731    // Build ColumnDef list
732    let columns: Vec<ColumnDef> = visible_columns
733        .iter()
734        .map(|c| build_column_def(c))
735        .collect();
736
737    // Build IndexDef list
738    let indexes: Vec<IndexDef> = dd
739        .indexes
740        .iter()
741        .filter(|idx| !idx.hidden)
742        .map(|idx| build_index_def(idx, &column_by_index))
743        .collect();
744
745    // Build ForeignKeyDef list
746    let foreign_keys: Vec<ForeignKeyDef> = dd
747        .foreign_keys
748        .iter()
749        .map(|fk| build_fk_def(fk, &column_by_index))
750        .collect();
751
752    let row_fmt = row_format_name(dd.row_format);
753    let coll = collation_name(dd.collation_id);
754    let cs = charset_from_collation(dd.collation_id);
755    let mysql_ver = format_mysql_version(envelope.mysqld_version_id);
756
757    let schema_name = if dd.schema_ref.is_empty() {
758        None
759    } else {
760        Some(dd.schema_ref.clone())
761    };
762
763    let comment = if dd.comment.is_empty() {
764        None
765    } else {
766        Some(dd.comment.clone())
767    };
768
769    let mut schema = TableSchema {
770        schema_name,
771        table_name: dd.name.clone(),
772        engine: dd.engine.clone(),
773        row_format: Some(row_fmt.to_string()),
774        collation: coll.map(|s| s.to_string()),
775        charset: cs.map(|s| s.to_string()),
776        comment,
777        mysql_version: Some(mysql_ver),
778        source: "sdi".to_string(),
779        columns,
780        indexes,
781        foreign_keys,
782        ddl: String::new(),
783    };
784
785    schema.ddl = generate_ddl(&schema);
786    Ok(schema)
787}
788
789fn build_column_def(col: &DdColumn) -> ColumnDef {
790    let column_type = if !col.column_type_utf8.is_empty() {
791        col.column_type_utf8.clone()
792    } else {
793        dd_type_to_sql(col)
794    };
795
796    let default_value = if !col.default_option.is_empty() {
797        // default_option has expressions like "CURRENT_TIMESTAMP"
798        Some(col.default_option.clone())
799    } else if !col.has_no_default && !col.default_value_utf8_null {
800        // Numeric types should not be quoted in DDL
801        let is_numeric = matches!(col.dd_type, 1..=8);
802        if is_numeric {
803            Some(col.default_value_utf8.clone())
804        } else {
805            Some(format!("'{}'", col.default_value_utf8.replace('\'', "''")))
806        }
807    } else if !col.has_no_default && col.is_nullable && col.default_value_utf8_null {
808        Some("NULL".to_string())
809    } else {
810        None
811    };
812
813    let generation_expression = if !col.generation_expression_utf8.is_empty() {
814        Some(col.generation_expression_utf8.clone())
815    } else {
816        None
817    };
818
819    let is_virtual = if generation_expression.is_some() {
820        Some(col.is_virtual)
821    } else {
822        None
823    };
824
825    let comment = if col.comment.is_empty() {
826        None
827    } else {
828        Some(col.comment.clone())
829    };
830
831    let (version_added, version_dropped) = if let Some(ref spd) = col.se_private_data {
832        let map = parse_se_private_data(spd);
833        (
834            map.get("version_added").and_then(|v| v.parse().ok()),
835            map.get("version_dropped").and_then(|v| v.parse().ok()),
836        )
837    } else {
838        (None, None)
839    };
840
841    ColumnDef {
842        name: col.name.clone(),
843        column_type,
844        is_nullable: col.is_nullable,
845        default_value,
846        is_auto_increment: col.is_auto_increment,
847        generation_expression,
848        is_invisible: col.hidden == 4,
849        is_virtual,
850        comment,
851        version_added,
852        version_dropped,
853    }
854}
855
856fn build_index_def(idx: &DdIndex, columns: &HashMap<u64, &DdColumn>) -> IndexDef {
857    let index_type = match idx.index_type {
858        1 => "PRIMARY KEY",
859        2 => "UNIQUE KEY",
860        3 => "KEY",
861        4 => "FULLTEXT KEY",
862        5 => "SPATIAL KEY",
863        _ => "KEY",
864    };
865
866    let idx_columns: Vec<IndexColumnDef> = idx
867        .elements
868        .iter()
869        .filter(|e| !e.hidden)
870        .map(|e| {
871            let col_name = columns
872                .get(&e.column_opx)
873                .map(|c| c.name.clone())
874                .unwrap_or_else(|| format!("col_{}", e.column_opx));
875
876            let prefix_length = if e.length < 4294967295 {
877                // Detect prefix indexes: e.length is in bytes, char_length is in bytes
878                // for string types (but display width for numeric types). Convert to
879                // characters for comparison and DDL output.
880                let col = columns.get(&e.column_opx);
881                let full_len = col.map(|c| c.char_length).unwrap_or(0);
882                let max_bytes = col.map(|c| charset_max_bytes(c.collation_id)).unwrap_or(4);
883                let full_char_len = if max_bytes > 0 {
884                    full_len / max_bytes
885                } else {
886                    full_len
887                };
888                if e.length < full_char_len {
889                    // Convert byte-based e.length to characters for DDL
890                    let prefix_chars = if max_bytes > 0 {
891                        e.length / max_bytes
892                    } else {
893                        e.length
894                    };
895                    if prefix_chars > 0 {
896                        Some(prefix_chars)
897                    } else {
898                        Some(e.length)
899                    }
900                } else {
901                    None
902                }
903            } else {
904                None
905            };
906
907            let order = if e.order == 1 {
908                Some("DESC".to_string())
909            } else {
910                None
911            };
912
913            IndexColumnDef {
914                name: col_name,
915                prefix_length,
916                order,
917            }
918        })
919        .collect();
920
921    let comment = if idx.comment.is_empty() {
922        None
923    } else {
924        Some(idx.comment.clone())
925    };
926
927    IndexDef {
928        name: idx.name.clone(),
929        index_type: index_type.to_string(),
930        columns: idx_columns,
931        comment,
932        is_visible: idx.is_visible,
933    }
934}
935
936fn build_fk_def(fk: &DdForeignKey, columns: &HashMap<u64, &DdColumn>) -> ForeignKeyDef {
937    let fk_columns: Vec<String> = fk
938        .elements
939        .iter()
940        .map(|e| {
941            columns
942                .get(&e.column_opx)
943                .map(|c| c.name.clone())
944                .unwrap_or_else(|| format!("col_{}", e.column_opx))
945        })
946        .collect();
947
948    let ref_columns: Vec<String> = fk
949        .elements
950        .iter()
951        .map(|e| e.referenced_column_name.clone())
952        .collect();
953
954    let ref_table = if fk.referenced_table_schema_name.is_empty() {
955        format!("`{}`", fk.referenced_table_name)
956    } else {
957        format!(
958            "`{}`.`{}`",
959            fk.referenced_table_schema_name, fk.referenced_table_name
960        )
961    };
962
963    ForeignKeyDef {
964        name: fk.name.clone(),
965        columns: fk_columns,
966        referenced_table: ref_table,
967        referenced_columns: ref_columns,
968        on_update: fk_rule_name(fk.update_rule).to_string(),
969        on_delete: fk_rule_name(fk.delete_rule).to_string(),
970    }
971}
972
973// ---------------------------------------------------------------------------
974// se_private_data parsing
975// ---------------------------------------------------------------------------
976
977/// Parse a semicolon-delimited `key=value` string into a HashMap.
978///
979/// InnoDB stores metadata in `se_private_data` fields using the format
980/// `"key1=value1;key2=value2;"`. This parser handles the format used
981/// by both table-level and column-level private data.
982///
983/// # Examples
984///
985/// ```
986/// use idb::innodb::schema::parse_se_private_data;
987///
988/// let map = parse_se_private_data("version_added=1;physical_pos=5;");
989/// assert_eq!(map.get("version_added"), Some(&"1".to_string()));
990/// assert_eq!(map.get("physical_pos"), Some(&"5".to_string()));
991///
992/// let empty = parse_se_private_data("");
993/// assert!(empty.is_empty());
994/// ```
995pub fn parse_se_private_data(data: &str) -> HashMap<String, String> {
996    data.split(';')
997        .filter(|s| !s.is_empty())
998        .filter_map(|kv| {
999            let mut parts = kv.splitn(2, '=');
1000            let k = parts.next()?.to_string();
1001            let v = parts.next()?.to_string();
1002            Some((k, v))
1003        })
1004        .collect()
1005}
1006
1007/// Check whether a table's `se_private_data` indicates instant columns.
1008///
1009/// MySQL 8.0.12+ stores `instant_col=N` in the table's `se_private_data`
1010/// when columns have been added via instant DDL.
1011///
1012/// # Examples
1013///
1014/// ```
1015/// use idb::innodb::schema::has_instant_columns;
1016///
1017/// assert!(has_instant_columns("instant_col=3;table_id=1234;"));
1018/// assert!(!has_instant_columns("table_id=1234;"));
1019/// assert!(!has_instant_columns(""));
1020/// ```
1021pub fn has_instant_columns(se_private_data: &str) -> bool {
1022    parse_se_private_data(se_private_data).contains_key("instant_col")
1023}
1024
1025// ---------------------------------------------------------------------------
1026// DDL generation
1027// ---------------------------------------------------------------------------
1028
1029/// Generate `CREATE TABLE` DDL from a [`TableSchema`].
1030///
1031/// Produces MySQL-compatible DDL including column definitions, primary key,
1032/// secondary indexes, foreign keys, and table options.
1033pub fn generate_ddl(schema: &TableSchema) -> String {
1034    let mut ddl = format!("CREATE TABLE `{}` (\n", schema.table_name);
1035    let mut parts: Vec<String> = Vec::new();
1036
1037    // Columns
1038    for col in &schema.columns {
1039        parts.push(format_column_ddl(col));
1040    }
1041
1042    // Indexes
1043    for idx in &schema.indexes {
1044        parts.push(format_index_ddl(idx));
1045    }
1046
1047    // Foreign keys
1048    for fk in &schema.foreign_keys {
1049        parts.push(format_fk_ddl(fk));
1050    }
1051
1052    ddl.push_str(&parts.join(",\n"));
1053    ddl.push_str("\n)");
1054
1055    // Table options
1056    let mut options = Vec::new();
1057    options.push(format!("ENGINE={}", schema.engine));
1058    if let Some(ref cs) = schema.charset {
1059        options.push(format!("DEFAULT CHARSET={}", cs));
1060    }
1061    if let Some(ref coll) = schema.collation {
1062        options.push(format!("COLLATE={}", coll));
1063    }
1064    if let Some(ref fmt) = schema.row_format {
1065        if fmt != "DYNAMIC" {
1066            // DYNAMIC is the default, only show non-default
1067            options.push(format!("ROW_FORMAT={}", fmt));
1068        }
1069    }
1070    if let Some(ref comment) = schema.comment {
1071        options.push(format!("COMMENT='{}'", comment.replace('\'', "''")));
1072    }
1073
1074    if !options.is_empty() {
1075        ddl.push(' ');
1076        ddl.push_str(&options.join(" "));
1077    }
1078    ddl.push(';');
1079
1080    ddl
1081}
1082
1083fn format_column_ddl(col: &ColumnDef) -> String {
1084    let mut parts = vec![format!("  `{}` {}", col.name, col.column_type)];
1085
1086    if !col.is_nullable {
1087        parts.push("NOT NULL".to_string());
1088    }
1089
1090    if let Some(ref default) = col.default_value {
1091        parts.push(format!("DEFAULT {}", default));
1092    }
1093
1094    if col.is_auto_increment {
1095        parts.push("AUTO_INCREMENT".to_string());
1096    }
1097
1098    if let Some(ref expr) = col.generation_expression {
1099        let stored_or_virtual = if col.is_virtual == Some(true) {
1100            "VIRTUAL"
1101        } else {
1102            "STORED"
1103        };
1104        parts.push(format!(
1105            "GENERATED ALWAYS AS ({}) {}",
1106            expr, stored_or_virtual
1107        ));
1108    }
1109
1110    if col.is_invisible {
1111        parts.push("/*!80023 INVISIBLE */".to_string());
1112    }
1113
1114    if let Some(ref comment) = col.comment {
1115        parts.push(format!("COMMENT '{}'", comment.replace('\'', "''")));
1116    }
1117
1118    parts.join(" ")
1119}
1120
1121fn format_index_ddl(idx: &IndexDef) -> String {
1122    let cols = format_index_columns(&idx.columns);
1123
1124    let visibility = if !idx.is_visible {
1125        " /*!80000 INVISIBLE */"
1126    } else {
1127        ""
1128    };
1129
1130    let comment = if let Some(ref c) = idx.comment {
1131        format!(" COMMENT '{}'", c.replace('\'', "''"))
1132    } else {
1133        String::new()
1134    };
1135
1136    match idx.index_type.as_str() {
1137        "PRIMARY KEY" => format!("  PRIMARY KEY ({}){}{}", cols, comment, visibility),
1138        _ => format!(
1139            "  {} `{}` ({}){}{}",
1140            idx.index_type, idx.name, cols, comment, visibility
1141        ),
1142    }
1143}
1144
1145fn format_index_columns(columns: &[IndexColumnDef]) -> String {
1146    columns
1147        .iter()
1148        .map(|c| {
1149            let mut s = format!("`{}`", c.name);
1150            if let Some(len) = c.prefix_length {
1151                s.push_str(&format!("({})", len));
1152            }
1153            if let Some(ref ord) = c.order {
1154                s.push(' ');
1155                s.push_str(ord);
1156            }
1157            s
1158        })
1159        .collect::<Vec<_>>()
1160        .join(", ")
1161}
1162
1163fn format_fk_ddl(fk: &ForeignKeyDef) -> String {
1164    let cols = fk
1165        .columns
1166        .iter()
1167        .map(|c| format!("`{}`", c))
1168        .collect::<Vec<_>>()
1169        .join(", ");
1170    let ref_cols = fk
1171        .referenced_columns
1172        .iter()
1173        .map(|c| format!("`{}`", c))
1174        .collect::<Vec<_>>()
1175        .join(", ");
1176
1177    let mut s = format!(
1178        "  CONSTRAINT `{}` FOREIGN KEY ({}) REFERENCES {} ({})",
1179        fk.name, cols, fk.referenced_table, ref_cols
1180    );
1181
1182    if fk.on_delete != "NO ACTION" {
1183        s.push_str(&format!(" ON DELETE {}", fk.on_delete));
1184    }
1185    if fk.on_update != "NO ACTION" {
1186        s.push_str(&format!(" ON UPDATE {}", fk.on_update));
1187    }
1188
1189    s
1190}
1191
1192// ---------------------------------------------------------------------------
1193// Pre-8.0 inference
1194// ---------------------------------------------------------------------------
1195
1196/// Infer basic schema information from INDEX page structure (pre-8.0 fallback).
1197///
1198/// Scans all pages in the tablespace, collects INDEX page metadata (index_id,
1199/// level, compact/redundant format), and returns a summary of detected indexes.
1200pub fn infer_schema_from_pages(
1201    ts: &mut crate::innodb::tablespace::Tablespace,
1202) -> Result<InferredSchema, IdbError> {
1203    use crate::innodb::index::IndexHeader;
1204    use crate::innodb::page::FilHeader;
1205    use crate::innodb::page_types::PageType;
1206    use std::collections::BTreeMap;
1207
1208    let page_count = ts.page_count();
1209    let mut is_compact = true;
1210    let mut index_stats: BTreeMap<u64, (u64, u16)> = BTreeMap::new(); // index_id -> (leaf_count, max_level)
1211
1212    for page_num in 0..page_count {
1213        let page_data = match ts.read_page(page_num) {
1214            Ok(d) => d,
1215            Err(_) => continue,
1216        };
1217
1218        let header = match FilHeader::parse(&page_data) {
1219            Some(h) => h,
1220            None => continue,
1221        };
1222
1223        if header.page_type != PageType::Index {
1224            continue;
1225        }
1226
1227        let idx = match IndexHeader::parse(&page_data) {
1228            Some(h) => h,
1229            None => continue,
1230        };
1231
1232        if !idx.is_compact() {
1233            is_compact = false;
1234        }
1235
1236        let entry = index_stats.entry(idx.index_id).or_insert((0, 0));
1237        if idx.is_leaf() {
1238            entry.0 += 1;
1239        }
1240        if idx.level > entry.1 {
1241            entry.1 = idx.level;
1242        }
1243    }
1244
1245    let indexes = index_stats
1246        .into_iter()
1247        .map(|(index_id, (leaf_pages, max_level))| InferredIndex {
1248            index_id,
1249            leaf_pages,
1250            max_level,
1251        })
1252        .collect();
1253
1254    Ok(InferredSchema {
1255        source: "Inferred (no SDI metadata available)".to_string(),
1256        record_format: if is_compact { "COMPACT" } else { "REDUNDANT" }.to_string(),
1257        indexes,
1258    })
1259}
1260
1261// ---------------------------------------------------------------------------
1262// Tests
1263// ---------------------------------------------------------------------------
1264
1265#[cfg(test)]
1266mod tests {
1267    use super::*;
1268
1269    #[test]
1270    fn test_parse_se_private_data() {
1271        let data = "version_added=1;physical_pos=5;";
1272        let map = parse_se_private_data(data);
1273        assert_eq!(map.get("version_added"), Some(&"1".to_string()));
1274        assert_eq!(map.get("physical_pos"), Some(&"5".to_string()));
1275    }
1276
1277    #[test]
1278    fn test_parse_se_private_data_empty() {
1279        let map = parse_se_private_data("");
1280        assert!(map.is_empty());
1281    }
1282
1283    #[test]
1284    fn test_has_instant_columns() {
1285        assert!(has_instant_columns("instant_col=3;table_id=1234;"));
1286        assert!(!has_instant_columns("table_id=1234;"));
1287        assert!(!has_instant_columns(""));
1288    }
1289
1290    #[test]
1291    fn test_extract_schema_with_instant_columns() {
1292        let json = r#"{
1293            "mysqld_version_id": 80040,
1294            "dd_object_type": "Table",
1295            "dd_object": {
1296                "name": "t",
1297                "engine": "InnoDB",
1298                "collation_id": 255,
1299                "row_format": 2,
1300                "se_private_data": "instant_col=1;",
1301                "columns": [
1302                    {
1303                        "name": "id",
1304                        "type": 4,
1305                        "column_type_utf8": "int",
1306                        "ordinal_position": 1,
1307                        "hidden": 1,
1308                        "is_nullable": false,
1309                        "is_auto_increment": true,
1310                        "has_no_default": true
1311                    },
1312                    {
1313                        "name": "added_col",
1314                        "type": 4,
1315                        "column_type_utf8": "int",
1316                        "ordinal_position": 2,
1317                        "hidden": 1,
1318                        "is_nullable": true,
1319                        "se_private_data": "version_added=1;physical_pos=2;"
1320                    }
1321                ],
1322                "indexes": [],
1323                "foreign_keys": []
1324            }
1325        }"#;
1326
1327        let schema = extract_schema_from_sdi(json).unwrap();
1328        assert_eq!(schema.columns.len(), 2);
1329        assert_eq!(schema.columns[1].name, "added_col");
1330        assert_eq!(schema.columns[1].version_added, Some(1));
1331        assert_eq!(schema.columns[1].version_dropped, None);
1332
1333        // Table-level instant detection
1334        let envelope: SdiEnvelope = serde_json::from_str(json).unwrap();
1335        assert!(has_instant_columns(
1336            envelope.dd_object.se_private_data.as_deref().unwrap_or("")
1337        ));
1338    }
1339
1340    #[test]
1341    fn test_collation_name() {
1342        assert_eq!(collation_name(255), Some("utf8mb4_0900_ai_ci"));
1343        assert_eq!(collation_name(63), Some("binary"));
1344        assert_eq!(collation_name(45), Some("utf8mb4_general_ci"));
1345        assert_eq!(collation_name(46), Some("utf8mb4_bin"));
1346        assert_eq!(collation_name(33), Some("utf8mb3_general_ci"));
1347        assert_eq!(collation_name(0), None);
1348    }
1349
1350    #[test]
1351    fn test_charset_from_collation() {
1352        assert_eq!(charset_from_collation(255), Some("utf8mb4"));
1353        assert_eq!(charset_from_collation(63), Some("binary"));
1354        assert_eq!(charset_from_collation(8), Some("latin1"));
1355        assert_eq!(charset_from_collation(33), Some("utf8mb3"));
1356        assert_eq!(charset_from_collation(0), None);
1357    }
1358
1359    #[test]
1360    fn test_row_format_name() {
1361        assert_eq!(row_format_name(1), "FIXED");
1362        assert_eq!(row_format_name(2), "DYNAMIC");
1363        assert_eq!(row_format_name(3), "COMPRESSED");
1364        assert_eq!(row_format_name(99), "UNKNOWN");
1365    }
1366
1367    #[test]
1368    fn test_fk_rule_name() {
1369        assert_eq!(fk_rule_name(0), "NO ACTION");
1370        assert_eq!(fk_rule_name(1), "RESTRICT");
1371        assert_eq!(fk_rule_name(2), "CASCADE");
1372        assert_eq!(fk_rule_name(3), "SET NULL");
1373        assert_eq!(fk_rule_name(4), "SET DEFAULT");
1374    }
1375
1376    #[test]
1377    fn test_dd_type_to_sql_int() {
1378        let col = DdColumn {
1379            dd_type: 4,
1380            numeric_precision: 10,
1381            ..Default::default()
1382        };
1383        assert_eq!(dd_type_to_sql(&col), "int");
1384    }
1385
1386    #[test]
1387    fn test_dd_type_to_sql_varchar() {
1388        let col = DdColumn {
1389            dd_type: 16,
1390            char_length: 400,
1391            collation_id: 255, // utf8mb4 = 4 bytes/char
1392            ..Default::default()
1393        };
1394        assert_eq!(dd_type_to_sql(&col), "varchar(100)");
1395    }
1396
1397    #[test]
1398    fn test_dd_type_to_sql_decimal() {
1399        let col = DdColumn {
1400            dd_type: 6,
1401            numeric_precision: 10,
1402            numeric_scale: 2,
1403            ..Default::default()
1404        };
1405        assert_eq!(dd_type_to_sql(&col), "decimal(10,2)");
1406    }
1407
1408    #[test]
1409    fn test_dd_type_to_sql_text() {
1410        let col = DdColumn {
1411            dd_type: 27,
1412            char_length: 65535,
1413            ..Default::default()
1414        };
1415        assert_eq!(dd_type_to_sql(&col), "text");
1416
1417        let col = DdColumn {
1418            dd_type: 27,
1419            char_length: 16777215,
1420            ..Default::default()
1421        };
1422        assert_eq!(dd_type_to_sql(&col), "mediumtext");
1423    }
1424
1425    #[test]
1426    fn test_format_mysql_version() {
1427        assert_eq!(format_mysql_version(90001), "9.0.1");
1428        assert_eq!(format_mysql_version(80040), "8.0.40");
1429        assert_eq!(format_mysql_version(0), "unknown");
1430    }
1431
1432    #[test]
1433    fn test_extract_schema_from_sdi_minimal() {
1434        let json = r#"{
1435            "mysqld_version_id": 90001,
1436            "dd_object_type": "Table",
1437            "dd_object": {
1438                "name": "users",
1439                "schema_ref": "myapp",
1440                "engine": "InnoDB",
1441                "collation_id": 255,
1442                "row_format": 2,
1443                "columns": [
1444                    {
1445                        "name": "id",
1446                        "type": 4,
1447                        "column_type_utf8": "int unsigned",
1448                        "ordinal_position": 1,
1449                        "hidden": 1,
1450                        "is_nullable": false,
1451                        "is_auto_increment": true,
1452                        "has_no_default": true
1453                    },
1454                    {
1455                        "name": "email",
1456                        "type": 16,
1457                        "column_type_utf8": "varchar(255)",
1458                        "ordinal_position": 2,
1459                        "hidden": 1,
1460                        "is_nullable": false,
1461                        "has_no_default": true
1462                    },
1463                    {
1464                        "name": "DB_TRX_ID",
1465                        "type": 10,
1466                        "ordinal_position": 3,
1467                        "hidden": 2
1468                    },
1469                    {
1470                        "name": "DB_ROLL_PTR",
1471                        "type": 9,
1472                        "ordinal_position": 4,
1473                        "hidden": 2
1474                    }
1475                ],
1476                "indexes": [
1477                    {
1478                        "name": "PRIMARY",
1479                        "type": 1,
1480                        "hidden": false,
1481                        "is_visible": true,
1482                        "elements": [
1483                            { "column_opx": 0, "hidden": false, "length": 4, "order": 2 },
1484                            { "column_opx": 2, "hidden": true, "length": 4294967295, "order": 2 },
1485                            { "column_opx": 3, "hidden": true, "length": 4294967295, "order": 2 }
1486                        ]
1487                    },
1488                    {
1489                        "name": "idx_email",
1490                        "type": 2,
1491                        "hidden": false,
1492                        "is_visible": true,
1493                        "elements": [
1494                            { "column_opx": 1, "hidden": false, "length": 4294967295, "order": 2 },
1495                            { "column_opx": 0, "hidden": true, "length": 4294967295, "order": 2 }
1496                        ]
1497                    }
1498                ],
1499                "foreign_keys": []
1500            }
1501        }"#;
1502
1503        let schema = extract_schema_from_sdi(json).unwrap();
1504        assert_eq!(schema.table_name, "users");
1505        assert_eq!(schema.schema_name, Some("myapp".to_string()));
1506        assert_eq!(schema.engine, "InnoDB");
1507        assert_eq!(schema.source, "sdi");
1508        assert_eq!(schema.columns.len(), 2);
1509        assert_eq!(schema.columns[0].name, "id");
1510        assert_eq!(schema.columns[0].column_type, "int unsigned");
1511        assert!(schema.columns[0].is_auto_increment);
1512        assert_eq!(schema.columns[1].name, "email");
1513        assert_eq!(schema.columns[1].column_type, "varchar(255)");
1514        assert_eq!(schema.indexes.len(), 2);
1515        assert_eq!(schema.indexes[0].index_type, "PRIMARY KEY");
1516        assert_eq!(schema.indexes[0].columns.len(), 1); // hidden elements filtered
1517        assert_eq!(schema.indexes[1].index_type, "UNIQUE KEY");
1518        assert_eq!(schema.indexes[1].name, "idx_email");
1519        assert!(schema.ddl.contains("CREATE TABLE `users`"));
1520        assert!(schema
1521            .ddl
1522            .contains("`id` int unsigned NOT NULL AUTO_INCREMENT"));
1523        assert!(schema.ddl.contains("PRIMARY KEY (`id`)"));
1524        assert!(schema.ddl.contains("UNIQUE KEY `idx_email` (`email`)"));
1525    }
1526
1527    #[test]
1528    fn test_extract_schema_with_fk() {
1529        let json = r#"{
1530            "mysqld_version_id": 80040,
1531            "dd_object_type": "Table",
1532            "dd_object": {
1533                "name": "orders",
1534                "schema_ref": "shop",
1535                "engine": "InnoDB",
1536                "collation_id": 255,
1537                "row_format": 2,
1538                "columns": [
1539                    {
1540                        "name": "id",
1541                        "type": 4,
1542                        "column_type_utf8": "int",
1543                        "ordinal_position": 1,
1544                        "hidden": 1,
1545                        "is_nullable": false,
1546                        "is_auto_increment": true
1547                    },
1548                    {
1549                        "name": "user_id",
1550                        "type": 4,
1551                        "column_type_utf8": "int",
1552                        "ordinal_position": 2,
1553                        "hidden": 1,
1554                        "is_nullable": false
1555                    }
1556                ],
1557                "indexes": [
1558                    {
1559                        "name": "PRIMARY",
1560                        "type": 1,
1561                        "hidden": false,
1562                        "is_visible": true,
1563                        "elements": [
1564                            { "column_opx": 0, "hidden": false, "length": 4, "order": 2 }
1565                        ]
1566                    }
1567                ],
1568                "foreign_keys": [
1569                    {
1570                        "name": "fk_orders_user",
1571                        "referenced_table_schema_name": "shop",
1572                        "referenced_table_name": "users",
1573                        "update_rule": 0,
1574                        "delete_rule": 2,
1575                        "elements": [
1576                            { "column_opx": 1, "referenced_column_name": "id" }
1577                        ]
1578                    }
1579                ]
1580            }
1581        }"#;
1582
1583        let schema = extract_schema_from_sdi(json).unwrap();
1584        assert_eq!(schema.foreign_keys.len(), 1);
1585        let fk = &schema.foreign_keys[0];
1586        assert_eq!(fk.name, "fk_orders_user");
1587        assert_eq!(fk.columns, vec!["user_id"]);
1588        assert_eq!(fk.referenced_table, "`shop`.`users`");
1589        assert_eq!(fk.referenced_columns, vec!["id"]);
1590        assert_eq!(fk.on_delete, "CASCADE");
1591        assert_eq!(fk.on_update, "NO ACTION");
1592        assert!(schema.ddl.contains("CONSTRAINT `fk_orders_user` FOREIGN KEY (`user_id`) REFERENCES `shop`.`users` (`id`) ON DELETE CASCADE"));
1593    }
1594
1595    #[test]
1596    fn test_extract_schema_with_generated_column() {
1597        let json = r#"{
1598            "mysqld_version_id": 80040,
1599            "dd_object_type": "Table",
1600            "dd_object": {
1601                "name": "products",
1602                "schema_ref": "shop",
1603                "engine": "InnoDB",
1604                "collation_id": 255,
1605                "row_format": 2,
1606                "columns": [
1607                    {
1608                        "name": "price",
1609                        "type": 6,
1610                        "column_type_utf8": "decimal(10,2)",
1611                        "ordinal_position": 1,
1612                        "hidden": 1,
1613                        "is_nullable": false
1614                    },
1615                    {
1616                        "name": "tax",
1617                        "type": 6,
1618                        "column_type_utf8": "decimal(10,2)",
1619                        "ordinal_position": 2,
1620                        "hidden": 1,
1621                        "is_nullable": true,
1622                        "is_virtual": true,
1623                        "generation_expression_utf8": "`price` * 0.1"
1624                    }
1625                ],
1626                "indexes": [],
1627                "foreign_keys": []
1628            }
1629        }"#;
1630
1631        let schema = extract_schema_from_sdi(json).unwrap();
1632        assert_eq!(schema.columns.len(), 2);
1633        let tax = &schema.columns[1];
1634        assert_eq!(tax.generation_expression, Some("`price` * 0.1".to_string()));
1635        assert_eq!(tax.is_virtual, Some(true));
1636        assert!(schema
1637            .ddl
1638            .contains("GENERATED ALWAYS AS (`price` * 0.1) VIRTUAL"));
1639    }
1640
1641    #[test]
1642    fn test_ddl_generation_table_options() {
1643        let schema = TableSchema {
1644            schema_name: Some("mydb".to_string()),
1645            table_name: "test".to_string(),
1646            engine: "InnoDB".to_string(),
1647            row_format: Some("COMPRESSED".to_string()),
1648            collation: Some("utf8mb4_0900_ai_ci".to_string()),
1649            charset: Some("utf8mb4".to_string()),
1650            comment: None,
1651            mysql_version: Some("8.0.40".to_string()),
1652            source: "sdi".to_string(),
1653            columns: vec![ColumnDef {
1654                name: "id".to_string(),
1655                column_type: "int".to_string(),
1656                is_nullable: false,
1657                default_value: None,
1658                is_auto_increment: true,
1659                generation_expression: None,
1660                is_invisible: false,
1661                is_virtual: None,
1662                comment: None,
1663                version_added: None,
1664                version_dropped: None,
1665            }],
1666            indexes: vec![IndexDef {
1667                name: "PRIMARY".to_string(),
1668                index_type: "PRIMARY KEY".to_string(),
1669                columns: vec![IndexColumnDef {
1670                    name: "id".to_string(),
1671                    prefix_length: None,
1672                    order: None,
1673                }],
1674                comment: None,
1675                is_visible: true,
1676            }],
1677            foreign_keys: vec![],
1678            ddl: String::new(),
1679        };
1680
1681        let ddl = generate_ddl(&schema);
1682        assert!(ddl.contains("ENGINE=InnoDB"));
1683        assert!(ddl.contains("DEFAULT CHARSET=utf8mb4"));
1684        assert!(ddl.contains("COLLATE=utf8mb4_0900_ai_ci"));
1685        assert!(ddl.contains("ROW_FORMAT=COMPRESSED"));
1686    }
1687
1688    #[test]
1689    fn test_hidden_column_filtering() {
1690        let json = r#"{
1691            "mysqld_version_id": 90001,
1692            "dd_object_type": "Table",
1693            "dd_object": {
1694                "name": "t",
1695                "engine": "InnoDB",
1696                "collation_id": 255,
1697                "row_format": 2,
1698                "columns": [
1699                    { "name": "a", "type": 4, "column_type_utf8": "int", "ordinal_position": 1, "hidden": 1 },
1700                    { "name": "b", "type": 4, "column_type_utf8": "int", "ordinal_position": 2, "hidden": 1 },
1701                    { "name": "DB_TRX_ID", "type": 10, "ordinal_position": 3, "hidden": 2 },
1702                    { "name": "DB_ROLL_PTR", "type": 9, "ordinal_position": 4, "hidden": 2 },
1703                    { "name": "DB_ROW_ID", "type": 10, "ordinal_position": 5, "hidden": 2 }
1704                ],
1705                "indexes": [],
1706                "foreign_keys": []
1707            }
1708        }"#;
1709
1710        let schema = extract_schema_from_sdi(json).unwrap();
1711        assert_eq!(schema.columns.len(), 2);
1712        assert_eq!(schema.columns[0].name, "a");
1713        assert_eq!(schema.columns[1].name, "b");
1714    }
1715
1716    #[test]
1717    fn test_nullable_column_default_null() {
1718        let json = r#"{
1719            "mysqld_version_id": 90001,
1720            "dd_object_type": "Table",
1721            "dd_object": {
1722                "name": "t",
1723                "engine": "InnoDB",
1724                "collation_id": 255,
1725                "row_format": 2,
1726                "columns": [
1727                    {
1728                        "name": "notes",
1729                        "type": 16,
1730                        "column_type_utf8": "varchar(255)",
1731                        "ordinal_position": 1,
1732                        "hidden": 1,
1733                        "is_nullable": true,
1734                        "has_no_default": false,
1735                        "default_value_utf8": "",
1736                        "default_value_utf8_null": true
1737                    }
1738                ],
1739                "indexes": [],
1740                "foreign_keys": []
1741            }
1742        }"#;
1743
1744        let schema = extract_schema_from_sdi(json).unwrap();
1745        assert_eq!(schema.columns[0].default_value, Some("NULL".to_string()));
1746        assert!(schema.ddl.contains("DEFAULT NULL"));
1747    }
1748
1749    #[test]
1750    fn test_empty_string_default() {
1751        let json = r#"{
1752            "mysqld_version_id": 90001,
1753            "dd_object_type": "Table",
1754            "dd_object": {
1755                "name": "t",
1756                "engine": "InnoDB",
1757                "collation_id": 255,
1758                "row_format": 2,
1759                "columns": [
1760                    {
1761                        "name": "tag",
1762                        "type": 16,
1763                        "column_type_utf8": "varchar(50)",
1764                        "ordinal_position": 1,
1765                        "hidden": 1,
1766                        "is_nullable": false,
1767                        "has_no_default": false,
1768                        "default_value_utf8": "",
1769                        "default_value_utf8_null": false
1770                    }
1771                ],
1772                "indexes": [],
1773                "foreign_keys": []
1774            }
1775        }"#;
1776
1777        let schema = extract_schema_from_sdi(json).unwrap();
1778        assert_eq!(schema.columns[0].default_value, Some("''".to_string()));
1779        assert!(schema.ddl.contains("DEFAULT ''"));
1780    }
1781
1782    #[test]
1783    fn test_invisible_column() {
1784        let json = r#"{
1785            "mysqld_version_id": 80040,
1786            "dd_object_type": "Table",
1787            "dd_object": {
1788                "name": "t",
1789                "engine": "InnoDB",
1790                "collation_id": 255,
1791                "row_format": 2,
1792                "columns": [
1793                    {
1794                        "name": "id",
1795                        "type": 4,
1796                        "column_type_utf8": "int",
1797                        "ordinal_position": 1,
1798                        "hidden": 1,
1799                        "is_nullable": false,
1800                        "is_auto_increment": true,
1801                        "has_no_default": true
1802                    },
1803                    {
1804                        "name": "secret",
1805                        "type": 16,
1806                        "column_type_utf8": "varchar(100)",
1807                        "ordinal_position": 2,
1808                        "hidden": 4,
1809                        "is_nullable": true,
1810                        "has_no_default": false,
1811                        "default_value_utf8_null": true
1812                    },
1813                    {
1814                        "name": "DB_TRX_ID",
1815                        "type": 10,
1816                        "column_type_utf8": "",
1817                        "ordinal_position": 3,
1818                        "hidden": 2
1819                    }
1820                ],
1821                "indexes": [],
1822                "foreign_keys": []
1823            }
1824        }"#;
1825
1826        let schema = extract_schema_from_sdi(json).unwrap();
1827        // Should include both visible (hidden=1) and invisible (hidden=4), skip SE-hidden (hidden=2)
1828        assert_eq!(schema.columns.len(), 2);
1829        assert_eq!(schema.columns[0].name, "id");
1830        assert!(!schema.columns[0].is_invisible);
1831        assert_eq!(schema.columns[1].name, "secret");
1832        assert!(schema.columns[1].is_invisible);
1833        // DDL should contain INVISIBLE marker
1834        assert!(schema.ddl.contains("/*!80023 INVISIBLE */"));
1835        // DDL should not contain the SE-hidden column
1836        assert!(!schema.ddl.contains("DB_TRX_ID"));
1837    }
1838
1839    #[test]
1840    fn test_index_desc_order() {
1841        let json = r#"{
1842            "mysqld_version_id": 80040,
1843            "dd_object_type": "Table",
1844            "dd_object": {
1845                "name": "t",
1846                "engine": "InnoDB",
1847                "collation_id": 255,
1848                "row_format": 2,
1849                "columns": [
1850                    { "name": "a", "type": 4, "column_type_utf8": "int", "ordinal_position": 1, "hidden": 1 },
1851                    { "name": "b", "type": 4, "column_type_utf8": "int", "ordinal_position": 2, "hidden": 1 }
1852                ],
1853                "indexes": [
1854                    {
1855                        "name": "idx_b_desc",
1856                        "type": 3,
1857                        "hidden": false,
1858                        "is_visible": true,
1859                        "elements": [
1860                            { "column_opx": 1, "hidden": false, "length": 4294967295, "order": 1 }
1861                        ]
1862                    }
1863                ],
1864                "foreign_keys": []
1865            }
1866        }"#;
1867
1868        let schema = extract_schema_from_sdi(json).unwrap();
1869        assert_eq!(schema.indexes[0].columns[0].order, Some("DESC".to_string()));
1870        assert!(schema.ddl.contains("`b` DESC"));
1871    }
1872}