Skip to main content

mdql_core/
loader.rs

1//! Orchestrate loading a table folder into validated rows.
2
3use std::collections::HashMap;
4use std::path::Path;
5
6use rayon::prelude::*;
7
8use crate::database::{DatabaseConfig, load_database_config};
9use crate::errors::ValidationError;
10use crate::model::{Row, to_row};
11use crate::parser::parse_file;
12use crate::schema::{MDQL_FILENAME, Schema, load_schema};
13use crate::validator::validate_file;
14
15/// Load all markdown files in a folder, validate, and return rows.
16pub fn load_table(
17    folder: &Path,
18) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
19    let schema = load_schema(folder)?;
20    let (rows, errors) = load_md_files(folder, &schema, None)?;
21    Ok((schema, rows, errors))
22}
23
24/// Load with an optional mtime-based cache. Unchanged files are served from cache.
25pub fn load_table_cached(
26    folder: &Path,
27    cache: &mut crate::cache::TableCache,
28) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
29    let schema = load_schema(folder)?;
30    let (rows, errors) = load_md_files(folder, &schema, Some(cache))?;
31    Ok((schema, rows, errors))
32}
33
34fn load_md_files(
35    folder: &Path,
36    schema: &Schema,
37    mut cache: Option<&mut crate::cache::TableCache>,
38) -> crate::errors::Result<(Vec<Row>, Vec<ValidationError>)> {
39    let mut md_files: Vec<_> = std::fs::read_dir(folder)?
40        .filter_map(|e| e.ok())
41        .filter(|e| {
42            let name = e.file_name();
43            let name_str = name.to_string_lossy();
44            name_str.ends_with(".md") && name_str != MDQL_FILENAME
45        })
46        .map(|e| e.path())
47        .collect();
48    md_files.sort();
49
50    // If cache is fresh (dir mtime unchanged), try to serve all from cache
51    if let Some(ref cache) = cache {
52        if !cache.is_stale(folder) {
53            let mut rows = Vec::new();
54            let mut all_cached = true;
55            for md_file in &md_files {
56                let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
57                if let Some(mtime) = crate::cache::file_mtime(md_file) {
58                    if let Some(row) = cache.get(&rel, mtime) {
59                        rows.push(row.clone());
60                        continue;
61                    }
62                }
63                all_cached = false;
64                break;
65            }
66            if all_cached {
67                return Ok((rows, Vec::new()));
68            }
69        }
70    }
71
72    // Parse (possibly in parallel)
73    let results: Vec<_> = md_files
74        .par_iter()
75        .map(|md_file| {
76            let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
77            let parsed = parse_file(
78                md_file,
79                Some(folder),
80                schema.rules.normalize_numbered_headings,
81            );
82            match parsed {
83                Ok(p) => {
84                    let errors = validate_file(&p, schema);
85                    if errors.is_empty() {
86                        let row = to_row(&p, schema);
87                        let mtime = crate::cache::file_mtime(md_file);
88                        (Some((rel, row, mtime)), errors)
89                    } else {
90                        (None, errors)
91                    }
92                }
93                Err(e) => {
94                    let ve = ValidationError {
95                        file_path: md_file.to_string_lossy().to_string(),
96                        error_type: "parse_error".to_string(),
97                        field: None,
98                        message: e.to_string(),
99                        line_number: None,
100                    };
101                    (None, vec![ve])
102                }
103            }
104        })
105        .collect();
106
107    let mut rows = Vec::new();
108    let mut all_errors = Vec::new();
109    for (row_opt, errors) in results {
110        all_errors.extend(errors);
111        if let Some((rel, row, mtime)) = row_opt {
112            if let Some(ref mut c) = cache {
113                if let Some(mt) = mtime {
114                    c.put(rel, mt, row.clone());
115                }
116            }
117            rows.push(row);
118        }
119    }
120
121    if let Some(c) = cache {
122        c.set_table_mtime(folder);
123    }
124
125    Ok((rows, all_errors))
126}
127
128/// Load a multi-table database directory.
129pub fn load_database(
130    db_dir: &Path,
131) -> crate::errors::Result<(
132    DatabaseConfig,
133    HashMap<String, (Schema, Vec<Row>)>,
134    Vec<ValidationError>,
135)> {
136    let db_config = load_database_config(db_dir)?;
137
138    let mut tables: HashMap<String, (Schema, Vec<Row>)> = HashMap::new();
139    let mut all_errors = Vec::new();
140
141    let mut children: Vec<_> = std::fs::read_dir(db_dir)?
142        .filter_map(|e| e.ok())
143        .map(|e| e.path())
144        .filter(|p| p.is_dir() && p.join(MDQL_FILENAME).exists())
145        .collect();
146    children.sort();
147
148    for child in children {
149        let (schema, rows, errors) = load_table(&child)?;
150        tables.insert(schema.table.clone(), (schema, rows));
151        all_errors.extend(errors);
152    }
153
154    // Validate foreign key constraints across all tables
155    let fk_errors = crate::validator::validate_foreign_keys(&db_config, &tables);
156    all_errors.extend(fk_errors);
157
158    // Materialize views
159    for view_def in &db_config.views {
160        if tables.contains_key(&view_def.name) {
161            all_errors.push(ValidationError {
162                file_path: MDQL_FILENAME.to_string(),
163                error_type: "view_error".to_string(),
164                field: Some(view_def.name.clone()),
165                message: format!(
166                    "View '{}' conflicts with existing table name",
167                    view_def.name
168                ),
169                line_number: None,
170            });
171            continue;
172        }
173
174        match materialize_view(view_def, &tables) {
175            Ok((schema, rows)) => {
176                tables.insert(view_def.name.clone(), (schema, rows));
177            }
178            Err(e) => {
179                all_errors.push(ValidationError {
180                    file_path: MDQL_FILENAME.to_string(),
181                    error_type: "view_error".to_string(),
182                    field: Some(view_def.name.clone()),
183                    message: format!("View '{}': {}", view_def.name, e),
184                    line_number: None,
185                });
186            }
187        }
188    }
189
190    Ok((db_config, tables, all_errors))
191}
192
193pub fn materialize_view(
194    view_def: &crate::database::ViewDef,
195    tables: &HashMap<String, (Schema, Vec<crate::model::Row>)>,
196) -> crate::errors::Result<(Schema, Vec<crate::model::Row>)> {
197    use crate::query_parser::{Statement, parse_query};
198
199    let stmt = parse_query(&view_def.query)?;
200    let select = match stmt {
201        Statement::Select(q) => q,
202        _ => {
203            return Err(crate::errors::MdqlError::QueryExecution(
204                "View query must be a SELECT statement".into(),
205            ))
206        }
207    };
208
209    let (rows, columns) = if !select.joins.is_empty() {
210        crate::query_engine::execute_join_query(&select, tables)?
211    } else {
212        let (schema, table_rows) = tables.get(&select.table).ok_or_else(|| {
213            crate::errors::MdqlError::QueryExecution(format!(
214                "table '{}' not found in database",
215                select.table
216            ))
217        })?;
218        crate::query_engine::execute_query(&select, table_rows, schema)?
219    };
220
221    let schema = build_view_schema(&view_def.name, &columns, &rows);
222    Ok((schema, rows))
223}
224
225fn build_view_schema(
226    name: &str,
227    columns: &[String],
228    rows: &[crate::model::Row],
229) -> Schema {
230    use crate::schema::*;
231    use indexmap::IndexMap;
232
233    let mut frontmatter = IndexMap::new();
234    for col in columns {
235        if col == "path" || col == "h1" || col == "created" || col == "modified" {
236            continue;
237        }
238        let field_type = rows
239            .iter()
240            .find_map(|r| r.get(col))
241            .map(|v| match v {
242                crate::model::Value::Int(_) => FieldType::Int,
243                crate::model::Value::Float(_) => FieldType::Float,
244                crate::model::Value::Bool(_) => FieldType::Bool,
245                crate::model::Value::Date(_) => FieldType::Date,
246                crate::model::Value::DateTime(_) => FieldType::DateTime,
247                crate::model::Value::List(_) => FieldType::StringArray,
248                crate::model::Value::Dict(_) => FieldType::Dict,
249                _ => FieldType::String,
250            })
251            .unwrap_or(FieldType::String);
252
253        frontmatter.insert(
254            col.clone(),
255            FieldDef {
256                field_type,
257                required: false,
258                enum_values: None,
259            },
260        );
261    }
262
263    Schema {
264        table: name.to_string(),
265        primary_key: "path".to_string(),
266        frontmatter,
267        h1_required: false,
268        h1_must_equal_frontmatter: None,
269        sections: IndexMap::new(),
270        rules: Rules {
271            reject_unknown_frontmatter: false,
272            reject_unknown_sections: false,
273            reject_duplicate_sections: false,
274            normalize_numbered_headings: false,
275        },
276    }
277}