Skip to main content

mdql_core/
loader.rs

1//! Orchestrate loading a table folder into validated rows.
2
3use std::collections::HashMap;
4use std::path::Path;
5
6use rayon::prelude::*;
7
8use crate::database::{DatabaseConfig, load_database_config};
9use crate::errors::ValidationError;
10use crate::model::{Row, to_row};
11use crate::parser::parse_file;
12use crate::schema::{MDQL_FILENAME, Schema, load_schema};
13use crate::validator::validate_file;
14
15/// Load all markdown files in a folder, validate, and return rows.
16pub fn load_table(
17    folder: &Path,
18) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
19    let schema = load_schema(folder)?;
20    let (rows, errors) = load_md_files(folder, &schema, None)?;
21    Ok((schema, rows, errors))
22}
23
24/// Load with an optional mtime-based cache. Unchanged files are served from cache.
25pub fn load_table_cached(
26    folder: &Path,
27    cache: &mut crate::cache::TableCache,
28) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
29    let schema = load_schema(folder)?;
30    let (rows, errors) = load_md_files(folder, &schema, Some(cache))?;
31    Ok((schema, rows, errors))
32}
33
34fn load_md_files(
35    folder: &Path,
36    schema: &Schema,
37    mut cache: Option<&mut crate::cache::TableCache>,
38) -> crate::errors::Result<(Vec<Row>, Vec<ValidationError>)> {
39    let mut md_files: Vec<_> = std::fs::read_dir(folder)?
40        .filter_map(|e| e.ok())
41        .filter(|e| {
42            let name = e.file_name();
43            let name_str = name.to_string_lossy();
44            name_str.ends_with(".md") && name_str != MDQL_FILENAME
45        })
46        .map(|e| e.path())
47        .collect();
48    md_files.sort();
49
50    // If cache is fresh (dir mtime unchanged), try to serve all from cache
51    if let Some(ref cache) = cache {
52        if !cache.is_stale(folder) {
53            let mut rows = Vec::new();
54            let mut all_cached = true;
55            for md_file in &md_files {
56                let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
57                if let Some(mtime) = crate::cache::file_mtime(md_file) {
58                    if let Some(row) = cache.get(&rel, mtime) {
59                        rows.push(row.clone());
60                        continue;
61                    }
62                }
63                all_cached = false;
64                break;
65            }
66            if all_cached {
67                return Ok((rows, Vec::new()));
68            }
69        }
70    }
71
72    // Parse (possibly in parallel)
73    let results: Vec<_> = md_files
74        .par_iter()
75        .map(|md_file| {
76            let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
77            let parsed = parse_file(
78                md_file,
79                Some(folder),
80                schema.rules.normalize_numbered_headings,
81            );
82            match parsed {
83                Ok(p) => {
84                    let errors = validate_file(&p, schema);
85                    let has_blocking_errors = errors.iter().any(|e| {
86                        e.error_type != crate::errors::ValidationErrorKind::LooseBody
87                    });
88                    if has_blocking_errors {
89                        (None, errors)
90                    } else {
91                        let row = to_row(&p, schema);
92                        let mtime = crate::cache::file_mtime(md_file);
93                        (Some((rel, row, mtime)), errors)
94                    }
95                }
96                Err(e) => {
97                    let ve = ValidationError {
98                        file_path: md_file.to_string_lossy().to_string(),
99                        error_type: crate::errors::ValidationErrorKind::ParseError,
100                        field: None,
101                        message: e.to_string(),
102                        line_number: None,
103                    };
104                    (None, vec![ve])
105                }
106            }
107        })
108        .collect();
109
110    let checksums = crate::checksums::load_checksums(folder);
111
112    let mut rows = Vec::new();
113    let mut all_errors = Vec::new();
114    for (row_opt, errors) in results {
115        all_errors.extend(errors);
116        if let Some((rel, mut row, mtime)) = row_opt {
117            if let Some(ref cs) = checksums {
118                let matches = cs.files.get(&rel).map(|expected| {
119                    let file_path = folder.join(&rel);
120                    std::fs::read(&file_path)
121                        .map(|content| crate::checksums::hash_content(&content) == *expected)
122                        .unwrap_or(false)
123                });
124                match matches {
125                    Some(true) => {}
126                    Some(false) | None => {
127                        row.insert(
128                            "_modified_externally".to_string(),
129                            crate::model::Value::Bool(true),
130                        );
131                    }
132                }
133            }
134            if let Some(ref mut c) = cache {
135                if let Some(mt) = mtime {
136                    c.put(rel, mt, row.clone());
137                }
138            }
139            rows.push(row);
140        }
141    }
142
143    if let Some(c) = cache {
144        c.set_table_mtime(folder);
145    }
146
147    Ok((rows, all_errors))
148}
149
150/// Load a multi-table database directory.
151pub fn load_database(
152    db_dir: &Path,
153) -> crate::errors::Result<(
154    DatabaseConfig,
155    HashMap<String, (Schema, Vec<Row>)>,
156    Vec<ValidationError>,
157)> {
158    let db_config = load_database_config(db_dir)?;
159
160    let mut tables: HashMap<String, (Schema, Vec<Row>)> = HashMap::new();
161    let mut all_errors = Vec::new();
162
163    let mut children: Vec<_> = std::fs::read_dir(db_dir)?
164        .filter_map(|e| e.ok())
165        .map(|e| e.path())
166        .filter(|p| p.is_dir() && p.join(MDQL_FILENAME).exists())
167        .collect();
168    children.sort();
169
170    for child in children {
171        let (schema, rows, errors) = load_table(&child)?;
172        tables.insert(schema.table.clone(), (schema, rows));
173        all_errors.extend(errors);
174    }
175
176    // Validate foreign key constraints across all tables
177    let fk_errors = crate::validator::validate_foreign_keys(&db_config, &tables);
178    all_errors.extend(fk_errors);
179
180    // Materialize views
181    for view_def in &db_config.views {
182        if tables.contains_key(&view_def.name) {
183            all_errors.push(ValidationError {
184                file_path: MDQL_FILENAME.to_string(),
185                error_type: crate::errors::ValidationErrorKind::ViewError,
186                field: Some(view_def.name.clone()),
187                message: format!(
188                    "View '{}' conflicts with existing table name",
189                    view_def.name
190                ),
191                line_number: None,
192            });
193            continue;
194        }
195
196        match materialize_view(view_def, &tables) {
197            Ok((schema, rows)) => {
198                tables.insert(view_def.name.clone(), (schema, rows));
199            }
200            Err(e) => {
201                all_errors.push(ValidationError {
202                    file_path: MDQL_FILENAME.to_string(),
203                    error_type: crate::errors::ValidationErrorKind::ViewError,
204                    field: Some(view_def.name.clone()),
205                    message: format!("View '{}': {}", view_def.name, e),
206                    line_number: None,
207                });
208            }
209        }
210    }
211
212    Ok((db_config, tables, all_errors))
213}
214
215pub fn materialize_view(
216    view_def: &crate::database::ViewDef,
217    tables: &HashMap<String, (Schema, Vec<crate::model::Row>)>,
218) -> crate::errors::Result<(Schema, Vec<crate::model::Row>)> {
219    use crate::query_parser::{Statement, parse_query};
220
221    let stmt = parse_query(&view_def.query)?;
222    let select = match stmt {
223        Statement::Select(q) => q,
224        _ => {
225            return Err(crate::errors::MdqlError::QueryExecution(
226                "View query must be a SELECT statement".into(),
227            ))
228        }
229    };
230
231    let (rows, columns) = if let Some(ref sub) = select.subquery {
232        let (sub_schema, sub_table_rows) = tables.get(&sub.table).ok_or_else(|| {
233            crate::errors::MdqlError::QueryExecution(format!(
234                "table '{}' not found in database",
235                sub.table
236            ))
237        })?;
238        crate::query_engine::execute_query(&select, sub_table_rows, sub_schema)?
239    } else if !select.joins.is_empty() {
240        crate::query_engine::execute_join_query(&select, tables)?
241    } else {
242        let (schema, table_rows) = tables.get(&select.table).ok_or_else(|| {
243            crate::errors::MdqlError::QueryExecution(format!(
244                "table '{}' not found in database",
245                select.table
246            ))
247        })?;
248        crate::query_engine::execute_query(&select, table_rows, schema)?
249    };
250
251    let schema = build_view_schema(&view_def.name, &columns, &rows);
252    Ok((schema, rows))
253}
254
255fn build_view_schema(
256    name: &str,
257    columns: &[String],
258    rows: &[crate::model::Row],
259) -> Schema {
260    use crate::schema::*;
261    use indexmap::IndexMap;
262
263    let mut frontmatter = IndexMap::new();
264    for col in columns {
265        if col == "path" || col == "h1" || col == "created" || col == "modified" {
266            continue;
267        }
268        let field_type = rows
269            .iter()
270            .find_map(|r| r.get(col))
271            .map(|v| match v {
272                crate::model::Value::Int(_) => FieldType::Int,
273                crate::model::Value::Float(_) => FieldType::Float,
274                crate::model::Value::Bool(_) => FieldType::Bool,
275                crate::model::Value::Date(_) => FieldType::Date,
276                crate::model::Value::DateTime(_) => FieldType::DateTime,
277                crate::model::Value::List(_) => FieldType::StringArray,
278                crate::model::Value::Dict(_) => FieldType::Dict,
279                _ => FieldType::String,
280            })
281            .unwrap_or(FieldType::String);
282
283        frontmatter.insert(
284            col.clone(),
285            FieldDef {
286                field_type,
287                required: false,
288                enum_values: None,
289            },
290        );
291    }
292
293    Schema {
294        table: name.to_string(),
295        primary_key: "path".to_string(),
296        frontmatter,
297        h1_required: false,
298        h1_must_equal_frontmatter: None,
299        sections: IndexMap::new(),
300        rules: Rules {
301            reject_unknown_frontmatter: false,
302            reject_unknown_sections: false,
303            reject_duplicate_sections: false,
304            normalize_numbered_headings: false,
305        },
306    }
307}