Skip to main content

mdql_core/
loader.rs

1//! Orchestrate loading a table folder into validated rows.
2
3use std::collections::HashMap;
4use std::path::Path;
5
6use rayon::prelude::*;
7
8use crate::database::{DatabaseConfig, load_database_config};
9use crate::errors::ValidationError;
10use crate::model::{Row, to_row};
11use crate::parser::parse_file;
12use crate::schema::{MDQL_FILENAME, Schema, load_schema};
13use crate::validator::validate_file;
14
15/// Load all markdown files in a folder, validate, and return rows.
16pub fn load_table(
17    folder: &Path,
18) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
19    let schema = load_schema(folder)?;
20    let (rows, errors) = load_md_files(folder, &schema, None)?;
21    Ok((schema, rows, errors))
22}
23
24/// Load with an optional mtime-based cache. Unchanged files are served from cache.
25pub fn load_table_cached(
26    folder: &Path,
27    cache: &mut crate::cache::TableCache,
28) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
29    let schema = load_schema(folder)?;
30    let (rows, errors) = load_md_files(folder, &schema, Some(cache))?;
31    Ok((schema, rows, errors))
32}
33
34fn load_md_files(
35    folder: &Path,
36    schema: &Schema,
37    mut cache: Option<&mut crate::cache::TableCache>,
38) -> crate::errors::Result<(Vec<Row>, Vec<ValidationError>)> {
39    let mut md_files: Vec<_> = std::fs::read_dir(folder)?
40        .filter_map(|e| e.ok())
41        .filter(|e| {
42            let name = e.file_name();
43            let name_str = name.to_string_lossy();
44            name_str.ends_with(".md") && name_str != MDQL_FILENAME
45        })
46        .map(|e| e.path())
47        .collect();
48    md_files.sort();
49
50    // If cache is fresh (dir mtime unchanged), try to serve all from cache
51    if let Some(ref cache) = cache {
52        if !cache.is_stale(folder) {
53            let mut rows = Vec::new();
54            let mut all_cached = true;
55            for md_file in &md_files {
56                let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
57                if let Some(mtime) = crate::cache::file_mtime(md_file) {
58                    if let Some(row) = cache.get(&rel, mtime) {
59                        rows.push(row.clone());
60                        continue;
61                    }
62                }
63                all_cached = false;
64                break;
65            }
66            if all_cached {
67                return Ok((rows, Vec::new()));
68            }
69        }
70    }
71
72    // Parse (possibly in parallel)
73    let results: Vec<_> = md_files
74        .par_iter()
75        .map(|md_file| {
76            let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
77            let parsed = parse_file(
78                md_file,
79                Some(folder),
80                schema.rules.normalize_numbered_headings,
81            );
82            match parsed {
83                Ok(p) => {
84                    let errors = validate_file(&p, schema);
85                    if errors.is_empty() {
86                        let row = to_row(&p, schema);
87                        let mtime = crate::cache::file_mtime(md_file);
88                        (Some((rel, row, mtime)), errors)
89                    } else {
90                        (None, errors)
91                    }
92                }
93                Err(e) => {
94                    let ve = ValidationError {
95                        file_path: md_file.to_string_lossy().to_string(),
96                        error_type: crate::errors::ValidationErrorKind::ParseError,
97                        field: None,
98                        message: e.to_string(),
99                        line_number: None,
100                    };
101                    (None, vec![ve])
102                }
103            }
104        })
105        .collect();
106
107    let checksums = crate::checksums::load_checksums(folder);
108
109    let mut rows = Vec::new();
110    let mut all_errors = Vec::new();
111    for (row_opt, errors) in results {
112        all_errors.extend(errors);
113        if let Some((rel, mut row, mtime)) = row_opt {
114            if let Some(ref cs) = checksums {
115                let matches = cs.files.get(&rel).map(|expected| {
116                    let file_path = folder.join(&rel);
117                    std::fs::read(&file_path)
118                        .map(|content| crate::checksums::hash_content(&content) == *expected)
119                        .unwrap_or(false)
120                });
121                match matches {
122                    Some(true) => {}
123                    Some(false) | None => {
124                        row.insert(
125                            "_modified_externally".to_string(),
126                            crate::model::Value::Bool(true),
127                        );
128                    }
129                }
130            }
131            if let Some(ref mut c) = cache {
132                if let Some(mt) = mtime {
133                    c.put(rel, mt, row.clone());
134                }
135            }
136            rows.push(row);
137        }
138    }
139
140    if let Some(c) = cache {
141        c.set_table_mtime(folder);
142    }
143
144    Ok((rows, all_errors))
145}
146
147/// Load a multi-table database directory.
148pub fn load_database(
149    db_dir: &Path,
150) -> crate::errors::Result<(
151    DatabaseConfig,
152    HashMap<String, (Schema, Vec<Row>)>,
153    Vec<ValidationError>,
154)> {
155    let db_config = load_database_config(db_dir)?;
156
157    let mut tables: HashMap<String, (Schema, Vec<Row>)> = HashMap::new();
158    let mut all_errors = Vec::new();
159
160    let mut children: Vec<_> = std::fs::read_dir(db_dir)?
161        .filter_map(|e| e.ok())
162        .map(|e| e.path())
163        .filter(|p| p.is_dir() && p.join(MDQL_FILENAME).exists())
164        .collect();
165    children.sort();
166
167    for child in children {
168        let (schema, rows, errors) = load_table(&child)?;
169        tables.insert(schema.table.clone(), (schema, rows));
170        all_errors.extend(errors);
171    }
172
173    // Validate foreign key constraints across all tables
174    let fk_errors = crate::validator::validate_foreign_keys(&db_config, &tables);
175    all_errors.extend(fk_errors);
176
177    // Materialize views
178    for view_def in &db_config.views {
179        if tables.contains_key(&view_def.name) {
180            all_errors.push(ValidationError {
181                file_path: MDQL_FILENAME.to_string(),
182                error_type: crate::errors::ValidationErrorKind::ViewError,
183                field: Some(view_def.name.clone()),
184                message: format!(
185                    "View '{}' conflicts with existing table name",
186                    view_def.name
187                ),
188                line_number: None,
189            });
190            continue;
191        }
192
193        match materialize_view(view_def, &tables) {
194            Ok((schema, rows)) => {
195                tables.insert(view_def.name.clone(), (schema, rows));
196            }
197            Err(e) => {
198                all_errors.push(ValidationError {
199                    file_path: MDQL_FILENAME.to_string(),
200                    error_type: crate::errors::ValidationErrorKind::ViewError,
201                    field: Some(view_def.name.clone()),
202                    message: format!("View '{}': {}", view_def.name, e),
203                    line_number: None,
204                });
205            }
206        }
207    }
208
209    Ok((db_config, tables, all_errors))
210}
211
212pub fn materialize_view(
213    view_def: &crate::database::ViewDef,
214    tables: &HashMap<String, (Schema, Vec<crate::model::Row>)>,
215) -> crate::errors::Result<(Schema, Vec<crate::model::Row>)> {
216    use crate::query_parser::{Statement, parse_query};
217
218    let stmt = parse_query(&view_def.query)?;
219    let select = match stmt {
220        Statement::Select(q) => q,
221        _ => {
222            return Err(crate::errors::MdqlError::QueryExecution(
223                "View query must be a SELECT statement".into(),
224            ))
225        }
226    };
227
228    let (rows, columns) = if let Some(ref sub) = select.subquery {
229        let (sub_schema, sub_table_rows) = tables.get(&sub.table).ok_or_else(|| {
230            crate::errors::MdqlError::QueryExecution(format!(
231                "table '{}' not found in database",
232                sub.table
233            ))
234        })?;
235        crate::query_engine::execute_query(&select, sub_table_rows, sub_schema)?
236    } else if !select.joins.is_empty() {
237        crate::query_engine::execute_join_query(&select, tables)?
238    } else {
239        let (schema, table_rows) = tables.get(&select.table).ok_or_else(|| {
240            crate::errors::MdqlError::QueryExecution(format!(
241                "table '{}' not found in database",
242                select.table
243            ))
244        })?;
245        crate::query_engine::execute_query(&select, table_rows, schema)?
246    };
247
248    let schema = build_view_schema(&view_def.name, &columns, &rows);
249    Ok((schema, rows))
250}
251
252pub fn build_view_schema(
253    name: &str,
254    columns: &[String],
255    rows: &[crate::model::Row],
256) -> Schema {
257    use crate::schema::*;
258    use indexmap::IndexMap;
259
260    let mut frontmatter = IndexMap::new();
261    for col in columns {
262        if col == "path" || col == "h1" || col == "created" || col == "modified" {
263            continue;
264        }
265        let field_type = rows
266            .iter()
267            .find_map(|r| r.get(col))
268            .map(|v| match v {
269                crate::model::Value::Int(_) => FieldType::Int,
270                crate::model::Value::Float(_) => FieldType::Float,
271                crate::model::Value::Bool(_) => FieldType::Bool,
272                crate::model::Value::Date(_) => FieldType::Date,
273                crate::model::Value::DateTime(_) => FieldType::DateTime,
274                crate::model::Value::List(_) => FieldType::StringArray,
275                crate::model::Value::Dict(_) => FieldType::Dict,
276                _ => FieldType::String,
277            })
278            .unwrap_or(FieldType::String);
279
280        frontmatter.insert(
281            col.clone(),
282            FieldDef {
283                field_type,
284                required: false,
285                enum_values: None,
286            },
287        );
288    }
289
290    Schema {
291        table: name.to_string(),
292        primary_key: "path".to_string(),
293        frontmatter,
294        h1_required: false,
295        sections: IndexMap::new(),
296        rules: Rules {
297            reject_unknown_frontmatter: false,
298            reject_unknown_sections: false,
299            reject_duplicate_sections: false,
300            normalize_numbered_headings: false,
301        },
302    }
303}