Skip to main content

mdql_core/
loader.rs

1//! Orchestrate loading a table folder into validated rows.
2
3use std::collections::HashMap;
4use std::path::Path;
5
6use rayon::prelude::*;
7
8use crate::database::{DatabaseConfig, load_database_config};
9use crate::errors::ValidationError;
10use crate::model::{Row, to_row};
11use crate::parser::parse_file;
12use crate::schema::{MDQL_FILENAME, Schema, load_schema};
13use crate::validator::validate_file;
14
15/// Load all markdown files in a folder, validate, and return rows.
16pub fn load_table(
17    folder: &Path,
18) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
19    let schema = load_schema(folder)?;
20    let (rows, errors) = load_md_files(folder, &schema, None)?;
21    Ok((schema, rows, errors))
22}
23
24/// Load with an optional mtime-based cache. Unchanged files are served from cache.
25pub fn load_table_cached(
26    folder: &Path,
27    cache: &mut crate::cache::TableCache,
28) -> crate::errors::Result<(Schema, Vec<Row>, Vec<ValidationError>)> {
29    let schema = load_schema(folder)?;
30    let (rows, errors) = load_md_files(folder, &schema, Some(cache))?;
31    Ok((schema, rows, errors))
32}
33
34fn load_md_files(
35    folder: &Path,
36    schema: &Schema,
37    mut cache: Option<&mut crate::cache::TableCache>,
38) -> crate::errors::Result<(Vec<Row>, Vec<ValidationError>)> {
39    let mut md_files: Vec<_> = std::fs::read_dir(folder)?
40        .filter_map(|e| e.ok())
41        .filter(|e| {
42            let name = e.file_name();
43            let name_str = name.to_string_lossy();
44            name_str.ends_with(".md") && name_str != MDQL_FILENAME
45        })
46        .map(|e| e.path())
47        .collect();
48    md_files.sort();
49
50    // If cache is fresh (dir mtime unchanged), try to serve all from cache
51    if let Some(ref cache) = cache {
52        if !cache.is_stale(folder) {
53            let mut rows = Vec::new();
54            let mut all_cached = true;
55            for md_file in &md_files {
56                let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
57                if let Some(mtime) = crate::cache::file_mtime(md_file) {
58                    if let Some(row) = cache.get(&rel, mtime) {
59                        rows.push(row.clone());
60                        continue;
61                    }
62                }
63                all_cached = false;
64                break;
65            }
66            if all_cached {
67                return Ok((rows, Vec::new()));
68            }
69        }
70    }
71
72    // Parse (possibly in parallel)
73    let results: Vec<_> = md_files
74        .par_iter()
75        .map(|md_file| {
76            let rel = md_file.file_name().unwrap_or_default().to_string_lossy().to_string();
77            let parsed = parse_file(
78                md_file,
79                Some(folder),
80                schema.rules.normalize_numbered_headings,
81            );
82            match parsed {
83                Ok(p) => {
84                    let errors = validate_file(&p, schema);
85                    if errors.is_empty() {
86                        let row = to_row(&p, schema);
87                        let mtime = crate::cache::file_mtime(md_file);
88                        (Some((rel, row, mtime)), errors)
89                    } else {
90                        (None, errors)
91                    }
92                }
93                Err(e) => {
94                    let ve = ValidationError {
95                        file_path: md_file.to_string_lossy().to_string(),
96                        error_type: "parse_error".to_string(),
97                        field: None,
98                        message: e.to_string(),
99                        line_number: None,
100                    };
101                    (None, vec![ve])
102                }
103            }
104        })
105        .collect();
106
107    let mut rows = Vec::new();
108    let mut all_errors = Vec::new();
109    for (row_opt, errors) in results {
110        all_errors.extend(errors);
111        if let Some((rel, row, mtime)) = row_opt {
112            if let Some(ref mut c) = cache {
113                if let Some(mt) = mtime {
114                    c.put(rel, mt, row.clone());
115                }
116            }
117            rows.push(row);
118        }
119    }
120
121    if let Some(c) = cache {
122        c.set_table_mtime(folder);
123    }
124
125    Ok((rows, all_errors))
126}
127
128/// Load a multi-table database directory.
129pub fn load_database(
130    db_dir: &Path,
131) -> crate::errors::Result<(
132    DatabaseConfig,
133    HashMap<String, (Schema, Vec<Row>)>,
134    Vec<ValidationError>,
135)> {
136    let db_config = load_database_config(db_dir)?;
137
138    let mut tables: HashMap<String, (Schema, Vec<Row>)> = HashMap::new();
139    let mut all_errors = Vec::new();
140
141    let mut children: Vec<_> = std::fs::read_dir(db_dir)?
142        .filter_map(|e| e.ok())
143        .map(|e| e.path())
144        .filter(|p| p.is_dir() && p.join(MDQL_FILENAME).exists())
145        .collect();
146    children.sort();
147
148    for child in children {
149        let (schema, rows, errors) = load_table(&child)?;
150        tables.insert(schema.table.clone(), (schema, rows));
151        all_errors.extend(errors);
152    }
153
154    // Validate foreign key constraints across all tables
155    let fk_errors = crate::validator::validate_foreign_keys(&db_config, &tables);
156    all_errors.extend(fk_errors);
157
158    Ok((db_config, tables, all_errors))
159}