1use std::collections::{BTreeMap, HashMap};
2use std::path::Path;
3
4use anyhow::{Result, bail};
5use jsonschema::Validator;
6use serde_yaml::Value;
7use sha2::{Digest, Sha256};
8
9use crate::config;
10use crate::default_schemas;
11
12pub struct RegisteredType {
14 pub(crate) schema_path: String,
15 pub(crate) description: String,
16 pub(crate) validator: Validator,
17 pub(crate) aliases: HashMap<String, String>,
18 pub(crate) required_fields: Vec<String>,
19 pub(crate) content_hash: String,
20 pub(crate) edges: Vec<EdgeDecl>,
21}
22
/// One entry of a schema's `x-graph-edges` object.
#[derive(Debug, Clone)]
pub struct EdgeDecl {
    /// Key of the entry inside `x-graph-edges`.
    pub field: String,
    /// Value of the entry's `relation` string (`"links-to"` when absent).
    pub relation: String,
    /// Value of the entry's `direction` string (`"outgoing"` when absent).
    pub direction: String,
    /// String items of the entry's `target_types` array (empty when absent).
    pub target_types: Vec<String>,
}
35
36pub struct SpaceTypeRegistry {
39 types: HashMap<String, RegisteredType>,
40 schema_hash: String,
41 type_hashes: HashMap<String, String>,
42}
43
44impl std::fmt::Debug for SpaceTypeRegistry {
45 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46 f.debug_struct("SpaceTypeRegistry")
47 .field("types", &self.types.keys().collect::<Vec<_>>())
48 .field("schema_hash", &self.schema_hash)
49 .finish()
50 }
51}
52
53impl SpaceTypeRegistry {
54 pub fn build(repo_root: &Path) -> Result<Self> {
57 let schemas_dir = repo_root.join("schemas");
58 let mut types = HashMap::new();
59
60 if schemas_dir.is_dir() {
61 discover_from_dir(&schemas_dir, &mut types)?;
62 } else {
63 discover_from_embedded(&mut types)?;
64 }
65
66 let wiki_cfg = config::load_wiki(repo_root)?;
68 for (type_name, entry) in &wiki_cfg.types {
69 let schema_path = repo_root.join(&entry.schema);
70 let content = std::fs::read_to_string(&schema_path)?;
71 let registered = compile_schema(&entry.schema, &entry.description, &content)?;
72 types.insert(type_name.clone(), registered);
73 }
74
75 if !types.contains_key("default") {
77 let schemas = default_schemas::default_schemas();
79 let base = schemas["base.json"];
80 let registered =
81 compile_schema("schemas/base.json", "Fallback for unrecognized types", base)?;
82 types.insert("default".to_string(), registered);
83 } else {
84 validate_base_invariant(&types["default"])?;
85 }
86
87 let (schema_hash, type_hashes) = compute_hashes(&types);
88
89 Ok(Self {
90 types,
91 schema_hash,
92 type_hashes,
93 })
94 }
95
96 pub fn from_embedded() -> Self {
99 let mut types = HashMap::new();
100 discover_from_embedded(&mut types).expect("embedded schemas are valid");
101 let (schema_hash, type_hashes) = compute_hashes(&types);
102 Self {
103 types,
104 schema_hash,
105 type_hashes,
106 }
107 }
108
109 pub(crate) fn from_parts(
111 types: HashMap<String, RegisteredType>,
112 schema_hash: String,
113 type_hashes: HashMap<String, String>,
114 ) -> Self {
115 Self {
116 types,
117 schema_hash,
118 type_hashes,
119 }
120 }
121
122 pub fn is_known(&self, type_name: &str) -> bool {
124 self.types.contains_key(type_name)
125 }
126
127 pub fn list_types(&self) -> Vec<(&str, &str)> {
129 let mut out: Vec<_> = self
130 .types
131 .iter()
132 .map(|(name, rt)| (name.as_str(), rt.description.as_str()))
133 .collect();
134 out.sort_by_key(|(name, _)| *name);
135 out
136 }
137
138 pub fn aliases(&self, type_name: &str) -> Option<&HashMap<String, String>> {
140 self.types.get(type_name).map(|rt| &rt.aliases)
141 }
142
143 pub fn schema_path(&self, type_name: &str) -> Option<&str> {
145 self.types.get(type_name).map(|rt| rt.schema_path.as_str())
146 }
147
148 pub fn schema_hash(&self) -> &str {
150 &self.schema_hash
151 }
152
153 pub fn type_hashes(&self) -> &HashMap<String, String> {
155 &self.type_hashes
156 }
157
158 pub fn required_fields(&self, type_name: &str) -> Vec<String> {
160 self.types
161 .get(type_name)
162 .map(|rt| rt.required_fields.clone())
163 .unwrap_or_default()
164 }
165
166 pub fn edges(&self, type_name: &str) -> &[EdgeDecl] {
168 self.types
169 .get(type_name)
170 .map(|rt| rt.edges.as_slice())
171 .unwrap_or(&[])
172 }
173
174 pub fn validate(&self, fm: &BTreeMap<String, Value>, strictness: &str) -> Result<Vec<String>> {
183 let mut warnings = Vec::new();
184
185 let has_title = fm
187 .get("title")
188 .and_then(|v| v.as_str())
189 .map(|s| !s.is_empty())
190 .unwrap_or(false);
191 let has_name = fm
193 .get("name")
194 .and_then(|v| v.as_str())
195 .map(|s| !s.is_empty())
196 .unwrap_or(false);
197 if !has_title && !has_name {
198 bail!("title is required");
199 }
200
201 let page_type = fm.get("type").and_then(|v| v.as_str()).unwrap_or("");
202
203 let resolved_type = if page_type.is_empty() {
205 warnings.push("missing field: type (defaulting to \"page\")".into());
206 "default"
207 } else if self.types.contains_key(page_type) {
208 page_type
209 } else {
210 if strictness == "strict" {
211 bail!("unknown type '{page_type}'");
212 }
213 warnings.push(format!("unknown type '{page_type}'"));
214 "default"
215 };
216
217 if let Some(rt) = self.types.get(resolved_type) {
218 let json_fm = yaml_fm_to_json(fm)?;
219 let errors: Vec<_> = rt.validator.iter_errors(&json_fm).collect();
220 if !errors.is_empty() {
221 if strictness == "strict" {
222 bail!("schema validation failed: {}", errors[0]);
223 }
224 for e in &errors {
225 warnings.push(format!("schema validation: {e}"));
226 }
227 }
228 }
229
230 Ok(warnings)
231 }
232}
233
234impl Default for SpaceTypeRegistry {
235 fn default() -> Self {
236 Self::from_embedded()
237 }
238}
239
240fn discover_from_dir(
243 schemas_dir: &Path,
244 types: &mut HashMap<String, RegisteredType>,
245) -> Result<()> {
246 let mut entries: Vec<_> = std::fs::read_dir(schemas_dir)?
247 .filter_map(|e| e.ok())
248 .filter(|e| e.path().extension().and_then(|ext| ext.to_str()) == Some("json"))
249 .collect();
250 entries.sort_by_key(|e| e.file_name());
251
252 for entry in entries {
253 let path = entry.path();
254 let filename = path.file_name().unwrap().to_string_lossy();
255 let content = std::fs::read_to_string(&path)?;
256 let schema_value: serde_json::Value = serde_json::from_str(&content)?;
257
258 let schema_rel = format!("schemas/{filename}");
259 let content_hash = sha256_hex(content.as_bytes());
260
261 if let Some(wiki_types) = schema_value.get("x-wiki-types").and_then(|v| v.as_object()) {
262 let aliases = extract_aliases(&schema_value);
263 let required_fields = extract_required(&schema_value);
264 let edges = extract_edges(&schema_value);
265
266 for (type_name, desc) in wiki_types {
267 let description = desc.as_str().unwrap_or("").to_string();
268 let validator = Validator::new(&schema_value)
269 .map_err(|e| anyhow::anyhow!("invalid schema {filename}: {e}"))?;
270 types.insert(
271 type_name.clone(),
272 RegisteredType {
273 schema_path: schema_rel.clone(),
274 description,
275 validator,
276 aliases: aliases.clone(),
277 required_fields: required_fields.clone(),
278 content_hash: content_hash.clone(),
279 edges: edges.clone(),
280 },
281 );
282 }
283 }
284 }
285
286 Ok(())
287}
288
289fn discover_from_embedded(types: &mut HashMap<String, RegisteredType>) -> Result<()> {
290 for entry in default_schemas::default_type_entries() {
291 let filename = entry
292 .schema_file
293 .strip_prefix("schemas/")
294 .unwrap_or(&entry.schema_file);
295 let schemas = default_schemas::default_schemas();
296 let content = schemas
297 .get(filename)
298 .ok_or_else(|| anyhow::anyhow!("embedded schema not found: {filename}"))?;
299 let registered = compile_schema(&entry.schema_file, &entry.description, content)?;
300 types.insert(entry.type_name, registered);
301 }
302 Ok(())
303}
304
305pub(crate) fn compile_schema(
306 schema_path: &str,
307 description: &str,
308 content: &str,
309) -> Result<RegisteredType> {
310 let content_hash = sha256_hex(content.as_bytes());
311 let schema_value: serde_json::Value = serde_json::from_str(content)?;
312 compile_schema_from_value(schema_path, description, &schema_value, &content_hash)
313}
314
315pub(crate) fn compile_schema_from_value(
316 schema_path: &str,
317 description: &str,
318 schema_value: &serde_json::Value,
319 content_hash: &str,
320) -> Result<RegisteredType> {
321 let validator = Validator::new(schema_value)
322 .map_err(|e| anyhow::anyhow!("invalid schema {schema_path}: {e}"))?;
323 let aliases = extract_aliases(schema_value);
324 let required_fields = extract_required(schema_value);
325 let edges = extract_edges(schema_value);
326
327 Ok(RegisteredType {
328 schema_path: schema_path.to_string(),
329 description: description.to_string(),
330 validator,
331 aliases,
332 required_fields,
333 content_hash: content_hash.to_string(),
334 edges,
335 })
336}
337
338pub(crate) fn extract_aliases(schema: &serde_json::Value) -> HashMap<String, String> {
339 schema
340 .get("x-index-aliases")
341 .and_then(|v| v.as_object())
342 .map(|obj| {
343 obj.iter()
344 .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
345 .collect()
346 })
347 .unwrap_or_default()
348}
349
350pub(crate) fn extract_required(schema: &serde_json::Value) -> Vec<String> {
351 schema
352 .get("required")
353 .and_then(|v| v.as_array())
354 .map(|arr| {
355 arr.iter()
356 .filter_map(|v| v.as_str().map(|s| s.to_string()))
357 .collect()
358 })
359 .unwrap_or_default()
360}
361
362pub(crate) fn extract_edges(schema: &serde_json::Value) -> Vec<EdgeDecl> {
363 schema
364 .get("x-graph-edges")
365 .and_then(|v| v.as_object())
366 .map(|obj| {
367 obj.iter()
368 .map(|(field, decl)| {
369 let relation = decl
370 .get("relation")
371 .and_then(|v| v.as_str())
372 .unwrap_or("links-to")
373 .to_string();
374 let direction = decl
375 .get("direction")
376 .and_then(|v| v.as_str())
377 .unwrap_or("outgoing")
378 .to_string();
379 let target_types = decl
380 .get("target_types")
381 .and_then(|v| v.as_array())
382 .map(|arr| {
383 arr.iter()
384 .filter_map(|v| v.as_str().map(|s| s.to_string()))
385 .collect()
386 })
387 .unwrap_or_default();
388 EdgeDecl {
389 field: field.clone(),
390 relation,
391 direction,
392 target_types,
393 }
394 })
395 .collect()
396 })
397 .unwrap_or_default()
398}
399
400pub(crate) fn validate_base_invariant(rt: &RegisteredType) -> Result<()> {
402 if !rt.required_fields.contains(&"title".to_string()) {
403 bail!(
404 "base schema '{}' must require 'title' — \
405 the default type is the fallback for all unknown types",
406 rt.schema_path
407 );
408 }
409 if !rt.required_fields.contains(&"type".to_string()) {
410 bail!(
411 "base schema '{}' must require 'type' — \
412 the default type is the fallback for all unknown types",
413 rt.schema_path
414 );
415 }
416 Ok(())
417}
418
419pub(crate) fn sha256_hex(data: &[u8]) -> String {
423 hex::encode(Sha256::digest(data))
424}
425
426pub(crate) fn compute_hashes(
427 types: &HashMap<String, RegisteredType>,
428) -> (String, HashMap<String, String>) {
429 let entries: HashMap<String, (String, HashMap<String, String>, String)> = types
430 .iter()
431 .map(|(name, rt)| {
432 (
433 name.clone(),
434 (
435 rt.schema_path.clone(),
436 rt.aliases.clone(),
437 rt.content_hash.clone(),
438 ),
439 )
440 })
441 .collect();
442 hash_type_entries(&entries)
443}
444
445fn hash_type_entries(
449 entries: &HashMap<String, (String, HashMap<String, String>, String)>,
450) -> (String, HashMap<String, String>) {
451 let sorted: BTreeMap<_, _> = entries.iter().collect();
452 let mut type_hashes = HashMap::new();
453 let mut global_hasher = Sha256::new();
454
455 for (name, (schema_path, aliases, content_hash)) in &sorted {
456 let mut h = Sha256::new();
457 h.update(schema_path.as_bytes());
458 let sorted_aliases: BTreeMap<_, _> = aliases.iter().collect();
459 for (k, v) in &sorted_aliases {
460 h.update(k.as_bytes());
461 h.update(v.as_bytes());
462 }
463 h.update(content_hash.as_bytes());
464 let type_hash = hex::encode(h.finalize());
465 type_hashes.insert(name.to_string(), type_hash.clone());
466 global_hasher.update(type_hash.as_bytes());
467 }
468
469 (hex::encode(global_hasher.finalize()), type_hashes)
470}
471
472pub fn compute_disk_hashes(repo_root: &Path) -> Result<(String, HashMap<String, String>)> {
484 let schemas_dir = repo_root.join("schemas");
485
486 let mut type_data: HashMap<String, (String, HashMap<String, String>, String)> = HashMap::new();
488
489 if schemas_dir.is_dir() {
490 let mut entries: Vec<_> = std::fs::read_dir(&schemas_dir)?
491 .filter_map(|e| e.ok())
492 .filter(|e| e.path().extension().and_then(|ext| ext.to_str()) == Some("json"))
493 .collect();
494 entries.sort_by_key(|e| e.file_name());
495
496 for entry in entries {
497 let path = entry.path();
498 let filename = path.file_name().unwrap().to_string_lossy().to_string();
499 let content = std::fs::read_to_string(&path)?;
500 let content_hash = sha256_hex(content.as_bytes());
501 let schema_rel = format!("schemas/{filename}");
502 let schema_value: serde_json::Value = serde_json::from_str(&content)?;
503
504 if let Some(wiki_types) = schema_value.get("x-wiki-types").and_then(|v| v.as_object()) {
505 let aliases = extract_aliases(&schema_value);
506 for (type_name, _) in wiki_types {
507 type_data.insert(
508 type_name.clone(),
509 (schema_rel.clone(), aliases.clone(), content_hash.clone()),
510 );
511 }
512 }
513 }
514 } else {
515 for (filename, content) in default_schemas::default_schemas() {
517 let content_hash = sha256_hex(content.as_bytes());
518 let schema_rel = format!("schemas/{filename}");
519 let schema_value: serde_json::Value = serde_json::from_str(content)?;
520
521 if let Some(wiki_types) = schema_value.get("x-wiki-types").and_then(|v| v.as_object()) {
522 let aliases = extract_aliases(&schema_value);
523 for (type_name, _) in wiki_types {
524 type_data.insert(
525 type_name.clone(),
526 (schema_rel.clone(), aliases.clone(), content_hash.clone()),
527 );
528 }
529 }
530 }
531 }
532
533 let wiki_cfg = config::load_wiki(repo_root)?;
535 for (type_name, entry) in &wiki_cfg.types {
536 let schema_path = repo_root.join(&entry.schema);
537 let content = std::fs::read_to_string(&schema_path)?;
538 let content_hash = sha256_hex(content.as_bytes());
539 let schema_value: serde_json::Value = serde_json::from_str(&content)?;
540 let aliases = extract_aliases(&schema_value);
541 type_data.insert(
542 type_name.clone(),
543 (entry.schema.clone(), aliases, content_hash),
544 );
545 }
546
547 if !type_data.contains_key("default") {
549 let schemas = default_schemas::default_schemas();
550 let base = schemas["base.json"];
551 let content_hash = sha256_hex(base.as_bytes());
552 let schema_value: serde_json::Value = serde_json::from_str(base)?;
553 let aliases = extract_aliases(&schema_value);
554 type_data.insert(
555 "default".to_string(),
556 ("schemas/base.json".to_string(), aliases, content_hash),
557 );
558 }
559
560 Ok(hash_type_entries(&type_data))
562}
563
564fn yaml_fm_to_json(fm: &BTreeMap<String, Value>) -> Result<serde_json::Value> {
567 let yaml_str = serde_yaml::to_string(fm)?;
569 let json: serde_json::Value = serde_yaml::from_str(&yaml_str)?;
570 Ok(json)
571}