Skip to main content

llm_wiki/ops/
schema.rs

1use std::path::Path;
2
3use anyhow::{Context, Result, bail};
4use serde::{Deserialize, Serialize};
5
6use crate::config;
7use crate::engine::{EngineState, WikiEngine};
8use crate::git;
9use crate::markdown;
10use crate::search;
11use crate::space_builder;
12
13/// A registered page type with its schema location and description.
14#[derive(Debug, Serialize, Deserialize)]
15pub struct SchemaTypeEntry {
16    /// Type identifier (e.g. `"concept"`).
17    pub name: String,
18    /// Human-readable description of the type.
19    pub description: String,
20    /// Relative path to the JSON Schema file.
21    pub schema_path: String,
22}
23
24/// List all registered types in a wiki's type registry.
25pub fn schema_list(engine: &EngineState, wiki_name: &str) -> Result<Vec<SchemaTypeEntry>> {
26    let space = engine.space(wiki_name)?;
27    Ok(space
28        .type_registry
29        .list_types()
30        .into_iter()
31        .map(|(name, desc)| SchemaTypeEntry {
32            name: name.to_string(),
33            description: desc.to_string(),
34            schema_path: space
35                .type_registry
36                .schema_path(name)
37                .unwrap_or_default()
38                .to_string(),
39        })
40        .collect())
41}
42
43/// Return the raw JSON Schema content for a named type.
44pub fn schema_show(engine: &EngineState, wiki_name: &str, type_name: &str) -> Result<String> {
45    let space = engine.space(wiki_name)?;
46    let schema_path = space
47        .type_registry
48        .schema_path(type_name)
49        .ok_or_else(|| anyhow::anyhow!("type '{type_name}' is not registered"))?;
50    let full_path = space.repo_root.join(schema_path);
51    std::fs::read_to_string(&full_path)
52        .with_context(|| format!("failed to read schema: {}", full_path.display()))
53}
54
55/// Return a frontmatter template for a type, derived from its JSON Schema.
56pub fn schema_show_template(
57    engine: &EngineState,
58    wiki_name: &str,
59    type_name: &str,
60) -> Result<String> {
61    let content = schema_show(engine, wiki_name, type_name)?;
62    let schema: serde_json::Value = serde_json::from_str(&content)?;
63    Ok(generate_template(&schema, type_name))
64}
65
66/// Copy a schema file into the wiki and register the type in `wiki.toml`.
67pub fn schema_add(
68    engine: &EngineState,
69    wiki_name: &str,
70    type_name: &str,
71    src_path: &Path,
72) -> Result<String> {
73    let space = engine.space(wiki_name)?;
74
75    // Validate the schema file
76    let content = std::fs::read_to_string(src_path)
77        .with_context(|| format!("failed to read: {}", src_path.display()))?;
78    let schema_value: serde_json::Value =
79        serde_json::from_str(&content).context("file is not valid JSON")?;
80    jsonschema::Validator::new(&schema_value)
81        .map_err(|e| anyhow::anyhow!("file is not a valid JSON Schema: {e}"))?;
82
83    // Copy to schemas/
84    let filename = src_path
85        .file_name()
86        .ok_or_else(|| anyhow::anyhow!("invalid path"))?;
87    let dest = space.repo_root.join("schemas").join(filename);
88    std::fs::copy(src_path, &dest)?;
89
90    // Check if x-wiki-types declares the type
91    let has_type = schema_value
92        .get("x-wiki-types")
93        .and_then(|v| v.as_object())
94        .map(|obj| obj.contains_key(type_name))
95        .unwrap_or(false);
96
97    let mut msg = format!("copied to {}", dest.display());
98
99    if !has_type {
100        // Add wiki.toml override
101        let mut wiki_cfg = config::load_wiki(&space.repo_root)?;
102        wiki_cfg.types.insert(
103            type_name.to_string(),
104            config::TypeEntry {
105                schema: format!("schemas/{}", filename.to_string_lossy()),
106                description: format!("Custom type: {type_name}"),
107            },
108        );
109        config::save_wiki(&wiki_cfg, &space.repo_root)?;
110        msg.push_str(&format!(", added [types.{type_name}] to wiki.toml"));
111    }
112
113    // Validate index resolution
114    if let Err(e) = space_builder::build_space(&space.repo_root, "en_stem") {
115        msg.push_str(&format!("\nWARNING: index resolution failed: {e}"));
116    }
117
118    Ok(msg)
119}
120
121/// Summary of a `schema remove` operation.
122#[derive(Debug, Serialize, Deserialize)]
123pub struct SchemaRemoveReport {
124    /// Number of indexed pages with this type that were removed from the index.
125    pub pages_removed: usize,
126    /// Number of page files actually deleted from disk.
127    pub pages_deleted_from_disk: usize,
128    /// True if the `[types.<name>]` entry was removed from `wiki.toml`.
129    pub wiki_toml_updated: bool,
130    /// True if the schema JSON file was deleted.
131    pub schema_file_deleted: bool,
132    /// True if this was a dry run (no changes made).
133    pub dry_run: bool,
134}
135
136/// Remove a type schema and optionally delete its pages.
137pub fn schema_remove(
138    manager: &WikiEngine,
139    wiki_name: &str,
140    type_name: &str,
141    delete: bool,
142    delete_pages: bool,
143    dry_run: bool,
144) -> Result<SchemaRemoveReport> {
145    if type_name == "default" {
146        bail!("cannot remove the 'default' type");
147    }
148
149    let engine = manager
150        .state
151        .read()
152        .map_err(|_| anyhow::anyhow!("lock poisoned"))?;
153    let space = engine.space(wiki_name)?;
154
155    // Count pages of this type in the index
156    let searcher = space.index_manager.searcher()?;
157    let list_result = search::list(
158        &search::ListOptions {
159            r#type: Some(type_name.to_string()),
160            ..Default::default()
161        },
162        &searcher,
163        wiki_name,
164        &space.index_schema,
165    )?;
166    let pages_to_remove = list_result.total;
167
168    if dry_run {
169        return Ok(SchemaRemoveReport {
170            pages_removed: pages_to_remove,
171            pages_deleted_from_disk: if delete_pages { pages_to_remove } else { 0 },
172            wiki_toml_updated: space
173                .type_registry
174                .list_types()
175                .iter()
176                .any(|(n, _)| *n == type_name),
177            schema_file_deleted: delete,
178            dry_run: true,
179        });
180    }
181
182    // Remove pages from index
183    if pages_to_remove > 0 {
184        space
185            .index_manager
186            .delete_by_type(&space.index_schema, type_name)?;
187    }
188
189    // Delete page files from disk if requested
190    let mut pages_deleted_from_disk = 0;
191    if delete_pages && pages_to_remove > 0 {
192        for page in &list_result.pages {
193            if markdown::delete_page(&page.slug, &space.wiki_root)? {
194                pages_deleted_from_disk += 1;
195            }
196        }
197    }
198
199    // Remove wiki.toml override if present
200    let mut wiki_toml_updated = false;
201    let mut wiki_cfg = config::load_wiki(&space.repo_root)?;
202    if wiki_cfg.types.remove(type_name).is_some() {
203        config::save_wiki(&wiki_cfg, &space.repo_root)?;
204        wiki_toml_updated = true;
205    }
206
207    // Delete schema file if requested
208    let mut schema_file_deleted = false;
209    if delete && let Some(schema_path) = space.type_registry.schema_path(type_name) {
210        let full_path = space.repo_root.join(schema_path);
211        if full_path.exists() {
212            // Check if other types use this schema
213            let content = std::fs::read_to_string(&full_path).unwrap_or_default();
214            if let Ok(schema) = serde_json::from_str::<serde_json::Value>(&content) {
215                let wiki_types = schema
216                    .get("x-wiki-types")
217                    .and_then(|v| v.as_object())
218                    .map(|obj| obj.len())
219                    .unwrap_or(0);
220                if wiki_types <= 1 {
221                    std::fs::remove_file(&full_path)?;
222                    schema_file_deleted = true;
223                }
224                // If multiple types use this schema, don't delete
225            }
226        }
227    }
228
229    // Auto-commit if configured and changes were made
230    let resolved = space.resolved_config(&engine.config);
231    let repo_root = space.repo_root.clone();
232    if resolved.ingest.auto_commit
233        && (pages_deleted_from_disk > 0 || wiki_toml_updated || schema_file_deleted)
234    {
235        let msg = format!(
236            "schema remove: {type_name} — {} pages, wiki.toml={wiki_toml_updated}, schema={schema_file_deleted}",
237            pages_deleted_from_disk
238        );
239        let _ = git::commit(&repo_root, &msg);
240    }
241
242    Ok(SchemaRemoveReport {
243        pages_removed: pages_to_remove,
244        pages_deleted_from_disk,
245        wiki_toml_updated,
246        schema_file_deleted,
247        dry_run: false,
248    })
249}
250
251/// Validate schema files for one type or all types; returns a list of issue strings.
252pub fn schema_validate(
253    engine: &EngineState,
254    wiki_name: &str,
255    type_name: Option<&str>,
256) -> Result<Vec<String>> {
257    let space = engine.space(wiki_name)?;
258    let mut issues = Vec::new();
259
260    if let Some(name) = type_name {
261        // Validate single type
262        if !space.type_registry.is_known(name) {
263            bail!("type '{name}' is not registered");
264        }
265        let schema_path = space
266            .type_registry
267            .schema_path(name)
268            .ok_or_else(|| anyhow::anyhow!("no schema path for type '{name}'"))?;
269        let full_path = space.repo_root.join(schema_path);
270        validate_schema_file(&full_path, &mut issues);
271    } else {
272        // Validate all schemas
273        let schemas_dir = space.repo_root.join("schemas");
274        if schemas_dir.is_dir() {
275            let mut entries: Vec<_> = std::fs::read_dir(&schemas_dir)?
276                .filter_map(|e| e.ok())
277                .filter(|e| e.path().extension().and_then(|ext| ext.to_str()) == Some("json"))
278                .collect();
279            entries.sort_by_key(|e| e.file_name());
280            for entry in entries {
281                validate_schema_file(&entry.path(), &mut issues);
282            }
283        }
284    }
285
286    // Index resolution check
287    match space_builder::build_space(&space.repo_root, "en_stem") {
288        Ok(_) => {}
289        Err(e) => issues.push(format!("index resolution failed: {e}")),
290    }
291
292    Ok(issues)
293}
294
295fn validate_schema_file(path: &Path, issues: &mut Vec<String>) {
296    let filename = path.file_name().unwrap_or_default().to_string_lossy();
297
298    let content = match std::fs::read_to_string(path) {
299        Ok(c) => c,
300        Err(e) => {
301            issues.push(format!("{filename}: cannot read: {e}"));
302            return;
303        }
304    };
305
306    let schema: serde_json::Value = match serde_json::from_str(&content) {
307        Ok(v) => v,
308        Err(e) => {
309            issues.push(format!("{filename}: invalid JSON: {e}"));
310            return;
311        }
312    };
313
314    if let Err(e) = jsonschema::Validator::new(&schema) {
315        issues.push(format!("{filename}: invalid JSON Schema: {e}"));
316        return;
317    }
318
319    if schema.get("x-wiki-types").is_none() {
320        issues.push(format!(
321            "{filename}: missing x-wiki-types (types won't be discovered)"
322        ));
323    }
324}
325
326fn generate_template(schema: &serde_json::Value, type_name: &str) -> String {
327    let required: Vec<&str> = schema
328        .get("required")
329        .and_then(|v| v.as_array())
330        .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
331        .unwrap_or_default();
332
333    let properties = schema
334        .get("properties")
335        .and_then(|v| v.as_object())
336        .cloned()
337        .unwrap_or_default();
338
339    let mut lines = vec!["---".to_string()];
340
341    // Required fields first
342    for field in &required {
343        if let Some(prop) = properties.get(*field) {
344            lines.push(format_template_field(field, prop, type_name));
345        }
346    }
347
348    // Common optional fields
349    for field in &["summary", "status", "last_updated", "tags"] {
350        if !required.contains(field)
351            && let Some(prop) = properties.get(*field)
352        {
353            lines.push(format_template_field(field, prop, type_name));
354        }
355    }
356
357    lines.push("---".to_string());
358    lines.join("\n")
359}
360
361fn format_template_field(name: &str, prop: &serde_json::Value, type_name: &str) -> String {
362    let prop_type = prop
363        .get("type")
364        .and_then(|v| v.as_str())
365        .unwrap_or("string");
366
367    match prop_type {
368        "array" => {
369            if name == "read_when" || name == "tags" {
370                format!("{name}:\n  - \"\"")
371            } else {
372                format!("{name}: []")
373            }
374        }
375        "string" => {
376            if name == "type" {
377                format!("type: {type_name}")
378            } else if name == "status" {
379                "status: active".to_string()
380            } else if name == "last_updated" {
381                format!(
382                    "last_updated: \"{}\"",
383                    chrono::Utc::now().format("%Y-%m-%d")
384                )
385            } else {
386                format!("{name}: \"\"")
387            }
388        }
389        "boolean" => format!("{name}: false"),
390        _ => format!("{name}: \"\""),
391    }
392}