Skip to main content

husako_core/
schema_source.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use husako_config::{HusakoConfig, SchemaSource};
5use serde_json::Value;
6
7use crate::HusakoError;
8use crate::progress::ProgressReporter;
9
10/// Resolve all schema sources from a `husako.toml` config.
11///
12/// Returns a merged `HashMap<String, Value>` where keys are discovery paths
13/// (e.g., `"apis/cert-manager.io/v1"`) and values are OpenAPI spec JSON.
14/// Later sources override for the same key.
15pub fn resolve_all(
16    config: &HusakoConfig,
17    project_root: &Path,
18    cache_dir: &Path,
19    progress: &dyn ProgressReporter,
20) -> Result<HashMap<String, Value>, HusakoError> {
21    let mut merged = HashMap::new();
22
23    for (name, source) in &config.resources {
24        let task = progress.start_task(&format!("Resolving {name}..."));
25        let specs = match source {
26            SchemaSource::File { path } => resolve_file(path, project_root)?,
27            SchemaSource::Cluster { cluster } => {
28                resolve_cluster(config, cluster.as_deref(), cache_dir)?
29            }
30            SchemaSource::Release { version } => resolve_release(version, cache_dir)?,
31            SchemaSource::Git { repo, tag, path } => resolve_git(repo, tag, path, cache_dir)?,
32        };
33
34        if specs.is_empty() {
35            eprintln!("warning: schema source '{name}' produced no specs");
36        }
37
38        task.finish_ok(&format!("{name}: {} group-versions", specs.len()));
39        merged.extend(specs);
40    }
41
42    Ok(merged)
43}
44
45/// Resolve a file-based schema source.
46///
47/// If `path` points to a single file, parse it as CRD YAML.
48/// If `path` points to a directory, read all `.yaml`/`.yml` files and convert.
49fn resolve_file(path: &str, project_root: &Path) -> Result<HashMap<String, Value>, HusakoError> {
50    let resolved = project_root.join(path);
51
52    if !resolved.exists() {
53        return Err(HusakoError::GenerateIo(format!(
54            "schema source path not found: {}",
55            resolved.display()
56        )));
57    }
58
59    let yaml = if resolved.is_dir() {
60        read_crd_directory(&resolved)?
61    } else {
62        std::fs::read_to_string(&resolved)
63            .map_err(|e| HusakoError::GenerateIo(format!("read {}: {e}", resolved.display())))?
64    };
65
66    let openapi = husako_openapi::crd::crd_yaml_to_openapi(&yaml)?;
67
68    // Build discovery keys from the schemas' GVK
69    crd_openapi_to_specs(&openapi)
70}
71
72/// Read all `.yaml`/`.yml` files in a directory and concatenate them.
73fn read_crd_directory(dir: &Path) -> Result<String, HusakoError> {
74    let mut parts = Vec::new();
75    let entries = std::fs::read_dir(dir)
76        .map_err(|e| HusakoError::GenerateIo(format!("read dir {}: {e}", dir.display())))?;
77
78    let mut paths: Vec<_> = entries
79        .filter_map(|e| e.ok())
80        .map(|e| e.path())
81        .filter(|p| {
82            p.extension()
83                .is_some_and(|ext| ext == "yaml" || ext == "yml")
84        })
85        .collect();
86    paths.sort();
87
88    for path in paths {
89        let content = std::fs::read_to_string(&path)
90            .map_err(|e| HusakoError::GenerateIo(format!("read {}: {e}", path.display())))?;
91        parts.push(content);
92    }
93
94    if parts.is_empty() {
95        return Err(HusakoError::GenerateIo(format!(
96            "no .yaml/.yml files found in {}",
97            dir.display()
98        )));
99    }
100
101    Ok(parts.join("\n---\n"))
102}
103
104/// Convert CRD-parsed OpenAPI JSON to discovery-keyed specs.
105///
106/// Groups schemas by their GVK group/version to produce keys like `"apis/cert-manager.io/v1"`.
107fn crd_openapi_to_specs(openapi: &Value) -> Result<HashMap<String, Value>, HusakoError> {
108    let schemas = openapi
109        .pointer("/components/schemas")
110        .and_then(Value::as_object)
111        .ok_or_else(|| HusakoError::GenerateIo("invalid CRD OpenAPI output".to_string()))?;
112
113    // Group schemas by their discovery key (derived from GVK)
114    let mut grouped: HashMap<String, serde_json::Map<String, Value>> = HashMap::new();
115
116    for (name, schema) in schemas {
117        let gvk = schema.get("x-kubernetes-group-version-kind");
118        let discovery_key = if let Some(gvk_arr) = gvk
119            && let Some(gvk_obj) = gvk_arr.as_array().and_then(|a| a.first())
120        {
121            let group = gvk_obj["group"].as_str().unwrap_or("");
122            let version = gvk_obj["version"].as_str().unwrap_or("");
123            if group.is_empty() {
124                format!("api/{version}")
125            } else {
126                format!("apis/{group}/{version}")
127            }
128        } else {
129            // Non-GVK schemas: try to derive key from the schema name
130            derive_discovery_key(name)
131        };
132
133        grouped
134            .entry(discovery_key)
135            .or_default()
136            .insert(name.clone(), schema.clone());
137    }
138
139    let mut result = HashMap::new();
140    for (key, schemas_map) in grouped {
141        result.insert(
142            key,
143            serde_json::json!({
144                "components": {
145                    "schemas": schemas_map
146                }
147            }),
148        );
149    }
150
151    Ok(result)
152}
153
/// Derive a discovery key from a dotted schema name.
///
/// `io.cert-manager.v1.CertificateSpec` → `apis/cert-manager.io/v1`
///
/// The name is expected to look like `<reversed-group>.<version>.<Type>`.
/// The first segment that starts with `v` followed by an ASCII digit is taken
/// as the version; all segments before it, in reverse order, form the group.
/// Names with fewer than four segments, or without a version segment, map to
/// `apis/unknown/<name>`.
fn derive_discovery_key(name: &str) -> String {
    let unknown = || format!("apis/unknown/{name}");

    let segments: Vec<&str> = name.split('.').collect();
    if segments.len() < 4 {
        return unknown();
    }

    // A version segment is `v` immediately followed by a digit (`v1`, `v2beta1`, …).
    let is_version = |segment: &&str| {
        let mut chars = segment.chars();
        chars.next() == Some('v') && chars.next().is_some_and(|c| c.is_ascii_digit())
    };

    let Some(version_at) = segments.iter().position(is_version) else {
        return unknown();
    };
    let version = segments[version_at];

    // Everything before the version is the group written in reverse-domain
    // order; flipping the segments restores the normal form.
    let group = segments[..version_at]
        .iter()
        .rev()
        .copied()
        .collect::<Vec<_>>()
        .join(".");

    match group.as_str() {
        // These groups are mapped to the `api/` prefix instead of `apis/`.
        "" | "k8s.io" | "io" => format!("api/{version}"),
        _ => format!("apis/{group}/{version}"),
    }
}
185
186/// Resolve a cluster-based schema source.
187fn resolve_cluster(
188    config: &HusakoConfig,
189    cluster_name: Option<&str>,
190    cache_dir: &Path,
191) -> Result<HashMap<String, Value>, HusakoError> {
192    let server =
193        if let Some(name) = cluster_name {
194            config
195                .clusters
196                .get(name)
197                .map(|c| &c.server)
198                .ok_or_else(|| {
199                    HusakoError::GenerateIo(format!("cluster '{name}' not found in config"))
200                })?
201        } else {
202            config.cluster.as_ref().map(|c| &c.server).ok_or_else(|| {
203                HusakoError::GenerateIo("no [cluster] section in config".to_string())
204            })?
205        };
206
207    let creds = husako_openapi::kubeconfig::resolve_credentials(server)?;
208
209    let client = husako_openapi::OpenApiClient::new(husako_openapi::FetchOptions {
210        source: husako_openapi::OpenApiSource::Url {
211            base_url: creds.server,
212            bearer_token: Some(creds.bearer_token),
213        },
214        cache_dir: cache_dir.to_path_buf(),
215        offline: false,
216    })?;
217
218    let specs = client.fetch_all_specs()?;
219    Ok(specs)
220}
221
222/// Resolve a GitHub release schema source.
223fn resolve_release(version: &str, cache_dir: &Path) -> Result<HashMap<String, Value>, HusakoError> {
224    let specs = husako_openapi::release::fetch_release_specs(version, cache_dir)?;
225    Ok(specs)
226}
227
228/// Resolve a git-based schema source.
229fn resolve_git(
230    repo: &str,
231    tag: &str,
232    path: &str,
233    cache_dir: &Path,
234) -> Result<HashMap<String, Value>, HusakoError> {
235    let repo_hash = simple_hash(repo);
236    let git_cache = cache_dir.join(format!("git/{repo_hash}/{tag}"));
237
238    // Check cache
239    if git_cache.exists() {
240        return load_git_cache(&git_cache);
241    }
242
243    // Clone repo at specific tag
244    let temp_dir = tempfile::tempdir()
245        .map_err(|e| HusakoError::GenerateIo(format!("create temp dir: {e}")))?;
246
247    let status = std::process::Command::new("git")
248        .args(["clone", "--depth", "1", "--branch", tag, repo])
249        .arg(temp_dir.path())
250        .stdout(std::process::Stdio::null())
251        .stderr(std::process::Stdio::piped())
252        .status()
253        .map_err(|e| HusakoError::GenerateIo(format!("git clone failed: {e}")))?;
254
255    if !status.success() {
256        return Err(HusakoError::GenerateIo(format!(
257            "git clone {repo} at tag {tag} failed (exit {})",
258            status.code().unwrap_or(-1)
259        )));
260    }
261
262    // Read CRD YAML files
263    let crd_dir = temp_dir.path().join(path);
264    if !crd_dir.exists() {
265        return Err(HusakoError::GenerateIo(format!(
266            "path '{path}' not found in repository"
267        )));
268    }
269
270    let yaml = read_crd_directory(&crd_dir)?;
271    let openapi = husako_openapi::crd::crd_yaml_to_openapi(&yaml)?;
272    let specs = crd_openapi_to_specs(&openapi)?;
273
274    // Cache the converted specs
275    std::fs::create_dir_all(&git_cache)
276        .map_err(|e| HusakoError::GenerateIo(format!("create cache dir: {e}")))?;
277    for (key, spec) in &specs {
278        let filename = key.replace('/', "__") + ".json";
279        let _ = std::fs::write(
280            git_cache.join(&filename),
281            serde_json::to_string(spec).unwrap_or_default(),
282        );
283    }
284
285    Ok(specs)
286}
287
288fn load_git_cache(cache_dir: &Path) -> Result<HashMap<String, Value>, HusakoError> {
289    let mut specs = HashMap::new();
290    let entries = std::fs::read_dir(cache_dir)
291        .map_err(|e| HusakoError::GenerateIo(format!("read cache dir: {e}")))?;
292
293    for entry in entries {
294        let entry = entry.map_err(|e| HusakoError::GenerateIo(format!("read entry: {e}")))?;
295        let path = entry.path();
296        if path.extension().is_some_and(|ext| ext == "json") {
297            let filename = path.file_stem().unwrap().to_string_lossy();
298            let key = filename.replace("__", "/");
299            let data = std::fs::read_to_string(&path)
300                .map_err(|e| HusakoError::GenerateIo(format!("read {}: {e}", path.display())))?;
301            let spec: Value = serde_json::from_str(&data)
302                .map_err(|e| HusakoError::GenerateIo(format!("parse {}: {e}", path.display())))?;
303            specs.insert(key, spec);
304        }
305    }
306
307    Ok(specs)
308}
309
/// Hash a string into 16 lowercase hex digits for use in cache directory names.
///
/// Starts from 5381 and, for each byte, multiplies the running value by 33 and
/// adds the byte, wrapping on `u64` overflow. The result is deterministic.
fn simple_hash(s: &str) -> String {
    let digest = s.bytes().fold(5381_u64, |acc, byte| {
        acc.wrapping_mul(33).wrapping_add(u64::from(byte))
    });
    format!("{digest:016x}")
}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// Discovery key that both fixture CRDs are expected to land under.
    const EXAMPLE_KEY: &str = "apis/example.com/v1";

    /// CRD fixture for kind `Widget` in group `example.com`, version `v1`.
    const WIDGET_CRD: &str = r#"
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: widgets.example.com
spec:
  group: example.com
  names:
    kind: Widget
    plural: widgets
  scope: Namespaced
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                size:
                  type: integer
"#;

    /// CRD fixture for kind `Gadget` in group `example.com`, version `v1`.
    const GADGET_CRD: &str = r#"
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: gadgets.example.com
spec:
  group: example.com
  names:
    kind: Gadget
    plural: gadgets
  scope: Namespaced
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                name:
                  type: string
"#;

    #[test]
    fn resolve_file_single() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("my-crd.yaml"), WIDGET_CRD).unwrap();

        let specs = resolve_file("my-crd.yaml", tmp.path()).unwrap();

        assert!(!specs.is_empty());
        // The single CRD must be filed under its group/version.
        assert!(specs.contains_key(EXAMPLE_KEY));
    }

    #[test]
    fn resolve_file_directory() {
        let tmp = tempfile::tempdir().unwrap();
        let crds_dir = tmp.path().join("crds");
        std::fs::create_dir_all(&crds_dir).unwrap();

        // One file per accepted extension.
        for (file_name, yaml) in [("widget.yaml", WIDGET_CRD), ("gadget.yml", GADGET_CRD)] {
            std::fs::write(crds_dir.join(file_name), yaml).unwrap();
        }

        let specs = resolve_file("crds", tmp.path()).unwrap();
        let schemas = specs[EXAMPLE_KEY]["components"]["schemas"]
            .as_object()
            .unwrap();

        // Both CRDs share a group/version, so they must share one spec.
        for expected in ["com.example.v1.Widget", "com.example.v1.Gadget"] {
            assert!(schemas.contains_key(expected));
        }
    }

    #[test]
    fn resolve_file_not_found() {
        let tmp = tempfile::tempdir().unwrap();

        let message = resolve_file("nonexistent.yaml", tmp.path())
            .unwrap_err()
            .to_string();

        assert!(message.contains("not found"));
    }

    #[test]
    fn crd_dir_reading_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let empty = tmp.path().join("empty");
        std::fs::create_dir_all(&empty).unwrap();

        let message = read_crd_directory(&empty).unwrap_err().to_string();

        assert!(message.contains("no .yaml/.yml files"));
    }

    #[test]
    fn derive_discovery_key_from_name() {
        let cases = [
            (
                "io.cert-manager.v1.CertificateSpec",
                "apis/cert-manager.io/v1",
            ),
            (
                "io.cnpg.postgresql.v1.ClusterSpec",
                "apis/postgresql.cnpg.io/v1",
            ),
        ];

        for (schema_name, expected_key) in cases {
            assert_eq!(derive_discovery_key(schema_name), expected_key);
        }
    }

    #[test]
    fn simple_hash_deterministic() {
        let repo = "https://github.com/cert-manager/cert-manager";

        let first = simple_hash(repo);
        let second = simple_hash(repo);

        assert_eq!(first, second);
        assert_eq!(first.len(), 16);
    }

    #[test]
    fn git_cache_round_trip() {
        let tmp = tempfile::tempdir().unwrap();
        let cache_dir = tmp.path().join("git/test/v1");
        std::fs::create_dir_all(&cache_dir).unwrap();

        // `/` in the key is stored as `__` in the file name.
        let spec = serde_json::json!({"components": {"schemas": {"test": {}}}});
        let serialized = serde_json::to_string(&spec).unwrap();
        std::fs::write(cache_dir.join("apis__example.com__v1.json"), serialized).unwrap();

        let loaded = load_git_cache(&cache_dir).unwrap();

        assert_eq!(loaded.len(), 1);
        assert!(loaded.contains_key("apis/example.com/v1"));
    }
}