Skip to main content

alembic_engine/
transform.rs

1//! map: ir → ir transformation.
2//!
3//! map takes a canonical inventory and re-emits it under a different vocabulary —
4//! renaming types and fields, dropping or deriving values, rewiring references —
5//! using the shared render/emit half (`render_key`, `render_attrs`, templates +
6//! transforms). a `uid` is a derived projection of `(type, key)`, so references
7//! between objects are resolved internally and the emitted `uid`s (and the ref
8//! values that point at them) are re-derived at the boundary in a second pass.
9//!
10//! a rule selects source objects by a type-name pattern with optional field
11//! predicates and emits one or more target objects per match (fan-out), or — with
12//! `group_by` — buckets the matched objects and emits once per group (N->1
13//! aggregation). `lookups` follow a ref to read a field from the object it points
14//! at, so an emit can pull a value off a related object.
15
16use crate::predicate::{parse_predicates, Predicate, PredicateOp};
17use crate::render::{
18    render_attrs, render_key, render_template, RenderCtx, TransformRegistry, UidV5Spec,
19};
20use alembic_core::{
21    key_string, uid_v5, FieldType, Inventory, JsonMap, Key, Object, Schema, TypeName, Uid,
22};
23use anyhow::{anyhow, Context, Result};
24use serde::Deserialize;
25use serde_json::Value as JsonValue;
26use serde_yaml::Value as YamlValue;
27use std::collections::BTreeMap;
28use std::path::{Path, PathBuf};
29use uuid::Uuid;
30
31/// uid override for an emit: a deterministic `v5: { type, stable }` or an
32/// explicit uuid-string template.
33#[derive(Debug, Deserialize)]
34#[serde(untagged)]
35pub enum EmitUid {
36    V5 { v5: UidV5Spec },
37    Template(String),
38}
39
40/// a map specification: the target schema plus the transformation rules.
41#[derive(Debug, Deserialize)]
42pub struct MapSpec {
43    /// target schema; the output inventory is validated against it.
44    #[serde(default)]
45    pub schema: Schema,
46    #[serde(default)]
47    pub rules: Vec<MapRule>,
48    /// user-defined starlark transforms, consulted by `${var|name}` pipelines
49    /// before the built-ins (requires the `starlark` feature).
50    #[serde(default)]
51    pub transforms: Option<TransformsSpec>,
52    /// directory of the spec file, captured by `load_map_spec` and used to
53    /// resolve `transforms.file` and starlark `load()` paths. `None` for specs
54    /// parsed from strings: a relative `transforms.file` then resolves against
55    /// the process cwd and `load()` is an error.
56    #[serde(skip)]
57    pub base_dir: Option<PathBuf>,
58}
59
60/// the `transforms:` block of a map spec: starlark source from a file or
61/// inline. exactly one of the two must be set.
62#[derive(Debug, Deserialize)]
63pub struct TransformsSpec {
64    /// path to a starlark file; a relative path resolves against the spec file.
65    #[serde(default)]
66    pub file: Option<PathBuf>,
67    /// inline starlark source.
68    #[serde(default)]
69    pub inline: Option<String>,
70}
71
72/// a single ir→ir rule: select source objects, emit one or more target objects
73/// each. `uids` declares named uids (computed once per matched source) referenced
74/// as `${uids.name}` in emits — the mechanism for wiring cross-object refs in a
75/// multi-emit restructure.
76#[derive(Debug, Deserialize)]
77pub struct MapRule {
78    pub name: String,
79    /// source selector: a type-name pattern with optional field
80    /// predicates, e.g. `dcim.site`, `dcim.*`, or `dcim.device[attrs.role=leaf]`.
81    pub r#match: String,
82    /// when set, the matched objects are bucketed by this rendered template and
83    /// the rule emits once per group (N->1 aggregation) instead of once per
84    /// object. emits then draw on `${group.key}`, `${group.count}`, and
85    /// per-member values collected into lists as `${group.items.<path>}`.
86    #[serde(default)]
87    pub group_by: Option<String>,
88    /// reference lookups: each resolves a uid (`ref`) to an object in the input
89    /// and binds one of its fields (`get`) as `${lookup.name}`, so an emit can
90    /// read a value from the object a ref points at. resolved before `uids`.
91    #[serde(default)]
92    pub lookups: BTreeMap<String, Lookup>,
93    /// named uids computed once and available as `${uids.name}` in emits.
94    #[serde(default)]
95    pub uids: BTreeMap<String, EmitUid>,
96    /// a single emit (a mapping) or a list of emits.
97    pub emit: EmitSpec,
98}
99
100/// a reference lookup: render `ref` to a uid, find that object in the input, and
101/// read the dotted field path `get` from it (e.g. `attrs.name`).
102#[derive(Debug, Deserialize)]
103pub struct Lookup {
104    pub r#ref: String,
105    pub get: String,
106}
107
108/// either a single emit (backward-compatible mapping) or a list of emits.
109#[derive(Debug, Deserialize)]
110#[serde(untagged)]
111pub enum EmitSpec {
112    Single(MapEmit),
113    Multi(Vec<MapEmit>),
114}
115
116/// one emitted target object.
117#[derive(Debug, Deserialize)]
118pub struct MapEmit {
119    /// target type name (templates allowed).
120    #[serde(rename = "type", alias = "kind")]
121    pub type_name: String,
122    pub key: BTreeMap<String, YamlValue>,
123    /// optional uid override; defaults to `uid_v5(target_type, target_key)`.
124    #[serde(default)]
125    pub uid: Option<EmitUid>,
126    #[serde(default)]
127    pub attrs: BTreeMap<String, YamlValue>,
128}
129
130/// a compiled `match` selector: a type-name pattern plus field predicates over
131/// the source object (`dcim.device[attrs.role=leaf]`). predicates are evaluated
132/// against the same dotted var namespace as templates (`attrs.*`, `key.*`,
133/// `type`, `uid`).
134struct Matcher {
135    glob: TypeGlob,
136    predicates: Vec<Predicate>,
137}
138
/// the supported shapes of a type-name pattern.
enum TypeGlob {
    /// a bare `*`: every type matches.
    Any,
    /// a pattern ending in `*` (such as `dcim.*`); holds the text before the
    /// star, matched as a prefix.
    Prefix(String),
    /// anything else: the type name must be equal to this string.
    Exact(String),
}
145
146impl Matcher {
147    fn parse(selector: &str) -> Result<Self> {
148        let selector = selector.trim();
149        let (base, predicates) = match selector.find('[') {
150            Some(idx) => (selector[..idx].trim(), parse_predicates(&selector[idx..])?),
151            None => (selector, Vec::new()),
152        };
153        if base.is_empty() {
154            return Err(anyhow!("match selector requires a type pattern"));
155        }
156        let glob = if base == "*" {
157            TypeGlob::Any
158        } else if let Some(prefix) = base.strip_suffix('*') {
159            TypeGlob::Prefix(prefix.to_string())
160        } else {
161            TypeGlob::Exact(base.to_string())
162        };
163        Ok(Self { glob, predicates })
164    }
165
166    fn type_matches(&self, type_name: &str) -> bool {
167        match &self.glob {
168            TypeGlob::Any => true,
169            TypeGlob::Prefix(prefix) => type_name.starts_with(prefix),
170            TypeGlob::Exact(exact) => type_name == exact,
171        }
172    }
173
174    fn predicates_match(&self, vars: &BTreeMap<String, JsonValue>) -> bool {
175        self.predicates
176            .iter()
177            .all(|pred| predicate_matches(pred, vars))
178    }
179}
180
181/// evaluate a predicate against the object's flattened var namespace, with
182/// scalar-comparison and existence semantics.
183fn predicate_matches(pred: &Predicate, vars: &BTreeMap<String, JsonValue>) -> bool {
184    let field = vars.get(&pred.field);
185    match pred.op {
186        PredicateOp::Exists => matches!(field, Some(value) if !value.is_null()),
187        PredicateOp::NotExists => match field {
188            Some(value) => value.is_null(),
189            None => true,
190        },
191        PredicateOp::Eq => {
192            matches!(field.and_then(json_scalar), Some(rendered) if rendered == pred.value)
193        }
194        PredicateOp::Ne => {
195            matches!(field.and_then(json_scalar), Some(rendered) if rendered != pred.value)
196        }
197    }
198}
199
200/// render a scalar json value to the text a predicate compares against; mirrors
201/// `predicate` scalar rules for the json value model.
202fn json_scalar(value: &JsonValue) -> Option<String> {
203    match value {
204        JsonValue::String(text) => Some(text.clone()),
205        JsonValue::Number(number) => Some(number.to_string()),
206        JsonValue::Bool(boolean) => Some(boolean.to_string()),
207        JsonValue::Null | JsonValue::Array(_) | JsonValue::Object(_) => None,
208    }
209}
210
211/// load a map spec from a yaml file. the spec remembers the file's directory so
212/// `transforms.file` and starlark `load()` paths resolve relative to it.
213pub fn load_map_spec(path: impl AsRef<Path>) -> Result<MapSpec> {
214    let path = path.as_ref();
215    let raw = std::fs::read_to_string(path)
216        .with_context(|| format!("read map spec: {}", path.display()))?;
217    let mut spec: MapSpec = serde_yaml::from_str(&raw)
218        .with_context(|| format!("parse map spec: {}", path.display()))?;
219    spec.base_dir = path.parent().map(Path::to_path_buf);
220    Ok(spec)
221}
222
223/// build the transform registry for a spec: empty (built-ins only) without a
224/// `transforms:` block, otherwise the compiled starlark module. starlark is
225/// compiled once here, not per template.
226fn transform_registry(spec: &MapSpec) -> Result<TransformRegistry> {
227    let Some(transforms) = &spec.transforms else {
228        return Ok(TransformRegistry::EMPTY);
229    };
230    #[cfg(not(feature = "starlark"))]
231    {
232        let _ = transforms;
233        Err(anyhow!(
234            "map spec has a transforms block but alembic-engine was built without the starlark feature"
235        ))
236    }
237    #[cfg(feature = "starlark")]
238    {
239        let (source, filename) = match (&transforms.file, &transforms.inline) {
240            (Some(_), Some(_)) | (None, None) => {
241                return Err(anyhow!(
242                    "map spec transforms: requires exactly one of file or inline"
243                ));
244            }
245            (Some(file), None) => {
246                let path = match &spec.base_dir {
247                    Some(base) if file.is_relative() => base.join(file),
248                    _ => file.clone(),
249                };
250                let source = std::fs::read_to_string(&path)
251                    .with_context(|| format!("read transforms file: {}", path.display()))?;
252                (source, path.display().to_string())
253            }
254            (None, Some(inline)) => (inline.clone(), "transforms".to_string()),
255        };
256        let user = crate::starlark_transforms::StarlarkTransforms::compile(
257            &source,
258            &filename,
259            spec.base_dir.as_deref(),
260        )?;
261        Ok(TransformRegistry::with_user(user))
262    }
263}
264
265/// evaluate a single named transform from a map spec against a json value,
266/// consulting the spec's user transforms first, then the built-ins. backs
267/// `alembic map transform`, the iteration loop for writing transforms: one
268/// value in, the typed result out, no inventory or backend involved.
269pub fn eval_map_transform(
270    spec: &MapSpec,
271    name: &str,
272    value: &JsonValue,
273    args: &[JsonValue],
274) -> Result<JsonValue> {
275    let registry = transform_registry(spec)?;
276    crate::render::apply_single_transform(&registry, name, value, args)
277}
278
279/// per-run immutables shared by every emit: the compiled transform registry and
280/// the source-object index used to resolve reference lookups.
281struct MapRun<'a> {
282    transforms: TransformRegistry,
283    index: BTreeMap<Uid, &'a Object>,
284}
285
286/// transform an ir inventory into another ir inventory under the target schema.
287pub fn compile_map(input: &Inventory, spec: &MapSpec) -> Result<Inventory> {
288    let run = MapRun {
289        transforms: transform_registry(spec)?,
290        // source uid -> object, for resolving reference lookups.
291        index: input.objects.iter().map(|o| (o.uid, o)).collect(),
292    };
293    let mut objects = Vec::new();
294    // source uid -> emitted uid, used to re-derive ref values in pass 2.
295    let mut remap: BTreeMap<Uid, Uid> = BTreeMap::new();
296
297    for rule in &spec.rules {
298        let matcher = Matcher::parse(&rule.r#match)
299            .with_context(|| format!("rule {}: invalid match selector", rule.name))?;
300        let emits = match &rule.emit {
301            EmitSpec::Single(emit) => std::slice::from_ref(emit),
302            EmitSpec::Multi(emits) => emits.as_slice(),
303        };
304        match &rule.group_by {
305            // per-object: emit once per matched source.
306            None => {
307                // a rule emitting exactly one object per source is a 1:1 rename,
308                // so we record source->target for automatic ref-rewiring. a
309                // multi-emit rule is a restructure where auto-rewiring would be
310                // ambiguous, so its cross-object refs are wired explicitly via
311                // named `uids` instead.
312                let remap_each = emits.len() == 1;
313                for src in input.objects.iter() {
314                    if !matcher.type_matches(src.type_name.as_str()) {
315                        continue;
316                    }
317                    let vars = object_vars(src);
318                    if !matcher.predicates_match(&vars) {
319                        continue;
320                    }
321                    let remap_source = remap_each.then_some(src.uid);
322                    emit_objects(
323                        rule,
324                        emits,
325                        vars,
326                        &run,
327                        remap_source,
328                        &mut objects,
329                        &mut remap,
330                    )?;
331                }
332            }
333            // aggregation: bucket matched sources by the rendered key, emit once
334            // per group. N->1, so no auto ref-rewiring (cross-object refs use
335            // named `uids`). a BTreeMap keys groups deterministically; members
336            // stay in input order.
337            Some(group_expr) => {
338                let mut groups: BTreeMap<String, Vec<&Object>> = BTreeMap::new();
339                for src in input.objects.iter() {
340                    if !matcher.type_matches(src.type_name.as_str()) {
341                        continue;
342                    }
343                    let vars = object_vars(src);
344                    if !matcher.predicates_match(&vars) {
345                        continue;
346                    }
347                    let group_key = render_template(
348                        group_expr,
349                        &RenderCtx {
350                            vars: &vars,
351                            transforms: &run.transforms,
352                            rule: &rule.name,
353                        },
354                        "group_by",
355                    )?;
356                    groups.entry(group_key).or_default().push(src);
357                }
358                for (group_key, members) in &groups {
359                    let vars = group_vars(group_key, members);
360                    emit_objects(rule, emits, vars, &run, None, &mut objects, &mut remap)?;
361                }
362            }
363        }
364    }
365
366    rewrite_refs(&mut objects, &spec.schema, &remap);
367
368    objects.sort_by(|a, b| {
369        (a.type_name.as_str(), key_string(&a.key)).cmp(&(b.type_name.as_str(), key_string(&b.key)))
370    });
371
372    let inventory = Inventory {
373        schema: spec.schema.clone(),
374        objects,
375    };
376    crate::report_to_result(crate::validate(&inventory))?;
377    Ok(inventory)
378}
379
380/// compute named uids and run a rule's emits against `vars`, pushing the
381/// resulting objects. `remap_source`, when set, records source->target uid for
382/// the automatic ref-rewiring pass (1:1 rules only).
383fn emit_objects(
384    rule: &MapRule,
385    emits: &[MapEmit],
386    mut vars: BTreeMap<String, JsonValue>,
387    run: &MapRun,
388    remap_source: Option<Uid>,
389    objects: &mut Vec<Object>,
390    remap: &mut BTreeMap<Uid, Uid>,
391) -> Result<()> {
392    // resolve reference lookups first, so named uids and emits can use them.
393    // bound under `lookup.<name>`, mirroring `uids.<name>`, so a lookup can never
394    // shadow the object's own vars (`uid`, `key.*`, `attrs.*`, ...).
395    for (name, lookup) in &rule.lookups {
396        let ctx = RenderCtx {
397            vars: &vars,
398            transforms: &run.transforms,
399            rule: &rule.name,
400        };
401        let value = resolve_lookup(name, lookup, &ctx, &run.index)?;
402        vars.insert(format!("lookup.{name}"), value);
403    }
404    // compute named uids once, exposed as `uids.name` to every emit.
405    for (name, uid_spec) in &rule.uids {
406        let context = format!("uids.{name}");
407        let ctx = RenderCtx {
408            vars: &vars,
409            transforms: &run.transforms,
410            rule: &rule.name,
411        };
412        let uid = resolve_uid_spec(uid_spec, &ctx, &context)?;
413        vars.insert(context, JsonValue::String(uid.to_string()));
414    }
415    let ctx = RenderCtx {
416        vars: &vars,
417        transforms: &run.transforms,
418        rule: &rule.name,
419    };
420    for emit in emits {
421        let key = render_key(&emit.key, &ctx)?;
422        let type_name = TypeName::new(render_template(&emit.type_name, &ctx, "type")?);
423        let uid = resolve_emit_uid(&emit.uid, &ctx, type_name.as_str(), &key)?;
424        let attrs = render_attrs(&emit.attrs, &ctx, "attrs")?;
425        let attrs = JsonMap::from(attrs.into_iter().collect::<BTreeMap<_, _>>());
426
427        if let Some(source) = remap_source {
428            if let Some(prev) = remap.insert(source, uid) {
429                if prev != uid {
430                    return Err(anyhow!(
431                        "source object {source} is matched by multiple rules emitting different uids"
432                    ));
433                }
434            }
435        }
436        objects.push(Object::new(uid, type_name, key, attrs)?);
437    }
438    Ok(())
439}
440
441/// resolve a reference lookup: render `ref` to a uid, find that object in the
442/// input index, and read its `get` field path. strict: a ref that is not a uuid,
443/// a uid not in the input, or a missing field is an error.
444fn resolve_lookup(
445    name: &str,
446    lookup: &Lookup,
447    ctx: &RenderCtx,
448    index: &BTreeMap<Uid, &Object>,
449) -> Result<JsonValue> {
450    let rule = ctx.rule;
451    let context = format!("lookups.{name}");
452    let rendered = render_template(&lookup.r#ref, ctx, &context)?;
453    let uid = Uuid::parse_str(&rendered).with_context(|| {
454        format!("rule {rule}: lookup {name} ref is not a valid uuid: {rendered}")
455    })?;
456    let referent = index
457        .get(&uid)
458        .ok_or_else(|| anyhow!("rule {rule}: lookup {name} ref {uid} is not in the input"))?;
459    object_vars(referent)
460        .get(&lookup.get)
461        .cloned()
462        .ok_or_else(|| {
463            anyhow!(
464                "rule {rule}: lookup {name} field {} is absent on {uid}",
465                lookup.get
466            )
467        })
468}
469
470/// build the template vars for an aggregation group: `group.key`, `group.count`,
471/// and every member field collected into a list under `group.items.<path>`
472/// (present, non-missing values in member order).
473fn group_vars(group_key: &str, members: &[&Object]) -> BTreeMap<String, JsonValue> {
474    let mut vars = BTreeMap::new();
475    vars.insert(
476        "group.key".to_string(),
477        JsonValue::String(group_key.to_string()),
478    );
479    vars.insert(
480        "group.count".to_string(),
481        JsonValue::Number(members.len().into()),
482    );
483
484    let per_member: Vec<BTreeMap<String, JsonValue>> =
485        members.iter().map(|member| object_vars(member)).collect();
486    let mut paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
487    for member in &per_member {
488        paths.extend(member.keys().cloned());
489    }
490    for path in paths {
491        let values: Vec<JsonValue> = per_member
492            .iter()
493            .filter_map(|member| member.get(&path).cloned())
494            .collect();
495        vars.insert(format!("group.items.{path}"), JsonValue::Array(values));
496    }
497    vars
498}
499
500/// build the template vars for a source object: `uid`, `type`, and every key /
501/// attr field flattened to a dotted path (so `${attrs.model.fabric}` works).
502fn object_vars(obj: &Object) -> BTreeMap<String, JsonValue> {
503    let mut vars = BTreeMap::new();
504    vars.insert("uid".to_string(), JsonValue::String(obj.uid.to_string()));
505    vars.insert(
506        "type".to_string(),
507        JsonValue::String(obj.type_name.as_str().to_string()),
508    );
509    for (field, value) in obj.key.iter() {
510        flatten(&format!("key.{field}"), value, &mut vars);
511    }
512    for (field, value) in obj.attrs.iter() {
513        flatten(&format!("attrs.{field}"), value, &mut vars);
514    }
515    vars
516}
517
518/// insert `value` at `prefix`, recursing into objects so both the whole value
519/// (`attrs.model`) and its leaves (`attrs.model.fabric`) are addressable.
520fn flatten(prefix: &str, value: &JsonValue, out: &mut BTreeMap<String, JsonValue>) {
521    out.insert(prefix.to_string(), value.clone());
522    if let JsonValue::Object(map) = value {
523        for (field, child) in map {
524            flatten(&format!("{prefix}.{field}"), child, out);
525        }
526    }
527}
528
529/// resolve an emit's uid: the default derives `uid_v5(target_type, target_key)`;
530/// an explicit override defers to `resolve_uid_spec`.
531fn resolve_emit_uid(
532    uid: &Option<EmitUid>,
533    ctx: &RenderCtx,
534    type_name: &str,
535    key: &Key,
536) -> Result<Uid> {
537    match uid {
538        None => Ok(uid_v5(type_name, &key_string(key))),
539        Some(spec) => resolve_uid_spec(spec, ctx, "uid"),
540    }
541}
542
543/// resolve an explicit uid spec — a `v5: {type, stable}` pair or a uuid-string
544/// template — against the current vars. shared by emit uids and named `uids`.
545fn resolve_uid_spec(spec: &EmitUid, ctx: &RenderCtx, context: &str) -> Result<Uid> {
546    let rule = ctx.rule;
547    match spec {
548        EmitUid::Template(template) => {
549            let rendered = render_template(template, ctx, context)?;
550            Uuid::parse_str(&rendered).with_context(|| {
551                format!("rule {rule}: uid template is not a valid uuid: {rendered}")
552            })
553        }
554        EmitUid::V5 { v5 } => {
555            let kind = render_template(&v5.type_name, ctx, context)?;
556            let stable = render_template(&v5.stable, ctx, context)?;
557            if kind.trim().is_empty() || stable.trim().is_empty() {
558                return Err(anyhow!(
559                    "rule {rule}: uid v5 requires non-empty type and stable values"
560                ));
561            }
562            Ok(uid_v5(&kind, &stable))
563        }
564    }
565}
566
567/// rewrite `ref` / `list_ref` attr values through the source→target uid remap,
568/// using the target schema to find which fields are references.
569fn rewrite_refs(objects: &mut [Object], schema: &Schema, remap: &BTreeMap<Uid, Uid>) {
570    for obj in objects.iter_mut() {
571        let Some(type_schema) = schema.types.get(obj.type_name.as_str()) else {
572            continue;
573        };
574        for (field, field_schema) in &type_schema.fields {
575            match field_schema.r#type {
576                FieldType::Ref { .. } => {
577                    if let Some(value) = obj.attrs.get_mut(field) {
578                        rewrite_ref_value(value, remap);
579                    }
580                }
581                FieldType::ListRef { .. } => {
582                    if let Some(JsonValue::Array(items)) = obj.attrs.get_mut(field) {
583                        for item in items {
584                            rewrite_ref_value(item, remap);
585                        }
586                    }
587                }
588                _ => {}
589            }
590        }
591    }
592}
593
594fn rewrite_ref_value(value: &mut JsonValue, remap: &BTreeMap<Uid, Uid>) {
595    let JsonValue::String(raw) = value else {
596        return;
597    };
598    let Ok(old) = Uuid::parse_str(raw) else {
599        return;
600    };
601    if let Some(new) = remap.get(&old) {
602        *value = JsonValue::String(new.to_string());
603    }
604}
605
606#[cfg(test)]
607mod tests {
608    use super::*;
609    use serde_json::json;
610
611    fn input_inventory(objects: JsonValue) -> Inventory {
612        serde_json::from_value(json!({ "schema": { "types": {} }, "objects": objects })).unwrap()
613    }
614
615    fn spec(yaml: &str) -> MapSpec {
616        serde_yaml::from_str(yaml).unwrap()
617    }
618
619    #[test]
620    fn renames_type_and_field_carrying_key() {
621        let input = input_inventory(json!([
622            { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.site",
623              "key": { "site": "fra1" }, "attrs": { "name": "FRA1" } }
624        ]));
625        let out = compile_map(
626            &input,
627            &spec(
628                r#"
629schema:
630  types:
631    location.site:
632      key:
633        slug: { type: slug }
634      fields:
635        label: { type: string }
636rules:
637  - name: sites
638    match: "dcim.site"
639    emit:
640      type: location.site
641      key:
642        slug: "${key.site}"
643      attrs:
644        label: "${attrs.name}"
645"#,
646            ),
647        )
648        .unwrap();
649
650        assert_eq!(out.objects.len(), 1);
651        let obj = &out.objects[0];
652        assert_eq!(obj.type_name.as_str(), "location.site");
653        assert_eq!(obj.key.get("slug").unwrap(), &json!("fra1"));
654        assert_eq!(obj.attrs.get("label").unwrap(), &json!("FRA1"));
655        // default uid is derived from the *target* identity, deterministically.
656        assert_eq!(obj.uid, uid_v5("location.site", &key_string(&obj.key)));
657    }
658
659    #[test]
660    fn drops_unmapped_fields_and_derives_via_transform() {
661        let input = input_inventory(json!([
662            { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.site",
663              "key": { "site": "fra1" }, "attrs": { "name": "frankfurt", "secret": "drop me" } }
664        ]));
665        let out = compile_map(
666            &input,
667            &spec(
668                r#"
669schema:
670  types:
671    location.site:
672      key:
673        slug: { type: slug }
674      fields:
675        name: { type: string }
676rules:
677  - name: sites
678    match: "dcim.site"
679    emit:
680      type: location.site
681      key:
682        slug: "${key.site}"
683      attrs:
684        name: "${attrs.name|upper}"
685"#,
686            ),
687        )
688        .unwrap();
689
690        let attrs = &out.objects[0].attrs;
691        assert_eq!(attrs.get("name").unwrap(), &json!("FRANKFURT"));
692        assert!(attrs.get("secret").is_none());
693    }
694
695    #[test]
696    fn rewires_refs_across_a_rename() {
697        let site_src = Uuid::from_u128(1).to_string();
698        let input = input_inventory(json!([
699            { "uid": site_src, "type": "dcim.site",
700              "key": { "site": "fra1" }, "attrs": { "name": "FRA1" } },
701            { "uid": Uuid::from_u128(2).to_string(), "type": "dcim.device",
702              "key": { "device": "leaf01" }, "attrs": { "name": "leaf01", "site": site_src } }
703        ]));
704        let out = compile_map(
705            &input,
706            &spec(
707                r#"
708schema:
709  types:
710    location.site:
711      key:
712        slug: { type: slug }
713      fields:
714        name: { type: string }
715    dcim.device:
716      key:
717        device: { type: slug }
718      fields:
719        name: { type: string }
720        site: { type: ref, target: location.site }
721rules:
722  - name: sites
723    match: "dcim.site"
724    emit:
725      type: location.site
726      key:
727        slug: "${key.site}"
728      attrs:
729        name: "${attrs.name}"
730  - name: devices
731    match: "dcim.device"
732    emit:
733      type: dcim.device
734      key:
735        device: "${key.device}"
736      attrs:
737        name: "${attrs.name}"
738        site: "${attrs.site}"
739"#,
740            ),
741        )
742        .unwrap();
743
744        let site = out
745            .objects
746            .iter()
747            .find(|o| o.type_name.as_str() == "location.site")
748            .unwrap();
749        let device = out
750            .objects
751            .iter()
752            .find(|o| o.type_name.as_str() == "dcim.device")
753            .unwrap();
754        // the device's ref now points at the *new* site uid, not the source one.
755        assert_eq!(
756            device.attrs.get("site").unwrap(),
757            &json!(site.uid.to_string())
758        );
759        assert_ne!(device.attrs.get("site").unwrap(), &json!(site_src));
760    }
761
762    #[test]
763    fn is_deterministic_across_runs() {
764        let input = input_inventory(json!([
765            { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.site",
766              "key": { "site": "fra1" }, "attrs": { "name": "FRA1" } },
767            { "uid": Uuid::from_u128(2).to_string(), "type": "dcim.site",
768              "key": { "site": "ams1" }, "attrs": { "name": "AMS1" } }
769        ]));
770        let yaml = r#"
771schema:
772  types:
773    location.site:
774      key:
775        slug: { type: slug }
776      fields:
777        name: { type: string }
778rules:
779  - name: sites
780    match: "dcim.site"
781    emit:
782      type: location.site
783      key:
784        slug: "${key.site}"
785      attrs:
786        name: "${attrs.name}"
787"#;
788        let first = compile_map(&input, &spec(yaml)).unwrap();
789        let second = compile_map(&input, &spec(yaml)).unwrap();
790        assert_eq!(first.objects, second.objects);
791    }
792
793    #[test]
794    fn type_glob_matches_a_prefix() {
795        let input = input_inventory(json!([
796            { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.site",
797              "key": { "k": "a" }, "attrs": {} },
798            { "uid": Uuid::from_u128(2).to_string(), "type": "dcim.device",
799              "key": { "k": "b" }, "attrs": {} },
800            { "uid": Uuid::from_u128(3).to_string(), "type": "ipam.prefix",
801              "key": { "k": "c" }, "attrs": {} }
802        ]));
803        let out = compile_map(
804            &input,
805            &spec(
806                r#"
807schema:
808  types:
809    thing:
810      key:
811        k: { type: string }
812rules:
813  - name: dcim-only
814    match: "dcim.*"
815    emit:
816      type: thing
817      key:
818        k: "${key.k}"
819"#,
820            ),
821        )
822        .unwrap();
823        // both dcim.* objects map to `thing`; the ipam.prefix is left out.
824        assert_eq!(out.objects.len(), 2);
825        assert!(out.objects.iter().all(|o| o.type_name.as_str() == "thing"));
826        let keys: Vec<&str> = out
827            .objects
828            .iter()
829            .map(|o| o.key.get("k").unwrap().as_str().unwrap())
830            .collect();
831        assert_eq!(keys, vec!["a", "b"]);
832    }
833
834    #[test]
835    fn predicate_filters_matched_objects() {
836        let input = input_inventory(json!([
837            { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.device",
838              "key": { "device": "leaf01" }, "attrs": { "role": "leaf" } },
839            { "uid": Uuid::from_u128(2).to_string(), "type": "dcim.device",
840              "key": { "device": "spine01" }, "attrs": { "role": "spine" } },
841            { "uid": Uuid::from_u128(3).to_string(), "type": "dcim.device",
842              "key": { "device": "leaf02" }, "attrs": { "role": "leaf" } }
843        ]));
844        let out = compile_map(
845            &input,
846            &spec(
847                r#"
848schema:
849  types:
850    fabric.leaf:
851      key:
852        name: { type: slug }
853rules:
854  - name: leaves
855    match: "dcim.device[attrs.role=leaf]"
856    emit:
857      type: fabric.leaf
858      key:
859        name: "${key.device}"
860"#,
861            ),
862        )
863        .unwrap();
864        // only the two leaves survive the predicate; the spine is filtered out.
865        let names: Vec<&str> = out
866            .objects
867            .iter()
868            .map(|o| o.key.get("name").unwrap().as_str().unwrap())
869            .collect();
870        assert_eq!(names, vec!["leaf01", "leaf02"]);
871    }
872
873    #[test]
874    fn multi_emit_fans_out_with_named_uid_reference() {
875        // one source fabric fans out into a site and a vrf; the vrf references
876        // the site via a named uid (auto ref-rewiring does not apply to
877        // multi-emit, so the relationship is wired explicitly through `uids`).
878        let input = input_inventory(json!([
879            { "uid": Uuid::from_u128(1).to_string(), "type": "net.fabric",
880              "key": { "fabric": "fra" }, "attrs": { "site": "fra1", "vrf": "blue" } }
881        ]));
882        let out = compile_map(
883            &input,
884            &spec(
885                r#"
886schema:
887  types:
888    location.site:
889      key:
890        slug: { type: slug }
891    net.vrf:
892      key:
893        name: { type: slug }
894      fields:
895        site: { type: ref, target: location.site }
896rules:
897  - name: fabric
898    match: "net.fabric"
899    uids:
900      site:
901        v5:
902          type: "location.site"
903          stable: "slug=${attrs.site}"
904    emit:
905      - type: location.site
906        key:
907          slug: "${attrs.site}"
908        uid: "${uids.site}"
909      - type: net.vrf
910        key:
911          name: "${attrs.vrf}"
912        attrs:
913          site: "${uids.site}"
914"#,
915            ),
916        )
917        .unwrap();
918
919        assert_eq!(out.objects.len(), 2);
920        let site = out
921            .objects
922            .iter()
923            .find(|o| o.type_name.as_str() == "location.site")
924            .unwrap();
925        let vrf = out
926            .objects
927            .iter()
928            .find(|o| o.type_name.as_str() == "net.vrf")
929            .unwrap();
930        // the named uid pins the site identity and the vrf ref resolves to it,
931        // passing reference-integrity validation.
932        assert_eq!(site.uid, uid_v5("location.site", "slug=fra1"));
933        assert_eq!(vrf.attrs.get("site").unwrap(), &json!(site.uid.to_string()));
934    }
935
936    #[test]
937    fn group_by_aggregates_members_into_list_fields() {
938        // many vlans collapse into one vrf per group; the members' vids are
939        // collected into the vrf's `vlans` list.
940        let input = input_inventory(json!([
941            { "uid": Uuid::from_u128(1).to_string(), "type": "ipam.vlan",
942              "key": { "vid": 10 }, "attrs": { "vrf": "blue" } },
943            { "uid": Uuid::from_u128(2).to_string(), "type": "ipam.vlan",
944              "key": { "vid": 20 }, "attrs": { "vrf": "blue" } },
945            { "uid": Uuid::from_u128(3).to_string(), "type": "ipam.vlan",
946              "key": { "vid": 30 }, "attrs": { "vrf": "red" } }
947        ]));
948        let out = compile_map(
949            &input,
950            &spec(
951                r#"
952schema:
953  types:
954    ipam.vrf:
955      key:
956        name: { type: slug }
957      fields:
958        vlans:
959          type: list
960          item: { type: int }
961rules:
962  - name: vrfs
963    match: "ipam.vlan"
964    group_by: "${attrs.vrf}"
965    emit:
966      type: ipam.vrf
967      key:
968        name: "${group.key}"
969      attrs:
970        vlans: "${group.items.key.vid}"
971"#,
972            ),
973        )
974        .unwrap();
975
976        // two groups (blue, red), sorted by key; members keep input order.
977        assert_eq!(out.objects.len(), 2);
978        let blue = out
979            .objects
980            .iter()
981            .find(|o| o.key.get("name").unwrap() == &json!("blue"))
982            .unwrap();
983        let red = out
984            .objects
985            .iter()
986            .find(|o| o.key.get("name").unwrap() == &json!("red"))
987            .unwrap();
988        assert_eq!(blue.attrs.get("vlans").unwrap(), &json!([10, 20]));
989        assert_eq!(red.attrs.get("vlans").unwrap(), &json!([30]));
990    }
991
992    #[test]
993    fn lookup_reads_a_field_from_a_referenced_object() {
994        // the device's `status` is a ref to a status object; the lookup follows
995        // it and reads the referent's label, turning a ref into a string.
996        let status_uid = Uuid::from_u128(9).to_string();
997        let input = input_inventory(json!([
998            { "uid": status_uid, "type": "extras.status",
999              "key": { "name": "active" }, "attrs": { "label": "Active" } },
1000            { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.device",
1001              "key": { "name": "leaf01" }, "attrs": { "status": status_uid } }
1002        ]));
1003        let out = compile_map(
1004            &input,
1005            &spec(
1006                r#"
1007schema:
1008  types:
1009    dcim.device:
1010      key:
1011        name: { type: slug }
1012      fields:
1013        status: { type: string }
1014rules:
1015  - name: devices
1016    match: "dcim.device"
1017    lookups:
1018      status_label:
1019        ref: "${attrs.status}"
1020        get: "attrs.label"
1021    emit:
1022      type: dcim.device
1023      key:
1024        name: "${key.name}"
1025      attrs:
1026        status: "${lookup.status_label}"
1027"#,
1028            ),
1029        )
1030        .unwrap();
1031
1032        assert_eq!(out.objects.len(), 1);
1033        assert_eq!(
1034            out.objects[0].attrs.get("status").unwrap(),
1035            &json!("Active")
1036        );
1037    }
1038
1039    // --- user-defined starlark transforms ---
1040
1041    #[cfg(not(feature = "starlark"))]
1042    #[test]
1043    fn transforms_block_errors_without_the_feature() {
1044        let input = input_inventory(json!([]));
1045        let err = compile_map(
1046            &input,
1047            &spec(
1048                r#"
1049transforms:
1050  inline: |
1051    def f(v):
1052        return v
1053"#,
1054            ),
1055        )
1056        .unwrap_err();
1057        assert!(
1058            err.to_string().contains("without the starlark feature"),
1059            "{err:#}"
1060        );
1061    }
1062
1063    #[cfg(feature = "starlark")]
1064    mod starlark {
1065        use super::*;
1066
1067        /// the motivating example: a netbox-shaped address with a cidr suffix
1068        /// denormalised into a connectable `ansible_host`.
1069        #[test]
1070        fn inline_transform_derives_attr_end_to_end() {
1071            let input = input_inventory(json!([
1072                { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.device",
1073                  "key": { "name": "leaf01" },
1074                  "attrs": { "address": "198.51.100.1/24", "platform": "nxos" } }
1075            ]));
1076            let out = compile_map(
1077                &input,
1078                &spec(
1079                    r#"
1080transforms:
1081  inline: |
1082    ANSIBLE_OS = {"nxos": "cisco.nxos.nxos", "eos": "arista.eos.eos"}
1083
1084    def cidr_host(v):
1085        return v.split("/")[0]
1086
1087    def ansible_os(platform):
1088        if platform not in ANSIBLE_OS:
1089            fail("no ansible_network_os mapping for platform: " + platform)
1090        return ANSIBLE_OS[platform]
1091schema:
1092  types:
1093    ansible.host:
1094      key:
1095        name: { type: string }
1096      fields:
1097        ansible_host: { type: string }
1098        ansible_network_os: { type: string }
1099rules:
1100  - name: hosts
1101    match: "dcim.device"
1102    emit:
1103      type: ansible.host
1104      key:
1105        name: "${key.name}"
1106      attrs:
1107        ansible_host: "${attrs.address|cidr_host}"
1108        ansible_network_os: "${attrs.platform|ansible_os}"
1109"#,
1110                ),
1111            )
1112            .unwrap();
1113            assert_eq!(out.objects.len(), 1);
1114            let attrs = &out.objects[0].attrs;
1115            assert_eq!(attrs.get("ansible_host").unwrap(), &json!("198.51.100.1"));
1116            assert_eq!(
1117                attrs.get("ansible_network_os").unwrap(),
1118                &json!("cisco.nxos.nxos")
1119            );
1120        }
1121
1122        /// a transform returning a dict fills a `json`-typed attr with the dict
1123        /// preserved, and passes schema validation.
1124        #[test]
1125        fn typed_dict_return_fills_a_json_attr() {
1126            let input = input_inventory(json!([
1127                { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.device",
1128                  "key": { "name": "leaf01" }, "attrs": { "platform": "eos" } }
1129            ]));
1130            let out = compile_map(
1131                &input,
1132                &spec(
1133                    r#"
1134transforms:
1135  inline: |
1136    def profile(platform):
1137        return {"os": platform, "ports": [22, 830]}
1138schema:
1139  types:
1140    lab.node:
1141      key:
1142        name: { type: string }
1143      fields:
1144        profile: { type: json }
1145rules:
1146  - name: nodes
1147    match: "dcim.device"
1148    emit:
1149      type: lab.node
1150      key:
1151        name: "${key.name}"
1152      attrs:
1153        profile: "${attrs.platform|profile}"
1154"#,
1155                ),
1156            )
1157            .unwrap();
1158            assert_eq!(
1159                out.objects[0].attrs.get("profile").unwrap(),
1160                &json!({"os": "eos", "ports": [22, 830]})
1161            );
1162        }
1163
1164        /// `key:` templates feed uid derivation, so a transformed value there is
1165        /// coerced to a string; a collection return is rejected.
1166        #[test]
1167        fn key_context_coerces_scalars_and_rejects_collections() {
1168            let input = input_inventory(json!([
1169                { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.device",
1170                  "key": { "name": "leaf01" }, "attrs": {} }
1171            ]));
1172            let scalar_spec = r#"
1173transforms:
1174  inline: |
1175    def n(v):
1176        return 42
1177schema:
1178  types:
1179    lab.node:
1180      key:
1181        name: { type: string }
1182rules:
1183  - name: nodes
1184    match: "dcim.device"
1185    emit:
1186      type: lab.node
1187      key:
1188        name: "${key.name|n}"
1189"#;
1190            let out = compile_map(&input, &spec(scalar_spec)).unwrap();
1191            assert_eq!(out.objects[0].key.get("name").unwrap(), &json!("42"));
1192
1193            let collection_spec = scalar_spec.replace("return 42", "return [v]");
1194            let err = compile_map(&input, &spec(&collection_spec)).unwrap_err();
1195            assert!(err.to_string().contains("must be a scalar"), "{err:#}");
1196        }
1197
1198        #[test]
1199        fn transforms_block_requires_exactly_one_source() {
1200            let input = input_inventory(json!([]));
1201            for block in [
1202                "transforms: {}",
1203                "transforms:\n  file: a.star\n  inline: \"x = 1\"",
1204            ] {
1205                let err = compile_map(&input, &spec(block)).unwrap_err();
1206                assert!(
1207                    err.to_string()
1208                        .contains("requires exactly one of file or inline"),
1209                    "{err:#}"
1210                );
1211            }
1212        }
1213
1214        /// a file-based transforms block whose script has a `load()` dependency,
1215        /// loaded the way the cli does: `load_map_spec` captures the spec
1216        /// directory, and both the transforms file and its `load()` target
1217        /// resolve against it.
1218        #[test]
1219        fn file_transforms_with_load_resolve_against_the_spec_dir() {
1220            let dir = tempfile::tempdir().unwrap();
1221            std::fs::write(
1222                dir.path().join("lib.star"),
1223                "def shout(v):\n    return v.upper()\n",
1224            )
1225            .unwrap();
1226            std::fs::write(
1227                dir.path().join("transforms.star"),
1228                "load(\"lib.star\", \"shout\")\n\ndef loud_host(v):\n    return shout(v.split(\"/\")[0])\n",
1229            )
1230            .unwrap();
1231            std::fs::write(
1232                dir.path().join("map.yaml"),
1233                r#"
1234transforms:
1235  file: ./transforms.star
1236schema:
1237  types:
1238    lab.node:
1239      key:
1240        name: { type: string }
1241rules:
1242  - name: nodes
1243    match: "dcim.device"
1244    emit:
1245      type: lab.node
1246      key:
1247        name: "${attrs.address|loud_host}"
1248"#,
1249            )
1250            .unwrap();
1251            let map_spec = load_map_spec(dir.path().join("map.yaml")).unwrap();
1252            let input = input_inventory(json!([
1253                { "uid": Uuid::from_u128(1).to_string(), "type": "dcim.device",
1254                  "key": { "name": "leaf01" }, "attrs": { "address": "leaf01/24" } }
1255            ]));
1256            let out = compile_map(&input, &map_spec).unwrap();
1257            assert_eq!(out.objects[0].key.get("name").unwrap(), &json!("LEAF01"));
1258        }
1259
1260        #[test]
1261        fn eval_map_transform_runs_user_builtin_and_errors() {
1262            let map_spec = spec(
1263                r#"
1264transforms:
1265  inline: |
1266    def pad(v, width, fill):
1267        return fill * (width - len(v)) + v
1268
1269    def reject(v):
1270        fail("rejected: " + v)
1271"#,
1272            );
1273            let result =
1274                eval_map_transform(&map_spec, "pad", &json!("7"), &[json!(3), json!("0")]).unwrap();
1275            assert_eq!(result, json!("007"));
1276
1277            let result = eval_map_transform(&map_spec, "upper", &json!("q"), &[]).unwrap();
1278            assert_eq!(result, json!("Q"));
1279
1280            let err = eval_map_transform(&map_spec, "reject", &json!("v"), &[]).unwrap_err();
1281            assert!(err.to_string().contains("rejected: v"), "{err:#}");
1282
1283            let err = eval_map_transform(&map_spec, "nope", &json!("v"), &[]).unwrap_err();
1284            assert!(
1285                err.to_string().contains("unknown transform nope"),
1286                "{err:#}"
1287            );
1288        }
1289    }
1290}