Skip to main content

taudit_parse_gitlab/
lib.rs

1use std::collections::HashMap;
2
3use serde::Deserialize;
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Parser for GitLab CI pipeline definitions (`.gitlab-ci.yml`).
///
/// Converts the YAML document into an `AuthorityGraph` using this authority model:
/// - every job becomes a `Step` node;
/// - `CI_JOB_TOKEN` is one implicit `Identity` (scope=broad) linked to every job;
/// - each `secrets:` entry becomes a `Secret` node reached via a `HasAccessTo` edge;
/// - each `id_tokens:` entry becomes an OIDC `Identity` node;
/// - `variables:` entries whose names look like credentials become `Secret` nodes;
/// - `image:` and `services:` become `Image` nodes reached via `UsesImage` edges;
/// - `include:`, `extends:` and hidden (`.`-prefixed) jobs mark the graph `Partial`;
/// - `rules: if: merge_request_event` (job or `workflow:` level) and
///   `only: merge_requests` set `META_TRIGGER`.
///
/// The parser holds no state, so it is a unit struct that is trivially
/// constructible, copyable and printable.
#[derive(Debug, Clone, Copy, Default)]
pub struct GitlabParser;
21
/// Top-level keys that carry pipeline-wide configuration and must never be
/// interpreted as job definitions (kept in alphabetical order).
const RESERVED: &[&str] = &[
    "after_script",
    "before_script",
    "cache",
    "default",
    "image",
    "include",
    "services",
    "stages",
    "types",
    "variables",
    "workflow",
];
36
/// Upper-case substrings that mark a variable name as a credential rather
/// than plain configuration (kept in alphabetical order).
const CRED_FRAGMENTS: &[&str] = &[
    "ACCESS_KEY",
    "APIKEY",
    "API_KEY",
    "CERT",
    "CREDENTIAL",
    "PASSWD",
    "PASSWORD",
    "PRIVATE_KEY",
    "SECRET",
    "SERVICE_ACCOUNT",
    "SIGNING_KEY",
    "TOKEN",
];
52
53impl PipelineParser for GitlabParser {
54    fn platform(&self) -> &str {
55        "gitlab-ci"
56    }
57
58    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
59        let mut de = serde_yaml::Deserializer::from_str(content);
60        let doc = de
61            .next()
62            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
63        let root: Value = Value::deserialize(doc)
64            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
65
66        let mapping = root
67            .as_mapping()
68            .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;
69
70        let mut graph = AuthorityGraph::new(source.clone());
71        graph.metadata.insert(META_PLATFORM.into(), "gitlab".into());
72
73        // CI_JOB_TOKEN is always present in every GitLab CI job — it's the built-in
74        // platform token, equivalent to ADO's System.AccessToken or GHA's GITHUB_TOKEN.
75        let mut meta = HashMap::new();
76        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
77        meta.insert(META_IMPLICIT.into(), "true".into());
78        let token_id = graph.add_node_with_metadata(
79            NodeKind::Identity,
80            "CI_JOB_TOKEN",
81            TrustZone::FirstParty,
82            meta,
83        );
84
85        // Top-level include: → mark Partial immediately
86        if mapping.contains_key("include") {
87            graph.mark_partial(
88                "include: directive present — included templates not resolved".to_string(),
89            );
90        }
91
92        // Global variables
93        let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");
94
95        // Global image
96        let global_image = mapping.get("image").and_then(extract_image_str);
97
98        // Top-level merge_request trigger detection from `workflow:` rules
99        if let Some(wf) = mapping.get("workflow") {
100            if has_mr_trigger_in_workflow(wf) {
101                graph
102                    .metadata
103                    .insert(META_TRIGGER.into(), "merge_request".into());
104            }
105        }
106
107        // Process each job (any top-level key not in RESERVED)
108        for (key, value) in mapping {
109            let job_name = match key.as_str() {
110                Some(k) => k,
111                None => continue,
112            };
113            if RESERVED.contains(&job_name) {
114                continue;
115            }
116
117            // Hidden jobs (starting with a dot) are templates — mark Partial, skip
118            if job_name.starts_with('.') {
119                graph.mark_partial(format!(
120                    "job '{job_name}' is a hidden/template job — not resolved"
121                ));
122                continue;
123            }
124
125            let job_map = match value.as_mapping() {
126                Some(m) => m,
127                None => continue,
128            };
129
130            // extends: — job template inheritance, can't resolve statically
131            if job_map.contains_key("extends") {
132                graph.mark_partial(format!(
133                    "job '{job_name}' uses extends: — inherited configuration not resolved"
134                ));
135            }
136
137            // Detect PR/MR trigger in this job's rules: or only:
138            let job_triggers_mr = job_has_mr_trigger(job_map);
139
140            // Propagate job MR trigger to graph level
141            if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
142                graph
143                    .metadata
144                    .insert(META_TRIGGER.into(), "merge_request".into());
145            }
146
147            // Job-level variables
148            let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);
149
150            // Job-level explicit secrets: (Vault, AWS Secrets Manager, GCP, Azure)
151            let explicit_secrets =
152                process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);
153
154            // Job-level OIDC tokens (id_tokens:)
155            let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);
156
157            // Job image (falls back to global)
158            let job_image_str = job_map
159                .get("image")
160                .and_then(extract_image_str)
161                .or(global_image.as_deref().map(String::from));
162
163            let image_id = job_image_str.as_deref().map(|img| {
164                let pinned = is_docker_digest_pinned(img);
165                let trust_zone = if pinned {
166                    TrustZone::ThirdParty
167                } else {
168                    TrustZone::Untrusted
169                };
170                let mut imeta = HashMap::new();
171                if let Some(digest) = img.split("@sha256:").nth(1) {
172                    imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
173                }
174                graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
175            });
176
177            // Services (each is an Image node)
178            let service_ids = process_services(job_map.get("services"), &mut graph);
179
180            // Environment — record name as metadata, sets trust boundary marker
181            let env_name = job_map
182                .get("environment")
183                .and_then(extract_environment_name);
184
185            // Detect whether this job's `rules:` / `only:` clause restricts
186            // execution to protected branches (or to the default branch,
187            // which is protected by GitLab default policy). Used by the
188            // `gitlab_deploy_job_missing_protected_branch_only` rule to
189            // detect deployment jobs that lack any branch guard.
190            let protected_only = job_has_protected_branch_restriction(job_map);
191
192            // Create the Step node for this job
193            let mut step_meta = HashMap::new();
194            step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
195            if let Some(ref env) = env_name {
196                step_meta.insert("environment_name".into(), env.clone());
197            }
198            if protected_only {
199                step_meta.insert(META_RULES_PROTECTED_ONLY.into(), "true".into());
200            }
201            let step_id = graph.add_node_with_metadata(
202                NodeKind::Step,
203                job_name,
204                TrustZone::FirstParty,
205                step_meta,
206            );
207
208            // CI_JOB_TOKEN always available to every step
209            graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
210
211            // Link all secrets
212            for &sid in global_secrets
213                .iter()
214                .chain(&job_secrets)
215                .chain(&explicit_secrets)
216            {
217                graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
218            }
219
220            // Link OIDC identities
221            for &iid in &oidc_identities {
222                graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
223            }
224
225            // UsesImage edges
226            if let Some(img_id) = image_id {
227                graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
228            }
229            for &svc_id in &service_ids {
230                graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
231            }
232        }
233
234        // Cross-platform misclassification trap (red-team R2 #5): a YAML file
235        // with non-reserved top-level keys looks like a GitLab pipeline shape
236        // but its body may use constructs the GitLab parser doesn't recognise
237        // (e.g. an ADO `task:` payload). Mark Partial when the source had at
238        // least one job-shaped top-level key but we ended up with no Step
239        // nodes — better than silently returning completeness=complete on a
240        // clean-but-empty graph that a CI gate would treat as "passed".
241        let step_count = graph
242            .nodes
243            .iter()
244            .filter(|n| n.kind == NodeKind::Step)
245            .count();
246        let had_job_carrier = mapping.iter().any(|(k, v)| {
247            k.as_str()
248                .map(|name| !RESERVED.contains(&name) && !name.starts_with('.'))
249                .unwrap_or(false)
250                && v.as_mapping().is_some()
251        });
252        if step_count == 0 && had_job_carrier {
253            graph.mark_partial(
254                "non-reserved top-level keys parsed but produced 0 step nodes — possible non-GitLab YAML wrong-platform-classified".to_string(),
255            );
256        }
257
258        Ok(graph)
259    }
260}
261
262/// Detect `image:` string from a YAML value — can be a bare string or a mapping with `name:`.
263fn extract_image_str(v: &Value) -> Option<String> {
264    match v {
265        Value::String(s) => Some(s.clone()),
266        Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
267        _ => None,
268    }
269}
270
271/// Extract environment name from `environment:` value (string or mapping).
272fn extract_environment_name(v: &Value) -> Option<String> {
273    match v {
274        Value::String(s) => Some(s.clone()),
275        Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
276        _ => None,
277    }
278}
279
280/// Classify a variable name as a credential by checking for common fragments.
281fn is_credential_name(name: &str) -> bool {
282    let upper = name.to_uppercase();
283    CRED_FRAGMENTS.iter().any(|frag| upper.contains(frag))
284}
285
286/// Parse `variables:` mapping and emit `Secret` nodes for credential-pattern names.
287/// Returns the list of created node IDs.
288fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
289    let mut ids = Vec::new();
290    let map = match vars.and_then(|v| v.as_mapping()) {
291        Some(m) => m,
292        None => return ids,
293    };
294    for (k, _v) in map {
295        let name = match k.as_str() {
296            Some(s) => s,
297            None => continue,
298        };
299        if is_credential_name(name) {
300            let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
301            ids.push(id);
302            let _ = scope; // used for future scoped error messages
303        }
304    }
305    ids
306}
307
308/// Parse `secrets:` block and emit one `Secret` node per named secret.
309///
310/// GitLab CI `secrets:` format:
311/// ```yaml
312/// secrets:
313///   DATABASE_PASSWORD:
314///     vault: production/db/password@secret
315///   AWS_KEY:
316///     aws_secrets_manager:
317///       name: my-secret
318/// ```
319fn process_explicit_secrets(
320    secrets: Option<&Value>,
321    _scope: &str,
322    graph: &mut AuthorityGraph,
323) -> Vec<NodeId> {
324    let mut ids = Vec::new();
325    let map = match secrets.and_then(|v| v.as_mapping()) {
326        Some(m) => m,
327        None => return ids,
328    };
329    for (k, _v) in map {
330        let name = match k.as_str() {
331            Some(s) => s,
332            None => continue,
333        };
334        let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
335        ids.push(id);
336    }
337    ids
338}
339
340/// Parse `id_tokens:` block and emit one OIDC `Identity` node per token.
341///
342/// GitLab CI `id_tokens:` format:
343/// ```yaml
344/// id_tokens:
345///   SIGSTORE_ID_TOKEN:
346///     aud: sigstore
347///   AWS_OIDC_TOKEN:
348///     aud: https://sts.amazonaws.com
349/// ```
350fn process_id_tokens(
351    id_tokens: Option<&Value>,
352    _scope: &str,
353    graph: &mut AuthorityGraph,
354) -> Vec<NodeId> {
355    let mut ids = Vec::new();
356    let map = match id_tokens.and_then(|v| v.as_mapping()) {
357        Some(m) => m,
358        None => return ids,
359    };
360    for (k, v) in map {
361        let token_name = match k.as_str() {
362            Some(s) => s,
363            None => continue,
364        };
365        // Extract audience for labelling
366        let aud = v
367            .as_mapping()
368            .and_then(|m| m.get("aud"))
369            .and_then(|a| a.as_str())
370            .unwrap_or("unknown");
371        let label = format!("{token_name} (aud={aud})");
372        let mut meta = HashMap::new();
373        meta.insert(META_OIDC.into(), "true".into());
374        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
375        let id =
376            graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
377        ids.push(id);
378    }
379    ids
380}
381
382/// Parse `services:` block and emit `Image` nodes.
383fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
384    let mut ids = Vec::new();
385    let list = match services.and_then(|v| v.as_sequence()) {
386        Some(s) => s,
387        None => return ids,
388    };
389    for item in list {
390        let img_str = match extract_image_str(item) {
391            Some(s) => s,
392            None => continue,
393        };
394        let pinned = is_docker_digest_pinned(&img_str);
395        let trust_zone = if pinned {
396            TrustZone::ThirdParty
397        } else {
398            TrustZone::Untrusted
399        };
400        let mut meta = HashMap::new();
401        if let Some(digest) = img_str.split("@sha256:").nth(1) {
402            meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
403        }
404        let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
405        ids.push(id);
406    }
407    ids
408}
409
410/// Check whether a job's `rules:` or `only:` indicates it runs on merge requests.
411fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
412    // rules: [{if: '$CI_PIPELINE_SOURCE == "merge_request_event"'}]
413    if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
414        for rule in rules {
415            if let Some(if_expr) = rule
416                .as_mapping()
417                .and_then(|m| m.get("if"))
418                .and_then(|v| v.as_str())
419            {
420                if if_expr.contains("merge_request_event") {
421                    return true;
422                }
423            }
424        }
425    }
426    // only: [merge_requests] or only: {refs: [merge_requests]}
427    if let Some(only) = job_map.get("only") {
428        if only_has_merge_requests(only) {
429            return true;
430        }
431    }
432    false
433}
434
435/// Check `only:` value (sequence or mapping) for `merge_requests` entry.
436fn only_has_merge_requests(v: &Value) -> bool {
437    match v {
438        Value::Sequence(seq) => seq
439            .iter()
440            .any(|item| item.as_str() == Some("merge_requests")),
441        Value::Mapping(m) => {
442            if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
443                return refs
444                    .iter()
445                    .any(|item| item.as_str() == Some("merge_requests"));
446            }
447            false
448        }
449        _ => false,
450    }
451}
452
453/// Returns true when a job's `rules:` or `only:` clause restricts execution
454/// to protected refs only. The set of accepted patterns is intentionally
455/// generous because the goal is to *credit* defensive intent, not to
456/// audit-grade verify that every protection actually exists in GitLab's
457/// branch-protection settings — that lives outside the YAML.
458///
459/// Patterns recognised as a protected-only restriction:
460///
461///   * any `rules: [{ if: ... $CI_COMMIT_REF_PROTECTED ... }]`
462///   * any `rules: [{ if: ... $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH ... }]`
463///     (default branch is GitLab-protected by default)
464///   * any `rules: [{ if: ... $CI_COMMIT_TAG ... }]` (tags are protected by default)
465///   * `only: [main]` / `only: [master]` / `only: tags`
466///   * `only: { refs: [main, /^release/.*/] }`
467///
468/// Hits any one of the above → true. Misses every one → false.
469fn job_has_protected_branch_restriction(job_map: &serde_yaml::Mapping) -> bool {
470    if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
471        for rule in rules {
472            let Some(if_expr) = rule
473                .as_mapping()
474                .and_then(|m| m.get("if"))
475                .and_then(|v| v.as_str())
476            else {
477                continue;
478            };
479            if if_expr.contains("$CI_COMMIT_REF_PROTECTED")
480                || if_expr.contains("CI_COMMIT_REF_PROTECTED")
481            {
482                return true;
483            }
484            if if_expr.contains("$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH")
485                || if_expr.contains("$CI_DEFAULT_BRANCH == $CI_COMMIT_BRANCH")
486            {
487                return true;
488            }
489            if if_expr.contains("$CI_COMMIT_TAG") {
490                return true;
491            }
492        }
493    }
494    if let Some(only) = job_map.get("only") {
495        if only_lists_protected_ref(only) {
496            return true;
497        }
498    }
499    false
500}
501
502/// Check `only:` for protected/default-branch refs (`main`, `master`, `tags`,
503/// or a `refs:` list containing those). Conservative — does NOT include
504/// `merge_requests` (that's the opposite signal).
505fn only_lists_protected_ref(v: &Value) -> bool {
506    fn is_protected_ref(s: &str) -> bool {
507        matches!(s, "main" | "master" | "tags") || s.starts_with("/^release")
508    }
509    match v {
510        Value::String(s) => is_protected_ref(s.as_str()),
511        Value::Sequence(seq) => seq
512            .iter()
513            .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false)),
514        Value::Mapping(m) => {
515            if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
516                return refs
517                    .iter()
518                    .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false));
519            }
520            false
521        }
522        _ => false,
523    }
524}
525
526/// Check top-level `workflow:` rules for MR trigger.
527fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
528    let rules = match wf
529        .as_mapping()
530        .and_then(|m| m.get("rules"))
531        .and_then(|r| r.as_sequence())
532    {
533        Some(r) => r,
534        None => return false,
535    };
536    for rule in rules {
537        if let Some(if_expr) = rule
538            .as_mapping()
539            .and_then(|m| m.get("if"))
540            .and_then(|v| v.as_str())
541        {
542            if if_expr.contains("merge_request_event") {
543                return true;
544            }
545        }
546    }
547    false
548}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `yaml` as a `.gitlab-ci.yml` file, panicking on parser errors.
    fn parse(yaml: &str) -> AuthorityGraph {
        let parser = GitlabParser;
        let source = PipelineSource {
            file: ".gitlab-ci.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        parser.parse(yaml, &source).unwrap()
    }

    /// Number of `Step` nodes in `graph`.
    fn step_count(graph: &AuthorityGraph) -> usize {
        graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count()
    }

    #[test]
    fn ci_job_token_always_present() {
        let yaml = r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);
        assert_eq!(identities[0].name, "CI_JOB_TOKEN");
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IMPLICIT)
                .map(String::as_str),
            Some("true")
        );
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IDENTITY_SCOPE)
                .map(String::as_str),
            Some("broad")
        );
    }

    #[test]
    fn global_credential_variable_emits_secret_node() {
        let yaml = r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|s| s.name == "DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        // Plain config variable must not emit Secret
        assert!(
            !secrets.iter().any(|s| s.name == "APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }

    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let yaml = r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }

    #[test]
    fn digest_pinned_image_is_third_party() {
        let yaml = r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }

    #[test]
    fn job_without_image_uses_global_image() {
        let yaml = r#"
image: alpine:latest

build:
  script:
    - make
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
    }

    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let yaml = r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc.iter().map(|n| &n.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let yaml = r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        let names: Vec<_> = secrets.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }

    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let yaml = r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }

    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let yaml = r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request")
        );
    }

    #[test]
    fn include_marks_graph_partial() {
        let yaml = r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    #[test]
    fn extends_marks_graph_partial() {
        let yaml = r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let yaml = r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        // Verify job names are correct
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }

    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let yaml = r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }

    #[test]
    fn services_emit_image_nodes() {
        let yaml = r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(
            images.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            images.iter().map(|i| &i.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn only_protected_ref_sets_protected_only_metadata() {
        let yaml = r#"
deploy:
  only:
    - main
  script:
    - deploy.sh
build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(step_count(&graph), 2);
        for step in graph.nodes_of_kind(NodeKind::Step) {
            let flag = step
                .metadata
                .get(META_RULES_PROTECTED_ONLY)
                .map(String::as_str);
            match step.name.as_str() {
                "deploy" => assert_eq!(flag, Some("true"), "deploy is restricted to main"),
                other => assert_eq!(flag, None, "job '{other}' has no branch guard"),
            }
        }
    }

    #[test]
    fn environment_name_recorded_on_step() {
        let yaml = r#"
deploy:
  environment:
    name: production
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 1);
        assert_eq!(
            steps[0]
                .metadata
                .get("environment_name")
                .map(String::as_str),
            Some("production")
        );
    }

    // ── Cross-platform misclassification trap (red-team R2 #5) ─────

    #[test]
    fn non_mapping_job_bodies_do_not_mark_partial() {
        // Top-level keys that look like job names but whose values are lists
        // rather than mappings. Every candidate is skipped, so no Step node is
        // produced. The "job carrier" heuristic only counts mapping-valued
        // keys, so it does not fire here and the graph stays Complete. The
        // heuristic targets files that use a *valid mapping shape* the GitLab
        // parser cannot interpret.
        let yaml = r#"
build:
  - this is a list, not a mapping
test:
  - also a list
"#;
        let graph = parse(yaml);
        assert_eq!(step_count(&graph), 0);
        assert_eq!(
            graph.completeness,
            AuthorityCompleteness::Complete,
            "non-mapping values are not job carriers"
        );
    }

    #[test]
    fn template_only_file_marks_partial() {
        // Any mapping-valued, non-reserved, non-hidden key becomes a Step, so
        // the only way to end up with zero steps from mapping-shaped input is
        // a file made solely of hidden (`.`-prefixed) template jobs. Those
        // already mark the graph Partial with their own reason.
        let yaml = r#"
.template-only:
  script:
    - echo "this is a template-only file"
"#;
        let graph = parse(yaml);
        assert_eq!(step_count(&graph), 0);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }
}
887        assert_eq!(step_count, 0);
888        // Hidden jobs already mark partial with their own reason.
889        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
890    }
891}