1use std::collections::HashMap;
2
3use serde::Deserialize;
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Stateless parser for GitLab CI configuration files.
///
/// Implements [`PipelineParser`]; `parse` turns the YAML text into an
/// [`AuthorityGraph`].
pub struct GitlabParser;
21
/// Top-level keys that are never treated as job names.
///
/// `GitlabParser::parse` skips every root-mapping entry whose key appears in
/// this list when it walks the document looking for jobs.
const RESERVED: &[&str] = &[
    "stages",
    "workflow",
    "include",
    "variables",
    "image",
    "services",
    "default",
    "cache",
    "before_script",
    "after_script",
    "types",
];
36
/// Upper-case substrings that mark a variable name as credential-like.
///
/// `is_credential_name` upper-cases the candidate name and reports a match
/// when it contains any of these fragments.
const CRED_FRAGMENTS: &[&str] = &[
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
];
52
53impl PipelineParser for GitlabParser {
54 fn platform(&self) -> &str {
55 "gitlab-ci"
56 }
57
58 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
59 let mut de = serde_yaml::Deserializer::from_str(content);
60 let doc = de
61 .next()
62 .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
63 let root: Value = Value::deserialize(doc)
64 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
65
66 let mapping = root
67 .as_mapping()
68 .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;
69
70 let mut graph = AuthorityGraph::new(source.clone());
71 graph.metadata.insert(META_PLATFORM.into(), "gitlab".into());
72
73 let mut meta = HashMap::new();
76 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
77 meta.insert(META_IMPLICIT.into(), "true".into());
78 let token_id = graph.add_node_with_metadata(
79 NodeKind::Identity,
80 "CI_JOB_TOKEN",
81 TrustZone::FirstParty,
82 meta,
83 );
84
85 if mapping.contains_key("include") {
87 graph.mark_partial(
88 "include: directive present — included templates not resolved".to_string(),
89 );
90 }
91
92 let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");
94
95 let global_image = mapping.get("image").and_then(extract_image_str);
97
98 if let Some(wf) = mapping.get("workflow") {
100 if has_mr_trigger_in_workflow(wf) {
101 graph
102 .metadata
103 .insert(META_TRIGGER.into(), "merge_request".into());
104 }
105 }
106
107 for (key, value) in mapping {
109 let job_name = match key.as_str() {
110 Some(k) => k,
111 None => continue,
112 };
113 if RESERVED.contains(&job_name) {
114 continue;
115 }
116
117 if job_name.starts_with('.') {
119 graph.mark_partial(format!(
120 "job '{job_name}' is a hidden/template job — not resolved"
121 ));
122 continue;
123 }
124
125 let job_map = match value.as_mapping() {
126 Some(m) => m,
127 None => continue,
128 };
129
130 if job_map.contains_key("extends") {
132 graph.mark_partial(format!(
133 "job '{job_name}' uses extends: — inherited configuration not resolved"
134 ));
135 }
136
137 let job_triggers_mr = job_has_mr_trigger(job_map);
139
140 if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
142 graph
143 .metadata
144 .insert(META_TRIGGER.into(), "merge_request".into());
145 }
146
147 let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);
149
150 let explicit_secrets =
152 process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);
153
154 let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);
156
157 let job_image_str = job_map
159 .get("image")
160 .and_then(extract_image_str)
161 .or(global_image.as_deref().map(String::from));
162
163 let image_id = job_image_str.as_deref().map(|img| {
164 let pinned = is_docker_digest_pinned(img);
165 let trust_zone = if pinned {
166 TrustZone::ThirdParty
167 } else {
168 TrustZone::Untrusted
169 };
170 let mut imeta = HashMap::new();
171 if let Some(digest) = img.split("@sha256:").nth(1) {
172 imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
173 }
174 graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
175 });
176
177 let service_ids = process_services(job_map.get("services"), &mut graph);
179
180 let env_name = job_map
182 .get("environment")
183 .and_then(extract_environment_name);
184
185 let protected_only = job_has_protected_branch_restriction(job_map);
191
192 let mut step_meta = HashMap::new();
194 step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
195 if let Some(ref env) = env_name {
196 step_meta.insert("environment_name".into(), env.clone());
197 }
198 if protected_only {
199 step_meta.insert(META_RULES_PROTECTED_ONLY.into(), "true".into());
200 }
201 let step_id = graph.add_node_with_metadata(
202 NodeKind::Step,
203 job_name,
204 TrustZone::FirstParty,
205 step_meta,
206 );
207
208 graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
210
211 for &sid in global_secrets
213 .iter()
214 .chain(&job_secrets)
215 .chain(&explicit_secrets)
216 {
217 graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
218 }
219
220 for &iid in &oidc_identities {
222 graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
223 }
224
225 if let Some(img_id) = image_id {
227 graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
228 }
229 for &svc_id in &service_ids {
230 graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
231 }
232 }
233
234 let step_count = graph
242 .nodes
243 .iter()
244 .filter(|n| n.kind == NodeKind::Step)
245 .count();
246 let had_job_carrier = mapping.iter().any(|(k, v)| {
247 k.as_str()
248 .map(|name| !RESERVED.contains(&name) && !name.starts_with('.'))
249 .unwrap_or(false)
250 && v.as_mapping().is_some()
251 });
252 if step_count == 0 && had_job_carrier {
253 graph.mark_partial(
254 "non-reserved top-level keys parsed but produced 0 step nodes — possible non-GitLab YAML wrong-platform-classified".to_string(),
255 );
256 }
257
258 Ok(graph)
259 }
260}
261
262fn extract_image_str(v: &Value) -> Option<String> {
264 match v {
265 Value::String(s) => Some(s.clone()),
266 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
267 _ => None,
268 }
269}
270
271fn extract_environment_name(v: &Value) -> Option<String> {
273 match v {
274 Value::String(s) => Some(s.clone()),
275 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
276 _ => None,
277 }
278}
279
280fn is_credential_name(name: &str) -> bool {
282 let upper = name.to_uppercase();
283 CRED_FRAGMENTS.iter().any(|frag| upper.contains(frag))
284}
285
286fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
289 let mut ids = Vec::new();
290 let map = match vars.and_then(|v| v.as_mapping()) {
291 Some(m) => m,
292 None => return ids,
293 };
294 for (k, _v) in map {
295 let name = match k.as_str() {
296 Some(s) => s,
297 None => continue,
298 };
299 if is_credential_name(name) {
300 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
301 ids.push(id);
302 let _ = scope; }
304 }
305 ids
306}
307
308fn process_explicit_secrets(
320 secrets: Option<&Value>,
321 _scope: &str,
322 graph: &mut AuthorityGraph,
323) -> Vec<NodeId> {
324 let mut ids = Vec::new();
325 let map = match secrets.and_then(|v| v.as_mapping()) {
326 Some(m) => m,
327 None => return ids,
328 };
329 for (k, _v) in map {
330 let name = match k.as_str() {
331 Some(s) => s,
332 None => continue,
333 };
334 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
335 ids.push(id);
336 }
337 ids
338}
339
340fn process_id_tokens(
351 id_tokens: Option<&Value>,
352 _scope: &str,
353 graph: &mut AuthorityGraph,
354) -> Vec<NodeId> {
355 let mut ids = Vec::new();
356 let map = match id_tokens.and_then(|v| v.as_mapping()) {
357 Some(m) => m,
358 None => return ids,
359 };
360 for (k, v) in map {
361 let token_name = match k.as_str() {
362 Some(s) => s,
363 None => continue,
364 };
365 let aud = v
367 .as_mapping()
368 .and_then(|m| m.get("aud"))
369 .and_then(|a| a.as_str())
370 .unwrap_or("unknown");
371 let label = format!("{token_name} (aud={aud})");
372 let mut meta = HashMap::new();
373 meta.insert(META_OIDC.into(), "true".into());
374 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
375 let id =
376 graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
377 ids.push(id);
378 }
379 ids
380}
381
382fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
384 let mut ids = Vec::new();
385 let list = match services.and_then(|v| v.as_sequence()) {
386 Some(s) => s,
387 None => return ids,
388 };
389 for item in list {
390 let img_str = match extract_image_str(item) {
391 Some(s) => s,
392 None => continue,
393 };
394 let pinned = is_docker_digest_pinned(&img_str);
395 let trust_zone = if pinned {
396 TrustZone::ThirdParty
397 } else {
398 TrustZone::Untrusted
399 };
400 let mut meta = HashMap::new();
401 if let Some(digest) = img_str.split("@sha256:").nth(1) {
402 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
403 }
404 let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
405 ids.push(id);
406 }
407 ids
408}
409
410fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
412 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
414 for rule in rules {
415 if let Some(if_expr) = rule
416 .as_mapping()
417 .and_then(|m| m.get("if"))
418 .and_then(|v| v.as_str())
419 {
420 if if_expr.contains("merge_request_event") {
421 return true;
422 }
423 }
424 }
425 }
426 if let Some(only) = job_map.get("only") {
428 if only_has_merge_requests(only) {
429 return true;
430 }
431 }
432 false
433}
434
435fn only_has_merge_requests(v: &Value) -> bool {
437 match v {
438 Value::Sequence(seq) => seq
439 .iter()
440 .any(|item| item.as_str() == Some("merge_requests")),
441 Value::Mapping(m) => {
442 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
443 return refs
444 .iter()
445 .any(|item| item.as_str() == Some("merge_requests"));
446 }
447 false
448 }
449 _ => false,
450 }
451}
452
453fn job_has_protected_branch_restriction(job_map: &serde_yaml::Mapping) -> bool {
470 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
471 for rule in rules {
472 let Some(if_expr) = rule
473 .as_mapping()
474 .and_then(|m| m.get("if"))
475 .and_then(|v| v.as_str())
476 else {
477 continue;
478 };
479 if if_expr.contains("$CI_COMMIT_REF_PROTECTED")
480 || if_expr.contains("CI_COMMIT_REF_PROTECTED")
481 {
482 return true;
483 }
484 if if_expr.contains("$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH")
485 || if_expr.contains("$CI_DEFAULT_BRANCH == $CI_COMMIT_BRANCH")
486 {
487 return true;
488 }
489 if if_expr.contains("$CI_COMMIT_TAG") {
490 return true;
491 }
492 }
493 }
494 if let Some(only) = job_map.get("only") {
495 if only_lists_protected_ref(only) {
496 return true;
497 }
498 }
499 false
500}
501
502fn only_lists_protected_ref(v: &Value) -> bool {
506 fn is_protected_ref(s: &str) -> bool {
507 matches!(s, "main" | "master" | "tags") || s.starts_with("/^release")
508 }
509 match v {
510 Value::String(s) => is_protected_ref(s.as_str()),
511 Value::Sequence(seq) => seq
512 .iter()
513 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false)),
514 Value::Mapping(m) => {
515 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
516 return refs
517 .iter()
518 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false));
519 }
520 false
521 }
522 _ => false,
523 }
524}
525
526fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
528 let rules = match wf
529 .as_mapping()
530 .and_then(|m| m.get("rules"))
531 .and_then(|r| r.as_sequence())
532 {
533 Some(r) => r,
534 None => return false,
535 };
536 for rule in rules {
537 if let Some(if_expr) = rule
538 .as_mapping()
539 .and_then(|m| m.get("if"))
540 .and_then(|v| v.as_str())
541 {
542 if if_expr.contains("merge_request_event") {
543 return true;
544 }
545 }
546 }
547 false
548}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `GitlabParser` over `yaml` with a fixed `.gitlab-ci.yml` source,
    /// panicking if parsing fails.
    fn parse(yaml: &str) -> AuthorityGraph {
        let source = PipelineSource {
            file: ".gitlab-ci.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        GitlabParser.parse(yaml, &source).unwrap()
    }

    /// Number of `Step` nodes in `graph`.
    fn step_total(graph: &AuthorityGraph) -> usize {
        graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count()
    }

    /// The implicit CI_JOB_TOKEN identity exists even for a minimal pipeline.
    #[test]
    fn ci_job_token_always_present() {
        let yaml = r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);

        let token = &identities[0];
        assert_eq!(token.name, "CI_JOB_TOKEN");
        let implicit = token.metadata.get(META_IMPLICIT).map(String::as_str);
        assert_eq!(implicit, Some("true"));
        let scope = token.metadata.get(META_IDENTITY_SCOPE).map(String::as_str);
        assert_eq!(scope, Some("broad"));
    }

    /// Only credential-looking global variables become Secret nodes.
    #[test]
    fn global_credential_variable_emits_secret_node() {
        let yaml = r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#;
        let graph = parse(yaml);
        let secret_names: Vec<&str> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| s.name.as_str())
            .collect();
        assert!(
            secret_names.contains(&"DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secret_names
        );
        assert!(
            !secret_names.contains(&"APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }

    /// A tag-based image reference is classified as untrusted.
    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let yaml = r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        let image = &images[0];
        assert_eq!(image.name, "alpine:latest");
        assert_eq!(image.trust_zone, TrustZone::Untrusted);
    }

    /// A sha256-pinned image reference is classified as third party.
    #[test]
    fn digest_pinned_image_is_third_party() {
        let yaml = r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }

    /// Each `id_tokens` entry yields an Identity node flagged as OIDC.
    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let yaml = r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let oidc_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .map(|n| &n.name)
            .collect();
        assert_eq!(
            oidc_names.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc_names
        );
    }

    /// Every key under `secrets:` becomes a Secret node, whatever its name.
    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let yaml = r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| s.name.as_str())
            .collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }

    /// A `rules: if` mentioning merge_request_event sets the trigger metadata.
    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let yaml = r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#;
        let graph = parse(yaml);
        let trigger = graph.metadata.get(META_TRIGGER).map(String::as_str);
        assert_eq!(
            trigger,
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }

    /// `only: [merge_requests]` sets the trigger metadata as well.
    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let yaml = r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#;
        let graph = parse(yaml);
        let trigger = graph.metadata.get(META_TRIGGER).map(String::as_str);
        assert_eq!(trigger, Some("merge_request"));
    }

    /// `include:` is not resolved, so the graph must be flagged partial.
    #[test]
    fn include_marks_graph_partial() {
        let yaml = r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// `extends:` is not resolved, so the graph must be flagged partial.
    #[test]
    fn extends_marks_graph_partial() {
        let yaml = r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// Every Step node carries its job name in `META_JOB_NAME`.
    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let yaml = r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }

    /// Reserved top-level keywords never turn into Step nodes.
    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let yaml = r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }

    /// Both the string and the `name:` mapping form of a service are read.
    #[test]
    fn services_emit_image_nodes() {
        let yaml = r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#;
        let graph = parse(yaml);
        let image_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Image)
            .map(|i| &i.name)
            .collect();
        assert_eq!(
            image_names.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            image_names
        );
    }

    /// Top-level keys whose values are lists produce no steps.
    ///
    /// Despite the test's name, the assertion pins the graph as `Complete`:
    /// list-valued keys do not count as job carriers.
    #[test]
    fn job_carrier_with_unparseable_bodies_marks_partial() {
        let yaml = r#"
build:
  - this is a list, not a mapping
test:
  - also a list
"#;
        let graph = parse(yaml);
        assert_eq!(step_total(&graph), 0);
        assert_eq!(
            graph.completeness,
            AuthorityCompleteness::Complete,
            "non-mapping values are not job carriers"
        );
    }

    /// A file holding only a hidden template job yields no steps and a
    /// partial graph.
    #[test]
    fn mapping_jobs_without_recognisable_step_content_marks_partial() {
        let yaml = r#"
.template-only:
  script:
    - echo "this is a template-only file"
"#;
        let graph = parse(yaml);
        assert_eq!(step_total(&graph), 0);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }
}