1use std::collections::HashMap;
2
3use serde::Deserialize;
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Parser for GitLab CI pipeline definitions (`.gitlab-ci.yml`).
///
/// The type is a stateless unit struct; all work happens in its
/// `PipelineParser` implementation. The common traits are derived so the
/// parser can be logged, copied, and default-constructed by callers.
#[derive(Debug, Clone, Copy, Default)]
pub struct GitlabParser;
21
/// Top-level keys of a GitLab CI file that the parser never treats as job
/// definitions. Any other top-level key is considered a candidate job name.
const RESERVED: &[&str] = &[
    // Pipeline-wide structure and composition.
    "stages",
    "workflow",
    "include",
    "variables",
    // Pipeline-wide execution defaults.
    "image",
    "services",
    "default",
    "cache",
    "before_script",
    "after_script",
    // Remaining non-job key.
    "types",
];
36
/// Upper-case substrings that mark a variable name as credential-like.
///
/// A name is matched after being upper-cased, so the entries here must stay
/// upper-case for the comparison to work.
const CRED_FRAGMENTS: &[&str] = &[
    // Generic secret material.
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    // Keys of various kinds.
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    // Account and certificate style credentials.
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
];
52
53impl PipelineParser for GitlabParser {
54 fn platform(&self) -> &str {
55 "gitlab-ci"
56 }
57
58 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
59 let mut de = serde_yaml::Deserializer::from_str(content);
60 let doc = de
61 .next()
62 .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
63 let root: Value = Value::deserialize(doc)
64 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
65
66 let mapping = root
67 .as_mapping()
68 .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;
69
70 let mut graph = AuthorityGraph::new(source.clone());
71
72 let mut meta = HashMap::new();
75 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
76 meta.insert(META_IMPLICIT.into(), "true".into());
77 let token_id = graph.add_node_with_metadata(
78 NodeKind::Identity,
79 "CI_JOB_TOKEN",
80 TrustZone::FirstParty,
81 meta,
82 );
83
84 if mapping.contains_key("include") {
86 graph.mark_partial(
87 "include: directive present — included templates not resolved".to_string(),
88 );
89 }
90
91 let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");
93
94 let global_image = mapping.get("image").and_then(extract_image_str);
96
97 if let Some(wf) = mapping.get("workflow") {
99 if has_mr_trigger_in_workflow(wf) {
100 graph
101 .metadata
102 .insert(META_TRIGGER.into(), "merge_request".into());
103 }
104 }
105
106 for (key, value) in mapping {
108 let job_name = match key.as_str() {
109 Some(k) => k,
110 None => continue,
111 };
112 if RESERVED.contains(&job_name) {
113 continue;
114 }
115
116 if job_name.starts_with('.') {
118 graph.mark_partial(format!(
119 "job '{job_name}' is a hidden/template job — not resolved"
120 ));
121 continue;
122 }
123
124 let job_map = match value.as_mapping() {
125 Some(m) => m,
126 None => continue,
127 };
128
129 if job_map.contains_key("extends") {
131 graph.mark_partial(format!(
132 "job '{job_name}' uses extends: — inherited configuration not resolved"
133 ));
134 }
135
136 let job_triggers_mr = job_has_mr_trigger(job_map);
138
139 if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
141 graph
142 .metadata
143 .insert(META_TRIGGER.into(), "merge_request".into());
144 }
145
146 let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);
148
149 let explicit_secrets =
151 process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);
152
153 let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);
155
156 let job_image_str = job_map
158 .get("image")
159 .and_then(extract_image_str)
160 .or(global_image.as_deref().map(String::from));
161
162 let image_id = job_image_str.as_deref().map(|img| {
163 let pinned = is_docker_digest_pinned(img);
164 let trust_zone = if pinned {
165 TrustZone::ThirdParty
166 } else {
167 TrustZone::Untrusted
168 };
169 let mut imeta = HashMap::new();
170 if let Some(digest) = img.split("@sha256:").nth(1) {
171 imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
172 }
173 graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
174 });
175
176 let service_ids = process_services(job_map.get("services"), &mut graph);
178
179 let env_name = job_map
181 .get("environment")
182 .and_then(extract_environment_name);
183
184 let mut step_meta = HashMap::new();
186 step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
187 if let Some(ref env) = env_name {
188 step_meta.insert("environment_name".into(), env.clone());
189 }
190 let step_id = graph.add_node_with_metadata(
191 NodeKind::Step,
192 job_name,
193 TrustZone::FirstParty,
194 step_meta,
195 );
196
197 graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
199
200 for &sid in global_secrets
202 .iter()
203 .chain(&job_secrets)
204 .chain(&explicit_secrets)
205 {
206 graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
207 }
208
209 for &iid in &oidc_identities {
211 graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
212 }
213
214 if let Some(img_id) = image_id {
216 graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
217 }
218 for &svc_id in &service_ids {
219 graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
220 }
221 }
222
223 Ok(graph)
224 }
225}
226
227fn extract_image_str(v: &Value) -> Option<String> {
229 match v {
230 Value::String(s) => Some(s.clone()),
231 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
232 _ => None,
233 }
234}
235
236fn extract_environment_name(v: &Value) -> Option<String> {
238 match v {
239 Value::String(s) => Some(s.clone()),
240 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
241 _ => None,
242 }
243}
244
245fn is_credential_name(name: &str) -> bool {
247 let upper = name.to_uppercase();
248 CRED_FRAGMENTS.iter().any(|frag| upper.contains(frag))
249}
250
251fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
254 let mut ids = Vec::new();
255 let map = match vars.and_then(|v| v.as_mapping()) {
256 Some(m) => m,
257 None => return ids,
258 };
259 for (k, _v) in map {
260 let name = match k.as_str() {
261 Some(s) => s,
262 None => continue,
263 };
264 if is_credential_name(name) {
265 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
266 ids.push(id);
267 let _ = scope; }
269 }
270 ids
271}
272
273fn process_explicit_secrets(
285 secrets: Option<&Value>,
286 _scope: &str,
287 graph: &mut AuthorityGraph,
288) -> Vec<NodeId> {
289 let mut ids = Vec::new();
290 let map = match secrets.and_then(|v| v.as_mapping()) {
291 Some(m) => m,
292 None => return ids,
293 };
294 for (k, _v) in map {
295 let name = match k.as_str() {
296 Some(s) => s,
297 None => continue,
298 };
299 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
300 ids.push(id);
301 }
302 ids
303}
304
305fn process_id_tokens(
316 id_tokens: Option<&Value>,
317 _scope: &str,
318 graph: &mut AuthorityGraph,
319) -> Vec<NodeId> {
320 let mut ids = Vec::new();
321 let map = match id_tokens.and_then(|v| v.as_mapping()) {
322 Some(m) => m,
323 None => return ids,
324 };
325 for (k, v) in map {
326 let token_name = match k.as_str() {
327 Some(s) => s,
328 None => continue,
329 };
330 let aud = v
332 .as_mapping()
333 .and_then(|m| m.get("aud"))
334 .and_then(|a| a.as_str())
335 .unwrap_or("unknown");
336 let label = format!("{token_name} (aud={aud})");
337 let mut meta = HashMap::new();
338 meta.insert(META_OIDC.into(), "true".into());
339 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
340 let id =
341 graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
342 ids.push(id);
343 }
344 ids
345}
346
347fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
349 let mut ids = Vec::new();
350 let list = match services.and_then(|v| v.as_sequence()) {
351 Some(s) => s,
352 None => return ids,
353 };
354 for item in list {
355 let img_str = match extract_image_str(item) {
356 Some(s) => s,
357 None => continue,
358 };
359 let pinned = is_docker_digest_pinned(&img_str);
360 let trust_zone = if pinned {
361 TrustZone::ThirdParty
362 } else {
363 TrustZone::Untrusted
364 };
365 let mut meta = HashMap::new();
366 if let Some(digest) = img_str.split("@sha256:").nth(1) {
367 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
368 }
369 let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
370 ids.push(id);
371 }
372 ids
373}
374
375fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
377 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
379 for rule in rules {
380 if let Some(if_expr) = rule
381 .as_mapping()
382 .and_then(|m| m.get("if"))
383 .and_then(|v| v.as_str())
384 {
385 if if_expr.contains("merge_request_event") {
386 return true;
387 }
388 }
389 }
390 }
391 if let Some(only) = job_map.get("only") {
393 if only_has_merge_requests(only) {
394 return true;
395 }
396 }
397 false
398}
399
400fn only_has_merge_requests(v: &Value) -> bool {
402 match v {
403 Value::Sequence(seq) => seq
404 .iter()
405 .any(|item| item.as_str() == Some("merge_requests")),
406 Value::Mapping(m) => {
407 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
408 return refs
409 .iter()
410 .any(|item| item.as_str() == Some("merge_requests"));
411 }
412 false
413 }
414 _ => false,
415 }
416}
417
418fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
420 let rules = match wf
421 .as_mapping()
422 .and_then(|m| m.get("rules"))
423 .and_then(|r| r.as_sequence())
424 {
425 Some(r) => r,
426 None => return false,
427 };
428 for rule in rules {
429 if let Some(if_expr) = rule
430 .as_mapping()
431 .and_then(|m| m.get("if"))
432 .and_then(|v| v.as_str())
433 {
434 if if_expr.contains("merge_request_event") {
435 return true;
436 }
437 }
438 }
439 false
440}
441
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` as `.gitlab-ci.yml` and returns the graph, panicking on
    /// any parse error.
    fn parse(yaml: &str) -> AuthorityGraph {
        let source = PipelineSource {
            file: ".gitlab-ci.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        GitlabParser.parse(yaml, &source).unwrap()
    }

    /// Every pipeline exposes exactly one implicit, broad `CI_JOB_TOKEN`.
    #[test]
    fn ci_job_token_always_present() {
        let pipeline = r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#;
        let graph = parse(pipeline);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);

        let token = &identities[0];
        assert_eq!(token.name, "CI_JOB_TOKEN");
        assert_eq!(
            token.metadata.get(META_IMPLICIT).map(String::as_str),
            Some("true")
        );
        assert_eq!(
            token.metadata.get(META_IDENTITY_SCOPE).map(String::as_str),
            Some("broad")
        );
    }

    /// Only credential-looking global variables become `Secret` nodes.
    #[test]
    fn global_credential_variable_emits_secret_node() {
        let pipeline = r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#;
        let graph = parse(pipeline);
        let secret_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| s.name.as_str())
            .collect();
        assert!(
            secret_names.contains(&"DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secret_names
        );
        assert!(
            !secret_names.contains(&"APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }

    /// A tag-only image reference lands in the untrusted zone.
    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let pipeline = r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#;
        let graph = parse(pipeline);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);

        let image = &images[0];
        assert_eq!(image.name, "alpine:latest");
        assert_eq!(image.trust_zone, TrustZone::Untrusted);
    }

    /// A digest-pinned image reference lands in the third-party zone.
    #[test]
    fn digest_pinned_image_is_third_party() {
        let pipeline = r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#;
        let graph = parse(pipeline);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }

    /// Each `id_tokens` entry produces one OIDC-tagged identity.
    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let pipeline = r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(pipeline);
        let oidc_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .map(|n| &n.name)
            .collect();
        assert_eq!(
            oidc_names.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc_names
        );
    }

    /// Every key under `secrets:` is a secret, whatever its name.
    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let pipeline = r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#;
        let graph = parse(pipeline);
        let names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| s.name.as_str())
            .collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }

    /// A `rules: if:` on the merge-request event records the trigger.
    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let pipeline = r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#;
        let graph = parse(pipeline);
        let trigger = graph.metadata.get(META_TRIGGER).map(String::as_str);
        assert_eq!(
            trigger,
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }

    /// `only: [merge_requests]` records the trigger as well.
    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let pipeline = r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#;
        let graph = parse(pipeline);
        let trigger = graph.metadata.get(META_TRIGGER).map(String::as_str);
        assert_eq!(trigger, Some("merge_request"));
    }

    /// Unresolved `include:` directives make the graph partial.
    #[test]
    fn include_marks_graph_partial() {
        let pipeline = r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#;
        let graph = parse(pipeline);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// Hidden templates and `extends:` make the graph partial.
    #[test]
    fn extends_marks_graph_partial() {
        let pipeline = r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#;
        let graph = parse(pipeline);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// Each job's Step node carries its job name in the metadata.
    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let pipeline = r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#;
        let graph = parse(pipeline);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);

        let mut names = Vec::new();
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
            names.push(step.metadata.get(META_JOB_NAME).unwrap().as_str());
        }
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }

    /// Reserved top-level keys never turn into Step nodes.
    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let pipeline = r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#;
        let graph = parse(pipeline);
        let step_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Step)
            .map(|s| &s.name)
            .collect();
        assert_eq!(
            step_names.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            step_names
        );
        assert_eq!(step_names[0], "build");
    }

    /// Both plain-string and `name:` service entries yield Image nodes.
    #[test]
    fn services_emit_image_nodes() {
        let pipeline = r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#;
        let graph = parse(pipeline);
        let image_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Image)
            .map(|i| &i.name)
            .collect();
        assert_eq!(
            image_names.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            image_names
        );
    }
}