1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7#[allow(unused_imports)]
9use taudit_core::graph::{META_DOTENV_FILE, META_ENVIRONMENT_NAME, META_NEEDS, META_SCRIPT_BODY};
10use taudit_core::ports::PipelineParser;
11
/// Stateless parser for GitLab CI pipeline definitions.
///
/// All behaviour lives in its [`PipelineParser`] implementation.
pub struct GitlabParser;
24
/// Top-level keys that configure the pipeline itself and are therefore never
/// treated as job definitions by the parser.
const RESERVED: &[&str] = &[
    // Pipeline structure and composition.
    "stages",
    "workflow",
    "include",
    // Pipeline-wide settings.
    "variables",
    "image",
    "services",
    "default",
    "cache",
    "before_script",
    "after_script",
    "types",
];
39
/// Upper-case substrings that mark a variable name as credential-like.
///
/// Matching is done by substring against the upper-cased variable name.
const CRED_FRAGMENTS: &[&str] = &[
    // Generic secrets.
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    // Keys.
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    // Accounts, certificates and other credentials.
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
];
55
56impl PipelineParser for GitlabParser {
57 fn platform(&self) -> &str {
58 "gitlab-ci"
59 }
60
61 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
62 let mut de = serde_yaml::Deserializer::from_str(content);
63 let doc = de
64 .next()
65 .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
66 let root: Value = Value::deserialize(doc)
67 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
68
69 let mapping = root
70 .as_mapping()
71 .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;
72
73 let mut graph = AuthorityGraph::new(source.clone());
74 graph.metadata.insert(META_PLATFORM.into(), "gitlab".into());
75
76 let mut meta = HashMap::new();
79 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
80 meta.insert(META_IMPLICIT.into(), "true".into());
81 let token_id = graph.add_node_with_metadata(
82 NodeKind::Identity,
83 "CI_JOB_TOKEN",
84 TrustZone::FirstParty,
85 meta,
86 );
87
88 if let Some(inc) = mapping.get("include") {
92 graph.mark_partial(
93 "include: directive present — included templates not resolved".to_string(),
94 );
95 let entries = extract_include_entries(inc);
96 if !entries.is_empty() {
97 if let Ok(json) = serde_json::to_string(&entries) {
98 graph.metadata.insert(META_GITLAB_INCLUDES.into(), json);
99 }
100 }
101 }
102
103 let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");
105
106 let global_image = mapping.get("image").and_then(extract_image_str);
108
109 if let Some(wf) = mapping.get("workflow") {
111 if has_mr_trigger_in_workflow(wf) {
112 graph
113 .metadata
114 .insert(META_TRIGGER.into(), "merge_request".into());
115 }
116 }
117
118 for (key, value) in mapping {
120 let job_name = match key.as_str() {
121 Some(k) => k,
122 None => continue,
123 };
124 if RESERVED.contains(&job_name) {
125 continue;
126 }
127
128 if job_name.starts_with('.') {
130 graph.mark_partial(format!(
131 "job '{job_name}' is a hidden/template job — not resolved"
132 ));
133 continue;
134 }
135
136 let job_map = match value.as_mapping() {
137 Some(m) => m,
138 None => continue,
139 };
140
141 let extends_names = extract_extends_list(job_map.get("extends"));
143 if !extends_names.is_empty() {
144 graph.mark_partial(format!(
145 "job '{job_name}' uses extends: — inherited configuration not resolved"
146 ));
147 }
148
149 let job_triggers_mr = job_has_mr_trigger(job_map);
151
152 if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
154 graph
155 .metadata
156 .insert(META_TRIGGER.into(), "merge_request".into());
157 }
158
159 let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);
161
162 let explicit_secrets =
164 process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);
165
166 let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);
168
169 let job_image_str = job_map
171 .get("image")
172 .and_then(extract_image_str)
173 .or(global_image.as_deref().map(String::from));
174
175 let image_id = job_image_str.as_deref().map(|img| {
176 let pinned = is_docker_digest_pinned(img);
177 let trust_zone = if pinned {
178 TrustZone::ThirdParty
179 } else {
180 TrustZone::Untrusted
181 };
182 let mut imeta = HashMap::new();
183 if let Some(digest) = img.split("@sha256:").nth(1) {
184 imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
185 }
186 graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
187 });
188
189 let service_ids = process_services(job_map.get("services"), &mut graph);
191
192 let env_name = job_map
194 .get("environment")
195 .and_then(extract_environment_name);
196 let env_url = job_map.get("environment").and_then(extract_environment_url);
197
198 let script_body = extract_script_body(job_map);
207
208 let dotenv_file = extract_dotenv_file(job_map);
214
215 let needs = extract_needs(job_map);
218
219 let protected_only = job_has_protected_branch_restriction(job_map);
225
226 let mut step_meta = HashMap::new();
228 step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
229 if let Some(ref env) = env_name {
230 step_meta.insert(META_ENVIRONMENT_NAME.into(), env.clone());
231 }
232 if !script_body.is_empty() {
233 step_meta.insert(META_SCRIPT_BODY.into(), script_body);
234 }
235 if let Some(ref f) = dotenv_file {
236 step_meta.insert(META_DOTENV_FILE.into(), f.clone());
237 }
238 if !needs.is_empty() {
239 step_meta.insert(META_NEEDS.into(), needs.join(","));
240 }
241 if let Some(ref url) = env_url {
242 step_meta.insert(META_ENVIRONMENT_URL.into(), url.clone());
243 }
244 if job_triggers_mr {
249 step_meta.insert(META_TRIGGER.into(), "merge_request".into());
250 }
251 if !extends_names.is_empty() {
253 step_meta.insert(META_GITLAB_EXTENDS.into(), extends_names.join(","));
254 }
255 if let Some(af) = job_map.get("allow_failure").and_then(|v| v.as_bool()) {
258 step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), af.to_string());
259 } else if job_map
260 .get("allow_failure")
261 .and_then(|v| v.as_mapping())
262 .is_some()
263 {
264 step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), "true".into());
267 }
268 if job_services_have_dind(job_map.get("services")) {
270 step_meta.insert(META_GITLAB_DIND_SERVICE.into(), "true".into());
271 }
272 if let Some(kind) = classify_trigger(job_map.get("trigger")) {
274 step_meta.insert(META_GITLAB_TRIGGER_KIND.into(), kind.into());
275 }
276 if let Some((cache_key, cache_policy)) = extract_cache_key_policy(job_map.get("cache"))
278 {
279 step_meta.insert(META_GITLAB_CACHE_KEY.into(), cache_key);
280 if let Some(p) = cache_policy {
281 step_meta.insert(META_GITLAB_CACHE_POLICY.into(), p);
282 }
283 }
284 if protected_only {
285 step_meta.insert(META_RULES_PROTECTED_ONLY.into(), "true".into());
286 }
287 let step_id = graph.add_node_with_metadata(
288 NodeKind::Step,
289 job_name,
290 TrustZone::FirstParty,
291 step_meta,
292 );
293
294 graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
296
297 for &sid in global_secrets
299 .iter()
300 .chain(&job_secrets)
301 .chain(&explicit_secrets)
302 {
303 graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
304 }
305
306 for &iid in &oidc_identities {
308 graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
309 }
310
311 if let Some(img_id) = image_id {
313 graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
314 }
315 for &svc_id in &service_ids {
316 graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
317 }
318 }
319
320 let step_count = graph
328 .nodes
329 .iter()
330 .filter(|n| n.kind == NodeKind::Step)
331 .count();
332 let had_job_carrier = mapping.iter().any(|(k, v)| {
333 k.as_str()
334 .map(|name| !RESERVED.contains(&name) && !name.starts_with('.'))
335 .unwrap_or(false)
336 && v.as_mapping().is_some()
337 });
338 if step_count == 0 && had_job_carrier {
339 graph.mark_partial(
340 "non-reserved top-level keys parsed but produced 0 step nodes — possible non-GitLab YAML wrong-platform-classified".to_string(),
341 );
342 }
343
344 Ok(graph)
345 }
346}
347
348fn extract_image_str(v: &Value) -> Option<String> {
350 match v {
351 Value::String(s) => Some(s.clone()),
352 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
353 _ => None,
354 }
355}
356
357fn extract_environment_name(v: &Value) -> Option<String> {
359 match v {
360 Value::String(s) => Some(s.clone()),
361 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
362 _ => None,
363 }
364}
365
366fn extract_environment_url(v: &Value) -> Option<String> {
368 match v {
369 Value::Mapping(m) => m.get("url").and_then(|u| u.as_str()).map(String::from),
370 _ => None,
371 }
372}
373
374fn extract_script_body(job_map: &serde_yaml::Mapping) -> String {
378 let mut lines: Vec<String> = Vec::new();
379 for key in &["before_script", "script", "after_script"] {
380 if let Some(v) = job_map.get(*key) {
381 collect_script_lines(v, &mut lines);
382 }
383 }
384 lines.join("\n")
385}
386
387fn collect_script_lines(v: &Value, out: &mut Vec<String>) {
389 match v {
390 Value::String(s) => out.push(s.clone()),
391 Value::Sequence(seq) => {
392 for item in seq {
393 if let Some(s) = item.as_str() {
394 out.push(s.to_string());
395 }
396 }
397 }
398 _ => {}
399 }
400}
401
402fn extract_dotenv_file(job_map: &serde_yaml::Mapping) -> Option<String> {
405 let dotenv = job_map
406 .get("artifacts")?
407 .as_mapping()?
408 .get("reports")?
409 .as_mapping()?
410 .get("dotenv")?;
411 match dotenv {
412 Value::String(s) => Some(s.clone()),
413 Value::Sequence(seq) => {
414 let parts: Vec<String> = seq
415 .iter()
416 .filter_map(|v| v.as_str().map(String::from))
417 .collect();
418 if parts.is_empty() {
419 None
420 } else {
421 Some(parts.join(","))
422 }
423 }
424 _ => None,
425 }
426}
427
428fn extract_needs(job_map: &serde_yaml::Mapping) -> Vec<String> {
432 let mut out: Vec<String> = Vec::new();
433 if let Some(needs) = job_map.get("needs").and_then(|v| v.as_sequence()) {
434 for item in needs {
435 match item {
436 Value::String(s) => out.push(s.clone()),
437 Value::Mapping(m) => {
438 if let Some(j) = m.get("job").and_then(|j| j.as_str()) {
439 out.push(j.to_string());
440 }
441 }
442 _ => {}
443 }
444 }
445 }
446 if let Some(deps) = job_map.get("dependencies").and_then(|v| v.as_sequence()) {
447 for item in deps {
448 if let Some(s) = item.as_str() {
449 out.push(s.to_string());
450 }
451 }
452 }
453 out.sort();
454 out.dedup();
455 out
456}
457
458fn is_credential_name(name: &str) -> bool {
460 let upper = name.to_uppercase();
461 CRED_FRAGMENTS.iter().any(|frag| upper.contains(frag))
462}
463
464fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
467 let mut ids = Vec::new();
468 let map = match vars.and_then(|v| v.as_mapping()) {
469 Some(m) => m,
470 None => return ids,
471 };
472 for (k, _v) in map {
473 let name = match k.as_str() {
474 Some(s) => s,
475 None => continue,
476 };
477 if is_credential_name(name) {
478 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
479 ids.push(id);
480 let _ = scope; }
482 }
483 ids
484}
485
486fn process_explicit_secrets(
498 secrets: Option<&Value>,
499 _scope: &str,
500 graph: &mut AuthorityGraph,
501) -> Vec<NodeId> {
502 let mut ids = Vec::new();
503 let map = match secrets.and_then(|v| v.as_mapping()) {
504 Some(m) => m,
505 None => return ids,
506 };
507 for (k, _v) in map {
508 let name = match k.as_str() {
509 Some(s) => s,
510 None => continue,
511 };
512 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
513 ids.push(id);
514 }
515 ids
516}
517
518fn process_id_tokens(
529 id_tokens: Option<&Value>,
530 _scope: &str,
531 graph: &mut AuthorityGraph,
532) -> Vec<NodeId> {
533 let mut ids = Vec::new();
534 let map = match id_tokens.and_then(|v| v.as_mapping()) {
535 Some(m) => m,
536 None => return ids,
537 };
538 for (k, v) in map {
539 let token_name = match k.as_str() {
540 Some(s) => s,
541 None => continue,
542 };
543 let aud = v
547 .as_mapping()
548 .and_then(|m| m.get("aud"))
549 .and_then(|a| a.as_str())
550 .unwrap_or("unknown");
551 let label = format!("{token_name} (aud={aud})");
552 let mut meta = HashMap::new();
553 meta.insert(META_OIDC.into(), "true".into());
554 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
555 meta.insert(META_OIDC_AUDIENCE.into(), aud.to_string());
556 let id =
557 graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
558 ids.push(id);
559 }
560 ids
561}
562
563fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
565 let mut ids = Vec::new();
566 let list = match services.and_then(|v| v.as_sequence()) {
567 Some(s) => s,
568 None => return ids,
569 };
570 for item in list {
571 let img_str = match extract_image_str(item) {
572 Some(s) => s,
573 None => continue,
574 };
575 let pinned = is_docker_digest_pinned(&img_str);
576 let trust_zone = if pinned {
577 TrustZone::ThirdParty
578 } else {
579 TrustZone::Untrusted
580 };
581 let mut meta = HashMap::new();
582 if let Some(digest) = img_str.split("@sha256:").nth(1) {
583 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
584 }
585 let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
586 ids.push(id);
587 }
588 ids
589}
590
591fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
593 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
595 for rule in rules {
596 if let Some(if_expr) = rule
597 .as_mapping()
598 .and_then(|m| m.get("if"))
599 .and_then(|v| v.as_str())
600 {
601 if if_expr.contains("merge_request_event") {
602 return true;
603 }
604 }
605 }
606 }
607 if let Some(only) = job_map.get("only") {
609 if only_has_merge_requests(only) {
610 return true;
611 }
612 }
613 false
614}
615
616fn only_has_merge_requests(v: &Value) -> bool {
618 match v {
619 Value::Sequence(seq) => seq
620 .iter()
621 .any(|item| item.as_str() == Some("merge_requests")),
622 Value::Mapping(m) => {
623 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
624 return refs
625 .iter()
626 .any(|item| item.as_str() == Some("merge_requests"));
627 }
628 false
629 }
630 _ => false,
631 }
632}
633
634fn job_has_protected_branch_restriction(job_map: &serde_yaml::Mapping) -> bool {
651 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
652 for rule in rules {
653 let Some(if_expr) = rule
654 .as_mapping()
655 .and_then(|m| m.get("if"))
656 .and_then(|v| v.as_str())
657 else {
658 continue;
659 };
660 if if_expr.contains("$CI_COMMIT_REF_PROTECTED")
661 || if_expr.contains("CI_COMMIT_REF_PROTECTED")
662 {
663 return true;
664 }
665 if if_expr.contains("$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH")
666 || if_expr.contains("$CI_DEFAULT_BRANCH == $CI_COMMIT_BRANCH")
667 {
668 return true;
669 }
670 if if_expr.contains("$CI_COMMIT_TAG") {
671 return true;
672 }
673 }
674 }
675 if let Some(only) = job_map.get("only") {
676 if only_lists_protected_ref(only) {
677 return true;
678 }
679 }
680 false
681}
682
683fn only_lists_protected_ref(v: &Value) -> bool {
687 fn is_protected_ref(s: &str) -> bool {
688 matches!(s, "main" | "master" | "tags") || s.starts_with("/^release")
689 }
690 match v {
691 Value::String(s) => is_protected_ref(s.as_str()),
692 Value::Sequence(seq) => seq
693 .iter()
694 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false)),
695 Value::Mapping(m) => {
696 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
697 return refs
698 .iter()
699 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false));
700 }
701 false
702 }
703 _ => false,
704 }
705}
706
707fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
709 let rules = match wf
710 .as_mapping()
711 .and_then(|m| m.get("rules"))
712 .and_then(|r| r.as_sequence())
713 {
714 Some(r) => r,
715 None => return false,
716 };
717 for rule in rules {
718 if let Some(if_expr) = rule
719 .as_mapping()
720 .and_then(|m| m.get("if"))
721 .and_then(|v| v.as_str())
722 {
723 if if_expr.contains("merge_request_event") {
724 return true;
725 }
726 }
727 }
728 false
729}
730
731#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
738pub struct IncludeEntry {
739 pub kind: String,
742 pub target: String,
744 pub git_ref: String,
747}
748
749pub fn extract_include_entries(v: &Value) -> Vec<IncludeEntry> {
755 let mut out = Vec::new();
756 match v {
757 Value::String(s) => {
759 out.push(IncludeEntry {
760 kind: classify_string_include(s).into(),
761 target: s.clone(),
762 git_ref: String::new(),
763 });
764 }
765 Value::Sequence(seq) => {
766 for item in seq {
767 match item {
768 Value::String(s) => {
769 out.push(IncludeEntry {
770 kind: classify_string_include(s).into(),
771 target: s.clone(),
772 git_ref: String::new(),
773 });
774 }
775 Value::Mapping(m) => {
776 if let Some(e) = include_entry_from_mapping(m) {
777 out.push(e);
778 }
779 }
780 _ => {}
781 }
782 }
783 }
784 Value::Mapping(m) => {
785 if let Some(e) = include_entry_from_mapping(m) {
786 out.push(e);
787 }
788 }
789 _ => {}
790 }
791 out
792}
793
/// Classifies a bare-string include as `"remote"` when it starts with
/// `http://` or `https://` (ASCII case-insensitive), otherwise `"local"`.
fn classify_string_include(s: &str) -> &'static str {
    // `get(..n)` is `None` for short input or a non-boundary cut, neither of
    // which can match an all-ASCII scheme prefix.
    let has_scheme = |scheme: &str| {
        s.get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme))
    };
    if has_scheme("http://") || has_scheme("https://") {
        "remote"
    } else {
        "local"
    }
}
804
805fn include_entry_from_mapping(m: &serde_yaml::Mapping) -> Option<IncludeEntry> {
809 let str_at = |key: &str| {
810 m.get(key)
811 .and_then(|v| v.as_str())
812 .map(str::to_string)
813 .unwrap_or_default()
814 };
815 if let Some(s) = m.get("local").and_then(|v| v.as_str()) {
816 return Some(IncludeEntry {
817 kind: "local".into(),
818 target: s.to_string(),
819 git_ref: String::new(),
820 });
821 }
822 if let Some(s) = m.get("remote").and_then(|v| v.as_str()) {
823 return Some(IncludeEntry {
824 kind: "remote".into(),
825 target: s.to_string(),
826 git_ref: String::new(),
827 });
828 }
829 if let Some(s) = m.get("template").and_then(|v| v.as_str()) {
830 return Some(IncludeEntry {
831 kind: "template".into(),
832 target: s.to_string(),
833 git_ref: String::new(),
834 });
835 }
836 if let Some(s) = m.get("component").and_then(|v| v.as_str()) {
837 let (target, git_ref) = match s.rsplit_once('@') {
839 Some((path, ver)) => (path.to_string(), ver.to_string()),
840 None => (s.to_string(), String::new()),
841 };
842 return Some(IncludeEntry {
843 kind: "component".into(),
844 target,
845 git_ref,
846 });
847 }
848 if m.contains_key("project") {
849 let project = str_at("project");
850 let git_ref = str_at("ref");
853 return Some(IncludeEntry {
854 kind: "project".into(),
855 target: project,
856 git_ref,
857 });
858 }
859 None
860}
861
862fn extract_extends_list(v: Option<&Value>) -> Vec<String> {
865 let v = match v {
866 Some(v) => v,
867 None => return Vec::new(),
868 };
869 match v {
870 Value::String(s) => vec![s.clone()],
871 Value::Sequence(seq) => seq
872 .iter()
873 .filter_map(|i| i.as_str().map(str::to_string))
874 .collect(),
875 _ => Vec::new(),
876 }
877}
878
879fn job_services_have_dind(services: Option<&Value>) -> bool {
883 let list = match services.and_then(|v| v.as_sequence()) {
884 Some(s) => s,
885 None => return false,
886 };
887 for item in list {
888 let img = match extract_image_str(item) {
889 Some(s) => s,
890 None => continue,
891 };
892 if image_is_dind(&img) {
893 return true;
894 }
895 }
896 false
897}
898
/// Reports whether an image reference looks like a `dind` image.
///
/// The check is ASCII case-insensitive and ignores everything from the
/// first `@` on. The remaining reference must start with `docker:` or
/// `docker/` and contain `dind`.
fn image_is_dind(image: &str) -> bool {
    let lowered = image.to_ascii_lowercase();
    // `split` always yields at least one piece: the text before the first '@'.
    let reference = lowered.split('@').next().unwrap_or(lowered.as_str());
    let is_docker_family = reference.starts_with("docker:") || reference.starts_with("docker/");
    is_docker_family && reference.contains("dind")
}
915
916fn classify_trigger(trigger: Option<&Value>) -> Option<&'static str> {
921 let t = trigger?;
922 if t.is_string() {
924 return Some("static");
925 }
926 let m = t.as_mapping()?;
927 if let Some(inc) = m.get("include") {
930 if include_has_artifact_source(inc) {
931 return Some("dynamic");
932 }
933 }
934 Some("static")
935}
936
937fn include_has_artifact_source(v: &Value) -> bool {
940 match v {
941 Value::Mapping(m) => m.contains_key("artifact"),
942 Value::Sequence(seq) => seq.iter().any(|i| {
943 i.as_mapping()
944 .map(|m| m.contains_key("artifact"))
945 .unwrap_or(false)
946 }),
947 _ => false,
948 }
949}
950
951fn extract_cache_key_policy(v: Option<&Value>) -> Option<(String, Option<String>)> {
962 let v = v?;
963 let m = match v {
964 Value::Mapping(m) => m,
965 Value::Sequence(seq) => {
966 return seq
968 .iter()
969 .find_map(|i| i.as_mapping().and_then(extract_cache_key_policy_map));
970 }
971 _ => return None,
972 };
973 extract_cache_key_policy_map(m)
974}
975
976fn extract_cache_key_policy_map(m: &serde_yaml::Mapping) -> Option<(String, Option<String>)> {
977 let key = match m.get("key") {
978 Some(Value::String(s)) => s.clone(),
979 Some(Value::Number(n)) => n.to_string(),
980 Some(Value::Bool(b)) => b.to_string(),
981 Some(Value::Mapping(km)) => {
982 let mut parts = Vec::new();
983 if let Some(prefix) = km.get("prefix").and_then(|v| v.as_str()) {
984 parts.push(format!("prefix:{prefix}"));
985 }
986 if let Some(files) = km.get("files").and_then(|v| v.as_sequence()) {
987 let names: Vec<String> = files
988 .iter()
989 .filter_map(|f| f.as_str().map(str::to_string))
990 .collect();
991 if !names.is_empty() {
992 parts.push(format!("files:{}", names.join(",")));
993 }
994 }
995 if parts.is_empty() {
996 String::new()
997 } else {
998 parts.join(";")
999 }
1000 }
1001 _ => String::new(),
1002 };
1003 let policy = m.get("policy").and_then(|v| v.as_str()).map(str::to_string);
1004 Some((key, policy))
1005}
1006
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` as `.gitlab-ci.yml` and panics on any parse error.
    fn parse(yaml: &str) -> AuthorityGraph {
        let source = PipelineSource {
            file: ".gitlab-ci.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        GitlabParser.parse(yaml, &source).unwrap()
    }

    /// Number of `Step` nodes in `graph`.
    fn step_count(graph: &AuthorityGraph) -> usize {
        graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count()
    }

    #[test]
    fn ci_job_token_always_present() {
        let graph = parse(
            r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#,
        );
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);

        let token = &identities[0];
        assert_eq!(token.name, "CI_JOB_TOKEN");
        assert_eq!(
            token.metadata.get(META_IMPLICIT).map(String::as_str),
            Some("true")
        );
        assert_eq!(
            token.metadata.get(META_IDENTITY_SCOPE).map(String::as_str),
            Some("broad")
        );
    }

    #[test]
    fn global_credential_variable_emits_secret_node() {
        let graph = parse(
            r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#,
        );
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|s| s.name == "DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert!(
            !secrets.iter().any(|s| s.name == "APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }

    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let graph = parse(
            r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#,
        );
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }

    #[test]
    fn digest_pinned_image_is_third_party() {
        let graph = parse(
            r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#,
        );
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }

    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let graph = parse(
            r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#,
        );
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc.iter().map(|n| &n.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let graph = parse(
            r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#,
        );
        let names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| s.name.as_str())
            .collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }

    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let graph = parse(
            r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#,
        );
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }

    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let graph = parse(
            r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#,
        );
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request")
        );
    }

    #[test]
    fn include_marks_graph_partial() {
        let graph = parse(
            r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#,
        );
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    #[test]
    fn extends_marks_graph_partial() {
        let graph = parse(
            r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#,
        );
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let graph = parse(
            r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#,
        );
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }

    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let graph = parse(
            r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#,
        );
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }

    #[test]
    fn services_emit_image_nodes() {
        let graph = parse(
            r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#,
        );
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(
            images.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            images.iter().map(|i| &i.name).collect::<Vec<_>>()
        );
    }

    /// Despite its name, this test pins that top-level keys with non-mapping
    /// values are NOT job carriers: no steps are produced and the graph
    /// stays complete.
    #[test]
    fn job_carrier_with_unparseable_bodies_marks_partial() {
        let graph = parse(
            r#"
build:
  - this is a list, not a mapping
test:
  - also a list
"#,
        );
        assert_eq!(step_count(&graph), 0);
        assert_eq!(
            graph.completeness,
            AuthorityCompleteness::Complete,
            "non-mapping values are not job carriers"
        );
    }

    /// A file holding only a hidden (`.name`) job yields no steps and is
    /// marked partial.
    #[test]
    fn mapping_jobs_without_recognisable_step_content_marks_partial() {
        let graph = parse(
            r#"
.template-only:
  script:
    - echo "this is a template-only file"
"#,
        );
        assert_eq!(step_count(&graph), 0);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }
}