1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7#[allow(unused_imports)]
9use taudit_core::graph::{META_DOTENV_FILE, META_ENVIRONMENT_NAME, META_NEEDS, META_SCRIPT_BODY};
10use taudit_core::ports::PipelineParser;
11
/// Stateless parser for GitLab CI pipeline definitions.
///
/// The type carries no data; all behaviour lives in its `PipelineParser` implementation,
/// which turns a GitLab CI YAML document into an `AuthorityGraph`.
pub struct GitlabParser;
24
/// Top-level keys of a GitLab CI document that configure the pipeline itself.
///
/// Any root key listed here is skipped when the parser walks the document looking for
/// job definitions.
const RESERVED: &[&str] = &[
    // Pipeline-wide structure and composition.
    "stages",
    "workflow",
    "include",
    // Settings declared at the root of the document.
    "variables",
    "image",
    "services",
    "default",
    "cache",
    "before_script",
    "after_script",
    "types",
];
39
/// Upper-case substrings that mark a variable name as credential-like.
///
/// `is_credential_name` upper-cases the candidate name and reports a match when it
/// contains any of these fragments.
const CRED_FRAGMENTS: &[&str] = &[
    // Generic secrets.
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    // Key material.
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    // Accounts and certificates.
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
];
55
56impl PipelineParser for GitlabParser {
57 fn platform(&self) -> &str {
58 "gitlab-ci"
59 }
60
61 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
62 let mut de = serde_yaml::Deserializer::from_str(content);
63 let doc = de
64 .next()
65 .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
66 let root: Value = Value::deserialize(doc)
67 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
68
69 let mapping = root
70 .as_mapping()
71 .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;
72
73 let mut graph = AuthorityGraph::new(source.clone());
74 graph.metadata.insert(META_PLATFORM.into(), "gitlab".into());
75
76 let mut meta = HashMap::new();
79 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
80 meta.insert(META_IMPLICIT.into(), "true".into());
81 let token_id = graph.add_node_with_metadata(
82 NodeKind::Identity,
83 "CI_JOB_TOKEN",
84 TrustZone::FirstParty,
85 meta,
86 );
87
88 if let Some(inc) = mapping.get("include") {
92 graph.mark_partial(
93 GapKind::Structural,
94 "include: directive present — included templates not resolved".to_string(),
95 );
96 let entries = extract_include_entries(inc);
97 if !entries.is_empty() {
98 if let Ok(json) = serde_json::to_string(&entries) {
99 graph.metadata.insert(META_GITLAB_INCLUDES.into(), json);
100 }
101 }
102 }
103
104 let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");
106
107 let global_image = mapping.get("image").and_then(extract_image_str);
109
110 if let Some(wf) = mapping.get("workflow") {
112 if has_mr_trigger_in_workflow(wf) {
113 graph
114 .metadata
115 .insert(META_TRIGGER.into(), "merge_request".into());
116 }
117 }
118
119 for (key, value) in mapping {
121 let job_name = match key.as_str() {
122 Some(k) => k,
123 None => continue,
124 };
125 if RESERVED.contains(&job_name) {
126 continue;
127 }
128
129 if job_name.starts_with('.') {
131 graph.mark_partial(
132 GapKind::Structural,
133 format!("job '{job_name}' is a hidden/template job — not resolved"),
134 );
135 continue;
136 }
137
138 let job_map = match value.as_mapping() {
139 Some(m) => m,
140 None => continue,
141 };
142
143 let extends_names = extract_extends_list(job_map.get("extends"));
145 if !extends_names.is_empty() {
146 graph.mark_partial(
147 GapKind::Structural,
148 format!(
149 "job '{job_name}' uses extends: — inherited configuration not resolved"
150 ),
151 );
152 }
153
154 let job_triggers_mr = job_has_mr_trigger(job_map);
156
157 if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
159 graph
160 .metadata
161 .insert(META_TRIGGER.into(), "merge_request".into());
162 }
163
164 let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);
166
167 let explicit_secrets =
169 process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);
170
171 let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);
173
174 let job_image_str = job_map
176 .get("image")
177 .and_then(extract_image_str)
178 .or(global_image.as_deref().map(String::from));
179
180 let image_id = job_image_str.as_deref().map(|img| {
181 let pinned = is_docker_digest_pinned(img);
182 let trust_zone = if pinned {
183 TrustZone::ThirdParty
184 } else {
185 TrustZone::Untrusted
186 };
187 let mut imeta = HashMap::new();
188 if let Some(digest) = img.split("@sha256:").nth(1) {
189 imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
190 }
191 graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
192 });
193
194 let service_ids = process_services(job_map.get("services"), &mut graph);
196
197 let env_name = job_map
199 .get("environment")
200 .and_then(extract_environment_name);
201 let env_url = job_map.get("environment").and_then(extract_environment_url);
202
203 let script_body = extract_script_body(job_map);
212
213 let dotenv_file = extract_dotenv_file(job_map);
219
220 let needs = extract_needs(job_map);
223
224 let protected_only = job_has_protected_branch_restriction(job_map);
230
231 let mut step_meta = HashMap::new();
233 step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
234 if let Some(ref env) = env_name {
235 step_meta.insert(META_ENVIRONMENT_NAME.into(), env.clone());
236 }
237 if !script_body.is_empty() {
238 step_meta.insert(META_SCRIPT_BODY.into(), script_body);
239 }
240 if let Some(ref f) = dotenv_file {
241 step_meta.insert(META_DOTENV_FILE.into(), f.clone());
242 }
243 if !needs.is_empty() {
244 step_meta.insert(META_NEEDS.into(), needs.join(","));
245 }
246 if let Some(ref url) = env_url {
247 step_meta.insert(META_ENVIRONMENT_URL.into(), url.clone());
248 }
249 if job_triggers_mr {
254 step_meta.insert(META_TRIGGER.into(), "merge_request".into());
255 }
256 if !extends_names.is_empty() {
258 step_meta.insert(META_GITLAB_EXTENDS.into(), extends_names.join(","));
259 }
260 if let Some(af) = job_map.get("allow_failure").and_then(|v| v.as_bool()) {
263 step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), af.to_string());
264 } else if job_map
265 .get("allow_failure")
266 .and_then(|v| v.as_mapping())
267 .is_some()
268 {
269 step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), "true".into());
272 }
273 if job_services_have_dind(job_map.get("services")) {
275 step_meta.insert(META_GITLAB_DIND_SERVICE.into(), "true".into());
276 }
277 if let Some(kind) = classify_trigger(job_map.get("trigger")) {
279 step_meta.insert(META_GITLAB_TRIGGER_KIND.into(), kind.into());
280 }
281 if let Some((cache_key, cache_policy)) = extract_cache_key_policy(job_map.get("cache"))
283 {
284 step_meta.insert(META_GITLAB_CACHE_KEY.into(), cache_key);
285 if let Some(p) = cache_policy {
286 step_meta.insert(META_GITLAB_CACHE_POLICY.into(), p);
287 }
288 }
289 if protected_only {
290 step_meta.insert(META_RULES_PROTECTED_ONLY.into(), "true".into());
291 }
292 let step_id = graph.add_node_with_metadata(
293 NodeKind::Step,
294 job_name,
295 TrustZone::FirstParty,
296 step_meta,
297 );
298
299 graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
301
302 for &sid in global_secrets
304 .iter()
305 .chain(&job_secrets)
306 .chain(&explicit_secrets)
307 {
308 graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
309 }
310
311 for &iid in &oidc_identities {
313 graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
314 }
315
316 if let Some(img_id) = image_id {
318 graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
319 }
320 for &svc_id in &service_ids {
321 graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
322 }
323 }
324
325 let step_count = graph
333 .nodes
334 .iter()
335 .filter(|n| n.kind == NodeKind::Step)
336 .count();
337 let had_job_carrier = mapping.iter().any(|(k, v)| {
338 k.as_str()
339 .map(|name| !RESERVED.contains(&name) && !name.starts_with('.'))
340 .unwrap_or(false)
341 && v.as_mapping().is_some()
342 });
343 if step_count == 0 && had_job_carrier {
344 graph.mark_partial(
345 GapKind::Opaque,
346 "non-reserved top-level keys parsed but produced 0 step nodes — possible non-GitLab YAML wrong-platform-classified".to_string(),
347 );
348 }
349
350 graph.stamp_edge_authority_summaries();
351 Ok(graph)
352 }
353}
354
355fn extract_image_str(v: &Value) -> Option<String> {
357 match v {
358 Value::String(s) => Some(s.clone()),
359 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
360 _ => None,
361 }
362}
363
364fn extract_environment_name(v: &Value) -> Option<String> {
366 match v {
367 Value::String(s) => Some(s.clone()),
368 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
369 _ => None,
370 }
371}
372
373fn extract_environment_url(v: &Value) -> Option<String> {
375 match v {
376 Value::Mapping(m) => m.get("url").and_then(|u| u.as_str()).map(String::from),
377 _ => None,
378 }
379}
380
381fn extract_script_body(job_map: &serde_yaml::Mapping) -> String {
385 let mut lines: Vec<String> = Vec::new();
386 for key in &["before_script", "script", "after_script"] {
387 if let Some(v) = job_map.get(*key) {
388 collect_script_lines(v, &mut lines);
389 }
390 }
391 lines.join("\n")
392}
393
394fn collect_script_lines(v: &Value, out: &mut Vec<String>) {
396 match v {
397 Value::String(s) => out.push(s.clone()),
398 Value::Sequence(seq) => {
399 for item in seq {
400 if let Some(s) = item.as_str() {
401 out.push(s.to_string());
402 }
403 }
404 }
405 _ => {}
406 }
407}
408
409fn extract_dotenv_file(job_map: &serde_yaml::Mapping) -> Option<String> {
412 let dotenv = job_map
413 .get("artifacts")?
414 .as_mapping()?
415 .get("reports")?
416 .as_mapping()?
417 .get("dotenv")?;
418 match dotenv {
419 Value::String(s) => Some(s.clone()),
420 Value::Sequence(seq) => {
421 let parts: Vec<String> = seq
422 .iter()
423 .filter_map(|v| v.as_str().map(String::from))
424 .collect();
425 if parts.is_empty() {
426 None
427 } else {
428 Some(parts.join(","))
429 }
430 }
431 _ => None,
432 }
433}
434
435fn extract_needs(job_map: &serde_yaml::Mapping) -> Vec<String> {
439 let mut out: Vec<String> = Vec::new();
440 if let Some(needs) = job_map.get("needs").and_then(|v| v.as_sequence()) {
441 for item in needs {
442 match item {
443 Value::String(s) => out.push(s.clone()),
444 Value::Mapping(m) => {
445 if let Some(j) = m.get("job").and_then(|j| j.as_str()) {
446 out.push(j.to_string());
447 }
448 }
449 _ => {}
450 }
451 }
452 }
453 if let Some(deps) = job_map.get("dependencies").and_then(|v| v.as_sequence()) {
454 for item in deps {
455 if let Some(s) = item.as_str() {
456 out.push(s.to_string());
457 }
458 }
459 }
460 out.sort();
461 out.dedup();
462 out
463}
464
465fn is_credential_name(name: &str) -> bool {
467 let upper = name.to_uppercase();
468 CRED_FRAGMENTS.iter().any(|frag| upper.contains(frag))
469}
470
471fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
474 let mut ids = Vec::new();
475 let map = match vars.and_then(|v| v.as_mapping()) {
476 Some(m) => m,
477 None => return ids,
478 };
479 for (k, _v) in map {
480 let name = match k.as_str() {
481 Some(s) => s,
482 None => continue,
483 };
484 if is_credential_name(name) {
485 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
486 ids.push(id);
487 let _ = scope; }
489 }
490 ids
491}
492
493fn process_explicit_secrets(
505 secrets: Option<&Value>,
506 _scope: &str,
507 graph: &mut AuthorityGraph,
508) -> Vec<NodeId> {
509 let mut ids = Vec::new();
510 let map = match secrets.and_then(|v| v.as_mapping()) {
511 Some(m) => m,
512 None => return ids,
513 };
514 for (k, _v) in map {
515 let name = match k.as_str() {
516 Some(s) => s,
517 None => continue,
518 };
519 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
520 ids.push(id);
521 }
522 ids
523}
524
525fn process_id_tokens(
536 id_tokens: Option<&Value>,
537 _scope: &str,
538 graph: &mut AuthorityGraph,
539) -> Vec<NodeId> {
540 let mut ids = Vec::new();
541 let map = match id_tokens.and_then(|v| v.as_mapping()) {
542 Some(m) => m,
543 None => return ids,
544 };
545 for (k, v) in map {
546 let token_name = match k.as_str() {
547 Some(s) => s,
548 None => continue,
549 };
550 let aud = v
554 .as_mapping()
555 .and_then(|m| m.get("aud"))
556 .and_then(|a| a.as_str())
557 .unwrap_or("unknown");
558 let label = format!("{token_name} (aud={aud})");
559 let mut meta = HashMap::new();
560 meta.insert(META_OIDC.into(), "true".into());
561 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
562 meta.insert(META_OIDC_AUDIENCE.into(), aud.to_string());
563 let id =
564 graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
565 ids.push(id);
566 }
567 ids
568}
569
570fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
572 let mut ids = Vec::new();
573 let list = match services.and_then(|v| v.as_sequence()) {
574 Some(s) => s,
575 None => return ids,
576 };
577 for item in list {
578 let img_str = match extract_image_str(item) {
579 Some(s) => s,
580 None => continue,
581 };
582 let pinned = is_docker_digest_pinned(&img_str);
583 let trust_zone = if pinned {
584 TrustZone::ThirdParty
585 } else {
586 TrustZone::Untrusted
587 };
588 let mut meta = HashMap::new();
589 if let Some(digest) = img_str.split("@sha256:").nth(1) {
590 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
591 }
592 let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
593 ids.push(id);
594 }
595 ids
596}
597
598fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
600 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
602 for rule in rules {
603 if let Some(if_expr) = rule
604 .as_mapping()
605 .and_then(|m| m.get("if"))
606 .and_then(|v| v.as_str())
607 {
608 if if_expr.contains("merge_request_event") {
609 return true;
610 }
611 }
612 }
613 }
614 if let Some(only) = job_map.get("only") {
616 if only_has_merge_requests(only) {
617 return true;
618 }
619 }
620 false
621}
622
623fn only_has_merge_requests(v: &Value) -> bool {
625 match v {
626 Value::Sequence(seq) => seq
627 .iter()
628 .any(|item| item.as_str() == Some("merge_requests")),
629 Value::Mapping(m) => {
630 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
631 return refs
632 .iter()
633 .any(|item| item.as_str() == Some("merge_requests"));
634 }
635 false
636 }
637 _ => false,
638 }
639}
640
641fn job_has_protected_branch_restriction(job_map: &serde_yaml::Mapping) -> bool {
658 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
659 for rule in rules {
660 let Some(if_expr) = rule
661 .as_mapping()
662 .and_then(|m| m.get("if"))
663 .and_then(|v| v.as_str())
664 else {
665 continue;
666 };
667 if if_expr.contains("$CI_COMMIT_REF_PROTECTED")
668 || if_expr.contains("CI_COMMIT_REF_PROTECTED")
669 {
670 return true;
671 }
672 if if_expr.contains("$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH")
673 || if_expr.contains("$CI_DEFAULT_BRANCH == $CI_COMMIT_BRANCH")
674 {
675 return true;
676 }
677 if if_expr.contains("$CI_COMMIT_TAG") {
678 return true;
679 }
680 }
681 }
682 if let Some(only) = job_map.get("only") {
683 if only_lists_protected_ref(only) {
684 return true;
685 }
686 }
687 false
688}
689
690fn only_lists_protected_ref(v: &Value) -> bool {
694 fn is_protected_ref(s: &str) -> bool {
695 matches!(s, "main" | "master" | "tags") || s.starts_with("/^release")
696 }
697 match v {
698 Value::String(s) => is_protected_ref(s.as_str()),
699 Value::Sequence(seq) => seq
700 .iter()
701 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false)),
702 Value::Mapping(m) => {
703 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
704 return refs
705 .iter()
706 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false));
707 }
708 false
709 }
710 _ => false,
711 }
712}
713
714fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
716 let rules = match wf
717 .as_mapping()
718 .and_then(|m| m.get("rules"))
719 .and_then(|r| r.as_sequence())
720 {
721 Some(r) => r,
722 None => return false,
723 };
724 for rule in rules {
725 if let Some(if_expr) = rule
726 .as_mapping()
727 .and_then(|m| m.get("if"))
728 .and_then(|v| v.as_str())
729 {
730 if if_expr.contains("merge_request_event") {
731 return true;
732 }
733 }
734 }
735 false
736}
737
/// One `include:` reference as written in the pipeline file.
///
/// Entries are produced by `extract_include_entries` and are serializable so they can be
/// stored as JSON in graph metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IncludeEntry {
    /// Include flavour. The extraction code in this file emits `local`, `remote`,
    /// `template`, `component` or `project`.
    pub kind: String,
    /// What is being included: the path, URL, template name, component path (without
    /// its `@version` suffix) or project path, depending on `kind`.
    pub target: String,
    /// Version or ref pinned by the entry: the text after the last `@` for components,
    /// the `ref` field for project includes, and an empty string otherwise.
    pub git_ref: String,
}
755
756pub fn extract_include_entries(v: &Value) -> Vec<IncludeEntry> {
762 let mut out = Vec::new();
763 match v {
764 Value::String(s) => {
766 out.push(IncludeEntry {
767 kind: classify_string_include(s).into(),
768 target: s.clone(),
769 git_ref: String::new(),
770 });
771 }
772 Value::Sequence(seq) => {
773 for item in seq {
774 match item {
775 Value::String(s) => {
776 out.push(IncludeEntry {
777 kind: classify_string_include(s).into(),
778 target: s.clone(),
779 git_ref: String::new(),
780 });
781 }
782 Value::Mapping(m) => {
783 if let Some(e) = include_entry_from_mapping(m) {
784 out.push(e);
785 }
786 }
787 _ => {}
788 }
789 }
790 }
791 Value::Mapping(m) => {
792 if let Some(e) = include_entry_from_mapping(m) {
793 out.push(e);
794 }
795 }
796 _ => {}
797 }
798 out
799}
800
/// Classifies a bare-string include as `"remote"` when it starts with `http://` or
/// `https://` (ASCII case-insensitively) and as `"local"` otherwise.
fn classify_string_include(s: &str) -> &'static str {
    // Compare only the leading bytes; no lower-cased copy of the whole string is needed.
    let has_scheme = |scheme: &str| {
        s.as_bytes()
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme.as_bytes()))
    };

    if has_scheme("http://") || has_scheme("https://") {
        "remote"
    } else {
        "local"
    }
}
811
812fn include_entry_from_mapping(m: &serde_yaml::Mapping) -> Option<IncludeEntry> {
816 let str_at = |key: &str| {
817 m.get(key)
818 .and_then(|v| v.as_str())
819 .map(str::to_string)
820 .unwrap_or_default()
821 };
822 if let Some(s) = m.get("local").and_then(|v| v.as_str()) {
823 return Some(IncludeEntry {
824 kind: "local".into(),
825 target: s.to_string(),
826 git_ref: String::new(),
827 });
828 }
829 if let Some(s) = m.get("remote").and_then(|v| v.as_str()) {
830 return Some(IncludeEntry {
831 kind: "remote".into(),
832 target: s.to_string(),
833 git_ref: String::new(),
834 });
835 }
836 if let Some(s) = m.get("template").and_then(|v| v.as_str()) {
837 return Some(IncludeEntry {
838 kind: "template".into(),
839 target: s.to_string(),
840 git_ref: String::new(),
841 });
842 }
843 if let Some(s) = m.get("component").and_then(|v| v.as_str()) {
844 let (target, git_ref) = match s.rsplit_once('@') {
846 Some((path, ver)) => (path.to_string(), ver.to_string()),
847 None => (s.to_string(), String::new()),
848 };
849 return Some(IncludeEntry {
850 kind: "component".into(),
851 target,
852 git_ref,
853 });
854 }
855 if m.contains_key("project") {
856 let project = str_at("project");
857 let git_ref = str_at("ref");
860 return Some(IncludeEntry {
861 kind: "project".into(),
862 target: project,
863 git_ref,
864 });
865 }
866 None
867}
868
869fn extract_extends_list(v: Option<&Value>) -> Vec<String> {
872 let v = match v {
873 Some(v) => v,
874 None => return Vec::new(),
875 };
876 match v {
877 Value::String(s) => vec![s.clone()],
878 Value::Sequence(seq) => seq
879 .iter()
880 .filter_map(|i| i.as_str().map(str::to_string))
881 .collect(),
882 _ => Vec::new(),
883 }
884}
885
886fn job_services_have_dind(services: Option<&Value>) -> bool {
890 let list = match services.and_then(|v| v.as_sequence()) {
891 Some(s) => s,
892 None => return false,
893 };
894 for item in list {
895 let img = match extract_image_str(item) {
896 Some(s) => s,
897 None => continue,
898 };
899 if image_is_dind(&img) {
900 return true;
901 }
902 }
903 false
904}
905
/// Reports whether an image reference looks like a Docker-in-Docker image.
///
/// The comparison is ASCII case-insensitive and ignores everything from the first `@`
/// onwards. The remaining reference must start with `docker:` or `docker/` and contain
/// `dind`.
fn image_is_dind(image: &str) -> bool {
    let normalized = image.to_ascii_lowercase();
    let reference = normalized
        .split_once('@')
        .map_or(normalized.as_str(), |(head, _)| head);

    let docker_repo = reference.starts_with("docker:") || reference.starts_with("docker/");
    docker_repo && reference.contains("dind")
}
922
923fn classify_trigger(trigger: Option<&Value>) -> Option<&'static str> {
928 let t = trigger?;
929 if t.is_string() {
931 return Some("static");
932 }
933 let m = t.as_mapping()?;
934 if let Some(inc) = m.get("include") {
937 if include_has_artifact_source(inc) {
938 return Some("dynamic");
939 }
940 }
941 Some("static")
942}
943
944fn include_has_artifact_source(v: &Value) -> bool {
947 match v {
948 Value::Mapping(m) => m.contains_key("artifact"),
949 Value::Sequence(seq) => seq.iter().any(|i| {
950 i.as_mapping()
951 .map(|m| m.contains_key("artifact"))
952 .unwrap_or(false)
953 }),
954 _ => false,
955 }
956}
957
958fn extract_cache_key_policy(v: Option<&Value>) -> Option<(String, Option<String>)> {
969 let v = v?;
970 let m = match v {
971 Value::Mapping(m) => m,
972 Value::Sequence(seq) => {
973 return seq
975 .iter()
976 .find_map(|i| i.as_mapping().and_then(extract_cache_key_policy_map));
977 }
978 _ => return None,
979 };
980 extract_cache_key_policy_map(m)
981}
982
983fn extract_cache_key_policy_map(m: &serde_yaml::Mapping) -> Option<(String, Option<String>)> {
984 let key = match m.get("key") {
985 Some(Value::String(s)) => s.clone(),
986 Some(Value::Number(n)) => n.to_string(),
987 Some(Value::Bool(b)) => b.to_string(),
988 Some(Value::Mapping(km)) => {
989 let mut parts = Vec::new();
990 if let Some(prefix) = km.get("prefix").and_then(|v| v.as_str()) {
991 parts.push(format!("prefix:{prefix}"));
992 }
993 if let Some(files) = km.get("files").and_then(|v| v.as_sequence()) {
994 let names: Vec<String> = files
995 .iter()
996 .filter_map(|f| f.as_str().map(str::to_string))
997 .collect();
998 if !names.is_empty() {
999 parts.push(format!("files:{}", names.join(",")));
1000 }
1001 }
1002 if parts.is_empty() {
1003 String::new()
1004 } else {
1005 parts.join(";")
1006 }
1007 }
1008 _ => String::new(),
1009 };
1010 let policy = m.get("policy").and_then(|v| v.as_str()).map(str::to_string);
1011 Some((key, policy))
1012}
1013
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs [`GitlabParser`] over `yaml` as if it were `.gitlab-ci.yml`, panicking on a
    /// parse error.
    fn parse(yaml: &str) -> AuthorityGraph {
        let source = PipelineSource {
            file: ".gitlab-ci.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        GitlabParser.parse(yaml, &source).unwrap()
    }

    /// Counts the `Step` nodes of `graph`.
    fn step_count(graph: &AuthorityGraph) -> usize {
        graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count()
    }

    /// The implicit job token is the only identity of a plain pipeline and carries the
    /// implicit/broad markers.
    #[test]
    fn ci_job_token_always_present() {
        let graph = parse(
            r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#,
        );
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);

        let token = &identities[0];
        assert_eq!(token.name, "CI_JOB_TOKEN");
        assert_eq!(
            token.metadata.get(META_IMPLICIT).map(String::as_str),
            Some("true")
        );
        assert_eq!(
            token.metadata.get(META_IDENTITY_SCOPE).map(String::as_str),
            Some("broad")
        );
    }

    /// Only credential-looking pipeline variables become secrets.
    #[test]
    fn global_credential_variable_emits_secret_node() {
        let graph = parse(
            r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#,
        );
        let secret_names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| &s.name)
            .collect();
        assert!(
            secret_names.iter().any(|name| *name == "DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secret_names
        );
        assert!(
            !secret_names.iter().any(|name| *name == "APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }

    /// A tag-only image reference is untrusted.
    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let graph = parse(
            r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#,
        );
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }

    /// A digest-pinned image is promoted to the third-party zone.
    #[test]
    fn digest_pinned_image_is_third_party() {
        let graph = parse(
            r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#,
        );
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }

    /// Each `id_tokens` entry yields its own OIDC identity.
    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let graph = parse(
            r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#,
        );
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc.iter().map(|n| &n.name).collect::<Vec<_>>()
        );
    }

    /// Every `secrets:` entry becomes a secret, whatever its name.
    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let graph = parse(
            r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#,
        );
        let names: Vec<_> = graph
            .nodes_of_kind(NodeKind::Secret)
            .map(|s| s.name.as_str())
            .collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }

    /// A `rules:` condition on `merge_request_event` sets the graph trigger.
    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let graph = parse(
            r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#,
        );
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }

    /// The legacy `only: [merge_requests]` form sets the graph trigger as well.
    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let graph = parse(
            r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#,
        );
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request")
        );
    }

    /// `include:` cannot be resolved, so the graph is structurally partial.
    #[test]
    fn include_marks_graph_partial() {
        let graph = parse(
            r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#,
        );
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
    }

    /// Both the hidden template and the `extends:` user report structural gaps only.
    #[test]
    fn extends_marks_graph_partial() {
        let graph = parse(
            r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#,
        );
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            graph
                .completeness_gap_kinds
                .iter()
                .all(|k| *k == GapKind::Structural),
            "expected all gaps Structural, got: {:?}",
            graph.completeness_gap_kinds
        );
    }

    /// Every step carries its job name in metadata.
    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let graph = parse(
            r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#,
        );
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }

    /// Reserved top-level keys never turn into steps.
    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let graph = parse(
            r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#,
        );
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }

    /// Both string-form and mapping-form services produce image nodes.
    #[test]
    fn services_emit_image_nodes() {
        let graph = parse(
            r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#,
        );
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(
            images.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            images.iter().map(|i| &i.name).collect::<Vec<_>>()
        );
    }

    /// Top-level keys whose values are lists are not job carriers: no steps are produced
    /// and the graph stays complete.
    ///
    /// NOTE(review): the test name says "marks_partial" but the assertion pins
    /// `Complete`; the name is kept for continuity.
    #[test]
    fn job_carrier_with_unparseable_bodies_marks_partial() {
        let graph = parse(
            r#"
build:
  - this is a list, not a mapping
test:
  - also a list
"#,
        );
        assert_eq!(step_count(&graph), 0);
        assert_eq!(
            graph.completeness,
            AuthorityCompleteness::Complete,
            "non-mapping values are not job carriers"
        );
    }

    /// A file holding only a hidden template produces no steps and is reported as a
    /// structural gap (from the hidden-job skip, not from the zero-step check).
    ///
    /// NOTE(review): the test name describes "mapping jobs without recognisable step
    /// content", whereas the fixture only exercises the hidden-template path.
    #[test]
    fn mapping_jobs_without_recognisable_step_content_marks_partial() {
        let graph = parse(
            r#"
.template-only:
  script:
    - echo "this is a template-only file"
"#,
        );
        assert_eq!(step_count(&graph), 0);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
    }
}