1use std::collections::{HashMap, HashSet};
2
3use serde::{Deserialize, Serialize};
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7#[allow(unused_imports)]
9use taudit_core::graph::{META_DOTENV_FILE, META_ENVIRONMENT_NAME, META_NEEDS, META_SCRIPT_BODY};
10use taudit_core::ports::PipelineParser;
11
/// Stateless parser for GitLab CI pipeline definitions.
///
/// Implements [`PipelineParser`], reporting its platform as `"gitlab-ci"`.
pub struct GitlabParser;
24
/// Top-level keys of a pipeline file that are never treated as job
/// definitions: graph construction skips any root-mapping entry whose key
/// appears in this list.
const RESERVED: &[&str] = &[
    "stages",
    "workflow",
    "include",
    "variables",
    "image",
    "services",
    "default",
    "cache",
    "before_script",
    "after_script",
    "types",
];
39
/// Upper-case name fragments consulted by `is_credential_name`: a variable
/// name is considered a credential when one of these fragments appears in its
/// upper-cased form, bounded on both sides by `_` or the start/end of the name.
const CRED_FRAGMENTS: &[&str] = &[
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
];
55
56impl PipelineParser for GitlabParser {
57 fn platform(&self) -> &str {
58 "gitlab-ci"
59 }
60
61 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
62 let (parse_content, duplicate_recovery_note) = match parse_gitlab_yaml_value(content) {
63 Ok((root, extra_docs, first_doc_was_spec_header)) => {
64 let mut graph = build_graph_from_root(root, source)?;
65 if extra_docs {
66 graph.mark_partial(
67 GapKind::Expression,
68 if first_doc_was_spec_header {
69 "file contains GitLab spec: header plus executable config document — analyzed the executable document and preserved spec: as an unresolved header".to_string()
70 } else {
71 "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string()
72 },
73 );
74 }
75 return Ok(graph);
76 }
77 Err(e) if is_duplicate_key_parse_error(&e) => {
78 let sanitized = sanitize_duplicate_mapping_keys(content);
79 let note = format!(
80 "GitLab YAML contained duplicate mapping keys; later duplicates were preserved as opaque __taudit_duplicate_* keys during recovery ({e})"
81 );
82 (sanitized, Some(note))
83 }
84 Err(e) => return Err(TauditError::Parse(format!("YAML parse error: {e}"))),
85 };
86
87 let (root, extra_docs, first_doc_was_spec_header) = parse_gitlab_yaml_value(&parse_content)
88 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
89 let mut graph = build_graph_from_root(root, source)?;
90 if extra_docs {
91 graph.mark_partial(
92 GapKind::Expression,
93 if first_doc_was_spec_header {
94 "file contains GitLab spec: header plus executable config document — analyzed the executable document and preserved spec: as an unresolved header".to_string()
95 } else {
96 "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string()
97 },
98 );
99 }
100 if let Some(note) = duplicate_recovery_note {
101 graph.mark_partial(GapKind::Structural, note);
102 }
103 Ok(graph)
104 }
105}
106
107fn parse_gitlab_yaml_value(content: &str) -> Result<(Value, bool, bool), serde_yaml::Error> {
108 let mut de = serde_yaml::Deserializer::from_str(content);
109 let Some(doc) = de.next() else {
110 return Ok((Value::Null, false, false));
111 };
112 let first = Value::deserialize(doc)?;
113 let Some(second_doc) = de.next() else {
114 return Ok((first, false, false));
115 };
116 if gitlab_doc_is_spec_header(&first) {
117 return Ok((Value::deserialize(second_doc)?, true, true));
118 }
119 Ok((first, true, false))
120}
121
122fn gitlab_doc_is_spec_header(doc: &Value) -> bool {
123 let Some(map) = doc.as_mapping() else {
124 return false;
125 };
126 map.contains_key("spec")
127}
128
129fn is_duplicate_key_parse_error(error: &serde_yaml::Error) -> bool {
130 error.to_string().contains("duplicate entry with key")
131}
132
/// Rewrites duplicate block-mapping keys so the text can be parsed by a YAML
/// parser that rejects duplicates.
///
/// The scan is line-based. The first occurrence of a key within a mapping is
/// kept; each later occurrence is renamed to
/// `__taudit_duplicate_<sanitized key>_<n>` (with `n` starting at 2), leaving
/// the rest of the line untouched. Lines inside block scalars (`key: |`,
/// `key: >`, `- |`, `- >`), blank lines and comment lines are copied verbatim.
///
/// Key tracking is reset where a new mapping necessarily begins:
/// * a `---` document separator clears all tracked keys, and
/// * a sequence item (`- ...`) discards keys tracked at deeper indentation,
///   so sibling items such as `- name: a` / `- name: b` are not mistaken for
///   duplicates of each other.
///
/// A trailing newline is preserved when `content` ends with one.
fn sanitize_duplicate_mapping_keys(content: &str) -> String {
    /// Keys already seen in the mapping whose keys start at column `indent`.
    struct Frame {
        indent: usize,
        keys: HashSet<String>,
    }

    let mut out: Vec<String> = Vec::new();
    // Invariant: frames are ordered by strictly increasing `indent`.
    let mut frames: Vec<Frame> = Vec::new();
    let mut duplicate_counts: HashMap<(usize, String), usize> = HashMap::new();
    let mut block_scalar_indent: Option<usize> = None;

    for line in content.lines() {
        let indent = line.chars().take_while(|c| *c == ' ').count();
        let trimmed = &line[indent..];

        if let Some(block_indent) = block_scalar_indent {
            if !trimmed.is_empty() && indent <= block_indent {
                // Dedent: the block scalar is over, process this line normally.
                block_scalar_indent = None;
            } else {
                out.push(line.to_string());
                continue;
            }
        }

        if trimmed.is_empty() || trimmed.starts_with('#') {
            out.push(line.to_string());
            continue;
        }

        if indent == 0 && (trimmed == "---" || trimmed.starts_with("--- ")) {
            // New YAML document: keys from the previous one cannot collide.
            frames.clear();
        } else if trimmed == "-" || trimmed.starts_with("- ") {
            // New sequence item: everything nested deeper than the dash
            // belonged to the previous item.
            while frames.last().is_some_and(|frame| frame.indent > indent) {
                frames.pop();
            }
        }

        let Some((key_indent, key_start, key_end, key)) = yaml_mapping_key_span(line, indent)
        else {
            // A block scalar used directly as a list item (`- |`, `- >`):
            // its body is opaque text, not mapping keys.
            if let Some(rest) = trimmed.strip_prefix("- ") {
                if rest.trim_start().starts_with(|c| c == '|' || c == '>') {
                    block_scalar_indent = Some(indent);
                }
            }
            out.push(line.to_string());
            continue;
        };

        while frames.last().is_some_and(|frame| frame.indent > key_indent) {
            frames.pop();
        }
        if frames.last().map(|frame| frame.indent) != Some(key_indent) {
            frames.push(Frame {
                indent: key_indent,
                keys: HashSet::new(),
            });
        }
        let frame = frames
            .last_mut()
            .expect("a frame for key_indent was ensured above");

        if frame.keys.insert(key.clone()) {
            out.push(line.to_string());
        } else {
            let count = duplicate_counts
                .entry((key_indent, key.clone()))
                .and_modify(|n| *n += 1)
                .or_insert(2);
            let replacement = format!(
                "__taudit_duplicate_{}_{}",
                sanitize_key_fragment(&key),
                count
            );
            let mut rewritten = String::with_capacity(line.len() + replacement.len());
            rewritten.push_str(&line[..key_start]);
            rewritten.push_str(&replacement);
            rewritten.push_str(&line[key_end..]);
            out.push(rewritten);
        }

        let value_tail = line[key_end..].trim_start();
        if value_tail.starts_with(": |") || value_tail.starts_with(": >") {
            block_scalar_indent = Some(key_indent);
        }
    }

    let mut sanitized = out.join("\n");
    if content.ends_with('\n') {
        sanitized.push('\n');
    }
    sanitized
}

/// Locates the mapping key on a single YAML line, if the line has one.
///
/// `indent` is the number of leading spaces of `line`. A leading `- `
/// (sequence item marker) is skipped and shifts the reported key indent by
/// two columns. The key ends at the first `:` that is outside quotes and
/// brackets/braces and is followed by whitespace or the end of the line.
///
/// Returns `(key_indent, key_start, key_end, key)` where `key_start..key_end`
/// is the byte range of the trimmed key within `line`, or `None` for comment
/// lines, lines without such a colon, and lines whose key would be empty.
fn yaml_mapping_key_span(line: &str, indent: usize) -> Option<(usize, usize, usize, String)> {
    let trimmed = &line[indent..];
    if trimmed.starts_with('#') {
        return None;
    }

    let (key_indent, key_start, key_text) = match trimmed.strip_prefix("- ") {
        Some(rest) => (indent + 2, indent + 2, rest),
        None => (indent, indent, trimmed),
    };

    let mut in_single = false;
    let mut in_double = false;
    let mut bracket_depth = 0i32;
    let mut prev = '\0';
    for (offset, ch) in key_text.char_indices() {
        match ch {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single && prev != '\\' => in_double = !in_double,
            '[' | '{' if !in_single && !in_double => bracket_depth += 1,
            ']' | '}' if !in_single && !in_double => bracket_depth -= 1,
            ':' if !in_single && !in_double && bracket_depth == 0 => {
                let after = key_text[offset + ch.len_utf8()..].chars().next();
                if after.is_some_and(|c| !c.is_whitespace()) {
                    // e.g. `http://...` or `a:b` — not a key/value separator.
                    prev = ch;
                    continue;
                }
                let raw = &key_text[..offset];
                let key = raw.trim();
                if key.is_empty() {
                    return None;
                }
                let leading = raw.len() - raw.trim_start().len();
                let trailing = raw.trim_end().len();
                return Some((
                    key_indent,
                    key_start + leading,
                    key_start + trailing,
                    key.to_string(),
                ));
            }
            _ => {}
        }
        prev = ch;
    }
    None
}

/// Reduces `key` to a lower-case ASCII identifier fragment: every run of
/// non-alphanumeric characters becomes a single `_`, leading/trailing `_` are
/// removed, and the result is capped at 48 characters.
fn sanitize_key_fragment(key: &str) -> String {
    let mut out = String::with_capacity(key.len());
    for c in key.chars() {
        if c.is_ascii_alphanumeric() {
            out.push(c.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }
    out.trim_matches('_').chars().take(48).collect::<String>()
}
282
/// Builds the authority graph for one parsed GitLab CI document.
///
/// The root must be a mapping. An implicit `CI_JOB_TOKEN` identity node is
/// always created; every non-reserved, non-hidden top-level key whose value is
/// a mapping becomes a `Step` node linked to that token, to the secrets and
/// identities it can reach, and to its image/service nodes.
///
/// # Errors
///
/// Returns [`TauditError::Parse`] when `root` is not a mapping.
fn build_graph_from_root(
    root: Value,
    source: &PipelineSource,
) -> Result<AuthorityGraph, TauditError> {
    let mapping = root
        .as_mapping()
        .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;

    let mut graph = AuthorityGraph::new(source.clone());
    graph.metadata.insert(META_PLATFORM.into(), "gitlab".into());

    // Implicit identity node; every job is linked to it further down.
    let mut meta = HashMap::new();
    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
    meta.insert(META_IMPLICIT.into(), "true".into());
    let token_id = graph.add_node_with_metadata(
        NodeKind::Identity,
        "CI_JOB_TOKEN",
        TrustZone::FirstParty,
        meta,
    );

    // `include:` targets are recorded as JSON metadata but never resolved,
    // so the graph is marked partial whenever the key is present.
    if let Some(inc) = mapping.get("include") {
        graph.mark_partial(
            GapKind::Structural,
            "include: directive present — included templates not resolved".to_string(),
        );
        let entries = extract_include_entries(inc);
        if !entries.is_empty() {
            // A serialization failure is ignored: the metadata is simply omitted.
            if let Ok(json) = serde_json::to_string(&entries) {
                graph.metadata.insert(META_GITLAB_INCLUDES.into(), json);
            }
        }
    }

    // Pipeline-level credential variables; each job gets an edge to these.
    let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");

    // Pipeline-level image, used when a job declares no image of its own.
    let global_image = mapping.get("image").and_then(extract_image_str);

    if let Some(wf) = mapping.get("workflow") {
        if has_mr_trigger_in_workflow(wf) {
            graph
                .metadata
                .insert(META_TRIGGER.into(), "merge_request".into());
        }
    }

    // Visit top-level entries in key order (non-string keys compare as "").
    let mut top_level_entries: Vec<(&Value, &Value)> = mapping.iter().collect();
    top_level_entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
    for (key, value) in top_level_entries {
        let job_name = match key.as_str() {
            Some(k) => k,
            None => continue,
        };
        if RESERVED.contains(&job_name) {
            continue;
        }

        // Dot-prefixed entries are noted as unresolved and produce no step.
        if job_name.starts_with('.') {
            graph.mark_partial(
                GapKind::Structural,
                format!("job '{job_name}' is a hidden/template job — not resolved"),
            );
            continue;
        }

        let job_map = match value.as_mapping() {
            Some(m) => m,
            None => continue,
        };

        // `extends:` parents are recorded by name only; inherited keys are not merged.
        let extends_names = extract_extends_list(job_map.get("extends"));
        if !extends_names.is_empty() {
            graph.mark_partial(
                GapKind::Structural,
                format!("job '{job_name}' uses extends: — inherited configuration not resolved"),
            );
        }

        let job_triggers_mr = job_has_mr_trigger(job_map);

        // Set the graph-level trigger only if nothing set it earlier.
        if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
            graph
                .metadata
                .insert(META_TRIGGER.into(), "merge_request".into());
        }

        let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);

        let explicit_secrets =
            process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);

        let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);

        // Job-level image wins; otherwise fall back to the pipeline-level one.
        let job_image_str = job_map
            .get("image")
            .and_then(extract_image_str)
            .or(global_image.as_deref().map(String::from));

        // Digest-pinned images are ThirdParty, anything else is Untrusted.
        let image_id = job_image_str.as_deref().map(|img| {
            let pinned = is_docker_digest_pinned(img);
            let trust_zone = if pinned {
                TrustZone::ThirdParty
            } else {
                TrustZone::Untrusted
            };
            let mut imeta = HashMap::new();
            if let Some(digest) = img.split("@sha256:").nth(1) {
                imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
            }
            graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
        });

        let service_ids = process_services(job_map.get("services"), &mut graph);

        let env_name = job_map
            .get("environment")
            .and_then(extract_environment_name);
        let env_url = job_map.get("environment").and_then(extract_environment_url);

        let script_body = extract_script_body(job_map);

        let dotenv_file = extract_dotenv_file(job_map);

        let needs = extract_needs(job_map);

        let protected_only = job_has_protected_branch_restriction(job_map);

        // Collect per-job metadata; optional entries are added only when present.
        let mut step_meta = HashMap::new();
        step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
        if let Some(ref env) = env_name {
            step_meta.insert(META_ENVIRONMENT_NAME.into(), env.clone());
        }
        if !script_body.is_empty() {
            step_meta.insert(META_SCRIPT_BODY.into(), script_body);
        }
        if let Some(ref f) = dotenv_file {
            step_meta.insert(META_DOTENV_FILE.into(), f.clone());
        }
        if !needs.is_empty() {
            step_meta.insert(META_NEEDS.into(), needs.join(","));
        }
        if let Some(ref url) = env_url {
            step_meta.insert(META_ENVIRONMENT_URL.into(), url.clone());
        }
        if job_triggers_mr {
            step_meta.insert(META_TRIGGER.into(), "merge_request".into());
        }
        if !extends_names.is_empty() {
            step_meta.insert(META_GITLAB_EXTENDS.into(), extends_names.join(","));
        }
        // `allow_failure` may be a bool or a mapping; the mapping form is recorded as "true".
        if let Some(af) = job_map.get("allow_failure").and_then(|v| v.as_bool()) {
            step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), af.to_string());
        } else if job_map
            .get("allow_failure")
            .and_then(|v| v.as_mapping())
            .is_some()
        {
            step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), "true".into());
        }
        if job_services_have_dind(job_map.get("services")) {
            step_meta.insert(META_GITLAB_DIND_SERVICE.into(), "true".into());
        }
        if let Some(kind) = classify_trigger(job_map.get("trigger")) {
            step_meta.insert(META_GITLAB_TRIGGER_KIND.into(), kind.into());
        }
        if let Some((cache_key, cache_policy)) = extract_cache_key_policy(job_map.get("cache")) {
            step_meta.insert(META_GITLAB_CACHE_KEY.into(), cache_key);
            if let Some(p) = cache_policy {
                step_meta.insert(META_GITLAB_CACHE_POLICY.into(), p);
            }
        }
        if protected_only {
            step_meta.insert(META_RULES_PROTECTED_ONLY.into(), "true".into());
        }
        let step_id = graph.add_node_with_metadata(
            NodeKind::Step,
            job_name,
            TrustZone::FirstParty,
            step_meta,
        );

        // Every job can reach the implicit job token.
        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);

        // Pipeline-level, job-level and explicit `secrets:` entries.
        for &sid in global_secrets
            .iter()
            .chain(&job_secrets)
            .chain(&explicit_secrets)
        {
            graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
        }

        for &iid in &oidc_identities {
            graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
        }

        if let Some(img_id) = image_id {
            graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
        }
        for &svc_id in &service_ids {
            graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
        }
    }

    // Sanity check: mapping-valued, job-like keys exist but no Step node is in
    // the graph — flag the result as opaque rather than silently empty.
    let step_count = graph
        .nodes
        .iter()
        .filter(|n| n.kind == NodeKind::Step)
        .count();
    let had_job_carrier = mapping.iter().any(|(k, v)| {
        k.as_str()
            .map(|name| !RESERVED.contains(&name) && !name.starts_with('.'))
            .unwrap_or(false)
            && v.as_mapping().is_some()
    });
    if step_count == 0 && had_job_carrier {
        graph.mark_partial(
            GapKind::Opaque,
            "non-reserved top-level keys parsed but produced 0 step nodes — possible non-GitLab YAML wrong-platform-classified".to_string(),
        );
    }

    graph.stamp_edge_authority_summaries();
    Ok(graph)
}
571fn extract_image_str(v: &Value) -> Option<String> {
573 match v {
574 Value::String(s) => Some(s.clone()),
575 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
576 _ => None,
577 }
578}
579
580fn extract_environment_name(v: &Value) -> Option<String> {
582 match v {
583 Value::String(s) => Some(s.clone()),
584 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
585 _ => None,
586 }
587}
588
589fn extract_environment_url(v: &Value) -> Option<String> {
591 match v {
592 Value::Mapping(m) => m.get("url").and_then(|u| u.as_str()).map(String::from),
593 _ => None,
594 }
595}
596
597fn extract_script_body(job_map: &serde_yaml::Mapping) -> String {
601 let mut lines: Vec<String> = Vec::new();
602 for key in &["before_script", "script", "after_script"] {
603 if let Some(v) = job_map.get(*key) {
604 collect_script_lines(v, &mut lines);
605 }
606 }
607 lines.join("\n")
608}
609
610fn collect_script_lines(v: &Value, out: &mut Vec<String>) {
612 match v {
613 Value::String(s) => out.push(s.clone()),
614 Value::Sequence(seq) => {
615 for item in seq {
616 if let Some(s) = item.as_str() {
617 out.push(s.to_string());
618 }
619 }
620 }
621 _ => {}
622 }
623}
624
625fn extract_dotenv_file(job_map: &serde_yaml::Mapping) -> Option<String> {
628 let dotenv = job_map
629 .get("artifacts")?
630 .as_mapping()?
631 .get("reports")?
632 .as_mapping()?
633 .get("dotenv")?;
634 match dotenv {
635 Value::String(s) => Some(s.clone()),
636 Value::Sequence(seq) => {
637 let parts: Vec<String> = seq
638 .iter()
639 .filter_map(|v| v.as_str().map(String::from))
640 .collect();
641 if parts.is_empty() {
642 None
643 } else {
644 Some(parts.join(","))
645 }
646 }
647 _ => None,
648 }
649}
650
651fn extract_needs(job_map: &serde_yaml::Mapping) -> Vec<String> {
662 let mut out: Vec<String> = Vec::new();
663 if let Some(needs) = job_map.get("needs").and_then(|v| v.as_sequence()) {
664 for item in needs {
665 match item {
666 Value::String(s) => out.push(s.clone()),
667 Value::Mapping(m) => {
668 let Some(j) = m.get("job").and_then(|j| j.as_str()) else {
669 continue;
670 };
671 let artifacts_disabled =
675 m.get("artifacts").and_then(|v| v.as_bool()) == Some(false);
676 if artifacts_disabled {
677 continue;
678 }
679 out.push(j.to_string());
680 }
681 _ => {}
682 }
683 }
684 }
685 if let Some(deps) = job_map.get("dependencies").and_then(|v| v.as_sequence()) {
686 for item in deps {
687 if let Some(s) = item.as_str() {
688 out.push(s.to_string());
689 }
690 }
691 }
692 out.sort();
693 out.dedup();
694 out
695}
696
697fn check_truthy_comparison(expr: &str, var: &str) -> Option<bool> {
715 let trimmed = expr.trim();
720 if trimmed.is_empty() {
721 return None;
722 }
723
724 if let Some((lhs, rhs)) = split_top_level(trimmed, "||") {
726 let l = check_truthy_comparison(&lhs, var);
727 let r = check_truthy_comparison(&rhs, var);
728 return match (l, r) {
729 (Some(true), _) | (_, Some(true)) => Some(true),
730 (Some(false), Some(false)) => Some(false),
731 _ => None,
732 };
733 }
734 if let Some((lhs, rhs)) = split_top_level(trimmed, "&&") {
738 let l = check_truthy_comparison(&lhs, var);
739 let r = check_truthy_comparison(&rhs, var);
740 return match (l, r) {
741 (Some(false), _) | (_, Some(false)) => Some(false),
742 (Some(true), _) | (_, Some(true)) => Some(true),
743 _ => None,
744 };
745 }
746
747 classify_atom(trimmed, var)
749}
750
/// Splits `expr` at the first occurrence of `op` that sits outside quotes,
/// outside a `/regex/` literal and at parenthesis depth zero.
///
/// Returns the text before and after the operator (untrimmed), or `None`
/// when no such occurrence exists. A `/` opens a regex literal only when the
/// nearest preceding non-whitespace byte is `~`. Inside quotes and regex
/// literals a backslash skips the following byte.
fn split_top_level(expr: &str, op: &str) -> Option<(String, String)> {
    let haystack = expr.as_bytes();
    let needle = op.as_bytes();

    let mut paren_depth: i32 = 0;
    let mut open_quote: Option<u8> = None;
    let mut inside_regex = false;
    let mut pos = 0;

    while pos < haystack.len() {
        let byte = haystack[pos];
        let escapes_next = byte == b'\\' && pos + 1 < haystack.len();

        if let Some(quote) = open_quote {
            if escapes_next {
                pos += 2;
                continue;
            }
            if byte == quote {
                open_quote = None;
            }
            pos += 1;
            continue;
        }

        if inside_regex {
            if escapes_next {
                pos += 2;
                continue;
            }
            if byte == b'/' {
                inside_regex = false;
            }
            pos += 1;
            continue;
        }

        match byte {
            b'"' | b'\'' => {
                open_quote = Some(byte);
                pos += 1;
                continue;
            }
            b'/' => {
                // Look back past whitespace for the `~` of a `=~` / `!~` operator.
                let previous = haystack[..pos]
                    .iter()
                    .rev()
                    .find(|b| !b.is_ascii_whitespace());
                if previous == Some(&b'~') {
                    inside_regex = true;
                    pos += 1;
                    continue;
                }
            }
            b'(' => paren_depth += 1,
            b')' => paren_depth -= 1,
            _ => {}
        }

        if paren_depth == 0 && haystack[pos..].starts_with(needle) {
            let before = expr[..pos].to_string();
            let after = expr[pos + needle.len()..].to_string();
            return Some((before, after));
        }
        pos += 1;
    }
    None
}
821
/// Classifies a single comparison (no `&&` / `||`) with respect to `var`.
///
/// * A bare `var` is `Some(true)`.
/// * `var == lit` / `lit == var` (either side may be quoted) is `Some(true)`
///   for the literals `true` / `1` and `Some(false)` for `false` / `null` /
///   empty / `0` (case-insensitive); `!=` inverts the answer.
/// * Everything else — other operators, other variables, other literals —
///   is `None`.
///
/// Surrounding parentheses and whitespace are stripped first.
fn classify_atom(atom: &str, var: &str) -> Option<bool> {
    /// Strips surrounding double quotes, then surrounding single quotes.
    fn unquote(s: &str) -> &str {
        s.trim_matches('"').trim_matches('\'')
    }

    let expr = atom.trim().trim_matches('(').trim_matches(')').trim();
    if expr == var {
        return Some(true);
    }

    // `==` takes precedence over `!=` when both occur in the text.
    let (negated, lhs, rhs) = match (expr.split_once("=="), expr.split_once("!=")) {
        (Some((l, r)), _) => (false, l.trim(), r.trim()),
        (None, Some((l, r))) => (true, l.trim(), r.trim()),
        (None, None) => return None,
    };

    // Find which side names the variable; the other side is the literal.
    let literal = if lhs == var {
        rhs
    } else if rhs == var {
        lhs
    } else if unquote(lhs) == var {
        rhs
    } else if unquote(rhs) == var {
        lhs
    } else {
        return None;
    };

    let normalized = unquote(literal).to_ascii_lowercase();
    let literal_is_truthy = match normalized.as_str() {
        "true" | "1" => Some(true),
        "false" | "null" | "" | "0" => Some(false),
        _ => None,
    };
    literal_is_truthy.map(|truthy| truthy != negated)
}
878
879fn is_credential_name(name: &str) -> bool {
890 let upper = name.to_uppercase();
891 let bytes = upper.as_bytes();
892 CRED_FRAGMENTS.iter().any(|frag| {
893 let frag_bytes = frag.as_bytes();
894 let n = frag_bytes.len();
895 if bytes.len() < n {
896 return false;
897 }
898 for i in 0..=bytes.len() - n {
900 if &bytes[i..i + n] != frag_bytes {
901 continue;
902 }
903 let left_ok = i == 0 || bytes[i - 1] == b'_';
904 let right_ok = i + n == bytes.len() || bytes[i + n] == b'_';
905 if left_ok && right_ok {
906 return true;
907 }
908 }
909 false
910 })
911}
912
913fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
916 let mut ids = Vec::new();
917 let map = match vars.and_then(|v| v.as_mapping()) {
918 Some(m) => m,
919 None => return ids,
920 };
921 let mut entries: Vec<(&Value, &Value)> = map.iter().collect();
923 entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
924 for (k, _v) in entries {
925 let name = match k.as_str() {
926 Some(s) => s,
927 None => continue,
928 };
929 if is_credential_name(name) {
930 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
931 ids.push(id);
932 let _ = scope; }
934 }
935 ids
936}
937
938fn process_explicit_secrets(
950 secrets: Option<&Value>,
951 _scope: &str,
952 graph: &mut AuthorityGraph,
953) -> Vec<NodeId> {
954 let mut ids = Vec::new();
955 let map = match secrets.and_then(|v| v.as_mapping()) {
956 Some(m) => m,
957 None => return ids,
958 };
959 let mut entries: Vec<(&Value, &Value)> = map.iter().collect();
961 entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
962 for (k, _v) in entries {
963 let name = match k.as_str() {
964 Some(s) => s,
965 None => continue,
966 };
967 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
968 ids.push(id);
969 }
970 ids
971}
972
973fn process_id_tokens(
984 id_tokens: Option<&Value>,
985 _scope: &str,
986 graph: &mut AuthorityGraph,
987) -> Vec<NodeId> {
988 let mut ids = Vec::new();
989 let map = match id_tokens.and_then(|v| v.as_mapping()) {
990 Some(m) => m,
991 None => return ids,
992 };
993 let mut entries: Vec<(&Value, &Value)> = map.iter().collect();
995 entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
996 for (k, v) in entries {
997 let token_name = match k.as_str() {
998 Some(s) => s,
999 None => continue,
1000 };
1001 let aud_value = v.as_mapping().and_then(|m| m.get("aud"));
1006 let (aud_joined, is_list) = match aud_value {
1007 Some(Value::String(s)) => (s.clone(), false),
1008 Some(Value::Sequence(seq)) => {
1009 let parts: Vec<String> = seq
1010 .iter()
1011 .filter_map(|item| match item {
1012 Value::String(s) => Some(s.clone()),
1013 _ => None,
1014 })
1015 .collect();
1016 if parts.is_empty() {
1017 ("unknown".into(), false)
1018 } else {
1019 (parts.join(","), true)
1020 }
1021 }
1022 _ => ("unknown".into(), false),
1023 };
1024 let label = format!("{token_name} (aud={aud_joined})");
1025 let mut meta = HashMap::new();
1026 meta.insert(META_OIDC.into(), "true".into());
1027 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1028 meta.insert(META_OIDC_AUDIENCE.into(), aud_joined.clone());
1032 if is_list {
1036 meta.insert(META_OIDC_AUDIENCES.into(), aud_joined.clone());
1037 }
1038 let id =
1039 graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
1040 ids.push(id);
1041 }
1042 ids
1043}
1044
1045fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
1047 let mut ids = Vec::new();
1048 let list = match services.and_then(|v| v.as_sequence()) {
1049 Some(s) => s,
1050 None => return ids,
1051 };
1052 for item in list {
1053 let img_str = match extract_image_str(item) {
1054 Some(s) => s,
1055 None => continue,
1056 };
1057 let pinned = is_docker_digest_pinned(&img_str);
1058 let trust_zone = if pinned {
1059 TrustZone::ThirdParty
1060 } else {
1061 TrustZone::Untrusted
1062 };
1063 let mut meta = HashMap::new();
1064 if let Some(digest) = img_str.split("@sha256:").nth(1) {
1065 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
1066 }
1067 let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
1068 ids.push(id);
1069 }
1070 ids
1071}
1072
1073fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
1075 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
1077 for rule in rules {
1078 if let Some(if_expr) = rule
1079 .as_mapping()
1080 .and_then(|m| m.get("if"))
1081 .and_then(|v| v.as_str())
1082 {
1083 if matches_mr_event(if_expr) {
1088 return true;
1089 }
1090 }
1091 }
1092 }
1093 if let Some(only) = job_map.get("only") {
1095 if only_has_merge_requests(only) {
1096 return true;
1097 }
1098 }
1099 false
1100}
1101
1102fn only_has_merge_requests(v: &Value) -> bool {
1104 match v {
1105 Value::Sequence(seq) => seq
1106 .iter()
1107 .any(|item| item.as_str() == Some("merge_requests")),
1108 Value::Mapping(m) => {
1109 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
1110 return refs
1111 .iter()
1112 .any(|item| item.as_str() == Some("merge_requests"));
1113 }
1114 false
1115 }
1116 _ => false,
1117 }
1118}
1119
1120fn job_has_protected_branch_restriction(job_map: &serde_yaml::Mapping) -> bool {
1137 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
1138 for rule in rules {
1139 let Some(if_expr) = rule
1140 .as_mapping()
1141 .and_then(|m| m.get("if"))
1142 .and_then(|v| v.as_str())
1143 else {
1144 continue;
1145 };
1146 if matches!(
1150 check_truthy_comparison(if_expr, "$CI_COMMIT_REF_PROTECTED"),
1151 Some(true)
1152 ) {
1153 return true;
1154 }
1155 if if_expr.contains("$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH")
1156 || if_expr.contains("$CI_DEFAULT_BRANCH == $CI_COMMIT_BRANCH")
1157 {
1158 return true;
1159 }
1160 if matches!(
1165 check_truthy_comparison(if_expr, "$CI_COMMIT_TAG"),
1166 Some(true)
1167 ) {
1168 return true;
1169 }
1170 }
1171 }
1172 if let Some(only) = job_map.get("only") {
1173 if only_lists_protected_ref(only) {
1174 return true;
1175 }
1176 }
1177 false
1178}
1179
1180fn only_lists_protected_ref(v: &Value) -> bool {
1184 fn is_protected_ref(s: &str) -> bool {
1185 matches!(s, "main" | "master" | "tags") || s.starts_with("/^release")
1186 }
1187 match v {
1188 Value::String(s) => is_protected_ref(s.as_str()),
1189 Value::Sequence(seq) => seq
1190 .iter()
1191 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false)),
1192 Value::Mapping(m) => {
1193 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
1194 return refs
1195 .iter()
1196 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false));
1197 }
1198 false
1199 }
1200 _ => false,
1201 }
1202}
1203
1204fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
1206 let rules = match wf
1207 .as_mapping()
1208 .and_then(|m| m.get("rules"))
1209 .and_then(|r| r.as_sequence())
1210 {
1211 Some(r) => r,
1212 None => return false,
1213 };
1214 for rule in rules {
1215 if let Some(if_expr) = rule
1216 .as_mapping()
1217 .and_then(|m| m.get("if"))
1218 .and_then(|v| v.as_str())
1219 {
1220 if matches_mr_event(if_expr) {
1223 return true;
1224 }
1225 }
1226 }
1227 false
1228}
1229
1230fn matches_mr_event(if_expr: &str) -> bool {
1236 fn atom_is_mr_event(atom: &str) -> bool {
1241 let s = atom.trim().trim_matches('(').trim_matches(')').trim();
1242 let (lhs, rhs) = match s.split_once("==") {
1243 Some(parts) => parts,
1244 None => return false,
1245 };
1246 let lhs = lhs.trim();
1247 let rhs_norm = rhs.trim().trim_matches('"').trim_matches('\'');
1248 let lhs_unq = lhs.trim_matches('"').trim_matches('\'');
1250 let rhs_raw = rhs.trim().trim_matches('"').trim_matches('\'');
1251 if (lhs_unq == "$CI_PIPELINE_SOURCE" && rhs_norm == "merge_request_event")
1252 || (rhs_raw == "$CI_PIPELINE_SOURCE" && lhs_unq == "merge_request_event")
1253 {
1254 return true;
1255 }
1256 false
1257 }
1258 let trimmed = if_expr.trim();
1259 if let Some((lhs, rhs)) = split_top_level(trimmed, "||") {
1261 return atom_is_mr_event(&lhs) || matches_mr_event(&rhs);
1262 }
1263 if let Some((lhs, rhs)) = split_top_level(trimmed, "&&") {
1268 return atom_is_mr_event(&lhs) || matches_mr_event(&rhs);
1269 }
1270 atom_is_mr_event(trimmed)
1271}
1272
/// One entry extracted from a pipeline `include:` directive; serialized to
/// JSON and stored in the graph metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IncludeEntry {
    /// Include flavour: `"local"`, `"remote"`, `"template"`, `"component"`
    /// or `"project"`.
    pub kind: String,
    /// The included path, URL, template name, component path (without its
    /// `@version` suffix) or project path, depending on `kind`.
    pub target: String,
    /// For components, the text after the last `@`; for project includes,
    /// the `ref` value; empty in every other case.
    pub git_ref: String,
}
1290
1291pub fn extract_include_entries(v: &Value) -> Vec<IncludeEntry> {
1297 let mut out = Vec::new();
1298 match v {
1299 Value::String(s) => {
1301 out.push(IncludeEntry {
1302 kind: classify_string_include(s).into(),
1303 target: s.clone(),
1304 git_ref: String::new(),
1305 });
1306 }
1307 Value::Sequence(seq) => {
1308 for item in seq {
1309 match item {
1310 Value::String(s) => {
1311 out.push(IncludeEntry {
1312 kind: classify_string_include(s).into(),
1313 target: s.clone(),
1314 git_ref: String::new(),
1315 });
1316 }
1317 Value::Mapping(m) => {
1318 if let Some(e) = include_entry_from_mapping(m) {
1319 out.push(e);
1320 }
1321 }
1322 _ => {}
1323 }
1324 }
1325 }
1326 Value::Mapping(m) => {
1327 if let Some(e) = include_entry_from_mapping(m) {
1328 out.push(e);
1329 }
1330 }
1331 _ => {}
1332 }
1333 out
1334}
1335
/// Classifies a bare-string include: `"remote"` when it starts with
/// `http://` or `https://` (ASCII case-insensitive), `"local"` otherwise.
fn classify_string_include(s: &str) -> &'static str {
    let has_scheme = |scheme: &str| {
        s.get(..scheme.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(scheme))
    };

    if has_scheme("http://") || has_scheme("https://") {
        "remote"
    } else {
        "local"
    }
}
1346
1347fn include_entry_from_mapping(m: &serde_yaml::Mapping) -> Option<IncludeEntry> {
1351 let str_at = |key: &str| {
1352 m.get(key)
1353 .and_then(|v| v.as_str())
1354 .map(str::to_string)
1355 .unwrap_or_default()
1356 };
1357 if let Some(s) = m.get("local").and_then(|v| v.as_str()) {
1358 return Some(IncludeEntry {
1359 kind: "local".into(),
1360 target: s.to_string(),
1361 git_ref: String::new(),
1362 });
1363 }
1364 if let Some(s) = m.get("remote").and_then(|v| v.as_str()) {
1365 return Some(IncludeEntry {
1366 kind: "remote".into(),
1367 target: s.to_string(),
1368 git_ref: String::new(),
1369 });
1370 }
1371 if let Some(s) = m.get("template").and_then(|v| v.as_str()) {
1372 return Some(IncludeEntry {
1373 kind: "template".into(),
1374 target: s.to_string(),
1375 git_ref: String::new(),
1376 });
1377 }
1378 if let Some(s) = m.get("component").and_then(|v| v.as_str()) {
1379 let (target, git_ref) = match s.rsplit_once('@') {
1381 Some((path, ver)) => (path.to_string(), ver.to_string()),
1382 None => (s.to_string(), String::new()),
1383 };
1384 return Some(IncludeEntry {
1385 kind: "component".into(),
1386 target,
1387 git_ref,
1388 });
1389 }
1390 if m.contains_key("project") {
1391 let project = str_at("project");
1392 let git_ref = str_at("ref");
1395 return Some(IncludeEntry {
1396 kind: "project".into(),
1397 target: project,
1398 git_ref,
1399 });
1400 }
1401 None
1402}
1403
1404fn extract_extends_list(v: Option<&Value>) -> Vec<String> {
1407 let v = match v {
1408 Some(v) => v,
1409 None => return Vec::new(),
1410 };
1411 match v {
1412 Value::String(s) => vec![s.clone()],
1413 Value::Sequence(seq) => seq
1414 .iter()
1415 .filter_map(|i| i.as_str().map(str::to_string))
1416 .collect(),
1417 _ => Vec::new(),
1418 }
1419}
1420
1421fn job_services_have_dind(services: Option<&Value>) -> bool {
1425 let list = match services.and_then(|v| v.as_sequence()) {
1426 Some(s) => s,
1427 None => return false,
1428 };
1429 for item in list {
1430 let img = match extract_image_str(item) {
1431 Some(s) => s,
1432 None => continue,
1433 };
1434 if image_is_dind(&img) {
1435 return true;
1436 }
1437 }
1438 false
1439}
1440
/// Reports whether an image reference looks like a Docker-in-Docker image.
///
/// The check is ASCII case-insensitive. Everything from the first `@` on
/// (a digest pin) is ignored. What remains must start with `docker:` or
/// `docker/` and contain the substring `dind`.
fn image_is_dind(image: &str) -> bool {
    let normalized = image.to_ascii_lowercase();
    // `split` always yields at least one piece, so the fallback is only a
    // formality; the first piece is the reference without any digest.
    let reference = normalized
        .split('@')
        .next()
        .unwrap_or(normalized.as_str());

    let is_docker_family = reference.starts_with("docker:") || reference.starts_with("docker/");
    is_docker_family && reference.contains("dind")
}
1457
1458fn classify_trigger(trigger: Option<&Value>) -> Option<&'static str> {
1463 let t = trigger?;
1464 if t.is_string() {
1466 return Some("static");
1467 }
1468 let m = t.as_mapping()?;
1469 if let Some(inc) = m.get("include") {
1472 if include_has_artifact_source(inc) {
1473 return Some("dynamic");
1474 }
1475 }
1476 Some("static")
1477}
1478
1479fn include_has_artifact_source(v: &Value) -> bool {
1482 match v {
1483 Value::Mapping(m) => m.contains_key("artifact"),
1484 Value::Sequence(seq) => seq.iter().any(|i| {
1485 i.as_mapping()
1486 .map(|m| m.contains_key("artifact"))
1487 .unwrap_or(false)
1488 }),
1489 _ => false,
1490 }
1491}
1492
1493fn extract_cache_key_policy(v: Option<&Value>) -> Option<(String, Option<String>)> {
1504 let v = v?;
1505 let m = match v {
1506 Value::Mapping(m) => m,
1507 Value::Sequence(seq) => {
1508 return seq
1510 .iter()
1511 .find_map(|i| i.as_mapping().and_then(extract_cache_key_policy_map));
1512 }
1513 _ => return None,
1514 };
1515 extract_cache_key_policy_map(m)
1516}
1517
1518fn extract_cache_key_policy_map(m: &serde_yaml::Mapping) -> Option<(String, Option<String>)> {
1519 let key = match m.get("key") {
1520 Some(Value::String(s)) => s.clone(),
1521 Some(Value::Number(n)) => n.to_string(),
1522 Some(Value::Bool(b)) => b.to_string(),
1523 Some(Value::Mapping(km)) => {
1524 let mut parts = Vec::new();
1525 if let Some(prefix) = km.get("prefix").and_then(|v| v.as_str()) {
1526 parts.push(format!("prefix:{prefix}"));
1527 }
1528 if let Some(files) = km.get("files").and_then(|v| v.as_sequence()) {
1529 let names: Vec<String> = files
1530 .iter()
1531 .filter_map(|f| f.as_str().map(str::to_string))
1532 .collect();
1533 if !names.is_empty() {
1534 parts.push(format!("files:{}", names.join(",")));
1535 }
1536 }
1537 if parts.is_empty() {
1538 String::new()
1539 } else {
1540 parts.join(";")
1541 }
1542 }
1543 _ => String::new(),
1544 };
1545 let policy = m.get("policy").and_then(|v| v.as_str()).map(str::to_string);
1546 Some((key, policy))
1547}
1548
1549#[cfg(test)]
1550mod tests {
1551 use super::*;
1552
1553 fn parse(yaml: &str) -> AuthorityGraph {
1554 let parser = GitlabParser;
1555 let source = PipelineSource {
1556 file: ".gitlab-ci.yml".into(),
1557 repo: None,
1558 git_ref: None,
1559 commit_sha: None,
1560 };
1561 parser.parse(yaml, &source).unwrap()
1562 }
1563
    /// A pipeline with a single plain job must yield exactly one Identity
    /// node, named `CI_JOB_TOKEN`, carrying `META_IMPLICIT = "true"` and
    /// `META_IDENTITY_SCOPE = "broad"`.
    #[test]
    fn ci_job_token_always_present() {
        let yaml = r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);
        assert_eq!(identities[0].name, "CI_JOB_TOKEN");
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IMPLICIT)
                .map(String::as_str),
            Some("true")
        );
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IDENTITY_SCOPE)
                .map(String::as_str),
            Some("broad")
        );
    }
1594
    /// Of the two global `variables:` entries, `DEPLOY_TOKEN` must appear as
    /// a Secret node while `APP_VERSION` must not.
    #[test]
    fn global_credential_variable_emits_secret_node() {
        let yaml = r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|s| s.name == "DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        // Negative half of the check: the non-credential variable stays out.
        assert!(
            !secrets.iter().any(|s| s.name == "APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }
1619
    /// A job image given as `alpine:latest` must produce exactly one Image
    /// node with that name, placed in `TrustZone::Untrusted`.
    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let yaml = r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }
1634
    /// A job image pinned by `@sha256:` digest must produce exactly one
    /// Image node, placed in `TrustZone::ThirdParty`.
    #[test]
    fn digest_pinned_image_is_third_party() {
        let yaml = r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }
1648
    /// Each of the two `id_tokens:` entries must produce an Identity node
    /// carrying `META_OIDC = "true"`.
    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let yaml = r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        // Only identities flagged as OIDC are counted; other Identity nodes
        // in the graph are filtered out here.
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc.iter().map(|n| &n.name).collect::<Vec<_>>()
        );
    }
1673
    /// Both entries under a job's `secrets:` block (one `vault`, one
    /// `aws_secrets_manager`) must appear as Secret nodes named after their
    /// keys.
    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let yaml = r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        let names: Vec<_> = secrets.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }
1693
    /// A `rules: if:` clause comparing `$CI_PIPELINE_SOURCE` for equality
    /// with `"merge_request_event"` must set graph-level `META_TRIGGER` to
    /// `"merge_request"`.
    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let yaml = r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }
1710
    /// The `only: [merge_requests]` form must also set graph-level
    /// `META_TRIGGER` to `"merge_request"`.
    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let yaml = r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request")
        );
    }
1726
    /// A top-level `include:` must mark the graph `Partial`, with
    /// `GapKind::Structural` as the first recorded gap kind.
    #[test]
    fn include_marks_graph_partial() {
        let yaml = r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
    }
1741
    /// A job using `extends:` must mark the graph `Partial`, and every
    /// recorded gap kind must be `GapKind::Structural`.
    #[test]
    fn extends_marks_graph_partial() {
        let yaml = r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            // `all` is vacuously true on an empty list; the `Partial`
            // assertion above is what proves a gap was recorded at all.
            graph
                .completeness_gap_kinds
                .iter()
                .all(|k| *k == GapKind::Structural),
            "expected all gaps Structural, got: {:?}",
            graph.completeness_gap_kinds
        );
    }
1766
    /// Two jobs must yield two Step nodes, each carrying `META_JOB_NAME`,
    /// and the recorded job names must include `build` and `deploy`.
    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let yaml = r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        // The `unwrap` below is safe: presence was asserted in the loop above.
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }
1795
    /// Top-level `stages`, `variables` and `image` must not become Step
    /// nodes; only the `build` job does.
    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let yaml = r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }
1823
    /// Both forms of a `services:` entry, a bare string and a mapping with
    /// `name:`, must each produce an Image node.
    #[test]
    fn services_emit_image_nodes() {
        let yaml = r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(
            images.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            images.iter().map(|i| &i.name).collect::<Vec<_>>()
        );
    }
1843
    /// Top-level keys whose values are lists rather than mappings must
    /// produce no Step nodes and leave the graph `Complete`.
    ///
    /// NOTE(review): the function name says "marks_partial" but the
    /// assertion expects `Complete`. The name looks like a leftover from an
    /// earlier expectation; confirm and rename if so.
    #[test]
    fn job_carrier_with_unparseable_bodies_marks_partial() {
        let yaml = r#"
build:
  - this is a list, not a mapping
test:
  - also a list
"#;
        let graph = parse(yaml);
        let step_count = graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count();
        assert_eq!(step_count, 0);
        assert_eq!(
            graph.completeness,
            AuthorityCompleteness::Complete,
            "non-mapping values are not job carriers"
        );
    }
1877
    /// `META_RULES_PROTECTED_ONLY` must be stamped on the step only for a
    /// positive protected-ref comparison.
    ///
    /// Four fixtures are checked:
    /// * `$CI_COMMIT_REF_PROTECTED == "true"`: stamped with `"true"`;
    /// * `$CI_COMMIT_REF_PROTECTED == "false"`: not stamped;
    /// * `$CI_COMMIT_REF_PROTECTED != "true"`: not stamped;
    /// * `$CI_COMMIT_TAG_MESSAGE == "release"`: not stamped (a variable that
    ///   merely shares the `$CI_COMMIT_TAG` prefix).
    #[test]
    fn protected_ref_only_stamps_meta_when_truly_positive() {
        let positive = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED == "true"'
  script:
    - deploy.sh
"#;
        let graph = parse(positive);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert_eq!(
            step.metadata
                .get(META_RULES_PROTECTED_ONLY)
                .map(String::as_str),
            Some("true"),
            "positive == \"true\" comparison must stamp META_RULES_PROTECTED_ONLY"
        );

        let negation = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED == "false"'
  script:
    - deploy.sh
"#;
        let graph = parse(negation);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert!(
            !step.metadata.contains_key(META_RULES_PROTECTED_ONLY),
            "== \"false\" is the OPPOSITE signal — must NOT stamp META_RULES_PROTECTED_ONLY (got: {:?})",
            step.metadata.get(META_RULES_PROTECTED_ONLY)
        );

        let inequality = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED != "true"'
  script:
    - deploy.sh
"#;
        let graph = parse(inequality);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert!(
            !step.metadata.contains_key(META_RULES_PROTECTED_ONLY),
            "!= \"true\" is a negation — must NOT stamp META_RULES_PROTECTED_ONLY"
        );

        let tag_message_trap = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_TAG_MESSAGE == "release"'
  script:
    - deploy.sh
"#;
        let graph = parse(tag_message_trap);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert!(
            !step.metadata.contains_key(META_RULES_PROTECTED_ONLY),
            "$CI_COMMIT_TAG_MESSAGE must not match the $CI_COMMIT_TAG predicate"
        );
    }
1948
    /// `$CI_PIPELINE_SOURCE != "merge_request_event"` must not stamp
    /// `META_TRIGGER = "merge_request"` on the graph or on the step, while
    /// the `==` form must still stamp it on the graph.
    #[test]
    fn mr_trigger_detection_rejects_negation() {
        let negation = r#"
build:
  rules:
    - if: '$CI_PIPELINE_SOURCE != "merge_request_event"'
  script:
    - make build
"#;
        let graph = parse(negation);
        assert!(
            graph.metadata.get(META_TRIGGER).map(String::as_str) != Some("merge_request"),
            "negation form must not stamp META_TRIGGER=merge_request, got: {:?}",
            graph.metadata.get(META_TRIGGER)
        );
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 1);
        assert!(
            steps[0].metadata.get(META_TRIGGER).map(String::as_str) != Some("merge_request"),
            "negation form must not stamp per-step META_TRIGGER=merge_request"
        );

        // Control case: the equality form keeps working.
        let positive = r#"
build:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - make build
"#;
        let graph = parse(positive);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "positive form must still stamp META_TRIGGER=merge_request"
        );
    }
1989
    /// Covers both shapes of `id_tokens.<name>.aud`.
    ///
    /// List form: exactly one OIDC identity, with `META_OIDC_AUDIENCES` and
    /// `META_OIDC_AUDIENCE` both set to the comma-joined audiences, and a
    /// node name containing `aud=https://aws`.
    ///
    /// Scalar form: `META_OIDC_AUDIENCE` holds the single audience and
    /// `META_OIDC_AUDIENCES` is absent.
    #[test]
    fn id_tokens_aud_list_form_creates_audiences_metadata() {
        let yaml = r#"
deploy:
  id_tokens:
    MULTI_CLOUD_TOKEN:
      aud:
        - https://aws.amazonaws.com
        - https://gcp.googleapis.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(oidc.len(), 1);
        assert_eq!(
            oidc[0]
                .metadata
                .get(META_OIDC_AUDIENCES)
                .map(String::as_str),
            Some("https://aws.amazonaws.com,https://gcp.googleapis.com"),
            "list-form aud must stamp comma-joined META_OIDC_AUDIENCES"
        );
        // The singular key carries the same comma-joined value in list form.
        assert_eq!(
            oidc[0].metadata.get(META_OIDC_AUDIENCE).map(String::as_str),
            Some("https://aws.amazonaws.com,https://gcp.googleapis.com"),
        );
        assert!(oidc[0].name.contains("aud=https://aws"));

        let scalar = r#"
deploy:
  id_tokens:
    AWS_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(scalar);
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        // No length assertion here: a missing OIDC node surfaces as an
        // index-out-of-bounds panic on `oidc[0]`.
        assert_eq!(
            oidc[0].metadata.get(META_OIDC_AUDIENCE).map(String::as_str),
            Some("https://sts.amazonaws.com")
        );
        assert!(
            !oidc[0].metadata.contains_key(META_OIDC_AUDIENCES),
            "scalar form must NOT set the plural META_OIDC_AUDIENCES marker"
        );
    }
2050
2051 #[test]
2056 fn is_credential_name_boundary_checks() {
2057 assert!(!is_credential_name("CERTAIN_FLAG"));
2059 assert!(!is_credential_name("TOKENIZER_VERSION"));
2060 assert!(!is_credential_name("UNCERTAIN"));
2061 assert!(!is_credential_name("CERTIFICATE_PATH"));
2062 assert!(!is_credential_name("TOKEN1"));
2063 assert!(!is_credential_name("CERTIFICATE"));
2064
2065 assert!(is_credential_name("API_TOKEN"));
2067 assert!(is_credential_name("MY_CERT"));
2068 assert!(is_credential_name("DB_PASSWORD"));
2069 assert!(is_credential_name("DEPLOY_TOKEN"));
2070 assert!(is_credential_name("SIGNING_KEY"));
2071 assert!(is_credential_name("AWS_SECRET_ACCESS_KEY"));
2072 assert!(is_credential_name("TOKEN"));
2073 assert!(is_credential_name("CERT"));
2074 assert!(is_credential_name("PRIVATE_KEY"));
2075 assert!(is_credential_name("CREDENTIAL"));
2076 }
2077
    /// A `needs:` entry with `artifacts: false` must keep the upstream job
    /// out of the consumer step's `META_NEEDS` list; the same entry without
    /// an `artifacts` key must keep it in.
    ///
    /// `META_NEEDS` is read as a comma-separated list, and a missing value is
    /// treated as empty.
    #[test]
    fn needs_artifacts_false_excludes_dotenv_flow() {
        let yaml = r#"
build:
  artifacts:
    reports:
      dotenv: build.env
  script:
    - make build
deploy:
  needs:
    - job: build
      artifacts: false
  script:
    - kubectl apply
"#;
        let graph = parse(yaml);
        let deploy_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.metadata.get(META_JOB_NAME).map(String::as_str) == Some("deploy"))
            .expect("deploy step present");
        let needs_csv = deploy_step
            .metadata
            .get(META_NEEDS)
            .map(String::as_str)
            .unwrap_or("");
        assert!(
            !needs_csv.split(',').any(|s| s == "build"),
            "build must be excluded from META_NEEDS when artifacts: false (got: {needs_csv:?})"
        );

        // Control case: no `artifacts` key on the needs entry.
        let yaml_default = r#"
build:
  artifacts:
    reports:
      dotenv: build.env
  script:
    - make build
deploy:
  needs:
    - job: build
  script:
    - kubectl apply
"#;
        let graph = parse(yaml_default);
        let deploy_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.metadata.get(META_JOB_NAME).map(String::as_str) == Some("deploy"))
            .expect("deploy step present");
        let needs_csv = deploy_step
            .metadata
            .get(META_NEEDS)
            .map(String::as_str)
            .unwrap_or("");
        assert!(
            needs_csv.split(',').any(|s| s == "build"),
            "default (artifacts implicitly true) must keep build in META_NEEDS (got: {needs_csv:?})"
        );
    }
2142
    /// Parsing the same document repeatedly must yield the same sequence of
    /// `(kind, name)` pairs in `graph.nodes`.
    ///
    /// The fixture deliberately lists jobs and variables out of alphabetical
    /// order. One baseline parse is compared against nine further parses.
    #[test]
    fn gitlab_mapping_iteration_is_deterministic_across_runs() {
        let yaml = r#"
zeta-job:
  variables:
    ZZ_TOKEN: "$CI_TOKEN"
    AA_PASSWORD: "x"
    MM_SECRET: "y"
  script:
    - echo zeta
alpha-job:
  variables:
    QQ_TOKEN: "$CI_TOKEN"
    BB_API_KEY: "z"
  script:
    - echo alpha
mid-job:
  variables:
    NN_PRIVATE_KEY: "k"
    GG_SIGNING_KEY: "j"
  script:
    - echo mid
"#;
        let canonical: Vec<(NodeKind, String)> = parse(yaml)
            .nodes
            .iter()
            .map(|n| (n.kind, n.name.clone()))
            .collect();
        for run in 0..9 {
            let again: Vec<(NodeKind, String)> = parse(yaml)
                .nodes
                .iter()
                .map(|n| (n.kind, n.name.clone()))
                .collect();
            assert_eq!(
                again, canonical,
                "run {run}: NodeId order must be byte-identical across runs"
            );
        }
    }
2187
    /// A document whose only top-level entry is the dot-prefixed
    /// `.template-only` mapping must produce no Step nodes and mark the
    /// graph `Partial`, with `GapKind::Structural` as the first gap kind.
    #[test]
    fn mapping_jobs_without_recognisable_step_content_marks_partial() {
        let yaml = r#"
.template-only:
  script:
    - echo "this is a template-only file"
"#;
        let graph = parse(yaml);
        let step_count = graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count();
        assert_eq!(step_count, 0);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
    }
2215}