1use std::collections::{HashMap, HashSet};
2
3use serde::{Deserialize, Serialize};
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7#[allow(unused_imports)]
9use taudit_core::graph::{META_DOTENV_FILE, META_ENVIRONMENT_NAME, META_NEEDS, META_SCRIPT_BODY};
10use taudit_core::ports::PipelineParser;
11
/// Stateless parser for GitLab CI pipeline definitions.
///
/// Its `PipelineParser` implementation reports the platform as `"gitlab-ci"`
/// and turns YAML text into an `AuthorityGraph`.
pub struct GitlabParser;
24
/// Top-level keys that are never interpreted as jobs.
///
/// `build_graph_from_root` skips every top-level entry whose name appears in
/// this list when it walks the root mapping looking for job definitions.
const RESERVED: &[&str] = &[
    "stages",
    "workflow",
    "include",
    "variables",
    "image",
    "services",
    "default",
    "cache",
    "before_script",
    "after_script",
    "types",
];
39
/// Upper-case name fragments that mark a variable name as credential-like.
///
/// `is_credential_name` upper-cases the candidate name and accepts it when one
/// of these fragments occurs delimited by `_` or by the ends of the name.
const CRED_FRAGMENTS: &[&str] = &[
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
];
55
56impl PipelineParser for GitlabParser {
57 fn platform(&self) -> &str {
58 "gitlab-ci"
59 }
60
61 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
62 let (parse_content, duplicate_recovery_note) = match parse_gitlab_yaml_value(content) {
63 Ok((root, extra_docs, first_doc_was_spec_header)) => {
64 let mut graph = build_graph_from_root(root, source)?;
65 if extra_docs {
66 graph.mark_partial(
67 GapKind::Expression,
68 if first_doc_was_spec_header {
69 "file contains GitLab spec: header plus executable config document — analyzed the executable document and preserved spec: as an unresolved header".to_string()
70 } else {
71 "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string()
72 },
73 );
74 }
75 return Ok(graph);
76 }
77 Err(e) if is_duplicate_key_parse_error(&e) => {
78 let sanitized = sanitize_duplicate_mapping_keys(content);
79 let note = format!(
80 "GitLab YAML contained duplicate mapping keys; later duplicates were preserved as opaque __taudit_duplicate_* keys during recovery ({e})"
81 );
82 (sanitized, Some(note))
83 }
84 Err(e) => return Err(TauditError::Parse(format!("YAML parse error: {e}"))),
85 };
86
87 let (root, extra_docs, first_doc_was_spec_header) = parse_gitlab_yaml_value(&parse_content)
88 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
89 let mut graph = build_graph_from_root(root, source)?;
90 if extra_docs {
91 graph.mark_partial(
92 GapKind::Expression,
93 if first_doc_was_spec_header {
94 "file contains GitLab spec: header plus executable config document — analyzed the executable document and preserved spec: as an unresolved header".to_string()
95 } else {
96 "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string()
97 },
98 );
99 }
100 if let Some(note) = duplicate_recovery_note {
101 graph.mark_partial(GapKind::Structural, note);
102 }
103 Ok(graph)
104 }
105}
106
107fn parse_gitlab_yaml_value(content: &str) -> Result<(Value, bool, bool), serde_yaml::Error> {
108 let mut de = serde_yaml::Deserializer::from_str(content);
109 let Some(doc) = de.next() else {
110 return Ok((Value::Null, false, false));
111 };
112 let first = Value::deserialize(doc)?;
113 let Some(second_doc) = de.next() else {
114 return Ok((first, false, false));
115 };
116 if gitlab_doc_is_spec_header(&first) {
117 return Ok((Value::deserialize(second_doc)?, true, true));
118 }
119 Ok((first, true, false))
120}
121
122fn gitlab_doc_is_spec_header(doc: &Value) -> bool {
123 let Some(map) = doc.as_mapping() else {
124 return false;
125 };
126 map.contains_key("spec")
127}
128
129fn is_duplicate_key_parse_error(error: &serde_yaml::Error) -> bool {
130 error.to_string().contains("duplicate entry with key")
131}
132
133fn sanitize_duplicate_mapping_keys(content: &str) -> String {
134 #[derive(Default)]
135 struct Frame {
136 indent: usize,
137 keys: HashSet<String>,
138 }
139
140 let mut out = Vec::new();
141 let mut frames: Vec<Frame> = Vec::new();
142 let mut duplicate_counts: HashMap<(usize, String), usize> = HashMap::new();
143 let mut block_scalar_indent: Option<usize> = None;
144
145 for line in content.lines() {
146 let indent = line.chars().take_while(|c| *c == ' ').count();
147 let trimmed = &line[indent..];
148
149 if let Some(block_indent) = block_scalar_indent {
150 if !trimmed.is_empty() && indent <= block_indent {
151 block_scalar_indent = None;
152 } else {
153 out.push(line.to_string());
154 continue;
155 }
156 }
157
158 if trimmed.is_empty() || trimmed.starts_with('#') {
159 out.push(line.to_string());
160 continue;
161 }
162
163 let (key_indent, key_start, key_end, key) = match yaml_mapping_key_span(line, indent) {
164 Some(parts) => parts,
165 None => {
166 out.push(line.to_string());
167 continue;
168 }
169 };
170
171 while frames.last().is_some_and(|frame| frame.indent > key_indent) {
172 frames.pop();
173 }
174 if !frames.iter().any(|frame| frame.indent == key_indent) {
175 frames.push(Frame {
176 indent: key_indent,
177 keys: HashSet::new(),
178 });
179 }
180 let frame = frames
181 .iter_mut()
182 .rev()
183 .find(|frame| frame.indent == key_indent)
184 .expect("frame inserted above");
185
186 if frame.keys.insert(key.clone()) {
187 out.push(line.to_string());
188 } else {
189 let count = duplicate_counts
190 .entry((key_indent, key.clone()))
191 .and_modify(|n| *n += 1)
192 .or_insert(2);
193 let replacement = format!(
194 "__taudit_duplicate_{}_{}",
195 sanitize_key_fragment(&key),
196 count
197 );
198 let mut rewritten = String::with_capacity(line.len() + replacement.len());
199 rewritten.push_str(&line[..key_start]);
200 rewritten.push_str(&replacement);
201 rewritten.push_str(&line[key_end..]);
202 out.push(rewritten);
203 }
204
205 let value_tail = line[key_end..].trim_start();
206 if value_tail.starts_with(": |") || value_tail.starts_with(": >") {
207 block_scalar_indent = Some(key_indent);
208 }
209 }
210
211 let mut sanitized = out.join("\n");
212 if content.ends_with('\n') {
213 sanitized.push('\n');
214 }
215 sanitized
216}
217
/// Locates the mapping key on a single YAML line, if there is one.
///
/// `indent` is the number of leading spaces of `line`. A leading `- ` (sequence
/// item marker) is skipped and shifts the key column by two.
///
/// Returns `(key_indent, start, end, key)` where `key_indent` is the column the
/// key's mapping is keyed on, `start..end` is the byte range of the trimmed key
/// text within `line`, and `key` is that text (quotes included). Returns `None`
/// for comment lines, lines without a `:` that is followed by whitespace or the
/// end of the line, and lines whose key text is empty.
///
/// A `:` inside quotes or inside `[...]` / `{...}` is not treated as the key
/// separator. Inside double quotes a backslash escapes exactly one following
/// character, so a key such as `"a\\"` ends at its final quote.
fn yaml_mapping_key_span(line: &str, indent: usize) -> Option<(usize, usize, usize, String)> {
    let trimmed = &line[indent..];
    if trimmed.starts_with('#') {
        return None;
    }

    let (key_indent, key_text) = match trimmed.strip_prefix("- ") {
        Some(rest) => (indent + 2, rest),
        None => (indent, trimmed),
    };
    // The key text starts at the key column in both cases.
    let key_start = key_indent;

    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    let mut bracket_depth = 0i32;
    let mut prev = '\0';
    for (offset, ch) in key_text.char_indices() {
        if in_double {
            // Track escapes properly: `\\` is an escaped backslash, so the
            // quote that follows it really does close the scalar.
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_double = false;
            }
            prev = ch;
            continue;
        }

        match ch {
            '\'' => in_single = !in_single,
            '"' if !in_single && prev != '\\' => in_double = true,
            '[' | '{' if !in_single => bracket_depth += 1,
            ']' | '}' if !in_single => bracket_depth -= 1,
            ':' if !in_single && bracket_depth == 0 => {
                // Only `:` followed by whitespace or end-of-line separates a
                // key from its value (`ruby:3.2` is a plain scalar).
                let after = key_text[offset + ch.len_utf8()..].chars().next();
                if after.map_or(true, char::is_whitespace) {
                    let raw = &key_text[..offset];
                    let key = raw.trim();
                    if key.is_empty() {
                        return None;
                    }
                    let leading = raw.len() - raw.trim_start().len();
                    let trailing = raw.trim_end().len();
                    return Some((
                        key_indent,
                        key_start + leading,
                        key_start + trailing,
                        key.to_string(),
                    ));
                }
            }
            _ => {}
        }
        prev = ch;
    }
    None
}
267
/// Turns an arbitrary key into a short identifier-safe fragment.
///
/// ASCII letters and digits are lower-cased, every other character becomes
/// `_`, runs of `_` collapse to one, leading and trailing `_` are removed, and
/// the result is cut to at most 48 characters.
fn sanitize_key_fragment(key: &str) -> String {
    let mut collapsed = String::with_capacity(key.len());
    for ch in key.chars() {
        let mapped = if ch.is_ascii_alphanumeric() {
            ch.to_ascii_lowercase()
        } else {
            '_'
        };
        // Collapse underscore runs while building instead of afterwards.
        if mapped == '_' && collapsed.ends_with('_') {
            continue;
        }
        collapsed.push(mapped);
    }
    collapsed.trim_matches('_').chars().take(48).collect()
}
282
/// Builds the authority graph for one parsed GitLab CI document.
///
/// The root must be a mapping. The function creates one implicit
/// `CI_JOB_TOKEN` identity node, records pipeline-level facts (includes,
/// `default:`, global variables and image, `workflow:` rules), then creates one
/// step node per top-level job and links it to the token, secrets, OIDC
/// identities, image and services that the job declares or inherits from the
/// pipeline level. Constructs that are not resolved (includes, `extends:`,
/// `inherit:`, hidden jobs, rule-scoped variables) are recorded through
/// `mark_partial` rather than evaluated.
///
/// # Errors
///
/// Returns `TauditError::Parse` when `root` is not a mapping.
fn build_graph_from_root(
    root: Value,
    source: &PipelineSource,
) -> Result<AuthorityGraph, TauditError> {
    let mapping = root
        .as_mapping()
        .ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;

    let mut graph = AuthorityGraph::new(source.clone());
    graph.metadata.insert(META_PLATFORM.into(), "gitlab".into());

    // Single shared identity node for the job token; every step gets an edge
    // to it further down.
    let mut meta = HashMap::new();
    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
    meta.insert(META_IMPLICIT.into(), "true".into());
    let token_id = graph.add_node_with_metadata(
        NodeKind::Identity,
        "CI_JOB_TOKEN",
        TrustZone::FirstParty,
        meta,
    );

    // Includes are recorded (as JSON metadata) but never fetched or merged.
    if let Some(inc) = mapping.get("include") {
        graph.mark_partial(
            GapKind::Structural,
            "include: directive present — included templates not resolved".to_string(),
        );
        let entries = extract_include_entries(inc);
        if !entries.is_empty() {
            if let Ok(json) = serde_json::to_string(&entries) {
                graph.metadata.insert(META_GITLAB_INCLUDES.into(), json);
            }
        }
    }

    // `default:` settings are not applied to jobs here; only their presence is
    // flagged when they could affect authority.
    if let Some(default_map) = mapping.get("default").and_then(|v| v.as_mapping()) {
        if default_contains_authority_relevant_keys(default_map) {
            graph.mark_partial(
                GapKind::Structural,
                "default: contains inherited authority-relevant job settings — inheritance not fully resolved".to_string(),
            );
        }
    }

    // Pipeline-level credential-like variables; linked to every step below.
    let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");

    // Pipeline-level image, used as the fallback for jobs without their own.
    let global_image = mapping.get("image").and_then(extract_image_str);

    if let Some(wf) = mapping.get("workflow") {
        if has_mr_trigger_in_workflow(wf) {
            graph
                .metadata
                .insert(META_TRIGGER.into(), "merge_request".into());
        }
        if workflow_rules_define_variables(wf) {
            graph.mark_partial(
                GapKind::Expression,
                "workflow:rules:variables define conditional variables — rule expressions not evaluated".to_string(),
            );
        }
    }

    // Visit top-level entries sorted by key text (non-string keys sort as "")
    // so jobs are processed in a stable order regardless of file order.
    let mut top_level_entries: Vec<(&Value, &Value)> = mapping.iter().collect();
    top_level_entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
    for (key, value) in top_level_entries {
        let job_name = match key.as_str() {
            Some(k) => k,
            None => continue,
        };
        if RESERVED.contains(&job_name) {
            continue;
        }

        // Dot-prefixed entries are templates: noted as a gap, no step node.
        if job_name.starts_with('.') {
            graph.mark_partial(
                GapKind::Structural,
                format!("job '{job_name}' is a hidden/template job — not resolved"),
            );
            continue;
        }

        let job_map = match value.as_mapping() {
            Some(m) => m,
            None => continue,
        };

        let extends_names = extract_extends_list(job_map.get("extends"));
        if !extends_names.is_empty() {
            graph.mark_partial(
                GapKind::Structural,
                format!("job '{job_name}' uses extends: — inherited configuration not resolved"),
            );
        }

        if rules_define_variables(job_map.get("rules")) {
            graph.mark_partial(
                GapKind::Expression,
                format!(
                    "job '{job_name}' uses rules:variables — conditional variable scope not resolved"
                ),
            );
        }

        if job_map.contains_key("inherit") {
            graph.mark_partial(
                GapKind::Structural,
                format!("job '{job_name}' uses inherit: — inheritance scope not resolved"),
            );
        }

        let job_triggers_mr = job_has_mr_trigger(job_map);

        // The graph-level trigger is only set here if nothing set it earlier
        // (workflow rules or a previously visited job).
        if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
            graph
                .metadata
                .insert(META_TRIGGER.into(), "merge_request".into());
        }

        let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);

        let explicit_secrets =
            process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);

        let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);

        // Job image wins; otherwise fall back to the pipeline-level image.
        let job_image_str = job_map
            .get("image")
            .and_then(extract_image_str)
            .or(global_image.as_deref().map(String::from));

        // Images judged digest-pinned by `is_docker_digest_pinned` are placed
        // in the third-party zone, all others in the untrusted zone.
        let image_id = job_image_str.as_deref().map(|img| {
            let pinned = is_docker_digest_pinned(img);
            let trust_zone = if pinned {
                TrustZone::ThirdParty
            } else {
                TrustZone::Untrusted
            };
            let mut imeta = HashMap::new();
            if let Some(digest) = img.split("@sha256:").nth(1) {
                imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
            }
            graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
        });

        let service_ids = process_services(job_map.get("services"), &mut graph);

        let env_name = job_map
            .get("environment")
            .and_then(extract_environment_name);
        let env_url = job_map.get("environment").and_then(extract_environment_url);

        let script_body = extract_script_body(job_map);

        let dotenv_file = extract_dotenv_file(job_map);

        let needs = extract_needs(job_map);

        let protected_only = job_has_protected_branch_restriction(job_map);

        // Step metadata: only facts that are actually present are recorded.
        let mut step_meta = HashMap::new();
        step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
        if let Some(ref env) = env_name {
            step_meta.insert(META_ENVIRONMENT_NAME.into(), env.clone());
        }
        if !script_body.is_empty() {
            step_meta.insert(META_SCRIPT_BODY.into(), script_body);
        }
        if let Some(ref f) = dotenv_file {
            step_meta.insert(META_DOTENV_FILE.into(), f.clone());
        }
        if !needs.is_empty() {
            step_meta.insert(META_NEEDS.into(), needs.join(","));
        }
        if let Some(ref url) = env_url {
            step_meta.insert(META_ENVIRONMENT_URL.into(), url.clone());
        }
        if job_triggers_mr {
            step_meta.insert(META_TRIGGER.into(), "merge_request".into());
        }
        if !extends_names.is_empty() {
            step_meta.insert(META_GITLAB_EXTENDS.into(), extends_names.join(","));
        }
        // `allow_failure` may be a boolean or a mapping; the mapping form is
        // recorded as "true".
        if let Some(af) = job_map.get("allow_failure").and_then(|v| v.as_bool()) {
            step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), af.to_string());
        } else if job_map
            .get("allow_failure")
            .and_then(|v| v.as_mapping())
            .is_some()
        {
            step_meta.insert(META_GITLAB_ALLOW_FAILURE.into(), "true".into());
        }
        if job_services_have_dind(job_map.get("services")) {
            step_meta.insert(META_GITLAB_DIND_SERVICE.into(), "true".into());
        }
        if let Some(kind) = classify_trigger(job_map.get("trigger")) {
            step_meta.insert(META_GITLAB_TRIGGER_KIND.into(), kind.into());
        }
        if let Some((cache_key, cache_policy)) = extract_cache_key_policy(job_map.get("cache")) {
            step_meta.insert(META_GITLAB_CACHE_KEY.into(), cache_key);
            if let Some(p) = cache_policy {
                step_meta.insert(META_GITLAB_CACHE_POLICY.into(), p);
            }
        }
        if protected_only {
            step_meta.insert(META_RULES_PROTECTED_ONLY.into(), "true".into());
        }
        let step_id = graph.add_node_with_metadata(
            NodeKind::Step,
            job_name,
            TrustZone::FirstParty,
            step_meta,
        );

        // Every job has access to the implicit job token.
        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);

        // Pipeline-level, job-level and explicit `secrets:` entries are all
        // linked to the step.
        for &sid in global_secrets
            .iter()
            .chain(&job_secrets)
            .chain(&explicit_secrets)
        {
            graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
        }

        for &iid in &oidc_identities {
            graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
        }

        if let Some(img_id) = image_id {
            graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
        }
        for &svc_id in &service_ids {
            graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
        }
    }

    // Sanity check: mapping-valued, non-reserved, non-hidden top-level keys
    // existed but no step node was produced.
    let step_count = graph
        .nodes
        .iter()
        .filter(|n| n.kind == NodeKind::Step)
        .count();
    let had_job_carrier = mapping.iter().any(|(k, v)| {
        k.as_str()
            .map(|name| !RESERVED.contains(&name) && !name.starts_with('.'))
            .unwrap_or(false)
            && v.as_mapping().is_some()
    });
    if step_count == 0 && had_job_carrier {
        graph.mark_partial(
            GapKind::Opaque,
            "non-reserved top-level keys parsed but produced 0 step nodes — possible non-GitLab YAML wrong-platform-classified".to_string(),
        );
    }

    graph.stamp_edge_authority_summaries();
    Ok(graph)
}
608fn extract_image_str(v: &Value) -> Option<String> {
610 match v {
611 Value::String(s) => Some(s.clone()),
612 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
613 _ => None,
614 }
615}
616
617fn extract_environment_name(v: &Value) -> Option<String> {
619 match v {
620 Value::String(s) => Some(s.clone()),
621 Value::Mapping(m) => m.get("name").and_then(|n| n.as_str()).map(String::from),
622 _ => None,
623 }
624}
625
626fn extract_environment_url(v: &Value) -> Option<String> {
628 match v {
629 Value::Mapping(m) => m.get("url").and_then(|u| u.as_str()).map(String::from),
630 _ => None,
631 }
632}
633
634fn extract_script_body(job_map: &serde_yaml::Mapping) -> String {
638 let mut lines: Vec<String> = Vec::new();
639 for key in &["before_script", "script", "after_script"] {
640 if let Some(v) = job_map.get(*key) {
641 collect_script_lines(v, &mut lines);
642 }
643 }
644 lines.join("\n")
645}
646
647fn collect_script_lines(v: &Value, out: &mut Vec<String>) {
649 match v {
650 Value::String(s) => out.push(s.clone()),
651 Value::Sequence(seq) => {
652 for item in seq {
653 if let Some(s) = item.as_str() {
654 out.push(s.to_string());
655 }
656 }
657 }
658 _ => {}
659 }
660}
661
662fn extract_dotenv_file(job_map: &serde_yaml::Mapping) -> Option<String> {
665 let dotenv = job_map
666 .get("artifacts")?
667 .as_mapping()?
668 .get("reports")?
669 .as_mapping()?
670 .get("dotenv")?;
671 match dotenv {
672 Value::String(s) => Some(s.clone()),
673 Value::Sequence(seq) => {
674 let parts: Vec<String> = seq
675 .iter()
676 .filter_map(|v| v.as_str().map(String::from))
677 .collect();
678 if parts.is_empty() {
679 None
680 } else {
681 Some(parts.join(","))
682 }
683 }
684 _ => None,
685 }
686}
687
688fn extract_needs(job_map: &serde_yaml::Mapping) -> Vec<String> {
699 let mut out: Vec<String> = Vec::new();
700 if let Some(needs) = job_map.get("needs").and_then(|v| v.as_sequence()) {
701 for item in needs {
702 match item {
703 Value::String(s) => out.push(s.clone()),
704 Value::Mapping(m) => {
705 let Some(j) = m.get("job").and_then(|j| j.as_str()) else {
706 continue;
707 };
708 let artifacts_disabled =
712 m.get("artifacts").and_then(|v| v.as_bool()) == Some(false);
713 if artifacts_disabled {
714 continue;
715 }
716 out.push(j.to_string());
717 }
718 _ => {}
719 }
720 }
721 }
722 if let Some(deps) = job_map.get("dependencies").and_then(|v| v.as_sequence()) {
723 for item in deps {
724 if let Some(s) = item.as_str() {
725 out.push(s.to_string());
726 }
727 }
728 }
729 out.sort();
730 out.dedup();
731 out
732}
733
734fn check_truthy_comparison(expr: &str, var: &str) -> Option<bool> {
752 let trimmed = expr.trim();
757 if trimmed.is_empty() {
758 return None;
759 }
760
761 if let Some((lhs, rhs)) = split_top_level(trimmed, "||") {
763 let l = check_truthy_comparison(&lhs, var);
764 let r = check_truthy_comparison(&rhs, var);
765 return match (l, r) {
766 (Some(true), _) | (_, Some(true)) => Some(true),
767 (Some(false), Some(false)) => Some(false),
768 _ => None,
769 };
770 }
771 if let Some((lhs, rhs)) = split_top_level(trimmed, "&&") {
775 let l = check_truthy_comparison(&lhs, var);
776 let r = check_truthy_comparison(&rhs, var);
777 return match (l, r) {
778 (Some(false), _) | (_, Some(false)) => Some(false),
779 (Some(true), _) | (_, Some(true)) => Some(true),
780 _ => None,
781 };
782 }
783
784 classify_atom(trimmed, var)
786}
787
/// Splits `expr` at the first occurrence of `op` that is outside quotes,
/// outside a `/regex/` literal and not nested in parentheses.
///
/// Returns the text before and after the operator (untrimmed), or `None` when
/// no such occurrence exists. A `/` opens a regex literal only when the nearest
/// non-whitespace byte before it is `~`. Inside quotes and regex literals a
/// backslash skips the following byte.
fn split_top_level(expr: &str, op: &str) -> Option<(String, String)> {
    /// What the scanner is currently inside of.
    #[derive(Clone, Copy)]
    enum Scan {
        Plain,
        Quoted(u8),
        Regex,
    }

    let src = expr.as_bytes();
    let needle = op.as_bytes();
    let mut state = Scan::Plain;
    let mut paren_depth: i32 = 0;
    let mut pos = 0;

    while pos < src.len() {
        let byte = src[pos];

        // Inside a literal: only look for its terminator, honouring escapes.
        let closer = match state {
            Scan::Plain => None,
            Scan::Quoted(quote) => Some(quote),
            Scan::Regex => Some(b'/'),
        };
        if let Some(closer) = closer {
            if byte == b'\\' && pos + 1 < src.len() {
                pos += 2;
            } else {
                if byte == closer {
                    state = Scan::Plain;
                }
                pos += 1;
            }
            continue;
        }

        match byte {
            b'"' | b'\'' => {
                state = Scan::Quoted(byte);
                pos += 1;
                continue;
            }
            b'/' => {
                let previous = src[..pos]
                    .iter()
                    .rev()
                    .find(|b| !b.is_ascii_whitespace());
                if previous == Some(&b'~') {
                    state = Scan::Regex;
                    pos += 1;
                    continue;
                }
            }
            b'(' => paren_depth += 1,
            b')' => paren_depth -= 1,
            _ => {}
        }

        if paren_depth == 0 && src[pos..].starts_with(needle) {
            let before = expr[..pos].to_string();
            let after = expr[pos + needle.len()..].to_string();
            return Some((before, after));
        }
        pos += 1;
    }
    None
}
858
/// Classifies a single comparison (no `&&` / `||`) with respect to `var`.
///
/// * The bare variable is `Some(true)`.
/// * `var == lit` / `lit == var` (either side may be quoted) is `Some(true)`
///   for a truthy literal and `Some(false)` for a falsy one; `!=` inverts it.
/// * Truthy literals are `true` and `1`; falsy ones are `false`, `null`, `0`
///   and the empty string (case-insensitive, quotes ignored).
/// * Anything else, including comparisons not involving `var` and other
///   operators, is `None`.
fn classify_atom(atom: &str, var: &str) -> Option<bool> {
    fn unquote(text: &str) -> &str {
        text.trim_matches('"').trim_matches('\'')
    }

    let expr = atom.trim().trim_matches('(').trim_matches(')').trim();
    if expr == var {
        return Some(true);
    }

    // `==` is looked for before `!=`.
    let (is_equality, left, right) = match expr.split_once("==") {
        Some((l, r)) => (true, l.trim(), r.trim()),
        None => {
            let (l, r) = expr.split_once("!=")?;
            (false, l.trim(), r.trim())
        }
    };

    // Prefer an exact match on either side before trying unquoted forms.
    let literal = if left == var {
        right
    } else if right == var {
        left
    } else if unquote(left) == var {
        right
    } else if unquote(right) == var {
        left
    } else {
        return None;
    };

    let normalized = unquote(literal).to_ascii_lowercase();
    let literal_is_truthy = match normalized.as_str() {
        "true" | "1" => true,
        "false" | "null" | "" | "0" => false,
        _ => return None,
    };
    Some(literal_is_truthy == is_equality)
}
915
916fn is_credential_name(name: &str) -> bool {
927 let upper = name.to_uppercase();
928 let bytes = upper.as_bytes();
929 CRED_FRAGMENTS.iter().any(|frag| {
930 let frag_bytes = frag.as_bytes();
931 let n = frag_bytes.len();
932 if bytes.len() < n {
933 return false;
934 }
935 for i in 0..=bytes.len() - n {
937 if &bytes[i..i + n] != frag_bytes {
938 continue;
939 }
940 let left_ok = i == 0 || bytes[i - 1] == b'_';
941 let right_ok = i + n == bytes.len() || bytes[i + n] == b'_';
942 if left_ok && right_ok {
943 return true;
944 }
945 }
946 false
947 })
948}
949
950fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
953 let mut ids = Vec::new();
954 let map = match vars.and_then(|v| v.as_mapping()) {
955 Some(m) => m,
956 None => return ids,
957 };
958 let mut entries: Vec<(&Value, &Value)> = map.iter().collect();
960 entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
961 for (k, _v) in entries {
962 let name = match k.as_str() {
963 Some(s) => s,
964 None => continue,
965 };
966 if is_credential_name(name) {
967 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
968 ids.push(id);
969 let _ = scope; }
971 }
972 ids
973}
974
975fn process_explicit_secrets(
987 secrets: Option<&Value>,
988 _scope: &str,
989 graph: &mut AuthorityGraph,
990) -> Vec<NodeId> {
991 let mut ids = Vec::new();
992 let map = match secrets.and_then(|v| v.as_mapping()) {
993 Some(m) => m,
994 None => return ids,
995 };
996 let mut entries: Vec<(&Value, &Value)> = map.iter().collect();
998 entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
999 for (k, _v) in entries {
1000 let name = match k.as_str() {
1001 Some(s) => s,
1002 None => continue,
1003 };
1004 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1005 ids.push(id);
1006 }
1007 ids
1008}
1009
1010fn process_id_tokens(
1021 id_tokens: Option<&Value>,
1022 _scope: &str,
1023 graph: &mut AuthorityGraph,
1024) -> Vec<NodeId> {
1025 let mut ids = Vec::new();
1026 let map = match id_tokens.and_then(|v| v.as_mapping()) {
1027 Some(m) => m,
1028 None => return ids,
1029 };
1030 let mut entries: Vec<(&Value, &Value)> = map.iter().collect();
1032 entries.sort_by(|a, b| a.0.as_str().unwrap_or("").cmp(b.0.as_str().unwrap_or("")));
1033 for (k, v) in entries {
1034 let token_name = match k.as_str() {
1035 Some(s) => s,
1036 None => continue,
1037 };
1038 let aud_value = v.as_mapping().and_then(|m| m.get("aud"));
1043 let (aud_joined, is_list) = match aud_value {
1044 Some(Value::String(s)) => (s.clone(), false),
1045 Some(Value::Sequence(seq)) => {
1046 let parts: Vec<String> = seq
1047 .iter()
1048 .filter_map(|item| match item {
1049 Value::String(s) => Some(s.clone()),
1050 _ => None,
1051 })
1052 .collect();
1053 if parts.is_empty() {
1054 ("unknown".into(), false)
1055 } else {
1056 (parts.join(","), true)
1057 }
1058 }
1059 _ => ("unknown".into(), false),
1060 };
1061 let label = format!("{token_name} (aud={aud_joined})");
1062 let mut meta = HashMap::new();
1063 meta.insert(META_OIDC.into(), "true".into());
1064 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1065 meta.insert(META_OIDC_AUDIENCE.into(), aud_joined.clone());
1069 if is_list {
1073 meta.insert(META_OIDC_AUDIENCES.into(), aud_joined.clone());
1074 }
1075 let id =
1076 graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
1077 ids.push(id);
1078 }
1079 ids
1080}
1081
1082fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
1084 let mut ids = Vec::new();
1085 let list = match services.and_then(|v| v.as_sequence()) {
1086 Some(s) => s,
1087 None => return ids,
1088 };
1089 for item in list {
1090 let img_str = match extract_image_str(item) {
1091 Some(s) => s,
1092 None => continue,
1093 };
1094 let pinned = is_docker_digest_pinned(&img_str);
1095 let trust_zone = if pinned {
1096 TrustZone::ThirdParty
1097 } else {
1098 TrustZone::Untrusted
1099 };
1100 let mut meta = HashMap::new();
1101 if let Some(digest) = img_str.split("@sha256:").nth(1) {
1102 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
1103 }
1104 let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
1105 ids.push(id);
1106 }
1107 ids
1108}
1109
1110fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
1112 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
1114 for rule in rules {
1115 if let Some(if_expr) = rule
1116 .as_mapping()
1117 .and_then(|m| m.get("if"))
1118 .and_then(|v| v.as_str())
1119 {
1120 if matches_mr_event(if_expr) {
1125 return true;
1126 }
1127 }
1128 }
1129 }
1130 if let Some(only) = job_map.get("only") {
1132 if only_has_merge_requests(only) {
1133 return true;
1134 }
1135 }
1136 false
1137}
1138
1139fn only_has_merge_requests(v: &Value) -> bool {
1141 match v {
1142 Value::Sequence(seq) => seq
1143 .iter()
1144 .any(|item| item.as_str() == Some("merge_requests")),
1145 Value::Mapping(m) => {
1146 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
1147 return refs
1148 .iter()
1149 .any(|item| item.as_str() == Some("merge_requests"));
1150 }
1151 false
1152 }
1153 _ => false,
1154 }
1155}
1156
1157fn job_has_protected_branch_restriction(job_map: &serde_yaml::Mapping) -> bool {
1174 if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
1175 for rule in rules {
1176 let Some(if_expr) = rule
1177 .as_mapping()
1178 .and_then(|m| m.get("if"))
1179 .and_then(|v| v.as_str())
1180 else {
1181 continue;
1182 };
1183 if matches!(
1187 check_truthy_comparison(if_expr, "$CI_COMMIT_REF_PROTECTED"),
1188 Some(true)
1189 ) {
1190 return true;
1191 }
1192 if if_expr.contains("$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH")
1193 || if_expr.contains("$CI_DEFAULT_BRANCH == $CI_COMMIT_BRANCH")
1194 {
1195 return true;
1196 }
1197 if matches!(
1202 check_truthy_comparison(if_expr, "$CI_COMMIT_TAG"),
1203 Some(true)
1204 ) {
1205 return true;
1206 }
1207 }
1208 }
1209 if let Some(only) = job_map.get("only") {
1210 if only_lists_protected_ref(only) {
1211 return true;
1212 }
1213 }
1214 false
1215}
1216
1217fn only_lists_protected_ref(v: &Value) -> bool {
1221 fn is_protected_ref(s: &str) -> bool {
1222 matches!(s, "main" | "master" | "tags") || s.starts_with("/^release")
1223 }
1224 match v {
1225 Value::String(s) => is_protected_ref(s.as_str()),
1226 Value::Sequence(seq) => seq
1227 .iter()
1228 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false)),
1229 Value::Mapping(m) => {
1230 if let Some(refs) = m.get("refs").and_then(|r| r.as_sequence()) {
1231 return refs
1232 .iter()
1233 .any(|item| item.as_str().map(is_protected_ref).unwrap_or(false));
1234 }
1235 false
1236 }
1237 _ => false,
1238 }
1239}
1240
1241fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
1243 let rules = match wf
1244 .as_mapping()
1245 .and_then(|m| m.get("rules"))
1246 .and_then(|r| r.as_sequence())
1247 {
1248 Some(r) => r,
1249 None => return false,
1250 };
1251 for rule in rules {
1252 if let Some(if_expr) = rule
1253 .as_mapping()
1254 .and_then(|m| m.get("if"))
1255 .and_then(|v| v.as_str())
1256 {
1257 if matches_mr_event(if_expr) {
1260 return true;
1261 }
1262 }
1263 }
1264 false
1265}
1266
1267fn workflow_rules_define_variables(wf: &Value) -> bool {
1268 wf.as_mapping()
1269 .and_then(|m| m.get("rules"))
1270 .is_some_and(|rules| rules_define_variables(Some(rules)))
1271}
1272
1273fn rules_define_variables(rules: Option<&Value>) -> bool {
1274 let Some(rules) = rules.and_then(|v| v.as_sequence()) else {
1275 return false;
1276 };
1277 rules
1278 .iter()
1279 .filter_map(|rule| rule.as_mapping())
1280 .any(|rule| rule.contains_key("variables"))
1281}
1282
1283fn matches_mr_event(if_expr: &str) -> bool {
1289 fn atom_is_mr_event(atom: &str) -> bool {
1294 let s = atom.trim().trim_matches('(').trim_matches(')').trim();
1295 let (lhs, rhs) = match s.split_once("==") {
1296 Some(parts) => parts,
1297 None => return false,
1298 };
1299 let lhs = lhs.trim();
1300 let rhs_norm = rhs.trim().trim_matches('"').trim_matches('\'');
1301 let lhs_unq = lhs.trim_matches('"').trim_matches('\'');
1303 let rhs_raw = rhs.trim().trim_matches('"').trim_matches('\'');
1304 if (lhs_unq == "$CI_PIPELINE_SOURCE" && rhs_norm == "merge_request_event")
1305 || (rhs_raw == "$CI_PIPELINE_SOURCE" && lhs_unq == "merge_request_event")
1306 {
1307 return true;
1308 }
1309 false
1310 }
1311 let trimmed = if_expr.trim();
1312 if let Some((lhs, rhs)) = split_top_level(trimmed, "||") {
1314 return atom_is_mr_event(&lhs) || matches_mr_event(&rhs);
1315 }
1316 if let Some((lhs, rhs)) = split_top_level(trimmed, "&&") {
1321 return atom_is_mr_event(&lhs) || matches_mr_event(&rhs);
1322 }
1323 atom_is_mr_event(trimmed)
1324}
1325
/// One resolved-by-name (not fetched) `include:` entry, as produced by
/// `extract_include_entries`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IncludeEntry {
    /// Include flavour: `local`, `remote`, `template`, `component` or
    /// `project`.
    pub kind: String,
    /// What is included: the path, URL, template name, component path
    /// (without its `@version` suffix) or project path.
    pub target: String,
    /// Version or ref: the text after the last `@` of a component, or the
    /// `ref` of a project include; empty in every other case.
    pub git_ref: String,
}
1343
1344pub fn extract_include_entries(v: &Value) -> Vec<IncludeEntry> {
1350 let mut out = Vec::new();
1351 match v {
1352 Value::String(s) => {
1354 out.push(IncludeEntry {
1355 kind: classify_string_include(s).into(),
1356 target: s.clone(),
1357 git_ref: String::new(),
1358 });
1359 }
1360 Value::Sequence(seq) => {
1361 for item in seq {
1362 match item {
1363 Value::String(s) => {
1364 out.push(IncludeEntry {
1365 kind: classify_string_include(s).into(),
1366 target: s.clone(),
1367 git_ref: String::new(),
1368 });
1369 }
1370 Value::Mapping(m) => {
1371 if let Some(e) = include_entry_from_mapping(m) {
1372 out.push(e);
1373 }
1374 }
1375 _ => {}
1376 }
1377 }
1378 }
1379 Value::Mapping(m) => {
1380 if let Some(e) = include_entry_from_mapping(m) {
1381 out.push(e);
1382 }
1383 }
1384 _ => {}
1385 }
1386 out
1387}
1388
/// Classifies a bare-string include: `remote` when it starts with `http://` or
/// `https://` (case-insensitive), otherwise `local`.
fn classify_string_include(s: &str) -> &'static str {
    let lowered = s.to_ascii_lowercase();
    let is_url = ["http://", "https://"]
        .iter()
        .any(|scheme| lowered.starts_with(scheme));
    if is_url {
        "remote"
    } else {
        "local"
    }
}
1399
1400fn include_entry_from_mapping(m: &serde_yaml::Mapping) -> Option<IncludeEntry> {
1404 let str_at = |key: &str| {
1405 m.get(key)
1406 .and_then(|v| v.as_str())
1407 .map(str::to_string)
1408 .unwrap_or_default()
1409 };
1410 if let Some(s) = m.get("local").and_then(|v| v.as_str()) {
1411 return Some(IncludeEntry {
1412 kind: "local".into(),
1413 target: s.to_string(),
1414 git_ref: String::new(),
1415 });
1416 }
1417 if let Some(s) = m.get("remote").and_then(|v| v.as_str()) {
1418 return Some(IncludeEntry {
1419 kind: "remote".into(),
1420 target: s.to_string(),
1421 git_ref: String::new(),
1422 });
1423 }
1424 if let Some(s) = m.get("template").and_then(|v| v.as_str()) {
1425 return Some(IncludeEntry {
1426 kind: "template".into(),
1427 target: s.to_string(),
1428 git_ref: String::new(),
1429 });
1430 }
1431 if let Some(s) = m.get("component").and_then(|v| v.as_str()) {
1432 let (target, git_ref) = match s.rsplit_once('@') {
1434 Some((path, ver)) => (path.to_string(), ver.to_string()),
1435 None => (s.to_string(), String::new()),
1436 };
1437 return Some(IncludeEntry {
1438 kind: "component".into(),
1439 target,
1440 git_ref,
1441 });
1442 }
1443 if m.contains_key("project") {
1444 let project = str_at("project");
1445 let git_ref = str_at("ref");
1448 return Some(IncludeEntry {
1449 kind: "project".into(),
1450 target: project,
1451 git_ref,
1452 });
1453 }
1454 None
1455}
1456
1457fn extract_extends_list(v: Option<&Value>) -> Vec<String> {
1460 let v = match v {
1461 Some(v) => v,
1462 None => return Vec::new(),
1463 };
1464 match v {
1465 Value::String(s) => vec![s.clone()],
1466 Value::Sequence(seq) => seq
1467 .iter()
1468 .filter_map(|i| i.as_str().map(str::to_string))
1469 .collect(),
1470 _ => Vec::new(),
1471 }
1472}
1473
1474fn default_contains_authority_relevant_keys(m: &serde_yaml::Mapping) -> bool {
1477 [
1478 "image",
1479 "services",
1480 "variables",
1481 "secrets",
1482 "id_tokens",
1483 "before_script",
1484 "after_script",
1485 "cache",
1486 "artifacts",
1487 ]
1488 .iter()
1489 .any(|k| m.contains_key(*k))
1490}
1491
1492fn job_services_have_dind(services: Option<&Value>) -> bool {
1496 let list = match services.and_then(|v| v.as_sequence()) {
1497 Some(s) => s,
1498 None => return false,
1499 };
1500 for item in list {
1501 let img = match extract_image_str(item) {
1502 Some(s) => s,
1503 None => continue,
1504 };
1505 if image_is_dind(&img) {
1506 return true;
1507 }
1508 }
1509 false
1510}
1511
/// Reports whether an image reference looks like a Docker-in-Docker image.
///
/// The check is case-insensitive and ignores everything from the first `@`
/// onwards. The remaining text must start with `docker:` or `docker/` and
/// contain `dind` somewhere.
fn image_is_dind(image: &str) -> bool {
    let lowered = image.to_ascii_lowercase();
    // Cut at the first `@` so the digest part cannot influence the match.
    let name_and_tag = lowered
        .split_once('@')
        .map_or(lowered.as_str(), |(head, _digest)| head);

    let is_docker_image =
        name_and_tag.starts_with("docker:") || name_and_tag.starts_with("docker/");
    is_docker_image && name_and_tag.contains("dind")
}
1528
1529fn classify_trigger(trigger: Option<&Value>) -> Option<&'static str> {
1534 let t = trigger?;
1535 if t.is_string() {
1537 return Some("static");
1538 }
1539 let m = t.as_mapping()?;
1540 if let Some(inc) = m.get("include") {
1543 if include_has_artifact_source(inc) {
1544 return Some("dynamic");
1545 }
1546 }
1547 Some("static")
1548}
1549
1550fn include_has_artifact_source(v: &Value) -> bool {
1553 match v {
1554 Value::Mapping(m) => m.contains_key("artifact"),
1555 Value::Sequence(seq) => seq.iter().any(|i| {
1556 i.as_mapping()
1557 .map(|m| m.contains_key("artifact"))
1558 .unwrap_or(false)
1559 }),
1560 _ => false,
1561 }
1562}
1563
1564fn extract_cache_key_policy(v: Option<&Value>) -> Option<(String, Option<String>)> {
1575 let v = v?;
1576 let m = match v {
1577 Value::Mapping(m) => m,
1578 Value::Sequence(seq) => {
1579 return seq
1581 .iter()
1582 .find_map(|i| i.as_mapping().and_then(extract_cache_key_policy_map));
1583 }
1584 _ => return None,
1585 };
1586 extract_cache_key_policy_map(m)
1587}
1588
1589fn extract_cache_key_policy_map(m: &serde_yaml::Mapping) -> Option<(String, Option<String>)> {
1590 let key = match m.get("key") {
1591 Some(Value::String(s)) => s.clone(),
1592 Some(Value::Number(n)) => n.to_string(),
1593 Some(Value::Bool(b)) => b.to_string(),
1594 Some(Value::Mapping(km)) => {
1595 let mut parts = Vec::new();
1596 if let Some(prefix) = km.get("prefix").and_then(|v| v.as_str()) {
1597 parts.push(format!("prefix:{prefix}"));
1598 }
1599 if let Some(files) = km.get("files").and_then(|v| v.as_sequence()) {
1600 let names: Vec<String> = files
1601 .iter()
1602 .filter_map(|f| f.as_str().map(str::to_string))
1603 .collect();
1604 if !names.is_empty() {
1605 parts.push(format!("files:{}", names.join(",")));
1606 }
1607 }
1608 if parts.is_empty() {
1609 String::new()
1610 } else {
1611 parts.join(";")
1612 }
1613 }
1614 _ => String::new(),
1615 };
1616 let policy = m.get("policy").and_then(|v| v.as_str()).map(str::to_string);
1617 Some((key, policy))
1618}
1619
1620#[cfg(test)]
1621mod tests {
1622 use super::*;
1623
1624 fn parse(yaml: &str) -> AuthorityGraph {
1625 let parser = GitlabParser;
1626 let source = PipelineSource {
1627 file: ".gitlab-ci.yml".into(),
1628 repo: None,
1629 git_ref: None,
1630 commit_sha: None,
1631 };
1632 parser.parse(yaml, &source).unwrap()
1633 }
1634
    /// A pipeline with one plain job must yield exactly one Identity node,
    /// named `CI_JOB_TOKEN`, marked implicit and broadly scoped.
    #[test]
    fn ci_job_token_always_present() {
        let yaml = r#"
stages:
  - build

build-job:
  stage: build
  script:
    - make build
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);
        assert_eq!(identities[0].name, "CI_JOB_TOKEN");
        // The token is never declared in the YAML, so it must be flagged implicit.
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IMPLICIT)
                .map(String::as_str),
            Some("true")
        );
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IDENTITY_SCOPE)
                .map(String::as_str),
            Some("broad")
        );
    }
1665
    /// Among global `variables:`, a credential-looking name (`DEPLOY_TOKEN`)
    /// becomes a Secret node while a plain name (`APP_VERSION`) does not.
    #[test]
    fn global_credential_variable_emits_secret_node() {
        let yaml = r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

build-job:
  script:
    - make
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|s| s.name == "DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert!(
            // Negative control: a non-credential variable name.
            !secrets.iter().any(|s| s.name == "APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }
1690
    /// A tag-only image (`alpine:latest`) must produce a single Image node in
    /// the `Untrusted` trust zone.
    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let yaml = r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }
1705
    /// An image pinned by `@sha256:` digest must land in the `ThirdParty`
    /// trust zone.
    #[test]
    fn digest_pinned_image_is_third_party() {
        let yaml = r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }
1719
    /// Each entry under a job's `id_tokens:` must produce one Identity node
    /// carrying `META_OIDC = "true"`.
    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let yaml = r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        // Keep only identities flagged as OIDC, which excludes any other identity nodes.
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc.iter().map(|n| &n.name).collect::<Vec<_>>()
        );
    }
1744
    /// Every name under a job's `secrets:` must become a Secret node; the
    /// fixture uses one `vault` entry and one `aws_secrets_manager` entry.
    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let yaml = r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        let names: Vec<_> = secrets.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }
1764
    /// A `rules: if:` comparing `$CI_PIPELINE_SOURCE` to `merge_request_event`
    /// must set graph-level `META_TRIGGER` to `merge_request`.
    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let yaml = r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }
1781
    /// The `only: [merge_requests]` form must also set graph-level
    /// `META_TRIGGER` to `merge_request`.
    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let yaml = r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request")
        );
    }
1797
    /// A top-level `include:` must mark the graph `Partial`, and the first
    /// recorded gap must be `Structural`.
    #[test]
    fn include_marks_graph_partial() {
        let yaml = r#"
include:
  - local: '/templates/.base.yml'

build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
    }
1812
    /// A `default:` block that sets authority-relevant keys (`image`,
    /// `before_script`) must mark the graph `Partial`, with a gap message that
    /// mentions both `default:` and `inherit`.
    #[test]
    fn default_with_authority_relevant_keys_marks_partial() {
        let yaml = r#"
default:
  image: alpine:latest
  before_script:
    - echo from default

build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            graph
                .completeness_gaps
                .iter()
                .any(|r| r.contains("default:") && r.contains("inherit")),
            "expected default-inheritance partial reason, got: {:?}",
            graph.completeness_gaps
        );
    }
1836
    /// A job that uses `inherit:` must mark the graph `Partial`, with a gap
    /// message that names the job.
    #[test]
    fn inherit_key_marks_partial() {
        let yaml = r#"
variables:
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"

deploy:
  inherit:
    variables: false
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            graph
                .completeness_gaps
                .iter()
                .any(|r| r.contains("job 'deploy' uses inherit:")),
            "expected inherit partial reason, got: {:?}",
            graph.completeness_gaps
        );
    }
1860
    /// A job using `extends:` must mark the graph `Partial`, and every gap
    /// recorded for this fixture must be `Structural`.
    #[test]
    fn extends_marks_graph_partial() {
        let yaml = r#"
.base:
  script:
    - echo base

my-job:
  extends: .base
  stage: build
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            // `all` rather than "first": no gap of another kind may appear.
            graph
                .completeness_gap_kinds
                .iter()
                .all(|k| *k == GapKind::Structural),
            "expected all gaps Structural, got: {:?}",
            graph.completeness_gap_kinds
        );
    }
1885
    /// Every Step node must carry `META_JOB_NAME`, and the values must cover
    /// both jobs in the fixture.
    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let yaml = r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        // The `unwrap` is safe here: presence was asserted for every step above.
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }
1914
    /// Top-level reserved keys (`stages`, `variables`, `image`) must not be
    /// treated as jobs; only `build` becomes a Step.
    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let yaml = r#"
stages:
  - build
  - test

variables:
  MY_VAR: value

image: alpine:latest

build:
  stage: build
  script:
    - make
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }
1942
    /// Both service forms, a bare string and a mapping with `name:`, must each
    /// produce an Image node.
    #[test]
    fn services_emit_image_nodes() {
        let yaml = r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(
            images.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            images.iter().map(|i| &i.name).collect::<Vec<_>>()
        );
    }
1962
    /// Top-level keys whose values are lists rather than mappings produce no
    /// Step nodes and leave the graph `Complete`.
    ///
    /// NOTE(review): the `marks_partial` suffix in the test name contradicts
    /// the assertion below, which expects `Complete`. Confirm which behaviour
    /// is intended and rename if appropriate.
    #[test]
    fn job_carrier_with_unparseable_bodies_marks_partial() {
        let yaml = r#"
build:
  - this is a list, not a mapping
test:
  - also a list
"#;
        let graph = parse(yaml);
        let step_count = graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count();
        assert_eq!(step_count, 0);
        assert_eq!(
            graph.completeness,
            AuthorityCompleteness::Complete,
            "non-mapping values are not job carriers"
        );
    }
1996
    /// `META_RULES_PROTECTED_ONLY` must be stamped only for a positive
    /// protected-ref comparison.
    ///
    /// Four fixtures are parsed in turn: `== "true"` (must stamp), then
    /// `== "false"`, `!= "true"` and an unrelated `$CI_COMMIT_TAG_MESSAGE`
    /// comparison (none may stamp).
    #[test]
    fn protected_ref_only_stamps_meta_when_truly_positive() {
        // Case 1: positive comparison, so the marker is expected.
        let positive = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED == "true"'
  script:
    - deploy.sh
"#;
        let graph = parse(positive);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert_eq!(
            step.metadata
                .get(META_RULES_PROTECTED_ONLY)
                .map(String::as_str),
            Some("true"),
            "positive == \"true\" comparison must stamp META_RULES_PROTECTED_ONLY"
        );

        // Case 2: same variable compared to "false", so no marker.
        let negation = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED == "false"'
  script:
    - deploy.sh
"#;
        let graph = parse(negation);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert!(
            !step.metadata.contains_key(META_RULES_PROTECTED_ONLY),
            "== \"false\" is the OPPOSITE signal — must NOT stamp META_RULES_PROTECTED_ONLY (got: {:?})",
            step.metadata.get(META_RULES_PROTECTED_ONLY)
        );

        // Case 3: `!=` against "true", so no marker.
        let inequality = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED != "true"'
  script:
    - deploy.sh
"#;
        let graph = parse(inequality);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert!(
            !step.metadata.contains_key(META_RULES_PROTECTED_ONLY),
            "!= \"true\" is a negation — must NOT stamp META_RULES_PROTECTED_ONLY"
        );

        // Case 4: a variable that merely shares the `$CI_COMMIT_TAG` prefix.
        let tag_message_trap = r#"
deploy:
  rules:
    - if: '$CI_COMMIT_TAG_MESSAGE == "release"'
  script:
    - deploy.sh
"#;
        let graph = parse(tag_message_trap);
        let step = graph.nodes_of_kind(NodeKind::Step).next().unwrap();
        assert!(
            !step.metadata.contains_key(META_RULES_PROTECTED_ONLY),
            "$CI_COMMIT_TAG_MESSAGE must not match the $CI_COMMIT_TAG predicate"
        );
    }
2067
    /// A `!=` comparison against `merge_request_event` must not stamp
    /// `META_TRIGGER = merge_request` on the graph or on the step, while the
    /// `==` form still must.
    #[test]
    fn mr_trigger_detection_rejects_negation() {
        let negation = r#"
build:
  rules:
    - if: '$CI_PIPELINE_SOURCE != "merge_request_event"'
  script:
    - make build
"#;
        let graph = parse(negation);
        assert!(
            graph.metadata.get(META_TRIGGER).map(String::as_str) != Some("merge_request"),
            "negation form must not stamp META_TRIGGER=merge_request, got: {:?}",
            graph.metadata.get(META_TRIGGER)
        );
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 1);
        assert!(
            steps[0].metadata.get(META_TRIGGER).map(String::as_str) != Some("merge_request"),
            "negation form must not stamp per-step META_TRIGGER=merge_request"
        );

        // Positive control: the `==` form must still be detected.
        let positive = r#"
build:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - make build
"#;
        let graph = parse(positive);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "positive form must still stamp META_TRIGGER=merge_request"
        );
    }
2108
    /// A list-form `aud:` must be recorded comma-joined under both
    /// `META_OIDC_AUDIENCES` and `META_OIDC_AUDIENCE`; a scalar `aud:` must
    /// set only the singular key.
    #[test]
    fn id_tokens_aud_list_form_creates_audiences_metadata() {
        let yaml = r#"
deploy:
  id_tokens:
    MULTI_CLOUD_TOKEN:
      aud:
        - https://aws.amazonaws.com
        - https://gcp.googleapis.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(oidc.len(), 1);
        assert_eq!(
            oidc[0]
                .metadata
                .get(META_OIDC_AUDIENCES)
                .map(String::as_str),
            Some("https://aws.amazonaws.com,https://gcp.googleapis.com"),
            "list-form aud must stamp comma-joined META_OIDC_AUDIENCES"
        );
        // The singular key carries the same comma-joined value for list-form input.
        assert_eq!(
            oidc[0].metadata.get(META_OIDC_AUDIENCE).map(String::as_str),
            Some("https://aws.amazonaws.com,https://gcp.googleapis.com"),
        );
        assert!(oidc[0].name.contains("aud=https://aws"));

        // Scalar form: only the singular key may be present.
        let scalar = r#"
deploy:
  id_tokens:
    AWS_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(scalar);
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc[0].metadata.get(META_OIDC_AUDIENCE).map(String::as_str),
            Some("https://sts.amazonaws.com")
        );
        assert!(
            !oidc[0].metadata.contains_key(META_OIDC_AUDIENCES),
            "scalar form must NOT set the plural META_OIDC_AUDIENCES marker"
        );
    }
2169
    /// Pins which variable names `is_credential_name` accepts and rejects.
    #[test]
    fn is_credential_name_boundary_checks() {
        // Rejected: names where a credential fragment appears only as part of
        // a longer word (`CERT` in `CERTAIN`, `TOKEN` in `TOKENIZER`, …).
        assert!(!is_credential_name("CERTAIN_FLAG"));
        assert!(!is_credential_name("TOKENIZER_VERSION"));
        assert!(!is_credential_name("UNCERTAIN"));
        assert!(!is_credential_name("CERTIFICATE_PATH"));
        assert!(!is_credential_name("TOKEN1"));
        assert!(!is_credential_name("CERTIFICATE"));

        // Accepted: names where the fragment is the whole name or a whole
        // `_`-separated part of it.
        assert!(is_credential_name("API_TOKEN"));
        assert!(is_credential_name("MY_CERT"));
        assert!(is_credential_name("DB_PASSWORD"));
        assert!(is_credential_name("DEPLOY_TOKEN"));
        assert!(is_credential_name("SIGNING_KEY"));
        assert!(is_credential_name("AWS_SECRET_ACCESS_KEY"));
        assert!(is_credential_name("TOKEN"));
        assert!(is_credential_name("CERT"));
        assert!(is_credential_name("PRIVATE_KEY"));
        assert!(is_credential_name("CREDENTIAL"));
    }
2196
    /// A `needs:` entry with `artifacts: false` must be left out of the
    /// consumer's `META_NEEDS`; the same entry without `artifacts:` must be
    /// kept.
    #[test]
    fn needs_artifacts_false_excludes_dotenv_flow() {
        let yaml = r#"
build:
  artifacts:
    reports:
      dotenv: build.env
  script:
    - make build
deploy:
  needs:
    - job: build
      artifacts: false
  script:
    - kubectl apply
"#;
        let graph = parse(yaml);
        let deploy_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.metadata.get(META_JOB_NAME).map(String::as_str) == Some("deploy"))
            .expect("deploy step present");
        // Treat a missing META_NEEDS as an empty list so the check below still works.
        let needs_csv = deploy_step
            .metadata
            .get(META_NEEDS)
            .map(String::as_str)
            .unwrap_or("");
        assert!(
            !needs_csv.split(',').any(|s| s == "build"),
            "build must be excluded from META_NEEDS when artifacts: false (got: {needs_csv:?})"
        );

        // Control: identical pipeline without `artifacts: false`.
        let yaml_default = r#"
build:
  artifacts:
    reports:
      dotenv: build.env
  script:
    - make build
deploy:
  needs:
    - job: build
  script:
    - kubectl apply
"#;
        let graph = parse(yaml_default);
        let deploy_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.metadata.get(META_JOB_NAME).map(String::as_str) == Some("deploy"))
            .expect("deploy step present");
        let needs_csv = deploy_step
            .metadata
            .get(META_NEEDS)
            .map(String::as_str)
            .unwrap_or("");
        assert!(
            needs_csv.split(',').any(|s| s == "build"),
            "default (artifacts implicitly true) must keep build in META_NEEDS (got: {needs_csv:?})"
        );
    }
2261
    /// `rules: variables:` under `workflow:` and under a job must each record
    /// one `Expression` gap with its own message, giving exactly two gaps.
    #[test]
    fn rules_variables_mark_typed_expression_gap() {
        let yaml = r#"
workflow:
  rules:
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      variables:
        DEPLOY_TOKEN: "$PROD_DEPLOY_TOKEN"

deploy:
  rules:
    - if: '$CI_COMMIT_REF_PROTECTED == "true"'
      variables:
        CLOUD_PASSWORD: "$PROD_CLOUD_PASSWORD"
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        // Exact equality: no additional gap of any kind may be recorded.
        assert_eq!(
            graph.completeness_gap_kinds,
            vec![GapKind::Expression, GapKind::Expression],
            "workflow and job rules:variables should each produce an expression gap"
        );
        assert!(
            graph
                .completeness_gaps
                .iter()
                .any(|gap| gap.contains("workflow:rules:variables define conditional variables")),
            "workflow rules:variables gap missing: {:?}",
            graph.completeness_gaps
        );
        assert!(
            graph
                .completeness_gaps
                .iter()
                .any(|gap| gap.contains("job 'deploy' uses rules:variables")),
            "job rules:variables gap missing: {:?}",
            graph.completeness_gaps
        );
    }
2306
    /// Parsing the same document repeatedly must yield the same
    /// `(kind, name)` node sequence every time.
    ///
    /// Job and variable names in the fixture are deliberately not in
    /// alphabetical order.
    #[test]
    fn gitlab_mapping_iteration_is_deterministic_across_runs() {
        let yaml = r#"
zeta-job:
  variables:
    ZZ_TOKEN: "$CI_TOKEN"
    AA_PASSWORD: "x"
    MM_SECRET: "y"
  script:
    - echo zeta
alpha-job:
  variables:
    QQ_TOKEN: "$CI_TOKEN"
    BB_API_KEY: "z"
  script:
    - echo alpha
mid-job:
  variables:
    NN_PRIVATE_KEY: "k"
    GG_SIGNING_KEY: "j"
  script:
    - echo mid
"#;
        // Baseline from the first parse.
        let canonical: Vec<(NodeKind, String)> = parse(yaml)
            .nodes
            .iter()
            .map(|n| (n.kind, n.name.clone()))
            .collect();
        // Nine further parses must each reproduce the baseline exactly.
        for run in 0..9 {
            let again: Vec<(NodeKind, String)> = parse(yaml)
                .nodes
                .iter()
                .map(|n| (n.kind, n.name.clone()))
                .collect();
            assert_eq!(
                again, canonical,
                "run {run}: NodeId order must be byte-identical across runs"
            );
        }
    }
2351
    /// A file whose only top-level entry is a dot-prefixed mapping
    /// (`.template-only`) produces no Step nodes and must be marked `Partial`
    /// with a `Structural` first gap.
    #[test]
    fn mapping_jobs_without_recognisable_step_content_marks_partial() {
        let yaml = r#"
.template-only:
  script:
    - echo "this is a template-only file"
"#;
        let graph = parse(yaml);
        let step_count = graph
            .nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Step)
            .count();
        assert_eq!(step_count, 0);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
    }
2379}