use std::collections::HashMap;
use serde::Deserialize;
use serde_yaml::Value;
use taudit_core::error::TauditError;
use taudit_core::graph::*;
use taudit_core::ports::PipelineParser;
/// Stateless parser for GitLab CI pipeline definitions.
///
/// Implements [`PipelineParser`] and reports its platform as `"gitlab-ci"`.
pub struct GitlabParser;
/// Top-level keys that `GitlabParser::parse` skips instead of treating them as
/// job definitions.
const RESERVED: &[&str] = &[
"stages",
"workflow",
"include",
"variables",
"image",
"services",
"default",
"cache",
"before_script",
"after_script",
"types",
];
/// Upper-case substrings used by `is_credential_name`: a variable whose
/// upper-cased name contains any of these is treated as a credential.
const CRED_FRAGMENTS: &[&str] = &[
"TOKEN",
"SECRET",
"PASSWORD",
"PASSWD",
"PRIVATE_KEY",
"API_KEY",
"APIKEY",
"SIGNING_KEY",
"ACCESS_KEY",
"SERVICE_ACCOUNT",
"CERT",
"CREDENTIAL",
];
impl PipelineParser for GitlabParser {
fn platform(&self) -> &str {
"gitlab-ci"
}
fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
let mut de = serde_yaml::Deserializer::from_str(content);
let doc = de
.next()
.ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
let root: Value = Value::deserialize(doc)
.map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
let mapping = root
.as_mapping()
.ok_or_else(|| TauditError::Parse("GitLab CI root must be a mapping".into()))?;
let mut graph = AuthorityGraph::new(source.clone());
let mut meta = HashMap::new();
meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
meta.insert(META_IMPLICIT.into(), "true".into());
let token_id = graph.add_node_with_metadata(
NodeKind::Identity,
"CI_JOB_TOKEN",
TrustZone::FirstParty,
meta,
);
if mapping.contains_key("include") {
graph.mark_partial(
"include: directive present — included templates not resolved".to_string(),
);
}
let global_secrets = process_variables(mapping.get("variables"), &mut graph, "pipeline");
let global_image = mapping.get("image").and_then(extract_image_str);
if let Some(wf) = mapping.get("workflow") {
if has_mr_trigger_in_workflow(wf) {
graph
.metadata
.insert(META_TRIGGER.into(), "merge_request".into());
}
}
for (key, value) in mapping {
let job_name = match key.as_str() {
Some(k) => k,
None => continue,
};
if RESERVED.contains(&job_name) {
continue;
}
if job_name.starts_with('.') {
graph.mark_partial(format!(
"job '{job_name}' is a hidden/template job — not resolved"
));
continue;
}
let job_map = match value.as_mapping() {
Some(m) => m,
None => continue,
};
if job_map.contains_key("extends") {
graph.mark_partial(format!(
"job '{job_name}' uses extends: — inherited configuration not resolved"
));
}
let job_triggers_mr = job_has_mr_trigger(job_map);
if job_triggers_mr && !graph.metadata.contains_key(META_TRIGGER) {
graph
.metadata
.insert(META_TRIGGER.into(), "merge_request".into());
}
let job_secrets = process_variables(job_map.get("variables"), &mut graph, job_name);
let explicit_secrets =
process_explicit_secrets(job_map.get("secrets"), job_name, &mut graph);
let oidc_identities = process_id_tokens(job_map.get("id_tokens"), job_name, &mut graph);
let job_image_str = job_map
.get("image")
.and_then(extract_image_str)
.or(global_image.as_deref().map(String::from));
let image_id = job_image_str.as_deref().map(|img| {
let pinned = is_docker_digest_pinned(img);
let trust_zone = if pinned {
TrustZone::ThirdParty
} else {
TrustZone::Untrusted
};
let mut imeta = HashMap::new();
if let Some(digest) = img.split("@sha256:").nth(1) {
imeta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
}
graph.add_node_with_metadata(NodeKind::Image, img, trust_zone, imeta)
});
let service_ids = process_services(job_map.get("services"), &mut graph);
let env_name = job_map
.get("environment")
.and_then(extract_environment_name);
let mut step_meta = HashMap::new();
step_meta.insert(META_JOB_NAME.into(), job_name.to_string());
if let Some(ref env) = env_name {
step_meta.insert("environment_name".into(), env.clone());
}
let step_id = graph.add_node_with_metadata(
NodeKind::Step,
job_name,
TrustZone::FirstParty,
step_meta,
);
graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
for &sid in global_secrets
.iter()
.chain(&job_secrets)
.chain(&explicit_secrets)
{
graph.add_edge(step_id, sid, EdgeKind::HasAccessTo);
}
for &iid in &oidc_identities {
graph.add_edge(step_id, iid, EdgeKind::HasAccessTo);
}
if let Some(img_id) = image_id {
graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
}
for &svc_id in &service_ids {
graph.add_edge(step_id, svc_id, EdgeKind::UsesImage);
}
}
Ok(graph)
}
}
/// Extracts an image reference from a YAML node.
///
/// A plain string is returned as-is; a mapping yields the string stored under
/// its `name` key. Every other shape (or a non-string `name`) gives `None`.
fn extract_image_str(v: &Value) -> Option<String> {
    if let Value::String(text) = v {
        return Some(text.to_owned());
    }
    if let Value::Mapping(fields) = v {
        let name = fields.get("name")?;
        return name.as_str().map(str::to_owned);
    }
    None
}
/// Extracts the environment name from an `environment:` node.
///
/// Accepts either a plain string or a mapping with a string `name` entry;
/// returns `None` for any other shape.
fn extract_environment_name(v: &Value) -> Option<String> {
    let name: &str = match v {
        Value::String(plain) => plain.as_str(),
        Value::Mapping(fields) => fields.get("name")?.as_str()?,
        _ => return None,
    };
    Some(name.to_string())
}
/// Reports whether `name`, once upper-cased, contains any entry of
/// `CRED_FRAGMENTS`.
fn is_credential_name(name: &str) -> bool {
    let normalized = name.to_uppercase();
    for fragment in CRED_FRAGMENTS {
        if normalized.contains(*fragment) {
            return true;
        }
    }
    false
}
/// Adds a first-party secret node for each credential-looking variable name.
///
/// `vars` is the optional `variables:` node; when it is absent or not a
/// mapping nothing is added. Only string keys are considered and the values
/// are never inspected. The returned ids follow the mapping's iteration order.
/// `scope` is accepted but currently not consulted.
fn process_variables(vars: Option<&Value>, graph: &mut AuthorityGraph, scope: &str) -> Vec<NodeId> {
    let _ = scope;
    let mut secret_ids = Vec::new();
    if let Some(entries) = vars.and_then(|v| v.as_mapping()) {
        for name in entries.iter().filter_map(|(key, _)| key.as_str()) {
            if is_credential_name(name) {
                secret_ids.push(graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty));
            }
        }
    }
    secret_ids
}
/// Adds a first-party secret node for every string key of a `secrets:` mapping.
///
/// Unlike `variables:`, names are not filtered: each declared entry becomes a
/// secret node. Returns an empty list when `secrets` is absent or not a
/// mapping. `_scope` is unused.
fn process_explicit_secrets(
    secrets: Option<&Value>,
    _scope: &str,
    graph: &mut AuthorityGraph,
) -> Vec<NodeId> {
    let declared = match secrets.and_then(|v| v.as_mapping()) {
        Some(entries) => entries,
        None => return Vec::new(),
    };
    declared
        .iter()
        .filter_map(|(key, _)| key.as_str())
        .map(|name| graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty))
        .collect()
}
/// Adds one OIDC identity node per entry of an `id_tokens:` mapping.
///
/// Each node is labelled `"<token name> (aud=<audience>)"` and tagged with
/// `META_OIDC = "true"` and `META_IDENTITY_SCOPE = "broad"`. The audience is
/// read from the entry's `aud` key, which may be a single string or a list of
/// strings; list entries are joined with `,`. When no string audience can be
/// found the label uses `unknown`.
///
/// Returns an empty list when `id_tokens` is absent or not a mapping. Entries
/// with non-string keys are skipped. `_scope` is unused.
fn process_id_tokens(
    id_tokens: Option<&Value>,
    _scope: &str,
    graph: &mut AuthorityGraph,
) -> Vec<NodeId> {
    let mut ids = Vec::new();
    let map = match id_tokens.and_then(|v| v.as_mapping()) {
        Some(m) => m,
        None => return ids,
    };
    for (k, v) in map {
        let token_name = match k.as_str() {
            Some(s) => s,
            None => continue,
        };
        let aud = v
            .as_mapping()
            .and_then(|m| m.get("aud"))
            .and_then(|a| match a.as_str() {
                Some(single) => Some(single.to_string()),
                // `aud` may also be a list of audiences; keep every string entry.
                None => a.as_sequence().and_then(|items| {
                    let joined = items
                        .iter()
                        .filter_map(|item| item.as_str())
                        .collect::<Vec<_>>()
                        .join(",");
                    if joined.is_empty() {
                        None
                    } else {
                        Some(joined)
                    }
                }),
            })
            .unwrap_or_else(|| "unknown".to_string());
        let label = format!("{token_name} (aud={aud})");
        let mut meta = HashMap::new();
        meta.insert(META_OIDC.into(), "true".into());
        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
        let id =
            graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta);
        ids.push(id);
    }
    ids
}
fn process_services(services: Option<&Value>, graph: &mut AuthorityGraph) -> Vec<NodeId> {
let mut ids = Vec::new();
let list = match services.and_then(|v| v.as_sequence()) {
Some(s) => s,
None => return ids,
};
for item in list {
let img_str = match extract_image_str(item) {
Some(s) => s,
None => continue,
};
let pinned = is_docker_digest_pinned(&img_str);
let trust_zone = if pinned {
TrustZone::ThirdParty
} else {
TrustZone::Untrusted
};
let mut meta = HashMap::new();
if let Some(digest) = img_str.split("@sha256:").nth(1) {
meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
}
let id = graph.add_node_with_metadata(NodeKind::Image, &img_str, trust_zone, meta);
ids.push(id);
}
ids
}
fn job_has_mr_trigger(job_map: &serde_yaml::Mapping) -> bool {
if let Some(rules) = job_map.get("rules").and_then(|v| v.as_sequence()) {
for rule in rules {
if let Some(if_expr) = rule
.as_mapping()
.and_then(|m| m.get("if"))
.and_then(|v| v.as_str())
{
if if_expr.contains("merge_request_event") {
return true;
}
}
}
}
if let Some(only) = job_map.get("only") {
if only_has_merge_requests(only) {
return true;
}
}
false
}
/// Reports whether an `only:` node lists `merge_requests`.
///
/// The refs are taken from the node itself when it is a sequence, or from its
/// `refs` key (if that is a sequence) when it is a mapping. Any other shape
/// yields `false`.
fn only_has_merge_requests(v: &Value) -> bool {
    let refs = match v {
        Value::Sequence(seq) => Some(seq),
        Value::Mapping(m) => m.get("refs").and_then(|r| r.as_sequence()),
        _ => None,
    };
    refs.map_or(false, |list| {
        list.iter()
            .any(|item| item.as_str() == Some("merge_requests"))
    })
}
/// Reports whether a `workflow:` node has a rule mentioning merge requests.
///
/// Looks at the `rules` sequence of the workflow mapping and returns true when
/// any rule has a string `if` containing `merge_request_event`. A missing or
/// differently shaped `rules` yields `false`.
fn has_mr_trigger_in_workflow(wf: &Value) -> bool {
    let rules = wf
        .as_mapping()
        .and_then(|m| m.get("rules"))
        .and_then(|r| r.as_sequence());
    match rules {
        None => false,
        Some(list) => list
            .iter()
            .filter_map(|rule| rule.as_mapping()?.get("if")?.as_str())
            .any(|cond| cond.contains("merge_request_event")),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` with [`GitlabParser`] against a fixed `.gitlab-ci.yml`
    /// source and panics on parse failure.
    ///
    /// The YAML fixtures below start at column 0 inside their raw strings:
    /// nesting indentation is significant to the parser, so job keys must be
    /// indented beneath their job name.
    fn parse(yaml: &str) -> AuthorityGraph {
        let parser = GitlabParser;
        let source = PipelineSource {
            file: ".gitlab-ci.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        parser.parse(yaml, &source).unwrap()
    }

    /// A pipeline without explicit identities still gets the implicit job token.
    #[test]
    fn ci_job_token_always_present() {
        let yaml = r#"
stages:
  - build
build-job:
  stage: build
  script:
    - make build
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);
        assert_eq!(identities[0].name, "CI_JOB_TOKEN");
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IMPLICIT)
                .map(String::as_str),
            Some("true")
        );
        assert_eq!(
            identities[0]
                .metadata
                .get(META_IDENTITY_SCOPE)
                .map(String::as_str),
            Some("broad")
        );
    }

    /// Only credential-looking global variables become secret nodes.
    #[test]
    fn global_credential_variable_emits_secret_node() {
        let yaml = r#"
variables:
  APP_VERSION: "1.0"
  DEPLOY_TOKEN: "$CI_DEPLOY_TOKEN"
build-job:
  script:
    - make
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|s| s.name == "DEPLOY_TOKEN"),
            "DEPLOY_TOKEN must emit a Secret node, got: {:?}",
            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert!(
            !secrets.iter().any(|s| s.name == "APP_VERSION"),
            "APP_VERSION must not emit a Secret node"
        );
    }

    /// A tag-based image reference is classified as untrusted.
    #[test]
    fn floating_image_emits_untrusted_image_node() {
        let yaml = r#"
deploy:
  image: alpine:latest
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "alpine:latest");
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }

    /// A digest-pinned image reference is classified as third party.
    #[test]
    fn digest_pinned_image_is_third_party() {
        let yaml = r#"
deploy:
  image: "alpine@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }

    /// Each `id_tokens` entry yields an identity node tagged as OIDC.
    #[test]
    fn id_tokens_emit_oidc_identity_nodes() {
        let yaml = r#"
deploy:
  id_tokens:
    SIGSTORE_ID_TOKEN:
      aud: sigstore
    AWS_OIDC_TOKEN:
      aud: https://sts.amazonaws.com
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let oidc: Vec<_> = graph
            .nodes_of_kind(NodeKind::Identity)
            .filter(|n| n.metadata.get(META_OIDC).map(String::as_str) == Some("true"))
            .collect();
        assert_eq!(
            oidc.len(),
            2,
            "expected 2 OIDC identity nodes, got: {:?}",
            oidc.iter().map(|n| &n.name).collect::<Vec<_>>()
        );
    }

    /// Every key under a job's `secrets:` becomes a secret node.
    #[test]
    fn explicit_secrets_emit_secret_nodes() {
        let yaml = r#"
deploy:
  secrets:
    DATABASE_PASSWORD:
      vault: production/db/password@secret
    AWS_KEY:
      aws_secrets_manager:
        name: my-secret
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        let names: Vec<_> = secrets.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"DATABASE_PASSWORD"), "got: {names:?}");
        assert!(names.contains(&"AWS_KEY"), "got: {names:?}");
    }

    /// A job rule mentioning `merge_request_event` sets the trigger metadata.
    #[test]
    fn rules_mr_trigger_sets_meta_trigger() {
        let yaml = r#"
test:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request"),
            "META_TRIGGER must be set to merge_request"
        );
    }

    /// `only: [merge_requests]` sets the trigger metadata.
    #[test]
    fn only_merge_requests_sets_meta_trigger() {
        let yaml = r#"
test:
  only:
    - merge_requests
  script:
    - run tests
"#;
        let graph = parse(yaml);
        assert_eq!(
            graph.metadata.get(META_TRIGGER).map(String::as_str),
            Some("merge_request")
        );
    }

    /// An unresolved `include:` makes the graph partial.
    #[test]
    fn include_marks_graph_partial() {
        let yaml = r#"
include:
  - local: '/templates/.base.yml'
build:
  script:
    - make
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// Hidden jobs and `extends:` make the graph partial.
    #[test]
    fn extends_marks_graph_partial() {
        let yaml = r#"
.base:
  script:
    - echo base
my-job:
  extends: .base
  stage: build
"#;
        let graph = parse(yaml);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// Every step node carries its job name in `META_JOB_NAME`.
    #[test]
    fn meta_job_name_set_on_step_nodes() {
        let yaml = r#"
build:
  script:
    - make
deploy:
  script:
    - deploy.sh
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
        for step in &steps {
            assert!(
                step.metadata.contains_key(META_JOB_NAME),
                "Step '{}' missing META_JOB_NAME",
                step.name
            );
        }
        let names: Vec<_> = steps
            .iter()
            .map(|s| s.metadata.get(META_JOB_NAME).unwrap().as_str())
            .collect();
        assert!(names.contains(&"build"), "got: {names:?}");
        assert!(names.contains(&"deploy"), "got: {names:?}");
    }

    /// Reserved top-level keywords are not turned into step nodes.
    #[test]
    fn reserved_keywords_not_parsed_as_jobs() {
        let yaml = r#"
stages:
  - build
  - test
variables:
  MY_VAR: value
image: alpine:latest
build:
  stage: build
  script:
    - make
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(
            steps.len(),
            1,
            "only 'build' should be a Step, got: {:?}",
            steps.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
        assert_eq!(steps[0].name, "build");
    }

    /// Both string and `name:`-mapping service entries yield image nodes.
    #[test]
    fn services_emit_image_nodes() {
        let yaml = r#"
test:
  services:
    - docker:dind
    - name: postgres:14
  script:
    - run_tests
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(
            images.len(),
            2,
            "expected 2 service Image nodes, got: {:?}",
            images.iter().map(|i| &i.name).collect::<Vec<_>>()
        );
    }
}