1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use serde_yaml::Value;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Parser for Bitbucket Pipelines YAML files.
///
/// Implements [`PipelineParser`]: it reports the platform as `"bitbucket"` and
/// turns the file contents into an [`AuthorityGraph`].
pub struct BitbucketParser;
10
/// Upper-case name fragments that mark an environment variable as a likely
/// credential.
///
/// `is_credential_name` looks for these fragments in the upper-cased variable
/// name and only accepts a hit that is bounded by `_` or by the ends of the name.
const CRED_FRAGMENTS: &[&str] = &[
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    "PRIVATE_KEY",
    "API_KEY",
    "APIKEY",
    "SIGNING_KEY",
    "ACCESS_KEY",
    "SERVICE_ACCOUNT",
    "CERT",
    "CREDENTIAL",
    "KEYSTORE",
    "SSH_KEY",
];
27
28impl PipelineParser for BitbucketParser {
29 fn platform(&self) -> &str {
30 "bitbucket"
31 }
32
33 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
34 let (root, extra_docs, duplicate_recovery_note) = match parse_bitbucket_yaml_value(content)
35 {
36 Ok((root, extra_docs)) => (root, extra_docs, None),
37 Err(e) if is_duplicate_key_parse_error(&e) => {
38 let sanitized = sanitize_duplicate_mapping_keys(content);
39 let note = format!(
40 "Bitbucket YAML contained duplicate mapping keys; later duplicates were preserved as opaque __taudit_duplicate_* keys during recovery ({e})"
41 );
42 let (root, extra_docs) = parse_bitbucket_yaml_value(&sanitized)
43 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
44 (root, extra_docs, Some(note))
45 }
46 Err(e) => return Err(TauditError::Parse(format!("YAML parse error: {e}"))),
47 };
48
49 let mapping = root.as_mapping().ok_or_else(|| {
50 TauditError::Parse("Bitbucket Pipelines root must be a mapping".into())
51 })?;
52
53 let mut graph = AuthorityGraph::new(source.clone());
54 graph
55 .metadata
56 .insert(META_PLATFORM.into(), "bitbucket".into());
57 if extra_docs {
58 graph.mark_partial(
59 GapKind::Expression,
60 "file contains multiple YAML documents (--- separator) — only the first was analyzed"
61 .to_string(),
62 );
63 }
64 if let Some(note) = duplicate_recovery_note {
65 graph.mark_partial(GapKind::Structural, note);
66 }
67
68 let definitions = mapping.get("definitions");
69 let service_images = collect_defined_services(definitions);
70 let global_image = mapping.get("image").and_then(extract_image_str);
71 let Some(pipelines) = mapping.get("pipelines").and_then(|v| v.as_mapping()) else {
72 graph.mark_partial(
73 GapKind::Structural,
74 "Bitbucket file has no top-level pipelines: mapping".to_string(),
75 );
76 graph.stamp_edge_authority_summaries();
77 return Ok(graph);
78 };
79
80 let mut secret_ids = HashMap::new();
81 let mut prior_artifacts = Vec::new();
82 let mut triggers = HashSet::new();
83 let mut contexts = Vec::new();
84 collect_pipeline_contexts(pipelines, &mut contexts, &mut triggers);
85
86 if !triggers.is_empty() {
87 let mut list: Vec<_> = triggers.into_iter().collect();
88 list.sort();
89 if list.contains(&"pull_request") {
90 graph
91 .metadata
92 .insert(META_TRIGGER.into(), "pull_request".into());
93 }
94 graph.metadata.insert(META_TRIGGERS.into(), list.join(","));
95 }
96
97 for ctx in contexts {
98 process_step_carrier(
99 ctx.value,
100 &ctx.name,
101 ctx.trigger,
102 global_image.as_deref(),
103 &service_images,
104 &mut graph,
105 &mut secret_ids,
106 &mut prior_artifacts,
107 );
108 }
109
110 graph.stamp_edge_authority_summaries();
111 Ok(graph)
112 }
113}
114
115fn parse_bitbucket_yaml_value(content: &str) -> Result<(Value, bool), serde_yaml::Error> {
116 let mut de = serde_yaml::Deserializer::from_str(content);
117 let Some(doc) = de.next() else {
118 return Ok((Value::Null, false));
119 };
120 let root = Value::deserialize(doc)?;
121 Ok((root, de.next().is_some()))
122}
123
124fn is_duplicate_key_parse_error(error: &serde_yaml::Error) -> bool {
125 error.to_string().contains("duplicate entry with key")
126}
127
/// One pipeline body discovered under the top-level `pipelines:` mapping.
#[derive(Clone)]
struct PipelineContext<'a> {
    /// Label of the pipeline: `"default"`, or `"<section>:<pattern>"` for
    /// entries under `branches`, `tags`, `pull-requests` and `custom`.
    name: String,
    /// Trigger label derived from the section: `"push"`, `"tag"`,
    /// `"pull_request"` or `"manual"`.
    trigger: &'static str,
    /// The YAML value that carries the pipeline's steps.
    value: &'a Value,
}
134
135fn collect_pipeline_contexts<'a>(
136 pipelines: &'a serde_yaml::Mapping,
137 out: &mut Vec<PipelineContext<'a>>,
138 triggers: &mut HashSet<&'static str>,
139) {
140 for (key, value) in pipelines {
141 let Some(kind) = key.as_str() else {
142 continue;
143 };
144 match kind {
145 "default" => {
146 triggers.insert("push");
147 out.push(PipelineContext {
148 name: "default".into(),
149 trigger: "push",
150 value,
151 });
152 }
153 "branches" | "tags" | "pull-requests" | "custom" => {
154 let trigger = match kind {
155 "pull-requests" => "pull_request",
156 "custom" => "manual",
157 "tags" => "tag",
158 _ => "push",
159 };
160 triggers.insert(trigger);
161 if let Some(map) = value.as_mapping() {
162 for (pattern, body) in map {
163 let label = pattern.as_str().unwrap_or("*");
164 out.push(PipelineContext {
165 name: format!("{kind}:{label}"),
166 trigger,
167 value: body,
168 });
169 }
170 }
171 }
172 _ => {
173 graphless_ignore(value);
174 }
175 }
176 }
177}
178
/// Deliberately does nothing: it is called for `pipelines:` sections this
/// parser does not recognise, so they contribute nothing to the graph.
fn graphless_ignore(_: &Value) {}
180
/// Rewrites repeated mapping keys so YAML with duplicate keys can be parsed.
///
/// This is a line-based heuristic, not a YAML parser. Every line that looks
/// like `key: ...` (optionally introduced by `- `) is attributed to the block
/// mapping identified by the column of its key. The first occurrence of a key
/// in a mapping is kept; each later occurrence is renamed to
/// `__taudit_duplicate_<fragment>_<n>` (with `n` starting at 2), leaving the
/// rest of the line untouched. All other lines are copied verbatim.
///
/// Two structural rules keep the heuristic from misfiring:
///
/// * A sequence item (`-` or `- ...`) starts a new node, so every mapping that
///   is indented deeper than the dash is closed. Without this, the second
///   `- step:` of a pipeline would be reported as a duplicate of the first.
/// * Lines that belong to a block scalar (`key: |`, `key: >`, `- |`, `- >`)
///   are copied as-is, so script text such as `echo a: 1` is never mistaken
///   for a mapping key.
///
/// Lines are joined with `\n`; a trailing newline is kept when `content` ends
/// with one.
fn sanitize_duplicate_mapping_keys(content: &str) -> String {
    /// Keys already seen in one open block mapping.
    struct Frame {
        /// Column at which the mapping's keys start.
        indent: usize,
        keys: HashSet<String>,
    }

    /// True when the text following `key:` or `- ` opens a block scalar.
    fn starts_block_scalar(rest: &str) -> bool {
        matches!(rest.trim_start().chars().next(), Some('|' | '>'))
    }

    let mut out: Vec<String> = Vec::new();
    // Open mappings, ordered by strictly increasing indent.
    let mut frames: Vec<Frame> = Vec::new();
    let mut duplicate_counts: HashMap<(usize, String), usize> = HashMap::new();
    // Indent of the node that owns the block scalar currently being skipped.
    let mut block_scalar_indent: Option<usize> = None;

    for line in content.lines() {
        let indent = line.chars().take_while(|c| *c == ' ').count();
        let trimmed = &line[indent..];

        if let Some(block_indent) = block_scalar_indent {
            if !trimmed.is_empty() && indent <= block_indent {
                // First non-blank line at or left of the owner ends the scalar.
                block_scalar_indent = None;
            } else {
                out.push(line.to_string());
                continue;
            }
        }

        if trimmed.is_empty() || trimmed.starts_with('#') {
            out.push(line.to_string());
            continue;
        }

        // `Some(rest)` when the line is a sequence item; `rest` is what follows
        // the dash.
        let item_rest = if trimmed.trim_end() == "-" {
            Some("")
        } else {
            trimmed.strip_prefix("- ")
        };
        if item_rest.is_some() {
            // A new item closes every mapping nested below the dash, including
            // the mapping started by the previous item of the same sequence.
            while frames.last().is_some_and(|frame| frame.indent > indent) {
                frames.pop();
            }
        }

        let Some((key_indent, key_start, key_end, key)) = yaml_mapping_key_span(line, indent)
        else {
            if item_rest.is_some_and(starts_block_scalar) {
                // `- |` / `- >`: the scalar body is indented deeper than the dash.
                block_scalar_indent = Some(indent);
            }
            out.push(line.to_string());
            continue;
        };

        while frames.last().is_some_and(|frame| frame.indent > key_indent) {
            frames.pop();
        }
        if !frames.last().is_some_and(|frame| frame.indent == key_indent) {
            frames.push(Frame {
                indent: key_indent,
                keys: HashSet::new(),
            });
        }
        let frame = frames
            .last_mut()
            .expect("a frame for this indent exists or was just pushed");

        if frame.keys.insert(key.clone()) {
            out.push(line.to_string());
        } else {
            let count = duplicate_counts
                .entry((key_indent, key.clone()))
                .and_modify(|n| *n += 1)
                .or_insert(2);
            let replacement = format!(
                "__taudit_duplicate_{}_{}",
                sanitize_key_fragment(&key),
                count
            );
            let mut rewritten = String::with_capacity(line.len() + replacement.len());
            rewritten.push_str(&line[..key_start]);
            rewritten.push_str(&replacement);
            rewritten.push_str(&line[key_end..]);
            out.push(rewritten);
        }

        // `key: |` / `key: >` (any amount of space after the colon) opens a
        // block scalar owned by this key.
        let after_key = line[key_end..].trim_start();
        if after_key.strip_prefix(':').is_some_and(starts_block_scalar) {
            block_scalar_indent = Some(key_indent);
        }
    }

    let mut sanitized = out.join("\n");
    if content.ends_with('\n') {
        sanitized.push('\n');
    }
    sanitized
}

/// Locates the mapping key on a single YAML line, if the line has one.
///
/// `indent` is the number of leading spaces of `line`. A leading `- ` is
/// skipped, in which case the key column is `indent + 2`. The key ends at the
/// first `:` that is outside quotes and brackets and is followed by whitespace
/// or the end of the line.
///
/// Returns `(key_indent, key_start, key_end, key)`, where `key_start..key_end`
/// is the byte range of the trimmed key within `line`. Returns `None` for
/// comment lines, lines without such a colon, and lines whose key is empty.
fn yaml_mapping_key_span(line: &str, indent: usize) -> Option<(usize, usize, usize, String)> {
    let trimmed = line.get(indent..)?;
    if trimmed.starts_with('#') {
        return None;
    }

    let (key_indent, key_start, key_text) = match trimmed.strip_prefix("- ") {
        Some(rest) => (indent + 2, indent + 2, rest),
        None => (indent, indent, trimmed),
    };

    let mut in_single = false;
    let mut in_double = false;
    let mut bracket_depth = 0i32;
    let mut prev = '\0';
    for (offset, ch) in key_text.char_indices() {
        match ch {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single && prev != '\\' => in_double = !in_double,
            '[' | '{' if !in_single && !in_double => bracket_depth += 1,
            ']' | '}' if !in_single && !in_double => bracket_depth -= 1,
            ':' if !in_single && !in_double && bracket_depth == 0 => {
                // A colon glued to the next character (`http://`, `12:30`) is
                // part of a scalar, not a key separator.
                let ends_key = match key_text[offset + ch.len_utf8()..].chars().next() {
                    None => true,
                    Some(next) => next.is_whitespace(),
                };
                if ends_key {
                    let raw = &key_text[..offset];
                    let key = raw.trim();
                    if key.is_empty() {
                        return None;
                    }
                    let leading = raw.len() - raw.trim_start().len();
                    let trailing = raw.trim_end().len();
                    return Some((
                        key_indent,
                        key_start + leading,
                        key_start + trailing,
                        key.to_string(),
                    ));
                }
            }
            _ => {}
        }
        prev = ch;
    }
    None
}

/// Reduces a key to a lowercase identifier fragment for replacement keys.
///
/// ASCII letters and digits are kept (lower-cased); every run of other
/// characters becomes a single `_`; leading and trailing underscores are
/// dropped; the result is cut to at most 48 characters.
fn sanitize_key_fragment(key: &str) -> String {
    let mut out = String::with_capacity(key.len());
    for c in key.chars() {
        if c.is_ascii_alphanumeric() {
            out.push(c.to_ascii_lowercase());
        } else if !out.is_empty() && !out.ends_with('_') {
            // Collapse runs of separators and skip leading ones while building.
            out.push('_');
        }
    }
    out.trim_end_matches('_').chars().take(48).collect()
}
330
331fn process_step_carrier(
332 value: &Value,
333 context: &str,
334 trigger: &'static str,
335 global_image: Option<&str>,
336 service_images: &HashMap<String, String>,
337 graph: &mut AuthorityGraph,
338 secret_ids: &mut HashMap<String, NodeId>,
339 prior_artifacts: &mut Vec<NodeId>,
340) {
341 match value {
342 Value::Sequence(seq) => {
343 for item in seq {
344 process_step_carrier(
345 item,
346 context,
347 trigger,
348 global_image,
349 service_images,
350 graph,
351 secret_ids,
352 prior_artifacts,
353 );
354 }
355 }
356 Value::Mapping(map) => {
357 if let Some(step) = map.get("step") {
358 process_step(
359 step,
360 context,
361 trigger,
362 global_image,
363 service_images,
364 graph,
365 secret_ids,
366 prior_artifacts,
367 );
368 } else if let Some(parallel) = map.get("parallel") {
369 if let Some(steps) = parallel.get("steps") {
370 process_step_carrier(
371 steps,
372 context,
373 trigger,
374 global_image,
375 service_images,
376 graph,
377 secret_ids,
378 prior_artifacts,
379 );
380 } else {
381 process_step_carrier(
382 parallel,
383 context,
384 trigger,
385 global_image,
386 service_images,
387 graph,
388 secret_ids,
389 prior_artifacts,
390 );
391 }
392 }
393 }
394 _ => {}
395 }
396}
397
398fn process_step(
399 value: &Value,
400 context: &str,
401 trigger: &'static str,
402 global_image: Option<&str>,
403 service_images: &HashMap<String, String>,
404 graph: &mut AuthorityGraph,
405 secret_ids: &mut HashMap<String, NodeId>,
406 prior_artifacts: &mut Vec<NodeId>,
407) {
408 let Some(map) = value.as_mapping() else {
409 graph.mark_partial(
410 GapKind::Structural,
411 format!("Bitbucket step in {context} is not a mapping"),
412 );
413 return;
414 };
415
416 let name = map
417 .get("name")
418 .and_then(|v| v.as_str())
419 .unwrap_or(context)
420 .to_string();
421 let mut meta = HashMap::new();
422 meta.insert(META_JOB_NAME.into(), context.to_string());
423 meta.insert(META_TRIGGER.into(), trigger.to_string());
424 if let Some(deployment) = map.get("deployment").and_then(|v| v.as_str()) {
425 meta.insert(META_ENVIRONMENT_NAME.into(), deployment.to_string());
426 if is_protected_deployment_name(deployment) {
427 meta.insert(META_ENV_APPROVAL.into(), "true".into());
428 }
429 }
430 let script_body = extract_script_body(map.get("script"));
431 if !script_body.is_empty() {
432 meta.insert(META_SCRIPT_BODY.into(), script_body.clone());
433 }
434 if map.get("oidc").and_then(|v| v.as_bool()) == Some(true) {
435 meta.insert(META_OIDC.into(), "true".into());
436 }
437 if step_looks_self_hosted(map) {
438 meta.insert(META_SELF_HOSTED.into(), "true".into());
439 }
440
441 let step_id = graph.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, meta);
442
443 for artifact_id in prior_artifacts.iter().copied() {
444 graph.add_edge(artifact_id, step_id, EdgeKind::Consumes);
445 }
446
447 if map.get("oidc").and_then(|v| v.as_bool()) == Some(true) {
448 let mut id_meta = HashMap::new();
449 id_meta.insert(META_OIDC.into(), "true".into());
450 id_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
451 id_meta.insert(META_IMPLICIT.into(), "true".into());
452 let id = graph.add_node_with_metadata(
453 NodeKind::Identity,
454 "BITBUCKET_STEP_OIDC_TOKEN",
455 TrustZone::FirstParty,
456 id_meta,
457 );
458 graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
459 }
460
461 let step_image = map
462 .get("image")
463 .and_then(extract_image_str)
464 .or_else(|| global_image.map(str::to_string));
465 if let Some(image) = step_image {
466 let image_id = add_image(graph, &image);
467 graph.add_edge(step_id, image_id, EdgeKind::UsesImage);
468 }
469
470 if let Some(services) = map.get("services").and_then(|v| v.as_sequence()) {
471 for service in services {
472 let Some(name) = service.as_str() else {
473 continue;
474 };
475 let image = service_images.get(name).cloned().unwrap_or_else(|| {
476 if name == "docker" {
477 "docker:dind".into()
478 } else {
479 name.into()
480 }
481 });
482 let image_id = add_image(graph, &image);
483 graph.add_edge(step_id, image_id, EdgeKind::UsesImage);
484 }
485 }
486
487 for pipe in extract_pipe_refs(map.get("script")) {
488 let image_id = add_image(graph, &pipe);
489 graph.add_edge(step_id, image_id, EdgeKind::UsesImage);
490 }
491
492 for secret_name in extract_env_secret_refs(&script_body) {
493 let secret_id = find_or_create_secret(graph, secret_ids, &secret_name);
494 graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
495 }
496
497 if let Some(artifacts) = map.get("artifacts") {
498 for artifact in extract_artifact_names(artifacts) {
499 let artifact_id = graph.add_node(NodeKind::Artifact, artifact, TrustZone::FirstParty);
500 graph.add_edge(step_id, artifact_id, EdgeKind::Produces);
501 prior_artifacts.push(artifact_id);
502 }
503 }
504}
505
506fn add_image(graph: &mut AuthorityGraph, image: &str) -> NodeId {
507 let trust_zone = if is_docker_digest_pinned(image) {
508 TrustZone::ThirdParty
509 } else {
510 TrustZone::Untrusted
511 };
512 let mut meta = HashMap::new();
513 if let Some(digest) = image.split("@sha256:").nth(1) {
514 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
515 }
516 graph.add_node_with_metadata(NodeKind::Image, image, trust_zone, meta)
517}
518
519fn extract_image_str(value: &Value) -> Option<String> {
520 match value {
521 Value::String(s) => Some(s.clone()),
522 Value::Mapping(m) => m.get("name").and_then(|v| v.as_str()).map(str::to_string),
523 _ => None,
524 }
525}
526
527fn collect_defined_services(definitions: Option<&Value>) -> HashMap<String, String> {
528 let mut out = HashMap::new();
529 let Some(services) = definitions
530 .and_then(|v| v.as_mapping())
531 .and_then(|m| m.get("services"))
532 .and_then(|v| v.as_mapping())
533 else {
534 return out;
535 };
536 for (name, body) in services {
537 let Some(name) = name.as_str() else {
538 continue;
539 };
540 if let Some(image) = body
541 .as_mapping()
542 .and_then(|m| m.get("image"))
543 .and_then(extract_image_str)
544 {
545 out.insert(name.to_string(), image);
546 } else if name == "docker" {
547 out.insert(name.to_string(), "docker:dind".into());
548 }
549 }
550 out
551}
552
553fn extract_script_body(value: Option<&Value>) -> String {
554 let mut lines = Vec::new();
555 collect_script_lines(value, &mut lines);
556 lines.join("\n")
557}
558
559fn collect_script_lines(value: Option<&Value>, out: &mut Vec<String>) {
560 match value {
561 Some(Value::String(s)) => out.push(s.clone()),
562 Some(Value::Sequence(seq)) => {
563 for item in seq {
564 if let Some(s) = item.as_str() {
565 out.push(s.to_string());
566 } else if let Some(pipe) = item
567 .as_mapping()
568 .and_then(|m| m.get("pipe"))
569 .and_then(|v| v.as_str())
570 {
571 out.push(format!("pipe: {pipe}"));
572 }
573 }
574 }
575 _ => {}
576 }
577}
578
579fn extract_pipe_refs(value: Option<&Value>) -> Vec<String> {
580 let mut out = Vec::new();
581 let Some(Value::Sequence(seq)) = value else {
582 return out;
583 };
584 for item in seq {
585 if let Some(pipe) = item
586 .as_mapping()
587 .and_then(|m| m.get("pipe"))
588 .and_then(|v| v.as_str())
589 {
590 out.push(pipe.to_string());
591 }
592 }
593 out
594}
595
596fn extract_artifact_names(value: &Value) -> Vec<String> {
597 match value {
598 Value::Sequence(seq) => seq
599 .iter()
600 .filter_map(|v| v.as_str().map(str::to_string))
601 .collect(),
602 Value::Mapping(map) => map
603 .get("paths")
604 .and_then(|v| v.as_sequence())
605 .map(|seq| {
606 seq.iter()
607 .filter_map(|v| v.as_str().map(str::to_string))
608 .collect()
609 })
610 .unwrap_or_default(),
611 _ => Vec::new(),
612 }
613}
614
/// Heuristic: does a deployment environment name look like a protected one?
///
/// True when the name contains `prod`, `stag` or `deploy`, compared without
/// regard to ASCII case.
fn is_protected_deployment_name(name: &str) -> bool {
    const MARKERS: [&str; 3] = ["prod", "stag", "deploy"];
    let lowered = name.to_ascii_lowercase();
    MARKERS.iter().any(|marker| lowered.contains(*marker))
}
619
620fn step_looks_self_hosted(map: &serde_yaml::Mapping) -> bool {
621 map.get("runs-on")
622 .and_then(|v| v.as_str())
623 .map(|s| s.to_ascii_lowercase().contains("self"))
624 .unwrap_or(false)
625}
626
627fn extract_env_secret_refs(body: &str) -> Vec<String> {
628 let mut out = Vec::new();
629 let bytes = body.as_bytes();
630 let mut i = 0;
631 while i < bytes.len() {
632 if bytes[i] != b'$' {
633 i += 1;
634 continue;
635 }
636 let mut j = i + 1;
637 if j < bytes.len() && bytes[j] == b'{' {
638 j += 1;
639 let start = j;
640 while j < bytes.len() && is_var_char(bytes[j]) {
641 j += 1;
642 }
643 if j < bytes.len() && bytes[j] == b'}' {
644 let name = &body[start..j];
645 if is_credential_name(name) {
646 out.push(name.to_string());
647 }
648 i = j + 1;
649 continue;
650 }
651 } else {
652 let start = j;
653 while j < bytes.len() && is_var_char(bytes[j]) {
654 j += 1;
655 }
656 if j > start {
657 let name = &body[start..j];
658 if is_credential_name(name) {
659 out.push(name.to_string());
660 }
661 i = j;
662 continue;
663 }
664 }
665 i += 1;
666 }
667 out.sort();
668 out.dedup();
669 out
670}
671
/// True for bytes that may appear in a shell variable name: ASCII letters,
/// ASCII digits and `_`.
fn is_var_char(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}
675
676fn is_credential_name(name: &str) -> bool {
677 let upper = name.to_ascii_uppercase();
678 let bytes = upper.as_bytes();
679 CRED_FRAGMENTS.iter().any(|frag| {
680 let frag_bytes = frag.as_bytes();
681 let n = frag_bytes.len();
682 if bytes.len() < n {
683 return false;
684 }
685 for i in 0..=bytes.len() - n {
686 if &bytes[i..i + n] != frag_bytes {
687 continue;
688 }
689 let left_ok = i == 0 || bytes[i - 1] == b'_';
690 let right_ok = i + n == bytes.len() || bytes[i + n] == b'_';
691 if left_ok && right_ok {
692 return true;
693 }
694 }
695 false
696 })
697}
698
699fn find_or_create_secret(
700 graph: &mut AuthorityGraph,
701 cache: &mut HashMap<String, NodeId>,
702 name: &str,
703) -> NodeId {
704 if let Some(&id) = cache.get(name) {
705 return id;
706 }
707 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
708 cache.insert(name.to_string(), id);
709 id
710}
711
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` with [`BitbucketParser`] using a fixed source descriptor
    /// and panics if parsing fails.
    fn parse(yaml: &str) -> AuthorityGraph {
        let parser = BitbucketParser;
        let source = PipelineSource {
            file: "bitbucket-pipelines.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        };
        parser.parse(yaml, &source).unwrap()
    }

    /// A pull-request pipeline with one OIDC step: checks the platform and
    /// trigger metadata and the step, identity, secret and image nodes.
    #[test]
    fn parses_step_image_script_oidc_and_secret_refs() {
        let yaml = r#"
image: node:20
pipelines:
  pull-requests:
    "**":
      - step:
          name: test
          oidc: true
          script:
            - echo $DEPLOY_TOKEN
"#;
        let graph = parse(yaml);
        assert_eq!(graph.metadata.get(META_PLATFORM).unwrap(), "bitbucket");
        assert_eq!(graph.metadata.get(META_TRIGGER).unwrap(), "pull_request");
        assert_eq!(graph.nodes_of_kind(NodeKind::Step).count(), 1);
        assert!(graph
            .nodes_of_kind(NodeKind::Identity)
            .any(|n| n.name == "BITBUCKET_STEP_OIDC_TOKEN"));
        assert!(graph
            .nodes_of_kind(NodeKind::Secret)
            .any(|n| n.name == "DEPLOY_TOKEN"));
        assert!(graph
            .nodes_of_kind(NodeKind::Image)
            .any(|n| n.name == "node:20"));
    }

    /// A default pipeline with a docker service, a pipe and an artifact:
    /// checks the resulting image and artifact nodes and that the later step
    /// consumes the artifact.
    #[test]
    fn parses_pipes_services_and_artifacts() {
        let yaml = r#"
definitions:
  services:
    docker:
      memory: 2048
pipelines:
  default:
    - step:
        name: build
        services: [docker]
        script:
          - pipe: atlassian/aws-s3-deploy:1.1.0
        artifacts:
          - dist/**
    - step:
        name: deploy
        script:
          - cat dist/file
"#;
        let graph = parse(yaml);
        assert!(graph
            .nodes_of_kind(NodeKind::Image)
            .any(|n| n.name == "docker:dind"));
        assert!(graph
            .nodes_of_kind(NodeKind::Image)
            .any(|n| n.name == "atlassian/aws-s3-deploy:1.1.0"));
        assert!(graph
            .nodes_of_kind(NodeKind::Artifact)
            .any(|n| n.name == "dist/**"));
        assert!(graph.edges.iter().any(|e| e.kind == EdgeKind::Consumes));
    }
}