use std::{
collections::{HashMap, HashSet},
fs,
num::NonZeroUsize,
ops::Deref,
str::FromStr,
};
use anyhow::{Context as _, anyhow};
use camino::Utf8Path;
use github_actions_models::common::RepositoryUses;
use serde::{
Deserialize,
de::{self, DeserializeOwned},
};
use thiserror::Error;
#[cfg(feature = "schema")]
pub mod schema;
use crate::{
App, CollectionOptions,
audit::{
AuditCore, dependabot_cooldown::DependabotCooldown, forbidden_uses::ForbiddenUses,
secrets_outside_env::SecretsOutsideEnvironment, unpinned_uses::UnpinnedUses,
},
finding::Finding,
github::{Client, ClientError},
models::uses::RepositoryUsesPattern,
registry::input::RepoSlug,
};
/// Relative paths that are probed when looking for a configuration file.
///
/// Both `Config::discover_in_dir` and `Config::discover_remote` walk this
/// slice in order and use the first candidate that exists, so earlier
/// entries take precedence over later ones.
const CONFIG_CANDIDATES: &[&str] = &[
".github/zizmor.yml",
".github/zizmor.yaml",
"zizmor.yml",
"zizmor.yaml",
];
/// A configuration failure tagged with the location it is reported against.
///
/// `path` is interpolated into the top-level message, while the specific
/// cause is carried in `source`.
#[derive(Error, Debug)]
#[error("configuration error in {path}")]
pub(crate) struct ConfigError {
/// The input path, config file path, or remote candidate name that the
/// error is reported against.
path: String,
/// The underlying cause of the failure.
pub(crate) source: ConfigErrorInner,
}
/// The specific reason a configuration could not be discovered or loaded.
#[derive(Error, Debug)]
pub(crate) enum ConfigErrorInner {
/// A filesystem operation failed, e.g. canonicalizing the search
/// directory or reading a candidate file.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// The configuration as a whole failed to deserialize from YAML.
#[error("invalid configuration syntax")]
Syntax(#[source] serde_yaml::Error),
/// The `config` mapping of a single audit failed to deserialize; the
/// second field is that audit's identifier.
#[error("invalid syntax for audit `{1}`")]
AuditSyntax(#[source] serde_yaml::Error, &'static str),
/// The `unpinned-uses` configuration deserialized, but could not be
/// converted into `UnpinnedUsesPolicies`.
#[error("invalid `unpinned-uses` config")]
UnpinnedUsesConfig(#[from] UnpinnedUsesConfigError),
/// The GitHub client failed while fetching a remote configuration.
#[error("GitHub API error while fetching remote config")]
Client(#[from] ClientError),
}
// A single ignore rule of the form `filename[:line[:column]]`.
//
// Values are produced by the `FromStr` implementation, which requires the
// filename to end in `.yml` or `.yaml` and the line/column to be non-zero.
//
// NOTE: plain `//` comments are used on purpose; doc comments on a type
// deriving `JsonSchema` would be emitted into the generated schema.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(
feature = "schema",
derive(schemars::JsonSchema),
schemars(with = "String", extend("pattern" = r"^[^:]+\.ya?ml(:[1-9][0-9]*)?(:[1-9][0-9]*)?$"))
)]
pub(crate) struct WorkflowRule {
// The filename that a finding's location must match.
pub(crate) filename: String,
// Optional 1-based line number to match.
pub(crate) line: Option<usize>,
// Optional 1-based column number to match; only parsed after a line.
pub(crate) column: Option<usize>,
}
impl FromStr for WorkflowRule {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self, Self::Err> {
let parts = s.rsplitn(3, ':').collect::<Vec<_>>();
let mut parts = parts.iter().rev();
let filename = parts
.next()
.ok_or_else(|| anyhow!("rule is missing a filename component"))?;
if !filename.ends_with(".yml") && !filename.ends_with(".yaml") {
return Err(anyhow!("invalid workflow filename: {filename}"));
}
let line = parts
.next()
.map(|line| NonZeroUsize::from_str(line).map(|line| line.get()))
.transpose()
.with_context(|| "invalid line number component (must be 1-based)")?;
let column = parts
.next()
.map(|col| NonZeroUsize::from_str(col).map(|col| col.get()))
.transpose()
.with_context(|| "invalid column number component (must be 1-based)")?;
Ok(Self {
filename: filename.to_string(),
line,
column,
})
}
}
impl<'de> Deserialize<'de> for WorkflowRule {
fn deserialize<D>(deserializer: D) -> anyhow::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let raw = String::deserialize(deserializer)?;
WorkflowRule::from_str(&raw).map_err(de::Error::custom)
}
}
/// The per-audit entry stored under `rules` in the configuration.
///
/// Unknown keys are rejected; every field is optional.
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub(crate) struct AuditRuleConfig {
/// Whether the audit is disabled; reported by `Config::disables`.
#[serde(default)]
disable: bool,
/// Locations whose findings are suppressed; consulted by `Config::ignores`.
#[serde(default)]
ignore: Vec<WorkflowRule>,
/// Audit-specific settings, kept as a raw mapping until
/// `RawConfig::rule_config` deserializes them into a concrete type.
#[serde(default)]
config: Option<serde_yaml::Mapping>,
}
/// The configuration exactly as deserialized, before any audit-specific
/// settings have been interpreted.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawConfig {
/// Per-audit entries, keyed by audit identifier.
rules: HashMap<String, AuditRuleConfig>,
}
impl RawConfig {
    /// Deserializes a raw configuration from YAML text.
    ///
    /// # Errors
    ///
    /// Returns `ConfigErrorInner::Syntax` when the YAML cannot be
    /// deserialized into this type.
    fn load(contents: &str) -> Result<Self, ConfigErrorInner> {
        match serde_yaml::from_str::<Self>(contents) {
            Ok(raw) => Ok(raw),
            Err(err) => Err(ConfigErrorInner::Syntax(err)),
        }
    }

    /// Deserializes the `config` mapping of the audit named `ident` as `T`.
    ///
    /// Returns `Ok(None)` when the audit has no entry, or its entry has no
    /// `config` mapping.
    ///
    /// # Errors
    ///
    /// Returns `ConfigErrorInner::AuditSyntax` (carrying `ident`) when the
    /// mapping does not deserialize as `T`.
    fn rule_config<T>(&self, ident: &'static str) -> Result<Option<T>, ConfigErrorInner>
    where
        T: DeserializeOwned,
    {
        let Some(mapping) = self
            .rules
            .get(ident)
            .and_then(|rule| rule.config.as_ref())
        else {
            return Ok(None);
        };

        // `from_value` consumes its input, so the stored mapping is cloned.
        let value = serde_yaml::Value::Mapping(mapping.clone());
        match serde_yaml::from_value::<T>(value) {
            Ok(parsed) => Ok(Some(parsed)),
            Err(err) => Err(ConfigErrorInner::AuditSyntax(err, ident)),
        }
    }
}
// Settings loaded in `Config::load` under `DependabotCooldown::ident()`.
//
// Keys are kebab-case, unknown keys are rejected, and missing keys fall
// back to the `Default` implementation.
//
// NOTE: plain `//` comments are used on purpose; doc comments on a type
// deriving `JsonSchema` would be emitted into the generated schema.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
#[serde(default)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub(crate) struct DependabotCooldownConfig {
// Cooldown length in days; the type guarantees it is non-zero.
pub(crate) days: NonZeroUsize,
}
impl Default for DependabotCooldownConfig {
    /// Uses a cooldown of seven days.
    fn default() -> Self {
        // 7 is a non-zero literal, so construction cannot fail.
        let days = NonZeroUsize::new(7).expect("impossible");
        Self { days }
    }
}
#[derive(Clone, Debug, Deserialize)]
#[cfg_attr(
feature = "schema",
derive(schemars::JsonSchema),
schemars(with = "ForbiddenUsesConfigInner")
)]
#[serde(transparent)]
pub(crate) struct ForbiddenUsesConfig(
)
#[serde(with = "serde_yaml::with::singleton_map")] pub(crate) ForbiddenUsesConfigInner,
);
impl Deref for ForbiddenUsesConfig {
type Target = ForbiddenUsesConfigInner;
fn deref(&self) -> &Self::Target {
&self.0
}
}
// The payload wrapped by `ForbiddenUsesConfig`: exactly one of an allow
// list or a deny list of `uses` patterns. Variant names are kebab-case
// (`allow` / `deny`) when deserialized.
//
// NOTE(review): how the audit interprets each list is not visible in this
// file; confirm against the `forbidden_uses` audit.
#[derive(Clone, Debug, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "kebab-case")]
pub(crate) enum ForbiddenUsesConfigInner {
Allow(Vec<RepositoryUsesPattern>),
Deny(Vec<RepositoryUsesPattern>),
}
// User-supplied settings loaded in `Config::load` under
// `SecretsOutsideEnvironment::ident()`.
//
// Keys are kebab-case, unknown keys are rejected, and missing keys fall
// back to their defaults. The value is converted into a
// `SecretsOutsideEnvPolicy` before use.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
#[serde(default)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub(crate) struct SecretsOutsideEnvConfig {
// Names to allow, as written by the user (case is normalized later,
// during conversion into the policy).
pub(crate) allow: Vec<String>,
}
/// The effective allow-list derived from `SecretsOutsideEnvConfig`.
#[derive(Clone, Debug)]
pub(crate) struct SecretsOutsideEnvPolicy {
    /// Allowed names; the entries added by this file are lowercase.
    pub(crate) allow: HashSet<String>,
}

impl Default for SecretsOutsideEnvPolicy {
    /// Builds the baseline policy, which allows only `github_token`.
    fn default() -> Self {
        Self {
            allow: HashSet::from([String::from("github_token")]),
        }
    }
}
impl From<SecretsOutsideEnvConfig> for SecretsOutsideEnvPolicy {
fn from(value: SecretsOutsideEnvConfig) -> Self {
let mut allow = value
.allow
.iter()
.map(|item| item.to_ascii_lowercase())
.collect::<HashSet<_>>();
let default = Self::default();
allow.extend(default.allow);
Self { allow }
}
}
// User-supplied settings loaded in `Config::load` under
// `UnpinnedUses::ident()`.
//
// Keys are kebab-case and unknown keys are rejected. The value is converted
// into `UnpinnedUsesPolicies` (via `TryFrom`) before use.
#[derive(Debug, Default, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub(crate) struct UnpinnedUsesConfig {
// Maps a `uses` pattern to the pinning policy applied to matching uses.
// Empty when the key is absent.
#[serde(default)]
policies: HashMap<RepositoryUsesPattern, UsesPolicy>,
}
// The pinning policy attached to a `uses` pattern. Deserialized from the
// kebab-case names `any`, `ref-pin` and `hash-pin`.
//
// NOTE(review): the enforcement of each variant lives in the audit, not in
// this file. Presumably `Any` imposes no requirement, `RefPin` requires some
// ref, and `HashPin` requires a commit hash; confirm against `unpinned_uses`.
#[derive(Copy, Clone, Debug, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "kebab-case")]
pub(crate) enum UsesPolicy {
Any,
RefPin,
HashPin,
}
/// The lookup structure built from `UnpinnedUsesConfig` and queried through
/// `get_policy`.
#[derive(Clone, Debug)]
pub(crate) struct UnpinnedUsesPolicies {
/// Patterns and their policies, grouped by the owner they apply to. Each
/// group is kept sorted by the patterns' `Ord` implementation, and
/// `get_policy` returns the first match in that order.
policy_tree: HashMap<String, Vec<(RepositoryUsesPattern, UsesPolicy)>>,
/// The policy returned when no pattern matches.
default_policy: UsesPolicy,
}
impl UnpinnedUsesPolicies {
    /// Looks up the policy that applies to `uses`.
    ///
    /// Only the patterns registered for the owner of `uses` are considered,
    /// in their stored order. The first one that matches is returned
    /// together with its policy; when nothing matches, the result is `None`
    /// paired with the default policy.
    pub(crate) fn get_policy(
        &self,
        uses: &RepositoryUses,
    ) -> (Option<&RepositoryUsesPattern>, UsesPolicy) {
        self.policy_tree
            .get(uses.owner())
            .and_then(|candidates| {
                candidates
                    .iter()
                    .find(|(pattern, _)| pattern.matches(uses))
            })
            .map_or((None, self.default_policy), |(pattern, policy)| {
                (Some(pattern), *policy)
            })
    }
}
impl Default for UnpinnedUsesPolicies {
fn default() -> Self {
Self {
policy_tree: [].into(),
default_policy: UsesPolicy::HashPin,
}
}
}
/// Reasons an `UnpinnedUsesConfig` cannot be turned into
/// `UnpinnedUsesPolicies`.
#[derive(Error, Debug)]
pub(crate) enum UnpinnedUsesConfigError {
/// A policy was keyed by a pattern that includes a ref; the field holds
/// the offending pattern rendered as a string.
#[error("cannot use exact ref patterns here: `{0}`")]
ExactWithRefUsed(String),
}
impl TryFrom<UnpinnedUsesConfig> for UnpinnedUsesPolicies {
    type Error = UnpinnedUsesConfigError;

    /// Groups the configured patterns by owner and extracts the fallback
    /// policy.
    ///
    /// The `Any` pattern sets the fallback policy (`HashPin` when absent);
    /// every other accepted pattern is filed under its owner. Each owner's
    /// patterns are then sorted by the patterns' `Ord` implementation.
    ///
    /// # Errors
    ///
    /// Returns `UnpinnedUsesConfigError::ExactWithRefUsed` if any pattern
    /// is an `ExactWithRef`.
    fn try_from(config: UnpinnedUsesConfig) -> anyhow::Result<Self, Self::Error> {
        let mut default_policy = UsesPolicy::HashPin;
        let mut policy_tree =
            HashMap::<String, Vec<(RepositoryUsesPattern, UsesPolicy)>>::new();

        for (pattern, policy) in config.policies {
            // Work out which owner bucket the pattern belongs to; the two
            // special cases leave the loop body early.
            let owner = match &pattern {
                RepositoryUsesPattern::ExactWithRef { .. } => {
                    return Err(UnpinnedUsesConfigError::ExactWithRefUsed(
                        pattern.to_string(),
                    ));
                }
                RepositoryUsesPattern::Any => {
                    default_policy = policy;
                    continue;
                }
                RepositoryUsesPattern::ExactPath { owner, .. }
                | RepositoryUsesPattern::ExactRepo { owner, .. }
                | RepositoryUsesPattern::InRepo { owner, .. }
                | RepositoryUsesPattern::InOwner(owner) => owner.clone(),
            };

            policy_tree.entry(owner).or_default().push((pattern, policy));
        }

        // `get_policy` takes the first match, so the order within each
        // owner bucket is significant.
        for bucket in policy_tree.values_mut() {
            bucket.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
        }

        Ok(Self {
            policy_tree,
            default_policy,
        })
    }
}
/// A fully loaded configuration: the raw per-audit entries plus the
/// audit-specific settings that `Config::load` derives from them.
#[derive(Clone, Debug, Default)]
pub(crate) struct Config {
/// The configuration as deserialized; backs `disables` and `ignores`.
raw: RawConfig,
/// Settings loaded under `DependabotCooldown::ident()`, or the default.
pub(crate) dependabot_cooldown_config: DependabotCooldownConfig,
/// Settings loaded under `ForbiddenUses::ident()`; `None` when absent.
pub(crate) forbidden_uses_config: Option<ForbiddenUsesConfig>,
/// Policy derived from the settings under
/// `SecretsOutsideEnvironment::ident()`, or the default.
pub(crate) secrets_outside_env_policy: SecretsOutsideEnvPolicy,
/// Policies derived from the settings under `UnpinnedUses::ident()`, or
/// the default.
pub(crate) unpinned_uses_policies: UnpinnedUsesPolicies,
}
impl Config {
fn load(contents: &str) -> Result<Self, ConfigErrorInner> {
let raw = RawConfig::load(contents)?;
let dependabot_cooldown_config = raw
.rule_config(DependabotCooldown::ident())?
.unwrap_or_default();
let forbidden_uses_config = raw.rule_config(ForbiddenUses::ident())?;
let secrets_outside_env_config =
raw.rule_config::<SecretsOutsideEnvConfig>(SecretsOutsideEnvironment::ident())?;
let secrets_outside_env_policy = secrets_outside_env_config
.map(Into::into)
.unwrap_or_default();
let unpinned_uses_policies = {
if let Some(unpinned_uses_config) =
raw.rule_config::<UnpinnedUsesConfig>(UnpinnedUses::ident())?
{
UnpinnedUsesPolicies::try_from(unpinned_uses_config)?
} else {
UnpinnedUsesPolicies::default()
}
};
Ok(Self {
raw,
dependabot_cooldown_config,
forbidden_uses_config,
secrets_outside_env_policy,
unpinned_uses_policies,
})
}
/// Resolves the configuration to use, honoring the collection options.
///
/// Precedence:
/// 1. `options.no_config` set: the default configuration.
/// 2. `options.global_config` present: a clone of it.
/// 3. Otherwise: whatever `discover_fn` finds, or the default
///    configuration when it finds nothing.
///
/// `discover_fn` is only invoked in the third case.
///
/// # Errors
///
/// Propagates any error returned by `discover_fn`.
pub(crate) async fn discover<F>(
    options: &CollectionOptions,
    discover_fn: F,
) -> Result<Self, ConfigError>
where
    F: AsyncFnOnce() -> Result<Option<Self>, ConfigError>,
{
    if options.no_config {
        tracing::debug!("skipping config discovery: explicitly disabled");
        return Ok(Self::default());
    }

    if let Some(config) = &options.global_config {
        tracing::debug!("config discovery: using global config: {config:?}");
        return Ok(config.clone());
    }

    let discovered = discover_fn().await?;
    Ok(discovered.unwrap_or_default())
}
/// Searches `path` and its ancestors for a configuration file.
///
/// The search starts at the canonicalized `path`, or at its parent when
/// the final component is `workflows`. At each directory every entry of
/// `CONFIG_CANDIDATES` is tried in order and the first existing file is
/// loaded. The walk stops, returning `Ok(None)`, at a directory that
/// contains a `.git` directory or once there is no parent left.
///
/// # Errors
///
/// Returns an I/O error if canonicalization or reading a candidate fails,
/// and propagates any error from loading the candidate's contents.
fn discover_in_dir(path: &Utf8Path) -> Result<Option<Self>, ConfigErrorInner> {
    tracing::debug!("attempting config discovery in `{path}`");

    let canonical = path.canonicalize_utf8()?;

    let mut candidate_path = match canonical.file_name() {
        Some("workflows") => match canonical.parent() {
            Some(parent) => parent,
            None => {
                tracing::debug!("no parent for `{canonical}`, cannot discover config");
                return Ok(None);
            }
        },
        _ => canonical.as_path(),
    };

    loop {
        // Lazily probe the candidates so that checking stops at the first
        // one that is a file.
        let found = CONFIG_CANDIDATES
            .iter()
            .map(|name| candidate_path.join(name))
            .find(|joined| joined.is_file());

        if let Some(candidate_path) = found {
            tracing::debug!("found config candidate at `{candidate_path}`");
            let contents = fs::read_to_string(&candidate_path)?;
            return Self::load(&contents).map(Some);
        }

        // A `.git` directory marks the boundary past which we do not look.
        if candidate_path.join(".git").is_dir() {
            tracing::debug!("found `{candidate_path}/.git`, stopping search");
            return Ok(None);
        }

        match candidate_path.parent() {
            Some(parent) => candidate_path = parent,
            None => {
                tracing::debug!("reached filesystem root without finding a config");
                return Ok(None);
            }
        }
    }
}
/// Discovers the configuration that applies to a local input path.
///
/// A directory is searched directly; for anything else the search starts
/// from the path's parent. Returns `Ok(None)` when the path has no parent
/// or when the search finds nothing.
///
/// # Errors
///
/// Any failure from the directory search is wrapped in a `ConfigError`
/// that records `path`.
pub(crate) async fn discover_local(path: &Utf8Path) -> Result<Option<Self>, ConfigError> {
    tracing::debug!("discovering config for local input `{path}`");

    let search_root = if path.is_dir() {
        path
    } else {
        match path.parent() {
            // An empty parent is replaced with the current directory so
            // that the search has a usable starting point.
            Some(parent) if parent.as_str().is_empty() => Utf8Path::new("."),
            Some(parent) => parent,
            None => {
                tracing::debug!("no parent for {path:?}, cannot discover config");
                return Ok(None);
            }
        }
    };

    match Self::discover_in_dir(search_root) {
        Ok(found) => Ok(found),
        Err(source) => Err(ConfigError {
            path: path.to_string(),
            source,
        }),
    }
}
/// Discovers the configuration for a remote repository.
///
/// Each entry of `CONFIG_CANDIDATES` is fetched in order through `client`;
/// the first one that exists is loaded and returned. Returns `Ok(None)`
/// when none of the candidates exist.
///
/// # Errors
///
/// A fetch failure or a load failure ends the search immediately and is
/// wrapped in a `ConfigError` that records the candidate's name.
pub(crate) async fn discover_remote(
    client: &Client,
    slug: &RepoSlug,
) -> Result<Option<Self>, ConfigError> {
    for candidate in CONFIG_CANDIDATES {
        let fetched = client
            .fetch_single_file(slug, candidate)
            .await
            .map_err(|err| ConfigError {
                path: candidate.to_string(),
                source: err.into(),
            })?;

        // A missing candidate is not an error: move on to the next one.
        let Some(contents) = fetched else {
            continue;
        };

        tracing::debug!("retrieved config candidate `{candidate}` for {slug}");
        return match Self::load(&contents) {
            Ok(config) => Ok(Some(config)),
            Err(err) => Err(ConfigError {
                path: candidate.to_string(),
                source: err,
            }),
        };
    }

    Ok(None)
}
/// Loads the configuration named explicitly in the application arguments.
///
/// Returns `Ok(None)` when configuration is disabled via `no_config`, or
/// when no configuration path was given.
///
/// # Errors
///
/// Returns a `ConfigError` recording the path when the file cannot be read
/// or its contents cannot be loaded.
pub(crate) fn global(app: &App) -> Result<Option<Self>, ConfigError> {
    if app.args.no_config {
        return Ok(None);
    }

    let Some(path) = &app.args.config else {
        return Ok(None);
    };

    tracing::debug!("loading config from `{path}`");

    let contents = match fs::read_to_string(path) {
        Ok(contents) => contents,
        Err(err) => {
            return Err(ConfigError {
                path: path.to_string(),
                source: ConfigErrorInner::Io(err),
            });
        }
    };

    match Self::load(&contents) {
        Ok(config) => Ok(Some(config)),
        Err(err) => Err(ConfigError {
            path: path.to_string(),
            source: err,
        }),
    }
}
/// Reports whether the audit named `ident` is disabled.
///
/// Audits without an entry in the configuration are never disabled.
pub(crate) fn disables(&self, ident: &str) -> bool {
    match self.raw.rules.get(ident) {
        Some(rule_config) => rule_config.disable,
        None => false,
    }
}
pub(crate) fn ignores(&self, finding: &Finding<'_>) -> bool {
let Some(rule_config) = self.raw.rules.get(finding.ident) else {
return false;
};
let ignores = &rule_config.ignore;
for loc in &finding.locations {
for rule in ignores
.iter()
.filter(|i| i.filename == loc.symbolic.key.filename())
{
match rule {
WorkflowRule {
line: Some(line),
column,
..
} => {
if *line == loc.concrete.location.start_point.row + 1
&& column.is_none_or(|col| {
col == loc.concrete.location.start_point.column + 1
})
{
return true;
} else {
continue;
}
}
WorkflowRule {
line: None,
column: None,
..
} => return true,
_ => unreachable!(),
}
}
}
false
}
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use super::WorkflowRule;
/// Exercises `WorkflowRule::from_str` on well-formed rules and on a range
/// of malformed ones.
#[test]
fn test_parse_workflow_rule() -> anyhow::Result<()> {
// Filename with both a line and a column.
assert_eq!(
WorkflowRule::from_str("foo.yml:1:2")?,
WorkflowRule {
filename: "foo.yml".into(),
line: Some(1),
column: Some(2)
}
);
// Filename with a line only.
assert_eq!(
WorkflowRule::from_str("foo.yml:123")?,
WorkflowRule {
filename: "foo.yml".into(),
line: Some(123),
column: None
}
);
// Zero is rejected for both components, since they are 1-based.
assert!(WorkflowRule::from_str("foo.yml:0:0").is_err());
assert!(WorkflowRule::from_str("foo.yml:1:0").is_err());
assert!(WorkflowRule::from_str("foo.yml:0:1").is_err());
// Empty line and/or column components are rejected.
assert!(WorkflowRule::from_str("foo.yml:123:").is_err());
assert!(WorkflowRule::from_str("foo.yml::").is_err());
assert!(WorkflowRule::from_str("foo.yml::1").is_err());
// Filenames that are missing or lack a `.yml`/`.yaml` suffix are rejected.
assert!(WorkflowRule::from_str("foo::1").is_err());
assert!(WorkflowRule::from_str("foo.unrelated::1").is_err());
assert!(WorkflowRule::from_str("::1").is_err());
assert!(WorkflowRule::from_str(":1:1").is_err());
assert!(WorkflowRule::from_str("1:1").is_err());
Ok(())
}
}