droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
//! `SemgrepArgs` clap struct + arg composition.

use std::ffi::OsString;
use std::path::PathBuf;

/// Name of the environment variable that supplies additional semgrep rule
/// sources.
///
/// The value is a list of paths joined by the host platform's path-list
/// separator: a colon on Unix (POSIX `PATH` style), a semicolon on Windows.
/// It is split with [`std::env::split_paths`].
pub const ENV_RULES: &str = "DROIDSAW_SEMGREP_RULES";

/// User-rule source for the semgrep subprocess.
///
/// Droidsaw does not ship semgrep rules. This struct collects user-provided
/// rule paths and a flag to suppress the registry default. Composes with
/// the [`ENV_RULES`] env var.
// NOTE(maintainers): the `///` comments on the fields below are consumed by
// clap's derive as the `--help` text for each flag, so rewording them changes
// user-visible CLI output. Keep maintainer-only remarks in `//` comments.
//
// `Default` yields an empty `rules` list and `no_auto == false`.
#[derive(Debug, Clone, Default, clap::Args)]
pub struct SemgrepArgs {
    /// Additional semgrep rule sources. Repeatable. Each path is passed
    /// as a `--config` flag to semgrep. Composes with the
    /// `DROIDSAW_SEMGREP_RULES` env var (path-separator-merged on the
    /// host platform). Droidsaw does not ship rules — bring your own.
    // `ArgAction::Append` is what makes `--rules a --rules b` accumulate
    // into the vector in command-line order.
    #[arg(long = "rules", value_name = "PATH", action = clap::ArgAction::Append)]
    pub rules: Vec<PathBuf>,

    /// Suppress semgrep's default `--config auto` registry. Use when your
    /// `--rules` set is sufficient and you don't want community defaults.
    /// Errors if no `--rules` and no `DROIDSAW_SEMGREP_RULES` env var
    /// are provided.
    // The "errors if no rule source" behaviour is not enforced by clap at
    // parse time; it is checked later by the `compose_*` functions, which
    // return `SemgrepError::NoRuleSource`.
    #[arg(long = "no-auto")]
    pub no_auto: bool,
}

/// Errors composing the semgrep subprocess argument set.
///
/// The `Display` text of each variant is produced by `thiserror` from the
/// `#[error(...)]` attribute and is written as an actionable, user-facing
/// message.
#[derive(Debug, thiserror::Error)]
pub enum SemgrepError {
    /// `--no-auto` suppressed the registry default, but neither `--rules`
    /// nor the `DROIDSAW_SEMGREP_RULES` env var contributed a rule path, so
    /// no `--config` argument would be emitted at all.
    #[error(
        "--no-auto was passed but no rule sources were provided. \
Add --rules <path> or set DROIDSAW_SEMGREP_RULES, or drop --no-auto."
    )]
    NoRuleSource,
}

impl SemgrepArgs {
    /// Merge `--rules` with the `DROIDSAW_SEMGREP_RULES` env var.
    ///
    /// CLI flags appear first; env var entries appear after.
    ///
    /// The variable is read with [`std::env::var_os`] rather than
    /// [`std::env::var`]: the latter fails on values that are not valid
    /// Unicode, which would silently drop every env-supplied rule path as
    /// soon as one of them contained a non-UTF-8 byte.
    pub fn effective_rules(&self) -> Vec<PathBuf> {
        let raw = std::env::var_os(ENV_RULES);
        self.merge_env_paths(raw.as_deref())
    }

    /// Test seam — accepts an explicit env value so unit tests don't
    /// race against process-global env state.
    ///
    /// `env_value` is interpreted exactly like the content of the
    /// `DROIDSAW_SEMGREP_RULES` variable; `None` means "variable not set".
    pub fn effective_rules_with_env(&self, env_value: Option<&str>) -> Vec<PathBuf> {
        self.merge_env_paths(env_value.map(std::ffi::OsStr::new))
    }

    /// Append the non-empty entries of a path-list value to the CLI rules.
    ///
    /// Empty segments (leading, trailing or doubled separators) are skipped
    /// so they never turn into an empty `--config` argument.
    fn merge_env_paths(&self, raw: Option<&std::ffi::OsStr>) -> Vec<PathBuf> {
        let mut merged = self.rules.clone();
        if let Some(raw) = raw {
            merged.extend(
                std::env::split_paths(raw).filter(|entry| !entry.as_os_str().is_empty()),
            );
        }
        merged
    }
}

/// Shared composition step behind both public `compose_*` wrappers.
///
/// Builds the flat token list in subprocess order: the `--config auto`
/// pair first (unless `args.no_auto` is set), followed by one
/// `--config <path>` pair per effective user rule. `path_fn` turns each
/// `PathBuf` into the caller's token type; the literal tokens are produced
/// through `T::from`.
///
/// This function does not report `NoRuleSource` itself — the two public
/// `*_with_env` wrappers perform that check before delegating here.
fn compose_iter<F, T>(args: &SemgrepArgs, env_value: Option<&str>, path_fn: F) -> Vec<T>
where
    F: Fn(PathBuf) -> T,
    T: From<&'static str>,
{
    // Zero or one leading pair for the registry default.
    let registry_pair = if args.no_auto {
        None
    } else {
        Some([T::from("--config"), T::from("auto")])
    };

    // One pair per user rule, CLI entries before env entries.
    let user_pairs = args
        .effective_rules_with_env(env_value)
        .into_iter()
        .flat_map(|rule| [T::from("--config"), path_fn(rule)]);

    registry_pair
        .into_iter()
        .flatten()
        .chain(user_pairs)
        .collect()
}

/// Compose the semgrep subprocess `--config` arg list as `Vec<String>`.
///
/// **Lossy on non-utf8 paths**: each `PathBuf` entry is converted via
/// `PathBuf::display().to_string()`, which substitutes `U+FFFD` for any
/// non-utf8 byte sequences. This is intentional for callers that embed
/// the result in a JSON string (JSON is utf-8 by spec and cannot carry
/// raw bytes), such as the operator-facing `command` hint.
///
/// **For actual subprocess invocation**, use [`compose_argv`] instead.
/// It preserves all path bytes, including non-utf8 sequences that
/// `display()` would corrupt.
///
/// Order: `--config auto` first (when not suppressed), then each user
/// rule path. User rules go LAST so the user-supplied rule wins on
/// rule-id collision (semgrep's later-config-wins semantics).
///
/// The `DROIDSAW_SEMGREP_RULES` variable is read with
/// [`std::env::var_os`], so a value that is not valid Unicode still
/// contributes its entries (rendered lossily) instead of being ignored.
///
/// # Errors
///
/// Returns [`SemgrepError::NoRuleSource`] if `no_auto` is set and no
/// user rules are provided.
pub fn compose_config_args(args: &SemgrepArgs) -> Result<Vec<String>, SemgrepError> {
    // `std::env::var(..).ok()` would turn a non-Unicode value into `None`
    // and drop every env-supplied rule. Fold the raw entries into a merged
    // copy instead, then reuse the shared composition with no extra env.
    let mut merged = args.clone();
    if let Some(raw) = std::env::var_os(ENV_RULES) {
        merged
            .rules
            .extend(std::env::split_paths(&raw).filter(|entry| !entry.as_os_str().is_empty()));
    }
    compose_config_args_with_env(&merged, None)
}

/// String-form composition against an explicit env value.
///
/// Test seam — see [`SemgrepArgs::effective_rules_with_env`]. `env_value`
/// stands in for the content of the rules env var (`None` = unset).
///
/// Returns [`SemgrepError::NoRuleSource`] when `no_auto` is set and neither
/// the CLI nor `env_value` supplies a rule path.
pub fn compose_config_args_with_env(
    args: &SemgrepArgs,
    env_value: Option<&str>,
) -> Result<Vec<String>, SemgrepError> {
    // The registry default counts as a source; user rules are only
    // consulted when it has been suppressed.
    let has_config_source =
        !args.no_auto || !args.effective_rules_with_env(env_value).is_empty();

    if has_config_source {
        Ok(compose_iter(args, env_value, |rule| {
            rule.display().to_string()
        }))
    } else {
        Err(SemgrepError::NoRuleSource)
    }
}

/// Compose the semgrep subprocess `--config` arg list as `Vec<OsString>`.
///
/// **Byte-preserving**: each `PathBuf` entry is converted via
/// `p.into_os_string()`, which round-trips all path bytes intact on Linux
/// (where paths are arbitrary non-NUL byte sequences). Non-utf8 paths are
/// passed to the semgrep subprocess without substitution.
///
/// This also holds for paths coming from the `DROIDSAW_SEMGREP_RULES`
/// variable: it is read with [`std::env::var_os`], so a value that is not
/// valid Unicode is split and forwarded as-is rather than being ignored.
///
/// Use this function wherever the result is passed to
/// [`std::process::Command::args`]. Use [`compose_config_args`] only for
/// JSON/display contexts where a `String` is required (with the documented
/// U+FFFD lossy caveat).
///
/// # Errors
///
/// Returns [`SemgrepError::NoRuleSource`] if `no_auto` is set and no
/// user rules are provided.
pub fn compose_argv(args: &SemgrepArgs) -> Result<Vec<OsString>, SemgrepError> {
    // `std::env::var(..).ok()` would turn a non-Unicode value into `None`,
    // defeating the byte-preserving contract for env-supplied paths. Fold
    // the raw entries into a merged copy (CLI rules first, env entries
    // after) and reuse the shared composition with no extra env value.
    let mut merged = args.clone();
    if let Some(raw) = std::env::var_os(ENV_RULES) {
        merged
            .rules
            .extend(std::env::split_paths(&raw).filter(|entry| !entry.as_os_str().is_empty()));
    }
    compose_argv_with_env(&merged, None)
}

/// Test seam — see [`SemgrepArgs::effective_rules_with_env`].
pub fn compose_argv_with_env(
    args: &SemgrepArgs,
    env_value: Option<&str>,
) -> Result<Vec<OsString>, SemgrepError> {
    if args.no_auto && args.effective_rules_with_env(env_value).is_empty() {
        return Err(SemgrepError::NoRuleSource);
    }
    Ok(compose_iter(args, env_value, |p| p.into_os_string()))
}

// Unit tests for the argument composition. Every test goes through the
// `*_with_env` seams so nothing here reads or mutates process-global
// environment state.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `SemgrepArgs` from string rule paths and the `no_auto` flag.
    fn args_with(rules: Vec<&str>, no_auto: bool) -> SemgrepArgs {
        SemgrepArgs {
            rules: rules.into_iter().map(PathBuf::from).collect(),
            no_auto,
        }
    }

    #[test]
    fn default_emits_auto_only() {
        let v = compose_config_args_with_env(&SemgrepArgs::default(), None).unwrap();
        assert_eq!(v, vec!["--config", "auto"]);
    }

    #[test]
    fn rules_after_auto() {
        let v = compose_config_args_with_env(&args_with(vec!["a.yml", "b.yml"], false), None)
            .unwrap();
        assert_eq!(
            v,
            vec!["--config", "auto", "--config", "a.yml", "--config", "b.yml"]
        );
    }

    #[test]
    fn no_auto_with_rules_omits_auto() {
        let v = compose_config_args_with_env(&args_with(vec!["x.yml"], true), None).unwrap();
        assert_eq!(v, vec!["--config", "x.yml"]);
    }

    #[test]
    fn no_auto_no_rules_errors() {
        let err =
            compose_config_args_with_env(&args_with(vec![], true), None).unwrap_err();
        assert!(matches!(err, SemgrepError::NoRuleSource));
    }

    #[test]
    fn env_var_appends_after_cli_rules() {
        let v = compose_config_args_with_env(
            &args_with(vec!["cli.yml"], false),
            Some(if cfg!(windows) {
                "env1.yml;env2.yml"
            } else {
                "env1.yml:env2.yml"
            }),
        )
        .unwrap();
        assert_eq!(
            v,
            vec![
                "--config", "auto", "--config", "cli.yml", "--config", "env1.yml", "--config",
                "env2.yml",
            ]
        );
    }

    #[test]
    fn env_only_no_cli_rules() {
        // Single path; no separator → portable across split_paths impls.
        let v = compose_config_args_with_env(&SemgrepArgs::default(), Some("env.yml")).unwrap();
        assert_eq!(v, vec!["--config", "auto", "--config", "env.yml"]);
    }

    #[test]
    fn empty_env_segments_ignored() {
        let v = compose_config_args_with_env(
            &SemgrepArgs::default(),
            Some(if cfg!(windows) { ";a.yml;" } else { ":a.yml:" }),
        )
        .unwrap();
        assert_eq!(v, vec!["--config", "auto", "--config", "a.yml"]);
    }

    #[test]
    fn no_auto_satisfied_by_env() {
        let v = compose_config_args_with_env(
            &args_with(vec![], true),
            Some("env.yml"),
        )
        .unwrap();
        assert_eq!(v, vec!["--config", "env.yml"]);
    }

    #[test]
    fn effective_rules_lists_cli_before_env() {
        let rules = args_with(vec!["cli.yml"], false).effective_rules_with_env(Some("env.yml"));
        assert_eq!(
            rules,
            vec![PathBuf::from("cli.yml"), PathBuf::from("env.yml")]
        );
    }

    #[test]
    fn argv_mirrors_string_form_for_utf8_paths() {
        // For paths that are valid UTF-8 the two public forms must agree
        // token for token.
        let args = args_with(vec!["a.yml"], false);
        let strings = compose_config_args_with_env(&args, Some("env.yml")).unwrap();
        let expected: Vec<OsString> = strings.into_iter().map(OsString::from).collect();
        let argv = compose_argv_with_env(&args, Some("env.yml")).unwrap();
        assert_eq!(argv, expected);
    }

    #[test]
    fn argv_no_auto_no_rules_errors() {
        let err = compose_argv_with_env(&args_with(vec![], true), None).unwrap_err();
        assert!(matches!(err, SemgrepError::NoRuleSource));
    }

    #[test]
    fn argv_no_auto_satisfied_by_env() {
        let argv = compose_argv_with_env(&args_with(vec![], true), Some("env.yml")).unwrap();
        assert_eq!(
            argv,
            vec![OsString::from("--config"), OsString::from("env.yml")]
        );
    }

    #[cfg(unix)]
    #[test]
    fn non_utf8_cli_path_is_exact_in_argv_and_lossy_in_strings() {
        use std::os::unix::ffi::OsStringExt;

        // 0xff can never appear in valid UTF-8.
        let raw = OsString::from_vec(vec![b'r', 0xff, b'.', b'y', b'm', b'l']);
        let args = SemgrepArgs {
            rules: vec![PathBuf::from(raw.clone())],
            no_auto: true,
        };

        let argv = compose_argv_with_env(&args, None).unwrap();
        assert_eq!(argv, vec![OsString::from("--config"), raw]);

        let shown = compose_config_args_with_env(&args, None).unwrap();
        assert_eq!(shown, vec!["--config", "r\u{FFFD}.yml"]);
    }
}