droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
//! Minimal STIX 2.1 bundle consumer.
//!
//! droidsaw consumes STIX 2.1 via a `--stix-feed <path>` flag rather
//! than depending on the `droidsaw/iocs` repo as a submodule. Any
//! STIX 2.1 source the operator trusts (their MISP export, a paid
//! feed, the public iocs repo's STIX export) is acceptable.
//!
//! ## Scope
//!
//! This module parses Indicator SDOs from a bundle and round-trips
//! unrecognized objects via `StixObject::Other`. It does NOT match
//! indicators against APK content — that's the next stream
//! (`threat-model-third-party-inventory`).
//!
//! Supported STIX 2.1 surface:
//!   - `bundle` containing an `objects: []` array.
//!   - Each object's `type` discriminator: `indicator` is parsed
//!     structurally; everything else carries through as
//!     `StixObject::Other(serde_json::Value)`.
//!   - `Indicator` SDO fields: `id`, `spec_version`, `name`, `description`,
//!     `indicator_types`, `pattern`, `pattern_type`, `valid_from`,
//!     `valid_until`, `confidence`, `labels`, `external_references`.
//!   - Arbitrary `x_*` extension properties on Indicator — along with any
//!     other property not listed above (e.g. `type`, `created`,
//!     `modified`) — are preserved on the `extensions` map.
//!
//! Not yet supported (deferred to a follow-on stream):
//!   - Tool / Malware / Infrastructure SDOs as structured types.
//!   - STIX patterning grammar — only recognized `pattern_type` values
//!     (currently just `x-droidsaw-pattern`) produce signals; indicators
//!     using any other grammar are still parsed but yield no signals.

use std::collections::BTreeMap;
use std::path::Path;

use chrono::{DateTime, Utc};
use droidsaw_common::threat_model::{Signal, SinkKind};
use serde::{Deserialize, Serialize};

use super::{Result, ThreatModelError};

/// Top-level STIX 2.1 Bundle.
///
/// Only `id` and `objects` are modeled. The spec's `type: "bundle"`
/// discriminator is not a field of this struct, so it is neither checked
/// when parsing nor written back when serializing. `id` is kept for
/// caller-side dedup/logging.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct StixBundle {
    /// Bundle ID (`bundle--<uuid>`). Stored as an opaque string; its format
    /// is not validated here.
    pub id: String,
    /// Objects carried by the bundle. Indicators are parsed structurally;
    /// every other object type is kept as raw JSON (see [`StixObject`]).
    pub objects: Vec<StixObject>,
}

/// A single STIX object.
///
/// `Indicator` is parsed structurally; every other object type round-trips
/// via `Other` so unknown shapes don't fail the parse. Serde support is
/// implemented by hand (not derived) so that the variant is chosen from the
/// object's `type` property.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StixObject {
    /// A fully-parsed Indicator SDO (`type == "indicator"`).
    Indicator(StixIndicator),
    /// Any other object; stored verbatim as JSON, including objects that
    /// have no string-valued `type` property at all.
    Other(serde_json::Value),
}

// Custom serde wiring for `StixObject` — peek at `type` and dispatch.
impl Serialize for StixObject {
    fn serialize<S: serde::Serializer>(&self, s: S) -> std::result::Result<S::Ok, S::Error> {
        match self {
            StixObject::Indicator(ind) => ind.serialize(s),
            StixObject::Other(v) => v.serialize(s),
        }
    }
}

impl<'de> Deserialize<'de> for StixObject {
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> std::result::Result<Self, D::Error> {
        // Two-stage: first to Value (cheap), then dispatch on `type`.
        let v = serde_json::Value::deserialize(d)?;
        let ty = v.get("type").and_then(|t| t.as_str()).unwrap_or("");
        if ty == "indicator" {
            let ind: StixIndicator = serde_json::from_value(v).map_err(serde::de::Error::custom)?;
            Ok(StixObject::Indicator(ind))
        } else {
            Ok(StixObject::Other(v))
        }
    }
}

/// Parsed STIX 2.1 Indicator SDO.
///
/// `id`, `spec_version`, `pattern`, `pattern_type` and `valid_from` are
/// required — parsing fails if any is absent. The remaining modeled fields
/// fall back to `None` / an empty `Vec` when missing. Every property that is
/// not modeled here (custom `x_*` properties, but also standard ones such as
/// `type`, `created` and `modified`) is collected on `extensions`.
///
/// Field order and serde attributes determine the serialized form; keep
/// them stable.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct StixIndicator {
    /// Object ID (`indicator--<uuid>`). Used as the dedup key by
    /// `load_indicators_dedup`; the format is not validated here.
    pub id: String,
    /// STIX spec version (`"2.1"`). Stored as given; not checked.
    pub spec_version: String,
    /// Human-readable name.
    #[serde(default)]
    pub name: Option<String>,
    /// Long-form description.
    #[serde(default)]
    pub description: Option<String>,
    /// Indicator-types vocabulary (e.g., `["malicious-activity"]`).
    #[serde(default)]
    pub indicator_types: Vec<String>,
    /// Pattern body — interpretation depends on `pattern_type`.
    pub pattern: String,
    /// Pattern grammar tag (`stix` / `pcre` / `x-droidsaw-pattern` / …).
    pub pattern_type: String,
    /// Earliest validity (RFC-3339).
    pub valid_from: DateTime<Utc>,
    /// Latest validity (RFC-3339).
    #[serde(default)]
    pub valid_until: Option<DateTime<Utc>>,
    /// 0-100 confidence. The range is not enforced beyond what `u8` allows.
    #[serde(default)]
    pub confidence: Option<u8>,
    /// Free-form labels (`category/analytics`, `tier/hand-curated`, …).
    #[serde(default)]
    pub labels: Vec<String>,
    /// External references (vendor docs, news articles).
    #[serde(default)]
    pub external_references: Vec<StixExternalReference>,
    /// Catch-all for every property not modeled above. STIX allows
    /// producers to emit custom `x_*` fields, and standard properties this
    /// struct does not model (`type`, `created`, `modified`, …) land here
    /// too because the map is flattened. We preserve them through parse →
    /// re-serialize so downstream consumers don't lose information droidsaw
    /// doesn't model yet.
    #[serde(flatten)]
    pub extensions: BTreeMap<String, serde_json::Value>,
}

/// One STIX external reference attached to an Indicator.
///
/// Only `source_name` is required; the other properties default to `None`
/// when absent. Properties not listed here are ignored on parse and are
/// therefore not written back on serialize.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct StixExternalReference {
    /// Source name (`vendor_docs`, `news_article`, vendor-specific).
    pub source_name: String,
    /// Description, if provided.
    #[serde(default)]
    pub description: Option<String>,
    /// Canonical URL. Kept as a plain string; not parsed or validated.
    #[serde(default)]
    pub url: Option<String>,
    /// External ID at the source (CVE-…, ATT&CK technique ID, …).
    #[serde(default)]
    pub external_id: Option<String>,
}

/// Read and parse a STIX 2.1 bundle stored at a local file path.
///
/// Only local files are supported: the operator hosts the STIX bundles they
/// trust and `--stix-feed` takes paths, not URLs. Fetching over the network
/// is deferred until a use case surfaces and a network-fetch security
/// review lands.
///
/// # Errors
///
/// * Propagates the I/O error if the file cannot be read.
/// * Returns [`ThreatModelError::StixParse`], carrying `path` and the
///   underlying `serde_json` error, if the contents are not a valid bundle.
pub fn load_bundle(path: &Path) -> Result<StixBundle> {
    // The whole file is read up front; parsing works on the in-memory buffer.
    let raw = std::fs::read(path)?;

    let parsed = serde_json::from_slice::<StixBundle>(&raw);
    let bundle = parsed.map_err(|source| ThreatModelError::StixParse {
        path: path.to_path_buf(),
        source,
    })?;

    Ok(bundle)
}

/// Translate a [`StixIndicator`] into zero or more [`Signal`]s.
///
/// Only `pattern_type == "x-droidsaw-pattern"` is decoded: its `pattern`
/// string is parsed as JSON in droidsaw's internal indicator shape and each
/// entry becomes one signal whose `source_api` is `"<type>:<value>"`. The
/// entry's optional `match` mode is not used.
///
/// The function is infallible by signature. It returns an empty vec both
/// for any other `pattern_type` (richer grammars are a follow-on stream)
/// and for an `x-droidsaw-pattern` payload that fails to parse; the parse
/// error itself is discarded.
pub fn indicator_to_signals(ind: &StixIndicator) -> Vec<Signal> {
    let decoded = match ind.pattern_type.as_str() {
        "x-droidsaw-pattern" => serde_json::from_str::<DroidsawPattern>(&ind.pattern),
        // STIX patterning and other grammars are deferred. An empty result
        // is intentional, not an error: callers accumulate signals from the
        // subset of indicators they recognize.
        _ => return Vec::new(),
    };

    let entries = match decoded {
        Ok(payload) => payload.indicators,
        Err(_) => return Vec::new(),
    };

    let mut signals = Vec::with_capacity(entries.len());
    for entry in entries {
        signals.push(Signal {
            source_api: format!("{}:{}", entry.kind, entry.value),
            source_api_args: None,
            sink_kind: SinkKind::Other,
            resolution: droidsaw_common::threat_model::Resolution::None,
            completeness: droidsaw_common::threat_model::Completeness::default(),
            adversary_profile_relevance: Vec::new(),
        });
    }
    signals
}

/// Internal `x-droidsaw-pattern` payload shape.
///
/// This is what an Indicator's `pattern` string must decode to (as JSON)
/// when its `pattern_type` is `x-droidsaw-pattern`. Deserialize-only; it is
/// never written back out.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct DroidsawPattern {
    /// The individual match entries; required, but may be empty.
    indicators: Vec<DroidsawPatternIndicator>,
}

/// One entry of a [`DroidsawPattern`] payload: a typed literal to look for.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct DroidsawPatternIndicator {
    /// Indicator type (`android_package_prefix`, `class_name`, …).
    /// Read from the JSON key `type`.
    #[serde(rename = "type")]
    kind: String,
    /// Literal value to match.
    value: String,
    /// Match mode (literal_prefix / literal_exact / regex / …). Optional.
    /// Read from the JSON key `match`. It is accepted so payloads carrying
    /// it parse cleanly, but nothing in this module reads it yet (hence the
    /// leading underscore).
    #[serde(default, rename = "match")]
    _match_mode: Option<String>,
}

/// Load every bundle in `paths` and merge their Indicators into one
/// deduplicated list.
///
/// Indicators are keyed by [`StixIndicator::id`] (the STIX UUID). The first
/// occurrence wins: if two bundles ship the same `id`, the indicator from
/// the earlier path is kept and later ones are dropped. Non-indicator
/// objects are ignored. The returned vec is ordered by `id`, since the
/// working set is a `BTreeMap`.
///
/// # Errors
///
/// Stops at the first bundle that fails to load and returns that error;
/// indicators gathered so far are discarded.
pub fn load_indicators_dedup(paths: &[std::path::PathBuf]) -> Result<Vec<StixIndicator>> {
    let mut by_id: BTreeMap<String, StixIndicator> = BTreeMap::new();

    for bundle_path in paths {
        let indicators = load_bundle(bundle_path)?
            .objects
            .into_iter()
            .filter_map(|obj| match obj {
                StixObject::Indicator(ind) => Some(ind),
                StixObject::Other(_) => None,
            });

        for ind in indicators {
            // `or_insert` leaves an existing entry alone — that is the
            // first-win rule.
            by_id.entry(ind.id.clone()).or_insert(ind);
        }
    }

    Ok(by_id.into_values().collect())
}

#[cfg(test)]
mod tests {
    //! Unit tests for bundle parsing, extension round-tripping and the
    //! `x-droidsaw-pattern` signal decoder. Everything here works on
    //! in-memory JSON; no files are touched.

    use super::*;

    /// A two-object bundle: one Indicator using `x-droidsaw-pattern` (with
    /// two custom `x_*` properties) and one `marking-definition` that must
    /// be carried through as `StixObject::Other`.
    fn sample_bundle_json() -> &'static str {
        r#"{
          "type": "bundle",
          "id": "bundle--00000000-0000-7000-8000-000000000001",
          "objects": [
            {
              "type": "indicator",
              "spec_version": "2.1",
              "id": "indicator--019dbba3-c987-7353-be7d-592937bd75ac",
              "created":  "2026-04-23T00:00:00Z",
              "modified": "2026-04-23T00:00:00Z",
              "name": "Mixpanel",
              "description": "Mixpanel SDK.",
              "indicator_types": ["malicious-activity"],
              "pattern_type": "x-droidsaw-pattern",
              "pattern": "{\"indicators\":[{\"type\":\"android_package_prefix\",\"value\":\"com.mixpanel.android.\",\"match\":\"literal_prefix\"}]}",
              "valid_from": "2012-01-01T00:00:00Z",
              "confidence": 98,
              "labels": ["category/analytics"],
              "x_droidsaw_kind": "tracker",
              "x_droidsaw_for_detection": true
            },
            {
              "type": "marking-definition",
              "id": "marking-definition--abc",
              "definition_type": "tlp",
              "definition": {"tlp": "white"}
            }
          ]
        }"#
    }

    #[test]
    fn parses_indicator_and_carries_unknown_object_to_other() {
        let bundle: StixBundle = serde_json::from_str(sample_bundle_json()).expect("parse");
        assert_eq!(bundle.id, "bundle--00000000-0000-7000-8000-000000000001");
        assert_eq!(bundle.objects.len(), 2);
        match &bundle.objects[0] {
            StixObject::Indicator(ind) => {
                assert_eq!(ind.id, "indicator--019dbba3-c987-7353-be7d-592937bd75ac");
                assert_eq!(ind.spec_version, "2.1");
                assert_eq!(ind.name.as_deref(), Some("Mixpanel"));
                assert_eq!(ind.confidence, Some(98));
                assert_eq!(ind.pattern_type, "x-droidsaw-pattern");
            }
            other => panic!("expected Indicator, got {other:?}"),
        }
        match &bundle.objects[1] {
            StixObject::Other(v) => {
                assert_eq!(v.get("type").and_then(|t| t.as_str()), Some("marking-definition"));
            }
            other => panic!("expected Other(marking-definition), got {other:?}"),
        }
    }

    #[test]
    fn extensions_round_trip_unknown_x_fields() {
        let bundle: StixBundle = serde_json::from_str(sample_bundle_json()).expect("parse");
        let StixObject::Indicator(ind) = &bundle.objects[0] else {
            panic!("expected Indicator");
        };
        assert_eq!(
            ind.extensions.get("x_droidsaw_kind").and_then(|v| v.as_str()),
            Some("tracker")
        );
        assert_eq!(
            ind.extensions.get("x_droidsaw_for_detection").and_then(|v| v.as_bool()),
            Some(true)
        );
    }

    #[test]
    fn extensions_capture_unmodeled_standard_properties() {
        // The flattened map is a catch-all, not an `x_*` filter: standard
        // properties the struct does not model must land there too. The
        // `type` entry in particular is what lets a re-serialized indicator
        // be recognized as an indicator again.
        let bundle: StixBundle = serde_json::from_str(sample_bundle_json()).expect("parse");
        let StixObject::Indicator(ind) = &bundle.objects[0] else {
            panic!("expected Indicator");
        };
        assert_eq!(
            ind.extensions.get("type").and_then(|v| v.as_str()),
            Some("indicator")
        );
        assert!(ind.extensions.contains_key("created"));
        assert!(ind.extensions.contains_key("modified"));
    }

    #[test]
    fn malformed_json_returns_typed_stixparse_err() {
        // Spell-check the parse-error path. Network and FS-error paths
        // are exercised by the integration tests in
        // `tests/threat_model_stix.rs`.
        let bytes = b"{not json";
        let err: super::super::ThreatModelError = serde_json::from_slice::<StixBundle>(bytes)
            .map_err(|source| super::super::ThreatModelError::StixParse {
                path: std::path::PathBuf::from("/dev/null"),
                source,
            })
            .expect_err("malformed JSON must Err");
        match err {
            super::super::ThreatModelError::StixParse { source, .. } => {
                assert!(!source.to_string().is_empty())
            }
            other => panic!("expected StixParse, got {other:?}"),
        }
    }

    #[test]
    fn indicator_to_signals_decodes_droidsaw_pattern() {
        let bundle: StixBundle = serde_json::from_str(sample_bundle_json()).expect("parse");
        let StixObject::Indicator(ind) = &bundle.objects[0] else {
            panic!("expected Indicator");
        };
        let signals = indicator_to_signals(ind);
        assert_eq!(signals.len(), 1);
        let s = &signals[0];
        assert_eq!(s.source_api, "android_package_prefix:com.mixpanel.android.");
    }

    #[test]
    fn indicator_to_signals_returns_empty_for_unsupported_pattern_type() {
        // The fixture already declares `pattern_type: "stix"`, so no
        // post-parse mutation is needed.
        let ind: StixIndicator = serde_json::from_str(
            r#"{
                "type": "indicator",
                "spec_version": "2.1",
                "id": "indicator--xyz",
                "pattern": "[ipv4-addr:value = '1.2.3.4']",
                "pattern_type": "stix",
                "valid_from": "2026-01-01T00:00:00Z"
            }"#,
        )
        .expect("parse");
        let signals = indicator_to_signals(&ind);
        assert!(signals.is_empty());
    }

    #[test]
    fn indicator_to_signals_returns_empty_for_malformed_droidsaw_pattern() {
        // A recognized `pattern_type` whose payload is not valid JSON must
        // degrade to "no signals" rather than panic.
        let ind: StixIndicator = serde_json::from_str(
            r#"{
                "type": "indicator",
                "spec_version": "2.1",
                "id": "indicator--bad-pattern",
                "pattern": "{not json",
                "pattern_type": "x-droidsaw-pattern",
                "valid_from": "2026-01-01T00:00:00Z"
            }"#,
        )
        .expect("parse");
        let signals = indicator_to_signals(&ind);
        assert!(signals.is_empty());
    }

    #[test]
    fn round_trip_preserves_extensions() {
        // Parse → re-serialize → parse: the `extensions` map must round-trip
        // through `serde_json::Value` without losing the `x_*` fields.
        let bundle1: StixBundle = serde_json::from_str(sample_bundle_json()).expect("parse 1");
        let json2 = serde_json::to_string(&bundle1).expect("serialize");
        let bundle2: StixBundle = serde_json::from_str(&json2).expect("parse 2");
        assert_eq!(bundle1, bundle2, "round-trip must preserve every field");
    }
}