Skip to main content

candor_classify/
policy.rs

1//! The canonical CANDOR_POLICY DSL parser (candor-spec SPEC §6.2).
2//!
3//! This is the **single** Rust implementation of the policy grammar — shared by the nightly dylint
4//! gate (`src/lib.rs`, AS-EFF-006/008/009) and the stable `candor-query` (`whatif`, and the
5//! `parsepolicy` dump the cross-impl conformance suite diffs against the JVM engine). Keeping one
6//! parser here is what makes "the gate means the same thing in every language" a fact rather than a
7//! hope: the Rust gate, the Rust pre-edit tool, and the cross-impl differential all read THIS code.
8//!
9//! Pure, stable Rust (string parsing only — no rustc types), so it lives beside the classifier.
10
11use crate::cap_from_name;
12use std::collections::BTreeSet;
13
/// The honesty marker (SPEC §4). Denyable so `deny Unknown <scope>` forbids the *unverifiable* case.
///
/// `parse_policy` compares each `deny` token against this exact string before consulting
/// `cap_from_name`, so it is accepted as an effect name in `deny` rules.
pub const UNKNOWN: &str = "Unknown";
16
/// One `deny <Effect…> [scope]` / `pure <scope>` rule (AS-EFF-006).
///
/// Derives `PartialEq`/`Eq` so whole rules can be compared directly (e.g. in tests or when
/// diffing two parses) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PolicyRule {
    /// The forbidden effects. Empty ⇒ this is a `pure` rule (ANY effect is forbidden).
    pub effects: BTreeSet<&'static str>,
    /// The path segment-scope the rule applies to; `None` means the whole unit.
    pub scope: Option<String>,
    /// The rule's source line as parsed (comment stripped, whitespace trimmed).
    pub raw: String,
}
25
/// One `allow <Effect> [in <scope>] <literal>…` rule (AS-EFF-008). The effect is one of the three
/// that carry a literal surface (`Net`/`Exec`/`Fs`); a function in `scope` performing it may reach
/// ONLY the listed literals. Matching is effect-specific (`literal_allowed`).
///
/// Derives `PartialEq`/`Eq` so whole rules can be compared directly (e.g. in tests or when
/// diffing two parses) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AllowRule {
    /// The allow-listed effect: `"Net"`, `"Exec"` or `"Fs"`.
    pub effect: &'static str,
    /// The scope named after `in`; `None` when the rule has no `in <scope>` part.
    pub scope: Option<String>,
    /// The permitted literals (hosts, commands or paths, depending on `effect`).
    pub literals: BTreeSet<String>,
    /// The rule's source line as parsed (comment stripped, whitespace trimmed).
    pub raw: String,
}
36
/// One `forbid <A> -> <B>` module-layering rule (AS-EFF-009): a function in scope `A` must not
/// transitively call into scope `B`.
///
/// Derives `PartialEq`/`Eq` so whole rules can be compared directly (e.g. in tests or when
/// diffing two parses) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LayerRule {
    /// Scope `A`: the calling side of the forbidden edge.
    pub from: String,
    /// Scope `B`: the side that must not be reached.
    pub to: String,
    /// The rule's source line as parsed (comment stripped, whitespace trimmed).
    pub raw: String,
}
45
46/// The rule kinds parsed from a CANDOR_POLICY file.
47#[derive(Default, Debug)]
48pub struct ParsedPolicy {
49    pub rules: Vec<PolicyRule>,
50    pub allow_rules: Vec<AllowRule>,
51    pub layer_rules: Vec<LayerRule>,
52}
53
/// The hostname part of a `host[:port]` literal. Host-allowlist matching is by hostname so
/// `api.stripe.com` in a rule accepts a reached `api.stripe.com:443`.
///
/// * `host` / `host:port` → `host` (everything before the single `:`).
/// * Bracketed IPv6, `[addr]` / `[addr]:port` → `[addr]` (brackets kept, port dropped).
/// * Anything else containing more than one `:` (e.g. a bare IPv6 literal such as `::1`) has no
///   unambiguous port, so the whole string is the host.
///
/// Cutting IPv6 literals at their first `:` would reduce every bracketed address to `[` and every
/// bare address starting with `:` to the empty string, so a single allowed IPv6 host would match
/// all of them. Keeping the address whole makes the comparison strict instead.
pub fn host_part(h: &str) -> &str {
    if h.starts_with('[') {
        // `]` is one byte, so the inclusive slice always ends on a char boundary.
        if let Some(close) = h.find(']') {
            return &h[..=close];
        }
    }
    match h.split_once(':') {
        // Exactly one colon: plain `host:port`.
        Some((host, port)) if !port.contains(':') => host,
        // No colon (bare host) or several colons (bare IPv6 / malformed): keep it whole.
        _ => h,
    }
}
59
/// The final path component of a command, so that `allow Exec … git` also accepts an absolute
/// path such as `/usr/bin/git`. Both `/` and `\` count as separators; a string with no separator
/// is returned unchanged.
pub fn cmd_base(c: &str) -> &str {
    c.rsplit_once(['/', '\\']).map_or(c, |(_dir, base)| base)
}
64
/// Whether the allowed path `a` covers the reached path `r` (SPEC §6.2), using a prefix test that
/// respects path boundaries.
///
/// * A reached path containing a `..` component is never covered.
/// * A rooted path (leading `/` or `\`) and an unrooted one never cover each other.
/// * Otherwise `a` covers `r` when `a`'s components (empty and `.` components dropped) are a
///   leading run of `r`'s components. So `/etc/app` covers itself and `/etc/app/x`, but not the
///   textual sibling `/etc/apppwned`.
pub fn fs_path_covered(a: &str, r: &str) -> bool {
    /// Whether `path` begins with either separator.
    fn is_rooted(path: &str) -> bool {
        path.starts_with(['/', '\\'])
    }

    /// The meaningful components of `path`: split on either separator, skipping empty and `.`.
    fn components(path: &str) -> impl Iterator<Item = &str> {
        path.split(['/', '\\']).filter(|part| !part.is_empty() && *part != ".")
    }

    let climbs_out = r.split(['/', '\\']).any(|part| part == "..");
    if climbs_out || is_rooted(a) != is_rooted(r) {
        return false;
    }

    // Walk both component streams in lockstep: every allowed component must be matched by the
    // next reached component. Running out of reached components first means `a` is deeper than
    // `r`, which is not coverage.
    let mut reached = components(r);
    components(a).all(|allowed| reached.next() == Some(allowed))
}
86
87/// Whether a reached literal is allowed under an effect-specific match (SPEC §6.2): `Net` host by
88/// name (port ignored), `Exec` command by basename, `Fs` path by boundary-respecting prefix.
89pub fn literal_allowed(effect: &str, reached: &str, allow: &BTreeSet<String>) -> bool {
90    match effect {
91        "Net" => allow.iter().any(|a| host_part(a) == host_part(reached)),
92        "Exec" => allow.iter().any(|a| cmd_base(a) == cmd_base(reached)),
93        "Fs" => allow.iter().any(|a| fs_path_covered(a, reached)),
94        _ => allow.contains(reached),
95    }
96}
97
/// Whether a policy `scope` applies to the function path `name`, matching by **path segment**
/// (SPEC §6.2) rather than by substring.
///
/// Both strings are split on `::`. The scope matches when some contiguous run of `name`'s
/// segments lines up with it: every scope segment but the last must equal its counterpart
/// exactly, and the last scope segment must be a prefix of its counterpart. Hence `domain`
/// matches `app::domain::h` and `crate::domain_logic`, but not `app::subdomain::h`.
/// (Used directly by the Rust gate; the JVM engine mirrors it over `.`.)
pub fn scope_matches(name: &str, scope: &str) -> bool {
    let name_segs: Vec<&str> = name.split("::").collect();
    let scope_segs: Vec<&str> = scope.split("::").collect();

    match scope_segs.split_last() {
        None => false,
        // `windows` yields nothing when the scope has more segments than the name, so an
        // over-long scope falls out as "no match" without a separate length check.
        Some((tail, lead)) => name_segs.windows(scope_segs.len()).any(|run| {
            run[..lead.len()] == *lead && run[lead.len()].starts_with(*tail)
        }),
    }
}
114
115/// Parse a CANDOR_POLICY file (SPEC §6.2). One rule per line; `#` comments and blanks ignored:
116///
117/// ```text
118/// deny Net Db  domain     # functions whose path contains segment "domain" must not perform Net or Db
119/// deny Exec               # no function anywhere may perform Exec
120/// deny Unknown  api        # functions in "api" must be fully resolvable (forbid the unverifiable)
121/// pure         parse      # functions whose path contains segment "parse" must be effect-free
122/// allow Net in billing  api.stripe.com
123/// forbid domain -> infra
124/// ```
125///
126/// In a `deny` rule, leading tokens that name a known effect (or `Unknown`) are forbidden; the FIRST
127/// non-effect token is the scope and ends the rule. A `deny` naming no known effect is dropped (it is
128/// NOT a `pure` rule). Malformed/unknown lines are ignored with a warning — never silently widened.
129pub fn parse_policy(text: &str) -> ParsedPolicy {
130    let mut out = ParsedPolicy::default();
131    for raw_line in text.lines() {
132        let line = raw_line.split('#').next().unwrap_or("").trim();
133        if line.is_empty() {
134            continue;
135        }
136        let mut toks = line.split_whitespace();
137        match toks.next().unwrap_or("") {
138            "allow" => {
139                let effect = match toks.next().unwrap_or("") {
140                    "Net" => "Net",
141                    "Exec" => "Exec",
142                    "Fs" => "Fs",
143                    _ => {
144                        eprintln!(
145                            "candor: ignoring policy rule (allow supports only Net hosts / Exec commands / Fs paths): {line}"
146                        );
147                        continue;
148                    }
149                };
150                let mut rest: Vec<&str> = toks.collect();
151                let scope = if rest.first() == Some(&"in") {
152                    let s = rest.get(1).map(|s| s.to_string());
153                    rest.drain(..2.min(rest.len()));
154                    s
155                } else {
156                    None
157                };
158                let literals: BTreeSet<String> = rest.iter().map(|h| h.to_string()).collect();
159                if literals.is_empty() {
160                    eprintln!("candor: ignoring policy rule (allow {effect} names no values): {line}");
161                    continue;
162                }
163                out.allow_rules.push(AllowRule { effect, scope, literals, raw: line.to_string() });
164            }
165            "deny" => {
166                let mut effects = BTreeSet::new();
167                let mut scope = None;
168                for t in toks {
169                    let e = if t == UNKNOWN { Some(UNKNOWN) } else { cap_from_name(t) };
170                    match e {
171                        Some(e) => {
172                            effects.insert(e);
173                        }
174                        None => {
175                            scope = Some(t.to_string());
176                            break;
177                        }
178                    }
179                }
180                if effects.is_empty() {
181                    eprintln!("candor: ignoring policy rule (no known effect named): {line}");
182                    continue;
183                }
184                out.rules.push(PolicyRule { effects, scope, raw: line.to_string() });
185            }
186            "pure" => out.rules.push(PolicyRule {
187                effects: BTreeSet::new(),
188                scope: toks.next().map(str::to_string),
189                raw: line.to_string(),
190            }),
191            "forbid" => {
192                let a = toks.next().unwrap_or("");
193                let arrow = toks.next().unwrap_or("");
194                let b = toks.next().unwrap_or("");
195                if a.is_empty() || arrow != "->" || b.is_empty() {
196                    eprintln!("candor: ignoring layering rule (want `forbid <scope> -> <scope>`): {line}");
197                    continue;
198                }
199                out.layer_rules.push(LayerRule {
200                    from: a.to_string(),
201                    to: b.to_string(),
202                    raw: line.to_string(),
203                });
204            }
205            other => eprintln!("candor: ignoring policy rule (unknown kind `{other}`): {line}"),
206        }
207    }
208    out
209}
210
// Unit tests for the policy grammar and the matching helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// `deny` / `pure` parsing: effect collection, scope capture, and which lines get dropped.
    #[test]
    fn policy_parses() {
        let p = parse_policy(
            "# the domain layer must stay pure of I/O\n\
             deny Net Db  domain\n\
             deny Exec\n\
             pure  parse\n\
             nonsense line\n\
             deny notaneffect\n",
        );
        let rules = &p.rules;
        // Only the two effect-naming `deny` lines and the `pure` line survive; the unknown-kind
        // line and the `deny` naming no known effect are dropped.
        assert_eq!(rules.len(), 3);
        assert_eq!(rules[0].effects, ["Db", "Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(rules[0].scope.as_deref(), Some("domain"));
        assert!(rules[1].effects.contains("Exec") && rules[1].scope.is_none());
        assert!(rules[2].effects.is_empty() && rules[2].scope.as_deref() == Some("parse"));
        // `Unknown` is a denyable token; a bare `deny` with no effect is ignored.
        assert_eq!(parse_policy("deny Unknown core").rules[0].effects, ["Unknown"].into_iter().collect());
        assert!(parse_policy("deny\ndeny   \n").rules.is_empty());
        // a `deny` whose first token is a non-effect names no effect -> dropped, NOT a pure rule.
        assert!(parse_policy("deny notaneffect scope").rules.is_empty());
        // the first non-effect token ENDS the rule: a later effect token is not collected.
        let p2 = parse_policy("deny Net foo Db");
        assert_eq!(p2.rules[0].effects, ["Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(p2.rules[0].scope.as_deref(), Some("foo"));
    }

    /// `allow` parsing (with and without `in <scope>`) plus the effect-specific literal matching.
    #[test]
    fn allowlist_parses() {
        let p = parse_policy(
            "allow Net in billing  api.stripe.com  hooks.stripe.com\n\
             allow Exec in ci  git\n\
             allow Fs in config  /etc/app\n\
             allow Net  github.com\n\
             allow Db  whatever\n\
             allow Net in nohosts\n\
             allow\n",
        );
        // Dropped: `allow Db` (unsupported effect), `allow Net in nohosts` (no literals), bare `allow`.
        assert_eq!(p.allow_rules.len(), 4);
        assert_eq!((p.allow_rules[0].effect, p.allow_rules[0].scope.as_deref()), ("Net", Some("billing")));
        assert_eq!(
            p.allow_rules[0].literals,
            ["api.stripe.com", "hooks.stripe.com"].iter().map(|s| s.to_string()).collect()
        );
        assert_eq!((p.allow_rules[1].effect, p.allow_rules[1].scope.as_deref()), ("Exec", Some("ci")));
        assert!(p.allow_rules[1].literals.contains("git"));
        assert_eq!((p.allow_rules[2].effect, p.allow_rules[2].scope.as_deref()), ("Fs", Some("config")));
        assert_eq!((p.allow_rules[3].effect, p.allow_rules[3].scope.is_none()), ("Net", true));

        // Matching: Net ignores the port, Exec compares basenames, Fs is a boundary-respecting prefix.
        let set = |xs: &[&str]| xs.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>();
        assert!(literal_allowed("Net", "api.stripe.com:443", &set(&["api.stripe.com"])));
        assert!(literal_allowed("Exec", "/usr/bin/git", &set(&["git"])));
        assert!(!literal_allowed("Exec", "/usr/bin/curl", &set(&["git"])));
        assert!(literal_allowed("Fs", "/etc/app/conf.toml", &set(&["/etc/app"])));
        assert!(!literal_allowed("Fs", "/etc/shadow", &set(&["/etc/app"])));
        assert_eq!(cmd_base("/usr/bin/git"), "git");
    }

    /// `forbid A -> B` parsing: extra whitespace is tolerated, malformed shapes are dropped.
    #[test]
    fn layering_rule_parses() {
        let p = parse_policy(
            "forbid domain -> infra\n\
             forbid  app::web  ->  app::db \n\
             forbid domain infra\n\
             forbid domain ->\n\
             forbid\n",
        );
        // Dropped: missing arrow, missing target scope, and the bare `forbid`.
        assert_eq!(p.layer_rules.len(), 2);
        assert_eq!((p.layer_rules[0].from.as_str(), p.layer_rules[0].to.as_str()), ("domain", "infra"));
        assert_eq!((p.layer_rules[1].from.as_str(), p.layer_rules[1].to.as_str()), ("app::web", "app::db"));
    }

    /// Scope matching is per `::` segment, with only the last scope segment treated as a prefix.
    #[test]
    fn scope_matches_by_segment_not_substring() {
        assert!(scope_matches("app::domain::handle", "domain"));
        assert!(scope_matches("domain::handle", "domain"));
        assert!(scope_matches("app::domain", "domain"));
        assert!(scope_matches("crate::domain_logic", "domain"));
        assert!(!scope_matches("app::subdomain::handle", "domain"));
        assert!(!scope_matches("app::not_my_domain::f", "domain"));
        // multi-segment: intermediates exact, last is a prefix, contiguous.
        assert!(scope_matches("crate::net::client::send", "net::client"));
        assert!(scope_matches("crate::net::client_pool::get", "net::client"));
        assert!(!scope_matches("crate::net::server::send", "net::client"));
        assert!(!scope_matches("crate::network::client::send", "net::client"));
        assert!(!scope_matches("crate::net::x::client", "net::client"));
        assert!(!scope_matches("net", "net::client"));
    }

    /// Fs coverage: self and descendants only; no textual siblings, no `..`, no rooted/unrooted mix.
    #[test]
    fn fs_path_covered_respects_boundaries() {
        assert!(fs_path_covered("/etc/app", "/etc/app"));
        assert!(fs_path_covered("/etc/app", "/etc/app/cfg.toml"));
        assert!(fs_path_covered("/etc/app/", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "/etc/apppwned"));
        assert!(!fs_path_covered("/etc/app", "/etc/application/x"));
        assert!(!fs_path_covered("/etc/app/cfg", "/etc/app"));
        assert!(!fs_path_covered("/etc/app", "/etc/app/../passwd"));
        assert!(fs_path_covered("/", "/etc/app/x"));
        assert!(!fs_path_covered("etc/app", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "etc/app/cfg"));
        assert!(fs_path_covered("etc/app", "etc/app/cfg"));
    }
}
318}