Skip to main content

candor_classify/
policy.rs

1//! The canonical CANDOR_POLICY DSL parser (candor-spec SPEC §6.2).
2//!
3//! This is the **single** Rust implementation of the policy grammar — shared by the nightly dylint
4//! gate (`src/lib.rs`, AS-EFF-006/008/009) and the stable `candor-query` (`whatif`, and the
5//! `parsepolicy` dump the cross-impl conformance suite diffs against the JVM engine). Keeping one
6//! parser here is what makes "the gate means the same thing in every language" a fact rather than a
7//! hope: the Rust gate, the Rust pre-edit tool, and the cross-impl differential all read THIS code.
8//!
9//! Pure, stable Rust (string parsing only — no rustc types), so it lives beside the classifier.
10
11use crate::cap_from_name;
12use std::collections::BTreeSet;
13
/// The honesty marker (SPEC §4). Denyable so `deny Unknown <scope>` forbids the *unverifiable* case.
///
/// `parse_policy` compares each `deny` token against this constant before falling back to
/// `cap_from_name`, so `Unknown` is accepted as a denyable token alongside the known effects.
pub const UNKNOWN: &str = "Unknown";
16
/// One `deny <Effect…> [scope]` / `pure <scope>` rule (AS-EFF-006).
///
/// An empty `effects` set marks a `pure` rule (ANY effect forbidden); a non-empty set lists exactly
/// the effects a `deny` rule forbids.
///
/// Derives `PartialEq`/`Eq` so parsed rules can be compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PolicyRule {
    /// The forbidden effects; empty ⇒ a `pure` rule.
    pub effects: BTreeSet<&'static str>,
    /// The path segment-scope the rule applies to (`None` = whole unit).
    pub scope: Option<String>,
    /// The rule's source line, comment-stripped and trimmed.
    pub raw: String,
}
25
/// One `allow <Effect> [in <scope>] <literal>…` rule (AS-EFF-008). The effect is one of the three
/// that carry a literal surface (`Net`/`Exec`/`Fs`); a function in `scope` performing it may reach
/// ONLY the listed literals. Matching is effect-specific (`literal_allowed`).
///
/// Derives `PartialEq`/`Eq` so parsed rules can be compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AllowRule {
    /// The restricted effect: `"Net"`, `"Exec"` or `"Fs"`.
    pub effect: &'static str,
    /// The scope named after `in` (`None` when the rule has no `in <scope>` part).
    pub scope: Option<String>,
    /// The literals (hosts / commands / paths) the effect may reach; never empty for a parsed rule.
    pub literals: BTreeSet<String>,
    /// The rule's source line, comment-stripped and trimmed.
    pub raw: String,
}
36
/// One `forbid <A> -> <B>` module-layering rule (AS-EFF-009): a function in scope `A` must not
/// transitively call into scope `B`.
///
/// Derives `PartialEq`/`Eq` so parsed rules can be compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LayerRule {
    /// The calling scope (`A`, left of the arrow).
    pub from: String,
    /// The scope that must not be reached (`B`, right of the arrow).
    pub to: String,
    /// The rule's source line, comment-stripped and trimmed.
    pub raw: String,
}
45
46/// The rule kinds parsed from a CANDOR_POLICY file.
47#[derive(Default, Debug)]
48pub struct ParsedPolicy {
49    pub rules: Vec<PolicyRule>,
50    pub allow_rules: Vec<AllowRule>,
51    pub layer_rules: Vec<LayerRule>,
52}
53
/// Strip the port from a `host[:port]` literal and return the host, so a rule naming
/// `api.stripe.com` accepts a reached `api.stripe.com:443`.
///
/// Three shapes are recognised:
/// * `[ipv6]` / `[ipv6]:port` — the text between the brackets (everything after `[` when the closing
///   bracket is missing);
/// * a BARE IPv6 literal (more than one colon, no brackets) — returned whole, because it has no port
///   suffix. A naive first-colon split would collapse every `2001:db8::*` to `2001`, letting one
///   allowed address accept the whole block;
/// * a hostname / IPv4 `host` or `host:port` (at most one colon) — the text before the colon.
pub fn host_part(h: &str) -> &str {
    match h.strip_prefix('[') {
        // Bracketed form: the host ends at the first `]`.
        Some(inner) => inner.split_once(']').map_or(inner, |(host, _)| host),
        // A second colon means a bare IPv6 address: nothing to strip.
        None if h.matches(':').nth(1).is_some() => h,
        // Zero or one colon: keep what precedes it.
        None => h.split_once(':').map_or(h, |(host, _)| host),
    }
}
69
/// The final path component of a command (`/usr/bin/git` → `git`), so `allow Exec … git` also
/// accepts the command given by absolute path. Both `/` and `\` count as separators; a command with
/// no separator is returned unchanged, and one ending in a separator yields the empty string.
pub fn cmd_base(c: &str) -> &str {
    // Both separators are one byte long, so `sep + 1` is always a valid char boundary.
    c.rfind(['/', '\\']).map_or(c, |sep| &c[sep + 1..])
}
74
/// Whether the allowed path `a` covers the reached path `r` (SPEC §6.2: a prefix match that
/// respects path boundaries).
///
/// * A directory covers itself and everything below it, but never a sibling that merely shares a
///   textual prefix (`/etc/app` does not cover `/etc/apppwned`).
/// * A reached path containing a `..` component is never covered.
/// * An absolute path and a relative path never cover one another.
///
/// `/` and `\` are both separators; empty components and `.` are ignored on both sides.
pub fn fs_path_covered(a: &str, r: &str) -> bool {
    const SEPARATORS: [char; 2] = ['/', '\\'];

    // The meaningful components of a path: empty pieces and `.` are dropped.
    fn components(path: &str) -> impl Iterator<Item = &str> {
        path.split(SEPARATORS).filter(|piece| !piece.is_empty() && *piece != ".")
    }

    let is_rooted = |path: &str| path.starts_with(SEPARATORS);

    // Refuse anything that climbs, and never mix absolute with relative.
    if r.split(SEPARATORS).any(|piece| piece == "..") || is_rooted(a) != is_rooted(r) {
        return false;
    }

    // `a` covers `r` when a's components are a leading run of r's; running out of reached
    // components first (a is the longer path) fails the comparison.
    let mut reached = components(r);
    components(a).all(|allowed| reached.next() == Some(allowed))
}
96
97/// Whether a reached literal is allowed under an effect-specific match (SPEC §6.2): `Net` host by
98/// name (port ignored), `Exec` command by basename, `Fs` path by boundary-respecting prefix.
99pub fn literal_allowed(effect: &str, reached: &str, allow: &BTreeSet<String>) -> bool {
100    match effect {
101        "Net" => allow.iter().any(|a| host_part(a) == host_part(reached)),
102        "Exec" => allow.iter().any(|a| cmd_base(a) == cmd_base(reached)),
103        "Fs" => allow.iter().any(|a| fs_path_covered(a, reached)),
104        _ => allow.contains(reached),
105    }
106}
107
/// Match a policy scope against a function name by **path segment** (SPEC §6.2), never by
/// substring. Both strings are split on `::`; the scope matches when some contiguous run of the
/// name's segments equals the scope's segments exactly, except that the scope's final segment only
/// has to be a prefix of the segment it lines up with. Hence `domain` matches `app::domain::h` and
/// `domain_logic`, but not `subdomain`. (Used directly by the Rust gate; the JVM engine mirrors it
/// over `.`.)
pub fn scope_matches(name: &str, scope: &str) -> bool {
    let wanted: Vec<&str> = scope.split("::").collect();
    let path: Vec<&str> = name.split("::").collect();
    // `split` always yields at least one piece, so the `None` arm is purely defensive.
    let (tail, exact) = match wanted.split_last() {
        Some(pieces) => pieces,
        None => return false,
    };
    // A scope longer than the name produces no windows at all, hence no match.
    path.windows(wanted.len()).any(|run| {
        run.starts_with(exact) && matches!(run.last(), Some(seg) if seg.starts_with(*tail))
    })
}
124
125/// Parse a CANDOR_POLICY file (SPEC §6.2). One rule per line; `#` comments and blanks ignored:
126///
127/// ```text
128/// deny Net Db  domain     # functions whose path contains segment "domain" must not perform Net or Db
129/// deny Exec               # no function anywhere may perform Exec
130/// deny Unknown  api        # functions in "api" must be fully resolvable (forbid the unverifiable)
131/// pure         parse      # functions whose path contains segment "parse" must be effect-free
132/// allow Net in billing  api.stripe.com
133/// forbid domain -> infra
134/// ```
135///
136/// In a `deny` rule, leading tokens that name a known effect (or `Unknown`) are forbidden; the FIRST
137/// non-effect token is the scope and ends the rule. A `deny` naming no known effect is dropped (it is
138/// NOT a `pure` rule). Malformed/unknown lines are ignored with a warning — never silently widened.
139pub fn parse_policy(text: &str) -> ParsedPolicy {
140    let mut out = ParsedPolicy::default();
141    for raw_line in text.lines() {
142        let line = raw_line.split('#').next().unwrap_or("").trim();
143        if line.is_empty() {
144            continue;
145        }
146        let mut toks = line.split_whitespace();
147        match toks.next().unwrap_or("") {
148            "allow" => {
149                let effect = match toks.next().unwrap_or("") {
150                    "Net" => "Net",
151                    "Exec" => "Exec",
152                    "Fs" => "Fs",
153                    _ => {
154                        eprintln!(
155                            "candor: ignoring policy rule (allow supports only Net hosts / Exec commands / Fs paths): {line}"
156                        );
157                        continue;
158                    }
159                };
160                let mut rest: Vec<&str> = toks.collect();
161                let scope = if rest.first() == Some(&"in") {
162                    let s = rest.get(1).map(|s| s.to_string());
163                    rest.drain(..2.min(rest.len()));
164                    s
165                } else {
166                    None
167                };
168                let literals: BTreeSet<String> = rest.iter().map(|h| h.to_string()).collect();
169                if literals.is_empty() {
170                    eprintln!("candor: ignoring policy rule (allow {effect} names no values): {line}");
171                    continue;
172                }
173                out.allow_rules.push(AllowRule { effect, scope, literals, raw: line.to_string() });
174            }
175            "deny" => {
176                let mut effects = BTreeSet::new();
177                let mut scope = None;
178                for t in toks {
179                    let e = if t == UNKNOWN { Some(UNKNOWN) } else { cap_from_name(t) };
180                    match e {
181                        Some(e) => {
182                            effects.insert(e);
183                        }
184                        None => {
185                            scope = Some(t.to_string());
186                            break;
187                        }
188                    }
189                }
190                if effects.is_empty() {
191                    eprintln!("candor: ignoring policy rule (no known effect named): {line}");
192                    continue;
193                }
194                out.rules.push(PolicyRule { effects, scope, raw: line.to_string() });
195            }
196            "pure" => out.rules.push(PolicyRule {
197                effects: BTreeSet::new(),
198                scope: toks.next().map(str::to_string),
199                raw: line.to_string(),
200            }),
201            "forbid" => {
202                let a = toks.next().unwrap_or("");
203                let arrow = toks.next().unwrap_or("");
204                let b = toks.next().unwrap_or("");
205                if a.is_empty() || arrow != "->" || b.is_empty() {
206                    eprintln!("candor: ignoring layering rule (want `forbid <scope> -> <scope>`): {line}");
207                    continue;
208                }
209                out.layer_rules.push(LayerRule {
210                    from: a.to_string(),
211                    to: b.to_string(),
212                    raw: line.to_string(),
213                });
214            }
215            other => eprintln!("candor: ignoring policy rule (unknown kind `{other}`): {line}"),
216        }
217    }
218    out
219}
220
// Unit tests for the policy parser and its matching helpers. Each test feeds `parse_policy` a small
// inline policy that mixes well-formed and malformed lines, then checks both what was kept and what
// was dropped.
#[cfg(test)]
mod tests {
    use super::*;

    /// `deny` / `pure` parsing: effects are collected until the first non-effect token (the scope),
    /// and lines that name no known effect or an unknown rule kind are dropped.
    #[test]
    fn policy_parses() {
        let p = parse_policy(
            "# the domain layer must stay pure of I/O\n\
             deny Net Db  domain\n\
             deny Exec\n\
             pure  parse\n\
             nonsense line\n\
             deny notaneffect\n",
        );
        let rules = &p.rules;
        // Only the two `deny` lines naming real effects and the `pure` line survive.
        assert_eq!(rules.len(), 3);
        assert_eq!(rules[0].effects, ["Db", "Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(rules[0].scope.as_deref(), Some("domain"));
        assert!(rules[1].effects.contains("Exec") && rules[1].scope.is_none());
        assert!(rules[2].effects.is_empty() && rules[2].scope.as_deref() == Some("parse"));
        // `Unknown` is a denyable token; a bare `deny` with no effect is ignored.
        assert_eq!(parse_policy("deny Unknown core").rules[0].effects, ["Unknown"].into_iter().collect());
        assert!(parse_policy("deny\ndeny   \n").rules.is_empty());
        // a `deny` whose first token is a non-effect names no effect -> dropped, NOT a pure rule.
        assert!(parse_policy("deny notaneffect scope").rules.is_empty());
        // the first non-effect token ENDS the rule: a later effect token is not collected.
        let p2 = parse_policy("deny Net foo Db");
        assert_eq!(p2.rules[0].effects, ["Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(p2.rules[0].scope.as_deref(), Some("foo"));
    }

    /// `allow` parsing (effect, optional `in <scope>`, literals) plus the effect-specific matching
    /// done by `literal_allowed`, `host_part` and `cmd_base`.
    #[test]
    fn allowlist_parses() {
        let p = parse_policy(
            "allow Net in billing  api.stripe.com  hooks.stripe.com\n\
             allow Exec in ci  git\n\
             allow Fs in config  /etc/app\n\
             allow Net  github.com\n\
             allow Db  whatever\n\
             allow Net in nohosts\n\
             allow\n",
        );
        // `allow Db …` (unsupported effect), `allow Net in nohosts` (no literals) and the bare
        // `allow` are all rejected, leaving four rules.
        assert_eq!(p.allow_rules.len(), 4);
        assert_eq!((p.allow_rules[0].effect, p.allow_rules[0].scope.as_deref()), ("Net", Some("billing")));
        assert_eq!(
            p.allow_rules[0].literals,
            ["api.stripe.com", "hooks.stripe.com"].iter().map(|s| s.to_string()).collect()
        );
        assert_eq!((p.allow_rules[1].effect, p.allow_rules[1].scope.as_deref()), ("Exec", Some("ci")));
        assert!(p.allow_rules[1].literals.contains("git"));
        assert_eq!((p.allow_rules[2].effect, p.allow_rules[2].scope.as_deref()), ("Fs", Some("config")));
        assert_eq!((p.allow_rules[3].effect, p.allow_rules[3].scope.is_none()), ("Net", true));

        // Helper: build an allow-list from string literals.
        let set = |xs: &[&str]| xs.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>();
        assert!(literal_allowed("Net", "api.stripe.com:443", &set(&["api.stripe.com"])));
        // IPv6: a bare literal is matched WHOLE (no first-colon collapse), so a different address in the
        // same block is NOT accepted; a bracketed `[host]:port` matches the bare host. (/code-review.)
        assert!(literal_allowed("Net", "2001:db8::aa", &set(&["2001:db8::aa"])));
        assert!(!literal_allowed("Net", "2001:db8::ff", &set(&["2001:db8::aa"])));
        assert!(!literal_allowed("Net", "2001:dead::1", &set(&["2001:db8::aa"])));
        assert!(literal_allowed("Net", "[2001:db8::aa]:443", &set(&["2001:db8::aa"])));
        assert_eq!(host_part("2001:db8::aa"), "2001:db8::aa");
        assert_eq!(host_part("[2001:db8::aa]:443"), "2001:db8::aa");
        assert_eq!(host_part("api.stripe.com:443"), "api.stripe.com");
        // Exec matches on the command's basename; Fs on a boundary-respecting path prefix.
        assert!(literal_allowed("Exec", "/usr/bin/git", &set(&["git"])));
        assert!(!literal_allowed("Exec", "/usr/bin/curl", &set(&["git"])));
        assert!(literal_allowed("Fs", "/etc/app/conf.toml", &set(&["/etc/app"])));
        assert!(!literal_allowed("Fs", "/etc/shadow", &set(&["/etc/app"])));
        assert_eq!(cmd_base("/usr/bin/git"), "git");
    }

    /// `forbid A -> B` parsing: the arrow and both scopes are mandatory; extra whitespace is fine.
    #[test]
    fn layering_rule_parses() {
        let p = parse_policy(
            "forbid domain -> infra\n\
             forbid  app::web  ->  app::db \n\
             forbid domain infra\n\
             forbid domain ->\n\
             forbid\n",
        );
        // The last three lines (no arrow, no target, nothing at all) are rejected.
        assert_eq!(p.layer_rules.len(), 2);
        assert_eq!((p.layer_rules[0].from.as_str(), p.layer_rules[0].to.as_str()), ("domain", "infra"));
        assert_eq!((p.layer_rules[1].from.as_str(), p.layer_rules[1].to.as_str()), ("app::web", "app::db"));
    }

    /// `scope_matches` works on `::` segments: exact on every segment but the last, prefix on the
    /// last, and the matched segments must be contiguous.
    #[test]
    fn scope_matches_by_segment_not_substring() {
        assert!(scope_matches("app::domain::handle", "domain"));
        assert!(scope_matches("domain::handle", "domain"));
        assert!(scope_matches("app::domain", "domain"));
        assert!(scope_matches("crate::domain_logic", "domain"));
        assert!(!scope_matches("app::subdomain::handle", "domain"));
        assert!(!scope_matches("app::not_my_domain::f", "domain"));
        // multi-segment: intermediates exact, last is a prefix, contiguous.
        assert!(scope_matches("crate::net::client::send", "net::client"));
        assert!(scope_matches("crate::net::client_pool::get", "net::client"));
        assert!(!scope_matches("crate::net::server::send", "net::client"));
        assert!(!scope_matches("crate::network::client::send", "net::client"));
        assert!(!scope_matches("crate::net::x::client", "net::client"));
        assert!(!scope_matches("net", "net::client"));
    }

    /// `fs_path_covered` is a component-wise prefix test: no textual-prefix siblings, no `..`
    /// escapes, and no mixing of absolute with relative paths.
    #[test]
    fn fs_path_covered_respects_boundaries() {
        assert!(fs_path_covered("/etc/app", "/etc/app"));
        assert!(fs_path_covered("/etc/app", "/etc/app/cfg.toml"));
        assert!(fs_path_covered("/etc/app/", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "/etc/apppwned"));
        assert!(!fs_path_covered("/etc/app", "/etc/application/x"));
        assert!(!fs_path_covered("/etc/app/cfg", "/etc/app"));
        assert!(!fs_path_covered("/etc/app", "/etc/app/../passwd"));
        assert!(fs_path_covered("/", "/etc/app/x"));
        assert!(!fs_path_covered("etc/app", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "etc/app/cfg"));
        assert!(fs_path_covered("etc/app", "etc/app/cfg"));
    }
}