Skip to main content

candor_classify/
policy.rs

1//! The canonical CANDOR_POLICY DSL parser (candor-spec SPEC §6.2).
2//!
3//! This is the **single** Rust implementation of the policy grammar — shared by the nightly dylint
4//! gate (`src/lib.rs`, AS-EFF-006/008/009) and the stable `candor-query` (`whatif`, and the
5//! `parsepolicy` dump the cross-impl conformance suite diffs against the JVM engine). Keeping one
6//! parser here is what makes "the gate means the same thing in every language" a fact rather than a
7//! hope: the Rust gate, the Rust pre-edit tool, and the cross-impl differential all read THIS code.
8//!
9//! Pure, stable Rust (string parsing only — no rustc types), so it lives beside the classifier.
10
11use crate::cap_from_name;
12use std::collections::BTreeSet;
13
/// The honesty marker (SPEC §4). Denyable so `deny Unknown <scope>` forbids the *unverifiable* case.
///
/// `parse_policy` accepts this exact token in a `deny` rule alongside the effect names recognised by
/// `cap_from_name`.
pub const UNKNOWN: &str = "Unknown";
16
/// One `deny <Effect…> [scope]` / `pure <scope>` rule (AS-EFF-006).
///
/// Comparable with `==` so parsed policies can be asserted on and diffed directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PolicyRule {
    /// The forbidden effects. Empty ⇒ this is a `pure` rule (ANY effect is forbidden).
    pub effects: BTreeSet<&'static str>,
    /// The path segment-scope the rule applies to; `None` = the whole unit.
    pub scope: Option<String>,
    /// The rule's source line, comment stripped and surrounding ASCII whitespace trimmed.
    pub raw: String,
}
25
/// One `allow <Effect> [in <scope>] <literal>…` rule (AS-EFF-008). The effect is one of the four
/// that carry a literal surface (`Net` hosts / `Exec` commands / `Fs` paths / `Db` tables); a
/// function in `scope` performing it may reach ONLY the listed literals. Matching is
/// effect-specific (`literal_allowed`).
///
/// Comparable with `==` so parsed policies can be asserted on and diffed directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AllowRule {
    /// The allow-listed effect: `"Net"`, `"Exec"`, `"Fs"` or `"Db"`.
    pub effect: &'static str,
    /// The scope named after `in`; `None` when the rule has no `in <scope>` part.
    pub scope: Option<String>,
    /// The permitted literals (never empty for a rule produced by `parse_policy`).
    pub literals: BTreeSet<String>,
    /// The rule's source line, comment stripped and surrounding ASCII whitespace trimmed.
    pub raw: String,
}
37
/// One `forbid <A> -> <B>` module-layering rule (AS-EFF-009): a function in scope `A` must not
/// transitively call into scope `B`.
///
/// Comparable with `==` so parsed policies can be asserted on and diffed directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LayerRule {
    /// Scope `A`: the callers being constrained.
    pub from: String,
    /// Scope `B`: the callees that must not be reached.
    pub to: String,
    /// The rule's source line, comment stripped and surrounding ASCII whitespace trimmed.
    pub raw: String,
}
46
47/// The rule kinds parsed from a CANDOR_POLICY file.
48#[derive(Default, Debug)]
49pub struct ParsedPolicy {
50    pub rules: Vec<PolicyRule>,
51    pub allow_rules: Vec<AllowRule>,
52    pub layer_rules: Vec<LayerRule>,
53}
54
/// Strip the port from a `host[:port]` literal, so a rule naming `api.stripe.com` accepts a reached
/// `api.stripe.com:443`.
///
/// Three shapes are distinguished:
/// * `[host]` / `[host]:port` — the text between the brackets (everything after `[` if unclosed);
/// * two or more colons and no leading bracket — a bare IPv6 literal, returned whole (it carries no
///   port; cutting at the first colon would make one allowed address match its entire block);
/// * otherwise — the text before the single colon, or the whole input when there is none.
pub fn host_part(h: &str) -> &str {
    if let Some(inner) = h.strip_prefix('[') {
        return match inner.find(']') {
            Some(close) => &inner[..close],
            None => inner,
        };
    }
    match h.split_once(':') {
        // A further colon after the first one means this is a bare IPv6 literal.
        Some((_, tail)) if tail.contains(':') => h,
        Some((host, _port)) => host,
        None => h,
    }
}
70
/// The basename of a command (`/usr/bin/git` → `git`), so `allow Exec … git` accepts an absolute
/// path. Both `/` and `\` count as separators; a command without either is returned unchanged.
pub fn cmd_base(c: &str) -> &str {
    match c.rfind(['/', '\\']) {
        // Both separators are one byte long, so `sep + 1` is always a char boundary.
        Some(sep) => &c[sep + 1..],
        None => c,
    }
}
75
/// Whether an allowed path `a` covers a reached path `r` (SPEC §6.2: path-boundary-respecting prefix).
///
/// * A directory covers itself and everything beneath it, but NOT a sibling that merely shares a
///   textual prefix (`/etc/app` ⊉ `/etc/apppwned`) — comparison is component by component.
/// * A reached path with any `..` component is never covered.
/// * Absolute and relative paths are never conflated: both must be rooted, or neither.
///
/// `/` and `\` both separate components; empty components and `.` are ignored.
pub fn fs_path_covered(a: &str, r: &str) -> bool {
    /// The meaningful components of `p`, in order.
    fn components(p: &str) -> impl Iterator<Item = &str> {
        p.split(['/', '\\']).filter(|part| !part.is_empty() && *part != ".")
    }
    let is_rooted = |p: &str| p.starts_with(['/', '\\']);

    let climbs_out = r.split(['/', '\\']).any(|part| part == "..");
    if climbs_out || is_rooted(a) != is_rooted(r) {
        return false;
    }

    // `a` covers `r` iff a's components are a prefix of r's: walk both in lock-step and fail on the
    // first mismatch, or when `r` runs out of components before `a` does.
    let mut reached = components(r);
    components(a).all(|wanted| reached.next() == Some(wanted))
}
97
/// Whether an allowed table entry `a` covers a reached table `r` (SPEC §6.2).
///
/// Both sides are lower-cased first. A `schema.*` entry covers every reached name that starts with
/// `schema.`; any other entry must equal the reached name exactly. Qualification is strict — an
/// allowed `entries` does NOT cover a reached `ledger.entries` (list both forms if queries mix them):
/// silent widening is the failure mode an allowlist exists to prevent.
pub fn db_table_covered(a: &str, r: &str) -> bool {
    let allowed = a.to_lowercase();
    let reached = r.to_lowercase();
    match allowed.strip_suffix(".*") {
        // Require the `.` right after the schema so `ledger.*` does not cover `ledgerx.entries`.
        Some(schema) => {
            matches!(reached.strip_prefix(schema), Some(table) if table.starts_with('.'))
        }
        None => allowed == reached,
    }
}
110
111/// Whether a reached literal is allowed under an effect-specific match (SPEC §6.2): `Net` host by
112/// name (port ignored), `Exec` command by basename, `Fs` path by boundary-respecting prefix,
113/// `Db` table by qualified name or `schema.*`.
114pub fn literal_allowed(effect: &str, reached: &str, allow: &BTreeSet<String>) -> bool {
115    match effect {
116        "Net" => allow.iter().any(|a| host_part(a) == host_part(reached)),
117        "Exec" => allow.iter().any(|a| cmd_base(a) == cmd_base(reached)),
118        "Fs" => allow.iter().any(|a| fs_path_covered(a, reached)),
119        "Db" => allow.iter().any(|a| db_table_covered(a, reached)),
120        _ => allow.contains(reached),
121    }
122}
123
/// Split a function name (or scope) into its non-empty PATH SEGMENTS, treating both `.` and `:` as
/// separators.
///
/// Names arrive `::`-separated from the Rust engines and `.`-separated from the JVM/Swift/TS engines.
/// Segmenting on `::` alone left a scoped `deny`/`pure` rule silently inert on a dotted name (the
/// scope matched nothing → a false green). Neither character occurs inside a real segment, so
/// splitting on both never over-segments a Rust path; `::` simply yields an empty piece that is
/// dropped.
fn name_segments(s: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    for piece in s.split(|c: char| c == '.' || c == ':') {
        if !piece.is_empty() {
            segments.push(piece);
        }
    }
    segments
}
134
135/// A policy scope matches a function name by **path segment** (SPEC §6.2), not substring: split both
136/// into segments (on `::` or `.`); the scope matches a contiguous run of name-segments where every
137/// segment except the last matches exactly and the last is a prefix. So `domain` matches
138/// `app::domain::h`, `com.acme.domain.h`, and `domain_logic` but not `subdomain`.
139pub fn scope_matches(name: &str, scope: &str) -> bool {
140    let segs = name_segments(name);
141    let parts = name_segments(scope);
142    if parts.is_empty() || parts.len() > segs.len() {
143        return false;
144    }
145    let (last, init) = parts.split_last().unwrap();
146    segs.windows(parts.len()).any(|w| {
147        let (w_last, w_init) = w.split_last().unwrap();
148        w_init == init && w_last.starts_with(last)
149    })
150}
151
152/// Parse a CANDOR_POLICY file (SPEC §6.2). One rule per line; `#` comments and blanks ignored:
153///
154/// ```text
155/// deny Net Db  domain     # functions whose path contains segment "domain" must not perform Net or Db
156/// deny Exec               # no function anywhere may perform Exec
157/// deny Unknown  api        # functions in "api" must be fully resolvable (forbid the unverifiable)
158/// pure         parse      # functions whose path contains segment "parse" must be effect-free
159/// allow Net in billing  api.stripe.com
160/// forbid domain -> infra
161/// ```
162///
163/// In a `deny` rule, leading tokens that name a known effect (or `Unknown`) are forbidden; the FIRST
164/// non-effect token is the scope and ends the rule. A `deny` naming no known effect is dropped (it is
165/// NOT a `pure` rule). Malformed/unknown lines are ignored with a warning — never silently widened.
166/// The §6.2 token separator: ASCII whitespace ONLY (space/tab/CR/LF/VT/FF). `split_whitespace`/`trim`
167/// use Unicode `White_Space`, which would split a NBSP/ideographic space that Java drops — a gateless-
168/// green cross-engine divergence (adversarial DSL review). A non-ASCII space stays part of its token, so
169/// the rule is malformed and ignored, uniformly.
170fn is_ascii_ws(c: char) -> bool {
171    matches!(c, ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r')
172}
173
174pub fn parse_policy(text: &str) -> ParsedPolicy {
175    let mut out = ParsedPolicy::default();
176    // `str::lines()` splits on \n and \r\n but NOT bare \r — a classic-Mac file then collapses to ONE
177    // line, and since \r is also an in-line ASCII-ws token separator (is_ascii_ws), every rule after the
178    // first was glued into the first rule's tokens and dropped (sweep [16], a gateless-green divergence).
179    // Java's Files.readAllLines (the reference) breaks on bare \r too — normalize to match it. Allocation
180    // only when a bare \r is actually present (the overwhelmingly-common \n / \r\n files are untouched).
181    let normalized;
182    let text = if text.contains('\r') {
183        normalized = text.replace("\r\n", "\n").replace('\r', "\n");
184        normalized.as_str()
185    } else {
186        text
187    };
188    for raw_line in text.lines() {
189        let line = raw_line.split('#').next().unwrap_or("").trim_matches(is_ascii_ws);
190        if line.is_empty() {
191            continue;
192        }
193        let mut toks = line.split(is_ascii_ws).filter(|s| !s.is_empty());
194        match toks.next().unwrap_or("") {
195            "allow" => {
196                let effect = match toks.next().unwrap_or("") {
197                    "Net" => "Net",
198                    "Exec" => "Exec",
199                    "Fs" => "Fs",
200                    "Db" => "Db",
201                    _ => {
202                        eprintln!(
203                            "candor: ignoring policy rule (allow supports only Net hosts / Exec commands / Fs paths / Db tables): {line}"
204                        );
205                        continue;
206                    }
207                };
208                let mut rest: Vec<&str> = toks.collect();
209                let scope = if rest.first() == Some(&"in") {
210                    let s = rest.get(1).map(|s| s.to_string());
211                    rest.drain(..2.min(rest.len()));
212                    s
213                } else {
214                    None
215                };
216                let literals: BTreeSet<String> = rest.iter().map(|h| h.to_string()).collect();
217                if literals.is_empty() {
218                    eprintln!("candor: ignoring policy rule (allow {effect} names no values): {line}");
219                    continue;
220                }
221                out.allow_rules.push(AllowRule { effect, scope, literals, raw: line.to_string() });
222            }
223            "deny" => {
224                let mut effects = BTreeSet::new();
225                let mut scope = None;
226                for t in toks {
227                    let e = if t == UNKNOWN { Some(UNKNOWN) } else { cap_from_name(t) };
228                    match e {
229                        Some(e) => {
230                            effects.insert(e);
231                        }
232                        None => {
233                            scope = Some(t.to_string());
234                            break;
235                        }
236                    }
237                }
238                if effects.is_empty() {
239                    eprintln!("candor: ignoring policy rule (no known effect named): {line}");
240                    continue;
241                }
242                out.rules.push(PolicyRule { effects, scope, raw: line.to_string() });
243            }
244            "pure" => out.rules.push(PolicyRule {
245                effects: BTreeSet::new(),
246                scope: toks.next().map(str::to_string),
247                raw: line.to_string(),
248            }),
249            "forbid" => {
250                let a = toks.next().unwrap_or("");
251                let arrow = toks.next().unwrap_or("");
252                let b = toks.next().unwrap_or("");
253                if a.is_empty() || arrow != "->" || b.is_empty() {
254                    eprintln!("candor: ignoring layering rule (want `forbid <scope> -> <scope>`): {line}");
255                    continue;
256                }
257                out.layer_rules.push(LayerRule {
258                    from: a.to_string(),
259                    to: b.to_string(),
260                    raw: line.to_string(),
261                });
262            }
263            other => eprintln!("candor: ignoring policy rule (unknown kind `{other}`): {line}"),
264        }
265    }
266    out
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn db_table_covering_is_strict() {
        use super::db_table_covered as c;
        assert!(c("ledger.entries", "Ledger.Entries")); // case-insensitive exact
        assert!(c("ledger.*", "ledger.entries"));       // schema wildcard
        assert!(!c("ledger.*", "ledgerx.entries"));     // boundary-respecting
        assert!(!c("entries", "ledger.entries"));       // no silent qualification widening
        assert!(c("entries", "entries"));
    }

    #[test]
    fn allow_db_parses_and_gates() {
        let p = parse_policy("allow Db in billing  ledger.* customers\n");
        assert_eq!(p.allow_rules.len(), 1);
        assert_eq!(p.allow_rules[0].effect, "Db");
        assert!(literal_allowed("Db", "ledger.entries", &p.allow_rules[0].literals));
        assert!(literal_allowed("Db", "customers", &p.allow_rules[0].literals));
        assert!(!literal_allowed("Db", "audit.log", &p.allow_rules[0].literals));
    }

    #[test]
    fn policy_parses() {
        let p = parse_policy(
            "# the domain layer must stay pure of I/O\n\
             deny Net Db  domain\n\
             deny Exec\n\
             pure  parse\n\
             nonsense line\n\
             deny notaneffect\n",
        );
        let rules = &p.rules;
        assert_eq!(rules.len(), 3);
        assert_eq!(rules[0].effects, ["Db", "Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(rules[0].scope.as_deref(), Some("domain"));
        assert!(rules[1].effects.contains("Exec") && rules[1].scope.is_none());
        assert!(rules[2].effects.is_empty() && rules[2].scope.as_deref() == Some("parse"));
        // sweep [16]: a classic-Mac (bare \r) multi-rule policy must NOT collapse to the first rule.
        let cr = parse_policy("deny Net a\rdeny Exec b\rdeny Db c\r");
        assert_eq!(cr.rules.len(), 3, "bare-CR lines must each parse");
        assert!(cr.rules.iter().any(|r| r.effects.contains("Exec") && r.scope.as_deref() == Some("b")));
        // mixed \r\n and bare \r normalize identically.
        assert_eq!(parse_policy("deny Net a\r\ndeny Exec b\r").rules.len(), 2);
        // `Unknown` is a denyable token; a bare `deny` with no effect is ignored.
        assert_eq!(parse_policy("deny Unknown core").rules[0].effects, ["Unknown"].into_iter().collect());
        assert!(parse_policy("deny\ndeny   \n").rules.is_empty());
        // a `deny` whose first token is a non-effect names no effect -> dropped, NOT a pure rule.
        assert!(parse_policy("deny notaneffect scope").rules.is_empty());
        // the first non-effect token ENDS the rule: a later effect token is not collected.
        let p2 = parse_policy("deny Net foo Db");
        assert_eq!(p2.rules[0].effects, ["Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(p2.rules[0].scope.as_deref(), Some("foo"));
    }

    #[test]
    fn comments_and_blank_lines_are_skipped() {
        let p = parse_policy("  # only a comment\n\n \t deny Exec   # trailing comment\n");
        assert_eq!(p.rules.len(), 1);
        // `raw` is the line with the comment stripped and surrounding whitespace trimmed.
        assert_eq!(p.rules[0].raw, "deny Exec");
        assert!(p.rules[0].scope.is_none());
    }

    #[test]
    fn crlf_lines_parse_without_stray_cr() {
        // A pure-CRLF file (no bare \r) must yield one rule per line with clean scopes.
        let p = parse_policy("deny Net a\r\ndeny Exec b\r\n");
        assert_eq!(p.rules.len(), 2);
        assert_eq!(p.rules[0].scope.as_deref(), Some("a"));
        assert_eq!(p.rules[1].scope.as_deref(), Some("b"));
    }

    #[test]
    fn non_ascii_space_is_not_a_separator() {
        // A NBSP stays glued to its token, so the rule kind is unknown and the line is ignored.
        let p = parse_policy("deny\u{a0}Net domain\n");
        assert!(p.rules.is_empty() && p.allow_rules.is_empty() && p.layer_rules.is_empty());
    }

    #[test]
    fn allowlist_parses() {
        let p = parse_policy(
            "allow Net in billing  api.stripe.com  hooks.stripe.com\n\
             allow Exec in ci  git\n\
             allow Fs in config  /etc/app\n\
             allow Net  github.com\n\
             allow Clock  whatever\n\
             allow Net in nohosts\n\
             allow\n",
        );
        assert_eq!(p.allow_rules.len(), 4); // Clock carries no literal surface — rejected; Db now does
        assert_eq!((p.allow_rules[0].effect, p.allow_rules[0].scope.as_deref()), ("Net", Some("billing")));
        assert_eq!(
            p.allow_rules[0].literals,
            ["api.stripe.com", "hooks.stripe.com"].iter().map(|s| s.to_string()).collect()
        );
        assert_eq!((p.allow_rules[1].effect, p.allow_rules[1].scope.as_deref()), ("Exec", Some("ci")));
        assert!(p.allow_rules[1].literals.contains("git"));
        assert_eq!((p.allow_rules[2].effect, p.allow_rules[2].scope.as_deref()), ("Fs", Some("config")));
        assert_eq!((p.allow_rules[3].effect, p.allow_rules[3].scope.is_none()), ("Net", true));

        let set = |xs: &[&str]| xs.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>();
        assert!(literal_allowed("Net", "api.stripe.com:443", &set(&["api.stripe.com"])));
        // IPv6: a bare literal is matched WHOLE (no first-colon collapse), so a different address in the
        // same block is NOT accepted; a bracketed `[host]:port` matches the bare host. (/code-review.)
        assert!(literal_allowed("Net", "2001:db8::aa", &set(&["2001:db8::aa"])));
        assert!(!literal_allowed("Net", "2001:db8::ff", &set(&["2001:db8::aa"])));
        assert!(!literal_allowed("Net", "2001:dead::1", &set(&["2001:db8::aa"])));
        assert!(literal_allowed("Net", "[2001:db8::aa]:443", &set(&["2001:db8::aa"])));
        assert_eq!(host_part("2001:db8::aa"), "2001:db8::aa");
        assert_eq!(host_part("[2001:db8::aa]:443"), "2001:db8::aa");
        assert_eq!(host_part("api.stripe.com:443"), "api.stripe.com");
        assert!(literal_allowed("Exec", "/usr/bin/git", &set(&["git"])));
        assert!(!literal_allowed("Exec", "/usr/bin/curl", &set(&["git"])));
        assert!(literal_allowed("Fs", "/etc/app/conf.toml", &set(&["/etc/app"])));
        assert!(!literal_allowed("Fs", "/etc/shadow", &set(&["/etc/app"])));
        assert_eq!(cmd_base("/usr/bin/git"), "git");
    }

    #[test]
    fn layering_rule_parses() {
        let p = parse_policy(
            "forbid domain -> infra\n\
             forbid  app::web  ->  app::db \n\
             forbid domain infra\n\
             forbid domain ->\n\
             forbid\n",
        );
        assert_eq!(p.layer_rules.len(), 2);
        assert_eq!((p.layer_rules[0].from.as_str(), p.layer_rules[0].to.as_str()), ("domain", "infra"));
        assert_eq!((p.layer_rules[1].from.as_str(), p.layer_rules[1].to.as_str()), ("app::web", "app::db"));
    }

    #[test]
    fn scope_matches_by_segment_not_substring() {
        assert!(scope_matches("app::domain::handle", "domain"));
        assert!(scope_matches("domain::handle", "domain"));
        assert!(scope_matches("app::domain", "domain"));
        assert!(scope_matches("crate::domain_logic", "domain"));
        assert!(!scope_matches("app::subdomain::handle", "domain"));
        assert!(!scope_matches("app::not_my_domain::f", "domain"));
        // multi-segment: intermediates exact, last is a prefix, contiguous.
        assert!(scope_matches("crate::net::client::send", "net::client"));
        assert!(scope_matches("crate::net::client_pool::get", "net::client"));
        assert!(!scope_matches("crate::net::server::send", "net::client"));
        assert!(!scope_matches("crate::network::client::send", "net::client"));
        assert!(!scope_matches("crate::net::x::client", "net::client"));
        assert!(!scope_matches("net", "net::client"));
        // DOTTED names (JVM/Swift/TS reports `candor-query` consumes): a scope must match across `.` too,
        // else a scoped deny/pure rule is silently inert → whatif false-green (gate-evasion). Both a
        // `.`-written and a `::`-written scope must match a dotted name.
        assert!(scope_matches("com.acme.domain.Pricing.quote", "domain"));
        assert!(scope_matches("com.acme.domain.Pricing.quote", "acme.domain"));
        assert!(scope_matches("com.acme.domain.Pricing.quote", "acme::domain"));
        assert!(scope_matches("com.acme.infra.Net.fetch", "infra.Net"));
        assert!(!scope_matches("com.acme.subdomain.h", "domain"));
        assert!(!scope_matches("com.acme.domain.h", "infra"));
    }

    #[test]
    fn fs_path_covered_respects_boundaries() {
        assert!(fs_path_covered("/etc/app", "/etc/app"));
        assert!(fs_path_covered("/etc/app", "/etc/app/cfg.toml"));
        assert!(fs_path_covered("/etc/app/", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "/etc/apppwned"));
        assert!(!fs_path_covered("/etc/app", "/etc/application/x"));
        assert!(!fs_path_covered("/etc/app/cfg", "/etc/app"));
        assert!(!fs_path_covered("/etc/app", "/etc/app/../passwd"));
        assert!(fs_path_covered("/", "/etc/app/x"));
        assert!(!fs_path_covered("etc/app", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "etc/app/cfg"));
        assert!(fs_path_covered("etc/app", "etc/app/cfg"));
    }
}