Skip to main content

candor_classify/
policy.rs

1//! The canonical CANDOR_POLICY DSL parser (candor-spec SPEC §6.2).
2//!
3//! This is the **single** Rust implementation of the policy grammar — shared by the nightly dylint
4//! gate (`src/lib.rs`, AS-EFF-006/008/009) and the stable `candor-query` (`whatif`, and the
5//! `parsepolicy` dump the cross-impl conformance suite diffs against the JVM engine). Keeping one
6//! parser here is what makes "the gate means the same thing in every language" a fact rather than a
7//! hope: the Rust gate, the Rust pre-edit tool, and the cross-impl differential all read THIS code.
8//!
9//! Pure, stable Rust (string parsing only — no rustc types), so it lives beside the classifier.
10
11use crate::cap_from_name;
12use std::collections::BTreeSet;
13
/// The honesty marker (SPEC §4). Denyable so `deny Unknown <scope>` forbids the *unverifiable* case.
///
/// `parse_policy` compares each `deny` token against this string before falling back to
/// `cap_from_name`, so `Unknown` is accepted in the effect position of a `deny` rule.
pub const UNKNOWN: &str = "Unknown";
16
/// One `deny <Effect…> [scope]` / `pure <scope>` rule (AS-EFF-006). `effects` empty ⇒ a `pure` rule
/// (ANY effect forbidden). `scope` is a path segment-scope the rule applies to (None = whole unit).
#[derive(Debug, Clone)]
pub struct PolicyRule {
    /// The forbidden effect names (may include `Unknown`). Empty only for a `pure` rule; a `deny`
    /// line that names no known effect is dropped by the parser rather than stored with an empty set.
    pub effects: BTreeSet<&'static str>,
    /// The scope token the rule is limited to, or `None` when the line named no scope.
    pub scope: Option<String>,
    /// The rule's source line as the parser saw it: `#` comment removed, ASCII whitespace trimmed.
    pub raw: String,
}
25
/// One `allow <Effect> [in <scope>] <literal>…` rule (AS-EFF-008). The effect is one of the four
/// that carry a literal surface (`Net` hosts / `Exec` commands / `Fs` paths / `Db` tables); a
/// function in `scope` performing it may reach ONLY the listed literals. Matching is
/// effect-specific (`literal_allowed`).
#[derive(Debug, Clone)]
pub struct AllowRule {
    /// The effect being allow-listed: exactly one of `"Net"`, `"Exec"`, `"Fs"`, `"Db"`.
    pub effect: &'static str,
    /// The token following `in`, or `None` when the rule has no `in <scope>` clause.
    pub scope: Option<String>,
    /// The allowed literals exactly as written in the rule; the parser never stores an empty set.
    pub literals: BTreeSet<String>,
    /// The rule's source line as the parser saw it: `#` comment removed, ASCII whitespace trimmed.
    pub raw: String,
}
37
/// One `forbid <A> -> <B>` module-layering rule (AS-EFF-009): a function in scope `A` must not
/// transitively call into scope `B`.
#[derive(Debug, Clone)]
pub struct LayerRule {
    /// Scope `A`: the token before `->` (the calling side).
    pub from: String,
    /// Scope `B`: the token after `->` (the side that must not be reached).
    pub to: String,
    /// The rule's source line as the parser saw it: `#` comment removed, ASCII whitespace trimmed.
    pub raw: String,
}
46
/// The rule kinds parsed from a CANDOR_POLICY file.
///
/// Each vector holds the accepted rules of its kind in file order; lines the parser rejects are
/// not recorded anywhere in this structure.
#[derive(Default, Debug)]
pub struct ParsedPolicy {
    /// `deny` and `pure` rules (a `pure` rule is the one with an empty `effects` set).
    pub rules: Vec<PolicyRule>,
    /// `allow <Effect> [in <scope>] <literal>…` rules.
    pub allow_rules: Vec<AllowRule>,
    /// `forbid <A> -> <B>` layering rules.
    pub layer_rules: Vec<LayerRule>,
}
54
/// Extract the host from a `host[:port]` literal, dropping any port, so that a rule naming
/// `api.stripe.com` accepts a reached `api.stripe.com:443`.
///
/// Three shapes are recognised:
/// * `[ipv6]` / `[ipv6]:port` — the text between the brackets is the host (if the closing bracket
///   is missing, everything after `[` is returned);
/// * a bare IPv6 literal (two or more colons, no brackets) — there is no port to strip, so it is
///   returned whole. Splitting at the first colon would reduce every `2001:db8::*` to `2001` and
///   let one allowed address match the entire block;
/// * a hostname or IPv4 address with at most one colon — everything before the colon.
pub fn host_part(h: &str) -> &str {
    if let Some(inner) = h.strip_prefix('[') {
        return match inner.find(']') {
            Some(close) => &inner[..close],
            None => inner,
        };
    }
    match h.split_once(':') {
        // A second colon after the first means this is a bare IPv6 literal.
        Some((_, tail)) if tail.contains(':') => h,
        Some((host, _port)) => host,
        None => h,
    }
}
70
/// Return the final path component of a command (`/usr/bin/git` → `git`) so that
/// `allow Exec … git` also accepts the command given by absolute path. Both `/` and `\` count as
/// separators; a string containing neither is returned unchanged.
pub fn cmd_base(c: &str) -> &str {
    match c.rfind(['/', '\\']) {
        // Both separators are single-byte ASCII, so `sep + 1` is always a char boundary.
        Some(sep) => &c[sep + 1..],
        None => c,
    }
}
75
/// Whether an allowed path `a` covers a reached path `r` (SPEC §6.2: path-boundary-respecting prefix).
///
/// A directory covers itself and everything beneath it, but NOT a sibling that merely shares a
/// textual prefix (`/etc/app` ⊉ `/etc/apppwned`). Further rules:
/// * a reached path containing a `..` component is never covered;
/// * an absolute path (leading `/` or `\`) and a relative one never cover each other;
/// * empty components and `.` components are ignored on both sides, so `/etc/app/` and
///   `/etc/./app` behave like `/etc/app`.
///
/// Components are compared as borrowed slices in lock-step; no per-component allocation is made.
pub fn fs_path_covered(a: &str, r: &str) -> bool {
    const SEPARATORS: [char; 2] = ['/', '\\'];

    /// The meaningful components of `path`: split on either separator, dropping `""` and `"."`.
    fn components(path: &str) -> impl Iterator<Item = &str> {
        path.split(SEPARATORS).filter(|c| !c.is_empty() && *c != ".")
    }

    // Refuse anything that climbs: resolving `..` textually is exactly how a prefix check is escaped.
    if r.split(SEPARATORS).any(|c| c == "..") {
        return false;
    }
    let is_absolute = |p: &str| p.starts_with('/') || p.starts_with('\\');
    if is_absolute(a) != is_absolute(r) {
        return false;
    }
    // Every allowed component must be matched, in order, by the reached path; if the reached path
    // runs out first, `next()` yields `None` and the comparison fails.
    let mut reached = components(r);
    components(a).all(|wanted| reached.next() == Some(wanted))
}
97
/// Whether an allowed table entry `a` covers a reached table `r` (SPEC §6.2).
///
/// Comparison is case-insensitive (both sides are lowercased first). An entry ending in `.*` is a
/// schema wildcard: it covers any reached name that starts with the schema followed by a `.`.
/// Any other entry must equal the reached name exactly, qualification included — an allowed
/// `entries` does NOT cover a reached `ledger.entries`. Write both forms if queries mix them;
/// silently widening is the failure an allowlist exists to prevent.
pub fn db_table_covered(a: &str, r: &str) -> bool {
    let allowed = a.to_lowercase();
    let reached = r.to_lowercase();
    match allowed.strip_suffix(".*") {
        // Requiring the `.` right after the schema keeps `ledger.*` from covering `ledgerx.t`.
        Some(schema) => {
            matches!(reached.strip_prefix(schema), Some(tail) if tail.starts_with('.'))
        }
        None => allowed == reached,
    }
}
110
111/// Whether a reached literal is allowed under an effect-specific match (SPEC §6.2): `Net` host by
112/// name (port ignored), `Exec` command by basename, `Fs` path by boundary-respecting prefix,
113/// `Db` table by qualified name or `schema.*`.
114pub fn literal_allowed(effect: &str, reached: &str, allow: &BTreeSet<String>) -> bool {
115    match effect {
116        "Net" => allow.iter().any(|a| host_part(a) == host_part(reached)),
117        "Exec" => allow.iter().any(|a| cmd_base(a) == cmd_base(reached)),
118        "Fs" => allow.iter().any(|a| fs_path_covered(a, reached)),
119        "Db" => allow.iter().any(|a| db_table_covered(a, reached)),
120        _ => allow.contains(reached),
121    }
122}
123
/// Decide whether a policy scope applies to a function name, matching by **path segment**
/// (SPEC §6.2) rather than by substring.
///
/// Both strings are split on `::`. The scope matches when some contiguous run of the name's
/// segments lines up with the scope's segments such that every scope segment but the last is
/// equal to its counterpart and the last scope segment is a prefix of its counterpart. Hence
/// `domain` matches `app::domain::h` and `crate::domain_logic`, but not `app::subdomain::h`.
/// A scope with more segments than the name never matches. (Used directly by the Rust gate; the
/// JVM engine mirrors it over `.`.)
pub fn scope_matches(name: &str, scope: &str) -> bool {
    let name_segs: Vec<&str> = name.split("::").collect();
    let scope_segs: Vec<&str> = scope.split("::").collect();
    match scope_segs.split_last() {
        Some((prefix_seg, exact_segs)) if scope_segs.len() <= name_segs.len() => {
            let exact_len = exact_segs.len();
            name_segs.windows(scope_segs.len()).any(|window| {
                window[..exact_len] == *exact_segs && window[exact_len].starts_with(prefix_seg)
            })
        }
        _ => false,
    }
}
140
/// The §6.2 token separator: ASCII whitespace ONLY (space/tab/CR/LF/VT/FF). `split_whitespace`/`trim`
/// use Unicode `White_Space`, which would split a NBSP/ideographic space that Java drops — a gateless-
/// green cross-engine divergence (adversarial DSL review). A non-ASCII space stays part of its token, so
/// the rule is malformed and ignored, uniformly.
fn is_ascii_ws(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r')
}

/// Parse a CANDOR_POLICY file (SPEC §6.2). One rule per line; `#` comments and blanks ignored:
///
/// ```text
/// deny Net Db  domain     # functions whose path contains segment "domain" must not perform Net or Db
/// deny Exec               # no function anywhere may perform Exec
/// deny Unknown  api        # functions in "api" must be fully resolvable (forbid the unverifiable)
/// pure         parse      # functions whose path contains segment "parse" must be effect-free
/// allow Net in billing  api.stripe.com
/// forbid domain -> infra
/// ```
///
/// Tokens are separated by ASCII whitespace only (see `is_ascii_ws`).
///
/// In a `deny` rule, leading tokens that name a known effect (or `Unknown`) are forbidden; the FIRST
/// non-effect token is the scope and ends the rule. A `deny` naming no known effect is dropped (it is
/// NOT a `pure` rule). Malformed/unknown lines are ignored with a warning on stderr — never silently
/// widened.
///
/// An `allow` rule must name one of `Net` / `Exec` / `Fs` / `Db` and at least one literal; a
/// `forbid` rule must have the exact shape `forbid <scope> -> <scope>`. Tokens after a `deny`
/// scope, after a `pure` scope, or after the third `forbid` token are not examined.
pub fn parse_policy(text: &str) -> ParsedPolicy {
    let mut out = ParsedPolicy::default();
    for raw_line in text.lines() {
        // Everything from the first `#` on is a comment; what remains is trimmed of ASCII whitespace.
        let line = raw_line.split('#').next().unwrap_or("").trim_matches(is_ascii_ws);
        if line.is_empty() {
            continue;
        }
        // Runs of separators produce empty pieces; filtering them makes spacing insignificant.
        let mut toks = line.split(is_ascii_ws).filter(|s| !s.is_empty());
        match toks.next().unwrap_or("") {
            "allow" => {
                // Map the token onto a `'static` name; anything else carries no literal surface.
                let effect = match toks.next().unwrap_or("") {
                    "Net" => "Net",
                    "Exec" => "Exec",
                    "Fs" => "Fs",
                    "Db" => "Db",
                    _ => {
                        eprintln!(
                            "candor: ignoring policy rule (allow supports only Net hosts / Exec commands / Fs paths / Db tables): {line}"
                        );
                        continue;
                    }
                };
                let mut rest: Vec<&str> = toks.collect();
                // Optional `in <scope>` clause: consume `in` and the scope token (if present) so
                // that only literals remain in `rest`.
                let scope = if rest.first() == Some(&"in") {
                    let s = rest.get(1).map(|s| s.to_string());
                    rest.drain(..2.min(rest.len()));
                    s
                } else {
                    None
                };
                let literals: BTreeSet<String> = rest.iter().map(|h| h.to_string()).collect();
                // An allow rule with no literals is rejected rather than stored.
                if literals.is_empty() {
                    eprintln!("candor: ignoring policy rule (allow {effect} names no values): {line}");
                    continue;
                }
                out.allow_rules.push(AllowRule { effect, scope, literals, raw: line.to_string() });
            }
            "deny" => {
                let mut effects = BTreeSet::new();
                let mut scope = None;
                for t in toks {
                    // `Unknown` is checked first; every other token goes through `cap_from_name`.
                    let e = if t == UNKNOWN { Some(UNKNOWN) } else { cap_from_name(t) };
                    match e {
                        Some(e) => {
                            effects.insert(e);
                        }
                        None => {
                            // First non-effect token: it is the scope, and nothing after it is read.
                            scope = Some(t.to_string());
                            break;
                        }
                    }
                }
                // Storing an empty set would turn this into a `pure` rule, so drop the line instead.
                if effects.is_empty() {
                    eprintln!("candor: ignoring policy rule (no known effect named): {line}");
                    continue;
                }
                out.rules.push(PolicyRule { effects, scope, raw: line.to_string() });
            }
            // A `pure` rule is a `PolicyRule` with an empty effect set; its scope is optional.
            "pure" => out.rules.push(PolicyRule {
                effects: BTreeSet::new(),
                scope: toks.next().map(str::to_string),
                raw: line.to_string(),
            }),
            "forbid" => {
                let a = toks.next().unwrap_or("");
                let arrow = toks.next().unwrap_or("");
                let b = toks.next().unwrap_or("");
                // All three positions must be present and the middle one must be exactly `->`.
                if a.is_empty() || arrow != "->" || b.is_empty() {
                    eprintln!("candor: ignoring layering rule (want `forbid <scope> -> <scope>`): {line}");
                    continue;
                }
                out.layer_rules.push(LayerRule {
                    from: a.to_string(),
                    to: b.to_string(),
                    raw: line.to_string(),
                });
            }
            other => eprintln!("candor: ignoring policy rule (unknown kind `{other}`): {line}"),
        }
    }
    out
}
245
/// Unit tests for the policy parser and the effect-specific literal matchers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Table allow-entries: case-insensitive, wildcard per schema, never widened across qualification.
    #[test]
    fn db_table_covering_is_strict() {
        use super::db_table_covered as c;
        assert!(c("ledger.entries", "Ledger.Entries")); // case-insensitive exact
        assert!(c("ledger.*", "ledger.entries")); // schema wildcard
        assert!(!c("ledger.*", "ledgerx.entries")); // boundary-respecting
        assert!(!c("entries", "ledger.entries")); // no silent qualification widening
        assert!(c("entries", "entries"));
    }

    /// An `allow Db` line parses into one rule whose literals gate reached tables.
    #[test]
    fn allow_db_parses_and_gates() {
        let p = parse_policy("allow Db in billing  ledger.* customers\n");
        assert_eq!(p.allow_rules.len(), 1);
        assert_eq!(p.allow_rules[0].effect, "Db");
        assert!(literal_allowed("Db", "ledger.entries", &p.allow_rules[0].literals));
        assert!(literal_allowed("Db", "customers", &p.allow_rules[0].literals));
        assert!(!literal_allowed("Db", "audit.log", &p.allow_rules[0].literals));
    }

    /// `deny` / `pure` parsing, including the lines that must be dropped.
    #[test]
    fn policy_parses() {
        let p = parse_policy(
            "# the domain layer must stay pure of I/O\n\
             deny Net Db  domain\n\
             deny Exec\n\
             pure  parse\n\
             nonsense line\n\
             deny notaneffect\n",
        );
        let rules = &p.rules;
        assert_eq!(rules.len(), 3);
        assert_eq!(rules[0].effects, ["Db", "Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(rules[0].scope.as_deref(), Some("domain"));
        assert!(rules[1].effects.contains("Exec") && rules[1].scope.is_none());
        assert!(rules[2].effects.is_empty() && rules[2].scope.as_deref() == Some("parse"));
        // `Unknown` is a denyable token; a bare `deny` with no effect is ignored.
        assert_eq!(parse_policy("deny Unknown core").rules[0].effects, ["Unknown"].into_iter().collect());
        assert!(parse_policy("deny\ndeny   \n").rules.is_empty());
        // a `deny` whose first token is a non-effect names no effect -> dropped, NOT a pure rule.
        assert!(parse_policy("deny notaneffect scope").rules.is_empty());
        // the first non-effect token ENDS the rule: a later effect token is not collected.
        let p2 = parse_policy("deny Net foo Db");
        assert_eq!(p2.rules[0].effects, ["Net"].into_iter().collect::<BTreeSet<_>>());
        assert_eq!(p2.rules[0].scope.as_deref(), Some("foo"));
    }

    /// `allow` parsing plus the per-effect literal matching it feeds.
    #[test]
    fn allowlist_parses() {
        let p = parse_policy(
            "allow Net in billing  api.stripe.com  hooks.stripe.com\n\
             allow Exec in ci  git\n\
             allow Fs in config  /etc/app\n\
             allow Net  github.com\n\
             allow Clock  whatever\n\
             allow Net in nohosts\n\
             allow\n",
        );
        assert_eq!(p.allow_rules.len(), 4); // Clock carries no literal surface — rejected; Db now does
        assert_eq!((p.allow_rules[0].effect, p.allow_rules[0].scope.as_deref()), ("Net", Some("billing")));
        assert_eq!(
            p.allow_rules[0].literals,
            ["api.stripe.com", "hooks.stripe.com"].iter().map(|s| s.to_string()).collect()
        );
        assert_eq!((p.allow_rules[1].effect, p.allow_rules[1].scope.as_deref()), ("Exec", Some("ci")));
        assert!(p.allow_rules[1].literals.contains("git"));
        assert_eq!((p.allow_rules[2].effect, p.allow_rules[2].scope.as_deref()), ("Fs", Some("config")));
        assert_eq!((p.allow_rules[3].effect, p.allow_rules[3].scope.is_none()), ("Net", true));

        let set = |xs: &[&str]| xs.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>();
        assert!(literal_allowed("Net", "api.stripe.com:443", &set(&["api.stripe.com"])));
        // IPv6: a bare literal is matched WHOLE (no first-colon collapse), so a different address in the
        // same block is NOT accepted; a bracketed `[host]:port` matches the bare host. (/code-review.)
        assert!(literal_allowed("Net", "2001:db8::aa", &set(&["2001:db8::aa"])));
        assert!(!literal_allowed("Net", "2001:db8::ff", &set(&["2001:db8::aa"])));
        assert!(!literal_allowed("Net", "2001:dead::1", &set(&["2001:db8::aa"])));
        assert!(literal_allowed("Net", "[2001:db8::aa]:443", &set(&["2001:db8::aa"])));
        assert_eq!(host_part("2001:db8::aa"), "2001:db8::aa");
        assert_eq!(host_part("[2001:db8::aa]:443"), "2001:db8::aa");
        assert_eq!(host_part("api.stripe.com:443"), "api.stripe.com");
        assert!(literal_allowed("Exec", "/usr/bin/git", &set(&["git"])));
        assert!(!literal_allowed("Exec", "/usr/bin/curl", &set(&["git"])));
        assert!(literal_allowed("Fs", "/etc/app/conf.toml", &set(&["/etc/app"])));
        assert!(!literal_allowed("Fs", "/etc/shadow", &set(&["/etc/app"])));
        assert_eq!(cmd_base("/usr/bin/git"), "git");
    }

    /// Only well-formed `forbid <A> -> <B>` lines become layering rules.
    #[test]
    fn layering_rule_parses() {
        let p = parse_policy(
            "forbid domain -> infra\n\
             forbid  app::web  ->  app::db \n\
             forbid domain infra\n\
             forbid domain ->\n\
             forbid\n",
        );
        assert_eq!(p.layer_rules.len(), 2);
        assert_eq!((p.layer_rules[0].from.as_str(), p.layer_rules[0].to.as_str()), ("domain", "infra"));
        assert_eq!((p.layer_rules[1].from.as_str(), p.layer_rules[1].to.as_str()), ("app::web", "app::db"));
    }

    /// Scopes match whole path segments (last one by prefix), never arbitrary substrings.
    #[test]
    fn scope_matches_by_segment_not_substring() {
        assert!(scope_matches("app::domain::handle", "domain"));
        assert!(scope_matches("domain::handle", "domain"));
        assert!(scope_matches("app::domain", "domain"));
        assert!(scope_matches("crate::domain_logic", "domain"));
        assert!(!scope_matches("app::subdomain::handle", "domain"));
        assert!(!scope_matches("app::not_my_domain::f", "domain"));
        // multi-segment: intermediates exact, last is a prefix, contiguous.
        assert!(scope_matches("crate::net::client::send", "net::client"));
        assert!(scope_matches("crate::net::client_pool::get", "net::client"));
        assert!(!scope_matches("crate::net::server::send", "net::client"));
        assert!(!scope_matches("crate::network::client::send", "net::client"));
        assert!(!scope_matches("crate::net::x::client", "net::client"));
        assert!(!scope_matches("net", "net::client"));
    }

    /// Path allow-entries cover their subtree only: no sibling prefixes, no `..`, no abs/rel mixing.
    #[test]
    fn fs_path_covered_respects_boundaries() {
        assert!(fs_path_covered("/etc/app", "/etc/app"));
        assert!(fs_path_covered("/etc/app", "/etc/app/cfg.toml"));
        assert!(fs_path_covered("/etc/app/", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "/etc/apppwned"));
        assert!(!fs_path_covered("/etc/app", "/etc/application/x"));
        assert!(!fs_path_covered("/etc/app/cfg", "/etc/app"));
        assert!(!fs_path_covered("/etc/app", "/etc/app/../passwd"));
        assert!(fs_path_covered("/", "/etc/app/x"));
        assert!(!fs_path_covered("etc/app", "/etc/app/cfg"));
        assert!(!fs_path_covered("/etc/app", "etc/app/cfg"));
        assert!(fs_path_covered("etc/app", "etc/app/cfg"));
    }
}