1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use regex::Regex;
use serde::{Deserialize, Serialize};
/// One authorization-related finding reported by the scanner, in the shape
/// that is serialized to and deserialized from the findings JSON.
///
/// Fields marked `#[serde(default)]` were added after the format was first
/// persisted; they may be absent from older JSON and then take their
/// `Default` value on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Finding {
/// Identifier for this finding.
/// NOTE(review): format and uniqueness scope are not visible in this
/// file — confirm against whatever produces findings.
pub id: String,
/// Path of the file this finding lives in.
pub file: PathBuf,
/// First line of the flagged span.
/// NOTE(review): presumably 1-based — confirm against the producer.
pub line_start: usize,
/// Last line of the flagged span.
/// NOTE(review): presumably inclusive — confirm against the producer.
pub line_end: usize,
/// Source text associated with the finding.
pub code_snippet: String,
/// Language the finding was reported for.
pub language: Language,
/// Kind of authorization logic this finding represents.
pub category: AuthCategory,
/// How confident the scanner is in this finding.
pub confidence: Confidence,
/// Free-text description of the finding.
pub description: String,
/// NOTE(review): presumably the identifier of the pattern rule that
/// matched, `None` when no rule applies — confirm.
pub pattern_rule: Option<String>,
/// Rego policy stub, when one is available (the Rego counterpart of
/// `cedar_stub` below).
pub rego_stub: Option<String>,
/// Cedar policy stub. Populated when `--engine cedar` is used during
/// extract, or when a deep-mode response carries one. Kept parallel to
/// `rego_stub` (rather than collapsed into a `policy_outputs` collection)
/// so persisted findings JSON stays backward-compatible — older consumers
/// see an extra optional field they can ignore.
#[serde(default)]
pub cedar_stub: Option<String>,
/// Which scan pass produced this finding.
pub pass: ScanPass,
/// Where in a typical app this finding lives — frontend (UI/client) or
/// backend (server/API). Inferred from the file path via simple
/// directory-name heuristics in [`Surface::classify`]. Surfaced so
/// downstream consumers can filter out frontend `ownership`-style noise
/// (e.g. Zulip's `web/src/...` `user.user_id === current_user.user_id`
/// matches, which are UI state checks rather than security gates) without
/// us having to bake that filter into rule predicates. Defaults to
/// `Backend` when the path doesn't match any frontend-shaped directory,
/// which is the right default for the scanner's primary target (backend
/// authz code).
#[serde(default)]
pub surface: Surface,
}
/// Which side of an application a finding lives on.
///
/// Serialized in lowercase (`"backend"` / `"frontend"`). `Backend` is the
/// `Default`, so a `#[serde(default)]` field of this type falls back to
/// `Backend` when the key is missing from the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, clap::ValueEnum)]
#[serde(rename_all = "lowercase")]
pub enum Surface {
// Variant notes are plain `//` comments on purpose: clap's `ValueEnum`
// derive picks up `///` variant docs as CLI help text.
// Server/API code; the safe default.
#[default]
Backend,
// UI/client code.
Frontend,
}
impl Surface {
    /// Guess whether `path` belongs to frontend or backend code.
    ///
    /// A path is `Frontend` only when **both** of these hold:
    ///
    /// 1. Some directory segment is one of the *strong* frontend names
    ///    `web`, `webapp`, `client`, `frontend` or `ui`.
    /// 2. The leaf file carries a frontend extension (see
    ///    `has_frontend_extension`).
    ///
    /// Everything else is `Backend`.
    ///
    /// Weak names such as `public/` and `static/` are deliberately left out
    /// of the strong list: they are common in backend trees
    /// (`lib/public/api.go`, `services/static/registry.rs`). A path like
    /// `apps/web/public/main.js` is still `Frontend`, but only because of
    /// its `web/` segment.
    ///
    /// The extension requirement guards the opposite trap. Backend projects
    /// often keep server-side glue under `web/`, `client/` or `ui/`
    /// (`src/main/java/com/x/web/UserService.java`,
    /// `crates/server/src/web/handler.rs`), and those must not be reported
    /// as frontend.
    ///
    /// The bias is intentional. Calling frontend code `Backend` leaves
    /// existing behaviour untouched, whereas calling backend code
    /// `Frontend` would hide real findings from consumers that filter
    /// frontend results. When unsure, the answer is `Backend`.
    ///
    /// Both the directory match and the extension match ignore case. The
    /// directory match is bounded by path separators, so `app/web/foo.ts`
    /// matches while `apps/network/foo.ts` and `webhooks/foo.ts` do not.
    pub fn classify(path: &Path) -> Self {
        static STRONG_DIR_RE: OnceLock<Regex> = OnceLock::new();
        // The name must start at the beginning of the string or right after
        // a `/` or `\`, and must be followed by a separator; that keeps
        // `webhooks/` and `clientservice/` out. `(?i)` ignores case.
        let strong_dir = STRONG_DIR_RE.get_or_init(|| {
            Regex::new(r"(?i)(^|[\\/])(web|webapp|client|frontend|ui)[\\/]")
                .expect("static regex compiles")
        });

        let rendered = path.to_string_lossy();
        // `&&` short-circuits, so the extension is inspected only once a
        // strong directory marker has been found.
        if strong_dir.is_match(&rendered) && has_frontend_extension(path) {
            Surface::Frontend
        } else {
            Surface::Backend
        }
    }
}
/// Reports whether `path` ends in one of the frontend-asset extensions that
/// gate [`Surface::classify`].
///
/// The comparison ignores ASCII case. The list is intentionally narrow so
/// that a backend `web/` package full of `.java`/`.go`/`.py` files is never
/// accepted. A path with no extension, or whose extension is not valid
/// UTF-8, yields `false` — a directory marker alone is not enough to
/// override the safe `Backend` default.
fn has_frontend_extension(path: &Path) -> bool {
    // Lowercase, no leading dot.
    const KNOWN_EXTENSIONS: &[&str] = &[
        "js", "jsx", "ts", "tsx", "mjs", "cjs", "html", "htm", "css", "scss", "sass", "less",
        "vue", "svelte", "astro",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .is_some_and(|ext| {
            KNOWN_EXTENSIONS
                .iter()
                .any(|known| known.eq_ignore_ascii_case(ext))
        })
}
impl std::fmt::Display for Surface {
    /// Writes the lowercase name of the surface (`backend` or `frontend`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Surface::Frontend => "frontend",
            Surface::Backend => "backend",
        };
        f.write_str(label)
    }
}
/// Kind of authorization logic a finding represents.
///
/// Serialized in snake_case (the same spelling `slug` returns). The two
/// multi-word variants pin their CLI value names to kebab-case through
/// `#[value(name = ...)]`, so the CLI spelling (`business-rule`) differs
/// from the wire spelling (`business_rule`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)]
#[serde(rename_all = "snake_case")]
pub enum AuthCategory {
// Variant notes are plain `//` comments on purpose: clap's `ValueEnum`
// derive picks up `///` variant docs as CLI help text.
Rbac,
Abac,
Middleware,
// CLI: `business-rule`; wire: `business_rule`.
#[value(name = "business-rule")]
BusinessRule,
Ownership,
// CLI: `feature-gate`; wire: `feature_gate`.
#[value(name = "feature-gate")]
FeatureGate,
Custom,
}
impl AuthCategory {
/// Snake_case wire form, matching the serde `rename_all = "snake_case"`
/// applied to this enum. Use this anywhere the canonical wire spelling
/// is needed (JSON map keys, prompt schema enum, MCP tool args).
/// Distinct from [`std::fmt::Display`], which produces a human-friendly
/// form (`"Business Rule"`) — mixing the two in one JSON document
/// produces inconsistent keys (e.g. summary `"business rule"` vs.
/// finding `"business_rule"`), which breaks consumers grouping by
/// category.
pub fn slug(&self) -> &'static str {
match self {
AuthCategory::Rbac => "rbac",
AuthCategory::Abac => "abac",
AuthCategory::Middleware => "middleware",
AuthCategory::BusinessRule => "business_rule",
AuthCategory::Ownership => "ownership",
AuthCategory::FeatureGate => "feature_gate",
AuthCategory::Custom => "custom",
}
}
}
/// Confidence level attached to a finding.
///
/// Serialized in lowercase. Variant order is significant: the derived
/// `PartialOrd`/`Ord` compare by declaration order, so
/// `Low < Medium < High`. Reordering the variants would change every
/// comparison made on this type.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, clap::ValueEnum,
)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
// Variant notes are plain `//` comments on purpose: clap's `ValueEnum`
// derive picks up `///` variant docs as CLI help text.
// Smallest value under the derived ordering.
Low,
Medium,
// Largest value under the derived ordering.
High,
}
impl std::str::FromStr for Confidence {
    type Err = String;

    /// Parses `low`, `medium` or `high`, ignoring letter case.
    ///
    /// The input is not trimmed, so surrounding whitespace is rejected.
    ///
    /// # Errors
    ///
    /// Returns `unknown confidence level: <input>` (with the input exactly
    /// as given) for any other string.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let level = match normalized.as_str() {
            "high" => Confidence::High,
            "medium" => Confidence::Medium,
            "low" => Confidence::Low,
            _ => return Err(format!("unknown confidence level: {s}")),
        };
        Ok(level)
    }
}
/// Which scan pass produced a finding. Serialized in lowercase
/// (`"structural"` / `"semantic"`).
///
/// NOTE(review): presumably `Structural` is the pattern/rule-matching pass
/// and `Semantic` the deep-mode pass — the passes themselves are not
/// visible in this file, so confirm before relying on that reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ScanPass {
Structural,
Semantic,
}
/// Source languages a finding can be reported for.
///
/// Serialized in lowercase. The multi-word variants (`TypeScript`,
/// `JavaScript`, `CSharp`) carry an explicit serde `rename` and clap
/// `value(name = ...)`, which pins both the wire spelling and the CLI
/// spelling to the same single lowercase word (`typescript`, `javascript`,
/// `csharp`). The `Display` impl for this type writes those same strings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, clap::ValueEnum)]
#[serde(rename_all = "lowercase")]
pub enum Language {
// Variant notes are plain `//` comments on purpose: clap's `ValueEnum`
// derive picks up `///` variant docs as CLI help text.
Java,
// Wire and CLI name: `typescript`.
#[serde(rename = "typescript")]
#[value(name = "typescript")]
TypeScript,
// Wire and CLI name: `javascript`.
#[serde(rename = "javascript")]
#[value(name = "javascript")]
JavaScript,
Python,
Go,
// Wire and CLI name: `csharp`.
#[serde(rename = "csharp")]
#[value(name = "csharp")]
CSharp,
Kotlin,
Ruby,
Php,
}
impl std::fmt::Display for Language {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Language::Java => write!(f, "java"),
Language::TypeScript => write!(f, "typescript"),
Language::JavaScript => write!(f, "javascript"),
Language::Python => write!(f, "python"),
Language::Go => write!(f, "go"),
Language::CSharp => write!(f, "csharp"),
Language::Kotlin => write!(f, "kotlin"),
Language::Ruby => write!(f, "ruby"),
Language::Php => write!(f, "php"),
}
}
}
impl std::fmt::Display for AuthCategory {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AuthCategory::Rbac => write!(f, "RBAC"),
AuthCategory::Abac => write!(f, "ABAC"),
AuthCategory::Middleware => write!(f, "Middleware"),
AuthCategory::BusinessRule => write!(f, "Business Rule"),
AuthCategory::Ownership => write!(f, "Ownership"),
AuthCategory::FeatureGate => write!(f, "Feature Gate"),
AuthCategory::Custom => write!(f, "Custom"),
}
}
}
impl std::fmt::Display for Confidence {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Confidence::Low => write!(f, "low"),
Confidence::Medium => write!(f, "medium"),
Confidence::High => write!(f, "high"),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that every path in `paths` classifies as `expected`; the
    /// failure message names the offending path.
    fn assert_all_classify_as(expected: Surface, paths: &[&str]) {
        for raw in paths {
            assert_eq!(Surface::classify(Path::new(raw)), expected, "path: {raw}");
        }
    }

    #[test]
    fn surface_classifies_frontend_directories() {
        assert_all_classify_as(
            Surface::Frontend,
            &[
                "web/src/foo.ts",
                "apps/web/src/page.tsx",
                "packages/client/index.ts",
                "frontend/components/Foo.tsx",
                "apps/ui/Settings.tsx",
                // `public/` next to a strong marker (`web`) is still
                // Frontend — the strong marker decides.
                "apps/web/public/main.js",
                // Directory match ignores case: `Web/` counts as `web/`.
                "Apps/Web/Foo.tsx",
            ],
        );
    }

    #[test]
    fn surface_does_not_misclassify_backend_paths() {
        assert_all_classify_as(
            Surface::Backend,
            &[
                // Look-alike names: `webhooks/` is not `web/` and
                // `clientservice/` is not `client/` — only whole segments
                // count.
                "internal/webhooks/handler.go",
                "services/clientservice/main.go",
                "server/api/users.py",
                "zerver/views/auth.py",
                "models/perm/access/role.go",
                // Weak tokens with no strong marker stay Backend by design,
                // to avoid false positives on trees like these. The price
                // is a false negative on bare `public/assets/main.js`-style
                // layouts.
                "lib/public/api.go",
                "services/static/registry.rs",
            ],
        );
    }

    #[test]
    fn surface_requires_frontend_extension_even_with_strong_marker() {
        assert_all_classify_as(
            Surface::Backend,
            &[
                // A strong directory marker with a backend-language leaf
                // must stay Backend. Java/Rust/Go projects legitimately
                // keep server-side HTTP/UI glue under `web/`, `client/` or
                // `ui/`; calling those Frontend would hide real findings
                // from consumers that filter frontend results.
                "src/main/java/com/x/web/UserService.java",
                "crates/server/src/web/handler.rs",
                "internal/client/auth.go",
                "services/ui/templating.py",
                // No extension at all: the directory marker alone is not a
                // strong enough signal to leave the safe default.
                "apps/web/Makefile",
            ],
        );
    }

    #[test]
    fn surface_extension_gate_is_case_insensitive() {
        // Counterpart of the `Apps/Web/Foo.tsx` case, but with the
        // extension upper-cased: `.TSX` must still pass the frontend gate.
        assert_all_classify_as(Surface::Frontend, &["apps/web/Component.TSX"]);
    }

    #[test]
    fn surface_default_round_trips_through_serde() {
        // JSON written before the `surface` field existed has no such key;
        // deserializing it must yield Backend instead of an error.
        let legacy_json = r#"{
"id": "x",
"file": "a.ts",
"line_start": 1,
"line_end": 1,
"code_snippet": "",
"language": "typescript",
"category": "rbac",
"confidence": "low",
"description": "",
"pattern_rule": null,
"rego_stub": null,
"pass": "structural"
}"#;
        let parsed: Finding = serde_json::from_str(legacy_json).unwrap();
        assert_eq!(parsed.surface, Surface::Backend);
    }
}