Skip to main content

plsql_ir/
fact_emit.rs

1//! Emit declaration / reference / call facts plus
2//! privilege / dynamic-SQL / unknown facts.
3//!
4//! Bridges the semantic-layer extractors (calls, dml-edges,
5//! privilege model, dynamic-SQL evidence, opacity reasons) and
//! the declaration table into the normalized [`Fact`] stream.
//! Each emitter takes the typed
8//! per-family input + a [`FactProvenance`] and pushes minted
9//! facts into a [`FactStore`].
10//!
11//! "With evidence" (FACT-004): the privilege / dynamic-SQL /
12//! opacity payloads are deliberately lightweight — the evidence a
13//! consumer needs to defend the fact (the grant tuple, the
14//! dynamic-SQL site text, the opacity reason) travels *in* the
15//! payload string, and richer structured evidence is re-fetched
16//! from the originating crate's model by `FactId`.
17//!
18//! Keeping emission in one module means the engine wiring layer
19//! has a single call site per fact family and the
20//! `FactId` derivation stays consistent.
21//!
22//! ## /oracle evidence
23//!
24//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
25//!   declaration / reference / call grammar 1:1 with the fact
26//!   families.
27//! * `LOW-LEVEL-CATALOGS.md` Data Dictionary View Families —
28//!   `ALL_OBJECTS` (declarations), `ALL_DEPENDENCIES`
29//!   (call edges), `ALL_IDENTIFIERS` (references) are the
30//!   server-side mirrors.
31
32use std::collections::BTreeSet;
33
34use crate::DeclId;
35use crate::calls::CallSite;
36use crate::fact::{FactPayload, FactProvenance, FactStore};
37use crate::flow::{ValueFlow, ValueSet};
38use crate::flow_intra::FlowEnv;
39use crate::table_stub::DeclLike;
40
41/// Emit one `Declaration` fact per registered declaration.
42/// Returns the count emitted (post-dedup).
43pub fn emit_declaration_facts<I>(store: &mut FactStore, prov: &FactProvenance, decls: I) -> usize
44where
45    I: IntoIterator<Item = (DeclId, String)>,
46{
47    let before = store.len();
48    for (decl, logical_id) in decls {
49        let f = crate::fact::mint_fact(prov.clone(), FactPayload::Declaration { decl, logical_id });
50        store.push(f);
51    }
52    store.len() - before
53}
54
55/// Emit one `Reference` fact per (from_decl, to_logical_id) pair.
56pub fn emit_reference_facts<I>(store: &mut FactStore, prov: &FactProvenance, refs: I) -> usize
57where
58    I: IntoIterator<Item = (DeclId, String)>,
59{
60    let before = store.len();
61    for (from_decl, to_logical_id) in refs {
62        let f = crate::fact::mint_fact(
63            prov.clone(),
64            FactPayload::Reference {
65                from_decl,
66                to_logical_id,
67            },
68        );
69        store.push(f);
70    }
71    store.len() - before
72}
73
74/// Emit one `DependencyEdge` fact per call site. `from_logical_id`
75/// is the routine the call appeared in; the callee path is joined
76/// with `.` into the edge target.
77pub fn emit_call_facts(
78    store: &mut FactStore,
79    prov: &FactProvenance,
80    from_logical_id: &str,
81    calls: &[CallSite],
82) -> usize {
83    let before = store.len();
84    for c in calls {
85        let to = c.callee_parts.join(".").to_ascii_lowercase();
86        let f = crate::fact::mint_fact(
87            prov.clone(),
88            FactPayload::DependencyEdge {
89                from_logical_id: from_logical_id.to_string(),
90                to_logical_id: to,
91                edge_kind: "Calls".to_string(),
92            },
93        );
94        store.push(f);
95    }
96    store.len() - before
97}
98
99/// Emit one `Privilege` fact per resolved `(grantee, privilege,
100/// on)` triple. The triple *is* the evidence:
101/// who can do what to which object. Returns the post-dedup count.
102pub fn emit_privilege_facts<I>(store: &mut FactStore, prov: &FactProvenance, grants: I) -> usize
103where
104    I: IntoIterator<Item = (String, String, String)>,
105{
106    let before = store.len();
107    for (grantee, privilege, on) in grants {
108        let f = crate::fact::mint_fact(
109            prov.clone(),
110            FactPayload::Privilege {
111                grantee,
112                privilege,
113                on,
114            },
115        );
116        store.push(f);
117    }
118    store.len() - before
119}
120
121/// Emit one `DynamicSqlEvidence` fact per recognised dynamic-SQL
122/// site. `site` carries the evidence — typically
123/// the logical id of the unit plus a fragment/classification
124/// summary from `DynamicSqlEvidence`.
125pub fn emit_dynamic_sql_facts<I>(store: &mut FactStore, prov: &FactProvenance, sites: I) -> usize
126where
127    I: IntoIterator<Item = String>,
128{
129    let before = store.len();
130    for site in sites {
131        let f = crate::fact::mint_fact(prov.clone(), FactPayload::DynamicSqlEvidence { site });
132        store.push(f);
133    }
134    store.len() - before
135}
136
137/// Emit one `Opacity` fact per `(target_logical_id, reason)` pair
138///  — the "unknown" family. `reason` is the
139/// evidence string (typically a stringified `UnknownReason`) so a
140/// consumer can explain *why* the analyser could not see through
141/// the target.
142pub fn emit_unknown_facts<I>(store: &mut FactStore, prov: &FactProvenance, unknowns: I) -> usize
143where
144    I: IntoIterator<Item = (String, String)>,
145{
146    let before = store.len();
147    for (target_logical_id, reason) in unknowns {
148        let f = crate::fact::mint_fact(
149            prov.clone(),
150            FactPayload::Opacity {
151                target_logical_id,
152                reason,
153            },
154        );
155        store.push(f);
156    }
157    store.len() - before
158}
159
160/// Emit flow-lattice facts for every tracked name in a [`FlowEnv`].
161///
162/// This is the materialization boundary for FLOW state: the solver keeps
163/// returning its compact per-name lattice, and this projector lowers that
164/// state into stable `FactStore` rows for downstream consumers.
165pub fn emit_flow_env_facts(
166    store: &mut FactStore,
167    prov: &FactProvenance,
168    unit_logical_id: &str,
169    env: &FlowEnv,
170) -> usize {
171    emit_flow_facts(
172        store,
173        prov,
174        unit_logical_id,
175        env.iter()
176            .map(|(name, flow)| (name.to_string(), flow.clone())),
177    )
178}
179
180/// Emit flow-lattice facts from explicit `(name, ValueFlow)` rows.
181///
182/// The name is trimmed and upper-cased before fact emission, so
183/// whitespace/comment-only source changes that preserve semantic identity
184/// keep stable fact IDs.
185pub fn emit_flow_facts<I, N>(
186    store: &mut FactStore,
187    prov: &FactProvenance,
188    unit_logical_id: &str,
189    flows: I,
190) -> usize
191where
192    I: IntoIterator<Item = (N, ValueFlow)>,
193    N: Into<String>,
194{
195    let before = store.len();
196    let unit = unit_logical_id.trim().to_string();
197    let mut rows: Vec<(String, ValueFlow)> = flows
198        .into_iter()
199        .map(|(name, flow)| (normalise_flow_name(name.into()), flow))
200        .collect();
201    rows.sort_by(|(left, _), (right, _)| left.cmp(right));
202
203    for (name, flow) in rows {
204        if let Some(value) = flow.constant.clone() {
205            store.push(crate::fact::mint_fact(
206                prov.clone(),
207                FactPayload::ConstantValue {
208                    unit_logical_id: unit.clone(),
209                    name: name.clone(),
210                    value,
211                },
212            ));
213        }
214        if !matches!(flow.value_set, ValueSet::Top) {
215            store.push(crate::fact::mint_fact(
216                prov.clone(),
217                FactPayload::ValueSet {
218                    unit_logical_id: unit.clone(),
219                    name: name.clone(),
220                    value_set: flow.value_set.clone(),
221                },
222            ));
223        }
224        if let Some(shape) = flow.string_shape.clone() {
225            store.push(crate::fact::mint_fact(
226                prov.clone(),
227                FactPayload::StringShape {
228                    unit_logical_id: unit.clone(),
229                    name: name.clone(),
230                    shape,
231                },
232            ));
233        }
234        if !flow.taint.kinds.is_empty() {
235            store.push(crate::fact::mint_fact(
236                prov.clone(),
237                FactPayload::Taint {
238                    unit_logical_id: unit.clone(),
239                    name: name.clone(),
240                    kinds: flow.taint.kinds.clone(),
241                },
242            ));
243        }
244        if !flow.taint.cleansed_by.is_empty() {
245            store.push(crate::fact::mint_fact(
246                prov.clone(),
247                FactPayload::Sanitizer {
248                    unit_logical_id: unit.clone(),
249                    name,
250                    cleansed_by: flow.taint.cleansed_by,
251                },
252            ));
253        }
254    }
255
256    store.len() - before
257}
258
/// Canonical spelling of a flow variable name: surrounding whitespace
/// removed and ASCII letters upper-cased (non-ASCII characters untouched).
fn normalise_flow_name(name: String) -> String {
    let mut normalised = name.trim().to_owned();
    normalised.make_ascii_uppercase();
    normalised
}
262
263/// Convenience: emit a declaration fact for every entry a
264/// `DeclLike` source yields. The trait keeps this module free of
265/// a hard `plsql-symbols` dependency (which would invert the
266/// layer order — symbols depends on ir, not the reverse).
267pub fn emit_declarations_from<T: DeclLike>(
268    store: &mut FactStore,
269    prov: &FactProvenance,
270    source: &T,
271) -> usize {
272    emit_declaration_facts(store, prov, source.iter_decls())
273}
274
/// A single `EXCEPTION WHEN <scope> THEN <body>` handler found by
/// [`scan_exception_handlers`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExceptionHandlerSite {
    /// Logical id of the unit the handler was found in.
    pub unit_logical_id: String,
    /// The caught condition in lower case with whitespace collapsed;
    /// the literal `others` whenever the condition mentions `OTHERS`.
    pub scope: String,
    /// Body classification: `noop` (only `NULL;` statements — QUAL001
    /// swallowed exception), `commit` / `rollback` (QUAL004 transaction
    /// control in a handler), or `other`.
    pub body_class: String,
}
287
/// Classify an exception-handler body for the syntactic rules.
///
/// The body is lower-cased and split on `;`; blank statements are ignored.
/// The result is:
///
/// * `"noop"` — there are no statements, or every statement is exactly
///   `null`;
/// * `"commit"` — some statement's first word is `commit`;
/// * `"rollback"` — some statement's first word is `rollback` (this also
///   covers `ROLLBACK TO <savepoint>`);
/// * `"other"` — anything else.
///
/// The leading keyword is compared as a whitespace-delimited word, so
/// `COMMIT` followed by a newline or tab is recognised the same as `COMMIT`
/// followed by a space, while identifiers such as `commit_audit` are not.
///
/// Conservative by design (R13): the rule decides what to do with `other`;
/// this function never asserts that a handler is safe.
#[must_use]
fn classify_handler_body(body: &str) -> &'static str {
    let norm = body.to_ascii_lowercase();
    let stmts: Vec<&str> = norm
        .split(';')
        .map(str::trim)
        .filter(|stmt| !stmt.is_empty())
        .collect();

    // `all` is vacuously true for an empty body, which is also a no-op.
    if stmts.iter().all(|stmt| *stmt == "null") {
        return "noop";
    }

    let any_starts_with_word = |keyword: &str| {
        stmts
            .iter()
            .any(|stmt| stmt.split_whitespace().next() == Some(keyword))
    };
    if any_starts_with_word("commit") {
        "commit"
    } else if any_starts_with_word("rollback") {
        "rollback"
    } else {
        "other"
    }
}
319
/// Is the character immediately before byte offset `at` something other
/// than an identifier character?
///
/// Returns `true` at the start of `src`, and whenever the preceding character
/// is neither alphanumeric nor `_`. This stops a keyword match from being
/// the tail of an identifier (e.g. `exception` inside `bad_exception`).
/// `at` must lie on a character boundary of `src`.
fn keyword_boundary_before(src: &str, at: usize) -> bool {
    match src[..at].chars().next_back() {
        None => true,
        Some(prev) => prev != '_' && !prev.is_alphanumeric(),
    }
}
328
329/// Scan a routine `source` for its exception section and yield one
330/// [`ExceptionHandlerSite`] per `WHEN ... THEN ...` handler.
331///
332/// Text-level, matching this crate's existing lightweight evidence
333/// approach (cf. dynamic-SQL sites). It recognizes the common
334/// single `EXCEPTION ... END` section: ambiguous / unparseable
335/// shapes simply yield no site rather than a wrong one (R13 — a
336/// false fact is worse than a missing one).
337#[must_use]
338pub fn scan_exception_handlers(unit_logical_id: &str, source: &str) -> Vec<ExceptionHandlerSite> {
339    let lower = source.to_ascii_lowercase();
340    let Some(mut idx) = lower.find("exception") else {
341        return Vec::new();
342    };
343    // Find a standalone `exception` keyword (word boundaries).
344    loop {
345        let end = idx + "exception".len();
346        let boundary = keyword_boundary_before(&lower, idx)
347            && lower[end..]
348                .chars()
349                .next()
350                .is_none_or(|c| !(c.is_alphanumeric() || c == '_'));
351        if boundary {
352            break;
353        }
354        match lower[end..].find("exception") {
355            Some(next) => idx = end + next,
356            None => return Vec::new(),
357        }
358    }
359
360    let section = &lower[idx + "exception".len()..];
361    let mut sites = Vec::new();
362    for chunk in section.split(" when ").skip(1) {
363        let Some((scope_raw, rest)) = chunk.split_once(" then ") else {
364            continue;
365        };
366        // Body runs to the next handler / section end.
367        let body = rest
368            .split(" when ")
369            .next()
370            .unwrap_or(rest)
371            .rsplit_once(" end")
372            .map_or(rest, |(b, _)| b);
373        let scope_norm = scope_raw.split_whitespace().collect::<Vec<_>>().join(" ");
374        let scope = if scope_norm.split_whitespace().any(|w| w == "others") {
375            "others".to_string()
376        } else {
377            scope_norm
378        };
379        sites.push(ExceptionHandlerSite {
380            unit_logical_id: unit_logical_id.to_string(),
381            scope,
382            body_class: classify_handler_body(body).to_string(),
383        });
384    }
385    sites
386}
387
388/// Emit one `ExceptionHandler` fact per detected handler so
389/// QUAL001 / QUAL004 can consume them via `by_kind` like every
390/// other fact-based rule.
391pub fn emit_exception_handler_facts<I>(
392    store: &mut FactStore,
393    prov: &FactProvenance,
394    sites: I,
395) -> usize
396where
397    I: IntoIterator<Item = ExceptionHandlerSite>,
398{
399    let before = store.len();
400    for site in sites {
401        let f = crate::fact::mint_fact(
402            prov.clone(),
403            FactPayload::ExceptionHandler {
404                unit_logical_id: site.unit_logical_id,
405                scope: site.scope,
406                body_class: site.body_class,
407            },
408        );
409        store.push(f);
410    }
411    store.len() - before
412}
413
/// A single cursor `FOR` loop found by [`scan_cursor_for_loops`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CursorForLoopSite {
    /// Logical id of the unit the loop was found in.
    pub unit_logical_id: String,
    /// The loop record variable (`FOR <var> IN …`), lower-cased.
    pub loop_var: String,
    /// Whether the loop body contains a row-level
    /// INSERT / UPDATE / DELETE / MERGE.
    pub has_body_dml: bool,
}
423
/// A routine body in which no recognised instrumentation call was found.
///
/// This records *absence* only; STYLE001 (opt-in) decides whether that
/// absence is a finding.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MissingInstrumentationSite {
    /// Logical id of the uninstrumented unit.
    pub unit_logical_id: String,
}
431
/// Lower-case substrings whose presence anywhere in a unit counts as an
/// instrumentation / logging / tracing / error-signal call.
///
/// The list is deliberately broad so STYLE001 only fires when a unit has
/// *nothing* of the kind — a false "missing" is worse than a missed one
/// (R13). Matching is plain substring containment against the lower-cased
/// source.
const INSTRUMENTATION_MARKERS: &[&str] = &[
    // Oracle-supplied packages and built-ins.
    "dbms_output.put_line",
    "dbms_application_info",
    "raise_application_error",
    "apex_debug",
    // Common logger package / procedure naming.
    "logger.",
    "log_",
    // Method-style logging calls.
    ".log(",
    ".info(",
    ".warn(",
    ".error(",
    ".debug(",
    // Audit helpers.
    "audit_",
];
450
451fn body_has_dml(body: &str) -> bool {
452    // Scan *every* occurrence of each DML keyword, not just the first.
453    // A first-hit-only check under-reports when an earlier occurrence is
454    // the tail of an identifier (e.g. `v_last_update`, `deleted_flag`):
455    // the boundary check fails on that decoy and, without a retry loop,
456    // the genuine row-level `update t`/`delete from t` later in the body
457    // is never reached. Mirrors `scan_dml_in_function` (line ~692) and
458    // `scan_deterministic_misuse` (line ~812).
459    ["insert ", "update ", "delete ", "merge "]
460        .iter()
461        .any(|kw| {
462            body.match_indices(kw)
463                .any(|(at, _)| keyword_boundary_before(body, at))
464        })
465}
466
467/// Scan a routine `source` for cursor `FOR` loops, yielding one
468/// [`CursorForLoopSite`] per loop. Text-level, mirroring
469/// [`scan_exception_handlers`]. A numeric range loop
470/// (`FOR i IN 1..10 LOOP`) is **not** a cursor loop and yields no
471/// site (R13: a false fact is worse than a missing one).
472#[must_use]
473pub fn scan_cursor_for_loops(unit_logical_id: &str, source: &str) -> Vec<CursorForLoopSite> {
474    let lower = source.to_ascii_lowercase();
475    let mut sites = Vec::new();
476    let mut search_from = 0;
477    while let Some(rel) = lower[search_from..].find("for ") {
478        let at = search_from + rel;
479        search_from = at + 4;
480        if !keyword_boundary_before(&lower, at) {
481            continue;
482        }
483        let after = &lower[at + 4..];
484        let Some((var_raw, rest)) = after.split_once(" in ") else {
485            continue;
486        };
487        let loop_var = var_raw.trim();
488        if loop_var.is_empty() || loop_var.split_whitespace().count() != 1 {
489            continue;
490        }
491        let Some((in_clause, body_and_more)) = rest.split_once(" loop ") else {
492            continue;
493        };
494        // Numeric range (`1..10`) ⇒ not a cursor loop.
495        if in_clause.contains("..") {
496            continue;
497        }
498        // Cursor loop iff the iterable is a query or a cursor
499        // reference: contains `select`, an opening paren, or is a
500        // bare identifier (cursor name). Anything else: skip (R13).
501        let ic = in_clause.trim();
502        let looks_cursor =
503            ic.contains("select") || ic.contains('(') || ic.split_whitespace().count() == 1;
504        if !looks_cursor {
505            continue;
506        }
507        let body = body_and_more
508            .split_once(" end loop")
509            .map_or(body_and_more, |(b, _)| b);
510        sites.push(CursorForLoopSite {
511            unit_logical_id: unit_logical_id.to_string(),
512            loop_var: loop_var.to_string(),
513            has_body_dml: body_has_dml(body),
514        });
515    }
516    sites
517}
518
519/// Scan a routine `source`: if it has a body (`BEGIN`) but no
520/// recognized instrumentation marker, yield a single
521/// [`MissingInstrumentationSite`]. A spec with no body yields
522/// nothing (R13 — we only report a unit we can see executes).
523#[must_use]
524pub fn scan_missing_instrumentation(
525    unit_logical_id: &str,
526    source: &str,
527) -> Vec<MissingInstrumentationSite> {
528    let lower = source.to_ascii_lowercase();
529    // Scan *every* occurrence of `begin`, not just the first. A first-hit-only
530    // check under-reports when an earlier occurrence is the tail/head of an
531    // identifier (e.g. a `v_begin_dt` declared before the real BEGIN): the
532    // boundary check fails on that decoy and, without a retry loop, the genuine
533    // body-introducing BEGIN later in the source is never reached, so the
534    // routine is wrongly classified as a body-less spec and silently escapes
535    // STYLE001. Mirrors `body_has_dml` (oracle-j1ep.5).
536    let has_body = lower
537        .match_indices("begin")
538        .any(|(at, _)| keyword_boundary_before(&lower, at));
539    if !has_body {
540        return Vec::new();
541    }
542    if INSTRUMENTATION_MARKERS.iter().any(|m| lower.contains(m)) {
543        return Vec::new();
544    }
545    vec![MissingInstrumentationSite {
546        unit_logical_id: unit_logical_id.to_string(),
547    }]
548}
549
550/// Emit one `CursorForLoop` fact per site, mirroring
551/// [`emit_exception_handler_facts`].
552pub fn emit_cursor_for_loop_facts<I>(
553    store: &mut FactStore,
554    prov: &FactProvenance,
555    sites: I,
556) -> usize
557where
558    I: IntoIterator<Item = CursorForLoopSite>,
559{
560    let before = store.len();
561    for site in sites {
562        let f = crate::fact::mint_fact(
563            prov.clone(),
564            FactPayload::CursorForLoop {
565                unit_logical_id: site.unit_logical_id,
566                loop_var: site.loop_var,
567                has_body_dml: site.has_body_dml,
568            },
569        );
570        store.push(f);
571    }
572    store.len() - before
573}
574
575/// Emit one `MissingInstrumentation` fact per site.
576pub fn emit_missing_instrumentation_facts<I>(
577    store: &mut FactStore,
578    prov: &FactProvenance,
579    sites: I,
580) -> usize
581where
582    I: IntoIterator<Item = MissingInstrumentationSite>,
583{
584    let before = store.len();
585    for site in sites {
586        let f = crate::fact::mint_fact(
587            prov.clone(),
588            FactPayload::MissingInstrumentation {
589                unit_logical_id: site.unit_logical_id,
590            },
591        );
592        store.push(f);
593    }
594    store.len() - before
595}
596
/// A string literal that, by strong syntactic context, is a hardcoded
/// secret (SEC003).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HardcodedCredentialSite {
    /// Logical id of the unit the literal was found in.
    pub unit_logical_id: String,
    /// The credential marker that matched, as listed in
    /// `CREDENTIAL_MARKERS` (e.g. `identified by`, `password`).
    pub marker: String,
}
606
/// Lower-case substrings that mark a credential context.
///
/// A marker only counts when a `'…'` string literal follows it within the
/// same statement — a bind variable or column reference is *not* a hardcoded
/// secret (R13: a false credential finding erodes trust fast). Markers are
/// tried in list order, which is also the order sites are reported in.
const CREDENTIAL_MARKERS: &[&str] = &[
    // DDL clause: `... IDENTIFIED BY '…'`.
    "identified by",
    // Identifier fragments that usually name a secret.
    "password",
    "passwd",
    "pwd",
    "secret",
    "api_key",
    "apikey",
    "credential",
    "private_key",
];
622
/// Blank the *contents* of every `'…'` string literal, keeping the quotes
/// and the exact byte length of the input.
///
/// Inside a literal every byte is replaced by `_` (a multi-byte character
/// becomes one `_` per byte), and a doubled `''` escape is treated as
/// literal content and becomes `__`. Everything outside a literal —
/// including non-ASCII text — is copied through unchanged. An unterminated
/// literal is masked to the end of the input.
///
/// Masking stops a marker from self-matching inside a literal value (e.g.
/// `secret` inside `'Sup3rSecret'`, or a clause keyword like `FROM`/`INTO`
/// inside `'failed to INSERT INTO orders'`). Because the byte length is
/// preserved, callers can scan the masked buffer for keyword positions and
/// still slice the ORIGINAL buffer at the same offsets. Shared with the DML
/// table-access extractors (`dml_edges`, `sql_resolve`), which scan raw SQL
/// for clause keywords (oracle-qbqf.2).
pub(crate) fn mask_string_literals(lower: &str) -> String {
    let mut out = String::with_capacity(lower.len());
    let mut chars = lower.chars().peekable();
    let mut in_literal = false;

    while let Some(ch) = chars.next() {
        if !in_literal {
            // Code position: copy the character verbatim. Working on
            // `char`s (not raw bytes) keeps non-ASCII text intact and the
            // output exactly as long as the input.
            in_literal = ch == '\'';
            out.push(ch);
        } else if ch != '\'' {
            // Literal content: one `_` per byte of the original character.
            for _ in 0..ch.len_utf8() {
                out.push('_');
            }
        } else if chars.peek() == Some(&'\'') {
            // Doubled quote: an escaped `'` that stays inside the literal.
            chars.next();
            out.push_str("__");
        } else {
            // Closing quote.
            out.push('\'');
            in_literal = false;
        }
    }
    out
}
662
663/// Scan `source` for hardcoded credentials: a credential marker
664/// (in code position, never inside a literal) immediately followed
665/// (same statement, before `;`) by a quoted string literal.
666/// Text-level + conservative, mirroring [`scan_exception_handlers`].
667#[must_use]
668pub fn scan_hardcoded_credentials(
669    unit_logical_id: &str,
670    source: &str,
671) -> Vec<HardcodedCredentialSite> {
672    let lower = mask_string_literals(&source.to_ascii_lowercase());
673    let mut sites = Vec::new();
674    for marker in CREDENTIAL_MARKERS {
675        let mut from = 0;
676        while let Some(rel) = lower[from..].find(marker) {
677            let at = from + rel;
678            from = at + marker.len();
679            // No word-boundary gate here: the credential marker is
680            // frequently *part of* the secret-bearing identifier
681            // (`v_password := '…'`, `l_api_key := '…'`). The
682            // literal-in-same-statement constraint below is what
683            // keeps this conservative (R13).
684            // Look only within the rest of this statement.
685            let rest = &lower[at + marker.len()..];
686            let stmt = rest.split(';').next().unwrap_or(rest);
687            // A quoted literal must appear, and before any obvious
688            // bind/identifier-only continuation. We accept the
689            // first `'` within the statement window.
690            if let Some(q) = stmt.find('\'') {
691                // Guard: the gap between marker and the quote must
692                // be short-ish (an assignment/clause, not a whole
693                // unrelated statement). 64 chars is generous for
694                // `password  varchar2(30) := '…'` style.
695                if q <= 64 {
696                    sites.push(HardcodedCredentialSite {
697                        unit_logical_id: unit_logical_id.to_string(),
698                        marker: (*marker).to_string(),
699                    });
700                }
701            }
702        }
703    }
704    sites
705}
706
707/// Emit one `HardcodedCredential` fact per site (SEC003).
708pub fn emit_hardcoded_credential_facts<I>(
709    store: &mut FactStore,
710    prov: &FactProvenance,
711    sites: I,
712) -> usize
713where
714    I: IntoIterator<Item = HardcodedCredentialSite>,
715{
716    let before = store.len();
717    for site in sites {
718        let f = crate::fact::mint_fact(
719            prov.clone(),
720            FactPayload::HardcodedCredential {
721                unit_logical_id: site.unit_logical_id,
722                marker: site.marker,
723            },
724        );
725        store.push(f);
726    }
727    store.len() - before
728}
729
/// A unit that declares invoker's rights (`AUTHID CURRENT_USER`) — SEC004.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvokerRightsSite {
    /// Logical id of the unit carrying the clause.
    pub unit_logical_id: String,
}
736
737/// Scan `source` for an `AUTHID CURRENT_USER` clause. Literal
738/// contents are masked first so the phrase can't self-match inside
739/// a string; whitespace between `authid` and `current_user` is
740/// collapsed. Conservative: `AUTHID DEFINER` (or absence) yields
741/// no site. At most one site per unit.
742#[must_use]
743pub fn scan_invoker_rights(unit_logical_id: &str, source: &str) -> Vec<InvokerRightsSite> {
744    let masked = mask_string_literals(&source.to_ascii_lowercase());
745    // Collapse all whitespace runs to a single space so
746    // `authid\n  current_user` matches.
747    let collapsed: String = masked.split_whitespace().collect::<Vec<_>>().join(" ");
748    if collapsed.contains("authid current_user") {
749        vec![InvokerRightsSite {
750            unit_logical_id: unit_logical_id.to_string(),
751        }]
752    } else {
753        Vec::new()
754    }
755}
756
757/// Emit one `InvokerRights` fact per site (SEC004).
758pub fn emit_invoker_rights_facts<I>(store: &mut FactStore, prov: &FactProvenance, sites: I) -> usize
759where
760    I: IntoIterator<Item = InvokerRightsSite>,
761{
762    let before = store.len();
763    for site in sites {
764        let f = crate::fact::mint_fact(
765            prov.clone(),
766            FactPayload::InvokerRights {
767                unit_logical_id: site.unit_logical_id,
768            },
769        );
770        store.push(f);
771    }
772    store.len() - before
773}
774
/// A presence-only site that carries nothing but the unit id.
///
/// Shared by three scans: a unit whose `RETURN` type is a REF CURSOR
/// (SEC007), a function with row-level DML in its body (QUAL007), and an
/// unbounded `BULK COLLECT` (QUAL003). The rule explains; the fact only
/// reports presence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UnitFactSite {
    /// Logical id of the unit the construct was found in.
    pub unit_logical_id: String,
}
783
784fn collapsed_masked(source: &str) -> String {
785    mask_string_literals(&source.to_ascii_lowercase())
786        .split_whitespace()
787        .collect::<Vec<_>>()
788        .join(" ")
789}
790
791/// SEC007: a function returning a REF CURSOR. Detects the common
792/// `RETURN SYS_REFCURSOR` and explicit `RETURN REF CURSOR` forms
793/// (strongly-typed named ref-cursor returns need type resolution
794/// and are out of this text-level scope — R13, documented).
795#[must_use]
796pub fn scan_ref_cursor_return(unit_logical_id: &str, source: &str) -> Vec<UnitFactSite> {
797    let c = collapsed_masked(source);
798    if c.contains("return sys_refcursor") || c.contains("return ref cursor") {
799        vec![UnitFactSite {
800            unit_logical_id: unit_logical_id.to_string(),
801        }]
802    } else {
803        Vec::new()
804    }
805}
806
807/// QUAL007: a `FUNCTION` whose body performs row-level DML. Only
808/// fires when the source is a function (the `function` keyword is
809/// present as a word) and `body_has_dml` (R13: a procedure with
810/// DML is normal and is not flagged here).
811#[must_use]
812pub fn scan_dml_in_function(unit_logical_id: &str, source: &str) -> Vec<UnitFactSite> {
813    let masked = mask_string_literals(&source.to_ascii_lowercase());
814    let is_function = masked
815        .match_indices("function")
816        .any(|(at, _)| keyword_boundary_before(&masked, at));
817    if is_function && body_has_dml(&masked) {
818        vec![UnitFactSite {
819            unit_logical_id: unit_logical_id.to_string(),
820        }]
821    } else {
822        Vec::new()
823    }
824}
825
826/// QUAL003: a `BULK COLLECT INTO` with no `LIMIT` in the same
827/// statement — unbounded PGA materialization. One site per
828/// offending statement.
829#[must_use]
830pub fn scan_unbounded_bulk_collect(unit_logical_id: &str, source: &str) -> Vec<UnitFactSite> {
831    let masked = mask_string_literals(&source.to_ascii_lowercase());
832    let mut sites = Vec::new();
833    let mut from = 0;
834    while let Some(rel) = masked[from..].find("bulk collect into") {
835        let at = from + rel;
836        from = at + "bulk collect into".len();
837        let stmt = masked[at..].split(';').next().unwrap_or(&masked[at..]);
838        if !stmt.contains("limit") {
839            sites.push(UnitFactSite {
840                unit_logical_id: unit_logical_id.to_string(),
841            });
842        }
843    }
844    sites
845}
846
847fn emit_unit_facts<I, F>(store: &mut FactStore, prov: &FactProvenance, sites: I, mk: F) -> usize
848where
849    I: IntoIterator<Item = UnitFactSite>,
850    F: Fn(String) -> FactPayload,
851{
852    let before = store.len();
853    for site in sites {
854        let f = crate::fact::mint_fact(prov.clone(), mk(site.unit_logical_id));
855        store.push(f);
856    }
857    store.len() - before
858}
859
860/// Emit `RefCursorReturn` facts (SEC007).
861pub fn emit_ref_cursor_return_facts<I: IntoIterator<Item = UnitFactSite>>(
862    store: &mut FactStore,
863    prov: &FactProvenance,
864    sites: I,
865) -> usize {
866    emit_unit_facts(store, prov, sites, |unit_logical_id| {
867        FactPayload::RefCursorReturn { unit_logical_id }
868    })
869}
870
871/// Emit `DmlInFunction` facts (QUAL007).
872pub fn emit_dml_in_function_facts<I: IntoIterator<Item = UnitFactSite>>(
873    store: &mut FactStore,
874    prov: &FactProvenance,
875    sites: I,
876) -> usize {
877    emit_unit_facts(store, prov, sites, |unit_logical_id| {
878        FactPayload::DmlInFunction { unit_logical_id }
879    })
880}
881
882/// Emit `UnboundedBulkCollect` facts (QUAL003).
883pub fn emit_unbounded_bulk_collect_facts<I: IntoIterator<Item = UnitFactSite>>(
884    store: &mut FactStore,
885    prov: &FactProvenance,
886    sites: I,
887) -> usize {
888    emit_unit_facts(store, prov, sites, |unit_logical_id| {
889        FactPayload::UnboundedBulkCollect { unit_logical_id }
890    })
891}
892
/// A finding site that pairs a unit id with a short detail string.
/// The scanners in this file store the matched deprecated feature,
/// the non-deterministic construct, a table name, or a write target
/// in `detail`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct DetailFactSite {
    /// Logical id of the unit the finding belongs to.
    pub unit_logical_id: String,
    /// Short description of what matched.
    pub detail: String,
}
900
901/// QUAL005: well-known deprecated / legacy constructs. Conservative
902/// (R13): only unambiguous, widely policy-flagged forms; literals
903/// are masked so a mention in a string never matches. One site per
904/// distinct feature found.
905#[must_use]
906pub fn scan_deprecated_features(unit_logical_id: &str, source: &str) -> Vec<DetailFactSite> {
907    let m = mask_string_literals(&source.to_ascii_lowercase());
908    let mut sites = Vec::new();
909    let mut push = |feature: &str| {
910        sites.push(DetailFactSite {
911            unit_logical_id: unit_logical_id.to_string(),
912            detail: feature.to_string(),
913        });
914    };
915    if m.match_indices("dbms_job")
916        .any(|(at, _)| keyword_boundary_before(&m, at))
917    {
918        push("dbms_job (deprecated; use DBMS_SCHEDULER)");
919    }
920    if m.contains("(+)") {
921        push("legacy (+) outer-join operator (use ANSI JOIN)");
922    }
923    if m.contains("commit work") || m.contains("rollback work") {
924        push("legacy `WORK` transaction-control keyword");
925    }
926    sites
927}
928
929/// QUAL008: a `DETERMINISTIC` function whose body contains a
930/// non-deterministic construct. One site per distinct construct.
931#[must_use]
932pub fn scan_deterministic_misuse(unit_logical_id: &str, source: &str) -> Vec<DetailFactSite> {
933    let m = mask_string_literals(&source.to_ascii_lowercase());
934    let is_deterministic = m
935        .match_indices("deterministic")
936        .any(|(at, _)| keyword_boundary_before(&m, at));
937    if !is_deterministic {
938        return Vec::new();
939    }
940    let mut sites = Vec::new();
941    let mut push = |c: &str| {
942        sites.push(DetailFactSite {
943            unit_logical_id: unit_logical_id.to_string(),
944            detail: c.to_string(),
945        });
946    };
947    if body_has_dml(&m) {
948        push("row-level DML");
949    }
950    for (needle, label) in [
951        ("sysdate", "SYSDATE"),
952        ("systimestamp", "SYSTIMESTAMP"),
953        ("current_timestamp", "CURRENT_TIMESTAMP"),
954        ("dbms_random", "DBMS_RANDOM"),
955        (".nextval", "sequence .NEXTVAL"),
956    ] {
957        if m.contains(needle) {
958            push(label);
959        }
960    }
961    sites
962}
963
964/// Emit `DeprecatedFeature` facts (QUAL005).
965pub fn emit_deprecated_feature_facts<I: IntoIterator<Item = DetailFactSite>>(
966    store: &mut FactStore,
967    prov: &FactProvenance,
968    sites: I,
969) -> usize {
970    let before = store.len();
971    for s in sites {
972        let f = crate::fact::mint_fact(
973            prov.clone(),
974            FactPayload::DeprecatedFeature {
975                unit_logical_id: s.unit_logical_id,
976                feature: s.detail,
977            },
978        );
979        store.push(f);
980    }
981    store.len() - before
982}
983
984/// Emit `DeterministicMisuse` facts (QUAL008).
985pub fn emit_deterministic_misuse_facts<I: IntoIterator<Item = DetailFactSite>>(
986    store: &mut FactStore,
987    prov: &FactProvenance,
988    sites: I,
989) -> usize {
990    let before = store.len();
991    for s in sites {
992        let f = crate::fact::mint_fact(
993            prov.clone(),
994            FactPayload::DeterministicMisuse {
995                unit_logical_id: s.unit_logical_id,
996                construct: s.detail,
997            },
998        );
999        store.push(f);
1000    }
1001    store.len() - before
1002}
1003
1004/// QUAL006: a `FOR EACH ROW` trigger whose body references its own
1005/// base table in a query/DML (ORA-04091 mutating-table hazard).
1006/// R13-conservative: requires a clean `on <table>` extraction and
1007/// `for each row`; otherwise no fact.
1008#[must_use]
1009pub fn scan_mutating_table_trigger(unit_logical_id: &str, source: &str) -> Vec<DetailFactSite> {
1010    let c = collapsed_masked(source);
1011    if !c.contains("trigger") || !c.contains("for each row") {
1012        return Vec::new();
1013    }
1014    // Table is the token after the first ` on ` following `trigger`.
1015    let Some(trig_at) = c.find("trigger") else {
1016        return Vec::new();
1017    };
1018    let after = &c[trig_at..];
1019    let Some(on_rel) = after.find(" on ") else {
1020        return Vec::new();
1021    };
1022    let tail = &after[on_rel + 4..];
1023    let raw = tail
1024        .split([' ', '(', '\n', '\t'])
1025        .next()
1026        .unwrap_or("")
1027        .trim_end_matches(|ch: char| !(ch.is_alphanumeric() || ch == '_'));
1028    if raw.is_empty() {
1029        return Vec::new();
1030    }
1031    // Strip schema qualifier for the body-reference check.
1032    let table = raw.rsplit('.').next().unwrap_or(raw).to_string();
1033    if table.is_empty() {
1034        return Vec::new();
1035    }
1036    let body_refs = [
1037        format!("from {table}"),
1038        format!("update {table}"),
1039        format!("insert into {table}"),
1040        format!("delete from {table}"),
1041        format!("merge into {table}"),
1042    ];
1043    if body_refs.iter().any(|p| c.contains(p.as_str())) {
1044        vec![DetailFactSite {
1045            unit_logical_id: unit_logical_id.to_string(),
1046            detail: table,
1047        }]
1048    } else {
1049        Vec::new()
1050    }
1051}
1052
1053/// QUAL002: an exception handler that instruments/logs but neither
1054/// re-raises nor signals — the error is recorded then swallowed.
1055/// At most one site per unit. Mirrors the lightweight exception-
1056/// section split used by [`scan_exception_handlers`].
1057#[must_use]
1058pub fn scan_log_without_reraise(unit_logical_id: &str, source: &str) -> Vec<InvokerRightsSite> {
1059    let lower = mask_string_literals(&source.to_ascii_lowercase());
1060    let Some(mut idx) = lower.find("exception") else {
1061        return Vec::new();
1062    };
1063    loop {
1064        let end = idx + "exception".len();
1065        let boundary = keyword_boundary_before(&lower, idx)
1066            && lower[end..]
1067                .chars()
1068                .next()
1069                .is_none_or(|ch| !(ch.is_alphanumeric() || ch == '_'));
1070        if boundary {
1071            break;
1072        }
1073        match lower[end..].find("exception") {
1074            Some(next) => idx = end + next,
1075            None => return Vec::new(),
1076        }
1077    }
1078    let section = &lower[idx + "exception".len()..];
1079    for chunk in section.split(" when ").skip(1) {
1080        let Some((_scope, rest)) = chunk.split_once(" then ") else {
1081            continue;
1082        };
1083        let body = rest
1084            .split(" when ")
1085            .next()
1086            .unwrap_or(rest)
1087            .rsplit_once(" end")
1088            .map_or(rest, |(b, _)| b);
1089        let has_log = INSTRUMENTATION_MARKERS.iter().any(|m| body.contains(m));
1090        let has_raise = body
1091            .match_indices("raise")
1092            .any(|(at, _)| keyword_boundary_before(body, at));
1093        if has_log && !has_raise {
1094            return vec![InvokerRightsSite {
1095                unit_logical_id: unit_logical_id.to_string(),
1096            }];
1097        }
1098    }
1099    Vec::new()
1100}
1101
1102/// DEP001: a DML statement whose target is schema-qualified to a
1103/// schema other than the unit's own (cross-schema write surface).
1104/// Unit schema = first dotted segment of `unit_logical_id`.
1105#[must_use]
1106pub fn scan_cross_schema_write(unit_logical_id: &str, source: &str) -> Vec<DetailFactSite> {
1107    let unit_schema = unit_logical_id
1108        .split('.')
1109        .next()
1110        .unwrap_or("")
1111        .to_ascii_lowercase();
1112    let m = mask_string_literals(&source.to_ascii_lowercase());
1113    let mut sites = Vec::new();
1114    // The DELETE lead is `delete ` (not `delete from `): Oracle's `FROM` is
1115    // optional, so a FROM-less cross-schema `delete fin.audit where …` must be
1116    // scanned too, or it silently escapes DEP001 (oracle-j1ep.2). After the
1117    // lead we skip an optional `from ` before reading the target so both
1118    // `delete fin.audit` and `delete from fin.audit` resolve to `fin.audit`.
1119    for lead in ["insert into ", "update ", "delete ", "merge into "] {
1120        let mut from = 0;
1121        while let Some(rel) = m[from..].find(lead) {
1122            let at = from + rel;
1123            from = at + lead.len();
1124            if !keyword_boundary_before(&m, at) {
1125                continue;
1126            }
1127            let mut rest = &m[at + lead.len()..];
1128            if lead == "delete " {
1129                rest = rest.trim_start();
1130                if let Some(after_from) = rest.strip_prefix("from ") {
1131                    rest = after_from.trim_start();
1132                }
1133            }
1134            let target = rest
1135                .split([' ', '(', ';', '\n', '\t'])
1136                .next()
1137                .unwrap_or("")
1138                .trim();
1139            if let Some((schema, obj)) = target.split_once('.')
1140                && !schema.is_empty()
1141                && !obj.is_empty()
1142                && schema != unit_schema
1143                && schema.chars().all(|ch| ch.is_alphanumeric() || ch == '_')
1144            {
1145                sites.push(DetailFactSite {
1146                    unit_logical_id: unit_logical_id.to_string(),
1147                    detail: format!("{schema}.{}", obj.split('.').next().unwrap_or(obj)),
1148                });
1149            }
1150        }
1151    }
1152    sites
1153}
1154
1155/// Emit `MutatingTableTrigger` facts (QUAL006).
1156pub fn emit_mutating_table_trigger_facts<I: IntoIterator<Item = DetailFactSite>>(
1157    store: &mut FactStore,
1158    prov: &FactProvenance,
1159    sites: I,
1160) -> usize {
1161    let before = store.len();
1162    for s in sites {
1163        store.push(crate::fact::mint_fact(
1164            prov.clone(),
1165            FactPayload::MutatingTableTrigger {
1166                unit_logical_id: s.unit_logical_id,
1167                table: s.detail,
1168            },
1169        ));
1170    }
1171    store.len() - before
1172}
1173
1174/// Emit `LogWithoutReraise` facts (QUAL002).
1175pub fn emit_log_without_reraise_facts<I: IntoIterator<Item = InvokerRightsSite>>(
1176    store: &mut FactStore,
1177    prov: &FactProvenance,
1178    sites: I,
1179) -> usize {
1180    let before = store.len();
1181    for s in sites {
1182        store.push(crate::fact::mint_fact(
1183            prov.clone(),
1184            FactPayload::LogWithoutReraise {
1185                unit_logical_id: s.unit_logical_id,
1186            },
1187        ));
1188    }
1189    store.len() - before
1190}
1191
1192/// Emit `CrossSchemaWrite` facts (DEP001).
1193pub fn emit_cross_schema_write_facts<I: IntoIterator<Item = DetailFactSite>>(
1194    store: &mut FactStore,
1195    prov: &FactProvenance,
1196    sites: I,
1197) -> usize {
1198    let before = store.len();
1199    for s in sites {
1200        store.push(crate::fact::mint_fact(
1201            prov.clone(),
1202            FactPayload::CrossSchemaWrite {
1203                unit_logical_id: s.unit_logical_id,
1204                target: s.detail,
1205            },
1206        ));
1207    }
1208    store.len() - before
1209}
1210
/// A single sensitive `CREATE PUBLIC SYNONYM` finding (SEC005).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct SynonymFactSite {
    /// Logical id of the unit the statement was found in.
    pub unit_logical_id: String,
    /// Synonym name as extracted by the scanner.
    pub synonym: String,
    /// Object the synonym points at, as extracted by the scanner.
    pub target: String,
}
1218
/// Substrings that flag a synonym/target name as sensitive
/// (credential / PII / finance). Deliberately a short, conservative
/// wordlist — a public synonym on a benign object is not flagged
/// (R13).
const SENSITIVITY_MARKERS: &[&str] = &[
    "password",
    "passwd",
    "pwd",
    "credential",
    "secret",
    "token",
    "apikey",
    "api_key",
    "private_key",
    "ssn",
    "salary",
    "payroll",
    "bank",
    "account",
    "acct",
    "card",
    "tax",
    "patient",
    "medical",
    "wallet",
];

/// True when `name` contains any entry of [`SENSITIVITY_MARKERS`] as a
/// plain, case-sensitive substring.
fn name_is_sensitive(name: &str) -> bool {
    for marker in SENSITIVITY_MARKERS {
        if name.contains(marker) {
            return true;
        }
    }
    false
}
1248
1249/// SEC005: a `CREATE [OR REPLACE] PUBLIC SYNONYM <syn> FOR <tgt>`
1250/// where the synonym or its target name matches the sensitivity
1251/// heuristic. Literal-masked, conservative: a non-public synonym
1252/// or a benign name yields no fact.
1253#[must_use]
1254pub fn scan_sensitive_public_synonym(unit_logical_id: &str, source: &str) -> Vec<SynonymFactSite> {
1255    let c = collapsed_masked(source);
1256    let mut sites = Vec::new();
1257    let mut from = 0;
1258    while let Some(rel) = c[from..].find("public synonym ") {
1259        let at = from + rel;
1260        from = at + "public synonym ".len();
1261        let rest = &c[at + "public synonym ".len()..];
1262        let Some((syn_raw, after)) = rest.split_once(" for ") else {
1263            continue;
1264        };
1265        let synonym = syn_raw
1266            .split([' ', '(', ';', '\n', '\t'])
1267            .next()
1268            .unwrap_or("")
1269            .rsplit('.')
1270            .next()
1271            .unwrap_or("")
1272            .to_string();
1273        let target = after
1274            .split([' ', '(', ';', '\n', '\t'])
1275            .next()
1276            .unwrap_or("")
1277            .trim_end_matches(';')
1278            .to_string();
1279        let tgt_name = target.rsplit('.').next().unwrap_or(&target);
1280        if synonym.is_empty() || target.is_empty() {
1281            continue;
1282        }
1283        if name_is_sensitive(&synonym) || name_is_sensitive(tgt_name) {
1284            sites.push(SynonymFactSite {
1285                unit_logical_id: unit_logical_id.to_string(),
1286                synonym,
1287                target,
1288            });
1289        }
1290    }
1291    sites
1292}
1293
1294/// Emit `SensitivePublicSynonym` facts (SEC005).
1295pub fn emit_sensitive_public_synonym_facts<I: IntoIterator<Item = SynonymFactSite>>(
1296    store: &mut FactStore,
1297    prov: &FactProvenance,
1298    sites: I,
1299) -> usize {
1300    let before = store.len();
1301    for s in sites {
1302        store.push(crate::fact::mint_fact(
1303            prov.clone(),
1304            FactPayload::SensitivePublicSynonym {
1305                unit_logical_id: s.unit_logical_id,
1306                synonym: s.synonym,
1307                target: s.target,
1308            },
1309        ));
1310    }
1311    store.len() - before
1312}
1313
/// A `<col> IS NULL` predicate on a column that the same source also
/// indexes (PERF003).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct IsNullIndexedSite {
    /// Logical id of the unit the predicate was found in.
    pub unit_logical_id: String,
    /// Unqualified column name tested for NULL.
    pub column: String,
}
1321
/// Reduce a token to a bare identifier: keep only the text after the
/// last `.`, then strip every leading and trailing character that is
/// not alphanumeric or `_`.
fn simple_ident(tok: &str) -> String {
    let last_segment = match tok.rfind('.') {
        Some(dot) => &tok[dot + 1..],
        None => tok,
    };
    let is_ident = |ch: char| ch.is_alphanumeric() || ch == '_';
    last_segment
        .trim_matches(|ch: char| !is_ident(ch))
        .to_owned()
}
1329
1330/// Columns this source declares an index on:
1331/// `CREATE [UNIQUE|BITMAP] INDEX <name> ON <table> ( c1, c2, … )`.
1332fn indexed_columns(c: &str) -> BTreeSet<String> {
1333    let mut cols = BTreeSet::new();
1334    let mut from = 0;
1335    while let Some(rel) = c[from..].find("index ") {
1336        let at = from + rel;
1337        from = at + "index ".len();
1338        // Must be a CREATE … INDEX (skip `alter index`, etc.).
1339        let pre = &c[..at];
1340        if !pre
1341            .trim_end()
1342            .rsplit([' ', '\n', '\t'])
1343            .next()
1344            .map(|w| w == "create" || w == "unique" || w == "bitmap")
1345            .unwrap_or(false)
1346        {
1347            continue;
1348        }
1349        let rest = &c[at + "index ".len()..];
1350        let Some(on_rel) = rest.find(" on ") else {
1351            continue;
1352        };
1353        let after_on = &rest[on_rel + 4..];
1354        let Some(lp) = after_on.find('(') else {
1355            continue;
1356        };
1357        let Some(rp) = after_on[lp..].find(')') else {
1358            continue;
1359        };
1360        for raw in after_on[lp + 1..lp + rp].split(',') {
1361            let id = simple_ident(raw.split_whitespace().next().unwrap_or(""));
1362            if !id.is_empty() {
1363                cols.insert(id);
1364            }
1365        }
1366    }
1367    cols
1368}
1369
1370/// PERF003: a `<col> IS NULL` predicate where the same source
1371/// declares an index whose key list contains `col`. B-tree indexes
1372/// do not store all-NULL keys, so the predicate forces a full scan.
1373/// R13: requires BOTH the index DDL and the predicate in this
1374/// source; catalog-only indexes are out of this source-level scope.
1375/// ` is null` is not a substring of `is not null`, so negated
1376/// predicates never match.
1377#[must_use]
1378pub fn scan_is_null_on_indexed_column(
1379    unit_logical_id: &str,
1380    source: &str,
1381) -> Vec<IsNullIndexedSite> {
1382    let c = collapsed_masked(source);
1383    let indexed = indexed_columns(&c);
1384    if indexed.is_empty() {
1385        return Vec::new();
1386    }
1387    let mut flagged: BTreeSet<String> = BTreeSet::new();
1388    let mut from = 0;
1389    while let Some(rel) = c[from..].find(" is null") {
1390        let at = from + rel;
1391        from = at + " is null".len();
1392        // Token immediately before ` is null` is the column.
1393        let col = simple_ident(c[..at].rsplit([' ', '(', ',']).next().unwrap_or(""));
1394        if !col.is_empty() && indexed.contains(&col) {
1395            flagged.insert(col);
1396        }
1397    }
1398    flagged
1399        .into_iter()
1400        .map(|column| IsNullIndexedSite {
1401            unit_logical_id: unit_logical_id.to_string(),
1402            column,
1403        })
1404        .collect()
1405}
1406
1407/// Emit `IsNullOnIndexedColumn` facts (PERF003).
1408pub fn emit_is_null_on_indexed_column_facts<I: IntoIterator<Item = IsNullIndexedSite>>(
1409    store: &mut FactStore,
1410    prov: &FactProvenance,
1411    sites: I,
1412) -> usize {
1413    let before = store.len();
1414    for s in sites {
1415        store.push(crate::fact::mint_fact(
1416            prov.clone(),
1417            FactPayload::IsNullOnIndexedColumn {
1418                unit_logical_id: s.unit_logical_id,
1419                column: s.column,
1420            },
1421        ));
1422    }
1423    store.len() - before
1424}
1425
1426#[cfg(test)]
1427mod tests {
1428    use super::*;
1429    use crate::calls::{CallContext, CallSite};
1430    use crate::fact::FactKind;
1431    use crate::flow::{ConstantValue, StringShape, Taint, TaintCleanser, TaintKind};
1432
1433    fn prov() -> FactProvenance {
1434        FactProvenance {
1435            component: "plsql-ir".into(),
1436            component_version: "0.1.0".into(),
1437            run_id: String::new(),
1438            source_logical_id: None,
1439            source_file: None,
1440        }
1441    }
1442
1443    fn flow_fixture_rows() -> Vec<(String, ValueFlow)> {
1444        (0..10)
1445            .map(|idx| {
1446                let int_value = idx.to_string();
1447                let next_value = (idx + 1).to_string();
1448                (
1449                    format!("v_{idx:02}"),
1450                    ValueFlow {
1451                        taint: Taint {
1452                            kinds: vec![if idx % 2 == 0 {
1453                                TaintKind::UserInput
1454                            } else {
1455                                TaintKind::BindVariable
1456                            }],
1457                            cleansed_by: vec![if idx % 2 == 0 {
1458                                TaintCleanser::DbmsAssert
1459                            } else {
1460                                TaintCleanser::HexEncode
1461                            }],
1462                        },
1463                        constant: Some(ConstantValue::Int {
1464                            value: int_value.clone(),
1465                        }),
1466                        value_set: ValueSet::OneOf {
1467                            values: vec![
1468                                ConstantValue::Int { value: int_value },
1469                                ConstantValue::Int { value: next_value },
1470                            ],
1471                        },
1472                        string_shape: Some(StringShape::InterpolatedWithFix {
1473                            literal_prefix: format!("select {idx} from "),
1474                            literal_suffix: String::from(" where id = :id"),
1475                        }),
1476                    },
1477                )
1478            })
1479            .collect()
1480    }
1481
1482    fn flow_payload_rows(store: &FactStore) -> Vec<String> {
1483        store
1484            .facts
1485            .iter()
1486            .filter_map(|fact| match &fact.payload {
1487                FactPayload::ConstantValue {
1488                    unit_logical_id,
1489                    name,
1490                    value,
1491                } => Some(format!(
1492                    "constant_value|{unit_logical_id}|{name}|{}",
1493                    constant_value_label(value)
1494                )),
1495                FactPayload::ValueSet {
1496                    unit_logical_id,
1497                    name,
1498                    value_set,
1499                } => Some(format!(
1500                    "value_set|{unit_logical_id}|{name}|{}",
1501                    value_set_label(value_set)
1502                )),
1503                FactPayload::StringShape {
1504                    unit_logical_id,
1505                    name,
1506                    shape,
1507                } => Some(format!(
1508                    "string_shape|{unit_logical_id}|{name}|{}",
1509                    string_shape_label(shape)
1510                )),
1511                FactPayload::Taint {
1512                    unit_logical_id,
1513                    name,
1514                    kinds,
1515                } => Some(format!("taint|{unit_logical_id}|{name}|{kinds:?}")),
1516                FactPayload::Sanitizer {
1517                    unit_logical_id,
1518                    name,
1519                    cleansed_by,
1520                } => Some(format!(
1521                    "sanitizer|{unit_logical_id}|{name}|{cleansed_by:?}"
1522                )),
1523                _ => None,
1524            })
1525            .collect()
1526    }
1527
1528    fn constant_value_label(value: &ConstantValue) -> String {
1529        match value {
1530            ConstantValue::Int { value } => format!("int:{value}"),
1531            ConstantValue::Float { value } => format!("float:{value}"),
1532            ConstantValue::Str { value } => format!("str:{value}"),
1533            ConstantValue::Bool { value } => format!("bool:{value}"),
1534            ConstantValue::Null => String::from("null"),
1535        }
1536    }
1537
1538    fn value_set_label(value_set: &ValueSet) -> String {
1539        match value_set {
1540            ValueSet::Top => String::from("top"),
1541            ValueSet::Bottom => String::from("bottom"),
1542            ValueSet::OneOf { values } => {
1543                let labels: Vec<String> = values.iter().map(constant_value_label).collect();
1544                format!("one_of:{}", labels.join(","))
1545            }
1546            ValueSet::Range { lo, hi } => {
1547                format!(
1548                    "range:{}..{}",
1549                    constant_value_label(lo),
1550                    constant_value_label(hi)
1551                )
1552            }
1553        }
1554    }
1555
1556    fn string_shape_label(shape: &StringShape) -> String {
1557        match shape {
1558            StringShape::Literal { value } => format!("literal:{value}"),
1559            StringShape::InterpolatedWithFix {
1560                literal_prefix,
1561                literal_suffix,
1562            } => format!("fix:{literal_prefix}|{literal_suffix}"),
1563            StringShape::FullyOpaque => String::from("fully_opaque"),
1564            StringShape::Empty => String::from("empty"),
1565        }
1566    }
1567
1568    #[test]
1569    fn declaration_facts_emitted_and_counted() {
1570        let mut store = FactStore::default();
1571        let n = emit_declaration_facts(
1572            &mut store,
1573            &prov(),
1574            vec![
1575                (DeclId::new(1), "hr.employees".to_string()),
1576                (DeclId::new(2), "hr.departments".to_string()),
1577            ],
1578        );
1579        assert_eq!(n, 2);
1580        assert_eq!(store.by_kind(FactKind::Declaration).count(), 2);
1581    }
1582
1583    #[test]
1584    fn declaration_facts_dedupe_identical_entries() {
1585        let mut store = FactStore::default();
1586        emit_declaration_facts(
1587            &mut store,
1588            &prov(),
1589            vec![(DeclId::new(1), "hr.x".to_string())],
1590        );
1591        let n2 = emit_declaration_facts(
1592            &mut store,
1593            &prov(),
1594            vec![(DeclId::new(1), "hr.x".to_string())],
1595        );
1596        // Same fact id → dedup → 0 new.
1597        assert_eq!(n2, 0);
1598        assert_eq!(store.len(), 1);
1599    }
1600
1601    #[test]
1602    fn reference_facts_emitted() {
1603        let mut store = FactStore::default();
1604        let n = emit_reference_facts(
1605            &mut store,
1606            &prov(),
1607            vec![(DeclId::new(3), "hr.audit_pkg".to_string())],
1608        );
1609        assert_eq!(n, 1);
1610        assert_eq!(store.by_kind(FactKind::Reference).count(), 1);
1611    }
1612
1613    #[test]
1614    fn call_facts_join_callee_path() {
1615        let mut store = FactStore::default();
1616        let calls = vec![CallSite {
1617            callee_parts: vec!["BILLING_PKG".into(), "POST_INVOICE".into()],
1618            callee_display: "billing_pkg.post_invoice".into(),
1619            arg_count: 2,
1620            context: CallContext::Statement,
1621        }];
1622        let n = emit_call_facts(&mut store, &prov(), "hr.run_billing", &calls);
1623        assert_eq!(n, 1);
1624        let f = store.by_kind(FactKind::DependencyEdge).next().unwrap();
1625        assert!(
1626            matches!(
1627                &f.payload,
1628                FactPayload::DependencyEdge { from_logical_id, to_logical_id, edge_kind }
1629                    if from_logical_id == "hr.run_billing"
1630                        && to_logical_id == "billing_pkg.post_invoice"
1631                        && edge_kind == "Calls"
1632            ),
1633            "unexpected DependencyEdge payload: {:?}",
1634            f.payload
1635        );
1636    }
1637
1638    #[test]
1639    fn mixed_families_filter_independently() {
1640        let mut store = FactStore::default();
1641        emit_declaration_facts(&mut store, &prov(), vec![(DeclId::new(1), "a".into())]);
1642        emit_reference_facts(&mut store, &prov(), vec![(DeclId::new(1), "b".into())]);
1643        emit_call_facts(
1644            &mut store,
1645            &prov(),
1646            "a",
1647            &[CallSite {
1648                callee_parts: vec!["C".into()],
1649                callee_display: "c".into(),
1650                arg_count: 0,
1651                context: CallContext::Statement,
1652            }],
1653        );
1654        assert_eq!(store.by_kind(FactKind::Declaration).count(), 1);
1655        assert_eq!(store.by_kind(FactKind::Reference).count(), 1);
1656        assert_eq!(store.by_kind(FactKind::DependencyEdge).count(), 1);
1657        assert_eq!(store.len(), 3);
1658    }
1659
1660    #[test]
1661    fn flow_fact_projection_covers_ten_fixtures_per_family() {
1662        let mut store = FactStore::default();
1663        let emitted = emit_flow_facts(&mut store, &prov(), "hr.flow_pkg", flow_fixture_rows());
1664
1665        assert_eq!(emitted, 50);
1666        assert_eq!(store.by_kind(FactKind::ConstantValue).count(), 10);
1667        assert_eq!(store.by_kind(FactKind::ValueSet).count(), 10);
1668        assert_eq!(store.by_kind(FactKind::StringShape).count(), 10);
1669        assert_eq!(store.by_kind(FactKind::Taint).count(), 10);
1670        assert_eq!(store.by_kind(FactKind::Sanitizer).count(), 10);
1671    }
1672
1673    #[test]
1674    fn flow_fact_projection_has_golden_payload_snapshot() {
1675        let mut store = FactStore::default();
1676        let flow = ValueFlow {
1677            taint: Taint {
1678                kinds: vec![TaintKind::UserInput, TaintKind::DynamicSql],
1679                cleansed_by: vec![TaintCleanser::DbmsAssert],
1680            },
1681            constant: Some(ConstantValue::Str {
1682                value: String::from("select * from users"),
1683            }),
1684            value_set: ValueSet::Range {
1685                lo: ConstantValue::Int {
1686                    value: String::from("1"),
1687                },
1688                hi: ConstantValue::Int {
1689                    value: String::from("9"),
1690                },
1691            },
1692            string_shape: Some(StringShape::InterpolatedWithFix {
1693                literal_prefix: String::from("select * from "),
1694                literal_suffix: String::from(" where id = :id"),
1695            }),
1696        };
1697
1698        emit_flow_facts(&mut store, &prov(), "hr.flow_pkg", vec![("v_sql", flow)]);
1699
1700        assert_eq!(
1701            flow_payload_rows(&store),
1702            vec![
1703                "constant_value|hr.flow_pkg|V_SQL|str:select * from users",
1704                "value_set|hr.flow_pkg|V_SQL|range:int:1..int:9",
1705                "string_shape|hr.flow_pkg|V_SQL|fix:select * from | where id = :id",
1706                "taint|hr.flow_pkg|V_SQL|[UserInput, DynamicSql]",
1707                "sanitizer|hr.flow_pkg|V_SQL|[DbmsAssert]",
1708            ]
1709        );
1710    }
1711
#[test]
fn flow_fact_ids_are_stable_for_normalized_semantic_names() {
    // The same flow emitted under `  v_table  ` and under `V_TABLE` must
    // mint identical fact ids, five on each side.
    let table = || String::from("safe_table");
    let flow = ValueFlow {
        taint: Taint {
            kinds: vec![TaintKind::UserInput],
            cleansed_by: vec![TaintCleanser::DbmsAssert],
        },
        constant: Some(ConstantValue::Str { value: table() }),
        value_set: ValueSet::OneOf {
            values: vec![ConstantValue::Str { value: table() }],
        },
        string_shape: Some(StringShape::Literal { value: table() }),
    };

    let mut padded = FactStore::default();
    emit_flow_facts(
        &mut padded,
        &prov(),
        "hr.flow_pkg",
        vec![("  v_table  ", flow.clone())],
    );
    let mut upper = FactStore::default();
    emit_flow_facts(&mut upper, &prov(), "hr.flow_pkg", vec![("V_TABLE", flow)]);

    let padded_ids: Vec<_> = padded.facts.iter().map(|f| f.id.clone()).collect();
    let upper_ids: Vec<_> = upper.facts.iter().map(|f| f.id.clone()).collect();
    assert_eq!(padded_ids, upper_ids);
    assert_eq!(padded_ids.len(), 5);
}
1747
#[test]
fn flow_env_projection_consumes_solver_output() {
    // Run the flow analysis over a concatenation involving a user-input
    // name, then project the resulting environment into facts.
    let sources = crate::TaintSources {
        user_input_names: vec!["p_table".to_string()],
        bind_names: Vec::new(),
    };
    let body = crate::lower_statement_body("v_sql := 'select * from ' || p_table;");
    let solved = crate::analyze_flow(&body, &sources);

    let mut facts = FactStore::default();
    emit_flow_env_facts(&mut facts, &prov(), "hr.flow_pkg", &solved);

    // Exactly one taint fact and one string-shape fact are expected.
    assert_eq!(facts.by_kind(FactKind::Taint).count(), 1);
    assert_eq!(facts.by_kind(FactKind::StringShape).count(), 1);
}
1765
#[test]
fn privilege_facts_emitted_and_filterable() {
    // Two distinct grant tuples go in; both must come back out under the
    // Privilege kind, with the first one carrying the HR_ROLE grant.
    let grants = vec![
        ("HR_ROLE".into(), "EXECUTE".into(), "hr.billing_pkg".into()),
        ("PUBLIC".into(), "SELECT".into(), "hr.audit_v".into()),
    ];
    let mut facts = FactStore::default();
    let emitted = emit_privilege_facts(&mut facts, &prov(), grants);
    assert_eq!(emitted, 2);
    assert_eq!(facts.by_kind(FactKind::Privilege).count(), 2);

    let first = facts.by_kind(FactKind::Privilege).next().unwrap();
    let is_hr_role_grant = matches!(
        &first.payload,
        FactPayload::Privilege { grantee, privilege, on }
            if grantee == "HR_ROLE"
                && privilege == "EXECUTE"
                && on == "hr.billing_pkg"
    );
    assert!(
        is_hr_role_grant,
        "unexpected Privilege payload: {:?}",
        first.payload
    );
}
1792
#[test]
fn dynamic_sql_facts_emitted() {
    // Two distinct site descriptions produce two DynamicSqlEvidence facts.
    let site_texts = vec![
        String::from("hr.run_dyn: EXECUTE IMMEDIATE <sql-like, 1 bind>"),
        String::from("hr.run_dyn2: OPEN cur FOR <opaque>"),
    ];
    let mut facts = FactStore::default();
    let emitted = emit_dynamic_sql_facts(&mut facts, &prov(), site_texts);
    assert_eq!(emitted, 2);
    assert_eq!(facts.by_kind(FactKind::DynamicSqlEvidence).count(), 2);
}
1807
#[test]
fn unknown_facts_carry_reason_evidence() {
    // Each (target, reason) pair becomes an Opacity fact whose payload
    // carries both the target id and the reason text.
    let unknowns = vec![
        ("hr.remote_call".into(), "DbLinkRemoteObject".into()),
        ("hr.wrapped_pkg".into(), "WrappedSource".into()),
    ];
    let mut facts = FactStore::default();
    let emitted = emit_unknown_facts(&mut facts, &prov(), unknowns);
    assert_eq!(emitted, 2);

    let first = facts.by_kind(FactKind::Opacity).next().unwrap();
    let is_remote_call = matches!(
        &first.payload,
        FactPayload::Opacity { target_logical_id, reason }
            if target_logical_id == "hr.remote_call"
                && reason == "DbLinkRemoteObject"
    );
    assert!(
        is_remote_call,
        "unexpected Opacity payload: {:?}",
        first.payload
    );
}
1832
#[test]
fn fact004_families_dedupe_and_filter_independently() {
    let mut facts = FactStore::default();

    // The same grant emitted twice: identical fact, same id, so the
    // second pass is deduplicated and reports zero new facts.
    emit_privilege_facts(
        &mut facts,
        &prov(),
        vec![("R".into(), "EXECUTE".into(), "o".into())],
    );
    let repeated = emit_privilege_facts(
        &mut facts,
        &prov(),
        vec![("R".into(), "EXECUTE".into(), "o".into())],
    );
    assert_eq!(repeated, 0);

    // One fact from each of the other two families.
    emit_dynamic_sql_facts(&mut facts, &prov(), vec!["site".into()]);
    emit_unknown_facts(&mut facts, &prov(), vec![("t".into(), "r".into())]);

    // Every family is visible under its own kind; three facts in total.
    for kind in [
        FactKind::Privilege,
        FactKind::DynamicSqlEvidence,
        FactKind::Opacity,
    ] {
        assert_eq!(facts.by_kind(kind).count(), 1);
    }
    assert_eq!(facts.len(), 3);
}
1855
1856    struct FakeDeclSource;
1857    impl DeclLike for FakeDeclSource {
1858        fn iter_decls(&self) -> Vec<(DeclId, String)> {
1859            vec![
1860                (DeclId::new(10), "hr.p1".into()),
1861                (DeclId::new(11), "hr.p2".into()),
1862            ]
1863        }
1864    }
1865
#[test]
fn emit_declarations_from_trait_source() {
    // Both declarations exposed by the fake source must be emitted.
    let source = FakeDeclSource;
    let mut facts = FactStore::default();
    let emitted = emit_declarations_from(&mut facts, &prov(), &source);
    assert_eq!(emitted, 2);
}
1872
#[test]
fn classify_handler_body_buckets() {
    // (handler body text, expected bucket)
    let cases = [
        (" NULL; ", "noop"),
        ("null;null;", "noop"),
        ("", "noop"),
        ("commit;", "commit"),
        ("rollback to sp1;", "rollback"),
        ("rollback; null;", "rollback"),
        ("log_error(sqlerrm);", "other"),
    ];
    for (body, bucket) in cases {
        assert_eq!(classify_handler_body(body), bucket);
    }
}
1883
#[test]
fn scan_when_others_then_null_is_noop_others() {
    // A lone `when others then null;` handler: scope "others", body "noop",
    // attributed to the scanned unit.
    let sites = scan_exception_handlers(
        "hr.pkg.run",
        "begin do_work; exception when others then null; end;",
    );
    assert_eq!(sites.len(), 1);

    let handler = &sites[0];
    assert_eq!(handler.scope, "others");
    assert_eq!(handler.body_class, "noop");
    assert_eq!(handler.unit_logical_id, "hr.pkg.run");
}
1893
#[test]
fn scan_named_handler_and_commit_classified() {
    // Upper-case source with a named handler followed by WHEN OTHERS:
    // both handlers are reported, in source order.
    let sites = scan_exception_handlers(
        "hr.p",
        "BEGIN x; EXCEPTION WHEN no_data_found THEN COMMIT; WHEN OTHERS THEN raise; END;",
    );
    assert_eq!(sites.len(), 2);

    let (named, others) = (&sites[0], &sites[1]);
    assert_eq!(named.scope, "no_data_found");
    assert_eq!(named.body_class, "commit");
    assert_eq!(others.scope, "others");
    assert_eq!(others.body_class, "other");
}
1904
#[test]
fn scan_ignores_identifier_containing_exception() {
    // The identifier `bad_exception` contains the word but is not the
    // EXCEPTION section keyword, so no handler may be reported.
    let sites = scan_exception_handlers(
        "hr.p",
        "declare bad_exception number; begin null; end;",
    );
    assert!(sites.is_empty());
}
1911
#[test]
fn emit_exception_handler_facts_pushes_typed_facts() {
    // Scan one handler, emit it, and check it lands under ExceptionHandler.
    let scanned = scan_exception_handlers(
        "hr.pkg.run",
        "begin go; exception when others then null; end;",
    );
    let mut facts = FactStore::default();
    let emitted = emit_exception_handler_facts(&mut facts, &prov(), scanned);
    assert_eq!(emitted, 1);
    assert_eq!(facts.by_kind(FactKind::ExceptionHandler).count(), 1);
}
1923
1924    // --- PLSQL-SAST-FACTS-LOOP (oracle-kcjx) ---
1925
#[test]
fn scan_cursor_for_loop_query_form_detected() {
    // `for r in (select …) loop` with a DML-free body: one site, loop
    // variable `r`, DML flag clear.
    let src =
        "begin for r in (select id from emps) loop dbms_output.put_line(r.id); end loop; end;";
    let loops = scan_cursor_for_loops("hr.pkg.p", src);
    assert_eq!(loops.len(), 1);

    let site = &loops[0];
    assert_eq!(site.loop_var, "r");
    assert!(!site.has_body_dml);
}
1936
#[test]
fn scan_cursor_for_loop_with_dml_sets_flag() {
    // An INSERT inside the loop body must raise `has_body_dml`.
    let src =
        "begin for rec in (select * from src) loop insert into dst values (rec.a); end loop; end;";
    let loops = scan_cursor_for_loops("hr.pkg.p", src);
    assert_eq!(loops.len(), 1);
    assert!(loops[0].has_body_dml, "INSERT in body must set has_body_dml");
}
1946
#[test]
fn scan_cursor_for_loop_bare_cursor_name_detected() {
    // `for c in emp_cur loop` (named cursor, no parenthesised query) is
    // still reported, with `c` as the loop variable.
    let src = "begin for c in emp_cur loop go(c); end loop; end;";
    let loops = scan_cursor_for_loops("hr.pkg.p", src);
    assert_eq!(loops.len(), 1);
    assert_eq!(loops[0].loop_var, "c");
}
1956
#[test]
fn scan_numeric_range_loop_is_not_a_cursor_loop() {
    // R13: `for i in 1..10` is a numeric range loop and must not be
    // reported as a cursor FOR loop.
    let src = "begin for i in 1..10 loop go(i); end loop; end;";
    let s = scan_cursor_for_loops("hr.pkg.p", src);
    assert!(s.is_empty(), "numeric range must yield no site, got {s:?}");
}
1966
#[test]
fn scan_for_keyword_inside_identifier_ignored() {
    // The letters "for" inside `before_x` do not start a FOR loop.
    let src = "begin before_x := 1; end;";
    let s = scan_cursor_for_loops("hr.pkg.p", src);
    assert!(s.is_empty(), "got {s:?}");
}
1973
#[test]
fn missing_instrumentation_flagged_when_body_has_no_marker() {
    // A body that only runs an UPDATE yields one site, attributed to the
    // unit that was scanned.
    let unit = "hr.pkg.silent";
    let sites = scan_missing_instrumentation(unit, "begin update t set a=1; end;");
    assert_eq!(sites.len(), 1);
    assert_eq!(sites[0].unit_logical_id, unit);
}
1980
#[test]
fn missing_instrumentation_not_flagged_when_marker_present() {
    // The body calls dbms_output.put_line, so nothing may be reported.
    let src = "begin dbms_output.put_line('x'); update t set a=1; end;";
    let s = scan_missing_instrumentation("hr.pkg.logged", src);
    assert!(s.is_empty(), "instrumented body must not flag, got {s:?}");
}
1989
1990    // oracle-j1ep.5: a `_begin`-suffixed identifier in the declaration section
1991    // (e.g. `v_begin_dt`) appears before the real BEGIN. A first-occurrence
1992    // `find("begin")` lands inside that decoy whose preceding `_` fails the
1993    // word-boundary check, short-circuiting `has_body` to false and silently
1994    // skipping the STYLE001 instrumentation check. Scanning every `begin`
1995    // occurrence (like `body_has_dml`) finds the genuine BEGIN.
#[test]
fn missing_instrumentation_flagged_past_begin_suffixed_decoy() {
    // `v_begin_dt` precedes the real BEGIN; the scan must still see the
    // body and report the uninstrumented unit.
    let src = "procedure p is v_begin_dt date; begin update t set x=1; end;";
    let s = scan_missing_instrumentation("hr.pkg.silent", src);
    assert_eq!(
        s.len(),
        1,
        "real BEGIN past a v_begin_dt decoy must yield one site: {s:?}"
    );
    assert_eq!(s[0].unit_logical_id, "hr.pkg.silent");
}
2009
#[test]
fn missing_instrumentation_skips_specs_without_body() {
    // R13: a bare specification has no BEGIN, so there is no executable
    // body to judge and no fact may be produced.
    let spec_only = "procedure p(x in number);";
    let s = scan_missing_instrumentation("hr.pkg.spec", spec_only);
    assert!(s.is_empty(), "got {s:?}");
}
2016
#[test]
fn emit_cursor_for_loop_and_missing_instrumentation_facts_are_typed() {
    let mut facts = FactStore::default();

    // Cursor FOR loop site → one CursorForLoop fact.
    let loop_sites = scan_cursor_for_loops(
        "hr.pkg.p",
        "begin for r in (select 1 from dual) loop null; end loop; end;",
    );
    let loops_emitted = emit_cursor_for_loop_facts(&mut facts, &prov(), loop_sites);
    assert_eq!(loops_emitted, 1);
    assert_eq!(facts.by_kind(FactKind::CursorForLoop).count(), 1);

    // Uninstrumented body → one MissingInstrumentation fact in the same store.
    let silent_sites = scan_missing_instrumentation("hr.pkg.p", "begin null; end;");
    let silent_emitted = emit_missing_instrumentation_facts(&mut facts, &prov(), silent_sites);
    assert_eq!(silent_emitted, 1);
    assert_eq!(facts.by_kind(FactKind::MissingInstrumentation).count(), 1);
}
2033
2034    // --- SEC003 hardcoded-credentials substrate ---
2035
#[test]
fn scan_identified_by_literal_flagged() {
    // `identified by '<literal>'` is reported with that phrase as marker.
    let ddl = "alter user hr identified by 'Sup3rSecret';";
    let hits = scan_hardcoded_credentials("hr.admin", ddl);
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].marker, "identified by");
}
2043
#[test]
fn scan_password_assignment_literal_flagged() {
    // Assigning a string literal to `v_password` must produce at least
    // one site whose marker is "password".
    let src = "begin v_password := 'hunter2'; end;";
    let hits = scan_hardcoded_credentials("hr.pkg.connect", src);
    let flagged = hits.iter().any(|site| site.marker == "password");
    assert!(flagged);
}
2049
#[test]
fn scan_credential_marker_with_bind_not_flagged() {
    // R13: the right-hand side is a variable, not a string literal, so
    // flagging it would be a false credential finding.
    let src = "begin v_password := p_input; end;";
    let s = scan_hardcoded_credentials("hr.pkg.connect", src);
    assert!(s.is_empty(), "bind, not a literal: {s:?}");
}
2057
#[test]
fn scan_credential_keyword_in_identifier_ignored() {
    // `old_password_hash` contains the keyword, but the statement has no
    // string literal after it, so nothing should match.
    let src = "begin x := old_password_hash; end;";
    let s = scan_hardcoded_credentials("hr.pkg.p", src);
    assert!(s.is_empty(), "{s:?}");
}
2065
#[test]
fn emit_hardcoded_credential_facts_typed() {
    // One scanned credential site becomes one HardcodedCredential fact.
    let scanned = scan_hardcoded_credentials("hr.admin", "alter user x identified by 'p';");
    let mut facts = FactStore::default();
    let emitted = emit_hardcoded_credential_facts(&mut facts, &prov(), scanned);
    assert_eq!(emitted, 1);
    assert_eq!(facts.by_kind(FactKind::HardcodedCredential).count(), 1);
}
2074
2075    // --- SEC004 invoker-rights substrate ---
2076
#[test]
fn scan_authid_current_user_flagged_whitespace_insensitive() {
    // A newline before `authid` and a tab before `current_user` must not
    // prevent the match.
    let ddl = "create or replace package hr.pkg\n  authid\tcurrent_user as ...";
    let hits = scan_invoker_rights("hr.pkg", ddl);
    assert_eq!(hits.len(), 1);
}
2085
#[test]
fn scan_authid_definer_not_flagged() {
    // `authid definer` is not invoker rights and must yield no site.
    let ddl = "create package hr.pkg authid definer as ...";
    let s = scan_invoker_rights("hr.pkg", ddl);
    assert!(s.is_empty(), "{s:?}");
}
2091
#[test]
fn scan_authid_current_user_inside_literal_not_flagged() {
    // The phrase occurs only inside a string literal; literals are
    // masked, so it must not be reported.
    let src = "begin msg := 'note: authid current_user is risky'; end;";
    let s = scan_invoker_rights("hr.pkg", src);
    assert!(s.is_empty(), "literal mention must not flag: {s:?}");
}
2101
#[test]
fn emit_invoker_rights_facts_typed() {
    // One scanned AUTHID CURRENT_USER site becomes one InvokerRights fact.
    let scanned = scan_invoker_rights("hr.pkg", "package p authid current_user as end;");
    let mut facts = FactStore::default();
    let emitted = emit_invoker_rights_facts(&mut facts, &prov(), scanned);
    assert_eq!(emitted, 1);
    assert_eq!(facts.by_kind(FactKind::InvokerRights).count(), 1);
}
2110
2111    // --- SEC007 / QUAL007 / QUAL003 substrate ---
2112
#[test]
fn scan_ref_cursor_return_detects_sys_refcursor() {
    // A function returning SYS_REFCURSOR is reported once.
    let cursor_fn = scan_ref_cursor_return(
        "hr.f",
        "function f return sys_refcursor is begin ... end;",
    );
    assert_eq!(cursor_fn.len(), 1);

    // A function returning NUMBER is not reported.
    let number_fn = scan_ref_cursor_return("hr.f", "function f return number is begin ... end;");
    assert!(number_fn.is_empty());
}
2124
#[test]
fn scan_dml_in_function_only_flags_functions_with_dml() {
    // Function whose body runs an INSERT: flagged once.
    let fn_with_dml = scan_dml_in_function(
        "hr.f",
        "function f return number is begin insert into log values(1); return 1; end;",
    );
    assert_eq!(fn_with_dml.len(), 1);

    // Function without any DML: clean.
    let fn_without_dml =
        scan_dml_in_function("hr.f", "function f return number is begin return 1; end;");
    assert!(fn_without_dml.is_empty());

    // Procedure with DML: outside QUAL007's concern, so not flagged.
    let proc_with_dml = scan_dml_in_function("hr.p", "procedure p is begin delete from t; end;");
    assert!(proc_with_dml.is_empty());
}
2145
#[test]
fn scan_dml_in_function_finds_dml_after_identifier_decoy() {
    // Regression (oracle-73t1.7): a declared local whose name merely
    // contains a DML keyword (`v_last_update`, `deleted_flag`,
    // `last_inserted`) appears before the genuine statement. The scan has
    // to look at every keyword occurrence, not stop at the first one that
    // fails the word-boundary check.
    let after_update_decoy = scan_dml_in_function(
        "hr.f",
        "function f(p int) return number is v_last_update date; \
         begin update t set c = 1 where id = p; return 1; end;",
    );
    assert_eq!(
        after_update_decoy.len(),
        1,
        "decoy `v_last_update` local must not mask the genuine `update t`",
    );

    let after_delete_decoy = scan_dml_in_function(
        "hr.f",
        "function f(p int) return number is deleted_flag char(1); \
         begin delete from t where id = p; return 1; end;",
    );
    assert_eq!(
        after_delete_decoy.len(),
        1,
        "decoy `deleted_flag` local must not mask the genuine `delete from t`",
    );

    let after_insert_decoy = scan_dml_in_function(
        "hr.f",
        "function f(p int) return number is last_inserted int; \
         begin insert into log values (p); return 1; end;",
    );
    assert_eq!(
        after_insert_decoy.len(),
        1,
        "decoy `last_inserted` local must not mask the genuine `insert into`",
    );

    // Decoy only, no real DML behind it: must stay clean.
    let decoy_only = scan_dml_in_function(
        "hr.f",
        "function f return number is v_last_update date; begin return 1; end;",
    );
    assert!(
        decoy_only.is_empty(),
        "identifier-only `v_last_update` must not be read as DML",
    );
}
2194
#[test]
fn scan_unbounded_bulk_collect_flags_missing_limit() {
    // BULK COLLECT with no LIMIT in the statement: one site.
    let unbounded = scan_unbounded_bulk_collect(
        "hr.p",
        "begin select id bulk collect into ids from huge_t; end;",
    );
    assert_eq!(unbounded.len(), 1);

    // LIMIT in the same statement bounds the fetch: no site.
    let bounded = scan_unbounded_bulk_collect(
        "hr.p",
        "begin fetch c bulk collect into ids limit 100; end;",
    );
    assert!(bounded.is_empty());
}
2214
#[test]
fn emit_sec007_qual007_qual003_facts_typed() {
    // Scan one positive example per family, emit all three into a shared
    // store, then check each lands under its own fact kind.
    let mut facts = FactStore::default();

    let ref_cursor_sites =
        scan_ref_cursor_return("hr.f", "function f return sys_refcursor is begin end;");
    emit_ref_cursor_return_facts(&mut facts, &prov(), ref_cursor_sites);

    let dml_sites = scan_dml_in_function(
        "hr.f",
        "function f return int is begin update t set a=1; end;",
    );
    emit_dml_in_function_facts(&mut facts, &prov(), dml_sites);

    let bulk_sites =
        scan_unbounded_bulk_collect("hr.p", "begin x bulk collect into y from t; end;");
    emit_unbounded_bulk_collect_facts(&mut facts, &prov(), bulk_sites);

    assert_eq!(facts.by_kind(FactKind::RefCursorReturn).count(), 1);
    assert_eq!(facts.by_kind(FactKind::DmlInFunction).count(), 1);
    assert_eq!(facts.by_kind(FactKind::UnboundedBulkCollect).count(), 1);
}
2240
2241    // --- QUAL005 / QUAL008 substrate ---
2242
#[test]
fn scan_deprecated_features_detects_known_forms() {
    // One body containing three deprecated forms: a dbms_job call, the
    // `(+)` outer-join marker, and `commit work`.
    let legacy = scan_deprecated_features(
        "hr.p",
        "begin dbms_job.submit(j); select a from t1, t2 where t1.id = t2.id (+); commit work; end;",
    );
    let details: Vec<&str> = legacy.iter().map(|site| site.detail.as_str()).collect();
    for needle in ["dbms_job", "(+)", "WORK"] {
        assert!(details.iter().any(|detail| detail.contains(needle)));
    }

    // Clean modern code: nothing reported.
    let modern = scan_deprecated_features("hr.q", "begin commit; end;");
    assert!(modern.is_empty());
}
2256
#[test]
fn scan_deprecated_in_literal_not_flagged() {
    // `dbms_job` appears only inside a string literal: not a finding.
    let src = "begin msg := 'use dbms_job here'; end;";
    let s = scan_deprecated_features("hr.p", src);
    assert!(s.is_empty(), "{s:?}");
}
2262
#[test]
fn scan_deterministic_misuse_requires_pragma_and_construct() {
    // DETERMINISTIC function returning SYSDATE: reported with that detail.
    let marked_and_impure = scan_deterministic_misuse(
        "hr.f",
        "function f return date deterministic is begin return sysdate; end;",
    );
    assert!(marked_and_impure.iter().any(|site| site.detail == "SYSDATE"));

    // DETERMINISTIC but pure: clean.
    let marked_and_pure = scan_deterministic_misuse(
        "hr.g",
        "function g(x int) return int deterministic is begin return x*2; end;",
    );
    assert!(marked_and_pure.is_empty());

    // Uses SYSDATE but is NOT marked deterministic: not QUAL008.
    let unmarked = scan_deterministic_misuse(
        "hr.h",
        "function h return date is begin return sysdate; end;",
    );
    assert!(unmarked.is_empty());
}
2287
#[test]
fn emit_qual005_qual008_facts_typed() {
    // One positive example per family, emitted into a shared store; each
    // must be retrievable under its own fact kind.
    let mut facts = FactStore::default();

    let deprecated = scan_deprecated_features("hr.p", "begin dbms_job.run(1); end;");
    emit_deprecated_feature_facts(&mut facts, &prov(), deprecated);

    let misuse = scan_deterministic_misuse(
        "hr.f",
        "function f return int deterministic is begin insert into log values(1); return 1; end;",
    );
    emit_deterministic_misuse_facts(&mut facts, &prov(), misuse);

    assert_eq!(facts.by_kind(FactKind::DeprecatedFeature).count(), 1);
    assert_eq!(facts.by_kind(FactKind::DeterministicMisuse).count(), 1);
}
2307
2308    // --- QUAL006 / QUAL002 / DEP001 substrate ---
2309
#[test]
fn scan_mutating_table_trigger_flags_self_reference() {
    // Row-level trigger on `employees` that queries `employees` itself:
    // one site, naming the table.
    let row_level = scan_mutating_table_trigger(
        "hr.trg_emp",
        "create trigger trg_emp before insert on employees for each row \
         begin select count(*) into n from employees; end;",
    );
    assert_eq!(row_level.len(), 1);
    assert_eq!(row_level[0].detail, "employees");

    // No FOR EACH ROW ⇒ statement-level trigger, no ORA-04091.
    let statement_level = scan_mutating_table_trigger(
        "hr.t",
        "create trigger t after insert on employees begin null; end;",
    );
    assert!(statement_level.is_empty());
}
2328
#[test]
fn scan_log_without_reraise_flags_swallowed_after_log() {
    // Handler logs and then falls through: the exception is swallowed.
    let swallowed = scan_log_without_reraise(
        "hr.p",
        "begin go; exception when others then dbms_output.put_line('failed'); end;",
    );
    assert_eq!(swallowed.len(), 1);

    // Handler logs and then re-raises: not swallowed, nothing reported.
    let reraised = scan_log_without_reraise(
        "hr.p",
        "begin go; exception when others then logger.error('x'); raise; end;",
    );
    assert!(reraised.is_empty());
}
2345
#[test]
fn scan_cross_schema_write_flags_other_schema_dml() {
    // The unit lives in `hr`; it writes to `fin.ledger` and to `hr.local`.
    // Only the write into the other schema may be reported.
    let src = "begin insert into fin.ledger(a) values(1); update hr.local set x=1; end;";
    let s = scan_cross_schema_write("hr.pkg.p", src);
    assert_eq!(s.len(), 1, "only the fin.* write is cross-schema: {s:?}");
    assert_eq!(s[0].detail, "fin.ledger");
}
2355
2356    // oracle-j1ep.2: Oracle's `FROM` is optional in a DELETE, so a FROM-less
2357    // cross-schema `DELETE fin.audit_log WHERE …` must still be flagged DEP001.
2358    // The old hardcoded `delete from ` lead matched only the FROM form, so a
2359    // FROM-less cross-schema delete silently escaped the scan.
#[test]
fn scan_cross_schema_write_flags_from_less_delete() {
    // DELETE written without the optional FROM keyword, targeting another
    // schema, must still be reported with the qualified table name.
    let src = "begin delete fin.audit_log where id = 5; end;";
    let s = scan_cross_schema_write("hr.proc1", src);
    assert_eq!(s.len(), 1, "FROM-less cross-schema delete must flag: {s:?}");
    assert_eq!(s[0].detail, "fin.audit_log");
}
2367
2368    // Both `delete fin.audit` and `delete from fin.audit` must resolve to the
2369    // same cross-schema target.
#[test]
fn scan_cross_schema_write_from_and_from_less_delete_agree() {
    // The two spellings of the same DELETE must each yield one site and
    // agree on the target.
    let explicit_from = scan_cross_schema_write("hr.p", "begin delete from fin.audit; end;");
    let implicit_from = scan_cross_schema_write("hr.p", "begin delete fin.audit; end;");

    assert_eq!(explicit_from.len(), 1);
    assert_eq!(implicit_from.len(), 1);

    let (explicit_site, implicit_site) = (&explicit_from[0], &implicit_from[0]);
    assert_eq!(explicit_site.detail, implicit_site.detail);
    assert_eq!(implicit_site.detail, "fin.audit");
}
2379
#[test]
fn emit_qual006_qual002_dep001_facts_typed() {
    // One positive example per family, emitted into a shared store; each
    // must be retrievable under its own fact kind.
    let mut facts = FactStore::default();

    let trigger_sites = scan_mutating_table_trigger(
        "hr.trg",
        "create trigger trg before update on accounts for each row begin update accounts set z=1; end;",
    );
    emit_mutating_table_trigger_facts(&mut facts, &prov(), trigger_sites);

    let swallowed_sites = scan_log_without_reraise(
        "hr.p",
        "begin x; exception when others then log_error('e'); end;",
    );
    emit_log_without_reraise_facts(&mut facts, &prov(), swallowed_sites);

    let write_sites = scan_cross_schema_write("hr.p", "begin delete from fin.audit; end;");
    emit_cross_schema_write_facts(&mut facts, &prov(), write_sites);

    assert_eq!(facts.by_kind(FactKind::MutatingTableTrigger).count(), 1);
    assert_eq!(facts.by_kind(FactKind::LogWithoutReraise).count(), 1);
    assert_eq!(facts.by_kind(FactKind::CrossSchemaWrite).count(), 1);
}
2408
2409    // --- SEC005 substrate ---
2410
#[test]
fn scan_sensitive_public_synonym_flags_credential_target() {
    // A public synonym pointing at `hr.employee_passwords` is reported
    // with both the synonym name and its target.
    let ddl = "create public synonym emp_pwd for hr.employee_passwords;";
    let hits = scan_sensitive_public_synonym("hr.ddl", ddl);
    assert_eq!(hits.len(), 1);

    let hit = &hits[0];
    assert_eq!(hit.synonym, "emp_pwd");
    assert_eq!(hit.target, "hr.employee_passwords");
}
2421
#[test]
fn scan_public_synonym_benign_not_flagged() {
    // A public synonym for `hr.departments` must not be reported.
    let ddl = "create public synonym depts for hr.departments;";
    let s = scan_sensitive_public_synonym("hr.ddl", ddl);
    assert!(s.is_empty(), "benign synonym must not flag: {s:?}");
}
2430
#[test]
fn scan_private_synonym_not_flagged() {
    // Without the PUBLIC keyword the synonym is out of scope.
    let ddl = "create synonym sal for hr.salary_tbl;";
    let s = scan_sensitive_public_synonym("hr.ddl", ddl);
    assert!(s.is_empty(), "{s:?}");
}
2437
#[test]
fn emit_sensitive_public_synonym_facts_typed() {
    // `create or replace public synonym` over a bank-accounts table:
    // one site scanned, one SensitivePublicSynonym fact emitted.
    let scanned = scan_sensitive_public_synonym(
        "hr.ddl",
        "create or replace public synonym bank_acct for fin.bank_accounts;",
    );
    let mut facts = FactStore::default();
    let emitted = emit_sensitive_public_synonym_facts(&mut facts, &prov(), scanned);
    assert_eq!(emitted, 1);
    assert_eq!(facts.by_kind(FactKind::SensitivePublicSynonym).count(), 1);
}
2449
2450    // --- PERF003 substrate ---
2451
#[test]
fn scan_is_null_on_indexed_column_flags_correlated_case() {
    // The source both creates an index on `deleted_at` and filters on
    // `deleted_at is null`: one site, naming that column.
    let hits = scan_is_null_on_indexed_column(
        "hr.q",
        "create index emp_dt_ix on employees(deleted_at); \
         begin select id from employees where deleted_at is null; end;",
    );
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].column, "deleted_at");
}
2462
#[test]
fn scan_is_null_without_index_not_flagged() {
    // R13: with no CREATE INDEX in the source, indexing would be
    // catalog-only knowledge, which is out of scope here.
    let src = "begin select id from employees where deleted_at is null; end;";
    let s = scan_is_null_on_indexed_column("hr.q", src);
    assert!(s.is_empty(), "{s:?}");
}
2472
#[test]
fn scan_is_not_null_never_matches() {
    // The column is indexed, but the predicate is `is not null`.
    let src = "create index ix on t(c); begin select 1 from t where c is not null; end;";
    let s = scan_is_null_on_indexed_column("hr.q", src);
    assert!(s.is_empty(), "`is not null` must not match: {s:?}");
}
2481
#[test]
fn scan_is_null_on_non_indexed_column_not_flagged() {
    // The index covers column `a`; the IS NULL predicate is on `b`.
    let src = "create index ix on t(a); begin select 1 from t where b is null; end;";
    let s = scan_is_null_on_indexed_column("hr.q", src);
    assert!(s.is_empty(), "b is not indexed: {s:?}");
}
2490
#[test]
fn emit_is_null_on_indexed_column_facts_typed() {
    // Unique index on `k` plus a DELETE filtered by `k is null`: one site
    // scanned, one IsNullOnIndexedColumn fact emitted.
    let scanned = scan_is_null_on_indexed_column(
        "hr.q",
        "create unique index ix on t(k); begin delete from t where k is null; end;",
    );
    let mut facts = FactStore::default();
    let emitted = emit_is_null_on_indexed_column_facts(&mut facts, &prov(), scanned);
    assert_eq!(emitted, 1);
    assert_eq!(facts.by_kind(FactKind::IsNullOnIndexedColumn).count(), 1);
}
2502}