Skip to main content

plsql_output/
lib.rs

1#![forbid(unsafe_code)]
2
3use plsql_core::{Diagnostic, Evidence, JsonExportable};
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use tracing::instrument;
7
/// Marker stored in the `format` field of every envelope built through
/// [`RobotJsonEnvelope::new`].
pub const ROBOT_JSON_FORMAT: &str = "plsql-robot-json";
/// Placeholder that [`RedactionPolicy`] writes in place of free-form text.
pub const REDACTED_TEXT: &str = "[REDACTED]";
10
11#[derive(
12    Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
13)]
14pub struct SchemaVersion {
15    pub major: u16,
16    pub minor: u16,
17    pub patch: u16,
18}
19
20impl SchemaVersion {
21    pub const fn new(major: u16, minor: u16, patch: u16) -> Self {
22        Self {
23            major,
24            minor,
25            patch,
26        }
27    }
28}
29
30impl std::fmt::Display for SchemaVersion {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        write!(f, "{}.{}.{}", self.major, self.minor, self.patch)
33    }
34}
35
36#[derive(Clone, Copy, Debug, Eq, PartialEq)]
37pub struct SchemaDescriptor {
38    pub id: &'static str,
39    pub version: SchemaVersion,
40    pub description: &'static str,
41}
42
43pub const ROBOT_JSON_SCHEMA: SchemaDescriptor = SchemaDescriptor {
44    id: "plsql.output.robot_json",
45    version: SchemaVersion::new(1, 0, 0),
46    description: "Generic machine-readable envelope for plsql-intelligence CLIs",
47};
48
49pub const DIAGNOSTIC_SCHEMA: SchemaDescriptor = SchemaDescriptor {
50    id: "plsql.output.diagnostics",
51    version: SchemaVersion::new(1, 0, 0),
52    description: "Diagnostic report envelope wrapping plsql-core diagnostics",
53};
54
55pub const EVIDENCE_SCHEMA: SchemaDescriptor = SchemaDescriptor {
56    id: "plsql.output.evidence",
57    version: SchemaVersion::new(1, 0, 0),
58    description: "Structured evidence envelope wrapping plsql-core evidence records",
59};
60
61pub const OUTPUT_SCHEMAS: [SchemaDescriptor; 3] =
62    [ROBOT_JSON_SCHEMA, DIAGNOSTIC_SCHEMA, EVIDENCE_SCHEMA];
63
64#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
65pub struct RobotJsonEnvelope<T> {
66    pub format: String,
67    pub schema_id: String,
68    pub schema_version: SchemaVersion,
69    pub payload: T,
70}
71
72impl<T> RobotJsonEnvelope<T> {
73    #[must_use]
74    #[instrument(level = "trace", skip(payload))]
75    pub fn new(schema: SchemaDescriptor, payload: T) -> Self {
76        Self {
77            format: String::from(ROBOT_JSON_FORMAT),
78            schema_id: String::from(schema.id),
79            schema_version: schema.version,
80            payload,
81        }
82    }
83
84    #[must_use]
85    #[instrument(level = "trace", skip(self))]
86    pub fn matches_schema(&self, schema: SchemaDescriptor) -> bool {
87        self.schema_id == schema.id && self.schema_version == schema.version
88    }
89}
90
91#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
92pub struct DiagnosticEnvelope {
93    #[serde(flatten)]
94    pub envelope: RobotJsonEnvelope<Vec<Diagnostic>>,
95}
96
97impl DiagnosticEnvelope {
98    #[must_use]
99    #[instrument(level = "trace", skip(diagnostics))]
100    pub fn new(diagnostics: Vec<Diagnostic>) -> Self {
101        Self {
102            envelope: RobotJsonEnvelope::new(DIAGNOSTIC_SCHEMA, diagnostics),
103        }
104    }
105
106    #[must_use]
107    #[instrument(level = "trace", skip(self, policy))]
108    pub fn redacted(&self, policy: &RedactionPolicy) -> Self {
109        let diagnostics = self
110            .envelope
111            .payload
112            .iter()
113            .map(|diagnostic| policy.redact_diagnostic(diagnostic))
114            .collect();
115        Self::new(diagnostics)
116    }
117}
118
119#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
120pub struct EvidenceEnvelope {
121    #[serde(flatten)]
122    pub envelope: RobotJsonEnvelope<Vec<Evidence>>,
123}
124
125impl EvidenceEnvelope {
126    #[must_use]
127    #[instrument(level = "trace", skip(evidence))]
128    pub fn new(evidence: Vec<Evidence>) -> Self {
129        Self {
130            envelope: RobotJsonEnvelope::new(EVIDENCE_SCHEMA, evidence),
131        }
132    }
133
134    #[must_use]
135    #[instrument(level = "trace", skip(self, policy))]
136    pub fn redacted(&self, policy: &RedactionPolicy) -> Self {
137        let evidence = self
138            .envelope
139            .payload
140            .iter()
141            .map(|entry| policy.redact_evidence(entry))
142            .collect();
143        Self::new(evidence)
144    }
145}
146
147#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
148pub struct RedactionPolicy {
149    pub redact_freeform_text: bool,
150    pub strip_attributes: bool,
151    pub keep_source_spans: bool,
152}
153
154impl Default for RedactionPolicy {
155    fn default() -> Self {
156        Self {
157            redact_freeform_text: false,
158            strip_attributes: false,
159            keep_source_spans: true,
160        }
161    }
162}
163
164impl RedactionPolicy {
165    #[must_use]
166    #[instrument(level = "trace", skip(self, diagnostic))]
167    pub fn redact_diagnostic(&self, diagnostic: &Diagnostic) -> Diagnostic {
168        let mut redacted = diagnostic.clone();
169        if self.redact_freeform_text {
170            redacted.message = String::from(REDACTED_TEXT);
171            redacted.help = redacted.help.as_ref().map(|_| String::from(REDACTED_TEXT));
172            redacted.related_spans.iter_mut().for_each(|label| {
173                label.label = String::from(REDACTED_TEXT);
174            });
175        }
176        if !self.keep_source_spans {
177            redacted.primary_span = None;
178            redacted.related_spans.clear();
179        }
180        redacted.evidence = diagnostic
181            .evidence
182            .iter()
183            .map(|evidence| self.redact_evidence(evidence))
184            .collect();
185        redacted
186    }
187
188    #[must_use]
189    #[instrument(level = "trace", skip(self, evidence))]
190    pub fn redact_evidence(&self, evidence: &Evidence) -> Evidence {
191        let mut redacted = evidence.clone();
192        if self.redact_freeform_text {
193            redacted.summary = String::from(REDACTED_TEXT);
194            redacted.notes.iter_mut().for_each(|note| {
195                *note = String::from(REDACTED_TEXT);
196            });
197            redacted.spans.iter_mut().for_each(|label| {
198                label.label = String::from(REDACTED_TEXT);
199            });
200        }
201        if self.strip_attributes {
202            redacted.attributes.clear();
203        }
204        if !self.keep_source_spans {
205            redacted.spans.clear();
206        }
207        redacted
208    }
209}
210
211#[instrument(level = "trace", skip(value))]
212pub fn envelope_to_json_value<T>(value: &RobotJsonEnvelope<T>) -> serde_json::Result<Value>
213where
214    T: JsonExportable,
215{
216    serde_json::to_value(value)
217}
218
219pub fn envelope_from_json_value<T>(value: Value) -> serde_json::Result<RobotJsonEnvelope<T>>
220where
221    T: JsonExportable,
222{
223    serde_json::from_value(value)
224}
225
226// ---------------------------------------------------------------------------
227// Orphan candidate types (LIN-018) — §13.8 Orphan Candidates Report
228// ---------------------------------------------------------------------------
229
230/// Confidence tier for an orphan candidate classification.
231///
232/// Higher tiers mean stronger evidence that the object is truly unused.
233/// Reports MUST NOT collapse these into a single scalar — the tier is the
234/// trust signal, per §1.5 Evidence UX.
235#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
236#[serde(rename_all = "snake_case")]
237pub enum OrphanConfidenceTier {
238    /// Strong evidence of non-use: no inbound references in code, catalog,
239    /// or dependency graph. Observation window met with AUDIT-based monitoring.
240    HighConfidenceUnused,
241    /// Probable non-use: no inbound code references, but catalog/dependency
242    /// evidence is incomplete (missing catalog, wrapped sources, dynamic SQL).
243    LikelyUnused,
244    /// Ambiguous: some references exist but are indirect (synonyms, public
245    /// grants, role-mediated access) or behind dynamic SQL sites.
246    MaybeUnused,
247    /// Cannot determine: insufficient data (missing catalog, missing source,
248    /// wrapped code, DB-link boundary).
249    Inconclusive,
250}
251
252/// An object identified as a potential orphan — candidate for cleanup.
253///
254/// Part of the orphan-candidates report (§13.8). Every candidate carries a
255/// confidence tier and evidence list. Reports MUST pair each candidate with
256/// a concrete remediation step (AUDIT statement, not DROP script).
257#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
258pub struct OrphanCandidate {
259    /// Logical object identifier (schema.object).
260    pub object_id: String,
261    /// Object kind (TABLE, VIEW, PACKAGE, PROCEDURE, FUNCTION, SEQUENCE,
262    /// TYPE, TRIGGER, SYNONYM, INDEX).
263    pub kind: String,
264    /// Last observed usage timestamp, if available. String for flexibility
265    /// (ISO-8601 or Oracle's native format).
266    pub last_used: Option<String>,
267    /// Structured evidence explaining why this object is a candidate.
268    /// Each entry is a human-readable reason string.
269    pub evidence: Vec<String>,
270    /// Confidence tier for this classification.
271    pub confidence: OrphanConfidenceTier,
272}
273
274/// A complete orphan candidates report.
275#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
276pub struct OrphanCandidatesReport {
277    /// Candidates grouped by confidence tier (not a scalar score — §1.5).
278    pub candidates: Vec<OrphanCandidate>,
279    /// Total objects examined.
280    pub objects_examined: usize,
281    /// Objects with at least one inbound reference.
282    pub objects_with_references: usize,
283    /// Observation window applied (e.g. "30d", "60d", "90d").
284    pub observation_window: Option<String>,
285}
286
287#[cfg(test)]
288mod tests {
289    use plsql_core::{Confidence, ConfidenceLevel, Diagnostic, Evidence, FileId, Position, Span};
290    use serde::{Deserialize, Serialize};
291    use serde_json::json;
292
293    use super::{
294        DiagnosticEnvelope, EvidenceEnvelope, OUTPUT_SCHEMAS, OrphanCandidate,
295        OrphanCandidatesReport, OrphanConfidenceTier, REDACTED_TEXT, ROBOT_JSON_SCHEMA,
296        RedactionPolicy, RobotJsonEnvelope, SchemaVersion, envelope_from_json_value,
297        envelope_to_json_value,
298    };
299
300    #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
301    struct TrivialPayload {
302        ok: bool,
303    }
304
305    #[test]
306    fn robot_json_round_trips_trivial_payloads() {
307        let payload = TrivialPayload { ok: true };
308        let envelope = RobotJsonEnvelope::new(ROBOT_JSON_SCHEMA, payload);
309        let value = envelope_to_json_value(&envelope);
310        assert!(value.is_ok());
311
312        let reparsed = value.and_then(envelope_from_json_value::<TrivialPayload>);
313        assert!(reparsed.is_ok());
314
315        let reparsed = reparsed.unwrap_or_else(|_| {
316            RobotJsonEnvelope::new(ROBOT_JSON_SCHEMA, TrivialPayload { ok: false })
317        });
318        assert_eq!(reparsed.schema_version, SchemaVersion::new(1, 0, 0));
319        assert!(reparsed.matches_schema(ROBOT_JSON_SCHEMA));
320        assert!(reparsed.payload.ok);
321    }
322
323    #[test]
324    fn output_schema_registry_is_stable_and_complete() {
325        assert_eq!(OUTPUT_SCHEMAS.len(), 3);
326        assert_eq!(OUTPUT_SCHEMAS[0].id, "plsql.output.robot_json");
327        assert_eq!(OUTPUT_SCHEMAS[1].version, SchemaVersion::new(1, 0, 0));
328        assert_eq!(
329            OUTPUT_SCHEMAS[2].description,
330            "Structured evidence envelope wrapping plsql-core evidence records"
331        );
332    }
333
334    #[test]
335    fn diagnostic_envelope_redaction_preserves_structure() {
336        let span = Span::new(
337            FileId::new(2),
338            Position::new(3, 1, 15),
339            Position::new(3, 6, 20),
340        );
341        let diagnostic = Diagnostic::new("CAT001", plsql_core::Severity::Warn, "bad catalog row")
342            .with_primary_span(span)
343            .with_help("refresh the snapshot")
344            .with_evidence(
345                Evidence::new("CAT-EVIDENCE", "saw inconsistent owner")
346                    .with_note("owner column empty")
347                    .with_attribute("row", json!(7))
348                    .with_confidence(Confidence::new(
349                        ConfidenceLevel::Medium,
350                        Some(String::from("catalog probe recovered")),
351                    )),
352            );
353
354        let policy = RedactionPolicy {
355            redact_freeform_text: true,
356            strip_attributes: true,
357            keep_source_spans: false,
358        };
359        let envelope = DiagnosticEnvelope::new(vec![diagnostic]).redacted(&policy);
360
361        assert_eq!(envelope.envelope.payload.len(), 1);
362        assert_eq!(envelope.envelope.payload[0].message, REDACTED_TEXT);
363        assert_eq!(envelope.envelope.payload[0].primary_span, None);
364        assert_eq!(
365            envelope.envelope.payload[0].evidence[0].summary,
366            REDACTED_TEXT
367        );
368        assert!(
369            envelope.envelope.payload[0].evidence[0]
370                .attributes
371                .is_empty()
372        );
373    }
374
375    #[test]
376    fn redaction_scrubs_freeform_text_while_keeping_spans() {
377        // Security contract for the realistic "scrub text, keep
378        // positions for debugging" policy — the path the structure
379        // test does NOT exercise (it clears spans). A regression
380        // that leaked a span/help/note label would pass that test
381        // but must fail this one.
382        let span = Span::new(
383            FileId::new(4),
384            Position::new(7, 2, 40),
385            Position::new(7, 9, 47),
386        );
387        // Synthetic freeform marker (not a credential — just a
388        // unique token we assert never survives redaction).
389        let sensitive = "FREEFORM_LEAK_CANARY_xyzzy";
390        let diagnostic = Diagnostic::new("SEC001", plsql_core::Severity::Error, sensitive)
391            .with_primary_span(span)
392            .with_help(sensitive)
393            .with_related_span(plsql_core::SpanLabel::new(sensitive, span))
394            .with_evidence(
395                Evidence::new("E1", sensitive)
396                    .with_note(sensitive)
397                    .with_span(plsql_core::SpanLabel::new(sensitive, span)),
398            );
399
400        let policy = RedactionPolicy {
401            redact_freeform_text: true,
402            strip_attributes: false,
403            keep_source_spans: true,
404        };
405        let out = DiagnosticEnvelope::new(vec![diagnostic]).redacted(&policy);
406        let d = &out.envelope.payload[0];
407
408        assert_eq!(d.message, REDACTED_TEXT);
409        assert_eq!(d.help.as_deref(), Some(REDACTED_TEXT));
410        // Spans kept (positions retained) but their freeform labels
411        // scrubbed — no `secret` substring survives anywhere.
412        assert_eq!(d.primary_span, Some(span));
413        assert_eq!(d.related_spans.len(), 1);
414        assert_eq!(d.related_spans[0].label, REDACTED_TEXT);
415        assert_eq!(d.related_spans[0].span, span);
416        let ev = &d.evidence[0];
417        assert_eq!(ev.summary, REDACTED_TEXT);
418        assert_eq!(ev.notes, vec![String::from(REDACTED_TEXT)]);
419        assert_eq!(ev.spans.len(), 1);
420        assert_eq!(ev.spans[0].label, REDACTED_TEXT);
421
422        let json = serde_json::to_string(&out).expect("envelope serializes");
423        assert!(
424            !json.contains("xyzzy"),
425            "redacted envelope must not leak the secret in any field"
426        );
427    }
428
429    #[test]
430    fn evidence_envelope_uses_stable_schema_id() {
431        let envelope = EvidenceEnvelope::new(vec![Evidence::new("SYM001", "resolved")]);
432
433        assert_eq!(envelope.envelope.schema_id, "plsql.output.evidence");
434        assert_eq!(
435            envelope.envelope.schema_version,
436            SchemaVersion::new(1, 0, 0)
437        );
438    }
439
440    #[test]
441    fn orphan_candidate_roundtrip_json() {
442        let report = OrphanCandidatesReport {
443            candidates: vec![
444                OrphanCandidate {
445                    object_id: "billing.legacy_pkg".into(),
446                    kind: "PACKAGE".into(),
447                    last_used: Some("2024-01-15T10:30:00Z".into()),
448                    evidence: vec![
449                        "No inbound call edges in dependency graph".into(),
450                        "No PL/Scope references found".into(),
451                        "AUDIT monitored for 90 days with zero hits".into(),
452                    ],
453                    confidence: OrphanConfidenceTier::HighConfidenceUnused,
454                },
455                OrphanCandidate {
456                    object_id: "billing.temp_reports".into(),
457                    kind: "TABLE".into(),
458                    last_used: None,
459                    evidence: vec![
460                        "No DML edges in dependency graph".into(),
461                        "Missing catalog metadata (wrapped source)".into(),
462                    ],
463                    confidence: OrphanConfidenceTier::LikelyUnused,
464                },
465                OrphanCandidate {
466                    object_id: "billing.util_fn".into(),
467                    kind: "FUNCTION".into(),
468                    last_used: Some("2025-12-01".into()),
469                    evidence: vec![
470                        "Called only via public synonym — may be used externally".into(),
471                    ],
472                    confidence: OrphanConfidenceTier::MaybeUnused,
473                },
474                OrphanCandidate {
475                    object_id: "billing.remote_pkg".into(),
476                    kind: "PACKAGE".into(),
477                    last_used: None,
478                    evidence: vec!["Object on DB-link boundary — cannot determine usage".into()],
479                    confidence: OrphanConfidenceTier::Inconclusive,
480                },
481            ],
482            objects_examined: 150,
483            objects_with_references: 120,
484            observation_window: Some("90d".into()),
485        };
486
487        let json = serde_json::to_string_pretty(&report).unwrap();
488        let back: OrphanCandidatesReport = serde_json::from_str(&json).unwrap();
489
490        assert_eq!(back.candidates.len(), 4);
491        assert_eq!(back.objects_examined, 150);
492        assert_eq!(back.objects_with_references, 120);
493        assert_eq!(back.observation_window, Some("90d".into()));
494
495        assert_eq!(
496            back.candidates[0].confidence,
497            OrphanConfidenceTier::HighConfidenceUnused
498        );
499        assert_eq!(
500            back.candidates[1].confidence,
501            OrphanConfidenceTier::LikelyUnused
502        );
503        assert_eq!(
504            back.candidates[2].confidence,
505            OrphanConfidenceTier::MaybeUnused
506        );
507        assert_eq!(
508            back.candidates[3].confidence,
509            OrphanConfidenceTier::Inconclusive
510        );
511
512        // Verify tagged serde
513        assert!(json.contains("high_confidence_unused"));
514        assert!(json.contains("likely_unused"));
515        assert!(json.contains("maybe_unused"));
516        assert!(json.contains("inconclusive"));
517
518        // Verify evidence roundtrips
519        assert_eq!(back.candidates[0].evidence.len(), 3);
520        assert_eq!(
521            back.candidates[0].last_used,
522            Some("2024-01-15T10:30:00Z".into())
523        );
524    }
525
526    #[test]
527    fn orphan_tier_serde_rename() {
528        let json = serde_json::to_string(&OrphanConfidenceTier::HighConfidenceUnused).unwrap();
529        assert_eq!(json, "\"high_confidence_unused\"");
530        let json = serde_json::to_string(&OrphanConfidenceTier::Inconclusive).unwrap();
531        assert_eq!(json, "\"inconclusive\"");
532    }
533}