Skip to main content

codetether_agent/rlm/oracle/
schema.rs

//! FINAL(JSON) schema for oracle-eligible RLM outputs.
//!
//! This module defines the structured JSON schema that RLM FINAL() outputs must conform to
//! for deterministic oracle verification. The `kind` field drives validator routing.
//!
//! # Schema Types
//!
//! - **Grep**: Pattern-match results (line numbers, text content)
//! - **Ast**: Structural AST query results (function signatures, struct fields)
//! - **Semantic**: Free-form text answers (unverifiable; stored but not golden)
//!
//! # Usage
//!
//! ```ignore
//! use codetether_agent::rlm::oracle::schema::{FinalPayload, GrepPayload, AstPayload};
//!
//! // Parse a FINAL() JSON output. `parse` never fails: bad input becomes `Malformed`.
//! let payload = FinalPayload::parse(r#"{"kind": "grep", "file": "src/main.rs", ...}"#);
//!
//! match payload {
//!     FinalPayload::Grep(grep) => { /* verify with GrepOracle */ }
//!     FinalPayload::Ast(ast) => { /* verify with TreeSitterOracle */ }
//!     FinalPayload::Semantic(_) => { /* cannot verify - skip */ }
//!     FinalPayload::Malformed { .. } => { /* log and skip */ }
//! }
//! ```
27
28use serde::{Deserialize, Serialize};
29use std::fmt;
30
31/// The top-level FINAL() payload envelope.
32#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
33#[serde(tag = "kind", rename_all = "lowercase")]
34pub enum FinalPayload {
35    /// Grep/pattern-match query results
36    Grep(GrepPayload),
37    /// AST/structural query results
38    Ast(AstPayload),
39    /// Semantic/free-form text (unverifiable)
40    Semantic(SemanticPayload),
41    /// Malformed JSON that couldn't be parsed
42    Malformed {
43        /// The raw string that failed to parse
44        raw: String,
45        /// Error message from parsing attempt
46        error: String,
47    },
48}
49
50impl FinalPayload {
51    /// Parse a JSON string into a FinalPayload.
52    ///
53    /// Returns `FinalPayload::Malformed` if parsing fails.
54    pub fn parse(json_str: &str) -> Self {
55        let trimmed = json_str.trim();
56
57        // Try to parse as JSON
58        let parsed: Result<serde_json::Value, _> = serde_json::from_str(trimmed);
59
60        match parsed {
61            Ok(value) => {
62                // Try to deserialize into our enum
63                match serde_json::from_value::<FinalPayload>(value.clone()) {
64                    Ok(payload) => payload,
65                    Err(e) => {
66                        // JSON is valid but doesn't match our schema
67                        // Check if it has a "kind" field we can use
68                        if let Some(kind) = value.get("kind").and_then(|k| k.as_str()) {
69                            match kind {
70                                "grep" => serde_json::from_value(value).unwrap_or_else(|e2| {
71                                    FinalPayload::Malformed {
72                                        raw: trimmed.to_string(),
73                                        error: format!("GrepPayload parse error: {}", e2),
74                                    }
75                                }),
76                                "ast" => serde_json::from_value(value).unwrap_or_else(|e2| {
77                                    FinalPayload::Malformed {
78                                        raw: trimmed.to_string(),
79                                        error: format!("AstPayload parse error: {}", e2),
80                                    }
81                                }),
82                                "semantic" => serde_json::from_value(value).unwrap_or_else(|e2| {
83                                    FinalPayload::Malformed {
84                                        raw: trimmed.to_string(),
85                                        error: format!("SemanticPayload parse error: {}", e2),
86                                    }
87                                }),
88                                _ => FinalPayload::Malformed {
89                                    raw: trimmed.to_string(),
90                                    error: format!("Unknown kind: {}", kind),
91                                },
92                            }
93                        } else {
94                            FinalPayload::Malformed {
95                                raw: trimmed.to_string(),
96                                error: format!("Missing 'kind' field: {}", e),
97                            }
98                        }
99                    }
100                }
101            }
102            Err(e) => {
103                // Not valid JSON at all
104                FinalPayload::Malformed {
105                    raw: trimmed.to_string(),
106                    error: format!("JSON parse error: {}", e),
107                }
108            }
109        }
110    }
111
112    /// Check if this payload is verifiable by an oracle.
113    pub fn is_verifiable(&self) -> bool {
114        matches!(self, FinalPayload::Grep(_) | FinalPayload::Ast(_))
115    }
116
117    /// Get the file path this payload references (if any).
118    pub fn file(&self) -> Option<&str> {
119        match self {
120            FinalPayload::Grep(p) => Some(&p.file),
121            FinalPayload::Ast(p) => Some(&p.file),
122            FinalPayload::Semantic(p) => Some(&p.file),
123            FinalPayload::Malformed { .. } => None,
124        }
125    }
126
127    /// Convert to a debuggable string representation.
128    pub fn summary(&self) -> String {
129        match self {
130            FinalPayload::Grep(p) => {
131                format!(
132                    "Grep(file={}, pattern={}, {} matches)",
133                    p.file,
134                    p.pattern,
135                    p.matches.len()
136                )
137            }
138            FinalPayload::Ast(p) => {
139                format!(
140                    "Ast(file={}, query={}, {} results)",
141                    p.file,
142                    p.query,
143                    p.results.len()
144                )
145            }
146            FinalPayload::Semantic(p) => {
147                let preview = if p.answer.len() > 50 {
148                    format!("{}...", &p.answer[..50])
149                } else {
150                    p.answer.clone()
151                };
152                format!("Semantic(file={}, answer={})", p.file, preview)
153            }
154            FinalPayload::Malformed { error, .. } => {
155                format!("Malformed({})", error)
156            }
157        }
158    }
159}
160
161impl fmt::Display for FinalPayload {
162    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
163        write!(f, "{}", self.summary())
164    }
165}
166
167/// Grep/pattern-match payload.
168#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
169pub struct GrepPayload {
170    /// File that was searched
171    pub file: String,
172    /// Regex pattern used
173    pub pattern: String,
174    /// Matched lines
175    pub matches: Vec<GrepMatch>,
176}
177
178/// A single grep match with line number and text.
179#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
180pub struct GrepMatch {
181    /// Line number (1-indexed, matching `grep -n`)
182    pub line: usize,
183    /// Full text of the matched line (or substring)
184    pub text: String,
185}
186
187impl GrepMatch {
188    /// Create a new match.
189    pub fn new(line: usize, text: String) -> Self {
190        Self { line, text }
191    }
192
193    /// Check if this match's text is a substring of the actual line.
194    pub fn text_matches(&self, actual_line: &str) -> bool {
195        actual_line.contains(&self.text)
196    }
197}
198
199/// AST/structural query payload.
200#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
201pub struct AstPayload {
202    /// File that was queried
203    pub file: String,
204    /// Tree-sitter query or query type
205    pub query: String,
206    /// Query results
207    pub results: Vec<AstResult>,
208}
209
210/// A single AST query result.
211#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
212pub struct AstResult {
213    /// Name of the matched item (function name, struct name, etc.)
214    pub name: String,
215    /// Function arguments/parameters (as string)
216    #[serde(default)]
217    pub args: Vec<String>,
218    /// Return type (as string)
219    #[serde(default)]
220    pub return_type: Option<String>,
221    /// Span: (start_line, end_line)
222    #[serde(default)]
223    pub span: Option<(usize, usize)>,
224}
225
226impl AstResult {
227    /// Create a new AST result for a function.
228    pub fn function(
229        name: String,
230        args: Vec<String>,
231        return_type: Option<String>,
232        span: Option<(usize, usize)>,
233    ) -> Self {
234        Self {
235            name,
236            args,
237            return_type,
238            span,
239        }
240    }
241}
242
243/// Semantic/free-form text payload (unverifiable).
244#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
245pub struct SemanticPayload {
246    /// File that was analyzed
247    pub file: String,
248    /// Free-form text answer
249    pub answer: String,
250}
251
252impl SemanticPayload {
253    /// Create a new semantic payload.
254    pub fn new(file: String, answer: String) -> Self {
255        Self { file, answer }
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed grep document parses into the `Grep` variant with its fields intact.
    #[test]
    fn parse_grep_payload() {
        let input = r#"{
            "kind": "grep",
            "file": "src/main.rs",
            "pattern": "async fn",
            "matches": [
                {"line": 42, "text": "async fn process() {"},
                {"line": 100, "text": "async fn handle() {"}
            ]
        }"#;

        if let FinalPayload::Grep(grep) = FinalPayload::parse(input) {
            assert_eq!(grep.file, "src/main.rs");
            assert_eq!(grep.pattern, "async fn");
            assert_eq!(grep.matches.len(), 2);
            assert_eq!(grep.matches[0].line, 42);
        } else {
            panic!("Expected Grep payload");
        }
    }

    /// A well-formed AST document parses into the `Ast` variant.
    #[test]
    fn parse_ast_payload() {
        let input = r#"{
            "kind": "ast",
            "file": "src/main.rs",
            "query": "functions",
            "results": [
                {"name": "process", "args": ["input: &str"], "return_type": "Result<String>"}
            ]
        }"#;

        if let FinalPayload::Ast(ast) = FinalPayload::parse(input) {
            assert_eq!(ast.file, "src/main.rs");
            assert_eq!(ast.query, "functions");
            assert_eq!(ast.results.len(), 1);
            assert_eq!(ast.results[0].name, "process");
        } else {
            panic!("Expected Ast payload");
        }
    }

    /// A well-formed semantic document parses into the `Semantic` variant.
    #[test]
    fn parse_semantic_payload() {
        let input = r#"{
            "kind": "semantic",
            "file": "src/main.rs",
            "answer": "This module provides async processing."
        }"#;

        if let FinalPayload::Semantic(semantic) = FinalPayload::parse(input) {
            assert_eq!(semantic.file, "src/main.rs");
            assert!(semantic.answer.contains("async processing"));
        } else {
            panic!("Expected Semantic payload");
        }
    }

    /// Text that is not JSON is reported as `Malformed`, keeping the raw input.
    #[test]
    fn parse_malformed_json() {
        let input = "not valid json at all";

        if let FinalPayload::Malformed { raw, error } = FinalPayload::parse(input) {
            assert_eq!(raw, "not valid json at all");
            assert!(error.contains("JSON parse error"));
        } else {
            panic!("Expected Malformed payload");
        }
    }

    /// Valid JSON without a `kind` field is reported as `Malformed` and mentions `kind`.
    #[test]
    fn parse_missing_kind_field() {
        let input = r#"{"file": "src/main.rs", "data": "value"}"#;

        if let FinalPayload::Malformed { error, .. } = FinalPayload::parse(input) {
            assert!(error.contains("kind"));
        } else {
            panic!("Expected Malformed payload");
        }
    }

    /// The `Malformed` variant serializes with a lowercase `kind` tag and both fields.
    #[test]
    fn malformed_payload_is_serializable() {
        let malformed = FinalPayload::Malformed {
            raw: "oops".to_string(),
            error: "parse error".to_string(),
        };
        let encoded =
            serde_json::to_string(&malformed).expect("malformed payload should serialize");

        for fragment in [
            "\"kind\":\"malformed\"",
            "\"raw\":\"oops\"",
            "\"error\":\"parse error\"",
        ] {
            assert!(encoded.contains(fragment));
        }
    }

    /// `text_matches` is a substring test against the actual line.
    #[test]
    fn grep_match_text_matching() {
        let hit = GrepMatch::new(42, "async fn".to_string());
        assert!(hit.text_matches("pub async fn process() -> Result<()> {"));
        assert!(!hit.text_matches("fn process() -> Result<()> {"));
    }

    /// Grep payloads are verifiable; semantic payloads are not.
    #[test]
    fn is_verifiable() {
        let grep = FinalPayload::parse(
            r#"{"kind": "grep", "file": "x.rs", "pattern": "fn", "matches": []}"#,
        );
        let semantic =
            FinalPayload::parse(r#"{"kind": "semantic", "file": "x.rs", "answer": "text"}"#);

        assert!(grep.is_verifiable());
        assert!(!semantic.is_verifiable());
    }

    /// `file()` exposes the path carried by a parsed payload.
    #[test]
    fn file_extraction() {
        let parsed = FinalPayload::parse(
            r#"{"kind": "grep", "file": "src/main.rs", "pattern": "fn", "matches": []}"#,
        );
        assert_eq!(parsed.file(), Some("src/main.rs"));
    }
}
383        let grep_json =
384            r#"{"kind": "grep", "file": "src/main.rs", "pattern": "fn", "matches": []}"#;
385        assert_eq!(FinalPayload::parse(grep_json).file(), Some("src/main.rs"));
386    }
387}