Skip to main content

harn_parser/
ast_json.rs

1//! Stable JSON projections for parser-facing tooling.
2//!
3//! The core AST keeps its Rust-native shape for compiler consumers.
4//! This module owns the CLI/tooling projection so `harn parse --json`
5//! and `harn tokens --json` can evolve under explicit schema versions.
6
7use harn_lexer::{Span, Token, TokenKind};
8use serde::Serialize;
9use serde_json::{Map, Value};
10
11use crate::SNode;
12
/// Schema version stamped on the AST projection; `program_to_json` copies it
/// into the program root, where it is serialized as `schemaVersion`.
13pub const AST_JSON_SCHEMA_VERSION: u32 = 1;
/// Schema version for the token projection.
///
/// NOTE(review): not referenced elsewhere in this module — presumably the CLI
/// attaches it when emitting `harn tokens --json`; confirm against the caller.
14pub const TOKEN_JSON_SCHEMA_VERSION: u32 = 1;
15
16#[derive(Debug, Clone, PartialEq, Serialize)]
17pub struct AstJsonProgram {
18    pub kind: &'static str,
19    #[serde(rename = "schemaVersion")]
20    pub schema_version: u32,
21    pub body: Vec<Value>,
22}
23
/// JSON projection of a single lexer token, as built by `token_to_json`.
///
/// Fields are serialized in declaration order under their Rust names.
24#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
25pub struct TokenJson {
    /// Variant name of the token's kind, as returned by `token_kind_name`.
26    pub kind: &'static str,
    /// Source text covered by the token's span; empty when the span cannot be
    /// sliced out of the source (see `lexeme_for_span`).
27    pub lexeme: String,
    /// Span start, used as a byte index into the source when slicing the lexeme.
28    pub start: usize,
    /// Span end (exclusive), likewise a byte index into the source.
29    pub end: usize,
    /// Line copied from the token's span.
    /// NOTE(review): appears to be 1-based per this module's tests — confirm in the lexer.
30    pub line: usize,
    /// Column copied from the token's span.
    /// NOTE(review): appears to be 1-based per this module's tests — confirm in the lexer.
31    pub column: usize,
32}
33
34pub fn program_to_json(nodes: &[SNode]) -> AstJsonProgram {
35    AstJsonProgram {
36        kind: "Program",
37        schema_version: AST_JSON_SCHEMA_VERSION,
38        body: nodes.iter().map(node_to_json).collect(),
39    }
40}
41
42pub fn node_to_json(node: &SNode) -> Value {
43    let raw = serde_json::to_value(node).expect("AST node should serialize to JSON");
44    normalize_value(raw)
45}
46
47pub fn tokens_to_json(source: &str, tokens: &[Token]) -> Vec<TokenJson> {
48    tokens
49        .iter()
50        .map(|token| token_to_json(source, token))
51        .collect()
52}
53
54pub fn token_to_json(source: &str, token: &Token) -> TokenJson {
55    TokenJson {
56        kind: token_kind_name(&token.kind),
57        lexeme: lexeme_for_span(source, token.span),
58        start: token.span.start,
59        end: token.span.end,
60        line: token.span.line,
61        column: token.span.column,
62    }
63}
64
65pub fn token_kind_name(kind: &TokenKind) -> &'static str {
66    match kind {
67        TokenKind::Pipeline => "Pipeline",
68        TokenKind::Extends => "Extends",
69        TokenKind::Override => "Override",
70        TokenKind::Let => "Let",
71        TokenKind::Var => "Var",
72        TokenKind::If => "If",
73        TokenKind::Else => "Else",
74        TokenKind::For => "For",
75        TokenKind::In => "In",
76        TokenKind::Match => "Match",
77        TokenKind::Retry => "Retry",
78        TokenKind::Parallel => "Parallel",
79        TokenKind::Return => "Return",
80        TokenKind::Import => "Import",
81        TokenKind::True => "True",
82        TokenKind::False => "False",
83        TokenKind::Nil => "Nil",
84        TokenKind::Try => "Try",
85        TokenKind::Catch => "Catch",
86        TokenKind::Throw => "Throw",
87        TokenKind::Finally => "Finally",
88        TokenKind::Fn => "Fn",
89        TokenKind::Spawn => "Spawn",
90        TokenKind::While => "While",
91        TokenKind::TypeKw => "TypeKw",
92        TokenKind::Enum => "Enum",
93        TokenKind::EvalPack => "EvalPack",
94        TokenKind::Struct => "Struct",
95        TokenKind::Interface => "Interface",
96        TokenKind::Emit => "Emit",
97        TokenKind::Pub => "Pub",
98        TokenKind::From => "From",
99        TokenKind::To => "To",
100        TokenKind::Tool => "Tool",
101        TokenKind::Exclusive => "Exclusive",
102        TokenKind::Guard => "Guard",
103        TokenKind::Require => "Require",
104        TokenKind::Deadline => "Deadline",
105        TokenKind::Defer => "Defer",
106        TokenKind::Yield => "Yield",
107        TokenKind::Mutex => "Mutex",
108        TokenKind::Break => "Break",
109        TokenKind::Continue => "Continue",
110        TokenKind::Select => "Select",
111        TokenKind::Impl => "Impl",
112        TokenKind::Skill => "Skill",
113        TokenKind::RequestApproval => "RequestApproval",
114        TokenKind::DualControl => "DualControl",
115        TokenKind::AskUser => "AskUser",
116        TokenKind::EscalateTo => "EscalateTo",
117        TokenKind::Identifier(_) => "Identifier",
118        TokenKind::StringLiteral(_) => "StringLiteral",
119        TokenKind::InterpolatedString(_) => "InterpolatedString",
120        TokenKind::RawStringLiteral(_) => "RawStringLiteral",
121        TokenKind::IntLiteral(_) => "IntLiteral",
122        TokenKind::FloatLiteral(_) => "FloatLiteral",
123        TokenKind::DurationLiteral(_) => "DurationLiteral",
124        TokenKind::Eq => "Eq",
125        TokenKind::Neq => "Neq",
126        TokenKind::And => "And",
127        TokenKind::Or => "Or",
128        TokenKind::Pipe => "Pipe",
129        TokenKind::NilCoal => "NilCoal",
130        TokenKind::Pow => "Pow",
131        TokenKind::QuestionDot => "QuestionDot",
132        TokenKind::Arrow => "Arrow",
133        TokenKind::Lte => "Lte",
134        TokenKind::Gte => "Gte",
135        TokenKind::PlusAssign => "PlusAssign",
136        TokenKind::MinusAssign => "MinusAssign",
137        TokenKind::StarAssign => "StarAssign",
138        TokenKind::SlashAssign => "SlashAssign",
139        TokenKind::PercentAssign => "PercentAssign",
140        TokenKind::Assign => "Assign",
141        TokenKind::Not => "Not",
142        TokenKind::Dot => "Dot",
143        TokenKind::Plus => "Plus",
144        TokenKind::Minus => "Minus",
145        TokenKind::Star => "Star",
146        TokenKind::Slash => "Slash",
147        TokenKind::Percent => "Percent",
148        TokenKind::Lt => "Lt",
149        TokenKind::Gt => "Gt",
150        TokenKind::Question => "Question",
151        TokenKind::Bar => "Bar",
152        TokenKind::Amp => "Amp",
153        TokenKind::LBrace => "LBrace",
154        TokenKind::RBrace => "RBrace",
155        TokenKind::LParen => "LParen",
156        TokenKind::RParen => "RParen",
157        TokenKind::LBracket => "LBracket",
158        TokenKind::RBracket => "RBracket",
159        TokenKind::Comma => "Comma",
160        TokenKind::Colon => "Colon",
161        TokenKind::Semicolon => "Semicolon",
162        TokenKind::At => "At",
163        TokenKind::LineComment { .. } => "LineComment",
164        TokenKind::BlockComment { .. } => "BlockComment",
165        TokenKind::Newline => "Newline",
166        TokenKind::Eof => "Eof",
167    }
168}
169
170fn lexeme_for_span(source: &str, span: Span) -> String {
171    source
172        .get(span.start..span.end)
173        .unwrap_or_default()
174        .to_string()
175}
176
177fn normalize_value(value: Value) -> Value {
178    match value {
179        Value::Array(values) => Value::Array(values.into_iter().map(normalize_value).collect()),
180        Value::Object(map) => normalize_object(map),
181        other => other,
182    }
183}
184
185fn normalize_object(mut map: Map<String, Value>) -> Value {
186    if is_span_map(&map) {
187        return normalize_span_map(map);
188    }
189
190    if is_spanned_map(&map) {
191        return normalize_spanned_map(map);
192    }
193
194    if map.len() == 1 {
195        let key = map.keys().next().cloned().expect("map has one key");
196        if is_variant_name(&key) {
197            let fields = map.remove(&key).expect("variant payload is present");
198            return tagged_value(key, normalize_value(fields));
199        }
200    }
201
202    for value in map.values_mut() {
203        let raw = std::mem::take(value);
204        *value = normalize_value(raw);
205    }
206    Value::Object(map)
207}
208
209fn normalize_spanned_map(mut map: Map<String, Value>) -> Value {
210    let node = map.remove("node").expect("spanned node is present");
211    let span = map.remove("span").expect("spanned span is present");
212    let mut normalized = normalize_enum_value(node);
213    if let Value::Object(object) = &mut normalized {
214        object.insert("span".to_string(), normalize_value(span));
215    }
216    normalized
217}
218
219fn normalize_enum_value(value: Value) -> Value {
220    match value {
221        Value::Object(mut map) if map.len() == 1 => {
222            let key = map.keys().next().cloned().expect("map has one key");
223            let fields = map.remove(&key).expect("variant payload is present");
224            tagged_value(key, normalize_value(fields))
225        }
226        Value::String(kind) if is_variant_name(&kind) => tagged_value(kind, Value::Null),
227        other => normalize_value(other),
228    }
229}
230
231fn tagged_value(kind: String, fields: Value) -> Value {
232    let mut object = Map::new();
233    object.insert("kind".to_string(), Value::String(kind));
234    object.insert("fields".to_string(), fields);
235    Value::Object(object)
236}
237
238fn normalize_span_map(mut map: Map<String, Value>) -> Value {
239    if let Some(end_line) = map.remove("end_line") {
240        map.insert("endLine".to_string(), end_line);
241    }
242    Value::Object(map)
243}
244
245fn is_spanned_map(map: &Map<String, Value>) -> bool {
246    map.len() == 2 && map.contains_key("node") && map.contains_key("span")
247}
248
249fn is_span_map(map: &Map<String, Value>) -> bool {
250    map.len() == 5
251        && map.contains_key("start")
252        && map.contains_key("end")
253        && map.contains_key("line")
254        && map.contains_key("column")
255        && map.contains_key("end_line")
256}
257
/// Heuristic for "this string is a Rust enum variant name": true when the
/// first character is an ASCII uppercase letter. Empty strings and names
/// starting with any other character (including non-ASCII uppercase) are
/// rejected.
fn is_variant_name(name: &str) -> bool {
    matches!(name.chars().next(), Some('A'..='Z'))
}
263
#[cfg(test)]
mod tests {
    use harn_lexer::Lexer;

    use super::*;
    use crate::parse_source;

    /// A parsed pipeline is projected as a versioned `Program` whose nodes are
    /// tagged `{kind, fields, span}` objects, nested all the way down.
    #[test]
    fn program_json_projects_tagged_nodes_with_spans() {
        let program = parse_source("pipeline main(task) {\n  return 1\n}\n").expect("parse");

        let projected = program_to_json(&program);

        assert_eq!(projected.kind, "Program");
        assert_eq!(projected.schema_version, AST_JSON_SCHEMA_VERSION);

        let pipeline = &projected.body[0];
        assert_eq!(pipeline["kind"], "Pipeline");

        let span = &pipeline["span"];
        assert_eq!(span["start"], 0);
        assert_eq!(span["line"], 1);

        let fields = &pipeline["fields"];
        assert_eq!(fields["name"], "main");

        let return_stmt = &fields["body"][0];
        assert_eq!(return_stmt["kind"], "ReturnStmt");
        assert_eq!(return_stmt["fields"]["value"]["kind"], "IntLiteral");
    }

    /// Token offsets are byte offsets: a multi-byte character inside a string
    /// literal widens the span by its UTF-8 length, and the lexeme is the
    /// exact source slice including the quotes.
    #[test]
    fn token_json_preserves_byte_spans_and_lexemes() {
        let source = "let x = \"é\"\n";
        let expected_lexeme = "\"é\"";

        let mut lexer = Lexer::new(source);
        let tokens = lexer.tokenize_with_comments().expect("tokenize");
        let projected = tokens_to_json(source, &tokens);

        let literal = projected
            .iter()
            .find(|token| token.kind == "StringLiteral")
            .expect("string literal token");
        let opening_quote = source.find('"').expect("opening quote");

        assert_eq!(literal.lexeme, expected_lexeme);
        assert_eq!(literal.start, opening_quote);
        assert_eq!(literal.end, opening_quote + expected_lexeme.len());
        assert_eq!(literal.line, 1);
        assert_eq!(literal.column, 9);
    }
}