1use harn_lexer::{Span, Token, TokenKind};
8use serde::Serialize;
9use serde_json::{Map, Value};
10
11use crate::SNode;
12
/// Schema version stamped into [`AstJsonProgram::schema_version`] by
/// [`program_to_json`].
pub const AST_JSON_SCHEMA_VERSION: u32 = 1;

/// Schema version for the token JSON projection.
///
/// NOTE(review): not referenced by any code visible in this file; presumably
/// consumers attach it next to the [`TokenJson`] list — confirm against callers.
pub const TOKEN_JSON_SCHEMA_VERSION: u32 = 1;
15
16#[derive(Debug, Clone, PartialEq, Serialize)]
17pub struct AstJsonProgram {
18 pub kind: &'static str,
19 #[serde(rename = "schemaVersion")]
20 pub schema_version: u32,
21 pub body: Vec<Value>,
22}
23
24#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
25pub struct TokenJson {
26 pub kind: &'static str,
27 pub lexeme: String,
28 pub start: usize,
29 pub end: usize,
30 pub line: usize,
31 pub column: usize,
32}
33
34pub fn program_to_json(nodes: &[SNode]) -> AstJsonProgram {
35 AstJsonProgram {
36 kind: "Program",
37 schema_version: AST_JSON_SCHEMA_VERSION,
38 body: nodes.iter().map(node_to_json).collect(),
39 }
40}
41
42pub fn node_to_json(node: &SNode) -> Value {
43 let raw = serde_json::to_value(node).expect("AST node should serialize to JSON");
44 normalize_value(raw)
45}
46
47pub fn tokens_to_json(source: &str, tokens: &[Token]) -> Vec<TokenJson> {
48 tokens
49 .iter()
50 .map(|token| token_to_json(source, token))
51 .collect()
52}
53
54pub fn token_to_json(source: &str, token: &Token) -> TokenJson {
55 TokenJson {
56 kind: token_kind_name(&token.kind),
57 lexeme: lexeme_for_span(source, token.span),
58 start: token.span.start,
59 end: token.span.end,
60 line: token.span.line,
61 column: token.span.column,
62 }
63}
64
65pub fn token_kind_name(kind: &TokenKind) -> &'static str {
66 match kind {
67 TokenKind::Pipeline => "Pipeline",
68 TokenKind::Extends => "Extends",
69 TokenKind::Override => "Override",
70 TokenKind::Let => "Let",
71 TokenKind::Const => "Const",
72 TokenKind::Var => "Var",
73 TokenKind::If => "If",
74 TokenKind::Else => "Else",
75 TokenKind::For => "For",
76 TokenKind::In => "In",
77 TokenKind::Match => "Match",
78 TokenKind::Retry => "Retry",
79 TokenKind::Parallel => "Parallel",
80 TokenKind::Return => "Return",
81 TokenKind::Import => "Import",
82 TokenKind::True => "True",
83 TokenKind::False => "False",
84 TokenKind::Nil => "Nil",
85 TokenKind::Try => "Try",
86 TokenKind::Catch => "Catch",
87 TokenKind::Throw => "Throw",
88 TokenKind::Finally => "Finally",
89 TokenKind::Fn => "Fn",
90 TokenKind::Spawn => "Spawn",
91 TokenKind::While => "While",
92 TokenKind::TypeKw => "TypeKw",
93 TokenKind::Enum => "Enum",
94 TokenKind::EvalPack => "EvalPack",
95 TokenKind::Struct => "Struct",
96 TokenKind::Interface => "Interface",
97 TokenKind::Emit => "Emit",
98 TokenKind::Pub => "Pub",
99 TokenKind::From => "From",
100 TokenKind::To => "To",
101 TokenKind::Tool => "Tool",
102 TokenKind::Exclusive => "Exclusive",
103 TokenKind::Guard => "Guard",
104 TokenKind::Require => "Require",
105 TokenKind::Deadline => "Deadline",
106 TokenKind::Defer => "Defer",
107 TokenKind::Yield => "Yield",
108 TokenKind::Mutex => "Mutex",
109 TokenKind::Break => "Break",
110 TokenKind::Continue => "Continue",
111 TokenKind::Select => "Select",
112 TokenKind::Impl => "Impl",
113 TokenKind::Skill => "Skill",
114 TokenKind::RequestApproval => "RequestApproval",
115 TokenKind::DualControl => "DualControl",
116 TokenKind::AskUser => "AskUser",
117 TokenKind::EscalateTo => "EscalateTo",
118 TokenKind::Identifier(_) => "Identifier",
119 TokenKind::StringLiteral(_) => "StringLiteral",
120 TokenKind::InterpolatedString(_) => "InterpolatedString",
121 TokenKind::RawStringLiteral(_) => "RawStringLiteral",
122 TokenKind::IntLiteral(_) => "IntLiteral",
123 TokenKind::FloatLiteral(_) => "FloatLiteral",
124 TokenKind::DurationLiteral(_) => "DurationLiteral",
125 TokenKind::Eq => "Eq",
126 TokenKind::Neq => "Neq",
127 TokenKind::And => "And",
128 TokenKind::Or => "Or",
129 TokenKind::Pipe => "Pipe",
130 TokenKind::NilCoal => "NilCoal",
131 TokenKind::Pow => "Pow",
132 TokenKind::QuestionDot => "QuestionDot",
133 TokenKind::Arrow => "Arrow",
134 TokenKind::Lte => "Lte",
135 TokenKind::Gte => "Gte",
136 TokenKind::PlusAssign => "PlusAssign",
137 TokenKind::MinusAssign => "MinusAssign",
138 TokenKind::StarAssign => "StarAssign",
139 TokenKind::SlashAssign => "SlashAssign",
140 TokenKind::PercentAssign => "PercentAssign",
141 TokenKind::Assign => "Assign",
142 TokenKind::Not => "Not",
143 TokenKind::Dot => "Dot",
144 TokenKind::Plus => "Plus",
145 TokenKind::Minus => "Minus",
146 TokenKind::Star => "Star",
147 TokenKind::Slash => "Slash",
148 TokenKind::Percent => "Percent",
149 TokenKind::Lt => "Lt",
150 TokenKind::Gt => "Gt",
151 TokenKind::Question => "Question",
152 TokenKind::Bar => "Bar",
153 TokenKind::Amp => "Amp",
154 TokenKind::LBrace => "LBrace",
155 TokenKind::RBrace => "RBrace",
156 TokenKind::LParen => "LParen",
157 TokenKind::RParen => "RParen",
158 TokenKind::LBracket => "LBracket",
159 TokenKind::RBracket => "RBracket",
160 TokenKind::Comma => "Comma",
161 TokenKind::Colon => "Colon",
162 TokenKind::Semicolon => "Semicolon",
163 TokenKind::At => "At",
164 TokenKind::LineComment { .. } => "LineComment",
165 TokenKind::BlockComment { .. } => "BlockComment",
166 TokenKind::Newline => "Newline",
167 TokenKind::Eof => "Eof",
168 }
169}
170
171fn lexeme_for_span(source: &str, span: Span) -> String {
172 source
173 .get(span.start..span.end)
174 .unwrap_or_default()
175 .to_string()
176}
177
178fn normalize_value(value: Value) -> Value {
179 match value {
180 Value::Array(values) => Value::Array(values.into_iter().map(normalize_value).collect()),
181 Value::Object(map) => normalize_object(map),
182 other => other,
183 }
184}
185
186fn normalize_object(mut map: Map<String, Value>) -> Value {
187 if is_span_map(&map) {
188 return normalize_span_map(map);
189 }
190
191 if is_spanned_map(&map) {
192 return normalize_spanned_map(map);
193 }
194
195 if map.len() == 1 {
196 let key = map.keys().next().cloned().expect("map has one key");
197 if is_variant_name(&key) {
198 let fields = map.remove(&key).expect("variant payload is present");
199 return tagged_value(key, normalize_value(fields));
200 }
201 }
202
203 for value in map.values_mut() {
204 let raw = std::mem::take(value);
205 *value = normalize_value(raw);
206 }
207 Value::Object(map)
208}
209
210fn normalize_spanned_map(mut map: Map<String, Value>) -> Value {
211 let node = map.remove("node").expect("spanned node is present");
212 let span = map.remove("span").expect("spanned span is present");
213 let mut normalized = normalize_enum_value(node);
214 if let Value::Object(object) = &mut normalized {
215 object.insert("span".to_string(), normalize_value(span));
216 }
217 normalized
218}
219
220fn normalize_enum_value(value: Value) -> Value {
221 match value {
222 Value::Object(mut map) if map.len() == 1 => {
223 let key = map.keys().next().cloned().expect("map has one key");
224 let fields = map.remove(&key).expect("variant payload is present");
225 tagged_value(key, normalize_value(fields))
226 }
227 Value::String(kind) if is_variant_name(&kind) => tagged_value(kind, Value::Null),
228 other => normalize_value(other),
229 }
230}
231
232fn tagged_value(kind: String, fields: Value) -> Value {
233 let mut object = Map::new();
234 object.insert("kind".to_string(), Value::String(kind));
235 object.insert("fields".to_string(), fields);
236 Value::Object(object)
237}
238
239fn normalize_span_map(mut map: Map<String, Value>) -> Value {
240 if let Some(end_line) = map.remove("end_line") {
241 map.insert("endLine".to_string(), end_line);
242 }
243 Value::Object(map)
244}
245
246fn is_spanned_map(map: &Map<String, Value>) -> bool {
247 map.len() == 2 && map.contains_key("node") && map.contains_key("span")
248}
249
250fn is_span_map(map: &Map<String, Value>) -> bool {
251 map.len() == 5
252 && map.contains_key("start")
253 && map.contains_key("end")
254 && map.contains_key("line")
255 && map.contains_key("column")
256 && map.contains_key("end_line")
257}
258
/// Reports whether `name` starts with an ASCII uppercase letter (`A`–`Z`).
///
/// Empty strings and names starting with any other character, including
/// non-ASCII uppercase letters, return `false`.
fn is_variant_name(name: &str) -> bool {
    // An ASCII letter is a single byte in UTF-8, so checking the first byte is
    // equivalent to checking the first character.
    matches!(name.as_bytes().first(), Some(b'A'..=b'Z'))
}
264
#[cfg(test)]
mod tests {
    use harn_lexer::Lexer;

    use super::*;
    use crate::parse_source;

    /// A parsed pipeline is projected as `{kind, fields, span}` objects, with
    /// nested statements and expressions tagged the same way.
    #[test]
    fn program_json_projects_tagged_nodes_with_spans() {
        let source = "pipeline main(task) {\n return 1\n}\n";
        let program = parse_source(source).expect("parse");

        let json = program_to_json(&program);
        assert_eq!(json.kind, "Program");
        assert_eq!(json.schema_version, AST_JSON_SCHEMA_VERSION);

        let pipeline = &json.body[0];
        assert_eq!(pipeline["kind"], "Pipeline");
        assert_eq!(pipeline["span"]["start"], 0);
        assert_eq!(pipeline["span"]["line"], 1);
        assert_eq!(pipeline["fields"]["name"], "main");

        let first_statement = &pipeline["fields"]["body"][0];
        assert_eq!(first_statement["kind"], "ReturnStmt");
        assert_eq!(first_statement["fields"]["value"]["kind"], "IntLiteral");
    }

    /// Token offsets are byte offsets into the source, so a multi-byte
    /// character inside a string literal widens the span accordingly.
    #[test]
    fn token_json_preserves_byte_spans_and_lexemes() {
        let source = "let x = \"é\"\n";
        let literal = "\"é\"";

        let mut lexer = Lexer::new(source);
        let tokens = lexer.tokenize_with_comments().expect("tokenize");
        let json = tokens_to_json(source, &tokens);

        let string = json
            .iter()
            .find(|token| token.kind == "StringLiteral")
            .expect("string literal token");
        let quote = source.find('"').expect("opening quote");

        assert_eq!(string.lexeme, literal);
        assert_eq!(string.start, quote);
        assert_eq!(string.end, quote + literal.len());
        assert_eq!(string.line, 1);
        assert_eq!(string.column, 9);
    }
}