1use harn_lexer::{Span, Token, TokenKind};
8use serde::Serialize;
9use serde_json::{Map, Value};
10
11use crate::SNode;
12
/// Schema version stamped into [`AstJsonProgram::schema_version`] by
/// [`program_to_json`].
pub const AST_JSON_SCHEMA_VERSION: u32 = 1;
/// Schema version for the token JSON projection ([`TokenJson`]).
// NOTE(review): not referenced by the code visible in this file; presumably
// consumed by whatever wraps the `tokens_to_json` output — confirm.
pub const TOKEN_JSON_SCHEMA_VERSION: u32 = 1;
15
/// Top-level JSON envelope for a parsed program.
///
/// Built by [`program_to_json`]; serializes through `serde` with the schema
/// version exposed under the camelCase key `schemaVersion`.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct AstJsonProgram {
    /// Node tag for the envelope; `program_to_json` sets it to `"Program"`.
    pub kind: &'static str,
    /// Version of the AST JSON shape (see [`AST_JSON_SCHEMA_VERSION`]).
    #[serde(rename = "schemaVersion")]
    pub schema_version: u32,
    /// One normalized JSON value per top-level node, in input order.
    pub body: Vec<Value>,
}
23
/// JSON-friendly projection of a single lexer token.
///
/// Built by [`token_to_json`] from a `Token` and the source text it was lexed
/// from.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct TokenJson {
    /// Variant name of the token's kind, as returned by [`token_kind_name`].
    pub kind: &'static str,
    /// Source text covered by `start..end`; empty when that range cannot be
    /// sliced out of the source.
    pub lexeme: String,
    /// Copied from the token span's `start`; used as a byte index into the
    /// source when extracting `lexeme`.
    pub start: usize,
    /// Copied from the token span's `end`; used as the exclusive upper byte
    /// index when extracting `lexeme`.
    pub end: usize,
    /// Copied from the token span's `line`.
    pub line: usize,
    /// Copied from the token span's `column`.
    pub column: usize,
}
33
34pub fn program_to_json(nodes: &[SNode]) -> AstJsonProgram {
35 AstJsonProgram {
36 kind: "Program",
37 schema_version: AST_JSON_SCHEMA_VERSION,
38 body: nodes.iter().map(node_to_json).collect(),
39 }
40}
41
42pub fn node_to_json(node: &SNode) -> Value {
43 let raw = serde_json::to_value(node).expect("AST node should serialize to JSON");
44 normalize_value(raw)
45}
46
47pub fn tokens_to_json(source: &str, tokens: &[Token]) -> Vec<TokenJson> {
48 tokens
49 .iter()
50 .map(|token| token_to_json(source, token))
51 .collect()
52}
53
54pub fn token_to_json(source: &str, token: &Token) -> TokenJson {
55 TokenJson {
56 kind: token_kind_name(&token.kind),
57 lexeme: lexeme_for_span(source, token.span),
58 start: token.span.start,
59 end: token.span.end,
60 line: token.span.line,
61 column: token.span.column,
62 }
63}
64
/// Returns the variant name of `kind` as a static string.
///
/// The returned string is what [`token_to_json`] stores in
/// [`TokenJson::kind`]. Any payload carried by a variant is ignored; only the
/// variant name is reported. The match has no wildcard arm, so every variant
/// is listed explicitly.
pub fn token_kind_name(kind: &TokenKind) -> &'static str {
    match kind {
        TokenKind::Pipeline => "Pipeline",
        TokenKind::Extends => "Extends",
        TokenKind::Override => "Override",
        TokenKind::Let => "Let",
        TokenKind::Var => "Var",
        TokenKind::If => "If",
        TokenKind::Else => "Else",
        TokenKind::For => "For",
        TokenKind::In => "In",
        TokenKind::Match => "Match",
        TokenKind::Retry => "Retry",
        TokenKind::Parallel => "Parallel",
        TokenKind::Return => "Return",
        TokenKind::Import => "Import",
        TokenKind::True => "True",
        TokenKind::False => "False",
        TokenKind::Nil => "Nil",
        TokenKind::Try => "Try",
        TokenKind::Catch => "Catch",
        TokenKind::Throw => "Throw",
        TokenKind::Finally => "Finally",
        TokenKind::Fn => "Fn",
        TokenKind::Spawn => "Spawn",
        TokenKind::While => "While",
        TokenKind::TypeKw => "TypeKw",
        TokenKind::Enum => "Enum",
        TokenKind::EvalPack => "EvalPack",
        TokenKind::Struct => "Struct",
        TokenKind::Interface => "Interface",
        TokenKind::Emit => "Emit",
        TokenKind::Pub => "Pub",
        TokenKind::From => "From",
        TokenKind::To => "To",
        TokenKind::Tool => "Tool",
        TokenKind::Exclusive => "Exclusive",
        TokenKind::Guard => "Guard",
        TokenKind::Require => "Require",
        TokenKind::Deadline => "Deadline",
        TokenKind::Defer => "Defer",
        TokenKind::Yield => "Yield",
        TokenKind::Mutex => "Mutex",
        TokenKind::Break => "Break",
        TokenKind::Continue => "Continue",
        TokenKind::Select => "Select",
        TokenKind::Impl => "Impl",
        TokenKind::Skill => "Skill",
        TokenKind::RequestApproval => "RequestApproval",
        TokenKind::DualControl => "DualControl",
        TokenKind::AskUser => "AskUser",
        TokenKind::EscalateTo => "EscalateTo",
        // Tuple variants: the payload is discarded, only the name is kept.
        TokenKind::Identifier(_) => "Identifier",
        TokenKind::StringLiteral(_) => "StringLiteral",
        TokenKind::InterpolatedString(_) => "InterpolatedString",
        TokenKind::RawStringLiteral(_) => "RawStringLiteral",
        TokenKind::IntLiteral(_) => "IntLiteral",
        TokenKind::FloatLiteral(_) => "FloatLiteral",
        TokenKind::DurationLiteral(_) => "DurationLiteral",
        TokenKind::Eq => "Eq",
        TokenKind::Neq => "Neq",
        TokenKind::And => "And",
        TokenKind::Or => "Or",
        TokenKind::Pipe => "Pipe",
        TokenKind::NilCoal => "NilCoal",
        TokenKind::Pow => "Pow",
        TokenKind::QuestionDot => "QuestionDot",
        TokenKind::Arrow => "Arrow",
        TokenKind::Lte => "Lte",
        TokenKind::Gte => "Gte",
        TokenKind::PlusAssign => "PlusAssign",
        TokenKind::MinusAssign => "MinusAssign",
        TokenKind::StarAssign => "StarAssign",
        TokenKind::SlashAssign => "SlashAssign",
        TokenKind::PercentAssign => "PercentAssign",
        TokenKind::Assign => "Assign",
        TokenKind::Not => "Not",
        TokenKind::Dot => "Dot",
        TokenKind::Plus => "Plus",
        TokenKind::Minus => "Minus",
        TokenKind::Star => "Star",
        TokenKind::Slash => "Slash",
        TokenKind::Percent => "Percent",
        TokenKind::Lt => "Lt",
        TokenKind::Gt => "Gt",
        TokenKind::Question => "Question",
        TokenKind::Bar => "Bar",
        TokenKind::Amp => "Amp",
        TokenKind::LBrace => "LBrace",
        TokenKind::RBrace => "RBrace",
        TokenKind::LParen => "LParen",
        TokenKind::RParen => "RParen",
        TokenKind::LBracket => "LBracket",
        TokenKind::RBracket => "RBracket",
        TokenKind::Comma => "Comma",
        TokenKind::Colon => "Colon",
        TokenKind::Semicolon => "Semicolon",
        TokenKind::At => "At",
        // Struct variants: fields are discarded, only the name is kept.
        TokenKind::LineComment { .. } => "LineComment",
        TokenKind::BlockComment { .. } => "BlockComment",
        TokenKind::Newline => "Newline",
        TokenKind::Eof => "Eof",
    }
}
169
170fn lexeme_for_span(source: &str, span: Span) -> String {
171 source
172 .get(span.start..span.end)
173 .unwrap_or_default()
174 .to_string()
175}
176
177fn normalize_value(value: Value) -> Value {
178 match value {
179 Value::Array(values) => Value::Array(values.into_iter().map(normalize_value).collect()),
180 Value::Object(map) => normalize_object(map),
181 other => other,
182 }
183}
184
185fn normalize_object(mut map: Map<String, Value>) -> Value {
186 if is_span_map(&map) {
187 return normalize_span_map(map);
188 }
189
190 if is_spanned_map(&map) {
191 return normalize_spanned_map(map);
192 }
193
194 if map.len() == 1 {
195 let key = map.keys().next().cloned().expect("map has one key");
196 if is_variant_name(&key) {
197 let fields = map.remove(&key).expect("variant payload is present");
198 return tagged_value(key, normalize_value(fields));
199 }
200 }
201
202 for value in map.values_mut() {
203 let raw = std::mem::take(value);
204 *value = normalize_value(raw);
205 }
206 Value::Object(map)
207}
208
209fn normalize_spanned_map(mut map: Map<String, Value>) -> Value {
210 let node = map.remove("node").expect("spanned node is present");
211 let span = map.remove("span").expect("spanned span is present");
212 let mut normalized = normalize_enum_value(node);
213 if let Value::Object(object) = &mut normalized {
214 object.insert("span".to_string(), normalize_value(span));
215 }
216 normalized
217}
218
219fn normalize_enum_value(value: Value) -> Value {
220 match value {
221 Value::Object(mut map) if map.len() == 1 => {
222 let key = map.keys().next().cloned().expect("map has one key");
223 let fields = map.remove(&key).expect("variant payload is present");
224 tagged_value(key, normalize_value(fields))
225 }
226 Value::String(kind) if is_variant_name(&kind) => tagged_value(kind, Value::Null),
227 other => normalize_value(other),
228 }
229}
230
231fn tagged_value(kind: String, fields: Value) -> Value {
232 let mut object = Map::new();
233 object.insert("kind".to_string(), Value::String(kind));
234 object.insert("fields".to_string(), fields);
235 Value::Object(object)
236}
237
238fn normalize_span_map(mut map: Map<String, Value>) -> Value {
239 if let Some(end_line) = map.remove("end_line") {
240 map.insert("endLine".to_string(), end_line);
241 }
242 Value::Object(map)
243}
244
245fn is_spanned_map(map: &Map<String, Value>) -> bool {
246 map.len() == 2 && map.contains_key("node") && map.contains_key("span")
247}
248
249fn is_span_map(map: &Map<String, Value>) -> bool {
250 map.len() == 5
251 && map.contains_key("start")
252 && map.contains_key("end")
253 && map.contains_key("line")
254 && map.contains_key("column")
255 && map.contains_key("end_line")
256}
257
/// Returns `true` when `name` starts with an ASCII uppercase letter.
///
/// Used as the heuristic for telling enum variant names apart from other
/// keys and strings. The empty string and names starting with a non-ASCII
/// character are not treated as variant names.
fn is_variant_name(name: &str) -> bool {
    matches!(name.chars().next(), Some(first) if first.is_ascii_uppercase())
}
263
#[cfg(test)]
mod tests {
    use harn_lexer::Lexer;

    use super::*;
    use crate::parse_source;

    /// Parses a minimal pipeline and checks the normalized JSON shape: the
    /// envelope metadata, the tagged `kind`/`fields` layout, and that the
    /// span is attached to the node itself.
    #[test]
    fn program_json_projects_tagged_nodes_with_spans() {
        let source = "pipeline main(task) {\n return 1\n}\n";
        let program = parse_source(source).expect("parse");

        let json = program_to_json(&program);

        assert_eq!(json.kind, "Program");
        assert_eq!(json.schema_version, AST_JSON_SCHEMA_VERSION);
        let pipeline = &json.body[0];
        assert_eq!(pipeline["kind"], "Pipeline");
        // The span sits on the tagged node, next to `kind` and `fields`.
        assert_eq!(pipeline["span"]["start"], 0);
        assert_eq!(pipeline["span"]["line"], 1);
        assert_eq!(pipeline["fields"]["name"], "main");
        // Nested nodes are tagged the same way, recursively.
        assert_eq!(pipeline["fields"]["body"][0]["kind"], "ReturnStmt");
        assert_eq!(
            pipeline["fields"]["body"][0]["fields"]["value"]["kind"],
            "IntLiteral"
        );
    }

    /// Lexes a string literal containing a multi-byte character and checks
    /// that `start`/`end` are byte offsets and that the lexeme is the exact
    /// source slice, quotes included.
    #[test]
    fn token_json_preserves_byte_spans_and_lexemes() {
        let source = "let x = \"é\"\n";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.tokenize_with_comments().expect("tokenize");

        let json = tokens_to_json(source, &tokens);
        let string = json
            .iter()
            .find(|token| token.kind == "StringLiteral")
            .expect("string literal token");

        // `find` yields the byte offset of the opening quote.
        let quote = source.find('"').expect("opening quote");
        assert_eq!(string.lexeme, "\"é\"");
        assert_eq!(string.start, quote);
        // `len()` is a byte length, so the multi-byte `é` is counted in bytes.
        assert_eq!(string.end, quote + "\"é\"".len());
        assert_eq!(string.line, 1);
        assert_eq!(string.column, 9);
    }
}