1use crate::error::ParseError;
2use crate::value::{Object, Value};
3
4pub fn parse(source: &str) -> Result<Value, ParseError> {
8 Parser::new(source).parse()
9}
10
/// Payload accumulated for a block that is still open on the parser stack.
#[derive(Debug)]
enum FrameContent {
    /// Key/value entries collected so far.
    Object(Object),
    /// Array elements collected so far.
    Array(Vec<Value>),
}
18
/// One open block on the parser stack.
#[derive(Debug)]
struct Frame {
    /// Name the block was opened with; anonymous frames use `"-"`.
    key: String,
    /// Entries or elements collected for this block so far.
    content: FrameContent,
    /// `true` only for frames built by `Frame::new_anon`.
    is_anon: bool,
}
25
26impl Frame {
27 fn new_object(key: impl Into<String>) -> Self {
28 Self { key: key.into(), content: FrameContent::Object(Object::new()), is_anon: false }
29 }
30 fn new_anon() -> Self {
31 Self { key: "-".into(), content: FrameContent::Object(Object::new()), is_anon: true }
32 }
33
34 fn is_array(&self) -> bool {
35 matches!(self.content, FrameContent::Array(_))
36 }
37
38 #[allow(dead_code)]
39 fn obj_len(&self) -> usize {
40 match &self.content {
41 FrameContent::Object(o) => o.len(),
42 FrameContent::Array(_) => 0,
43 }
44 }
45
46 fn to_value(self) -> Value {
47 match self.content {
48 FrameContent::Object(o) => Value::Object(o),
49 FrameContent::Array(a) => Value::Array(a),
50 }
51 }
52}
53
/// Line-by-line parser state.
struct Parser<'a> {
    /// Source text split on `'\n'`.
    lines: Vec<&'a str>,
    /// 1-based number of the line being processed; attached to every error
    /// built through `err` / `errf`.
    line_num: usize,
    /// Open blocks, innermost last.
    stack: Vec<Frame>,
    /// `true` while the lines of a `"""` multiline string are being collected.
    in_multiline: bool,
    /// Key the multiline string in progress will be assigned to.
    multiline_key: String,
    /// Raw lines collected so far for the multiline string in progress.
    multiline_lines: Vec<String>,
}
64
65impl<'a> Parser<'a> {
66 fn new(source: &'a str) -> Self {
67 let lines: Vec<&str> = source.split('\n').collect();
68 Self {
69 lines,
70 line_num: 0,
71 stack: Vec::new(),
72 in_multiline: false,
73 multiline_key: String::new(),
74 multiline_lines: Vec::new(),
75 }
76 }
77
78 fn parse(mut self) -> Result<Value, ParseError> {
79 self.stack.push(Frame::new_object("__root__"));
80
81 let lines: Vec<String> = self.lines.iter()
82 .map(|l| l.trim_end_matches('\r').to_string())
83 .collect();
84
85 for (i, raw) in lines.iter().enumerate() {
86 self.line_num = i + 1;
87
88 if self.in_multiline {
89 self.process_multiline_line(raw)?;
90 continue;
91 }
92 self.process_line(raw)?;
93 }
94
95 if self.in_multiline {
96 return Err(self.err("E03: Unexpected end of document inside multiline string"));
97 }
98 if self.stack.len() > 1 {
99 let top_key = self.stack.last().unwrap().key.clone();
100 return Err(ParseError::new(
101 lines.len(),
102 format!("E03: Unexpected end of document — unclosed block {:?}", top_key),
103 ));
104 }
105
106 let root = self.stack.pop().unwrap().to_value();
107 Ok(root)
108 }
109
110 fn process_line(&mut self, raw: &str) -> Result<(), ParseError> {
113 let line = raw.trim();
114
115 if line.is_empty() || line.starts_with('#') {
116 return Ok(());
117 }
118
119 if let Some(closer) = line.strip_prefix(":: ") {
121 if closer.is_empty() {
122 return Err(self.err("E02: Block closer missing identifier after \"::\""));
123 }
124 return self.close_block(closer);
125 }
126 if line == "::" {
127 return Err(self.err("E02: Bare \"::\" not permitted in SAS 1.1; use \":: key\" or \":: -\""));
128 }
129
130 if line == "- ::" {
132 return self.open_anon_block();
133 }
134
135 if let Some(rest) = line.strip_prefix("- ") {
137 let val = self.parse_value(rest)?;
138 return self.add_array_item(val);
139 }
140
141 let key_end = line.find(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
143 .unwrap_or(line.len());
144
145 if key_end == 0 {
146 if line.starts_with('-') && line.len() > 1 && line.chars().nth(1).map_or(false, |c| c.is_alphanumeric() || c == '_') {
147 return Err(self.errf(format!("E13: Key must not begin with \"-\": {:?}", line.split_whitespace().next().unwrap_or(line))));
148 }
149 return Err(self.errf(format!("Unexpected token: {:?}", line)));
150 }
151
152 if line.starts_with('-') {
154 return Err(self.errf(format!("E13: Key must not begin with \"-\": {:?}", &line[..key_end])));
155 }
156
157 let key = &line[..key_end];
158 let rest = &line[key_end..];
159
160 if rest == " ::" {
162 return self.open_block(key);
163 }
164
165 if let Some(value_str) = rest.strip_prefix(" -> ") {
167 if value_str.is_empty() {
168 return Err(self.errf(format!("Missing value for key {:?}", key)));
169 }
170 self.check_no_inline_comment(value_str)?;
171 if value_str == "\"\"\"" {
172 return self.start_multiline(key);
173 }
174 let val = self.parse_value(value_str)?;
175 return self.assign_to_frame(key, val);
176 }
177
178 if rest.contains("->") || line.contains("->") {
179 return Err(self.err("E08: Missing spaces around \"->\"; expected \" -> \""));
180 }
181
182 Err(self.errf(format!("Unexpected token after key {:?}: {:?}", key, rest)))
183 }
184
185 fn process_multiline_line(&mut self, raw: &str) -> Result<(), ParseError> {
188 if raw.trim_end() == "\"\"\"" {
189 let value = if self.multiline_lines.is_empty() {
190 String::new()
191 } else {
192 self.multiline_lines.join("\n") + "\n"
193 };
194 let key = std::mem::take(&mut self.multiline_key);
195 self.assign_to_frame(&key, Value::String(value))?;
196 self.in_multiline = false;
197 self.multiline_lines.clear();
198 Ok(())
199 } else {
200 self.multiline_lines.push(raw.to_string());
201 Ok(())
202 }
203 }
204
205 fn start_multiline(&mut self, key: &str) -> Result<(), ParseError> {
206 let frame = self.current_frame_mut();
207 if frame.is_array() {
208 return Err(ParseError::new(self.line_num, "E14: Key-value pair inside array block"));
209 }
210 if let FrameContent::Object(ref obj) = frame.content {
211 if obj.contains_key(key) {
212 return Err(self.errf(format!("E01: Duplicate key {:?}", key)));
213 }
214 }
215 self.in_multiline = true;
216 self.multiline_key = key.to_string();
217 self.multiline_lines.clear();
218 Ok(())
219 }
220
221 fn open_block(&mut self, key: &str) -> Result<(), ParseError> {
224 {
225 let parent = self.current_frame();
226 if parent.is_array() {
227 return Err(self.errf(format!(
228 "E14: Named block opener {:?} inside array block; use \"- ::\" for anonymous elements",
229 format!("{} ::", key)
230 )));
231 }
232 if let FrameContent::Object(ref obj) = parent.content {
233 if obj.contains_key(key) {
234 return Err(self.errf(format!("E01: Duplicate key {:?}", key)));
235 }
236 }
237 }
238 self.stack.push(Frame::new_object(key));
239 Ok(())
240 }
241
242 fn open_anon_block(&mut self) -> Result<(), ParseError> {
243 {
244 let parent = self.current_frame();
245 if let FrameContent::Object(ref obj) = parent.content {
246 if obj.len() > 0 {
247 return Err(self.err("E14: Anonymous block \"- ::\" inside object block (mixed block content)"));
248 }
249 }
250 }
251 {
253 let parent = self.current_frame_mut();
254 if let FrameContent::Object(_) = &parent.content {
255 parent.content = FrameContent::Array(Vec::new());
256 }
257 }
258 if !self.current_frame().is_array() {
259 return Err(self.err("E15: Anonymous block opener \"- ::\" only valid inside array block"));
260 }
261 self.stack.push(Frame::new_anon());
262 Ok(())
263 }
264
265 fn close_block(&mut self, closer: &str) -> Result<(), ParseError> {
266 if self.stack.len() <= 1 {
267 return Err(self.errf(format!("E02: Unexpected block closer {:?} at top level", format!(":: {}", closer))));
268 }
269
270 let frame_key = self.stack.last().unwrap().key.clone();
271 let frame_anon = self.stack.last().unwrap().is_anon;
272
273 if closer == "-" {
274 if !frame_anon {
275 return Err(self.errf(format!(
276 "E15: Anonymous closer \":: -\" used to close named block {:?}", frame_key
277 )));
278 }
279 let frame = self.stack.pop().unwrap();
280 let val = frame.to_value();
281 let parent = self.current_frame_mut();
283 if let FrameContent::Array(ref mut arr) = parent.content {
284 arr.push(val);
285 }
286 return Ok(());
287 }
288
289 if frame_key != closer {
290 return Err(self.errf(format!(
291 "E02: Block closer {:?} does not match opener {:?}",
292 format!(":: {}", closer),
293 format!(":: {}", frame_key),
294 )));
295 }
296
297 let frame = self.stack.pop().unwrap();
298 let val = frame.to_value();
299
300 let parent = self.current_frame_mut();
301 match &mut parent.content {
302 FrameContent::Array(arr) => arr.push(val),
303 FrameContent::Object(obj) => {
304 obj.insert(frame_key, val);
305 }
306 }
307 Ok(())
308 }
309
310 fn assign_to_frame(&mut self, key: &str, val: Value) -> Result<(), ParseError> {
313 let frame = self.current_frame_mut();
314 if frame.is_array() {
315 return Err(ParseError::new(self.line_num, "E14: Key-value pair inside array block"));
316 }
317 if let FrameContent::Object(ref mut obj) = frame.content {
318 if !obj.insert(key.to_string(), val) {
319 return Err(self.errf(format!("E01: Duplicate key {:?}", key)));
320 }
321 }
322 Ok(())
323 }
324
325 fn add_array_item(&mut self, val: Value) -> Result<(), ParseError> {
326 let frame = self.current_frame_mut();
327 if let FrameContent::Object(ref obj) = frame.content {
328 if obj.len() > 0 {
329 return Err(ParseError::new(self.line_num, "E14: Array item inside object block (mixed block content)"));
330 }
331 }
332 if let FrameContent::Object(_) = &frame.content {
333 frame.content = FrameContent::Array(Vec::new());
334 }
335 if let FrameContent::Array(ref mut arr) = frame.content {
336 arr.push(val);
337 }
338 Ok(())
339 }
340
341 fn current_frame(&self) -> &Frame {
342 self.stack.last().unwrap()
343 }
344
345 fn current_frame_mut(&mut self) -> &mut Frame {
346 self.stack.last_mut().unwrap()
347 }
348
349 fn parse_value(&self, raw: &str) -> Result<Value, ParseError> {
352 let s = raw.trim();
353
354 match s {
355 "null" => return Ok(Value::Null),
356 "true" => return Ok(Value::Bool(true)),
357 "false" => return Ok(Value::Bool(false)),
358 _ => {}
359 }
360
361 if matches!(s, "True" | "TRUE" | "False" | "FALSE" | "Null" | "NULL") {
363 return Err(self.errf(format!("E06: Boolean and null must be lowercase; got {:?}", s)));
364 }
365
366 let s_lower = s.to_lowercase();
368 if s_lower == "nan" || s_lower == "infinity" || s_lower == "inf"
369 || s_lower == "+nan" || s_lower == "+infinity"
370 || s_lower == "-nan" || s_lower == "-infinity"
371 {
372 return Err(self.err("E05: NaN and Infinity are not valid SAS number values"));
373 }
374
375 if s.starts_with('+') {
377 return Err(self.errf(format!("E05: Numbers must not have a leading \"+\": {:?}", s)));
378 }
379
380 if s.starts_with('[') { return self.parse_inline_array(s); }
381 if s.starts_with('{') { return self.parse_inline_object(s); }
382 if s.starts_with('"') { return self.parse_string(s).map(Value::String); }
383 if s.starts_with('-') || s.starts_with(|c: char| c.is_ascii_digit()) {
384 return self.parse_number(s);
385 }
386
387 Err(self.errf(format!("Unknown value: {:?}", s)))
388 }
389
390 fn parse_string(&self, raw: &str) -> Result<String, ParseError> {
393 if !raw.starts_with('"') || !raw.ends_with('"') || raw.len() < 2 {
394 return Err(self.errf(format!("Malformed string: {}", raw)));
395 }
396 self.process_escapes(&raw[1..raw.len() - 1])
397 }
398
399 fn process_escapes(&self, s: &str) -> Result<String, ParseError> {
400 let mut result = String::with_capacity(s.len());
401 let chars: Vec<char> = s.chars().collect();
402 let mut i = 0;
403 while i < chars.len() {
404 let ch = chars[i];
405 if ch == '\\' {
406 i += 1;
407 if i >= chars.len() {
408 return Err(self.err("E04: Invalid escape sequence at end of string"));
409 }
410 match chars[i] {
411 '"' => result.push('"'),
412 '\\' => result.push('\\'),
413 'n' => result.push('\n'),
414 't' => result.push('\t'),
415 'r' => result.push('\r'),
416 'u' => {
417 if i + 4 >= chars.len() {
418 return Err(self.err("E04: Invalid \\u escape: insufficient digits"));
419 }
420 let hex: String = chars[i + 1..=i + 4].iter().collect();
421 if !hex.chars().all(|c| c.is_ascii_hexdigit()) {
422 return Err(self.errf(format!("E04: Invalid \\u escape: \"\\u{}\"", hex)));
423 }
424 let codepoint = u32::from_str_radix(&hex, 16).unwrap();
425 let ch = char::from_u32(codepoint)
426 .ok_or_else(|| self.errf(format!("E04: Invalid Unicode codepoint U+{}", hex)))?;
427 result.push(ch);
428 i += 4;
429 }
430 c => return Err(self.errf(format!("E04: Invalid escape sequence \"\\{}\"", c))),
431 }
432 } else if ch == '"' {
433 return Err(self.err("E04: Unescaped double-quote inside string"));
434 } else {
435 result.push(ch);
436 }
437 i += 1;
438 }
439 Ok(result)
440 }
441
442 fn parse_number(&self, s: &str) -> Result<Value, ParseError> {
445 if !is_valid_number(s) {
447 return Err(self.errf(format!("E05: Invalid number format: {:?}", s)));
448 }
449 if s.contains('.') || s.contains('e') || s.contains('E') {
450 let f: f64 = s.parse().map_err(|_| self.errf(format!("E05: Number out of range: {:?}", s)))?;
451 if f.is_infinite() || f.is_nan() {
452 return Err(self.errf(format!("E05: Number out of range: {:?}", s)));
453 }
454 Ok(Value::Float(f))
455 } else {
456 let n: i64 = s.parse().map_err(|_| self.errf(format!("E05: Integer out of range: {:?}", s)))?;
457 Ok(Value::Int(n))
458 }
459 }
460
461 fn parse_inline_array(&self, s: &str) -> Result<Value, ParseError> {
464 if !s.starts_with('[') || !s.ends_with(']') {
465 return Err(self.errf(format!("Malformed inline array: {:?}", s)));
466 }
467 let inner = s[1..s.len() - 1].trim();
468 if inner.is_empty() {
469 return Ok(Value::Array(Vec::new()));
470 }
471 if inner.ends_with(" |") || inner.ends_with('\t') {
472 return Err(self.err("E10: Trailing \"|\" in inline array"));
473 }
474 self.check_pipe_syntax(inner, "inline array")?;
475 let parts = split_by_pipe(inner);
476 let mut result = Vec::with_capacity(parts.len());
477 for part in parts {
478 let val = self.parse_value(part.trim())?;
479 if !val.is_scalar() {
480 return Err(self.err("E11: Inline array elements must be scalar (string, number, boolean, null)"));
481 }
482 result.push(val);
483 }
484 Ok(Value::Array(result))
485 }
486
487 fn parse_inline_object(&self, s: &str) -> Result<Value, ParseError> {
490 if !s.starts_with('{') || !s.ends_with('}') {
491 return Err(self.errf(format!("Malformed inline object: {:?}", s)));
492 }
493 let inner = s[1..s.len() - 1].trim();
494 if inner.is_empty() {
495 return Ok(Value::Object(Object::new()));
496 }
497 if inner.ends_with(" |") {
498 return Err(self.err("E10: Trailing \"|\" in inline object"));
499 }
500 self.check_pipe_syntax(inner, "inline object")?;
501
502 let mut obj = Object::new();
503 for part in split_by_pipe(inner) {
504 let part = part.trim();
505 let arrow = part.find(" -> ")
506 .ok_or_else(|| self.errf(format!("Invalid field in inline object: {:?}", part)))?;
507 let k = &part[..arrow];
508 let v_str = &part[arrow + 4..];
509
510 if !is_valid_key(k) {
511 return Err(self.errf(format!("Invalid key in inline object: {:?}", k)));
512 }
513 if obj.contains_key(k) {
514 return Err(self.errf(format!("E01: Duplicate key {:?} in inline object", k)));
515 }
516 if v_str.trim().starts_with('{') {
517 return Err(self.err("E12: Nested inline objects are not permitted"));
518 }
519 let val = self.parse_value(v_str.trim())?;
520 if !val.is_scalar() {
521 return Err(self.err("E11: Inline object values must be scalar"));
522 }
523 obj.insert(k.to_string(), val);
524 }
525 Ok(Value::Object(obj))
526 }
527
528 fn check_pipe_syntax(&self, inner: &str, context: &str) -> Result<(), ParseError> {
531 let chars: Vec<char> = inner.chars().collect();
532 let mut in_str = false;
533 for (i, &ch) in chars.iter().enumerate() {
534 if ch == '"' { in_str = !in_str; continue; }
535 if !in_str && ch == '|' {
536 let before = if i > 0 { chars[i - 1] } else { '\0' };
537 let after = if i + 1 < chars.len() { chars[i + 1] } else { '\0' };
538 if before != ' ' || after != ' ' {
539 return Err(self.errf(format!(
540 "E09: \"|\" in {} must be surrounded by single spaces", context
541 )));
542 }
543 }
544 }
545 Ok(())
546 }
547
548 fn check_no_inline_comment(&self, value_str: &str) -> Result<(), ParseError> {
549 let mut in_str = false;
550 for ch in value_str.chars() {
551 if ch == '"' { in_str = !in_str; continue; }
552 if !in_str && ch == '#' {
553 return Err(self.err("E07: Inline comments are not permitted"));
554 }
555 }
556 Ok(())
557 }
558
559 fn err(&self, msg: &str) -> ParseError {
562 ParseError::new(self.line_num, msg)
563 }
564
565 fn errf(&self, msg: String) -> ParseError {
566 ParseError::new(self.line_num, msg)
567 }
568}
569
/// Splits `s` on the separator `" | "`, ignoring separators that sit inside
/// double-quoted strings.
///
/// A backslash inside a string escapes the next character, so `\"` does not
/// end the string. The returned slices borrow from `s` and are not trimmed.
/// An empty remainder after the last separator is not returned, so an empty
/// input yields an empty vector.
fn split_by_pipe(s: &str) -> Vec<&str> {
    // Every byte tested here is ASCII, so the split offsets always fall on
    // UTF-8 character boundaries.
    let bytes = s.as_bytes();
    let mut parts = Vec::new();
    let mut start = 0;
    let mut in_str = false;
    let mut escaped = false;
    let mut i = 0;

    while i < bytes.len() {
        let b = bytes[i];
        if in_str {
            if escaped {
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == b'"' {
                in_str = false;
            }
        } else if b == b'"' {
            in_str = true;
        } else if b == b' ' && bytes[i + 1..].starts_with(b"| ") {
            parts.push(&s[start..i]);
            i += 3;
            start = i;
            continue;
        }
        i += 1;
    }

    if start < s.len() {
        parts.push(&s[start..]);
    }
    parts
}
593
/// Returns `true` when `s` has the shape of an accepted number literal.
///
/// Accepted shape: an optional leading `-`, an integer part of ASCII digits
/// with no leading zero unless it is the single digit `0`, an optional
/// fraction of `.` plus one or more digits, and an optional exponent of
/// `e`/`E`, an optional sign and one or more digits.
fn is_valid_number(s: &str) -> bool {
    fn all_digits(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    let unsigned = s.strip_prefix('-').unwrap_or(s);

    // Separate the exponent (if any) from the mantissa at the first e/E.
    let (mantissa, exponent) = match unsigned.find(|c: char| c == 'e' || c == 'E') {
        Some(at) => (&unsigned[..at], Some(&unsigned[at + 1..])),
        None => (unsigned, None),
    };
    if let Some(exp) = exponent {
        let digits = exp
            .strip_prefix('+')
            .or_else(|| exp.strip_prefix('-'))
            .unwrap_or(exp);
        if !all_digits(digits) {
            return false;
        }
    }

    // Separate the fraction (if any) from the integer part at the first '.'.
    let (int_part, fraction) = match mantissa.split_once('.') {
        Some((whole, frac)) => (whole, Some(frac)),
        None => (mantissa, None),
    };
    if let Some(frac) = fraction {
        if !all_digits(frac) {
            return false;
        }
    }

    let has_leading_zero = int_part.len() > 1 && int_part.starts_with('0');
    all_digits(int_part) && !has_leading_zero
}
628
/// Returns `true` when `s` is non-empty, does not start with `-`, and
/// consists only of alphanumeric characters, `_` and `-`.
fn is_valid_key(s: &str) -> bool {
    match s.chars().next() {
        None | Some('-') => false,
        Some(_) => s.chars().all(|c| c == '_' || c == '-' || c.is_alphanumeric()),
    }
}